NFFT Logo 3.2.2
fastsum_benchomp.c
00001 /*
00002  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <string.h>
00021 #include <unistd.h>
00022 
00023 #include "config.h"
00024 
00025 #include <nfft3.h>
00026 #include <nfft3util.h>
00027 
/* Number of measured repetitions per thread count (passed to run_test()). */
#define NREPEAT 5

/* Output stream for the generated LaTeX/TikZ plots; opened in main(). */
static FILE *file_out_tex = NULL;
00031 
/*
 * Build the list of thread counts the benchmark is run with.
 *
 * The upper limit is nfft_get_omp_num_threads().  For a limit of at most
 * five every count 1..limit is listed.  For larger limits the list holds the
 * powers of two not exceeding the limit; if the limit itself is not a power
 * of two it is appended, and if it is additionally even, limit/2 is inserted
 * in front of the largest power of two (a limit of 12 yields 1 2 4 6 8 12).
 *
 * arr      receives a malloc()ed array owned by the caller (release it with
 *          free()); set to NULL when no list could be built.
 * returns  the number of entries stored in *arr, 0 on failure.
 */
int get_nthreads_array(int **arr)
{
  const int max_threads = nfft_get_omp_num_threads();
  /* Despite its name, this flag records whether max_threads is even. */
  const int max_threads_pw2 = (max_threads % 2 == 0);
  int alloc_num = 2;
  int ret_number = 0;
  int *list;
  int k;

  *arr = NULL;

  /* Nothing sensible can be produced for a non-positive limit. */
  if (max_threads < 1)
    return 0;

  if (max_threads <= 5)
  {
    list = malloc(max_threads * sizeof *list);
    if (list == NULL)
      return 0;

    for (k = 0; k < max_threads; k++)
      list[k] = k + 1;

    *arr = list;
    return max_threads;
  }

  /* One slot per power of two <= max_threads plus two spare slots for
   * max_threads/2 and max_threads.  Comparing against max_threads/2 keeps
   * k *= 2 from overflowing. */
  for (k = 1; ; k *= 2)
  {
    alloc_num++;
    if (k > max_threads / 2)
      break;
  }

  list = malloc(alloc_num * sizeof *list);
  if (list == NULL)
    return 0;

  for (k = 1; ; k *= 2)
  {
    /* Same as 2*k > max_threads: k is the largest power of two that fits. */
    const int is_largest = (k > max_threads / 2);

    if (is_largest && k != max_threads && max_threads_pw2)
      list[ret_number++] = max_threads / 2;

    list[ret_number++] = k;

    if (is_largest)
    {
      if (k != max_threads)
        list[ret_number++] = max_threads;
      break;
    }
  }

  *arr = list;
  return ret_number;
}
00073   
00074 
/*
 * Terminate the program unless val equals ok.
 *
 * On a mismatch an error line naming msg and both values is written to
 * stderr and the process exits with status 1.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(1);
}
00084 
/*
 * Generate the benchmark input by running the external
 * ./fastsum_benchomp_createdataset program with arguments d, L and M; its
 * standard output is redirected to fastsum_benchomp_test.data.
 *
 * The command line is echoed to stderr.  The program exits (through
 * check_result_value) if system() does not return 0.
 */
void run_test_create(int d, int L, int M)
{
  char cmd[1025];
  int status;

  snprintf(cmd, 1024,
           "./fastsum_benchomp_createdataset %d %d %d"
           " > fastsum_benchomp_test.data",
           d, L, M);
  fprintf(stderr, "%s\n", cmd);

  status = system(cmd);
  check_result_value(status, 0, "createdataset");
}
00093 
/*
 * Create (or truncate) the file fastsum_benchomp_test.result.
 * A failure to open the file is silently ignored.
 */
void run_test_init_output()
{
  FILE *f = fopen("fastsum_benchomp_test.result", "w");

  if (f == NULL)
    return;

  fclose(f);
}
00100 
/*
 * Parameters of one benchmark configuration.
 *
 * d, L and M are handed to the dataset generator (run_test_create); the
 * remaining fields are handed, in declaration order, to the benchmark
 * executable (run_test).
 */
typedef struct
{
  int d;              /* generator argument 1; shown as "<d>d" in plot titles */
  int L;              /* generator argument 2; shown as "L=" */
  int M;              /* generator argument 3; shown as "M=" */
  int n;              /* benchmark argument 1; shown as "n=" */
  int m;              /* benchmark argument 2; shown as "m=" */
  int p;              /* benchmark argument 3; shown as "p=" */
  char *kernel_name;  /* benchmark argument 4; pointer is stored, not copied */
  double c;           /* benchmark argument 5; never shown in plot titles */
  double eps_I;       /* benchmark argument 6; shown as epsilon_I */
  double eps_B;       /* benchmark argument 7; shown as epsilon_B */
} s_param;
00114 
/* Statistics of one measured quantity over all repetitions of a run. */
typedef struct
{
  double avg;  /* arithmetic mean of the samples */
  double min;  /* smallest sample */
  double max;  /* largest sample */
} s_resval;
00121 
00122 typedef struct
00123 {
00124   int nthreads;
00125   s_resval resval[16];
00126 } s_result;
00127 
00128 typedef struct
00129 {
00130   s_param param;
00131   s_result *results;
00132   int nresults;
00133 } s_testset;
00134 
00135 void run_test(s_resval *res, int nrepeat, int n, int m, int p, char *kernel_name, double c, double eps_I, double eps_B, int nthreads)
00136 {
00137   char cmd[1025];
00138   int r,t;
00139   
00140   for (t = 0; t < 16; t++)
00141   {
00142     res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
00143   }
00144 
00145   if (nthreads < 2)
00146     snprintf(cmd, 1024, "./fastsum_benchomp_detail_single %d %d %d %s %lg %lg %lg < fastsum_benchomp_test.data > fastsum_benchomp_test.out", n, m, p, kernel_name, c, eps_I, eps_B);
00147   else
00148     snprintf(cmd, 1024, "./fastsum_benchomp_detail_threads %d %d %d %s %lg %lg %lg %d < fastsum_benchomp_test.data > fastsum_benchomp_test.out", n, m, p, kernel_name, c, eps_I, eps_B, nthreads);
00149   fprintf(stderr, "%s\n", cmd);
00150   check_result_value(system(cmd), 0, cmd);
00151 
00152   for (r = 0; r < nrepeat; r++)
00153   {
00154     int retval;
00155     double v[16];
00156     FILE *f;
00157     check_result_value(system(cmd), 0, cmd);
00158     f = fopen("fastsum_benchomp_test.out", "r");
00159     retval = fscanf(f, "%lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5, v+6, v+7, v+8, v+9, v+10, v+11, v+12, v+13, v+14, v+15);
00160     check_result_value(retval, 16, "read fastsum_benchomp_test.out");
00161     fclose(f);
00162 
00163     for (t = 0; t < 16; t++)
00164     {
00165       res[t].avg += v[t];
00166       if (res[t].min > v[t])
00167         res[t].min = v[t];
00168       if (res[t].max < v[t])
00169         res[t].max = v[t];
00170     }
00171   }
00172 
00173   for (t = 0; t < 16; t++)
00174     res[t].avg /= nrepeat;
00175 
00176   fprintf(stderr, "%d %d: ", nthreads, nrepeat);
00177   for (t = 0; t < 16; t++)
00178     fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
00179   fprintf(stderr, "\n");
00180 }
00181 
00182 const char *get_psi_string(int flags)
00183 {
00184   if (flags & PRE_PSI)
00185     return "prepsi";
00186   else if (flags & PRE_ONE_PSI)
00187     return "unknownPSI";
00188 
00189   return "nopsi";
00190 }
00191 const char *get_sort_string(int flags)
00192 {
00193   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00194     return "";
00195 
00196   if (flags & NFFT_SORT_NODES)
00197     return "sorted";
00198 
00199   return "unsorted";
00200 }
00201 
00202 const char *get_adjoint_omp_string(int flags)
00203 {
00204   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00205     return "blockwise";
00206 
00207     return "";
00208 }
00209 
/*
 * Bit flags, one per s_param field that can be compared between test sets
 * and shown in or hidden from a plot title.
 */
#define MASK_FSUM_D       (1U << 0)  /* param.d */
#define MASK_FSUM_L       (1U << 1)  /* param.L */
#define MASK_FSUM_M       (1U << 2)  /* param.M */
#define MASK_FSUM_MULTIBW (1U << 3)  /* param.n */
#define MASK_FSUM_WINM    (1U << 4)  /* param.m */
#define MASK_FSUM_P       (1U << 5)  /* param.p */
#define MASK_FSUM_KERNEL  (1U << 6)  /* param.kernel_name */
#define MASK_FSUM_EPSI    (1U << 7)  /* param.eps_I */
#define MASK_FSUM_EPSB    (1U << 8)  /* param.eps_B */
00219 
00220 unsigned int fastsum_determine_different_parameters(s_testset *testsets, int ntestsets)
00221 {
00222   int t;
00223   unsigned int mask = 0;
00224 
00225   if (ntestsets < 2)
00226     return 0;
00227 
00228   for (t = 1; t < ntestsets; t++)
00229   {
00230     if (testsets[t-1].param.d != testsets[t].param.d)
00231       mask |= MASK_FSUM_D;
00232     if (testsets[t-1].param.L != testsets[t].param.L)
00233       mask |= MASK_FSUM_L;
00234     if (testsets[t-1].param.M != testsets[t].param.M)
00235       mask |= MASK_FSUM_M;
00236     if (testsets[t-1].param.n != testsets[t].param.n)
00237       mask |= MASK_FSUM_MULTIBW;
00238     if (testsets[t-1].param.m != testsets[t].param.m)
00239       mask |= MASK_FSUM_WINM;
00240     if (testsets[t-1].param.p != testsets[t].param.p)
00241       mask |= MASK_FSUM_P;
00242     if (strcmp(testsets[t-1].param.kernel_name, testsets[t].param.kernel_name) != 0)
00243       mask |= MASK_FSUM_KERNEL;
00244     if (testsets[t-1].param.eps_I != testsets[t].param.eps_I)
00245       mask |= MASK_FSUM_EPSI;
00246     if (testsets[t-1].param.eps_B != testsets[t].param.eps_B)
00247       mask |= MASK_FSUM_EPSB;
00248   }
00249 
00250   return mask;
00251 }
00252 
/*
 * Copy src to dst, replacing every underscore by the LaTeX sequence "\_{}".
 *
 * dst     destination buffer of at least maxlen bytes
 * src     NUL-terminated input string
 * maxlen  capacity of dst including the terminating NUL
 *
 * dst is always NUL-terminated when maxlen > 0.  If the result does not fit,
 * copying stops in front of the first character (or escape sequence) that
 * would overflow, so an escape sequence is never cut in half.
 */
void strEscapeUnderscore(char *dst, char *src, int maxlen)
{
  static const char escaped[] = "\\_{}";
  const int escaped_len = (int) sizeof escaped - 1;
  int offset = 0;
  int i;

  if (maxlen <= 0)
    return;

  for (i = 0; src[i] != '\0'; i++)
  {
    if (src[i] == '_')
    {
      /* One byte must stay free for the terminating NUL. */
      if (offset + escaped_len > maxlen - 1)
        break;
      memcpy(dst + offset, escaped, escaped_len);
      offset += escaped_len;
    }
    else
    {
      if (offset + 1 > maxlen - 1)
        break;
      dst[offset++] = src[i];
    }
  }

  dst[offset] = '\0';
}
00269 
00270 void fastsum_get_plot_title_minus_indep(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask)
00271 {
00272   unsigned int mask = ~diff_mask;
00273   int offset = 0;
00274   int len;
00275 
00276   len = snprintf(outstr, maxlen, "%s", hostname);
00277   if (len < 0 || len+offset >= maxlen-1) return;
00278   offset += len;
00279 
00280   if (mask & MASK_FSUM_D)
00281   {
00282     len = snprintf(outstr+offset, maxlen-offset, " %dd fastsum", param.d);
00283     if (len < 0 || len+offset >= maxlen-1) return;
00284     offset += len;
00285   }
00286 
00287   if ((mask & (MASK_FSUM_L | MASK_FSUM_M)) && param.L == param.M)
00288   {
00289     len = snprintf(outstr+offset, maxlen-offset, " L=M=%d", param.L);
00290     if (len < 0 || len+offset >= maxlen-1) return;
00291     offset += len;
00292   }
00293   else
00294   {
00295     if (mask & MASK_FSUM_L)
00296     {
00297       len = snprintf(outstr+offset, maxlen-offset, " L=%d", param.L);
00298       if (len < 0 || len+offset >= maxlen-1) return;
00299       offset += len;
00300     }
00301 
00302     if (mask & MASK_FSUM_M)
00303     {
00304       len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M);
00305       if (len < 0 || len+offset >= maxlen-1) return;
00306       offset += len;
00307     }
00308   }
00309 
00310   if (mask & MASK_FSUM_MULTIBW)
00311   {
00312     len = snprintf(outstr+offset, maxlen-offset, " n=%d", param.n);
00313     if (len < 0 || len+offset >= maxlen-1) return;
00314     offset += len;
00315   }
00316 
00317   if (mask & MASK_FSUM_WINM)
00318   {
00319     len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m);
00320     if (len < 0 || len+offset >= maxlen-1) return;
00321     offset += len;
00322   }
00323 
00324   if (mask & MASK_FSUM_P)
00325   {
00326     len = snprintf(outstr+offset, maxlen-offset, " p=%d", param.p);
00327     if (len < 0 || len+offset >= maxlen-1) return;
00328     offset += len;
00329   }
00330 
00331   if (mask & MASK_FSUM_KERNEL)
00332   {
00333     char tmp[maxlen];
00334     strEscapeUnderscore(tmp, param.kernel_name, maxlen);
00335 
00336     len = snprintf(outstr+offset, maxlen-offset, " %s", tmp);
00337     if (len < 0 || len+offset >= maxlen-1) return;
00338     offset += len;
00339   }
00340 
00341   if ((mask & (MASK_FSUM_EPSI | MASK_FSUM_EPSB)) && param.eps_I == param.eps_B)
00342   {
00343     len = snprintf(outstr+offset, maxlen-offset, " $\\varepsilon_\\mathrm{I}$=$\\varepsilon_\\mathrm{B}$=%g", param.eps_I);
00344     if (len < 0 || len+offset >= maxlen-1) return;
00345     offset += len;
00346   }
00347   else
00348   {
00349     if (mask & MASK_FSUM_EPSI)
00350     {
00351       len = snprintf(outstr+offset, maxlen-offset, " $\\varepsilon_\\mathrm{I}$=%g", param.eps_I);
00352       if (len < 0 || len+offset >= maxlen-1) return;
00353       offset += len;
00354     }
00355 
00356     if (mask & MASK_FSUM_EPSB)
00357     {
00358       len = snprintf(outstr+offset, maxlen-offset, " $\\varepsilon_\\mathrm{B}$=%g", param.eps_B);
00359       if (len < 0 || len+offset >= maxlen-1) return;
00360       offset += len;
00361     }
00362   }
00363 }
00364 
00365 void nfft_adjoint_print_output_histo_DFBRT(FILE *out, s_testset testset)
00366 {
00367   int i, size = testset.nresults;
00368   char hostname[1025];
00369 
00370   if (gethostname(hostname, 1024) != 0)
00371     strncpy(hostname, "unnamed", 1024);
00372 
00373   fprintf(out, "\\begin{tikzpicture}\n");
00374   fprintf(out, "\\begin{axis}[");
00375   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00376   fprintf(out, "symbolic x coords={");
00377   for (i = 0; i < size; i++)
00378     if (i > 0)
00379       fprintf(out, ",%d", testset.results[i].nthreads);
00380     else
00381       fprintf(out, "%d", testset.results[i].nthreads);
00382 
00383   fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00384   fprintf(out, " title={%s %dd $\\textrm{NFFT}^\\top$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}", hostname, testset.param.d, testset.param.n, testset.param.M, testset.param.m);
00385   fprintf(out, " ]\n");
00386   fprintf(out, "\\addplot coordinates {");
00387   for (i = 0; i < size; i++)
00388     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[10].avg);
00389   fprintf(out, "};\n");
00390 
00391   fprintf(out, "\\addplot coordinates {");
00392   for (i = 0; i < size; i++)
00393     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[11].avg);
00394   fprintf(out, "};\n");
00395 
00396   fprintf(out, "\\addplot coordinates {");
00397   for (i = 0; i < size; i++)
00398     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[12].avg);
00399   fprintf(out, "};\n");
00400 
00401   fprintf(out, "\\addplot coordinates {");
00402   for (i = 0; i < size; i++)
00403     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
00404   fprintf(out, "};\n");
00405 
00406   fprintf(out, "\\addplot coordinates {");
00407   for (i = 0; i < size; i++)
00408     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[4].avg + testset.results[i].resval[1].avg);
00409   fprintf(out, "};\n");
00410   fprintf(out, "\\legend{D,$\\textrm{F}^\\top$,$\\textrm{B}^\\top$,prepsi,total}\n");
00411   fprintf(out, "\\end{axis}\n");
00412   fprintf(out, "\\end{tikzpicture}\n");
00413   fprintf(out, "\n\n");
00414 
00415   fflush(out);
00416 }
00417 
00418 void nfft_trafo_print_output_histo_DFBRT(FILE *out, s_testset testset)
00419 {
00420   int i, size = testset.nresults;
00421   char hostname[1025];
00422 
00423   if (gethostname(hostname, 1024) != 0)
00424     strncpy(hostname, "unnamed", 1024);
00425 
00426   fprintf(out, "\\begin{tikzpicture}\n");
00427   fprintf(out, "\\begin{axis}[");
00428   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00429   fprintf(out, "symbolic x coords={");
00430   for (i = 0; i < size; i++)
00431     if (i > 0)
00432       fprintf(out, ",%d", testset.results[i].nthreads);
00433     else
00434       fprintf(out, "%d", testset.results[i].nthreads);
00435 
00436   fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00437   fprintf(out, " title={%s %dd $\\textrm{NFFT}$ N=%d $\\sigma$=2 M=%d m=%d prepsi sorted}", hostname, testset.param.d, testset.param.n, testset.param.M, testset.param.m);
00438   fprintf(out, " ]\n");
00439   fprintf(out, "\\addplot coordinates {");
00440   for (i = 0; i < size; i++)
00441     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[13].avg);
00442   fprintf(out, "};\n");
00443 
00444   fprintf(out, "\\addplot coordinates {");
00445   for (i = 0; i < size; i++)
00446     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[14].avg);
00447   fprintf(out, "};\n");
00448 
00449   fprintf(out, "\\addplot coordinates {");
00450   for (i = 0; i < size; i++)
00451     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[15].avg);
00452   fprintf(out, "};\n");
00453 
00454   fprintf(out, "\\addplot coordinates {");
00455   for (i = 0; i < size; i++)
00456     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
00457   fprintf(out, "};\n");
00458 
00459   fprintf(out, "\\addplot coordinates {");
00460   for (i = 0; i < size; i++)
00461     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[6].avg + testset.results[i].resval[2].avg);
00462   fprintf(out, "};\n");
00463   fprintf(out, "\\legend{D,F,B,prepsi,total}\n");
00464   fprintf(out, "\\end{axis}\n");
00465   fprintf(out, "\\end{tikzpicture}\n");
00466   fprintf(out, "\n\n");
00467 
00468   fflush(out);
00469 }
00470 
00471 void fastsum_print_output_histo_PreRfNfT(FILE *out, s_testset testset)
00472 {
00473   int i, size = testset.nresults;
00474   char hostname[1025];
00475   char plottitle[1025];
00476 
00477   if (gethostname(hostname, 1024) != 0)
00478     strncpy(hostname, "unnamed", 1024);
00479 
00480   fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testset.param, 0);
00481 
00482   fprintf(out, "\\begin{tikzpicture}\n");
00483   fprintf(out, "\\begin{axis}[");
00484   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00485   fprintf(out, "symbolic x coords={");
00486   for (i = 0; i < size; i++)
00487     if (i > 0)
00488       fprintf(out, ",%d", testset.results[i].nthreads);
00489     else
00490       fprintf(out, "%d", testset.results[i].nthreads);
00491 
00492   fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00493   fprintf(out, " title={%s}", plottitle);
00494   fprintf(out, " ]\n");
00495   fprintf(out, "\\addplot coordinates {");
00496   for (i = 0; i < size; i++)
00497     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg+testset.results[i].resval[2].avg);
00498   fprintf(out, "};\n");
00499 
00500   fprintf(out, "\\addplot coordinates {");
00501   for (i = 0; i < size; i++)
00502     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
00503   fprintf(out, "};\n");
00504 
00505   fprintf(out, "\\addplot coordinates {");
00506   for (i = 0; i < size; i++)
00507     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[4].avg + testset.results[i].resval[5].avg + testset.results[i].resval[6].avg);
00508   fprintf(out, "};\n");
00509 
00510   fprintf(out, "\\addplot coordinates {");
00511   for (i = 0; i < size; i++)
00512     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[7].avg);
00513   fprintf(out, "};\n");
00514 
00515   fprintf(out, "\\addplot coordinates {");
00516   for (i = 0; i < size; i++)
00517     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[9].avg - testset.results[i].resval[0].avg);
00518   fprintf(out, "};\n");
00519   fprintf(out, "\\legend{prepsi (step 1b),init nearfield (step 1c),far field (steps 2a-c),nearfield (step 2d),total $-$ step 1a}\n");
00520   fprintf(out, "\\end{axis}\n");
00521   fprintf(out, "\\end{tikzpicture}\n");
00522   fprintf(out, "\n\n");
00523 
00524   fflush(out);
00525 }
00526 
00527 void fastsum_print_output_speedup_total_minus_indep(FILE *out, s_testset *testsets, int ntestsets)
00528 {
00529   int i, t;
00530   char hostname[1025];
00531   char plottitle[1025];
00532   unsigned int diff_mask = fastsum_determine_different_parameters(testsets, ntestsets);
00533 
00534   if (gethostname(hostname, 1024) != 0)
00535     strncpy(hostname, "unnamed", 1024);
00536 
00537   fastsum_get_plot_title_minus_indep(plottitle, 1024, hostname, testsets[0].param, diff_mask | MASK_FSUM_WINM);
00538 
00539   fprintf(out, "\\begin{tikzpicture}\n");
00540   fprintf(out, "\\begin{axis}[");
00541   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
00542   fprintf(out, " title={%s}", plottitle);
00543   fprintf(out, " ]\n");
00544 
00545   for (t = 0; t < ntestsets; t++)
00546   {
00547     s_testset testset = testsets[t];
00548 
00549     double tref = 0.0;
00550     for (i = 0; i < testset.nresults; i++)
00551       if (testset.results[i].nthreads == 1)
00552         tref = testset.results[i].resval[9].avg - testset.results[i].resval[0].avg;
00553 
00554     fprintf(out, "\\addplot coordinates {");
00555     for (i = 0; i < testset.nresults; i++)
00556       fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/(testset.results[i].resval[9].avg - testset.results[i].resval[0].avg));
00557     fprintf(out, "};\n");
00558 
00559     for (i = 0; i < testset.nresults; i++)
00560     {
00561       fprintf(stderr, "%d:%.3f  ", testset.results[i].nthreads, tref/(testset.results[i].resval[9].avg - testset.results[i].resval[0].avg));
00562     }
00563     fprintf(stderr, "\n\n");
00564   }
00565 
00566   fprintf(out, "\\legend{{");
00567   for (t = 0; t < ntestsets; t++)
00568   {
00569     char title[256];
00570     if (t > 0)
00571       fprintf(out, "},{");
00572     fastsum_get_plot_title_minus_indep(title, 255, "", testsets[t].param, ~(diff_mask | MASK_FSUM_WINM));
00573     fprintf(out, "%s", title);
00574   }
00575   fprintf(out, "}}\n");
00576   fprintf(out, "\\end{axis}\n");
00577   fprintf(out, "\\end{tikzpicture}\n");
00578   fprintf(out, "\n\n");
00579 
00580   fflush(out);
00581 }
00582 
00583 void run_testset(s_testset *testset, int d, int L, int M, int n, int m, int p, char *kernel_name, double c, double eps_I, double eps_B, int *nthreads_array, int n_threads_array_size)
00584 {
00585   int i;
00586   testset->param.d = d;
00587   testset->param.L = L;
00588   testset->param.M = M;
00589   testset->param.n = n;
00590   testset->param.m = m;
00591   testset->param.p = p;
00592   testset->param.kernel_name = kernel_name;
00593   testset->param.c = c;
00594   testset->param.eps_I = eps_I;
00595   testset->param.eps_B = eps_B;
00596 
00597   testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result));
00598   testset->nresults = n_threads_array_size;
00599 
00600   run_test_create(testset->param.d, testset->param.L, testset->param.M);
00601   for (i = 0; i < n_threads_array_size; i++)
00602   {
00603     testset->results[i].nthreads = nthreads_array[i];
00604     run_test(testset->results[i].resval, NREPEAT, testset->param.n, testset->param.m, testset->param.p, testset->param.kernel_name, testset->param.c, testset->param.eps_I, testset->param.eps_B, testset->results[i].nthreads);
00605   }
00606 
00607 }
00608 
/*
 * Run the benchmark for one fixed configuration (d=3, L=M=100000, n=128,
 * m=4, p=7, kernel "one_over_x", c=0, eps_I=eps_B=0.03125) and write the
 * speedup plot and the three histogram plots to file_out_tex.
 *
 * Does nothing unless both MEASURE_TIME and MEASURE_TIME_FFTW are defined.
 *
 * nthreads_array        thread counts to measure
 * n_threads_array_size  number of entries in nthreads_array
 */
void test1(int *nthreads_array, int n_threads_array_size)
{
#if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
  s_testset testsets[1];

  run_testset(&testsets[0], 3, 100000, 100000, 128, 4, 7, "one_over_x", 0.0, 0.03125, 0.03125, nthreads_array, n_threads_array_size);

  fastsum_print_output_speedup_total_minus_indep(file_out_tex, testsets, 1);

  fastsum_print_output_histo_PreRfNfT(file_out_tex, testsets[0]);

  nfft_adjoint_print_output_histo_DFBRT(file_out_tex, testsets[0]);

  nfft_trafo_print_output_histo_DFBRT(file_out_tex, testsets[0]);

  /* The result array was allocated by run_testset(). */
  free(testsets[0].results);
#else
  /* Nothing to measure in this configuration. */
  (void) nthreads_array;
  (void) n_threads_array_size;
#endif
}
00625 
00626 int main(int argc, char** argv)
00627 {
00628   int *nthreads_array;
00629   int n_threads_array_size = get_nthreads_array(&nthreads_array);
00630   int k;
00631 
00632 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
00633   fprintf(stderr, "WARNING: Detailed time measurements are not activated.\n");
00634   fprintf(stderr, "Please re-run the configure script with options\n");
00635   fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-openmp\n");
00636   fprintf(stderr, "and run \"make clean all\"\n\n");
00637 #endif
00638 
00639   for (k = 0; k < n_threads_array_size; k++)
00640     fprintf(stderr, "%d ", nthreads_array[k]);
00641   fprintf(stderr, "\n");
00642 
00643   file_out_tex = fopen("fastsum_benchomp_results_plots.tex", "w");
00644 
00645   test1(nthreads_array, n_threads_array_size);
00646 
00647   fclose(file_out_tex);
00648 
00649   return 0;
00650 }
00651 

Generated on Fri Oct 12 2012 by Doxygen 1.8.0-20120409