NFFT Logo 3.2.2
nfsft_benchomp.c
00001 /*
00002  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <string.h>
00021 #include <unistd.h>
00022 
00023 #include "config.h"
00024 
00025 #include <nfft3.h>
00026 #include <nfft3util.h>
00027 
00028 #define NREPEAT 5
00029 
00030 static FILE* file_out_tex = NULL;
00031 
/**
 * Builds the list of thread counts that the benchmark should be run with.
 *
 * With at most five threads available every count 1..max is listed.
 * Otherwise the list contains all powers of two not exceeding the maximum.
 * If the maximum is not itself a power of two it is appended as last entry,
 * and if it is additionally even, half of it is inserted directly before the
 * largest power of two. The resulting list is in ascending order.
 *
 * @param arr receives a malloc'ed array with the thread counts; ownership
 *            passes to the caller. Set to NULL when no threads are reported.
 * @return number of entries stored in *arr.
 *
 * The program is terminated with exit status 1 if the allocation fails.
 */
int get_nthreads_array(int **arr)
{
  const int max_threads = nfft_get_omp_num_threads();
  const int max_is_even = (max_threads % 2 == 0);
  int capacity = 2; /* extra room for max_threads/2 and max_threads */
  int count = 0;
  int k;

  /* Guard against a nonsensical thread count instead of passing a
   * non-positive size to malloc. */
  if (max_threads <= 0)
  {
    *arr = NULL;
    return 0;
  }

  if (max_threads <= 5)
  {
    *arr = malloc((size_t) max_threads * sizeof **arr);
    if (*arr == NULL)
    {
      fprintf(stderr, "ERROR: out of memory in get_nthreads_array\n");
      exit(1);
    }
    for (k = 0; k < max_threads; k++)
      (*arr)[k] = k + 1;
    return max_threads;
  }

  /* One slot per power of two that does not exceed max_threads. */
  for (k = 1; k <= max_threads; k *= 2)
    capacity++;

  *arr = malloc((size_t) capacity * sizeof **arr);
  if (*arr == NULL)
  {
    fprintf(stderr, "ERROR: out of memory in get_nthreads_array\n");
    exit(1);
  }

  for (k = 1; k <= max_threads; k *= 2)
  {
    /* True for the largest power of two below a non-power-of-two maximum. */
    const int is_last_pow2 = (k != max_threads && 2 * k > max_threads);

    if (is_last_pow2 && max_is_even)
      (*arr)[count++] = max_threads / 2;

    (*arr)[count++] = k;

    if (is_last_pow2)
    {
      (*arr)[count++] = max_threads;
      break;
    }
  }

  return count;
}
00073   
00074 
/**
 * Aborts the program when a value is not the expected one.
 *
 * If val equals ok the function simply returns. Otherwise an error line
 * containing msg and both values is written to stderr and the process exits
 * with status 1.
 */
void check_result_value(const int val, const int ok, const char *msg)
{
  if (val == ok)
    return;

  fprintf(stderr, "ERROR %s: %d not %d\n", msg, val, ok);
  exit(1);
}
00084 
/**
 * Generates the benchmark input data set.
 *
 * Invokes ./nfsft_benchomp_createdataset with the given parameters, with its
 * standard output redirected to nfsft_benchomp_test.data. The command line is
 * echoed to stderr first; a non-zero result of system() ends the program via
 * check_result_value().
 */
void run_test_create(int trafo_adjoint, int N, int M)
{
  char command[1025];
  int status;

  snprintf(command, 1024, "./nfsft_benchomp_createdataset %d %d %d > nfsft_benchomp_test.data", trafo_adjoint, N, M);
  fprintf(stderr, "%s\n", command);

  status = system(command);
  check_result_value(status, 0, "createdataset");
}
00093 
/**
 * Creates or truncates nfsft_benchomp_test.result.
 *
 * The file is opened for writing and closed again right away; a failure to
 * open it is silently ignored.
 */
void run_test_init_output()
{
  FILE *result_file = fopen("nfsft_benchomp_test.result", "w");

  if (result_file == NULL)
    return;

  fclose(result_file);
}
00100 
/* Parameters of one benchmark configuration. */
typedef struct
{
  int trafo_adjoint;          /* 0: plain transform; otherwise labelled as the transposed one in plots */
  int N, M;                   /* handed to the data set generator */
  int m;                      /* handed to the detail benchmark programs */
  int nfsft_flags, psi_flags; /* flag words handed to the detail benchmark programs */
} s_param;
00110 
/* Statistics of one timing column over all repetitions of a run. */
typedef struct
{
  double avg, min, max;
} s_resval;
00117 
00118 typedef struct
00119 {
00120   int nthreads;
00121   s_resval resval[6];
00122 } s_result;
00123 
00124 typedef struct
00125 {
00126   s_param param;
00127   s_result *results;
00128   int nresults;
00129 } s_testset;
00130 
00131 void run_test(s_resval *res, int nrepeat, int m, int nfsft_flags, int psi_flags, int nthreads)
00132 {
00133   FILE *f;
00134   char cmd[1025];
00135   int r,t;
00136   
00137   for (t = 0; t < 6; t++)
00138   {
00139     res[t].avg = 0.0; res[t].min = 1.0/0.0; res[t].max = 0.0;
00140   }
00141 
00142   if (nthreads < 2)
00143     snprintf(cmd, 1024, "./nfsft_benchomp_detail_single %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", m, nfsft_flags, psi_flags, nrepeat);
00144   else
00145     snprintf(cmd, 1024, "./nfsft_benchomp_detail_threads %d %d %d %d %d < nfsft_benchomp_test.data > nfsft_benchomp_test.out", m, nfsft_flags, psi_flags, nrepeat, nthreads);
00146   fprintf(stderr, "%s\n", cmd);
00147 
00148   check_result_value(system(cmd), 0, cmd);
00149 
00150   f = fopen("nfsft_benchomp_test.out", "r");
00151   for (r = 0; r < nrepeat; r++)
00152   {
00153     int retval;
00154     double v[6];
00155 //    FILE *f;
00156 //    check_result_value(system(cmd), 0, cmd);
00157 //    f = fopen("nfsft_benchomp_test.out", "r");
00158     retval = fscanf(f, "%lg %lg %lg %lg %lg %lg", v, v+1, v+2, v+3, v+4, v+5);
00159     check_result_value(retval, 6, "read nfsft_benchomp_test.out");
00160 //    fclose(f);
00161 //    fprintf(stderr, "%.3e %.3e %.3e %.3e %.3e %.3e\n", v[0], v[1], v[2], v[3], v[4], v[5]);
00162     for (t = 0; t < 6; t++)
00163     {
00164       res[t].avg += v[t];
00165       if (res[t].min > v[t])
00166         res[t].min = v[t];
00167       if (res[t].max < v[t])
00168         res[t].max = v[t];
00169     }
00170   }
00171   fclose(f);
00172 
00173   for (t = 0; t < 6; t++)
00174     res[t].avg /= nrepeat;
00175 
00176   fprintf(stderr, "%d %d: ", nthreads, nrepeat);
00177   for (t = 0; t < 6; t++)
00178     fprintf(stderr, "%.3e %.3e %.3e | ", res[t].avg, res[t].min, res[t].max);
00179   fprintf(stderr, "\n");
00180 }
00181 
00182 const char *get_psi_string(int flags)
00183 {
00184   if (flags & PRE_PSI)
00185     return "prepsi";
00186   else if (flags & PRE_ONE_PSI)
00187     return "unknownPSI";
00188 
00189   return "nopsi";
00190 }
00191 const char *get_sort_string(int flags)
00192 {
00193   if (flags & NFFT_SORT_NODES)
00194     return "sorted";
00195 
00196     return "unsorted";
00197 }
00198 
00199 const char *get_adjoint_omp_string(int flags)
00200 {
00201   if (flags & NFFT_OMP_BLOCKWISE_ADJOINT)
00202     return "blockwise";
00203 
00204     return "";
00205 }
00206 
/* Bit flags naming the parameters of a test set; used to record which of them
 * differ between test sets and to select what appears in plot titles. */
#define MASK_TA         0x002U /* trafo_adjoint */
#define MASK_N          0x004U /* N */
#define MASK_M          0x010U /* M */
#define MASK_WINM       0x020U /* m */
#define MASK_FLAGS_PSI  0x040U /* precomputation flags */
#define MASK_FLAGS_SORT 0x080U /* node sorting flag */
#define MASK_FLAGS_BW   0x100U /* blockwise adjoint flag */
#define MASK_FLAGS_FPT  0x200U /* NFSFT_USE_DPT flag */
00215 
00216 unsigned int determine_different_parameters(s_testset *testsets, int ntestsets)
00217 {
00218   int t;
00219   unsigned int mask = 0;
00220 
00221   if (ntestsets < 2)
00222     return 0;
00223 
00224   for (t = 1; t < ntestsets; t++)
00225   {
00226     if (testsets[t-1].param.trafo_adjoint != testsets[t].param.trafo_adjoint)
00227       mask |= MASK_TA;
00228     if (testsets[t-1].param.N != testsets[t].param.N)
00229       mask |= MASK_N;
00230     if (testsets[t-1].param.M != testsets[t].param.M)
00231       mask |= MASK_M;
00232     if (testsets[t-1].param.m != testsets[t].param.m)
00233       mask |= MASK_WINM;
00234     if ((testsets[t-1].param.psi_flags & PRE_ONE_PSI) != (testsets[t].param.psi_flags & PRE_ONE_PSI))
00235       mask |= MASK_FLAGS_PSI;
00236     if ((testsets[t-1].param.psi_flags & NFFT_SORT_NODES) != (testsets[t].param.psi_flags & NFFT_SORT_NODES))
00237       mask |= MASK_FLAGS_SORT;
00238     if ((testsets[t-1].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT) != (testsets[t].param.psi_flags & NFFT_OMP_BLOCKWISE_ADJOINT))
00239       mask |= MASK_FLAGS_BW;
00240     if ((testsets[t-1].param.nfsft_flags & NFSFT_USE_DPT) != (testsets[t].param.nfsft_flags & NFSFT_USE_DPT))
00241       mask |= MASK_FLAGS_FPT;
00242   }
00243 
00244   return mask;
00245 }
00246 
00247 void get_plot_title(char *outstr, int maxlen, char *hostname, s_param param, unsigned int diff_mask)
00248 {
00249   unsigned int mask = ~diff_mask;
00250   int offset = 0;
00251   int len;
00252 
00253   len = snprintf(outstr, maxlen, "%s", hostname);
00254   if (len < 0 || len+offset >= maxlen-1) return;
00255   offset += len;
00256 
00257   if (mask & MASK_TA)
00258   {
00259     len = snprintf(outstr+offset, maxlen-offset, " $\\mathrm{NFSFT}%s$", param.trafo_adjoint==0?"":"^\\top");
00260     if (len < 0 || len+offset >= maxlen-1) return;
00261     offset += len;
00262   }
00263 
00264   if (mask & MASK_N)
00265   {
00266     len = snprintf(outstr+offset, maxlen-offset, " N=%d", param.N);
00267     if (len < 0 || len+offset >= maxlen-1) return;
00268     offset += len;
00269   }
00270 
00271   if (mask & MASK_M)
00272   {
00273     len = snprintf(outstr+offset, maxlen-offset, " M=%d", param.M);
00274     if (len < 0 || len+offset >= maxlen-1) return;
00275     offset += len;
00276   }
00277 
00278   if (mask & MASK_WINM)
00279   {
00280     len = snprintf(outstr+offset, maxlen-offset, " m=%d", param.m);
00281     if (len < 0 || len+offset >= maxlen-1) return;
00282     offset += len;
00283   }
00284 
00285   if (mask & MASK_FLAGS_PSI)
00286   {
00287     len = snprintf(outstr+offset, maxlen-offset, " %s", get_psi_string(param.psi_flags));
00288     if (len < 0 || len+offset >= maxlen-1) return;
00289     offset += len;
00290   }
00291 
00292   if (mask & MASK_FLAGS_SORT)
00293   {
00294     len = snprintf(outstr+offset, maxlen-offset, " %s", get_sort_string(param.psi_flags));
00295     if (len < 0 || len+offset >= maxlen-1) return;
00296     offset += len;
00297   }
00298 
00299   if ((mask & MASK_FLAGS_BW) && strlen(get_adjoint_omp_string(param.psi_flags)) > 0)
00300   {
00301     len = snprintf(outstr+offset, maxlen-offset, " %s", get_adjoint_omp_string(param.psi_flags));
00302     if (len < 0 || len+offset >= maxlen-1) return;
00303     offset += len;
00304   }
00305 
00306   if (mask & MASK_FLAGS_FPT)
00307   {
00308     len = snprintf(outstr+offset, maxlen-offset, param.nfsft_flags & NFSFT_USE_DPT ? " DPT" : "");
00309     if (len < 0 || len+offset >= maxlen-1) return;
00310     offset += len;
00311   }
00312 
00313 }
00314 
00315 void print_output_speedup_total_tref(FILE *out, s_testset *testsets, int ntestsets, int use_tref, double tref)
00316 {
00317   int i, t;
00318   char hostname[1025];
00319   char plottitle[1025];
00320   unsigned int diff_mask = determine_different_parameters(testsets, ntestsets);
00321 
00322   if (gethostname(hostname, 1024) != 0)
00323     strncpy(hostname, "unnamed", 1024);
00324 
00325   get_plot_title(plottitle, 1024, hostname, testsets[0].param, diff_mask);
00326 
00327   fprintf(out, "\\begin{tikzpicture}\n");
00328   fprintf(out, "\\begin{axis}[");
00329   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Speedup, xtick=data, legend style={ legend pos = north west, legend columns=1}, ymajorgrids=true, yminorgrids=true, minor y tick num=4, ");
00330   fprintf(out, " title={%s}", plottitle);
00331   fprintf(out, " ]\n");
00332 
00333   for (t = 0; t < ntestsets; t++)
00334   {
00335     s_testset testset = testsets[t];
00336     fprintf(stderr, "%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
00337     fprintf(stderr, "\n");
00338 
00339     fprintf(out, "\\addplot coordinates {");
00340     for (i = 0; i < testset.nresults; i++)
00341       if (use_tref == 1)
00342         fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
00343       else
00344         fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
00345     fprintf(out, "};\n");
00346 
00347     for (i = 0; i < testset.nresults; i++)
00348       if (use_tref == 1)
00349         fprintf(stderr, "%d:%.3f  ", testset.results[i].nthreads, tref/testset.results[i].resval[5].avg);
00350       else
00351         fprintf(stderr, "%d:%.3f  ", testset.results[i].nthreads, testset.results[0].resval[5].avg/testset.results[i].resval[5].avg);
00352     fprintf(stderr, "\n\n");
00353   }
00354 
00355   fprintf(out, "\\legend{{");
00356   for (t = 0; t < ntestsets; t++)
00357   {
00358     char title[256];
00359     if (t > 0)
00360       fprintf(out, "},{");
00361     get_plot_title(title, 255, "", testsets[t].param, ~(diff_mask));
00362     fprintf(out, "%s", title);
00363   }
00364   fprintf(out, "}}\n");
00365   fprintf(out, "\\end{axis}\n");
00366   fprintf(out, "\\end{tikzpicture}\n");
00367   fprintf(out, "\n\n");
00368 
00369   fflush(out);
00370 }
00371 
00372 void print_output_speedup_total(FILE *out, s_testset *testsets, int ntestsets, int use_tref)
00373 {
00374   double tref = 1.0/0.0;
00375   int t, k;
00376 
00377   if (use_tref == 1)
00378     for (t = 0; t < ntestsets; t++)
00379       for (k = 0; k < testsets[t].nresults; k++)
00380         if (testsets[t].results[k].nthreads == 1 && testsets[t].results[k].resval[5].avg < tref)
00381           tref = testsets[t].results[k].resval[5].avg;
00382 
00383   print_output_speedup_total_tref(out, testsets, ntestsets, use_tref, tref);
00384 }
00385 
00386 void print_output_histo_PENRT(FILE *out, s_testset testset)
00387 {
00388   int i, size = testset.nresults;
00389   char hostname[1025];
00390 
00391   if (gethostname(hostname, 1024) != 0)
00392     strncpy(hostname, "unnamed", 1024);
00393 
00394   fprintf(out, "\\begin{tikzpicture}\n");
00395   fprintf(out, "\\begin{axis}[");
00396   fprintf(out, "width=0.9\\textwidth, height=0.6\\textwidth, ");
00397   fprintf(out, "symbolic x coords={");
00398   for (i = 0; i < size; i++)
00399     if (i > 0)
00400       fprintf(out, ",%d", testset.results[i].nthreads);
00401     else
00402       fprintf(out, "%d", testset.results[i].nthreads);
00403 
00404   fprintf(out, "}, x tick label style={ /pgf/number format/1000 sep=}, xlabel=Number of threads, ylabel=Time in s, xtick=data, legend style={legend columns=-1}, ybar, bar width=7pt, ymajorgrids=true, yminorgrids=true, minor y tick num=1, ");
00405   fprintf(out, " title={%s $\\mathrm{NFSFT}%s$ N=%d M=%d m=%d %s %s %s}", hostname, testset.param.trafo_adjoint==0?"":"^\\top", testset.param.N, testset.param.M, testset.param.m, get_psi_string(testset.param.psi_flags), get_sort_string(testset.param.psi_flags), get_adjoint_omp_string(testset.param.psi_flags));
00406   fprintf(out, " ]\n");
00407   fprintf(out, "\\addplot coordinates {");
00408   for (i = 0; i < size; i++)
00409     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[1].avg);
00410   fprintf(out, "};\n");
00411 
00412   fprintf(out, "\\addplot coordinates {");
00413   for (i = 0; i < size; i++)
00414     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[2].avg);
00415   fprintf(out, "};\n");
00416 
00417   fprintf(out, "\\addplot coordinates {");
00418   for (i = 0; i < size; i++)
00419     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[3].avg);
00420   fprintf(out, "};\n");
00421 
00422   fprintf(out, "\\addplot coordinates {");
00423   for (i = 0; i < size; i++)
00424     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[0].avg + testset.results[i].resval[4].avg);
00425   fprintf(out, "};\n");
00426 
00427   fprintf(out, "\\addplot coordinates {");
00428   for (i = 0; i < size; i++)
00429     fprintf(out, "(%d, %.6e) ", testset.results[i].nthreads, testset.results[i].resval[5].avg);
00430   fprintf(out, "};\n");
00431   fprintf(out, "\\legend{%s,%s,$\\mathrm{NFFT}%s$,rest,total}\n", testset.param.nfsft_flags & NFSFT_USE_DPT ? "DPT" : "FPT", testset.param.trafo_adjoint==0?"c2e":"$\\mathrm{c2e}^\\top$", testset.param.trafo_adjoint==0?"":"^\\top");
00432   fprintf(out, "\\end{axis}\n");
00433   fprintf(out, "\\end{tikzpicture}\n");
00434   fprintf(out, "\n\n");
00435 
00436   fflush(out);
00437 }
00438 
00439 void run_testset(s_testset *testset, int trafo_adjoint, int N, int M, int m, int nfsft_flags, int psi_flags, int *nthreads_array, int n_threads_array_size)
00440 {
00441   int i;
00442   testset->param.trafo_adjoint = trafo_adjoint;
00443   testset->param.N = N;
00444   testset->param.M = M;
00445   testset->param.m = m;
00446   testset->param.nfsft_flags = nfsft_flags;
00447   testset->param.psi_flags = psi_flags;
00448 
00449   testset->results = (s_result*) malloc(n_threads_array_size*sizeof(s_result));
00450   testset->nresults = n_threads_array_size;
00451 
00452   run_test_create(testset->param.trafo_adjoint, testset->param.N, testset->param.M);
00453   for (i = 0; i < n_threads_array_size; i++)
00454   {
00455     testset->results[i].nthreads = nthreads_array[i];
00456     run_test(testset->results[i].resval, NREPEAT, testset->param.m, testset->param.nfsft_flags, testset->param.psi_flags, testset->results[i].nthreads = nthreads_array[i]);
00457   }
00458 
00459 }
00460 
00461 void test1(int *nthreads_array, int n_threads_array_size, int m)
00462 {
00463   s_testset testsets[4];
00464 
00465   run_testset(&testsets[0], 0, 1024, 1000000, m, 0, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00466 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00467   print_output_histo_PENRT(file_out_tex, testsets[0]);
00468 #endif
00469 
00470   run_testset(&testsets[1], 1, 1024, 1000000, m, 0, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00471 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00472   print_output_histo_PENRT(file_out_tex, testsets[1]);
00473 #endif
00474 
00475   print_output_speedup_total(file_out_tex, testsets, 2, 0);
00476 
00477   run_testset(&testsets[2], 0, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES, nthreads_array, n_threads_array_size);
00478 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00479   print_output_histo_PENRT(file_out_tex, testsets[2]);
00480 #endif
00481 
00482   run_testset(&testsets[3], 1, 1024, 1000000, m, NFSFT_USE_DPT, NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT, nthreads_array, n_threads_array_size);
00483 #if defined MEASURE_TIME && defined MEASURE_TIME_FFTW
00484   print_output_histo_PENRT(file_out_tex, testsets[3]);
00485 #endif
00486 
00487   print_output_speedup_total(file_out_tex, testsets+2, 2, 0);
00488 }
00489 
00490 int main(int argc, char** argv)
00491 {
00492   int *nthreads_array;
00493   int n_threads_array_size = get_nthreads_array(&nthreads_array);
00494   int k;
00495 
00496 #if !(defined MEASURE_TIME && defined MEASURE_TIME_FFTW)
00497   fprintf(stderr, "WARNING: Detailed time measurements for NFSFT are not activated.\n");
00498   fprintf(stderr, "For more detailed plots, please re-run the configure script with options\n");
00499   fprintf(stderr, "--enable-measure-time --enable-measure-time-fftw --enable-nfsft --enable-openmp\n");
00500   fprintf(stderr, "and run \"make clean all\"\n\n");
00501 #endif
00502 
00503   for (k = 0; k < n_threads_array_size; k++)
00504     fprintf(stderr, "%d ", nthreads_array[k]);
00505   fprintf(stderr, "\n");
00506 
00507   file_out_tex = fopen("nfsft_benchomp_results_plots.tex", "w");
00508 
00509   test1(nthreads_array, n_threads_array_size, 2);
00510   test1(nthreads_array, n_threads_array_size, 4);
00511   test1(nthreads_array, n_threads_array_size, 6);
00512   test1(nthreads_array, n_threads_array_size, 8);
00513 
00514   fclose(file_out_tex);
00515 
00516   return 0;
00517 }

Generated on Fri Oct 12 2012 by Doxygen 1.8.0-20120409