00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <stdio.h>
00022 #include <math.h>
00023 #include <string.h>
00024 #include <stdlib.h>
00025 #include <complex.h>
00026
00027 #include "nfft3util.h"
00028 #include "nfft3.h"
00029 #include "infft.h"
00030 #include "fastsum.h"
00031 #include "kernels.h"
00032 #ifdef _OPENMP
00033 #include <omp.h>
00034 #endif
00035
00036 int bench_openmp(FILE *infile, int n, int m, int p, double _Complex (*kernel)(double , int , const double *), double c, double eps_I, double eps_B)
00037 {
00038 fastsum_plan my_fastsum_plan;
00039 int d, L, M;
00040 int t, j;
00041 double re,im;
00042 double r_max = 0.25 - my_fastsum_plan.eps_B/2.0;
00043 ticks t0, t1;
00044 double tt_total;
00045
00046 fscanf(infile, "%d %d %d", &d, &L, &M);
00047
00048 #ifdef _OPENMP
00049 fftw_import_wisdom_from_filename("fastsum_benchomp_detail_threads.plan");
00050 #else
00051 fftw_import_wisdom_from_filename("fastsum_benchomp_detail_single.plan");
00052 #endif
00053
00054 fastsum_init_guru(&my_fastsum_plan, d, L, M, kernel, &c, NEARFIELD_BOXES, n, m, p, eps_I, eps_B);
00055
00056 #ifdef _OPENMP
00057 fftw_export_wisdom_to_filename("fastsum_benchomp_detail_threads.plan");
00058 #else
00059 fftw_export_wisdom_to_filename("fastsum_benchomp_detail_single.plan");
00060 #endif
00061
00062 for (j=0; j < L; j++)
00063 {
00064 for (t=0; t < d; t++)
00065 {
00066 double v;
00067 fscanf(infile, "%lg", &v);
00068 my_fastsum_plan.x[d*j+t] = v * r_max;
00069 }
00070 }
00071
00072 for (j=0; j < L; j++)
00073 {
00074 fscanf(infile, "%lg %lg", &re, &im);
00075 my_fastsum_plan.alpha[j] = re + _Complex_I * im;
00076 }
00077
00078 for (j=0; j < M; j++)
00079 {
00080 for (t=0; t < d; t++)
00081 {
00082 double v;
00083 fscanf(infile, "%lg", &v);
00084 my_fastsum_plan.y[d*j+t] = v * r_max;
00085 }
00086 }
00087
00089 t0 = getticks();
00090 fastsum_precompute(&my_fastsum_plan);
00091
00093 fastsum_trafo(&my_fastsum_plan);
00094 t1 = getticks();
00095 tt_total = nfft_elapsed_seconds(t1,t0);
00096
00097 #ifndef MEASURE_TIME
00098 my_fastsum_plan.MEASURE_TIME_t[0] = 0.0;
00099 my_fastsum_plan.MEASURE_TIME_t[1] = 0.0;
00100 my_fastsum_plan.MEASURE_TIME_t[2] = 0.0;
00101 my_fastsum_plan.MEASURE_TIME_t[3] = 0.0;
00102 my_fastsum_plan.MEASURE_TIME_t[4] = 0.0;
00103 my_fastsum_plan.MEASURE_TIME_t[5] = 0.0;
00104 my_fastsum_plan.MEASURE_TIME_t[6] = 0.0;
00105 my_fastsum_plan.MEASURE_TIME_t[7] = 0.0;
00106 my_fastsum_plan.mv1.MEASURE_TIME_t[0] = 0.0;
00107 my_fastsum_plan.mv1.MEASURE_TIME_t[2] = 0.0;
00108 my_fastsum_plan.mv2.MEASURE_TIME_t[0] = 0.0;
00109 my_fastsum_plan.mv2.MEASURE_TIME_t[2] = 0.0;
00110 #endif
00111 #ifndef MEASURE_TIME_FFTW
00112 my_fastsum_plan.mv1.MEASURE_TIME_t[1] = 0.0;
00113 my_fastsum_plan.mv2.MEASURE_TIME_t[1] = 0.0;
00114 #endif
00115
00116 printf("%.6e %.6e %.6e %6e %.6e %.6e %.6e %.6e %.6e %6e %.6e %.6e %6e %.6e %.6e %6e\n", my_fastsum_plan.MEASURE_TIME_t[0], my_fastsum_plan.MEASURE_TIME_t[1], my_fastsum_plan.MEASURE_TIME_t[2], my_fastsum_plan.MEASURE_TIME_t[3], my_fastsum_plan.MEASURE_TIME_t[4], my_fastsum_plan.MEASURE_TIME_t[5], my_fastsum_plan.MEASURE_TIME_t[6], my_fastsum_plan.MEASURE_TIME_t[7], tt_total-my_fastsum_plan.MEASURE_TIME_t[0]-my_fastsum_plan.MEASURE_TIME_t[1]-my_fastsum_plan.MEASURE_TIME_t[2]-my_fastsum_plan.MEASURE_TIME_t[3]-my_fastsum_plan.MEASURE_TIME_t[4]-my_fastsum_plan.MEASURE_TIME_t[5]-my_fastsum_plan.MEASURE_TIME_t[6]-my_fastsum_plan.MEASURE_TIME_t[7], tt_total, my_fastsum_plan.mv1.MEASURE_TIME_t[0], my_fastsum_plan.mv1.MEASURE_TIME_t[1], my_fastsum_plan.mv1.MEASURE_TIME_t[2], my_fastsum_plan.mv2.MEASURE_TIME_t[0], my_fastsum_plan.mv2.MEASURE_TIME_t[1], my_fastsum_plan.mv2.MEASURE_TIME_t[2]);
00117
00118 fastsum_finalize(&my_fastsum_plan);
00119
00120 return 0;
00121 }
00122
00123 int main(int argc, char **argv)
00124 {
00125 int n;
00126 int m;
00127 int p;
00128 char *s;
00129 double _Complex (*kernel)(double , int , const double *);
00130 double c;
00131 double eps_I;
00132 double eps_B;
00135 #ifdef _OPENMP
00136 int nthreads;
00137
00138 if (argc != 9)
00139 return 1;
00140
00141 nthreads = atoi(argv[8]);
00142 fftw_init_threads();
00143 omp_set_num_threads(nthreads);
00144 #else
00145 if (argc != 8)
00146 return 1;
00147 #endif
00148
00149 n=atoi(argv[1]);
00150 m=atoi(argv[2]);
00151 p=atoi(argv[3]);
00152 s=argv[4];
00153 c=atof(argv[5]);
00154 eps_I=atof(argv[6]);
00155 eps_B=atof(argv[7]);
00156 if (strcmp(s,"gaussian")==0)
00157 kernel = gaussian;
00158 else if (strcmp(s,"multiquadric")==0)
00159 kernel = multiquadric;
00160 else if (strcmp(s,"inverse_multiquadric")==0)
00161 kernel = inverse_multiquadric;
00162 else if (strcmp(s,"logarithm")==0)
00163 kernel = logarithm;
00164 else if (strcmp(s,"thinplate_spline")==0)
00165 kernel = thinplate_spline;
00166 else if (strcmp(s,"one_over_square")==0)
00167 kernel = one_over_square;
00168 else if (strcmp(s,"one_over_modulus")==0)
00169 kernel = one_over_modulus;
00170 else if (strcmp(s,"one_over_x")==0)
00171 kernel = one_over_x;
00172 else if (strcmp(s,"inverse_multiquadric3")==0)
00173 kernel = inverse_multiquadric3;
00174 else if (strcmp(s,"sinc_kernel")==0)
00175 kernel = sinc_kernel;
00176 else if (strcmp(s,"cosc")==0)
00177 kernel = cosc;
00178 else if (strcmp(s,"cot")==0)
00179 kernel = kcot;
00180 else
00181 {
00182 s="multiquadric";
00183 kernel = multiquadric;
00184 }
00185
00186 bench_openmp(stdin, n, m, p, kernel, c, eps_I, eps_B);
00187
00188 return 0;
00189 }
00190