00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <stdio.h>
00022 #include <math.h>
00023 #include <string.h>
00024 #include <stdlib.h>
00025 #include <complex.h>
00026
00027 #include "nfft3util.h"
00028 #include "nfft3.h"
00029 #include "infft.h"
00030 #ifdef _OPENMP
00031 #include <omp.h>
00032 #endif
00033
00034 void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f)
00035 {
00036 double re,im;
00037 int k, n, j, t;
00038 nfsft_plan plan;
00039
00040 fscanf(infile, "%d %d %d", trafo_adjoint, N, M);
00041 *x = (double *)nfft_malloc(2*(*M)*sizeof(double));
00042 *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C));
00043 *f = (C*)nfft_malloc((*M)*sizeof(C));
00044
00045 memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C));
00046 memset(*f,0U,(*M)*sizeof(C));
00047
00048 #ifdef _OPENMP
00049 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
00050 #else
00051 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
00052 #endif
00053
00054 nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
00055 NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
00056 PRE_PHI_HUT | FFTW_INIT | FFT_OUT_OF_PLACE, 6);
00057
00058 #ifdef _OPENMP
00059 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
00060 #else
00061 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
00062 #endif
00063
00064 for (j=0; j < *M; j++)
00065 for (t=0; t < 2; t++)
00066 fscanf(infile, "%lg", (*x)+2*j+t);
00067
00068 if (trafo_adjoint==0)
00069 {
00070 for (k = 0; k <= *N; k++)
00071 for (n = -k; n <= k; n++)
00072 {
00073 fscanf(infile, "%lg %lg", &re, &im);
00074 (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
00075 }
00076 }
00077 else
00078 {
00079 for (j=0; j < *M; j++)
00080 {
00081 fscanf(infile, "%lg %lg", &re, &im);
00082 (*f)[j] = re + _Complex_I * im;
00083 }
00084 }
00085
00086 nfsft_finalize(&plan);
00087 }
00088
00089 void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
00090 {
00091 nfsft_plan plan;
00092 int k, n;
00093
00094 int t, j;
00095 ticks t0, t1;
00096 double tt_total, tt_pre;
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115 nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
00116 NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
00117 PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);
00118
00119
00120
00121
00122
00123
00124
00125 for (j=0; j < plan.M_total; j++)
00126 {
00127 for (t=0; t < 2; t++)
00128
00129 plan.x[2*j+t] = x[2*j+t];
00130 }
00131
00132 if (trafo_adjoint==0)
00133 {
00134 memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
00135 for (k = 0; k <= plan.N; k++)
00136 for (n = -k; n <= k; n++)
00137 {
00138
00139
00140 plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)];
00141 }
00142 }
00143 else
00144 {
00145 for (j=0; j < plan.M_total; j++)
00146 {
00147
00148
00149 plan.f[j] = f[j];
00150 }
00151
00152 memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
00153 }
00154
00155 t0 = getticks();
00156
00157 nfsft_precompute_x(&plan);
00158 t1 = getticks();
00159 tt_pre = nfft_elapsed_seconds(t1,t0);
00160
00161 if (trafo_adjoint==0)
00162 nfsft_trafo(&plan);
00163 else
00164 nfsft_adjoint(&plan);
00165 t1 = getticks();
00166 tt_total = nfft_elapsed_seconds(t1,t0);
00167
00168 #ifndef MEASURE_TIME
00169 plan.MEASURE_TIME_t[0] = 0.0;
00170 plan.MEASURE_TIME_t[2] = 0.0;
00171 #endif
00172
00173 #ifndef MEASURE_TIME_FFTW
00174 plan.MEASURE_TIME_t[1] = 0.0;
00175 #endif
00176
00177 printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total);
00178
00180 nfsft_finalize(&plan);
00181 }
00182
00183 int main(int argc, char **argv)
00184 {
00185 int m, nfsft_flags, psi_flags;
00186 int nrepeat;
00187 int trafo_adjoint, N, M, r;
00188 double *x;
00189 C *f_hat, *f;
00190 #ifdef _OPENMP
00191 int nthreads;
00192
00193 if (argc != 6)
00194 return 1;
00195
00196 nthreads = atoi(argv[5]);
00197 fftw_init_threads();
00198 omp_set_num_threads(nthreads);
00199 #else
00200 if (argc != 5)
00201 return 1;
00202 #endif
00203
00204 m = atoi(argv[1]);
00205 nfsft_flags = atoi(argv[2]);
00206 psi_flags = atoi(argv[3]);
00207 nrepeat = atoi(argv[4]);
00208
00209 bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);
00210
00211
00212 nfsft_precompute(N,1000.0,0U,0U);
00213
00214 for (r = 0; r < nrepeat; r++)
00215 bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
00216
00217 return 0;
00218 }