NFFT Logo 3.2.2
nfsft_benchomp_detail.c
00001 /*
00002  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: simple_test.c 3372 2009-10-21 06:04:05Z skunis $ */
00020 
00021 #include <stdio.h>
00022 #include <math.h>
00023 #include <string.h>
00024 #include <stdlib.h>
00025 #include <complex.h>
00026 
00027 #include "nfft3util.h"
00028 #include "nfft3.h"
00029 #include "infft.h"
00030 #ifdef _OPENMP
00031 #include <omp.h>
00032 #endif
00033 
00034 void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f)
00035 {
00036   double re,im;
00037   int k, n, j, t;
00038   nfsft_plan plan;
00039 
00040   fscanf(infile, "%d %d %d", trafo_adjoint, N, M);
00041   *x = (double *)nfft_malloc(2*(*M)*sizeof(double));
00042   *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C));
00043   *f = (C*)nfft_malloc((*M)*sizeof(C));
00044 
00045   memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C));
00046   memset(*f,0U,(*M)*sizeof(C));
00047 
00048 #ifdef _OPENMP
00049   fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
00050 #else
00051   fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
00052 #endif
00053 
00054   nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
00055     NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
00056     PRE_PHI_HUT | FFTW_INIT | FFT_OUT_OF_PLACE, 6);
00057 
00058 #ifdef _OPENMP
00059   fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
00060 #else
00061   fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
00062 #endif
00063 
00064   for (j=0; j < *M; j++)
00065     for (t=0; t < 2; t++)
00066       fscanf(infile, "%lg", (*x)+2*j+t);
00067 
00068   if (trafo_adjoint==0)
00069   {
00070     for (k = 0; k <= *N; k++)
00071       for (n = -k; n <= k; n++)
00072       {
00073         fscanf(infile, "%lg %lg", &re, &im);
00074         (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
00075       }
00076   }
00077   else
00078   {
00079     for (j=0; j < *M; j++)
00080     {
00081       fscanf(infile, "%lg %lg", &re, &im);
00082       (*f)[j] = re + _Complex_I * im;
00083     }
00084   }
00085 
00086   nfsft_finalize(&plan);
00087 }
00088 
00089 void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
00090 {
00091   nfsft_plan plan;
00092   int k, n;
00093 //  int N, M, trafo_adjoint;
00094   int t, j;
00095   ticks t0, t1;
00096   double tt_total, tt_pre;
00097 
00098 //  fscanf(infile, "%d %d %d", &trafo_adjoint, &N, &M);
00099 
00100 /*#ifdef _OPENMP
00101   fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
00102 #else
00103   fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
00104 #endif*/
00105 
00106   /* precomputation (for fast polynomial transform) */
00107 //  nfsft_precompute(N,1000.0,0U,0U);
00108 
00109   /* Initialize transform plan using the guru interface. All input and output
00110    * arrays are allocated by nfsft_init_guru(). Computations are performed with
00111    * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical
00112    * Fourier coefficients is preserved during transformations. The NFFT uses a
00113    * cut-off parameter m = 6. See the NFFT 3 manual for details.
00114    */
00115   nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
00116     NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
00117     PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);
00118 
00119 /*#ifdef _OPENMP
00120   fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
00121 #else
00122   fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
00123 #endif*/
00124 
00125   for (j=0; j < plan.M_total; j++)
00126   {
00127     for (t=0; t < 2; t++)
00128  //     fscanf(infile, "%lg", plan.x+2*j+t);
00129       plan.x[2*j+t] = x[2*j+t];
00130   }
00131 
00132   if (trafo_adjoint==0)
00133   {
00134     memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
00135     for (k = 0; k <= plan.N; k++)
00136       for (n = -k; n <= k; n++)
00137       {
00138 //        fscanf(infile, "%lg %lg", &re, &im);
00139 //        plan.f_hat[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
00140         plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)];
00141       }
00142   }
00143   else
00144   {
00145     for (j=0; j < plan.M_total; j++)
00146     {
00147 //      fscanf(infile, "%lg %lg", &re, &im);
00148 //      plan.f[j] = re + _Complex_I * im;
00149       plan.f[j] = f[j];
00150     }
00151 
00152     memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
00153   }
00154 
00155   t0 = getticks();
00156   /* precomputation (for NFFT, node-dependent) */
00157   nfsft_precompute_x(&plan);
00158   t1 = getticks();
00159   tt_pre = nfft_elapsed_seconds(t1,t0);
00160 
00161   if (trafo_adjoint==0)
00162     nfsft_trafo(&plan);
00163   else
00164     nfsft_adjoint(&plan);
00165   t1 = getticks();
00166   tt_total = nfft_elapsed_seconds(t1,t0);
00167 
00168 #ifndef MEASURE_TIME
00169   plan.MEASURE_TIME_t[0] = 0.0;
00170   plan.MEASURE_TIME_t[2] = 0.0;
00171 #endif
00172 
00173 #ifndef MEASURE_TIME_FFTW
00174   plan.MEASURE_TIME_t[1] = 0.0;
00175 #endif
00176 
00177   printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total);
00178 
00180   nfsft_finalize(&plan);
00181 }
00182 
00183 int main(int argc, char **argv)
00184 {
00185   int m, nfsft_flags, psi_flags;
00186   int nrepeat;
00187   int trafo_adjoint, N, M, r;
00188   double *x;
00189   C *f_hat, *f;
00190 #ifdef _OPENMP
00191   int nthreads;
00192 
00193   if (argc != 6)
00194     return 1;
00195 
00196   nthreads = atoi(argv[5]);
00197   fftw_init_threads();
00198   omp_set_num_threads(nthreads);
00199 #else
00200   if (argc != 5)
00201     return 1;
00202 #endif
00203 
00204   m = atoi(argv[1]);
00205   nfsft_flags = atoi(argv[2]);
00206   psi_flags = atoi(argv[3]);
00207   nrepeat = atoi(argv[4]);
00208 
00209   bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);
00210 
00211   /* precomputation (for fast polynomial transform) */
00212   nfsft_precompute(N,1000.0,0U,0U);
00213 
00214   for (r = 0; r < nrepeat; r++)
00215     bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
00216 
00217   return 0;
00218 }

Generated on Fri Oct 12 2012 by Doxygen 1.8.0-20120409