NFFT Logo 3.2.2
fastgauss.c
00001 /*
00002  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: fastgauss.c 3896 2012-10-10 12:19:26Z tovo $ */
00020 
00027 #include "config.h"
00028 
00029 #include <stdio.h>
00030 #include <stdlib.h>
00031 #include <math.h>
00032 #ifdef HAVE_COMPLEX_H
00033   #include <complex.h>
00034 #endif
00035 
00036 #include "nfft3.h"
00037 #include "nfft3util.h"
00038 #include "infft.h"
00039 
00048 #define DGT_PRE_CEXP     (1U<< 0)
00049 
00059 #define FGT_NDFT         (1U<< 1)
00060 
00069 #define FGT_APPROX_B     (1U<< 2)
00070 
00072 typedef struct
00073 {
00074   int N;                                
00075   int M;                                
00077   double _Complex *alpha;                
00078   double _Complex *f;                    
00080   unsigned flags;                       
00083   double _Complex sigma;                 
00085   double *x;                            
00086   double *y;                            
00088   double _Complex *pre_cexp;             
00090   int n;                                
00091   double p;                             
00093   double _Complex *b;                    
00095   nfft_plan *nplan1;                    
00096   nfft_plan *nplan2;                    
00098 } fgt_plan;
00099 
00107 void dgt_trafo(fgt_plan *ths)
00108 {
00109   int j,k,l;
00110 
00111   for(j=0; j<ths->M; j++)
00112     ths->f[j] = 0;
00113 
00114   if(ths->flags & DGT_PRE_CEXP)
00115     for(j=0,l=0; j<ths->M; j++)
00116       for(k=0; k<ths->N; k++,l++)
00117         ths->f[j] += ths->alpha[k]*ths->pre_cexp[l];
00118   else
00119     for(j=0; j<ths->M; j++)
00120       for(k=0; k<ths->N; k++)
00121         ths->f[j] += ths->alpha[k]*cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00122           (ths->y[j]-ths->x[k]));
00123 }
00124 
00132 void fgt_trafo(fgt_plan *ths)
00133 {
00134   int l;
00135 
00136   if(ths->flags & FGT_NDFT)
00137     {
00138       nfft_adjoint_direct(ths->nplan1);
00139 
00140       for(l=0; l<ths->n; l++)
00141         ths->nplan1->f_hat[l] *= ths->b[l];
00142 
00143       nfft_trafo_direct(ths->nplan2);
00144     }
00145   else
00146     {
00147       nfft_adjoint(ths->nplan1);
00148 
00149       for(l=0; l<ths->n; l++)
00150         ths->nplan1->f_hat[l] *= ths->b[l];
00151 
00152       nfft_trafo(ths->nplan2);
00153     }
00154 }
00155 
00170 void fgt_init_guru(fgt_plan *ths, int N, int M, double _Complex sigma, int n,
00171        double p, int m, unsigned flags)
00172 {
00173   int j,n_fftw;
00174   fftw_plan fplan;
00175 
00176   ths->M = M;
00177   ths->N = N;
00178   ths->sigma = sigma;
00179   ths->flags = flags;
00180 
00181   ths->x = (double*)nfft_malloc(ths->N*sizeof(double));
00182   ths->y = (double*)nfft_malloc(ths->M*sizeof(double));
00183   ths->alpha = (double _Complex*)nfft_malloc(ths->N*sizeof(double _Complex));
00184   ths->f = (double _Complex*)nfft_malloc(ths->M*sizeof(double _Complex));
00185 
00186   ths->n = n;
00187   ths->p = p;
00188 
00189   ths->b = (double _Complex*)nfft_malloc(ths->n*sizeof(double _Complex));
00190 
00191   ths->nplan1 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
00192   ths->nplan2 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
00193 
00194   n_fftw=X(next_power_of_2)(2*ths->n);
00195 
00196   nfft_init_guru(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT|
00197                  PRE_PSI| MALLOC_X| MALLOC_F_HAT| FFTW_INIT, FFTW_MEASURE);
00198   nfft_init_guru(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT|
00199                  PRE_PSI| MALLOC_X| FFTW_INIT, FFTW_MEASURE);
00200 
00201   ths->nplan1->f = ths->alpha;
00202   ths->nplan2->f_hat = ths->nplan1->f_hat;
00203   ths->nplan2->f = ths->f;
00204 
00205   if(ths->flags & FGT_APPROX_B)
00206     {
00207       fplan = fftw_plan_dft_1d(ths->n, ths->b, ths->b, FFTW_FORWARD,
00208                                FFTW_MEASURE);
00209 
00210       for(j=0; j<ths->n; j++)
00211   ths->b[j] = cexp(-ths->p*ths->p*ths->sigma*(j-ths->n/2)*(j-ths->n/2)/
00212                           ((double)ths->n*ths->n)) / ths->n;
00213 
00214       nfft_fftshift_complex(ths->b, 1, &ths->n);
00215       fftw_execute(fplan);
00216       nfft_fftshift_complex(ths->b, 1, &ths->n);
00217 
00218       fftw_destroy_plan(fplan);
00219     }
00220   else
00221     {
00222       for(j=0; j<ths->n; j++)
00223   ths->b[j] = 1.0/ths->p * csqrt(PI/ths->sigma)*
00224     cexp(-PI*PI*(j-ths->n/2)*(j-ths->n/2)/
00225          (ths->p*ths->p*ths->sigma));
00226     }
00227 }
00228 
00240 void fgt_init(fgt_plan *ths, int N, int M, double _Complex sigma, double eps)
00241 {
00242   double p;
00243   int n;
00244 
00245   p=0.5+sqrt(-log(eps)/creal(sigma));
00246   if(p<1)
00247     p=1;
00248 
00249   n=2*((int)ceil(p*cabs(sigma)/PI * sqrt(-log(eps)/creal(sigma))));
00250 
00251   if(N*M<=((int)(1U<<20)))
00252     fgt_init_guru(ths, N, M, sigma, n, p, 7, DGT_PRE_CEXP);
00253   else
00254     fgt_init_guru(ths, N, M, sigma, n, p, 7, 0);
00255 }
00256 
00263 void fgt_init_node_dependent(fgt_plan *ths)
00264 {
00265   int j,k,l;
00266 
00267   if(ths->flags & DGT_PRE_CEXP)
00268    {
00269      ths->pre_cexp=(double _Complex*)nfft_malloc(ths->M*ths->N*
00270             sizeof(double _Complex));
00271 
00272      for(j=0,l=0; j<ths->M; j++)
00273        for(k=0; k<ths->N; k++,l++)
00274          ths->pre_cexp[l]=cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00275                                 (ths->y[j]-ths->x[k]));
00276    }
00277 
00278   for(j=0; j<ths->nplan1->M_total; j++)
00279     ths->nplan1->x[j] = ths->x[j]/ths->p;
00280   for(j=0; j<ths->nplan2->M_total; j++)
00281     ths->nplan2->x[j] = ths->y[j]/ths->p;
00282 
00283   if(ths->nplan1->nfft_flags & PRE_PSI)
00284     nfft_precompute_psi(ths->nplan1);
00285   if(ths->nplan2->nfft_flags & PRE_PSI)
00286     nfft_precompute_psi(ths->nplan2);
00287 }
00288 
00295 void fgt_finalize(fgt_plan *ths)
00296 {
00297   nfft_finalize(ths->nplan2);
00298   nfft_finalize(ths->nplan1);
00299 
00300   nfft_free(ths->nplan2);
00301   nfft_free(ths->nplan1);
00302 
00303   nfft_free(ths->b);
00304 
00305   nfft_free(ths->f);
00306   nfft_free(ths->y);
00307 
00308   nfft_free(ths->alpha);
00309   nfft_free(ths->x);
00310 }
00311 
00318 void fgt_test_init_rand(fgt_plan *ths)
00319 {
00320   int j,k;
00321 
00322   for(k=0; k<ths->N; k++)
00323     ths->x[k] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00324 
00325   for(j=0; j<ths->M; j++)
00326     ths->y[j] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00327 
00328   for(k=0; k<ths->N; k++)
00329     ths->alpha[k] =   (double)rand()/(RAND_MAX)-1.0/2.0
00330             + _Complex_I*(double)rand()/(RAND_MAX)-I/2.0;
00331 }
00332 
00341 double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
00342 {
00343   int r;
00344   ticks t0, t1;
00345   double t_out;
00346   double tau=0.01;
00347 
00348   t_out=0;
00349   r=0;
00350   while(t_out<tau)
00351     {
00352       r++;
00353       t0 = getticks();
00354       if (dgt)
00355         dgt_trafo(ths);
00356       else
00357         fgt_trafo(ths);
00358       t1 = getticks();
00359       t_out += nfft_elapsed_seconds(t1,t0);
00360     }
00361   t_out/=r;
00362 
00363   return t_out;
00364 }
00365 
00375 void fgt_test_simple(int N, int M, double _Complex sigma, double eps)
00376 {
00377   fgt_plan my_plan;
00378   double _Complex *swap_dgt;
00379 
00380   fgt_init(&my_plan, N, M, sigma, eps);
00381   swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*sizeof(double _Complex));
00382 
00383   fgt_test_init_rand(&my_plan);
00384   fgt_init_node_dependent(&my_plan);
00385 
00386   NFFT_SWAP_complex(swap_dgt,my_plan.f);
00387   dgt_trafo(&my_plan);
00388   nfft_vpr_complex(my_plan.f,my_plan.M,"discrete gauss transform");
00389   NFFT_SWAP_complex(swap_dgt,my_plan.f);
00390 
00391   fgt_trafo(&my_plan);
00392   nfft_vpr_complex(my_plan.f,my_plan.M,"fast gauss transform");
00393 
00394   printf("\n relative error: %1.3e\n", X(error_l_infty_1_complex)(swap_dgt,
00395          my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00396 
00397   nfft_free(swap_dgt);
00398   fgt_finalize(&my_plan);
00399 }
00400 
00410 void fgt_test_andersson(void)
00411 {
00412   fgt_plan my_plan;
00413   double _Complex *swap_dgt;
00414   int N;
00415 
00416   double _Complex sigma=4*(138+ _Complex_I*100);
00417   int n=128;
00418   int N_dgt_pre_exp=(int)(1U<<11);
00419   int N_dgt=(int)(1U<<19);
00420 
00421   printf("n=%d, sigma=%1.3e+i%1.3e\n",n,creal(sigma),cimag(sigma));
00422 
00423   for(N=((int)(1U<<6)); N<((int)(1U<<22)); N=N<<1)
00424     {
00425       printf("$%d$\t & ",N);
00426 
00427       if(N<N_dgt_pre_exp)
00428         fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, DGT_PRE_CEXP);
00429       else
00430         fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, 0);
00431 
00432       swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*
00433                 sizeof(double _Complex));
00434 
00435       fgt_test_init_rand(&my_plan);
00436 
00437       fgt_init_node_dependent(&my_plan);
00438 
00439       if(N<N_dgt)
00440   {
00441           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00442           if(N<N_dgt_pre_exp)
00443             my_plan.flags^=DGT_PRE_CEXP;
00444 
00445     printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00446           if(N<N_dgt_pre_exp)
00447             my_plan.flags^=DGT_PRE_CEXP;
00448           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00449   }
00450       else
00451   printf("\t\t & ");
00452 
00453       if(N<N_dgt_pre_exp)
00454   printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00455       else
00456   printf("\t\t & ");
00457 
00458       my_plan.flags^=FGT_NDFT;
00459       printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00460       my_plan.flags^=FGT_NDFT;
00461 
00462       printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00463 
00464       printf("$%1.1e$\t \\\\ \n",
00465        X(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M,
00466             my_plan.alpha, my_plan.N));
00467       fflush(stdout);
00468 
00469       nfft_free(swap_dgt);
00470       fgt_finalize(&my_plan);
00471       fftw_cleanup();
00472     }
00473 }
00474 
00481 void fgt_test_error(void)
00482 {
00483   fgt_plan my_plan;
00484   double _Complex *swap_dgt;
00485   int n,mi;
00486 
00487   double _Complex sigma=4*(138+ _Complex_I*100);
00488   int N=1000;
00489   int M=1000;
00490   int m[2]={7,3};
00491 
00492   printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00493   printf("error=[\n");
00494 
00495   swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
00496 
00497   for(n=8; n<=128; n+=4)
00498     {
00499       printf("%d\t",n);
00500       for(mi=0;mi<2;mi++)
00501         {
00502           fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
00503           fgt_test_init_rand(&my_plan);
00504           fgt_init_node_dependent(&my_plan);
00505 
00506           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00507           dgt_trafo(&my_plan);
00508           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00509 
00510           fgt_trafo(&my_plan);
00511 
00512           printf("%1.3e\t", X(error_l_infty_1_complex)(swap_dgt, my_plan.f,
00513                  my_plan.M, my_plan.alpha, my_plan.N));
00514           fflush(stdout);
00515 
00516           fgt_finalize(&my_plan);
00517           fftw_cleanup();
00518         }
00519       printf("\n");
00520     }
00521   printf("];\n");
00522 
00523   nfft_free(swap_dgt);
00524 }
00525 
00532 void fgt_test_error_p(void)
00533 {
00534   fgt_plan my_plan;
00535   double _Complex *swap_dgt;
00536   int n,pi;
00537 
00538   double _Complex sigma=20+40*_Complex_I;
00539   int N=1000;
00540   int M=1000;
00541   double p[3]={1,1.5,2};
00542 
00543   printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00544   printf("error=[\n");
00545 
00546   swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
00547 
00548   for(n=8; n<=128; n+=4)
00549     {
00550       printf("%d\t",n);
00551       for(pi=0;pi<3;pi++)
00552         {
00553           fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
00554           fgt_test_init_rand(&my_plan);
00555           fgt_init_node_dependent(&my_plan);
00556 
00557           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00558           dgt_trafo(&my_plan);
00559           NFFT_SWAP_complex(swap_dgt,my_plan.f);
00560 
00561           fgt_trafo(&my_plan);
00562 
00563           printf("%1.3e\t", X(error_l_infty_1_complex)(swap_dgt, my_plan.f,
00564                  my_plan.M, my_plan.alpha, my_plan.N));
00565           fflush(stdout);
00566 
00567           fgt_finalize(&my_plan);
00568           fftw_cleanup();
00569         }
00570       printf("\n");
00571     }
00572   printf("];\n");
00573 }
00574 
00580 int main(int argc,char **argv)
00581 {
00582   if(argc!=2)
00583     {
00584       fprintf(stderr,"fastgauss type\n");
00585       fprintf(stderr," type\n");
00586       fprintf(stderr,"  0 - Simple test.\n");
00587       fprintf(stderr,"  1 - Compares accuracy and execution time.\n");
00588       fprintf(stderr,"      Pipe to output_andersson.tex\n");
00589       fprintf(stderr,"  2 - Compares accuracy.\n");
00590       fprintf(stderr,"      Pipe to output_error.m\n");
00591       fprintf(stderr,"  3 - Compares accuracy.\n");
00592       fprintf(stderr,"      Pipe to output_error_p.m\n");
00593       return -1;
00594     }
00595 
00596   if(atoi(argv[1])==0)
00597     fgt_test_simple(10, 10, 5+3*_Complex_I, 0.001);
00598 
00599   if(atoi(argv[1])==1)
00600     fgt_test_andersson();
00601 
00602   if(atoi(argv[1])==2)
00603     fgt_test_error();
00604 
00605   if(atoi(argv[1])==3)
00606     fgt_test_error_p();
00607 
00608   return 1;
00609 }
00610 /* \} */

Generated on Fri Oct 12 2012 by Doxygen 1.8.0-20120409