00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include <stdio.h>
00028 #include <stdlib.h>
00029 #include <math.h>
00030 #include <sys/resource.h>
00031 #include <time.h>
00032 #include <complex.h>
00033
00034 #include "nfft3.h"
00035 #include "nfft3util.h"
00036
00045 #define DGT_PRE_CEXP (1U<< 0)
00046
00056 #define FGT_NDFT (1U<< 1)
00057
00066 #define FGT_APPROX_B (1U<< 2)
00067
00069 typedef struct
00070 {
00071 int N;
00072 int M;
00074 double _Complex *alpha;
00075 double _Complex *f;
00077 unsigned flags;
00080 double _Complex sigma;
00082 double *x;
00083 double *y;
00085 double _Complex *pre_cexp;
00087 int n;
00088 double p;
00090 double _Complex *b;
00092 nfft_plan *nplan1;
00093 nfft_plan *nplan2;
00095 } fgt_plan;
00096
00104 void dgt_trafo(fgt_plan *ths)
00105 {
00106 int j,k,l;
00107
00108 for(j=0; j<ths->M; j++)
00109 ths->f[j] = 0;
00110
00111 if(ths->flags & DGT_PRE_CEXP)
00112 for(j=0,l=0; j<ths->M; j++)
00113 for(k=0; k<ths->N; k++,l++)
00114 ths->f[j] += ths->alpha[k]*ths->pre_cexp[l];
00115 else
00116 for(j=0; j<ths->M; j++)
00117 for(k=0; k<ths->N; k++)
00118 ths->f[j] += ths->alpha[k]*cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00119 (ths->y[j]-ths->x[k]));
00120 }
00121
00129 void fgt_trafo(fgt_plan *ths)
00130 {
00131 int l;
00132
00133 if(ths->flags & FGT_NDFT)
00134 {
00135 ndft_adjoint(ths->nplan1);
00136
00137 for(l=0; l<ths->n; l++)
00138 ths->nplan1->f_hat[l] *= ths->b[l];
00139
00140 ndft_trafo(ths->nplan2);
00141 }
00142 else
00143 {
00144 nfft_adjoint(ths->nplan1);
00145
00146 for(l=0; l<ths->n; l++)
00147 ths->nplan1->f_hat[l] *= ths->b[l];
00148
00149 nfft_trafo(ths->nplan2);
00150 }
00151 }
00152
00167 void fgt_init_guru(fgt_plan *ths, int N, int M, double _Complex sigma, int n,
00168 double p, int m, unsigned flags)
00169 {
00170 int j,n_fftw;
00171 fftw_plan fplan;
00172
00173 ths->M = M;
00174 ths->N = N;
00175 ths->sigma = sigma;
00176 ths->flags = flags;
00177
00178 ths->x = (double*)nfft_malloc(ths->N*sizeof(double));
00179 ths->y = (double*)nfft_malloc(ths->M*sizeof(double));
00180 ths->alpha = (double _Complex*)nfft_malloc(ths->N*sizeof(double _Complex));
00181 ths->f = (double _Complex*)nfft_malloc(ths->M*sizeof(double _Complex));
00182
00183 ths->n = n;
00184 ths->p = p;
00185
00186 ths->b = (double _Complex*)nfft_malloc(ths->n*sizeof(double _Complex));
00187
00188 ths->nplan1 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
00189 ths->nplan2 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
00190
00191 n_fftw=nfft_next_power_of_2(2*ths->n);
00192
00193 nfft_init_guru(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT|
00194 PRE_PSI| MALLOC_X| MALLOC_F_HAT| FFTW_INIT, FFTW_MEASURE);
00195 nfft_init_guru(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT|
00196 PRE_PSI| MALLOC_X| FFTW_INIT, FFTW_MEASURE);
00197
00198 ths->nplan1->f = ths->alpha;
00199 ths->nplan2->f_hat = ths->nplan1->f_hat;
00200 ths->nplan2->f = ths->f;
00201
00202 if(ths->flags & FGT_APPROX_B)
00203 {
00204 fplan = fftw_plan_dft_1d(ths->n, ths->b, ths->b, FFTW_FORWARD,
00205 FFTW_MEASURE);
00206
00207 for(j=0; j<ths->n; j++)
00208 ths->b[j] = cexp(-ths->p*ths->p*ths->sigma*(j-ths->n/2)*(j-ths->n/2)/
00209 ((double)ths->n*ths->n)) / ths->n;
00210
00211 nfft_fftshift_complex(ths->b, 1, &ths->n);
00212 fftw_execute(fplan);
00213 nfft_fftshift_complex(ths->b, 1, &ths->n);
00214
00215 fftw_destroy_plan(fplan);
00216 }
00217 else
00218 {
00219 for(j=0; j<ths->n; j++)
00220 ths->b[j] = 1.0/ths->p * csqrt(PI/ths->sigma)*
00221 cexp(-PI*PI*(j-ths->n/2)*(j-ths->n/2)/
00222 (ths->p*ths->p*ths->sigma));
00223 }
00224 }
00225
00237 void fgt_init(fgt_plan *ths, int N, int M, double _Complex sigma, double eps)
00238 {
00239 double p;
00240 int n;
00241
00242 p=0.5+sqrt(-log(eps)/creal(sigma));
00243 if(p<1)
00244 p=1;
00245
00246 n=2*((int)ceil(p*cabs(sigma)/PI * sqrt(-log(eps)/creal(sigma))));
00247
00248 if(N*M<=((int)(1U<<20)))
00249 fgt_init_guru(ths, N, M, sigma, n, p, 7, DGT_PRE_CEXP);
00250 else
00251 fgt_init_guru(ths, N, M, sigma, n, p, 7, 0);
00252 }
00253
00260 void fgt_init_node_dependent(fgt_plan *ths)
00261 {
00262 int j,k,l;
00263
00264 if(ths->flags & DGT_PRE_CEXP)
00265 {
00266 ths->pre_cexp=(double _Complex*)nfft_malloc(ths->M*ths->N*
00267 sizeof(double _Complex));
00268
00269 for(j=0,l=0; j<ths->M; j++)
00270 for(k=0; k<ths->N; k++,l++)
00271 ths->pre_cexp[l]=cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00272 (ths->y[j]-ths->x[k]));
00273 }
00274
00275 for(j=0; j<ths->nplan1->M_total; j++)
00276 ths->nplan1->x[j] = ths->x[j]/ths->p;
00277 for(j=0; j<ths->nplan2->M_total; j++)
00278 ths->nplan2->x[j] = ths->y[j]/ths->p;
00279
00280 if(ths->nplan1->nfft_flags & PRE_PSI)
00281 nfft_precompute_psi(ths->nplan1);
00282 if(ths->nplan2->nfft_flags & PRE_PSI)
00283 nfft_precompute_psi(ths->nplan2);
00284 }
00285
00292 void fgt_finalize(fgt_plan *ths)
00293 {
00294 nfft_finalize(ths->nplan2);
00295 nfft_finalize(ths->nplan1);
00296
00297 nfft_free(ths->nplan2);
00298 nfft_free(ths->nplan1);
00299
00300 nfft_free(ths->b);
00301
00302 nfft_free(ths->f);
00303 nfft_free(ths->y);
00304
00305 nfft_free(ths->alpha);
00306 nfft_free(ths->x);
00307 }
00308
00315 void fgt_test_init_rand(fgt_plan *ths)
00316 {
00317 int j,k;
00318
00319 for(k=0; k<ths->N; k++)
00320 ths->x[k] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00321
00322 for(j=0; j<ths->M; j++)
00323 ths->y[j] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00324
00325 for(k=0; k<ths->N; k++)
00326 ths->alpha[k] = (double)rand()/(RAND_MAX)-1.0/2.0
00327 + _Complex_I*(double)rand()/(RAND_MAX)-I/2.0;
00328 }
00329
00338 double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
00339 {
00340 int r;
00341 double t_out,t;
00342 double tau=0.01;
00343
00344 t_out=0;
00345 r=0;
00346 while(t_out<tau)
00347 {
00348 r++;
00349 if(dgt)
00350 {
00351 t=nfft_second();
00352 dgt_trafo(ths);
00353 }
00354 else
00355 {
00356 t=nfft_second();
00357 fgt_trafo(ths);
00358 }
00359 t_out+=nfft_second()-t;
00360 }
00361 t_out/=r;
00362
00363 return t_out;
00364 }
00365
00375 void fgt_test_simple(int N, int M, double _Complex sigma, double eps)
00376 {
00377 fgt_plan my_plan;
00378 double _Complex *swap_dgt;
00379
00380 fgt_init(&my_plan, N, M, sigma, eps);
00381 swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*sizeof(double _Complex));
00382
00383 fgt_test_init_rand(&my_plan);
00384 fgt_init_node_dependent(&my_plan);
00385
00386 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00387 dgt_trafo(&my_plan);
00388 nfft_vpr_complex(my_plan.f,my_plan.M,"discrete gauss transform");
00389 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00390
00391 fgt_trafo(&my_plan);
00392 nfft_vpr_complex(my_plan.f,my_plan.M,"fast gauss transform");
00393
00394 printf("\n relative error: %1.3e\n", nfft_error_l_infty_1_complex(swap_dgt,
00395 my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00396
00397 nfft_free(swap_dgt);
00398 fgt_finalize(&my_plan);
00399 }
00400
00410 void fgt_test_andersson(void)
00411 {
00412 fgt_plan my_plan;
00413 double _Complex *swap_dgt;
00414 int N;
00415
00416 double _Complex sigma=4*(138+ _Complex_I*100);
00417 int n=128;
00418 int N_dgt_pre_exp=(int)(1U<<11);
00419 int N_dgt=(int)(1U<<19);
00420
00421 printf("n=%d, sigma=%1.3e+i%1.3e\n",n,creal(sigma),cimag(sigma));
00422
00423 for(N=((int)(1U<<6)); N<((int)(1U<<22)); N=N<<1)
00424 {
00425 printf("$%d$\t & ",N);
00426
00427 if(N<N_dgt_pre_exp)
00428 fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, DGT_PRE_CEXP);
00429 else
00430 fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, 0);
00431
00432 swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*
00433 sizeof(double _Complex));
00434
00435 fgt_test_init_rand(&my_plan);
00436
00437 fgt_init_node_dependent(&my_plan);
00438
00439 if(N<N_dgt)
00440 {
00441 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00442 if(N<N_dgt_pre_exp)
00443 my_plan.flags^=DGT_PRE_CEXP;
00444
00445 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00446 if(N<N_dgt_pre_exp)
00447 my_plan.flags^=DGT_PRE_CEXP;
00448 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00449 }
00450 else
00451 printf("\t\t & ");
00452
00453 if(N<N_dgt_pre_exp)
00454 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00455 else
00456 printf("\t\t & ");
00457
00458 my_plan.flags^=FGT_NDFT;
00459 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00460 my_plan.flags^=FGT_NDFT;
00461
00462 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00463
00464 printf("$%1.1e$\t \\\\ \n",
00465 nfft_error_l_infty_1_complex(swap_dgt, my_plan.f, my_plan.M,
00466 my_plan.alpha, my_plan.N));
00467 fflush(stdout);
00468
00469 nfft_free(swap_dgt);
00470 fgt_finalize(&my_plan);
00471 fftw_cleanup();
00472 }
00473 }
00474
00481 void fgt_test_error(void)
00482 {
00483 fgt_plan my_plan;
00484 double _Complex *swap_dgt;
00485 int n,mi;
00486
00487 double _Complex sigma=4*(138+ _Complex_I*100);
00488 int N=1000;
00489 int M=1000;
00490 int m[2]={7,3};
00491
00492 printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00493 printf("error=[\n");
00494
00495 swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
00496
00497 for(n=8; n<=128; n+=4)
00498 {
00499 printf("%d\t",n);
00500 for(mi=0;mi<2;mi++)
00501 {
00502 fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
00503 fgt_test_init_rand(&my_plan);
00504 fgt_init_node_dependent(&my_plan);
00505
00506 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00507 dgt_trafo(&my_plan);
00508 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00509
00510 fgt_trafo(&my_plan);
00511
00512 printf("%1.3e\t", nfft_error_l_infty_1_complex(swap_dgt, my_plan.f,
00513 my_plan.M, my_plan.alpha, my_plan.N));
00514 fflush(stdout);
00515
00516 fgt_finalize(&my_plan);
00517 fftw_cleanup();
00518 }
00519 printf("\n");
00520 }
00521 printf("];\n");
00522
00523 nfft_free(swap_dgt);
00524 }
00525
00532 void fgt_test_error_p(void)
00533 {
00534 fgt_plan my_plan;
00535 double _Complex *swap_dgt;
00536 int n,pi;
00537
00538 double _Complex sigma=20+40*_Complex_I;
00539 int N=1000;
00540 int M=1000;
00541 double p[3]={1,1.5,2};
00542
00543 printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00544 printf("error=[\n");
00545
00546 swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
00547
00548 for(n=8; n<=128; n+=4)
00549 {
00550 printf("%d\t",n);
00551 for(pi=0;pi<3;pi++)
00552 {
00553 fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
00554 fgt_test_init_rand(&my_plan);
00555 fgt_init_node_dependent(&my_plan);
00556
00557 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00558 dgt_trafo(&my_plan);
00559 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00560
00561 fgt_trafo(&my_plan);
00562
00563 printf("%1.3e\t", nfft_error_l_infty_1_complex(swap_dgt, my_plan.f,
00564 my_plan.M, my_plan.alpha, my_plan.N));
00565 fflush(stdout);
00566
00567 fgt_finalize(&my_plan);
00568 fftw_cleanup();
00569 }
00570 printf("\n");
00571 }
00572 printf("];\n");
00573 }
00574
00580 int main(int argc,char **argv)
00581 {
00582 if(argc!=2)
00583 {
00584 fprintf(stderr,"fastgauss type\n");
00585 fprintf(stderr," type\n");
00586 fprintf(stderr," 0 - Simple test.\n");
00587 fprintf(stderr," 1 - Compares accuracy and execution time.\n");
00588 fprintf(stderr," Pipe to output_andersson.tex\n");
00589 fprintf(stderr," 2 - Compares accuracy.\n");
00590 fprintf(stderr," Pipe to output_error.m\n");
00591 fprintf(stderr," 3 - Compares accuracy.\n");
00592 fprintf(stderr," Pipe to output_error_p.m\n");
00593 return -1;
00594 }
00595
00596 if(atoi(argv[1])==0)
00597 fgt_test_simple(10, 10, 5+3*_Complex_I, 0.001);
00598
00599 if(atoi(argv[1])==1)
00600 fgt_test_andersson();
00601
00602 if(atoi(argv[1])==2)
00603 fgt_test_error();
00604
00605 if(atoi(argv[1])==3)
00606 fgt_test_error_p();
00607
00608 return 1;
00609 }
00610