00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "config.h"
00028
00029 #include <stdio.h>
00030 #include <stdlib.h>
00031 #include <math.h>
00032 #ifdef HAVE_COMPLEX_H
00033 #include <complex.h>
00034 #endif
00035
00036 #include "nfft3.h"
00037 #include "nfft3util.h"
00038 #include "infft.h"
00039
00048 #define DGT_PRE_CEXP (1U<< 0)
00049
00059 #define FGT_NDFT (1U<< 1)
00060
00069 #define FGT_APPROX_B (1U<< 2)
00070
00072 typedef struct
00073 {
00074 int N;
00075 int M;
00077 double _Complex *alpha;
00078 double _Complex *f;
00080 unsigned flags;
00083 double _Complex sigma;
00085 double *x;
00086 double *y;
00088 double _Complex *pre_cexp;
00090 int n;
00091 double p;
00093 double _Complex *b;
00095 nfft_plan *nplan1;
00096 nfft_plan *nplan2;
00098 } fgt_plan;
00099
00107 void dgt_trafo(fgt_plan *ths)
00108 {
00109 int j,k,l;
00110
00111 for(j=0; j<ths->M; j++)
00112 ths->f[j] = 0;
00113
00114 if(ths->flags & DGT_PRE_CEXP)
00115 for(j=0,l=0; j<ths->M; j++)
00116 for(k=0; k<ths->N; k++,l++)
00117 ths->f[j] += ths->alpha[k]*ths->pre_cexp[l];
00118 else
00119 for(j=0; j<ths->M; j++)
00120 for(k=0; k<ths->N; k++)
00121 ths->f[j] += ths->alpha[k]*cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00122 (ths->y[j]-ths->x[k]));
00123 }
00124
00132 void fgt_trafo(fgt_plan *ths)
00133 {
00134 int l;
00135
00136 if(ths->flags & FGT_NDFT)
00137 {
00138 nfft_adjoint_direct(ths->nplan1);
00139
00140 for(l=0; l<ths->n; l++)
00141 ths->nplan1->f_hat[l] *= ths->b[l];
00142
00143 nfft_trafo_direct(ths->nplan2);
00144 }
00145 else
00146 {
00147 nfft_adjoint(ths->nplan1);
00148
00149 for(l=0; l<ths->n; l++)
00150 ths->nplan1->f_hat[l] *= ths->b[l];
00151
00152 nfft_trafo(ths->nplan2);
00153 }
00154 }
00155
00170 void fgt_init_guru(fgt_plan *ths, int N, int M, double _Complex sigma, int n,
00171 double p, int m, unsigned flags)
00172 {
00173 int j,n_fftw;
00174 fftw_plan fplan;
00175
00176 ths->M = M;
00177 ths->N = N;
00178 ths->sigma = sigma;
00179 ths->flags = flags;
00180
00181 ths->x = (double*)nfft_malloc(ths->N*sizeof(double));
00182 ths->y = (double*)nfft_malloc(ths->M*sizeof(double));
00183 ths->alpha = (double _Complex*)nfft_malloc(ths->N*sizeof(double _Complex));
00184 ths->f = (double _Complex*)nfft_malloc(ths->M*sizeof(double _Complex));
00185
00186 ths->n = n;
00187 ths->p = p;
00188
00189 ths->b = (double _Complex*)nfft_malloc(ths->n*sizeof(double _Complex));
00190
00191 ths->nplan1 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
00192 ths->nplan2 = (nfft_plan*) nfft_malloc(sizeof(nfft_plan));
00193
00194 n_fftw=X(next_power_of_2)(2*ths->n);
00195
00196 nfft_init_guru(ths->nplan1, 1, &(ths->n), ths->N, &n_fftw, m, PRE_PHI_HUT|
00197 PRE_PSI| MALLOC_X| MALLOC_F_HAT| FFTW_INIT, FFTW_MEASURE);
00198 nfft_init_guru(ths->nplan2, 1, &(ths->n), ths->M, &n_fftw, m, PRE_PHI_HUT|
00199 PRE_PSI| MALLOC_X| FFTW_INIT, FFTW_MEASURE);
00200
00201 ths->nplan1->f = ths->alpha;
00202 ths->nplan2->f_hat = ths->nplan1->f_hat;
00203 ths->nplan2->f = ths->f;
00204
00205 if(ths->flags & FGT_APPROX_B)
00206 {
00207 fplan = fftw_plan_dft_1d(ths->n, ths->b, ths->b, FFTW_FORWARD,
00208 FFTW_MEASURE);
00209
00210 for(j=0; j<ths->n; j++)
00211 ths->b[j] = cexp(-ths->p*ths->p*ths->sigma*(j-ths->n/2)*(j-ths->n/2)/
00212 ((double)ths->n*ths->n)) / ths->n;
00213
00214 nfft_fftshift_complex(ths->b, 1, &ths->n);
00215 fftw_execute(fplan);
00216 nfft_fftshift_complex(ths->b, 1, &ths->n);
00217
00218 fftw_destroy_plan(fplan);
00219 }
00220 else
00221 {
00222 for(j=0; j<ths->n; j++)
00223 ths->b[j] = 1.0/ths->p * csqrt(PI/ths->sigma)*
00224 cexp(-PI*PI*(j-ths->n/2)*(j-ths->n/2)/
00225 (ths->p*ths->p*ths->sigma));
00226 }
00227 }
00228
00240 void fgt_init(fgt_plan *ths, int N, int M, double _Complex sigma, double eps)
00241 {
00242 double p;
00243 int n;
00244
00245 p=0.5+sqrt(-log(eps)/creal(sigma));
00246 if(p<1)
00247 p=1;
00248
00249 n=2*((int)ceil(p*cabs(sigma)/PI * sqrt(-log(eps)/creal(sigma))));
00250
00251 if(N*M<=((int)(1U<<20)))
00252 fgt_init_guru(ths, N, M, sigma, n, p, 7, DGT_PRE_CEXP);
00253 else
00254 fgt_init_guru(ths, N, M, sigma, n, p, 7, 0);
00255 }
00256
00263 void fgt_init_node_dependent(fgt_plan *ths)
00264 {
00265 int j,k,l;
00266
00267 if(ths->flags & DGT_PRE_CEXP)
00268 {
00269 ths->pre_cexp=(double _Complex*)nfft_malloc(ths->M*ths->N*
00270 sizeof(double _Complex));
00271
00272 for(j=0,l=0; j<ths->M; j++)
00273 for(k=0; k<ths->N; k++,l++)
00274 ths->pre_cexp[l]=cexp(-ths->sigma*(ths->y[j]-ths->x[k])*
00275 (ths->y[j]-ths->x[k]));
00276 }
00277
00278 for(j=0; j<ths->nplan1->M_total; j++)
00279 ths->nplan1->x[j] = ths->x[j]/ths->p;
00280 for(j=0; j<ths->nplan2->M_total; j++)
00281 ths->nplan2->x[j] = ths->y[j]/ths->p;
00282
00283 if(ths->nplan1->nfft_flags & PRE_PSI)
00284 nfft_precompute_psi(ths->nplan1);
00285 if(ths->nplan2->nfft_flags & PRE_PSI)
00286 nfft_precompute_psi(ths->nplan2);
00287 }
00288
00295 void fgt_finalize(fgt_plan *ths)
00296 {
00297 nfft_finalize(ths->nplan2);
00298 nfft_finalize(ths->nplan1);
00299
00300 nfft_free(ths->nplan2);
00301 nfft_free(ths->nplan1);
00302
00303 nfft_free(ths->b);
00304
00305 nfft_free(ths->f);
00306 nfft_free(ths->y);
00307
00308 nfft_free(ths->alpha);
00309 nfft_free(ths->x);
00310 }
00311
00318 void fgt_test_init_rand(fgt_plan *ths)
00319 {
00320 int j,k;
00321
00322 for(k=0; k<ths->N; k++)
00323 ths->x[k] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00324
00325 for(j=0; j<ths->M; j++)
00326 ths->y[j] = (double)rand()/(2.0*RAND_MAX)-1.0/4.0;
00327
00328 for(k=0; k<ths->N; k++)
00329 ths->alpha[k] = (double)rand()/(RAND_MAX)-1.0/2.0
00330 + _Complex_I*(double)rand()/(RAND_MAX)-I/2.0;
00331 }
00332
00341 double fgt_test_measure_time(fgt_plan *ths, unsigned dgt)
00342 {
00343 int r;
00344 ticks t0, t1;
00345 double t_out;
00346 double tau=0.01;
00347
00348 t_out=0;
00349 r=0;
00350 while(t_out<tau)
00351 {
00352 r++;
00353 t0 = getticks();
00354 if (dgt)
00355 dgt_trafo(ths);
00356 else
00357 fgt_trafo(ths);
00358 t1 = getticks();
00359 t_out += nfft_elapsed_seconds(t1,t0);
00360 }
00361 t_out/=r;
00362
00363 return t_out;
00364 }
00365
00375 void fgt_test_simple(int N, int M, double _Complex sigma, double eps)
00376 {
00377 fgt_plan my_plan;
00378 double _Complex *swap_dgt;
00379
00380 fgt_init(&my_plan, N, M, sigma, eps);
00381 swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*sizeof(double _Complex));
00382
00383 fgt_test_init_rand(&my_plan);
00384 fgt_init_node_dependent(&my_plan);
00385
00386 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00387 dgt_trafo(&my_plan);
00388 nfft_vpr_complex(my_plan.f,my_plan.M,"discrete gauss transform");
00389 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00390
00391 fgt_trafo(&my_plan);
00392 nfft_vpr_complex(my_plan.f,my_plan.M,"fast gauss transform");
00393
00394 printf("\n relative error: %1.3e\n", X(error_l_infty_1_complex)(swap_dgt,
00395 my_plan.f, my_plan.M, my_plan.alpha, my_plan.N));
00396
00397 nfft_free(swap_dgt);
00398 fgt_finalize(&my_plan);
00399 }
00400
00410 void fgt_test_andersson(void)
00411 {
00412 fgt_plan my_plan;
00413 double _Complex *swap_dgt;
00414 int N;
00415
00416 double _Complex sigma=4*(138+ _Complex_I*100);
00417 int n=128;
00418 int N_dgt_pre_exp=(int)(1U<<11);
00419 int N_dgt=(int)(1U<<19);
00420
00421 printf("n=%d, sigma=%1.3e+i%1.3e\n",n,creal(sigma),cimag(sigma));
00422
00423 for(N=((int)(1U<<6)); N<((int)(1U<<22)); N=N<<1)
00424 {
00425 printf("$%d$\t & ",N);
00426
00427 if(N<N_dgt_pre_exp)
00428 fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, DGT_PRE_CEXP);
00429 else
00430 fgt_init_guru(&my_plan, N, N, sigma, n, 1, 7, 0);
00431
00432 swap_dgt = (double _Complex*)nfft_malloc(my_plan.M*
00433 sizeof(double _Complex));
00434
00435 fgt_test_init_rand(&my_plan);
00436
00437 fgt_init_node_dependent(&my_plan);
00438
00439 if(N<N_dgt)
00440 {
00441 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00442 if(N<N_dgt_pre_exp)
00443 my_plan.flags^=DGT_PRE_CEXP;
00444
00445 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00446 if(N<N_dgt_pre_exp)
00447 my_plan.flags^=DGT_PRE_CEXP;
00448 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00449 }
00450 else
00451 printf("\t\t & ");
00452
00453 if(N<N_dgt_pre_exp)
00454 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 1));
00455 else
00456 printf("\t\t & ");
00457
00458 my_plan.flags^=FGT_NDFT;
00459 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00460 my_plan.flags^=FGT_NDFT;
00461
00462 printf("$%1.1e$\t & ",fgt_test_measure_time(&my_plan, 0));
00463
00464 printf("$%1.1e$\t \\\\ \n",
00465 X(error_l_infty_1_complex)(swap_dgt, my_plan.f, my_plan.M,
00466 my_plan.alpha, my_plan.N));
00467 fflush(stdout);
00468
00469 nfft_free(swap_dgt);
00470 fgt_finalize(&my_plan);
00471 fftw_cleanup();
00472 }
00473 }
00474
00481 void fgt_test_error(void)
00482 {
00483 fgt_plan my_plan;
00484 double _Complex *swap_dgt;
00485 int n,mi;
00486
00487 double _Complex sigma=4*(138+ _Complex_I*100);
00488 int N=1000;
00489 int M=1000;
00490 int m[2]={7,3};
00491
00492 printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00493 printf("error=[\n");
00494
00495 swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
00496
00497 for(n=8; n<=128; n+=4)
00498 {
00499 printf("%d\t",n);
00500 for(mi=0;mi<2;mi++)
00501 {
00502 fgt_init_guru(&my_plan, N, M, sigma, n, 1, m[mi], 0);
00503 fgt_test_init_rand(&my_plan);
00504 fgt_init_node_dependent(&my_plan);
00505
00506 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00507 dgt_trafo(&my_plan);
00508 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00509
00510 fgt_trafo(&my_plan);
00511
00512 printf("%1.3e\t", X(error_l_infty_1_complex)(swap_dgt, my_plan.f,
00513 my_plan.M, my_plan.alpha, my_plan.N));
00514 fflush(stdout);
00515
00516 fgt_finalize(&my_plan);
00517 fftw_cleanup();
00518 }
00519 printf("\n");
00520 }
00521 printf("];\n");
00522
00523 nfft_free(swap_dgt);
00524 }
00525
00532 void fgt_test_error_p(void)
00533 {
00534 fgt_plan my_plan;
00535 double _Complex *swap_dgt;
00536 int n,pi;
00537
00538 double _Complex sigma=20+40*_Complex_I;
00539 int N=1000;
00540 int M=1000;
00541 double p[3]={1,1.5,2};
00542
00543 printf("N=%d;\tM=%d;\nsigma=%1.3e+i*%1.3e;\n",N,M,creal(sigma),cimag(sigma));
00544 printf("error=[\n");
00545
00546 swap_dgt = (double _Complex*)nfft_malloc(M*sizeof(double _Complex));
00547
00548 for(n=8; n<=128; n+=4)
00549 {
00550 printf("%d\t",n);
00551 for(pi=0;pi<3;pi++)
00552 {
00553 fgt_init_guru(&my_plan, N, M, sigma, n, p[pi], 7, 0);
00554 fgt_test_init_rand(&my_plan);
00555 fgt_init_node_dependent(&my_plan);
00556
00557 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00558 dgt_trafo(&my_plan);
00559 NFFT_SWAP_complex(swap_dgt,my_plan.f);
00560
00561 fgt_trafo(&my_plan);
00562
00563 printf("%1.3e\t", X(error_l_infty_1_complex)(swap_dgt, my_plan.f,
00564 my_plan.M, my_plan.alpha, my_plan.N));
00565 fflush(stdout);
00566
00567 fgt_finalize(&my_plan);
00568 fftw_cleanup();
00569 }
00570 printf("\n");
00571 }
00572 printf("];\n");
00573 }
00574
00580 int main(int argc,char **argv)
00581 {
00582 if(argc!=2)
00583 {
00584 fprintf(stderr,"fastgauss type\n");
00585 fprintf(stderr," type\n");
00586 fprintf(stderr," 0 - Simple test.\n");
00587 fprintf(stderr," 1 - Compares accuracy and execution time.\n");
00588 fprintf(stderr," Pipe to output_andersson.tex\n");
00589 fprintf(stderr," 2 - Compares accuracy.\n");
00590 fprintf(stderr," Pipe to output_error.m\n");
00591 fprintf(stderr," 3 - Compares accuracy.\n");
00592 fprintf(stderr," Pipe to output_error_p.m\n");
00593 return -1;
00594 }
00595
00596 if(atoi(argv[1])==0)
00597 fgt_test_simple(10, 10, 5+3*_Complex_I, 0.001);
00598
00599 if(atoi(argv[1])==1)
00600 fgt_test_andersson();
00601
00602 if(atoi(argv[1])==2)
00603 fgt_test_error();
00604
00605 if(atoi(argv[1])==3)
00606 fgt_test_error_p();
00607
00608 return 1;
00609 }
00610