00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026 #include "config.h"
00027
00028
00029 #include <stdio.h>
00030 #include <stdlib.h>
00031 #include <math.h>
00032 #include <float.h>
00033 #ifdef HAVE_COMPLEX_H
00034 #include <complex.h>
00035 #endif
00036
00037
00038 #include "nfft3.h"
00039
00040
00041 #include "nfft3util.h"
00042 #include "infft.h"
00043
00044
00045 #define SYMBOL_ABEL_POISSON(k,h) (pow(h,k))
00046
00047
00048 #define SYMBOL_SINGULARITY(k,h) ((2.0/(2*k+1))*pow(h,k))
00049
00050
00051
00053 #define KT_ABEL_POISSON (0)
00054
00055 #define KT_SINGULARITY (1)
00056
00057 #define KT_LOC_SUPP (2)
00058
00059 #define KT_GAUSSIAN (3)
00060
00062 enum pvalue {NO = 0, YES = 1, BOTH = 2};
00063
00078 static inline double innerProduct(const double phi1, const double theta1,
00079 const double phi2, const double theta2)
00080 {
00081 double pi2theta1 = PI2*theta1, pi2theta2 = PI2*theta2;
00082 return (cos(pi2theta1)*cos(pi2theta2)
00083 + sin(pi2theta1)*sin(pi2theta2)*cos(PI2*(phi1-phi2)));
00084 }
00085
00097 static inline double poissonKernel(const double x, const double h)
00098 {
00099 return (1.0/(PI4))*((1.0-h)*(1.0+h))/pow(sqrt(1.0-2.0*h*x+h*h),3.0);
00100 }
00101
00113 static inline double singularityKernel(const double x, const double h)
00114 {
00115 return (1.0/(PI2))/sqrt(1.0-2.0*h*x+h*h);
00116 }
00117
00131 static inline double locallySupportedKernel(const double x, const double h,
00132 const double lambda)
00133 {
00134 return (x<=h)?(0.0):(pow((x-h),lambda));
00135 }
00136
00149 static inline double gaussianKernel(const double x, const double sigma)
00150 {
00151 return exp(2.0*sigma*(x-1.0));
00152 }
00153
00164 int main (int argc, char **argv)
00165 {
00166 double **p;
00167
00168 int *m;
00169 int **ld;
00170
00171 int ip;
00172 int im;
00173 int ild;
00174 int ipp;
00175 int ip_max;
00176 int im_max;
00177 int ild_max;
00178 int ipp_max;
00179 int tc_max;
00180 int m_max;
00181
00182 int l_max;
00183
00184 int d_max;
00185
00186 long ld_max_prec;
00187
00188 long l_max_prec;
00189
00190 int tc;
00191 int kt;
00192 int cutoff;
00193 double threshold;
00194 double t_d;
00195 double t_dp;
00196
00197 double t_fd;
00198 double t_f;
00199 double temp;
00200 double err_f;
00201 double err_fd;
00202 ticks t0, t1;
00203 int precompute = NO;
00204 fftw_complex *ptr;
00205 double* steed;
00206 fftw_complex *b;
00207 fftw_complex *f_hat;
00208 fftw_complex *a;
00209 double *xi;
00210 double *eta;
00211 fftw_complex *f_m;
00212 fftw_complex *f;
00213 fftw_complex *prec = NULL;
00214 nfsft_plan plan;
00215 nfsft_plan plan_adjoint;
00216 int i;
00217 int k;
00218 int n;
00219 int d;
00220 int l;
00221 int use_nfsft;
00222 int use_nfft;
00223 int use_fpt;
00224 int rinc;
00225 double constant;
00226
00227
00228 fscanf(stdin,"testcases=%d\n",&tc_max);
00229 fprintf(stdout,"%d\n",tc_max);
00230
00231
00232 for (tc = 0; tc < tc_max; tc++)
00233 {
00234
00235 fscanf(stdin,"nfsft=%d\n",&use_nfsft);
00236 fprintf(stdout,"%d\n",use_nfsft);
00237 if (use_nfsft != NO)
00238 {
00239
00240 fscanf(stdin,"nfft=%d\n",&use_nfft);
00241 fprintf(stdout,"%d\n",use_nfft);
00242 if (use_nfft != NO)
00243 {
00244
00245 fscanf(stdin,"cutoff=%d\n",&cutoff);
00246 fprintf(stdout,"%d\n",cutoff);
00247 }
00248 else
00249 {
00250
00251
00252 cutoff = 1;
00253 }
00254
00255 fscanf(stdin,"fpt=%d\n",&use_fpt);
00256 fprintf(stdout,"%d\n",use_fpt);
00257
00258 fscanf(stdin,"threshold=%lf\n",&threshold);
00259 fprintf(stdout,"%lf\n",threshold);
00260 }
00261 else
00262 {
00263
00264
00265 cutoff = 3;
00266 threshold = 1000000000000.0;
00267 }
00268
00269
00270 m_max = 0;
00271
00272 l_max = 0;
00273
00274 d_max = 0;
00275
00276 l_max_prec = 0;
00277
00278 ld_max_prec = 0;
00279
00280
00281
00282 fscanf(stdin,"kernel=%d\n",&kt);
00283 fprintf(stdout,"%d\n",kt);
00284
00285
00286 fscanf(stdin,"parameter_sets=%d\n",&ip_max);
00287 fprintf(stdout,"%d\n",ip_max);
00288
00289
00290 p = (double**) nfft_malloc(ip_max*sizeof(double*));
00291
00292
00293
00294
00295 fscanf(stdin,"parameters=%d\n",&ipp_max);
00296 fprintf(stdout,"%d\n",ipp_max);
00297
00298 for (ip = 0; ip < ip_max; ip++)
00299 {
00300
00301 p[ip] = (double*) nfft_malloc(ipp_max*sizeof(double));
00302
00303
00304 for (ipp = 0; ipp < ipp_max; ipp++)
00305 {
00306
00307 fscanf(stdin,"%lf\n",&p[ip][ipp]);
00308 fprintf(stdout,"%lf\n",p[ip][ipp]);
00309 }
00310 }
00311
00312
00313 fscanf(stdin,"bandwidths=%d\n",&im_max);
00314 fprintf(stdout,"%d\n",im_max);
00315 m = (int*) nfft_malloc(im_max*sizeof(int));
00316
00317
00318 for (im = 0; im < im_max; im++)
00319 {
00320
00321 fscanf(stdin,"%d\n",&m[im]);
00322 fprintf(stdout,"%d\n",m[im]);
00323 m_max = NFFT_MAX(m_max,m[im]);
00324 }
00325
00326
00327 fscanf(stdin,"node_sets=%d\n",&ild_max);
00328 fprintf(stdout,"%d\n",ild_max);
00329 ld = (int**) nfft_malloc(ild_max*sizeof(int*));
00330
00331
00332 for (ild = 0; ild < ild_max; ild++)
00333 {
00334
00335 ld[ild] = (int*) nfft_malloc(5*sizeof(int));
00336
00337
00338 fscanf(stdin,"L=%d ",&ld[ild][0]);
00339 fprintf(stdout,"%d\n",ld[ild][0]);
00340 l_max = NFFT_MAX(l_max,ld[ild][0]);
00341
00342
00343 fscanf(stdin,"D=%d ",&ld[ild][1]);
00344 fprintf(stdout,"%d\n",ld[ild][1]);
00345 d_max = NFFT_MAX(d_max,ld[ild][1]);
00346
00347
00348 fscanf(stdin,"compare=%d ",&ld[ild][2]);
00349 fprintf(stdout,"%d\n",ld[ild][2]);
00350
00351
00352 if (ld[ild][2] == YES)
00353 {
00354
00355 fscanf(stdin,"precomputed=%d\n",&ld[ild][3]);
00356 fprintf(stdout,"%d\n",ld[ild][3]);
00357
00358
00359
00360 fscanf(stdin,"repetitions=%d\n",&ld[ild][4]);
00361 fprintf(stdout,"%d\n",ld[ild][4]);
00362
00363
00364 if (ld[ild][3] == YES)
00365 {
00366
00367 ld_max_prec = NFFT_MAX(ld_max_prec,ld[ild][0]*ld[ild][1]);
00368
00369 l_max_prec = NFFT_MAX(l_max_prec,ld[ild][0]);
00370
00371 precompute = YES;
00372 }
00373 }
00374 else
00375 {
00376
00377 ld[ild][4] = 1;
00378 }
00379 }
00380
00381
00382 b = (fftw_complex*) nfft_malloc(l_max*sizeof(fftw_complex));
00383 eta = (double*) nfft_malloc(2*l_max*sizeof(double));
00384 f_hat = (fftw_complex*) nfft_malloc(NFSFT_F_HAT_SIZE(m_max)*sizeof(fftw_complex));
00385 a = (fftw_complex*) nfft_malloc((m_max+1)*sizeof(fftw_complex));
00386 xi = (double*) nfft_malloc(2*d_max*sizeof(double));
00387 f_m = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
00388 f = (fftw_complex*) nfft_malloc(d_max*sizeof(fftw_complex));
00389
00390
00391 if (precompute == YES)
00392 {
00393 prec = (fftw_complex*) nfft_malloc(ld_max_prec*sizeof(fftw_complex));
00394 }
00395
00396
00397 for (l = 0; l < l_max; l++)
00398 {
00399 b[l] = (((double)rand())/RAND_MAX) - 0.5;
00400 eta[2*l] = (((double)rand())/RAND_MAX) - 0.5;
00401 eta[2*l+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(PI2);
00402 }
00403
00404
00405 for (d = 0; d < d_max; d++)
00406 {
00407 xi[2*d] = (((double)rand())/RAND_MAX) - 0.5;
00408 xi[2*d+1] = acos(2.0*(((double)rand())/RAND_MAX) - 1.0)/(PI2);
00409 }
00410
00411
00412 nfsft_precompute(m_max,threshold,
00413 ((use_nfsft==NO)?(NFSFT_NO_FAST_ALGORITHM):(0U)), 0U);
00414
00415
00416 for (ip = 0; ip < ip_max; ip++)
00417 {
00418
00419 switch (kt)
00420 {
00421 case KT_ABEL_POISSON:
00422
00423 for (k = 0; k <= m_max; k++)
00424 a[k] = SYMBOL_ABEL_POISSON(k,p[ip][0]);
00425 break;
00426
00427 case KT_SINGULARITY:
00428
00429
00430 for (k = 0; k <= m_max; k++)
00431 a[k] = SYMBOL_SINGULARITY(k,p[ip][0]);
00432 break;
00433
00434 case KT_LOC_SUPP:
00435
00436
00437 a[0] = 1.0;
00438 if (1 <= m_max)
00439 a[1] = ((p[ip][1]+1+p[ip][0])/(p[ip][1]+2.0))*a[0];
00440 for (k = 2; k <= m_max; k++)
00441 a[k] = (1.0/(k+p[ip][1]+1))*((2*k-1)*p[ip][0]*a[k-1] -
00442 (k-p[ip][1]-2)*a[k-2]);
00443 break;
00444
00445 case KT_GAUSSIAN:
00446
00447 steed = (double*) nfft_malloc((m_max+1)*sizeof(double));
00448 nfft_smbi(2.0*p[ip][0],0.5,m_max+1,2,steed);
00449 for (k = 0; k <= m_max; k++)
00450 a[k] = PI2*(sqrt(PI/p[ip][0]))*steed[k];
00451
00452 nfft_free(steed);
00453 break;
00454 }
00455
00456
00457 for (k = 0; k <= m_max; k++)
00458 a[k] *= (2*k+1)/(PI4);
00459
00460
00461 for (ild = 0; ild < ild_max; ild++)
00462 {
00463
00464 if (ld[ild][2] != NO)
00465 {
00466
00467
00468 if (ld[ild][3] != NO)
00469 {
00470
00471 ptr = prec;
00472
00473 rinc = l_max_prec-ld[ild][0];
00474
00475
00476 for (d = 0; d < ld[ild][1]; d++)
00477 {
00478
00479 for (l = 0; l < ld[ild][0]; l++)
00480 {
00481
00482
00483 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
00484
00485
00486 switch (kt)
00487 {
00488 case KT_ABEL_POISSON:
00489
00490 *ptr++ = poissonKernel(temp,p[ip][0]);
00491 break;
00492
00493 case KT_SINGULARITY:
00494
00495
00496 *ptr++ = singularityKernel(temp,p[ip][0]);
00497 break;
00498
00499 case KT_LOC_SUPP:
00500
00501
00502 *ptr++ = locallySupportedKernel(temp,p[ip][0],p[ip][1]);
00503 break;
00504
00505 case KT_GAUSSIAN:
00506
00507
00508 *ptr++ = gaussianKernel(temp,p[ip][0]);
00509 break;
00510 }
00511 }
00512
00513 ptr += rinc;
00514 }
00515
00516
00517 t_dp = 0.0;
00518
00519
00520 t0 = getticks();
00521
00522
00523 for (i = 0; i < ld[ild][4]; i++)
00524 {
00525
00526
00527 ptr = prec;
00528
00529 rinc = l_max_prec-ld[ild][0];
00530
00531
00532 if (kt == KT_LOC_SUPP)
00533 {
00534
00535
00536
00537 constant = ((p[ip][1]+1)/(PI2*pow(1-p[ip][0],p[ip][1]+1)));
00538
00539
00540 for (d = 0; d < ld[ild][1]; d++)
00541 {
00542
00543 f[d] = 0.0;
00544
00545
00546 for (l = 0; l < ld[ild][0]; l++)
00547 f[d] += b[l]*(*ptr++);
00548
00549
00550 f[d] *= constant;
00551
00552
00553 ptr += rinc;
00554 }
00555 }
00556 else
00557 {
00558
00559 for (d = 0; d < ld[ild][1]; d++)
00560 {
00561
00562 f[d] = 0.0;
00563
00564
00565 for (l = 0; l < ld[ild][0]; l++)
00566 f[d] += b[l]*(*ptr++);
00567
00568
00569 ptr += rinc;
00570 }
00571 }
00572 }
00573
00574
00575 t1 = getticks();
00576 t_dp = nfft_elapsed_seconds(t1,t0);
00577
00578
00579 t_dp = t_dp/((double)ld[ild][4]);
00580 }
00581 else
00582 {
00583
00584 t_dp = -1.0;
00585 }
00586
00587
00588 t_d = 0.0;
00589
00590
00591 t0 = getticks();
00592
00593
00594 for (i = 0; i < ld[ild][4]; i++)
00595 {
00596
00597 switch (kt)
00598 {
00599 case KT_ABEL_POISSON:
00600
00601
00602 for (d = 0; d < ld[ild][1]; d++)
00603 {
00604
00605 f[d] = 0.0;
00606
00607
00608 for (l = 0; l < ld[ild][0]; l++)
00609 {
00610
00611
00612 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
00613
00614
00615
00616 f[d] += b[l]*poissonKernel(temp,p[ip][0]);
00617 }
00618 }
00619 break;
00620
00621 case KT_SINGULARITY:
00622
00623 for (d = 0; d < ld[ild][1]; d++)
00624 {
00625
00626 f[d] = 0.0;
00627
00628
00629 for (l = 0; l < ld[ild][0]; l++)
00630 {
00631
00632
00633 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
00634
00635
00636
00637 f[d] += b[l]*singularityKernel(temp,p[ip][0]);
00638 }
00639 }
00640 break;
00641
00642 case KT_LOC_SUPP:
00643
00644 constant = ((p[ip][1]+1)/(PI2*pow(1-p[ip][0],p[ip][1]+1)));
00645
00646
00647 for (d = 0; d < ld[ild][1]; d++)
00648 {
00649
00650 f[d] = 0.0;
00651
00652
00653 for (l = 0; l < ld[ild][0]; l++)
00654 {
00655
00656
00657 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
00658
00659
00660
00661 f[d] += b[l]*locallySupportedKernel(temp,p[ip][0],p[ip][1]);
00662 }
00663
00664
00665 f[d] *= constant;
00666 }
00667 break;
00668
00669 case KT_GAUSSIAN:
00670
00671 for (d = 0; d < ld[ild][1]; d++)
00672 {
00673
00674 f[d] = 0.0;
00675
00676
00677 for (l = 0; l < ld[ild][0]; l++)
00678 {
00679
00680
00681 temp = innerProduct(eta[2*l],eta[2*l+1],xi[2*d],xi[2*d+1]);
00682
00683
00684 f[d] += b[l]*gaussianKernel(temp,p[ip][0]);
00685 }
00686 }
00687 break;
00688 }
00689 }
00690
00691
00692 t1 = getticks();
00693 t_d = nfft_elapsed_seconds(t1,t0);
00694
00695 t_d = t_d/((double)ld[ild][4]);
00696 }
00697 else
00698 {
00699
00700 t_d = -1.0;
00701 t_dp = -1.0;
00702 }
00703
00704
00705
00706 err_fd = -1.0;
00707 err_f = -1.0;
00708 t_fd = -1.0;
00709 t_f = -1.0;
00710
00711
00712 for (im = 0; im < im_max; im++)
00713 {
00714
00715 nfsft_init_guru(&plan_adjoint, m[im],ld[ild][0],
00716 ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
00717 ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
00718 PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
00719 FFT_OUT_OF_PLACE, cutoff);
00720 nfsft_init_guru(&plan,m[im],ld[ild][1],
00721 ((use_nfft!=0)?(0U):(NFSFT_USE_NDFT)) |
00722 ((use_fpt!=0)?(0U):(NFSFT_USE_DPT)),
00723 PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
00724 FFT_OUT_OF_PLACE,
00725 cutoff);
00726 plan_adjoint.f_hat = f_hat;
00727 plan_adjoint.x = eta;
00728 plan_adjoint.f = b;
00729 plan.f_hat = f_hat;
00730 plan.x = xi;
00731 plan.f = f_m;
00732 nfsft_precompute_x(&plan_adjoint);
00733 nfsft_precompute_x(&plan);
00734
00735
00736 if (use_nfsft == BOTH)
00737 {
00738
00739 t_fd = 0.0;
00740
00741
00742 t0 = getticks();
00743
00744
00745 for (i = 0; i < ld[ild][4]; i++)
00746 {
00747
00748
00749 nfsft_adjoint_direct(&plan_adjoint);
00750
00751
00752 for (k = 0; k <= m[im]; k++)
00753 for (n = -k; n <= k; n++)
00754 f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
00755
00756
00757 nfsft_trafo_direct(&plan);
00758
00759 }
00760
00761
00762 t1 = getticks();
00763 t_fd = nfft_elapsed_seconds(t1,t0);
00764
00765
00766 t_fd = t_fd/((double)ld[ild][4]);
00767
00768
00769 if (ld[ild][2] != NO)
00770 {
00771
00772 err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
00773 ld[ild][0]);
00774 }
00775 }
00776
00777
00778 if (use_nfsft != NO)
00779 {
00780
00781 t_f = 0.0;
00782 }
00783 else
00784 {
00785
00786
00787 t_fd = 0.0;
00788 }
00789
00790
00791 t0 = getticks();
00792
00793
00794 for (i = 0; i < ld[ild][4]; i++)
00795 {
00796
00797 if (use_nfsft != NO)
00798 {
00799
00800 nfsft_adjoint(&plan_adjoint);
00801 }
00802 else
00803 {
00804
00805 nfsft_adjoint_direct(&plan_adjoint);
00806 }
00807
00808
00809 for (k = 0; k <= m[im]; k++)
00810 for (n = -k; n <= k; n++)
00811 f_hat[NFSFT_INDEX(k,n,&plan_adjoint)] *= a[k];
00812
00813
00814 if (use_nfsft != NO)
00815 {
00816
00817 nfsft_trafo(&plan);
00818 }
00819 else
00820 {
00821
00822 nfsft_trafo_direct(&plan);
00823 }
00824 }
00825
00826
00827 t1 = getticks();
00828
00829 if (use_nfsft != NO)
00830 t_f = nfft_elapsed_seconds(t1,t0);
00831 else
00832 t_fd = nfft_elapsed_seconds(t1,t0);
00833
00834
00835 if (use_nfsft != NO)
00836 {
00837
00838 t_f = t_f/((double)ld[ild][4]);
00839 }
00840 else
00841 {
00842
00843 t_fd = t_fd/((double)ld[ild][4]);
00844 }
00845
00846
00847 if (ld[ild][2] != NO)
00848 {
00849
00850 if (use_nfsft != NO)
00851 {
00852
00853 err_f = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
00854 ld[ild][0]);
00855 }
00856 else
00857 {
00858
00859 err_fd = X(error_l_infty_1_complex)(f, f_m, ld[ild][1], b,
00860 ld[ild][0]);
00861 }
00862 }
00863
00864
00865 fprintf(stdout,"%e\n%e\n%e\n%e\n%e\n%e\n\n",t_d,t_dp,t_fd,t_f,err_fd,
00866 err_f);
00867
00868
00869 nfsft_finalize(&plan_adjoint);
00870 nfsft_finalize(&plan);
00871 }
00872
00873 }
00874 }
00875
00876
00877 nfsft_forget();
00878
00879
00880
00881 if (precompute == YES)
00882 {
00883
00884 nfft_free(prec);
00885 }
00886
00887 nfft_free(f);
00888 nfft_free(f_m);
00889 nfft_free(xi);
00890 nfft_free(eta);
00891 nfft_free(a);
00892 nfft_free(f_hat);
00893 nfft_free(b);
00894
00895
00896 for (ild = 0; ild < ild_max; ild++)
00897 nfft_free(ld[ild]);
00898 nfft_free(ld);
00899
00900
00901 nfft_free(m);
00902
00903
00904 for (ip = 0; ip < ip_max; ip++)
00905 nfft_free(p[ip]);
00906 nfft_free(p);
00907 }
00908
00909
00910 return EXIT_SUCCESS;
00911 }
00912