00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "config.h"
00028
00029
00030 #include <math.h>
00031 #include <stdlib.h>
00032 #include <string.h>
00033 #ifdef HAVE_COMPLEX_H
00034 #include <complex.h>
00035 #endif
00036
00037 #ifdef _OPENMP
00038 #include <omp.h>
00039 #endif
00040
00041
00042 #include "nfft3util.h"
00043
00044
00045 #include "nfft3.h"
00046
00047 #include "infft.h"
00048
00049
00050 #include "legendre.h"
00051
00052
00053 #include "api.h"
00054
00055
00065 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
00066
00072 #define NFSFT_DEFAULT_THRESHOLD 1000
00073
00079 #define NFSFT_BREAK_EVEN 5
00080
00087 static struct nfsft_wisdom wisdom = {false,0U,-1,-1,0,0,0,0,0};
00088
00111 static inline void c2e(nfsft_plan *plan)
00112 {
00113 int k;
00114 int n;
00115 double _Complex last;
00116 double _Complex act;
00117 double _Complex *xp;
00118 double _Complex *xm;
00119 int low;
00120 int up;
00121 int lowe;
00122 int upe;
00124
00125 memset(plan->f_hat_intern,0U,(2*plan->N+2)*sizeof(double _Complex));
00126
00127
00128 lowe = -plan->N + (plan->N%2);
00129 upe = -lowe;
00130
00131
00132 for (n = lowe; n <= upe; n += 2)
00133 {
00134
00135
00136 xm = &(plan->f_hat_intern[NFSFT_INDEX(-1,n,plan)]);
00137 xp = &(plan->f_hat_intern[NFSFT_INDEX(+1,n,plan)]);
00138 for(k = 1; k <= plan->N; k++)
00139 {
00140 *xp *= 0.5;
00141 *xm-- = *xp++;
00142 }
00143
00144
00145 *xm = 0.0;
00146 }
00147
00148
00149 low = -plan->N + (1-plan->N%2);
00150 up = -low;
00151
00152
00153
00154 for (n = low; n <= up; n += 2)
00155 {
00156
00157
00158
00159 plan->f_hat_intern[NFSFT_INDEX(0,n,plan)] *= 2.0;
00160 xp = &(plan->f_hat_intern[NFSFT_INDEX(-plan->N-1,n,plan)]);
00161
00162
00163 *xp++ = 0.0;
00164 xm = &(plan->f_hat_intern[NFSFT_INDEX(plan->N,n,plan)]);
00165 last = *xm;
00166 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
00167 *xp++ = -(*xm--);
00168 for (k = plan->N-1; k > 0; k--)
00169 {
00170 act = *xm;
00171 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
00172 *xp++ = -(*xm--);
00173 last = act;
00174 }
00175 *xm = 0.0;
00176 }
00177 }
00178
00189 static inline void c2e_transposed(nfsft_plan *plan)
00190 {
00191 int k;
00192 int n;
00193 double _Complex last;
00194 double _Complex act;
00195 double _Complex *xp;
00196 double _Complex *xm;
00197 int low;
00198 int up;
00199 int lowe;
00200 int upe;
00202
00203 lowe = -plan->N + (plan->N%2);
00204 upe = -lowe;
00205
00206
00207 for (n = lowe; n <= upe; n += 2)
00208 {
00209
00210
00211 xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
00212 xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
00213 for(k = 1; k <= plan->N; k++)
00214 {
00215 *xp += *xm--;
00216 *xp++ *= 0.5;;
00217 }
00218 }
00219
00220
00221 low = -plan->N + (1-plan->N%2);
00222 up = -low;
00223
00224
00225 for (n = low; n <= up; n += 2)
00226 {
00227
00228
00229 xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
00230 xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
00231 for(k = 1; k <= plan->N; k++)
00232 {
00233 *xp++ -= *xm--;
00234 }
00235
00236 plan->f_hat[NFSFT_INDEX(0,n,plan)] =
00237 -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(1,n,plan)];
00238 last = plan->f_hat[NFSFT_INDEX(1,n,plan)];
00239 plan->f_hat[NFSFT_INDEX(1,n,plan)] =
00240 -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(2,n,plan)];
00241
00242 xp = &(plan->f_hat[NFSFT_INDEX(2,n,plan)]);
00243 for (k = 2; k < plan->N; k++)
00244 {
00245 act = *xp;
00246 *xp = -0.25 * _Complex_I * (xp[1] - last);
00247 xp++;
00248 last = act;
00249 }
00250 *xp = 0.25 * _Complex_I * last;
00251
00252 plan->f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
00253 }
00254 }
00255
00256 void nfsft_init(nfsft_plan *plan, int N, int M)
00257 {
00258
00259 nfsft_init_advanced(plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
00260 NFSFT_MALLOC_F_HAT);
00261 }
00262
00263 void nfsft_init_advanced(nfsft_plan* plan, int N, int M,
00264 unsigned int flags)
00265 {
00266
00267 nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT |
00268 FFT_OUT_OF_PLACE, NFSFT_DEFAULT_NFFT_CUTOFF);
00269 }
00270
00271 void nfsft_init_guru(nfsft_plan *plan, int N, int M, unsigned int flags,
00272 unsigned int nfft_flags, int nfft_cutoff)
00273 {
00274 int *nfft_size;
00275 int *fftw_size;
00276
00277
00278 plan->flags = flags;
00279
00280
00281 plan->N = N;
00282 plan->M_total = M;
00283
00284
00285
00286
00287
00288
00289
00290 plan->N_total = (2*plan->N+2)*(2*plan->N+2);
00291
00292
00293
00294 if (plan->flags & NFSFT_PRESERVE_F_HAT)
00295 {
00296 plan->f_hat_intern = (double _Complex*) nfft_malloc(plan->N_total*
00297 sizeof(double _Complex));
00298 }
00299
00300
00301 if (plan->flags & NFSFT_MALLOC_F_HAT)
00302 {
00303 plan->f_hat = (double _Complex*) nfft_malloc(plan->N_total*
00304 sizeof(double _Complex));
00305 }
00306
00307
00308 if (plan->flags & NFSFT_MALLOC_F)
00309 {
00310 plan->f = (double _Complex*) nfft_malloc(plan->M_total*sizeof(double _Complex));
00311 }
00312
00313
00314 if (plan->flags & NFSFT_MALLOC_X)
00315 {
00316 plan->x = (double*) nfft_malloc(plan->M_total*2*sizeof(double));
00317 }
00318
00319
00320 if (plan->flags & NFSFT_NO_FAST_ALGORITHM)
00321 {
00322 }
00323 else
00324 {
00325 nfft_size = (int*)nfft_malloc(2*sizeof(int));
00326 fftw_size = (int*)nfft_malloc(2*sizeof(int));
00327
00329 nfft_size[0] = 2*plan->N+2;
00330 nfft_size[1] = 2*plan->N+2;
00331 fftw_size[0] = 4*plan->N;
00332 fftw_size[1] = 4*plan->N;
00333
00335 nfft_init_guru(&plan->plan_nfft, 2, nfft_size, plan->M_total, fftw_size,
00336 nfft_cutoff, nfft_flags,
00337 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
00338
00339
00340 plan->plan_nfft.x = plan->x;
00341
00342 plan->plan_nfft.f = plan->f;
00343
00344 plan->plan_nfft.f_hat = plan->f_hat;
00345
00348
00349
00350
00351
00352 nfft_free(nfft_size);
00353 nfft_free(fftw_size);
00354 }
00355
00356 plan->mv_trafo = (void (*) (void* ))nfsft_trafo;
00357 plan->mv_adjoint = (void (*) (void* ))nfsft_adjoint;
00358 }
00359
00360 void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags,
00361 unsigned int fpt_flags)
00362 {
00363 int n;
00364
00365
00366 if (wisdom.initialized == true)
00367 {
00368 return;
00369 }
00370
00371 #ifdef _OPENMP
00372 #pragma omp parallel default(shared)
00373 {
00374 int nthreads = omp_get_num_threads();
00375 int threadid = omp_get_thread_num();
00376 #pragma omp single
00377 {
00378 wisdom.nthreads = nthreads;
00379 }
00380 }
00381 #endif
00382
00383
00384 wisdom.flags = nfsft_flags;
00385
00386
00387
00388 X(next_power_of_2_exp)(N,&wisdom.N_MAX,&wisdom.T_MAX);
00389
00390
00391 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
00392 {
00393 wisdom.alpha = NULL;
00394 wisdom.beta = NULL;
00395 wisdom.gamma = NULL;
00396 }
00397 else
00398 {
00399
00400 wisdom.alpha = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
00401 sizeof(double));
00402 wisdom.beta = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
00403 sizeof(double));
00404 wisdom.gamma = (double*) nfft_malloc((wisdom.N_MAX+1)*(wisdom.N_MAX+2)*
00405 sizeof(double));
00407
00408
00409 alpha_al_all(wisdom.alpha,wisdom.N_MAX);
00410 beta_al_all(wisdom.beta,wisdom.N_MAX);
00411 gamma_al_all(wisdom.gamma,wisdom.N_MAX);
00412 }
00413
00414
00415 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
00416 {
00417 }
00418 else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
00419 {
00420
00421
00422
00423 if (wisdom.alpha != NULL)
00424 {
00425 #ifdef _OPENMP
00426 #pragma omp parallel default(shared) private(n)
00427 {
00428 int nthreads = omp_get_num_threads();
00429 int threadid = omp_get_thread_num();
00430 #pragma omp single
00431 {
00432 wisdom.nthreads = nthreads;
00433 wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
00434 }
00435
00436 wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
00437 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
00438 for (n = 0; n <= wisdom.N_MAX; n++)
00439 fpt_precompute(wisdom.set_threads[threadid],n,&wisdom.alpha[ROW(n)],
00440 &wisdom.beta[ROW(n)], &wisdom.gamma[ROW(n)],n,kappa);
00441 }
00442
00443 #else
00444
00445
00446 wisdom.set = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
00447 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
00448 for (n = 0; n <= wisdom.N_MAX; n++)
00449 {
00450
00451
00452
00453 fpt_precompute(wisdom.set,n,&wisdom.alpha[ROW(n)],&wisdom.beta[ROW(n)],
00454 &wisdom.gamma[ROW(n)],n,kappa);
00455 }
00456 #endif
00457 }
00458 else
00459 {
00460 #ifdef _OPENMP
00461 #pragma omp parallel default(shared) private(n)
00462 {
00463 double *alpha, *beta, *gamma;
00464 int nthreads = omp_get_num_threads();
00465 int threadid = omp_get_thread_num();
00466 #pragma omp single
00467 {
00468 wisdom.nthreads = nthreads;
00469 wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*sizeof(fpt_set));
00470 }
00471
00472 alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
00473 beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
00474 gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
00475 wisdom.set_threads[threadid] = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
00476 fpt_flags | FPT_AL_SYMMETRY);
00477
00478 for (n = 0; n <= wisdom.N_MAX; n++)
00479 {
00480 alpha_al_row(alpha,wisdom.N_MAX,n);
00481 beta_al_row(beta,wisdom.N_MAX,n);
00482 gamma_al_row(gamma,wisdom.N_MAX,n);
00483
00484
00485 fpt_precompute(wisdom.set_threads[threadid],n,alpha,beta,gamma,n,
00486 kappa);
00487 }
00488
00489 nfft_free(alpha);
00490 nfft_free(beta);
00491 nfft_free(gamma);
00492 }
00493 #else
00494
00495 wisdom.alpha = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
00496 wisdom.beta = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
00497 wisdom.gamma = (double*) nfft_malloc((wisdom.N_MAX+2)*sizeof(double));
00498 wisdom.set = fpt_init(wisdom.N_MAX+1, wisdom.T_MAX,
00499 fpt_flags | FPT_AL_SYMMETRY);
00500 for (n = 0; n <= wisdom.N_MAX; n++)
00501 {
00502
00503
00504
00505
00506 alpha_al_row(wisdom.alpha,wisdom.N_MAX,n);
00507 beta_al_row(wisdom.beta,wisdom.N_MAX,n);
00508 gamma_al_row(wisdom.gamma,wisdom.N_MAX,n);
00509
00510
00511 fpt_precompute(wisdom.set,n,wisdom.alpha,wisdom.beta,wisdom.gamma,n,
00512 kappa);
00513 }
00514
00515 nfft_free(wisdom.alpha);
00516 nfft_free(wisdom.beta);
00517 nfft_free(wisdom.gamma);
00518 #endif
00519 wisdom.alpha = NULL;
00520 wisdom.beta = NULL;
00521 wisdom.gamma = NULL;
00522 }
00523 }
00524
00525
00526 wisdom.initialized = true;
00527 }
00528
00529 void nfsft_forget(void)
00530 {
00531
00532 if (wisdom.initialized == false)
00533 {
00534
00535 return;
00536 }
00537
00538
00539 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
00540 {
00541 }
00542 else
00543 {
00544
00545 nfft_free(wisdom.alpha);
00546 nfft_free(wisdom.beta);
00547 nfft_free(wisdom.gamma);
00548 wisdom.alpha = NULL;
00549 wisdom.beta = NULL;
00550 wisdom.gamma = NULL;
00551 }
00552
00553
00554 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
00555 {
00556 }
00557 else if (wisdom.N_MAX >= NFSFT_BREAK_EVEN)
00558 {
00559 #ifdef _OPENMP
00560 int k;
00561 for (k = 0; k < wisdom.nthreads; k++)
00562 fpt_finalize(wisdom.set_threads[k]);
00563 nfft_free(wisdom.set_threads);
00564 #else
00565
00566 fpt_finalize(wisdom.set);
00567 #endif
00568 }
00569
00570
00571 wisdom.initialized = false;
00572 }
00573
00574
00575 void nfsft_finalize(nfsft_plan *plan)
00576 {
00577 if (!plan)
00578 return;
00579
00580
00581 nfft_finalize(&plan->plan_nfft);
00582
00583
00584
00585 if (plan->flags & NFSFT_PRESERVE_F_HAT)
00586 {
00587 nfft_free(plan->f_hat_intern);
00588 }
00589
00590
00591 if (plan->flags & NFSFT_MALLOC_F_HAT)
00592 {
00593
00594 nfft_free(plan->f_hat);
00595 }
00596
00597
00598 if (plan->flags & NFSFT_MALLOC_F)
00599 {
00600
00601 nfft_free(plan->f);
00602 }
00603
00604
00605 if (plan->flags & NFSFT_MALLOC_X)
00606 {
00607
00608 nfft_free(plan->x);
00609 }
00610 }
00611
00612 void nfsft_trafo_direct(nfsft_plan *plan)
00613 {
00614 int m;
00615 int k;
00616 int n;
00617 int n_abs;
00618 double *alpha;
00619
00620
00621 double *gamma;
00622
00623
00624 double _Complex *a;
00625 double _Complex it1;
00626 double _Complex it2;
00627 double _Complex temp;
00628 double _Complex f_m;
00629
00630 double stheta;
00631 double sphi;
00632
00633 #ifdef MEASURE_TIME
00634 plan->MEASURE_TIME_t[0] = 0.0;
00635 plan->MEASURE_TIME_t[1] = 0.0;
00636 plan->MEASURE_TIME_t[2] = 0.0;
00637 #endif
00638
00639 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
00640 {
00641 return;
00642 }
00643
00644
00645 if (plan->flags & NFSFT_PRESERVE_F_HAT)
00646 {
00647 memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
00648 sizeof(double _Complex));
00649 }
00650 else
00651 {
00652 plan->f_hat_intern = plan->f_hat;
00653 }
00654
00655
00656
00657
00658 if (plan->flags & NFSFT_NORMALIZED)
00659 {
00660
00661 #pragma omp parallel for default(shared) private(k,n)
00662 for (k = 0; k <= plan->N; k++)
00663 {
00664 for (n = -k; n <= k; n++)
00665 {
00666
00667 plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
00668 sqrt((2*k+1)/(4.0*PI));
00669 }
00670 }
00671 }
00672
00673
00674 if (plan->N == 0)
00675 {
00676
00677
00678
00679 for (m = 0; m < plan->M_total; m++)
00680 {
00681 plan->f[m] = plan->f_hat_intern[NFSFT_INDEX(0,0,plan)];
00682 }
00683 }
00684 else
00685 {
00686
00687
00688
00689
00690
00691
00692
00693 #pragma omp parallel for default(shared) private(m,stheta,sphi,f_m,n,a,n_abs,alpha,gamma,it2,it1,k,temp)
00694 for (m = 0; m < plan->M_total; m++)
00695 {
00696
00697 stheta = cos(2.0*PI*plan->x[2*m+1]);
00698
00699 sphi = 2.0*PI*plan->x[2*m];
00700
00701
00702 f_m = 0.0;
00703
00704
00705
00706
00707
00708 for (n = -plan->N; n <= plan->N; n++)
00709 {
00710
00711 a = &(plan->f_hat_intern[NFSFT_INDEX(0,n,plan)]);
00712
00713
00714 n_abs = abs(n);
00715
00716
00717 alpha = &(wisdom.alpha[ROW(n_abs)]);
00718 gamma = &(wisdom.gamma[ROW(n_abs)]);
00719
00720
00721 it2 = a[plan->N];
00722 it1 = a[plan->N-1];
00723 for (k = plan->N; k > n_abs + 1; k--)
00724 {
00725 temp = a[k-2] + it2 * gamma[k];
00726 it2 = it1 + it2 * alpha[k] * stheta;
00727 it1 = temp;
00728 }
00729
00730
00731 if (n_abs < plan->N)
00732 {
00733 it2 = it1 + it2 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
00734 }
00735
00736
00737
00738
00739
00740
00741
00742 f_m += it2 * wisdom.gamma[ROW(n_abs)] *
00743 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
00744 }
00745
00746
00747 plan->f[m] = f_m;
00748 }
00749 }
00750 }
00751
00752 void nfsft_adjoint_direct(nfsft_plan *plan)
00753 {
00754 int m;
00755 int k;
00756 int n;
00757 int n_abs;
00758 double *alpha;
00759
00760
00761 double *gamma;
00762
00763
00764 double _Complex it1;
00765 double _Complex it2;
00766 double _Complex temp;
00767 double stheta;
00768 double sphi;
00769
00770 #ifdef MEASURE_TIME
00771 plan->MEASURE_TIME_t[0] = 0.0;
00772 plan->MEASURE_TIME_t[1] = 0.0;
00773 plan->MEASURE_TIME_t[2] = 0.0;
00774 #endif
00775
00776 if (wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
00777 {
00778 return;
00779 }
00780
00781
00782 memset(plan->f_hat,0U,plan->N_total*sizeof(double _Complex));
00783
00784
00785 if (plan->N == 0)
00786 {
00787
00788
00789
00790 for (m = 0; m < plan->M_total; m++)
00791 {
00792 plan->f_hat[NFSFT_INDEX(0,0,plan)] += plan->f[m];
00793 }
00794 }
00795 else
00796 {
00797
00798
00799 #ifdef _OPENMP
00800
00801 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,stheta,sphi,it2,it1,k,temp)
00802 for (n = -plan->N; n <= plan->N; n++)
00803 {
00804
00805 n_abs = abs(n);
00806
00807
00808 alpha = &(wisdom.alpha[ROW(n_abs)]);
00809 gamma = &(wisdom.gamma[ROW(n_abs)]);
00810
00811
00812 for (m = 0; m < plan->M_total; m++)
00813 {
00814
00815 stheta = cos(2.0*PI*plan->x[2*m+1]);
00816
00817 sphi = 2.0*PI*plan->x[2*m];
00818
00819
00820
00821
00822 it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
00823 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
00824 plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
00825 it2 = 0.0;
00826
00827 if (n_abs < plan->N)
00828 {
00829 it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
00830 plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
00831 }
00832
00833
00834 for (k = n_abs+2; k <= plan->N; k++)
00835 {
00836 temp = it2;
00837 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
00838 it1 = temp;
00839 plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
00840 }
00841 }
00842 }
00843 #else
00844
00845 for (m = 0; m < plan->M_total; m++)
00846 {
00847
00848 stheta = cos(2.0*PI*plan->x[2*m+1]);
00849
00850 sphi = 2.0*PI*plan->x[2*m];
00851
00852
00853 for (n = -plan->N; n <= plan->N; n++)
00854 {
00855
00856 n_abs = abs(n);
00857
00858
00859 alpha = &(wisdom.alpha[ROW(n_abs)]);
00860 gamma = &(wisdom.gamma[ROW(n_abs)]);
00861
00862
00863
00864
00865 it1 = plan->f[m] * wisdom.gamma[ROW(n_abs)] *
00866 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
00867 plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
00868 it2 = 0.0;
00869
00870 if (n_abs < plan->N)
00871 {
00872 it2 = it1 * wisdom.alpha[ROWK(n_abs)+1] * stheta;
00873 plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
00874 }
00875
00876
00877 for (k = n_abs+2; k <= plan->N; k++)
00878 {
00879 temp = it2;
00880 it2 = alpha[k] * stheta * it2 + gamma[k] * it1;
00881 it1 = temp;
00882 plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
00883 }
00884 }
00885 }
00886 #endif
00887 }
00888
00889
00890
00891
00892 if (plan->flags & NFSFT_NORMALIZED)
00893 {
00894
00895 #pragma omp parallel for default(shared) private(k,n)
00896 for (k = 0; k <= plan->N; k++)
00897 {
00898 for (n = -k; n <= k; n++)
00899 {
00900
00901 plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
00902 sqrt((2*k+1)/(4.0*PI));
00903 }
00904 }
00905 }
00906
00907
00908 if (plan->flags & NFSFT_ZERO_F_HAT)
00909 {
00910 for (n = -plan->N; n <= plan->N+1; n++)
00911 {
00912 memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
00913 (plan->N+1+abs(n))*sizeof(double _Complex));
00914 }
00915 }
00916 }
00917
00918 void nfsft_trafo(nfsft_plan *plan)
00919 {
00920 int k;
00921 int n;
00922 #ifdef MEASURE_TIME
00923 ticks t0, t1;
00924 #endif
00925 #ifdef DEBUG
00926 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
00927 t_pre = 0.0;
00928 t_norm = 0.0;
00929 t_fpt = 0.0;
00930 t_c2e = 0.0;
00931 t_nfft = 0.0;
00932 #endif
00933
00934 #ifdef MEASURE_TIME
00935 plan->MEASURE_TIME_t[0] = 0.0;
00936 plan->MEASURE_TIME_t[1] = 0.0;
00937 plan->MEASURE_TIME_t[2] = 0.0;
00938 #endif
00939
00940 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
00941 {
00942 return;
00943 }
00944
00945
00946
00947
00948 if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
00949 {
00950 return;
00951 }
00952
00953
00954 if (plan->N < NFSFT_BREAK_EVEN)
00955 {
00956
00957 nfsft_trafo_direct(plan);
00958 }
00959
00960
00961 else if (plan->N <= wisdom.N_MAX)
00962 {
00963
00964 if (plan->flags & NFSFT_PRESERVE_F_HAT)
00965 {
00966 memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
00967 sizeof(double _Complex));
00968 }
00969 else
00970 {
00971 plan->f_hat_intern = plan->f_hat;
00972 }
00973
00974
00975
00976
00977 plan->plan_nfft.x = plan->x;
00978 plan->plan_nfft.f = plan->f;
00979 plan->plan_nfft.f_hat = plan->f_hat_intern;
00980
00981
00982
00983
00984 if (plan->flags & NFSFT_NORMALIZED)
00985 {
00986
00987 #pragma omp parallel for default(shared) private(k,n)
00988 for (k = 0; k <= plan->N; k++)
00989 {
00990 for (n = -k; n <= k; n++)
00991 {
00992
00993 plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
00994 sqrt((2*k+1)/(4.0*PI));
00995 }
00996 }
00997 }
00998
00999 #ifdef MEASURE_TIME
01000 t0 = getticks();
01001 #endif
01002
01003 if (plan->flags & NFSFT_USE_DPT)
01004 {
01005 #ifdef _OPENMP
01006 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
01007 for (n = -plan->N; n <= plan->N; n++)
01008 fpt_trafo_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
01009 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
01010 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
01011 plan->N,0U);
01012 #else
01013
01014 for (n = -plan->N; n <= plan->N; n++)
01015 {
01016
01017
01018 fpt_trafo_direct(wisdom.set,abs(n),
01019 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
01020 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
01021 plan->N,0U);
01022 }
01023 #endif
01024 }
01025 else
01026 {
01027 #ifdef _OPENMP
01028 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
01029 for (n = -plan->N; n <= plan->N; n++)
01030 fpt_trafo(wisdom.set_threads[omp_get_thread_num()],abs(n),
01031 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
01032 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
01033 plan->N,0U);
01034 #else
01035
01036 for (n = -plan->N; n <= plan->N; n++)
01037 {
01038
01039
01040 fpt_trafo(wisdom.set,abs(n),
01041 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
01042 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
01043 plan->N,0U);
01044 }
01045 #endif
01046 }
01047 #ifdef MEASURE_TIME
01048 t1 = getticks();
01049 plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
01050 #endif
01051
01052 #ifdef MEASURE_TIME
01053 t0 = getticks();
01054 #endif
01055
01056 c2e(plan);
01057 #ifdef MEASURE_TIME
01058 t1 = getticks();
01059 plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
01060 #endif
01061
01062 #ifdef MEASURE_TIME
01063 t0 = getticks();
01064 #endif
01065
01066
01067
01068 if (plan->flags & NFSFT_USE_NDFT)
01069 {
01070
01071 nfft_trafo_direct(&plan->plan_nfft);
01072 }
01073 else
01074 {
01075
01076
01077 nfft_trafo_2d(&plan->plan_nfft);
01078 }
01079 #ifdef MEASURE_TIME
01080 t1 = getticks();
01081 plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
01082 #endif
01083 }
01084 }
01085
01086 void nfsft_adjoint(nfsft_plan *plan)
01087 {
01088 int k;
01089 int n;
01090 #ifdef MEASURE_TIME
01091 ticks t0, t1;
01092 #endif
01093
01094 #ifdef MEASURE_TIME
01095 plan->MEASURE_TIME_t[0] = 0.0;
01096 plan->MEASURE_TIME_t[1] = 0.0;
01097 plan->MEASURE_TIME_t[2] = 0.0;
01098 #endif
01099
01100 if (wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
01101 {
01102 return;
01103 }
01104
01105
01106
01107
01108 if (wisdom.initialized == 0 || plan->N > wisdom.N_MAX)
01109 {
01110 return;
01111 }
01112
01113
01114 if (plan->N < NFSFT_BREAK_EVEN)
01115 {
01116
01117 nfsft_adjoint_direct(plan);
01118 }
01119
01120 else if (plan->N <= wisdom.N_MAX)
01121 {
01122
01123
01124
01125
01126
01127 plan->plan_nfft.x = plan->x;
01128 plan->plan_nfft.f = plan->f;
01129 plan->plan_nfft.f_hat = plan->f_hat;
01130
01131 #ifdef MEASURE_TIME
01132 t0 = getticks();
01133 #endif
01134
01135
01136
01137 if (plan->flags & NFSFT_USE_NDFT)
01138 {
01139
01140
01141
01142 nfft_adjoint_direct(&plan->plan_nfft);
01143 }
01144 else
01145 {
01146
01147
01148
01149
01150 nfft_adjoint_2d(&plan->plan_nfft);
01151 }
01152 #ifdef MEASURE_TIME
01153 t1 = getticks();
01154 plan->MEASURE_TIME_t[2] = nfft_elapsed_seconds(t1,t0);
01155 #endif
01156
01157
01158
01159 #ifdef MEASURE_TIME
01160 t0 = getticks();
01161 #endif
01162
01163 c2e_transposed(plan);
01164 #ifdef MEASURE_TIME
01165 t1 = getticks();
01166 plan->MEASURE_TIME_t[1] = nfft_elapsed_seconds(t1,t0);
01167 #endif
01168
01169 #ifdef MEASURE_TIME
01170 t0 = getticks();
01171 #endif
01172
01173 if (plan->flags & NFSFT_USE_DPT)
01174 {
01175 #ifdef _OPENMP
01176 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
01177 for (n = -plan->N; n <= plan->N; n++)
01178 fpt_transposed_direct(wisdom.set_threads[omp_get_thread_num()],abs(n),
01179 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
01180 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
01181 plan->N,0U);
01182 #else
01183
01184 for (n = -plan->N; n <= plan->N; n++)
01185 {
01186
01187
01188 fpt_transposed_direct(wisdom.set,abs(n),
01189 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
01190 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
01191 plan->N,0U);
01192 }
01193 #endif
01194 }
01195 else
01196 {
01197 #ifdef _OPENMP
01198 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
01199 for (n = -plan->N; n <= plan->N; n++)
01200 fpt_transposed(wisdom.set_threads[omp_get_thread_num()],abs(n),
01201 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
01202 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
01203 plan->N,0U);
01204 #else
01205
01206
01207 for (n = -plan->N; n <= plan->N; n++)
01208 {
01209
01210
01211 fpt_transposed(wisdom.set,abs(n),
01212 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
01213 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
01214 plan->N,0U);
01215 }
01216 #endif
01217 }
01218 #ifdef MEASURE_TIME
01219 t1 = getticks();
01220 plan->MEASURE_TIME_t[0] = nfft_elapsed_seconds(t1,t0);
01221 #endif
01222
01223
01224
01225
01226 if (plan->flags & NFSFT_NORMALIZED)
01227 {
01228
01229
01230
01231 #pragma omp parallel for default(shared) private(k,n)
01232 for (k = 0; k <= plan->N; k++)
01233 {
01234 for (n = -k; n <= k; n++)
01235 {
01236
01237 plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
01238 sqrt((2*k+1)/(4.0*PI));
01239 }
01240 }
01241 }
01242
01243
01244 if (plan->flags & NFSFT_ZERO_F_HAT)
01245 {
01246
01247
01248 for (n = -plan->N; n <= plan->N+1; n++)
01249 {
01250 memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
01251 (plan->N+1+abs(n))*sizeof(double _Complex));
01252 }
01253 }
01254
01255
01256 }
01257 }
01258
01259 void nfsft_precompute_x(nfsft_plan *plan)
01260 {
01261
01262 plan->plan_nfft.x = plan->x;
01263
01264
01265 if (plan->plan_nfft.nfft_flags & PRE_ONE_PSI)
01266 nfft_precompute_one_psi(&plan->plan_nfft);
01267 }
01268