00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00026 #include <stdio.h>
00027 #include <math.h>
00028 #include <string.h>
00029 #include <stdlib.h>
00030
00031 #include <complex.h>
00032
00033 #include "nfft3util.h"
00034 #include "nfft3.h"
00035 #include "infft.h"
00036
00059 #define MACRO_ndft_init_result_trafo \
00060 memset(f,0,ths->M_total*sizeof(double _Complex));
00061 #define MACRO_ndft_init_result_conjugated MACRO_ndft_init_result_trafo
00062 #define MACRO_ndft_init_result_adjoint \
00063 memset(f_hat,0,ths->N_total*sizeof(double _Complex));
00064 #define MACRO_ndft_init_result_transposed MACRO_ndft_init_result_adjoint
00065
00066 #define MACRO_ndft_sign_trafo K2PI*ths->x[j*ths->d+t]
00067 #define MACRO_ndft_sign_conjugated -K2PI*ths->x[j*ths->d+t]
00068 #define MACRO_ndft_sign_adjoint K2PI*ths->x[j*ths->d+t]
00069 #define MACRO_ndft_sign_transposed -K2PI*ths->x[j*ths->d+t]
00070
00071 #define MACRO_init_k_N_Omega_x(which_one) \
00072 { \
00073 for(t = 0; t < ths->d; t++) \
00074 { \
00075 k[t] = -ths->N[t]/2; \
00076 x[t] = MACRO_ndft_sign_ ## which_one; \
00077 Omega[t+1] = k[t]*x[t]+Omega[t]; \
00078 } \
00079 omega=Omega[ths->d]; \
00080 } \
00081
00082 #define MACRO_count_k_N_Omega \
00083 { \
00084 for(t = ths->d-1; (t >= 1) && (k[t] == ths->N[t]/2-1); t--) \
00085 k[t]-= ths->N[t]-1; \
00086 \
00087 k[t]++; \
00088 \
00089 for(t2 = t; t2 < ths->d; t2++) \
00090 Omega[t2+1] = k[t2]*x[t2]+Omega[t2]; \
00091 \
00092 omega = Omega[ths->d]; \
00093 }
00094
00095 #define MACRO_ndft_compute_trafo f[j] += f_hat[k_L]*cexp(-_Complex_I*omega);
00096
00097 #define MACRO_ndft_compute_conjugated MACRO_ndft_compute_trafo
00098
00099 #define MACRO_ndft_compute_adjoint f_hat[k_L] += f[j]*cexp(+ _Complex_I*omega);
00100
00101 #define MACRO_ndft_compute_transposed MACRO_ndft_compute_adjoint
00102
00103 #if defined(HAVE_LIBDISPATCH)
00104 #define FOR(VAR,VAL) \
00105 dispatch_apply(VAL, dispatch_get_global_queue(0, 0), ^(size_t VAR)
00106 #else
00107 #define FOR(VAR,VAL) \
00108 int VAR; \
00109 for (VAR = 0; VAR < VAL; VAR++)
00110 #endif
00111
00112 #if defined(HAVE_LIBDISPATCH)
00113 #define END_FOR );
00114 #else
00115 #define END_FOR
00116 #endif
00117
00118 #define MACRO_ndft(which_one) \
00119 void ndft_ ## which_one (nfft_plan *ths) \
00120 { \
00121 C *f_hat = ths->f_hat, *f = ths->f; \
00122 \
00123 MACRO_ndft_init_result_ ## which_one \
00124 \
00125 if(ths->d == 1) \
00126 { \
00127 const int t = 0; \
00128 FOR(j,ths->M_total) \
00129 { \
00130 int k_L; \
00131 for(k_L = 0; k_L < ths->N_total; k_L++) \
00132 { \
00133 R omega = (k_L - ths->N_total/2) * MACRO_ndft_sign_ ## which_one; \
00134 MACRO_ndft_compute_ ## which_one; \
00135 } \
00136 } \
00137 END_FOR \
00138 } \
00139 else \
00140 { \
00141 double _Complex *f_hat_k; \
00142 FOR(j,ths->M_total) \
00143 { \
00144 int t, t2, k_L; \
00145 double x[ths->d]; \
00146 int k[ths->d]; \
00147 double omega, Omega[ths->d+1]; \
00148 Omega[0] = K(0.0); \
00149 MACRO_init_k_N_Omega_x(which_one); \
00150 for(k_L = 0; k_L < ths->N_total; k_L++) \
00151 { \
00152 MACRO_ndft_compute_ ## which_one; \
00153 MACRO_count_k_N_Omega; \
00154 } \
00155 } \
00156 END_FOR \
00157 } \
00158 }
00159
00160
00163 MACRO_ndft(trafo)
00164 MACRO_ndft(adjoint)
00165
00191 static void nfft_uo(const nfft_plan *ths,const int j,int *up,int *op,const int act_dim)
00192 {
00193 double xj=ths->x[j*ths->d+act_dim];
00194 int c = LRINT(xj * ths->n[act_dim]);
00195
00196 if(xj < 0)
00197 {
00198 (*up) = c-1-(ths->m);
00199 (*op) = c +(ths->m);
00200 }
00201 else
00202 {
00203 (*up) = c -(ths->m);
00204 (*op) = c+1+(ths->m);
00205 }
00206 }
00207
00208 static void nfft_uo2(int *u, int *o, const double x, const int n, const int m)
00209 {
00210 int c = LRINT(x * n);
00211
00212 if(x < 0)
00213 {
00214 *u=(c-1-m+n)%n;
00215 *o=(c+ m+n)%n;
00216 }
00217 else
00218 {
00219 *u=(c -m+n)%n;
00220 *o=(c+1+m+n)%n;
00221 }
00222 }
00223
00224
00225 #define MACRO_nfft_D_compute_A { \
00226 g_hat[k_plain[ths->d]] = f_hat[ks_plain[ths->d]] * c_phi_inv_k[ths->d]; \
00227 }
00228
00229 #define MACRO_nfft_D_compute_T { \
00230 f_hat[ks_plain[ths->d]] = g_hat[k_plain[ths->d]] * c_phi_inv_k[ths->d]; \
00231 }
00232
00233 #define MACRO_nfft_D_init_result_A memset(g_hat,0,ths->n_total* \
00234 sizeof(double _Complex));
00235 #define MACRO_nfft_D_init_result_T memset(f_hat,0,ths->N_total* \
00236 sizeof(double _Complex));
00237
00238 #define MACRO_with_PRE_PHI_HUT * ths->c_phi_inv[t2][ks[t2]];
00239 #define MACRO_without_PRE_PHI_HUT / (PHI_HUT(ks[t2]-ths->N[t2]/2,t2));
00240
00241 #define MACRO_init_k_ks { \
00242 for(t = ths->d-1; t>=0; t--) \
00243 { \
00244 kp[t]= 0; \
00245 k[t] = 0; \
00246 ks[t] = ths->N[t]/2; \
00247 } \
00248 t++; \
00249 }
00250
00251 #define MACRO_update_c_phi_inv_k(which_one) { \
00252 for(t2=t; t2<ths->d; t2++) \
00253 { \
00254 c_phi_inv_k[t2+1]= c_phi_inv_k[t2] MACRO_ ##which_one; \
00255 ks_plain[t2+1]= ks_plain[t2]*ths->N[t2]+ks[t2]; \
00256 k_plain[t2+1]= k_plain[t2]*ths->n[t2]+k[t2]; \
00257 } \
00258 }
00259
00260 #define MACRO_count_k_ks { \
00261 for(t=ths->d-1; (t>0)&& (kp[t]==ths->N[t]-1); t--) \
00262 { \
00263 kp[t]= 0; \
00264 k[t]= 0; \
00265 ks[t]= ths->N[t]/2; \
00266 } \
00267 \
00268 kp[t]++; k[t]++; ks[t]++; \
00269 if(kp[t]==ths->N[t]/2) \
00270 { \
00271 k[t]= ths->n[t]-ths->N[t]/2; \
00272 ks[t]= 0; \
00273 } \
00274 } \
00275
00276
00280 #define MACRO_nfft_D(which_one) \
00281 static inline void nfft_D_ ## which_one (nfft_plan *ths) \
00282 { \
00283 int t, t2; \
00284 int k_L; \
00285 int kp[ths->d]; \
00286 int k[ths->d]; \
00287 int ks[ths->d]; \
00288 double c_phi_inv_k[ths->d+1]; \
00289 int k_plain[ths->d+1]; \
00290 int ks_plain[ths->d+1]; \
00291 double _Complex *f_hat, *g_hat; \
00292 \
00293 f_hat=ths->f_hat; g_hat=ths->g_hat; \
00294 MACRO_nfft_D_init_result_ ## which_one; \
00295 \
00296 c_phi_inv_k[0]=1; \
00297 k_plain[0]=0; \
00298 ks_plain[0]=0; \
00299 \
00300 if(ths->nfft_flags & PRE_PHI_HUT) \
00301 { \
00302 MACRO_init_k_ks; \
00303 \
00304 for(k_L=0; k_L<ths->N_total; k_L++) \
00305 { \
00306 MACRO_update_c_phi_inv_k(with_PRE_PHI_HUT); \
00307 \
00308 MACRO_nfft_D_compute_ ## which_one; \
00309 \
00310 MACRO_count_k_ks; \
00311 } \
00312 } \
00313 else \
00314 { \
00315 MACRO_init_k_ks; \
00316 \
00317 for(k_L=0; k_L<ths->N_total; k_L++) \
00318 { \
00319 MACRO_update_c_phi_inv_k(without_PRE_PHI_HUT); \
00320 \
00321 MACRO_nfft_D_compute_ ## which_one; \
00322 \
00323 MACRO_count_k_ks; \
00324 } \
00325 } \
00326 }
00327
00328 MACRO_nfft_D(A)
00329 MACRO_nfft_D(T)
00330
00334 #define MACRO_nfft_B_init_result_A memset(f,0,ths->M_total* \
00335 sizeof(double _Complex));
00336 #define MACRO_nfft_B_init_result_T memset(g,0,ths->n_total* \
00337 sizeof(double _Complex));
00338
00339 #define MACRO_nfft_B_PRE_FULL_PSI_compute_A { \
00340 (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
00341 }
00342
00343 #define MACRO_nfft_B_PRE_FULL_PSI_compute_T { \
00344 g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj); \
00345 }
00346
00347 #define MACRO_nfft_B_compute_A { \
00348 (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \
00349 }
00350
00351 #define MACRO_nfft_B_compute_T { \
00352 g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \
00353 }
00354
00355 #define MACRO_with_FG_PSI fg_psi[t2][lj[t2]]
00356
00357 #define MACRO_with_PRE_PSI ths->psi[(j*ths->d+t2)*(2*ths->m+2)+lj[t2]]
00358
00359 #define MACRO_without_PRE_PSI PHI(ths->x[j*ths->d+t2]- \
00360 ((double)l[t2])/ths->n[t2], t2)
00361
00362 #define MACRO_init_uo_l_lj_t { \
00363 for(t = ths->d-1; t>=0; t--) \
00364 { \
00365 nfft_uo(ths,j,&u[t],&o[t],t); \
00366 l[t] = u[t]; \
00367 lj[t] = 0; \
00368 } \
00369 t++; \
00370 }
00371
00372 #define MACRO_update_phi_prod_ll_plain(which_one) { \
00373 for(t2=t; t2<ths->d; t2++) \
00374 { \
00375 phi_prod[t2+1]=phi_prod[t2]* MACRO_ ## which_one; \
00376 ll_plain[t2+1]=ll_plain[t2]*ths->n[t2] +(l[t2]+ths->n[t2])%ths->n[t2]; \
00377 } \
00378 }
00379
00380 #define MACRO_count_uo_l_lj_t { \
00381 for(t = ths->d-1; (t>0)&&(l[t]==o[t]); t--) \
00382 { \
00383 l[t] = u[t]; \
00384 lj[t] = 0; \
00385 } \
00386 \
00387 l[t]++; \
00388 lj[t]++; \
00389 }
00390
00391 #define MACRO_nfft_B(which_one) \
00392 static inline void nfft_B_ ## which_one (nfft_plan *ths) \
00393 { \
00394 int lprod; \
00395 int u[ths->d], o[ths->d]; \
00396 int t, t2; \
00397 int j; \
00398 int l_L, ix; \
00399 int l[ths->d]; \
00400 int lj[ths->d]; \
00401 int ll_plain[ths->d+1]; \
00402 double phi_prod[ths->d+1]; \
00403 double _Complex *f, *g; \
00404 double _Complex *fj; \
00405 double y[ths->d]; \
00406 double fg_psi[ths->d][2*ths->m+2]; \
00407 double fg_exp_l[ths->d][2*ths->m+2]; \
00408 int l_fg,lj_fg; \
00409 double tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
00410 double ip_w; \
00411 int ip_u; \
00412 int ip_s=ths->K/(ths->m+2); \
00413 \
00414 f=ths->f; g=ths->g; \
00415 \
00416 MACRO_nfft_B_init_result_ ## which_one; \
00417 \
00418 if(ths->nfft_flags & PRE_FULL_PSI) \
00419 { \
00420 for(ix=0, j=0, fj=f; j<ths->M_total; j++, fj++) \
00421 for(l_L=0; l_L<ths->psi_index_f[j]; l_L++, ix++) \
00422 MACRO_nfft_B_PRE_FULL_PSI_compute_ ## which_one; \
00423 return; \
00424 } \
00425 \
00426 phi_prod[0]=1; \
00427 ll_plain[0]=0; \
00428 \
00429 for(t=0,lprod = 1; t<ths->d; t++) \
00430 lprod *= (2*ths->m+2); \
00431 \
00432 if(ths->nfft_flags & PRE_PSI) \
00433 { \
00434 for(j=0, fj=f; j<ths->M_total; j++, fj++) \
00435 { \
00436 MACRO_init_uo_l_lj_t; \
00437 \
00438 for(l_L=0; l_L<lprod; l_L++) \
00439 { \
00440 MACRO_update_phi_prod_ll_plain(with_PRE_PSI); \
00441 \
00442 MACRO_nfft_B_compute_ ## which_one; \
00443 \
00444 MACRO_count_uo_l_lj_t; \
00445 } \
00446 } \
00447 return; \
00448 } \
00449 \
00450 if(ths->nfft_flags & PRE_FG_PSI) \
00451 { \
00452 for(t2=0; t2<ths->d; t2++) \
00453 { \
00454 tmpEXP2 = exp(-1.0/ths->b[t2]); \
00455 tmpEXP2sq = tmpEXP2*tmpEXP2; \
00456 tmp2 = 1.0; \
00457 tmp3 = 1.0; \
00458 fg_exp_l[t2][0] = 1.0; \
00459 for(lj_fg=1; lj_fg <= (2*ths->m+2); lj_fg++) \
00460 { \
00461 tmp3 = tmp2*tmpEXP2; \
00462 tmp2 *= tmpEXP2sq; \
00463 fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3; \
00464 } \
00465 } \
00466 for(j=0, fj=f; j<ths->M_total; j++, fj++) \
00467 { \
00468 MACRO_init_uo_l_lj_t; \
00469 \
00470 for(t2=0; t2<ths->d; t2++) \
00471 { \
00472 fg_psi[t2][0] = ths->psi[2*(j*ths->d+t2)]; \
00473 tmpEXP1 = ths->psi[2*(j*ths->d+t2)+1]; \
00474 tmp1 = 1.0; \
00475 for(l_fg=u[t2]+1, lj_fg=1; l_fg <= o[t2]; l_fg++, lj_fg++) \
00476 { \
00477 tmp1 *= tmpEXP1; \
00478 fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg]; \
00479 } \
00480 } \
00481 \
00482 for(l_L=0; l_L<lprod; l_L++) \
00483 { \
00484 MACRO_update_phi_prod_ll_plain(with_FG_PSI); \
00485 \
00486 MACRO_nfft_B_compute_ ## which_one; \
00487 \
00488 MACRO_count_uo_l_lj_t; \
00489 } \
00490 } \
00491 return; \
00492 } \
00493 \
00494 if(ths->nfft_flags & FG_PSI) \
00495 { \
00496 for(t2=0; t2<ths->d; t2++) \
00497 { \
00498 tmpEXP2 = exp(-1.0/ths->b[t2]); \
00499 tmpEXP2sq = tmpEXP2*tmpEXP2; \
00500 tmp2 = 1.0; \
00501 tmp3 = 1.0; \
00502 fg_exp_l[t2][0] = 1.0; \
00503 for(lj_fg=1; lj_fg <= (2*ths->m+2); lj_fg++) \
00504 { \
00505 tmp3 = tmp2*tmpEXP2; \
00506 tmp2 *= tmpEXP2sq; \
00507 fg_exp_l[t2][lj_fg] = fg_exp_l[t2][lj_fg-1]*tmp3; \
00508 } \
00509 } \
00510 for(j=0, fj=f; j<ths->M_total; j++, fj++) \
00511 { \
00512 MACRO_init_uo_l_lj_t; \
00513 \
00514 for(t2=0; t2<ths->d; t2++) \
00515 { \
00516 fg_psi[t2][0] = \
00517 (PHI((ths->x[j*ths->d+t2]-((double)u[t2])/ths->n[t2]),t2)); \
00518 \
00519 tmpEXP1 = exp(2.0*(ths->n[t2]*ths->x[j*ths->d+t2] - u[t2]) \
00520 / ths->b[t2]); \
00521 tmp1 = 1.0; \
00522 for(l_fg=u[t2]+1, lj_fg=1; l_fg <= o[t2]; l_fg++, lj_fg++) \
00523 { \
00524 tmp1 *= tmpEXP1; \
00525 fg_psi[t2][lj_fg] = fg_psi[t2][0]*tmp1*fg_exp_l[t2][lj_fg]; \
00526 } \
00527 } \
00528 \
00529 for(l_L=0; l_L<lprod; l_L++) \
00530 { \
00531 MACRO_update_phi_prod_ll_plain(with_FG_PSI); \
00532 \
00533 MACRO_nfft_B_compute_ ## which_one; \
00534 \
00535 MACRO_count_uo_l_lj_t; \
00536 } \
00537 } \
00538 return; \
00539 } \
00540 \
00541 \
00542 if(ths->nfft_flags & PRE_LIN_PSI) \
00543 { \
00544 for(j=0, fj=f; j<ths->M_total; j++, fj++) \
00545 { \
00546 MACRO_init_uo_l_lj_t; \
00547 \
00548 for(t2=0; t2<ths->d; t2++) \
00549 { \
00550 y[t2] = ((ths->n[t2]*ths->x[j*ths->d+t2]- \
00551 (double)u[t2]) * ((double)ths->K))/(ths->m+2); \
00552 ip_u = LRINT(floor(y[t2])); \
00553 ip_w = y[t2]-ip_u; \
00554 for(l_fg=u[t2], lj_fg=0; l_fg <= o[t2]; l_fg++, lj_fg++) \
00555 { \
00556 fg_psi[t2][lj_fg] = ths->psi[(ths->K+1)*t2+ \
00557 abs(ip_u-lj_fg*ip_s)]* \
00558 (1-ip_w) + \
00559 ths->psi[(ths->K+1)*t2+ \
00560 abs(ip_u-lj_fg*ip_s+1)]* \
00561 (ip_w); \
00562 } \
00563 } \
00564 \
00565 for(l_L=0; l_L<lprod; l_L++) \
00566 { \
00567 MACRO_update_phi_prod_ll_plain(with_FG_PSI); \
00568 \
00569 MACRO_nfft_B_compute_ ## which_one; \
00570 \
00571 MACRO_count_uo_l_lj_t; \
00572 } \
00573 } \
00574 return; \
00575 } \
00576 \
00577 \
00578 for(j=0, fj=f; j<ths->M_total; j++, fj++) \
00579 { \
00580 MACRO_init_uo_l_lj_t; \
00581 \
00582 for(l_L=0; l_L<lprod; l_L++) \
00583 { \
00584 MACRO_update_phi_prod_ll_plain(without_PRE_PSI); \
00585 \
00586 MACRO_nfft_B_compute_ ## which_one; \
00587 \
00588 MACRO_count_uo_l_lj_t; \
00589 } \
00590 } \
00591 } \
00592
00593 MACRO_nfft_B(A)
00594 MACRO_nfft_B(T)
00595
00596
00597
00598 static void nfft_1d_init_fg_exp_l(double *fg_exp_l, const int m, const double b)
00599 {
00600 int l;
00601 double fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
00602
00603 fg_exp_b0 = exp(-1.0/b);
00604 fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
00605 fg_exp_b1 = 1.0;
00606 fg_exp_b2 = 1.0;
00607 fg_exp_l[0] = 1.0;
00608 for(l=1; l <= 2*m+1; l++)
00609 {
00610 fg_exp_b2 = fg_exp_b1*fg_exp_b0;
00611 fg_exp_b1 *= fg_exp_b0_sq;
00612 fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
00613 }
00614 }
00615
00616 static void nfft_trafo_1d_compute(double _Complex *fj, const double _Complex *g,const double *psij_const, const double *xj, const int n, const int m)
00617 {
00618 int u,o,l;
00619 const double _Complex *gj;
00620 const double *psij;
00621 psij=psij_const;
00622
00623 nfft_uo2(&u,&o,*xj, n, m);
00624
00625 if(u<o)
00626 for(l=1,gj=g+u,(*fj)=(*psij++) * (*gj++); l<=2*m+1; l++)
00627 (*fj) += (*psij++) * (*gj++);
00628 else
00629 {
00630 for(l=1,gj=g+u,(*fj)=(*psij++) * (*gj++); l<2*m+1-o; l++)
00631 (*fj) += (*psij++) * (*gj++);
00632 for(l=0,gj=g; l<=o; l++)
00633 (*fj) += (*psij++) * (*gj++);
00634 }
00635 }
00636
00637 static void nfft_adjoint_1d_compute(const double _Complex *fj, double _Complex *g,const double *psij_const, const double *xj, const int n, const int m)
00638 {
00639 int u,o,l;
00640 double _Complex *gj;
00641 const double *psij;
00642 psij=psij_const;
00643
00644 nfft_uo2(&u,&o,*xj, n, m);
00645
00646 if(u<o)
00647 for(l=0,gj=g+u; l<=2*m+1; l++)
00648 (*gj++) += (*psij++) * (*fj);
00649 else
00650 {
00651 for(l=0,gj=g+u; l<2*m+1-o; l++)
00652 (*gj++) += (*psij++) * (*fj);
00653 for(l=0,gj=g; l<=o; l++)
00654 (*gj++) += (*psij++) * (*fj);
00655 }
00656 }
00657
00658 static void nfft_trafo_1d_B(nfft_plan *ths)
00659 {
00660 int n,N,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
00661 double _Complex *fj,*g;
00662 double *psij, *psij_const, *xj, ip_y,ip_w;
00663
00664 double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
00665
00666 N=ths->N[0];
00667 n=ths->n[0];
00668 M=ths->M_total;
00669 m=ths->m;
00670
00671 g=ths->g;
00672
00673
00674 if(ths->nfft_flags & PRE_FULL_PSI)
00675 {
00676 psi_index_g=ths->psi_index_g;
00677 for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
00678 for(l=1, (*fj)=(*psij++) * g[(*psi_index_g++)]; l<=2*m+1; l++)
00679 (*fj) += (*psij++) * g[(*psi_index_g++)];
00680 return;
00681 }
00682
00683 if(ths->nfft_flags & PRE_PSI)
00684 {
00685 for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj++)
00686 nfft_trafo_1d_compute(fj, g, ths->psi+j*(2*m+2), xj, n, m);
00687 return;
00688 }
00689
00690 if(ths->nfft_flags & PRE_FG_PSI)
00691 {
00692 psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00693 fg_exp_l=(double*)nfft_malloc((2*m+2)*sizeof(double));
00694
00695 nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
00696
00697 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00698 {
00699 fg_psij0 = ths->psi[2*j];
00700 fg_psij1 = ths->psi[2*j+1];
00701 fg_psij2 = 1.0;
00702 psij_const[0] = fg_psij0;
00703 for(l=1; l<=2*m+1; l++)
00704 {
00705 fg_psij2 *= fg_psij1;
00706 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
00707 }
00708
00709 nfft_trafo_1d_compute(fj, g, psij_const, xj, n, m);
00710 }
00711 nfft_free(fg_exp_l);
00712 nfft_free(psij_const);
00713 return;
00714 }
00715
00716 if(ths->nfft_flags & FG_PSI)
00717 {
00718 psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00719 fg_exp_l=(double*)nfft_malloc((2*m+2)*sizeof(double));
00720
00721 nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
00722
00723 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00724 {
00725 nfft_uo(ths,j,&u,&o,0);
00726 fg_psij0 = (PHI(*xj-((double)u)/n,0));
00727 fg_psij1 = exp(2.0*(n*(*xj) - u)/ths->b[0]);
00728 fg_psij2 = 1.0;
00729 psij_const[0] = fg_psij0;
00730 for(l=1; l<=2*m+1; l++)
00731 {
00732 fg_psij2 *= fg_psij1;
00733 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
00734 }
00735
00736 nfft_trafo_1d_compute(fj, g, psij_const, xj, n, m);
00737 }
00738 nfft_free(fg_exp_l);
00739 nfft_free(psij_const);
00740 return;
00741 }
00742
00743 if(ths->nfft_flags & PRE_LIN_PSI)
00744 {
00745 psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00746 K=ths->K;
00747 ip_s=K/(m+2);
00748
00749 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00750 {
00751 nfft_uo(ths,j,&u,&o,0);
00752
00753 ip_y = fabs(n*(*xj) - u)*((double)ip_s);
00754 ip_u = LRINT(floor(ip_y));
00755 ip_w = ip_y-ip_u;
00756 for(l=0; l < 2*m+2; l++)
00757 psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
00758 ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
00759
00760 nfft_trafo_1d_compute(fj, g, psij_const, xj, n, m);
00761 }
00762 nfft_free(psij_const);
00763 return;
00764 }
00765
00766
00767 psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00768 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00769 {
00770 nfft_uo(ths,j,&u,&o,0);
00771
00772 for(l=0;l<=2*m+1;l++)
00773 psij_const[l]=(PHI(*xj-((double)((u+l)))/n,0));
00774
00775 nfft_trafo_1d_compute(fj, g, psij_const, xj, n, m);
00776 }
00777 nfft_free(psij_const);
00778 }
00779
00780 static void nfft_adjoint_1d_B(nfft_plan *ths)
00781 {
00782 int n,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
00783 double _Complex *fj,*g;
00784 double *psij, *psij_const, *xj, ip_y, ip_w;
00785 double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
00786
00787 n=ths->n[0];
00788 M=ths->M_total;
00789 m=ths->m;
00790
00791 g=ths->g;
00792 memset(g,0,ths->n_total*sizeof(double _Complex));
00793
00794 if(ths->nfft_flags & PRE_FULL_PSI)
00795 {
00796 psi_index_g=ths->psi_index_g;
00797 for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
00798 for(l=0; l<=2*m+1; l++)
00799 g[*psi_index_g++] += (*psij++) * (*fj);
00800 return;
00801 }
00802
00803 if(ths->nfft_flags & PRE_PSI)
00804 {
00805 for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj++)
00806 nfft_adjoint_1d_compute(fj, g, ths->psi+j*(2*m+2), xj, n, m);
00807 return;
00808 }
00809
00810 if(ths->nfft_flags & PRE_FG_PSI)
00811 {
00812 psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00813 fg_exp_l=(double*)nfft_malloc((2*m+2)*sizeof(double));
00814
00815 nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
00816
00817 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00818 {
00819 fg_psij0 = ths->psi[2*j];
00820 fg_psij1 = ths->psi[2*j+1];
00821 fg_psij2 = 1.0;
00822 psij_const[0] = fg_psij0;
00823 for(l=1; l<=2*m+1; l++)
00824 {
00825 fg_psij2 *= fg_psij1;
00826 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
00827 }
00828
00829 nfft_adjoint_1d_compute(fj, g, psij_const, xj, n, m);
00830 }
00831 nfft_free(fg_exp_l);
00832 nfft_free(psij_const);
00833 return;
00834 }
00835
00836 if(ths->nfft_flags & FG_PSI)
00837 {
00838 psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00839 fg_exp_l=(double*)nfft_malloc((2*m+2)*sizeof(double));
00840
00841 nfft_1d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
00842
00843 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00844 {
00845 nfft_uo(ths,j,&u,&o,0);
00846 fg_psij0 = (PHI(*xj-((double)u)/n,0));
00847 fg_psij1 = exp(2.0*(n*(*xj) - u)/ths->b[0]);
00848 fg_psij2 = 1.0;
00849 psij_const[0] = fg_psij0;
00850 for(l=1; l<=2*m+1; l++)
00851 {
00852 fg_psij2 *= fg_psij1;
00853 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
00854 }
00855
00856 nfft_adjoint_1d_compute(fj, g, psij_const, xj, n, m);
00857 }
00858 nfft_free(fg_exp_l);
00859 nfft_free(psij_const);
00860 return;
00861 }
00862
00863 if(ths->nfft_flags & PRE_LIN_PSI)
00864 {
00865 psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00866 K=ths->K;
00867 ip_s=K/(m+2);
00868
00869 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00870 {
00871 nfft_uo(ths,j,&u,&o,0);
00872
00873 ip_y = fabs(n*(*xj) - u)*((double)ip_s);
00874 ip_u = LRINT(floor(ip_y));
00875 ip_w = ip_y-ip_u;
00876 for(l=0; l < 2*m+2; l++)
00877 psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
00878 ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
00879
00880 nfft_adjoint_1d_compute(fj, g, psij_const, xj, n, m);
00881 }
00882 nfft_free(psij_const);
00883 return;
00884 }
00885
00886
00887 psij_const=(double*)nfft_malloc((2*m+2)*sizeof(double));
00888 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj++)
00889 {
00890 nfft_uo(ths,j,&u,&o,0);
00891
00892 for(l=0;l<=2*m+1;l++)
00893 psij_const[l]=(PHI(*xj-((double)((u+l)))/n,0));
00894
00895 nfft_adjoint_1d_compute(fj, g, psij_const, xj, n, m);
00896 }
00897 nfft_free(psij_const);
00898 }
00899
00900 void nfft_trafo_1d(nfft_plan *ths)
00901 {
00902 int k,n,N;
00903 double _Complex *g_hat1,*g_hat2,*f_hat1,*f_hat2;
00904 double *c_phi_inv1, *c_phi_inv2;
00905
00906 ths->g_hat=ths->g1;
00907 ths->g=ths->g2;
00908
00909 N=ths->N[0];
00910 n=ths->n[0];
00911
00912 f_hat1=ths->f_hat;
00913 f_hat2=&ths->f_hat[N/2];
00914 g_hat1=&ths->g_hat[n-N/2];
00915 g_hat2=ths->g_hat;
00916
00917 TIC(0)
00918 memset(ths->g_hat,0,ths->n_total*sizeof(double _Complex));
00919 if(ths->nfft_flags & PRE_PHI_HUT)
00920 {
00921 c_phi_inv1=ths->c_phi_inv[0];
00922 c_phi_inv2=&ths->c_phi_inv[0][N/2];
00923 for(k=0;k<N/2;k++)
00924 {
00925 (*g_hat1++) = (*f_hat1++) * (*c_phi_inv1++);
00926 (*g_hat2++) = (*f_hat2++) * (*c_phi_inv2++);
00927 }
00928 }
00929 else
00930 for(k=0;k<N/2;k++)
00931 {
00932 (*g_hat1++) = (*f_hat1++) / (PHI_HUT(k-N/2,0));
00933 (*g_hat2++) = (*f_hat2++) / (PHI_HUT(k,0));
00934 }
00935
00936 TOC(0)
00937
00938 TIC_FFTW(1)
00939 fftw_execute(ths->my_fftw_plan1);
00940 TOC_FFTW(1);
00941
00942 TIC(2);
00943 nfft_trafo_1d_B(ths);
00944 TOC(2);
00945 }
00946
00947 void nfft_adjoint_1d(nfft_plan *ths)
00948 {
00949 int k,n,N;
00950 double _Complex *g_hat1,*g_hat2,*f_hat1,*f_hat2;
00951 double *c_phi_inv1, *c_phi_inv2;
00952
00953 N=ths->N[0];
00954 n=ths->n[0];
00955
00956 ths->g_hat=ths->g1;
00957 ths->g=ths->g2;
00958
00959 f_hat1=ths->f_hat;
00960 f_hat2=&ths->f_hat[N/2];
00961 g_hat1=&ths->g_hat[n-N/2];
00962 g_hat2=ths->g_hat;
00963
00964 TIC(2)
00965 nfft_adjoint_1d_B(ths);
00966 TOC(0)
00967
00968 TIC_FFTW(1)
00969 fftw_execute(ths->my_fftw_plan2);
00970 TOC_FFTW(1);
00971
00972 TIC(0)
00973 if(ths->nfft_flags & PRE_PHI_HUT)
00974 {
00975 c_phi_inv1=ths->c_phi_inv[0];
00976 c_phi_inv2=&ths->c_phi_inv[0][N/2];
00977 for(k=0;k<N/2;k++)
00978 {
00979 (*f_hat1++) = (*g_hat1++) * (*c_phi_inv1++);
00980 (*f_hat2++) = (*g_hat2++) * (*c_phi_inv2++);
00981 }
00982 }
00983 else
00984 for(k=0;k<N/2;k++)
00985 {
00986 (*f_hat1++) = (*g_hat1++) / (PHI_HUT(k-N/2,0));
00987 (*f_hat2++) = (*g_hat2++) / (PHI_HUT(k,0));
00988 }
00989 TOC(0)
00990 }
00991
00992
00993
00994
00995 static void nfft_2d_init_fg_exp_l(double *fg_exp_l, const int m, const double b)
00996 {
00997 int l;
00998 double fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
00999
01000 fg_exp_b0 = exp(-1.0/b);
01001 fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
01002 fg_exp_b1 = 1.0;
01003 fg_exp_b2 = 1.0;
01004 fg_exp_l[0] = 1.0;
01005 for(l=1; l <= 2*m+1; l++)
01006 {
01007 fg_exp_b2 = fg_exp_b1*fg_exp_b0;
01008 fg_exp_b1 *= fg_exp_b0_sq;
01009 fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
01010 }
01011 }
01012
01013 static void nfft_trafo_2d_compute(double _Complex *fj, const double _Complex *g,
01014 const double *psij_const0, const double *psij_const1,
01015 const double *xj0, const double *xj1,
01016 const int n0, const int n1, const int m)
01017 {
01018 int u0,o0,l0,u1,o1,l1;
01019 const double _Complex *gj;
01020 const double *psij0,*psij1;
01021
01022 psij0=psij_const0;
01023 psij1=psij_const1;
01024
01025 nfft_uo2(&u0,&o0,*xj0, n0, m);
01026 nfft_uo2(&u1,&o1,*xj1, n1, m);
01027
01028 *fj=0;
01029
01030 if(u0<o0)
01031 if(u1<o1)
01032 for(l0=0; l0<=2*m+1; l0++,psij0++)
01033 {
01034 psij1=psij_const1;
01035 gj=g+(u0+l0)*n1+u1;
01036 for(l1=0; l1<=2*m+1; l1++)
01037 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01038 }
01039 else
01040 for(l0=0; l0<=2*m+1; l0++,psij0++)
01041 {
01042 psij1=psij_const1;
01043 gj=g+(u0+l0)*n1+u1;
01044 for(l1=0; l1<2*m+1-o1; l1++)
01045 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01046 gj=g+(u0+l0)*n1;
01047 for(l1=0; l1<=o1; l1++)
01048 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01049 }
01050 else
01051 if(u1<o1)
01052 {
01053 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01054 {
01055 psij1=psij_const1;
01056 gj=g+(u0+l0)*n1+u1;
01057 for(l1=0; l1<=2*m+1; l1++)
01058 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01059 }
01060 for(l0=0; l0<=o0; l0++,psij0++)
01061 {
01062 psij1=psij_const1;
01063 gj=g+l0*n1+u1;
01064 for(l1=0; l1<=2*m+1; l1++)
01065 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01066 }
01067 }
01068 else
01069 {
01070 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01071 {
01072 psij1=psij_const1;
01073 gj=g+(u0+l0)*n1+u1;
01074 for(l1=0; l1<2*m+1-o1; l1++)
01075 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01076 gj=g+(u0+l0)*n1;
01077 for(l1=0; l1<=o1; l1++)
01078 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01079 }
01080 for(l0=0; l0<=o0; l0++,psij0++)
01081 {
01082 psij1=psij_const1;
01083 gj=g+l0*n1+u1;
01084 for(l1=0; l1<2*m+1-o1; l1++)
01085 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01086 gj=g+l0*n1;
01087 for(l1=0; l1<=o1; l1++)
01088 (*fj) += (*psij0) * (*psij1++) * (*gj++);
01089 }
01090 }
01091 }
01092
01093 static void nfft_adjoint_2d_compute(const double _Complex *fj, double _Complex *g,
01094 const double *psij_const0, const double *psij_const1,
01095 const double *xj0, const double *xj1,
01096 const int n0, const int n1, const int m)
01097 {
01098 int u0,o0,l0,u1,o1,l1;
01099 double _Complex *gj;
01100 const double *psij0,*psij1;
01101
01102 psij0=psij_const0;
01103 psij1=psij_const1;
01104
01105 nfft_uo2(&u0,&o0,*xj0, n0, m);
01106 nfft_uo2(&u1,&o1,*xj1, n1, m);
01107
01108 if(u0<o0)
01109 if(u1<o1)
01110 for(l0=0; l0<=2*m+1; l0++,psij0++)
01111 {
01112 psij1=psij_const1;
01113 gj=g+(u0+l0)*n1+u1;
01114 for(l1=0; l1<=2*m+1; l1++)
01115 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01116 }
01117 else
01118 for(l0=0; l0<=2*m+1; l0++,psij0++)
01119 {
01120 psij1=psij_const1;
01121 gj=g+(u0+l0)*n1+u1;
01122 for(l1=0; l1<2*m+1-o1; l1++)
01123 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01124 gj=g+(u0+l0)*n1;
01125 for(l1=0; l1<=o1; l1++)
01126 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01127 }
01128 else
01129 if(u1<o1)
01130 {
01131 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01132 {
01133 psij1=psij_const1;
01134 gj=g+(u0+l0)*n1+u1;
01135 for(l1=0; l1<=2*m+1; l1++)
01136 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01137 }
01138 for(l0=0; l0<=o0; l0++,psij0++)
01139 {
01140 psij1=psij_const1;
01141 gj=g+l0*n1+u1;
01142 for(l1=0; l1<=2*m+1; l1++)
01143 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01144 }
01145 }
01146 else
01147 {
01148 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01149 {
01150 psij1=psij_const1;
01151 gj=g+(u0+l0)*n1+u1;
01152 for(l1=0; l1<2*m+1-o1; l1++)
01153 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01154 gj=g+(u0+l0)*n1;
01155 for(l1=0; l1<=o1; l1++)
01156 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01157 }
01158 for(l0=0; l0<=o0; l0++,psij0++)
01159 {
01160 psij1=psij_const1;
01161 gj=g+l0*n1+u1;
01162 for(l1=0; l1<2*m+1-o1; l1++)
01163 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01164 gj=g+l0*n1;
01165 for(l1=0; l1<=o1; l1++)
01166 (*gj++) += (*psij0) * (*psij1++) * (*fj);
01167 }
01168 }
01169 }
01170
01171 static void nfft_trafo_2d_B(nfft_plan *ths)
01172 {
01173 int n0,N0,n1,N1,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
01174 double _Complex *fj,*g;
01175 double *psij, *psij_const, *xj, ip_y, ip_w;
01176
01177 double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
01178
01179 N0=ths->N[0];
01180 n0=ths->n[0];
01181 N1=ths->N[1];
01182 n1=ths->n[1];
01183 M=ths->M_total;
01184 m=ths->m;
01185
01186 g=ths->g;
01187
01188 if(ths->nfft_flags & PRE_FULL_PSI)
01189 {
01190 psi_index_g=ths->psi_index_g;
01191 for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
01192 for(l=1, (*fj)=(*psij++) * g[(*psi_index_g++)]; l<(2*m+2)*(2*m+2); l++)
01193 (*fj) += (*psij++) * g[(*psi_index_g++)];
01194 return;
01195 }
01196
01197 if(ths->nfft_flags & PRE_PSI)
01198 {
01199 for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj+=2)
01200 nfft_trafo_2d_compute(fj, g, ths->psi+j*2*(2*m+2), ths->psi+(j*2+1)*(2*m+2), xj, xj+1, n0, n1, m);
01201 return;
01202 }
01203
01204 if(ths->nfft_flags & PRE_FG_PSI)
01205 {
01206 psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01207 fg_exp_l=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01208
01209 nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
01210 nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
01211
01212 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01213 {
01214 fg_psij0 = ths->psi[2*j*2];
01215 fg_psij1 = ths->psi[2*j*2+1];
01216 fg_psij2 = 1.0;
01217 psij_const[0] = fg_psij0;
01218 for(l=1; l<=2*m+1; l++)
01219 {
01220 fg_psij2 *= fg_psij1;
01221 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
01222 }
01223
01224 fg_psij0 = ths->psi[2*(j*2+1)];
01225 fg_psij1 = ths->psi[2*(j*2+1)+1];
01226 fg_psij2 = 1.0;
01227 psij_const[2*m+2] = fg_psij0;
01228 for(l=1; l<=2*m+1; l++)
01229 {
01230 fg_psij2 *= fg_psij1;
01231 psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
01232 }
01233
01234 nfft_trafo_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01235 }
01236 nfft_free(fg_exp_l);
01237 nfft_free(psij_const);
01238 return;
01239 }
01240
01241 if(ths->nfft_flags & FG_PSI)
01242 {
01243 psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01244 fg_exp_l=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01245
01246 nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
01247 nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
01248
01249 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01250 {
01251 nfft_uo(ths,j,&u,&o,0);
01252 fg_psij0 = (PHI(*xj-((double)u)/n0,0));
01253 fg_psij1 = exp(2.0*(n0*(*xj) - u)/ths->b[0]);
01254 fg_psij2 = 1.0;
01255 psij_const[0] = fg_psij0;
01256 for(l=1; l<=2*m+1; l++)
01257 {
01258 fg_psij2 *= fg_psij1;
01259 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
01260 }
01261
01262 nfft_uo(ths,j,&u,&o,1);
01263 fg_psij0 = (PHI(*(xj+1)-((double)u)/n1,1));
01264 fg_psij1 = exp(2.0*(n1*(*(xj+1)) - u)/ths->b[1]);
01265 fg_psij2 = 1.0;
01266 psij_const[2*m+2] = fg_psij0;
01267 for(l=1; l<=2*m+1; l++)
01268 {
01269 fg_psij2 *= fg_psij1;
01270 psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
01271 }
01272
01273 nfft_trafo_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01274 }
01275 nfft_free(fg_exp_l);
01276 nfft_free(psij_const);
01277 return;
01278 }
01279
01280 if(ths->nfft_flags & PRE_LIN_PSI)
01281 {
01282 psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01283 K=ths->K;
01284 ip_s=K/(m+2);
01285
01286 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01287 {
01288 nfft_uo(ths,j,&u,&o,0);
01289 ip_y = fabs(n0*(*(xj+0)) - u)*((double)ip_s);
01290 ip_u = LRINT(floor(ip_y));
01291 ip_w = ip_y-ip_u;
01292 for(l=0; l < 2*m+2; l++)
01293 psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) + ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
01294
01295 nfft_uo(ths,j,&u,&o,1);
01296 ip_y = fabs(n1*(*(xj+1)) - u)*((double)ip_s);
01297 ip_u = LRINT(floor(ip_y));
01298 ip_w = ip_y-ip_u;
01299 for(l=0; l < 2*m+2; l++)
01300 psij_const[2*m+2+l] = ths->psi[(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) + ths->psi[(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
01301
01302 nfft_trafo_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01303 }
01304 nfft_free(psij_const);
01305 return;
01306 }
01307
01308
01309 psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01310 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01311 {
01312 nfft_uo(ths,j,&u,&o,0);
01313 for(l=0;l<=2*m+1;l++)
01314 psij_const[l]=(PHI(*xj-((double)((u+l)))/n0,0));
01315
01316 nfft_uo(ths,j,&u,&o,1);
01317 for(l=0;l<=2*m+1;l++)
01318 psij_const[2*m+2+l]=(PHI(*(xj+1)-((double)((u+l)))/n1,1));
01319
01320 nfft_trafo_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01321 }
01322 nfft_free(psij_const);
01323 }
01324
01325 static void nfft_adjoint_2d_B(nfft_plan *ths)
01326 {
01327 int n0,N0,n1,N1,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
01328 double _Complex *fj,*g;
01329 double *psij, *psij_const, *xj ,ip_y, ip_w;
01330
01331 double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
01332
01333 N0=ths->N[0];
01334 n0=ths->n[0];
01335 N1=ths->N[1];
01336 n1=ths->n[1];
01337 M=ths->M_total;
01338 m=ths->m;
01339
01340 g=ths->g;
01341 memset(g,0,ths->n_total*sizeof(double _Complex));
01342
01343 if(ths->nfft_flags & PRE_FULL_PSI)
01344 {
01345 psi_index_g=ths->psi_index_g;
01346 for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
01347 for(l=0; l<(2*m+2)*(2*m+2); l++)
01348 g[(*psi_index_g++)] += (*psij++) * (*fj);
01349 return;
01350 }
01351
01352 if(ths->nfft_flags & PRE_PSI)
01353 {
01354 for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj+=2)
01355 nfft_adjoint_2d_compute(fj, g, ths->psi+j*2*(2*m+2), ths->psi+(j*2+1)*(2*m+2), xj, xj+1, n0, n1, m);
01356 return;
01357 }
01358
01359 if(ths->nfft_flags & PRE_FG_PSI)
01360 {
01361 psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01362 fg_exp_l=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01363
01364 nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
01365 nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
01366
01367 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01368 {
01369 fg_psij0 = ths->psi[2*j*2];
01370 fg_psij1 = ths->psi[2*j*2+1];
01371 fg_psij2 = 1.0;
01372 psij_const[0] = fg_psij0;
01373 for(l=1; l<=2*m+1; l++)
01374 {
01375 fg_psij2 *= fg_psij1;
01376 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
01377 }
01378
01379 fg_psij0 = ths->psi[2*(j*2+1)];
01380 fg_psij1 = ths->psi[2*(j*2+1)+1];
01381 fg_psij2 = 1.0;
01382 psij_const[2*m+2] = fg_psij0;
01383 for(l=1; l<=2*m+1; l++)
01384 {
01385 fg_psij2 *= fg_psij1;
01386 psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
01387 }
01388
01389 nfft_adjoint_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01390 }
01391 nfft_free(fg_exp_l);
01392 nfft_free(psij_const);
01393 return;
01394 }
01395
01396 if(ths->nfft_flags & FG_PSI)
01397 {
01398 psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01399 fg_exp_l=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01400
01401 nfft_2d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
01402 nfft_2d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
01403
01404 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01405 {
01406 nfft_uo(ths,j,&u,&o,0);
01407 fg_psij0 = (PHI(*xj-((double)u)/n0,0));
01408 fg_psij1 = exp(2.0*(n0*(*xj) - u)/ths->b[0]);
01409 fg_psij2 = 1.0;
01410 psij_const[0] = fg_psij0;
01411 for(l=1; l<=2*m+1; l++)
01412 {
01413 fg_psij2 *= fg_psij1;
01414 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
01415 }
01416
01417 nfft_uo(ths,j,&u,&o,1);
01418 fg_psij0 = (PHI(*(xj+1)-((double)u)/n1,1));
01419 fg_psij1 = exp(2.0*(n1*(*(xj+1)) - u)/ths->b[1]);
01420 fg_psij2 = 1.0;
01421 psij_const[2*m+2] = fg_psij0;
01422 for(l=1; l<=2*m+1; l++)
01423 {
01424 fg_psij2 *= fg_psij1;
01425 psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
01426 }
01427
01428 nfft_adjoint_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01429 }
01430 nfft_free(fg_exp_l);
01431 nfft_free(psij_const);
01432 return;
01433 }
01434
01435 if(ths->nfft_flags & PRE_LIN_PSI)
01436 {
01437 psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01438 K=ths->K;
01439 ip_s=K/(m+2);
01440
01441 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01442 {
01443 nfft_uo(ths,j,&u,&o,0);
01444 ip_y = fabs(n0*(*(xj+0)) - u)*((double)ip_s);
01445 ip_u = LRINT(floor(ip_y));
01446 ip_w = ip_y-ip_u;
01447 for(l=0; l < 2*m+2; l++)
01448 psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
01449 ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
01450
01451 nfft_uo(ths,j,&u,&o,1);
01452 ip_y = fabs(n1*(*(xj+1)) - u)*((double)ip_s);
01453 ip_u = LRINT(floor(ip_y));
01454 ip_w = ip_y-ip_u;
01455 for(l=0; l < 2*m+2; l++)
01456 psij_const[2*m+2+l] = ths->psi[(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
01457 ths->psi[(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
01458
01459 nfft_adjoint_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01460 }
01461 nfft_free(psij_const);
01462 return;
01463 }
01464
01465
01466 psij_const=(double*)nfft_malloc(2*(2*m+2)*sizeof(double));
01467 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=2)
01468 {
01469 nfft_uo(ths,j,&u,&o,0);
01470 for(l=0;l<=2*m+1;l++)
01471 psij_const[l]=(PHI(*xj-((double)((u+l)))/n0,0));
01472
01473 nfft_uo(ths,j,&u,&o,1);
01474 for(l=0;l<=2*m+1;l++)
01475 psij_const[2*m+2+l]=(PHI(*(xj+1)-((double)((u+l)))/n1,1));
01476
01477 nfft_adjoint_2d_compute(fj, g, psij_const, psij_const+2*m+2, xj, xj+1, n0, n1, m);
01478 }
01479 nfft_free(psij_const);
01480 }
01481
01482
01483 void nfft_trafo_2d(nfft_plan *ths)
01484 {
01485 int k0,k1,n0,n1,N0,N1;
01486 double _Complex *g_hat,*f_hat;
01487 double *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12;
01488 double ck01, ck02, ck11, ck12;
01489 double _Complex *g_hat11,*f_hat11,*g_hat21,*f_hat21,*g_hat12,*f_hat12,*g_hat22,*f_hat22;
01490
01491 ths->g_hat=ths->g1;
01492 ths->g=ths->g2;
01493
01494 N0=ths->N[0];
01495 N1=ths->N[1];
01496 n0=ths->n[0];
01497 n1=ths->n[1];
01498
01499 f_hat=ths->f_hat;
01500 g_hat=ths->g_hat;
01501
01502 TIC(0)
01503 memset(ths->g_hat,0,ths->n_total*sizeof(double _Complex));
01504 if(ths->nfft_flags & PRE_PHI_HUT)
01505 {
01506 c_phi_inv01=ths->c_phi_inv[0];
01507 c_phi_inv02=&ths->c_phi_inv[0][N0/2];
01508
01509 for(k0=0;k0<N0/2;k0++)
01510 {
01511 ck01=(*c_phi_inv01++);
01512 ck02=(*c_phi_inv02++);
01513
01514 c_phi_inv11=ths->c_phi_inv[1];
01515 c_phi_inv12=&ths->c_phi_inv[1][N1/2];
01516
01517 g_hat11=g_hat + (n0-N0/2+k0)*n1+n1-N1/2;
01518 f_hat11=f_hat + k0*N1;
01519 g_hat21=g_hat + k0*n1+n1-N1/2;
01520 f_hat21=f_hat + (N0/2+k0)*N1;
01521 g_hat12=g_hat + (n0-N0/2+k0)*n1;
01522 f_hat12=f_hat + k0*N1+N1/2;
01523 g_hat22=g_hat + k0*n1;
01524 f_hat22=f_hat + (N0/2+k0)*N1+N1/2;
01525 for(k1=0;k1<N1/2;k1++)
01526 {
01527 ck11=(*c_phi_inv11++);
01528 ck12=(*c_phi_inv12++);
01529
01530 (*g_hat11++) = (*f_hat11++) * ck01 * ck11;
01531 (*g_hat21++) = (*f_hat21++) * ck02 * ck11;
01532 (*g_hat12++) = (*f_hat12++) * ck01 * ck12;
01533 (*g_hat22++) = (*f_hat22++) * ck02 * ck12;
01534 }
01535 }
01536 }
01537 else
01538 for(k0=0;k0<N0/2;k0++)
01539 {
01540 ck01=1./(PHI_HUT(k0-N0/2,0));
01541 ck02=1./(PHI_HUT(k0,0));
01542 for(k1=0;k1<N1/2;k1++)
01543 {
01544 ck11=1./(PHI_HUT(k1-N1/2,1));
01545 ck12=1./(PHI_HUT(k1,1));
01546 g_hat[(n0-N0/2+k0)*n1+n1-N1/2+k1] = f_hat[k0*N1+k1] * ck01 * ck11;
01547 g_hat[k0*n1+n1-N1/2+k1] = f_hat[(N0/2+k0)*N1+k1] * ck02 * ck11;
01548 g_hat[(n0-N0/2+k0)*n1+k1] = f_hat[k0*N1+N1/2+k1] * ck01 * ck12;
01549 g_hat[k0*n1+k1] = f_hat[(N0/2+k0)*N1+N1/2+k1] * ck02 * ck12;
01550 }
01551 }
01552
01553 TOC(0)
01554
01555 TIC_FFTW(1)
01556 fftw_execute(ths->my_fftw_plan1);
01557 TOC_FFTW(1);
01558
01559 TIC(2);
01560 nfft_trafo_2d_B(ths);
01561 TOC(2);
01562 }
01563
01564 void nfft_adjoint_2d(nfft_plan *ths)
01565 {
01566 int k0,k1,n0,n1,N0,N1;
01567 double _Complex *g_hat,*f_hat;
01568 double *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12;
01569 double ck01, ck02, ck11, ck12;
01570 double _Complex *g_hat11,*f_hat11,*g_hat21,*f_hat21,*g_hat12,*f_hat12,*g_hat22,*f_hat22;
01571
01572 ths->g_hat=ths->g1;
01573 ths->g=ths->g2;
01574
01575 N0=ths->N[0];
01576 N1=ths->N[1];
01577 n0=ths->n[0];
01578 n1=ths->n[1];
01579
01580 f_hat=ths->f_hat;
01581 g_hat=ths->g_hat;
01582
01583 TIC(2);
01584 nfft_adjoint_2d_B(ths);
01585 TOC(2);
01586
01587 TIC_FFTW(1)
01588 fftw_execute(ths->my_fftw_plan2);
01589 TOC_FFTW(1);
01590
01591 TIC(0)
01592 if(ths->nfft_flags & PRE_PHI_HUT)
01593 {
01594 c_phi_inv01=ths->c_phi_inv[0];
01595 c_phi_inv02=&ths->c_phi_inv[0][N0/2];
01596
01597 for(k0=0;k0<N0/2;k0++)
01598 {
01599 ck01=(*c_phi_inv01++);
01600 ck02=(*c_phi_inv02++);
01601
01602 c_phi_inv11=ths->c_phi_inv[1];
01603 c_phi_inv12=&ths->c_phi_inv[1][N1/2];
01604 g_hat11=g_hat + (n0-N0/2+k0)*n1+n1-N1/2;
01605 f_hat11=f_hat + k0*N1;
01606 g_hat21=g_hat + k0*n1+n1-N1/2;
01607 f_hat21=f_hat + (N0/2+k0)*N1;
01608 g_hat12=g_hat + (n0-N0/2+k0)*n1;
01609 f_hat12=f_hat + k0*N1+N1/2;
01610 g_hat22=g_hat + k0*n1;
01611 f_hat22=f_hat + (N0/2+k0)*N1+N1/2;
01612 for(k1=0;k1<N1/2;k1++)
01613 {
01614 ck11=(*c_phi_inv11++);
01615 ck12=(*c_phi_inv12++);
01616
01617 (*f_hat11++) = (*g_hat11++) * ck01 * ck11;
01618 (*f_hat21++) = (*g_hat21++) * ck02 * ck11;
01619 (*f_hat12++) = (*g_hat12++) * ck01 * ck12;
01620 (*f_hat22++) = (*g_hat22++) * ck02 * ck12;
01621 }
01622 }
01623 }
01624 else
01625 for(k0=0;k0<N0/2;k0++)
01626 {
01627 ck01=1./(PHI_HUT(k0-N0/2,0));
01628 ck02=1./(PHI_HUT(k0,0));
01629 for(k1=0;k1<N1/2;k1++)
01630 {
01631 ck11=1./(PHI_HUT(k1-N1/2,1));
01632 ck12=1./(PHI_HUT(k1,1));
01633 f_hat[k0*N1+k1] = g_hat[(n0-N0/2+k0)*n1+n1-N1/2+k1] * ck01 * ck11;
01634 f_hat[(N0/2+k0)*N1+k1] = g_hat[k0*n1+n1-N1/2+k1] * ck02 * ck11;
01635 f_hat[k0*N1+N1/2+k1] = g_hat[(n0-N0/2+k0)*n1+k1] * ck01 * ck12;
01636 f_hat[(N0/2+k0)*N1+N1/2+k1] = g_hat[k0*n1+k1] * ck02 * ck12;
01637 }
01638 }
01639 TOC(0)
01640 }
01641
01642
01643
01644 static void nfft_3d_init_fg_exp_l(double *fg_exp_l, const int m, const double b)
01645 {
01646 int l;
01647 double fg_exp_b0, fg_exp_b1, fg_exp_b2, fg_exp_b0_sq;
01648
01649 fg_exp_b0 = exp(-1.0/b);
01650 fg_exp_b0_sq = fg_exp_b0*fg_exp_b0;
01651 fg_exp_b1 = 1.0;
01652 fg_exp_b2 = 1.0;
01653 fg_exp_l[0] = 1.0;
01654 for(l=1; l <= 2*m+1; l++)
01655 {
01656 fg_exp_b2 = fg_exp_b1*fg_exp_b0;
01657 fg_exp_b1 *= fg_exp_b0_sq;
01658 fg_exp_l[l] = fg_exp_l[l-1]*fg_exp_b2;
01659 }
01660 }
01661
01662 static void nfft_trafo_3d_compute(double _Complex *fj, const double _Complex *g,
01663 const double *psij_const0, const double *psij_const1, const double *psij_const2,
01664 const double *xj0, const double *xj1, const double *xj2,
01665 const int n0, const int n1, const int n2, const int m)
01666 {
01667 int u0,o0,l0,u1,o1,l1,u2,o2,l2;
01668 const double _Complex *gj;
01669 const double *psij0,*psij1,*psij2;
01670
01671 psij0=psij_const0;
01672 psij1=psij_const1;
01673 psij2=psij_const2;
01674
01675 nfft_uo2(&u0,&o0,*xj0, n0, m);
01676 nfft_uo2(&u1,&o1,*xj1, n1, m);
01677 nfft_uo2(&u2,&o2,*xj2, n2, m);
01678
01679 *fj=0;
01680
01681 if(u0<o0)
01682 if(u1<o1)
01683 if(u2<o2)
01684 for(l0=0; l0<=2*m+1; l0++,psij0++)
01685 {
01686 psij1=psij_const1;
01687 for(l1=0; l1<=2*m+1; l1++,psij1++)
01688 {
01689 psij2=psij_const2;
01690 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01691 for(l2=0; l2<=2*m+1; l2++)
01692 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01693 }
01694 }
01695 else
01696 for(l0=0; l0<=2*m+1; l0++,psij0++)
01697 {
01698 psij1=psij_const1;
01699 for(l1=0; l1<=2*m+1; l1++,psij1++)
01700 {
01701 psij2=psij_const2;
01702 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01703 for(l2=0; l2<2*m+1-o2; l2++)
01704 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01705 gj=g+((u0+l0)*n1+(u1+l1))*n2;
01706 for(l2=0; l2<=o2; l2++)
01707 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01708 }
01709 }
01710 else
01711 if(u2<o2)
01712 for(l0=0; l0<=2*m+1; l0++,psij0++)
01713 {
01714 psij1=psij_const1;
01715 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01716 {
01717 psij2=psij_const2;
01718 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01719 for(l2=0; l2<=2*m+1; l2++)
01720 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01721 }
01722 for(l1=0; l1<=o1; l1++,psij1++)
01723 {
01724 psij2=psij_const2;
01725 gj=g+((u0+l0)*n1+l1)*n2+u2;
01726 for(l2=0; l2<=2*m+1; l2++)
01727 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01728 }
01729 }
01730 else
01731 {
01732 for(l0=0; l0<=2*m+1; l0++,psij0++)
01733 {
01734 psij1=psij_const1;
01735 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01736 {
01737 psij2=psij_const2;
01738 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01739 for(l2=0; l2<2*m+1-o2; l2++)
01740 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01741 gj=g+((u0+l0)*n1+(u1+l1))*n2;
01742 for(l2=0; l2<=o2; l2++)
01743 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01744 }
01745 for(l1=0; l1<=o1; l1++,psij1++)
01746 {
01747 psij2=psij_const2;
01748 gj=g+((u0+l0)*n1+l1)*n2+u2;
01749 for(l2=0; l2<2*m+1-o2; l2++)
01750 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01751 gj=g+((u0+l0)*n1+l1)*n2;
01752 for(l2=0; l2<=o2; l2++)
01753 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01754 }
01755 }
01756 }
01757 else
01758 if(u1<o1)
01759 if(u2<o2)
01760 {
01761 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01762 {
01763 psij1=psij_const1;
01764 for(l1=0; l1<=2*m+1; l1++,psij1++)
01765 {
01766 psij2=psij_const2;
01767 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01768 for(l2=0; l2<=2*m+1; l2++)
01769 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01770 }
01771 }
01772
01773 for(l0=0; l0<=o0; l0++,psij0++)
01774 {
01775 psij1=psij_const1;
01776 for(l1=0; l1<=2*m+1; l1++,psij1++)
01777 {
01778 psij2=psij_const2;
01779 gj=g+(l0*n1+(u1+l1))*n2+u2;
01780 for(l2=0; l2<=2*m+1; l2++)
01781 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01782 }
01783 }
01784 }
01785 else
01786 {
01787 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01788 {
01789 psij1=psij_const1;
01790 for(l1=0; l1<=2*m+1; l1++,psij1++)
01791 {
01792 psij2=psij_const2;
01793 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01794 for(l2=0; l2<2*m+1-o2; l2++)
01795 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01796 gj=g+((u0+l0)*n1+(u1+l1))*n2;
01797 for(l2=0; l2<=o2; l2++)
01798 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01799 }
01800 }
01801
01802 for(l0=0; l0<=o0; l0++,psij0++)
01803 {
01804 psij1=psij_const1;
01805 for(l1=0; l1<=2*m+1; l1++,psij1++)
01806 {
01807 psij2=psij_const2;
01808 gj=g+(l0*n1+(u1+l1))*n2+u2;
01809 for(l2=0; l2<2*m+1-o2; l2++)
01810 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01811 gj=g+(l0*n1+(u1+l1))*n2;
01812 for(l2=0; l2<=o2; l2++)
01813 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01814 }
01815 }
01816 }
01817 else
01818 if(u2<o2)
01819 {
01820 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01821 {
01822 psij1=psij_const1;
01823 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01824 {
01825 psij2=psij_const2;
01826 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01827 for(l2=0; l2<=2*m+1; l2++)
01828 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01829 }
01830 for(l1=0; l1<=o1; l1++,psij1++)
01831 {
01832 psij2=psij_const2;
01833 gj=g+((u0+l0)*n1+l1)*n2+u2;
01834 for(l2=0; l2<=2*m+1; l2++)
01835 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01836 }
01837 }
01838 for(l0=0; l0<=o0; l0++,psij0++)
01839 {
01840 psij1=psij_const1;
01841 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01842 {
01843 psij2=psij_const2;
01844 gj=g+(l0*n1+(u1+l1))*n2+u2;
01845 for(l2=0; l2<=2*m+1; l2++)
01846 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01847 }
01848 for(l1=0; l1<=o1; l1++,psij1++)
01849 {
01850 psij2=psij_const2;
01851 gj=g+(l0*n1+l1)*n2+u2;
01852 for(l2=0; l2<=2*m+1; l2++)
01853 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01854 }
01855 }
01856 }
01857 else
01858 {
01859 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
01860 {
01861 psij1=psij_const1;
01862 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01863 {
01864 psij2=psij_const2;
01865 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01866 for(l2=0; l2<2*m+1-o2; l2++)
01867 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01868 gj=g+((u0+l0)*n1+(u1+l1))*n2;
01869 for(l2=0; l2<=o2; l2++)
01870 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01871 }
01872 for(l1=0; l1<=o1; l1++,psij1++)
01873 {
01874 psij2=psij_const2;
01875 gj=g+((u0+l0)*n1+l1)*n2+u2;
01876 for(l2=0; l2<2*m+1-o2; l2++)
01877 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01878 gj=g+((u0+l0)*n1+l1)*n2;
01879 for(l2=0; l2<=o2; l2++)
01880 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01881 }
01882 }
01883
01884 for(l0=0; l0<=o0; l0++,psij0++)
01885 {
01886 psij1=psij_const1;
01887 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01888 {
01889 psij2=psij_const2;
01890 gj=g+(l0*n1+(u1+l1))*n2+u2;
01891 for(l2=0; l2<2*m+1-o2; l2++)
01892 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01893 gj=g+(l0*n1+(u1+l1))*n2;
01894 for(l2=0; l2<=o2; l2++)
01895 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01896 }
01897 for(l1=0; l1<=o1; l1++,psij1++)
01898 {
01899 psij2=psij_const2;
01900 gj=g+(l0*n1+l1)*n2+u2;
01901 for(l2=0; l2<2*m+1-o2; l2++)
01902 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01903 gj=g+(l0*n1+l1)*n2;
01904 for(l2=0; l2<=o2; l2++)
01905 (*fj) += (*psij0) * (*psij1) * (*psij2++) * (*gj++);
01906 }
01907 }
01908 }
01909 }
01910
01911 static void nfft_adjoint_3d_compute(const double _Complex *fj, double _Complex *g,
01912 const double *psij_const0, const double *psij_const1, const double *psij_const2,
01913 const double *xj0, const double *xj1, const double *xj2,
01914 const int n0, const int n1, const int n2, const int m)
01915 {
01916 int u0,o0,l0,u1,o1,l1,u2,o2,l2;
01917 double _Complex *gj;
01918 const double *psij0,*psij1,*psij2;
01919
01920 psij0=psij_const0;
01921 psij1=psij_const1;
01922 psij2=psij_const2;
01923
01924 nfft_uo2(&u0,&o0,*xj0, n0, m);
01925 nfft_uo2(&u1,&o1,*xj1, n1, m);
01926 nfft_uo2(&u2,&o2,*xj2, n2, m);
01927
01928 if(u0<o0)
01929 if(u1<o1)
01930 if(u2<o2)
01931 for(l0=0; l0<=2*m+1; l0++,psij0++)
01932 {
01933 psij1=psij_const1;
01934 for(l1=0; l1<=2*m+1; l1++,psij1++)
01935 {
01936 psij2=psij_const2;
01937 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01938 for(l2=0; l2<=2*m+1; l2++)
01939 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01940 }
01941 }
01942 else
01943 for(l0=0; l0<=2*m+1; l0++,psij0++)
01944 {
01945 psij1=psij_const1;
01946 for(l1=0; l1<=2*m+1; l1++,psij1++)
01947 {
01948 psij2=psij_const2;
01949 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01950 for(l2=0; l2<2*m+1-o2; l2++)
01951 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01952 gj=g+((u0+l0)*n1+(u1+l1))*n2;
01953 for(l2=0; l2<=o2; l2++)
01954 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01955 }
01956 }
01957 else
01958 if(u2<o2)
01959 for(l0=0; l0<=2*m+1; l0++,psij0++)
01960 {
01961 psij1=psij_const1;
01962 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01963 {
01964 psij2=psij_const2;
01965 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01966 for(l2=0; l2<=2*m+1; l2++)
01967 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01968 }
01969 for(l1=0; l1<=o1; l1++,psij1++)
01970 {
01971 psij2=psij_const2;
01972 gj=g+((u0+l0)*n1+l1)*n2+u2;
01973 for(l2=0; l2<=2*m+1; l2++)
01974 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01975 }
01976 }
01977 else
01978 {
01979 for(l0=0; l0<=2*m+1; l0++,psij0++)
01980 {
01981 psij1=psij_const1;
01982 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
01983 {
01984 psij2=psij_const2;
01985 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
01986 for(l2=0; l2<2*m+1-o2; l2++)
01987 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01988 gj=g+((u0+l0)*n1+(u1+l1))*n2;
01989 for(l2=0; l2<=o2; l2++)
01990 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01991 }
01992 for(l1=0; l1<=o1; l1++,psij1++)
01993 {
01994 psij2=psij_const2;
01995 gj=g+((u0+l0)*n1+l1)*n2+u2;
01996 for(l2=0; l2<2*m+1-o2; l2++)
01997 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
01998 gj=g+((u0+l0)*n1+l1)*n2;
01999 for(l2=0; l2<=o2; l2++)
02000 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02001 }
02002 }
02003 }
02004 else
02005 if(u1<o1)
02006 if(u2<o2)
02007 {
02008 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
02009 {
02010 psij1=psij_const1;
02011 for(l1=0; l1<=2*m+1; l1++,psij1++)
02012 {
02013 psij2=psij_const2;
02014 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
02015 for(l2=0; l2<=2*m+1; l2++)
02016 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02017 }
02018 }
02019
02020 for(l0=0; l0<=o0; l0++,psij0++)
02021 {
02022 psij1=psij_const1;
02023 for(l1=0; l1<=2*m+1; l1++,psij1++)
02024 {
02025 psij2=psij_const2;
02026 gj=g+(l0*n1+(u1+l1))*n2+u2;
02027 for(l2=0; l2<=2*m+1; l2++)
02028 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02029 }
02030 }
02031 }
02032 else
02033 {
02034 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
02035 {
02036 psij1=psij_const1;
02037 for(l1=0; l1<=2*m+1; l1++,psij1++)
02038 {
02039 psij2=psij_const2;
02040 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
02041 for(l2=0; l2<2*m+1-o2; l2++)
02042 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02043 gj=g+((u0+l0)*n1+(u1+l1))*n2;
02044 for(l2=0; l2<=o2; l2++)
02045 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02046 }
02047 }
02048
02049 for(l0=0; l0<=o0; l0++,psij0++)
02050 {
02051 psij1=psij_const1;
02052 for(l1=0; l1<=2*m+1; l1++,psij1++)
02053 {
02054 psij2=psij_const2;
02055 gj=g+(l0*n1+(u1+l1))*n2+u2;
02056 for(l2=0; l2<2*m+1-o2; l2++)
02057 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02058 gj=g+(l0*n1+(u1+l1))*n2;
02059 for(l2=0; l2<=o2; l2++)
02060 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02061 }
02062 }
02063 }
02064 else
02065 if(u2<o2)
02066 {
02067 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
02068 {
02069 psij1=psij_const1;
02070 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
02071 {
02072 psij2=psij_const2;
02073 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
02074 for(l2=0; l2<=2*m+1; l2++)
02075 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02076 }
02077 for(l1=0; l1<=o1; l1++,psij1++)
02078 {
02079 psij2=psij_const2;
02080 gj=g+((u0+l0)*n1+l1)*n2+u2;
02081 for(l2=0; l2<=2*m+1; l2++)
02082 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02083 }
02084 }
02085 for(l0=0; l0<=o0; l0++,psij0++)
02086 {
02087 psij1=psij_const1;
02088 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
02089 {
02090 psij2=psij_const2;
02091 gj=g+(l0*n1+(u1+l1))*n2+u2;
02092 for(l2=0; l2<=2*m+1; l2++)
02093 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02094 }
02095 for(l1=0; l1<=o1; l1++,psij1++)
02096 {
02097 psij2=psij_const2;
02098 gj=g+(l0*n1+l1)*n2+u2;
02099 for(l2=0; l2<=2*m+1; l2++)
02100 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02101 }
02102 }
02103 }
02104 else
02105 {
02106 for(l0=0; l0<2*m+1-o0; l0++,psij0++)
02107 {
02108 psij1=psij_const1;
02109 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
02110 {
02111 psij2=psij_const2;
02112 gj=g+((u0+l0)*n1+(u1+l1))*n2+u2;
02113 for(l2=0; l2<2*m+1-o2; l2++)
02114 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02115 gj=g+((u0+l0)*n1+(u1+l1))*n2;
02116 for(l2=0; l2<=o2; l2++)
02117 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02118 }
02119 for(l1=0; l1<=o1; l1++,psij1++)
02120 {
02121 psij2=psij_const2;
02122 gj=g+((u0+l0)*n1+l1)*n2+u2;
02123 for(l2=0; l2<2*m+1-o2; l2++)
02124 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02125 gj=g+((u0+l0)*n1+l1)*n2;
02126 for(l2=0; l2<=o2; l2++)
02127 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02128 }
02129 }
02130
02131 for(l0=0; l0<=o0; l0++,psij0++)
02132 {
02133 psij1=psij_const1;
02134 for(l1=0; l1<2*m+1-o1; l1++,psij1++)
02135 {
02136 psij2=psij_const2;
02137 gj=g+(l0*n1+(u1+l1))*n2+u2;
02138 for(l2=0; l2<2*m+1-o2; l2++)
02139 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02140 gj=g+(l0*n1+(u1+l1))*n2;
02141 for(l2=0; l2<=o2; l2++)
02142 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02143 }
02144 for(l1=0; l1<=o1; l1++,psij1++)
02145 {
02146 psij2=psij_const2;
02147 gj=g+(l0*n1+l1)*n2+u2;
02148 for(l2=0; l2<2*m+1-o2; l2++)
02149 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02150 gj=g+(l0*n1+l1)*n2;
02151 for(l2=0; l2<=o2; l2++)
02152 (*gj++) += (*psij0) * (*psij1) * (*psij2++) * (*fj);
02153 }
02154 }
02155 }
02156 }
02157
02158
02159 static void nfft_trafo_3d_B(nfft_plan *ths)
02160 {
02161 int n0,N0,n1,N1,n2,N2,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
02162 double _Complex *fj,*g;
02163 double *psij, *psij_const, *xj, ip_y, ip_w;
02164
02165 double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
02166
02167 N0=ths->N[0];
02168 n0=ths->n[0];
02169 N1=ths->N[1];
02170 n1=ths->n[1];
02171 N2=ths->N[2];
02172 n2=ths->n[2];
02173 M=ths->M_total;
02174 m=ths->m;
02175
02176 g=ths->g;
02177
02178 if(ths->nfft_flags & PRE_FULL_PSI)
02179 {
02180 psi_index_g=ths->psi_index_g;
02181 for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
02182 for(l=1, (*fj)=(*psij++) * g[(*psi_index_g++)]; l<(2*m+2)*(2*m+2)*(2*m+2); l++)
02183 (*fj) += (*psij++) * g[(*psi_index_g++)];
02184 return;
02185 }
02186
02187 if(ths->nfft_flags & PRE_PSI)
02188 {
02189 for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj+=3)
02190 nfft_trafo_3d_compute(fj, g, ths->psi+j*3*(2*m+2), ths->psi+(j*3+1)*(2*m+2), ths->psi+(j*3+2)*(2*m+2), xj, xj+1, xj+2, n0, n1, n2, m);
02191 return;
02192 }
02193
02194 if(ths->nfft_flags & PRE_FG_PSI)
02195 {
02196 psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02197 fg_exp_l=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02198
02199 nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
02200 nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
02201 nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->b[2]);
02202
02203 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02204 {
02205 fg_psij0 = ths->psi[2*j*3];
02206 fg_psij1 = ths->psi[2*j*3+1];
02207 fg_psij2 = 1.0;
02208 psij_const[0] = fg_psij0;
02209 for(l=1; l<=2*m+1; l++)
02210 {
02211 fg_psij2 *= fg_psij1;
02212 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
02213 }
02214
02215 fg_psij0 = ths->psi[2*(j*3+1)];
02216 fg_psij1 = ths->psi[2*(j*3+1)+1];
02217 fg_psij2 = 1.0;
02218 psij_const[2*m+2] = fg_psij0;
02219 for(l=1; l<=2*m+1; l++)
02220 {
02221 fg_psij2 *= fg_psij1;
02222 psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
02223 }
02224
02225 fg_psij0 = ths->psi[2*(j*3+2)];
02226 fg_psij1 = ths->psi[2*(j*3+2)+1];
02227 fg_psij2 = 1.0;
02228 psij_const[2*(2*m+2)] = fg_psij0;
02229 for(l=1; l<=2*m+1; l++)
02230 {
02231 fg_psij2 *= fg_psij1;
02232 psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
02233 }
02234
02235 nfft_trafo_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02236 }
02237 nfft_free(fg_exp_l);
02238 nfft_free(psij_const);
02239 return;
02240 }
02241
02242 if(ths->nfft_flags & FG_PSI)
02243 {
02244 psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02245 fg_exp_l=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02246
02247 nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
02248 nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
02249 nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->b[2]);
02250
02251 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02252 {
02253 nfft_uo(ths,j,&u,&o,0);
02254 fg_psij0 = (PHI(*xj-((double)u)/n0,0));
02255 fg_psij1 = exp(2.0*(n0*(*xj) - u)/ths->b[0]);
02256 fg_psij2 = 1.0;
02257 psij_const[0] = fg_psij0;
02258 for(l=1; l<=2*m+1; l++)
02259 {
02260 fg_psij2 *= fg_psij1;
02261 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
02262 }
02263
02264 nfft_uo(ths,j,&u,&o,1);
02265 fg_psij0 = (PHI(*(xj+1)-((double)u)/n1,1));
02266 fg_psij1 = exp(2.0*(n1*(*(xj+1)) - u)/ths->b[1]);
02267 fg_psij2 = 1.0;
02268 psij_const[2*m+2] = fg_psij0;
02269 for(l=1; l<=2*m+1; l++)
02270 {
02271 fg_psij2 *= fg_psij1;
02272 psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
02273 }
02274
02275 nfft_uo(ths,j,&u,&o,2);
02276 fg_psij0 = (PHI(*(xj+2)-((double)u)/n2,2));
02277 fg_psij1 = exp(2.0*(n2*(*(xj+2)) - u)/ths->b[2]);
02278 fg_psij2 = 1.0;
02279 psij_const[2*(2*m+2)] = fg_psij0;
02280 for(l=1; l<=2*m+1; l++)
02281 {
02282 fg_psij2 *= fg_psij1;
02283 psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
02284 }
02285
02286 nfft_trafo_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02287 }
02288 nfft_free(fg_exp_l);
02289 nfft_free(psij_const);
02290 return;
02291 }
02292
02293 if(ths->nfft_flags & PRE_LIN_PSI)
02294 {
02295 psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02296 K=ths->K;
02297 ip_s=K/(m+2);
02298
02299 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02300 {
02301 nfft_uo(ths,j,&u,&o,0);
02302 ip_y = fabs(n0*(*(xj+0)) - u)*((double)ip_s);
02303 ip_u = LRINT(floor(ip_y));
02304 ip_w = ip_y-ip_u;
02305 for(l=0; l < 2*m+2; l++)
02306 psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02307 ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
02308
02309 nfft_uo(ths,j,&u,&o,1);
02310 ip_y = fabs(n1*(*(xj+1)) - u)*((double)ip_s);
02311 ip_u = LRINT(floor(ip_y));
02312 ip_w = ip_y-ip_u;
02313 for(l=0; l < 2*m+2; l++)
02314 psij_const[2*m+2+l] = ths->psi[(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02315 ths->psi[(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
02316
02317 nfft_uo(ths,j,&u,&o,2);
02318 ip_y = fabs(n2*(*(xj+2)) - u)*((double)ip_s);
02319 ip_u = LRINT(floor(ip_y));
02320 ip_w = ip_y-ip_u;
02321 for(l=0; l < 2*m+2; l++)
02322 psij_const[2*(2*m+2)+l] = ths->psi[2*(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02323 ths->psi[2*(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
02324
02325 nfft_trafo_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02326 }
02327 nfft_free(psij_const);
02328 return;
02329 }
02330
02331
02332 psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02333 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02334 {
02335 nfft_uo(ths,j,&u,&o,0);
02336 for(l=0;l<=2*m+1;l++)
02337 psij_const[l]=(PHI(*xj-((double)((u+l)))/n0,0));
02338
02339 nfft_uo(ths,j,&u,&o,1);
02340 for(l=0;l<=2*m+1;l++)
02341 psij_const[2*m+2+l]=(PHI(*(xj+1)-((double)((u+l)))/n1,1));
02342
02343 nfft_uo(ths,j,&u,&o,2);
02344 for(l=0;l<=2*m+1;l++)
02345 psij_const[2*(2*m+2)+l]=(PHI(*(xj+2)-((double)((u+l)))/n2,2));
02346
02347 nfft_trafo_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02348 }
02349 nfft_free(psij_const);
02350 }
02351
02352 static void nfft_adjoint_3d_B(nfft_plan *ths)
02353 {
02354 int n0,N0,n1,N1,n2,N2,u,o,j,M,l,m, *psi_index_g,K,ip_s,ip_u;
02355 double _Complex *fj,*g;
02356 double *psij, *psij_const, *xj, ip_y, ip_w;
02357
02358 double *fg_exp_l, fg_psij0, fg_psij1, fg_psij2;
02359
02360 N0=ths->N[0];
02361 n0=ths->n[0];
02362 N1=ths->N[1];
02363 n1=ths->n[1];
02364 N2=ths->N[2];
02365 n2=ths->n[2];
02366 M=ths->M_total;
02367 m=ths->m;
02368
02369 g=ths->g;
02370 memset(g,0,ths->n_total*sizeof(double _Complex));
02371
02372 if(ths->nfft_flags & PRE_FULL_PSI)
02373 {
02374 psi_index_g=ths->psi_index_g;
02375 for(j=0, fj=ths->f, psij=ths->psi; j<M; j++, fj++)
02376 for(l=0; l<(2*m+2)*(2*m+2)*(2*m+2); l++)
02377 g[(*psi_index_g++)] += (*psij++) * (*fj);
02378 return;
02379 }
02380
02381 if(ths->nfft_flags & PRE_PSI)
02382 {
02383 for(j=0,fj=ths->f,xj=ths->x; j<M; j++,fj++,xj+=3)
02384 nfft_adjoint_3d_compute(fj, g, ths->psi+j*3*(2*m+2), ths->psi+(j*3+1)*(2*m+2), ths->psi+(j*3+2)*(2*m+2), xj, xj+1, xj+2, n0, n1, n2, m);
02385 return;
02386 }
02387
02388 if(ths->nfft_flags & PRE_FG_PSI)
02389 {
02390 psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02391 fg_exp_l=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02392
02393 nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
02394 nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
02395 nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->b[2]);
02396
02397 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02398 {
02399 fg_psij0 = ths->psi[2*j*3];
02400 fg_psij1 = ths->psi[2*j*3+1];
02401 fg_psij2 = 1.0;
02402 psij_const[0] = fg_psij0;
02403 for(l=1; l<=2*m+1; l++)
02404 {
02405 fg_psij2 *= fg_psij1;
02406 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
02407 }
02408
02409 fg_psij0 = ths->psi[2*(j*3+1)];
02410 fg_psij1 = ths->psi[2*(j*3+1)+1];
02411 fg_psij2 = 1.0;
02412 psij_const[2*m+2] = fg_psij0;
02413 for(l=1; l<=2*m+1; l++)
02414 {
02415 fg_psij2 *= fg_psij1;
02416 psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
02417 }
02418
02419 fg_psij0 = ths->psi[2*(j*3+2)];
02420 fg_psij1 = ths->psi[2*(j*3+2)+1];
02421 fg_psij2 = 1.0;
02422 psij_const[2*(2*m+2)] = fg_psij0;
02423 for(l=1; l<=2*m+1; l++)
02424 {
02425 fg_psij2 *= fg_psij1;
02426 psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
02427 }
02428
02429 nfft_adjoint_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02430 }
02431 nfft_free(fg_exp_l);
02432 nfft_free(psij_const);
02433 return;
02434 }
02435
02436 if(ths->nfft_flags & FG_PSI)
02437 {
02438 psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02439 fg_exp_l=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02440
02441 nfft_3d_init_fg_exp_l(fg_exp_l, m, ths->b[0]);
02442 nfft_3d_init_fg_exp_l(fg_exp_l+2*m+2, m, ths->b[1]);
02443 nfft_3d_init_fg_exp_l(fg_exp_l+2*(2*m+2), m, ths->b[2]);
02444
02445 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02446 {
02447 nfft_uo(ths,j,&u,&o,0);
02448 fg_psij0 = (PHI(*xj-((double)u)/n0,0));
02449 fg_psij1 = exp(2.0*(n0*(*xj) - u)/ths->b[0]);
02450 fg_psij2 = 1.0;
02451 psij_const[0] = fg_psij0;
02452 for(l=1; l<=2*m+1; l++)
02453 {
02454 fg_psij2 *= fg_psij1;
02455 psij_const[l] = fg_psij0*fg_psij2*fg_exp_l[l];
02456 }
02457
02458 nfft_uo(ths,j,&u,&o,1);
02459 fg_psij0 = (PHI(*(xj+1)-((double)u)/n1,1));
02460 fg_psij1 = exp(2.0*(n1*(*(xj+1)) - u)/ths->b[1]);
02461 fg_psij2 = 1.0;
02462 psij_const[2*m+2] = fg_psij0;
02463 for(l=1; l<=2*m+1; l++)
02464 {
02465 fg_psij2 *= fg_psij1;
02466 psij_const[2*m+2+l] = fg_psij0*fg_psij2*fg_exp_l[2*m+2+l];
02467 }
02468
02469 nfft_uo(ths,j,&u,&o,2);
02470 fg_psij0 = (PHI(*(xj+2)-((double)u)/n2,2));
02471 fg_psij1 = exp(2.0*(n2*(*(xj+2)) - u)/ths->b[2]);
02472 fg_psij2 = 1.0;
02473 psij_const[2*(2*m+2)] = fg_psij0;
02474 for(l=1; l<=2*m+1; l++)
02475 {
02476 fg_psij2 *= fg_psij1;
02477 psij_const[2*(2*m+2)+l] = fg_psij0*fg_psij2*fg_exp_l[2*(2*m+2)+l];
02478 }
02479
02480 nfft_adjoint_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02481 }
02482 nfft_free(fg_exp_l);
02483 nfft_free(psij_const);
02484 return;
02485 }
02486
02487 if(ths->nfft_flags & PRE_LIN_PSI)
02488 {
02489 psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02490 K=ths->K;
02491 ip_s=K/(m+2);
02492
02493 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02494 {
02495 nfft_uo(ths,j,&u,&o,0);
02496 ip_y = fabs(n0*(*(xj+0)) - u)*((double)ip_s);
02497 ip_u = LRINT(floor(ip_y));
02498 ip_w = ip_y-ip_u;
02499 for(l=0; l < 2*m+2; l++)
02500 psij_const[l] = ths->psi[abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02501 ths->psi[abs(ip_u-l*ip_s+1)]*(ip_w);
02502
02503 nfft_uo(ths,j,&u,&o,1);
02504 ip_y = fabs(n1*(*(xj+1)) - u)*((double)ip_s);
02505 ip_u = LRINT(floor(ip_y));
02506 ip_w = ip_y-ip_u;
02507 for(l=0; l < 2*m+2; l++)
02508 psij_const[2*m+2+l] = ths->psi[(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02509 ths->psi[(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
02510
02511 nfft_uo(ths,j,&u,&o,2);
02512 ip_y = fabs(n2*(*(xj+2)) - u)*((double)ip_s);
02513 ip_u = LRINT(floor(ip_y));
02514 ip_w = ip_y-ip_u;
02515 for(l=0; l < 2*m+2; l++)
02516 psij_const[2*(2*m+2)+l] = ths->psi[2*(K+1)+abs(ip_u-l*ip_s)]*(1.0-ip_w) +
02517 ths->psi[2*(K+1)+abs(ip_u-l*ip_s+1)]*(ip_w);
02518
02519 nfft_adjoint_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02520 }
02521 nfft_free(psij_const);
02522 return;
02523 }
02524
02525
02526 psij_const=(double*)nfft_malloc(3*(2*m+2)*sizeof(double));
02527 for(j=0,fj=ths->f,xj=ths->x;j<M;j++,fj++,xj+=3)
02528 {
02529 nfft_uo(ths,j,&u,&o,0);
02530 for(l=0;l<=2*m+1;l++)
02531 psij_const[l]=(PHI(*xj-((double)((u+l)))/n0,0));
02532
02533 nfft_uo(ths,j,&u,&o,1);
02534 for(l=0;l<=2*m+1;l++)
02535 psij_const[2*m+2+l]=(PHI(*(xj+1)-((double)((u+l)))/n1,1));
02536
02537 nfft_uo(ths,j,&u,&o,2);
02538 for(l=0;l<=2*m+1;l++)
02539 psij_const[2*(2*m+2)+l]=(PHI(*(xj+2)-((double)((u+l)))/n2,2));
02540
02541 nfft_adjoint_3d_compute(fj, g, psij_const, psij_const+2*m+2, psij_const+(2*m+2)*2, xj, xj+1, xj+2, n0, n1, n2, m);
02542 }
02543 nfft_free(psij_const);
02544 }
02545
02546 void nfft_trafo_3d(nfft_plan *ths)
02547 {
02548 int k0,k1,k2,n0,n1,n2,N0,N1,N2;
02549 double _Complex *g_hat,*f_hat;
02550 double *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12, *c_phi_inv21, *c_phi_inv22;
02551 double ck01, ck02, ck11, ck12, ck21, ck22;
02552 double _Complex *g_hat111,*f_hat111,*g_hat211,*f_hat211,*g_hat121,*f_hat121,*g_hat221,*f_hat221;
02553 double _Complex *g_hat112,*f_hat112,*g_hat212,*f_hat212,*g_hat122,*f_hat122,*g_hat222,*f_hat222;
02554
02555 ths->g_hat=ths->g1;
02556 ths->g=ths->g2;
02557
02558 N0=ths->N[0];
02559 N1=ths->N[1];
02560 N2=ths->N[2];
02561 n0=ths->n[0];
02562 n1=ths->n[1];
02563 n2=ths->n[2];
02564
02565 f_hat=ths->f_hat;
02566 g_hat=ths->g_hat;
02567
02568 TIC(0)
02569 memset(ths->g_hat,0,ths->n_total*sizeof(double _Complex));
02570 if(ths->nfft_flags & PRE_PHI_HUT)
02571 {
02572 c_phi_inv01=ths->c_phi_inv[0];
02573 c_phi_inv02=&ths->c_phi_inv[0][N0/2];
02574
02575 for(k0=0;k0<N0/2;k0++)
02576 {
02577 ck01=(*c_phi_inv01++);
02578 ck02=(*c_phi_inv02++);
02579 c_phi_inv11=ths->c_phi_inv[1];
02580 c_phi_inv12=&ths->c_phi_inv[1][N1/2];
02581
02582 for(k1=0;k1<N1/2;k1++)
02583 {
02584 ck11=(*c_phi_inv11++);
02585 ck12=(*c_phi_inv12++);
02586 c_phi_inv21=ths->c_phi_inv[2];
02587 c_phi_inv22=&ths->c_phi_inv[2][N2/2];
02588
02589 g_hat111=g_hat + ((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2;
02590 f_hat111=f_hat + (k0*N1+k1)*N2;
02591 g_hat211=g_hat + (k0*n1+n1-N1/2+k1)*n2+n2-N2/2;
02592 f_hat211=f_hat + ((N0/2+k0)*N1+k1)*N2;
02593 g_hat121=g_hat + ((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2;
02594 f_hat121=f_hat + (k0*N1+N1/2+k1)*N2;
02595 g_hat221=g_hat + (k0*n1+k1)*n2+n2-N2/2;
02596 f_hat221=f_hat + ((N0/2+k0)*N1+N1/2+k1)*N2;
02597
02598 g_hat112=g_hat + ((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2;
02599 f_hat112=f_hat + (k0*N1+k1)*N2+N2/2;
02600 g_hat212=g_hat + (k0*n1+n1-N1/2+k1)*n2;
02601 f_hat212=f_hat + ((N0/2+k0)*N1+k1)*N2+N2/2;
02602 g_hat122=g_hat + ((n0-N0/2+k0)*n1+k1)*n2;
02603 f_hat122=f_hat + (k0*N1+N1/2+k1)*N2+N2/2;
02604 g_hat222=g_hat + (k0*n1+k1)*n2;
02605 f_hat222=f_hat + ((N0/2+k0)*N1+N1/2+k1)*N2+N2/2;
02606
02607 for(k2=0;k2<N2/2;k2++)
02608 {
02609 ck21=(*c_phi_inv21++);
02610 ck22=(*c_phi_inv22++);
02611
02612 (*g_hat111++) = (*f_hat111++) * ck01 * ck11 * ck21;
02613 (*g_hat211++) = (*f_hat211++) * ck02 * ck11 * ck21;
02614 (*g_hat121++) = (*f_hat121++) * ck01 * ck12 * ck21;
02615 (*g_hat221++) = (*f_hat221++) * ck02 * ck12 * ck21;
02616
02617 (*g_hat112++) = (*f_hat112++) * ck01 * ck11 * ck22;
02618 (*g_hat212++) = (*f_hat212++) * ck02 * ck11 * ck22;
02619 (*g_hat122++) = (*f_hat122++) * ck01 * ck12 * ck22;
02620 (*g_hat222++) = (*f_hat222++) * ck02 * ck12 * ck22;
02621 }
02622 }
02623 }
02624 }
02625 else
02626 for(k0=0;k0<N0/2;k0++)
02627 {
02628 ck01=1./(PHI_HUT(k0-N0/2,0));
02629 ck02=1./(PHI_HUT(k0,0));
02630 for(k1=0;k1<N1/2;k1++)
02631 {
02632 ck11=1./(PHI_HUT(k1-N1/2,1));
02633 ck12=1./(PHI_HUT(k1,1));
02634
02635 for(k2=0;k2<N2/2;k2++)
02636 {
02637 ck21=1./(PHI_HUT(k2-N2/2,2));
02638 ck22=1./(PHI_HUT(k2,2));
02639
02640 g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2+k2] = f_hat[(k0*N1+k1)*N2+k2] * ck01 * ck11 * ck21;
02641 g_hat[(k0*n1+n1-N1/2+k1)*n2+n2-N2/2+k2] = f_hat[((N0/2+k0)*N1+k1)*N2+k2] * ck02 * ck11 * ck21;
02642 g_hat[((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2+k2] = f_hat[(k0*N1+N1/2+k1)*N2+k2] * ck01 * ck12 * ck21;
02643 g_hat[(k0*n1+k1)*n2+n2-N2/2+k2] = f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+k2] * ck02 * ck12 * ck21;
02644
02645 g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+k2] = f_hat[(k0*N1+k1)*N2+N2/2+k2] * ck01 * ck11 * ck22;
02646 g_hat[(k0*n1+n1-N1/2+k1)*n2+k2] = f_hat[((N0/2+k0)*N1+k1)*N2+N2/2+k2] * ck02 * ck11 * ck22;
02647 g_hat[((n0-N0/2+k0)*n1+k1)*n2+k2] = f_hat[(k0*N1+N1/2+k1)*N2+N2/2+k2] * ck01 * ck12 * ck22;
02648 g_hat[(k0*n1+k1)*n2+k2] = f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+N2/2+k2] * ck02 * ck12 * ck22;
02649 }
02650 }
02651 }
02652
02653 TOC(0)
02654
02655 TIC_FFTW(1)
02656 fftw_execute(ths->my_fftw_plan1);
02657 TOC_FFTW(1);
02658
02659 TIC(2);
02660 nfft_trafo_3d_B(ths);
02661 TOC(2);
02662 }
02663
02664 void nfft_adjoint_3d(nfft_plan *ths)
02665 {
02666 int k0,k1,k2,n0,n1,n2,N0,N1,N2;
02667 double _Complex *g_hat,*f_hat;
02668 double *c_phi_inv01, *c_phi_inv02, *c_phi_inv11, *c_phi_inv12, *c_phi_inv21, *c_phi_inv22;
02669 double ck01, ck02, ck11, ck12, ck21, ck22;
02670 double _Complex *g_hat111,*f_hat111,*g_hat211,*f_hat211,*g_hat121,*f_hat121,*g_hat221,*f_hat221;
02671 double _Complex *g_hat112,*f_hat112,*g_hat212,*f_hat212,*g_hat122,*f_hat122,*g_hat222,*f_hat222;
02672
02673 ths->g_hat=ths->g1;
02674 ths->g=ths->g2;
02675
02676 N0=ths->N[0];
02677 N1=ths->N[1];
02678 N2=ths->N[2];
02679 n0=ths->n[0];
02680 n1=ths->n[1];
02681 n2=ths->n[2];
02682
02683 f_hat=ths->f_hat;
02684 g_hat=ths->g_hat;
02685
02686 TIC(2);
02687 nfft_adjoint_3d_B(ths);
02688 TOC(2);
02689
02690 TIC_FFTW(1)
02691 fftw_execute(ths->my_fftw_plan2);
02692 TOC_FFTW(1);
02693
02694 TIC(0)
02695 if(ths->nfft_flags & PRE_PHI_HUT)
02696 {
02697 c_phi_inv01=ths->c_phi_inv[0];
02698 c_phi_inv02=&ths->c_phi_inv[0][N0/2];
02699
02700 for(k0=0;k0<N0/2;k0++)
02701 {
02702 ck01=(*c_phi_inv01++);
02703 ck02=(*c_phi_inv02++);
02704 c_phi_inv11=ths->c_phi_inv[1];
02705 c_phi_inv12=&ths->c_phi_inv[1][N1/2];
02706
02707 for(k1=0;k1<N1/2;k1++)
02708 {
02709 ck11=(*c_phi_inv11++);
02710 ck12=(*c_phi_inv12++);
02711 c_phi_inv21=ths->c_phi_inv[2];
02712 c_phi_inv22=&ths->c_phi_inv[2][N2/2];
02713
02714 g_hat111=g_hat + ((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2;
02715 f_hat111=f_hat + (k0*N1+k1)*N2;
02716 g_hat211=g_hat + (k0*n1+n1-N1/2+k1)*n2+n2-N2/2;
02717 f_hat211=f_hat + ((N0/2+k0)*N1+k1)*N2;
02718 g_hat121=g_hat + ((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2;
02719 f_hat121=f_hat + (k0*N1+N1/2+k1)*N2;
02720 g_hat221=g_hat + (k0*n1+k1)*n2+n2-N2/2;
02721 f_hat221=f_hat + ((N0/2+k0)*N1+N1/2+k1)*N2;
02722
02723 g_hat112=g_hat + ((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2;
02724 f_hat112=f_hat + (k0*N1+k1)*N2+N2/2;
02725 g_hat212=g_hat + (k0*n1+n1-N1/2+k1)*n2;
02726 f_hat212=f_hat + ((N0/2+k0)*N1+k1)*N2+N2/2;
02727 g_hat122=g_hat + ((n0-N0/2+k0)*n1+k1)*n2;
02728 f_hat122=f_hat + (k0*N1+N1/2+k1)*N2+N2/2;
02729 g_hat222=g_hat + (k0*n1+k1)*n2;
02730 f_hat222=f_hat + ((N0/2+k0)*N1+N1/2+k1)*N2+N2/2;
02731
02732 for(k2=0;k2<N2/2;k2++)
02733 {
02734 ck21=(*c_phi_inv21++);
02735 ck22=(*c_phi_inv22++);
02736
02737 (*f_hat111++) = (*g_hat111++) * ck01 * ck11 * ck21;
02738 (*f_hat211++) = (*g_hat211++) * ck02 * ck11 * ck21;
02739 (*f_hat121++) = (*g_hat121++) * ck01 * ck12 * ck21;
02740 (*f_hat221++) = (*g_hat221++) * ck02 * ck12 * ck21;
02741
02742 (*f_hat112++) = (*g_hat112++) * ck01 * ck11 * ck22;
02743 (*f_hat212++) = (*g_hat212++) * ck02 * ck11 * ck22;
02744 (*f_hat122++) = (*g_hat122++) * ck01 * ck12 * ck22;
02745 (*f_hat222++) = (*g_hat222++) * ck02 * ck12 * ck22;
02746 }
02747 }
02748 }
02749 }
02750 else
02751 for(k0=0;k0<N0/2;k0++)
02752 {
02753 ck01=1./(PHI_HUT(k0-N0/2,0));
02754 ck02=1./(PHI_HUT(k0,0));
02755 for(k1=0;k1<N1/2;k1++)
02756 {
02757 ck11=1./(PHI_HUT(k1-N1/2,1));
02758 ck12=1./(PHI_HUT(k1,1));
02759
02760 for(k2=0;k2<N2/2;k2++)
02761 {
02762 ck21=1./(PHI_HUT(k2-N2/2,2));
02763 ck22=1./(PHI_HUT(k2,2));
02764
02765 f_hat[(k0*N1+k1)*N2+k2] = g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+n2-N2/2+k2] * ck01 * ck11 * ck21;
02766 f_hat[((N0/2+k0)*N1+k1)*N2+k2] = g_hat[(k0*n1+n1-N1/2+k1)*n2+n2-N2/2+k2] * ck02 * ck11 * ck21;
02767 f_hat[(k0*N1+N1/2+k1)*N2+k2] = g_hat[((n0-N0/2+k0)*n1+k1)*n2+n2-N2/2+k2] * ck01 * ck12 * ck21;
02768 f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+k2] = g_hat[(k0*n1+k1)*n2+n2-N2/2+k2] * ck02 * ck12 * ck21;
02769
02770 f_hat[(k0*N1+k1)*N2+N2/2+k2] = g_hat[((n0-N0/2+k0)*n1+n1-N1/2+k1)*n2+k2] * ck01 * ck11 * ck22;
02771 f_hat[((N0/2+k0)*N1+k1)*N2+N2/2+k2] = g_hat[(k0*n1+n1-N1/2+k1)*n2+k2] * ck02 * ck11 * ck22;
02772 f_hat[(k0*N1+N1/2+k1)*N2+N2/2+k2] = g_hat[((n0-N0/2+k0)*n1+k1)*n2+k2] * ck01 * ck12 * ck22;
02773 f_hat[((N0/2+k0)*N1+N1/2+k1)*N2+N2/2+k2] = g_hat[(k0*n1+k1)*n2+k2] * ck02 * ck12 * ck22;
02774 }
02775 }
02776 }
02777
02778 TOC(0)
02779 }
02780
02783 void nfft_trafo(nfft_plan *ths)
02784 {
02785 switch(ths->d)
02786 {
02787 case 1: nfft_trafo_1d(ths); break;
02788 case 2: nfft_trafo_2d(ths); break;
02789 case 3: nfft_trafo_3d(ths); break;
02790 default:
02791
02792 ths->g_hat=ths->g1;
02793 ths->g=ths->g2;
02794
02798 TIC(0)
02799 nfft_D_A(ths);
02800 TOC(0)
02801
02802
02806 TIC_FFTW(1)
02807 fftw_execute(ths->my_fftw_plan1);
02808 TOC_FFTW(1)
02809
02810
02813 TIC(2)
02814 nfft_B_A(ths);
02815 TOC(2)
02816 }
02817 }
02818
02819 void nfft_adjoint(nfft_plan *ths)
02820 {
02821 switch(ths->d)
02822 {
02823 case 1: nfft_adjoint_1d(ths); break;
02824 case 2: nfft_adjoint_2d(ths); break;
02825 case 3: nfft_adjoint_3d(ths); break;
02826 default:
02827
02828 ths->g_hat=ths->g1;
02829 ths->g=ths->g2;
02830
02834 TIC(2)
02835 nfft_B_T(ths);
02836 TOC(2)
02837
02838
02842 TIC_FFTW(1)
02843 fftw_execute(ths->my_fftw_plan2);
02844 TOC_FFTW(1)
02845
02846
02849 TIC(0)
02850 nfft_D_T(ths);
02851 TOC(0)
02852 }
02853 }
02854
02855
02858 static void nfft_precompute_phi_hut(nfft_plan *ths)
02859 {
02860 int ks[ths->d];
02861 int t;
02863 ths->c_phi_inv = (double**) nfft_malloc(ths->d*sizeof(double*));
02864
02865 for(t=0; t<ths->d; t++)
02866 {
02867 ths->c_phi_inv[t]= (double*)nfft_malloc(ths->N[t]*sizeof(double));
02868 for(ks[t]=0; ks[t]<ths->N[t]; ks[t]++)
02869 ths->c_phi_inv[t][ks[t]]= 1.0/(PHI_HUT(ks[t]-ths->N[t]/2,t));
02870 }
02871 }
02872
02878 void nfft_precompute_lin_psi(nfft_plan *ths)
02879 {
02880 int t;
02881 int j;
02882 double step;
02884 for (t=0; t<ths->d; t++)
02885 {
02886 step=((double)(ths->m+2))/(ths->K*ths->n[t]);
02887 for(j=0;j<=ths->K;j++)
02888 {
02889 ths->psi[(ths->K+1)*t + j] = PHI(j*step,t);
02890 }
02891 }
02892 }
02893
02894 static void nfft_precompute_fg_psi(nfft_plan *ths)
02895 {
02896 int t;
02897 int j;
02898 int u, o;
02900 for (t=0; t<ths->d; t++)
02901 for(j=0;j<ths->M_total;j++)
02902 {
02903 nfft_uo(ths,j,&u,&o,t);
02904
02905 ths->psi[2*(j*ths->d+t)]=
02906 (PHI((ths->x[j*ths->d+t]-((double)u)/ths->n[t]),t));
02907
02908 ths->psi[2*(j*ths->d+t)+1]=
02909 exp(2.0*(ths->n[t]*ths->x[j*ths->d+t] - u) / ths->b[t]);
02910 }
02911
02912 }
02913
02914 void nfft_precompute_psi(nfft_plan *ths)
02915 {
02916 int t;
02917 int j;
02918 int l;
02919 int lj;
02920 int u, o;
02922 for (t=0; t<ths->d; t++)
02923 for(j=0;j<ths->M_total;j++)
02924 {
02925 nfft_uo(ths,j,&u,&o,t);
02926
02927 for(l=u, lj=0; l <= o; l++, lj++)
02928 ths->psi[(j*ths->d+t)*(2*ths->m+2)+lj]=
02929 (PHI((ths->x[j*ths->d+t]-((double)l)/ths->n[t]),t));
02930 }
02931
02932 }
02933
02934 void nfft_precompute_full_psi(nfft_plan *ths)
02935 {
02936 int t,t2;
02937 int j;
02938 int l_L;
02939 int l[ths->d];
02940 int lj[ths->d];
02941 int ll_plain[ths->d+1];
02942 int lprod;
02943 int u[ths->d], o[ths->d];
02945 double phi_prod[ths->d+1];
02946
02947 int ix,ix_old;
02948
02949 phi_prod[0]=1;
02950 ll_plain[0]=0;
02951
02952 for(t=0,lprod = 1; t<ths->d; t++)
02953 lprod *= 2*ths->m+2;
02954
02955 for(j=0,ix=0,ix_old=0; j<ths->M_total; j++)
02956 {
02957 MACRO_init_uo_l_lj_t;
02958
02959 for(l_L=0; l_L<lprod; l_L++, ix++)
02960 {
02961 MACRO_update_phi_prod_ll_plain(without_PRE_PSI);
02962
02963 ths->psi_index_g[ix]=ll_plain[ths->d];
02964 ths->psi[ix]=phi_prod[ths->d];
02965
02966 MACRO_count_uo_l_lj_t;
02967 }
02968
02969
02970 ths->psi_index_f[j]=ix-ix_old;
02971 ix_old=ix;
02972 }
02973 }
02974
02975 void nfft_precompute_one_psi(nfft_plan *ths)
02976 {
02977 if(ths->nfft_flags & PRE_LIN_PSI)
02978 nfft_precompute_lin_psi(ths);
02979 if(ths->nfft_flags & PRE_FG_PSI)
02980 nfft_precompute_fg_psi(ths);
02981 if(ths->nfft_flags & PRE_PSI)
02982 nfft_precompute_psi(ths);
02983 if(ths->nfft_flags & PRE_FULL_PSI)
02984 nfft_precompute_full_psi(ths);
02985 }
02986
02987
02988 static void nfft_init_help(nfft_plan *ths)
02989 {
02990 int t;
02991 int lprod;
02993 ths->N_total=nfft_prod_int(ths->N, ths->d);
02994 ths->n_total=nfft_prod_int(ths->n, ths->d);
02995
02996 ths->sigma = (double*) nfft_malloc(ths->d*sizeof(double));
02997 for(t = 0;t < ths->d; t++)
02998 ths->sigma[t] = ((double)ths->n[t])/ths->N[t];
02999
03000 WINDOW_HELP_INIT;
03001
03002 if(ths->nfft_flags & MALLOC_X)
03003 ths->x = (double*)nfft_malloc(ths->d*ths->M_total*sizeof(double));
03004
03005 if(ths->nfft_flags & MALLOC_F_HAT)
03006 ths->f_hat = (double _Complex*)nfft_malloc(ths->N_total*sizeof(double _Complex));
03007
03008 if(ths->nfft_flags & MALLOC_F)
03009 ths->f = (double _Complex*)nfft_malloc(ths->M_total*sizeof(double _Complex));
03010
03011 if(ths->nfft_flags & PRE_PHI_HUT)
03012 nfft_precompute_phi_hut(ths);
03013
03014 if(ths->nfft_flags & PRE_LIN_PSI)
03015 {
03016 ths->K=(1U<< 10)*(ths->m+2);
03017 ths->psi = (double*) nfft_malloc((ths->K+1)*ths->d*sizeof(double));
03018 }
03019
03020 if(ths->nfft_flags & PRE_FG_PSI)
03021 ths->psi = (double*) nfft_malloc(ths->M_total*ths->d*2*sizeof(double));
03022
03023 if(ths->nfft_flags & PRE_PSI)
03024 ths->psi = (double*) nfft_malloc(ths->M_total*ths->d*
03025 (2*ths->m+2)*sizeof(double));
03026
03027 if(ths->nfft_flags & PRE_FULL_PSI)
03028 {
03029 for(t=0,lprod = 1; t<ths->d; t++)
03030 lprod *= 2*ths->m+2;
03031
03032 ths->psi = (double*) nfft_malloc(ths->M_total*lprod*sizeof(double));
03033
03034 ths->psi_index_f = (int*) nfft_malloc(ths->M_total*sizeof(int));
03035 ths->psi_index_g = (int*) nfft_malloc(ths->M_total*lprod*sizeof(int));
03036 }
03037
03038 if(ths->nfft_flags & FFTW_INIT)
03039 {
03040 ths->g1=(double _Complex*)nfft_malloc(ths->n_total*sizeof(double _Complex));
03041
03042 if(ths->nfft_flags & FFT_OUT_OF_PLACE)
03043 ths->g2 = (double _Complex*) nfft_malloc(ths->n_total*sizeof(double _Complex));
03044 else
03045 ths->g2 = ths->g1;
03046
03047 ths->my_fftw_plan1 = fftw_plan_dft(ths->d, ths->n, ths->g1, ths->g2, FFTW_FORWARD, ths->fftw_flags);
03048 ths->my_fftw_plan2 = fftw_plan_dft(ths->d, ths->n, ths->g2, ths->g1,
03049 FFTW_BACKWARD, ths->fftw_flags);
03050 }
03051
03052 ths->mv_trafo = (void (*) (void* ))nfft_trafo;
03053 ths->mv_adjoint = (void (*) (void* ))nfft_adjoint;
03054 }
03055
03056 void nfft_init(nfft_plan *ths, int d, int *N, int M_total)
03057 {
03058 int t;
03060 ths->d = d;
03061
03062 ths->N=(int*) nfft_malloc(d*sizeof(int));
03063 for(t = 0;t < d; t++)
03064 ths->N[t] = N[t];
03065
03066 ths->M_total = M_total;
03067
03068 ths->n = (int*) nfft_malloc(d*sizeof(int));
03069 for(t = 0;t < d; t++)
03070 ths->n[t] = 2*nfft_next_power_of_2(ths->N[t]);
03071
03072 WINDOW_HELP_ESTIMATE_m;
03073
03074 ths->nfft_flags = PRE_PHI_HUT| PRE_PSI| MALLOC_X| MALLOC_F_HAT| MALLOC_F|
03075 FFTW_INIT| FFT_OUT_OF_PLACE;
03076 ths->fftw_flags= FFTW_ESTIMATE| FFTW_DESTROY_INPUT;
03077
03078 nfft_init_help(ths);
03079 }
03080
03081 void nfft_init_guru(nfft_plan *ths, int d, int *N, int M_total, int *n,
03082 int m, unsigned nfft_flags, unsigned fftw_flags)
03083 {
03084 int t;
03086 ths->d =d;
03087 ths->N= (int*) nfft_malloc(ths->d*sizeof(int));
03088 for(t=0; t<d; t++)
03089 ths->N[t]= N[t];
03090 ths->M_total= M_total;
03091 ths->n= (int*) nfft_malloc(ths->d*sizeof(int));
03092 for(t=0; t<d; t++)
03093 ths->n[t]= n[t];
03094 ths->m= m;
03095 ths->nfft_flags= nfft_flags;
03096 ths->fftw_flags= fftw_flags;
03097
03098 nfft_init_help(ths);
03099 }
03100
03101 void nfft_init_1d(nfft_plan *ths, int N1, int M_total)
03102 {
03103 int N[1];
03104
03105 N[0]=N1;
03106 nfft_init(ths,1,N,M_total);
03107 }
03108
03109 void nfft_init_2d(nfft_plan *ths, int N1, int N2, int M_total)
03110 {
03111 int N[2];
03112
03113 N[0]=N1;
03114 N[1]=N2;
03115 nfft_init(ths,2,N,M_total);
03116 }
03117
03118 void nfft_init_3d(nfft_plan *ths, int N1, int N2, int N3, int M_total)
03119 {
03120 int N[3];
03121
03122 N[0]=N1;
03123 N[1]=N2;
03124 N[2]=N3;
03125 nfft_init(ths,3,N,M_total);
03126 }
03127
03128 void nfft_check(nfft_plan *ths)
03129 {
03130 int j;
03131
03132 for(j=0;j<ths->M_total*ths->d;j++)
03133 if((ths->x[j]<-0.5) || (ths->x[j]>=0.5))
03134 fprintf(stderr,"nfft_check: ths->x[%d]=%e out of range [-0.5,0.5)\n",
03135 j,ths->x[j]);
03136
03137 for(j=0;j<ths->d;j++)
03138 {
03139 if(ths->sigma[j]<=1)
03140 fprintf(stderr,"nfft_check: oversampling factor too small\n");
03141
03142 if(ths->N[j]<=ths->m)
03143 fprintf(stderr,
03144 "nfft_check: polynomial degree N is smaller than cut-off m\n");
03145
03146 if(ths->N[j]%2==1)
03147 fprintf(stderr,"nfft_check: polynomial degree N has to be even\n");
03148 }
03149 }
03150
03151 void nfft_finalize(nfft_plan *ths)
03152 {
03153 int t;
03154
03155 if(ths->nfft_flags & FFTW_INIT)
03156 {
03157 fftw_destroy_plan(ths->my_fftw_plan2);
03158 fftw_destroy_plan(ths->my_fftw_plan1);
03159
03160 if(ths->nfft_flags & FFT_OUT_OF_PLACE)
03161 nfft_free(ths->g2);
03162
03163 nfft_free(ths->g1);
03164 }
03165
03166 if(ths->nfft_flags & PRE_FULL_PSI)
03167 {
03168 nfft_free(ths->psi_index_g);
03169 nfft_free(ths->psi_index_f);
03170 nfft_free(ths->psi);
03171 }
03172
03173 if(ths->nfft_flags & PRE_PSI)
03174 nfft_free(ths->psi);
03175
03176 if(ths->nfft_flags & PRE_FG_PSI)
03177 nfft_free(ths->psi);
03178
03179 if(ths->nfft_flags & PRE_LIN_PSI)
03180 nfft_free(ths->psi);
03181
03182 if(ths->nfft_flags & PRE_PHI_HUT)
03183 {
03184 for(t=0; t<ths->d; t++)
03185 nfft_free(ths->c_phi_inv[t]);
03186 nfft_free(ths->c_phi_inv);
03187 }
03188
03189 if(ths->nfft_flags & MALLOC_F)
03190 nfft_free(ths->f);
03191
03192 if(ths->nfft_flags & MALLOC_F_HAT)
03193 nfft_free(ths->f_hat);
03194
03195 if(ths->nfft_flags & MALLOC_X)
03196 nfft_free(ths->x);
03197
03198 WINDOW_HELP_FINALIZE;
03199
03200 nfft_free(ths->sigma);
03201 nfft_free(ths->n);
03202 nfft_free(ths->N);
03203 }