00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "config.h"
00028
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <stdbool.h>
00032 #include <math.h>
00033 #ifdef HAVE_COMPLEX_H
00034 #include <complex.h>
00035 #endif
00036
00037 #include "nfft3.h"
00038 #include "nfft3util.h"
00039 #include "infft.h"
00040
00041
00042
00044 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00045
00047 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00048
00050 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00051
00053 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00054
00055 #define N_TILDE(y) (y-1)
00056
00057 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00058
00059 #define FPT_BREAK_EVEN 4
00060
00064 typedef struct fpt_step_
00065 {
00066 bool stable;
00069 int Ns;
00070 int ts;
00071 double **a11,**a12,**a21,**a22;
00072 double *g;
00073 } fpt_step;
00074
00078 typedef struct fpt_data_
00079 {
00080 fpt_step **steps;
00081 int k_start;
00082 double *alphaN;
00083 double *betaN;
00084 double *gammaN;
00085 double alpha_0;
00086 double beta_0;
00087 double gamma_m1;
00088
00089 double *_alpha;
00090 double *_beta;
00091 double *_gamma;
00092 } fpt_data;
00093
00097 typedef struct fpt_set_s_
00098 {
00099 int flags;
00100 int M;
00101 int N;
00103 int t;
00104 fpt_data *dpt;
00105 double **xcvecs;
00108 double *xc;
00109 double _Complex *temp;
00110 double _Complex *work;
00111 double _Complex *result;
00112 double _Complex *vec3;
00113 double _Complex *vec4;
00114 double _Complex *z;
00115 fftw_plan *plans_dct3;
00117 fftw_plan *plans_dct2;
00119 fftw_r2r_kind *kinds;
00121 fftw_r2r_kind *kindsr;
00124 int *lengths;
00126
00127 double *xc_slow;
00128 } fpt_set_s;
00129
00130 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00131 double* v, double _Complex* y, double* w, int n)
00132 {
00133 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00134 double *v_ptr = v, *w_ptr = w;
00135 for (l = 0; l < n; l++)
00136 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00137 }
00138
00139 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00140 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00141 double* v, double _Complex* y, double* w, int n) \
00142 { \
00143 const int n2 = n>>1; \
00144 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00145 double *v_ptr = v, *w_ptr = w; \
00146 for (l = 0; l < n2; l++) \
00147 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00148 v_ptr--; w_ptr--; \
00149 for (l = 0; l < n2; l++) \
00150 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00151 }
00152
00153 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00154 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00155
00156 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00157 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00158 double* v, double _Complex* y, int n, double *xx) \
00159 { \
00160 const int n2 = n>>1; \
00161 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00162 double *v_ptr = v, *xx_ptr = xx; \
00163 for (l = 0; l < n2; l++, v_ptr++) \
00164 *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00165 v_ptr--; \
00166 for (l = 0; l < n2; l++, v_ptr--) \
00167 *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00168 }
00169
00170 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00171 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00172
00173 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00174 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00175 double _Complex* y, double* w, int n, double *xx) \
00176 { \
00177 const int n2 = n>>1; \
00178 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00179 double *w_ptr = w, *xx_ptr = xx; \
00180 for (l = 0; l < n2; l++, w_ptr++) \
00181 *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00182 w_ptr--; \
00183 for (l = 0; l < n2; l++, w_ptr--) \
00184 *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00185 }
00186
00187 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00188 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00189
00190 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00191 double _Complex* y, double* w, int n)
00192 {
00193 int l;
00194 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00195 double *v_ptr = v, *w_ptr = w;
00196 for (l = n; l > 0; l--)
00197 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00198 }
00199
00200 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00201 double* v, double _Complex* y, double* w, int n)
00202 {
00203 const int n2 = n>>1; \
00204 int l;
00205 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00206 double *v_ptr = v, *w_ptr = w;
00207 for (l = n2; l > 0; l--)
00208 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00209 v_ptr--; w_ptr--;
00210 for (l = n2; l > 0; l--)
00211 *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00212 }
00213
00214 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00215 double* v, double _Complex* y, double* w, int n, double *xx)
00216 {
00217 const int n2 = n>>1; \
00218 int l;
00219 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00220 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00221 for (l = n2; l > 0; l--, xx_ptr++)
00222 *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00223 v_ptr--; w_ptr--;
00224 for (l = n2; l > 0; l--, xx_ptr++)
00225 *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00226 }
00227
00228 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00229 double* v, double _Complex* y, double* w, int n, double *xx)
00230 {
00231 const int n2 = n>>1; \
00232 int l;
00233 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00234 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00235 for (l = n2; l > 0; l--, xx_ptr++)
00236 *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00237 v_ptr--; w_ptr--;
00238 for (l = n2; l > 0; l--, xx_ptr++)
00239 *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00240 }
00241
00242 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00243 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
00244 double *a21, double *a22, double g, int tau, fpt_set set) \
00245 { \
00246 \
00247 int length = 1<<(tau+1); \
00248 \
00249 double norm = 1.0/(length<<1); \
00250 \
00251 \
00252 a[0] *= 2.0; \
00253 b[0] *= 2.0; \
00254 \
00255 \
00256 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00257 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00258 \
00259 \
00260 \
00261 \
00262 \
00263 \
00264 \
00265 \
00266 if (g == 0.0) \
00267 { \
00268 \
00269 \
00270 M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00271 } \
00272 else \
00273 { \
00274 \
00275 \
00276 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00277 M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00278 memcpy(b,set->z,length*sizeof(double _Complex)); \
00279 \
00280 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00281 \
00282 a[0] *= 0.5; \
00283 } \
00284 \
00285 \
00286 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00287 \
00288 b[0] *= 0.5; \
00289 }
00290
00291 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00292 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00293
00294
00295
00296 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00297 double *a11, double *a12, double *a21, double *a22, double *x,
00298 double gam, int tau, fpt_set set)
00299 {
00301 int length = 1<<(tau+1);
00303 double norm = 1.0/(length<<1);
00304
00305 UNUSED(a21); UNUSED(a22);
00306
00307
00308 a[0] *= 2.0;
00309 b[0] *= 2.0;
00310
00311
00312 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00313 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00314
00315
00316
00317
00318
00319
00320
00321
00322 if (gam == 0.0)
00323 {
00324
00325
00326 auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00327 }
00328 else
00329 {
00330
00331
00332 auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00333 auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
00334 memcpy(b,set->z,length*sizeof(double _Complex));
00335
00336 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00337
00338 a[0] *= 0.5;
00339 }
00340
00341
00342 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00343
00344 b[0] *= 0.5;
00345 }
00346
00347 static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b,
00348 double *a11, double *a12, double *a21, double *a22, double *x, double gam, int tau, fpt_set set)
00349 {
00351 int length = 1<<(tau+1);
00353 double norm = 1.0/(length<<1);
00354
00355
00356 a[0] *= 2.0;
00357 b[0] *= 2.0;
00358
00359 UNUSED(a11); UNUSED(a12);
00360
00361
00362 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00363 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00364
00365
00366
00367
00368
00369
00370
00371
00372 if (gam == 0.0)
00373 {
00374
00375
00376 auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00377 }
00378 else
00379 {
00380
00381
00382 auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00383 auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
00384 memcpy(b,set->z,length*sizeof(double _Complex));
00385
00386 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00387
00388 a[0] *= 0.5;
00389 }
00390
00391
00392 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00393
00394 b[0] *= 0.5;
00395 }
00396
00397 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00398 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
00399 double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00400 { \
00401 \
00402 int length = 1<<(tau+1); \
00403 \
00404 double norm = 1.0/(length<<1); \
00405 \
00406 \
00407 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00408 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00409 \
00410 \
00411 M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00412 M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00413 memcpy(a,set->z,length*sizeof(double _Complex)); \
00414 \
00415 \
00416 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00417 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00418 }
00419
00420 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00421 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00422
00423
00424
00425
00426 static inline void fpt_do_step_t_symmetric_u(double _Complex *a,
00427 double _Complex *b, double *a11, double *a12, double *x,
00428 double gam, int tau, fpt_set set)
00429 {
00431 int length = 1<<(tau+1);
00433 double norm = 1.0/(length<<1);
00434
00435
00436 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00437 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00438
00439
00440 abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
00441 abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
00442 memcpy(a,set->z,length*sizeof(double _Complex));
00443
00444
00445 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00446 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00447 }
00448
00449 static inline void fpt_do_step_t_symmetric_l(double _Complex *a,
00450 double _Complex *b, double *a21, double *a22,
00451 double *x, double gam, int tau, fpt_set set)
00452 {
00454 int length = 1<<(tau+1);
00456 double norm = 1.0/(length<<1);
00457
00458
00459 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00460 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00461
00462
00463 abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
00464 abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
00465 memcpy(a,set->z,length*sizeof(double _Complex));
00466
00467
00468 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00469 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00470 }
00471
00472
00473 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00474 const double *beta, const double *gam)
00475 {
00476
00477
00478
00479 int i,j;
00480 double a,b,x_val_act,a_old;
00481 const double *x_act;
00482 double *y_act;
00483 const double *alpha_act, *beta_act, *gamma_act;
00484
00485
00486 x_act = x;
00487 y_act = y;
00488 for (i = 0; i < size; i++)
00489 {
00490 a = 1.0;
00491 b = 0.0;
00492 x_val_act = *x_act;
00493
00494 if (k == 0)
00495 {
00496 *y_act = 1.0;
00497 }
00498 else
00499 {
00500 alpha_act = &(alpha[k]);
00501 beta_act = &(beta[k]);
00502 gamma_act = &(gam[k]);
00503 for (j = k; j > 1; j--)
00504 {
00505 a_old = a;
00506 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00507 b = a_old*(*gamma_act);
00508 alpha_act--;
00509 beta_act--;
00510 gamma_act--;
00511 }
00512 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00513 }
00514 x_act++;
00515 y_act++;
00516 }
00517 }
00518
00519 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00520 const double *beta, const double *gam)
00521 {
00522
00523
00524
00525 int i,j;
00526 double a,b,x_val_act,a_old;
00527 const double *x_act;
00528 double *y_act, *z_act;
00529 const double *alpha_act, *beta_act, *gamma_act;
00530
00531
00532 x_act = x;
00533 y_act = y;
00534 z_act = z;
00535 for (i = 0; i < size; i++)
00536 {
00537 a = 1.0;
00538 b = 0.0;
00539 x_val_act = *x_act;
00540
00541 if (k == 0)
00542 {
00543 *y_act = 1.0;
00544 *z_act = 0.0;
00545 }
00546 else
00547 {
00548 alpha_act = &(alpha[k]);
00549 beta_act = &(beta[k]);
00550 gamma_act = &(gam[k]);
00551 for (j = k; j > 1; j--)
00552 {
00553 a_old = a;
00554 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00555 b = a_old*(*gamma_act);
00556 alpha_act--;
00557 beta_act--;
00558 gamma_act--;
00559 }
00560 if (i < size1)
00561 *z_act = a;
00562 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00563 }
00564
00565 x_act++;
00566 y_act++;
00567 z_act++;
00568 }
00569
00570
00571
00572
00573 }
00574
00575 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00576 const double *alpha, const double *beta, const double *gam, const
00577 double threshold)
00578 {
00579
00580
00581
00582 int i,j;
00583 double a,b,x_val_act,a_old;
00584 const double *x_act;
00585 double *y_act, *z_act;
00586 const double *alpha_act, *beta_act, *gamma_act;
00587 R max = -nfft_float_property(NFFT_R_MAX);
00588 const R t = LOG10(FABS(threshold));
00589
00590
00591 x_act = x;
00592 y_act = y;
00593 z_act = z;
00594 for (i = 0; i < size; i++)
00595 {
00596 a = 1.0;
00597 b = 0.0;
00598 x_val_act = *x_act;
00599
00600 if (k == 0)
00601 {
00602 *y_act = 1.0;
00603 *z_act = 0.0;
00604 }
00605 else
00606 {
00607 alpha_act = &(alpha[k]);
00608 beta_act = &(beta[k]);
00609 gamma_act = &(gam[k]);
00610 for (j = k; j > 1; j--)
00611 {
00612 a_old = a;
00613 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00614 b = a_old*(*gamma_act);
00615 alpha_act--;
00616 beta_act--;
00617 gamma_act--;
00618 }
00619 *z_act = a;
00620 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00621 max = FMAX(max,LOG10(FABS(*y_act)));
00622 if (max > t)
00623 return 1;
00624 }
00625 x_act++;
00626 y_act++;
00627 z_act++;
00628 }
00629 return 0;
00630 }
00631
00632 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00633 const double _Complex *a, const double *x, double _Complex *y,
00634 const double *alpha, const double *beta, const double *gam,
00635 const double lambda)
00636 {
00637 int j,k;
00638 double _Complex tmp1, tmp2, tmp3;
00639 double xc;
00640
00641
00642
00643
00644
00645
00646
00647
00648 if (N == 0)
00649 for (j = 0; j <= M; j++)
00650 y[j] = a[0];
00651 else
00652 {
00653 for (j = 0; j <= M; j++)
00654 {
00655 #if 0
00656 xc = x[j];
00657 tmp2 = a[N];
00658 tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00659 for (k = N-1; k > 0; k--)
00660 {
00661 tmp3 = a[k-1]
00662 + (alpha[k-1] * xc + beta[k-1]) * tmp1
00663 + gam[k] * tmp2;
00664 tmp2 = tmp1;
00665 tmp1 = tmp3;
00666 }
00667 y[j] = lambda * tmp1;
00668 #else
00669 xc = x[j];
00670 tmp1 = a[N-1];
00671 tmp2 = a[N];
00672 for (k = N-1; k > 0; k--)
00673 {
00674 tmp3 = a[k-1] + tmp2 * gam[k];
00675 tmp2 *= (alpha[k] * xc + beta[k]);
00676 tmp2 += tmp1;
00677 tmp1 = tmp3;
00678
00679
00680
00681
00682 }
00683 tmp2 *= (alpha[0] * xc + beta[0]);
00684
00685 y[j] = lambda * (tmp2 + tmp1);
00686
00687 #endif
00688 }
00689 }
00690
00691
00692
00693
00694
00695 }
00696
00715 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00716 double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam,
00717 double lambda)
00718 {
00719 int j,k;
00720 double _Complex* it1 = temp;
00721 double _Complex* it2 = y;
00722 double _Complex aux;
00723
00724
00725 a[0] = 0.0;
00726 for (j = 0; j <= M; j++)
00727 {
00728 it2[j] = lambda * y[j];
00729 a[0] += it2[j];
00730 }
00731
00732 if (N > 0)
00733 {
00734
00735 a[1] = 0.0;
00736 for (j = 0; j <= M; j++)
00737 {
00738 it1[j] = it2[j];
00739 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00740 a[1] += it2[j];
00741 }
00742
00743 for (k = 2; k <= N; k++)
00744 {
00745 a[k] = 0.0;
00746 for (j = 0; j <= M; j++)
00747 {
00748 aux = it1[j];
00749 it1[j] = it2[j];
00750 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
00751 a[k] += it2[j];
00752 }
00753 }
00754 }
00755 }
00756
00757
00758 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00759 {
00761 int plength;
00763 int tau;
00765 int m;
00766 int k;
00767 #ifdef _OPENMP
00768 int nthreads = nfft_get_omp_num_threads();
00769 #endif
00770
00771
00772 fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00773
00774
00775 set->flags = flags;
00776
00777 set->M = M;
00778 set->t = t;
00779 set->N = 1<<t;
00780
00781
00782 set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00783
00784
00785 for (m = 0; m < set->M; m++)
00786 set->dpt[m].steps = 0;
00787
00788
00789
00790
00791
00792
00793
00794
00795
00796 set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00797
00798
00799 plength = 4;
00800 for (tau = 1; tau < t+1; tau++)
00801 {
00802
00803 set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00804 for (k = 0; k < plength; k++)
00805 {
00806 set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00807 }
00808 plength = plength << 1;
00809 }
00810
00812 set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00813 set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00814
00815 set->lengths = (int*) nfft_malloc((set->t)*sizeof(int));
00816 set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
00817 set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00818 set->kindsr[0] = FFTW_REDFT10;
00819 set->kindsr[1] = FFTW_REDFT10;
00820 for (tau = 0, plength = 4; tau < set->t; tau++, plength<<=1)
00821 {
00822 set->lengths[tau] = plength;
00823 #ifdef _OPENMP
00824 #pragma omp critical (nfft_omp_critical_fftw_plan)
00825 {
00826 fftw_plan_with_nthreads(nthreads);
00827 #endif
00828 set->plans_dct2[tau] =
00829 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00830 2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00831 0);
00832 #ifdef _OPENMP
00833 }
00834 #endif
00835 }
00836
00837
00838 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00839 {
00841 set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00842 set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00843 set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00844
00846 set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
00847 set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00848 set->kinds[0] = FFTW_REDFT01;
00849 set->kinds[1] = FFTW_REDFT01;
00850 for (tau = 0, plength = 4; tau < set->t; tau++, plength<<=1)
00851 {
00852 set->lengths[tau] = plength;
00853 #ifdef _OPENMP
00854 #pragma omp critical (nfft_omp_critical_fftw_plan)
00855 {
00856 fftw_plan_with_nthreads(nthreads);
00857 #endif
00858 set->plans_dct3[tau] =
00859 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00860 2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00861 0);
00862 #ifdef _OPENMP
00863 }
00864 #endif
00865 }
00866 nfft_free(set->lengths);
00867 nfft_free(set->kinds);
00868 nfft_free(set->kindsr);
00869 set->lengths = NULL;
00870 set->kinds = NULL;
00871 set->kindsr = NULL;
00872 }
00873
00874 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00875 {
00876 set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00877 set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00878 }
00879
00880
00881 return set;
00882 }
00883
00884 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00885 double *gam, int k_start, const double threshold)
00886 {
00887
00888 int tau;
00889 int l;
00890 int plength;
00892 int degree;
00894 int firstl;
00895 int lastl;
00896 int plength_stab;
00898 int degree_stab;
00900 double *a11;
00902 double *a12;
00904 double *a21;
00906 double *a22;
00908 const double *calpha;
00909 const double *cbeta;
00910 const double *cgamma;
00911 int needstab = 0;
00912 int k_start_tilde;
00913 int N_tilde;
00914 int clength;
00915 int clength_1;
00916 int clength_2;
00917 int t_stab, N_stab;
00918 fpt_data *data;
00919
00920
00921 data = &(set->dpt[m]);
00922
00923
00924 if (data->steps != NULL)
00925 return;
00926
00927
00928 data->k_start = k_start;
00929
00930 data->gamma_m1 = gam[0];
00931
00932
00933 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00934 {
00935
00936 data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00937 data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00938 data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00939
00940 for (tau = 2; tau <= set->t; tau++)
00941 {
00942
00943 data->alphaN[tau-2] = alpha[1<<tau];
00944 data->betaN[tau-2] = beta[1<<tau];
00945 data->gammaN[tau-2] = gam[1<<tau];
00946 }
00947
00948 data->alpha_0 = alpha[1];
00949 data->beta_0 = beta[1];
00950
00951 k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
00952 );
00953 N_tilde = N_TILDE(set->N);
00954
00955
00956 data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00957
00958
00959 plength = 4;
00960 for (tau = 1; tau < set->t; tau++)
00961 {
00962
00963 degree = plength>>1;
00964
00965 firstl = FIRST_L(k_start_tilde,plength);
00966
00967 lastl = LAST_L(N_tilde,plength);
00968
00969
00970
00971 data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00972 * (lastl+1));
00973
00974
00975 for (l = firstl; l <= lastl; l++)
00976 {
00977 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00978 {
00979
00980
00981 clength = plength/2;
00982 }
00983 else
00984 {
00985 clength = plength;
00986 }
00987
00988
00989 a11 = (double*) nfft_malloc(sizeof(double)*clength);
00990 a12 = (double*) nfft_malloc(sizeof(double)*clength);
00991 a21 = (double*) nfft_malloc(sizeof(double)*clength);
00992 a22 = (double*) nfft_malloc(sizeof(double)*clength);
00993
00994
00995
00996
00997
00998 calpha = &(alpha[plength*l+1+1]);
00999 cbeta = &(beta[plength*l+1+1]);
01000 cgamma = &(gam[plength*l+1+1]);
01001
01002 if (set->flags & FPT_NO_STABILIZATION)
01003 {
01004
01005 calpha--;
01006 cbeta--;
01007 cgamma--;
01008 eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
01009 cgamma);
01010 eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
01011 cgamma);
01012 needstab = 0;
01013 }
01014 else
01015 {
01016 calpha--;
01017 cbeta--;
01018 cgamma--;
01019
01020 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
01021 degree-1, calpha, cbeta, cgamma, threshold);
01022 if (needstab == 0)
01023 {
01024
01025 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
01026 degree, calpha, cbeta, cgamma, threshold);
01027 }
01028 }
01029
01030
01031
01032 if (needstab == 0)
01033 {
01034 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01035 data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
01036 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01037 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01038 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01039
01040 data->steps[tau][l].a11[0] = a11;
01041 data->steps[tau][l].a12[0] = a12;
01042 data->steps[tau][l].a21[0] = a21;
01043 data->steps[tau][l].a22[0] = a22;
01044 data->steps[tau][l].g[0] = gam[plength*l+1+1];
01045 data->steps[tau][l].stable = true;
01046 }
01047 else
01048 {
01049
01050 degree_stab = degree*(2*l+1);
01051 X(next_power_of_2_exp)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01052
01053
01054 nfft_free(a11);
01055 nfft_free(a12);
01056 nfft_free(a21);
01057 nfft_free(a22);
01058
01059 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01060 data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01061 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01062 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01063 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01064
01065 plength_stab = N_stab;
01066
01067 if (set->flags & FPT_AL_SYMMETRY)
01068 {
01069 if (m <= 1)
01070 {
01071
01072 clength_1 = plength_stab;
01073 clength_2 = plength_stab;
01074
01075 a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01076 a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01077 a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01078 a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01079
01080 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01081 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01082 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01083 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01084 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01085 }
01086 else
01087 {
01088 clength = plength_stab/2;
01089 if (m%2 == 0)
01090 {
01091 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01092 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01093 a21 = 0;
01094 a22 = 0;
01095 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01096 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01097 degree_stab-2, calpha, cbeta, cgamma);
01098 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01099 degree_stab-1, calpha, cbeta, cgamma);
01100 }
01101 else
01102 {
01103 a11 = 0;
01104 a12 = 0;
01105 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01106 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01107 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01108 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01109 degree_stab-1,calpha, cbeta, cgamma);
01110 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01111 degree_stab+0, calpha, cbeta, cgamma);
01112 }
01113 }
01114 }
01115 else
01116 {
01117 clength_1 = plength_stab;
01118 clength_2 = plength_stab;
01119 a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01120 a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01121 a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01122 a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01123 calpha = &(alpha[2]);
01124 cbeta = &(beta[2]);
01125 cgamma = &(gam[2]);
01126 calpha--;
01127 cbeta--;
01128 cgamma--;
01129 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01130 calpha, cbeta, cgamma);
01131 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01132 calpha, cbeta, cgamma);
01133
01134 }
01135 data->steps[tau][l].a11[0] = a11;
01136 data->steps[tau][l].a12[0] = a12;
01137 data->steps[tau][l].a21[0] = a21;
01138 data->steps[tau][l].a22[0] = a22;
01139
01140 data->steps[tau][l].g[0] = gam[1+1];
01141 data->steps[tau][l].stable = false;
01142 data->steps[tau][l].ts = t_stab;
01143 data->steps[tau][l].Ns = N_stab;
01144 }
01145 }
01147 plength = plength << 1;
01148 }
01149 }
01150
01151 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01152 {
01153
01154 if (set->flags & FPT_PERSISTENT_DATA)
01155 {
01156 data->_alpha = (double*) alpha;
01157 data->_beta = (double*) beta;
01158 data->_gamma = (double*) gam;
01159 }
01160 else
01161 {
01162 data->_alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01163 data->_beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01164 data->_gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01165 memcpy(data->_alpha,alpha,(set->N+1)*sizeof(double));
01166 memcpy(data->_beta,beta,(set->N+1)*sizeof(double));
01167 memcpy(data->_gamma,gam,(set->N+1)*sizeof(double));
01168 }
01169 }
01170 }
01171
01172 void fpt_trafo_direct(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01173 const int k_end, const unsigned int flags)
01174 {
01175 int j;
01176 fpt_data *data = &(set->dpt[m]);
01177 int Nk;
01178 int tk;
01179 double norm;
01180
01181
01182
01183 X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
01184 norm = 2.0/(Nk<<1);
01185
01186
01187
01188 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01189 {
01190 return;
01191 }
01192
01193 if (flags & FPT_FUNCTION_VALUES)
01194 {
01195
01196 for (j = 0; j <= k_end; j++)
01197 {
01198 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01199
01200 }
01201
01202 memset(set->result,0U,data->k_start*sizeof(double _Complex));
01203 memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01204
01205
01206
01207
01208 eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01209 y, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], data->gamma_m1);
01210 }
01211 else
01212 {
01213 memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01214 memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01215
01216 eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01217 set->result, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01218 data->gamma_m1);
01219
01220 fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01221 (double*)set->result);
01222
01223 set->result[0] *= 0.5;
01224 for (j = 0; j < Nk; j++)
01225 {
01226 set->result[j] *= norm;
01227 }
01228
01229 memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01230 }
01231 }
01232
01233 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01234 const int k_end, const unsigned int flags)
01235 {
01236
01237 fpt_data *data = &(set->dpt[m]);
01239 int Nk;
01241 int tk;
01243 int k_start_tilde;
01245 int k_end_tilde;
01246
01248 int tau;
01250 int firstl;
01252 int lastl;
01254 int l;
01256 int plength;
01258 int plength_stab;
01259 int t_stab;
01261 fpt_step *step;
01263 fftw_plan plan = 0;
01264 int length = k_end+1;
01265 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01266
01268 int k;
01269
01270 double _Complex *work_ptr;
01271 const double _Complex *x_ptr;
01272
01273
01274 if (k_end < FPT_BREAK_EVEN)
01275 {
01276
01277 fpt_trafo_direct(set, m, x, y, k_end, flags);
01278 return;
01279 }
01280
01281 X(next_power_of_2_exp)(k_end,&Nk,&tk);
01282 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01283 k_end_tilde = K_END_TILDE(k_end,Nk);
01284
01285
01286 if (set->flags & FPT_NO_FAST_ALGORITHM)
01287 return;
01288
01289 if (flags & FPT_FUNCTION_VALUES)
01290 {
01291 #ifdef _OPENMP
01292 int nthreads = nfft_get_omp_num_threads();
01293 #pragma omp critical (nfft_omp_critical_fftw_plan)
01294 {
01295 fftw_plan_with_nthreads(nthreads);
01296 #endif
01297 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01298 (double*)set->work, NULL, 2, 1, kinds, 0U);
01299 #ifdef _OPENMP
01300 }
01301 #endif
01302 }
01303
01304
01305 memset(set->result,0U,2*Nk*sizeof(double _Complex));
01306
01307
01308
01309
01310 memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01311
01312 work_ptr = &set->work[2*data->k_start];
01313 x_ptr = x;
01314
01315 for (k = 0; k <= k_end_tilde-data->k_start; k++)
01316 {
01317 *work_ptr++ = *x_ptr++;
01318 *work_ptr++ = K(0.0);
01319 }
01320
01321
01322 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01323
01324
01325
01326 if (k_end == Nk)
01327 {
01328 set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01329 set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01330 set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01331 }
01332
01333
01334 plength = 4;
01335 for (tau = 1; tau < tk; tau++)
01336 {
01337
01338 firstl = FIRST_L(k_start_tilde,plength);
01339
01340 lastl = LAST_L(k_end_tilde,plength);
01341
01342
01343 for (l = firstl; l <= lastl; l++)
01344 {
01345
01346 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01347 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01348 memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01349 memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01350
01351
01352 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01353 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01354 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01355
01356
01357 step = &(data->steps[tau][l]);
01358
01359
01360 if (step->stable)
01361 {
01362
01363 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01364 {
01365
01366
01367
01368
01369
01370
01371
01372 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01373 step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01374 }
01375 else
01376 {
01377
01378 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01379 step->a21[0], step->a22[0], step->g[0], tau, set);
01380 }
01381
01382 if (step->g[0] != 0.0)
01383 {
01384 for (k = 0; k < plength; k++)
01385 {
01386 set->work[plength*2*l+k] += set->vec3[k];
01387 }
01388 }
01389 for (k = 0; k < plength; k++)
01390 {
01391 set->work[plength*(2*l+1)+k] += set->vec4[k];
01392 }
01393 }
01394 else
01395 {
01396
01397
01398
01399 plength_stab = step->Ns;
01400 t_stab = step->ts;
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411
01412 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01413 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01414
01415
01416
01417 if (set->flags & FPT_AL_SYMMETRY)
01418 {
01419 if (m <= 1)
01420 {
01421 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01422 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01423 }
01424 else if (m%2 == 0)
01425 {
01426
01427
01428 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01429 step->a21[0], step->a22[0],
01430 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01431
01432
01433 }
01434 else
01435 {
01436
01437
01438 fpt_do_step_symmetric_l(set->vec3, set->vec4,
01439 step->a11[0], step->a12[0],
01440 step->a21[0],
01441 step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01442
01443
01444 }
01445 }
01446 else
01447 {
01448 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01449 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01450 }
01451
01452 if (step->g[0] != 0.0)
01453 {
01454 for (k = 0; k < plength_stab; k++)
01455 {
01456 set->result[k] += set->vec3[k];
01457 }
01458 }
01459
01460 for (k = 0; k < plength_stab; k++)
01461 {
01462 set->result[Nk+k] += set->vec4[k];
01463 }
01464 }
01465 }
01466
01467 plength = plength<<1;
01468
01469
01470
01471
01472
01473
01474
01475
01476
01477 }
01478
01479
01480
01481 for (k = 0; k < 2*Nk; k++)
01482 {
01483 set->result[k] += set->work[k];
01484 }
01485
01486
01487
01488 y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01489 data->alpha_0*set->result[Nk+1]*0.5);
01490 y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01491 data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01492 y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01493 data->beta_0*set->result[Nk+k_end-1] +
01494 data->alpha_0*set->result[Nk+k_end-2]*0.5);
01495 y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01496 for (k = 2; k <= k_end-2; k++)
01497 {
01498 y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01499 data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01500 }
01501
01502 if (flags & FPT_FUNCTION_VALUES)
01503 {
01504 y[0] *= 2.0;
01505 fftw_execute_r2r(plan,(double*)y,(double*)y);
01506 #pragma omp critical (nfft_omp_critical_fftw_plan)
01507 fftw_destroy_plan(plan);
01508 for (k = 0; k <= k_end; k++)
01509 {
01510 y[k] *= 0.5;
01511 }
01512 }
01513 }
01514
01515 void fpt_transposed_direct(fpt_set set, const int m, double _Complex *x,
01516 double _Complex *y, const int k_end, const unsigned int flags)
01517 {
01518 int j;
01519 fpt_data *data = &(set->dpt[m]);
01520 int Nk;
01521 int tk;
01522 double norm;
01523
01524 X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
01525 norm = 2.0/(Nk<<1);
01526
01527 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01528 {
01529 return;
01530 }
01531
01532 if (flags & FPT_FUNCTION_VALUES)
01533 {
01534 for (j = 0; j <= k_end; j++)
01535 {
01536 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01537 }
01538
01539 eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01540 y, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01541 data->gamma_m1);
01542
01543 memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01544 sizeof(double _Complex));
01545 }
01546 else
01547 {
01548 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01549 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01550
01551 for (j = 0; j < Nk; j++)
01552 {
01553 set->result[j] *= norm;
01554 }
01555
01556 fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01557 (double*)set->result);
01558
01559 eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01560 set->result, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01561 data->gamma_m1);
01562
01563 memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01564 }
01565 }
01566
01567 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01568 double _Complex *y, const int k_end, const unsigned int flags)
01569 {
01570
01571 fpt_data *data = &(set->dpt[m]);
01573 int Nk;
01575 int tk;
01577 int k_start_tilde;
01579 int k_end_tilde;
01580
01582 int tau;
01584 int firstl;
01586 int lastl;
01588 int l;
01590 int plength;
01592 int plength_stab;
01594 fpt_step *step;
01596 fftw_plan plan;
01597 int length = k_end+1;
01598 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01600 int k;
01601 int t_stab;
01602
01603
01604 if (k_end < FPT_BREAK_EVEN)
01605 {
01606
01607 fpt_transposed_direct(set, m, x, y, k_end, flags);
01608 return;
01609 }
01610
01611 X(next_power_of_2_exp)(k_end,&Nk,&tk);
01612 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01613 k_end_tilde = K_END_TILDE(k_end,Nk);
01614
01615
01616 if (set->flags & FPT_NO_FAST_ALGORITHM)
01617 {
01618 return;
01619 }
01620
01621 if (flags & FPT_FUNCTION_VALUES)
01622 {
01623 #ifdef _OPENMP
01624 int nthreads = nfft_get_omp_num_threads();
01625 #pragma omp critical (nfft_omp_critical_fftw_plan)
01626 {
01627 fftw_plan_with_nthreads(nthreads);
01628 #endif
01629 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01630 (double*)set->work, NULL, 2, 1, kinds, 0U);
01631 #ifdef _OPENMP
01632 }
01633 #endif
01634 fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01635 #pragma omp critical (nfft_omp_critical_fftw_plan)
01636 fftw_destroy_plan(plan);
01637 for (k = 0; k <= k_end; k++)
01638 {
01639 set->result[k] *= 0.5;
01640 }
01641 }
01642 else
01643 {
01644 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01645 }
01646
01647
01648 memset(set->work,0U,2*Nk*sizeof(double _Complex));
01649
01650
01651 for (k = 0; k <= k_end; k++)
01652 {
01653 set->work[k] = data->gamma_m1*set->result[k];
01654 }
01655
01656
01657 set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01658 data->alpha_0*set->result[1]);
01659 for (k = 1; k < k_end; k++)
01660 {
01661 set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01662 data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01663 }
01664 if (k_end<Nk)
01665 {
01666 memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01667 }
01668
01670 memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01671
01672
01673 plength = Nk;
01674 for (tau = tk-1; tau >= 1; tau--)
01675 {
01676
01677 firstl = FIRST_L(k_start_tilde,plength);
01678
01679 lastl = LAST_L(k_end_tilde,plength);
01680
01681
01682 for (l = firstl; l <= lastl; l++)
01683 {
01684
01685 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01686 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01687
01688 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01689 (plength/2)*sizeof(double _Complex));
01690
01691
01692 step = &(data->steps[tau][l]);
01693
01694
01695 if (step->stable)
01696 {
01697 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01698 {
01699
01700 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01701 step->a21[0], step->a22[0], step->g[0], tau, set);
01702 }
01703 else
01704 {
01705
01706 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01707 step->a21[0], step->a22[0], step->g[0], tau, set);
01708 }
01709 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01710
01711 for (k = 0; k < plength; k++)
01712 {
01713 set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01714 }
01715 }
01716 else
01717 {
01718
01719 plength_stab = step->Ns;
01720 t_stab = step->ts;
01721
01722 memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01723 memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01724
01725
01726 if (set->flags & FPT_AL_SYMMETRY)
01727 {
01728 if (m <= 1)
01729 {
01730 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01731 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01732 }
01733 else if (m%2 == 0)
01734 {
01735 fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01736 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01737 }
01738 else
01739 {
01740 fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01741 step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01742 }
01743 }
01744 else
01745 {
01746 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01747 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01748 }
01749
01750 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01751
01752 for (k = 0; k < plength; k++)
01753 {
01754 set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01755 }
01756 }
01757 }
01758
01759 plength = plength>>1;
01760 }
01761
01762
01763 for (k = 0; k <= k_end_tilde-data->k_start; k++)
01764 {
01765 x[k] = set->work[2*(data->k_start+k)];
01766 }
01767 if (k_end == Nk)
01768 {
01769 x[Nk-data->k_start] =
01770 data->gammaN[tk-2]*set->work[2*(Nk-2)]
01771 + data->betaN[tk-2] *set->work[2*(Nk-1)]
01772 + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01773 }
01774 }
01775
01776 void fpt_finalize(fpt_set set)
01777 {
01778 int tau;
01779 int l;
01780 int m;
01781 fpt_data *data;
01782 int k_start_tilde;
01783 int N_tilde;
01784 int firstl, lastl;
01785 int plength;
01786 const int M = set->M;
01787
01788
01789 for (m = 0; m < M; m++)
01790 {
01791
01792 data = &set->dpt[m];
01793 if (data->steps != (fpt_step**)NULL)
01794 {
01795 nfft_free(data->alphaN);
01796 nfft_free(data->betaN);
01797 nfft_free(data->gammaN);
01798 data->alphaN = NULL;
01799 data->betaN = NULL;
01800 data->gammaN = NULL;
01801
01802
01803 k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
01804 );
01805 N_tilde = N_TILDE(set->N);
01806 plength = 4;
01807 for (tau = 1; tau < set->t; tau++)
01808 {
01809
01810 firstl = FIRST_L(k_start_tilde,plength);
01811
01812 lastl = LAST_L(N_tilde,plength);
01813
01814
01815 for (l = firstl; l <= lastl; l++)
01816 {
01817
01818 nfft_free(data->steps[tau][l].a11[0]);
01819 nfft_free(data->steps[tau][l].a12[0]);
01820 nfft_free(data->steps[tau][l].a21[0]);
01821 nfft_free(data->steps[tau][l].a22[0]);
01822 data->steps[tau][l].a11[0] = NULL;
01823 data->steps[tau][l].a12[0] = NULL;
01824 data->steps[tau][l].a21[0] = NULL;
01825 data->steps[tau][l].a22[0] = NULL;
01826
01827 nfft_free(data->steps[tau][l].a11);
01828 nfft_free(data->steps[tau][l].a12);
01829 nfft_free(data->steps[tau][l].a21);
01830 nfft_free(data->steps[tau][l].a22);
01831 nfft_free(data->steps[tau][l].g);
01832 data->steps[tau][l].a11 = NULL;
01833 data->steps[tau][l].a12 = NULL;
01834 data->steps[tau][l].a21 = NULL;
01835 data->steps[tau][l].a22 = NULL;
01836 data->steps[tau][l].g = NULL;
01837 }
01838
01839 nfft_free(data->steps[tau]);
01840 data->steps[tau] = NULL;
01841
01842 plength = plength<<1;
01843 }
01844
01845 nfft_free(data->steps);
01846 data->steps = NULL;
01847 }
01848
01849 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01850 {
01851
01852
01853 if (!(set->flags & FPT_PERSISTENT_DATA))
01854 {
01855 nfft_free(data->_alpha);
01856 nfft_free(data->_beta);
01857 nfft_free(data->_gamma);
01858 }
01859 data->_alpha = NULL;
01860 data->_beta = NULL;
01861 data->_gamma = NULL;
01862 }
01863 }
01864
01865
01866 nfft_free(set->dpt);
01867 set->dpt = NULL;
01868
01869 for (tau = 1; tau < set->t+1; tau++)
01870 {
01871 nfft_free(set->xcvecs[tau-1]);
01872 set->xcvecs[tau-1] = NULL;
01873 }
01874 nfft_free(set->xcvecs);
01875 set->xcvecs = NULL;
01876
01877
01878 nfft_free(set->work);
01879 nfft_free(set->result);
01880
01881
01882 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01883 {
01884
01885 nfft_free(set->vec3);
01886 nfft_free(set->vec4);
01887 nfft_free(set->z);
01888 set->work = NULL;
01889 set->result = NULL;
01890 set->vec3 = NULL;
01891 set->vec4 = NULL;
01892 set->z = NULL;
01893
01894
01895 for(tau = 0; tau < set->t; tau++)
01896 {
01897 #pragma omp critical (nfft_omp_critical_fftw_plan)
01898 {
01899 fftw_destroy_plan(set->plans_dct3[tau]);
01900 fftw_destroy_plan(set->plans_dct2[tau]);
01901 }
01902 set->plans_dct3[tau] = NULL;
01903 set->plans_dct2[tau] = NULL;
01904 }
01905
01906 nfft_free(set->plans_dct3);
01907 nfft_free(set->plans_dct2);
01908 set->plans_dct3 = NULL;
01909 set->plans_dct2 = NULL;
01910 }
01911
01912 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01913 {
01914
01915 nfft_free(set->xc_slow);
01916 set->xc_slow = NULL;
01917 nfft_free(set->temp);
01918 set->temp = NULL;
01919 }
01920
01921
01922 nfft_free(set);
01923 }