00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00027 #include "config.h"
00028
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <stdbool.h>
00032 #include <math.h>
00033 #include <complex.h>
00034
00035 #include "nfft3.h"
00036 #include "nfft3util.h"
00037 #include "infft.h"
00038
00039
00040
00042 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00043
00045 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00046
00048 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00049
00051 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00052
00053 #define N_TILDE(y) (y-1)
00054
00055 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00056
00057 #define FPT_BREAK_EVEN 4
00058
00062 typedef struct fpt_step_
00063 {
00064 bool stable;
00067 int Ns;
00068 int ts;
00069 double **a11,**a12,**a21,**a22;
00070 double *g;
00071 } fpt_step;
00072
00076 typedef struct fpt_data_
00077 {
00078 fpt_step **steps;
00079 int k_start;
00080 double *alphaN;
00081 double *betaN;
00082 double *gammaN;
00083 double alpha_0;
00084 double beta_0;
00085 double gamma_m1;
00086
00087 double *alpha;
00088 double *beta;
00089 double *gamma;
00090 } fpt_data;
00091
00095 typedef struct fpt_set_s_
00096 {
00097 int flags;
00098 int M;
00099 int N;
00101 int t;
00102 fpt_data *dpt;
00103 double **xcvecs;
00106 double *xc;
00107 double _Complex *temp;
00108 double _Complex *work;
00109 double _Complex *result;
00110 double _Complex *vec3;
00111 double _Complex *vec4;
00112 double _Complex *z;
00113 fftw_plan *plans_dct3;
00115 fftw_plan *plans_dct2;
00117 fftw_r2r_kind *kinds;
00119 fftw_r2r_kind *kindsr;
00122 int *lengths;
00124
00125 double *xc_slow;
00126 } fpt_set_s;
00127
00128 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00129 double* v, double _Complex* y, double* w, int n)
00130 {
00131 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00132 double *v_ptr = v, *w_ptr = w;
00133 for (l = 0; l < n; l++)
00134 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00135 }
00136
00137 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00138 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00139 double* v, double _Complex* y, double* w, int n) \
00140 { \
00141 const int n2 = n>>1; \
00142 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00143 double *v_ptr = v, *w_ptr = w; \
00144 for (l = 0; l < n2; l++) \
00145 *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00146 v_ptr--; w_ptr--; \
00147 for (l = 0; l < n2; l++) \
00148 *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00149 }
00150
00151 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00152 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00153
00154 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00155 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00156 double* v, double _Complex* y, int n, double *xx) \
00157 { \
00158 const int n2 = n>>1; \
00159 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00160 double *v_ptr = v, *xx_ptr = xx; \
00161 for (l = 0; l < n2; l++, v_ptr++) \
00162 *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00163 v_ptr--; \
00164 for (l = 0; l < n2; l++, v_ptr--) \
00165 *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00166 }
00167
00168 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00169 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00170
00171 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00172 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00173 double _Complex* y, double* w, int n, double *xx) \
00174 { \
00175 const int n2 = n>>1; \
00176 int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00177 double *w_ptr = w, *xx_ptr = xx; \
00178 for (l = 0; l < n2; l++, w_ptr++) \
00179 *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00180 w_ptr--; \
00181 for (l = 0; l < n2; l++, w_ptr--) \
00182 *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00183 }
00184
00185 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00186 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00187
00188 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00189 double _Complex* y, double* w, int n)
00190 {
00191 int l;
00192 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00193 double *v_ptr = v, *w_ptr = w;
00194 for (l = n; l > 0; l--)
00195 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00196 }
00197
00198 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00199 double* v, double _Complex* y, double* w, int n)
00200 {
00201 const int n2 = n>>1; \
00202 int l;
00203 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00204 double *v_ptr = v, *w_ptr = w;
00205 for (l = n2; l > 0; l--)
00206 *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00207 v_ptr--; w_ptr--;
00208 for (l = n2; l > 0; l--)
00209 *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00210 }
00211
00212 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00213 double* v, double _Complex* y, double* w, int n, double *xx)
00214 {
00215 const int n2 = n>>1; \
00216 int l;
00217 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00218 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00219 for (l = n2; l > 0; l--, xx_ptr++)
00220 *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00221 v_ptr--; w_ptr--;
00222 for (l = n2; l > 0; l--, xx_ptr++)
00223 *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00224 }
00225
00226 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00227 double* v, double _Complex* y, double* w, int n, double *xx)
00228 {
00229 const int n2 = n>>1; \
00230 int l;
00231 double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00232 double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00233 for (l = n2; l > 0; l--, xx_ptr++)
00234 *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00235 v_ptr--; w_ptr--;
00236 for (l = n2; l > 0; l--, xx_ptr++)
00237 *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00238 }
00239
00240 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00241 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
00242 double *a21, double *a22, double g, int tau, fpt_set set) \
00243 { \
00244 \
00245 int length = 1<<(tau+1); \
00246 \
00247 double norm = 1.0/(length<<1); \
00248 \
00249 \
00250 a[0] *= 2.0; \
00251 b[0] *= 2.0; \
00252 \
00253 \
00254 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00255 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00256 \
00257 \
00258 \
00259 \
00260 \
00261 \
00262 \
00263 \
00264 if (g == 0.0) \
00265 { \
00266 \
00267 \
00268 M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00269 } \
00270 else \
00271 { \
00272 \
00273 \
00274 M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00275 M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00276 memcpy(b,set->z,length*sizeof(double _Complex)); \
00277 \
00278 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00279 \
00280 a[0] *= 0.5; \
00281 } \
00282 \
00283 \
00284 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00285 \
00286 b[0] *= 0.5; \
00287 }
00288
00289 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00290 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00291
00292
00293
00294 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00295 double *a11, double *a12, double *a21, double *a22, double *x,
00296 double gamma, int tau, fpt_set set)
00297 {
00299 int length = 1<<(tau+1);
00301 double norm = 1.0/(length<<1);
00302
00303 UNUSED(a21); UNUSED(a22);
00304
00305
00306 a[0] *= 2.0;
00307 b[0] *= 2.0;
00308
00309
00310 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00311 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00312
00313
00314
00315
00316
00317
00318
00319
00320 if (gamma == 0.0)
00321 {
00322
00323
00324 auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00325 }
00326 else
00327 {
00328
00329
00330 auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00331 auvxpwy_symmetric(norm*gamma,a,a,a11,b,a12,length);
00332 memcpy(b,set->z,length*sizeof(double _Complex));
00333
00334 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00335
00336 a[0] *= 0.5;
00337 }
00338
00339
00340 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00341
00342 b[0] *= 0.5;
00343 }
00344
00345 static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b,
00346 double *a11, double *a12, double *a21, double *a22, double *x, double gamma, int tau, fpt_set set)
00347 {
00349 int length = 1<<(tau+1);
00351 double norm = 1.0/(length<<1);
00352
00353
00354 a[0] *= 2.0;
00355 b[0] *= 2.0;
00356
00357 UNUSED(a11); UNUSED(a12);
00358
00359
00360 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00361 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00362
00363
00364
00365
00366
00367
00368
00369
00370 if (gamma == 0.0)
00371 {
00372
00373
00374 auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00375 }
00376 else
00377 {
00378
00379
00380 auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00381 auvxpwy_symmetric_2(norm*gamma,a,a,a21,b,a22,length,x);
00382 memcpy(b,set->z,length*sizeof(double _Complex));
00383
00384 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00385
00386 a[0] *= 0.5;
00387 }
00388
00389
00390 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00391
00392 b[0] *= 0.5;
00393 }
00394
00395 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00396 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
00397 double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00398 { \
00399 \
00400 int length = 1<<(tau+1); \
00401 \
00402 double norm = 1.0/(length<<1); \
00403 \
00404 \
00405 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00406 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00407 \
00408 \
00409 M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00410 M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00411 memcpy(a,set->z,length*sizeof(double _Complex)); \
00412 \
00413 \
00414 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00415 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00416 }
00417
00418 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00419 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00420
00421
00422
00423
00424 static inline void fpt_do_step_t_symmetric_u(double _Complex *a,
00425 double _Complex *b, double *a11, double *a12, double *x,
00426 double gamma, int tau, fpt_set set)
00427 {
00429 int length = 1<<(tau+1);
00431 double norm = 1.0/(length<<1);
00432
00433
00434 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00435 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00436
00437
00438 abuvxpwy_symmetric1_1(norm,gamma,set->z,a,a11,b,length,x);
00439 abuvxpwy_symmetric1_2(norm,gamma,b,a,a12,b,length,x);
00440 memcpy(a,set->z,length*sizeof(double _Complex));
00441
00442
00443 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00444 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00445 }
00446
00447 static inline void fpt_do_step_t_symmetric_l(double _Complex *a,
00448 double _Complex *b, double *a21, double *a22,
00449 double *x, double gamma, int tau, fpt_set set)
00450 {
00452 int length = 1<<(tau+1);
00454 double norm = 1.0/(length<<1);
00455
00456
00457 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00458 fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00459
00460
00461 abuvxpwy_symmetric2_2(norm,gamma,set->z,a,b,a21,length,x);
00462 abuvxpwy_symmetric2_1(norm,gamma,b,a,b,a22,length,x);
00463 memcpy(a,set->z,length*sizeof(double _Complex));
00464
00465
00466 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00467 fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00468 }
00469
00470
00471 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00472 const double *beta, const double *gamma)
00473 {
00474
00475
00476
00477 int i,j;
00478 double a,b,x_val_act,a_old;
00479 const double *x_act;
00480 double *y_act;
00481 const double *alpha_act, *beta_act, *gamma_act;
00482
00483
00484 x_act = x;
00485 y_act = y;
00486 for (i = 0; i < size; i++)
00487 {
00488 a = 1.0;
00489 b = 0.0;
00490 x_val_act = *x_act;
00491
00492 if (k == 0)
00493 {
00494 *y_act = 1.0;
00495 }
00496 else
00497 {
00498 alpha_act = &(alpha[k]);
00499 beta_act = &(beta[k]);
00500 gamma_act = &(gamma[k]);
00501 for (j = k; j > 1; j--)
00502 {
00503 a_old = a;
00504 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00505 b = a_old*(*gamma_act);
00506 alpha_act--;
00507 beta_act--;
00508 gamma_act--;
00509 }
00510 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00511 }
00512 x_act++;
00513 y_act++;
00514 }
00515 }
00516
00517 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00518 const double *beta, const double *gamma)
00519 {
00520
00521
00522
00523 int i,j;
00524 double a,b,x_val_act,a_old;
00525 const double *x_act;
00526 double *y_act, *z_act;
00527 const double *alpha_act, *beta_act, *gamma_act;
00528
00529
00530 x_act = x;
00531 y_act = y;
00532 z_act = z;
00533 for (i = 0; i < size; i++)
00534 {
00535 a = 1.0;
00536 b = 0.0;
00537 x_val_act = *x_act;
00538
00539 if (k == 0)
00540 {
00541 *y_act = 1.0;
00542 *z_act = 0.0;
00543 }
00544 else
00545 {
00546 alpha_act = &(alpha[k]);
00547 beta_act = &(beta[k]);
00548 gamma_act = &(gamma[k]);
00549 for (j = k; j > 1; j--)
00550 {
00551 a_old = a;
00552 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00553 b = a_old*(*gamma_act);
00554 alpha_act--;
00555 beta_act--;
00556 gamma_act--;
00557 }
00558 if (i < size1)
00559 *z_act = a;
00560 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00561 }
00562
00563 x_act++;
00564 y_act++;
00565 z_act++;
00566 }
00567
00568
00569
00570
00571 }
00572
00573 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00574 const double *alpha, const double *beta, const double *gamma, const
00575 double threshold)
00576 {
00577
00578
00579
00580 int i,j;
00581 double a,b,x_val_act,a_old;
00582 const double *x_act;
00583 double *y_act, *z_act;
00584 const double *alpha_act, *beta_act, *gamma_act;
00585 R max = -R_MAX;
00586 const R t = LOG10(FABS(threshold));
00587
00588
00589 x_act = x;
00590 y_act = y;
00591 z_act = z;
00592 for (i = 0; i < size; i++)
00593 {
00594 a = 1.0;
00595 b = 0.0;
00596 x_val_act = *x_act;
00597
00598 if (k == 0)
00599 {
00600 *y_act = 1.0;
00601 *z_act = 0.0;
00602 }
00603 else
00604 {
00605 alpha_act = &(alpha[k]);
00606 beta_act = &(beta[k]);
00607 gamma_act = &(gamma[k]);
00608 for (j = k; j > 1; j--)
00609 {
00610 a_old = a;
00611 a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00612 b = a_old*(*gamma_act);
00613 alpha_act--;
00614 beta_act--;
00615 gamma_act--;
00616 }
00617 *z_act = a;
00618 *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00619 max = FMAX(max,LOG10(FABS(*y_act)));
00620 if (max > t)
00621 return 1;
00622 }
00623 x_act++;
00624 y_act++;
00625 z_act++;
00626 }
00627 return 0;
00628 }
00629
00630 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00631 const double _Complex *a, const double *x, double _Complex *y,
00632 const double *alpha, const double *beta, const double *gamma,
00633 const double lambda)
00634 {
00635 int j,k;
00636 double _Complex tmp1, tmp2, tmp3;
00637 double xc;
00638
00639 if (N == 0)
00640 for (j = 0; j <= M; j++)
00641 y[j] = a[0];
00642 else
00643 {
00644 for (j = 0; j <= M; j++)
00645 {
00646 #if 0
00647 xc = x[j];
00648 tmp2 = a[N];
00649 tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00650 for (k = N-1; k > 0; k--)
00651 {
00652 tmp3 = a[k-1]
00653 + (alpha[k-1] * xc + beta[k-1]) * tmp1
00654 + gamma[k] * tmp2;
00655 tmp2 = tmp1;
00656 tmp1 = tmp3;
00657 }
00658 y[j] = lambda * tmp1;
00659 #else
00660 xc = x[j];
00661 tmp1 = a[N-1];
00662 tmp2 = a[N];
00663 for (k = N-1; k > 0; k--)
00664 {
00665 tmp3 = a[k-1] + tmp2 * gamma[k];
00666 tmp2 *= (alpha[k] * xc + beta[k]);
00667 tmp2 += tmp1;
00668 tmp1 = tmp3;
00669 }
00670 tmp2 *= (alpha[0] * xc + beta[0]);
00671 y[j] = lambda * (tmp2 + tmp1);
00672 #endif
00673 }
00674 }
00675 }
00676
00695 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00696 double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gamma,
00697 double lambda)
00698 {
00699 int j,k;
00700 double _Complex* it1 = temp;
00701 double _Complex* it2 = y;
00702 double _Complex aux;
00703
00704
00705 a[0] = 0.0;
00706 for (j = 0; j <= M; j++)
00707 {
00708 it2[j] = lambda * y[j];
00709 a[0] += it2[j];
00710 }
00711
00712 if (N > 0)
00713 {
00714
00715 a[1] = 0.0;
00716 for (j = 0; j <= M; j++)
00717 {
00718 it1[j] = it2[j];
00719 it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00720 a[1] += it2[j];
00721 }
00722
00723 for (k = 2; k <= N; k++)
00724 {
00725 a[k] = 0.0;
00726 for (j = 0; j <= M; j++)
00727 {
00728 aux = it1[j];
00729 it1[j] = it2[j];
00730 it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gamma[k-1] * aux;
00731 a[k] += it2[j];
00732 }
00733 }
00734 }
00735 }
00736
00737
00738 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00739 {
00741 int plength;
00743 int tau;
00745 int m;
00746 int k;
00747
00748
00749 fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00750
00751
00752 set->flags = flags;
00753
00754 set->M = M;
00755 set->t = t;
00756 set->N = 1<<t;
00757
00758
00759 set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00760
00761
00762 for (m = 0; m < set->M; m++)
00763 set->dpt[m].steps = 0;
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773 set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00774
00775
00776 plength = 4;
00777 for (tau = 1; tau < t+1; tau++)
00778 {
00779
00780 set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00781 for (k = 0; k < plength; k++)
00782 {
00783 set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00784 }
00785 plength = plength << 1;
00786 }
00787
00789 set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00790 set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00791
00792
00793 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00794 {
00796 set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00797 set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00798 set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00799
00801 set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
00802 set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t));
00803 set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00804 set->kinds[0] = FFTW_REDFT01;
00805 set->kinds[1] = FFTW_REDFT01;
00806 set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00807 set->kindsr[0] = FFTW_REDFT10;
00808 set->kindsr[1] = FFTW_REDFT10;
00809 set->lengths = (int*) nfft_malloc((set->t)*sizeof(int));
00810 for (tau = 0, plength = 4; tau < set->t; tau++, plength<<=1)
00811 {
00812 set->lengths[tau] = plength;
00813 set->plans_dct3[tau] =
00814 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00815 2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00816 0);
00817 set->plans_dct2[tau] =
00818 fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00819 2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00820 0);
00821 }
00822 nfft_free(set->lengths);
00823 nfft_free(set->kinds);
00824 nfft_free(set->kindsr);
00825 set->lengths = NULL;
00826 set->kinds = NULL;
00827 set->kindsr = NULL;
00828 }
00829
00830 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00831 {
00832 set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00833 set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00834 }
00835
00836
00837 return set;
00838 }
00839
00840 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00841 double *gam, int k_start, const double threshold)
00842 {
00843
00844 int tau;
00845 int l;
00846 int plength;
00848 int degree;
00850 int firstl;
00851 int lastl;
00852 int plength_stab;
00854 int degree_stab;
00856 double *a11;
00858 double *a12;
00860 double *a21;
00862 double *a22;
00864 const double *calpha;
00865 const double *cbeta;
00866 const double *cgamma;
00867 int needstab = 0;
00868 int k_start_tilde;
00869 int N_tilde;
00870 int clength;
00871 int clength_1;
00872 int clength_2;
00873 int t_stab, N_stab;
00874 fpt_data *data;
00875
00876
00877 data = &(set->dpt[m]);
00878
00879
00880 if (data->steps != NULL)
00881 return;
00882
00883
00884 data->k_start = k_start;
00885
00886
00887 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00888 {
00889
00890 data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00891 data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00892 data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00893
00894 for (tau = 2; tau <= set->t; tau++)
00895 {
00896
00897 data->alphaN[tau-2] = alpha[1<<tau];
00898 data->betaN[tau-2] = beta[1<<tau];
00899 data->gammaN[tau-2] = gam[1<<tau];
00900 }
00901
00902 data->alpha_0 = alpha[1];
00903 data->beta_0 = beta[1];
00904 data->gamma_m1 = gam[0];
00905
00906 k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
00907 );
00908 N_tilde = N_TILDE(set->N);
00909
00910
00911 data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00912
00913
00914 plength = 4;
00915 for (tau = 1; tau < set->t; tau++)
00916 {
00917
00918 degree = plength>>1;
00919
00920 firstl = FIRST_L(k_start_tilde,plength);
00921
00922 lastl = LAST_L(N_tilde,plength);
00923
00924
00925
00926 data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00927 * (lastl+1));
00928
00929
00930 for (l = firstl; l <= lastl; l++)
00931 {
00932 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00933 {
00934
00935
00936 clength = plength/2;
00937 }
00938 else
00939 {
00940 clength = plength;
00941 }
00942
00943
00944 a11 = (double*) nfft_malloc(sizeof(double)*clength);
00945 a12 = (double*) nfft_malloc(sizeof(double)*clength);
00946 a21 = (double*) nfft_malloc(sizeof(double)*clength);
00947 a22 = (double*) nfft_malloc(sizeof(double)*clength);
00948
00949
00950
00951
00952
00953 calpha = &(alpha[plength*l+1+1]);
00954 cbeta = &(beta[plength*l+1+1]);
00955 cgamma = &(gam[plength*l+1+1]);
00956
00957 if (set->flags & FPT_NO_STABILIZATION)
00958 {
00959
00960 calpha--;
00961 cbeta--;
00962 cgamma--;
00963 eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
00964 cgamma);
00965 eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
00966 cgamma);
00967 needstab = 0;
00968 }
00969 else
00970 {
00971 calpha--;
00972 cbeta--;
00973 cgamma--;
00974
00975 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
00976 degree-1, calpha, cbeta, cgamma, threshold);
00977 if (needstab == 0)
00978 {
00979
00980 needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
00981 degree, calpha, cbeta, cgamma, threshold);
00982 }
00983 }
00984
00985
00986 if (needstab == 0)
00987 {
00988 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
00989 data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
00990 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
00991 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
00992 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
00993
00994 data->steps[tau][l].a11[0] = a11;
00995 data->steps[tau][l].a12[0] = a12;
00996 data->steps[tau][l].a21[0] = a21;
00997 data->steps[tau][l].a22[0] = a22;
00998 data->steps[tau][l].g[0] = gam[plength*l+1+1];
00999 data->steps[tau][l].stable = true;
01000 }
01001 else
01002 {
01003
01004 degree_stab = degree*(2*l+1);
01005 nfft_next_power_of_2_exp((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01006
01007
01008 nfft_free(a11);
01009 nfft_free(a12);
01010 nfft_free(a21);
01011 nfft_free(a22);
01012
01013 data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01014 data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01015 data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01016 data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01017 data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01018
01019 plength_stab = N_stab;
01020
01021 if (set->flags & FPT_AL_SYMMETRY)
01022 {
01023 if (m <= 1)
01024 {
01025
01026 clength_1 = plength_stab;
01027 clength_2 = plength_stab;
01028
01029 a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01030 a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01031 a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01032 a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01033
01034 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01035 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01036 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01037 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01038 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01039 }
01040 else
01041 {
01042 clength = plength_stab/2;
01043 if (m%2 == 0)
01044 {
01045 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01046 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01047 a21 = 0;
01048 a22 = 0;
01049 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01050 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01051 degree_stab-2, calpha, cbeta, cgamma);
01052 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01053 degree_stab-1, calpha, cbeta, cgamma);
01054 }
01055 else
01056 {
01057 a11 = 0;
01058 a12 = 0;
01059 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01060 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01061 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01062 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01063 degree_stab-1,calpha, cbeta, cgamma);
01064 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01065 degree_stab+0, calpha, cbeta, cgamma);
01066 }
01067 }
01068 }
01069 else
01070 {
01071 clength_1 = plength_stab;
01072 clength_2 = plength_stab;
01073 a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01074 a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01075 a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01076 a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01077 calpha = &(alpha[2]);
01078 cbeta = &(beta[2]);
01079 cgamma = &(gam[2]);
01080 calpha--;
01081 cbeta--;
01082 cgamma--;
01083 eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01084 calpha, cbeta, cgamma);
01085 eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01086 calpha, cbeta, cgamma);
01087
01088 }
01089 data->steps[tau][l].a11[0] = a11;
01090 data->steps[tau][l].a12[0] = a12;
01091 data->steps[tau][l].a21[0] = a21;
01092 data->steps[tau][l].a22[0] = a22;
01093
01094 data->steps[tau][l].g[0] = gam[1+1];
01095 data->steps[tau][l].stable = false;
01096 data->steps[tau][l].ts = t_stab;
01097 data->steps[tau][l].Ns = N_stab;
01098 }
01099 }
01101 plength = plength << 1;
01102 }
01103 }
01104
01105 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01106 {
01107
01108 if (set->flags & FPT_PERSISTENT_DATA)
01109 {
01110 data->alpha = (double*) alpha;
01111 data->beta = (double*) beta;
01112 data->gamma = (double*) gam;
01113 }
01114 else
01115 {
01116 data->alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01117 data->beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01118 data->gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01119 memcpy(data->alpha,alpha,(set->N+1)*sizeof(double));
01120 memcpy(data->beta,beta,(set->N+1)*sizeof(double));
01121 memcpy(data->gamma,gam,(set->N+1)*sizeof(double));
01122 }
01123 }
01124 }
01125
01126 void dpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01127 const int k_end, const unsigned int flags)
01128 {
01129 int j;
01130 fpt_data *data = &(set->dpt[m]);
01131 int Nk;
01132 int tk;
01133 double norm;
01134
01135 nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01136 norm = 2.0/(Nk<<1);
01137
01138 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01139 {
01140 return;
01141 }
01142
01143 if (flags & FPT_FUNCTION_VALUES)
01144 {
01145
01146 for (j = 0; j <= k_end; j++)
01147 {
01148 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01149 }
01150
01151 memset(set->result,0U,data->k_start*sizeof(double _Complex));
01152 memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01153
01154
01155
01156
01157 eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01158 y, &data->alpha[1], &data->beta[1], &data->gamma[1], data->gamma_m1);
01159 }
01160 else
01161 {
01162 memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01163 memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01164
01165 eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01166 set->result, &data->alpha[1], &data->beta[1], &data->gamma[1],
01167 data->gamma_m1);
01168
01169 fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01170 (double*)set->result);
01171
01172 set->result[0] *= 0.5;
01173 for (j = 0; j < Nk; j++)
01174 {
01175 set->result[j] *= norm;
01176 }
01177
01178 memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01179 }
01180 }
01181
01182 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01183 const int k_end, const unsigned int flags)
01184 {
01185
01186 fpt_data *data = &(set->dpt[m]);
01188 int Nk;
01190 int tk;
01192 int k_start_tilde;
01194 int k_end_tilde;
01195
01197 int tau;
01199 int firstl;
01201 int lastl;
01203 int l;
01205 int plength;
01207 int plength_stab;
01208 int t_stab;
01210 fpt_step *step;
01212 fftw_plan plan = 0;
01213 int length = k_end+1;
01214 fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01215
01217 int k;
01218
01219 double _Complex *work_ptr;
01220 const double _Complex *x_ptr;
01221
01222
01223 if (k_end < FPT_BREAK_EVEN)
01224 {
01225
01226 dpt_trafo(set, m, x, y, k_end, flags);
01227 return;
01228 }
01229
01230 nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01231 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01232 k_end_tilde = K_END_TILDE(k_end,Nk);
01233
01234
01235 if (set->flags & FPT_NO_FAST_ALGORITHM)
01236 return;
01237
01238 if (flags & FPT_FUNCTION_VALUES)
01239 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01240 (double*)set->work, NULL, 2, 1, kinds, 0U);
01241
01242
01243 memset(set->result,0U,2*Nk*sizeof(double _Complex));
01244
01245
01246
01247
01248 memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01249
01250 work_ptr = &set->work[2*data->k_start];
01251 x_ptr = x;
01252
01253 for (k = 0; k <= k_end_tilde-data->k_start; k++)
01254 {
01255 *work_ptr++ = *x_ptr++;
01256 *work_ptr++ = K(0.0);
01257 }
01258
01259
01260 memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01261
01262
01263
01264 if (k_end == Nk)
01265 {
01266 set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01267 set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01268 set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01269 }
01270
01271
01272 plength = 4;
01273 for (tau = 1; tau < tk; tau++)
01274 {
01275
01276 firstl = FIRST_L(k_start_tilde,plength);
01277
01278 lastl = LAST_L(k_end_tilde,plength);
01279
01280
01281 for (l = firstl; l <= lastl; l++)
01282 {
01283
01284 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01285 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01286 memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01287 memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01288
01289
01290 memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01291 memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01292 memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01293
01294
01295 step = &(data->steps[tau][l]);
01296
01297
01298 if (step->stable)
01299 {
01300
01301 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01302 {
01303
01304
01305
01306
01307
01308
01309
01310 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01311 step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01312 }
01313 else
01314 {
01315
01316 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01317 step->a21[0], step->a22[0], step->g[0], tau, set);
01318 }
01319
01320 if (step->g[0] != 0.0)
01321 {
01322 for (k = 0; k < plength; k++)
01323 {
01324 set->work[plength*2*l+k] += set->vec3[k];
01325 }
01326 }
01327 for (k = 0; k < plength; k++)
01328 {
01329 set->work[plength*(2*l+1)+k] += set->vec4[k];
01330 }
01331 }
01332 else
01333 {
01334
01335
01336
01337 plength_stab = step->Ns;
01338 t_stab = step->ts;
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350 memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01351 memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01352
01353
01354
01355 if (set->flags & FPT_AL_SYMMETRY)
01356 {
01357 if (m <= 1)
01358 {
01359 fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01360 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01361 }
01362 else if (m%2 == 0)
01363 {
01364
01365
01366 fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01367 step->a21[0], step->a22[0],
01368 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01369
01370
01371 }
01372 else
01373 {
01374
01375
01376 fpt_do_step_symmetric_l(set->vec3, set->vec4,
01377 step->a11[0], step->a12[0],
01378 step->a21[0],
01379 step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01380
01381
01382 }
01383 }
01384 else
01385 {
01386 fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01387 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01388 }
01389
01390 if (step->g[0] != 0.0)
01391 {
01392 for (k = 0; k < plength_stab; k++)
01393 {
01394 set->result[k] += set->vec3[k];
01395 }
01396 }
01397
01398 for (k = 0; k < plength_stab; k++)
01399 {
01400 set->result[Nk+k] += set->vec4[k];
01401 }
01402 }
01403 }
01404
01405 plength = plength<<1;
01406
01407
01408
01409
01410
01411
01412
01413
01414
01415 }
01416
01417
01418
01419 for (k = 0; k < 2*Nk; k++)
01420 {
01421 set->result[k] += set->work[k];
01422 }
01423
01424
01425
01426 y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01427 data->alpha_0*set->result[Nk+1]*0.5);
01428 y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01429 data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01430 y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01431 data->beta_0*set->result[Nk+k_end-1] +
01432 data->alpha_0*set->result[Nk+k_end-2]*0.5);
01433 y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01434 for (k = 2; k <= k_end-2; k++)
01435 {
01436 y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01437 data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01438 }
01439
01440 if (flags & FPT_FUNCTION_VALUES)
01441 {
01442 y[0] *= 2.0;
01443 fftw_execute_r2r(plan,(double*)y,(double*)y);
01444 fftw_destroy_plan(plan);
01445 for (k = 0; k <= k_end; k++)
01446 {
01447 y[k] *= 0.5;
01448 }
01449 }
01450 }
01451
01452 void dpt_transposed(fpt_set set, const int m, double _Complex *x,
01453 double _Complex *y, const int k_end, const unsigned int flags)
01454 {
01455 int j;
01456 fpt_data *data = &(set->dpt[m]);
01457 int Nk;
01458 int tk;
01459 double norm;
01460
01461 nfft_next_power_of_2_exp(k_end+1,&Nk,&tk);
01462 norm = 2.0/(Nk<<1);
01463
01464 if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01465 {
01466 return;
01467 }
01468
01469 if (flags & FPT_FUNCTION_VALUES)
01470 {
01471 for (j = 0; j <= k_end; j++)
01472 {
01473 set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01474 }
01475
01476 eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01477 y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01478 data->gamma_m1);
01479
01480 memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01481 sizeof(double _Complex));
01482 }
01483 else
01484 {
01485 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01486 memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01487
01488 for (j = 0; j < Nk; j++)
01489 {
01490 set->result[j] *= norm;
01491 }
01492
01493 fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01494 (double*)set->result);
01495
01496 eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01497 set->result, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01498 data->gamma_m1);
01499
01500 memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01501 }
01502 }
01503
01504 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01505 double _Complex *y, const int k_end, const unsigned int flags)
01506 {
01507
01508 fpt_data *data = &(set->dpt[m]);
01510 int Nk;
01512 int tk;
01514 int k_start_tilde;
01516 int k_end_tilde;
01517
01519 int tau;
01521 int firstl;
01523 int lastl;
01525 int l;
01527 int plength;
01529 int plength_stab;
01531 fpt_step *step;
01533 fftw_plan plan;
01534 int length = k_end+1;
01535 fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01537 int k;
01538 int t_stab;
01539
01540
01541 if (k_end < FPT_BREAK_EVEN)
01542 {
01543
01544 dpt_transposed(set, m, x, y, k_end, flags);
01545 return;
01546 }
01547
01548 nfft_next_power_of_2_exp(k_end,&Nk,&tk);
01549 k_start_tilde = K_START_TILDE(data->k_start,Nk);
01550 k_end_tilde = K_END_TILDE(k_end,Nk);
01551
01552
01553 if (set->flags & FPT_NO_FAST_ALGORITHM)
01554 {
01555 return;
01556 }
01557
01558 if (flags & FPT_FUNCTION_VALUES)
01559 {
01560 plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01561 (double*)set->work, NULL, 2, 1, kinds, 0U);
01562 fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01563 fftw_destroy_plan(plan);
01564 for (k = 0; k <= k_end; k++)
01565 {
01566 set->result[k] *= 0.5;
01567 }
01568 }
01569 else
01570 {
01571 memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01572 }
01573
01574
01575 memset(set->work,0U,2*Nk*sizeof(double _Complex));
01576
01577
01578 for (k = 0; k <= k_end; k++)
01579 {
01580 set->work[k] = data->gamma_m1*set->result[k];
01581 }
01582
01583
01584 set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01585 data->alpha_0*set->result[1]);
01586 for (k = 1; k < k_end; k++)
01587 {
01588 set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01589 data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01590 }
01591 if (k_end<Nk)
01592 {
01593 memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01594 }
01595
01597 memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01598
01599
01600 plength = Nk;
01601 for (tau = tk-1; tau >= 1; tau--)
01602 {
01603
01604 firstl = FIRST_L(k_start_tilde,plength);
01605
01606 lastl = LAST_L(k_end_tilde,plength);
01607
01608
01609 for (l = firstl; l <= lastl; l++)
01610 {
01611
01612 memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01613 memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01614
01615 memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01616 (plength/2)*sizeof(double _Complex));
01617
01618
01619 step = &(data->steps[tau][l]);
01620
01621
01622 if (step->stable)
01623 {
01624 if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01625 {
01626
01627 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01628 step->a21[0], step->a22[0], step->g[0], tau, set);
01629 }
01630 else
01631 {
01632
01633 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01634 step->a21[0], step->a22[0], step->g[0], tau, set);
01635 }
01636 memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01637
01638 for (k = 0; k < plength; k++)
01639 {
01640 set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01641 }
01642 }
01643 else
01644 {
01645
01646 plength_stab = step->Ns;
01647 t_stab = step->ts;
01648
01649 memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01650 memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01651
01652
01653 if (set->flags & FPT_AL_SYMMETRY)
01654 {
01655 if (m <= 1)
01656 {
01657 fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01658 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01659 }
01660 else if (m%2 == 0)
01661 {
01662 fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01663 set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01664 }
01665 else
01666 {
01667 fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01668 step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01669 }
01670 }
01671 else
01672 {
01673 fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01674 step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01675 }
01676
01677 memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01678
01679 for (k = 0; k < plength; k++)
01680 {
01681 set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01682 }
01683 }
01684 }
01685
01686 plength = plength>>1;
01687 }
01688
01689
01690 for (k = 0; k <= k_end_tilde-data->k_start; k++)
01691 {
01692 x[k] = set->work[2*(data->k_start+k)];
01693 }
01694 if (k_end == Nk)
01695 {
01696 x[Nk-data->k_start] =
01697 data->gammaN[tk-2]*set->work[2*(Nk-2)]
01698 + data->betaN[tk-2] *set->work[2*(Nk-1)]
01699 + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01700 }
01701 }
01702
01703 void fpt_finalize(fpt_set set)
01704 {
01705 int tau;
01706 int l;
01707 int m;
01708 fpt_data *data;
01709 int k_start_tilde;
01710 int N_tilde;
01711 int firstl, lastl;
01712 int plength;
01713 const int M = set->M;
01714
01715
01716 for (m = 0; m < M; m++)
01717 {
01718
01719 data = &set->dpt[m];
01720 if (data->steps != (fpt_step**)NULL)
01721 {
01722 nfft_free(data->alphaN);
01723 nfft_free(data->betaN);
01724 nfft_free(data->gammaN);
01725 data->alphaN = NULL;
01726 data->betaN = NULL;
01727 data->gammaN = NULL;
01728
01729
01730 k_start_tilde = K_START_TILDE(data->k_start,nfft_next_power_of_2(data->k_start)
01731 );
01732 N_tilde = N_TILDE(set->N);
01733 plength = 4;
01734 for (tau = 1; tau < set->t; tau++)
01735 {
01736
01737 firstl = FIRST_L(k_start_tilde,plength);
01738
01739 lastl = LAST_L(N_tilde,plength);
01740
01741
01742 for (l = firstl; l <= lastl; l++)
01743 {
01744
01745 nfft_free(data->steps[tau][l].a11[0]);
01746 nfft_free(data->steps[tau][l].a12[0]);
01747 nfft_free(data->steps[tau][l].a21[0]);
01748 nfft_free(data->steps[tau][l].a22[0]);
01749 data->steps[tau][l].a11[0] = NULL;
01750 data->steps[tau][l].a12[0] = NULL;
01751 data->steps[tau][l].a21[0] = NULL;
01752 data->steps[tau][l].a22[0] = NULL;
01753
01754 nfft_free(data->steps[tau][l].a11);
01755 nfft_free(data->steps[tau][l].a12);
01756 nfft_free(data->steps[tau][l].a21);
01757 nfft_free(data->steps[tau][l].a22);
01758 nfft_free(data->steps[tau][l].g);
01759 data->steps[tau][l].a11 = NULL;
01760 data->steps[tau][l].a12 = NULL;
01761 data->steps[tau][l].a21 = NULL;
01762 data->steps[tau][l].a22 = NULL;
01763 data->steps[tau][l].g = NULL;
01764 }
01765
01766 nfft_free(data->steps[tau]);
01767 data->steps[tau] = NULL;
01768
01769 plength = plength<<1;
01770 }
01771
01772 nfft_free(data->steps);
01773 data->steps = NULL;
01774 }
01775
01776 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01777 {
01778
01779
01780 if (!(set->flags & FPT_PERSISTENT_DATA))
01781 {
01782 nfft_free(data->alpha);
01783 nfft_free(data->beta);
01784 nfft_free(data->gamma);
01785 }
01786 data->alpha = NULL;
01787 data->beta = NULL;
01788 data->gamma = NULL;
01789 }
01790 }
01791
01792
01793 nfft_free(set->dpt);
01794 set->dpt = NULL;
01795
01796 for (tau = 1; tau < set->t+1; tau++)
01797 {
01798 nfft_free(set->xcvecs[tau-1]);
01799 set->xcvecs[tau-1] = NULL;
01800 }
01801 nfft_free(set->xcvecs);
01802 set->xcvecs = NULL;
01803
01804
01805 nfft_free(set->work);
01806 nfft_free(set->result);
01807
01808
01809 if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01810 {
01811
01812 nfft_free(set->vec3);
01813 nfft_free(set->vec4);
01814 nfft_free(set->z);
01815 set->work = NULL;
01816 set->result = NULL;
01817 set->vec3 = NULL;
01818 set->vec4 = NULL;
01819 set->z = NULL;
01820
01821
01822 for(tau = 0; tau < set->t; tau++)
01823 {
01824 fftw_destroy_plan(set->plans_dct3[tau]);
01825 fftw_destroy_plan(set->plans_dct2[tau]);
01826 set->plans_dct3[tau] = NULL;
01827 set->plans_dct2[tau] = NULL;
01828 }
01829
01830 nfft_free(set->plans_dct3);
01831 nfft_free(set->plans_dct2);
01832 set->plans_dct3 = NULL;
01833 set->plans_dct2 = NULL;
01834 }
01835
01836 if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01837 {
01838
01839 nfft_free(set->xc_slow);
01840 set->xc_slow = NULL;
01841 nfft_free(set->temp);
01842 set->temp = NULL;
01843 }
01844
01845
01846 nfft_free(set);
01847 }