NFFT Logo 3.2.2
fpt.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: fpt.c 3896 2012-10-10 12:19:26Z tovo $ */
00020 
00027 #include "config.h"
00028 
00029 #include <stdlib.h>
00030 #include <string.h>
00031 #include <stdbool.h>
00032 #include <math.h>
00033 #ifdef HAVE_COMPLEX_H
00034 #include <complex.h>
00035 #endif
00036 
00037 #include "nfft3.h"
00038 #include "nfft3util.h"
00039 #include "infft.h"
00040 
00041 /* Macros for index calculation. */
00042 
00044 #define K_START_TILDE(x,y) (NFFT_MAX(NFFT_MIN(x,y-2),0))
00045 
00047 #define K_END_TILDE(x,y) NFFT_MIN(x,y-1)
00048 
00050 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
00051 
00053 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
00054 
00055 #define N_TILDE(y) (y-1)
00056 
00057 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
00058 
00059 #define FPT_BREAK_EVEN 4
00060 
00064 typedef struct fpt_step_
00065 {
00066   bool stable;                            
00069   int Ns;                                 
00070   int ts;                                 
00071   double **a11,**a12,**a21,**a22;         
00072   double *g;                              
00073 } fpt_step;
00074 
00078 typedef struct fpt_data_
00079 {
00080   fpt_step **steps;                       
00081   int k_start;                            
00082   double *alphaN;                         
00083   double *betaN;                          
00084   double *gammaN;                         
00085   double alpha_0;                         
00086   double beta_0;                          
00087   double gamma_m1;                        
00088   /* Data for direct transform. */        
00089   double *_alpha;                          
00090   double *_beta;                           
00091   double *_gamma;                          
00092 } fpt_data;
00093 
00097 typedef struct fpt_set_s_
00098 {
00099   int flags;                              
00100   int M;                                  
00101   int N;                                  
00103   int t;                                  
00104   fpt_data *dpt;                          
00105   double **xcvecs;                        
00108   double *xc;                             
00109   double _Complex *temp;                          
00110   double _Complex *work;                          
00111   double _Complex *result;                        
00112   double _Complex *vec3;
00113   double _Complex *vec4;
00114   double _Complex *z;
00115   fftw_plan *plans_dct3;                  
00117   fftw_plan *plans_dct2;                  
00119   fftw_r2r_kind *kinds;                   
00121   fftw_r2r_kind *kindsr;                  
00124   int *lengths; 
00126   /* Data for slow transforms. */
00127   double *xc_slow;
00128 } fpt_set_s;
00129 
00130 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
00131   double* v, double _Complex* y, double* w, int n)
00132 {
00133   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00134   double *v_ptr = v, *w_ptr = w;
00135   for (l = 0; l < n; l++)
00136     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00137 }
00138 
00139 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
00140 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00141   double* v, double _Complex* y, double* w, int n) \
00142 { \
00143   const int n2 = n>>1; \
00144   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00145   double *v_ptr = v, *w_ptr = w; \
00146   for (l = 0; l < n2; l++) \
00147     *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
00148   v_ptr--; w_ptr--; \
00149   for (l = 0; l < n2; l++) \
00150     *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
00151 }
00152 
00153 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
00154 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
00155 
00156 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
00157 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00158   double* v, double _Complex* y, int n, double *xx) \
00159 { \
00160   const int n2 = n>>1; \
00161   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00162   double *v_ptr = v, *xx_ptr = xx; \
00163   for (l = 0; l < n2; l++, v_ptr++) \
00164     *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
00165   v_ptr--; \
00166   for (l = 0; l < n2; l++, v_ptr--) \
00167     *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
00168 }
00169 
00170 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
00171 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
00172 
00173 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
00174 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
00175   double _Complex* y, double* w, int n, double *xx) \
00176 { \
00177   const int n2 = n>>1; \
00178   int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
00179   double *w_ptr = w, *xx_ptr = xx; \
00180   for (l = 0; l < n2; l++, w_ptr++) \
00181     *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
00182   w_ptr--; \
00183   for (l = 0; l < n2; l++, w_ptr--) \
00184     *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
00185 }
00186 
00187 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
00188 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
00189 
00190 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
00191   double _Complex* y, double* w, int n)
00192 {
00193   int l;
00194   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00195   double *v_ptr = v, *w_ptr = w;
00196   for (l = n; l > 0; l--)
00197     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00198 }
00199 
00200 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
00201   double* v, double _Complex* y, double* w, int n)
00202 {
00203   const int n2 = n>>1; \
00204   int l;
00205   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00206   double *v_ptr = v, *w_ptr = w;
00207   for (l = n2; l > 0; l--)
00208     *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
00209   v_ptr--; w_ptr--;
00210   for (l = n2; l > 0; l--)
00211     *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
00212 }
00213 
00214 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
00215   double* v, double _Complex* y, double* w, int n, double *xx)
00216 {
00217   const int n2 = n>>1; \
00218   int l;
00219   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00220   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00221   for (l = n2; l > 0; l--, xx_ptr++)
00222     *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
00223   v_ptr--; w_ptr--;
00224   for (l = n2; l > 0; l--, xx_ptr++)
00225     *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
00226 }
00227 
00228 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
00229   double* v, double _Complex* y, double* w, int n, double *xx)
00230 {
00231   const int n2 = n>>1; \
00232   int l;
00233   double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
00234   double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
00235   for (l = n2; l > 0; l--, xx_ptr++)
00236     *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
00237   v_ptr--; w_ptr--;
00238   for (l = n2; l > 0; l--, xx_ptr++)
00239     *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
00240 }
00241 
00242 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
00243 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, double *a12, \
00244   double *a21, double *a22, double g, int tau, fpt_set set) \
00245 { \
00246  \
00247   int length = 1<<(tau+1); \
00248  \
00249   double norm = 1.0/(length<<1); \
00250   \
00251   /* Compensate for factors introduced by a raw DCT-III. */ \
00252   a[0] *= 2.0; \
00253   b[0] *= 2.0; \
00254   \
00255   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00256   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00257   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00258   \
00259   /*for (k = 0; k < length; k++)*/ \
00260   /*{*/ \
00261     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
00262     /*  a11[k],a12[k],a21[k],a22[k]);*/ \
00263   /*}*/ \
00264   \
00265   /* Check, if gamma is zero. */ \
00266   if (g == 0.0) \
00267   { \
00268     /*fprintf(stderr,"gamma == 0!\n");*/ \
00269     /* Perform multiplication only for second row. */ \
00270     M2_FUNCTION(norm,b,b,a22,a,a21,length); \
00271   } \
00272   else \
00273   { \
00274     /*fprintf(stderr,"gamma != 0!\n");*/ \
00275     /* Perform multiplication for both rows. */ \
00276     M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
00277     M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
00278     memcpy(b,set->z,length*sizeof(double _Complex)); \
00279     /* Compute Chebyshev-coefficients using a DCT-II. */ \
00280     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00281     /* Compensate for factors introduced by a raw DCT-II. */ \
00282     a[0] *= 0.5; \
00283   } \
00284   \
00285   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00286   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00287   /* Compensate for factors introduced by a raw DCT-II. */ \
00288   b[0] *= 0.5; \
00289 }
00290 
00291 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
00292 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
00293 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
00294 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/
00295 
00296 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
00297   double *a11, double *a12, double *a21, double *a22, double *x,
00298   double gam, int tau, fpt_set set)
00299 {
00301   int length = 1<<(tau+1);
00303   double norm = 1.0/(length<<1);
00304 
00305   UNUSED(a21); UNUSED(a22);
00306 
00307   /* Compensate for factors introduced by a raw DCT-III. */
00308   a[0] *= 2.0;
00309   b[0] *= 2.0;
00310 
00311   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00312   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00313   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00314 
00315   /*for (k = 0; k < length; k++)*/
00316   /*{*/
00317     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00318     /*  a11[k],a12[k],a21[k],a22[k]);*/
00319   /*}*/
00320 
00321   /* Check, if gamma is zero. */
00322   if (gam == 0.0)
00323   {
00324     /*fprintf(stderr,"gamma == 0!\n");*/
00325     /* Perform multiplication only for second row. */
00326     auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
00327   }
00328   else
00329   {
00330     /*fprintf(stderr,"gamma != 0!\n");*/
00331     /* Perform multiplication for both rows. */
00332     auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
00333     auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
00334     memcpy(b,set->z,length*sizeof(double _Complex));
00335     /* Compute Chebyshev-coefficients using a DCT-II. */
00336     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00337     /* Compensate for factors introduced by a raw DCT-II. */
00338     a[0] *= 0.5;
00339   }
00340 
00341   /* Compute Chebyshev-coefficients using a DCT-II. */
00342   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00343   /* Compensate for factors introduced by a raw DCT-II. */
00344   b[0] *= 0.5;
00345 }
00346 
00347 static inline void fpt_do_step_symmetric_l(double _Complex  *a, double _Complex *b,
00348   double *a11, double *a12, double *a21, double *a22, double *x, double gam, int tau, fpt_set set)
00349 {
00351   int length = 1<<(tau+1);
00353   double norm = 1.0/(length<<1);
00354 
00355   /* Compensate for factors introduced by a raw DCT-III. */
00356   a[0] *= 2.0;
00357   b[0] *= 2.0;
00358 
00359   UNUSED(a11); UNUSED(a12);
00360 
00361   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00362   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00363   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00364 
00365   /*for (k = 0; k < length; k++)*/
00366   /*{*/
00367     /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
00368     /*  a11[k],a12[k],a21[k],a22[k]);*/
00369   /*}*/
00370 
00371   /* Check, if gamma is zero. */
00372   if (gam == 0.0)
00373   {
00374     /*fprintf(stderr,"gamma == 0!\n");*/
00375     /* Perform multiplication only for second row. */
00376     auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
00377   }
00378   else
00379   {
00380     /*fprintf(stderr,"gamma != 0!\n");*/
00381     /* Perform multiplication for both rows. */
00382     auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
00383     auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
00384     memcpy(b,set->z,length*sizeof(double _Complex));
00385     /* Compute Chebyshev-coefficients using a DCT-II. */
00386     fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00387     /* Compensate for factors introduced by a raw DCT-II. */
00388     a[0] *= 0.5;
00389   }
00390 
00391   /* Compute Chebyshev-coefficients using a DCT-II. */
00392   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00393   /* Compensate for factors introduced by a raw DCT-II. */
00394   b[0] *= 0.5;
00395 }
00396 
00397 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
00398 static inline void NAME(double _Complex  *a, double _Complex *b, double *a11, \
00399   double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
00400 { \
00401  \
00402   int length = 1<<(tau+1); \
00403  \
00404   double norm = 1.0/(length<<1); \
00405   \
00406   /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
00407   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
00408   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
00409   \
00410   /* Perform matrix multiplication. */ \
00411   M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
00412   M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
00413   memcpy(a,set->z,length*sizeof(double _Complex)); \
00414   \
00415   /* Compute Chebyshev-coefficients using a DCT-II. */ \
00416   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
00417   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
00418 }
00419 
00420 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
00421 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
00422 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/
00423 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/
00424 
00425 
00426 static inline void fpt_do_step_t_symmetric_u(double _Complex  *a,
00427   double _Complex *b, double *a11, double *a12, double *x,
00428   double gam, int tau, fpt_set set)
00429 {
00431   int length = 1<<(tau+1);
00433   double norm = 1.0/(length<<1);
00434 
00435   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00436   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00437   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00438 
00439   /* Perform matrix multiplication. */
00440   abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
00441   abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
00442   memcpy(a,set->z,length*sizeof(double _Complex));
00443 
00444   /* Compute Chebyshev-coefficients using a DCT-II. */
00445   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00446   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00447 }
00448 
00449 static inline void fpt_do_step_t_symmetric_l(double _Complex  *a,
00450   double _Complex *b, double *a21, double *a22,
00451   double *x, double gam, int tau, fpt_set set)
00452 {
00454   int length = 1<<(tau+1);
00456   double norm = 1.0/(length<<1);
00457 
00458   /* Compute function values from Chebyshev-coefficients using a DCT-III. */
00459   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
00460   fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
00461 
00462   /* Perform matrix multiplication. */
00463   abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
00464   abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
00465   memcpy(a,set->z,length*sizeof(double _Complex));
00466 
00467   /* Compute Chebyshev-coefficients using a DCT-II. */
00468   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
00469   fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
00470 }
00471 
00472 
00473 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
00474   const double *beta, const double *gam)
00475 {
00476   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00477    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00478    */
00479   int i,j;
00480   double a,b,x_val_act,a_old;
00481   const double *x_act;
00482   double *y_act;
00483   const double *alpha_act, *beta_act, *gamma_act;
00484 
00485   /* Traverse all nodes. */
00486   x_act = x;
00487   y_act = y;
00488   for (i = 0; i < size; i++)
00489   {
00490     a = 1.0;
00491     b = 0.0;
00492     x_val_act = *x_act;
00493 
00494     if (k == 0)
00495     {
00496       *y_act = 1.0;
00497     }
00498     else
00499     {
00500       alpha_act = &(alpha[k]);
00501       beta_act = &(beta[k]);
00502       gamma_act = &(gam[k]);
00503       for (j = k; j > 1; j--)
00504       {
00505         a_old = a;
00506         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00507          b = a_old*(*gamma_act);
00508         alpha_act--;
00509         beta_act--;
00510         gamma_act--;
00511       }
00512       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00513     }
00514     x_act++;
00515     y_act++;
00516   }
00517 }
00518 
00519 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
00520   const double *beta, const double *gam)
00521 {
00522   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00523    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00524    */
00525   int i,j;
00526   double a,b,x_val_act,a_old;
00527   const double *x_act;
00528   double *y_act, *z_act;
00529   const double *alpha_act, *beta_act, *gamma_act;
00530 
00531   /* Traverse all nodes. */
00532   x_act = x;
00533   y_act = y;
00534   z_act = z;
00535   for (i = 0; i < size; i++)
00536   {
00537     a = 1.0;
00538     b = 0.0;
00539     x_val_act = *x_act;
00540 
00541     if (k == 0)
00542     {
00543       *y_act = 1.0;
00544       *z_act = 0.0;
00545     }
00546     else
00547     {
00548       alpha_act = &(alpha[k]);
00549       beta_act = &(beta[k]);
00550       gamma_act = &(gam[k]);
00551       for (j = k; j > 1; j--)
00552       {
00553         a_old = a;
00554         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00555         b = a_old*(*gamma_act);
00556         alpha_act--;
00557         beta_act--;
00558         gamma_act--;
00559       }
00560       if (i < size1)
00561         *z_act = a;
00562       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00563     }
00564 
00565     x_act++;
00566     y_act++;
00567     z_act++;
00568   }
00569   /*gamma_act++;
00570   FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a");
00571   fprintf(f,"size1: %10d, size: %10d\n",size1,size);
00572   fclose(f);*/
00573 }
00574 
00575 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
00576   const double *alpha, const double *beta, const double *gam, const
00577   double threshold)
00578 {
00579   /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
00580    * of knots  x[0], ..., x[size-1] by the Clenshaw algorithm
00581    */
00582   int i,j;
00583   double a,b,x_val_act,a_old;
00584   const double *x_act;
00585   double *y_act, *z_act;
00586   const double *alpha_act, *beta_act, *gamma_act;
00587   R max = -nfft_float_property(NFFT_R_MAX);
00588   const R t = LOG10(FABS(threshold));
00589 
00590   /* Traverse all nodes. */
00591   x_act = x;
00592   y_act = y;
00593   z_act = z;
00594   for (i = 0; i < size; i++)
00595   {
00596     a = 1.0;
00597     b = 0.0;
00598     x_val_act = *x_act;
00599 
00600     if (k == 0)
00601     {
00602      *y_act = 1.0;
00603      *z_act = 0.0;
00604     }
00605     else
00606     {
00607       alpha_act = &(alpha[k]);
00608       beta_act = &(beta[k]);
00609       gamma_act = &(gam[k]);
00610       for (j = k; j > 1; j--)
00611       {
00612         a_old = a;
00613         a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
00614          b = a_old*(*gamma_act);
00615         alpha_act--;
00616         beta_act--;
00617         gamma_act--;
00618       }
00619       *z_act = a;
00620       *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
00621       max = FMAX(max,LOG10(FABS(*y_act)));
00622       if (max > t)
00623         return 1;
00624     }
00625     x_act++;
00626     y_act++;
00627     z_act++;
00628   }
00629   return 0;
00630 }
00631 
00632 static inline void eval_sum_clenshaw_fast(const int N, const int M,
00633   const double _Complex *a, const double *x, double _Complex *y,
00634   const double *alpha, const double *beta, const double *gam,
00635   const double lambda)
00636 {
00637   int j,k;
00638   double _Complex tmp1, tmp2, tmp3;
00639   double xc;
00640   
00641   /*fprintf(stderr, "Executing eval_sum_clenshaw_fast.\n");  
00642   fprintf(stderr, "Before transform:\n");  
00643   for (j = 0; j < N; j++)
00644     fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);  
00645   for (j = 0; j <= M; j++)
00646     fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]);*/
00647 
00648   if (N == 0)
00649     for (j = 0; j <= M; j++)
00650       y[j] = a[0];
00651   else
00652   {
00653     for (j = 0; j <= M; j++)
00654     {
00655 #if 0
00656       xc = x[j];
00657       tmp2 = a[N];
00658       tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
00659       for (k = N-1; k > 0; k--)
00660       {
00661         tmp3 =   a[k-1]
00662                + (alpha[k-1] * xc + beta[k-1]) * tmp1
00663                + gam[k] * tmp2;
00664         tmp2 = tmp1;
00665         tmp1 = tmp3;
00666       }
00667       y[j] = lambda * tmp1;
00668 #else
00669       xc = x[j];
00670       tmp1 = a[N-1];
00671       tmp2 = a[N];
00672       for (k = N-1; k > 0; k--)
00673       {
00674         tmp3 = a[k-1] + tmp2 * gam[k];
00675         tmp2 *= (alpha[k] * xc + beta[k]);
00676         tmp2 += tmp1;
00677         tmp1 = tmp3;
00678         /*if (j == 1515) 
00679         {
00680           fprintf(stderr, "k = %d, tmp1 = %e, tmp2 = %e.\n", k, tmp1, tmp2);  
00681         }*/
00682       }
00683       tmp2 *= (alpha[0] * xc + beta[0]);
00684         //fprintf(stderr, "alpha[0] = %e, beta[0] = %e.\n", alpha[0], beta[0]);  
00685       y[j] = lambda * (tmp2 + tmp1);
00686         //fprintf(stderr, "lambda = %e.\n", lambda);  
00687 #endif
00688     }
00689   }
00690   /*fprintf(stderr, "Before transform:\n");  
00691   for (j = 0; j < N; j++)
00692     fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);  
00693   for (j = 0; j <= M; j++)
00694     fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]);  */
00695 }
00696 
00715 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
00716   double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam,
00717   double lambda)
00718 {
00719   int j,k;
00720   double _Complex* it1 = temp;
00721   double _Complex* it2 = y;
00722   double _Complex aux;
00723 
00724   /* Compute final result by multiplying with the constant lambda */
00725   a[0] = 0.0;
00726   for (j = 0; j <= M; j++)
00727   {
00728     it2[j] = lambda * y[j];
00729     a[0] += it2[j];
00730   }
00731 
00732   if (N > 0)
00733   {
00734     /* Compute final step. */
00735     a[1] = 0.0;
00736     for (j = 0; j <= M; j++)
00737     {
00738       it1[j] = it2[j];
00739       it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
00740       a[1] += it2[j];
00741     }
00742 
00743     for (k = 2; k <= N; k++)
00744     {
00745       a[k] = 0.0;
00746       for (j = 0; j <= M; j++)
00747       {
00748         aux = it1[j];
00749         it1[j] = it2[j];
00750         it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
00751         a[k] += it2[j];
00752       }
00753     }
00754   }
00755 }
00756 
00757 
00758 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
00759 {
00761   int plength;
00763   int tau;
00765   int m;
00766   int k;
00767 #ifdef _OPENMP
00768   int nthreads = nfft_get_omp_num_threads();
00769 #endif
00770 
00771   /* Allocate memory for new DPT set. */
00772   fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
00773 
00774   /* Save parameters in structure. */
00775   set->flags = flags;
00776 
00777   set->M = M;
00778   set->t = t;
00779   set->N = 1<<t;
00780 
00781   /* Allocate memory for M transforms. */
00782   set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
00783 
00784   /* Initialize with NULL pointer. */
00785   for (m = 0; m < set->M; m++)
00786     set->dpt[m].steps = 0;
00787 
00788   /* Create arrays with Chebyshev nodes. */
00789 
00790   /* Initialize array with Chebyshev coefficients for the polynomial x. This
00791    * would be trivially an array containing a 1 as second entry with all other
00792    * coefficients set to zero. In order to compensate for the multiplicative
00793    * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
00794 
00795   /* Allocate memory for array of pointers to node arrays. */
00796   set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
00797   /* For each polynomial length starting with 4, compute the Chebyshev nodes
00798    * using a DCT-III. */
00799   plength = 4;
00800   for (tau = 1; tau < t+1; tau++)
00801   {
00802     /* Allocate memory for current array. */
00803     set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
00804     for (k = 0; k < plength; k++)
00805     {
00806       set->xcvecs[tau-1][k] = cos(((k+0.5)*PI)/plength);
00807     }
00808     plength = plength << 1;
00809   }
00810 
00812   set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00813   set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
00814 
00815   set->lengths = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int));
00816   set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00817   set->kindsr     = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00818   set->kindsr[0]  = FFTW_REDFT10;
00819   set->kindsr[1]  = FFTW_REDFT10;
00820   for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00821   {
00822     set->lengths[tau] = plength;
00823 #ifdef _OPENMP
00824 #pragma omp critical (nfft_omp_critical_fftw_plan)
00825 {
00826     fftw_plan_with_nthreads(nthreads);
00827 #endif
00828     set->plans_dct2[tau] =
00829       fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00830                          2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
00831                          0);
00832 #ifdef _OPENMP
00833 }
00834 #endif
00835   }
00836 
00837   /* Check if fast transform is activated. */
00838   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00839   {
00841     set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00842     set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00843     set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
00844 
00846     set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
00847     set->kinds      = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
00848     set->kinds[0]   = FFTW_REDFT01;
00849     set->kinds[1]   = FFTW_REDFT01;
00850     for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
00851     {
00852       set->lengths[tau] = plength;
00853 #ifdef _OPENMP
00854 #pragma omp critical (nfft_omp_critical_fftw_plan)
00855 {
00856     fftw_plan_with_nthreads(nthreads);
00857 #endif
00858       set->plans_dct3[tau] =
00859         fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
00860                            2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
00861                            0);
00862 #ifdef _OPENMP
00863 }
00864 #endif
00865     }
00866     nfft_free(set->lengths);
00867     nfft_free(set->kinds);
00868     nfft_free(set->kindsr);
00869     set->lengths = NULL;
00870     set->kinds = NULL;
00871     set->kindsr = NULL;
00872   }
00873 
00874   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
00875   {
00876     set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
00877     set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
00878   }
00879 
00880   /* Return the newly created DPT set. */
00881   return set;
00882 }
00883 
00884 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
00885   double *gam, int k_start, const double threshold)
00886 {
00887 
00888   int tau;          
00889   int l;            
00890   int plength;      
00892   int degree;       
00894   int firstl;       
00895   int lastl;        
00896   int plength_stab; 
00898   int degree_stab;  
00900   double *a11;      
00902   double *a12;      
00904   double *a21;      
00906   double *a22;      
00908   const double *calpha;
00909   const double *cbeta;
00910   const double *cgamma;
00911   int needstab = 0; 
00912   int k_start_tilde;
00913   int N_tilde;
00914   int clength;
00915   int clength_1;
00916   int clength_2;
00917   int t_stab, N_stab;
00918   fpt_data *data;
00919 
00920   /* Get pointer to DPT data. */
00921   data = &(set->dpt[m]);
00922 
00923   /* Check, if already precomputed. */
00924   if (data->steps != NULL)
00925     return;
00926 
00927   /* Save k_start. */
00928   data->k_start = k_start;
00929 
00930   data->gamma_m1 = gam[0];
00931 
00932   /* Check if fast transform is activated. */
00933   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
00934   {
00935     /* Save recursion coefficients. */
00936     data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00937     data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00938     data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
00939 
00940     for (tau = 2; tau <= set->t; tau++)
00941     {
00942 
00943       data->alphaN[tau-2] = alpha[1<<tau];
00944       data->betaN[tau-2] = beta[1<<tau];
00945       data->gammaN[tau-2] = gam[1<<tau];
00946     }
00947 
00948     data->alpha_0 = alpha[1];
00949     data->beta_0 = beta[1];
00950 
00951     k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
00952       /*set->N*/);
00953     N_tilde = N_TILDE(set->N);
00954 
00955     /* Allocate memory for the cascade with t = log_2(N) many levels. */
00956     data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
00957 
00958     /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
00959     plength = 4;
00960     for (tau = 1; tau < set->t; tau++)
00961     {
00962       /* Compute auxilliary values. */
00963       degree = plength>>1;
00964       /* Compute first l. */
00965       firstl = FIRST_L(k_start_tilde,plength);
00966       /* Compute last l. */
00967       lastl = LAST_L(N_tilde,plength);
00968 
00969       /* Allocate memory for current level. This level will contain 2^{t-tau-1}
00970        * many matrices. */
00971       data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
00972                          * (lastl+1));
00973 
00974       /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
00975       for (l = firstl; l <= lastl; l++)
00976       {
00977         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
00978         {
00979           //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
00980           //fflush(stderr);
00981           clength = plength/2;
00982         }
00983         else
00984         {
00985           clength = plength;
00986         }
00987 
00988         /* Allocate memory for the components of U_{n,tau,l}. */
00989         a11 = (double*) nfft_malloc(sizeof(double)*clength);
00990         a12 = (double*) nfft_malloc(sizeof(double)*clength);
00991         a21 = (double*) nfft_malloc(sizeof(double)*clength);
00992         a22 = (double*) nfft_malloc(sizeof(double)*clength);
00993 
00994         /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
00995          * nodes. */
00996 
00997         /* Get the pointers to the three-term recurrence coeffcients. */
00998         calpha = &(alpha[plength*l+1+1]);
00999         cbeta = &(beta[plength*l+1+1]);
01000         cgamma = &(gam[plength*l+1+1]);
01001 
01002         if (set->flags & FPT_NO_STABILIZATION)
01003         {
01004           /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
01005           calpha--;
01006           cbeta--;
01007           cgamma--;
01008           eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
01009             cgamma);
01010           eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
01011             cgamma);
01012           needstab = 0;
01013         }
01014         else
01015         {
01016           calpha--;
01017           cbeta--;
01018           cgamma--;
01019           /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
01020           needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
01021             degree-1, calpha, cbeta, cgamma, threshold);
01022           if (needstab == 0)
01023           {
01024             /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
01025             needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
01026               degree, calpha, cbeta, cgamma, threshold);
01027           }
01028         }
01029         
01030 
01031         /* Check if stabilization needed. */
01032         if (needstab == 0)
01033         {
01034           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01035           data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
01036           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01037           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01038           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01039           /* No stabilization needed. */
01040           data->steps[tau][l].a11[0] = a11;
01041           data->steps[tau][l].a12[0] = a12;
01042           data->steps[tau][l].a21[0] = a21;
01043           data->steps[tau][l].a22[0] = a22;
01044           data->steps[tau][l].g[0] = gam[plength*l+1+1];
01045           data->steps[tau][l].stable = true;
01046         }
01047         else
01048         {
01049           /* Stabilize. */
01050           degree_stab = degree*(2*l+1);
01051           X(next_power_of_2_exp)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
01052 
01053           /* Old arrays are to small. */
01054           nfft_free(a11);
01055           nfft_free(a12);
01056           nfft_free(a21);
01057           nfft_free(a22);
01058 
01059           data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
01060           data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
01061           data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
01062           data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
01063           data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
01064 
01065           plength_stab = N_stab;
01066 
01067           if (set->flags & FPT_AL_SYMMETRY)
01068           {
01069             if (m <= 1)
01070             {
01071               /* This should never be executed */
01072               clength_1 = plength_stab;
01073               clength_2 = plength_stab;
01074               /* Allocate memory for arrays. */
01075               a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01076               a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01077               a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01078               a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01079               /* Get the pointers to the three-term recurrence coeffcients. */
01080               calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01081               eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
01082                 clength_2, degree_stab-1, calpha, cbeta, cgamma);
01083               eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
01084                 clength_2, degree_stab+0, calpha, cbeta, cgamma);
01085             }
01086             else
01087             {
01088               clength = plength_stab/2;
01089               if (m%2 == 0)
01090               {
01091                 a11 = (double*) nfft_malloc(sizeof(double)*clength);
01092                 a12 = (double*) nfft_malloc(sizeof(double)*clength);
01093                 a21 = 0;
01094                 a22 = 0;
01095                 calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
01096                 eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
01097                   degree_stab-2, calpha, cbeta, cgamma);
01098                 eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
01099                   degree_stab-1, calpha, cbeta, cgamma);
01100               }
01101               else
01102               {
01103                 a11 = 0;
01104                 a12 = 0;
01105                 a21 = (double*) nfft_malloc(sizeof(double)*clength);
01106                 a22 = (double*) nfft_malloc(sizeof(double)*clength);
01107                 calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
01108                 eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
01109                   degree_stab-1,calpha, cbeta, cgamma);
01110                 eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
01111                   degree_stab+0, calpha, cbeta, cgamma);
01112               }
01113             }
01114           }
01115           else
01116           {
01117             clength_1 = plength_stab;
01118             clength_2 = plength_stab;
01119             a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
01120             a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
01121             a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
01122             a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
01123             calpha = &(alpha[2]);
01124             cbeta = &(beta[2]);
01125             cgamma = &(gam[2]);
01126             calpha--;
01127             cbeta--;
01128             cgamma--;
01129             eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
01130               calpha, cbeta, cgamma);
01131             eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
01132               calpha, cbeta, cgamma);
01133 
01134           }
01135           data->steps[tau][l].a11[0] = a11;
01136           data->steps[tau][l].a12[0] = a12;
01137           data->steps[tau][l].a21[0] = a21;
01138           data->steps[tau][l].a22[0] = a22;
01139 
01140           data->steps[tau][l].g[0] =  gam[1+1];
01141           data->steps[tau][l].stable = false;
01142           data->steps[tau][l].ts = t_stab;
01143           data->steps[tau][l].Ns = N_stab;
01144         }
01145       }
01147       plength = plength << 1;
01148     }
01149   }
01150 
01151   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01152   {
01153     /* Check, if recurrence coefficients must be copied. */
01154     if (set->flags & FPT_PERSISTENT_DATA)
01155     {
01156       data->_alpha = (double*) alpha;
01157       data->_beta = (double*) beta;
01158       data->_gamma = (double*) gam;
01159     }
01160     else
01161     {
01162       data->_alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
01163       data->_beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
01164       data->_gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
01165       memcpy(data->_alpha,alpha,(set->N+1)*sizeof(double));
01166       memcpy(data->_beta,beta,(set->N+1)*sizeof(double));
01167       memcpy(data->_gamma,gam,(set->N+1)*sizeof(double));
01168     }
01169   }
01170 }
01171 
01172 void fpt_trafo_direct(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01173   const int k_end, const unsigned int flags)
01174 {
01175   int j;
01176   fpt_data *data = &(set->dpt[m]);
01177   int Nk;
01178   int tk;
01179   double norm;
01180   
01181     //fprintf(stderr, "Executing dpt.\n");  
01182 
01183   X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
01184   norm = 2.0/(Nk<<1);
01185 
01186     //fprintf(stderr, "Norm = %e.\n", norm);  
01187 
01188   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01189   {
01190     return;
01191   }
01192 
01193   if (flags & FPT_FUNCTION_VALUES)
01194   {
01195     /* Fill array with Chebyshev nodes. */
01196     for (j = 0; j <= k_end; j++)
01197     {
01198       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01199         //fprintf(stderr, "x[%4d] = %e.\n", j, set->xc_slow[j]);  
01200     }
01201 
01202     memset(set->result,0U,data->k_start*sizeof(double _Complex));
01203     memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01204 
01205     /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
01206       y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
01207       data->gamma_m1);*/
01208     eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
01209       y, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], data->gamma_m1);
01210   }
01211   else
01212   {
01213     memset(set->temp,0U,data->k_start*sizeof(double _Complex));
01214     memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
01215 
01216     eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01217       set->result, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01218       data->gamma_m1);
01219 
01220     fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
01221       (double*)set->result);
01222 
01223     set->result[0] *= 0.5;
01224     for (j = 0; j < Nk; j++)
01225     {
01226       set->result[j] *= norm;
01227     }
01228 
01229     memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
01230   }
01231 }
01232 
01233 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
01234   const int k_end, const unsigned int flags)
01235 {
01236   /* Get transformation data. */
01237   fpt_data *data = &(set->dpt[m]);
01239   int Nk;
01241   int tk;
01243   int k_start_tilde;
01245   int k_end_tilde;
01246 
01248   int tau;
01250   int firstl;
01252   int lastl;
01254   int l;
01256   int plength;
01258   int plength_stab;
01259   int t_stab;
01261   fpt_step *step;
01263   fftw_plan plan = 0;
01264   int length = k_end+1;
01265   fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
01266 
01268   int k;
01269 
01270   double _Complex *work_ptr;
01271   const double _Complex *x_ptr;
01272 
01273   /* Check, if slow transformation should be used due to small bandwidth. */
01274   if (k_end < FPT_BREAK_EVEN)
01275   {
01276     /* Use NDSFT. */
01277     fpt_trafo_direct(set, m, x, y, k_end, flags);
01278     return;
01279   }
01280 
01281   X(next_power_of_2_exp)(k_end,&Nk,&tk);
01282   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01283   k_end_tilde = K_END_TILDE(k_end,Nk);
01284 
01285   /* Check if fast transform is activated. */
01286   if (set->flags & FPT_NO_FAST_ALGORITHM)
01287     return;
01288 
01289   if (flags & FPT_FUNCTION_VALUES)
01290   {
01291 #ifdef _OPENMP
01292     int nthreads = nfft_get_omp_num_threads();
01293 #pragma omp critical (nfft_omp_critical_fftw_plan)
01294 {
01295     fftw_plan_with_nthreads(nthreads);
01296 #endif
01297     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01298       (double*)set->work, NULL, 2, 1, kinds, 0U);
01299 #ifdef _OPENMP
01300 }
01301 #endif
01302   }
01303 
01304   /* Initialize working arrays. */
01305   memset(set->result,0U,2*Nk*sizeof(double _Complex));
01306 
01307   /* The first step. */
01308 
01309   /* Set the first 2*data->k_start coefficients to zero. */
01310   memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
01311 
01312   work_ptr = &set->work[2*data->k_start];
01313   x_ptr = x;
01314 
01315   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01316   {
01317     *work_ptr++ = *x_ptr++;
01318     *work_ptr++ = K(0.0);
01319   }
01320 
01321   /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
01322   memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
01323 
01324   /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
01325    * x_{Nk-1} and x_{Nk-2}. */
01326   if (k_end == Nk)
01327   {
01328     set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
01329     set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
01330     set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
01331   }
01332 
01333   /* Compute the remaining steps. */
01334   plength = 4;
01335   for (tau = 1; tau < tk; tau++)
01336   {
01337     /* Compute first l. */
01338     firstl = FIRST_L(k_start_tilde,plength);
01339     /* Compute last l. */
01340     lastl = LAST_L(k_end_tilde,plength);
01341 
01342     /* Compute the multiplication steps. */
01343     for (l = firstl; l <= lastl; l++)
01344     {
01345       /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
01346       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
01347       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
01348       memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
01349       memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
01350 
01351       /* Copy coefficients into first half. */
01352       memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
01353       memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
01354       memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
01355 
01356       /* Get matrix U_{n,tau,l} */
01357       step = &(data->steps[tau][l]);
01358 
01359       /* Check if step is stable. */
01360       if (step->stable)
01361       {
01362         /* Check, if we should do a symmetrizised step. */
01363         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01364         {
01365           /*for (k = 0; k < plength; k++)
01366           {
01367             fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
01368               step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
01369           }*/
01370           /* Multiply third and fourth polynomial with matrix U. */
01371           //fprintf(stderr,"\nhallo\n");
01372           fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
01373             step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
01374         }
01375         else
01376         {
01377           /* Multiply third and fourth polynomial with matrix U. */
01378           fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01379             step->a21[0], step->a22[0], step->g[0], tau, set);
01380         }
01381 
01382         if (step->g[0] != 0.0)
01383         {
01384           for (k = 0; k < plength; k++)
01385           {
01386             set->work[plength*2*l+k] += set->vec3[k];
01387           }
01388         }
01389         for (k = 0; k < plength; k++)
01390         {
01391           set->work[plength*(2*l+1)+k] += set->vec4[k];
01392         }
01393       }
01394       else
01395       {
01396         /* Stabilize. */
01397 
01398         /* The lengh of the polynomials */
01399         plength_stab = step->Ns;
01400         t_stab = step->ts;
01401 
01402         /*---------*/
01403         /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
01404         fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
01405         fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
01406         fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
01407         /*---------*/
01408 
01409         /* Set rest of vectors explicitely to zero */
01410         /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
01411           plength, plength_stab);*/
01412         memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01413         memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
01414 
01415         /* Multiply third and fourth polynomial with matrix U. */
01416         /* Check for symmetry. */
01417         if (set->flags & FPT_AL_SYMMETRY)
01418         {
01419           if (m <= 1)
01420           {
01421             fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01422               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01423           }
01424           else if (m%2 == 0)
01425           {
01426             /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01427               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01428             fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01429               step->a21[0], step->a22[0],
01430               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01431             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01432               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01433           }
01434           else
01435           {
01436             /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
01437               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01438             fpt_do_step_symmetric_l(set->vec3, set->vec4,
01439               step->a11[0], step->a12[0],
01440               step->a21[0],
01441               step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01442             /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01443               step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
01444           }
01445         }
01446         else
01447         {
01448             fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
01449               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01450         }
01451 
01452         if (step->g[0] != 0.0)
01453         {
01454           for (k = 0; k < plength_stab; k++)
01455           {
01456             set->result[k] += set->vec3[k];
01457           }
01458         }
01459 
01460         for (k = 0; k < plength_stab; k++)
01461         {
01462           set->result[Nk+k] += set->vec4[k];
01463         }
01464       }
01465     }
01466     /* Double length of polynomials. */
01467     plength = plength<<1;
01468 
01469     /*--------*/
01470     /*for (k = 0; k < 2*Nk; k++)
01471     {
01472       fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
01473         k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
01474         cimag(set->result[k]));
01475     }*/
01476     /*--------*/
01477   }
01478 
01479   /* Add the resulting cascade coeffcients to the coeffcients accumulated from
01480    * the stabilization steps. */
01481   for (k = 0; k < 2*Nk; k++)
01482   {
01483     set->result[k] += set->work[k];
01484   }
01485 
01486   /* The last step. Compute the Chebyshev coeffcients c_k^n from the
01487    * polynomials in front of P_0^n and P_1^n. */
01488   y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
01489     data->alpha_0*set->result[Nk+1]*0.5);
01490   y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
01491     data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
01492   y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
01493     data->beta_0*set->result[Nk+k_end-1] +
01494     data->alpha_0*set->result[Nk+k_end-2]*0.5);
01495   y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
01496   for (k = 2; k <= k_end-2; k++)
01497   {
01498     y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
01499       data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
01500   }
01501 
01502   if (flags & FPT_FUNCTION_VALUES)
01503   {
01504     y[0] *= 2.0;
01505     fftw_execute_r2r(plan,(double*)y,(double*)y);
01506 #pragma omp critical (nfft_omp_critical_fftw_plan)
01507     fftw_destroy_plan(plan);
01508     for (k = 0; k <= k_end; k++)
01509     {
01510       y[k] *= 0.5;
01511     }
01512   }
01513 }
01514 
01515 void fpt_transposed_direct(fpt_set set, const int m, double _Complex *x,
01516   double _Complex *y, const int k_end, const unsigned int flags)
01517 {
01518   int j;
01519   fpt_data *data = &(set->dpt[m]);
01520   int Nk;
01521   int tk;
01522   double norm;
01523 
01524   X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
01525   norm = 2.0/(Nk<<1);
01526 
01527   if (set->flags & FPT_NO_DIRECT_ALGORITHM)
01528   {
01529     return;
01530   }
01531 
01532   if (flags & FPT_FUNCTION_VALUES)
01533   {
01534     for (j = 0; j <= k_end; j++)
01535     {
01536       set->xc_slow[j] = cos((PI*(j+0.5))/(k_end+1));
01537     }
01538 
01539     eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
01540       y, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01541       data->gamma_m1);
01542 
01543     memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
01544       sizeof(double _Complex));
01545   }
01546   else
01547   {
01548     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01549     memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
01550 
01551     for (j = 0; j < Nk; j++)
01552     {
01553       set->result[j] *= norm;
01554     }
01555 
01556     fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
01557       (double*)set->result);
01558 
01559     eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
01560       set->result, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
01561       data->gamma_m1);
01562 
01563     memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
01564   }
01565 }
01566 
01567 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
01568   double _Complex *y, const int k_end, const unsigned int flags)
01569 {
01570   /* Get transformation data. */
01571   fpt_data *data = &(set->dpt[m]);
01573   int Nk;
01575   int tk;
01577   int k_start_tilde;
01579   int k_end_tilde;
01580 
01582   int tau;
01584   int firstl;
01586   int lastl;
01588   int l;
01590   int plength;
01592   int plength_stab;
01594   fpt_step *step;
01596   fftw_plan plan;
01597   int length = k_end+1;
01598   fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
01600   int k;
01601   int t_stab;
01602 
01603   /* Check, if slow transformation should be used due to small bandwidth. */
01604   if (k_end < FPT_BREAK_EVEN)
01605   {
01606     /* Use NDSFT. */
01607     fpt_transposed_direct(set, m, x, y, k_end, flags);
01608     return;
01609   }
01610 
01611   X(next_power_of_2_exp)(k_end,&Nk,&tk);
01612   k_start_tilde = K_START_TILDE(data->k_start,Nk);
01613   k_end_tilde = K_END_TILDE(k_end,Nk);
01614 
01615   /* Check if fast transform is activated. */
01616   if (set->flags & FPT_NO_FAST_ALGORITHM)
01617   {
01618     return;
01619   }
01620 
01621   if (flags & FPT_FUNCTION_VALUES)
01622   {
01623 #ifdef _OPENMP
01624     int nthreads = nfft_get_omp_num_threads();
01625 #pragma omp critical (nfft_omp_critical_fftw_plan)
01626 {
01627     fftw_plan_with_nthreads(nthreads);
01628 #endif
01629     plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
01630       (double*)set->work, NULL, 2, 1, kinds, 0U);
01631 #ifdef _OPENMP
01632 }
01633 #endif
01634     fftw_execute_r2r(plan,(double*)y,(double*)set->result);
01635 #pragma omp critical (nfft_omp_critical_fftw_plan)
01636     fftw_destroy_plan(plan);
01637     for (k = 0; k <= k_end; k++)
01638     {
01639       set->result[k] *= 0.5;
01640     }
01641   }
01642   else
01643   {
01644     memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
01645   }
01646 
01647   /* Initialize working arrays. */
01648   memset(set->work,0U,2*Nk*sizeof(double _Complex));
01649 
01650   /* The last step is now the first step. */
01651   for (k = 0; k <= k_end; k++)
01652   {
01653     set->work[k] = data->gamma_m1*set->result[k];
01654   }
01655   //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex));
01656 
01657   set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
01658     data->alpha_0*set->result[1]);
01659   for (k = 1; k < k_end; k++)
01660   {
01661     set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
01662       data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
01663   }
01664   if (k_end<Nk)
01665   {
01666     memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
01667   }
01668 
01670   memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
01671 
01672   /* Compute the remaining steps. */
01673   plength = Nk;
01674   for (tau = tk-1; tau >= 1; tau--)
01675   {
01676     /* Compute first l. */
01677     firstl = FIRST_L(k_start_tilde,plength);
01678     /* Compute last l. */
01679     lastl = LAST_L(k_end_tilde,plength);
01680 
01681     /* Compute the multiplication steps. */
01682     for (l = firstl; l <= lastl; l++)
01683     {
01684       /* Initialize second half of coefficient arrays with zeros. */
01685       memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
01686       memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
01687 
01688       memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
01689         (plength/2)*sizeof(double _Complex));
01690 
01691       /* Get matrix U_{n,tau,l} */
01692       step = &(data->steps[tau][l]);
01693 
01694       /* Check if step is stable. */
01695       if (step->stable)
01696       {
01697         if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
01698         {
01699           /* Multiply third and fourth polynomial with matrix U. */
01700           fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01701             step->a21[0], step->a22[0], step->g[0], tau, set);
01702         }
01703         else
01704         {
01705           /* Multiply third and fourth polynomial with matrix U. */
01706           fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01707             step->a21[0], step->a22[0], step->g[0], tau, set);
01708         }
01709         memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
01710 
01711         for (k = 0; k < plength; k++)
01712         {
01713           set->work[plength*(4*l+2)/2+k] = set->vec3[k];
01714         }
01715       }
01716       else
01717       {
01718         /* Stabilize. */
01719         plength_stab = step->Ns;
01720         t_stab = step->ts;
01721 
01722         memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
01723         memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
01724 
01725         /* Multiply third and fourth polynomial with matrix U. */
01726         if (set->flags & FPT_AL_SYMMETRY)
01727         {
01728           if (m <= 1)
01729           {
01730             fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
01731               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01732           }
01733           else if (m%2 == 0)
01734           {
01735             fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
01736               set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01737           }
01738           else
01739           {
01740             fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
01741               step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
01742           }
01743         }
01744         else
01745         {
01746             fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
01747               step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
01748         }
01749 
01750         memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
01751 
01752         for (k = 0; k < plength; k++)
01753         {
01754           set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
01755         }
01756        }
01757     }
01758     /* Half the length of polynomial arrays. */
01759     plength = plength>>1;
01760   }
01761 
01762   /* First step */
01763   for (k = 0; k <= k_end_tilde-data->k_start; k++)
01764   {
01765     x[k] = set->work[2*(data->k_start+k)];
01766   }
01767   if (k_end == Nk)
01768   {
01769     x[Nk-data->k_start] =
01770         data->gammaN[tk-2]*set->work[2*(Nk-2)]
01771       + data->betaN[tk-2] *set->work[2*(Nk-1)]
01772       + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
01773   }
01774 }
01775 
01776 void fpt_finalize(fpt_set set)
01777 {
01778   int tau;
01779   int l;
01780   int m;
01781   fpt_data *data;
01782   int k_start_tilde;
01783   int N_tilde;
01784   int firstl, lastl;
01785   int plength;
01786   const int M = set->M;
01787 
01788   /* TODO Clean up DPT transform data structures. */
01789   for (m = 0; m < M; m++)
01790   {
01791     /* Check if precomputed. */
01792     data = &set->dpt[m];
01793     if (data->steps != (fpt_step**)NULL)
01794     {
01795       nfft_free(data->alphaN);
01796       nfft_free(data->betaN);
01797       nfft_free(data->gammaN);
01798       data->alphaN = NULL;
01799       data->betaN = NULL;
01800       data->gammaN = NULL;
01801 
01802       /* Free precomputed data. */
01803       k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
01804         /*set->N*/);
01805       N_tilde = N_TILDE(set->N);
01806       plength = 4;
01807       for (tau = 1; tau < set->t; tau++)
01808       {
01809         /* Compute first l. */
01810         firstl = FIRST_L(k_start_tilde,plength);
01811         /* Compute last l. */
01812         lastl = LAST_L(N_tilde,plength);
01813 
01814         /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
01815         for (l = firstl; l <= lastl; l++)
01816         {
01817           /* Free components. */
01818           nfft_free(data->steps[tau][l].a11[0]);
01819           nfft_free(data->steps[tau][l].a12[0]);
01820           nfft_free(data->steps[tau][l].a21[0]);
01821           nfft_free(data->steps[tau][l].a22[0]);
01822           data->steps[tau][l].a11[0] = NULL;
01823           data->steps[tau][l].a12[0] = NULL;
01824           data->steps[tau][l].a21[0] = NULL;
01825           data->steps[tau][l].a22[0] = NULL;
01826           /* Free components. */
01827           nfft_free(data->steps[tau][l].a11);
01828           nfft_free(data->steps[tau][l].a12);
01829           nfft_free(data->steps[tau][l].a21);
01830           nfft_free(data->steps[tau][l].a22);
01831           nfft_free(data->steps[tau][l].g);
01832           data->steps[tau][l].a11 = NULL;
01833           data->steps[tau][l].a12 = NULL;
01834           data->steps[tau][l].a21 = NULL;
01835           data->steps[tau][l].a22 = NULL;
01836           data->steps[tau][l].g = NULL;
01837         }
01838         /* Free pointers for current level. */
01839         nfft_free(data->steps[tau]);
01840         data->steps[tau] = NULL;
01841         /* Double length of polynomials. */
01842         plength = plength<<1;
01843       }
01844       /* Free steps. */
01845       nfft_free(data->steps);
01846       data->steps = NULL;
01847     }
01848 
01849     if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01850     {
01851       /* Check, if recurrence coefficients must be copied. */
01852       //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
01853       if (!(set->flags & FPT_PERSISTENT_DATA))
01854       {
01855         nfft_free(data->_alpha);
01856         nfft_free(data->_beta);
01857         nfft_free(data->_gamma);
01858       }
01859       data->_alpha = NULL;
01860       data->_beta = NULL;
01861       data->_gamma = NULL;
01862     }
01863   }
01864 
01865   /* Delete array of DPT transform data. */
01866   nfft_free(set->dpt);
01867   set->dpt = NULL;
01868 
01869   for (tau = 1; tau < set->t+1; tau++)
01870   {
01871     nfft_free(set->xcvecs[tau-1]);
01872     set->xcvecs[tau-1] = NULL;
01873   }
01874   nfft_free(set->xcvecs);
01875   set->xcvecs = NULL;
01876 
01877   /* Free auxilliary arrays. */
01878   nfft_free(set->work);
01879   nfft_free(set->result);
01880 
01881   /* Check if fast transform is activated. */
01882   if (!(set->flags & FPT_NO_FAST_ALGORITHM))
01883   {
01884     /* Free auxilliary arrays. */
01885     nfft_free(set->vec3);
01886     nfft_free(set->vec4);
01887     nfft_free(set->z);
01888     set->work = NULL;
01889     set->result = NULL;
01890     set->vec3 = NULL;
01891     set->vec4 = NULL;
01892     set->z = NULL;
01893 
01894     /* Free FFTW plans. */
01895     for(tau = 0; tau < set->t/*-1*/; tau++)
01896     {
01897 #pragma omp critical (nfft_omp_critical_fftw_plan)
01898 {
01899       fftw_destroy_plan(set->plans_dct3[tau]);
01900       fftw_destroy_plan(set->plans_dct2[tau]);
01901 }
01902       set->plans_dct3[tau] = NULL;
01903       set->plans_dct2[tau] = NULL;
01904     }
01905 
01906     nfft_free(set->plans_dct3);
01907     nfft_free(set->plans_dct2);
01908     set->plans_dct3 = NULL;
01909     set->plans_dct2 = NULL;
01910   }
01911 
01912   if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
01913   {
01914     /* Delete arrays of Chebyshev nodes. */
01915     nfft_free(set->xc_slow);
01916     set->xc_slow = NULL;
01917     nfft_free(set->temp);
01918     set->temp = NULL;
01919   }
01920 
01921   /* Free DPT set structure. */
01922   nfft_free(set);
01923 }

Generated on Fri Oct 12 2012 by Doxygen 1.8.0-20120409