NFFT Logo 3.2.2
fastsum.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: fastsum.c 3896 2012-10-10 12:19:26Z tovo $ */
00020 
00027 #include "config.h"
00028 
00029 #include <stdlib.h>
00030 #include <math.h>
00031 #ifdef HAVE_COMPLEX_H
00032 #include <complex.h>
00033 #endif
00034 
00035 #include "nfft3util.h"
00036 #include "nfft3.h"
00037 #include "fastsum.h"
00038 #include "infft.h"
00039 
00041 #include "kernels.h"
00042 
00049 int max_i(int a, int b)
00050 {
00051   return a >= b ? a : b;
00052 }
00053 
00055 double fak(int n)
00056 {
00057   if (n<=1) return 1.0;
00058   else return (double)n*fak(n-1);
00059 }
00060 
00062 double binom(int n, int m)
00063 {
00064   return fak(n)/fak(m)/fak(n-m);
00065 }
00066 
00068 double BasisPoly(int m, int r, double xx)
00069 {
00070   int k;
00071   double sum=0.0;
00072 
00073   for (k=0; k<=m-r; k++) {
00074     sum+=binom(m+k,k)*pow((xx+1.0)/2.0,(double)k);
00075   }
00076   return sum*pow((xx+1.0),(double)r)*pow(1.0-xx,(double)(m+1))/(1<<(m+1))/fak(r); /* 1<<(m+1) = 2^(m+1) */
00077 }
00078 
00080 double _Complex regkern(kernel k, double xx, int p, const double *param, double a, double b)
00081 {
00082   int r;
00083   double _Complex sum=0.0;
00084 
00085   if (xx<-0.5)
00086     xx=-0.5;
00087   if (xx>0.5)
00088     xx=0.5;
00089   if ((xx>=-0.5+b && xx<=-a) || (xx>=a && xx<=0.5-b)) {
00090     return k(xx,0,param);
00091   }
00092   else if (xx<-0.5+b) {
00093     sum=(k(-0.5,0,param)+k(0.5,0,param))/2.0
00094         *BasisPoly(p-1,0,2.0*xx/b+(1.0-b)/b);
00095     for (r=0; r<p; r++) {
00096       sum+=pow(-b/2.0,(double)r)
00097           *k(-0.5+b,r,param)
00098           *BasisPoly(p-1,r,-2.0*xx/b+(b-1.0)/b);
00099     }
00100     return sum;
00101   }
00102   else if ((xx>-a) && (xx<a)) {
00103     for (r=0; r<p; r++) {
00104       sum+=pow(a,(double)r)
00105           *( k(-a,r,param)*BasisPoly(p-1,r,xx/a)
00106               +k( a,r,param)*BasisPoly(p-1,r,-xx/a)*(r & 1 ? -1 : 1));
00107     }
00108     return sum;
00109   }
00110   else if (xx>0.5-b) {
00111     sum=(k(-0.5,0,param)+k(0.5,0,param))/2.0
00112         *BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b);
00113     for (r=0; r<p; r++) {
00114       sum+=pow(b/2.0,(double)r)
00115           *k(0.5-b,r,param)
00116           *BasisPoly(p-1,r,2.0*xx/b-(1.0-b)/b);
00117     }
00118     return sum;
00119   }
00120   return k(xx,0,param);
00121 }
00122 
00126 double _Complex regkern1(kernel k, double xx, int p, const double *param, double a, double b)
00127 {
00128   int r;
00129   double _Complex sum=0.0;
00130 
00131   if (xx<-0.5)
00132     xx=-0.5;
00133   if (xx>0.5)
00134     xx=0.5;
00135   if ((xx>=-0.5+b && xx<=-a) || (xx>=a && xx<=0.5-b))
00136   {
00137     return k(xx,0,param);
00138   }
00139   else if ((xx>-a) && (xx<a))
00140   {
00141     for (r=0; r<p; r++) {
00142       sum+=pow(a,(double)r)
00143           *( k(-a,r,param)*BasisPoly(p-1,r,xx/a)
00144               +k( a,r,param)*BasisPoly(p-1,r,-xx/a)*(r & 1 ? -1 : 1));
00145     }
00146     return sum;
00147   }
00148   else if (xx<-0.5+b)
00149   {
00150     for (r=0; r<p; r++) {
00151       sum+=pow(b,(double)r)
00152           *( k(0.5-b,r,param)*BasisPoly(p-1,r,(xx+0.5)/b)
00153               +k(-0.5+b,r,param)*BasisPoly(p-1,r,-(xx+0.5)/b)*(r & 1 ? -1 : 1));
00154     }
00155     return sum;
00156   }
00157   else if (xx>0.5-b)
00158   {
00159     for (r=0; r<p; r++) {
00160       sum+=pow(b,(double)r)
00161           *( k(0.5-b,r,param)*BasisPoly(p-1,r,(xx-0.5)/b)
00162               +k(-0.5+b,r,param)*BasisPoly(p-1,r,-(xx-0.5)/b)*(r & 1 ? -1 : 1));
00163     }
00164     return sum;
00165   }
00166   return k(xx,0,param);
00167 }
00168 
00170 double _Complex regkern2(kernel k, double xx, int p, const double *param, double a, double b)
00171 {
00172   int r;
00173   double _Complex sum=0.0;
00174 
00175   xx=fabs(xx);
00176 
00177   if (xx>0.5) {
00178     for (r=0; r<p; r++) {
00179       sum+=pow(b,(double)r)*k(0.5-b,r,param)
00180           *(BasisPoly(p-1,r,0)+BasisPoly(p-1,r,0));
00181     }
00182     return sum;
00183   }
00184   else if ((a<=xx) && (xx<=0.5-b)) {
00185     return k(xx,0,param);
00186   }
00187   else if (xx<a) {
00188     for (r=0; r<p; r++) {
00189       sum+=pow(-a,(double)r)*k(a,r,param)
00190           *(BasisPoly(p-1,r,xx/a)+BasisPoly(p-1,r,-xx/a));
00191     }
00192     return sum;
00193   }
00194   else if ((0.5-b<xx) && (xx<=0.5)) {
00195     for (r=0; r<p; r++) {
00196       sum+=pow(b,(double)r)*k(0.5-b,r,param)
00197           *(BasisPoly(p-1,r,(xx-0.5)/b)+BasisPoly(p-1,r,-(xx-0.5)/b));
00198     }
00199     return sum;
00200   }
00201   return 0.0;
00202 }
00203 
00207 double _Complex regkern3(kernel k, double xx, int p, const double *param, double a, double b)
00208 {
00209   int r;
00210   double _Complex sum=0.0;
00211 
00212   xx=fabs(xx);
00213 
00214   if (xx>=0.5) {
00215     /*return kern(typ,c,0,0.5);*/
00216     xx=0.5;
00217   }
00218   /* else */
00219   if ((a<=xx) && (xx<=0.5-b)) {
00220     return k(xx,0,param);
00221   }
00222   else if (xx<a) {
00223     for (r=0; r<p; r++) {
00224       sum+=pow(-a,(double)r)*k(a,r,param)
00225           *(BasisPoly(p-1,r,xx/a)+BasisPoly(p-1,r,-xx/a));
00226     }
00227     /*sum=kern(typ,c,0,xx); */
00228     return sum;
00229   }
00230   else if ((0.5-b<xx) && (xx<=0.5)) {
00231     sum=k(0.5,0,param)*BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b);
00232     /* sum=regkern2(typ,c,p,a,b, 0.5)*BasisPoly(p-1,0,-2.0*xx/b+(1.0-b)/b); */
00233     for (r=0; r<p; r++) {
00234       sum+=pow(b/2.0,(double)r)
00235           *k(0.5-b,r,param)
00236           *BasisPoly(p-1,r,2.0*xx/b-(1.0-b)/b);
00237     }
00238     return sum;
00239   }
00240   return 0.0;
00241 }
00242 
00244 double _Complex linintkern(const double x, const double _Complex *Add,
00245   const int Ad, const double a)
00246 {
00247   double c,c1,c3;
00248   int r;
00249   double _Complex f1,f2;
00250 
00251   c=x*Ad/a;
00252   r=c; r=abs(r);
00253   f1=Add[r];f2=Add[r+1];
00254   c=fabs(c);
00255   c1=c-r;
00256   c3=c1-1.0;
00257   return (-f1*c3+f2*c1);
00258 }
00259 
00260 double _Complex quadrintkern(const double x, const double _Complex *Add,
00261   const int Ad, const double a)
00262 {
00263   double c,c1,c2,c3;
00264   int r;
00265   double _Complex f0,f1,f2;
00266 
00267   c=x*Ad/a;
00268   r=c; r=abs(r);
00269   if (r==0) {f0=Add[r+1];f1=Add[r];f2=Add[r+1];}
00270   else { f0=Add[r-1];f1=Add[r];f2=Add[r+1];}
00271   c=fabs(c);
00272   c1=c-r;
00273   c2=c1+1.0;
00274   c3=c1-1.0;
00275   return (f0*c1*c3/2.0-f1*c2*c3+f2*c2*c1/2.0);
00276 }
00277 
00279 double _Complex kubintkern(const double x, const double _Complex *Add,
00280   const int Ad, const double a)
00281 {
00282   double c,c1,c2,c3,c4;
00283   int r;
00284   double _Complex f0,f1,f2,f3;
00285   c=x*Ad/a;
00286   r=c; r=abs(r);
00287   if (r==0) {f0=Add[r+1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00288   else { f0=Add[r-1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00289   c=fabs(c);
00290   c1=c-r;
00291   c2=c1+1.0;
00292   c3=c1-1.0;
00293   c4=c1-2.0;
00294   /* return(-f0*(c-r)*(c-r-1.0)*(c-r-2.0)/6.0+f1*(c-r+1.0)*(c-r-1.0)*(c-r-2.0)/2-
00295      f2*(c-r+1.0)*(c-r)*(c-r-2.0)/2+f3*(c-r+1.0)*(c-r)*(c-r-1.0)/6.0); */
00296   return(-f0*c1*c3*c4/6.0+f1*c2*c3*c4/2.0-f2*c2*c1*c4/2.0+f3*c2*c1*c3/6.0);
00297 }
00298 
00300 double _Complex kubintkern1(const double x, const double _Complex *Add,
00301   const int Ad, const double a)
00302 {
00303   double c,c1,c2,c3,c4;
00304   int r;
00305   double _Complex f0,f1,f2,f3;
00306   Add+=2;
00307   c=(x+a)*Ad/2/a;
00308   r=c; r=abs(r);
00309   /*if (r==0) {f0=Add[r];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00310   else */
00311   { f0=Add[r-1];f1=Add[r];f2=Add[r+1];f3=Add[r+2];}
00312   c=fabs(c);
00313   c1=c-r;
00314   c2=c1+1.0;
00315   c3=c1-1.0;
00316   c4=c1-2.0;
00317   /* return(-f0*(c-r)*(c-r-1.0)*(c-r-2.0)/6.0+f1*(c-r+1.0)*(c-r-1.0)*(c-r-2.0)/2-
00318      f2*(c-r+1.0)*(c-r)*(c-r-2.0)/2+f3*(c-r+1.0)*(c-r)*(c-r-1.0)/6.0); */
00319   return(-f0*c1*c3*c4/6.0+f1*c2*c3*c4/2.0-f2*c2*c1*c4/2.0+f3*c2*c1*c3/6.0);
00320 }
00321 
00323 void quicksort(int d, int t, double *x, double _Complex *alpha, int N)
00324 {
00325   int lpos=0;
00326   int rpos=N-1;
00327   /*double pivot=x[((N-1)/2)*d+t];*/
00328   double pivot=x[(N/2)*d+t];
00329 
00330   int k;
00331   double temp1;
00332   double _Complex temp2;
00333 
00334   while (lpos<=rpos)
00335   {
00336     while (x[lpos*d+t]<pivot)
00337       lpos++;
00338     while (x[rpos*d+t]>pivot)
00339       rpos--;
00340     if (lpos<=rpos)
00341     {
00342       for (k=0; k<d; k++)
00343       {
00344         temp1=x[lpos*d+k];
00345         x[lpos*d+k]=x[rpos*d+k];
00346         x[rpos*d+k]=temp1;
00347       }
00348       temp2=alpha[lpos];
00349       alpha[lpos]=alpha[rpos];
00350       alpha[rpos]=temp2;
00351 
00352       lpos++;
00353       rpos--;
00354     }
00355   }
00356   if (0<rpos)
00357     quicksort(d,t,x,alpha,rpos+1);
00358   if (lpos<N-1)
00359     quicksort(d,t,x+lpos*d,alpha+lpos,N-lpos);
00360 }
00361 
00363 static void BuildBox(fastsum_plan *ths)
00364 {
00365   int t, l;
00366   int *box_index;
00367   double val[ths->d];
00368 
00369   box_index = (int *) malloc(ths->box_count * sizeof(int));
00370   for (t=0; t < ths->box_count; t++)
00371     box_index[t] = 0;
00372 
00373   for (l=0; l < ths->N_total; l++)
00374   {
00375     int ind = 0;
00376     for (t=0; t < ths->d; t++)
00377     {
00378       val[t] = ths->x[ths->d * l + t] + 0.25 - ths->eps_B/2.0;
00379       ind *= ths->box_count_per_dim;
00380       ind += (int) (val[t] / ths->eps_I);
00381     }
00382     box_index[ind]++;
00383   }
00384 
00385   ths->box_offset[0] = 0;
00386   for (t=1; t<=ths->box_count; t++)
00387   {
00388     ths->box_offset[t] = ths->box_offset[t-1] + box_index[t-1];
00389     box_index[t-1] = ths->box_offset[t-1];
00390   }
00391 
00392   for (l=0; l < ths->N_total; l++)
00393   {
00394     int ind = 0;
00395     for (t=0; t < ths->d; t++)
00396     {
00397       val[t] = ths->x[ths->d * l + t] + 0.25 - ths->eps_B/2.0;
00398       ind *= ths->box_count_per_dim;
00399       ind += (int) (val[t] / ths->eps_I);
00400     }
00401 
00402     ths->box_alpha[box_index[ind]] = ths->alpha[l];
00403 
00404     for (t=0; t < ths->d; t++)
00405     {
00406       ths->box_x[ths->d * box_index[ind] + t] = ths->x[ths->d * l + t];
00407     }
00408     box_index[ind]++;
00409   }
00410   free(box_index);
00411 }
00412 
00414 static inline double _Complex calc_SearchBox(int d, double *y, double *x, double _Complex *alpha, int start, int end_lt, const double _Complex *Add, const int Ad, int p, double a, const kernel k, const double *param, const unsigned flags)
00415 {
00416   double _Complex result = 0.0;
00417 
00418   int m, l;
00419   double r;
00420 
00421   for (m = start; m < end_lt; m++)
00422   {
00423       if (d==1)
00424       {
00425         r = y[0]-x[m];
00426       }
00427       else
00428       {
00429         r=0.0;
00430         for (l=0; l<d; l++)
00431           r+=(y[l]-x[m*d+l])*(y[l]-x[m*d+l]);
00432         r=sqrt(r);
00433       }
00434       if (fabs(r)<a)
00435       {
00436         result += alpha[m]*k(r,0,param);              /* alpha*(kern-regkern) */
00437   if (d==1)
00438   {
00439           if (flags & EXACT_NEARFIELD)
00440             result -= alpha[m]*regkern1(k,r,p,param,a,1.0/16.0); /* exact value (in 1D)  */
00441           else
00442             result -= alpha[m]*kubintkern1(r,Add,Ad,a);               /* spline approximation */
00443   }
00444   else
00445   {
00446           if (flags & EXACT_NEARFIELD)
00447             result -= alpha[m]*regkern(k,r,p,param,a,1.0/16.0);  /* exact value (in dD)  */
00448           else
00449 #if defined(NF_KUB)
00450             result -= alpha[m]*kubintkern(r,Add,Ad,a);                /* spline approximation */
00451 #elif defined(NF_QUADR)
00452             result -= alpha[m]*quadrintkern(r,Add,Ad,a);                /* spline approximation */
00453 #elif defined(NF_LIN)
00454             result -= alpha[m]*linintkern(r,Add,Ad,a);                /* spline approximation */
00455 #else
00456   #error define interpolation method
00457 #endif
00458         }
00459       }
00460   }
00461   return result;
00462 }
00463 
00465 static double _Complex SearchBox(double *y, fastsum_plan *ths)
00466 {
00467   double _Complex val = 0.0;
00468   int t, l;
00469   int y_multiind[ths->d];
00470   int multiindex[ths->d];
00471   int y_ind;
00472 
00473   for (t=0; t < ths->d; t++)
00474   {
00475     y_multiind[t] = ((y[t] + 0.25 - ths->eps_B/2.0) / ths->eps_I);
00476   }
00477 
00478   if (ths->d==1)
00479   {
00480     for (y_ind = max_i(0, y_multiind[0]-1); y_ind < ths->box_count_per_dim && y_ind <= y_multiind[0]+1; y_ind++){
00481       val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, ths->box_offset[y_ind], ths->box_offset[y_ind+1], ths->Add, ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
00482       }
00483   }
00484   else if (ths->d==2)
00485   {
00486     for (multiindex[0] = max_i(0, y_multiind[0]-1); multiindex[0] < ths->box_count_per_dim && multiindex[0] <= y_multiind[0]+1; multiindex[0]++)
00487       for (multiindex[1] = max_i(0, y_multiind[1]-1); multiindex[1] < ths->box_count_per_dim && multiindex[1] <= y_multiind[1]+1; multiindex[1]++)
00488       {
00489         y_ind = (ths->box_count_per_dim * multiindex[0]) + multiindex[1];
00490         val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, ths->box_offset[y_ind], ths->box_offset[y_ind+1], ths->Add, ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
00491       }
00492   }
00493   else if(ths->d==3)
00494   {
00495     for (multiindex[0] = max_i(0, y_multiind[0]-1); multiindex[0] < ths->box_count_per_dim && multiindex[0] <= y_multiind[0]+1; multiindex[0]++)
00496       for (multiindex[1] = max_i(0, y_multiind[1]-1); multiindex[1] < ths->box_count_per_dim && multiindex[1] <= y_multiind[1]+1; multiindex[1]++)
00497         for (multiindex[2] = max_i(0, y_multiind[2]-1); multiindex[2] < ths->box_count_per_dim && multiindex[2] <= y_multiind[2]+1; multiindex[2]++)
00498         {
00499           y_ind = ((ths->box_count_per_dim * multiindex[0]) + multiindex[1]) * ths->box_count_per_dim + multiindex[2];
00500           val += calc_SearchBox(ths->d, y, ths->box_x, ths->box_alpha, ths->box_offset[y_ind], ths->box_offset[y_ind+1], ths->Add, ths->Ad, ths->p, ths->eps_I, ths->k, ths->kernel_param, ths->flags);
00501         }
00502   }
00503   else {
00504     exit(-1);
00505   }
00506   return val;
00507 }
00508 
00510 void BuildTree(int d, int t, double *x, double _Complex *alpha, int N)
00511 {
00512   if (N>1)
00513   {
00514     int m=N/2;
00515 
00516     quicksort(d,t,x,alpha,N);
00517 
00518     BuildTree(d, (t+1)%d, x, alpha, m);
00519     BuildTree(d, (t+1)%d, x+(m+1)*d, alpha+(m+1), N-m-1);
00520   }
00521 }
00522 
00524 double _Complex SearchTree(const int d, const int t, const double *x,
00525   const double _Complex *alpha, const double *xmin, const double *xmax,
00526   const int N, const kernel k, const double *param, const int Ad,
00527   const double _Complex *Add, const int p, const unsigned flags)
00528 {
00529   int m=N/2;
00530   double Min=xmin[t], Max=xmax[t], Median=x[m*d+t];
00531   double a=fabs(Max-Min)/2;
00532   int l;
00533   int E=0;
00534   double r;
00535 
00536   if (N==0)
00537     return 0.0;
00538   if (Min>Median)
00539     return SearchTree(d,(t+1)%d,x+(m+1)*d,alpha+(m+1),xmin,xmax,N-m-1,k,param,Ad,Add,p,flags);
00540   else if (Max<Median)
00541     return SearchTree(d,(t+1)%d,x,alpha,xmin,xmax,m,k,param,Ad,Add,p,flags);
00542   else
00543   {
00544     double _Complex result = 0.0;
00545     E=0;
00546 
00547     for (l=0; l<d; l++)
00548     {
00549       if ( x[m*d+l]>xmin[l] && x[m*d+l]<xmax[l] )
00550         E++;
00551     }
00552 
00553     if (E==d)
00554     {
00555       if (d==1)
00556       {
00557         r = xmin[0]+a-x[m];  /* remember: xmin+a = y */
00558       }
00559       else
00560       {
00561         r=0.0;
00562         for (l=0; l<d; l++)
00563           r+=(xmin[l]+a-x[m*d+l])*(xmin[l]+a-x[m*d+l]);  /* remember: xmin+a = y */
00564         r=sqrt(r);
00565       }
00566       if (fabs(r)<a)
00567       {
00568         result += alpha[m]*k(r,0,param);                         /* alpha*(kern-regkern) */
00569         if (d==1)
00570         {
00571           if (flags & EXACT_NEARFIELD)
00572             result -= alpha[m]*regkern1(k,r,p,param,a,1.0/16.0); /* exact value (in 1D)  */
00573           else
00574             result -= alpha[m]*kubintkern1(r,Add,Ad,a);               /* spline approximation */
00575         }
00576         else
00577         {
00578           if (flags & EXACT_NEARFIELD)
00579             result -= alpha[m]*regkern(k,r,p,param,a,1.0/16.0);  /* exact value (in dD)  */
00580           else
00581 #if defined(NF_KUB)
00582             result -= alpha[m]*kubintkern(r,Add,Ad,a);                /* spline approximation */
00583 #elif defined(NF_QUADR)
00584             result -= alpha[m]*quadrintkern(r,Add,Ad,a);                /* spline approximation */
00585 #elif defined(NF_LIN)
00586             result -= alpha[m]*linintkern(r,Add,Ad,a);                /* spline approximation */
00587 #else
00588   #error define interpolation method
00589 #endif
00590         }
00591       }
00592     }
00593     result += SearchTree(d,(t+1)%d,x+(m+1)*d,alpha+(m+1),xmin,xmax,N-m-1,k,param,Ad,Add,p,flags)
00594       + SearchTree(d,(t+1)%d,x,alpha,xmin,xmax,m,k,param,Ad,Add,p,flags);
00595     return result;
00596   }
00597 }
00598 
00600 void fastsum_init_guru(fastsum_plan *ths, int d, int N_total, int M_total, kernel k, double *param, unsigned flags, int nn, int m, int p, double eps_I, double eps_B)
00601 {
00602   int t;
00603   int N[d], n[d];
00604   int n_total;
00605   int sort_flags_trafo = 0;
00606   int sort_flags_adjoint = 0;
00607 #ifdef _OPENMP
00608   int nthreads = nfft_get_omp_num_threads();
00609 #endif
00610 
00611   if (d > 1)
00612   {
00613     sort_flags_trafo = NFFT_SORT_NODES;
00614 #ifdef _OPENMP
00615     sort_flags_adjoint = NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
00616 #else
00617     sort_flags_adjoint = NFFT_SORT_NODES;
00618 #endif
00619   }
00620 
00621   ths->d = d;
00622 
00623   ths->N_total = N_total;
00624   ths->M_total = M_total;
00625 
00626   ths->x = (double *)nfft_malloc(d*N_total*(sizeof(double)));
00627   ths->alpha = (double _Complex *)nfft_malloc(N_total*(sizeof(double _Complex)));
00628 
00629   ths->y = (double *)nfft_malloc(d*M_total*(sizeof(double)));
00630   ths->f = (double _Complex *)nfft_malloc(M_total*(sizeof(double _Complex)));
00631 
00632   ths->k = k;
00633   ths->kernel_param = param;
00634 
00635   ths->flags = flags;
00636 
00637   ths->p = p;
00638   ths->eps_I = eps_I; /* =(double)ths->p/(double)nn; */  
00639   ths->eps_B = eps_B; /* =1.0/16.0; */                   
00642   if (!(ths->flags & EXACT_NEARFIELD))
00643   {
00644     if (ths->d==1)
00645     {
00646       ths->Ad = 4*(ths->p)*(ths->p);
00647       ths->Add = (double _Complex *)nfft_malloc((ths->Ad+5)*(sizeof(double _Complex)));
00648     }
00649     else
00650     {
00651       if (ths->k == one_over_x)
00652       {
00653         double delta = 1e-8;
00654         switch(p)
00655         {
00656           case 2: delta = 1e-3;
00657                   break;
00658           case 3: delta = 1e-4;
00659                   break;
00660           case 4: delta = 1e-5;
00661                   break;
00662           case 5: delta = 1e-6;
00663                   break;
00664           case 6: delta = 1e-6;
00665                   break;
00666           case 7: delta = 1e-7;
00667                   break;
00668           default: delta = 1e-8;
00669         }
00670 
00671 #if defined(NF_KUB)
00672         ths->Ad = max_i(10, (int) ceil(1.4/pow(delta,1.0/4.0)));
00673         ths->Add = (double _Complex *)nfft_malloc((ths->Ad+3)*(sizeof(double _Complex)));
00674 #elif defined(NF_QUADR)
00675         ths->Ad = (int) ceil(2.2/pow(delta,1.0/3.0));
00676         ths->Add = (double _Complex *)nfft_malloc((ths->Ad+3)*(sizeof(double _Complex)));
00677 #elif defined(NF_LIN)
00678         ths->Ad = (int) ceil(1.7/pow(delta,1.0/2.0));
00679         ths->Add = (double _Complex *)nfft_malloc((ths->Ad+3)*(sizeof(double _Complex)));
00680 #else
00681   #error define NF_LIN or NF_QUADR or NF_KUB
00682 #endif
00683       }
00684       else
00685       {
00686         ths->Ad = 2*(ths->p)*(ths->p);
00687         ths->Add = (double _Complex *)nfft_malloc((ths->Ad+3)*(sizeof(double _Complex)));
00688       }
00689     }
00690   }
00691 
00693   ths->n = nn;
00694   for (t=0; t<d; t++)
00695   {
00696     N[t] = nn;
00697     n[t] = 2*nn;
00698   }
00699   nfft_init_guru(&(ths->mv1), d, N, N_total, n, m,
00700                    sort_flags_adjoint |
00701                    PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
00702                    FFTW_MEASURE| FFTW_DESTROY_INPUT);
00703   nfft_init_guru(&(ths->mv2), d, N, M_total, n, m,
00704                    sort_flags_trafo |
00705                    PRE_PHI_HUT| PRE_PSI| MALLOC_X | MALLOC_F_HAT| MALLOC_F| FFTW_INIT | FFT_OUT_OF_PLACE,
00706                    FFTW_MEASURE| FFTW_DESTROY_INPUT);
00707 
00709   n_total = 1;
00710   for (t=0; t<d; t++)
00711     n_total *= nn;
00712 
00713   ths->b = (fftw_complex *)nfft_malloc(n_total*sizeof(fftw_complex));
00714 #ifdef _OPENMP
00715 #pragma omp critical (nfft_omp_critical_fftw_plan)
00716 {
00717   fftw_plan_with_nthreads(nthreads);
00718 #endif
00719 
00720   ths->fft_plan = fftw_plan_dft(d,N,ths->b,ths->b,FFTW_FORWARD,FFTW_ESTIMATE);
00721 
00722 #ifdef _OPENMP
00723 }
00724 #endif
00725 
00726   if (ths->flags & NEARFIELD_BOXES)
00727   {
00728     ths->box_count_per_dim = floor((0.5 - ths->eps_B) / ths->eps_I) + 1;
00729     ths->box_count = 1;
00730     for (t=0; t<ths->d; t++)
00731       ths->box_count *= ths->box_count_per_dim;
00732 
00733     ths->box_offset = (int *) nfft_malloc((ths->box_count+1) * sizeof(int));
00734 
00735     ths->box_alpha = (double _Complex *)nfft_malloc(ths->N_total*(sizeof(double _Complex)));
00736 
00737     ths->box_x = (double *) nfft_malloc(ths->d * ths->N_total *  sizeof(double));
00738   }
00739 }
00740 
00742 void fastsum_finalize(fastsum_plan *ths)
00743 {
00744   nfft_free(ths->x);
00745   nfft_free(ths->alpha);
00746   nfft_free(ths->y);
00747   nfft_free(ths->f);
00748 
00749   if (!(ths->flags & EXACT_NEARFIELD))
00750     nfft_free(ths->Add);
00751 
00752   nfft_finalize(&(ths->mv1));
00753   nfft_finalize(&(ths->mv2));
00754 
00755 #ifdef _OPENMP
00756 #pragma omp critical (nfft_omp_critical_fftw_plan)
00757 {
00758 #endif
00759   fftw_destroy_plan(ths->fft_plan);
00760 #ifdef _OPENMP
00761 }
00762 #endif
00763 
00764   nfft_free(ths->b);
00765 
00766   if (ths->flags & NEARFIELD_BOXES)
00767   {
00768     nfft_free(ths->box_offset);
00769     nfft_free(ths->box_alpha);
00770     nfft_free(ths->box_x);
00771   }
00772 }
00773 
00775 void fastsum_exact(fastsum_plan *ths)
00776 {
00777   int j,k;
00778   int t;
00779   double r;
00780 
00781   #pragma omp parallel for default(shared) private(j,k,t,r)
00782   for (j=0; j<ths->M_total; j++)
00783   {
00784     ths->f[j]=0.0;
00785     for (k=0; k<ths->N_total; k++)
00786     {
00787       if (ths->d==1)
00788         r = ths->y[j] - ths->x[k];
00789       else
00790       {
00791         r=0.0;
00792         for (t=0; t<ths->d; t++)
00793           r += (ths->y[j*ths->d+t]-ths->x[k*ths->d+t])*(ths->y[j*ths->d+t]-ths->x[k*ths->d+t]);
00794         r=sqrt(r);
00795       }
00796       ths->f[j] += ths->alpha[k] * ths->k(r,0,ths->kernel_param);
00797     }
00798   }
00799 }
00800 
00802 void fastsum_precompute(fastsum_plan *ths)
00803 {
00804   int j,k,t;
00805   int n_total;
00806   ticks t0, t1;
00807 
00808   ths->MEASURE_TIME_t[0] = 0.0;
00809   ths->MEASURE_TIME_t[1] = 0.0;
00810   ths->MEASURE_TIME_t[2] = 0.0;
00811   ths->MEASURE_TIME_t[3] = 0.0;
00812 
00813 #ifdef MEASURE_TIME
00814   t0 = getticks();
00815 #endif
00816 
00817 
00818   if (ths->flags & NEARFIELD_BOXES)
00819   {
00820     BuildBox(ths);
00821   }
00822   else
00823   {
00825     BuildTree(ths->d,0,ths->x,ths->alpha,ths->N_total);
00826   }
00827 
00828 #ifdef MEASURE_TIME
00829   t1 = getticks();
00830   ths->MEASURE_TIME_t[3] += nfft_elapsed_seconds(t1,t0);
00831 #endif
00832 
00833 
00834 #ifdef MEASURE_TIME
00835   t0 = getticks();
00836 #endif
00837 
00838   if (!(ths->flags & EXACT_NEARFIELD))
00839   {
00840     if (ths->d==1)
00841       #pragma omp parallel for default(shared) private(k)
00842       for (k=-ths->Ad/2-2; k <= ths->Ad/2+2; k++)
00843         ths->Add[k+ths->Ad/2+2] = regkern1(ths->k, ths->eps_I*(double)k/ths->Ad*2, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
00844     else
00845       #pragma omp parallel for default(shared) private(k)
00846       for (k=0; k <= ths->Ad+2; k++)
00847         ths->Add[k] = regkern3(ths->k, ths->eps_I*(double)k/ths->Ad, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B);
00848   }
00849 #ifdef MEASURE_TIME
00850   t1 = getticks();
00851   ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
00852 #endif
00853 
00854 
00855 #ifdef MEASURE_TIME
00856   t0 = getticks();
00857 #endif
00858 
00859   for (k=0; k<ths->mv1.M_total; k++)
00860     for (t=0; t<ths->mv1.d; t++)
00861       ths->mv1.x[ths->mv1.d*k+t] = - ths->x[ths->mv1.d*k+t];  /* note the factor -1 for transposed transform instead of adjoint*/
00862 
00864   if(ths->mv1.nfft_flags & PRE_LIN_PSI)
00865     nfft_precompute_lin_psi(&(ths->mv1));
00866 
00867   if(ths->mv1.nfft_flags & PRE_PSI)
00868     nfft_precompute_psi(&(ths->mv1));
00869 
00870   if(ths->mv1.nfft_flags & PRE_FULL_PSI)
00871     nfft_precompute_full_psi(&(ths->mv1));
00872 #ifdef MEASURE_TIME
00873   t1 = getticks();
00874   ths->MEASURE_TIME_t[1] += nfft_elapsed_seconds(t1,t0);
00875 #endif
00876 
00878   for(k=0; k<ths->mv1.M_total;k++)
00879     ths->mv1.f[k] = ths->alpha[k];
00880 
00881 #ifdef MEASURE_TIME
00882   t0 = getticks();
00883 #endif
00884 
00885   for (j=0; j<ths->mv2.M_total; j++)
00886     for (t=0; t<ths->mv2.d; t++)
00887       ths->mv2.x[ths->mv2.d*j+t] = - ths->y[ths->mv2.d*j+t];  /* note the factor -1 for conjugated transform instead of standard*/
00888 
00890   if(ths->mv2.nfft_flags & PRE_LIN_PSI)
00891     nfft_precompute_lin_psi(&(ths->mv2));
00892 
00893   if(ths->mv2.nfft_flags & PRE_PSI)
00894     nfft_precompute_psi(&(ths->mv2));
00895 
00896   if(ths->mv2.nfft_flags & PRE_FULL_PSI)
00897     nfft_precompute_full_psi(&(ths->mv2));
00898 #ifdef MEASURE_TIME
00899   t1 = getticks();
00900   ths->MEASURE_TIME_t[2] += nfft_elapsed_seconds(t1,t0);
00901 #endif
00902 
00903 
00904 #ifdef MEASURE_TIME
00905   t0 = getticks();
00906 #endif
00907 
00908   n_total = 1;
00909   for (t=0; t<ths->d; t++)
00910     n_total *= ths->n;
00911 
00912   #pragma omp parallel for default(shared) private(j,k,t)
00913   for (j=0; j<n_total; j++)
00914   {
00915     if (ths->d==1)
00916       ths->b[j] = regkern1(ths->k, (double)j / (ths->n) - 0.5, ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
00917     else
00918     {
00919       k=j;
00920       ths->b[j]=0.0;
00921       for (t=0; t<ths->d; t++)
00922       {
00923         ths->b[j] += ((double)(k % (ths->n)) / (ths->n) - 0.5) * ((double)(k % (ths->n)) / (ths->n) - 0.5);
00924         k = k / (ths->n);
00925       }
00926       ths->b[j] = regkern3(ths->k, sqrt(ths->b[j]), ths->p, ths->kernel_param, ths->eps_I, ths->eps_B)/n_total;
00927     }
00928   }
00929 
00930   nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
00931   fftw_execute(ths->fft_plan);
00932   nfft_fftshift_complex(ths->b, ths->mv1.d, ths->mv1.N);
00933 #ifdef MEASURE_TIME
00934   t1 = getticks();
00935   ths->MEASURE_TIME_t[0] += nfft_elapsed_seconds(t1,t0);
00936 #endif
00937 }
00938 
00940 void fastsum_trafo(fastsum_plan *ths)
00941 {
00942   int j,k,t;
00943   ticks t0, t1;
00944 
00945   ths->MEASURE_TIME_t[4] = 0.0; 
00946   ths->MEASURE_TIME_t[5] = 0.0;
00947   ths->MEASURE_TIME_t[6] = 0.0;
00948   ths->MEASURE_TIME_t[7] = 0.0;
00949 
00950 #ifdef MEASURE_TIME
00951   t0 = getticks();
00952 #endif
00953 
00954   nfft_adjoint(&(ths->mv1));
00955 #ifdef MEASURE_TIME
00956   t1 = getticks();
00957   ths->MEASURE_TIME_t[4] += nfft_elapsed_seconds(t1,t0);
00958 #endif
00959 
00960 
00961 #ifdef MEASURE_TIME
00962   t0 = getticks();
00963 #endif
00964 
00965   #pragma omp parallel for default(shared) private(k)
00966   for (k=0; k<ths->mv2.N_total; k++)
00967     ths->mv2.f_hat[k] = ths->b[k] * ths->mv1.f_hat[k];
00968 #ifdef MEASURE_TIME
00969   t1 = getticks();
00970   ths->MEASURE_TIME_t[5] += nfft_elapsed_seconds(t1,t0);
00971 #endif
00972 
00973 
00974 #ifdef MEASURE_TIME
00975   t0 = getticks();
00976 #endif
00977 
00978   nfft_trafo(&(ths->mv2));
00979 #ifdef MEASURE_TIME
00980   t1 = getticks();
00981   ths->MEASURE_TIME_t[6] += nfft_elapsed_seconds(t1,t0);
00982 #endif
00983 
00984 
00985 #ifdef MEASURE_TIME
00986   t0 = getticks();
00987 #endif
00988 
00989   #pragma omp parallel for default(shared) private(j,k,t)
00990   for (j=0; j<ths->M_total; j++)
00991   {
00992     double ymin[ths->d], ymax[ths->d]; 
00994     if (ths->flags & NEARFIELD_BOXES)
00995     {
00996       ths->f[j] = ths->mv2.f[j] + SearchBox(ths->y + ths->d*j, ths);
00997     }
00998     else
00999     {
01000       for (t=0; t<ths->d; t++)
01001       {
01002         ymin[t] = ths->y[ths->d*j+t] - ths->eps_I;
01003         ymax[t] = ths->y[ths->d*j+t] + ths->eps_I;
01004       }
01005       ths->f[j] = ths->mv2.f[j] + SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags);
01006     }
01007     /* ths->f[j] = ths->mv2.f[j]; */
01008     /* ths->f[j] = SearchTree(ths->d,0, ths->x, ths->alpha, ymin, ymax, ths->N_total, ths->k, ths->kernel_param, ths->Ad, ths->Add, ths->p, ths->flags); */
01009   }
01010 
01011 #ifdef MEASURE_TIME
01012   t1 = getticks();
01013   ths->MEASURE_TIME_t[7] += nfft_elapsed_seconds(t1,t0);
01014 #endif
01015 }
01016 /* \} */
01017 
01018 /* fastsum.c */

Generated on Fri Oct 12 2012 by Doxygen 1.8.0-20120409