NFFT Logo 3.2.2
util.c
00001 /*
00002  * Copyright (c) 2002, 2012 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 /* $Id: util.c 3896 2012-10-10 12:19:26Z tovo $ */
00020 
00025 #include "config.h"
00026 
00027 #include "infft.h"
00028 
00029 #include <stdio.h>
00030 #include <stdlib.h>
00031 #include <float.h>
00032 #include <sys/time.h>
00033 #include "cstripack.h"
00034 #ifdef HAVE_COMPLEX_H
00035 #include <complex.h>
00036 #endif
00037 
00038 #ifdef _OPENMP
00039 #include <omp.h>
00040 #endif
00041 
00042 #include "nfft3.h"
00043 #include "nfft3util.h"
00044 #include "infft.h"
00045 
00046 double nfft_elapsed_seconds(ticks t1, ticks t0)
00047 {
00048   UNUSED(t1);
00049   UNUSED(t0);
00050   return elapsed(t1,t0) / TICKS_PER_SECOND;
00051 }
00052 
00055 int nfft_prod_int(int *vec, int d)
00056 {
00057   int t, prod;
00058 
00059   prod=1;
00060   for(t=0; t<d; t++)
00061     prod *= vec[t];
00062 
00063   return prod;
00064 }
00065 
00068 int nfst_prod_minus_a_int(int *vec, int a, int d)
00069 {
00070   int t, prod;
00071 
00072   prod=1;
00073   for(t=0; t<d; t++)
00074     prod *= vec[t]-a;
00075 
00076   return prod;
00077 }
00078 
00081 int nfft_plain_loop(int *idx,int *N,int d)
00082 {
00083   int t,sum;
00084 
00085   sum = idx[0];
00086   for (t = 1; t < d; t++)
00087     sum = sum * N[t] + idx[t];
00088 
00089   return sum;
00090 }
00091 
00094 double nfft_prod_real(double *vec,int d)
00095 {
00096   int t;
00097   double prod;
00098 
00099   prod=1.0;
00100   for(t=0; t<d; t++)
00101     prod*=vec[t];
00102 
00103   return prod;
00104 }
00105 
00106 static void bspline_help(int k, double x, double *scratch, int j, int ug,
00107   int og, int r)
00108 {
00109   int i; /* row index of the de Boor scheme */
00110   int idx; /* index in scratch */
00111   double a; /* alpha of the de Boor scheme */
00112 
00113   /* computation of one column */
00114   for (i = og + r - k + 1, idx = og; idx >= ug; i--, idx--)
00115   {
00116     a = ((R)(x - i)) / ((R)(k - j));
00117     scratch[idx] = (1 - a) * scratch[idx-1] + a * scratch[idx];
00118   }
00119 } /* bspline_help */
00120 
00124 double nfft_bspline(int k, double x, double *scratch)
00125 {
00126   double result_value;                  
00127   int r;                                
00128   int g1,g2;                            
00129   int j,idx,ug,og;                    
00130   double a;                             
00132   result_value=0.0;
00133   if(0<x && x<k)
00134     {
00135       /* using symmetry around k/2 */
00136       if((k-x)<x) x=k-x;
00137 
00138       r=(int)(ceil(x)-1.0);
00139 
00140       for(idx=0; idx<k; idx++)
00141   scratch[idx]=0.0;
00142 
00143       scratch[k-r-1]=1.0;
00144 
00145       /* bounds of the algorithm */
00146       g1 = r;
00147       g2 = k - 1 - r;
00148       ug = g2;
00149 
00150       /* g1<=g2 holds */
00151 
00152       for(j=1, og=g2+1; j<=g1; j++, og++)
00153   {
00154     a = (x - r + k - 1 - og)/(k - j);
00155     scratch[og] = (1 - a) * scratch[og-1];
00156     bspline_help(k,x,scratch,j,ug+1,og-1,r);
00157     a = (x - r + k - 1 - ug)/(k - j);
00158     scratch[ug] = a * scratch[ug];
00159   }
00160       for(og-- ; j<=g2; j++)
00161   {
00162     bspline_help(k,x,scratch,j,ug+1,og,r);
00163     a = (x - r + k - 1 - ug)/(k - j);
00164     scratch[ug] = a * scratch[ug];
00165   }
00166       for( ; j<k; j++)
00167   {
00168     ug++;
00169     bspline_help(k,x,scratch,j,ug,og,r);
00170   }
00171       result_value = scratch[k-1];
00172     }
00173   return(result_value);
00174 } /* bspline */
00175 
00178 double nfft_dot_complex(double _Complex *x, int n)
00179 {
00180   int k;
00181   double dot;
00182 
00183   for(k=0,dot=0; k<n; k++)
00184     dot+=conj(x[k])*x[k];
00185 
00186   return dot;
00187 }
00188 
00191 double nfft_dot_double(double *x, int n)
00192 {
00193   int k;
00194   double dot;
00195 
00196   for(k=0,dot=0; k<n; k++)
00197     dot+=x[k]*x[k];
00198 
00199   return dot;
00200 }
00201 
00202 
00205 double nfft_dot_w_complex(double _Complex *x, double *w, int n)
00206 {
00207   int k;
00208   double dot;
00209 
00210   for(k=0,dot=0.0; k<n; k++)
00211     dot+=w[k]*conj(x[k])*x[k];
00212 
00213   return dot;
00214 }
00215 
00218 double nfft_dot_w_double(double *x, double *w, int n)
00219 {
00220   int k;
00221   double dot;
00222 
00223   for(k=0,dot=0.0; k<n; k++)
00224     dot+=w[k]*x[k]*x[k];
00225 
00226   return dot;
00227 }
00228 
00229 
00233 double nfft_dot_w_w2_complex(double _Complex *x, double *w, double *w2, int n)
00234 {
00235   int k;
00236   double dot;
00237 
00238   for(k=0,dot=0.0; k<n; k++)
00239     dot+=w[k]*w2[k]*w2[k]*conj(x[k])*x[k];
00240 
00241   return dot;
00242 }
00243 
00247 double nfft_dot_w2_complex(double _Complex *x, double *w2, int n)
00248 {
00249   int k;
00250   double dot;
00251 
00252   for(k=0,dot=0.0; k<n; k++)
00253     dot+=w2[k]*w2[k]*conj(x[k])*x[k];
00254 
00255   return dot;
00256 }
00257 
00260 void nfft_cp_complex(double _Complex *x, double _Complex *y, int n)
00261 {
00262   int k;
00263 
00264   for(k=0;k<n;k++)
00265     x[k]=y[k];
00266 }
00267 
00270 void nfft_cp_double(double *x, double *y, int n)
00271 {
00272   int k;
00273 
00274   for(k=0;k<n;k++)
00275     x[k]=y[k];
00276 }
00277 
00280 void nfft_cp_a_complex(double _Complex *x, double a, double _Complex *y, int n)
00281 {
00282   int k;
00283 
00284   for(k=0;k<n;k++)
00285     x[k]=a*y[k];
00286 }
00287 
00290 void nfft_cp_a_double(double *x, double a, double *y, int n)
00291 {
00292   int k;
00293 
00294   for(k=0;k<n;k++)
00295     x[k]=a*y[k];
00296 }
00297 
00298 
00301 void nfft_cp_w_complex(double _Complex *x, double *w, double _Complex *y, int n)
00302 {
00303   int k;
00304 
00305   for(k=0;k<n;k++)
00306     x[k]=w[k]*y[k];
00307 }
00308 
00311 void nfft_cp_w_double(double *x, double *w, double *y, int n)
00312 {
00313   int k;
00314 
00315   for(k=0;k<n;k++)
00316     x[k]=w[k]*y[k];
00317 }
00318 
00319 
00320 
00323 void nfft_upd_axpy_complex(double _Complex *x, double a, double _Complex *y, int n)
00324 {
00325   int k;
00326 
00327   for(k=0;k<n;k++)
00328     x[k]=a*x[k]+y[k];
00329 }
00330 
00333 void nfft_upd_axpy_double(double *x, double a, double *y, int n)
00334 {
00335   int k;
00336 
00337   for(k=0;k<n;k++)
00338     x[k]=a*x[k]+y[k];
00339 }
00340 
00341 
00344 void nfft_upd_xpay_complex(double _Complex *x, double a, double _Complex *y, int n)
00345 {
00346   int k;
00347 
00348   for(k=0;k<n;k++)
00349     x[k]+=a*y[k];
00350 }
00351 
00354 void nfft_upd_xpay_double(double *x, double a, double *y, int n)
00355 {
00356   int k;
00357 
00358   for(k=0;k<n;k++)
00359     x[k]+=a*y[k];
00360 }
00361 
00362 
00363 
00366 void nfft_upd_axpby_complex(double _Complex *x, double a, double _Complex *y, double b, int n)
00367 {
00368   int k;
00369 
00370   for(k=0;k<n;k++)
00371     x[k]=a*x[k]+b*y[k];
00372 }
00373 
00376 void nfft_upd_axpby_double(double *x, double a, double *y, double b, int n)
00377 {
00378   int k;
00379 
00380   for(k=0;k<n;k++)
00381     x[k]=a*x[k]+b*y[k];
00382 }
00383 
00384 
00387 void nfft_upd_xpawy_complex(double _Complex *x, double a, double *w, double _Complex *y, int n)
00388 {
00389   int k;
00390 
00391   for(k=0;k<n;k++)
00392     x[k]+=a*w[k]*y[k];
00393 }
00394 
00397 void nfft_upd_xpawy_double(double *x, double a, double *w, double *y, int n)
00398 {
00399   int k;
00400 
00401   for(k=0;k<n;k++)
00402     x[k]+=a*w[k]*y[k];
00403 }
00404 
00405 
00406 
00409 void nfft_upd_axpwy_complex(double _Complex *x, double a, double *w, double _Complex *y, int n)
00410 {
00411   int k;
00412 
00413   for(k=0;k<n;k++)
00414     x[k]=a*x[k]+w[k]*y[k];
00415 }
00416 
00419 void nfft_upd_axpwy_double(double *x, double a, double *w, double *y, int n)
00420 {
00421   int k;
00422 
00423   for(k=0;k<n;k++)
00424     x[k]=a*x[k]+w[k]*y[k];
00425 }
00426 
00427 
00428 void nfft_fftshift_complex(double _Complex *x, int d, int* N)
00429 {
00430   int d_pre, d_act, d_post;
00431   int N_pre, N_act, N_post;
00432   int k_pre, k_act, k_post;
00433   int k,k_swap;
00434 
00435   double _Complex x_swap;
00436 
00437   for(d_act=0;d_act<d;d_act++)
00438     {
00439       for(d_pre=0, N_pre=1;d_pre<d_act;d_pre++)
00440   N_pre*=N[d_pre];
00441 
00442       N_act=N[d_act];
00443 
00444       for(d_post=d_act+1, N_post=1;d_post<d;d_post++)
00445   N_post*=N[d_post];
00446 
00447       for(k_pre=0;k_pre<N_pre;k_pre++)
00448   for(k_act=0;k_act<N_act/2;k_act++)
00449     for(k_post=0;k_post<N_post;k_post++)
00450       {
00451         k=(k_pre*N_act+k_act)*N_post+k_post;
00452         k_swap=(k_pre*N_act+k_act+N_act/2)*N_post+k_post;
00453 
00454         x_swap=x[k];
00455         x[k]=x[k_swap];
00456         x[k_swap]=x_swap;
00457       }
00458     }
00459 }
00460 
00463 void nfft_vpr_int(int *x, int n, char *text)
00464 {
00465   int k;
00466 
00467   if(text!=NULL)
00468   {
00469       printf ("\n %s, adr=%p\n", text, (void*)x);
00470       for (k=0; k<n; k++)
00471       {
00472     if (k%8==0)
00473         printf("%6d.\t", k);
00474     printf("%d,", x[k]);
00475     if (k%8==7)
00476         printf("\n");
00477       }
00478       if (n%8!=0)
00479         printf("\n");
00480   }
00481   else
00482       for (k=0; k<n; k++)
00483     printf("%d,\n", x[k]);
00484   fflush(stdout);
00485 }
00486 
00488 void X(vpr_double)(R *x, const int n, const char *text)
00489 {
00490   int k;
00491 
00492   if (x == NULL)
00493   {
00494     printf("null pointer\n");
00495     fflush(stdout);
00496     exit(-1);
00497   }
00498 
00499   if (text != NULL)
00500   {
00501     printf ("\n %s, adr=%p\n", text, (void*)x);
00502 
00503     for (k = 0; k < n; k++)
00504     {
00505       if (k%8 == 0)
00506         printf("%6d.\t", k);
00507 
00508       printf("%+.1" FE ",", x[k]);
00509 
00510       if (k%8 == 7)
00511         printf("\n");
00512     }
00513 
00514     if (n%8 != 0)
00515       printf("\n");
00516   }
00517   else
00518     for (k = 0; k < n; k++)
00519       printf("%+" FE ",\n", x[k]);
00520 
00521   fflush(stdout);
00522 }
00523 
00525 void X(vpr_complex)(C *x, const int n, const char *text)
00526 {
00527   int k;
00528 
00529   if(text != NULL)
00530   {
00531     printf("\n %s, adr=%p\n", text, (void*)x);
00532     for (k = 0; k < n; k++)
00533     {
00534       if (k%4 == 0)
00535         printf("%6d.\t", k);
00536 
00537       printf("%+.1" FE "%+.1" FE "i,", CREAL(x[k]), CIMAG(x[k]));
00538 
00539       if (k%4==3)
00540         printf("\n");
00541     }
00542     if (n%4!=0)
00543       printf("\n");
00544   }
00545   else
00546     for (k = 0; k < n; k++)
00547       printf("%+" FE "%+" FE "i,\n", CREAL(x[k]), CIMAG(x[k]));
00548 
00549   fflush(stdout);
00550 }
00551 
00552 void X(vrand_unit_complex)(C *x, const int n)
00553 {
00554   int k;
00555 
00556   for (k = 0; k < n; k++)
00557     x[k] = nfft_drand48() + II*nfft_drand48();
00558 }
00559 
00560 void X(vrand_shifted_unit_double)(R *x, const int n)
00561 {
00562   int k;
00563 
00564   for (k = 0; k < n; k++)
00565     x[k] = nfft_drand48() - K(0.5);
00566 }
00567 
00569 void X(voronoi_weights_1d)(R *w, R *x, const int M)
00570 {
00571   int j;
00572 
00573   w[0] = (x[1]-x[0])/K(2.0);
00574 
00575   for(j = 1; j < M-1; j++)
00576     w[j] = (x[j+1]-x[j-1])/K(2.0);
00577 
00578   w[M-1] = (x[M-1]-x[M-2])/K(2.0);
00579 }
00580 
00581 void nfft_voronoi_weights_S2(double *w, double *xi, int M)
00582 {
00583   double *x;
00584   double *y;
00585   double *z;
00586   int j;
00587   int k;
00588   int el;
00589   int Mlocal = M;
00590   int lnew;
00591   int ier;
00592   int *list;
00593   int *lptr;
00594   int *lend;
00595   int *near;
00596   int *next;
00597   double  *dist;
00598   int *ltri;
00599   int *listc;
00600   int nb;
00601   double *xc;
00602   double *yc;
00603   double *zc;
00604   double *rc;
00605   double *vr;
00606   int lp;
00607   int lpl;
00608   int kv;
00609   double a;
00610 
00611   /* Allocate memory for auxilliary arrays. */
00612   x = (double*)nfft_malloc(M * sizeof(double));
00613   y = (double*)nfft_malloc(M * sizeof(double));
00614   z = (double*)nfft_malloc(M * sizeof(double));
00615 
00616   list = (int*)nfft_malloc((6*M-12+1)*sizeof(int));
00617   lptr = (int*)nfft_malloc((6*M-12+1)*sizeof(int));
00618   lend = (int*)nfft_malloc((M+1)*sizeof(int));
00619   near = (int*)nfft_malloc((M+1)*sizeof(int));
00620   next = (int*)nfft_malloc((M+1)*sizeof(int));
00621   dist = (double*)nfft_malloc((M+1)*sizeof(double));
00622   ltri = (int*)nfft_malloc((6*M+1)*sizeof(int));
00623   listc = (int*)nfft_malloc((6*M-12+1)*sizeof(int));
00624   xc = (double*)nfft_malloc((2*M-4+1)*sizeof(double));
00625   yc = (double*)nfft_malloc((2*M-4+1)*sizeof(double));
00626   zc = (double*)nfft_malloc((2*M-4+1)*sizeof(double));
00627   rc = (double*)nfft_malloc((2*M-4+1)*sizeof(double));
00628   vr = (double*)nfft_malloc(3*(2*M-4+1)*sizeof(double));
00629 
00630   /* Convert from spherical Coordinates in [0,1/2]x[-1/2,1/2) to Cartesian
00631    * coordinates. */
00632   for (k = 0; k < M; k++)
00633   {
00634     x[k] = sin(2.0*PI*xi[2*k+1])*cos(2.0*PI*xi[2*k]);
00635     y[k] = sin(2.0*PI*xi[2*k+1])*sin(2.0*PI*xi[2*k]);
00636     z[k] = cos(2.0*PI*xi[2*k+1]);
00637   }
00638 
00639   /* Generate Delaunay triangulation. */
00640   trmesh_(&Mlocal, x, y, z, list, lptr, lend, &lnew, near, next, dist, &ier);
00641 
00642   /* Check error flag. */
00643   if (ier == 0)
00644   {
00645     /* Get Voronoi vertices. */
00646     crlist_(&Mlocal, &Mlocal, x, y, z, list, lend, lptr, &lnew, ltri, listc, &nb, xc,
00647       yc, zc, rc, &ier);
00648 
00649     if (ier == 0)
00650     {
00651       /* Calcuate sizes of Voronoi regions. */
00652       for (k = 0; k < M; k++)
00653       {
00654         /* Get last neighbour index. */
00655         lpl = lend[k];
00656         lp = lpl;
00657 
00658         j = 0;
00659         vr[3*j] = x[k];
00660         vr[3*j+1] = y[k];
00661         vr[3*j+2] = z[k];
00662 
00663         do
00664         {
00665           j++;
00666           /* Get next neighbour. */
00667           lp = lptr[lp-1];
00668           kv = listc[lp-1];
00669           vr[3*j] = xc[kv-1];
00670           vr[3*j+1] = yc[kv-1];
00671           vr[3*j+2] = zc[kv-1];
00672           /* fprintf(stderr, "lp = %ld\t", lp); */
00673         } while (lp != lpl);
00674 
00675         a = 0;
00676         for (el = 0; el < j; el++)
00677         {
00678           a += areas_(vr, &vr[3*(el+1)],&vr[3*(((el+1)%j)+1)]);
00679         }
00680 
00681         w[k] = a;
00682       }
00683     }
00684   }
00685 
00686   /* Deallocate memory. */
00687   nfft_free(x);
00688   nfft_free(y);
00689   nfft_free(z);
00690 
00691   nfft_free(list);
00692   nfft_free(lptr);
00693   nfft_free(lend);
00694   nfft_free(near);
00695   nfft_free(next);
00696   nfft_free(dist);
00697   nfft_free(ltri);
00698   nfft_free(listc);
00699   nfft_free(xc);
00700   nfft_free(yc);
00701   nfft_free(zc);
00702   nfft_free(rc);
00703   nfft_free(vr);
00704 }
00705 
00710 R X(modified_fejer)(const int N, const int kk)
00711 {
00712   return (K(2.0)/((R)(N*N))*(K(1.0)-FABS(K(2.0)*kk+K(1.0))/((R)N)));
00713 }
00714 
00716 R X(modified_jackson2)(const int N, const int kk)
00717 {
00718   int kj;
00719   const R n=(N/K(2.0)+K(1.0))/K(2.0);
00720   R result, k;
00721 
00722   for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
00723   {
00724     k = ABS(kj);
00725 
00726     if(k/n < K(1.0))
00727       result += K(1.0) - (K(3.0)*k + K(6.0)*n*POW(k,K(2.0))
00728         - K(3.0)*POW(k,K(3.0)))/(K(2.0)*n*(K(2.0)*POW(n,K(2.0))+K(1.0)));
00729     else
00730       result+= (K(2.0)*n-k)*(POW(2*n-k,K(2.0))-K(1.0))/(K(2.0)
00731         *n*(K(2.0)*POW(n,K(2.0))+K(1.0)));
00732   }
00733 
00734   return result;
00735 }
00736 
00738 R X(modified_jackson4)(const int N, const int kk)
00739 {
00740   int kj;
00741   const R n = (N/K(2.0)+K(3.0))/K(4.0), normalisation = (K(2416.0)*POW(n,K(7.0))
00742     + K(1120.0)*POW(n,K(5.0)) + K(784.0)*POW(n,K(3.0)) + K(720.0)*n);
00743   R result, k;
00744 
00745   for (result = K(0.0), kj = kk; kj <= kk + 1; kj++)
00746   {
00747     k = ABS(kj);
00748 
00749     if (k/n < K(1.0))
00750       result += K(1.0) - (K(1260.0)*k + (K(1680.0)*POW(n, K(5.0))
00751         + K(2240.0)*POW(n, K(3.0)) + K(2940.0)*n)*POW(k, K(2.0))
00752         - K(1715.0)*POW(k, K(3.0)) - (K(560.0)*POW(n, K(3.0))
00753         + K(1400.0)*n)*POW(k, K(4.0)) + K(490.0)*POW(k, K(5.0))
00754         + K(140.0)*n*POW(k, K(6.0)) - K(35.0)*POW(k,K(7.0)))/normalisation;
00755 
00756     if ((K(1.0) <= k/n) && (k/n < K(2.0)))
00757       result += ((K(2472.0)*POW(n, K(7.0)) + K(336.0)*POW(n, K(5.0))
00758         + K(3528.0)*POW(n, K(3.0)) - K(1296.0)*n) - (K(392.0)*POW(n, K(6.0))
00759         - K(3920.0)*POW(n, K(4.0)) + K(8232.0)*POW(n, K(2.0)) - K(756.0))*k
00760         - (K(504.0)*POW(n, K(5.0)) + K(10080.0)*POW(n, K(3.0))
00761         - K(5292.0)*n)*POW(k, K(2.0)) - (K(1960.0)*POW(n, K(4.0))
00762         - K(7840.0)*POW(n, K(2.0)) + K(1029.0))*POW(k, K(3.0))
00763         + (K(2520.0)*POW(n, K(3.0)) - K(2520.0)*n) * POW(k, K(4.0))
00764         - (K(1176.0)*POW(n, K(2.0)) - K(294.0)) * POW(k, K(5.0))
00765         + K(252.0)*n*POW(k, K(6.0)) - K(21.0)*POW(k, K(7.0)))/normalisation;
00766 
00767     if ((K(2.0) <= k/n) && (k/n < K(3.0)))
00768       result += (-(K(1112.0)*POW(n, K(7.0)) - K(12880.0)*POW(n, K(5.0))
00769         + K(7448.0)*POW(n, K(3.0)) - K(720.0)*n) + (K(12152.0)*POW(n, K(6.0))
00770         - K(27440.0)*POW(n, K(4.0)) + K(8232.0)*POW(n, K(2.0)) - K(252.0))*k
00771         - (K(19320.0)*POW(n, K(5.0)) - K(21280.0)*POW(n, K(3.0))
00772         + K(2940.0)*n)*POW(k, K(2.0)) + (K(13720.0)*POW(n, K(4.0))
00773         - K(7840.0)*POW(n, K(2.0)) + K(343.0))*POW(k, K(3.0))
00774         - (K(5320.0)*POW(n, K(3.0)) - K(1400.0)*n)*POW(k, K(4.0))
00775         + (K(1176.0)*POW(n, K(2.0)) - K(98.0))*POW(k, K(5.0))
00776         - K(140.0)*n*POW(k, K(6.0)) + K(7.0) * POW(k, K(7.0)))/normalisation;
00777 
00778     if ((K(3.0) <= k/n) && (k/n < K(4.0)))
00779       result += ((4*n-k)*(POW(4*n-k, K(2.0)) - K(1.0))*(POW(4*n-k, K(2.0))
00780         - K(4.0))*(POW(4*n-k, K(2.0)) - K(9.0)))/normalisation;
00781   }
00782 
00783   return result;
00784 }
00785 
00787 R X(modified_sobolev)(const R mu, const int kk)
00788 {
00789   R result;
00790   int kj, k;
00791 
00792   for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
00793   {
00794     k = ABS(kj);
00795     if (k == 0)
00796       result += K(1.0);
00797     else
00798       result += POW((double)k,-K(2.0)*mu);
00799   }
00800 
00801   return result;
00802 }
00803 
00805 R X(modified_multiquadric)(const R mu, const R c, const int kk)
00806 {
00807   R result;
00808   int kj, k;
00809 
00810   for (result = K(0.0), kj = kk; kj <= kk+1; kj++)
00811     {
00812       k = ABS(kj);
00813       result += POW((double)(k*k + c*c), -mu);
00814     }
00815 
00816   return result;
00817 }
00818 
00819 static inline int scaled_modified_bessel_i_series(const R x, const R alpha,
00820   const int nb, const int ize, R *b)
00821 {
00822   const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
00823   R tempa = K(1.0), empal = K(1.0) + alpha, halfx = K(0.0), tempb = K(0.0);
00824   int n, ncalc = nb;
00825 
00826   if (enmten < x)
00827     halfx = x/K(2.0);
00828 
00829   if (alpha != K(0.0))
00830     tempa = POW(halfx, alpha)/TGAMMA(empal);
00831 
00832   if (ize == 2)
00833     tempa *= EXP(-x);
00834 
00835   if (K(1.0) < x + K(1.0))
00836     tempb = halfx*halfx;
00837 
00838   b[0] = tempa + tempa*tempb/empal;
00839 
00840   if (x != K(0.0) && b[0] == K(0.0))
00841     ncalc = 0;
00842 
00843   if (nb == 1)
00844     return ncalc;
00845 
00846   if (K(0.0) < x)
00847   {
00848     R tempc = halfx, tover = (enmten + enmten)/x;
00849 
00850     if (tempb != K(0.0))
00851       tover = enmten/tempb;
00852 
00853     for (n = 1; n < nb; n++)
00854     {
00855       tempa /= empal;
00856       empal += K(1.0);
00857       tempa *= tempc;
00858 
00859       if (tempa <= tover*empal)
00860         tempa = K(0.0);
00861 
00862       b[n] = tempa + tempa*tempb/empal;
00863 
00864       if (b[n] == K(0.0) && n < ncalc)
00865         ncalc = n;
00866     }
00867   }
00868   else
00869     for (n = 1; n < nb; n++)
00870       b[n] = K(0.0);
00871 
00872   return ncalc;
00873 }
00874 
00875 static inline void scaled_modified_bessel_i_normalize(const R x,
00876   const R alpha, const int nb, const int ize, R *b, const R sum_)
00877 {
00878   const R enmten = K(4.0)*nfft_float_property(NFFT_R_MIN);
00879   R sum = sum_, tempa;
00880   int n;
00881 
00882   /* Normalize, i.e., divide all b[n] by sum */
00883   if (alpha != K(0.0))
00884     sum = sum * TGAMMA(K(1.0) + alpha) * POW(x/K(2.0), -alpha);
00885 
00886   if (ize == 1)
00887     sum *= EXP(-x);
00888 
00889   tempa = enmten;
00890 
00891   if (K(1.0) < sum)
00892     tempa *= sum;
00893 
00894   for (n = 1; n <= nb; n++)
00895   {
00896     if (b[n-1] < tempa)
00897       b[n-1] = K(0.0);
00898 
00899     b[n-1] /= sum;
00900   }
00901 }
00902 
00950 int nfft_smbi(const R x, const R alpha, const int nb, const int ize, R *b)
00951 {
00952   /* machine dependent parameters */
00953   /* NSIG   - DECIMAL SIGNIFICANCE DESIRED.  SHOULD BE SET TO */
00954   /*          IFIX(ALOG10(2)*NBIT+1), WHERE NBIT IS THE NUMBER OF */
00955   /*          BITS IN THE MANTISSA OF A WORKING PRECISION VARIABLE. */
00956   /*          SETTING NSIG LOWER WILL RESULT IN DECREASED ACCURACY */
00957   /*          WHILE SETTING NSIG HIGHER WILL INCREASE CPU TIME */
00958   /*          WITHOUT INCREASING ACCURACY.  THE TRUNCATION ERROR */
00959   /*          IS LIMITED TO A RELATIVE ERROR OF T=.5*10**(-NSIG). */
00960   /* ENTEN  - 10.0 ** K, WHERE K IS THE LARGEST int SUCH THAT */
00961   /*          ENTEN IS MACHINE-REPRESENTABLE IN WORKING PRECISION. */
00962   /* ENSIG  - 10.0 ** NSIG. */
00963   /* RTNSIG - 10.0 ** (-K) FOR THE SMALLEST int K SUCH THAT */
00964   /*          K .GE. NSIG/4. */
00965   /* ENMTEN - THE SMALLEST ABS(X) SUCH THAT X/4 DOES NOT UNDERFLOW. */
00966   /* XLARGE - UPPER LIMIT ON THE MAGNITUDE OF X WHEN IZE=2.  BEAR */
00967   /*          IN MIND THAT IF ABS(X)=N, THEN AT LEAST N ITERATIONS */
00968   /*          OF THE BACKWARD RECURSION WILL BE EXECUTED. */
00969   /* EXPARG - LARGEST WORKING PRECISION ARGUMENT THAT THE LIBRARY */
00970   /*          EXP ROUTINE CAN HANDLE AND UPPER LIMIT ON THE */
00971   /*          MAGNITUDE OF X WHEN IZE=1. */
00972   const int nsig = MANT_DIG + 2;
00973   const R enten = nfft_float_property(NFFT_R_MAX);
00974   const R ensig = POW(K(10.0),(R)nsig);
00975   const R rtnsig = POW(K(10.0),-CEIL((R)nsig/K(4.0)));
00976   const R xlarge = K(1E4);
00977   const R exparg = FLOOR(LOG(POW(K(R_RADIX),K(DBL_MAX_EXP-1))));
00978 
00979   /* System generated locals */
00980   int l, n, nend, magx, nbmx, ncalc, nstart;
00981   R p, em, en, sum, pold, test, empal, tempa, tempb, tempc, psave, plast, tover,
00982     emp2al, psavel;
00983 
00984   magx = LRINT(FLOOR(x));
00985 
00986   /* return if x, nb, or ize out of range */
00987   if (   nb <= 0 || x < K(0.0) || alpha < K(0.0) || K(1.0) <= alpha
00988       || ((ize != 1 || exparg < x) && (ize != 2 || xlarge < x)))
00989     return (MIN(nb,0) - 1);
00990 
00991   /* 2-term ascending series for small x */
00992   if (x < rtnsig)
00993     return scaled_modified_bessel_i_series(x,alpha,nb,ize,b);
00994 
00995   ncalc = nb;
00996   /* forward sweep, Olver's p-sequence */
00997 
00998   nbmx = nb - magx;
00999   n = magx + 1;
01000 
01001   en = (R) (n+n) + (alpha+alpha);
01002   plast = K(1.0);
01003   p = en/x;
01004 
01005   /* significance test */
01006   test = ensig + ensig;
01007 
01008   if ((5*nsig) < (magx << 1))
01009     test = SQRT(test*p);
01010   else
01011     test /= POW(K(1.585),(R)magx);
01012 
01013   if (3 <= nbmx)
01014   {
01015     /* calculate p-sequence until n = nb-1 */
01016     tover = enten/ensig;
01017     nstart = magx+2;
01018     nend = nb - 1;
01019 
01020     for (n = nstart; n <= nend; n++)
01021     {
01022       en += K(2.0);
01023       pold = plast;
01024       plast = p;
01025       p = en*plast/x + pold;
01026       if (p > tover)
01027       {
01028         /* divide p-sequence by tover to avoid overflow. Calculate p-sequence
01029          * until 1 <= |p| */
01030         tover = enten;
01031         p /= tover;
01032         plast /= tover;
01033         psave = p;
01034         psavel = plast;
01035         nstart = n + 1;
01036 
01037         do
01038         {
01039           n++;
01040           en += K(2.0);
01041           pold = plast;
01042           plast = p;
01043           p = en*plast/x + pold;
01044         } while (p <= K(1.0));
01045 
01046         tempb = en/x;
01047 
01048         /* Backward test. Find ncalc as the largest n such that test is passed. */
01049         test = pold*plast*(K(0.5) - K(0.5)/(tempb * tempb))/ensig;
01050         p = plast*tover;
01051         n--;
01052         en -= K(2.0);
01053         nend = MIN(nb,n);
01054 
01055         for (ncalc = nstart; ncalc <= nend; ncalc++)
01056         {
01057           pold = psavel;
01058           psavel = psave;
01059           psave = en*psavel/x + pold;
01060           if (test < psave * psavel)
01061             break;
01062         }
01063 
01064         ncalc--;
01065         goto L80;
01066       }
01067     }
01068 
01069     n = nend;
01070     en = (R) (n+n) + (alpha+alpha);
01071 
01072     /* special significance test for 2 <= nbmx */
01073     test = FMAX(test,SQRT(plast*ensig)*SQRT(p+p));
01074   }
01075 
01076   /* calculate p-sequence until significance test is passed */
01077   do
01078   {
01079     n++;
01080     en += K(2.0);
01081     pold = plast;
01082     plast = p;
01083     p = en*plast/x + pold;
01084   } while (p < test);
01085 
01086   /* Initialize backward recursion and normalization sum. */
01087 L80:
01088   n++;
01089   en += K(2.0);
01090   tempb = K(0.0);
01091   tempa = K(1.0)/p;
01092   em = (R)(n-1);
01093   empal = em + alpha;
01094   emp2al = em - K(1.0) + (alpha+alpha);
01095   sum = tempa*empal*emp2al/em;
01096   nend = n-nb;
01097 
01098   if (nend < 0)
01099   {
01100     /* We have n <= nb. So store b[n] and set higher orders to zero */
01101     b[n-1] = tempa;
01102     nend = -nend;
01103     for (l = 1; l <= nend; ++l)
01104       b[n-1 + l] = K(0.0);
01105   }
01106   else
01107   {
01108     if (nend != 0)
01109     {
01110       /* recur backward via difference equation, calculating b[n] until n = nb */
01111       for (l = 1; l <= nend; ++l)
01112       {
01113         n--;
01114         en -= K(2.0);
01115         tempc = tempb;
01116         tempb = tempa;
01117         tempa = en*tempb/x + tempc;
01118         em -= K(1.0);
01119         emp2al -= K(1.0);
01120 
01121         if (n == 1)
01122           break;
01123 
01124         if (n == 2)
01125           emp2al = K(1.0);
01126 
01127         empal -= K(1.0);
01128         sum = (sum + tempa*empal)*emp2al/em;
01129       }
01130     }
01131 
01132     /* store b[nb] */
01133     b[n-1] = tempa;
01134 
01135     if (nb <= 1)
01136     {
01137       sum = sum + sum + tempa;
01138       scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
01139       return ncalc;
01140     }
01141 
01142     /* calculate and store b[nb-1] */
01143     n--;
01144     en -= 2.0;
01145     b[n-1] = en*tempa/x + tempb;
01146 
01147     if (n == 1)
01148     {
01149       sum = sum + sum + b[0];
01150       scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
01151       return ncalc;
01152     }
01153 
01154     em -= K(1.0);
01155     emp2al -= K(1.0);
01156 
01157     if (n == 2)
01158       emp2al = K(1.0);
01159 
01160     empal -= K(1.0);
01161     sum = (sum + b[n-1]*empal)*emp2al/em;
01162   }
01163 
01164   nend = n - 2;
01165 
01166   if (nend != 0)
01167   {
01168     /* Calculate and store b[n] until n = 2. */
01169     for (l = 1; l <= nend; ++l)
01170     {
01171       n--;
01172       en -= K(2.0);
01173       b[n-1] = en*b[n]/x + b[n+1];
01174       em -= K(1.0);
01175       emp2al -= K(1.0);
01176 
01177       if (n == 2)
01178         emp2al = K(1.0);
01179 
01180       empal -= K(1.0);
01181       sum = (sum + b[n-1]*empal)*emp2al/em;
01182     }
01183   }
01184 
01185   /* calculate b[1] */
01186   b[0] = K(2.0)*empal*b[1]/x + b[2];
01187   sum = sum + sum + b[0];
01188 
01189   scaled_modified_bessel_i_normalize(x,alpha,nb,ize,b,sum);
01190   return ncalc;
01191 }
01192 
01197 void nfft_assertion_failed(const char *s, int line, const char *file)
01198 {
01199   fflush(stdout);
01200   fprintf(stderr, "nfft: %s:%d: assertion failed: %s\n", file, line, s);
01201 #ifdef HAVE_ABORT
01202   /* Use abort function. */
01203   abort();
01204 #else
01205   /* Use exit function. */
01206   exit(EXIT_FAILURE);
01207 #endif
01208 }
01209 
01210 /* We declare drand48() and srand48() ourselves, if they are is not declared in
01211  * math.h (e.g. on SuSE 9.3), grrr. */
01212 #include "config.h"
01213 #if HAVE_DECL_DRAND48 == 0
01214   extern double drand48(void);
01215 #endif
01216 #if HAVE_DECL_SRAND48 == 0
01217   extern void srand48(long int);
01218 #endif
01219 
01220 double nfft_drand48(void)
01221 {
01222 #ifdef HAVE_DRAND48
01223   return drand48();
01224 #else
01225   return ((R)rand())/((R)RAND_MAX);
01226 #endif
01227 }
01228 
01229 void nfft_srand48(long int seed)
01230 {
01231 #ifdef HAVE_SRAND48
01232   srand48(seed);
01233 #else
01234   srand((unsigned int)seed);
01235 #endif
01236 }
01237 
01238 
01239 #define z_swap(_a_, _b_, _t_)     do { (_t_) = (_a_); (_a_) = (_b_); (_b_) = (_t_); } while (0)
01240 
01246 static void nfft_sort_node_indices_sort_bubble(int n, int *keys)
01247 {
01248   int i, j, ti;
01249 
01250   for (i = 0; i < n; ++i)
01251   {
01252     j = i;
01253     while (j > 0 && keys[2 * j + 0] < keys[2 * (j - 1) + 0])
01254     {
01255       z_swap(keys[2 * j + 0], keys[2 * (j - 1) + 0], ti);
01256       z_swap(keys[2 * j + 1], keys[2 * (j - 1) + 1], ti);
01257       --j;
01258     }
01259   }
01260 }
01261 
01267 static void nfft_sort_node_indices_radix_count(int n, int *keys, int shift, int mask, int *counts)
01268 {
01269   int i, k;
01270 
01271   for (i = 0; i < n; ++i)
01272   {
01273     k = (keys[2 * i + 0] >> shift) & mask;
01274     ++counts[k];
01275   }
01276 }
01277 
01283 static void nfft_sort_node_indices_radix_rearrange(int n, int *keys_in, int *keys_out, int shift, int mask, int *displs)
01284 {
01285   int i, k;
01286 
01287   for (i = 0; i < n; ++i)
01288   {
01289     k = (keys_in[2 * i + 0] >> shift) & mask;
01290     keys_out[2 * displs[k] + 0] = keys_in[2 * i + 0];
01291     keys_out[2 * displs[k] + 1] = keys_in[2 * i + 1];
01292     ++displs[k];
01293   }
01294 }
01295 
01301 void nfft_sort_node_indices_radix_lsdf(int n, int *keys0, int *keys1, int rhigh)
01302 {
01303   const int rwidth = 9;
01304   const int radix_n = 1 << rwidth;
01305   const int radix_mask = radix_n - 1;
01306   const int rhigh_in = rhigh;
01307 
01308   const int tmax =
01309 #ifdef _OPENMP
01310     omp_get_max_threads();
01311 #else
01312     1;
01313 #endif
01314 
01315   int *from, *to, *tmp;
01316 
01317   int i, k, l, h;
01318   int lcounts[tmax * radix_n];
01319 
01320   int tid = 0, tnum = 1;
01321 
01322 
01323   from = keys0;
01324   to = keys1;
01325 
01326   while (rhigh >= 0)
01327   {
01328 #ifdef _OPENMP
01329     #pragma omp parallel private(tid, tnum, i, l, h)
01330     {
01331       tid = omp_get_thread_num();
01332       tnum = omp_get_num_threads();
01333 #endif
01334 
01335       for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
01336 
01337       l = (tid * n) / tnum;
01338       h = ((tid + 1) * n) / tnum;
01339 
01340       nfft_sort_node_indices_radix_count(h - l, from + (2 * l), rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
01341 #ifdef _OPENMP
01342     }
01343 #endif
01344 
01345     k = 0;
01346     for (i = 0; i < radix_n; ++i)
01347     {
01348       for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
01349     }
01350 
01351 #ifdef _OPENMP
01352     #pragma omp parallel private(tid, tnum, i, l, h)
01353     {
01354       tid = omp_get_thread_num();
01355       tnum = omp_get_num_threads();
01356 #endif
01357 
01358       l = (tid * n) / tnum;
01359       h = ((tid + 1) * n) / tnum;
01360 
01361       nfft_sort_node_indices_radix_rearrange(h - l, from + (2 * l), to, rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
01362 #ifdef _OPENMP
01363     }
01364 #endif
01365 
01366 /*    print_keys(n, to);*/
01367 
01368     tmp = from;
01369     from = to;
01370     to = tmp;
01371 
01372     rhigh -= rwidth;
01373   }
01374 
01375   if (to == keys0) memcpy(to, from, n * 2 * sizeof(int));
01376 }
01377 
01383 void nfft_sort_node_indices_radix_msdf(int n, int *keys0, int *keys1, int rhigh)
01384 {
01385   const int rwidth = 9;
01386   const int radix_n = 1 << rwidth;
01387   const int radix_mask = radix_n - 1;
01388 
01389   const int tmax =
01390 #ifdef _OPENMP
01391     omp_get_max_threads();
01392 #else
01393     1;
01394 #endif
01395 
01396   int i, k, l, h;
01397   int lcounts[tmax * radix_n];
01398 
01399   int counts[radix_n], displs[radix_n];
01400 
01401   int tid = 0, tnum = 1;
01402 
01403 
01404   rhigh -= rwidth;
01405 
01406 #ifdef _OPENMP
01407   #pragma omp parallel private(tid, tnum, i, l, h)
01408   {
01409     tid = omp_get_thread_num();
01410     tnum = omp_get_num_threads();
01411 #endif
01412 
01413     for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
01414 
01415     l = (tid * n) / tnum;
01416     h = ((tid + 1) * n) / tnum;
01417 
01418     nfft_sort_node_indices_radix_count(h - l, keys0 + (2 * l), rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
01419 #ifdef _OPENMP
01420   }
01421 #endif
01422 
01423   k = 0;
01424   for (i = 0; i < radix_n; ++i)
01425   {
01426     for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
01427 
01428     displs[i] = lcounts[0 * radix_n + i];
01429     if (i > 0) counts[i - 1] = displs[i] - displs[i - 1];
01430   }
01431   counts[radix_n - 1] = n - displs[radix_n - 1];
01432 
01433 #ifdef _OPENMP
01434   #pragma omp parallel private(tid, tnum, i, l, h)
01435   {
01436     tid = omp_get_thread_num();
01437     tnum = omp_get_num_threads();
01438 #endif
01439 
01440     l = (tid * n) / tnum;
01441     h = ((tid + 1) * n) / tnum;
01442 
01443     nfft_sort_node_indices_radix_rearrange(h - l, keys0 + (2 * l), keys1, rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
01444 #ifdef _OPENMP
01445   }
01446 #endif
01447 
01448   memcpy(keys0, keys1, n * 2 * sizeof(int));
01449 
01450   if (rhigh >= 0)
01451   {
01452     for (i = 0; i < radix_n; ++i)
01453     {
01454       if (counts[i] > 1)
01455       {
01456         if (counts[i] > 256)
01457           nfft_sort_node_indices_radix_msdf(counts[i], keys0 + 2 * displs[i], keys1 + 2 * displs[i], rhigh);
01458         else
01459           nfft_sort_node_indices_sort_bubble(counts[i], keys0 + 2 * displs[i]);
01460       }
01461     }
01462   }
01463 }
01464 
01465 int nfft_get_num_threads(void)
01466 {
01467 #ifdef _OPENMP
01468   return nfft_get_omp_num_threads();
01469 #else
01470   return 1;
01471 #endif
01472 }
01473 
01474 #ifdef _OPENMP
01475 int nfft_get_omp_num_threads(void)
01476 {
01477   int nthreads;
01478   #pragma omp parallel default(shared)
01479   {
01480     int n = omp_get_num_threads();
01481     #pragma omp master
01482     {
01483       nthreads = n;
01484     }
01485   }
01486   return nthreads;
01487 }
01488 #endif

Generated on Fri Oct 12 2012 by Doxygen 1.8.0-20120409