ESYS13  Revision_
LocalOps.h
Go to the documentation of this file.
00001 
00002 /*******************************************************
00003 *
00004 * Copyright (c) 2003-2012 by University of Queensland
00005 * Earth Systems Science Computational Center (ESSCC)
00006 * http://www.uq.edu.au/esscc
00007 *
00008 * Primary Business: Queensland, Australia
00009 * Licensed under the Open Software License version 3.0
00010 * http://www.opensource.org/licenses/osl-3.0.php
00011 *
00012 *******************************************************/
00013 
00014 
00015 #if !defined escript_LocalOps_H
00016 #define escript_LocalOps_H
00017 #if defined(_WIN32) && defined(__INTEL_COMPILER)
00018 #   include <mathimf.h>
00019 #else
00020 #   include <math.h>
00021 #endif
00022 #ifndef M_PI
00023 #   define M_PI           3.14159265358979323846  /* pi */
00024 #endif
00025 
00026 
00035 namespace escript {
00036 
00041 inline
00042 bool nancheck(double d)
00043 {
00044         // Q: so why not just test d!=d?
00045         // A: Coz it doesn't always work [I've checked].
00046         // One theory is that the optimizer skips the test.
00047 #ifdef isnan
00048     return isnan(d);
00049 #elif defined _isnan
00050     return _isnan(d);
00051 #else
00052     return false;
00053 #endif
00054 }
00055 
00060 inline
00061 double makeNaN()
00062 {
00063 #ifdef nan
00064     return nan("");
00065 #else
00066     return sqrt(-1.);
00067 #endif
00068 
00069 }
00070 
00071 
00079 inline
00080 void eigenvalues1(const double A00,double* ev0) {
00081 
00082    *ev0=A00;
00083 
00084 }
00095 inline
00096 void eigenvalues2(const double A00,const double A01,const double A11,
00097                  double* ev0, double* ev1) {
00098       const register double trA=(A00+A11)/2.;
00099       const register double A_00=A00-trA;
00100       const register double A_11=A11-trA;
00101       const register double s=sqrt(A01*A01-A_00*A_11);
00102       *ev0=trA-s;
00103       *ev1=trA+s;
00104 }
00119 inline
00120 void eigenvalues3(const double A00, const double A01, const double A02,
00121                                    const double A11, const double A12,
00122                                                      const double A22,
00123                  double* ev0, double* ev1,double* ev2) {
00124 
00125       const register double trA=(A00+A11+A22)/3.;
00126       const register double A_00=A00-trA;
00127       const register double A_11=A11-trA;
00128       const register double A_22=A22-trA;
00129       const register double A01_2=A01*A01;
00130       const register double A02_2=A02*A02;
00131       const register double A12_2=A12*A12;
00132       const register double p=A02_2+A12_2+A01_2+(A_00*A_00+A_11*A_11+A_22*A_22)/2.;
00133       if (p<=0.) {
00134          *ev2=trA;
00135          *ev1=trA;
00136          *ev0=trA;
00137 
00138       } else {
00139          const register double q=(A02_2*A_11+A12_2*A_00+A01_2*A_22)-(A_00*A_11*A_22+2*A01*A12*A02);
00140          const register double sq_p=sqrt(p/3.);
00141          register double z=-q/(2*pow(sq_p,3));
00142          if (z<-1.) {
00143             z=-1.;
00144          } else if (z>1.) {
00145             z=1.;
00146          }
00147          const register double alpha_3=acos(z)/3.;
00148          *ev2=trA+2.*sq_p*cos(alpha_3);
00149          *ev1=trA-2.*sq_p*cos(alpha_3+M_PI/3.);
00150          *ev0=trA-2.*sq_p*cos(alpha_3-M_PI/3.);
00151       }
00152 }
00162 inline
00163 void  eigenvalues_and_eigenvectors1(const double A00,double* ev0,double* V00,const double tol)
00164 {
00165       eigenvalues1(A00,ev0);
00166       *V00=1.;
00167       return;
00168 }
00180 inline
00181 void  vectorInKernel2(const double A00,const double A10,const double A01,const double A11,
00182                       double* V0, double*V1)
00183 {
00184       register double absA00=fabs(A00);
00185       register double absA10=fabs(A10);
00186       register double absA01=fabs(A01);
00187       register double absA11=fabs(A11);
00188       register double m=absA11>absA10 ? absA11 : absA10;
00189       if (absA00>m || absA01>m) {
00190          *V0=-A01;
00191          *V1=A00;
00192       } else {
00193          if (m<=0) {
00194            *V0=1.;
00195            *V1=0.;
00196          } else {
00197            *V0=A11;
00198            *V1=-A10;
00199          }
00200      }
00201 }
00220 inline
00221 void  vectorInKernel3__nonZeroA00(const double A00,const double A10,const double A20,
00222                                 const double A01,const double A11,const double A21,
00223                                 const double A02,const double A12,const double A22,
00224                                 double* V0,double* V1,double* V2)
00225 {
00226     double TEMP0,TEMP1;
00227     register const double I00=1./A00;
00228     register const double IA10=I00*A10;
00229     register const double IA20=I00*A20;
00230     vectorInKernel2(A11-IA10*A01,A12-IA10*A02,
00231                     A21-IA20*A01,A22-IA20*A02,&TEMP0,&TEMP1);
00232     *V0=-(A10*TEMP0+A20*TEMP1);
00233     *V1=A00*TEMP0;
00234     *V2=A00*TEMP1;
00235 }
00236 
00254 inline
00255 void  eigenvalues_and_eigenvectors2(const double A00,const double A01,const double A11,
00256                                     double* ev0, double* ev1,
00257                                     double* V00, double* V10, double* V01, double* V11,
00258                                     const double tol)
00259 {
00260      double TEMP0,TEMP1;
00261      eigenvalues2(A00,A01,A11,ev0,ev1);
00262      const register double absev0=fabs(*ev0);
00263      const register double absev1=fabs(*ev1);
00264      register double max_ev=absev0>absev1 ? absev0 : absev1;
00265      if (fabs((*ev0)-(*ev1))<tol*max_ev) {
00266         *V00=1.;
00267         *V10=0.;
00268         *V01=0.;
00269         *V11=1.;
00270      } else {
00271         vectorInKernel2(A00-(*ev0),A01,A01,A11-(*ev0),&TEMP0,&TEMP1);
00272         const register double scale=1./sqrt(TEMP0*TEMP0+TEMP1*TEMP1);
00273         if (TEMP0<0.) {
00274             *V00=-TEMP0*scale;
00275             *V10=-TEMP1*scale;
00276             if (TEMP1<0.) {
00277                *V01=  *V10;
00278                *V11=-(*V00);
00279             } else {
00280                *V01=-(*V10);
00281                *V11= (*V00);
00282             }
00283         } else if (TEMP0>0.) {
00284             *V00=TEMP0*scale;
00285             *V10=TEMP1*scale;
00286             if (TEMP1<0.) {
00287                *V01=-(*V10);
00288                *V11= (*V00);
00289             } else {
00290                *V01= (*V10);
00291                *V11=-(*V00);
00292             }
00293         } else {
00294            *V00=0.;
00295            *V10=1;
00296            *V11=0.;
00297            *V01=1.;
00298        }
00299    }
00300 }
00309 inline
00310 void  normalizeVector3(double* V0,double* V1,double* V2)
00311 {
00312     register double s;
00313     if (*V0>0) {
00314         s=1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
00315         *V0*=s;
00316         *V1*=s;
00317         *V2*=s;
00318     } else if (*V0<0)  {
00319         s=-1./sqrt((*V0)*(*V0)+(*V1)*(*V1)+(*V2)*(*V2));
00320         *V0*=s;
00321         *V1*=s;
00322         *V2*=s;
00323     } else {
00324         if (*V1>0) {
00325             s=1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
00326             *V1*=s;
00327             *V2*=s;
00328         } else if (*V1<0)  {
00329             s=-1./sqrt((*V1)*(*V1)+(*V2)*(*V2));
00330             *V1*=s;
00331             *V2*=s;
00332         } else {
00333             *V2=1.;
00334         }
00335     }
00336 }
00363 inline
00364 void  eigenvalues_and_eigenvectors3(const double A00, const double A01, const double A02,
00365                                     const double A11, const double A12, const double A22,
00366                                     double* ev0, double* ev1, double* ev2,
00367                                     double* V00, double* V10, double* V20,
00368                                     double* V01, double* V11, double* V21,
00369                                     double* V02, double* V12, double* V22,
00370                                     const double tol)
00371 {
00372       register const double absA01=fabs(A01);
00373       register const double absA02=fabs(A02);
00374       register const double m=absA01>absA02 ? absA01 : absA02;
00375       if (m<=0) {
00376         double TEMP_V00,TEMP_V10,TEMP_V01,TEMP_V11,TEMP_EV0,TEMP_EV1;
00377         eigenvalues_and_eigenvectors2(A11,A12,A22,
00378                                       &TEMP_EV0,&TEMP_EV1,
00379                                       &TEMP_V00,&TEMP_V10,&TEMP_V01,&TEMP_V11,tol);
00380         if (A00<=TEMP_EV0) {
00381             *V00=1.;
00382             *V10=0.;
00383             *V20=0.;
00384             *V01=0.;
00385             *V11=TEMP_V00;
00386             *V21=TEMP_V10;
00387             *V02=0.;
00388             *V12=TEMP_V01;
00389             *V22=TEMP_V11;
00390             *ev0=A00;
00391             *ev1=TEMP_EV0;
00392             *ev2=TEMP_EV1;
00393         } else if (A00>TEMP_EV1) {
00394             *V02=1.;
00395             *V12=0.;
00396             *V22=0.;
00397             *V00=0.;
00398             *V10=TEMP_V00;
00399             *V20=TEMP_V10;
00400             *V01=0.;
00401             *V11=TEMP_V01;
00402             *V21=TEMP_V11;
00403             *ev0=TEMP_EV0;
00404             *ev1=TEMP_EV1;
00405             *ev2=A00;
00406         } else {
00407             *V01=1.;
00408             *V11=0.;
00409             *V21=0.;
00410             *V00=0.;
00411             *V10=TEMP_V00;
00412             *V20=TEMP_V10;
00413             *V02=0.;
00414             *V12=TEMP_V01;
00415             *V22=TEMP_V11;
00416             *ev0=TEMP_EV0;
00417             *ev1=A00;
00418             *ev2=TEMP_EV1;
00419         }
00420       } else {
00421          eigenvalues3(A00,A01,A02,A11,A12,A22,ev0,ev1,ev2);
00422          const register double absev0=fabs(*ev0);
00423          const register double absev1=fabs(*ev1);
00424          const register double absev2=fabs(*ev2);
00425          register double max_ev=absev0>absev1 ? absev0 : absev1;
00426          max_ev=max_ev>absev2 ? max_ev : absev2;
00427          const register double d_01=fabs((*ev0)-(*ev1));
00428          const register double d_12=fabs((*ev1)-(*ev2));
00429          const register double max_d=d_01>d_12 ? d_01 : d_12;
00430          if (max_d<=tol*max_ev) {
00431              *V00=1.;
00432              *V10=0;
00433              *V20=0;
00434              *V01=0;
00435              *V11=1.;
00436              *V21=0;
00437              *V02=0;
00438              *V12=0;
00439              *V22=1.;
00440          } else {
00441             const register double S00=A00-(*ev0);
00442             const register double absS00=fabs(S00);
00443             if (absS00>m) {
00444                 vectorInKernel3__nonZeroA00(S00,A01,A02,A01,A11-(*ev0),A12,A02,A12,A22-(*ev0),V00,V10,V20);
00445             } else if (absA02<m) {
00446                 vectorInKernel3__nonZeroA00(A01,A11-(*ev0),A12,S00,A01,A02,A02,A12,A22-(*ev0),V00,V10,V20);
00447             } else {
00448                 vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev0),S00,A01,A02,A01,A11-(*ev0),A12,V00,V10,V20);
00449             }
00450             normalizeVector3(V00,V10,V20);;
00451             const register double T00=A00-(*ev2);
00452             const register double absT00=fabs(T00);
00453             if (absT00>m) {
00454                  vectorInKernel3__nonZeroA00(T00,A01,A02,A01,A11-(*ev2),A12,A02,A12,A22-(*ev2),V02,V12,V22);
00455             } else if (absA02<m) {
00456                  vectorInKernel3__nonZeroA00(A01,A11-(*ev2),A12,T00,A01,A02,A02,A12,A22-(*ev2),V02,V12,V22);
00457             } else {
00458                  vectorInKernel3__nonZeroA00(A02,A12,A22-(*ev2),T00,A01,A02,A01,A11-(*ev2),A12,V02,V12,V22);
00459             }
00460             const register double dot=(*V02)*(*V00)+(*V12)*(*V10)+(*V22)*(*V20);
00461             *V02-=dot*(*V00);
00462             *V12-=dot*(*V10);
00463             *V22-=dot*(*V20);
00464             normalizeVector3(V02,V12,V22);
00465             *V01=(*V10)*(*V22)-(*V12)*(*V20);
00466             *V11=(*V20)*(*V02)-(*V00)*(*V22);
00467             *V21=(*V00)*(*V12)-(*V02)*(*V10);
00468             normalizeVector3(V01,V11,V21);
00469          }
00470    }
00471 }
00472 
00473 // General tensor product: arg_2(SL x SR) = arg_0(SL x SM) * arg_1(SM x SR)
00474 // SM is the product of the last axis_offset entries in arg_0.getShape().
00475 inline
00476 void matrix_matrix_product(const int SL, const int SM, const int SR, const double* A, const double* B, double* C, int transpose)
00477 {
00478   if (transpose == 0) {
00479     for (int i=0; i<SL; i++) {
00480       for (int j=0; j<SR; j++) {
00481         double sum = 0.0;
00482         for (int l=0; l<SM; l++) {
00483       sum += A[i+SL*l] * B[l+SM*j];
00484         }
00485         C[i+SL*j] = sum;
00486       }
00487     }
00488   }
00489   else if (transpose == 1) {
00490     for (int i=0; i<SL; i++) {
00491       for (int j=0; j<SR; j++) {
00492         double sum = 0.0;
00493         for (int l=0; l<SM; l++) {
00494       sum += A[i*SM+l] * B[l+SM*j];
00495         }
00496         C[i+SL*j] = sum;
00497       }
00498     }
00499   }
00500   else if (transpose == 2) {
00501     for (int i=0; i<SL; i++) {
00502       for (int j=0; j<SR; j++) {
00503         double sum = 0.0;
00504         for (int l=0; l<SM; l++) {
00505       sum += A[i+SL*l] * B[l*SR+j];
00506         }
00507         C[i+SL*j] = sum;
00508       }
00509     }
00510   }
00511 }
00512 
00513 template <typename UnaryFunction>
00514 inline void tensor_unary_operation(const int size,
00515                  const double *arg1,
00516                  double * argRes,
00517                  UnaryFunction operation)
00518 {
00519   for (int i = 0; i < size; ++i) {
00520     argRes[i] = operation(arg1[i]);
00521   }
00522   return;
00523 }
00524 
00525 template <typename BinaryFunction>
00526 inline void tensor_binary_operation(const int size,
00527                  const double *arg1,
00528                  const double *arg2,
00529                  double * argRes,
00530                  BinaryFunction operation)
00531 {
00532   for (int i = 0; i < size; ++i) {
00533     argRes[i] = operation(arg1[i], arg2[i]);
00534   }
00535   return;
00536 }
00537 
00538 template <typename BinaryFunction>
00539 inline void tensor_binary_operation(const int size,
00540                  double arg1,
00541                  const double *arg2,
00542                  double *argRes,
00543                  BinaryFunction operation)
00544 {
00545   for (int i = 0; i < size; ++i) {
00546     argRes[i] = operation(arg1, arg2[i]);
00547   }
00548   return;
00549 }
00550 
00551 template <typename BinaryFunction>
00552 inline void tensor_binary_operation(const int size,
00553                  const double *arg1,
00554                  double arg2,
00555                  double *argRes,
00556                  BinaryFunction operation)
00557 {
00558   for (int i = 0; i < size; ++i) {
00559     argRes[i] = operation(arg1[i], arg2);
00560   }
00561   return;
00562 }
00563 
00564 } // end of namespace
00565 #endif