ESYS13  Revision_
BlockOps.h
Go to the documentation of this file.
00001 
00002 /*******************************************************
00003 *
00004 * Copyright (c) 2003-2012 by University of Queensland
00005 * Earth Systems Science Computational Center (ESSCC)
00006 * http://www.uq.edu.au/esscc
00007 *
00008 * Primary Business: Queensland, Australia
00009 * Licensed under the Open Software License version 3.0
00010 * http://www.opensource.org/licenses/osl-3.0.php
00011 *
00012 *******************************************************/
00013 
00014 
00015 #ifndef INC_PASO_BLOCKOPS
00016 #define INC_PASO_BLOCKOPS
00017 
00018 #include "Common.h"
00019 #include "Paso.h"
00020 
00021 #ifdef USE_LAPACK
00022    #ifdef MKL_LAPACK
00023       #include <mkl_lapack.h>
00024       #include <mkl_cblas.h>
00025    #else
00026       #include <clapack.h>
00027       #include <cblas.h>
00028    #endif
00029 #endif
00030 
00031 void Paso_BlockOps_solveAll(dim_t n_block,dim_t n,double* D,index_t* pivot,double* x);
00032 
00033 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a CLAPACK version to run a block size > 3.")  
00034 
00035 
00036 /* copy functions: */
00037 #define Paso_BlockOps_Cpy_2(R, V) \
00038 {\
00039    *(R+0)=*(V+0);\
00040    *(R+1)=*(V+1);\
00041 }
00042 
00043 #define Paso_BlockOps_Cpy_3(R, V) \
00044 {\
00045    *(R+0)=*(V+0);\
00046    *(R+1)=*(V+1);\
00047    *(R+2)=*(V+2);\
00048 }
00049 
00050 #define Paso_BlockOps_Cpy_N(N,R,V)  memcpy((void*)R, (void*)V, ( (size_t) N ) * sizeof(double) )
00051 
00052 
00053 /* operations R=R-MAT*V (V and R are not overlapping) */
00054 
00055 #define Paso_BlockOps_SMV_2(R,MAT, V) \
00056 {\
00057 register double S1=*(V+0); \
00058 register double S2=*(V+1);\
00059 register double A11=*(MAT+0);\
00060 register double A12=*(MAT+2);\
00061 register double A21=*(MAT+1);\
00062 register double A22=*(MAT+3);\
00063 *(R+0)-=A11 * S1 + A12 * S2;\
00064 *(R+1)-=A21 * S1 + A22 * S2;\
00065 }
00066 
00067 #define Paso_BlockOps_SMV_3(R, MAT, V)  \
00068 {\
00069 register double S1=*(V+0);\
00070 register double S2=*(V+1);\
00071 register double S3=*(V+2);\
00072 register double A11=*(MAT+0);\
00073 register double A21=*(MAT+1);\
00074 register double A31=*(MAT+2);\
00075 register double A12=*(MAT+3);\
00076 register double A22=*(MAT+4);\
00077 register double A32=*(MAT+5);\
00078 register double A13=*(MAT+6);\
00079 register double A23=*(MAT+7);\
00080 register double A33=*(MAT+8);\
00081 *(R+0)-=A11 * S1 + A12 * S2 + A13 * S3; \
00082 *(R+1)-=A21 * S1 + A22 * S2 + A23 * S3;\
00083 *(R+2)-=A31 * S1 + A32 * S2 + A33 * S3;\
00084 }
00085 
00086 
00087 /* Paso_BlockOps_SMV_N */
00088 
00089 #ifdef USE_LAPACK
00090    #define Paso_BlockOps_SMV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, -1., _MAT_, _N_, _V_, 1, (1.), _R_, 1)  
00091 #else
00092    #define Paso_BlockOps_SMV_N(N, R, MAT, V)   PASO_MISSING_CLAPACK 
00093 #endif
00094 
00095 #ifdef USE_LAPACK
00096      #define Paso_BlockOps_MV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, 1., _MAT_, _N_, _V_, 1, (0.), _R_, 1)  
00097    #else
00098       #define Paso_BlockOps_MV_N(N, R, MAT, V)   PASO_MISSING_CLAPACK 
00099 #endif
00100 
00101 #define Paso_BlockOps_invM_2(invA, A, failed) \
00102 {\
00103    register double A11=*(A+0);\
00104    register double A12=*(A+2);\
00105    register double A21=*(A+1);\
00106    register double A22=*(A+3);\
00107    register double D = A11*A22-A12*A21; \
00108    if (ABS(D) > 0 ){\
00109       D=1./D;\
00110       *(invA)= A22*D;\
00111       *(invA+1)=-A21*D;\
00112       *(invA+2)=-A12*D;\
00113       *(invA+3)= A11*D;\
00114    } else {\
00115       *failed=1;\
00116    }\
00117 }
00118 
00119 #define Paso_BlockOps_invM_3(invA, A, failed) \
00120 {\
00121    register double A11=*(A+0);\
00122    register double A21=*(A+1);\
00123    register double A31=*(A+2);\
00124    register double A12=*(A+3);\
00125    register double A22=*(A+4);\
00126    register double A32=*(A+5);\
00127    register double A13=*(A+6);\
00128    register double A23=*(A+7);\
00129    register double A33=*(A+8);\
00130    register double D =  A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);\
00131    if (ABS(D) > 0 ){\
00132       D=1./D;\
00133       *(invA  )=(A22*A33-A23*A32)*D;\
00134       *(invA+1)=(A31*A23-A21*A33)*D;\
00135       *(invA+2)=(A21*A32-A31*A22)*D;\
00136       *(invA+3)=(A13*A32-A12*A33)*D;\
00137       *(invA+4)=(A11*A33-A31*A13)*D;\
00138       *(invA+5)=(A12*A31-A11*A32)*D;\
00139       *(invA+6)=(A12*A23-A13*A22)*D;\
00140       *(invA+7)=(A13*A21-A11*A23)*D;\
00141       *(invA+8)=(A11*A22-A12*A21)*D;\
00142    } else {\
00143       *failed=1;\
00144    }\
00145 }
00146 
00147 #ifdef USE_LAPACK
00148    #ifdef MKL_LAPACK
00149      #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
00150      {\
00151         int NN=N; \
00152         int res =0; \
00153         dgetrf(&NN,&NN,MAT,&NN,PIVOT,&res); \
00154         if (res!=0) *failed=1;\
00155      }
00156    #else
00157      #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \
00158      {\
00159          int res=clapack_dgetrf(CblasColMajor, N,N,MAT,N, PIVOT); \
00160          if (res!=0) *failed=1;\
00161      }
00162    #endif
00163 #else
00164    #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
00165 #endif
00166 
00167 #ifdef USE_LAPACK
00168    #ifdef MKL_LAPACK
00169       #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
00170       {\
00171      int NN=N; \
00172      int res =0; \
00173      int ONE=1; \
00174      dgetrs("N", &NN, &ONE, MAT, &NN, PIVOT, X, &NN, &res); \
00175      if (res!=0) *failed=1;\
00176       }
00177    #else
00178       #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \
00179       {\
00180       int res=clapack_dgetrs(CblasColMajor, CblasNoTrans, N,1,MAT,N, PIVOT, X, N); \
00181      if (res!=0) *failed=1;\
00182       }
00183    #endif
00184 #else
00185    #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) PASO_MISSING_CLAPACK
00186 #endif
00187    
00188  
00189 /* inplace matrix vector product */
00190 
00191 #define Paso_BlockOps_MViP_2(MAT, V) \
00192 { \
00193    register double S1=*(V+0);\
00194    register double S2=*(V+1);\
00195    register double A11=*(MAT+0);\
00196    register double A12=*(MAT+2);\
00197    register double A21=*(MAT+1);\
00198    register double A22=*(MAT+3);\
00199    *(V+0)  = A11 * S1 + A12 * S2;\
00200    *(V+1)  = A21 * S1 + A22 * S2;\
00201 }
00202 
00203 
00204 #define Paso_BlockOps_MViP_3(MAT, V) \
00205 { \
00206    register double S1=*(V+0);\
00207    register double S2=*(V+1);\
00208    register double S3=*(V+2);\
00209    register double A11=*(MAT+0);\
00210    register double A21=*(MAT+1);\
00211    register double A31=*(MAT+2);\
00212    register double A12=*(MAT+3);\
00213    register double A22=*(MAT+4);\
00214    register double A32=*(MAT+5);\
00215    register double A13=*(MAT+6);\
00216    register double A23=*(MAT+7);\
00217    register double A33=*(MAT+8);\
00218    *(V+0)=A11 * S1 + A12 * S2 + A13 * S3; \
00219    *(V+1)=A21 * S1 + A22 * S2 + A23 * S3;\
00220    *(V+2)=A31 * S1 + A32 * S2 + A33 * S3;\
00221 }
00222 
00223     
00224 #endif /* #INC_Paso_BlockOpsOPS */