ESYS13
Revision_
|
00001 00002 /******************************************************* 00003 * 00004 * Copyright (c) 2003-2012 by University of Queensland 00005 * Earth Systems Science Computational Center (ESSCC) 00006 * http://www.uq.edu.au/esscc 00007 * 00008 * Primary Business: Queensland, Australia 00009 * Licensed under the Open Software License version 3.0 00010 * http://www.opensource.org/licenses/osl-3.0.php 00011 * 00012 *******************************************************/ 00013 00014 00015 #ifndef INC_PASO_BLOCKOPS 00016 #define INC_PASO_BLOCKOPS 00017 00018 #include "Common.h" 00019 #include "Paso.h" 00020 00021 #ifdef USE_LAPACK 00022 #ifdef MKL_LAPACK 00023 #include <mkl_lapack.h> 00024 #include <mkl_cblas.h> 00025 #else 00026 #include <clapack.h> 00027 #include <cblas.h> 00028 #endif 00029 #endif 00030 00031 void Paso_BlockOps_solveAll(dim_t n_block,dim_t n,double* D,index_t* pivot,double* x); 00032 00033 #define PASO_MISSING_CLAPACK Esys_setError(TYPE_ERROR, "You need to install a CLAPACK version to run a block size > 3.") 00034 00035 00036 /* copy functions: */ 00037 #define Paso_BlockOps_Cpy_2(R, V) \ 00038 {\ 00039 *(R+0)=*(V+0);\ 00040 *(R+1)=*(V+1);\ 00041 } 00042 00043 #define Paso_BlockOps_Cpy_3(R, V) \ 00044 {\ 00045 *(R+0)=*(V+0);\ 00046 *(R+1)=*(V+1);\ 00047 *(R+2)=*(V+2);\ 00048 } 00049 00050 #define Paso_BlockOps_Cpy_N(N,R,V) memcpy((void*)R, (void*)V, ( (size_t) N ) * sizeof(double) ) 00051 00052 00053 /* operations R=R-MAT*V (V and R are not overlapping) */ 00054 00055 #define Paso_BlockOps_SMV_2(R,MAT, V) \ 00056 {\ 00057 register double S1=*(V+0); \ 00058 register double S2=*(V+1);\ 00059 register double A11=*(MAT+0);\ 00060 register double A12=*(MAT+2);\ 00061 register double A21=*(MAT+1);\ 00062 register double A22=*(MAT+3);\ 00063 *(R+0)-=A11 * S1 + A12 * S2;\ 00064 *(R+1)-=A21 * S1 + A22 * S2;\ 00065 } 00066 00067 #define Paso_BlockOps_SMV_3(R, MAT, V) \ 00068 {\ 00069 register double S1=*(V+0);\ 00070 register double S2=*(V+1);\ 00071 register double S3=*(V+2);\ 00072 register double A11=*(MAT+0);\ 00073 register double A21=*(MAT+1);\ 00074 register double A31=*(MAT+2);\ 00075 register double A12=*(MAT+3);\ 00076 register double A22=*(MAT+4);\ 00077 register double A32=*(MAT+5);\ 00078 register double A13=*(MAT+6);\ 00079 register double A23=*(MAT+7);\ 00080 register double A33=*(MAT+8);\ 00081 *(R+0)-=A11 * S1 + A12 * S2 + A13 * S3; \ 00082 *(R+1)-=A21 * S1 + A22 * S2 + A23 * S3;\ 00083 *(R+2)-=A31 * S1 + A32 * S2 + A33 * S3;\ 00084 } 00085 00086 00087 /* Paso_BlockOps_SMV_N */ 00088 00089 #ifdef USE_LAPACK 00090 #define Paso_BlockOps_SMV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, -1., _MAT_, _N_, _V_, 1, (1.), _R_, 1) 00091 #else 00092 #define Paso_BlockOps_SMV_N(N, R, MAT, V) PASO_MISSING_CLAPACK 00093 #endif 00094 00095 #ifdef USE_LAPACK 00096 #define Paso_BlockOps_MV_N(_N_, _R_, _MAT_, _V_) cblas_dgemv(CblasColMajor,CblasNoTrans,_N_,_N_, 1., _MAT_, _N_, _V_, 1, (0.), _R_, 1) 00097 #else 00098 #define Paso_BlockOps_MV_N(N, R, MAT, V) PASO_MISSING_CLAPACK 00099 #endif 00100 00101 #define Paso_BlockOps_invM_2(invA, A, failed) \ 00102 {\ 00103 register double A11=*(A+0);\ 00104 register double A12=*(A+2);\ 00105 register double A21=*(A+1);\ 00106 register double A22=*(A+3);\ 00107 register double D = A11*A22-A12*A21; \ 00108 if (ABS(D) > 0 ){\ 00109 D=1./D;\ 00110 *(invA)= A22*D;\ 00111 *(invA+1)=-A21*D;\ 00112 *(invA+2)=-A12*D;\ 00113 *(invA+3)= A11*D;\ 00114 } else {\ 00115 *failed=1;\ 00116 }\ 00117 } 00118 00119 #define Paso_BlockOps_invM_3(invA, A, failed) \ 00120 {\ 00121 register double A11=*(A+0);\ 00122 register double A21=*(A+1);\ 00123 register double A31=*(A+2);\ 00124 register double A12=*(A+3);\ 00125 register double A22=*(A+4);\ 00126 register double A32=*(A+5);\ 00127 register double A13=*(A+6);\ 00128 register double A23=*(A+7);\ 00129 register double A33=*(A+8);\ 00130 register double D = A11*(A22*A33-A23*A32)+ A12*(A31*A23-A21*A33)+A13*(A21*A32-A31*A22);\ 00131 if (ABS(D) > 0 ){\ 00132 D=1./D;\ 00133 *(invA )=(A22*A33-A23*A32)*D;\ 00134 *(invA+1)=(A31*A23-A21*A33)*D;\ 00135 *(invA+2)=(A21*A32-A31*A22)*D;\ 00136 *(invA+3)=(A13*A32-A12*A33)*D;\ 00137 *(invA+4)=(A11*A33-A31*A13)*D;\ 00138 *(invA+5)=(A12*A31-A11*A32)*D;\ 00139 *(invA+6)=(A12*A23-A13*A22)*D;\ 00140 *(invA+7)=(A13*A21-A11*A23)*D;\ 00141 *(invA+8)=(A11*A22-A12*A21)*D;\ 00142 } else {\ 00143 *failed=1;\ 00144 }\ 00145 } 00146 00147 #ifdef USE_LAPACK 00148 #ifdef MKL_LAPACK 00149 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \ 00150 {\ 00151 int NN=N; \ 00152 int res =0; \ 00153 dgetrf(&NN,&NN,MAT,&NN,PIVOT,&res); \ 00154 if (res!=0) *failed=1;\ 00155 } 00156 #else 00157 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) \ 00158 {\ 00159 int res=clapack_dgetrf(CblasColMajor, N,N,MAT,N, PIVOT); \ 00160 if (res!=0) *failed=1;\ 00161 } 00162 #endif 00163 #else 00164 #define Paso_BlockOps_invM_N(N, MAT, PIVOT,failed) PASO_MISSING_CLAPACK 00165 #endif 00166 00167 #ifdef USE_LAPACK 00168 #ifdef MKL_LAPACK 00169 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \ 00170 {\ 00171 int NN=N; \ 00172 int res =0; \ 00173 int ONE=1; \ 00174 dgetrs("N", &NN, &ONE, MAT, &NN, PIVOT, X, &NN, &res); \ 00175 if (res!=0) *failed=1;\ 00176 } 00177 #else 00178 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) \ 00179 {\ 00180 int res=clapack_dgetrs(CblasColMajor, CblasNoTrans, N,1,MAT,N, PIVOT, X, N); \ 00181 if (res!=0) *failed=1;\ 00182 } 00183 #endif 00184 #else 00185 #define Paso_BlockOps_solve_N(N, X, MAT, PIVOT,failed) PASO_MISSING_CLAPACK 00186 #endif 00187 00188 00189 /* inplace matrix vector product */ 00190 00191 #define Paso_BlockOps_MViP_2(MAT, V) \ 00192 { \ 00193 register double S1=*(V+0);\ 00194 register double S2=*(V+1);\ 00195 register double A11=*(MAT+0);\ 00196 register double A12=*(MAT+2);\ 00197 register double A21=*(MAT+1);\ 00198 register double A22=*(MAT+3);\ 00199 *(V+0) = A11 * S1 + A12 * S2;\ 00200 *(V+1) = A21 * S1 + A22 * S2;\ 00201 } 00202 00203 00204 #define Paso_BlockOps_MViP_3(MAT, V) \ 00205 { \ 00206 register double S1=*(V+0);\ 00207 register double S2=*(V+1);\ 00208 register double S3=*(V+2);\ 00209 register double A11=*(MAT+0);\ 00210 register double A21=*(MAT+1);\ 00211 register double A31=*(MAT+2);\ 00212 register double A12=*(MAT+3);\ 00213 register double A22=*(MAT+4);\ 00214 register double A32=*(MAT+5);\ 00215 register double A13=*(MAT+6);\ 00216 register double A23=*(MAT+7);\ 00217 register double A33=*(MAT+8);\ 00218 *(V+0)=A11 * S1 + A12 * S2 + A13 * S3; \ 00219 *(V+1)=A21 * S1 + A22 * S2 + A23 * S3;\ 00220 *(V+2)=A31 * S1 + A32 * S2 + A33 * S3;\ 00221 } 00222 00223 00224 #endif /* #INC_Paso_BlockOpsOPS */