libflame revision_anchor
|
/*
   libflame
   An object-based infrastructure for developing high-performance
   dense linear algebra libraries.

   Copyright (C) 2011, The University of Texas

   libflame is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   libflame is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with libflame; if you did not receive a copy, see
   http://www.gnu.org/licenses/.

   For more information, please contact us at flame@cs.utexas.edu or
   send mail to:

   Field G. Van Zee and/or
   Robert A. van de Geijn
   The University of Texas at Austin
   Department of Computer Sciences
   1 University Station C0500
   Austin TX 78712
*/

#ifndef BLIS_MACRO_DEFS_H
#define BLIS_MACRO_DEFS_H

// Conventions used by the macros in this header:
//
// - The "// <type> name( ... );" line above each macro is a pseudo-prototype
//   describing the argument types the expansion expects; it is not a real
//   declaration.
// - Most macros mention an argument more than once in their expansion, so
//   arguments should be free of side effects.
// - Statement-like macros carry their own terminating ';' or are wrapped in
//   braces. Any expansion that consists of more than one statement is wrapped
//   in braces so that it acts as a single statement under an unbraced
//   if/for/while.
// - Braced macros that declare locals (temp, tempr, tempi, zreal, zimag,
//   areal, aimag) must not be handed arguments that refer to a variable of
//   the same name.
// - The sqrte macros expand to sqrt(), FLA_SUCCESS and FLA_FAILURE, absval2
//   expands to fabs()/sqrt(), and the copys/toggle macros expand to
//   bli_does_conj()/bli_is_*(). None of these are defined in this header;
//   they must be in scope wherever the macros are used.

// --- Constants ---------------------------------------------------------------

// --- boolean ---

#undef FALSE
#define FALSE 0

#undef TRUE
#define TRUE 1

// --- trans ---

#define BLIS_NO_TRANSPOSE      'n'
#define BLIS_TRANSPOSE         't'
#define BLIS_CONJ_NO_TRANSPOSE 'c'
#define BLIS_CONJ_TRANSPOSE    'h'

// --- conj ---

#define BLIS_NO_CONJUGATE      'n'
#define BLIS_CONJUGATE         'c'

// --- uplo ---

#define BLIS_LOWER_TRIANGULAR  'l'
#define BLIS_UPPER_TRIANGULAR  'u'

// --- side ---

#define BLIS_LEFT              'l'
#define BLIS_RIGHT             'r'

// --- diag ---

#define BLIS_NONUNIT_DIAG      'n'
#define BLIS_UNIT_DIAG         'u'
#define BLIS_ZERO_DIAG         'z'

// --- Functional macros -------------------------------------------------------

// --- Type agnostic ---

// --- min ---

// Yields the smaller of x and y; the selected argument is evaluated twice.
#define bli_min( x, y ) \
( (x) < (y) ? (x) : (y) )

// --- max ---

// Yields the larger of x and y; the selected argument is evaluated twice.
#define bli_max( x, y ) \
( (x) > (y) ? (x) : (y) )

// --- Type dependent ---

// --- neg1 ---
// Negates *x in place.

// void bli_sneg1( float* x );
#define bli_sneg1( x ) \
*(x) *= -1.0F;

// void bli_dneg1( double* x );
#define bli_dneg1( x ) \
*(x) *= -1.0;

// void bli_cneg1( scomplex* x );
#define bli_cneg1( x ) \
{ \
	(x)->real *= -1.0F; \
	(x)->imag *= -1.0F; \
}

// void bli_zneg1( dcomplex* x );
#define bli_zneg1( x ) \
{ \
	(x)->real *= -1.0; \
	(x)->imag *= -1.0; \
}

// --- neg2 ---
// Stores the negation of *x in *y.

// void bli_sneg2( float* x, float* y );
#define bli_sneg2( x, y ) \
*(y) = -1.0F * *(x);

// void bli_dneg2( double* x, double* y );
#define bli_dneg2( x, y ) \
*(y) = -1.0 * *(x);

// void bli_cneg2( scomplex* x, scomplex* y );
#define bli_cneg2( x, y ) \
{ \
	(y)->real = -1.0F * (x)->real; \
	(y)->imag = -1.0F * (x)->imag; \
}

// void bli_zneg2( dcomplex* x, dcomplex* y );
#define bli_zneg2( x, y ) \
{ \
	(y)->real = -1.0 * (x)->real; \
	(y)->imag = -1.0 * (x)->imag; \
}

// --- sqrte ---
// Square root with error reporting. If the value (for the complex variants,
// its real part) is negative, *error is set to FLA_FAILURE and *alpha is left
// untouched. Otherwise the value is replaced by its square root and *error is
// set to FLA_SUCCESS. The complex variants only take the root of the real
// part and set the imaginary part to zero.

// void bli_ssqrte( float* alpha, int* error );
#define bli_ssqrte( alpha, error ) \
{ \
	if ( *(alpha) < 0.0F ) { *(error) = FLA_FAILURE; } \
	else { *(alpha) = ( float ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; } \
}

// void bli_dsqrte( double* alpha, int* error );
#define bli_dsqrte( alpha, error ) \
{ \
	if ( *(alpha) < 0.0 ) { *(error) = FLA_FAILURE; } \
	else { *(alpha) = ( double ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; } \
}

// void bli_csqrte( scomplex* alpha, int* error );
#define bli_csqrte( alpha, error ) \
{ \
	if ( (alpha)->real < 0.0F ) \
	{ *(error) = FLA_FAILURE; } \
	else { \
		(alpha)->real = ( float ) sqrt( (alpha)->real ); \
		(alpha)->imag = 0.0F; *(error) = FLA_SUCCESS; } \
}

// void bli_zsqrte( dcomplex* alpha, int* error );
#define bli_zsqrte( alpha, error ) \
{ \
	if ( (alpha)->real < 0.0 ) \
	{ *(error) = FLA_FAILURE; } \
	else { \
		(alpha)->real = ( double ) sqrt( (alpha)->real ); \
		(alpha)->imag = 0.0; *(error) = FLA_SUCCESS; } \
}

// --- absval2 ---
// Stores the absolute value (complex: the modulus, with a zero imaginary
// part) of *alpha in *sqrtalpha.

// void bli_sabsval2( float* alpha, float* sqrtalpha );
#define bli_sabsval2( alpha, sqrtalpha ) \
*(sqrtalpha) = ( float ) fabs( ( double ) *(alpha) );

// void bli_dabsval2( double* alpha, double* sqrtalpha );
#define bli_dabsval2( alpha, sqrtalpha ) \
*(sqrtalpha) = fabs( *(alpha) );

// void bli_cabsval2( scomplex* alpha, scomplex* sqrtalpha );
#define bli_cabsval2( alpha, sqrtalpha ) \
{ \
	(sqrtalpha)->real = ( float ) sqrt( ( double ) (alpha)->real * (alpha)->real + \
	                                               (alpha)->imag * (alpha)->imag ); \
	(sqrtalpha)->imag = 0.0F; \
}

// void bli_zabsval2( dcomplex* alpha, dcomplex* sqrtalpha );
#define bli_zabsval2( alpha, sqrtalpha ) \
{ \
	(sqrtalpha)->real = sqrt( (alpha)->real * (alpha)->real + \
	                          (alpha)->imag * (alpha)->imag ); \
	(sqrtalpha)->imag = 0.0; \
}

// --- absqr ---
// Replaces *alpha by its square (complex: real^2 + imag^2, with a zero
// imaginary part).

// void bli_sabsqr( float* alpha );
#define bli_sabsqr( alpha ) \
*(alpha) = *(alpha) * *(alpha);

// void bli_dabsqr( double* alpha );
#define bli_dabsqr( alpha ) \
*(alpha) = *(alpha) * *(alpha);

// void bli_cabsqr( scomplex* alpha );
#define bli_cabsqr( alpha ) \
{ \
	(alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
	(alpha)->imag = 0.0F; \
}

// void bli_zabsqr( dcomplex* alpha );
#define bli_zabsqr( alpha ) \
{ \
	(alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
	(alpha)->imag = 0.0; \
}

// --- invscals ---
// Divides *y by *a in place: y = y / a.

// void bli_sinvscals( float* a, float* y );
#define bli_sinvscals( a, y ) \
*(y) = *(y) / *(a);

// void bli_dinvscals( double* a, double* y );
#define bli_dinvscals( a, y ) \
*(y) = *(y) / *(a);

// void bli_cinvscals( scomplex* a, scomplex* y );
#define bli_cinvscals( a, y ) \
{ \
	float temp  = (a)->real * (a)->real + (a)->imag * (a)->imag; \
	float zreal = ( (y)->real * (a)->real + (y)->imag * (a)->imag ) / temp; \
	float zimag = ( (y)->imag * (a)->real - (y)->real * (a)->imag ) / temp; \
	(y)->real = zreal; \
	(y)->imag = zimag; \
}

// void bli_zinvscals( dcomplex* a, dcomplex* y );
#define bli_zinvscals( a, y ) \
{ \
	double temp  = (a)->real * (a)->real + (a)->imag * (a)->imag; \
	double zreal = ( (y)->real * (a)->real + (y)->imag * (a)->imag ) / temp; \
	double zimag = ( (y)->imag * (a)->real - (y)->real * (a)->imag ) / temp; \
	(y)->real = zreal; \
	(y)->imag = zimag; \
}

// --- div3 ---
// Stores the quotient in a third operand: a = x / y.

// void bli_sdiv3( float* x, float* y, float* a );
#define bli_sdiv3( x, y, a ) \
*(a) = *(x) / *(y);

// void bli_ddiv3( double* x, double* y, double* a );
#define bli_ddiv3( x, y, a ) \
*(a) = *(x) / *(y);

// void bli_cdiv3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cdiv3( x, y, a ) \
{ \
	float temp  = (y)->real * (y)->real + (y)->imag * (y)->imag; \
	float areal = ( (x)->real * (y)->real + (x)->imag * (y)->imag ) / temp; \
	float aimag = ( (x)->imag * (y)->real - (x)->real * (y)->imag ) / temp; \
	(a)->real = areal; \
	(a)->imag = aimag; \
}

// void bli_zdiv3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zdiv3( x, y, a ) \
{ \
	double temp  = (y)->real * (y)->real + (y)->imag * (y)->imag; \
	double areal = ( (x)->real * (y)->real + (x)->imag * (y)->imag ) / temp; \
	double aimag = ( (x)->imag * (y)->real - (x)->real * (y)->imag ) / temp; \
	(a)->real = areal; \
	(a)->imag = aimag; \
}

// --- add3 ---
// Stores the sum in a third operand: a = x + y.

// void bli_sadd3( float* x, float* y, float* a );
#define bli_sadd3( x, y, a ) \
*(a) = *(x) + *(y);

// void bli_dadd3( double* x, double* y, double* a );
#define bli_dadd3( x, y, a ) \
*(a) = *(x) + *(y);

// void bli_cadd3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cadd3( x, y, a ) \
{ \
	(a)->real = (x)->real + (y)->real; \
	(a)->imag = (x)->imag + (y)->imag; \
}

// void bli_zadd3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zadd3( x, y, a ) \
{ \
	(a)->real = (x)->real + (y)->real; \
	(a)->imag = (x)->imag + (y)->imag; \
}

// --- copys ---
// Copies *x to *y. The real variants ignore conj; the complex variants negate
// the imaginary part of *y after the copy when bli_does_conj( conj ) is true.

// void bli_scopys( char conj, float* x, float* y );
#define bli_scopys( conj, x, y ) \
*(y) = *(x);

// void bli_dcopys( char conj, double* x, double* y );
#define bli_dcopys( conj, x, y ) \
*(y) = *(x);

// void bli_ccopys( char conj, scomplex* x, scomplex* y );
#define bli_ccopys( conj, x, y ) \
{ \
	*(y) = *(x); \
	if ( bli_does_conj( conj ) ) (y)->imag *= -1.0F; \
}

// void bli_zcopys( char conj, dcomplex* x, dcomplex* y );
#define bli_zcopys( conj, x, y ) \
{ \
	*(y) = *(x); \
	if ( bli_does_conj( conj ) ) (y)->imag *= -1.0; \
}

// --- scals ---
// Scales *y by *a in place: y = a * y.

// void bli_sscals( float* a, float* y );
#define bli_sscals( a, y ) \
*(y) = *(a) * *(y);

// void bli_dscals( double* a, double* y );
#define bli_dscals( a, y ) \
*(y) = *(a) * *(y);

// void bli_cscals( scomplex* a, scomplex* y );
#define bli_cscals( a, y ) \
{ \
	float tempr = (a)->real * (y)->real - (a)->imag * (y)->imag; \
	float tempi = (a)->imag * (y)->real + (a)->real * (y)->imag; \
	(y)->real = tempr; \
	(y)->imag = tempi; \
}

// void bli_zscals( dcomplex* a, dcomplex* y );
#define bli_zscals( a, y ) \
{ \
	double tempr = (a)->real * (y)->real - (a)->imag * (y)->imag; \
	double tempi = (a)->imag * (y)->real + (a)->real * (y)->imag; \
	(y)->real = tempr; \
	(y)->imag = tempi; \
}

// --- mult3 ---
// Stores the product in a third operand: a = x * y.

// void bli_smult3( float* x, float* y, float* a );
#define bli_smult3( x, y, a ) \
*(a) = *(x) * *(y);

// void bli_dmult3( double* x, double* y, double* a );
#define bli_dmult3( x, y, a ) \
*(a) = *(x) * *(y);

// void bli_cmult3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cmult3( x, y, a ) \
{ \
	float tempr = (x)->real * (y)->real - (x)->imag * (y)->imag; \
	float tempi = (x)->imag * (y)->real + (x)->real * (y)->imag; \
	(a)->real = tempr; \
	(a)->imag = tempi; \
}

// void bli_zmult3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zmult3( x, y, a ) \
{ \
	double tempr = (x)->real * (y)->real - (x)->imag * (y)->imag; \
	double tempi = (x)->imag * (y)->real + (x)->real * (y)->imag; \
	(a)->real = tempr; \
	(a)->imag = tempi; \
}

// --- mult4 ---
// Multiply-accumulate into a separate operand: y2 = y1 + alpha * x.
// The complex variants write y2->real before reading the operands for
// y2->imag, so y2 must not alias alpha or x.

// void bli_smult4( float* alpha, float* x, float* y1, float* y2 );
#define bli_smult4( alpha, x, y1, y2 ) \
*(y2) = *(y1) + *(alpha) * *(x);

// void bli_dmult4( double* alpha, double* x, double* y1, double* y2 );
#define bli_dmult4( alpha, x, y1, y2 ) \
*(y2) = *(y1) + *(alpha) * *(x);

// void bli_cmult4( scomplex* alpha, scomplex* x, scomplex* y1, scomplex* y2 );
#define bli_cmult4( alpha, x, y1, y2 ) \
{ \
	(y2)->real = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
	(y2)->imag = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
}

// void bli_zmult4( dcomplex* alpha, dcomplex* x, dcomplex* y1, dcomplex* y2 );
#define bli_zmult4( alpha, x, y1, y2 ) \
{ \
	(y2)->real = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
	(y2)->imag = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
}

// --- conjs ---
// Conjugates *a in place. The real variants expand to an empty statement.

// void bli_sconjs( float* a );
#define bli_sconjs( a ) \
;

// void bli_dconjs( double* a );
#define bli_dconjs( a ) \
;

// void bli_cconjs( scomplex* a );
#define bli_cconjs( a ) \
(a)->imag *= -1.0F;

// void bli_zconjs( dcomplex* a );
#define bli_zconjs( a ) \
(a)->imag *= -1.0;

// --- copyconj ---
// Stores the conjugate of *x in *y (a plain copy for the real variants).

// void bli_scopyconj( float* x, float* y );
#define bli_scopyconj( x, y ) \
*(y) = *(x);

// void bli_dcopyconj( double* x, double* y );
#define bli_dcopyconj( x, y ) \
*(y) = *(x);

// void bli_ccopyconj( scomplex* x, scomplex* y );
#define bli_ccopyconj( x, y ) \
{ \
	(y)->real =         (x)->real; \
	(y)->imag = -1.0F * (x)->imag; \
}

// void bli_zcopyconj( dcomplex* x, dcomplex* y );
#define bli_zcopyconj( x, y ) \
{ \
	(y)->real =        (x)->real; \
	(y)->imag = -1.0 * (x)->imag; \
}

// --- eq1 ---
// Expression macros: non-zero when *alpha compares exactly equal to one
// (complex: real part equal to one and imaginary part equal to zero).
// The argument is parenthesized before it is dereferenced so that pointer
// expressions such as ( p + 1 ) are handled correctly.

// int bli_seq1( float* alpha );
#define bli_seq1( alpha ) \
( *(alpha) == 1.0F )

// int bli_deq1( double* alpha );
#define bli_deq1( alpha ) \
( *(alpha) == 1.0 )

// int bli_ceq1( scomplex* alpha );
#define bli_ceq1( alpha ) \
( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )

// int bli_zeq1( dcomplex* alpha );
#define bli_zeq1( alpha ) \
( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )

// --- Swapping/toggle macros --------------------------------------------------

// --- swap_pointers ---
// Exchange two pointer lvalues of the indicated element type.

#define bli_sswap_pointers( a, b ) \
{ \
	float* temp = (a); \
	(a) = (b); \
	(b) = temp; \
}

#define bli_dswap_pointers( a, b ) \
{ \
	double* temp = (a); \
	(a) = (b); \
	(b) = temp; \
}

#define bli_cswap_pointers( a, b ) \
{ \
	scomplex* temp = (a); \
	(a) = (b); \
	(b) = temp; \
}

#define bli_zswap_pointers( a, b ) \
{ \
	dcomplex* temp = (a); \
	(a) = (b); \
	(b) = temp; \
}

// --- swap_ints ---
// Exchange two int lvalues.

#define bli_swap_ints( a, b ) \
{ \
	int temp = (a); \
	(a) = (b); \
	(b) = temp; \
}

// --- swap_chars ---
// Exchange two char lvalues.

#define bli_swap_chars( a, b ) \
{ \
	char temp = (a); \
	(a) = (b); \
	(b) = temp; \
}

// --- toggle_side ---
// side becomes BLIS_RIGHT if bli_is_left( side ) holds, BLIS_LEFT otherwise.

#define bli_toggle_side( side ) \
{ \
	if ( bli_is_left( side ) ) (side) = BLIS_RIGHT; \
	else                       (side) = BLIS_LEFT; \
}

// --- toggle_uplo ---
// uplo becomes BLIS_UPPER_TRIANGULAR if bli_is_lower( uplo ) holds,
// BLIS_LOWER_TRIANGULAR otherwise.

#define bli_toggle_uplo( uplo ) \
{ \
	if ( bli_is_lower( uplo ) ) (uplo) = BLIS_UPPER_TRIANGULAR; \
	else                        (uplo) = BLIS_LOWER_TRIANGULAR; \
}

// --- toggle_trans ---
// Flips the transposition while keeping the conjugation:
// no-trans <-> trans, conj-no-trans <-> conj-trans.

#define bli_toggle_trans( trans ) \
{ \
	if      ( bli_is_notrans( trans ) )     (trans) = BLIS_TRANSPOSE; \
	else if ( bli_is_trans( trans ) )       (trans) = BLIS_NO_TRANSPOSE; \
	else if ( bli_is_conjnotrans( trans ) ) (trans) = BLIS_CONJ_TRANSPOSE; \
	else                                    (trans) = BLIS_CONJ_NO_TRANSPOSE; \
}

// --- toggle_conjtrans ---
// trans becomes BLIS_CONJ_TRANSPOSE if bli_is_notrans( trans ) holds; every
// other value is mapped to BLIS_NO_TRANSPOSE.

#define bli_toggle_conjtrans( trans ) \
{ \
	if ( bli_is_notrans( trans ) ) (trans) = BLIS_CONJ_TRANSPOSE; \
	else                           (trans) = BLIS_NO_TRANSPOSE; \
}

// --- toggle_conj ---
// conj becomes BLIS_NO_CONJUGATE if bli_is_conj( conj ) holds,
// BLIS_CONJUGATE otherwise.

#define bli_toggle_conj( conj ) \
{ \
	if ( bli_is_conj( conj ) ) (conj) = BLIS_NO_CONJUGATE; \
	else                       (conj) = BLIS_CONJUGATE; \
}

#endif // #ifndef BLIS_MACRO_DEFS_H