libflame
revision_anchor
|
/*
   libflame
   An object-based infrastructure for developing high-performance
   dense linear algebra libraries.

   Copyright (C) 2011, The University of Texas

   libflame is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   libflame is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with libflame; if you did not receive a copy, see
   http://www.gnu.org/licenses/.

   For more information, please contact us at flame@cs.utexas.edu or
   send mail to:

   Field G. Van Zee and/or
   Robert A. van de Geijn
   The University of Texas at Austin
   Department of Computer Sciences
   1 University Station C0500
   Austin TX 78712
*/

#ifndef BLIS_MACRO_DEFS_H
#define BLIS_MACRO_DEFS_H

// Usage notes for the macros in this header:
//
// - Every macro takes POINTERS to its scalar operands unless noted otherwise.
//   The complex variants access the operands' `real` and `imag` members.
// - Arguments may be evaluated more than once; do not pass expressions with
//   side effects (e.g. `p++`).
// - Statement-like macros carry their own terminator (a trailing `;` or a
//   brace-enclosed block), so they work with or without a semicolon at the
//   call site. As a consequence they cannot be used as the unbraced body of
//   an `if` that has an `else` clause; brace the `if` body in that case.
// - Block macros declare locals named temp, tempr, tempi, zreal, zimag,
//   areal and aimag; do not pass arguments spelled with those names.
// - This header does not define sqrt(), fabs(), FLA_SUCCESS, FLA_FAILURE,
//   trans_t, conj_t, the bli_is_*() predicates or the BLIS_* parameter
//   constants referenced below; the including translation unit must provide
//   them before the corresponding macro is expanded.

// --- Constants ---------------------------------------------------------------

#define BLIS_NO_INTRINSICS  0
#define BLIS_SSE_INTRINSICS 3

// --- boolean ---

#undef FALSE
#define FALSE 0

#undef TRUE
#define TRUE 1

/*
// --- trans ---

#define BLIS_NO_TRANSPOSE      'n'
#define BLIS_TRANSPOSE         't'
#define BLIS_CONJ_NO_TRANSPOSE 'c'
#define BLIS_CONJ_TRANSPOSE    'h'

// --- conj ---

#define BLIS_NO_CONJUGATE      'n'
#define BLIS_CONJUGATE         'c'

// --- uplo ---

#define BLIS_LOWER_TRIANGULAR  'l'
#define BLIS_UPPER_TRIANGULAR  'u'

// --- side ---

#define BLIS_LEFT              'l'
#define BLIS_RIGHT             'r'

// --- diag ---

#define BLIS_NONUNIT_DIAG      'n'
#define BLIS_UNIT_DIAG         'u'
#define BLIS_ZERO_DIAG         'z'
*/

// --- Functional macros -------------------------------------------------------

// --- Type-agnostic ---

// NOTE: bli_min and bli_max evaluate each argument twice.

// --- min ---

#define bli_min( x, y ) \
    ( (x) < (y) ? (x) : (y) )

// --- max ---

#define bli_max( x, y ) \
    ( (x) > (y) ? (x) : (y) )

// --- Type-dependent ---

// --- neg1 ---
// Negate *x in place.

// void bli_sneg1( float* x );
#define bli_sneg1( x ) \
    *(x) *= -1.0F;

// void bli_dneg1( double* x );
#define bli_dneg1( x ) \
    *(x) *= -1.0;

// void bli_cneg1( scomplex* x );
#define bli_cneg1( x ) \
{ \
    (x)->real *= -1.0F; \
    (x)->imag *= -1.0F; \
}

// void bli_zneg1( dcomplex* x );
#define bli_zneg1( x ) \
{ \
    (x)->real *= -1.0; \
    (x)->imag *= -1.0; \
}

// --- neg2 ---
// Store the negation of *x in *y.

// void bli_sneg2( float* x, float* y );
#define bli_sneg2( x, y ) \
    *(y) = -1.0F * *(x);

// void bli_dneg2( double* x, double* y );
#define bli_dneg2( x, y ) \
    *(y) = -1.0 * *(x);

// void bli_cneg2( scomplex* x, scomplex* y );
#define bli_cneg2( x, y ) \
{ \
    (y)->real = -1.0F * (x)->real; \
    (y)->imag = -1.0F * (x)->imag; \
}

// void bli_zneg2( dcomplex* x, dcomplex* y );
#define bli_zneg2( x, y ) \
{ \
    (y)->real = -1.0 * (x)->real; \
    (y)->imag = -1.0 * (x)->imag; \
}

// --- sqrte ---
// Replace *alpha with its square root and report the outcome through *error.
// If the value (the real part, for the complex variants) is negative, *alpha
// is left untouched and *error is set to FLA_FAILURE; otherwise *error is set
// to FLA_SUCCESS. The complex variants only look at the real part and set the
// imaginary part to zero on success.

// void bli_ssqrte( float* alpha, int* error );
#define bli_ssqrte( alpha, error ) \
{ \
    if ( *(alpha) < 0.0F ) { *(error) = FLA_FAILURE; } \
    else { *(alpha) = ( float ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; } \
}

// void bli_dsqrte( double* alpha, int* error );
#define bli_dsqrte( alpha, error ) \
{ \
    if ( *(alpha) < 0.0 ) { *(error) = FLA_FAILURE; } \
    else { *(alpha) = ( double ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; } \
}

// void bli_csqrte( scomplex* alpha, int* error );
#define bli_csqrte( alpha, error ) \
{ \
    if ( (alpha)->real < 0.0F ) \
    { *(error) = FLA_FAILURE; } \
    else { \
        (alpha)->real = ( float ) sqrt( (alpha)->real ); \
        (alpha)->imag = 0.0F; *(error) = FLA_SUCCESS; } \
}

// void bli_zsqrte( dcomplex* alpha, int* error );
#define bli_zsqrte( alpha, error ) \
{ \
    if ( (alpha)->real < 0.0 ) \
    { *(error) = FLA_FAILURE; } \
    else { \
        (alpha)->real = ( double ) sqrt( (alpha)->real ); \
        (alpha)->imag = 0.0; *(error) = FLA_SUCCESS; } \
}

// --- absval2 ---
// Store the absolute value (modulus) of *alpha in *absval. The bli_c/bli_z
// variants store it in the real part of a complex result and zero the
// imaginary part; the bli_cs/bli_zd variants store it in a real scalar.

// void bli_sabsval2( float* alpha, float* absval );
#define bli_sabsval2( alpha, absval ) \
    *(absval) = ( float ) fabs( ( double ) *(alpha) );

// void bli_dabsval2( double* alpha, double* absval );
#define bli_dabsval2( alpha, absval ) \
    *(absval) = fabs( *(alpha) );

// void bli_cabsval2( scomplex* alpha, scomplex* absval );
#define bli_cabsval2( alpha, absval ) \
{ \
    (absval)->real = ( float ) sqrt( ( double ) (alpha)->real * (alpha)->real + \
                                                (alpha)->imag * (alpha)->imag ); \
    (absval)->imag = 0.0F; \
}

// void bli_csabsval2( scomplex* alpha, float* absval );
#define bli_csabsval2( alpha, absval ) \
    *(absval) = ( float ) sqrt( ( double ) (alpha)->real * (alpha)->real + \
                                           (alpha)->imag * (alpha)->imag );

// void bli_zabsval2( dcomplex* alpha, dcomplex* absval );
#define bli_zabsval2( alpha, absval ) \
{ \
    (absval)->real = sqrt( (alpha)->real * (alpha)->real + \
                           (alpha)->imag * (alpha)->imag ); \
    (absval)->imag = 0.0; \
}

// void bli_zdabsval2( dcomplex* alpha, double* absval );
#define bli_zdabsval2( alpha, absval ) \
    *(absval) = sqrt( (alpha)->real * (alpha)->real + \
                      (alpha)->imag * (alpha)->imag );

// --- absqr ---
// Replace *alpha with its squared magnitude (imaginary part becomes zero for
// the complex variants).

// void bli_sabsqr( float* alpha );
#define bli_sabsqr( alpha ) \
    *(alpha) = *(alpha) * *(alpha);

// void bli_dabsqr( double* alpha );
#define bli_dabsqr( alpha ) \
    *(alpha) = *(alpha) * *(alpha);

// void bli_cabsqr( scomplex* alpha );
#define bli_cabsqr( alpha ) \
{ \
    (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
    (alpha)->imag = 0.0F; \
}

// void bli_zabsqr( dcomplex* alpha );
#define bli_zabsqr( alpha ) \
{ \
    (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
    (alpha)->imag = 0.0; \
}

// --- invscals ---
// Divide *y by *a in place: y := y / a. No check is made for a zero divisor.

// void bli_sinvscals( float* a, float* y );
#define bli_sinvscals( a, y ) \
    *(y) = *(y) / *(a);

// void bli_dinvscals( double* a, double* y );
#define bli_dinvscals( a, y ) \
    *(y) = *(y) / *(a);

// void bli_csinvscals( float* a, scomplex* y );
#define bli_csinvscals( a, y ) \
{ \
    (y)->real = (y)->real / *(a); \
    (y)->imag = (y)->imag / *(a); \
}

// void bli_cinvscals( scomplex* a, scomplex* y );
#define bli_cinvscals( a, y ) \
{ \
    float temp  = (a)->real * (a)->real + (a)->imag * (a)->imag; \
    float zreal = ( (y)->real * (a)->real + (y)->imag * (a)->imag ) / temp; \
    float zimag = ( (y)->imag * (a)->real - (y)->real * (a)->imag ) / temp; \
    (y)->real = zreal; \
    (y)->imag = zimag; \
}

// void bli_zdinvscals( double* a, dcomplex* y );
#define bli_zdinvscals( a, y ) \
{ \
    (y)->real = (y)->real / *(a); \
    (y)->imag = (y)->imag / *(a); \
}

// void bli_zinvscals( dcomplex* a, dcomplex* y );
#define bli_zinvscals( a, y ) \
{ \
    double temp  = (a)->real * (a)->real + (a)->imag * (a)->imag; \
    double zreal = ( (y)->real * (a)->real + (y)->imag * (a)->imag ) / temp; \
    double zimag = ( (y)->imag * (a)->real - (y)->real * (a)->imag ) / temp; \
    (y)->real = zreal; \
    (y)->imag = zimag; \
}

// --- div3 ---
// a := x / y. No check is made for a zero divisor.

// void bli_sdiv3( float* x, float* y, float* a );
#define bli_sdiv3( x, y, a ) \
    *(a) = *(x) / *(y);

// void bli_ddiv3( double* x, double* y, double* a );
#define bli_ddiv3( x, y, a ) \
    *(a) = *(x) / *(y);

// void bli_cdiv3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cdiv3( x, y, a ) \
{ \
    float temp  = (y)->real * (y)->real + (y)->imag * (y)->imag; \
    float areal = ( (x)->real * (y)->real + (x)->imag * (y)->imag ) / temp; \
    float aimag = ( (x)->imag * (y)->real - (x)->real * (y)->imag ) / temp; \
    (a)->real = areal; \
    (a)->imag = aimag; \
}

// void bli_zdiv3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zdiv3( x, y, a ) \
{ \
    double temp  = (y)->real * (y)->real + (y)->imag * (y)->imag; \
    double areal = ( (x)->real * (y)->real + (x)->imag * (y)->imag ) / temp; \
    double aimag = ( (x)->imag * (y)->real - (x)->real * (y)->imag ) / temp; \
    (a)->real = areal; \
    (a)->imag = aimag; \
}

// --- add3 ---
// a := x + y.

// void bli_sadd3( float* x, float* y, float* a );
#define bli_sadd3( x, y, a ) \
    *(a) = *(x) + *(y);

// void bli_dadd3( double* x, double* y, double* a );
#define bli_dadd3( x, y, a ) \
    *(a) = *(x) + *(y);

// void bli_cadd3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cadd3( x, y, a ) \
{ \
    (a)->real = (x)->real + (y)->real; \
    (a)->imag = (x)->imag + (y)->imag; \
}

// void bli_zadd3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zadd3( x, y, a ) \
{ \
    (a)->real = (x)->real + (y)->real; \
    (a)->imag = (x)->imag + (y)->imag; \
}

// --- copys ---
// y := x, negating the imaginary part of the copy when bli_is_conj( conj )
// holds. The real variants ignore conj.

// void bli_scopys( conj_t conj, float* x, float* y );
#define bli_scopys( conj, x, y ) \
    *(y) = *(x);

// void bli_dcopys( conj_t conj, double* x, double* y );
#define bli_dcopys( conj, x, y ) \
    *(y) = *(x);

// void bli_ccopys( conj_t conj, scomplex* x, scomplex* y );
#define bli_ccopys( conj, x, y ) \
{ \
    *(y) = *(x); \
    if ( bli_is_conj( conj ) ) (y)->imag *= -1.0F; \
}

// void bli_zcopys( conj_t conj, dcomplex* x, dcomplex* y );
#define bli_zcopys( conj, x, y ) \
{ \
    *(y) = *(x); \
    if ( bli_is_conj( conj ) ) (y)->imag *= -1.0; \
}

// --- scals ---
// Scale *y by *a in place: y := a * y.

// void bli_sscals( float* a, float* y );
#define bli_sscals( a, y ) \
    *(y) = *(a) * *(y);

// void bli_dscals( double* a, double* y );
#define bli_dscals( a, y ) \
    *(y) = *(a) * *(y);

// void bli_csscals( float* a, scomplex* y );
#define bli_csscals( a, y ) \
{ \
    (y)->real = *(a) * (y)->real; \
    (y)->imag = *(a) * (y)->imag; \
}

// void bli_cscals( scomplex* a, scomplex* y );
#define bli_cscals( a, y ) \
{ \
    float tempr = (a)->real * (y)->real - (a)->imag * (y)->imag; \
    float tempi = (a)->imag * (y)->real + (a)->real * (y)->imag; \
    (y)->real = tempr; \
    (y)->imag = tempi; \
}

// void bli_zdscals( double* a, dcomplex* y );
#define bli_zdscals( a, y ) \
{ \
    (y)->real = *(a) * (y)->real; \
    (y)->imag = *(a) * (y)->imag; \
}

// void bli_zscals( dcomplex* a, dcomplex* y );
#define bli_zscals( a, y ) \
{ \
    double tempr = (a)->real * (y)->real - (a)->imag * (y)->imag; \
    double tempi = (a)->imag * (y)->real + (a)->real * (y)->imag; \
    (y)->real = tempr; \
    (y)->imag = tempi; \
}

// --- mult3 ---
// a := x * y. The complex variants compute into temporaries first, so a may
// alias x or y.

// void bli_smult3( float* x, float* y, float* a );
#define bli_smult3( x, y, a ) \
    *(a) = *(x) * *(y);

// void bli_dmult3( double* x, double* y, double* a );
#define bli_dmult3( x, y, a ) \
    *(a) = *(x) * *(y);

// void bli_cmult3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cmult3( x, y, a ) \
{ \
    float tempr = (x)->real * (y)->real - (x)->imag * (y)->imag; \
    float tempi = (x)->imag * (y)->real + (x)->real * (y)->imag; \
    (a)->real = tempr; \
    (a)->imag = tempi; \
}

// void bli_zmult3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zmult3( x, y, a ) \
{ \
    double tempr = (x)->real * (y)->real - (x)->imag * (y)->imag; \
    double tempi = (x)->imag * (y)->real + (x)->real * (y)->imag; \
    (a)->real = tempr; \
    (a)->imag = tempi; \
}

// --- mult4 ---
// y2 := y1 + alpha * x. The complex variants compute both components into
// temporaries before storing, so y2 may alias alpha, x or y1.

// void bli_smult4( float* alpha, float* x, float* y1, float* y2 );
#define bli_smult4( alpha, x, y1, y2 ) \
    *(y2) = *(y1) + *(alpha) * *(x);

// void bli_dmult4( double* alpha, double* x, double* y1, double* y2 );
#define bli_dmult4( alpha, x, y1, y2 ) \
    *(y2) = *(y1) + *(alpha) * *(x);

// void bli_cmult4( scomplex* alpha, scomplex* x, scomplex* y1, scomplex* y2 );
#define bli_cmult4( alpha, x, y1, y2 ) \
{ \
    float tempr = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
    float tempi = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
    (y2)->real = tempr; \
    (y2)->imag = tempi; \
}

// void bli_zmult4( dcomplex* alpha, dcomplex* x, dcomplex* y1, dcomplex* y2 );
#define bli_zmult4( alpha, x, y1, y2 ) \
{ \
    double tempr = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
    double tempi = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
    (y2)->real = tempr; \
    (y2)->imag = tempi; \
}

// --- conjs ---
// Conjugate *a in place. The real variants expand to an empty statement.

// void bli_sconjs( float* a );
#define bli_sconjs( a ) \
    ;

// void bli_dconjs( double* a );
#define bli_dconjs( a ) \
    ;

// void bli_cconjs( scomplex* a );
#define bli_cconjs( a ) \
    (a)->imag *= -1.0F;

// void bli_zconjs( dcomplex* a );
#define bli_zconjs( a ) \
    (a)->imag *= -1.0;

// --- copyconj ---
// y := conj( x ). The real variants are plain copies.

// void bli_scopyconj( float* x, float* y );
#define bli_scopyconj( x, y ) \
    *(y) = *(x);

// void bli_dcopyconj( double* x, double* y );
#define bli_dcopyconj( x, y ) \
    *(y) = *(x);

// void bli_ccopyconj( scomplex* x, scomplex* y );
#define bli_ccopyconj( x, y ) \
{ \
    (y)->real = (x)->real; \
    (y)->imag = -1.0F * (x)->imag; \
}

// void bli_zcopyconj( dcomplex* x, dcomplex* y );
#define bli_zcopyconj( x, y ) \
{ \
    (y)->real = (x)->real; \
    (y)->imag = -1.0 * (x)->imag; \
}

// --- eq1 ---
// Expression macros (no trailing semicolon): nonzero when *alpha compares
// exactly equal to one.

// int bli_seq1( float* alpha );
#define bli_seq1( alpha ) \
    ( *(alpha) == 1.0F )

// int bli_deq1( double* alpha );
#define bli_deq1( alpha ) \
    ( *(alpha) == 1.0 )

// int bli_ceq1( scomplex* alpha );
#define bli_ceq1( alpha ) \
    ( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )

// int bli_zeq1( dcomplex* alpha );
#define bli_zeq1( alpha ) \
    ( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )

// --- Swapping/toggle macros --------------------------------------------------

// The swap and toggle macros take modifiable lvalues (not pointers to them)
// and update them in place.

// --- swap_pointers ---

#define bli_sswap_pointers( a, b ) \
{ \
    float* temp = (a); \
    (a) = (b); \
    (b) = temp; \
}

#define bli_dswap_pointers( a, b ) \
{ \
    double* temp = (a); \
    (a) = (b); \
    (b) = temp; \
}

#define bli_cswap_pointers( a, b ) \
{ \
    void* temp = (a); \
    (a) = (b); \
    (b) = temp; \
}

#define bli_zswap_pointers( a, b ) \
{ \
    void* temp = (a); \
    (a) = (b); \
    (b) = temp; \
}

// --- swap_ints ---

#define bli_swap_ints( a, b ) \
{ \
    int temp = (a); \
    (a) = (b); \
    (b) = temp; \
}

// --- swap_trans ---

#define bli_swap_trans( a, b ) \
{ \
    trans_t temp = (a); \
    (a) = (b); \
    (b) = temp; \
}

// --- swap_conj ---

#define bli_swap_conj( a, b ) \
{ \
    conj_t temp = (a); \
    (a) = (b); \
    (b) = temp; \
}

// --- toggle_side ---

#define bli_toggle_side( side ) \
{ \
    if ( bli_is_left( side ) ) (side) = BLIS_RIGHT; \
    else                       (side) = BLIS_LEFT; \
}

// --- toggle_uplo ---

#define bli_toggle_uplo( uplo ) \
{ \
    if ( bli_is_lower( uplo ) ) (uplo) = BLIS_UPPER_TRIANGULAR; \
    else                        (uplo) = BLIS_LOWER_TRIANGULAR; \
}

// --- toggle_trans ---
// Flips the transposition while keeping the conjugation:
// no-trans <-> trans, conj-no-trans <-> conj-trans.

#define bli_toggle_trans( trans ) \
{ \
    if      ( bli_is_notrans( trans ) )     (trans) = BLIS_TRANSPOSE; \
    else if ( bli_is_trans( trans ) )       (trans) = BLIS_NO_TRANSPOSE; \
    else if ( bli_is_conjnotrans( trans ) ) (trans) = BLIS_CONJ_TRANSPOSE; \
    else                                    (trans) = BLIS_CONJ_NO_TRANSPOSE; \
}

// --- toggle_conjtrans ---
// Maps no-trans to conj-trans; every other value maps to no-trans.

#define bli_toggle_conjtrans( trans ) \
{ \
    if ( bli_is_notrans( trans ) ) (trans) = BLIS_CONJ_TRANSPOSE; \
    else                           (trans) = BLIS_NO_TRANSPOSE; \
}

// --- toggle_conj ---

#define bli_toggle_conj( conj ) \
{ \
    if ( bli_is_conj( conj ) ) (conj) = BLIS_NO_CONJUGATE; \
    else                       (conj) = BLIS_CONJUGATE; \
}

#endif // #ifndef BLIS_MACRO_DEFS_H