libflame  revision_anchor
blis_macro_defs.h
Go to the documentation of this file.
00001 /*
00002    libflame
00003    An object-based infrastructure for developing high-performance
00004    dense linear algebra libraries.
00005 
00006    Copyright (C) 2011, The University of Texas
00007 
00008    libflame is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU Lesser General Public License as
00010    published by the Free Software Foundation; either version 2.1 of
00011    the License, or (at your option) any later version.
00012 
00013    libflame is distributed in the hope that it will be useful, but
00014    WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016    Lesser General Public License for more details.
00017 
00018    You should have received a copy of the GNU Lesser General Public
00019    License along with libflame; if you did not receive a copy, see
00020    http://www.gnu.org/licenses/.
00021 
00022    For more information, please contact us at flame@cs.utexas.edu or
00023    send mail to:
00024 
00025    Field G. Van Zee and/or
00026    Robert A. van de Geijn
00027    The University of Texas at Austin
00028    Department of Computer Sciences
00029    1 University Station C0500
00030    Austin TX 78712
00031 */
00032 
00033 #ifndef BLIS_MACRO_DEFS_H
00034 #define BLIS_MACRO_DEFS_H
00035 
00036 // --- Constants ---------------------------------------------------------------
00037 
// Selector values naming a vector-intrinsics flavor.
// NOTE(review): presumably compared against a build-time setting defined
// elsewhere -- confirm.
#define BLIS_SSE_INTRINSICS 3
#define BLIS_NO_INTRINSICS  0

// --- boolean ---

// Any earlier definition of TRUE/FALSE is discarded so that, after this
// header, they are exactly 1 and 0.
#undef TRUE
#define TRUE 1

#undef FALSE
#define FALSE 0
00048 
00049 /*
00050 // --- trans ---
00051 
00052 #define BLIS_NO_TRANSPOSE      'n'
00053 #define BLIS_TRANSPOSE         't'
00054 #define BLIS_CONJ_NO_TRANSPOSE 'c'
00055 #define BLIS_CONJ_TRANSPOSE    'h'
00056 
00057 // --- conj ---
00058 
00059 #define BLIS_NO_CONJUGATE      'n'
00060 #define BLIS_CONJUGATE         'c'
00061 
00062 // --- uplo ---
00063 
00064 #define BLIS_LOWER_TRIANGULAR  'l'
00065 #define BLIS_UPPER_TRIANGULAR  'u'
00066 
00067 // --- side ---
00068 
00069 #define BLIS_LEFT              'l'
00070 #define BLIS_RIGHT             'r'
00071 
00072 // --- diag ---
00073 
00074 #define BLIS_NONUNIT_DIAG      'n'
00075 #define BLIS_UNIT_DIAG         'u'
00076 #define BLIS_ZERO_DIAG         'z'
00077 */
00078 
00079 // --- Functional macros -------------------------------------------------------
00080 
00081 // --- Type-agnostic ---
00082 
// --- min ---

// Evaluates to x when x compares less than y, otherwise to y.
// Each argument is expanded twice, so do not pass expressions with side
// effects (e.g. i++).
#define bli_min( x, y ) \
( ( (x) < (y) ) ? (x) : (y) )

// --- max ---

// Evaluates to x when x compares greater than y, otherwise to y.
// Each argument is expanded twice, so do not pass expressions with side
// effects (e.g. i++).
#define bli_max( x, y ) \
( ( (x) > (y) ) ? (x) : (y) )
00092 
00093 // --- Type-dependent ---
00094 
00095 // --- neg1 ---
00096 
// neg1: negate the operand in place.
//
// Every macro expands to ONE braced block. The complex variants update two
// fields; without the braces only the first update would be governed by an
// unbraced if/for/while at the call site. The block form works whether or
// not the caller adds a trailing semicolon.

// void bli_sneg1( float* x );
#define bli_sneg1( x ) \
{ \
*(x) *= -1.0F; \
}

// void bli_dneg1( double* x );
#define bli_dneg1( x ) \
{ \
*(x) *= -1.0; \
}

// void bli_cneg1( scomplex* x );
#define bli_cneg1( x ) \
{ \
(x)->real *= -1.0F; \
(x)->imag *= -1.0F; \
}

// void bli_zneg1( dcomplex* x );
#define bli_zneg1( x ) \
{ \
(x)->real *= -1.0; \
(x)->imag *= -1.0; \
}
00114 
00115 // --- neg2 ---
00116 
// neg2: store the negation of *x into *y (x itself is only read).
//
// Every macro expands to ONE braced block so that the two-field complex
// variants remain a single statement under an unbraced if/for/while.

// void bli_sneg2( float* x, float* y );
#define bli_sneg2( x, y ) \
{ \
*(y) = -1.0F * *(x); \
}

// void bli_dneg2( double* x, double* y );
#define bli_dneg2( x, y ) \
{ \
*(y) = -1.0 * *(x); \
}

// void bli_cneg2( scomplex* x, scomplex* y );
#define bli_cneg2( x, y ) \
{ \
(y)->real = -1.0F * (x)->real; \
(y)->imag = -1.0F * (x)->imag; \
}

// void bli_zneg2( dcomplex* x, dcomplex* y );
#define bli_zneg2( x, y ) \
{ \
(y)->real = -1.0 * (x)->real; \
(y)->imag = -1.0 * (x)->imag; \
}
00134 
00135 // --- sqrte ---
00136 
// sqrte: in-place square root with an error flag.
//
// When the (real part of the) operand is not negative it is replaced by its
// square root and *error is set to FLA_SUCCESS; otherwise the operand is
// left untouched and *error is set to FLA_FAILURE. The complex variants
// look only at the real part and clear the imaginary part on success.
// Requires sqrt() from <math.h> and the FLA_* codes to be in scope at the
// point of use.

// void bli_ssqrte( float* alpha, int* error );
#define bli_ssqrte( alpha, error ) \
{ \
if ( !( *(alpha) < 0.0F ) ) \
{ \
*(alpha) = ( float ) sqrt( *(alpha) ); \
*(error) = FLA_SUCCESS; \
} \
else \
{ \
*(error) = FLA_FAILURE; \
} \
}

// void bli_dsqrte( double* alpha, int* error );
#define bli_dsqrte( alpha, error ) \
{ \
if ( !( *(alpha) < 0.0 ) ) \
{ \
*(alpha) = ( double ) sqrt( *(alpha) ); \
*(error) = FLA_SUCCESS; \
} \
else \
{ \
*(error) = FLA_FAILURE; \
} \
}

// void bli_csqrte( scomplex* alpha, int* error );
#define bli_csqrte( alpha, error ) \
{ \
if ( !( (alpha)->real < 0.0F ) ) \
{ \
(alpha)->real = ( float ) sqrt( (alpha)->real ); \
(alpha)->imag = 0.0F; \
*(error) = FLA_SUCCESS; \
} \
else \
{ \
*(error) = FLA_FAILURE; \
} \
}

// void bli_zsqrte( dcomplex* alpha, int* error );
#define bli_zsqrte( alpha, error ) \
{ \
if ( !( (alpha)->real < 0.0 ) ) \
{ \
(alpha)->real = ( double ) sqrt( (alpha)->real ); \
(alpha)->imag = 0.0; \
*(error) = FLA_SUCCESS; \
} \
else \
{ \
*(error) = FLA_FAILURE; \
} \
}
00162 
00163 // --- absval2 ---
00164 
// absval2: store the absolute value (modulus) of *alpha into *absval.
//
// - Every macro expands to ONE braced block, so the two-statement complex
//   variants stay intact under an unbraced if/for/while.
// - No macro ends in a line continuation, so the text that follows a
//   definition can never be spliced into it.
// - The single-precision complex variants convert BOTH factors of each
//   product to double before multiplying; otherwise imag*imag would be
//   formed in float and could overflow where real*real does not.
// Requires fabs()/sqrt() from <math.h> at the point of use.

// void bli_sabsval2( float* alpha, float* absval );
#define bli_sabsval2( alpha, absval ) \
{ \
*(absval) = ( float ) fabs( ( double ) *(alpha) ); \
}

// void bli_dabsval2( double* alpha, double* absval );
#define bli_dabsval2( alpha, absval ) \
{ \
*(absval) = fabs( *(alpha) ); \
}

// void bli_cabsval2( scomplex* alpha, scomplex* absval );
// The modulus goes to absval->real; absval->imag is set to zero.
#define bli_cabsval2( alpha, absval ) \
{ \
(absval)->real = ( float ) sqrt( ( double ) (alpha)->real * ( double ) (alpha)->real + \
                                 ( double ) (alpha)->imag * ( double ) (alpha)->imag ); \
(absval)->imag = 0.0F; \
}

// void bli_csabsval2( scomplex* alpha, float* absval );
#define bli_csabsval2( alpha, absval ) \
{ \
*(absval) = ( float ) sqrt( ( double ) (alpha)->real * ( double ) (alpha)->real + \
                            ( double ) (alpha)->imag * ( double ) (alpha)->imag ); \
}

// void bli_zabsval2( dcomplex* alpha, dcomplex* absval );
// The modulus goes to absval->real; absval->imag is set to zero.
#define bli_zabsval2( alpha, absval ) \
{ \
(absval)->real = sqrt( (alpha)->real * (alpha)->real + \
                       (alpha)->imag * (alpha)->imag ); \
(absval)->imag = 0.0; \
}

// void bli_zdabsval2( dcomplex* alpha, double* absval );
#define bli_zdabsval2( alpha, absval ) \
{ \
*(absval) = sqrt( (alpha)->real * (alpha)->real + \
                  (alpha)->imag * (alpha)->imag ); \
}
00195 
00196 // --- absqr ---
00197 
// absqr: replace *alpha by its squared magnitude, in place.
//
// The complex variants store real^2 + imag^2 in the real part and then
// clear the imaginary part (that order matters: imag is read first).
// Every macro expands to ONE braced block so both updates stay together
// under an unbraced if/for/while.

// void bli_sabsqr( float* alpha );
#define bli_sabsqr( alpha ) \
{ \
*(alpha) = *(alpha) * *(alpha); \
}

// void bli_dabsqr( double* alpha );
#define bli_dabsqr( alpha ) \
{ \
*(alpha) = *(alpha) * *(alpha); \
}

// void bli_cabsqr( scomplex* alpha );
#define bli_cabsqr( alpha ) \
{ \
(alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
(alpha)->imag = 0.0F; \
}

// void bli_zabsqr( dcomplex* alpha );
#define bli_zabsqr( alpha ) \
{ \
(alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
(alpha)->imag = 0.0; \
}
00215 
00216 // --- invscals ---
00217 
// invscals: divide *y by *a in place ( y := y / a ).
//
// Every macro expands to ONE braced block. The block-local temporaries of
// the complex/complex variants carry a bli_ prefix and trailing underscore
// so an argument that happens to be called "temp", "zreal" or "zimag" is
// not shadowed inside the expansion. No check is made for a zero divisor.

// void bli_sinvscals( float* a, float* y );
#define bli_sinvscals( a, y ) \
{ \
*(y) = *(y) / *(a); \
}

// void bli_dinvscals( double* a, double* y );
#define bli_dinvscals( a, y ) \
{ \
*(y) = *(y) / *(a); \
}

// void bli_csinvscals( float* a, scomplex* y );
#define bli_csinvscals( a, y ) \
{ \
(y)->real = (y)->real / *(a); \
(y)->imag = (y)->imag / *(a); \
}

// void bli_cinvscals( scomplex* a, scomplex* y );
// y / a is formed as y * conj(a) / |a|^2; results are staged in locals so
// both components are computed from the original y.
#define bli_cinvscals( a, y ) \
{ \
float bli_denom_  = (a)->real * (a)->real + (a)->imag * (a)->imag; \
float bli_quot_r_ = ( (y)->real * (a)->real + (y)->imag * (a)->imag ) / bli_denom_; \
float bli_quot_i_ = ( (y)->imag * (a)->real - (y)->real * (a)->imag ) / bli_denom_; \
(y)->real = bli_quot_r_; \
(y)->imag = bli_quot_i_; \
}

// void bli_zdinvscals( double* a, dcomplex* y );
#define bli_zdinvscals( a, y ) \
{ \
(y)->real = (y)->real / *(a); \
(y)->imag = (y)->imag / *(a); \
}

// void bli_zinvscals( dcomplex* a, dcomplex* y );
// y / a is formed as y * conj(a) / |a|^2; results are staged in locals so
// both components are computed from the original y.
#define bli_zinvscals( a, y ) \
{ \
double bli_denom_  = (a)->real * (a)->real + (a)->imag * (a)->imag; \
double bli_quot_r_ = ( (y)->real * (a)->real + (y)->imag * (a)->imag ) / bli_denom_; \
double bli_quot_i_ = ( (y)->imag * (a)->real - (y)->real * (a)->imag ) / bli_denom_; \
(y)->real = bli_quot_r_; \
(y)->imag = bli_quot_i_; \
}
00259 
00260 // --- div3 ---
00261 
// div3: three-operand division, a := x / y.
//
// Every macro expands to ONE braced block. The complex variants stage the
// quotient in block-local temporaries, so a may alias x or y. Those locals
// carry a bli_ prefix and trailing underscore so an argument named "temp",
// "areal" or "aimag" is not shadowed inside the expansion. No check is
// made for a zero divisor.

// void bli_sdiv3( float* x, float* y, float* a );
#define bli_sdiv3( x, y, a ) \
{ \
*(a) = *(x) / *(y); \
}

// void bli_ddiv3( double* x, double* y, double* a );
#define bli_ddiv3( x, y, a ) \
{ \
*(a) = *(x) / *(y); \
}

// void bli_cdiv3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cdiv3( x, y, a ) \
{ \
float bli_denom_  = (y)->real * (y)->real + (y)->imag * (y)->imag; \
float bli_quot_r_ = ( (x)->real * (y)->real + (x)->imag * (y)->imag ) / bli_denom_; \
float bli_quot_i_ = ( (x)->imag * (y)->real - (x)->real * (y)->imag ) / bli_denom_; \
(a)->real = bli_quot_r_; \
(a)->imag = bli_quot_i_; \
}

// void bli_zdiv3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zdiv3( x, y, a ) \
{ \
double bli_denom_  = (y)->real * (y)->real + (y)->imag * (y)->imag; \
double bli_quot_r_ = ( (x)->real * (y)->real + (x)->imag * (y)->imag ) / bli_denom_; \
double bli_quot_i_ = ( (x)->imag * (y)->real - (x)->real * (y)->imag ) / bli_denom_; \
(a)->real = bli_quot_r_; \
(a)->imag = bli_quot_i_; \
}
00289 
00290 // --- add3 ---
00291 
// add3: three-operand addition, a := x + y.
// Each macro expands to one braced block; the complex variants add the
// two components independently.

// void bli_sadd3( float* x, float* y, float* a );
#define bli_sadd3( x, y, a ) \
{ \
*(a) = *(x) + *(y); \
}

// void bli_dadd3( double* x, double* y, double* a );
#define bli_dadd3( x, y, a ) \
{ \
*(a) = *(x) + *(y); \
}

// void bli_cadd3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cadd3( x, y, a ) \
{ \
(a)->imag = (x)->imag + (y)->imag; \
(a)->real = (x)->real + (y)->real; \
}

// void bli_zadd3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zadd3( x, y, a ) \
{ \
(a)->imag = (x)->imag + (y)->imag; \
(a)->real = (x)->real + (y)->real; \
}
00313 
00314 // --- copys ---
00315 
// copys: copy the scalar *x to *y, conjugating when requested.
//
// The real variants ignore conj. The complex variants copy the whole
// struct and then flip the sign of y's imaginary part when
// bli_is_conj( conj ) holds (bli_is_conj is provided elsewhere).
//
// Every macro expands to ONE braced block. Without the braces the complex
// variants were an assignment followed by a bare if, so under an unbraced
// caller-side if the conjugation ran unconditionally and a caller's else
// attached to the macro's internal if.

// void bli_scopys( conj_t conj, float* x, float* y );
#define bli_scopys( conj, x, y ) \
{ \
*(y) = *(x); \
}

// void bli_dcopys( conj_t conj, double* x, double* y );
#define bli_dcopys( conj, x, y ) \
{ \
*(y) = *(x); \
}

// void bli_ccopys( conj_t conj, scomplex* x, scomplex* y );
#define bli_ccopys( conj, x, y ) \
{ \
*(y) = *(x); \
if ( bli_is_conj( conj ) ) { (y)->imag *= -1.0F; } \
}

// void bli_zcopys( conj_t conj, dcomplex* x, dcomplex* y );
#define bli_zcopys( conj, x, y ) \
{ \
*(y) = *(x); \
if ( bli_is_conj( conj ) ) { (y)->imag *= -1.0; } \
}
00333 
00334 // --- scals ---
00335 
// scals: scale *y by *a in place ( y := a * y ).
//
// Every macro expands to ONE braced block. The complex/complex variants
// stage the product in block-local temporaries so both components are
// computed from the original y; those locals carry a bli_ prefix and
// trailing underscore so an argument named "tempr" or "tempi" is not
// shadowed inside the expansion.

// void bli_sscals( float* a, float* y );
#define bli_sscals( a, y ) \
{ \
*(y) = *(a) * *(y); \
}

// void bli_dscals( double* a, double* y );
#define bli_dscals( a, y ) \
{ \
*(y) = *(a) * *(y); \
}

// void bli_csscals( float* a, scomplex* y );
#define bli_csscals( a, y ) \
{ \
(y)->real = *(a) * (y)->real; \
(y)->imag = *(a) * (y)->imag; \
}

// void bli_cscals( scomplex* a, scomplex* y );
#define bli_cscals( a, y ) \
{ \
float bli_prod_r_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
float bli_prod_i_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
(y)->real = bli_prod_r_; \
(y)->imag = bli_prod_i_; \
}

// void bli_zdscals( double* a, dcomplex* y );
#define bli_zdscals( a, y ) \
{ \
(y)->real = *(a) * (y)->real; \
(y)->imag = *(a) * (y)->imag; \
}

// void bli_zscals( dcomplex* a, dcomplex* y );
#define bli_zscals( a, y ) \
{ \
double bli_prod_r_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
double bli_prod_i_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
(y)->real = bli_prod_r_; \
(y)->imag = bli_prod_i_; \
}
00375 
00376 // --- mult3 ---
00377 
// mult3: three-operand multiplication, a := x * y.
//
// Every macro expands to ONE braced block. The complex variants stage the
// product in block-local temporaries, so a may alias x or y. Those locals
// carry a bli_ prefix and trailing underscore so an argument named
// "tempr" or "tempi" is not shadowed inside the expansion.

// void bli_smult3( float* x, float* y, float* a );
#define bli_smult3( x, y, a ) \
{ \
*(a) = *(x) * *(y); \
}

// void bli_dmult3( double* x, double* y, double* a );
#define bli_dmult3( x, y, a ) \
{ \
*(a) = *(x) * *(y); \
}

// void bli_cmult3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cmult3( x, y, a ) \
{ \
float bli_prod_r_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
float bli_prod_i_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
(a)->real = bli_prod_r_; \
(a)->imag = bli_prod_i_; \
}

// void bli_zmult3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zmult3( x, y, a ) \
{ \
double bli_prod_r_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
double bli_prod_i_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
(a)->real = bli_prod_r_; \
(a)->imag = bli_prod_i_; \
}
00403 
00404 // --- mult4 ---
00405 
// mult4: fused multiply-add on scalars, y2 := y1 + alpha * x.
//
// Every macro expands to ONE braced block. The complex variants compute
// both components into block-local temporaries before storing them, so
// the result is correct even when y2 aliases x or alpha (previously the
// freshly written real part was re-read while forming the imaginary
// part). This matches how the mult3 macros handle aliasing.

// void bli_smult4( float* alpha, float* x, float* y1, float* y2 );
#define bli_smult4( alpha, x, y1, y2 ) \
{ \
*(y2) = *(y1) + *(alpha) * *(x); \
}

// void bli_dmult4( double* alpha, double* x, double* y1, double* y2 );
#define bli_dmult4( alpha, x, y1, y2 ) \
{ \
*(y2) = *(y1) + *(alpha) * *(x); \
}

// void bli_cmult4( scomplex* alpha, scomplex* x, scomplex* y1, scomplex* y2 );
#define bli_cmult4( alpha, x, y1, y2 ) \
{ \
float bli_sum_r_ = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
float bli_sum_i_ = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
(y2)->real = bli_sum_r_; \
(y2)->imag = bli_sum_i_; \
}

// void bli_zmult4( dcomplex* alpha, dcomplex* x, dcomplex* y1, dcomplex* y2 );
#define bli_zmult4( alpha, x, y1, y2 ) \
{ \
double bli_sum_r_ = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
double bli_sum_i_ = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
(y2)->real = bli_sum_r_; \
(y2)->imag = bli_sum_i_; \
}
00427 
00428 // --- conjs ---
00429 
// conjs: conjugate *a in place.
// For the real types this is a no-op (the argument is not evaluated); for
// the complex types the imaginary part changes sign. Each macro expands
// to one braced block.

// void bli_sconjs( float* a );
#define bli_sconjs( a ) \
{ \
}

// void bli_dconjs( double* a );
#define bli_dconjs( a ) \
{ \
}

// void bli_cconjs( scomplex* a );
#define bli_cconjs( a ) \
{ \
(a)->imag *= -1.0F; \
}

// void bli_zconjs( dcomplex* a );
#define bli_zconjs( a ) \
{ \
(a)->imag *= -1.0; \
}
00445 
00446 // --- copyconj ---
00447 
// copyconj: store the conjugate of *x into *y.
//
// For the real types this is a plain copy. For the complex types the real
// part is copied and the imaginary part is copied with its sign flipped.
// Every macro expands to ONE braced block so that the two-field complex
// variants remain a single statement under an unbraced if/for/while.

// void bli_scopyconj( float* x, float* y );
#define bli_scopyconj( x, y ) \
{ \
*(y) = *(x); \
}

// void bli_dcopyconj( double* x, double* y );
#define bli_dcopyconj( x, y ) \
{ \
*(y) = *(x); \
}

// void bli_ccopyconj( scomplex* x, scomplex* y );
#define bli_ccopyconj( x, y ) \
{ \
(y)->real =         (x)->real; \
(y)->imag = -1.0F * (x)->imag; \
}

// void bli_zcopyconj( dcomplex* x, dcomplex* y );
#define bli_zcopyconj( x, y ) \
{ \
(y)->real =         (x)->real; \
(y)->imag = -1.0  * (x)->imag; \
}
00465 
00466 // --- eq1 ---
00467 
// eq1: expression macros that are nonzero exactly when *alpha equals one
// (for the complex types: real part 1 and imaginary part 0).
//
// The argument is parenthesized before it is dereferenced so that pointer
// expressions such as "p + 1" test the element they point at rather than
// being re-associated as "*p + 1".

// int bli_seq1( float* alpha );
#define bli_seq1( alpha ) \
  ( *(alpha) == 1.0F )

// int bli_deq1( double* alpha );
#define bli_deq1( alpha ) \
  ( *(alpha) == 1.0 )

// int bli_ceq1( scomplex* alpha );
#define bli_ceq1( alpha ) \
  ( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )

// int bli_zeq1( dcomplex* alpha );
#define bli_zeq1( alpha ) \
  ( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )
00483 
00484 // --- Swapping/toggle macros --------------------------------------------------
00485 
00486 // --- swap_pointers ---
00487 
// Exchange the values of the two pointer lvalues a and b.
// Both arguments must be modifiable lvalues; each is expanded twice.
// The scratch variable is named bli_swap_tmp_ rather than "temp" so that
// swapping a caller variable that is itself called temp works correctly.
// The complex variants hold the pointer in a void*, which converts
// implicitly in C.

// void bli_sswap_pointers( float* a, float* b );
#define bli_sswap_pointers( a, b ) \
{ \
float* bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}

// void bli_dswap_pointers( double* a, double* b );
#define bli_dswap_pointers( a, b ) \
{ \
double* bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}

// void bli_cswap_pointers( scomplex* a, scomplex* b );
#define bli_cswap_pointers( a, b ) \
{ \
void* bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}

// void bli_zswap_pointers( dcomplex* a, dcomplex* b );
#define bli_zswap_pointers( a, b ) \
{ \
void* bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}
00515 
00516 // --- swap_ints ---
00517 
// Exchange the values of the two int lvalues a and b.
// Both arguments must be modifiable lvalues; each is expanded twice.
// The scratch variable is named bli_swap_tmp_ rather than "temp" so that
// swapping a caller variable that is itself called temp works correctly.
#define bli_swap_ints( a, b ) \
{ \
int bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}

// --- swap_trans ---

// Same exchange for two trans_t lvalues (trans_t is declared elsewhere).
#define bli_swap_trans( a, b ) \
{ \
trans_t bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}

// --- swap_conj ---

// Same exchange for two conj_t lvalues (conj_t is declared elsewhere).
#define bli_swap_conj( a, b ) \
{ \
conj_t bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}
00542 
00543 // --- toggle_side ---
00544 
// Flip a side value in place: left becomes BLIS_RIGHT, anything else
// becomes BLIS_LEFT. side must be a modifiable lvalue.
#define bli_toggle_side( side ) \
{ \
(side) = ( bli_is_left( side ) ? BLIS_RIGHT : BLIS_LEFT ); \
}

// --- toggle_uplo ---

// Flip an uplo value in place: lower becomes BLIS_UPPER_TRIANGULAR,
// anything else becomes BLIS_LOWER_TRIANGULAR.
#define bli_toggle_uplo( uplo ) \
{ \
(uplo) = ( bli_is_lower( uplo ) ? BLIS_UPPER_TRIANGULAR : BLIS_LOWER_TRIANGULAR ); \
}

// --- toggle_trans ---

// Flip the transposition of trans in place while keeping its conjugation:
// no-trans <-> trans, conj-no-trans -> conj-trans, and any remaining
// value -> conj-no-trans. The predicates are tried in that order.
#define bli_toggle_trans( trans ) \
{ \
(trans) = ( bli_is_notrans( trans )     ? BLIS_TRANSPOSE      : \
            bli_is_trans( trans )       ? BLIS_NO_TRANSPOSE   : \
            bli_is_conjnotrans( trans ) ? BLIS_CONJ_TRANSPOSE : \
                                          BLIS_CONJ_NO_TRANSPOSE ); \
}

// --- toggle_conjtrans ---

// no-trans becomes BLIS_CONJ_TRANSPOSE; every other value becomes
// BLIS_NO_TRANSPOSE.
#define bli_toggle_conjtrans( trans ) \
{ \
(trans) = ( bli_is_notrans( trans ) ? BLIS_CONJ_TRANSPOSE : BLIS_NO_TRANSPOSE ); \
}

// --- toggle_conj ---

// Flip a conj value in place: conjugate becomes BLIS_NO_CONJUGATE,
// anything else becomes BLIS_CONJUGATE.
#define bli_toggle_conj( conj ) \
{ \
(conj) = ( bli_is_conj( conj ) ? BLIS_NO_CONJUGATE : BLIS_CONJUGATE ); \
}
00582 
00583 #endif // #ifndef BLIS_MACRO_DEFS_H