libflame revision_anchor
blis_macro_defs.h
Go to the documentation of this file.
00001 /*
00002    libflame
00003    An object-based infrastructure for developing high-performance
00004    dense linear algebra libraries.
00005 
00006    Copyright (C) 2011, The University of Texas
00007 
00008    libflame is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU Lesser General Public License as
00010    published by the Free Software Foundation; either version 2.1 of
00011    the License, or (at your option) any later version.
00012 
00013    libflame is distributed in the hope that it will be useful, but
00014    WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016    Lesser General Public License for more details.
00017 
00018    You should have received a copy of the GNU Lesser General Public
00019    License along with libflame; if you did not receive a copy, see
00020    http://www.gnu.org/licenses/.
00021 
00022    For more information, please contact us at flame@cs.utexas.edu or
00023    send mail to:
00024 
00025    Field G. Van Zee and/or
00026    Robert A. van de Geijn
00027    The University of Texas at Austin
00028    Department of Computer Sciences
00029    1 University Station C0500
00030    Austin TX 78712
00031 */
00032 
00033 #ifndef BLIS_MACRO_DEFS_H
00034 #define BLIS_MACRO_DEFS_H
00035 
// --- Constants ---------------------------------------------------------------
00037 
// --- boolean ---
// Integer truth values. Any definition already in effect is discarded first,
// so the values below are the ones in force from this point on.

#undef FALSE
#define FALSE 0

#undef TRUE
#define TRUE 1
00045 
// --- trans ---
// Single-character parameter codes. The same character is reused across the
// groups below ('n', 'c', 'l' and 'u' each appear in more than one group), so
// a code is only meaningful together with the kind of parameter it is used as.

#define BLIS_NO_TRANSPOSE      'n'
#define BLIS_TRANSPOSE         't'
#define BLIS_CONJ_NO_TRANSPOSE 'c'
#define BLIS_CONJ_TRANSPOSE    'h'

// --- conj ---

#define BLIS_NO_CONJUGATE      'n'
#define BLIS_CONJUGATE         'c'

// --- uplo ---

#define BLIS_LOWER_TRIANGULAR  'l'
#define BLIS_UPPER_TRIANGULAR  'u'

// --- side ---

#define BLIS_LEFT              'l'
#define BLIS_RIGHT             'r'

// --- diag ---

#define BLIS_NONUNIT_DIAG      'n'
#define BLIS_UNIT_DIAG         'u'
#define BLIS_ZERO_DIAG         'z'
00073 
// --- Functional macros -------------------------------------------------------
00075 
// --- Type agnostic ---
00077 
// --- min ---
// Evaluates to x when x < y, and to y otherwise (including when the two
// compare equal). One of the arguments is evaluated twice, so do not pass
// expressions with side effects.

#define bli_min( x, y ) \
( (x) < (y) ? (x) : (y) )
00082 
// --- max ---
// Evaluates to x when x > y, and to y otherwise (including when the two
// compare equal). One of the arguments is evaluated twice, so do not pass
// expressions with side effects.

#define bli_max( x, y ) \
( (x) > (y) ? (x) : (y) )
00087 
// --- Type dependent ---
00089 
// --- neg1 ---
// Negate, in place, the value x points to. Each macro expands to a complete
// statement (or block), so it is usable only where a statement is allowed.

// void bli_sneg1( float* x );
#define bli_sneg1( x ) \
*(x)     *= -1.0F;

// void bli_dneg1( double* x );
#define bli_dneg1( x ) \
*(x)     *= -1.0;

// void bli_cneg1( scomplex* x );
// Braced so that both component updates stay together when the macro is used
// as the body of an unbraced if/for/while.
#define bli_cneg1( x ) \
{ \
(x)->real *= -1.0F; \
(x)->imag *= -1.0F; \
}

// void bli_zneg1( dcomplex* x );
// Braced for the same reason as bli_cneg1.
#define bli_zneg1( x ) \
{ \
(x)->real *= -1.0; \
(x)->imag *= -1.0; \
}
00109 
// --- neg2 ---
// Store the negation of the value x points to into the location y points to.
// The macros only read through x and only write through y.

// void bli_sneg2( float* x, float* y );
#define bli_sneg2( x, y ) \
*(y)      = -1.0F * *(x);

// void bli_dneg2( double* x, double* y );
#define bli_dneg2( x, y ) \
*(y)      = -1.0  * *(x);

// void bli_cneg2( scomplex* x, scomplex* y );
// Braced so that both component stores stay together when the macro is used
// as the body of an unbraced if/for/while.
#define bli_cneg2( x, y ) \
{ \
(y)->real = -1.0F * (x)->real; \
(y)->imag = -1.0F * (x)->imag; \
}

// void bli_zneg2( dcomplex* x, dcomplex* y );
// Braced for the same reason as bli_cneg2.
#define bli_zneg2( x, y ) \
{ \
(y)->real = -1.0  * (x)->real; \
(y)->imag = -1.0  * (x)->imag; \
}
00129 
00130 // --- sqrte ---
00131 
00132 // void bli_ssqrte( float* alpha, int* error );
00133 #define bli_ssqrte( alpha, error ) \
00134 if ( *(alpha)      < 0.0F ) {                            *(error) = FLA_FAILURE; } \
00135 else { *(alpha)      =  ( float ) sqrt( *(alpha)      ); *(error) = FLA_SUCCESS; }
00136 
00137 // void bli_dsqrte( double* alpha, int* error );
00138 #define bli_dsqrte( alpha, error ) \
00139 if ( *(alpha)      < 0.0  ) {                            *(error) = FLA_FAILURE; } \
00140 else { *(alpha)      = ( double ) sqrt( *(alpha)      ); *(error) = FLA_SUCCESS; }
00141 
00142 // void bli_csqrte( scomplex* alpha, int* error );
00143 #define bli_csqrte( alpha, error ) \
00144 if ( (alpha)->real < 0.0F ) \
00145 {                     *(error) = FLA_FAILURE; } \
00146 else { \
00147 (alpha)->real =  ( float ) sqrt( (alpha)->real ); \
00148 (alpha)->imag = 0.0F; *(error) = FLA_SUCCESS; }
00149 
00150 // void bli_zsqrte( dcomplex* alpha, int* error );
00151 #define bli_zsqrte( alpha, error ) \
00152 if ( (alpha)->real < 0.0  ) \
00153 {                     *(error) = FLA_FAILURE; } \
00154 else { \
00155 (alpha)->real = ( double ) sqrt( (alpha)->real ); \
00156 (alpha)->imag = 0.0;  *(error) = FLA_SUCCESS; }
00157 
00158 // --- absval2 ---
00159 
00160 // void bli_sabsval2( float* alpha, float* sqrtalpha );
00161 #define bli_sabsval2( alpha, sqrtalpha ) \
00162 *(sqrtalpha) = ( float ) fabs( ( double ) *(alpha) );
00163 
00164 // void bli_dabsval2( double* alpha, double* sqrtalpha );
00165 #define bli_dabsval2( alpha, sqrtalpha ) \
00166 *(sqrtalpha) = fabs( *(alpha) );
00167 
00168 // void bli_cabsval2( scomplex* alpha, scomplex* sqrtalpha );
00169 #define bli_cabsval2( alpha, sqrtalpha ) \
00170 (sqrtalpha)->real = ( float ) sqrt( ( double ) (alpha)->real * (alpha)->real + \
00171                                                (alpha)->imag * (alpha)->imag ); \
00172 (sqrtalpha)->imag = 0.0F;
00173 
00174 // void bli_zabsval2( dcomplex* alpha, dcomplex* sqrtalpha );
00175 #define bli_zabsval2( alpha, sqrtalpha ) \
00176 (sqrtalpha)->real = sqrt( (alpha)->real * (alpha)->real + \
00177                           (alpha)->imag * (alpha)->imag ); \
00178 (sqrtalpha)->imag = 0.0;
00179 
// --- absqr ---
// Replace, in place, the value alpha points to by its square (real variants)
// or by real^2 + imag^2 stored in the real component with the imaginary
// component set to zero (complex variants).

// void bli_sabsqr( float* alpha );
#define bli_sabsqr( alpha ) \
*(alpha) = *(alpha) * *(alpha);

// void bli_dabsqr( double* alpha );
#define bli_dabsqr( alpha ) \
*(alpha) = *(alpha) * *(alpha);

// void bli_cabsqr( scomplex* alpha );
// Braced so that both stores stay together when the macro is used as the
// body of an unbraced if/for/while.
#define bli_cabsqr( alpha ) \
{ \
(alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
(alpha)->imag = 0.0F; \
}

// void bli_zabsqr( dcomplex* alpha );
// Braced for the same reason as bli_cabsqr.
#define bli_zabsqr( alpha ) \
{ \
(alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
(alpha)->imag = 0.0; \
}
00199 
// --- invscals ---
// Divide, in place, the value y points to by the value a points to
// ( y := y / a ). No check is made for a zero divisor.

// void bli_sinvscals( float* a, float* y );
#define bli_sinvscals( a, y ) \
*(y) = *(y) / *(a);

// void bli_dinvscals( double* a, double* y );
#define bli_dinvscals( a, y ) \
*(y) = *(y) / *(a);

// void bli_cinvscals( scomplex* a, scomplex* y );
// Computes y * conj(a) / ( a.real^2 + a.imag^2 ). Both result components are
// formed in locals before y is written, so y may alias a. The locals carry
// distinctive names so they cannot capture an identifier in a caller argument.
#define bli_cinvscals( a, y ) \
{ \
float bli_inv_den_ = (a)->real * (a)->real + (a)->imag * (a)->imag; \
float bli_inv_re_  = ( (y)->real * (a)->real + (y)->imag * (a)->imag ) / bli_inv_den_; \
float bli_inv_im_  = ( (y)->imag * (a)->real - (y)->real * (a)->imag ) / bli_inv_den_; \
(y)->real = bli_inv_re_; \
(y)->imag = bli_inv_im_; \
}

// void bli_zinvscals( dcomplex* a, dcomplex* y );
// Double-precision counterpart of bli_cinvscals.
#define bli_zinvscals( a, y ) \
{ \
double bli_inv_den_ = (a)->real * (a)->real + (a)->imag * (a)->imag; \
double bli_inv_re_  = ( (y)->real * (a)->real + (y)->imag * (a)->imag ) / bli_inv_den_; \
double bli_inv_im_  = ( (y)->imag * (a)->real - (y)->real * (a)->imag ) / bli_inv_den_; \
(y)->real = bli_inv_re_; \
(y)->imag = bli_inv_im_; \
}
00229 
// --- div3 ---
// Store the quotient of the value x points to and the value y points to into
// the location a points to ( a := x / y ). No check is made for a zero
// divisor.

// void bli_sdiv3( float* x, float* y, float* a );
#define bli_sdiv3( x, y, a ) \
*(a) = *(x) / *(y);

// void bli_ddiv3( double* x, double* y, double* a );
#define bli_ddiv3( x, y, a ) \
*(a) = *(x) / *(y);

// void bli_cdiv3( scomplex* x, scomplex* y, scomplex* a );
// Computes x * conj(y) / ( y.real^2 + y.imag^2 ). Both result components are
// formed in locals before a is written, so a may alias x or y. The locals
// carry distinctive names so they cannot capture an identifier in a caller
// argument.
#define bli_cdiv3( x, y, a ) \
{ \
float bli_div_den_ = (y)->real * (y)->real + (y)->imag * (y)->imag; \
float bli_div_re_  = ( (x)->real * (y)->real + (x)->imag * (y)->imag ) / bli_div_den_; \
float bli_div_im_  = ( (x)->imag * (y)->real - (x)->real * (y)->imag ) / bli_div_den_; \
(a)->real = bli_div_re_; \
(a)->imag = bli_div_im_; \
}

// void bli_zdiv3( dcomplex* x, dcomplex* y, dcomplex* a );
// Double-precision counterpart of bli_cdiv3.
#define bli_zdiv3( x, y, a ) \
{ \
double bli_div_den_ = (y)->real * (y)->real + (y)->imag * (y)->imag; \
double bli_div_re_  = ( (x)->real * (y)->real + (x)->imag * (y)->imag ) / bli_div_den_; \
double bli_div_im_  = ( (x)->imag * (y)->real - (x)->real * (y)->imag ) / bli_div_den_; \
(a)->real = bli_div_re_; \
(a)->imag = bli_div_im_; \
}
00259 
// --- add3 ---
// Store the sum of the values x and y point to into the location a points to
// ( a := x + y ). The complex variants add the components independently.

// void bli_sadd3( float* x, float* y, float* a );
#define bli_sadd3( x, y, a ) \
*(a) = *(x) + *(y);

// void bli_dadd3( double* x, double* y, double* a );
#define bli_dadd3( x, y, a ) \
*(a) = *(x) + *(y);

// void bli_cadd3( scomplex* x, scomplex* y, scomplex* a );
#define bli_cadd3( x, y, a ) \
{ \
(a)->real = (x)->real + (y)->real; \
(a)->imag = (x)->imag + (y)->imag; \
}

// void bli_zadd3( dcomplex* x, dcomplex* y, dcomplex* a );
#define bli_zadd3( x, y, a ) \
{ \
(a)->real = (x)->real + (y)->real; \
(a)->imag = (x)->imag + (y)->imag; \
}
00283 
// --- copys ---
// Copy the value x points to into the location y points to. The real
// variants ignore conj. The complex variants additionally negate the
// imaginary component of the copy when bli_does_conj( conj ) is true.

// void bli_scopys( char conj, float* x, float* y );
#define bli_scopys( conj, x, y ) \
*(y) = *(x);

// void bli_dcopys( char conj, double* x, double* y );
#define bli_dcopys( conj, x, y ) \
*(y) = *(x);

// void bli_ccopys( char conj, scomplex* x, scomplex* y );
// Braced so that the copy and the conditional sign flip stay together, and so
// the trailing if cannot pair with an else written after the macro call.
#define bli_ccopys( conj, x, y ) \
{ \
*(y) = *(x); \
if ( bli_does_conj( conj ) ) (y)->imag *= -1.0F; \
}

// void bli_zcopys( char conj, dcomplex* x, dcomplex* y );
// Braced for the same reason as bli_ccopys.
#define bli_zcopys( conj, x, y ) \
{ \
*(y) = *(x); \
if ( bli_does_conj( conj ) ) (y)->imag *= -1.0; \
}
00303 
// --- scals ---
// Multiply, in place, the value y points to by the value a points to
// ( y := a * y ).

// void bli_sscals( float* a, float* y );
#define bli_sscals( a, y ) \
*(y) = *(a) * *(y);

// void bli_dscals( double* a, double* y );
#define bli_dscals( a, y ) \
*(y) = *(a) * *(y);

// void bli_cscals( scomplex* a, scomplex* y );
// Complex product. Both result components are formed in locals before y is
// written, so y may alias a. The locals carry distinctive names so they
// cannot capture an identifier in a caller argument.
#define bli_cscals( a, y ) \
{ \
float bli_scl_re_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
float bli_scl_im_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
(y)->real = bli_scl_re_; \
(y)->imag = bli_scl_im_; \
}

// void bli_zscals( dcomplex* a, dcomplex* y );
// Double-precision counterpart of bli_cscals.
#define bli_zscals( a, y ) \
{ \
double bli_scl_re_ = (a)->real * (y)->real - (a)->imag * (y)->imag; \
double bli_scl_im_ = (a)->imag * (y)->real + (a)->real * (y)->imag; \
(y)->real = bli_scl_re_; \
(y)->imag = bli_scl_im_; \
}
00331 
// --- mult3 ---
// Store the product of the values x and y point to into the location a
// points to ( a := x * y ).

// void bli_smult3( float* x, float* y, float* a );
#define bli_smult3( x, y, a ) \
*(a) = *(x) * *(y);

// void bli_dmult3( double* x, double* y, double* a );
#define bli_dmult3( x, y, a ) \
*(a) = *(x) * *(y);

// void bli_cmult3( scomplex* x, scomplex* y, scomplex* a );
// Complex product. Both result components are formed in locals before a is
// written, so a may alias x or y. The locals carry distinctive names so they
// cannot capture an identifier in a caller argument.
#define bli_cmult3( x, y, a ) \
{ \
float bli_mul_re_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
float bli_mul_im_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
(a)->real = bli_mul_re_; \
(a)->imag = bli_mul_im_; \
}

// void bli_zmult3( dcomplex* x, dcomplex* y, dcomplex* a );
// Double-precision counterpart of bli_cmult3.
#define bli_zmult3( x, y, a ) \
{ \
double bli_mul_re_ = (x)->real * (y)->real - (x)->imag * (y)->imag; \
double bli_mul_im_ = (x)->imag * (y)->real + (x)->real * (y)->imag; \
(a)->real = bli_mul_re_; \
(a)->imag = bli_mul_im_; \
}
00359 
// --- mult4 ---
// Store y1 + alpha * x into the location y2 points to, where alpha, x and y1
// are the values behind the corresponding pointers.

// void bli_smult4( float* alpha, float* x, float* y1, float* y2 );
#define bli_smult4( alpha, x, y1, y2 ) \
*(y2) = *(y1) + *(alpha) * *(x);

// void bli_dmult4( double* alpha, double* x, double* y1, double* y2 );
#define bli_dmult4( alpha, x, y1, y2 ) \
*(y2) = *(y1) + *(alpha) * *(x);

// void bli_cmult4( scomplex* alpha, scomplex* x, scomplex* y1, scomplex* y2 );
// Both result components are formed in locals before y2 is written. Without
// that, storing y2->real first would corrupt the imaginary component whenever
// y2 aliases x or alpha, because that component reads x->real and alpha->real.
#define bli_cmult4( alpha, x, y1, y2 ) \
{ \
float bli_m4_re_ = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
float bli_m4_im_ = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
(y2)->real = bli_m4_re_; \
(y2)->imag = bli_m4_im_; \
}

// void bli_zmult4( dcomplex* alpha, dcomplex* x, dcomplex* y1, dcomplex* y2 );
// Double-precision counterpart of bli_cmult4, alias-safe in the same way.
#define bli_zmult4( alpha, x, y1, y2 ) \
{ \
double bli_m4_re_ = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
double bli_m4_im_ = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
(y2)->real = bli_m4_re_; \
(y2)->imag = bli_m4_im_; \
}
00383 
// --- conjs ---
// Conjugate, in place, the value a points to. The real variants expand to an
// empty statement and do not evaluate their argument; the complex variants
// negate the imaginary component.

// void bli_sconjs( float* a );
#define bli_sconjs( a ) \
;

// void bli_dconjs( double* a );
#define bli_dconjs( a ) \
;

// void bli_cconjs( scomplex* a );
#define bli_cconjs( a ) \
(a)->imag *= -1.0F;

// void bli_zconjs( dcomplex* a );
#define bli_zconjs( a ) \
(a)->imag *= -1.0;
00401 
// --- copyconj ---
// Store the conjugate of the value x points to into the location y points
// to. For the real variants this is a plain copy; the complex variants copy
// the real component and store the negated imaginary component.

// void bli_scopyconj( float* x, float* y );
#define bli_scopyconj( x, y ) \
*(y) = *(x);

// void bli_dcopyconj( double* x, double* y );
#define bli_dcopyconj( x, y ) \
*(y) = *(x);

// void bli_ccopyconj( scomplex* x, scomplex* y );
// Braced so that both component stores stay together when the macro is used
// as the body of an unbraced if/for/while.
#define bli_ccopyconj( x, y ) \
{ \
(y)->real =         (x)->real; \
(y)->imag = -1.0F * (x)->imag; \
}

// void bli_zcopyconj( dcomplex* x, dcomplex* y );
// Braced for the same reason as bli_ccopyconj.
#define bli_zcopyconj( x, y ) \
{ \
(y)->real =         (x)->real; \
(y)->imag = -1.0  * (x)->imag; \
}
00421 
// --- eq1 ---
// Expression macros: each evaluates to nonzero when the value alpha points to
// compares exactly equal to one (for the complex variants: real component one
// and imaginary component zero), and to zero otherwise. The argument is
// parenthesized so that pointer expressions such as p + 1 are dereferenced as
// a whole.

// int bli_seq1( float* alpha );
#define bli_seq1( alpha ) \
  ( *(alpha) == 1.0F )

// int bli_deq1( double* alpha );
#define bli_deq1( alpha ) \
  ( *(alpha) == 1.0 )

// int bli_ceq1( scomplex* alpha );
#define bli_ceq1( alpha ) \
  ( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )

// int bli_zeq1( dcomplex* alpha );
#define bli_zeq1( alpha ) \
  ( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )
00439 
// --- Swapping/toggle macros --------------------------------------------------
00441 
// --- swap_pointers ---
// Exchange the two pointer variables a and b (the pointers themselves, not
// the values they point to). Both arguments must be modifiable lvalues. The
// scratch variable has a distinctive name so that a caller variable called
// "temp" is not captured by the expansion.

#define bli_sswap_pointers( a, b ) \
{ \
float* bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}

#define bli_dswap_pointers( a, b ) \
{ \
double* bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}

#define bli_cswap_pointers( a, b ) \
{ \
scomplex* bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}

#define bli_zswap_pointers( a, b ) \
{ \
dcomplex* bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}
00471 
// --- swap_ints ---
// Exchange the values of the two int variables a and b. Both arguments must
// be modifiable lvalues. The scratch variable has a distinctive name so that
// a caller variable called "temp" is not captured by the expansion.

#define bli_swap_ints( a, b ) \
{ \
int bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}
00480 
// --- swap_chars ---
// Exchange the values of the two char variables a and b. Both arguments must
// be modifiable lvalues. The scratch variable has a distinctive name so that
// a caller variable called "temp" is not captured by the expansion.

#define bli_swap_chars( a, b ) \
{ \
char bli_swap_tmp_ = (a); \
(a) = (b); \
(b) = bli_swap_tmp_; \
}
00489 
// --- toggle_side ---
// Flip side in place: a value for which bli_is_left() is true becomes
// BLIS_RIGHT; any other value becomes BLIS_LEFT. side must be a modifiable
// lvalue; it is parenthesized in the assignments so that an lvalue
// expression (e.g. *p) is assigned as a whole.

#define bli_toggle_side( side ) \
{ \
if ( bli_is_left( side ) ) (side) = BLIS_RIGHT; \
else                       (side) = BLIS_LEFT; \
}
00497 
// --- toggle_uplo ---
// Flip uplo in place: a value for which bli_is_lower() is true becomes
// BLIS_UPPER_TRIANGULAR; any other value becomes BLIS_LOWER_TRIANGULAR.
// uplo must be a modifiable lvalue; it is parenthesized in the assignments so
// that an lvalue expression (e.g. *p) is assigned as a whole.

#define bli_toggle_uplo( uplo ) \
{ \
if ( bli_is_lower( uplo ) ) (uplo) = BLIS_UPPER_TRIANGULAR; \
else                        (uplo) = BLIS_LOWER_TRIANGULAR; \
}
00505 
// --- toggle_trans ---
// Flip the transposition of trans in place, using the first predicate that
// matches:
//   bli_is_notrans()     -> BLIS_TRANSPOSE
//   bli_is_trans()       -> BLIS_NO_TRANSPOSE
//   bli_is_conjnotrans() -> BLIS_CONJ_TRANSPOSE
//   anything else        -> BLIS_CONJ_NO_TRANSPOSE
// trans must be a modifiable lvalue; it is parenthesized in the assignments
// so that an lvalue expression (e.g. *p) is assigned as a whole.
#define bli_toggle_trans( trans ) \
{ \
if      ( bli_is_notrans( trans ) )     (trans) = BLIS_TRANSPOSE; \
else if ( bli_is_trans( trans ) )       (trans) = BLIS_NO_TRANSPOSE; \
else if ( bli_is_conjnotrans( trans ) ) (trans) = BLIS_CONJ_TRANSPOSE; \
else                                    (trans) = BLIS_CONJ_NO_TRANSPOSE; \
}
00514 
// --- toggle_conjtrans ---
// Update trans in place: a value for which bli_is_notrans() is true becomes
// BLIS_CONJ_TRANSPOSE; every other value (not only BLIS_CONJ_TRANSPOSE)
// becomes BLIS_NO_TRANSPOSE. trans must be a modifiable lvalue; it is
// parenthesized in the assignments so that an lvalue expression (e.g. *p) is
// assigned as a whole.
#define bli_toggle_conjtrans( trans ) \
{ \
if      ( bli_is_notrans( trans ) )     (trans) = BLIS_CONJ_TRANSPOSE; \
else                                    (trans) = BLIS_NO_TRANSPOSE; \
}
00521 
// --- toggle_conj ---
// Flip conj in place: a value for which bli_is_conj() is true becomes
// BLIS_NO_CONJUGATE; any other value becomes BLIS_CONJUGATE. conj must be a
// modifiable lvalue; it is parenthesized in the assignments so that an lvalue
// expression (e.g. *p) is assigned as a whole.

#define bli_toggle_conj( conj ) \
{ \
if ( bli_is_conj( conj ) ) (conj) = BLIS_NO_CONJUGATE; \
else                       (conj) = BLIS_CONJUGATE; \
}
00529 
00530 #endif // #ifndef BLIS_MACRO_DEFS_H