Go to the documentation of this file.
11 #ifndef BLIS1_MACRO_DEFS_H
12 #define BLIS1_MACRO_DEFS_H
/* Integer codes for intrinsics options: 0 means "no intrinsics", 3 means SSE
   (meaning taken from the macro names).
   NOTE(review): presumably compared against a build-time configuration
   setting defined elsewhere -- confirm where these are consumed, and whether
   the skipped values 1 and 2 are reserved for other instruction sets. */
16 #define BLIS1_NO_INTRINSICS 0
17 #define BLIS1_SSE_INTRINSICS 3
/* Type-generic comparison helpers written as expression macros.
   Every argument is parenthesized, but each one appears more than once in the
   expansion, so an argument with side effects (e.g. i++) may be evaluated
   twice. */
/* bl1_min: yields a when a < b, otherwise b. */
63 #define bl1_min( a, b ) ( (a) < (b) ? (a) : (b) )
/* bl1_max: yields a when a > b, otherwise b. */
64 #define bl1_max( a, b ) ( (a) > (b) ? (a) : (b) )
/* bl1_abs: yields -(a) when a <= 0, otherwise a. Because the test is <=,
   a zero argument also goes through the negated branch. */
65 #define bl1_abs( a ) ( (a) <= 0 ? -(a) : (a) )
/* Floating-point flavoured aliases of the helpers above. */
/* bl1_fmin / bl1_fmax: forward unchanged to bl1_min / bl1_max. */
69 #define bl1_fmin( a, b ) bl1_min( a, b )
70 #define bl1_fmax( a, b ) bl1_max( a, b )
/* bl1_fabs: yields -(a) when a <= 0.0, otherwise a. A floating-point zero
   takes the negated branch, so the result for +0.0 is -0.0. The argument
   appears twice in the expansion and may be evaluated twice. */
71 #define bl1_fabs( a ) ( (a) <= 0.0 ? -(a) : (a) )
/* bl1_fminabs / bl1_fmaxabs: apply bl1_fmin / bl1_fmax to bl1_fabs( a ) and a
   second operand.
   NOTE(review): the second operand and closing parenthesis are on lines that
   this listing omits (each visible line ends in a continuation backslash);
   presumably the second operand is bl1_fabs( b ) -- confirm against the
   original header. */
74 #define bl1_fminabs( a, b ) \
76 bl1_fmin( bl1_fabs( a ), \
79 #define bl1_fmaxabs( a, b ) \
81 bl1_fmax( bl1_fabs( a ), \
/* bl1_{s,d,c,z}neg1( x ): one-operand macros, one per datatype prefix.
   NOTE(review): only the #define lines are present in this listing; every
   line ends in a continuation backslash and the bodies are missing.
   Presumably these negate x in place (compare the two-operand neg2 family
   that follows) -- confirm against the original header. */
89 #define bl1_sneg1( x ) \
93 #define bl1_dneg1( x ) \
97 #define bl1_cneg1( x ) \
102 #define bl1_zneg1( x ) \
/* bl1_{s,d,c,z}neg2( x, y ): two-operand negation macros.
   NOTE(review): the bodies of bl1_sneg2 and bl1_dneg2 are not present in this
   listing (the lines after the continuation backslash are missing). */
109 #define bl1_sneg2( x, y ) \
113 #define bl1_dneg2( x, y ) \
/* bl1_cneg2: x and y are pointers to objects with real and imag members.
   Stores -1.0F * x->real into y->real and -1.0F * x->imag into y->imag.
   The expansion is two separate statements, each already terminated by a
   semicolon and not wrapped in braces, so it must not be used as the sole
   body of an unbraced if/else/for. */
117 #define bl1_cneg2( x, y ) \
118 (y)->real = -1.0F * (x)->real; \
119 (y)->imag = -1.0F * (x)->imag;
/* bl1_zneg2: same as bl1_cneg2 but multiplies by the double literal -1.0. */
122 #define bl1_zneg2( x, y ) \
123 (y)->real = -1.0 * (x)->real; \
124 (y)->imag = -1.0 * (x)->imag;
/* bl1_ssqrte( alpha, error ): checked in-place square root.
   alpha and error are pointers. If *alpha is <= 0.0F or NaN, *error is set to
   FLA_FAILURE and *alpha is left unchanged; note that zero is rejected, not
   just negative values. Otherwise *alpha is replaced by sqrt( *alpha ),
   computed with the double-precision sqrt and cast back to float, and *error
   is set to FLA_SUCCESS.
   FLA_FAILURE and FLA_SUCCESS are defined outside this listing.
   The expansion is a bare if/else statement, so an else that follows an
   invocation in calling code will not associate the way it appears to.
   alpha is expanded several times, so it should be free of side effects. */
129 #define bl1_ssqrte( alpha, error ) \
130 if ( *(alpha) <= 0.0F || isnan( *(alpha) ) ) { *(error) = FLA_FAILURE; } \
131 else { *(alpha) = ( float ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; }
/* bl1_dsqrte( alpha, error ): same contract as bl1_ssqrte, comparing against
   0.0 and casting the sqrt result to double. */
134 #define bl1_dsqrte( alpha, error ) \
135 if ( *(alpha) <= 0.0 || isnan( *(alpha) ) ) { *(error) = FLA_FAILURE; } \
136 else { *(alpha) = ( double ) sqrt( *(alpha) ); *(error) = FLA_SUCCESS; }
/* bl1_csqrte( alpha, error ): checked square root for a complex operand.
   alpha points to an object with real and imag members; error is a pointer.
   Only the real part is tested: if it is <= 0.0F or NaN, *error is set to
   FLA_FAILURE. The visible success statements replace alpha->real with its
   square root (double-precision sqrt, cast to float), set alpha->imag to
   0.0F and set *error to FLA_SUCCESS. The incoming imaginary part is never
   read.
   NOTE(review): the listing omits the line between the failure branch and
   the success statements (presumably "else {") -- confirm against the
   original header. */
139 #define bl1_csqrte( alpha, error ) \
140 if ( (alpha)->real <= 0.0F || isnan( (alpha)->real) ) \
141 { *(error) = FLA_FAILURE; } \
143 (alpha)->real = ( float ) sqrt( (alpha)->real ); \
144 (alpha)->imag = 0.0F; *(error) = FLA_SUCCESS; }
/* bl1_zsqrte( alpha, error ): double-precision counterpart of bl1_csqrte
   (compares against 0.0, stores 0.0 into imag). The same line is missing
   from the listing. */
147 #define bl1_zsqrte( alpha, error ) \
148 if ( (alpha)->real <= 0.0 || isnan( (alpha)->real) ) \
149 { *(error) = FLA_FAILURE; } \
151 (alpha)->real = ( double ) sqrt( (alpha)->real ); \
152 (alpha)->imag = 0.0; *(error) = FLA_SUCCESS; }
/* bl1_sabsval2( alpha, absval ): stores the absolute value of *alpha into
   *absval. The value is widened to double for fabs and the result is cast
   back to float. The expansion already ends with a semicolon. */
157 #define bl1_sabsval2( alpha, absval ) \
158 *(absval) = ( float ) fabs( ( double ) *(alpha) );
/* bl1_dabsval2( alpha, absval ): stores fabs( *alpha ) into *absval.
   The expansion already ends with a semicolon. */
161 #define bl1_dabsval2( alpha, absval ) \
162 *(absval) = fabs( *(alpha) );
/* Complex magnitude macros. x points to an object with real and imag
   members. The visible statements compute
       s   = bl1_fmaxabs( x->real, x->imag )
       mag = sqrt( s ) * sqrt( (x->real / s) * x->real + (x->imag / s) * x->imag )
   which is algebraically sqrt( real^2 + imag^2 ). Dividing by s before
   multiplying presumably keeps the intermediate sums in range -- confirm.
   NOTE(review): the listing omits the enclosing brace lines and the
   statement(s) that store mag into a, so how the c/cs and z/zd variants
   differ in their output operand cannot be seen here. It also does not show
   any guard for s == 0, in which case the divisions by s divide by zero. */
/* Single precision: float locals, sqrtf. */
165 #define bl1_cabsval2( x, a ) \
167 float s = bl1_fmaxabs( (x)->real, (x)->imag ); \
168 float mag = sqrtf( s ) * \
169 sqrtf( ( (x)->real / s ) * (x)->real + \
170 ( (x)->imag / s ) * (x)->imag ); \
176 #define bl1_csabsval2( x, a ) \
178 float s = bl1_fmaxabs( (x)->real, (x)->imag ); \
179 float mag = sqrtf( s ) * \
180 sqrtf( ( (x)->real / s ) * (x)->real + \
181 ( (x)->imag / s ) * (x)->imag ); \
/* Double precision: double locals, sqrt. */
186 #define bl1_zabsval2( x, a ) \
188 double s = bl1_fmaxabs( (x)->real, (x)->imag ); \
189 double mag = sqrt( s ) * \
190 sqrt( ( (x)->real / s ) * (x)->real + \
191 ( (x)->imag / s ) * (x)->imag ); \
197 #define bl1_zdabsval2( x, a ) \
199 double s = bl1_fmaxabs( (x)->real, (x)->imag ); \
200 double mag = sqrt( s ) * \
201 sqrt( ( (x)->real / s ) * (x)->real + \
202 ( (x)->imag / s ) * (x)->imag ); \
/* bl1_{s,d}absqr( alpha ): replaces *alpha with *alpha multiplied by itself.
   The expansion already ends with a semicolon. */
210 #define bl1_sabsqr( alpha ) \
211 *(alpha) = *(alpha) * *(alpha);
214 #define bl1_dabsqr( alpha ) \
215 *(alpha) = *(alpha) * *(alpha);
/* bl1_cabsqr( alpha ): alpha points to an object with real and imag members.
   Overwrites alpha->real with real*real + imag*imag, then sets alpha->imag
   to 0.0F. Two unbraced statements: do not use as the sole body of an
   unbraced if/else/for. */
218 #define bl1_cabsqr( alpha ) \
219 (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
220 (alpha)->imag = 0.0F;
/* bl1_zabsqr( alpha ): overwrites alpha->real with real*real + imag*imag.
   NOTE(review): the visible line ends in a continuation backslash and the
   next line is missing from this listing; presumably it zeroes alpha->imag
   as bl1_cabsqr does -- confirm against the original header. */
223 #define bl1_zabsqr( alpha ) \
224 (alpha)->real = (alpha)->real * (alpha)->real + (alpha)->imag * (alpha)->imag; \
/* Inverse-scaling macros: y is updated in place using a as the divisor.
   NOTE(review): throughout this family the listing omits lines (bodies of the
   s/d variants, and what are presumably the enclosing brace lines of the
   others) -- confirm details against the original header. */
230 #define bl1_sinvscals( a, y ) \
234 #define bl1_dinvscals( a, y ) \
/* bl1_csinvscals: a points to a real scalar, y to an object with real and
   imag members; both components of y are divided by *a. */
238 #define bl1_csinvscals( a, y ) \
240 (y)->real = (y)->real / *(a); \
241 (y)->imag = (y)->imag / *(a); \
/* bl1_cinvscals: complex division y := y / a, where both point to objects
   with real and imag members.
     s     = bl1_fmaxabs( a->real, a->imag )
     ar_s  = a->real / s,  ai_s = a->imag / s      (a scaled by s)
     temp  = ar_s * a->real + ai_s * a->imag       (|a|^2 / s)
     y->real = ( y->real * ar_s + y->imag * ai_s ) / temp
     y->imag = ( y->imag * ar_s - y->real * ai_s ) / temp
   yrt saves the original y->real because y->real is overwritten before the
   imaginary part, which still needs the old value, is computed.
   The visible lines show no guard for s == 0 (a equal to zero). */
245 #define bl1_cinvscals( a, y ) \
247 float s = bl1_fmaxabs( (a)->real, (a)->imag ); \
248 float ar_s = (a)->real / s; \
249 float ai_s = (a)->imag / s; \
250 float yrt = (y)->real; \
251 float temp = ( ar_s * (a)->real + ai_s * (a)->imag ); \
252 (y)->real = ( (yrt) * ar_s + (y)->imag * ai_s ) / temp; \
253 (y)->imag = ( (y)->imag * ar_s - (yrt) * ai_s ) / temp; \
/* bl1_zdinvscals: as bl1_csinvscals; both components of y divided by *a. */
257 #define bl1_zdinvscals( a, y ) \
259 (y)->real = (y)->real / *(a); \
260 (y)->imag = (y)->imag / *(a); \
/* bl1_zinvscals: same computation as bl1_cinvscals with double locals. */
264 #define bl1_zinvscals( a, y ) \
266 double s = bl1_fmaxabs( (a)->real, (a)->imag ); \
267 double ar_s = (a)->real / s; \
268 double ai_s = (a)->imag / s; \
269 double yrt = (y)->real; \
270 double temp = ( ar_s * (a)->real + ai_s * (a)->imag ); \
271 (y)->real = ( (yrt) * ar_s + (y)->imag * ai_s ) / temp; \
272 (y)->imag = ( (y)->imag * ar_s - (yrt) * ai_s ) / temp; \
/* bl1_{s,d,c,z}div3( x, y, a ): three-operand division macros.
   NOTE(review): most of each body is missing from this listing. The only
   visible statements are the calls bl1_cinvscals( y, a ) and
   bl1_zinvscals( y, a ) in the complex variants, which divide a by y in
   place. Presumably an omitted earlier line first copies x into a, giving
   a := x / y -- confirm against the original header. */
278 #define bl1_sdiv3( x, y, a ) \
282 #define bl1_ddiv3( x, y, a ) \
287 #define bl1_cdiv3( x, y, a ) \
290 bl1_cinvscals( y, a ); \
294 #define bl1_zdiv3( x, y, a ) \
297 bl1_zinvscals( y, a ); \
/* bl1_{s,d,c,z}add3( x, y, a ): three-operand addition macros.
   NOTE(review): the bodies of bl1_sadd3 and bl1_dadd3 are missing from this
   listing, as are lines around the complex bodies (presumably braces). */
303 #define bl1_sadd3( x, y, a ) \
307 #define bl1_dadd3( x, y, a ) \
/* bl1_cadd3 / bl1_zadd3: x, y and a point to objects with real and imag
   members; a receives the component-wise sum x + y. */
311 #define bl1_cadd3( x, y, a ) \
313 (a)->real = (x)->real + (y)->real; \
314 (a)->imag = (x)->imag + (y)->imag; \
318 #define bl1_zadd3( x, y, a ) \
320 (a)->real = (x)->real + (y)->real; \
321 (a)->imag = (x)->imag + (y)->imag; \
/* bl1_{s,d,c,z}copys( conj, x, y ): copy macros taking a conjugation flag.
   NOTE(review): the bodies of the s/d variants and the first body line of the
   c/z variants are missing from this listing; presumably the missing line
   copies x into y -- confirm against the original header.
   The visible statement in the complex variants negates y->imag (multiplies
   it by -1.0F or -1.0) when bl1_is_conj( conj ) is true. bl1_is_conj is
   defined outside this listing. */
327 #define bl1_scopys( conj, x, y ) \
331 #define bl1_dcopys( conj, x, y ) \
335 #define bl1_ccopys( conj, x, y ) \
337 if ( bl1_is_conj( conj ) ) (y)->imag *= -1.0F;
340 #define bl1_zcopys( conj, x, y ) \
342 if ( bl1_is_conj( conj ) ) (y)->imag *= -1.0;
/* Scaling macros: y is updated in place using a as the multiplier.
   NOTE(review): the bodies of bl1_sscals and bl1_dscals are missing from this
   listing, as are lines in the other variants (presumably braces and, for
   the complex-by-complex variants, the write-back of the temporaries). */
347 #define bl1_sscals( a, y ) \
351 #define bl1_dscals( a, y ) \
/* bl1_csscals: a points to a real scalar; both components of y are
   multiplied by *a. */
355 #define bl1_csscals( a, y ) \
357 (y)->real = *(a) * (y)->real; \
358 (y)->imag = *(a) * (y)->imag; \
/* bl1_cscals: computes the complex product a * y into float temporaries
   tempr (real part) and tempi (imaginary part). Temporaries are needed
   because both parts read both components of y. The statements that store
   them back into y are not visible in this listing. */
362 #define bl1_cscals( a, y ) \
364 float tempr = (a)->real * (y)->real - (a)->imag * (y)->imag; \
365 float tempi = (a)->imag * (y)->real + (a)->real * (y)->imag; \
/* bl1_zdscals: as bl1_csscals; both components of y multiplied by *a. */
371 #define bl1_zdscals( a, y ) \
373 (y)->real = *(a) * (y)->real; \
374 (y)->imag = *(a) * (y)->imag; \
/* bl1_zscals: as bl1_cscals with double temporaries. */
378 #define bl1_zscals( a, y ) \
380 double tempr = (a)->real * (y)->real - (a)->imag * (y)->imag; \
381 double tempi = (a)->imag * (y)->real + (a)->real * (y)->imag; \
/* bl1_{s,d,c,z}mult3( x, y, a ): three-operand multiplication macros.
   NOTE(review): the bodies of bl1_smult3 and bl1_dmult3 are missing from this
   listing. For the complex variants only the temporaries are visible:
   tempr and tempi hold the real and imaginary parts of the complex product
   x * y. Presumably omitted lines then store them into a -- confirm against
   the original header. */
389 #define bl1_smult3( x, y, a ) \
393 #define bl1_dmult3( x, y, a ) \
397 #define bl1_cmult3( x, y, a ) \
399 float tempr = (x)->real * (y)->real - (x)->imag * (y)->imag; \
400 float tempi = (x)->imag * (y)->real + (x)->real * (y)->imag; \
406 #define bl1_zmult3( x, y, a ) \
408 double tempr = (x)->real * (y)->real - (x)->imag * (y)->imag; \
409 double tempi = (x)->imag * (y)->real + (x)->real * (y)->imag; \
/* bl1_{s,d}mult4( alpha, x, y1, y2 ): all operands are pointers to real
   scalars; stores *y1 + *alpha * *x into *y2. The expansion already ends
   with a semicolon. */
417 #define bl1_smult4( alpha, x, y1, y2 ) \
418 *(y2) = *(y1) + *(alpha) * *(x);
421 #define bl1_dmult4( alpha, x, y1, y2 ) \
422 *(y2) = *(y1) + *(alpha) * *(x);
/* bl1_{c,z}mult4( alpha, x, y1, y2 ): complex form of the same update,
   y2 := y1 + alpha * x, with operands pointing to objects that have real and
   imag members.
   y2->real is written before y2->imag is computed and no temporaries are
   used, so if y2 aliases alpha or x the imaginary part is computed from an
   already-modified real component.
   NOTE(review): the listing omits lines around the body (presumably
   braces). */
425 #define bl1_cmult4( alpha, x, y1, y2 ) \
427 (y2)->real = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
428 (y2)->imag = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
432 #define bl1_zmult4( alpha, x, y1, y2 ) \
434 (y2)->real = (y1)->real + (alpha)->real * (x)->real - (alpha)->imag * (x)->imag; \
435 (y2)->imag = (y1)->imag + (alpha)->imag * (x)->real + (alpha)->real * (x)->imag; \
/* bl1_{s,d,c,z}conjs( a ): one-operand macros, one per datatype prefix.
   NOTE(review): only the #define lines are present in this listing; the
   bodies after the continuation backslashes are missing. Presumably these
   conjugate a in place (a no-op for the real types) -- confirm against the
   original header. */
441 #define bl1_sconjs( a ) \
445 #define bl1_dconjs( a ) \
449 #define bl1_cconjs( a ) \
453 #define bl1_zconjs( a ) \
/* bl1_{s,d,c,z}copyconj( x, y ): copy-with-conjugation macros.
   NOTE(review): the bodies of bl1_scopyconj and bl1_dcopyconj are missing
   from this listing. */
459 #define bl1_scopyconj( x, y ) \
463 #define bl1_dcopyconj( x, y ) \
/* bl1_ccopyconj: x and y point to objects with real and imag members.
   Copies x->real into y->real and stores -1.0F * x->imag into y->imag, so y
   receives the complex conjugate of x. Two unbraced statements, the last
   already terminated by a semicolon. */
467 #define bl1_ccopyconj( x, y ) \
468 (y)->real = (x)->real; \
469 (y)->imag = -1.0F * (x)->imag;
/* bl1_zcopyconj: same as bl1_ccopyconj using the double literal -1.0. */
472 #define bl1_zcopyconj( x, y ) \
473 (y)->real = (x)->real; \
474 (y)->imag = -1.0 * (x)->imag;
/* bl1_{s,d,c,z}eq1( alpha ): "equals one" predicates.
   NOTE(review): the bodies of bl1_seq1 and bl1_deq1 are missing from this
   listing. */
479 #define bl1_seq1( alpha ) \
483 #define bl1_deq1( alpha ) \
/* bl1_ceq1 / bl1_zeq1: alpha points to an object with real and imag members.
   Expression macro that is true only when real compares exactly equal to 1
   and imag exactly equal to 0 (plain ==, no tolerance). */
487 #define bl1_ceq1( alpha ) \
488 ( (alpha)->real == 1.0F && (alpha)->imag == 0.0F )
491 #define bl1_zeq1( alpha ) \
492 ( (alpha)->real == 1.0 && (alpha)->imag == 0.0 )
/* Swap macros for pointers, ints, and the trans1_t / conj1_t types.
   NOTE(review): nearly all body lines are missing from this listing. The only
   visible statements declare a local named temp initialised from a
   (double* in bl1_dswap_pointers, trans1_t in bl1_swap_trans, conj1_t in
   bl1_swap_conj). Presumably each macro then assigns b to a and temp to b
   inside a brace block -- confirm against the original header.
   trans1_t and conj1_t are declared outside this listing. */
498 #define bl1_sswap_pointers( a, b ) \
505 #define bl1_dswap_pointers( a, b ) \
507 double* temp = (a); \
512 #define bl1_cswap_pointers( a, b ) \
519 #define bl1_zswap_pointers( a, b ) \
528 #define bl1_swap_ints( a, b ) \
537 #define bl1_swap_trans( a, b ) \
539 trans1_t temp = (a); \
546 #define bl1_swap_conj( a, b ) \
548 conj1_t temp = (a); \
/* Toggle macros: each assigns a new value to its argument in place, so the
   argument must be a modifiable lvalue. It is used unparenthesized as the
   assignment target. The bl1_is_* predicates and BLIS1_* constants are
   defined outside this listing.
   NOTE(review): the listing omits lines around each body (presumably the
   enclosing braces) -- confirm against the original header. */
/* bl1_toggle_side: left becomes BLIS1_RIGHT; anything else becomes
   BLIS1_LEFT. */
555 #define bl1_toggle_side( side ) \
557 if ( bl1_is_left( side ) ) side = BLIS1_RIGHT; \
558 else side = BLIS1_LEFT; \
/* bl1_toggle_uplo: lower becomes BLIS1_UPPER_TRIANGULAR; anything else
   becomes BLIS1_LOWER_TRIANGULAR. */
563 #define bl1_toggle_uplo( uplo ) \
565 if ( bl1_is_lower( uplo ) ) uplo = BLIS1_UPPER_TRIANGULAR; \
566 else uplo = BLIS1_LOWER_TRIANGULAR; \
/* bl1_toggle_trans: flips the transpose aspect and keeps the conjugation
   aspect:
     notrans      -> BLIS1_TRANSPOSE
     trans        -> BLIS1_NO_TRANSPOSE
     conjnotrans  -> BLIS1_CONJ_TRANSPOSE
     anything else -> BLIS1_CONJ_NO_TRANSPOSE */
570 #define bl1_toggle_trans( trans ) \
572 if ( bl1_is_notrans( trans ) ) trans = BLIS1_TRANSPOSE; \
573 else if ( bl1_is_trans( trans ) ) trans = BLIS1_NO_TRANSPOSE; \
574 else if ( bl1_is_conjnotrans( trans ) ) trans = BLIS1_CONJ_TRANSPOSE; \
575 else trans = BLIS1_CONJ_NO_TRANSPOSE; \
/* bl1_toggle_conjtrans: notrans becomes BLIS1_CONJ_TRANSPOSE; every other
   value, not only conjugate-transpose, becomes BLIS1_NO_TRANSPOSE. */
579 #define bl1_toggle_conjtrans( trans ) \
581 if ( bl1_is_notrans( trans ) ) trans = BLIS1_CONJ_TRANSPOSE; \
582 else trans = BLIS1_NO_TRANSPOSE; \
587 #define bl1_toggle_conj( conj ) \
589 if ( bl1_is_conj( conj ) ) conj = BLIS1_NO_CONJUGATE; \
590 else conj = BLIS1_CONJUGATE; \