libflame revision_anchor
|
Functions | |
void | bli_saxpysmt (char trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs) |
void | bli_daxpysmt (char trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs) |
void | bli_caxpysmt (char trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs) |
void | bli_zaxpysmt (char trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs) |
void bli_caxpysmt | ( | char | trans, |
int | m, | ||
int | n, | ||
scomplex * | alpha0, | ||
scomplex * | alpha1, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | beta, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_callocv(), bli_caxpy(), bli_ccopyv(), bli_cfree(), bli_cscal(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), scomplex::imag, and scomplex::real.
Referenced by FLA_Axpys_external().
{
	// Forms alpha = alpha0 * alpha1 and then, one vector of A/B at a time,
	// calls bli_cscal() on the B vector with beta followed by bli_caxpy()
	// with alpha and the matching A vector. When bli_does_conj( trans ) is
	// true, the A vector is first passed through bli_ccopyv() into a
	// unit-stride scratch buffer and that buffer is used in its place.
	scomplex  alpha;
	scomplex* a_scratch;
	int       a_scratch_inc;
	int       a_ld, a_inc;
	int       b_ld, b_inc;
	int       num_vecs;
	int       vec_len;
	int       k;

	// Nothing to do when bli_zero_dim2() reports an empty problem.
	if ( bli_zero_dim2( m, n ) ) return;

	// Complex product of the two scalars.
	alpha.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
	alpha.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands: a single pass so the underlying axpy runs once.
		// The leading dimensions are only ever multiplied by the loop
		// index 0, so their value is irrelevant.
		num_vecs = 1;
		vec_len  = bli_vector_dim( m, n );
		a_ld     = 1;
		a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
		b_ld     = 1;
		b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// Matrix operands: start from a column-by-column traversal.
		num_vecs = n;
		vec_len  = m;
		a_ld     = a_cs;
		a_inc    = a_rs;
		b_ld     = b_cs;
		b_inc    = b_rs;

		// A transposed A is walked with its two strides exchanged.
		if ( bli_does_trans( trans ) )
		{
			bli_swap_ints( a_ld, a_inc );
		}

		// If B is row-stored and A is effectively row-stored once the
		// transposition is taken into account, traverse by rows instead
		// of by columns for better spatial locality.
		if ( bli_is_row_storage( b_rs, b_cs ) &&
		     ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
		       ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
		{
			bli_swap_ints( num_vecs, vec_len );
			bli_swap_ints( a_ld, a_inc );
			bli_swap_ints( b_ld, b_inc );
		}
	}

	// No conjugation requested: operate on A in place and finish.
	if ( !bli_does_conj( trans ) )
	{
		for ( k = 0; k < num_vecs; k++ )
		{
			scomplex* a_vec = a + k*a_ld;
			scomplex* b_vec = b + k*b_ld;

			bli_cscal( vec_len, beta, b_vec, b_inc );
			bli_caxpy( vec_len, &alpha, a_vec, a_inc, b_vec, b_inc );
		}

		return;
	}

	// Conjugation requested: stage each A vector in a scratch buffer of
	// vec_len elements before the update.
	a_scratch     = bli_callocv( vec_len );
	a_scratch_inc = 1;

	for ( k = 0; k < num_vecs; k++ )
	{
		scomplex* a_vec = a + k*a_ld;
		scomplex* b_vec = b + k*b_ld;

		bli_ccopyv( trans, vec_len, a_vec, a_inc, a_scratch, a_scratch_inc );
		bli_cscal( vec_len, beta, b_vec, b_inc );
		bli_caxpy( vec_len, &alpha, a_scratch, a_scratch_inc, b_vec, b_inc );
	}

	bli_cfree( a_scratch );
}
void bli_daxpysmt | ( | char | trans, |
int | m, | ||
int | n, | ||
double * | alpha0, | ||
double * | alpha1, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | beta, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_daxpy(), bli_does_notrans(), bli_does_trans(), bli_dscal(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().
Referenced by FLA_Axpys_external().
{
	// Forms alpha = (*alpha0) * (*alpha1) and then, one vector of A/B at a
	// time, calls bli_dscal() on the B vector with beta followed by
	// bli_daxpy() with alpha and the matching A vector.
	double alpha;
	int    a_ld, a_inc;
	int    b_ld, b_inc;
	int    num_vecs;
	int    vec_len;
	int    k;

	// Nothing to do when bli_zero_dim2() reports an empty problem.
	if ( bli_zero_dim2( m, n ) ) return;

	alpha = (*alpha0) * (*alpha1);

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands: a single pass so the underlying axpy runs once.
		// The leading dimensions are only ever multiplied by the loop
		// index 0, so their value is irrelevant.
		num_vecs = 1;
		vec_len  = bli_vector_dim( m, n );
		a_ld     = 1;
		a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
		b_ld     = 1;
		b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// Matrix operands: start from a column-by-column traversal.
		num_vecs = n;
		vec_len  = m;
		a_ld     = a_cs;
		a_inc    = a_rs;
		b_ld     = b_cs;
		b_inc    = b_rs;

		// A transposed A is walked with its two strides exchanged.
		if ( bli_does_trans( trans ) )
		{
			bli_swap_ints( a_ld, a_inc );
		}

		// If B is row-stored and A is effectively row-stored once the
		// transposition is taken into account, traverse by rows instead
		// of by columns for better spatial locality.
		if ( bli_is_row_storage( b_rs, b_cs ) &&
		     ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
		       ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
		{
			bli_swap_ints( num_vecs, vec_len );
			bli_swap_ints( a_ld, a_inc );
			bli_swap_ints( b_ld, b_inc );
		}
	}

	for ( k = 0; k < num_vecs; k++ )
	{
		double* a_vec = a + k*a_ld;
		double* b_vec = b + k*b_ld;

		bli_dscal( vec_len, beta, b_vec, b_inc );
		bli_daxpy( vec_len, &alpha, a_vec, a_inc, b_vec, b_inc );
	}
}
void bli_saxpysmt | ( | char | trans, |
int | m, | ||
int | n, | ||
float * | alpha0, | ||
float * | alpha1, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | beta, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_saxpy(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), and bli_zero_dim2().
Referenced by FLA_Axpys_external().
{
	// Forms alpha = (*alpha0) * (*alpha1) and then, one vector of A/B at a
	// time, calls bli_sscal() on the B vector with beta followed by
	// bli_saxpy() with alpha and the matching A vector.
	float alpha;
	int   a_ld, a_inc;
	int   b_ld, b_inc;
	int   num_vecs;
	int   vec_len;
	int   k;

	// Nothing to do when bli_zero_dim2() reports an empty problem.
	if ( bli_zero_dim2( m, n ) ) return;

	alpha = (*alpha0) * (*alpha1);

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands: a single pass so the underlying axpy runs once.
		// The leading dimensions are only ever multiplied by the loop
		// index 0, so their value is irrelevant.
		num_vecs = 1;
		vec_len  = bli_vector_dim( m, n );
		a_ld     = 1;
		a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
		b_ld     = 1;
		b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// Matrix operands: start from a column-by-column traversal.
		num_vecs = n;
		vec_len  = m;
		a_ld     = a_cs;
		a_inc    = a_rs;
		b_ld     = b_cs;
		b_inc    = b_rs;

		// A transposed A is walked with its two strides exchanged.
		if ( bli_does_trans( trans ) )
		{
			bli_swap_ints( a_ld, a_inc );
		}

		// If B is row-stored and A is effectively row-stored once the
		// transposition is taken into account, traverse by rows instead
		// of by columns for better spatial locality.
		if ( bli_is_row_storage( b_rs, b_cs ) &&
		     ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
		       ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
		{
			bli_swap_ints( num_vecs, vec_len );
			bli_swap_ints( a_ld, a_inc );
			bli_swap_ints( b_ld, b_inc );
		}
	}

	for ( k = 0; k < num_vecs; k++ )
	{
		float* a_vec = a + k*a_ld;
		float* b_vec = b + k*b_ld;

		bli_sscal( vec_len, beta, b_vec, b_inc );
		bli_saxpy( vec_len, &alpha, a_vec, a_inc, b_vec, b_inc );
	}
}
void bli_zaxpysmt | ( | char | trans, |
int | m, | ||
int | n, | ||
dcomplex * | alpha0, | ||
dcomplex * | alpha1, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | beta, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zallocv(), bli_zaxpy(), bli_zcopyv(), bli_zero_dim2(), bli_zfree(), bli_zscal(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Axpys_external().
{
	// Forms alpha = alpha0 * alpha1 and then, one vector of A/B at a time,
	// calls bli_zscal() on the B vector with beta followed by bli_zaxpy()
	// with alpha and the matching A vector. When bli_does_conj( trans ) is
	// true, the A vector is first passed through bli_zcopyv() into a
	// unit-stride scratch buffer and that buffer is used in its place.
	dcomplex  alpha;
	dcomplex* a_scratch;
	int       a_scratch_inc;
	int       a_ld, a_inc;
	int       b_ld, b_inc;
	int       num_vecs;
	int       vec_len;
	int       k;

	// Nothing to do when bli_zero_dim2() reports an empty problem.
	if ( bli_zero_dim2( m, n ) ) return;

	// Complex product of the two scalars.
	alpha.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
	alpha.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

	if ( bli_is_vector( m, n ) )
	{
		// Vector operands: a single pass so the underlying axpy runs once.
		// The leading dimensions are only ever multiplied by the loop
		// index 0, so their value is irrelevant.
		num_vecs = 1;
		vec_len  = bli_vector_dim( m, n );
		a_ld     = 1;
		a_inc    = bli_vector_inc( trans, m, n, a_rs, a_cs );
		b_ld     = 1;
		b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
	}
	else
	{
		// Matrix operands: start from a column-by-column traversal.
		num_vecs = n;
		vec_len  = m;
		a_ld     = a_cs;
		a_inc    = a_rs;
		b_ld     = b_cs;
		b_inc    = b_rs;

		// A transposed A is walked with its two strides exchanged.
		if ( bli_does_trans( trans ) )
		{
			bli_swap_ints( a_ld, a_inc );
		}

		// If B is row-stored and A is effectively row-stored once the
		// transposition is taken into account, traverse by rows instead
		// of by columns for better spatial locality.
		if ( bli_is_row_storage( b_rs, b_cs ) &&
		     ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
		       ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
		{
			bli_swap_ints( num_vecs, vec_len );
			bli_swap_ints( a_ld, a_inc );
			bli_swap_ints( b_ld, b_inc );
		}
	}

	// No conjugation requested: operate on A in place and finish.
	if ( !bli_does_conj( trans ) )
	{
		for ( k = 0; k < num_vecs; k++ )
		{
			dcomplex* a_vec = a + k*a_ld;
			dcomplex* b_vec = b + k*b_ld;

			bli_zscal( vec_len, beta, b_vec, b_inc );
			bli_zaxpy( vec_len, &alpha, a_vec, a_inc, b_vec, b_inc );
		}

		return;
	}

	// Conjugation requested: stage each A vector in a scratch buffer of
	// vec_len elements before the update.
	a_scratch     = bli_zallocv( vec_len );
	a_scratch_inc = 1;

	for ( k = 0; k < num_vecs; k++ )
	{
		dcomplex* a_vec = a + k*a_ld;
		dcomplex* b_vec = b + k*b_ld;

		bli_zcopyv( trans, vec_len, a_vec, a_inc, a_scratch, a_scratch_inc );
		bli_zscal( vec_len, beta, b_vec, b_inc );
		bli_zaxpy( vec_len, &alpha, a_scratch, a_scratch_inc, b_vec, b_inc );
	}

	bli_zfree( a_scratch );
}