libflame revision_anchor
|
Go to the source code of this file.
Functions | |
void | bli_sgemm (char transa, char transb, int m, int k, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dgemm (char transa, char transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_cgemm (char transa, char transb, int m, int k, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_zgemm (char transa, char transb, int m, int k, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
void | bli_sgemm_blas (char transa, char transb, int m, int n, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc) |
void | bli_dgemm_blas (char transa, char transb, int m, int n, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc) |
void | bli_cgemm_blas (char transa, char transb, int m, int n, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
void | bli_zgemm_blas (char transa, char transb, int m, int n, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
void | bli_shemm (char side, char uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dhemm (char side, char uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_chemm (char side, char uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_zhemm (char side, char uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
void | bli_chemm_blas (char side, char uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
void | bli_zhemm_blas (char side, char uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
void | bli_sherk (char uplo, char trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dherk (char uplo, char trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_cherk (char uplo, char trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_zherk (char uplo, char trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs) |
void | bli_cherk_blas (char uplo, char trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc) |
void | bli_zherk_blas (char uplo, char trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc) |
void | bli_sher2k (char uplo, char trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dher2k (char uplo, char trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_cher2k (char uplo, char trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, float *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_zher2k (char uplo, char trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, double *beta, dcomplex *c, int c_rs, int c_cs) |
void | bli_cher2k_blas (char uplo, char trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, float *beta, scomplex *c, int ldc) |
void | bli_zher2k_blas (char uplo, char trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, double *beta, dcomplex *c, int ldc) |
void | bli_ssymm (char side, char uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dsymm (char side, char uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_csymm (char side, char uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_zsymm (char side, char uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
void | bli_ssymm_blas (char side, char uplo, int m, int n, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc) |
void | bli_dsymm_blas (char side, char uplo, int m, int n, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc) |
void | bli_csymm_blas (char side, char uplo, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
void | bli_zsymm_blas (char side, char uplo, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
void | bli_ssyrk (char uplo, char trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dsyrk (char uplo, char trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_csyrk (char uplo, char trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_zsyrk (char uplo, char trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
void | bli_ssyrk_blas (char uplo, char trans, int m, int k, float *alpha, float *a, int lda, float *beta, float *c, int ldc) |
void | bli_dsyrk_blas (char uplo, char trans, int m, int k, double *alpha, double *a, int lda, double *beta, double *c, int ldc) |
void | bli_csyrk_blas (char uplo, char trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *beta, scomplex *c, int ldc) |
void | bli_zsyrk_blas (char uplo, char trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *beta, dcomplex *c, int ldc) |
void | bli_ssyr2k (char uplo, char trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dsyr2k (char uplo, char trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_csyr2k (char uplo, char trans, int m, int k, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_zsyr2k (char uplo, char trans, int m, int k, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
void | bli_ssyr2k_blas (char uplo, char trans, int m, int k, float *alpha, float *a, int lda, float *b, int ldb, float *beta, float *c, int ldc) |
void | bli_dsyr2k_blas (char uplo, char trans, int m, int k, double *alpha, double *a, int lda, double *b, int ldb, double *beta, double *c, int ldc) |
void | bli_csyr2k_blas (char uplo, char trans, int m, int k, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb, scomplex *beta, scomplex *c, int ldc) |
void | bli_zsyr2k_blas (char uplo, char trans, int m, int k, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb, dcomplex *beta, dcomplex *c, int ldc) |
void | bli_strmm (char side, char uplo, char trans, char diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bli_dtrmm (char side, char uplo, char trans, char diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bli_ctrmm (char side, char uplo, char trans, char diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bli_ztrmm (char side, char uplo, char trans, char diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bli_strmm_blas (char side, char uplo, char trans, char diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb) |
void | bli_dtrmm_blas (char side, char uplo, char trans, char diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb) |
void | bli_ctrmm_blas (char side, char uplo, char trans, char diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb) |
void | bli_ztrmm_blas (char side, char uplo, char trans, char diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb) |
void | bli_strsm (char side, char uplo, char trans, char diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs) |
void | bli_dtrsm (char side, char uplo, char trans, char diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs) |
void | bli_ctrsm (char side, char uplo, char trans, char diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs) |
void | bli_ztrsm (char side, char uplo, char trans, char diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs) |
void | bli_strsm_blas (char side, char uplo, char trans, char diag, int m, int n, float *alpha, float *a, int lda, float *b, int ldb) |
void | bli_dtrsm_blas (char side, char uplo, char trans, char diag, int m, int n, double *alpha, double *a, int lda, double *b, int ldb) |
void | bli_ctrsm_blas (char side, char uplo, char trans, char diag, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *b, int ldb) |
void | bli_ztrsm_blas (char side, char uplo, char trans, char diag, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *b, int ldb) |
void | bli_strmmsx (char side, char uplo, char trans, char diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dtrmmsx (char side, char uplo, char trans, char diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_ctrmmsx (char side, char uplo, char trans, char diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_ztrmmsx (char side, char uplo, char trans, char diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
void | bli_strsmsx (char side, char uplo, char trans, char diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs, float *beta, float *c, int c_rs, int c_cs) |
void | bli_dtrsmsx (char side, char uplo, char trans, char diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs) |
void | bli_ctrsmsx (char side, char uplo, char trans, char diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs, scomplex *beta, scomplex *c, int c_rs, int c_cs) |
void | bli_ztrsmsx (char side, char uplo, char trans, char diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs, dcomplex *beta, dcomplex *c, int c_rs, int c_cs) |
void bli_cgemm | ( | char | transa, |
char | transb, | ||
int | m, | ||
int | k, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_c0(), bli_c1(), bli_callocm(), bli_caxpymt(), bli_cconjm(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmt(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cgemm_blas(), bli_cscalm(), bli_is_col_storage(), bli_is_conjnotrans(), and bli_zero_dim3().
Referenced by FLA_Gemm_external().
{ int m_save = m; int n_save = n; scomplex* a_save = a; scomplex* b_save = b; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; scomplex zero = bli_c0(); scomplex one = bli_c1(); scomplex* a_unswap; scomplex* b_unswap; scomplex* a_conj; scomplex* b_conj; scomplex* c_trans; int lda, inca; int ldb, incb; int ldc, incc; int lda_conj, inca_conj; int ldb_conj, incb_conj; int ldc_trans, incc_trans; int m_gemm, n_gemm; int gemm_needs_axpyt = FALSE; int a_was_copied; int b_was_copied; // Return early if possible. if ( bli_zero_dim3( m, k, n ) ) { bli_cscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); return; } // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_ccreate_contigmt( transa, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigmt( transb, k, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_ccreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Figure out whether A and/or B was copied to contiguous memory. This // is used later to prevent redundant copying. a_was_copied = ( a != a_save ); b_was_copied = ( b != b_save ); // These are used to track the original values of a and b prior to any // operand swapping that might take place. This is necessary for proper // freeing of memory when one is a temporary contiguous matrix. a_unswap = a; b_unswap = b; // These are used to track the dimensions of the product of the // A and B operands to the BLAS invocation of gemm. These differ // from m and n when the operands need to be swapped. m_gemm = m; n_gemm = n; // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_c ) * tr( B_c ) // effective operation: C_c += tr( A_c ) * tr( B_c ) } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_c ) * tr( B_r ) // effective operation: C_c += tr( A_c ) * tr( B_c )^T bli_swap_ints( ldb, incb ); bli_toggle_trans( transb ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_r ) * tr( B_c ) // effective operation: C_c += tr( A_r )^T * tr( B_c ) bli_swap_ints( lda, inca ); bli_toggle_trans( transa ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_r ) * tr( B_r ) // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_cswap_pointers( a, b ); bli_swap_ints( a_was_copied, b_was_copied ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); gemm_needs_axpyt = TRUE; bli_swap_ints( m_gemm, n_gemm ); } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_c ) * tr( B_c ) // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); gemm_needs_axpyt = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_c ) * tr( B_r ) // effective operation: C_c += tr( B_c ) * tr( A_c )^T bli_swap_ints( ldc, incc ); bli_swap_ints( ldb, incb ); bli_toggle_trans( transa ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_cswap_pointers( a, b ); bli_swap_ints( a_was_copied, b_was_copied ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( 
bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_r ) * tr( B_c ) // effective operation: C_c += tr( B_c )^T * tr( A_c ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_toggle_trans( transb ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_cswap_pointers( a, b ); bli_swap_ints( a_was_copied, b_was_copied ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_r ) * tr( B_r ) // effective operation: C_c += tr( B_c ) * tr( A_c ) bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_cswap_pointers( a, b ); bli_swap_ints( a_was_copied, b_was_copied ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } } } // We need a temporary matrix for the case where A is conjugated. a_conj = a; lda_conj = lda; inca_conj = inca; // If transa indicates conjugate-no-transpose and A was not already // copied, then copy and conjugate it to a temporary matrix. Otherwise, // if transa indicates conjugate-no-transpose and A was already copied, // just conjugate it. if ( bli_is_conjnotrans( transa ) && !a_was_copied ) { a_conj = bli_callocm( m_gemm, k ); lda_conj = m_gemm; inca_conj = 1; bli_ccopymt( BLIS_CONJUGATE, m_gemm, k, a, inca, lda, a_conj, inca_conj, lda_conj ); } else if ( bli_is_conjnotrans( transa ) && a_was_copied ) { bli_cconjm( m_gemm, k, a_conj, inca_conj, lda_conj ); } // We need a temporary matrix for the case where B is conjugated. b_conj = b; ldb_conj = ldb; incb_conj = incb; // If transb indicates conjugate-no-transpose and B was not already // copied, then copy and conjugate it to a temporary matrix. Otherwise, // if transb indicates conjugate-no-transpose and B was already copied, // just conjugate it. 
if ( bli_is_conjnotrans( transb ) && !b_was_copied ) { b_conj = bli_callocm( k, n_gemm ); ldb_conj = k; incb_conj = 1; bli_ccopymt( BLIS_CONJUGATE, k, n_gemm, b, incb, ldb, b_conj, incb_conj, ldb_conj ); } else if ( bli_is_conjnotrans( transb ) && b_was_copied ) { bli_cconjm( k, n_gemm, b_conj, incb_conj, ldb_conj ); } // There are two cases where we need to perform the gemm and then axpy // the result into C with a transposition. We handle those cases here. if ( gemm_needs_axpyt ) { // We need a temporary matrix for holding C^T. Notice that m and n // represent the dimensions of C, while m_gemm and n_gemm are the // dimensions of the actual product op(A)*op(B), which may be n-by-m // since the operands may have been swapped. c_trans = bli_callocm( m_gemm, n_gemm ); ldc_trans = m_gemm; incc_trans = 1; // Compute tr( A ) * tr( B ), where A and B may have been swapped // to reference the other, and store the result in C_trans. bli_cgemm_blas( transa, transb, m_gemm, n_gemm, k, alpha, a_conj, lda_conj, b_conj, ldb_conj, &zero, c_trans, ldc_trans ); // Scale C by beta. bli_cscalm( BLIS_NO_CONJUGATE, m, n, beta, c, incc, ldc ); // And finally, accumulate the matrix product in C_trans into C // with a transpose. bli_caxpymt( BLIS_TRANSPOSE, m, n, &one, c_trans, incc_trans, ldc_trans, c, incc, ldc ); // Free the temporary matrix for C. bli_cfree( c_trans ); } else // no extra axpyt step needed { bli_cgemm_blas( transa, transb, m_gemm, n_gemm, k, alpha, a_conj, lda_conj, b_conj, ldb_conj, beta, c, ldc ); } if ( bli_is_conjnotrans( transa ) && !a_was_copied ) bli_cfree( a_conj ); if ( bli_is_conjnotrans( transb ) && !b_was_copied ) bli_cfree( b_conj ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. 
bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a_unswap, &a_rs, &a_cs ); bli_cfree_contigm( b_save, b_rs_save, b_cs_save, &b_unswap, &b_rs, &b_cs ); bli_cfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_cgemm_blas | ( | char | transa, |
char | transb, | ||
int | m, | ||
int | n, | ||
int | k, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
scomplex * | b, | ||
int | ldb, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | ldc | ||
) |
References bli_param_map_to_netlib_trans(), cblas_cgemm(), CblasColMajor, and F77_cgemm().
Referenced by bli_cgemm().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_TRANSPOSE cblas_transa; enum CBLAS_TRANSPOSE cblas_transb; bli_param_map_to_netlib_trans( transa, &cblas_transa ); bli_param_map_to_netlib_trans( transb, &cblas_transb ); cblas_cgemm( cblas_order, cblas_transa, cblas_transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); #else char blas_transa; char blas_transb; bli_param_map_to_netlib_trans( transa, &blas_transa ); bli_param_map_to_netlib_trans( transb, &blas_transb ); F77_cgemm( &blas_transa, &blas_transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc ); #endif }
void bli_chemm | ( | char | side, |
char | uplo, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_c0(), bli_c1(), bli_callocm(), bli_caxpymt(), bli_cconjmr(), bli_ccopymrt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_chemm_blas(), bli_cscalm(), bli_is_col_storage(), bli_is_left(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by FLA_Hemm_external().
{ int m_save = m; int n_save = n; scomplex* a_save = a; scomplex* b_save = b; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; scomplex zero = bli_c0(); scomplex one = bli_c1(); scomplex* a_conj; scomplex* b_copy; scomplex* c_trans; int dim_a; int lda, inca; int ldb, incb; int ldc, incc; int lda_conj, inca_conj; int ldb_copy, incb_copy; int ldc_trans, incc_trans; int hemm_needs_conja = FALSE; int hemm_needs_copyb = FALSE; int hemm_needs_transb = FALSE; int hemm_needs_axpyt = FALSE; int a_was_copied; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_ccreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_ccreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Figure out whether A was copied to contiguous memory. This is used to // prevent redundant copying. a_was_copied = ( a != a_save ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_c ) * B_c // effective operation: C_c += uplo( A_c ) * B_c } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_c ) * B_r // effective operation: C_c += uplo( A_c ) * B_c hemm_needs_copyb = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_r ) * B_c // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); hemm_needs_conja = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_r ) * B_r // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_side( side ); bli_toggle_uplo( uplo ); hemm_needs_axpyt = TRUE; } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_c ) * B_c // effective operation: C_c += ( uplo( A_c ) * B_c )^T bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); hemm_needs_axpyt = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_c ) * B_r // effective operation: C_c += B_c * ~uplo( conj( A_c ) ) bli_swap_ints( ldc, incc ); bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); hemm_needs_conja = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_r ) * B_c // effective operation: C_c += B_c^T * ~uplo( A_c ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_uplo( uplo ); hemm_needs_copyb = TRUE; hemm_needs_transb = TRUE; } else // if ( bli_is_row_storage( 
b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_r ) * B_r // effective operation: C_c += B_c * conj( ~uplo( A_c ) ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_uplo( uplo ); bli_toggle_side( side ); } } } // We need a temporary matrix for the cases where A is conjugated. a_conj = a; lda_conj = lda; inca_conj = inca; if ( hemm_needs_conja && !a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; a_conj = bli_callocm( dim_a, dim_a ); lda_conj = dim_a; inca_conj = 1; bli_ccopymrt( uplo, BLIS_CONJ_NO_TRANSPOSE, dim_a, dim_a, a, inca, lda, a_conj, inca_conj, lda_conj ); } else if ( hemm_needs_conja && a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; bli_cconjmr( uplo, dim_a, dim_a, a_conj, inca_conj, lda_conj ); } // We need a temporary matrix for the cases where B needs to be copied. b_copy = b; ldb_copy = ldb; incb_copy = incb; // There are two cases where we need to make a copy of B: one where the // copy's dimensions are transposed from the original B, and one where // the dimensions are not swapped. if ( hemm_needs_copyb ) { char transb; // Set transb, which determines whether or not we need to copy from B // as if it needs a transposition. If a transposition is needed, then // m and n and have already been swapped. So in either case m // represents the leading dimension of the copy. if ( hemm_needs_transb ) transb = BLIS_TRANSPOSE; else transb = BLIS_NO_TRANSPOSE; b_copy = bli_callocm( m, n ); ldb_copy = m; incb_copy = 1; bli_ccopymt( transb, m, n, b, incb, ldb, b_copy, incb_copy, ldb_copy ); } // There are two cases where we need to perform the hemm and then axpy // the result into C with a transposition. We handle those cases here. if ( hemm_needs_axpyt ) { // We need a temporary matrix for holding C^T. 
Notice that m and n // represent the dimensions of C, and thus C_trans is n-by-m // (interpreting both as column-major matrices). So the leading // dimension of the temporary matrix holding C^T is n. c_trans = bli_callocm( n, m ); ldc_trans = n; incc_trans = 1; // Compute A * B (or B * A) and store the result in C_trans. // Note that there is no overlap between the axpyt cases and // the conja/copyb cases, hence the use of a, b, lda, and ldb. bli_chemm_blas( side, uplo, n, m, alpha, a, lda, b, ldb, &zero, c_trans, ldc_trans ); // Scale C by beta. bli_cscalm( BLIS_NO_CONJUGATE, m, n, beta, c, incc, ldc ); // And finally, accumulate the matrix product in C_trans into C // with a transpose. bli_caxpymt( BLIS_TRANSPOSE, m, n, &one, c_trans, incc_trans, ldc_trans, c, incc, ldc ); // Free the temporary matrix for C. bli_cfree( c_trans ); } else // no extra axpyt step needed { bli_chemm_blas( side, uplo, m, n, alpha, a_conj, lda_conj, b_copy, ldb_copy, beta, c, ldc ); } if ( hemm_needs_conja && !a_was_copied ) bli_cfree( a_conj ); if ( hemm_needs_copyb ) bli_cfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_cfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_chemm_blas | ( | char | side, |
char | uplo, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
scomplex * | b, | ||
int | ldb, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | ldc | ||
) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_chemm(), CblasColMajor, and F77_chemm().
Referenced by bli_chemm().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_SIDE cblas_side; enum CBLAS_UPLO cblas_uplo; bli_param_map_to_netlib_side( side, &cblas_side ); bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); cblas_chemm( cblas_order, cblas_side, cblas_uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); #else char blas_side; char blas_uplo; bli_param_map_to_netlib_side( side, &blas_side ); bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); F77_chemm( &blas_side, &blas_uplo, &m, &n, alpha, a, &lda, b, &ldb, beta, c, &ldc ); #endif }
void bli_cher2k | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
float * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_c1(), bli_callocm(), bli_caxpymrt(), bli_ccopymt(), bli_ccreate_contigmr(), bli_ccreate_contigmt(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigmr(), bli_cher2k_blas(), bli_csscalmr(), bli_does_trans(), bli_is_col_storage(), bli_s0(), and bli_zero_dim2().
Referenced by FLA_Her2k_external().
{ char uplo_save = uplo; int m_save = m; scomplex* a_save = a; scomplex* b_save = b; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; float zero_r = bli_s0(); scomplex one = bli_c1(); scomplex alpha_copy; scomplex* a_copy; scomplex* b_copy; scomplex* c_conj; int lda, inca; int ldb, incb; int ldc, incc; int lda_copy, inca_copy; int ldb_copy, incb_copy; int ldc_conj, incc_conj; int her2k_needs_copya = FALSE; int her2k_needs_copyb = FALSE; int her2k_needs_conj = FALSE; int her2k_needs_alpha_conj = FALSE; // Return early if possible. if ( bli_zero_dim2( m, k ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_ccreate_contigmt( trans, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigmt( trans, m, k, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_ccreate_contigmr( uplo, m, m, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' her2k_needs_copyb = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' her2k_needs_copya = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r' // requested operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c ) bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_conjtrans( trans ); her2k_needs_conj = TRUE; her2k_needs_alpha_conj = TRUE; } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); her2k_needs_conj = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) her2k_needs_copyb = TRUE; bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); her2k_needs_conj = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) her2k_needs_copya = TRUE; bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); 
her2k_needs_conj = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r' // requested operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_uplo( uplo ); bli_toggle_conjtrans( trans ); her2k_needs_alpha_conj = TRUE; } } } // Make a copy of alpha and conjugate if necessary. alpha_copy = *alpha; if ( her2k_needs_alpha_conj ) { bli_zconjs( &alpha_copy ); } a_copy = a; lda_copy = lda; inca_copy = inca; // There are two cases where we need to copy A column-major storage. // We handle those two cases here. if ( her2k_needs_copya ) { int m_a; int n_a; // Determine the dimensions of A according to the value of trans. We // need this in order to set the leading dimension of the copy of A. if ( bli_does_trans( trans ) ) { m_a = k; n_a = m; } else { m_a = m; n_a = k; } // We need a temporary matrix to hold a column-major copy of A. a_copy = bli_callocm( m, k ); lda_copy = m_a; inca_copy = 1; // Copy the contents of A into A_copy. bli_ccopymt( BLIS_NO_TRANSPOSE, m_a, n_a, a, inca, lda, a_copy, inca_copy, lda_copy ); } b_copy = b; ldb_copy = ldb; incb_copy = incb; // There are two cases where we need to copy B column-major storage. // We handle those two cases here. if ( her2k_needs_copyb ) { int m_b; int n_b; // Determine the dimensions of B according to the value of trans. We // need this in order to set the leading dimension of the copy of B. if ( bli_does_trans( trans ) ) { m_b = k; n_b = m; } else { m_b = m; n_b = k; } // We need a temporary matrix to hold a column-major copy of B. b_copy = bli_callocm( m, k ); ldb_copy = m_b; incb_copy = 1; // Copy the contents of B into B_copy. bli_ccopymt( BLIS_NO_TRANSPOSE, m_b, n_b, b, incb, ldb, b_copy, incb_copy, ldb_copy ); } // There are two cases where we need to perform the rank-2k product and // then axpy the result into C with a conjugation. 
We handle those two // cases here. if ( her2k_needs_conj ) { // We need a temporary matrix for holding the rank-k product. c_conj = bli_callocm( m, m ); ldc_conj = m; incc_conj = 1; // Compute the rank-2k product. bli_cher2k_blas( uplo, trans, m, k, &alpha_copy, a_copy, lda_copy, b_copy, ldb_copy, &zero_r, c_conj, ldc_conj ); // Scale C by beta. bli_csscalmr( uplo, m, m, beta, c, incc, ldc ); // And finally, accumulate the rank-2k product in C_conj into C // with a conjugation. bli_caxpymrt( uplo, BLIS_CONJ_NO_TRANSPOSE, m, m, &one, c_conj, incc_conj, ldc_conj, c, incc, ldc ); // Free the temporary matrix for C. bli_cfree( c_conj ); } else { bli_cher2k_blas( uplo, trans, m, k, &alpha_copy, a_copy, lda_copy, b_copy, ldb_copy, beta, c, ldc ); } if ( her2k_needs_copya ) bli_cfree( a_copy ); if ( her2k_needs_copyb ) bli_cfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_cfree_saved_contigmr( uplo_save, m_save, m_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_cher2k_blas | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
scomplex * | b, | ||
int | ldb, | ||
float * | beta, | ||
scomplex * | c, | ||
int | ldc | ||
) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_cher2k(), CblasColMajor, and F77_cher2k().
Referenced by bli_cher2k().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); cblas_cher2k( cblas_order, cblas_uplo, cblas_trans, m, k, alpha, a, lda, b, ldb, *beta, c, ldc ); #else char blas_uplo; char blas_trans; bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); F77_cher2k( &blas_uplo, &blas_trans, &m, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc ); #endif }
void bli_cherk | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
float * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_c1(), bli_callocm(), bli_caxpymrt(), bli_ccreate_contigmr(), bli_ccreate_contigmt(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigmr(), bli_cherk_blas(), bli_csscalmr(), bli_is_col_storage(), bli_s0(), and bli_zero_dim2().
Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opc_var1().
{ char uplo_save = uplo; int m_save = m; scomplex* a_save = a; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; float zero_r = bli_s0(); scomplex one = bli_c1(); scomplex* c_conj; int lda, inca; int ldc, incc; int ldc_conj, incc_conj; int herk_needs_conj = FALSE; // Return early if possible. if ( bli_zero_dim2( m, k ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_ccreate_contigmt( trans, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigmr( uplo, m, m, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_c ) += A_c * A_c' // effective operation: uplo( C_c ) += A_c * A_c' } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_c ) += A_r * A_r' // effective operation: uplo( C_c ) += conj( A_c' * A_c ) bli_swap_ints( lda, inca ); bli_toggle_conjtrans( trans ); herk_needs_conj = TRUE; } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_r ) += A_c * A_c' // effective operation: ~uplo( C_c ) += conj( A_c * A_c' ) bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); herk_needs_conj = TRUE; } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_r ) += A_r * A_r' // effective operation: ~uplo( C_c ) += A_c' * A_c bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_conjtrans( trans ); } } // There are two cases where we need to perform the rank-k product and // then axpy the result into C with a conjugation. We handle those two // cases here. 
if ( herk_needs_conj ) { // We need a temporary matrix for holding the rank-k product. c_conj = bli_callocm( m, m ); ldc_conj = m; incc_conj = 1; // Compute the rank-k product. bli_cherk_blas( uplo, trans, m, k, alpha, a, lda, &zero_r, c_conj, ldc_conj ); // Scale C by beta. bli_csscalmr( uplo, m, m, beta, c, incc, ldc ); // And finally, accumulate the rank-k product in C_conj into C // with a conjugation. bli_caxpymrt( uplo, BLIS_CONJ_NO_TRANSPOSE, m, m, &one, c_conj, incc_conj, ldc_conj, c, incc, ldc ); // Free the temporary matrix for C. bli_cfree( c_conj ); } else { bli_cherk_blas( uplo, trans, m, k, alpha, a, lda, beta, c, ldc ); } // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_saved_contigmr( uplo_save, m_save, m_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_cherk_blas | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
float * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
float * | beta, | ||
scomplex * | c, | ||
int | ldc | ||
) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().
Referenced by bli_cherk().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); cblas_cherk( cblas_order, cblas_uplo, cblas_trans, m, k, *alpha, a, lda, *beta, c, ldc ); #else char blas_uplo; char blas_trans; bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); F77_cherk( &blas_uplo, &blas_trans, &m, &k, alpha, a, &lda, beta, c, &ldc ); #endif }
void bli_csymm | ( | char | side, |
char | uplo, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_c0(), bli_c1(), bli_callocm(), bli_caxpymt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cscalm(), bli_csymm_blas(), bli_is_col_storage(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by FLA_Symm_external().
{ int m_save = m; int n_save = n; scomplex* a_save = a; scomplex* b_save = b; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; scomplex zero = bli_c0(); scomplex one = bli_c1(); scomplex* b_copy; scomplex* c_trans; int dim_a; int lda, inca; int ldb, incb; int ldc, incc; int ldb_copy, incb_copy; int ldc_trans, incc_trans; int symm_needs_copyb = FALSE; int symm_needs_transb = FALSE; int symm_needs_axpyt = FALSE; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_ccreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_ccreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_c ) * B_c // effective operation: C_c += uplo( A_c ) * B_c } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_c ) * B_r // effective operation: C_c += uplo( A_c ) * B_c symm_needs_copyb = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_r ) * B_c // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_r ) * B_r // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_side( side ); bli_toggle_uplo( uplo ); symm_needs_axpyt = TRUE; } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_c ) * B_c // effective operation: C_c += ( uplo( A_c ) * B_c )^T bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); symm_needs_axpyt = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_c ) * B_r // effective operation: C_c += B_c * ~uplo( conj( A_c ) ) bli_swap_ints( ldc, incc ); bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_r ) * B_c // effective operation: C_c += B_c^T * ~uplo( A_c ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_uplo( uplo ); symm_needs_copyb = TRUE; symm_needs_transb = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( 
A_r ) * B_r // effective operation: C_c += B_c * conj( ~uplo( A_c ) ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_uplo( uplo ); bli_toggle_side( side ); } } } // We need a temporary matrix for the cases where B needs to be copied. b_copy = b; ldb_copy = ldb; incb_copy = incb; // There are two cases where we need to make a copy of B: one where the // copy's dimensions are transposed from the original B, and one where // the dimensions are not swapped. if ( symm_needs_copyb ) { char transb; // Set transb, which determines whether or not we need to copy from B // as if it needs a transposition. If a transposition is needed, then // m and n and have already been swapped. So in either case m // represents the leading dimension of the copy. if ( symm_needs_transb ) transb = BLIS_TRANSPOSE; else transb = BLIS_NO_TRANSPOSE; b_copy = bli_callocm( m, n ); ldb_copy = m; incb_copy = 1; bli_ccopymt( transb, m, n, b, incb, ldb, b_copy, incb_copy, ldb_copy ); } // There are two cases where we need to perform the symm and then axpy // the result into C with a transposition. We handle those cases here. if ( symm_needs_axpyt ) { // We need a temporary matrix for holding C^T. Notice that m and n // represent the dimensions of C, and thus C_trans is n-by-m // (interpreting both as column-major matrices). So the leading // dimension of the temporary matrix holding C^T is n. c_trans = bli_callocm( n, m ); ldc_trans = n; incc_trans = 1; // Compute A * B (or B * A) and store the result in C_trans. // Note that there is no overlap between the axpyt cases and // the conja/copyb cases, hence the use of a, b, lda, and ldb. bli_csymm_blas( side, uplo, n, m, alpha, a, lda, b, ldb, &zero, c_trans, ldc_trans ); // Scale C by beta. bli_cscalm( BLIS_NO_CONJUGATE, m, n, beta, c, incc, ldc ); // And finally, accumulate the matrix product in C_trans into C // with a transpose. 
bli_caxpymt( BLIS_TRANSPOSE, m, n, &one, c_trans, incc_trans, ldc_trans, c, incc, ldc ); // Free the temporary matrix for C. bli_cfree( c_trans ); } else // no extra axpyt step needed { bli_csymm_blas( side, uplo, m, n, alpha, a, lda, b_copy, ldb_copy, beta, c, ldc ); } if ( symm_needs_copyb ) bli_cfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_cfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_csymm_blas | ( | char | side, |
char | uplo, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
scomplex * | b, | ||
int | ldb, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | ldc | ||
) |
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_csymm(), CblasColMajor, and F77_csymm().
Referenced by bli_csymm().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_SIDE cblas_side; enum CBLAS_UPLO cblas_uplo; bli_param_map_to_netlib_side( side, &cblas_side ); bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); cblas_csymm( cblas_order, cblas_side, cblas_uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); #else char blas_side; char blas_uplo; bli_param_map_to_netlib_side( side, &blas_side ); bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); F77_csymm( &blas_side, &blas_uplo, &m, &n, alpha, a, &lda, b, &ldb, beta, c, &ldc ); #endif }
void bli_csyr2k | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_callocm(), bli_ccopymt(), bli_ccreate_contigmr(), bli_ccreate_contigmt(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigmr(), bli_csyr2k_blas(), bli_does_trans(), bli_is_col_storage(), and bli_zero_dim2().
Referenced by FLA_Syr2k_external().
{ char uplo_save = uplo; int m_save = m; scomplex* a_save = a; scomplex* b_save = b; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; scomplex* a_copy; scomplex* b_copy; int lda, inca; int ldb, incb; int ldc, incc; int lda_copy, inca_copy; int ldb_copy, incb_copy; int syr2k_needs_copya = FALSE; int syr2k_needs_copyb = FALSE; // Return early if possible. if ( bli_zero_dim2( m, k ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_ccreate_contigmt( trans, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigmt( trans, m, k, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_ccreate_contigmr( uplo, m, m, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' syr2k_needs_copyb = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' syr2k_needs_copya = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r' // requested operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c ) bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_trans( trans ); } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) syr2k_needs_copyb = TRUE; bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) syr2k_needs_copya = TRUE; bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r' // 
requested operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } } a_copy = a; lda_copy = lda; inca_copy = inca; // There are two cases where we need to copy A column-major storage. // We handle those two cases here. if ( syr2k_needs_copya ) { int m_a; int n_a; // Determine the dimensions of A according to the value of trans. We // need this in order to set the leading dimension of the copy of A. if ( bli_does_trans( trans ) ) { m_a = k; n_a = m; } else { m_a = m; n_a = k; } // We need a temporary matrix to hold a column-major copy of A. a_copy = bli_callocm( m, k ); lda_copy = m_a; inca_copy = 1; // Copy the contents of A into A_copy. bli_ccopymt( BLIS_NO_TRANSPOSE, m_a, n_a, a, inca, lda, a_copy, inca_copy, lda_copy ); } b_copy = b; ldb_copy = ldb; incb_copy = incb; // There are two cases where we need to copy B column-major storage. // We handle those two cases here. if ( syr2k_needs_copyb ) { int m_b; int n_b; // Determine the dimensions of B according to the value of trans. We // need this in order to set the leading dimension of the copy of B. if ( bli_does_trans( trans ) ) { m_b = k; n_b = m; } else { m_b = m; n_b = k; } // We need a temporary matrix to hold a column-major copy of B. b_copy = bli_callocm( m, k ); ldb_copy = m_b; incb_copy = 1; // Copy the contents of B into B_copy. bli_ccopymt( BLIS_NO_TRANSPOSE, m_b, n_b, b, incb, ldb, b_copy, incb_copy, ldb_copy ); } bli_csyr2k_blas( uplo, trans, m, k, alpha, a_copy, lda_copy, b_copy, ldb_copy, beta, c, ldc ); if ( syr2k_needs_copya ) bli_cfree( a_copy ); if ( syr2k_needs_copyb ) bli_cfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. 
bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_cfree_saved_contigmr( uplo_save, m_save, m_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_csyr2k_blas | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
scomplex * | b, | ||
int | ldb, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | ldc | ||
) |
References bli_is_conjtrans(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_csyr2k(), CblasColMajor, and F77_csyr2k().
Referenced by bli_csyr2k().
{ // BLAS doesn't recognize the conjugate-transposition constant for syr2k, // so we have to map it down to regular transposition. if ( bli_is_conjtrans( trans ) ) trans = BLIS_TRANSPOSE; #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); cblas_csyr2k( cblas_order, cblas_uplo, cblas_trans, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); #else char blas_uplo; char blas_trans; bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); F77_csyr2k( &blas_uplo, &blas_trans, &m, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc ); #endif }
void bli_csyrk | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_ccreate_contigmr(), bli_ccreate_contigmt(), bli_cfree_contigm(), bli_cfree_saved_contigmr(), bli_csyrk_blas(), bli_is_col_storage(), and bli_zero_dim2().
Referenced by FLA_Syrk_external().
{ char uplo_save = uplo; int m_save = m; scomplex* a_save = a; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; int lda, inca; int ldc, incc; // Return early if possible. if ( bli_zero_dim2( m, k ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_ccreate_contigmt( trans, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigmr( uplo, m, m, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_c ) += A_c * A_c^T // effective operation: uplo( C_c ) += A_c * A_c^T } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_c ) += A_r * A_r^T // effective operation: uplo( C_c ) += A_c^T * A_c bli_swap_ints( lda, inca ); bli_toggle_trans( trans ); } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_r ) += A_c * A_c^T // effective operation: ~uplo( C_c ) += A_c * A_c^T bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_r ) += A_r * A_r^T // effective operation: ~uplo( C_c ) += A_c^T * A_c bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } bli_csyrk_blas( uplo, trans, m, k, alpha, a, lda, beta, c, ldc ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_saved_contigmr( uplo_save, m_save, m_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_csyrk_blas | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | ldc | ||
) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_csyrk(), CblasColMajor, and F77_csyrk().
Referenced by bli_csyrk().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); cblas_csyrk( cblas_order, cblas_uplo, cblas_trans, m, k, alpha, a, lda, beta, c, ldc ); #else char blas_uplo; char blas_trans; bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); F77_csyrk( &blas_uplo, &blas_trans, &m, &k, alpha, a, &lda, beta, c, &ldc ); #endif }
void bli_ctrmm | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_callocm(), bli_cconjmr(), bli_ccopymrt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_ctrmm_blas(), bli_is_col_storage(), bli_is_conjnotrans(), bli_is_left(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by bli_ctrmmsx(), and FLA_Trmm_external().
{ int m_save = m; int n_save = n; scomplex* a_save = a; scomplex* b_save = b; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; scomplex* a_conj; int dim_a; int lda, inca; int ldb, incb; int lda_conj, inca_conj; int a_was_copied; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_ccreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); // Figure out whether A was copied to contiguous memory. This is used to // prevent redundant copying. a_was_copied = ( a != a_save ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_c ) ) * B_c // effective operation: B_c := tr( uplo( A_c ) ) * B_c } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_r ) ) * B_c // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_c ) ) * B_r // effective operation: B_c := B_c * tr( uplo( A_c ) )^T bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_trans( trans ); } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_r ) ) * B_r // effective operation: B_c := B_c * tr( ~uplo( A_c ) ) bli_swap_ints( ldb, incb ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_uplo( uplo ); 
bli_toggle_side( side ); } } // Initialize with values assuming that trans is not conjnotrans. a_conj = a; lda_conj = lda; inca_conj = inca; // We want to handle the conjnotrans case. The easiest way to do so is // by making a conjugated copy of A. if ( bli_is_conjnotrans( trans ) && !a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; a_conj = bli_callocm( dim_a, dim_a ); lda_conj = dim_a; inca_conj = 1; bli_ccopymrt( uplo, BLIS_CONJ_NO_TRANSPOSE, dim_a, dim_a, a, inca, lda, a_conj, inca_conj, lda_conj ); } else if ( bli_is_conjnotrans( trans ) && a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; bli_cconjmr( uplo, dim_a, dim_a, a_conj, inca_conj, lda_conj ); } bli_ctrmm_blas( side, uplo, trans, diag, m, n, alpha, a_conj, lda_conj, b, ldb ); if ( bli_is_conjnotrans( trans ) && !a_was_copied ) bli_cfree( a_conj ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_saved_contigm( m_save, n_save, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); }
void bli_ctrmm_blas | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
scomplex * | b, | ||
int | ldb | ||
) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ctrmm(), CblasColMajor, and F77_ctrmm().
Referenced by bli_ctrmm().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_SIDE cblas_side; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; enum CBLAS_DIAG cblas_diag; bli_param_map_to_netlib_side( side, &cblas_side ); bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); bli_param_map_to_netlib_diag( diag, &cblas_diag ); cblas_ctrmm( cblas_order, cblas_side, cblas_uplo, cblas_trans, cblas_diag, m, n, alpha, a, lda, b, ldb ); #else char blas_side; char blas_uplo; char blas_trans; char blas_diag; bli_param_map_to_netlib_side( side, &blas_side ); bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); bli_param_map_to_netlib_diag( diag, &blas_diag ); F77_ctrmm( &blas_side, &blas_uplo, &blas_trans, &blas_diag, &m, &n, alpha, a, &lda, b, &ldb ); #endif }
void bli_ctrmmsx | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_c1(), bli_callocm(), bli_caxpymt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cscalm(), bli_ctrmm(), bli_is_col_storage(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by FLA_Trmmsx_external().
{ int m_save = m; int n_save = n; scomplex* a_save = a; scomplex* b_save = b; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; scomplex one = bli_c1(); scomplex* b_copy; int dim_a; int b_copy_rs, b_copy_cs; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_ccreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_ccreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Create a copy of B to use in the computation so the original matrix is // left untouched. b_copy = bli_callocm( m, n ); // Match the strides of B_copy to that of B. if ( bli_is_col_storage( b_rs, b_cs ) ) { b_copy_rs = 1; b_copy_cs = m; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { b_copy_rs = n; b_copy_cs = 1; } // Copy the contents of B to B_copy. bli_ccopymt( BLIS_NO_CONJUGATE, m, n, b, b_rs, b_cs, b_copy, b_copy_rs, b_copy_cs ); // Perform the operation on B_copy. bli_ctrmm( side, uplo, trans, diag, m, n, alpha, a, a_rs, a_cs, b_copy, b_copy_rs, b_copy_cs ); // Scale C by beta. bli_cscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); // Add B_copy into C. bli_caxpymt( BLIS_NO_TRANSPOSE, m, n, &one, b_copy, b_copy_rs, b_copy_cs, c, c_rs, c_cs ); // Free the copy of B. bli_cfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_cfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_ctrsm | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_callocm(), bli_cconjmr(), bli_ccopymrt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_ctrsm_blas(), bli_is_col_storage(), bli_is_conjnotrans(), bli_is_left(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by bli_ctrsmsx(), FLA_LU_nopiv_opc_var1(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_piv_opc_var3(), and FLA_Trsm_external().
{ int m_save = m; int n_save = n; scomplex* a_save = a; scomplex* b_save = b; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; scomplex* a_conj; int dim_a; int lda, inca; int ldb, incb; int lda_conj, inca_conj; int a_was_copied; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_ccreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); // Figure out whether A was copied to contiguous memory. This is used to // prevent redundant copying. a_was_copied = ( a != a_save ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_c ) ) * B_c // effective operation: B_c := tr( uplo( A_c ) ) * B_c } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_r ) ) * B_c // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_c ) ) * B_r // effective operation: B_c := B_c * tr( uplo( A_c ) )^T bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_trans( trans ); } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_r ) ) * B_r // effective operation: B_c := B_c * tr( ~uplo( A_c ) ) bli_swap_ints( ldb, incb ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_uplo( uplo ); 
bli_toggle_side( side ); } } // Initialize with values assuming that trans is not conjnotrans. a_conj = a; lda_conj = lda; inca_conj = inca; // We want to handle the conjnotrans case. The easiest way to do so is // by making a conjugated copy of A. if ( bli_is_conjnotrans( trans ) && !a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; a_conj = bli_callocm( dim_a, dim_a ); lda_conj = dim_a; inca_conj = 1; bli_ccopymrt( uplo, BLIS_CONJ_NO_TRANSPOSE, dim_a, dim_a, a, inca, lda, a_conj, inca_conj, lda_conj ); } else if ( bli_is_conjnotrans( trans ) && a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; bli_cconjmr( uplo, dim_a, dim_a, a_conj, inca_conj, lda_conj ); } bli_ctrsm_blas( side, uplo, trans, diag, m, n, alpha, a_conj, lda_conj, b, ldb ); if ( bli_is_conjnotrans( trans ) && !a_was_copied ) bli_cfree( a_conj ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_saved_contigm( m_save, n_save, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); }
void bli_ctrsm_blas | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | lda, | ||
scomplex * | b, | ||
int | ldb | ||
) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ctrsm(), CblasColMajor, and F77_ctrsm().
Referenced by bli_ctrsm().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_SIDE cblas_side; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; enum CBLAS_DIAG cblas_diag; bli_param_map_to_netlib_side( side, &cblas_side ); bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); bli_param_map_to_netlib_diag( diag, &cblas_diag ); cblas_ctrsm( cblas_order, cblas_side, cblas_uplo, cblas_trans, cblas_diag, m, n, alpha, a, lda, b, ldb ); #else char blas_side; char blas_uplo; char blas_trans; char blas_diag; bli_param_map_to_netlib_side( side, &blas_side ); bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); bli_param_map_to_netlib_diag( diag, &blas_diag ); F77_ctrsm( &blas_side, &blas_uplo, &blas_trans, &blas_diag, &m, &n, alpha, a, &lda, b, &ldb ); #endif }
void bli_ctrsmsx | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
scomplex * | alpha, | ||
scomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
scomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
scomplex * | beta, | ||
scomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_c1(), bli_callocm(), bli_caxpymt(), bli_ccopymt(), bli_ccreate_contigm(), bli_ccreate_contigmr(), bli_cfree(), bli_cfree_contigm(), bli_cfree_saved_contigm(), bli_cscalm(), bli_ctrsm(), bli_is_col_storage(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by FLA_Trsmsx_external().
{ int m_save = m; int n_save = n; scomplex* a_save = a; scomplex* b_save = b; scomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; scomplex one = bli_c1(); scomplex* b_copy; int dim_a; int b_copy_rs, b_copy_cs; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_ccreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_ccreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_ccreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Create a copy of B to use in the computation so the original matrix is // left untouched. b_copy = bli_callocm( m, n ); // Match the strides of B_copy to that of B. if ( bli_is_col_storage( b_rs, b_cs ) ) { b_copy_rs = 1; b_copy_cs = m; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { b_copy_rs = n; b_copy_cs = 1; } // Copy the contents of B to B_copy. bli_ccopymt( BLIS_NO_CONJUGATE, m, n, b, b_rs, b_cs, b_copy, b_copy_rs, b_copy_cs ); // Perform the operation on B_copy. bli_ctrsm( side, uplo, trans, diag, m, n, alpha, a, a_rs, a_cs, b_copy, b_copy_rs, b_copy_cs ); // Scale C by beta. bli_cscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); // Add B_copy into C. bli_caxpymt( BLIS_NO_TRANSPOSE, m, n, &one, b_copy, b_copy_rs, b_copy_cs, c, c_rs, c_cs ); // Free the copy of B. bli_cfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_cfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_cfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_cfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_dgemm | ( | char | transa, |
char | transb, | ||
int | m, | ||
int | k, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
double * | beta, | ||
double * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_d0(), bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcreate_contigm(), bli_dcreate_contigmt(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dgemm_blas(), bli_dscalm(), bli_is_col_storage(), and bli_zero_dim3().
Referenced by FLA_Gemm_external().
{ int m_save = m; int n_save = n; double* a_save = a; double* b_save = b; double* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; double zero = bli_d0(); double one = bli_d1(); double* a_unswap; double* b_unswap; double* c_trans; int lda, inca; int ldb, incb; int ldc, incc; int ldc_trans, incc_trans; int m_gemm, n_gemm; int gemm_needs_axpyt = FALSE; // Return early if possible. if ( bli_zero_dim3( m, k, n ) ) { bli_dscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); return; } // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_dcreate_contigmt( transa, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dcreate_contigmt( transb, k, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dcreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // These are used to track the original values of a and b prior to any // operand swapping that might take place. This is necessary for proper // freeing of memory when one is a temporary contiguous matrix. a_unswap = a; b_unswap = b; // These are used to track the dimensions of the product of the // A and B operands to the BLAS invocation of gemm. These differ // from m and n when the operands need to be swapped. m_gemm = m; n_gemm = n; // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_c ) * tr( B_c ) // effective operation: C_c += tr( A_c ) * tr( B_c ) } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_c ) * tr( B_r ) // effective operation: C_c += tr( A_c ) * tr( B_c )^T bli_swap_ints( ldb, incb ); bli_toggle_trans( transb ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_r ) * tr( B_c ) // effective operation: C_c += tr( A_r )^T * tr( B_c ) bli_swap_ints( lda, inca ); bli_toggle_trans( transa ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_r ) * tr( B_r ) // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_dswap_pointers( a, b ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); gemm_needs_axpyt = TRUE; bli_swap_ints( m_gemm, n_gemm ); } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_c ) * tr( B_c ) // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); gemm_needs_axpyt = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_c ) * tr( B_r ) // effective operation: C_c += tr( B_c ) * tr( A_c )^T bli_swap_ints( ldc, incc ); bli_swap_ints( ldb, incb ); bli_toggle_trans( transa ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_dswap_pointers( a, b ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_r ) * tr( B_c ) 
// effective operation: C_c += tr( B_c )^T * tr( A_c ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_toggle_trans( transb ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_dswap_pointers( a, b ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_r ) * tr( B_r ) // effective operation: C_c += tr( B_c ) * tr( A_c ) bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_dswap_pointers( a, b ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } } } // There are two cases where we need to perform the gemm and then axpy // the result into C with a transposition. We handle those cases here. if ( gemm_needs_axpyt ) { // We need a temporary matrix for holding C^T. Notice that m and n // represent the dimensions of C, while m_gemm and n_gemm are the // dimensions of the actual product op(A)*op(B), which may be n-by-m // since the operands may have been swapped. c_trans = bli_dallocm( m_gemm, n_gemm ); ldc_trans = m_gemm; incc_trans = 1; // Compute tr( A ) * tr( B ), where A and B may have been swapped // to reference the other, and store the result in C_trans. bli_dgemm_blas( transa, transb, m_gemm, n_gemm, k, alpha, a, lda, b, ldb, &zero, c_trans, ldc_trans ); // Scale C by beta. bli_dscalm( BLIS_NO_CONJUGATE, m, n, beta, c, incc, ldc ); // And finally, accumulate the matrix product in C_trans into C // with a transpose. bli_daxpymt( BLIS_TRANSPOSE, m, n, &one, c_trans, incc_trans, ldc_trans, c, incc, ldc ); // Free the temporary matrix for C. bli_dfree( c_trans ); } else // no extra axpyt step needed { bli_dgemm_blas( transa, transb, m_gemm, n_gemm, k, alpha, a, lda, b, ldb, beta, c, ldc ); } // Free any temporary contiguous matrices, copying the result back to // the original matrix. 
bli_dfree_contigm( a_save, a_rs_save, a_cs_save, &a_unswap, &a_rs, &a_cs ); bli_dfree_contigm( b_save, b_rs_save, b_cs_save, &b_unswap, &b_rs, &b_cs ); bli_dfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
// Hands a double-precision gemm call to the underlying BLAS library. When
// BLIS_ENABLE_CBLAS_INTERFACES is defined the CBLAS entry point is used,
// with column-major ordering and alpha/beta passed by value; otherwise the
// Fortran-77 entry point is called with every scalar passed by address.
//
// References: bli_param_map_to_netlib_trans(), cblas_dgemm(),
//             CblasColMajor, and F77_dgemm().
// Referenced by: bli_dgemm().
void bli_dgemm_blas( char transa, char transb, int m, int n, int k, double* alpha, double* a, int lda, double* b, int ldb, double* beta, double* c, int ldc )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_TRANSPOSE transa_cblas;
	enum CBLAS_TRANSPOSE transb_cblas;

	// Translate the transposition characters into CBLAS enumerations.
	bli_param_map_to_netlib_trans( transa, &transa_cblas );
	bli_param_map_to_netlib_trans( transb, &transb_cblas );

	cblas_dgemm( CblasColMajor,
	             transa_cblas,
	             transb_cblas,
	             m,
	             n,
	             k,
	             *alpha,
	             a, lda,
	             b, ldb,
	             *beta,
	             c, ldc );
#else
	char transa_f77;
	char transb_f77;

	// Translate the transposition characters into the form the Fortran
	// interface expects.
	bli_param_map_to_netlib_trans( transa, &transa_f77 );
	bli_param_map_to_netlib_trans( transb, &transb_f77 );

	F77_dgemm( &transa_f77,
	           &transb_f77,
	           &m,
	           &n,
	           &k,
	           alpha,
	           a, &lda,
	           b, &ldb,
	           beta,
	           c, &ldc );
#endif
}
// Double-precision hemm. This routine does nothing of its own: it passes
// every argument, unchanged and in the same order, to bli_dsymm().
//
// References: bli_dsymm().
void bli_dhemm( char side, char uplo, int m, int n, double* alpha, double* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs, double* beta, double* c, int c_rs, int c_cs )
{
	bli_dsymm( side,
	           uplo,
	           m,
	           n,
	           alpha,
	           a, a_rs, a_cs,
	           b, b_rs, b_cs,
	           beta,
	           c, c_rs, c_cs );
}
// Double-precision her2k. This routine does nothing of its own: it passes
// every argument, unchanged and in the same order, to bli_dsyr2k().
//
// References: bli_dsyr2k().
void bli_dher2k( char uplo, char trans, int m, int k, double* alpha, double* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs, double* beta, double* c, int c_rs, int c_cs )
{
	bli_dsyr2k( uplo,
	            trans,
	            m,
	            k,
	            alpha,
	            a, a_rs, a_cs,
	            b, b_rs, b_cs,
	            beta,
	            c, c_rs, c_cs );
}
// Double-precision herk. This routine does nothing of its own: it passes
// every argument, unchanged and in the same order, to bli_dsyrk().
//
// References: bli_dsyrk().
void bli_dherk( char uplo, char trans, int m, int k, double* alpha, double* a, int a_rs, int a_cs, double* beta, double* c, int c_rs, int c_cs )
{
	bli_dsyrk( uplo,
	           trans,
	           m,
	           k,
	           alpha,
	           a, a_rs, a_cs,
	           beta,
	           c, c_rs, c_cs );
}
void bli_dsymm | ( | char | side, |
char | uplo, | ||
int | m, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
double * | beta, | ||
double * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_d0(), bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcopymt(), bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dscalm(), bli_dsymm_blas(), bli_is_col_storage(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by bli_dhemm(), FLA_Hemm_external(), and FLA_Symm_external().
{ int m_save = m; int n_save = n; double* a_save = a; double* b_save = b; double* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; double zero = bli_d0(); double one = bli_d1(); double* b_copy; double* c_trans; int dim_a; int lda, inca; int ldb, incb; int ldc, incc; int ldb_copy, incb_copy; int ldc_trans, incc_trans; int symm_needs_copyb = FALSE; int symm_needs_transb = FALSE; int symm_needs_axpyt = FALSE; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_dcreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dcreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dcreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_c ) * B_c // effective operation: C_c += uplo( A_c ) * B_c } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_c ) * B_r // effective operation: C_c += uplo( A_c ) * B_c symm_needs_copyb = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_r ) * B_c // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += uplo( A_r ) * B_r // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_side( side ); bli_toggle_uplo( uplo ); symm_needs_axpyt = TRUE; } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_c ) * B_c // effective operation: C_c += ( uplo( A_c ) * B_c )^T bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); symm_needs_axpyt = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_c ) * B_r // effective operation: C_c += B_c * ~uplo( conj( A_c ) ) bli_swap_ints( ldc, incc ); bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( A_r ) * B_c // effective operation: C_c += B_c^T * ~uplo( A_c ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_uplo( uplo ); symm_needs_copyb = TRUE; symm_needs_transb = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += uplo( 
A_r ) * B_r // effective operation: C_c += B_c * conj( ~uplo( A_c ) ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_uplo( uplo ); bli_toggle_side( side ); } } } // We need a temporary matrix for the cases where B needs to be copied. b_copy = b; ldb_copy = ldb; incb_copy = incb; // There are two cases where we need to make a copy of B: one where the // copy's dimensions are transposed from the original B, and one where // the dimensions are not swapped. if ( symm_needs_copyb ) { char transb; // Set transb, which determines whether or not we need to copy from B // as if it needs a transposition. If a transposition is needed, then // m and n and have already been swapped. So in either case m // represents the leading dimension of the copy. if ( symm_needs_transb ) transb = BLIS_TRANSPOSE; else transb = BLIS_NO_TRANSPOSE; b_copy = bli_dallocm( m, n ); ldb_copy = m; incb_copy = 1; bli_dcopymt( transb, m, n, b, incb, ldb, b_copy, incb_copy, ldb_copy ); } // There are two cases where we need to perform the symm and then axpy // the result into C with a transposition. We handle those cases here. if ( symm_needs_axpyt ) { // We need a temporary matrix for holding C^T. Notice that m and n // represent the dimensions of C, and thus C_trans is n-by-m // (interpreting both as column-major matrices). So the leading // dimension of the temporary matrix holding C^T is n. c_trans = bli_dallocm( n, m ); ldc_trans = n; incc_trans = 1; // Compute A * B (or B * A) and store the result in C_trans. // Note that there is no overlap between the axpyt cases and // the conja/copyb cases, hence the use of a, b, lda, and ldb. bli_dsymm_blas( side, uplo, n, m, alpha, a, lda, b, ldb, &zero, c_trans, ldc_trans ); // Scale C by beta. bli_dscalm( BLIS_NO_CONJUGATE, m, n, beta, c, incc, ldc ); // And finally, accumulate the matrix product in C_trans into C // with a transpose. 
bli_daxpymt( BLIS_TRANSPOSE, m, n, &one, c_trans, incc_trans, ldc_trans, c, incc, ldc ); // Free the temporary matrix for C. bli_dfree( c_trans ); } else // no extra axpyt step needed { bli_dsymm_blas( side, uplo, m, n, alpha, a, lda, b_copy, ldb_copy, beta, c, ldc ); } if ( symm_needs_copyb ) bli_dfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_dfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
// Hands a double-precision symm call to the underlying BLAS library. When
// BLIS_ENABLE_CBLAS_INTERFACES is defined the CBLAS entry point is used,
// with column-major ordering and alpha/beta passed by value; otherwise the
// Fortran-77 entry point is called with every scalar passed by address.
//
// References: bli_param_map_to_netlib_side(),
//             bli_param_map_to_netlib_uplo(), cblas_dsymm(), CblasColMajor,
//             and F77_dsymm().
// Referenced by: bli_dsymm().
void bli_dsymm_blas( char side, char uplo, int m, int n, double* alpha, double* a, int lda, double* b, int ldb, double* beta, double* c, int ldc )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_SIDE side_cblas;
	enum CBLAS_UPLO uplo_cblas;

	// Translate the parameter characters into CBLAS enumerations.
	bli_param_map_to_netlib_side( side, &side_cblas );
	bli_param_map_to_netlib_uplo( uplo, &uplo_cblas );

	cblas_dsymm( CblasColMajor,
	             side_cblas,
	             uplo_cblas,
	             m,
	             n,
	             *alpha,
	             a, lda,
	             b, ldb,
	             *beta,
	             c, ldc );
#else
	char side_f77;
	char uplo_f77;

	// Translate the parameter characters into the form the Fortran
	// interface expects.
	bli_param_map_to_netlib_side( side, &side_f77 );
	bli_param_map_to_netlib_uplo( uplo, &uplo_f77 );

	F77_dsymm( &side_f77,
	           &uplo_f77,
	           &m,
	           &n,
	           alpha,
	           a, &lda,
	           b, &ldb,
	           beta,
	           c, &ldc );
#endif
}
void bli_dsyr2k | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
double * | beta, | ||
double * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_dallocm(), bli_dcopymt(), bli_dcreate_contigmr(), bli_dcreate_contigmt(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigmr(), bli_does_trans(), bli_dsyr2k_blas(), bli_is_col_storage(), and bli_zero_dim2().
Referenced by bli_dher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().
{ char uplo_save = uplo; int m_save = m; double* a_save = a; double* b_save = b; double* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; double* a_copy; double* b_copy; int lda, inca; int ldb, incb; int ldc, incc; int lda_copy, inca_copy; int ldb_copy, incb_copy; int syr2k_needs_copya = FALSE; int syr2k_needs_copyb = FALSE; // Return early if possible. if ( bli_zero_dim2( m, k ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_dcreate_contigmt( trans, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dcreate_contigmt( trans, m, k, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dcreate_contigmr( uplo, m, m, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' syr2k_needs_copyb = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' syr2k_needs_copya = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r' // requested operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c ) bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_trans( trans ); } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) syr2k_needs_copyb = TRUE; bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) syr2k_needs_copya = TRUE; bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r' // 
requested operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } } a_copy = a; lda_copy = lda; inca_copy = inca; // There are two cases where we need to copy A column-major storage. // We handle those two cases here. if ( syr2k_needs_copya ) { int m_a; int n_a; // Determine the dimensions of A according to the value of trans. We // need this in order to set the leading dimension of the copy of A. if ( bli_does_trans( trans ) ) { m_a = k; n_a = m; } else { m_a = m; n_a = k; } // We need a temporary matrix to hold a column-major copy of A. a_copy = bli_dallocm( m, k ); lda_copy = m_a; inca_copy = 1; // Copy the contents of A into A_copy. bli_dcopymt( BLIS_NO_TRANSPOSE, m_a, n_a, a, inca, lda, a_copy, inca_copy, lda_copy ); } b_copy = b; ldb_copy = ldb; incb_copy = incb; // There are two cases where we need to copy B column-major storage. // We handle those two cases here. if ( syr2k_needs_copyb ) { int m_b; int n_b; // Determine the dimensions of B according to the value of trans. We // need this in order to set the leading dimension of the copy of B. if ( bli_does_trans( trans ) ) { m_b = k; n_b = m; } else { m_b = m; n_b = k; } // We need a temporary matrix to hold a column-major copy of B. b_copy = bli_dallocm( m, k ); ldb_copy = m_b; incb_copy = 1; // Copy the contents of B into B_copy. bli_dcopymt( BLIS_NO_TRANSPOSE, m_b, n_b, b, incb, ldb, b_copy, incb_copy, ldb_copy ); } bli_dsyr2k_blas( uplo, trans, m, k, alpha, a_copy, lda_copy, b_copy, ldb_copy, beta, c, ldc ); if ( syr2k_needs_copya ) bli_dfree( a_copy ); if ( syr2k_needs_copyb ) bli_dfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. 
bli_dfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dfree_saved_contigmr( uplo_save, m_save, m_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_dsyr2k_blas | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
double * | alpha, | ||
double * | a, | ||
int | lda, | ||
double * | b, | ||
int | ldb, | ||
double * | beta, | ||
double * | c, | ||
int | ldc | ||
) |
References bli_is_conjtrans(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_dsyr2k(), CblasColMajor, and F77_dsyr2k().
Referenced by bli_dsyr2k().
{ // BLAS doesn't recognize the conjugate-transposition constant for syr2k, // so we have to map it down to regular transposition. if ( bli_is_conjtrans( trans ) ) trans = BLIS_TRANSPOSE; #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); cblas_dsyr2k( cblas_order, cblas_uplo, cblas_trans, m, k, *alpha, a, lda, b, ldb, *beta, c, ldc ); #else char blas_uplo; char blas_trans; bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); F77_dsyr2k( &blas_uplo, &blas_trans, &m, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc ); #endif }
/*
 * bli_dsyrk
 *
 * Double-precision symmetric rank-k update on matrices described by
 * general row/column strides.  The routine rewrites the request in terms
 * of column-major operands and forwards it to bli_dsyrk_blas().
 *
 *   uplo         which triangle of C is referenced; toggled internally
 *                when C is not column-stored.
 *   trans        transposition applied to A; toggled internally when A is
 *                not column-stored.
 *   m, k         dimensions (C is treated as m-by-m).
 *   alpha, beta  pointers to the scalars.
 *   a, a_rs, a_cs  operand A with its row and column strides.
 *   c, c_rs, c_cs  operand C with its row and column strides.
 *
 * Returns immediately, without touching C, when bli_zero_dim2( m, k )
 * holds.
 *
 * References: bli_dcreate_contigmr(), bli_dcreate_contigmt(),
 *             bli_dfree_contigm(), bli_dfree_saved_contigmr(),
 *             bli_dsyrk_blas(), bli_is_col_storage(), and bli_zero_dim2().
 * Referenced by: bli_dherk(), FLA_Herk_external(), FLA_Syrk_external(),
 *                and FLA_UDdate_UT_opd_var1().
 */
void bli_dsyrk( char uplo, char trans, int m, int k, double* alpha, double* a, int a_rs, int a_cs, double* beta, double* c, int c_rs, int c_cs )
{
    // Caller-visible values, captured before a/c (and their strides) may be
    // redirected to temporary contiguous copies and before uplo is toggled.
    char    uplo_in = uplo;
    int     m_in    = m;
    double* a_in    = a;
    double* c_in    = c;
    int     a_rs_in = a_rs;
    int     a_cs_in = a_cs;
    int     c_rs_in = c_rs;
    int     c_cs_in = c_cs;
    int     lda, inca;
    int     ldc, incc;
    int     c_is_col;
    int     a_is_col;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, k ) )
    {
        return;
    }

    // Substitute temporary contiguous copies for the operands if needed.
    bli_dcreate_contigmt( trans, m, k, a_in, a_rs_in, a_cs_in, &a, &a_rs, &a_cs );
    bli_dcreate_contigmr( uplo, m, m, c_in, c_rs_in, c_cs_in, &c, &c_rs, &c_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldc  = c_cs;
    incc = c_rs;

    c_is_col = bli_is_col_storage( c_rs, c_cs );
    a_is_col = bli_is_col_storage( a_rs, a_cs );

    // The adjustments for A and for C touch disjoint variables, so the
    // four storage combinations reduce to two independent corrections.
    if ( !a_is_col )
    {
        // Row-stored A: uplo( C ) += A_r * A_r^T becomes
        // uplo( C ) += A_c^T * A_c.
        bli_swap_ints( lda, inca );
        bli_toggle_trans( trans );
    }

    if ( !c_is_col )
    {
        // Row-stored C: update the opposite triangle of the column-major
        // view, i.e. ~uplo( C_c ).
        bli_swap_ints( ldc, incc );
        bli_toggle_uplo( uplo );
    }

    bli_dsyrk_blas( uplo, trans, m, k, alpha, a, lda, beta, c, ldc );

    // Release any temporaries; the C variant also receives the saved uplo
    // and dimensions so the result can be copied back to the original.
    bli_dfree_contigm( a_in, a_rs_in, a_cs_in, &a, &a_rs, &a_cs );
    bli_dfree_saved_contigmr( uplo_in, m_in, m_in, c_in, c_rs_in, c_cs_in, &c, &c_rs, &c_cs );
}
/*
 * bli_dsyrk_blas
 *
 * Hands a double-precision symmetric rank-k update to the underlying BLAS:
 * cblas_dsyrk() with CblasColMajor ordering when
 * BLIS_ENABLE_CBLAS_INTERFACES is defined, F77_dsyrk() otherwise.
 *
 *   uplo, trans  BLIS parameter characters, translated with
 *                bli_param_map_to_netlib_uplo() /
 *                bli_param_map_to_netlib_trans() before the call.
 *   m, k         dimensions, forwarded unchanged.
 *   alpha, beta  pointers to the scalars (dereferenced for CBLAS, passed
 *                as pointers to the Fortran routine).
 *   a, lda       operand A and its leading dimension.
 *   c, ldc       operand C (updated by the BLAS call) and its leading
 *                dimension.
 *
 * References: bli_param_map_to_netlib_trans(),
 *             bli_param_map_to_netlib_uplo(), cblas_dsyrk(), CblasColMajor,
 *             and F77_dsyrk().
 * Referenced by: bli_dsyrk().
 */
void bli_dsyrk_blas( char uplo, char trans, int m, int k, double* alpha, double* a, int lda, double* beta, double* c, int ldc )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;

    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );

    cblas_dsyrk( CblasColMajor,
                 netlib_uplo,
                 netlib_trans,
                 m, k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#else
    char netlib_uplo;
    char netlib_trans;

    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );

    F77_dsyrk( &netlib_uplo,
               &netlib_trans,
               &m, &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#endif
}
/*
 * bli_dtrmm
 *
 * Double-precision triangular matrix-matrix multiply on operands described
 * by general row/column strides.  The request is re-expressed for
 * column-major operands and forwarded to bli_dtrmm_blas(); B is
 * overwritten with the result.
 *
 *   side, uplo, trans, diag  BLIS parameter characters.  side, uplo and
 *                            trans may be toggled internally depending on
 *                            how A and B are stored.
 *   m, n           dimensions of B (swapped internally when B is
 *                  row-stored).
 *   alpha          pointer to the scalar.
 *   a, a_rs, a_cs  triangular operand A with its strides; its order is
 *                  chosen by bli_set_dim_with_side( side, m, n, ... ).
 *   b, b_rs, b_cs  operand B with its strides.
 *
 * Returns immediately when bli_zero_dim2( m, n ) holds.
 *
 * References: bli_dcreate_contigm(), bli_dcreate_contigmr(),
 *             bli_dfree_contigm(), bli_dfree_saved_contigm(),
 *             bli_dtrmm_blas(), bli_is_col_storage(),
 *             bli_set_dim_with_side(), and bli_zero_dim2().
 * Referenced by: bli_dtrmmsx(), and FLA_Trmm_external().
 */
void bli_dtrmm( char side, char uplo, char trans, char diag, int m, int n, double* alpha, double* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs )
{
    // Caller-visible values, captured before the operands may be redirected
    // to temporary contiguous copies and before m/n may be swapped.
    int     m_in    = m;
    int     n_in    = n;
    double* a_in    = a;
    double* b_in    = b;
    int     a_rs_in = a_rs;
    int     a_cs_in = a_cs;
    int     b_rs_in = b_rs;
    int     b_cs_in = b_cs;
    int     dim_a;
    int     lda, inca;
    int     ldb, incb;
    int     b_is_col;
    int     a_is_col;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    // Substitute temporary contiguous copies for the operands if needed.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_dcreate_contigmr( uplo, dim_a, dim_a, a_in, a_rs_in, a_cs_in, &a, &a_rs, &a_cs );
    bli_dcreate_contigm( m, n, b_in, b_rs_in, b_cs_in, &b, &b_rs, &b_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    b_is_col = bli_is_col_storage( b_rs, b_cs );
    a_is_col = bli_is_col_storage( a_rs, a_cs );

    // When both operands are column-stored the request is already in the
    // form the BLAS expects and no adjustment is made.
    if ( b_is_col && !a_is_col )
    {
        // requested: B_c := tr( uplo( A_r ) ) * B_c
        // effective: B_c := tr( ~uplo( A_c ) )^T * B_c
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
        bli_toggle_trans( trans );
    }
    else if ( !b_is_col && a_is_col )
    {
        // requested: B_r := tr( uplo( A_c ) ) * B_r
        // effective: B_c := B_c * tr( uplo( A_c ) )^T
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        bli_toggle_trans( trans );
    }
    else if ( !b_is_col && !a_is_col )
    {
        // requested: B_r := tr( uplo( A_r ) ) * B_r
        // effective: B_c := B_c * tr( ~uplo( A_c ) )
        bli_swap_ints( ldb, incb );
        bli_swap_ints( lda, inca );
        bli_swap_ints( m, n );
        bli_toggle_uplo( uplo );
        bli_toggle_side( side );
    }

    bli_dtrmm_blas( side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb );

    // Release any temporaries; the B variant receives the original
    // dimensions so the result can be copied back to the caller's matrix.
    bli_dfree_contigm( a_in, a_rs_in, a_cs_in, &a, &a_rs, &a_cs );
    bli_dfree_saved_contigm( m_in, n_in, b_in, b_rs_in, b_cs_in, &b, &b_rs, &b_cs );
}
/*
 * bli_dtrmm_blas
 *
 * Hands a double-precision triangular matrix-matrix multiply to the
 * underlying BLAS: cblas_dtrmm() with CblasColMajor ordering when
 * BLIS_ENABLE_CBLAS_INTERFACES is defined, F77_dtrmm() otherwise.
 *
 *   side, uplo, trans, diag  BLIS parameter characters, each translated
 *                            with the matching bli_param_map_to_netlib_*()
 *                            helper before the call.
 *   m, n    dimensions, forwarded unchanged.
 *   alpha   pointer to the scalar (dereferenced for CBLAS, passed as a
 *           pointer to the Fortran routine).
 *   a, lda  operand A and its leading dimension.
 *   b, ldb  operand B (updated by the BLAS call) and its leading dimension.
 *
 * References: bli_param_map_to_netlib_diag(),
 *             bli_param_map_to_netlib_side(),
 *             bli_param_map_to_netlib_trans(),
 *             bli_param_map_to_netlib_uplo(), cblas_dtrmm(), CblasColMajor,
 *             and F77_dtrmm().
 * Referenced by: bli_dtrmm().
 */
void bli_dtrmm_blas( char side, char uplo, char trans, char diag, int m, int n, double* alpha, double* a, int lda, double* b, int ldb )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      netlib_side;
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;
    enum CBLAS_DIAG      netlib_diag;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );
    bli_param_map_to_netlib_diag( diag, &netlib_diag );

    cblas_dtrmm( CblasColMajor,
                 netlib_side,
                 netlib_uplo,
                 netlib_trans,
                 netlib_diag,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    char netlib_side;
    char netlib_uplo;
    char netlib_trans;
    char netlib_diag;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );
    bli_param_map_to_netlib_diag( diag, &netlib_diag );

    F77_dtrmm( &netlib_side,
               &netlib_uplo,
               &netlib_trans,
               &netlib_diag,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
void bli_dtrmmsx | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
double * | beta, | ||
double * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcopymt(), bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dscalm(), bli_dtrmm(), bli_is_col_storage(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by FLA_Trmmsx_external().
{ int m_save = m; int n_save = n; double* a_save = a; double* b_save = b; double* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; double one = bli_d1(); double* b_copy; int dim_a; int b_copy_rs, b_copy_cs; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_dcreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dcreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dcreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Create a copy of B to use in the computation so the original matrix is // left untouched. b_copy = bli_dallocm( m, n ); // Match the strides of B_copy to that of B. if ( bli_is_col_storage( b_rs, b_cs ) ) { b_copy_rs = 1; b_copy_cs = m; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { b_copy_rs = n; b_copy_cs = 1; } // Copy the contents of B to B_copy. bli_dcopymt( BLIS_NO_CONJUGATE, m, n, b, b_rs, b_cs, b_copy, b_copy_rs, b_copy_cs ); // Perform the operation on B_copy. bli_dtrmm( side, uplo, trans, diag, m, n, alpha, a, a_rs, a_cs, b_copy, b_copy_rs, b_copy_cs ); // Scale C by beta. bli_dscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); // Add B_copy into C. bli_daxpymt( BLIS_NO_TRANSPOSE, m, n, &one, b_copy, b_copy_rs, b_copy_cs, c, c_rs, c_cs ); // Free the copy of B. bli_dfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_dfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
/*
 * bli_dtrsm
 *
 * Double-precision triangular solve with multiple right-hand sides on
 * operands described by general row/column strides.  The request is
 * re-expressed for column-major operands and forwarded to
 * bli_dtrsm_blas(); B is overwritten with the result.
 *
 *   side, uplo, trans, diag  BLIS parameter characters.  side, uplo and
 *                            trans may be toggled internally depending on
 *                            how A and B are stored.
 *   m, n           dimensions of B (swapped internally when B is
 *                  row-stored).
 *   alpha          pointer to the scalar.
 *   a, a_rs, a_cs  triangular operand A with its strides; its order is
 *                  chosen by bli_set_dim_with_side( side, m, n, ... ).
 *   b, b_rs, b_cs  operand B with its strides.
 *
 * Returns immediately when bli_zero_dim2( m, n ) holds.
 *
 * References: bli_dcreate_contigm(), bli_dcreate_contigmr(),
 *             bli_dfree_contigm(), bli_dfree_saved_contigm(),
 *             bli_dtrsm_blas(), bli_is_col_storage(),
 *             bli_set_dim_with_side(), and bli_zero_dim2().
 * Referenced by: bli_dtrsmsx(), FLA_LU_nopiv_opd_var1(),
 *                FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(),
 *                FLA_LU_piv_opd_var3(), and FLA_Trsm_external().
 */
void bli_dtrsm( char side, char uplo, char trans, char diag, int m, int n, double* alpha, double* a, int a_rs, int a_cs, double* b, int b_rs, int b_cs )
{
    // Caller-visible values, captured before the operands may be redirected
    // to temporary contiguous copies and before m/n may be swapped.
    int     m_in    = m;
    int     n_in    = n;
    double* a_in    = a;
    double* b_in    = b;
    int     a_rs_in = a_rs;
    int     a_cs_in = a_cs;
    int     b_rs_in = b_rs;
    int     b_cs_in = b_cs;
    int     dim_a;
    int     lda, inca;
    int     ldb, incb;
    int     b_is_col;
    int     a_is_col;

    // Nothing to do for an empty problem.
    if ( bli_zero_dim2( m, n ) )
    {
        return;
    }

    // Substitute temporary contiguous copies for the operands if needed.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_dcreate_contigmr( uplo, dim_a, dim_a, a_in, a_rs_in, a_cs_in, &a, &a_rs, &a_cs );
    bli_dcreate_contigm( m, n, b_in, b_rs_in, b_cs_in, &b, &b_rs, &b_cs );

    // Start from the column-major interpretation of the strides.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;

    b_is_col = bli_is_col_storage( b_rs, b_cs );
    a_is_col = bli_is_col_storage( a_rs, a_cs );

    // When both operands are column-stored the request is already in the
    // form the BLAS expects and no adjustment is made.
    if ( b_is_col && !a_is_col )
    {
        // requested: B_c := tr( uplo( A_r ) ) * B_c
        // effective: B_c := tr( ~uplo( A_c ) )^T * B_c
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
        bli_toggle_trans( trans );
    }
    else if ( !b_is_col && a_is_col )
    {
        // requested: B_r := tr( uplo( A_c ) ) * B_r
        // effective: B_c := B_c * tr( uplo( A_c ) )^T
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        bli_toggle_trans( trans );
    }
    else if ( !b_is_col && !a_is_col )
    {
        // requested: B_r := tr( uplo( A_r ) ) * B_r
        // effective: B_c := B_c * tr( ~uplo( A_c ) )
        bli_swap_ints( ldb, incb );
        bli_swap_ints( lda, inca );
        bli_swap_ints( m, n );
        bli_toggle_uplo( uplo );
        bli_toggle_side( side );
    }

    bli_dtrsm_blas( side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb );

    // Release any temporaries; the B variant receives the original
    // dimensions so the result can be copied back to the caller's matrix.
    bli_dfree_contigm( a_in, a_rs_in, a_cs_in, &a, &a_rs, &a_cs );
    bli_dfree_saved_contigm( m_in, n_in, b_in, b_rs_in, b_cs_in, &b, &b_rs, &b_cs );
}
/*
 * bli_dtrsm_blas
 *
 * Hands a double-precision triangular solve to the underlying BLAS:
 * cblas_dtrsm() with CblasColMajor ordering when
 * BLIS_ENABLE_CBLAS_INTERFACES is defined, F77_dtrsm() otherwise.
 *
 *   side, uplo, trans, diag  BLIS parameter characters, each translated
 *                            with the matching bli_param_map_to_netlib_*()
 *                            helper before the call.
 *   m, n    dimensions, forwarded unchanged.
 *   alpha   pointer to the scalar (dereferenced for CBLAS, passed as a
 *           pointer to the Fortran routine).
 *   a, lda  operand A and its leading dimension.
 *   b, ldb  operand B (updated by the BLAS call) and its leading dimension.
 *
 * References: bli_param_map_to_netlib_diag(),
 *             bli_param_map_to_netlib_side(),
 *             bli_param_map_to_netlib_trans(),
 *             bli_param_map_to_netlib_uplo(), cblas_dtrsm(), CblasColMajor,
 *             and F77_dtrsm().
 * Referenced by: bli_dtrsm().
 */
void bli_dtrsm_blas( char side, char uplo, char trans, char diag, int m, int n, double* alpha, double* a, int lda, double* b, int ldb )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_SIDE      netlib_side;
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;
    enum CBLAS_DIAG      netlib_diag;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );
    bli_param_map_to_netlib_diag( diag, &netlib_diag );

    cblas_dtrsm( CblasColMajor,
                 netlib_side,
                 netlib_uplo,
                 netlib_trans,
                 netlib_diag,
                 m, n,
                 *alpha,
                 a, lda,
                 b, ldb );
#else
    char netlib_side;
    char netlib_uplo;
    char netlib_trans;
    char netlib_diag;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );
    bli_param_map_to_netlib_diag( diag, &netlib_diag );

    F77_dtrsm( &netlib_side,
               &netlib_uplo,
               &netlib_trans,
               &netlib_diag,
               &m, &n,
               alpha,
               a, &lda,
               b, &ldb );
#endif
}
void bli_dtrsmsx | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
double * | alpha, | ||
double * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
double * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
double * | beta, | ||
double * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_d1(), bli_dallocm(), bli_daxpymt(), bli_dcopymt(), bli_dcreate_contigm(), bli_dcreate_contigmr(), bli_dfree(), bli_dfree_contigm(), bli_dfree_saved_contigm(), bli_dscalm(), bli_dtrsm(), bli_is_col_storage(), bli_set_dim_with_side(), and bli_zero_dim2().
Referenced by FLA_Trsmsx_external().
{ int m_save = m; int n_save = n; double* a_save = a; double* b_save = b; double* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; double one = bli_d1(); double* b_copy; int dim_a; int b_copy_rs, b_copy_cs; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_dcreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dcreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dcreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Create a copy of B to use in the computation so the original matrix is // left untouched. b_copy = bli_dallocm( m, n ); // Match the strides of B_copy to that of B. if ( bli_is_col_storage( b_rs, b_cs ) ) { b_copy_rs = 1; b_copy_cs = m; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { b_copy_rs = n; b_copy_cs = 1; } // Copy the contents of B to B_copy. bli_dcopymt( BLIS_NO_CONJUGATE, m, n, b, b_rs, b_cs, b_copy, b_copy_rs, b_copy_cs ); // Perform the operation on B_copy. bli_dtrsm( side, uplo, trans, diag, m, n, alpha, a, a_rs, a_cs, b_copy, b_copy_rs, b_copy_cs ); // Scale C by beta. bli_dscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); // Add B_copy into C. bli_daxpymt( BLIS_NO_TRANSPOSE, m, n, &one, b_copy, b_copy_rs, b_copy_cs, c, c_rs, c_cs ); // Free the copy of B. bli_dfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_dfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_dfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_dfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
/*
 * bli_sgemm
 *
 * Single-precision general matrix-matrix multiply on operands described by
 * general row/column strides.  The routine classifies the storage of C, A
 * and B (eight combinations), rewrites the request so that a column-major
 * BLAS gemm can perform it, and calls bli_sgemm_blas().
 *
 *   transa, transb  BLIS transposition characters for A and B; they may be
 *                   toggled and/or exchanged internally.
 *   m, k, n         problem dimensions; note the (m, k, n) argument order.
 *                   C is treated as m-by-n.
 *   alpha, beta     pointers to the scalars.
 *   a, a_rs, a_cs   operand A with its row and column strides.
 *   b, b_rs, b_cs   operand B with its row and column strides.
 *   c, c_rs, c_cs   operand C with its row and column strides (updated).
 *
 * When bli_zero_dim3( m, k, n ) holds, C is scaled by beta and the routine
 * returns without calling the BLAS.
 *
 * In two of the storage combinations the product is computed into a
 * temporary matrix and then added, transposed, into beta-scaled C
 * (gemm_needs_axpyt).
 *
 * References: bli_is_col_storage(), bli_s0(), bli_s1(), bli_sallocm(),
 *             bli_saxpymt(), bli_screate_contigm(), bli_screate_contigmt(),
 *             bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigm(),
 *             bli_sgemm_blas(), bli_sscalm(), and bli_zero_dim3().
 * Referenced by: FLA_Gemm_external().
 */
void bli_sgemm( char transa, char transb, int m, int k, int n, float* alpha, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs, float* beta, float* c, int c_rs, int c_cs )
{
    // Caller-visible values, kept so temporaries can be released (and C
    // copied back) using the original pointers, strides and dimensions.
    int       m_save    = m;
    int       n_save    = n;
    float*    a_save    = a;
    float*    b_save    = b;
    float*    c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    float     zero = bli_s0();
    float     one  = bli_s1();
    float*    a_unswap;
    float*    b_unswap;
    float*    c_trans;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldc_trans, incc_trans;
    int       m_gemm, n_gemm;
    int       gemm_needs_axpyt = FALSE;

    // Return early if possible.
    if ( bli_zero_dim3( m, k, n ) )
    {
        bli_sscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs );
        return;
    }

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bli_screate_contigmt( transa, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );
    bli_screate_contigmt( transb, k, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs );
    bli_screate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs );

    // These are used to track the original values of a and b prior to any
    // operand swapping that might take place. This is necessary for proper
    // freeing of memory when one is a temporary contiguous matrix.
    a_unswap = a;
    b_unswap = b;

    // These are used to track the dimensions of the product of the
    // A and B operands to the BLAS invocation of gemm. These differ
    // from m and n when the operands need to be swapped.
    m_gemm = m;
    n_gemm = n;

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // Notation in the case comments: X_c / X_r is matrix X stored by
    // columns / rows; tr( X ) is X with its requested transposition.
    if ( bli_is_col_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_c ) * tr( B_c )
                // effective operation: C_c += tr( A_c ) * tr( B_c )
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( A_c ) * tr( B_c )^T
                bli_swap_ints( ldb, incb );

                bli_toggle_trans( transb );
            }
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_r ) * tr( B_c )
                // effective operation: C_c += tr( A_c )^T * tr( B_c )
                bli_swap_ints( lda, inca );

                bli_toggle_trans( transa );
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += tr( A_r ) * tr( B_r )
                // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );

                // Exchange the roles of A and B (pointers, leading
                // dimensions, increments and transposition flags).
                bli_sswap_pointers( a, b );
                bli_swap_ints( lda, ldb );
                bli_swap_ints( inca, incb );
                bli_swap_chars( transa, transb );

                gemm_needs_axpyt = TRUE;
                bli_swap_ints( m_gemm, n_gemm );
            }
        }
    }
    else // if ( bli_is_row_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_c ) * tr( B_c )
                // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T
                bli_swap_ints( ldc, incc );

                // m and n now describe C in its column-major view, while
                // m_gemm and n_gemm keep the dimensions of the product.
                bli_swap_ints( m, n );

                gemm_needs_axpyt = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_c ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )^T
                bli_swap_ints( ldc, incc );
                bli_swap_ints( ldb, incb );

                bli_toggle_trans( transa );

                bli_swap_ints( m, n );
                bli_swap_ints( m_gemm, n_gemm );
                bli_sswap_pointers( a, b );
                bli_swap_ints( lda, ldb );
                bli_swap_ints( inca, incb );
                bli_swap_chars( transa, transb );
            }
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r ) * tr( B_c )
                // effective operation: C_c += tr( B_c )^T * tr( A_c )
                bli_swap_ints( ldc, incc );
                bli_swap_ints( lda, inca );

                bli_toggle_trans( transb );

                bli_swap_ints( m, n );
                bli_swap_ints( m_gemm, n_gemm );
                bli_sswap_pointers( a, b );
                bli_swap_ints( lda, ldb );
                bli_swap_ints( inca, incb );
                bli_swap_chars( transa, transb );
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += tr( A_r ) * tr( B_r )
                // effective operation: C_c += tr( B_c ) * tr( A_c )
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
                bli_swap_ints( ldc, incc );

                bli_swap_ints( m, n );
                bli_swap_ints( m_gemm, n_gemm );
                bli_sswap_pointers( a, b );
                bli_swap_ints( lda, ldb );
                bli_swap_ints( inca, incb );
                bli_swap_chars( transa, transb );
            }
        }
    }

    // There are two cases where we need to perform the gemm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( gemm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, while m_gemm and n_gemm are the
        // dimensions of the actual product op(A)*op(B), which may be n-by-m
        // since the operands may have been swapped.
        c_trans    = bli_sallocm( m_gemm, n_gemm );
        ldc_trans  = m_gemm;
        incc_trans = 1;

        // Compute tr( A ) * tr( B ), where A and B may have been swapped
        // to reference the other, and store the result in C_trans.
        // beta is replaced by zero here; the real beta is applied to C
        // in the scaling step below.
        bli_sgemm_blas( transa, transb, m_gemm, n_gemm, k, alpha, a, lda, b, ldb, &zero, c_trans, ldc_trans );

        // Scale C by beta.
        bli_sscalm( BLIS_NO_CONJUGATE, m, n, beta, c, incc, ldc );

        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bli_saxpymt( BLIS_TRANSPOSE, m, n, &one, c_trans, incc_trans, ldc_trans, c, incc, ldc );

        // Free the temporary matrix for C.
        bli_sfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        bli_sgemm_blas( transa, transb, m_gemm, n_gemm, k, alpha, a, lda, b, ldb, beta, c, ldc );
    }

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.  a_unswap/b_unswap are used because a and b may
    // have been exchanged above.
    bli_sfree_contigm( a_save, a_rs_save, a_cs_save, &a_unswap, &a_rs, &a_cs );
    bli_sfree_contigm( b_save, b_rs_save, b_cs_save, &b_unswap, &b_rs, &b_cs );
    bli_sfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs );
}
/*
 * bli_sgemm_blas
 *
 * Hands a single-precision general matrix-matrix multiply to the
 * underlying BLAS: cblas_sgemm() with CblasColMajor ordering when
 * BLIS_ENABLE_CBLAS_INTERFACES is defined, F77_sgemm() otherwise.
 *
 *   transa, transb  BLIS transposition characters, each translated with
 *                   bli_param_map_to_netlib_trans() before the call.
 *   m, n, k         dimensions, forwarded unchanged; note the (m, n, k)
 *                   argument order of this wrapper.
 *   alpha, beta     pointers to the scalars (dereferenced for CBLAS,
 *                   passed as pointers to the Fortran routine).
 *   a, lda          operand A and its leading dimension.
 *   b, ldb          operand B and its leading dimension.
 *   c, ldc          operand C (updated by the BLAS call) and its leading
 *                   dimension.
 *
 * References: bli_param_map_to_netlib_trans(), cblas_sgemm(),
 *             CblasColMajor, and F77_sgemm().
 * Referenced by: bli_sgemm().
 */
void bli_sgemm_blas( char transa, char transb, int m, int n, int k, float* alpha, float* a, int lda, float* b, int ldb, float* beta, float* c, int ldc )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    enum CBLAS_TRANSPOSE netlib_transa;
    enum CBLAS_TRANSPOSE netlib_transb;

    bli_param_map_to_netlib_trans( transa, &netlib_transa );
    bli_param_map_to_netlib_trans( transb, &netlib_transb );

    cblas_sgemm( CblasColMajor,
                 netlib_transa,
                 netlib_transb,
                 m, n, k,
                 *alpha,
                 a, lda,
                 b, ldb,
                 *beta,
                 c, ldc );
#else
    char netlib_transa;
    char netlib_transb;

    bli_param_map_to_netlib_trans( transa, &netlib_transa );
    bli_param_map_to_netlib_trans( transb, &netlib_transb );

    F77_sgemm( &netlib_transa,
               &netlib_transb,
               &m, &n, &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#endif
}
/*
 * bli_shemm
 *
 * Single-precision "hemm" entry point.  It performs no work of its own:
 * every argument is passed, unchanged and in the same order, to
 * bli_ssymm().  (Presumably this is because Hermitian and symmetric
 * coincide for real data; confirm against the library documentation.)
 *
 *   side, uplo     BLIS parameter characters.
 *   m, n           dimensions.
 *   alpha, beta    pointers to the scalars.
 *   a, a_rs, a_cs  operand A with its strides.
 *   b, b_rs, b_cs  operand B with its strides.
 *   c, c_rs, c_cs  operand C with its strides.
 *
 * References: bli_ssymm().
 */
void bli_shemm( char side, char uplo, int m, int n, float* alpha, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs, float* beta, float* c, int c_rs, int c_cs )
{
    bli_ssymm( side,
               uplo,
               m, n,
               alpha,
               a, a_rs, a_cs,
               b, b_rs, b_cs,
               beta,
               c, c_rs, c_cs );
}
/*
 * bli_sher2k
 *
 * Single-precision "her2k" entry point.  It performs no work of its own:
 * every argument is passed, unchanged and in the same order, to
 * bli_ssyr2k().  (Presumably this is because Hermitian and symmetric
 * coincide for real data; confirm against the library documentation.)
 *
 *   uplo, trans    BLIS parameter characters.
 *   m, k           dimensions.
 *   alpha, beta    pointers to the scalars.
 *   a, a_rs, a_cs  operand A with its strides.
 *   b, b_rs, b_cs  operand B with its strides.
 *   c, c_rs, c_cs  operand C with its strides.
 *
 * References: bli_ssyr2k().
 */
void bli_sher2k( char uplo, char trans, int m, int k, float* alpha, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs, float* beta, float* c, int c_rs, int c_cs )
{
    bli_ssyr2k( uplo,
                trans,
                m, k,
                alpha,
                a, a_rs, a_cs,
                b, b_rs, b_cs,
                beta,
                c, c_rs, c_cs );
}
/*
 * bli_sherk
 *
 * Single-precision "herk" entry point.  It performs no work of its own:
 * every argument is passed, unchanged and in the same order, to
 * bli_ssyrk().  (Presumably this is because Hermitian and symmetric
 * coincide for real data; confirm against the library documentation.)
 *
 *   uplo, trans    BLIS parameter characters.
 *   m, k           dimensions.
 *   alpha, beta    pointers to the scalars.
 *   a, a_rs, a_cs  operand A with its strides.
 *   c, c_rs, c_cs  operand C with its strides.
 *
 * References: bli_ssyrk().
 */
void bli_sherk( char uplo, char trans, int m, int k, float* alpha, float* a, int a_rs, int a_cs, float* beta, float* c, int c_rs, int c_cs )
{
    bli_ssyrk( uplo,
               trans,
               m, k,
               alpha,
               a, a_rs, a_cs,
               beta,
               c, c_rs, c_cs );
}
/*
 * bli_ssymm
 *
 * Single-precision symmetric matrix-matrix multiply on operands described
 * by general row/column strides.  The routine classifies the storage of
 * C, A and B (eight combinations), rewrites the request so that a
 * column-major BLAS symm can perform it, and calls bli_ssymm_blas().
 *
 *   side, uplo     BLIS parameter characters; both may be toggled
 *                  internally depending on the storage of the operands.
 *   m, n           dimensions of B and C (swapped internally when C is
 *                  row-stored).
 *   alpha, beta    pointers to the scalars.
 *   a, a_rs, a_cs  symmetric operand A with its strides; its order is
 *                  chosen by bli_set_dim_with_side( side, m, n, ... ).
 *   b, b_rs, b_cs  operand B with its strides.
 *   c, c_rs, c_cs  operand C with its strides (updated).
 *
 * Returns immediately, without touching C, when bli_zero_dim2( m, n )
 * holds.
 *
 * Two helper strategies are used for combinations the BLAS cannot express
 * directly:
 *   - symm_needs_copyb: B is first copied (optionally transposed, see
 *     symm_needs_transb) into a temporary with unit row stride.
 *   - symm_needs_axpyt: the product is computed into a temporary and then
 *     added, transposed, into beta-scaled C.
 *
 * References: bli_is_col_storage(), bli_s0(), bli_s1(), bli_sallocm(),
 *             bli_saxpymt(), bli_scopymt(), bli_screate_contigm(),
 *             bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree(),
 *             bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_sscalm(),
 *             bli_ssymm_blas(), and bli_zero_dim2().
 * Referenced by: bli_shemm(), FLA_Hemm_external(), and FLA_Symm_external().
 */
void bli_ssymm( char side, char uplo, int m, int n, float* alpha, float* a, int a_rs, int a_cs, float* b, int b_rs, int b_cs, float* beta, float* c, int c_rs, int c_cs )
{
    // Caller-visible values, kept so temporaries can be released (and C
    // copied back) using the original pointers, strides and dimensions.
    int       m_save    = m;
    int       n_save    = n;
    float*    a_save    = a;
    float*    b_save    = b;
    float*    c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    float     zero = bli_s0();
    float     one  = bli_s1();
    float*    b_copy;
    float*    c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    int       symm_needs_copyb  = FALSE;
    int       symm_needs_transb = FALSE;
    int       symm_needs_axpyt  = FALSE;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // If necessary, allocate, initialize, and use a temporary contiguous
    // copy of each matrix rather than the original matrices.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_screate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );
    bli_screate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs );
    bli_screate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs );

    // Initialize with values assuming column-major storage.
    lda  = a_cs;
    inca = a_rs;
    ldb  = b_cs;
    incb = b_rs;
    ldc  = c_cs;
    incc = c_rs;

    // Adjust the parameters based on the storage of each matrix.
    // Notation in the case comments: X_c / X_r is matrix X stored by
    // columns / rows; ~uplo is the opposite triangle.
    if ( bli_is_col_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_c
                // effective operation: C_c += uplo( A_c ) * B_c
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_c ) * B_r
                // effective operation: C_c += uplo( A_c ) * B_c
                symm_needs_copyb = TRUE;
            }
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_c
                // effective operation: C_c += ~uplo( conj( A_c ) ) * B_c
                bli_swap_ints( lda, inca );

                bli_toggle_uplo( uplo );
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_c += uplo( A_r ) * B_r
                // effective operation: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );

                bli_toggle_side( side );
                bli_toggle_uplo( uplo );

                symm_needs_axpyt = TRUE;
            }
        }
    }
    else // if ( bli_is_row_storage( c_rs, c_cs ) )
    {
        if ( bli_is_col_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_c
                // effective operation: C_c += ( uplo( A_c ) * B_c )^T
                bli_swap_ints( ldc, incc );

                bli_swap_ints( m, n );

                symm_needs_axpyt = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_c ) * B_r
                // effective operation: C_c += B_c * uplo( A_c )
                // (uplo is not toggled in this branch: A is already
                // column-stored, only the side changes.)
                bli_swap_ints( ldc, incc );
                bli_swap_ints( ldb, incb );

                bli_swap_ints( m, n );

                bli_toggle_side( side );
            }
        }
        else // if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            if ( bli_is_col_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_c
                // effective operation: C_c += B_c^T * ~uplo( A_c )
                bli_swap_ints( ldc, incc );
                bli_swap_ints( lda, inca );

                bli_swap_ints( m, n );

                bli_toggle_side( side );
                bli_toggle_uplo( uplo );

                // B must be physically transposed into a temporary.
                symm_needs_copyb  = TRUE;
                symm_needs_transb = TRUE;
            }
            else // if ( bli_is_row_storage( b_rs, b_cs ) )
            {
                // requested operation: C_r += uplo( A_r ) * B_r
                // effective operation: C_c += B_c * conj( ~uplo( A_c ) )
                bli_swap_ints( ldc, incc );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );

                bli_swap_ints( m, n );

                bli_toggle_uplo( uplo );
                bli_toggle_side( side );
            }
        }
    }

    // We need a temporary matrix for the cases where B needs to be copied.
    // By default b_copy simply aliases b.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    // There are two cases where we need to make a copy of B: one where the
    // copy's dimensions are transposed from the original B, and one where
    // the dimensions are not swapped.
    if ( symm_needs_copyb )
    {
        char transb;

        // Set transb, which determines whether or not we need to copy from B
        // as if it needs a transposition. If a transposition is needed, then
        // m and n and have already been swapped. So in either case m
        // represents the leading dimension of the copy.
        if ( symm_needs_transb ) transb = BLIS_TRANSPOSE;
        else                     transb = BLIS_NO_TRANSPOSE;

        b_copy    = bli_sallocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bli_scopymt( transb, m, n, b, incb, ldb, b_copy, incb_copy, ldb_copy );
    }

    // There are two cases where we need to perform the symm and then axpy
    // the result into C with a transposition. We handle those cases here.
    if ( symm_needs_axpyt )
    {
        // We need a temporary matrix for holding C^T. Notice that m and n
        // represent the dimensions of C, and thus C_trans is n-by-m
        // (interpreting both as column-major matrices). So the leading
        // dimension of the temporary matrix holding C^T is n.
        c_trans    = bli_sallocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) and store the result in C_trans.
        // Note that there is no overlap between the axpyt cases and
        // the conja/copyb cases, hence the use of a, b, lda, and ldb.
        // beta is replaced by zero here; the real beta is applied to C
        // in the scaling step below.
        bli_ssymm_blas( side, uplo, n, m, alpha, a, lda, b, ldb, &zero, c_trans, ldc_trans );

        // Scale C by beta.
        bli_sscalm( BLIS_NO_CONJUGATE, m, n, beta, c, incc, ldc );

        // And finally, accumulate the matrix product in C_trans into C
        // with a transpose.
        bli_saxpymt( BLIS_TRANSPOSE, m, n, &one, c_trans, incc_trans, ldc_trans, c, incc, ldc );

        // Free the temporary matrix for C.
        bli_sfree( c_trans );
    }
    else // no extra axpyt step needed
    {
        bli_ssymm_blas( side, uplo, m, n, alpha, a, lda, b_copy, ldb_copy, beta, c, ldc );
    }

    // b_copy only owns memory when a copy was actually made above.
    if ( symm_needs_copyb )
        bli_sfree( b_copy );

    // Free any temporary contiguous matrices, copying the result back to
    // the original matrix.
    bli_sfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );
    bli_sfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs );
    bli_sfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs );
}
// bli_ssymm_blas
//
// Hands a single-precision symm problem to the underlying BLAS. The side
// and uplo parameters are first mapped to their netlib representation;
// the call then goes to cblas_ssymm() (column-major order) when
// BLIS_ENABLE_CBLAS_INTERFACES is defined, and to F77_ssymm() otherwise.
//
// References: bli_param_map_to_netlib_side(),
//   bli_param_map_to_netlib_uplo(), cblas_ssymm(), CblasColMajor,
//   F77_ssymm().
// Referenced by: bli_ssymm().
void bli_ssymm_blas( char side, char uplo, int m, int n,
                     float* alpha,
                     float* a, int lda,
                     float* b, int ldb,
                     float* beta,
                     float* c, int ldc )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_SIDE netlib_side;
	enum CBLAS_UPLO netlib_uplo;

	bli_param_map_to_netlib_side( side, &netlib_side );
	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

	// The CBLAS interface takes the scalars by value.
	cblas_ssymm( CblasColMajor,
	             netlib_side,
	             netlib_uplo,
	             m, n,
	             *alpha,
	             a, lda,
	             b, ldb,
	             *beta,
	             c, ldc );
#else
	char netlib_side;
	char netlib_uplo;

	bli_param_map_to_netlib_side( side, &netlib_side );
	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

	// The Fortran interface takes every argument by reference.
	F77_ssymm( &netlib_side,
	           &netlib_uplo,
	           &m, &n,
	           alpha,
	           a, &lda,
	           b, &ldb,
	           beta,
	           c, &ldc );
#endif
}
void bli_ssyr2k | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
float * | alpha, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
float * | beta, | ||
float * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_does_trans(), bli_is_col_storage(), bli_sallocm(), bli_scopymt(), bli_screate_contigmr(), bli_screate_contigmt(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigmr(), bli_ssyr2k_blas(), and bli_zero_dim2().
Referenced by bli_sher2k(), FLA_Her2k_external(), and FLA_Syr2k_external().
{ char uplo_save = uplo; int m_save = m; float* a_save = a; float* b_save = b; float* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; float* a_copy; float* b_copy; int lda, inca; int ldb, incb; int ldc, incc; int lda_copy, inca_copy; int ldb_copy, incb_copy; int syr2k_needs_copya = FALSE; int syr2k_needs_copyb = FALSE; // Return early if possible. if ( bli_zero_dim2( m, k ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_screate_contigmt( trans, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_screate_contigmt( trans, m, k, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_screate_contigmr( uplo, m, m, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_c * B_r' + B_r * A_c' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' syr2k_needs_copyb = TRUE; } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_r * B_c' + B_c * A_r' // requested operation: uplo( C_c ) += A_c * B_c' + B_c * A_c' syr2k_needs_copya = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_c ) += A_r * B_r' + B_r * A_r' // requested operation: uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c ) bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_trans( trans ); } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_c * B_c' + B_c * A_c' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_c * B_r' + B_r * A_c' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) syr2k_needs_copyb = TRUE; bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_r * B_c' + B_c * A_r' // requested operation: ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' ) syr2k_needs_copya = TRUE; bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: uplo( C_r ) += A_r * B_r' + B_r * A_r' // 
requested operation: ~uplo( C_c ) += A_c' * B_c + B_c' * A_c bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } } a_copy = a; lda_copy = lda; inca_copy = inca; // There are two cases where we need to copy A column-major storage. // We handle those two cases here. if ( syr2k_needs_copya ) { int m_a; int n_a; // Determine the dimensions of A according to the value of trans. We // need this in order to set the leading dimension of the copy of A. if ( bli_does_trans( trans ) ) { m_a = k; n_a = m; } else { m_a = m; n_a = k; } // We need a temporary matrix to hold a column-major copy of A. a_copy = bli_sallocm( m, k ); lda_copy = m_a; inca_copy = 1; // Copy the contents of A into A_copy. bli_scopymt( BLIS_NO_TRANSPOSE, m_a, n_a, a, inca, lda, a_copy, inca_copy, lda_copy ); } b_copy = b; ldb_copy = ldb; incb_copy = incb; // There are two cases where we need to copy B column-major storage. // We handle those two cases here. if ( syr2k_needs_copyb ) { int m_b; int n_b; // Determine the dimensions of B according to the value of trans. We // need this in order to set the leading dimension of the copy of B. if ( bli_does_trans( trans ) ) { m_b = k; n_b = m; } else { m_b = m; n_b = k; } // We need a temporary matrix to hold a column-major copy of B. b_copy = bli_sallocm( m, k ); ldb_copy = m_b; incb_copy = 1; // Copy the contents of B into B_copy. bli_scopymt( BLIS_NO_TRANSPOSE, m_b, n_b, b, incb, ldb, b_copy, incb_copy, ldb_copy ); } bli_ssyr2k_blas( uplo, trans, m, k, alpha, a_copy, lda_copy, b_copy, ldb_copy, beta, c, ldc ); if ( syr2k_needs_copya ) bli_sfree( a_copy ); if ( syr2k_needs_copyb ) bli_sfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. 
bli_sfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_sfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_sfree_saved_contigmr( uplo_save, m_save, m_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_ssyr2k_blas | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
float * | alpha, | ||
float * | a, | ||
int | lda, | ||
float * | b, | ||
int | ldb, | ||
float * | beta, | ||
float * | c, | ||
int | ldc | ||
) |
References bli_is_conjtrans(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ssyr2k(), CblasColMajor, and F77_ssyr2k().
Referenced by bli_ssyr2k().
{ // BLAS doesn't recognize the conjugate-transposition constant for syr2k, // so we have to map it down to regular transposition. if ( bli_is_conjtrans( trans ) ) trans = BLIS_TRANSPOSE; #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); cblas_ssyr2k( cblas_order, cblas_uplo, cblas_trans, m, k, *alpha, a, lda, b, ldb, *beta, c, ldc ); #else char blas_uplo; char blas_trans; bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); F77_ssyr2k( &blas_uplo, &blas_trans, &m, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc ); #endif }
// bli_ssyrk
//
// Single-precision syrk front end that accepts A and C with either row or
// column storage and reduces the problem to one column-major call to
// bli_ssyrk_blas().
//
// Parameters:
//   uplo, trans     - triangle of C and transposition, forwarded to
//                     bli_ssyrk_blas() after any adjustment below.
//   m, k            - problem dimensions, forwarded unchanged.
//   alpha, beta     - scalars, forwarded unchanged.
//   a, a_rs, a_cs   - matrix A with its row and column strides.
//   c, c_rs, c_cs   - matrix C with its row and column strides.
//
// References: bli_is_col_storage(), bli_screate_contigmr(),
//   bli_screate_contigmt(), bli_sfree_contigm(),
//   bli_sfree_saved_contigmr(), bli_ssyrk_blas(), bli_zero_dim2().
// Referenced by: bli_sherk(), FLA_Herk_external(), FLA_Syrk_external(),
//   FLA_UDdate_UT_ops_var1().
void bli_ssyrk( char uplo, char trans, int m, int k,
                float* alpha,
                float* a, int a_rs, int a_cs,
                float* beta,
                float* c, int c_rs, int c_cs )
{
	char   uplo_save = uplo;
	int    m_save    = m;
	float* a_save    = a;
	float* c_save    = c;
	int    a_rs_save = a_rs;
	int    a_cs_save = a_cs;
	int    c_rs_save = c_rs;
	int    c_cs_save = c_cs;
	int    lda, inca;
	int    ldc, incc;

	// Nothing to do for an empty problem.
	if ( bli_zero_dim2( m, k ) ) return;

	// Work on temporary contiguous copies where the originals require it.
	bli_screate_contigmt( trans, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );
	bli_screate_contigmr( uplo,  m, m, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs );

	// Start from the column-major interpretation of the strides.
	lda = a_cs; inca = a_rs;
	ldc = c_cs; incc = c_rs;

	// The storage of C and the storage of A are handled independently:
	//
	//   C row-stored:  uplo( C_r )   ->  ~uplo( C_c )
	//   A row-stored:  A_r * A_r^T   ->  A_c^T * A_c
	if ( !bli_is_col_storage( c_rs, c_cs ) )
	{
		bli_swap_ints( ldc, incc );
		bli_toggle_uplo( uplo );
	}

	if ( !bli_is_col_storage( a_rs, a_cs ) )
	{
		bli_swap_ints( lda, inca );
		bli_toggle_trans( trans );
	}

	bli_ssyrk_blas( uplo, trans,
	                m, k,
	                alpha,
	                a, lda,
	                beta,
	                c, ldc );

	// Release the temporaries, copying the result back into the caller's C.
	bli_sfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs );
	bli_sfree_saved_contigmr( uplo_save, m_save, m_save,
	                          c_save, c_rs_save, c_cs_save,
	                          &c, &c_rs, &c_cs );
}
// bli_ssyrk_blas
//
// Hands a single-precision syrk problem to the underlying BLAS. uplo and
// trans are converted to their netlib representation; the call then goes
// to cblas_ssyrk() (column-major order) when BLIS_ENABLE_CBLAS_INTERFACES
// is defined, and to F77_ssyrk() otherwise.
//
// References: bli_param_map_to_netlib_trans(),
//   bli_param_map_to_netlib_uplo(), cblas_ssyrk(), CblasColMajor,
//   F77_ssyrk().
// Referenced by: bli_ssyrk().
void bli_ssyrk_blas( char uplo, char trans, int m, int k,
                     float* alpha,
                     float* a, int lda,
                     float* beta,
                     float* c, int ldc )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_UPLO      netlib_uplo;
	enum CBLAS_TRANSPOSE netlib_trans;

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	// The CBLAS interface takes the scalars by value.
	cblas_ssyrk( CblasColMajor,
	             netlib_uplo,
	             netlib_trans,
	             m, k,
	             *alpha,
	             a, lda,
	             *beta,
	             c, ldc );
#else
	char netlib_uplo;
	char netlib_trans;

	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );

	// The Fortran interface takes every argument by reference.
	F77_ssyrk( &netlib_uplo,
	           &netlib_trans,
	           &m, &k,
	           alpha,
	           a, &lda,
	           beta,
	           c, &ldc );
#endif
}
void bli_strmm | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
float * | alpha, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_is_col_storage(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_strmm_blas(), and bli_zero_dim2().
Referenced by bli_strmmsx(), and FLA_Trmm_external().
{ int m_save = m; int n_save = n; float* a_save = a; float* b_save = b; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int dim_a; int lda, inca; int ldb, incb; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_screate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_screate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_c ) ) * B_c // effective operation: B_c := tr( uplo( A_c ) ) * B_c } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_r ) ) * B_c // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_c ) ) * B_r // effective operation: B_c := B_c * tr( uplo( A_c ) )^T bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_trans( trans ); } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_r ) ) * B_r // effective operation: B_c := B_c * tr( ~uplo( A_c ) ) bli_swap_ints( ldb, incb ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_uplo( uplo ); bli_toggle_side( side ); } } bli_strmm_blas( side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. 
bli_sfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_sfree_saved_contigm( m_save, n_save, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); }
// bli_strmm_blas
//
// Hands a single-precision trmm problem to the underlying BLAS. side,
// uplo, trans and diag are converted to their netlib representation; the
// call then goes to cblas_strmm() (column-major order) when
// BLIS_ENABLE_CBLAS_INTERFACES is defined, and to F77_strmm() otherwise.
//
// References: bli_param_map_to_netlib_diag(),
//   bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(),
//   bli_param_map_to_netlib_uplo(), cblas_strmm(), CblasColMajor,
//   F77_strmm().
// Referenced by: bli_strmm().
void bli_strmm_blas( char side, char uplo, char trans, char diag, int m, int n,
                     float* alpha,
                     float* a, int lda,
                     float* b, int ldb )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_SIDE      netlib_side;
	enum CBLAS_UPLO      netlib_uplo;
	enum CBLAS_TRANSPOSE netlib_trans;
	enum CBLAS_DIAG      netlib_diag;

	bli_param_map_to_netlib_side( side, &netlib_side );
	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );
	bli_param_map_to_netlib_diag( diag, &netlib_diag );

	// The CBLAS interface takes the scalar by value.
	cblas_strmm( CblasColMajor,
	             netlib_side,
	             netlib_uplo,
	             netlib_trans,
	             netlib_diag,
	             m, n,
	             *alpha,
	             a, lda,
	             b, ldb );
#else
	char netlib_side;
	char netlib_uplo;
	char netlib_trans;
	char netlib_diag;

	bli_param_map_to_netlib_side( side, &netlib_side );
	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );
	bli_param_map_to_netlib_diag( diag, &netlib_diag );

	// The Fortran interface takes every argument by reference.
	F77_strmm( &netlib_side,
	           &netlib_uplo,
	           &netlib_trans,
	           &netlib_diag,
	           &m, &n,
	           alpha,
	           a, &lda,
	           b, &ldb );
#endif
}
void bli_strmmsx | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
float * | alpha, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
float * | beta, | ||
float * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_is_col_storage(), bli_s1(), bli_sallocm(), bli_saxpymt(), bli_scopymt(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_sscalm(), bli_strmm(), and bli_zero_dim2().
Referenced by FLA_Trmmsx_external().
{ int m_save = m; int n_save = n; float* a_save = a; float* b_save = b; float* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; float one = bli_s1(); float* b_copy; int dim_a; int b_copy_rs, b_copy_cs; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_screate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_screate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_screate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Create a copy of B to use in the computation so the original matrix is // left untouched. b_copy = bli_sallocm( m, n ); // Match the strides of B_copy to that of B. if ( bli_is_col_storage( b_rs, b_cs ) ) { b_copy_rs = 1; b_copy_cs = m; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { b_copy_rs = n; b_copy_cs = 1; } // Copy the contents of B to B_copy. bli_scopymt( BLIS_NO_CONJUGATE, m, n, b, b_rs, b_cs, b_copy, b_copy_rs, b_copy_cs ); // Perform the operation on B_copy. bli_strmm( side, uplo, trans, diag, m, n, alpha, a, a_rs, a_cs, b_copy, b_copy_rs, b_copy_cs ); // Scale C by beta. bli_sscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); // Add B_copy into C. bli_saxpymt( BLIS_NO_TRANSPOSE, m, n, &one, b_copy, b_copy_rs, b_copy_cs, c, c_rs, c_cs ); // Free the copy of B. bli_sfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_sfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_sfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_sfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_strsm | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
float * | alpha, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_is_col_storage(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_strsm_blas(), and bli_zero_dim2().
Referenced by bli_strsmsx(), FLA_LU_nopiv_ops_var1(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_piv_ops_var3(), and FLA_Trsm_external().
{ int m_save = m; int n_save = n; float* a_save = a; float* b_save = b; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int dim_a; int lda, inca; int ldb, incb; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_screate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_screate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_c ) ) * B_c // effective operation: B_c := tr( uplo( A_c ) ) * B_c } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_r ) ) * B_c // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_c ) ) * B_r // effective operation: B_c := B_c * tr( uplo( A_c ) )^T bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_trans( trans ); } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_r ) ) * B_r // effective operation: B_c := B_c * tr( ~uplo( A_c ) ) bli_swap_ints( ldb, incb ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_uplo( uplo ); bli_toggle_side( side ); } } bli_strsm_blas( side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. 
bli_sfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_sfree_saved_contigm( m_save, n_save, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); }
// bli_strsm_blas
//
// Hands a single-precision trsm problem to the underlying BLAS. side,
// uplo, trans and diag are converted to their netlib representation; the
// call then goes to cblas_strsm() (column-major order) when
// BLIS_ENABLE_CBLAS_INTERFACES is defined, and to F77_strsm() otherwise.
//
// References: bli_param_map_to_netlib_diag(),
//   bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(),
//   bli_param_map_to_netlib_uplo(), cblas_strsm(), CblasColMajor,
//   F77_strsm().
// Referenced by: bli_strsm().
void bli_strsm_blas( char side, char uplo, char trans, char diag, int m, int n,
                     float* alpha,
                     float* a, int lda,
                     float* b, int ldb )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	enum CBLAS_SIDE      netlib_side;
	enum CBLAS_UPLO      netlib_uplo;
	enum CBLAS_TRANSPOSE netlib_trans;
	enum CBLAS_DIAG      netlib_diag;

	bli_param_map_to_netlib_side( side, &netlib_side );
	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );
	bli_param_map_to_netlib_diag( diag, &netlib_diag );

	// The CBLAS interface takes the scalar by value.
	cblas_strsm( CblasColMajor,
	             netlib_side,
	             netlib_uplo,
	             netlib_trans,
	             netlib_diag,
	             m, n,
	             *alpha,
	             a, lda,
	             b, ldb );
#else
	char netlib_side;
	char netlib_uplo;
	char netlib_trans;
	char netlib_diag;

	bli_param_map_to_netlib_side( side, &netlib_side );
	bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
	bli_param_map_to_netlib_trans( trans, &netlib_trans );
	bli_param_map_to_netlib_diag( diag, &netlib_diag );

	// The Fortran interface takes every argument by reference.
	F77_strsm( &netlib_side,
	           &netlib_uplo,
	           &netlib_trans,
	           &netlib_diag,
	           &m, &n,
	           alpha,
	           a, &lda,
	           b, &ldb );
#endif
}
void bli_strsmsx | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
float * | alpha, | ||
float * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
float * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
float * | beta, | ||
float * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_is_col_storage(), bli_s1(), bli_sallocm(), bli_saxpymt(), bli_scopymt(), bli_screate_contigm(), bli_screate_contigmr(), bli_set_dim_with_side(), bli_sfree(), bli_sfree_contigm(), bli_sfree_saved_contigm(), bli_sscalm(), bli_strsm(), and bli_zero_dim2().
Referenced by FLA_Trsmsx_external().
{ int m_save = m; int n_save = n; float* a_save = a; float* b_save = b; float* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; float one = bli_s1(); float* b_copy; int dim_a; int b_copy_rs, b_copy_cs; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_screate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_screate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_screate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Create a copy of B to use in the computation so the original matrix is // left untouched. b_copy = bli_sallocm( m, n ); // Match the strides of B_copy to that of B. if ( bli_is_col_storage( b_rs, b_cs ) ) { b_copy_rs = 1; b_copy_cs = m; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { b_copy_rs = n; b_copy_cs = 1; } // Copy the contents of B to B_copy. bli_scopymt( BLIS_NO_CONJUGATE, m, n, b, b_rs, b_cs, b_copy, b_copy_rs, b_copy_cs ); // Perform the operation on B_copy. bli_strsm( side, uplo, trans, diag, m, n, alpha, a, a_rs, a_cs, b_copy, b_copy_rs, b_copy_cs ); // Scale C by beta. bli_sscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); // Add B_copy into C. bli_saxpymt( BLIS_NO_TRANSPOSE, m, n, &one, b_copy, b_copy_rs, b_copy_cs, c, c_rs, c_cs ); // Free the copy of B. bli_sfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_sfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_sfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_sfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_zgemm | ( | char | transa, |
char | transb, | ||
int | m, | ||
int | k, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
dcomplex * | beta, | ||
dcomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_is_col_storage(), bli_is_conjnotrans(), bli_z0(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zconjm(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmt(), bli_zero_dim3(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zgemm_blas(), and bli_zscalm().
Referenced by FLA_Gemm_external().
{ int m_save = m; int n_save = n; dcomplex* a_save = a; dcomplex* b_save = b; dcomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; dcomplex zero = bli_z0(); dcomplex one = bli_z1(); dcomplex* a_unswap; dcomplex* b_unswap; dcomplex* a_conj; dcomplex* b_conj; dcomplex* c_trans; int lda, inca; int ldb, incb; int ldc, incc; int lda_conj, inca_conj; int ldb_conj, incb_conj; int ldc_trans, incc_trans; int m_gemm, n_gemm; int gemm_needs_axpyt = FALSE; int a_was_copied; int b_was_copied; // Return early if possible. if ( bli_zero_dim3( m, k, n ) ) { bli_zscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); return; } // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_zcreate_contigmt( transa, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zcreate_contigmt( transb, k, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_zcreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Figure out whether A and/or B was copied to contiguous memory. This // is used later to prevent redundant copying. a_was_copied = ( a != a_save ); b_was_copied = ( b != b_save ); // These are used to track the original values of a and b prior to any // operand swapping that might take place. This is necessary for proper // freeing of memory when one is a temporary contiguous matrix. a_unswap = a; b_unswap = b; // These are used to track the dimensions of the product of the // A and B operands to the BLAS invocation of gemm. These differ // from m and n when the operands need to be swapped. m_gemm = m; n_gemm = n; // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. 
if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_c ) * tr( B_c ) // effective operation: C_c += tr( A_c ) * tr( B_c ) } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_c ) * tr( B_r ) // effective operation: C_c += tr( A_c ) * tr( B_c )^T bli_swap_ints( ldb, incb ); bli_toggle_trans( transb ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_r ) * tr( B_c ) // effective operation: C_c += tr( A_r )^T * tr( B_c ) bli_swap_ints( lda, inca ); bli_toggle_trans( transa ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_c += tr( A_r ) * tr( B_r ) // effective operation: C_c += ( tr( B_c ) * tr( A_c ) )^T bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_zswap_pointers( a, b ); bli_swap_ints( a_was_copied, b_was_copied ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); gemm_needs_axpyt = TRUE; bli_swap_ints( m_gemm, n_gemm ); } } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { if ( bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_c ) * tr( B_c ) // effective operation: C_c += ( tr( A_c ) * tr( B_c ) )^T bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); gemm_needs_axpyt = TRUE; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_c ) * tr( B_r ) // effective operation: C_c += tr( B_c ) * tr( A_c )^T bli_swap_ints( ldc, incc ); bli_swap_ints( ldb, incb ); bli_toggle_trans( transa ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_zswap_pointers( a, b ); bli_swap_ints( a_was_copied, b_was_copied ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { if ( 
bli_is_col_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_r ) * tr( B_c ) // effective operation: C_c += tr( B_c )^T * tr( A_c ) bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_toggle_trans( transb ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_zswap_pointers( a, b ); bli_swap_ints( a_was_copied, b_was_copied ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { // requested operation: C_r += tr( A_r ) * tr( B_r ) // effective operation: C_c += tr( B_c ) * tr( A_c ) bli_swap_ints( lda, inca ); bli_swap_ints( ldb, incb ); bli_swap_ints( ldc, incc ); bli_swap_ints( m, n ); bli_swap_ints( m_gemm, n_gemm ); bli_zswap_pointers( a, b ); bli_swap_ints( a_was_copied, b_was_copied ); bli_swap_ints( lda, ldb ); bli_swap_ints( inca, incb ); bli_swap_chars( transa, transb ); } } } // We need a temporary matrix for the case where A is conjugated. a_conj = a; lda_conj = lda; inca_conj = inca; // If transa indicates conjugate-no-transpose and A was not already // copied, then copy and conjugate it to a temporary matrix. Otherwise, // if transa indicates conjugate-no-transpose and A was already copied, // just conjugate it. if ( bli_is_conjnotrans( transa ) && !a_was_copied ) { a_conj = bli_zallocm( m_gemm, k ); lda_conj = m_gemm; inca_conj = 1; bli_zcopymt( BLIS_CONJUGATE, m_gemm, k, a, inca, lda, a_conj, inca_conj, lda_conj ); } else if ( bli_is_conjnotrans( transa ) && a_was_copied ) { bli_zconjm( m_gemm, k, a_conj, inca_conj, lda_conj ); } // We need a temporary matrix for the case where B is conjugated. b_conj = b; ldb_conj = ldb; incb_conj = incb; // If transb indicates conjugate-no-transpose and B was not already // copied, then copy and conjugate it to a temporary matrix. Otherwise, // if transb indicates conjugate-no-transpose and B was already copied, // just conjugate it. 
if ( bli_is_conjnotrans( transb ) && !b_was_copied ) { b_conj = bli_zallocm( k, n_gemm ); ldb_conj = k; incb_conj = 1; bli_zcopymt( BLIS_CONJUGATE, k, n_gemm, b, incb, ldb, b_conj, incb_conj, ldb_conj ); } else if ( bli_is_conjnotrans( transb ) && b_was_copied ) { bli_zconjm( k, n_gemm, b_conj, incb_conj, ldb_conj ); } // There are two cases where we need to perform the gemm and then axpy // the result into C with a transposition. We handle those cases here. if ( gemm_needs_axpyt ) { // We need a temporary matrix for holding C^T. Notice that m and n // represent the dimensions of C, while m_gemm and n_gemm are the // dimensions of the actual product op(A)*op(B), which may be n-by-m // since the operands may have been swapped. c_trans = bli_zallocm( m_gemm, n_gemm ); ldc_trans = m_gemm; incc_trans = 1; // Compute tr( A ) * tr( B ), where A and B may have been swapped // to reference the other, and store the result in C_trans. bli_zgemm_blas( transa, transb, m_gemm, n_gemm, k, alpha, a_conj, lda_conj, b_conj, ldb_conj, &zero, c_trans, ldc_trans ); // Scale C by beta. bli_zscalm( BLIS_NO_CONJUGATE, m, n, beta, c, incc, ldc ); // And finally, accumulate the matrix product in C_trans into C // with a transpose. bli_zaxpymt( BLIS_TRANSPOSE, m, n, &one, c_trans, incc_trans, ldc_trans, c, incc, ldc ); // Free the temporary matrix for C. bli_zfree( c_trans ); } else // no extra axpyt step needed { bli_zgemm_blas( transa, transb, m_gemm, n_gemm, k, alpha, a_conj, lda_conj, b_conj, ldb_conj, beta, c, ldc ); } if ( bli_is_conjnotrans( transa ) && !a_was_copied ) bli_zfree( a_conj ); if ( bli_is_conjnotrans( transb ) && !b_was_copied ) bli_zfree( b_conj ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. 
bli_zfree_contigm( a_save, a_rs_save, a_cs_save, &a_unswap, &a_rs, &a_cs ); bli_zfree_contigm( b_save, b_rs_save, b_cs_save, &b_unswap, &b_rs, &b_cs ); bli_zfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_zgemm_blas( char transa, char transb, int m, int n, int k, dcomplex* alpha, dcomplex* a, int lda, dcomplex* b, int ldb, dcomplex* beta, dcomplex* c, int ldc )
References bli_param_map_to_netlib_trans(), cblas_zgemm(), CblasColMajor, and F77_zgemm().
Referenced by bli_zgemm().
{
    // Thin dispatch layer: map the BLIS transposition parameters onto the
    // values expected by the underlying BLAS, then forward every other
    // argument unchanged to zgemm.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: transposition flags are chars and all scalar
    // integers are passed by address.
    char f77_transa;
    char f77_transb;

    bli_param_map_to_netlib_trans( transa, &f77_transa );
    bli_param_map_to_netlib_trans( transb, &f77_transb );

    F77_zgemm( &f77_transa,
               &f77_transb,
               &m,
               &n,
               &k,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#else
    // CBLAS interface: transposition flags are enums, integers are passed
    // by value, and the matrices are declared column-major.
    enum CBLAS_TRANSPOSE netlib_transa;
    enum CBLAS_TRANSPOSE netlib_transb;

    bli_param_map_to_netlib_trans( transa, &netlib_transa );
    bli_param_map_to_netlib_trans( transb, &netlib_transb );

    cblas_zgemm( CblasColMajor,
                 netlib_transa,
                 netlib_transb,
                 m,
                 n,
                 k,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#endif
}
void bli_zhemm( char side, char uplo, int m, int n, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs, dcomplex* beta, dcomplex* c, int c_rs, int c_cs )
References bli_is_col_storage(), bli_is_left(), bli_set_dim_with_side(), bli_z0(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zconjmr(), bli_zcopymrt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zhemm_blas(), and bli_zscalm().
Referenced by FLA_Hemm_external().
{
    // Snapshot of the caller's arguments. The working copies (a, b, c, the
    // strides, m, n, side, uplo) may be redirected to temporary contiguous
    // matrices, swapped, or toggled below; the saved values are needed to
    // release the temporaries and write C back at the end.
    int       m_save    = m;
    int       n_save    = n;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    dcomplex  zero      = bli_z0();
    dcomplex  one       = bli_z1();
    dcomplex* a_conj;
    dcomplex* b_copy;
    dcomplex* c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_conj, inca_conj;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    int       need_conja  = FALSE;  // A must be conjugated before the BLAS call
    int       need_copyb  = FALSE;  // B must be copied to column-major storage
    int       need_transb = FALSE;  // ...and that copy must also transpose B
    int       need_axpyt  = FALSE;  // product goes to a temporary, then C += tmp^T
    int       a_was_copied;
    int       c_is_col;
    int       a_is_col;
    int       b_is_col;

    // Nothing to do when the m-by-n problem has a zero dimension.
    if ( bli_zero_dim2( m, n ) ) return;

    // Where necessary, switch to temporary contiguous copies of the
    // operands. A is dim_a-by-dim_a, with dim_a selected from m and n
    // according to side.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );
    bli_zcreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );
    bli_zcreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // If A now points at a temporary we may conjugate it in place later
    // instead of allocating a second copy.
    a_was_copied = ( a != a_save );

    // Start from the column-major interpretation of each operand.
    lda = a_cs;  inca = a_rs;
    ldb = b_cs;  incb = b_rs;
    ldc = c_cs;  incc = c_rs;

    // Classify the (possibly redirected) operands once, up front.
    c_is_col = bli_is_col_storage( c_rs, c_cs );
    a_is_col = bli_is_col_storage( a_rs, a_cs );
    b_is_col = bli_is_col_storage( b_rs, b_cs );

    // Adjust the parameters for each of the eight storage combinations.
    // Subscripts _c / _r denote column / row storage; ~uplo is the
    // opposite triangle.
    if ( c_is_col && a_is_col && b_is_col )
    {
        // requested: C_c += uplo( A_c ) * B_c
        // effective: C_c += uplo( A_c ) * B_c   (nothing to adjust)
    }
    else if ( c_is_col && a_is_col && !b_is_col )
    {
        // requested: C_c += uplo( A_c ) * B_r
        // effective: C_c += uplo( A_c ) * B_c
        need_copyb = TRUE;
    }
    else if ( c_is_col && !a_is_col && b_is_col )
    {
        // requested: C_c += uplo( A_r ) * B_c
        // effective: C_c += ~uplo( conj( A_c ) ) * B_c
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
        need_conja = TRUE;
    }
    else if ( c_is_col && !a_is_col && !b_is_col )
    {
        // requested: C_c += uplo( A_r ) * B_r
        // effective: C_c += ( B_c * ~uplo( conj( A_c ) ) )^T
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_side( side );
        bli_toggle_uplo( uplo );
        need_axpyt = TRUE;
    }
    else if ( !c_is_col && a_is_col && b_is_col )
    {
        // requested: C_r += uplo( A_c ) * B_c
        // effective: C_c += ( uplo( A_c ) * B_c )^T
        bli_swap_ints( ldc, incc );
        bli_swap_ints( m, n );
        need_axpyt = TRUE;
    }
    else if ( !c_is_col && a_is_col && !b_is_col )
    {
        // requested: C_r += uplo( A_c ) * B_r
        // effective: C_c += B_c * ~uplo( conj( A_c ) )
        bli_swap_ints( ldc, incc );
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        need_conja = TRUE;
    }
    else if ( !c_is_col && !a_is_col && b_is_col )
    {
        // requested: C_r += uplo( A_r ) * B_c
        // effective: C_c += B_c^T * ~uplo( A_c )
        bli_swap_ints( ldc, incc );
        bli_swap_ints( lda, inca );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        bli_toggle_uplo( uplo );
        need_copyb  = TRUE;
        need_transb = TRUE;
    }
    else
    {
        // requested: C_r += uplo( A_r ) * B_r
        // effective: C_c += B_c * conj( ~uplo( A_c ) )
        bli_swap_ints( ldc, incc );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_uplo( uplo );
        bli_toggle_side( side );
    }

    // By default the BLAS call reads A directly.
    a_conj    = a;
    lda_conj  = lda;
    inca_conj = inca;

    if ( need_conja )
    {
        // Order of A, recomputed from the current side, m and n since
        // these may have been toggled/swapped above.
        int dim_a_now;

        if ( bli_is_left( side ) ) dim_a_now = m;
        else                       dim_a_now = n;

        if ( a_was_copied )
        {
            // A is already a private temporary: conjugate it in place.
            bli_zconjmr( uplo,
                         dim_a_now,
                         dim_a_now,
                         a_conj, inca_conj, lda_conj );
        }
        else
        {
            // A is the caller's matrix: conjugate into a fresh
            // column-major temporary instead.
            a_conj    = bli_zallocm( dim_a_now, dim_a_now );
            lda_conj  = dim_a_now;
            inca_conj = 1;

            bli_zcopymrt( uplo,
                          BLIS_CONJ_NO_TRANSPOSE,
                          dim_a_now,
                          dim_a_now,
                          a,      inca,      lda,
                          a_conj, inca_conj, lda_conj );
        }
    }

    // By default the BLAS call reads B directly.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    if ( need_copyb )
    {
        // B is copied either as-is or transposed. When a transposition is
        // needed, m and n have already been swapped, so in both cases the
        // copy is m-by-n with leading dimension m.
        char transb = BLIS_NO_TRANSPOSE;

        if ( need_transb ) transb = BLIS_TRANSPOSE;

        b_copy    = bli_zallocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bli_zcopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    if ( !need_axpyt )
    {
        // The product can be accumulated straight into C.
        bli_zhemm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a_conj, lda_conj,
                        b_copy, ldb_copy,
                        beta,
                        c, ldc );
    }
    else
    {
        // m and n are the dimensions of C, so the temporary holding C^T
        // is n-by-m (column-major) with leading dimension n.
        c_trans    = bli_zallocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) into C_trans. The axpyt cases never
        // coincide with the conja/copyb cases, hence a, b, lda and ldb
        // are used directly.
        bli_zhemm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a, lda,
                        b, ldb,
                        &zero,
                        c_trans, ldc_trans );

        // C := beta * C
        bli_zscalm( BLIS_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );

        // C := C + C_trans^T
        bli_zaxpymt( BLIS_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        bli_zfree( c_trans );
    }

    // Release the operand temporaries allocated in this function.
    if ( need_conja && !a_was_copied ) bli_zfree( a_conj );
    if ( need_copyb )                  bli_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result in C
    // back to the caller's original matrix.
    bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );
    bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );
    bli_zfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
void bli_zhemm_blas( char side, char uplo, int m, int n, dcomplex* alpha, dcomplex* a, int lda, dcomplex* b, int ldb, dcomplex* beta, dcomplex* c, int ldc )
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_zhemm(), CblasColMajor, and F77_zhemm().
Referenced by bli_zhemm().
{
    // Thin dispatch layer: map the BLIS side/uplo parameters onto the
    // values expected by the underlying BLAS, then forward every other
    // argument unchanged to zhemm.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: flags are chars and all scalar integers are
    // passed by address.
    char f77_side;
    char f77_uplo;

    bli_param_map_to_netlib_side( side, &f77_side );
    bli_param_map_to_netlib_uplo( uplo, &f77_uplo );

    F77_zhemm( &f77_side,
               &f77_uplo,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#else
    // CBLAS interface: flags are enums, integers are passed by value, and
    // the matrices are declared column-major.
    enum CBLAS_SIDE netlib_side;
    enum CBLAS_UPLO netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    cblas_zhemm( CblasColMajor,
                 netlib_side,
                 netlib_uplo,
                 m,
                 n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#endif
}
void bli_zher2k( char uplo, char trans, int m, int k, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs, double* beta, dcomplex* c, int c_rs, int c_cs )
References bli_d0(), bli_does_trans(), bli_is_col_storage(), bli_z1(), bli_zallocm(), bli_zaxpymrt(), bli_zcopymt(), bli_zcreate_contigmr(), bli_zcreate_contigmt(), bli_zdscalmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigmr(), and bli_zher2k_blas().
Referenced by FLA_Her2k_external().
{
    // Snapshot of the caller's arguments. The working copies may be
    // redirected to temporary contiguous matrices or toggled below; the
    // saved values are needed to release the temporaries and write C back.
    char      uplo_save = uplo;
    int       m_save    = m;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double    zero_r    = bli_d0();
    dcomplex  one       = bli_z1();
    dcomplex  alpha_copy;
    dcomplex* a_copy;
    dcomplex* b_copy;
    dcomplex* c_conj;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    int       ldc_conj, incc_conj;
    int       need_copya      = FALSE;  // A must be copied to column-major storage
    int       need_copyb      = FALSE;  // B must be copied to column-major storage
    int       need_conj       = FALSE;  // product goes to a temporary, then C += conj(tmp)
    int       need_alpha_conj = FALSE;  // the BLAS call receives conj(alpha)
    int       c_is_col;
    int       a_is_col;
    int       b_is_col;
    int       ab_both_row;

    // Nothing to do when the problem has a zero dimension.
    if ( bli_zero_dim2( m, k ) ) return;

    // Where necessary, switch to temporary contiguous copies of the
    // operands.
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );
    bli_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Start from the column-major interpretation of each operand.
    lda = a_cs;  inca = a_rs;
    ldb = b_cs;  incb = b_rs;
    ldc = c_cs;  incc = c_rs;

    // Classify the (possibly redirected) operands once, up front.
    c_is_col    = bli_is_col_storage( c_rs, c_cs );
    a_is_col    = bli_is_col_storage( a_rs, a_cs );
    b_is_col    = bli_is_col_storage( b_rs, b_cs );
    ab_both_row = ( !a_is_col && !b_is_col );

    // Storage combinations and the column-major operation actually issued
    // (subscripts _c / _r denote column / row storage, ~uplo the opposite
    // triangle):
    //
    //   C   A   B   effective operation
    //   c   c   c    uplo( C_c ) += A_c * B_c' + B_c * A_c'
    //   c   c   r    uplo( C_c ) += A_c * B_c' + B_c * A_c'          (B copied)
    //   c   r   c    uplo( C_c ) += A_c * B_c' + B_c * A_c'          (A copied)
    //   c   r   r    uplo( C_c ) += conj( A_c' * B_c + B_c' * A_c )
    //   r   c   c   ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )
    //   r   c   r   ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )  (B copied)
    //   r   r   c   ~uplo( C_c ) += conj( A_c * B_c' + B_c * A_c' )  (A copied)
    //   r   r   r   ~uplo( C_c ) += A_c' * B_c + B_c' * A_c
    //
    // The adjustments for C and for the (A, B) pair touch disjoint
    // variables, so they are applied independently below.

    // Row-stored C is addressed as column-major with the opposite triangle.
    if ( !c_is_col )
    {
        bli_swap_ints( ldc, incc );
        bli_toggle_uplo( uplo );
    }

    if ( ab_both_row )
    {
        // Both row-stored: address them as column-major, flip the
        // transposition, and conjugate alpha.
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_conjtrans( trans );
        need_alpha_conj = TRUE;
    }
    else if ( !a_is_col )
    {
        // Mixed storage: bring the row-stored operand (A) to column-major.
        need_copya = TRUE;
    }
    else if ( !b_is_col )
    {
        // Mixed storage: bring the row-stored operand (B) to column-major.
        need_copyb = TRUE;
    }

    // A conjugating accumulation is required in exactly the four cases
    // marked conj( ... ) in the table above.
    if ( c_is_col ) need_conj = ab_both_row;
    else            need_conj = !ab_both_row;

    // Work on a private copy of alpha so the caller's value is untouched.
    alpha_copy = *alpha;
    if ( need_alpha_conj )
    {
        bli_zconjs( &alpha_copy );
    }

    // By default the BLAS call reads A directly.
    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;

    if ( need_copya )
    {
        // Dimensions of A as stored, which depend on trans; m_a becomes
        // the leading dimension of the copy.
        int m_a;
        int n_a;

        if ( bli_does_trans( trans ) ) { m_a = k; n_a = m; }
        else                           { m_a = m; n_a = k; }

        a_copy    = bli_zallocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        bli_zcopymt( BLIS_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }

    // By default the BLAS call reads B directly.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    if ( need_copyb )
    {
        // Dimensions of B as stored, which depend on trans; m_b becomes
        // the leading dimension of the copy.
        int m_b;
        int n_b;

        if ( bli_does_trans( trans ) ) { m_b = k; n_b = m; }
        else                           { m_b = m; n_b = k; }

        b_copy    = bli_zallocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        bli_zcopymt( BLIS_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    if ( !need_conj )
    {
        // The rank-2k product can be accumulated straight into C.
        bli_zher2k_blas( uplo,
                         trans,
                         m,
                         k,
                         &alpha_copy,
                         a_copy, lda_copy,
                         b_copy, ldb_copy,
                         beta,
                         c, ldc );
    }
    else
    {
        // Temporary m-by-m matrix holding the rank-2k product.
        c_conj    = bli_zallocm( m, m );
        ldc_conj  = m;
        incc_conj = 1;

        // C_conj := rank-2k product (beta = 0).
        bli_zher2k_blas( uplo,
                         trans,
                         m,
                         k,
                         &alpha_copy,
                         a_copy, lda_copy,
                         b_copy, ldb_copy,
                         &zero_r,
                         c_conj, ldc_conj );

        // C := beta * C   (stored triangle only)
        bli_zdscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, incc, ldc );

        // C := C + conj( C_conj )   (stored triangle only)
        bli_zaxpymrt( uplo,
                      BLIS_CONJ_NO_TRANSPOSE,
                      m,
                      m,
                      &one,
                      c_conj, incc_conj, ldc_conj,
                      c,      incc,      ldc );

        bli_zfree( c_conj );
    }

    // Release the operand temporaries allocated in this function.
    if ( need_copya ) bli_zfree( a_copy );
    if ( need_copyb ) bli_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result in C
    // back to the caller's original matrix.
    bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );
    bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );
    bli_zfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bli_zher2k_blas( char uplo, char trans, int m, int k, dcomplex* alpha, dcomplex* a, int lda, dcomplex* b, int ldb, double* beta, dcomplex* c, int ldc )
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_zher2k(), CblasColMajor, and F77_zher2k().
Referenced by bli_zher2k().
{
    // Thin dispatch layer: map the BLIS uplo/trans parameters onto the
    // values expected by the underlying BLAS, then forward every other
    // argument to zher2k.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: flags are chars and all scalar integers are
    // passed by address; beta is forwarded as the pointer received.
    char f77_uplo;
    char f77_trans;

    bli_param_map_to_netlib_uplo( uplo, &f77_uplo );
    bli_param_map_to_netlib_trans( trans, &f77_trans );

    F77_zher2k( &f77_uplo,
                &f77_trans,
                &m,
                &k,
                alpha,
                a, &lda,
                b, &ldb,
                beta,
                c, &ldc );
#else
    // CBLAS interface: flags are enums, integers are passed by value, the
    // real scalar beta is dereferenced and passed by value, and the
    // matrices are declared column-major.
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;

    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );

    cblas_zher2k( CblasColMajor,
                  netlib_uplo,
                  netlib_trans,
                  m,
                  k,
                  alpha,
                  a, lda,
                  b, ldb,
                  *beta,
                  c, ldc );
#endif
}
void bli_zherk( char uplo, char trans, int m, int k, double* alpha, dcomplex* a, int a_rs, int a_cs, double* beta, dcomplex* c, int c_rs, int c_cs )
References bli_d0(), bli_is_col_storage(), bli_z1(), bli_zallocm(), bli_zaxpymrt(), bli_zcreate_contigmr(), bli_zcreate_contigmt(), bli_zdscalmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigmr(), and bli_zherk_blas().
Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opz_var1().
{
    // Snapshot of the caller's arguments. The working copies may be
    // redirected to temporary contiguous matrices or toggled below; the
    // saved values are needed to release the temporaries and write C back.
    char      uplo_save = uplo;
    int       m_save    = m;
    dcomplex* a_save    = a;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    double    zero_r    = bli_d0();
    dcomplex  one       = bli_z1();
    dcomplex* c_conj;
    int       lda, inca;
    int       ldc, incc;
    int       ldc_conj, incc_conj;
    int       need_conj = FALSE;  // product goes to a temporary, then C += conj(tmp)
    int       c_is_col;
    int       a_is_col;

    // Nothing to do when the problem has a zero dimension.
    if ( bli_zero_dim2( m, k ) ) return;

    // Where necessary, switch to temporary contiguous copies of the
    // operands.
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );
    bli_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Start from the column-major interpretation of each operand.
    lda = a_cs;  inca = a_rs;
    ldc = c_cs;  incc = c_rs;

    // Classify the (possibly redirected) operands once, up front.
    c_is_col = bli_is_col_storage( c_rs, c_cs );
    a_is_col = bli_is_col_storage( a_rs, a_cs );

    // Storage combinations and the column-major operation actually issued
    // (subscripts _c / _r denote column / row storage, ~uplo the opposite
    // triangle):
    //
    //   C   A   effective operation
    //   c   c    uplo( C_c ) += A_c * A_c'
    //   c   r    uplo( C_c ) += conj( A_c' * A_c )
    //   r   c   ~uplo( C_c ) += conj( A_c * A_c' )
    //   r   r   ~uplo( C_c ) += A_c' * A_c
    //
    // The adjustments for A and for C touch disjoint variables, so they
    // are applied independently below.

    // Row-stored A is addressed as column-major with the transposition
    // flipped.
    if ( !a_is_col )
    {
        bli_swap_ints( lda, inca );
        bli_toggle_conjtrans( trans );
    }

    // Row-stored C is addressed as column-major with the opposite triangle.
    if ( !c_is_col )
    {
        bli_swap_ints( ldc, incc );
        bli_toggle_uplo( uplo );
    }

    // A conjugating accumulation is required exactly when A and C differ
    // in storage.
    need_conj = ( c_is_col && !a_is_col ) || ( !c_is_col && a_is_col );

    if ( !need_conj )
    {
        // The rank-k product can be accumulated straight into C.
        bli_zherk_blas( uplo,
                        trans,
                        m,
                        k,
                        alpha,
                        a, lda,
                        beta,
                        c, ldc );
    }
    else
    {
        // Temporary m-by-m matrix holding the rank-k product.
        c_conj    = bli_zallocm( m, m );
        ldc_conj  = m;
        incc_conj = 1;

        // C_conj := rank-k product (beta = 0).
        bli_zherk_blas( uplo,
                        trans,
                        m,
                        k,
                        alpha,
                        a, lda,
                        &zero_r,
                        c_conj, ldc_conj );

        // C := beta * C   (stored triangle only)
        bli_zdscalmr( uplo,
                      m,
                      m,
                      beta,
                      c, incc, ldc );

        // C := C + conj( C_conj )   (stored triangle only)
        bli_zaxpymrt( uplo,
                      BLIS_CONJ_NO_TRANSPOSE,
                      m,
                      m,
                      &one,
                      c_conj, incc_conj, ldc_conj,
                      c,      incc,      ldc );

        bli_zfree( c_conj );
    }

    // Free any temporary contiguous matrices, copying the result in C
    // back to the caller's original matrix.
    bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );
    bli_zfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bli_zherk_blas( char uplo, char trans, int m, int k, double* alpha, dcomplex* a, int lda, double* beta, dcomplex* c, int ldc )
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().
Referenced by bli_zherk().
{
    // Thin dispatch layer: map the BLIS uplo/trans parameters onto the
    // values expected by the underlying BLAS, then forward every other
    // argument to zherk.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: flags are chars and all scalar integers are
    // passed by address; alpha and beta are forwarded as the pointers
    // received.
    char f77_uplo;
    char f77_trans;

    bli_param_map_to_netlib_uplo( uplo, &f77_uplo );
    bli_param_map_to_netlib_trans( trans, &f77_trans );

    F77_zherk( &f77_uplo,
               &f77_trans,
               &m,
               &k,
               alpha,
               a, &lda,
               beta,
               c, &ldc );
#else
    // CBLAS interface: flags are enums, integers are passed by value, the
    // real scalars alpha and beta are dereferenced and passed by value,
    // and the matrices are declared column-major.
    enum CBLAS_UPLO      netlib_uplo;
    enum CBLAS_TRANSPOSE netlib_trans;

    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );
    bli_param_map_to_netlib_trans( trans, &netlib_trans );

    cblas_zherk( CblasColMajor,
                 netlib_uplo,
                 netlib_trans,
                 m,
                 k,
                 *alpha,
                 a, lda,
                 *beta,
                 c, ldc );
#endif
}
void bli_zsymm( char side, char uplo, int m, int n, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs, dcomplex* beta, dcomplex* c, int c_rs, int c_cs )
References bli_is_col_storage(), bli_set_dim_with_side(), bli_z0(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zscalm(), and bli_zsymm_blas().
Referenced by FLA_Symm_external().
{
    // Snapshot of the caller's arguments. The working copies (a, b, c, the
    // strides, m, n, side, uplo) may be redirected to temporary contiguous
    // matrices, swapped, or toggled below; the saved values are needed to
    // release the temporaries and write C back at the end.
    int       m_save    = m;
    int       n_save    = n;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    dcomplex  zero      = bli_z0();
    dcomplex  one       = bli_z1();
    dcomplex* b_copy;
    dcomplex* c_trans;
    int       dim_a;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       ldb_copy, incb_copy;
    int       ldc_trans, incc_trans;
    int       need_copyb  = FALSE;  // B must be copied to column-major storage
    int       need_transb = FALSE;  // ...and that copy must also transpose B
    int       need_axpyt  = FALSE;  // product goes to a temporary, then C += tmp^T
    int       c_is_col;
    int       a_is_col;
    int       b_is_col;

    // Nothing to do when the m-by-n problem has a zero dimension.
    if ( bli_zero_dim2( m, n ) ) return;

    // Where necessary, switch to temporary contiguous copies of the
    // operands. A is dim_a-by-dim_a, with dim_a selected from m and n
    // according to side.
    bli_set_dim_with_side( side, m, n, &dim_a );
    bli_zcreate_contigmr( uplo,
                          dim_a,
                          dim_a,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );
    bli_zcreate_contigm( m,
                         n,
                         b_save, b_rs_save, b_cs_save,
                         &b,     &b_rs,     &b_cs );
    bli_zcreate_contigm( m,
                         n,
                         c_save, c_rs_save, c_cs_save,
                         &c,     &c_rs,     &c_cs );

    // Start from the column-major interpretation of each operand.
    lda = a_cs;  inca = a_rs;
    ldb = b_cs;  incb = b_rs;
    ldc = c_cs;  incc = c_rs;

    // Classify the (possibly redirected) operands once, up front.
    c_is_col = bli_is_col_storage( c_rs, c_cs );
    a_is_col = bli_is_col_storage( a_rs, a_cs );
    b_is_col = bli_is_col_storage( b_rs, b_cs );

    // Adjust the parameters for each of the eight storage combinations.
    // Subscripts _c / _r denote column / row storage; ~uplo is the
    // opposite triangle. No conjugation step exists in this routine.
    if ( c_is_col && a_is_col && b_is_col )
    {
        // requested: C_c += uplo( A_c ) * B_c
        // effective: C_c += uplo( A_c ) * B_c   (nothing to adjust)
    }
    else if ( c_is_col && a_is_col && !b_is_col )
    {
        // requested: C_c += uplo( A_c ) * B_r
        // effective: C_c += uplo( A_c ) * B_c
        need_copyb = TRUE;
    }
    else if ( c_is_col && !a_is_col && b_is_col )
    {
        // requested: C_c += uplo( A_r ) * B_c
        // effective: C_c += ~uplo( A_c ) * B_c
        bli_swap_ints( lda, inca );
        bli_toggle_uplo( uplo );
    }
    else if ( c_is_col && !a_is_col && !b_is_col )
    {
        // requested: C_c += uplo( A_r ) * B_r
        // effective: C_c += ( B_c * ~uplo( A_c ) )^T
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_side( side );
        bli_toggle_uplo( uplo );
        need_axpyt = TRUE;
    }
    else if ( !c_is_col && a_is_col && b_is_col )
    {
        // requested: C_r += uplo( A_c ) * B_c
        // effective: C_c += ( uplo( A_c ) * B_c )^T
        bli_swap_ints( ldc, incc );
        bli_swap_ints( m, n );
        need_axpyt = TRUE;
    }
    else if ( !c_is_col && a_is_col && !b_is_col )
    {
        // requested: C_r += uplo( A_c ) * B_r
        // effective: C_c += B_c * uplo( A_c )   (side toggled)
        bli_swap_ints( ldc, incc );
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
    }
    else if ( !c_is_col && !a_is_col && b_is_col )
    {
        // requested: C_r += uplo( A_r ) * B_c
        // effective: C_c += B_c^T * ~uplo( A_c )
        bli_swap_ints( ldc, incc );
        bli_swap_ints( lda, inca );
        bli_swap_ints( m, n );
        bli_toggle_side( side );
        bli_toggle_uplo( uplo );
        need_copyb  = TRUE;
        need_transb = TRUE;
    }
    else
    {
        // requested: C_r += uplo( A_r ) * B_r
        // effective: C_c += B_c * ~uplo( A_c )
        bli_swap_ints( ldc, incc );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_swap_ints( m, n );
        bli_toggle_uplo( uplo );
        bli_toggle_side( side );
    }

    // By default the BLAS call reads B directly.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    if ( need_copyb )
    {
        // B is copied either as-is or transposed. When a transposition is
        // needed, m and n have already been swapped, so in both cases the
        // copy is m-by-n with leading dimension m.
        char transb = BLIS_NO_TRANSPOSE;

        if ( need_transb ) transb = BLIS_TRANSPOSE;

        b_copy    = bli_zallocm( m, n );
        ldb_copy  = m;
        incb_copy = 1;

        bli_zcopymt( transb,
                     m,
                     n,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    if ( !need_axpyt )
    {
        // The product can be accumulated straight into C.
        bli_zsymm_blas( side,
                        uplo,
                        m,
                        n,
                        alpha,
                        a, lda,
                        b_copy, ldb_copy,
                        beta,
                        c, ldc );
    }
    else
    {
        // m and n are the dimensions of C, so the temporary holding C^T
        // is n-by-m (column-major) with leading dimension n.
        c_trans    = bli_zallocm( n, m );
        ldc_trans  = n;
        incc_trans = 1;

        // Compute A * B (or B * A) into C_trans. The axpyt cases never
        // coincide with the copyb cases, hence b and ldb are used
        // directly.
        bli_zsymm_blas( side,
                        uplo,
                        n,
                        m,
                        alpha,
                        a, lda,
                        b, ldb,
                        &zero,
                        c_trans, ldc_trans );

        // C := beta * C
        bli_zscalm( BLIS_NO_CONJUGATE,
                    m,
                    n,
                    beta,
                    c, incc, ldc );

        // C := C + C_trans^T
        bli_zaxpymt( BLIS_TRANSPOSE,
                     m,
                     n,
                     &one,
                     c_trans, incc_trans, ldc_trans,
                     c,       incc,       ldc );

        bli_zfree( c_trans );
    }

    // Release the copy of B allocated in this function, if any.
    if ( need_copyb ) bli_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result in C
    // back to the caller's original matrix.
    bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );
    bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );
    bli_zfree_saved_contigm( m_save,
                             n_save,
                             c_save, c_rs_save, c_cs_save,
                             &c,     &c_rs,     &c_cs );
}
void bli_zsymm_blas( char side, char uplo, int m, int n, dcomplex* alpha, dcomplex* a, int lda, dcomplex* b, int ldb, dcomplex* beta, dcomplex* c, int ldc )
References bli_param_map_to_netlib_side(), bli_param_map_to_netlib_uplo(), cblas_zsymm(), CblasColMajor, and F77_zsymm().
Referenced by bli_zsymm().
{
    // Thin dispatch layer: map the BLIS side/uplo parameters onto the
    // values expected by the underlying BLAS, then forward every other
    // argument unchanged to zsymm.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 interface: flags are chars and all scalar integers are
    // passed by address.
    char f77_side;
    char f77_uplo;

    bli_param_map_to_netlib_side( side, &f77_side );
    bli_param_map_to_netlib_uplo( uplo, &f77_uplo );

    F77_zsymm( &f77_side,
               &f77_uplo,
               &m,
               &n,
               alpha,
               a, &lda,
               b, &ldb,
               beta,
               c, &ldc );
#else
    // CBLAS interface: flags are enums, integers are passed by value, and
    // the matrices are declared column-major.
    enum CBLAS_SIDE netlib_side;
    enum CBLAS_UPLO netlib_uplo;

    bli_param_map_to_netlib_side( side, &netlib_side );
    bli_param_map_to_netlib_uplo( uplo, &netlib_uplo );

    cblas_zsymm( CblasColMajor,
                 netlib_side,
                 netlib_uplo,
                 m,
                 n,
                 alpha,
                 a, lda,
                 b, ldb,
                 beta,
                 c, ldc );
#endif
}
void bli_zsyr2k( char uplo, char trans, int m, int k, dcomplex* alpha, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs, dcomplex* beta, dcomplex* c, int c_rs, int c_cs )
References bli_does_trans(), bli_is_col_storage(), bli_zallocm(), bli_zcopymt(), bli_zcreate_contigmr(), bli_zcreate_contigmt(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigmr(), and bli_zsyr2k_blas().
Referenced by FLA_Syr2k_external().
{
    // Snapshot of the caller's arguments. The working copies may be
    // redirected to temporary contiguous matrices or toggled below; the
    // saved values are needed to release the temporaries and write C back.
    char      uplo_save = uplo;
    int       m_save    = m;
    dcomplex* a_save    = a;
    dcomplex* b_save    = b;
    dcomplex* c_save    = c;
    int       a_rs_save = a_rs;
    int       a_cs_save = a_cs;
    int       b_rs_save = b_rs;
    int       b_cs_save = b_cs;
    int       c_rs_save = c_rs;
    int       c_cs_save = c_cs;
    dcomplex* a_copy;
    dcomplex* b_copy;
    int       lda, inca;
    int       ldb, incb;
    int       ldc, incc;
    int       lda_copy, inca_copy;
    int       ldb_copy, incb_copy;
    int       need_copya = FALSE;  // A must be copied to column-major storage
    int       need_copyb = FALSE;  // B must be copied to column-major storage
    int       c_is_col;
    int       a_is_col;
    int       b_is_col;

    // Nothing to do when the problem has a zero dimension.
    if ( bli_zero_dim2( m, k ) ) return;

    // Where necessary, switch to temporary contiguous copies of the
    // operands.
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          a_save, a_rs_save, a_cs_save,
                          &a,     &a_rs,     &a_cs );
    bli_zcreate_contigmt( trans,
                          m,
                          k,
                          b_save, b_rs_save, b_cs_save,
                          &b,     &b_rs,     &b_cs );
    bli_zcreate_contigmr( uplo,
                          m,
                          m,
                          c_save, c_rs_save, c_cs_save,
                          &c,     &c_rs,     &c_cs );

    // Start from the column-major interpretation of each operand.
    lda = a_cs;  inca = a_rs;
    ldb = b_cs;  incb = b_rs;
    ldc = c_cs;  incc = c_rs;

    // Classify the (possibly redirected) operands once, up front.
    c_is_col = bli_is_col_storage( c_rs, c_cs );
    a_is_col = bli_is_col_storage( a_rs, a_cs );
    b_is_col = bli_is_col_storage( b_rs, b_cs );

    // The adjustments for C and for the (A, B) pair touch disjoint
    // variables, so they are applied independently:
    //
    //   C row-stored        -> address C as column-major, opposite triangle
    //   A, B both row       -> address both as column-major, flip trans
    //   only A row-stored   -> copy A to column-major storage
    //   only B row-stored   -> copy B to column-major storage
    //   A, B both column    -> nothing to adjust

    if ( !c_is_col )
    {
        bli_swap_ints( ldc, incc );
        bli_toggle_uplo( uplo );
    }

    if ( !a_is_col && !b_is_col )
    {
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_trans( trans );
    }
    else if ( !a_is_col )
    {
        need_copya = TRUE;
    }
    else if ( !b_is_col )
    {
        need_copyb = TRUE;
    }

    // By default the BLAS call reads A directly.
    a_copy    = a;
    lda_copy  = lda;
    inca_copy = inca;

    if ( need_copya )
    {
        // Dimensions of A as stored, which depend on trans; m_a becomes
        // the leading dimension of the copy.
        int m_a;
        int n_a;

        if ( bli_does_trans( trans ) ) { m_a = k; n_a = m; }
        else                           { m_a = m; n_a = k; }

        a_copy    = bli_zallocm( m, k );
        lda_copy  = m_a;
        inca_copy = 1;

        bli_zcopymt( BLIS_NO_TRANSPOSE,
                     m_a,
                     n_a,
                     a,      inca,      lda,
                     a_copy, inca_copy, lda_copy );
    }

    // By default the BLAS call reads B directly.
    b_copy    = b;
    ldb_copy  = ldb;
    incb_copy = incb;

    if ( need_copyb )
    {
        // Dimensions of B as stored, which depend on trans; m_b becomes
        // the leading dimension of the copy.
        int m_b;
        int n_b;

        if ( bli_does_trans( trans ) ) { m_b = k; n_b = m; }
        else                           { m_b = m; n_b = k; }

        b_copy    = bli_zallocm( m, k );
        ldb_copy  = m_b;
        incb_copy = 1;

        bli_zcopymt( BLIS_NO_TRANSPOSE,
                     m_b,
                     n_b,
                     b,      incb,      ldb,
                     b_copy, incb_copy, ldb_copy );
    }

    // The rank-2k update is always accumulated straight into C.
    bli_zsyr2k_blas( uplo,
                     trans,
                     m,
                     k,
                     alpha,
                     a_copy, lda_copy,
                     b_copy, ldb_copy,
                     beta,
                     c, ldc );

    // Release the operand temporaries allocated in this function.
    if ( need_copya ) bli_zfree( a_copy );
    if ( need_copyb ) bli_zfree( b_copy );

    // Free any temporary contiguous matrices, copying the result in C
    // back to the caller's original matrix.
    bli_zfree_contigm( a_save, a_rs_save, a_cs_save,
                       &a,     &a_rs,     &a_cs );
    bli_zfree_contigm( b_save, b_rs_save, b_cs_save,
                       &b,     &b_rs,     &b_cs );
    bli_zfree_saved_contigmr( uplo_save,
                              m_save,
                              m_save,
                              c_save, c_rs_save, c_cs_save,
                              &c,     &c_rs,     &c_cs );
}
void bli_zsyr2k_blas | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | lda, | ||
dcomplex * | b, | ||
int | ldb, | ||
dcomplex * | beta, | ||
dcomplex * | c, | ||
int | ldc | ||
) |
References bli_is_conjtrans(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_zsyr2k(), CblasColMajor, and F77_zsyr2k().
Referenced by bli_zsyr2k().
{ // BLAS doesn't recognize the conjugate-transposition constant for syr2k, // so we have to map it down to regular transposition. if ( bli_is_conjtrans( trans ) ) trans = BLIS_TRANSPOSE; #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); cblas_zsyr2k( cblas_order, cblas_uplo, cblas_trans, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); #else char blas_uplo; char blas_trans; bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); F77_zsyr2k( &blas_uplo, &blas_trans, &m, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc ); #endif }
void bli_zsyrk | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | beta, | ||
dcomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_is_col_storage(), bli_zcreate_contigmr(), bli_zcreate_contigmt(), bli_zero_dim2(), bli_zfree_contigm(), bli_zfree_saved_contigmr(), and bli_zsyrk_blas().
Referenced by FLA_Syrk_external().
{ char uplo_save = uplo; int m_save = m; dcomplex* a_save = a; dcomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; int lda, inca; int ldc, incc; // Return early if possible. if ( bli_zero_dim2( m, k ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_zcreate_contigmt( trans, m, k, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zcreate_contigmr( uplo, m, m, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldc = c_cs; incc = c_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_c ) += A_c * A_c^T // effective operation: uplo( C_c ) += A_c * A_c^T } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_c ) += A_r * A_r^T // effective operation: uplo( C_c ) += A_c^T * A_c bli_swap_ints( lda, inca ); bli_toggle_trans( trans ); } } else // if ( bli_is_row_storage( c_rs, c_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_r ) += A_c * A_c^T // effective operation: ~uplo( C_c ) += A_c * A_c^T bli_swap_ints( ldc, incc ); bli_toggle_uplo( uplo ); } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: uplo( C_r ) += A_r * A_r^T // effective operation: ~uplo( C_c ) += A_c^T * A_c bli_swap_ints( ldc, incc ); bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } bli_zsyrk_blas( uplo, trans, m, k, alpha, a, lda, beta, c, ldc ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_zfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zfree_saved_contigmr( uplo_save, m_save, m_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_zsyrk_blas | ( | char | uplo, |
char | trans, | ||
int | m, | ||
int | k, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | lda, | ||
dcomplex * | beta, | ||
dcomplex * | c, | ||
int | ldc | ||
) |
References bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_zsyrk(), CblasColMajor, and F77_zsyrk().
Referenced by bli_zsyrk().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); cblas_zsyrk( cblas_order, cblas_uplo, cblas_trans, m, k, alpha, a, lda, beta, c, ldc ); #else char blas_uplo; char blas_trans; bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); F77_zsyrk( &blas_uplo, &blas_trans, &m, &k, alpha, a, &lda, beta, c, &ldc ); #endif }
void bli_ztrmm | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_is_col_storage(), bli_is_conjnotrans(), bli_is_left(), bli_set_dim_with_side(), bli_zallocm(), bli_zconjmr(), bli_zcopymrt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), and bli_ztrmm_blas().
Referenced by bli_ztrmmsx(), and FLA_Trmm_external().
{ int m_save = m; int n_save = n; dcomplex* a_save = a; dcomplex* b_save = b; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; dcomplex* a_conj; int dim_a; int lda, inca; int ldb, incb; int lda_conj, inca_conj; int a_was_copied; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_zcreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zcreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); // Figure out whether A was copied to contiguous memory. This is used to // prevent redundant copying. a_was_copied = ( a != a_save ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_c ) ) * B_c // effective operation: B_c := tr( uplo( A_c ) ) * B_c } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_r ) ) * B_c // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_c ) ) * B_r // effective operation: B_c := B_c * tr( uplo( A_c ) )^T bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_trans( trans ); } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_r ) ) * B_r // effective operation: B_c := B_c * tr( ~uplo( A_c ) ) bli_swap_ints( ldb, incb ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_side( side ); 
bli_toggle_uplo( uplo ); } } // Initialize with values assuming that trans is not conjnotrans. a_conj = a; lda_conj = lda; inca_conj = inca; // We want to handle the conjnotrans case. The easiest way to do so is // by making a conjugated copy of A. if ( bli_is_conjnotrans( trans ) && !a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; a_conj = bli_zallocm( dim_a, dim_a ); lda_conj = dim_a; inca_conj = 1; bli_zcopymrt( uplo, BLIS_CONJ_NO_TRANSPOSE, dim_a, dim_a, a, inca, lda, a_conj, inca_conj, lda_conj ); } else if ( bli_is_conjnotrans( trans ) && a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; bli_zconjmr( uplo, dim_a, dim_a, a_conj, inca_conj, lda_conj ); } bli_ztrmm_blas( side, uplo, trans, diag, m, n, alpha, a_conj, lda_conj, b, ldb ); if ( bli_is_conjnotrans( trans ) && !a_was_copied ) bli_zfree( a_conj ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_zfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zfree_saved_contigm( m_save, n_save, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); }
void bli_ztrmm_blas | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | lda, | ||
dcomplex * | b, | ||
int | ldb | ||
) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ztrmm(), CblasColMajor, and F77_ztrmm().
Referenced by bli_ztrmm().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_SIDE cblas_side; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; enum CBLAS_DIAG cblas_diag; bli_param_map_to_netlib_side( side, &cblas_side ); bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); bli_param_map_to_netlib_diag( diag, &cblas_diag ); cblas_ztrmm( cblas_order, cblas_side, cblas_uplo, cblas_trans, cblas_diag, m, n, alpha, a, lda, b, ldb ); #else char blas_side; char blas_uplo; char blas_trans; char blas_diag; bli_param_map_to_netlib_side( side, &blas_side ); bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); bli_param_map_to_netlib_diag( diag, &blas_diag ); F77_ztrmm( &blas_side, &blas_uplo, &blas_trans, &blas_diag, &m, &n, alpha, a, &lda, b, &ldb ); #endif }
void bli_ztrmmsx | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
dcomplex * | beta, | ||
dcomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_is_col_storage(), bli_set_dim_with_side(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zscalm(), and bli_ztrmm().
Referenced by FLA_Trmmsx_external().
{ int m_save = m; int n_save = n; dcomplex* a_save = a; dcomplex* b_save = b; dcomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; dcomplex one = bli_z1(); dcomplex* b_copy; int dim_a; int b_copy_rs, b_copy_cs; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_zcreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zcreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_zcreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Create a copy of B to use in the computation so the original matrix is // left untouched. b_copy = bli_zallocm( m, n ); // Match the strides of B_copy to that of B. if ( bli_is_col_storage( b_rs, b_cs ) ) { b_copy_rs = 1; b_copy_cs = m; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { b_copy_rs = n; b_copy_cs = 1; } // Copy the contents of B to B_copy. bli_zcopymt( BLIS_NO_CONJUGATE, m, n, b, b_rs, b_cs, b_copy, b_copy_rs, b_copy_cs ); // Perform the operation on B_copy. bli_ztrmm( side, uplo, trans, diag, m, n, alpha, a, a_rs, a_cs, b_copy, b_copy_rs, b_copy_cs ); // Scale C by beta. bli_zscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); // Add B_copy into C. bli_zaxpymt( BLIS_NO_TRANSPOSE, m, n, &one, b_copy, b_copy_rs, b_copy_cs, c, c_rs, c_cs ); // Free the copy of B. bli_zfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_zfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_zfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }
void bli_ztrsm | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs | ||
) |
References bli_is_col_storage(), bli_is_conjnotrans(), bli_is_left(), bli_set_dim_with_side(), bli_zallocm(), bli_zconjmr(), bli_zcopymrt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), and bli_ztrsm_blas().
Referenced by bli_ztrsmsx(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_piv_opz_var3(), and FLA_Trsm_external().
{ int m_save = m; int n_save = n; dcomplex* a_save = a; dcomplex* b_save = b; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; dcomplex* a_conj; int dim_a; int lda, inca; int ldb, incb; int lda_conj, inca_conj; int a_was_copied; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_zcreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zcreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); // Figure out whether A was copied to contiguous memory. This is used to // prevent redundant copying. a_was_copied = ( a != a_save ); // Initialize with values assuming column-major storage. lda = a_cs; inca = a_rs; ldb = b_cs; incb = b_rs; // Adjust the parameters based on the storage of each matrix. if ( bli_is_col_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_c ) ) * B_c // effective operation: B_c := tr( uplo( A_c ) ) * B_c } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_c := tr( uplo( A_r ) ) * B_c // effective operation: B_c := tr( ~uplo( A_c ) )^T * B_c bli_swap_ints( lda, inca ); bli_toggle_uplo( uplo ); bli_toggle_trans( trans ); } } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { if ( bli_is_col_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_c ) ) * B_r // effective operation: B_c := B_c * tr( uplo( A_c ) )^T bli_swap_ints( ldb, incb ); bli_swap_ints( m, n ); bli_toggle_side( side ); bli_toggle_trans( trans ); } else // if ( bli_is_row_storage( a_rs, a_cs ) ) { // requested operation: B_r := tr( uplo( A_r ) ) * B_r // effective operation: B_c := B_c * tr( ~uplo( A_c ) ) bli_swap_ints( ldb, incb ); bli_swap_ints( lda, inca ); bli_swap_ints( m, n ); bli_toggle_side( side ); 
bli_toggle_uplo( uplo ); } } // Initialize with values assuming that trans is not conjnotrans. a_conj = a; lda_conj = lda; inca_conj = inca; // We want to handle the conjnotrans case. The easiest way to do so is // by making a conjugated copy of A. if ( bli_is_conjnotrans( trans ) && !a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; a_conj = bli_zallocm( dim_a, dim_a ); lda_conj = dim_a; inca_conj = 1; bli_zcopymrt( uplo, BLIS_CONJ_NO_TRANSPOSE, dim_a, dim_a, a, inca, lda, a_conj, inca_conj, lda_conj ); } else if ( bli_is_conjnotrans( trans ) && a_was_copied ) { int dim_a; if ( bli_is_left( side ) ) dim_a = m; else dim_a = n; bli_zconjmr( uplo, dim_a, dim_a, a_conj, inca_conj, lda_conj ); } bli_ztrsm_blas( side, uplo, trans, diag, m, n, alpha, a_conj, lda_conj, b, ldb ); if ( bli_is_conjnotrans( trans ) && !a_was_copied ) bli_zfree( a_conj ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_zfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zfree_saved_contigm( m_save, n_save, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); }
void bli_ztrsm_blas | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | lda, | ||
dcomplex * | b, | ||
int | ldb | ||
) |
References bli_param_map_to_netlib_diag(), bli_param_map_to_netlib_side(), bli_param_map_to_netlib_trans(), bli_param_map_to_netlib_uplo(), cblas_ztrsm(), CblasColMajor, and F77_ztrsm().
Referenced by bli_ztrsm().
{ #ifdef BLIS_ENABLE_CBLAS_INTERFACES enum CBLAS_ORDER cblas_order = CblasColMajor; enum CBLAS_SIDE cblas_side; enum CBLAS_UPLO cblas_uplo; enum CBLAS_TRANSPOSE cblas_trans; enum CBLAS_DIAG cblas_diag; bli_param_map_to_netlib_side( side, &cblas_side ); bli_param_map_to_netlib_uplo( uplo, &cblas_uplo ); bli_param_map_to_netlib_trans( trans, &cblas_trans ); bli_param_map_to_netlib_diag( diag, &cblas_diag ); cblas_ztrsm( cblas_order, cblas_side, cblas_uplo, cblas_trans, cblas_diag, m, n, alpha, a, lda, b, ldb ); #else char blas_side; char blas_uplo; char blas_trans; char blas_diag; bli_param_map_to_netlib_side( side, &blas_side ); bli_param_map_to_netlib_uplo( uplo, &blas_uplo ); bli_param_map_to_netlib_trans( trans, &blas_trans ); bli_param_map_to_netlib_diag( diag, &blas_diag ); F77_ztrsm( &blas_side, &blas_uplo, &blas_trans, &blas_diag, &m, &n, alpha, a, &lda, b, &ldb ); #endif }
void bli_ztrsmsx | ( | char | side, |
char | uplo, | ||
char | trans, | ||
char | diag, | ||
int | m, | ||
int | n, | ||
dcomplex * | alpha, | ||
dcomplex * | a, | ||
int | a_rs, | ||
int | a_cs, | ||
dcomplex * | b, | ||
int | b_rs, | ||
int | b_cs, | ||
dcomplex * | beta, | ||
dcomplex * | c, | ||
int | c_rs, | ||
int | c_cs | ||
) |
References bli_is_col_storage(), bli_set_dim_with_side(), bli_z1(), bli_zallocm(), bli_zaxpymt(), bli_zcopymt(), bli_zcreate_contigm(), bli_zcreate_contigmr(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zfree_saved_contigm(), bli_zscalm(), and bli_ztrsm().
Referenced by FLA_Trsmsx_external().
{ int m_save = m; int n_save = n; dcomplex* a_save = a; dcomplex* b_save = b; dcomplex* c_save = c; int a_rs_save = a_rs; int a_cs_save = a_cs; int b_rs_save = b_rs; int b_cs_save = b_cs; int c_rs_save = c_rs; int c_cs_save = c_cs; dcomplex one = bli_z1(); dcomplex* b_copy; int dim_a; int b_copy_rs, b_copy_cs; // Return early if possible. if ( bli_zero_dim2( m, n ) ) return; // If necessary, allocate, initialize, and use a temporary contiguous // copy of each matrix rather than the original matrices. bli_set_dim_with_side( side, m, n, &dim_a ); bli_zcreate_contigmr( uplo, dim_a, dim_a, a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zcreate_contigm( m, n, b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_zcreate_contigm( m, n, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); // Create a copy of B to use in the computation so the original matrix is // left untouched. b_copy = bli_zallocm( m, n ); // Match the strides of B_copy to that of B. if ( bli_is_col_storage( b_rs, b_cs ) ) { b_copy_rs = 1; b_copy_cs = m; } else // if ( bli_is_row_storage( b_rs, b_cs ) ) { b_copy_rs = n; b_copy_cs = 1; } // Copy the contents of B to B_copy. bli_zcopymt( BLIS_NO_CONJUGATE, m, n, b, b_rs, b_cs, b_copy, b_copy_rs, b_copy_cs ); // Perform the operation on B_copy. bli_ztrsm( side, uplo, trans, diag, m, n, alpha, a, a_rs, a_cs, b_copy, b_copy_rs, b_copy_cs ); // Scale C by beta. bli_zscalm( BLIS_NO_CONJUGATE, m, n, beta, c, c_rs, c_cs ); // Add B_copy into C. bli_zaxpymt( BLIS_NO_TRANSPOSE, m, n, &one, b_copy, b_copy_rs, b_copy_cs, c, c_rs, c_cs ); // Free the copy of B. bli_zfree( b_copy ); // Free any temporary contiguous matrices, copying the result back to // the original matrix. bli_zfree_contigm( a_save, a_rs_save, a_cs_save, &a, &a_rs, &a_cs ); bli_zfree_contigm( b_save, b_rs_save, b_cs_save, &b, &b_rs, &b_cs ); bli_zfree_saved_contigm( m_save, n_save, c_save, c_rs_save, c_cs_save, &c, &c_rs, &c_cs ); }