libflame  revision_anchor
Functions
blis_prototypes_level1.h File Reference

(r)

Go to the source code of this file.

Functions

void bl1_samax (int n, float *x, int incx, int *index)
 
void bl1_damax (int n, double *x, int incx, int *index)
 
void bl1_camax (int n, scomplex *x, int incx, int *index)
 
void bl1_zamax (int n, dcomplex *x, int incx, int *index)
 
void bl1_sasum (int n, float *x, int incx, float *norm)
 
void bl1_dasum (int n, double *x, int incx, double *norm)
 
void bl1_casum (int n, scomplex *x, int incx, float *norm)
 
void bl1_zasum (int n, dcomplex *x, int incx, double *norm)
 
void bl1_saxpy (int n, float *alpha, float *x, int incx, float *y, int incy)
 
void bl1_daxpy (int n, double *alpha, double *x, int incx, double *y, int incy)
 
void bl1_caxpy (int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zaxpy (int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_saxpyv (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
 
void bl1_daxpyv (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
 
void bl1_caxpyv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zaxpyv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_saxpymt (trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_daxpymt (trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_caxpymt (trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpymt (trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_saxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_daxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_caxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_saxpysv (int n, float *alpha0, float *alpha1, float *x, int incx, float *beta, float *y, int incy)
 
void bl1_daxpysv (int n, double *alpha0, double *alpha1, double *x, int incx, double *beta, double *y, int incy)
 
void bl1_caxpysv (int n, scomplex *alpha0, scomplex *alpha1, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
 
void bl1_zaxpysv (int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
 
void bl1_saxpysmt (trans1_t trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs)
 
void bl1_daxpysmt (trans1_t trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs)
 
void bl1_caxpysmt (trans1_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpysmt (trans1_t trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs)
 
void bl1_sconjv (int m, float *x, int incx)
 
void bl1_dconjv (int m, double *x, int incx)
 
void bl1_cconjv (int m, scomplex *x, int incx)
 
void bl1_zconjv (int m, dcomplex *x, int incx)
 
void bl1_sconjm (int m, int n, float *a, int a_rs, int a_cs)
 
void bl1_dconjm (int m, int n, double *a, int a_rs, int a_cs)
 
void bl1_cconjm (int m, int n, scomplex *a, int a_rs, int a_cs)
 
void bl1_zconjm (int m, int n, dcomplex *a, int a_rs, int a_cs)
 
void bl1_sconjmr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs)
 
void bl1_dconjmr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs)
 
void bl1_cconjmr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs)
 
void bl1_zconjmr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs)
 
void bl1_scopy (int m, float *x, int incx, float *y, int incy)
 
void bl1_dcopy (int m, double *x, int incx, double *y, int incy)
 
void bl1_ccopy (int m, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zcopy (int m, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_icopyv (conj1_t conj, int m, int *x, int incx, int *y, int incy)
 
void bl1_scopyv (conj1_t conj, int m, float *x, int incx, float *y, int incy)
 
void bl1_dcopyv (conj1_t conj, int m, double *x, int incx, double *y, int incy)
 
void bl1_ccopyv (conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zcopyv (conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_sdcopyv (conj1_t conj, int m, float *x, int incx, double *y, int incy)
 
void bl1_dscopyv (conj1_t conj, int m, double *x, int incx, float *y, int incy)
 
void bl1_sccopyv (conj1_t conj, int m, float *x, int incx, scomplex *y, int incy)
 
void bl1_cscopyv (conj1_t conj, int m, scomplex *x, int incx, float *y, int incy)
 
void bl1_szcopyv (conj1_t conj, int m, float *x, int incx, dcomplex *y, int incy)
 
void bl1_zscopyv (conj1_t conj, int m, dcomplex *x, int incx, float *y, int incy)
 
void bl1_dccopyv (conj1_t conj, int m, double *x, int incx, scomplex *y, int incy)
 
void bl1_cdcopyv (conj1_t conj, int m, scomplex *x, int incx, double *y, int incy)
 
void bl1_dzcopyv (conj1_t conj, int m, double *x, int incx, dcomplex *y, int incy)
 
void bl1_zdcopyv (conj1_t conj, int m, dcomplex *x, int incx, double *y, int incy)
 
void bl1_czcopyv (conj1_t conj, int m, scomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_zccopyv (conj1_t conj, int m, dcomplex *x, int incx, scomplex *y, int incy)
 
void bl1_scopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ccopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_sscopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sdcopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dscopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sccopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_cscopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_szcopymr (uplo1_t uplo, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zscopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_ddcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dccopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_cdcopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dzcopymr (uplo1_t uplo, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zdcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_cccopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_czcopymr (uplo1_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zccopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zzcopymr (uplo1_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_scopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_sscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_sccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_szcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_dscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_ddcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_dzcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_cscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_cdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_cccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_czcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zscopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_zdcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_zccopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zzcopymrt (uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_icopymt (trans1_t trans, int m, int n, int *a, int a_rs, int a_cs, int *b, int b_rs, int b_cs)
 
void bl1_scopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_ccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_sscopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sdcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dscopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_sccopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_cscopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_szcopymt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zscopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_ddcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dccopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_cdcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_dzcopymt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zdcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_cccopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_czcopymt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_zccopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zzcopymt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 
void bl1_cdot_in (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
 
void bl1_zdot_in (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
 
void bl1_sdot (conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
 
void bl1_ddot (conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
 
void bl1_cdot (conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
 
void bl1_zdot (conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
 
void bl1_sdots (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
 
void bl1_ddots (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
 
void bl1_cdots (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
 
void bl1_zdots (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
 
void bl1_sdot2s (conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
 
void bl1_ddot2s (conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
 
void bl1_cdot2s (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
 
void bl1_zdot2s (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
 
void bl1_sfnorm (int m, int n, float *a, int a_rs, int a_cs, float *norm)
 
void bl1_dfnorm (int m, int n, double *a, int a_rs, int a_cs, double *norm)
 
void bl1_cfnorm (int m, int n, scomplex *a, int a_rs, int a_cs, float *norm)
 
void bl1_zfnorm (int m, int n, dcomplex *a, int a_rs, int a_cs, double *norm)
 
void bl1_sinvscalv (conj1_t conj, int n, float *alpha, float *x, int incx)
 
void bl1_dinvscalv (conj1_t conj, int n, double *alpha, double *x, int incx)
 
void bl1_csinvscalv (conj1_t conj, int n, float *alpha, scomplex *x, int incx)
 
void bl1_cinvscalv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
 
void bl1_zdinvscalv (conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
 
void bl1_zinvscalv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
 
void bl1_sinvscalm (conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
 
void bl1_dinvscalm (conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
 
void bl1_csinvscalm (conj1_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_cinvscalm (conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_zdinvscalm (conj1_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_zinvscalm (conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_snrm2 (int n, float *x, int incx, float *norm)
 
void bl1_dnrm2 (int n, double *x, int incx, double *norm)
 
void bl1_cnrm2 (int n, scomplex *x, int incx, float *norm)
 
void bl1_znrm2 (int n, dcomplex *x, int incx, double *norm)
 
void bl1_sscal (int n, float *alpha, float *x, int incx)
 
void bl1_dscal (int n, double *alpha, double *x, int incx)
 
void bl1_csscal (int n, float *alpha, scomplex *x, int incx)
 
void bl1_cscal (int n, scomplex *alpha, scomplex *x, int incx)
 
void bl1_zdscal (int n, double *alpha, dcomplex *x, int incx)
 
void bl1_zscal (int n, dcomplex *alpha, dcomplex *x, int incx)
 
void bl1_sscalv (conj1_t conj, int n, float *alpha, float *x, int incx)
 
void bl1_dscalv (conj1_t conj, int n, double *alpha, double *x, int incx)
 
void bl1_csscalv (conj1_t conj, int n, float *alpha, scomplex *x, int incx)
 
void bl1_cscalv (conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
 
void bl1_zdscalv (conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
 
void bl1_zscalv (conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
 
void bl1_sscalm (conj1_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
 
void bl1_dscalm (conj1_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
 
void bl1_csscalm (conj1_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_cscalm (conj1_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_zdscalm (conj1_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_zscalm (conj1_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_sscalmr (uplo1_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
 
void bl1_dscalmr (uplo1_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
 
void bl1_csscalmr (uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_cscalmr (uplo1_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
 
void bl1_zdscalmr (uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_zscalmr (uplo1_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
 
void bl1_sswap (int n, float *x, int incx, float *y, int incy)
 
void bl1_dswap (int n, double *x, int incx, double *y, int incy)
 
void bl1_cswap (int n, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zswap (int n, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_sswapv (int n, float *x, int incx, float *y, int incy)
 
void bl1_dswapv (int n, double *x, int incx, double *y, int incy)
 
void bl1_cswapv (int n, scomplex *x, int incx, scomplex *y, int incy)
 
void bl1_zswapv (int n, dcomplex *x, int incx, dcomplex *y, int incy)
 
void bl1_sswapmt (trans1_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_dswapmt (trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_cswapmt (trans1_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zswapmt (trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 

Function Documentation

◆ bl1_camax()

void bl1_camax ( int  n,
scomplex *  x,
int  incx,
int *  index 
)
36 {
37 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
38  *index = cblas_icamax( n,
39  x, incx );
40 #else
41  *index = F77_icamax( &n,
42  x, &incx ) - 1;
43 #endif
44 }
int F77_icamax(int *n, scomplex *x, int *incx)
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX)

References cblas_icamax(), and F77_icamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_LU_piv_opc_var5(), and FLA_SA_LU_unb().

◆ bl1_casum()

void bl1_casum ( int  n,
scomplex *  x,
int  incx,
float *  norm 
)
36 {
37 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
38  *norm = cblas_scasum( n,
39  x, incx );
40 #else
41  *norm = F77_scasum( &n,
42  x, &incx );
43 #endif
44 }
float F77_scasum(int *n, scomplex *x, int *incx)
float cblas_scasum(const int N, const void *X, const int incX)

References cblas_scasum(), and F77_scasum().

Referenced by FLA_Asum_external().

◆ bl1_caxpy()

void bl1_caxpy ( int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)
44 {
45 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
46  cblas_caxpy( n,
47  alpha,
48  x, incx,
49  y, incy );
50 #else
51  F77_caxpy( &n,
52  alpha,
53  x, &incx,
54  y, &incy );
55 #endif
56 }
void F77_caxpy(int *n, scomplex *alpha, scomplex *x, int *incx, scomplex *y, int *incy)
void cblas_caxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY)

References cblas_caxpy(), and F77_caxpy().

Referenced by bl1_caxpymt(), bl1_caxpysmt(), bl1_caxpysv(), and bl1_caxpyv().

◆ bl1_caxpymrt()

void bl1_caxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
228 {
229  scomplex* a_begin;
230  scomplex* b_begin;
231  int lda, inca;
232  int ldb, incb;
233  int n_iter;
234  int n_elem;
235  int n_elem_max;
236  int n_elem_is_descending;
237  int j;
238  conj1_t conj;
239 
240  // Return early if possible.
241  if ( bl1_zero_dim2( m, n ) ) return;
242 
243  // Initialize variables based on storage format of B and value of uplo.
244  if ( bl1_is_col_storage( b_rs, b_cs ) )
245  {
246  if ( bl1_is_lower( uplo ) )
247  {
248  n_iter = bl1_min( m, n );
249  n_elem_max = m;
250  lda = a_cs;
251  inca = a_rs;
252  ldb = b_cs;
253  incb = b_rs;
254  n_elem_is_descending = TRUE;
255  }
256  else // if ( bl1_is_upper( uplo ) )
257  {
258  n_iter = n;
259  n_elem_max = bl1_min( m, n );
260  lda = a_cs;
261  inca = a_rs;
262  ldb = b_cs;
263  incb = b_rs;
264  n_elem_is_descending = FALSE;
265  }
266  }
267  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
268  {
269  if ( bl1_is_lower( uplo ) )
270  {
271  n_iter = m;
272  n_elem_max = bl1_min( m, n );
273  lda = a_rs;
274  inca = a_cs;
275  ldb = b_rs;
276  incb = b_cs;
277  n_elem_is_descending = FALSE;
278  }
279  else // if ( bl1_is_upper( uplo ) )
280  {
281  n_iter = bl1_min( m, n );
282  n_elem_max = n;
283  lda = a_rs;
284  inca = a_cs;
285  ldb = b_rs;
286  incb = b_cs;
287  n_elem_is_descending = TRUE;
288  }
289  }
290 
291  // Swap lda and inca if we're doing a transpose.
292  if ( bl1_does_trans( trans ) )
293  {
294  bl1_swap_ints( lda, inca );
295  }
296 
297  // Extract conj component from trans parameter.
298  conj = bl1_proj_trans1_to_conj( trans );
299 
300  // Choose the loop based on whether n_elem will be shrinking or growing
301  // with each iteration.
302  if ( n_elem_is_descending )
303  {
304  for ( j = 0; j < n_iter; j++ )
305  {
306  n_elem = n_elem_max - j;
307  a_begin = a + j*lda + j*inca;
308  b_begin = b + j*ldb + j*incb;
309 
310  bl1_caxpyv( conj,
311  n_elem,
312  alpha,
313  a_begin, inca,
314  b_begin, incb );
315  }
316  }
317  else // if ( n_elem_is_ascending )
318  {
319  for ( j = 0; j < n_iter; j++ )
320  {
321  n_elem = bl1_min( j + 1, n_elem_max );
322  a_begin = a + j*lda;
323  b_begin = b + j*ldb;
324 
325  bl1_caxpyv( conj,
326  n_elem,
327  alpha,
328  a_begin, inca,
329  b_begin, incb );
330  }
331  }
332 }
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
int bl1_is_lower(uplo1_t uplo)
Definition: bl1_is.c:49
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
conj1_t bl1_proj_trans1_to_conj(trans1_t trans)
Definition: bl1_proj.c:13
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
int bl1_does_trans(trans1_t trans)
Definition: bl1_does.c:13
conj1_t
Definition: blis_type_defs.h:80
scomplex
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by bl1_cher2k(), bl1_cherk(), and FLA_Axpyrt_external().

◆ bl1_caxpymt()

void bl1_caxpymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
150 {
151  scomplex* a_begin;
152  scomplex* b_begin;
153  scomplex* a_temp;
154  int inca_temp;
155  int lda, inca;
156  int ldb, incb;
157  int n_iter;
158  int n_elem;
159  int j;
160 
161  // Return early if possible.
162  if ( bl1_zero_dim2( m, n ) ) return;
163 
164  // Handle cases where A and B are vectors to ensure that the underlying axpy
165  // gets invoked only once.
166  if ( bl1_is_vector( m, n ) )
167  {
168  // Initialize with values appropriate for vectors.
169  n_iter = 1;
170  n_elem = bl1_vector_dim( m, n );
171  lda = 1; // multiplied by zero when n_iter == 1; not needed.
172  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
173  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
174  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
175  }
176  else // matrix case
177  {
178  // Initialize with optimal values for column-major storage.
179  n_iter = n;
180  n_elem = m;
181  lda = a_cs;
182  inca = a_rs;
183  ldb = b_cs;
184  incb = b_rs;
185 
186  // Handle the transposition of A.
187  if ( bl1_does_trans( trans ) )
188  {
189  bl1_swap_ints( lda, inca );
190  }
191 
192  // An optimization: if B is row-major and if A is effectively row-major
193  // after a possible transposition, then let's access the matrices by rows
194  // instead of by columns for increased spatial locality.
195  if ( bl1_is_row_storage( b_rs, b_cs ) )
196  {
197  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
198  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
199  {
200  bl1_swap_ints( n_iter, n_elem );
201  bl1_swap_ints( lda, inca );
202  bl1_swap_ints( ldb, incb );
203  }
204  }
205  }
206 
207  if ( bl1_does_conj( trans ) )
208  {
209  conj1_t conj = bl1_proj_trans1_to_conj( trans );
210 
211  a_temp = bl1_callocv( n_elem );
212  inca_temp = 1;
213 
214  for ( j = 0; j < n_iter; j++ )
215  {
216  a_begin = a + j*lda;
217  b_begin = b + j*ldb;
218 
219  bl1_ccopyv( conj,
220  n_elem,
221  a_begin, inca,
222  a_temp, inca_temp );
223 
224  bl1_caxpy( n_elem,
225  alpha,
226  a_temp, inca_temp,
227  b_begin, incb );
228  }
229 
230  bl1_cfree( a_temp );
231  }
232  else // if ( !bl1_does_conj( trans ) )
233  {
234  for ( j = 0; j < n_iter; j++ )
235  {
236  a_begin = a + j*lda;
237  b_begin = b + j*ldb;
238 
239  bl1_caxpy( n_elem,
240  alpha,
241  a_begin, inca,
242  b_begin, incb );
243  }
244 
245  }
246 }
void bl1_caxpy(int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpy.c:43
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
int bl1_does_notrans(trans1_t trans)
Definition: bl1_does.c:19
int bl1_does_conj(trans1_t trans)
Definition: bl1_does.c:25
int bl1_is_row_storage(int rs, int cs)
Definition: bl1_is.c:95
int bl1_is_vector(int m, int n)
Definition: bl1_is.c:106
int bl1_vector_dim(int m, int n)
Definition: bl1_vector.c:13
int bl1_vector_inc(trans1_t trans, int m, int n, int rs, int cs)
Definition: bl1_vector.c:19
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
scomplex * bl1_callocv(unsigned int n_elem)
Definition: bl1_allocv.c:40
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_caxpysmt()

void bl1_caxpysmt ( trans1_t  trans,
int  m,
int  n,
scomplex *  alpha0,
scomplex *  alpha1,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  beta,
scomplex *  b,
int  b_rs,
int  b_cs 
)
164 {
165  scomplex* a_begin;
166  scomplex* b_begin;
167  scomplex* a_temp;
168  scomplex alpha_prod;
169  int inca_temp;
170  int lda, inca;
171  int ldb, incb;
172  int n_iter;
173  int n_elem;
174  int j;
175 
176  // Return early if possible.
177  if ( bl1_zero_dim2( m, n ) ) return;
178 
179  alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
180  alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
181 
182  // Handle cases where A and B are vectors to ensure that the underlying axpy
183  // gets invoked only once.
184  if ( bl1_is_vector( m, n ) )
185  {
186  // Initialize with values appropriate for vectors.
187  n_iter = 1;
188  n_elem = bl1_vector_dim( m, n );
189  lda = 1; // multiplied by zero when n_iter == 1; not needed.
190  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
191  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
192  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
193  }
194  else // matrix case
195  {
196  // Initialize with optimal values for column-major storage.
197  n_iter = n;
198  n_elem = m;
199  lda = a_cs;
200  inca = a_rs;
201  ldb = b_cs;
202  incb = b_rs;
203 
204  // Handle the transposition of A.
205  if ( bl1_does_trans( trans ) )
206  {
207  bl1_swap_ints( lda, inca );
208  }
209 
210  // An optimization: if B is row-major and if A is effectively row-major
211  // after a possible transposition, then let's access the matrices by rows
212  // instead of by columns for increased spatial locality.
213  if ( bl1_is_row_storage( b_rs, b_cs ) )
214  {
215  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
216  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
217  {
218  bl1_swap_ints( n_iter, n_elem );
219  bl1_swap_ints( lda, inca );
220  bl1_swap_ints( ldb, incb );
221  }
222  }
223  }
224 
225  if ( bl1_does_conj( trans ) )
226  {
227  conj1_t conj = bl1_proj_trans1_to_conj( trans );
228 
229  a_temp = bl1_callocv( n_elem );
230  inca_temp = 1;
231 
232  for ( j = 0; j < n_iter; j++ )
233  {
234  a_begin = a + j*lda;
235  b_begin = b + j*ldb;
236 
237  bl1_ccopyv( conj,
238  n_elem,
239  a_begin, inca,
240  a_temp, inca_temp );
241 
242  bl1_cscal( n_elem,
243  beta,
244  b_begin, incb );
245 
246  bl1_caxpy( n_elem,
247  &alpha_prod,
248  a_temp, inca_temp,
249  b_begin, incb );
250  }
251 
252  bl1_cfree( a_temp );
253  }
254  else // if ( !bl1_does_conj( trans ) )
255  {
256  for ( j = 0; j < n_iter; j++ )
257  {
258  a_begin = a + j*lda;
259  b_begin = b + j*ldb;
260 
261  bl1_cscal( n_elem,
262  beta,
263  b_begin, incb );
264 
265  bl1_caxpy( n_elem,
266  &alpha_prod,
267  a_begin, inca,
268  b_begin, incb );
269  }
270  }
271 }
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_cscal(int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scal.c:52
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References alpha1, bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_cscal(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, scomplex::imag, and scomplex::real.

Referenced by FLA_Axpys_external().

◆ bl1_caxpysv()

void bl1_caxpysv ( int  n,
scomplex *  alpha0,
scomplex *  alpha1,
scomplex *  x,
int  incx,
scomplex *  beta,
scomplex *  y,
int  incy 
)
52 {
53  scomplex alpha_prod;
54 
55  // Return early if possible.
56  if ( bl1_zero_dim1( n ) ) return;
57 
58  alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
59  alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
60 
61  bl1_cscal( n,
62  beta,
63  y, incy );
64 
65  bl1_caxpy( n,
66  &alpha_prod,
67  x, incx,
68  y, incy );
69 }
int bl1_zero_dim1(int m)
Definition: bl1_is.c:113

References alpha1, bl1_caxpy(), bl1_cscal(), bl1_zero_dim1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().

◆ bl1_caxpyv()

void bl1_caxpyv ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)
30 {
31  scomplex* x_copy;
32  int incx_copy;
33 
34  // Return early if possible.
35  if ( bl1_zero_dim1( n ) ) return;
36 
37  x_copy = x;
38  incx_copy = incx;
39 
40  if ( bl1_is_conj( conj ) )
41  {
42  x_copy = bl1_callocv( n );
43  incx_copy = 1;
44 
45  bl1_ccopyv( conj,
46  n,
47  x, incx,
48  x_copy, incx_copy );
49  }
50 
51  bl1_caxpy( n,
52  alpha,
53  x_copy, incx_copy,
54  y, incy );
55 
56  if ( bl1_is_conj( conj ) )
57  bl1_cfree( x_copy );
58 }
int bl1_is_conj(conj1_t conj)
Definition: bl1_is.c:42

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_is_conj(), and bl1_zero_dim1().

Referenced by bl1_caxpymrt(), bl1_cgemv(), bl1_chemv(), bl1_ctrmvsx(), bl1_ctrsvsx(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_il_opc_var4(), FLA_Eig_gest_il_opc_var5(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_iu_opc_var4(), FLA_Eig_gest_iu_opc_var5(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nl_opc_var4(), FLA_Eig_gest_nl_opc_var5(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Eig_gest_nu_opc_var4(), FLA_Eig_gest_nu_opc_var5(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().

◆ bl1_cccopymr()

void bl1_cccopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
1051 {
1052  scomplex* a_begin;
1053  scomplex* b_begin;
1054  int lda, inca;
1055  int ldb, incb;
1056  int n_iter;
1057  int n_elem_max;
1058  int n_elem;
1059  int j;
1060 
1061  // Return early if possible.
1062  if ( bl1_zero_dim2( m, n ) ) return;
1063 
1064  // We initialize for column-major.
1065  n_iter = n;
1066  n_elem_max = m;
1067  lda = a_cs;
1068  inca = a_rs;
1069  ldb = b_cs;
1070  incb = b_rs;
1071 
1072  // An optimization: if B is row-major, then let's access the matrix
1073  // by rows instead of by columns for increased spatial locality.
1074  if ( bl1_is_row_storage( b_rs, b_cs ) )
1075  {
1076  bl1_swap_ints( n_iter, n_elem_max );
1077  bl1_swap_ints( lda, inca );
1078  bl1_swap_ints( ldb, incb );
1079  bl1_toggle_uplo( uplo );
1080  }
1081 
1082 
1083  if ( bl1_is_upper( uplo ) )
1084  {
1085  for ( j = 0; j < n_iter; j++ )
1086  {
1087  n_elem = bl1_min( j + 1, n_elem_max );
1088  a_begin = a + j*lda;
1089  b_begin = b + j*ldb;
1090 
1091  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1092  n_elem,
1093  a_begin, inca,
1094  b_begin, incb );
1095  }
1096  }
1097  else // if ( bl1_is_lower( uplo ) )
1098  {
1099  for ( j = 0; j < n_iter; j++ )
1100  {
1101  n_elem = bl1_max( 0, n_elem_max - j );
1102  a_begin = a + j*lda + j*inca;
1103  b_begin = b + j*ldb + j*incb;
1104 
1105  if ( n_elem <= 0 ) break;
1106 
1107  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1108  n_elem,
1109  a_begin, inca,
1110  b_begin, incb );
1111  }
1112  }
1113 }
int bl1_is_upper(uplo1_t uplo)
Definition: bl1_is.c:54
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81

References bl1_ccopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

◆ bl1_cccopymrt()

void bl1_cccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
1497 {
1498  scomplex* a_begin;
1499  scomplex* b_begin;
1500  int lda, inca;
1501  int ldb, incb;
1502  int n_iter;
1503  int n_elem;
1504  int n_elem_max;
1505  int n_elem_is_descending;
1506  int j;
1507  conj1_t conj;
1508 
1509  // Return early if possible.
1510  if ( bl1_zero_dim2( m, n ) ) return;
1511 
1512  // Initialize variables based on storage format of B and value of uplo.
1513  if ( bl1_is_col_storage( b_rs, b_cs ) )
1514  {
1515  if ( bl1_is_lower( uplo ) )
1516  {
1517  n_iter = bl1_min( m, n );
1518  n_elem_max = m;
1519  lda = a_cs;
1520  inca = a_rs;
1521  ldb = b_cs;
1522  incb = b_rs;
1523  n_elem_is_descending = TRUE;
1524  }
1525  else // if ( bl1_is_upper( uplo ) )
1526  {
1527  n_iter = n;
1528  n_elem_max = bl1_min( m, n );
1529  lda = a_cs;
1530  inca = a_rs;
1531  ldb = b_cs;
1532  incb = b_rs;
1533  n_elem_is_descending = FALSE;
1534  }
1535  }
1536  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1537  {
1538  if ( bl1_is_lower( uplo ) )
1539  {
1540  n_iter = m;
1541  n_elem_max = bl1_min( m, n );
1542  lda = a_rs;
1543  inca = a_cs;
1544  ldb = b_rs;
1545  incb = b_cs;
1546  n_elem_is_descending = FALSE;
1547  }
1548  else // if ( bl1_is_upper( uplo ) )
1549  {
1550  n_iter = bl1_min( m, n );
1551  n_elem_max = n;
1552  lda = a_rs;
1553  inca = a_cs;
1554  ldb = b_rs;
1555  incb = b_cs;
1556  n_elem_is_descending = TRUE;
1557  }
1558  }
1559 
1560  // Swap lda and inca if we're doing a transpose.
1561  if ( bl1_does_trans( trans ) )
1562  {
1563  bl1_swap_ints( lda, inca );
1564  }
1565 
1566  // Extract conj component from trans parameter.
1567  conj = bl1_proj_trans1_to_conj( trans );
1568 
1569  // Choose the loop based on whether n_elem will be shrinking or growing
1570  // with each iteration.
1571  if ( n_elem_is_descending )
1572  {
1573  for ( j = 0; j < n_iter; j++ )
1574  {
1575  n_elem = n_elem_max - j;
1576  a_begin = a + j*lda + j*inca;
1577  b_begin = b + j*ldb + j*incb;
1578 
1579  bl1_ccopyv( conj,
1580  n_elem,
1581  a_begin, inca,
1582  b_begin, incb );
1583  }
1584  }
1585  else // if ( n_elem_is_ascending )
1586  {
1587  for ( j = 0; j < n_iter; j++ )
1588  {
1589  n_elem = bl1_min( j + 1, n_elem_max );
1590  a_begin = a + j*lda;
1591  b_begin = b + j*ldb;
1592 
1593  bl1_ccopyv( conj,
1594  n_elem,
1595  a_begin, inca,
1596  b_begin, incb );
1597  }
1598  }
1599 }

References bl1_ccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

◆ bl1_cccopymt()

void bl1_cccopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
1167 {
1168  scomplex* a_begin;
1169  scomplex* b_begin;
1170  int lda, inca;
1171  int ldb, incb;
1172  int n_iter;
1173  int n_elem;
1174  int j;
1175  conj1_t conj;
1176 
1177  // Return early if possible.
1178  if ( bl1_zero_dim2( m, n ) ) return;
1179 
1180  // Handle cases where A and B are vectors to ensure that the underlying copy
1181  // gets invoked only once.
1182  if ( bl1_is_vector( m, n ) )
1183  {
1184  // Initialize with values appropriate for vectors.
1185  n_iter = 1;
1186  n_elem = bl1_vector_dim( m, n );
1187  lda = 1; // multiplied by zero when n_iter == 1; not needed.
1188  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1189  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1190  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
1191  }
1192  else // matrix case
1193  {
1194  // Initialize with optimal values for column-major storage of B.
1195  n_iter = n;
1196  n_elem = m;
1197  lda = a_cs;
1198  inca = a_rs;
1199  ldb = b_cs;
1200  incb = b_rs;
1201 
1202  // Handle the transposition of A.
1203  if ( bl1_does_trans( trans ) )
1204  {
1205  bl1_swap_ints( lda, inca );
1206  }
1207 
1208  // An optimization: if B is row-major, then let's access the matrix by rows
1209  // instead of by columns for increased spatial locality.
1210  if ( bl1_is_row_storage( b_rs, b_cs ) )
1211  {
1212  bl1_swap_ints( n_iter, n_elem );
1213  bl1_swap_ints( lda, inca );
1214  bl1_swap_ints( ldb, incb );
1215  }
1216  }
1217 
1218  // Extract conj component from trans parameter.
1219  conj = bl1_proj_trans1_to_conj( trans );
1220 
1221  for ( j = 0; j < n_iter; ++j )
1222  {
1223  a_begin = a + j*lda;
1224  b_begin = b + j*ldb;
1225 
1226  bl1_ccopyv( conj,
1227  n_elem,
1228  a_begin, inca,
1229  b_begin, incb );
1230  }
1231 }

References bl1_ccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

◆ bl1_cconjm()

void bl1_cconjm ( int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs 
)
24 {
25  float m1 = bl1_sm1();
26  float* a_conj;
27  int lda, inca;
28  int n_iter;
29  int n_elem;
30  int j;
31 
32  // Return early if possible.
33  if ( bl1_zero_dim2( m, n ) ) return;
34 
35  // Handle cases where A is a vector to ensure that the underlying axpy
36  // gets invoked only once.
37  if ( bl1_is_vector( m, n ) )
38  {
39  // Initialize with values appropriate for a vector.
40  n_iter = 1;
41  n_elem = bl1_vector_dim( m, n );
42  lda = 1; // multiplied by zero when n_iter == 1; not needed.
43  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
44  }
45  else // matrix case
46  {
47  // Initialize with optimal values for column-major storage.
48  n_iter = n;
49  n_elem = m;
50  lda = a_cs;
51  inca = a_rs;
52 
53  // An optimization: if A is row-major, then let's access the matrix
54  // by rows instead of by columns to increase spatial locality.
55  if ( bl1_is_row_storage( a_rs, a_cs ) )
56  {
57  bl1_swap_ints( n_iter, n_elem );
58  bl1_swap_ints( lda, inca );
59  }
60  }
61 
62  for ( j = 0; j < n_iter; ++j )
63  {
64  a_conj = ( float* )( a + j*lda ) + 1;
65 
66  bl1_sscal( n_elem,
67  &m1,
68  a_conj, 2*inca );
69  }
70 }
void bl1_sscal(int n, float *alpha, float *x, int incx)
Definition: bl1_scal.c:13
float bl1_sm1(void)
Definition: bl1_constants.c:175

References bl1_is_row_storage(), bl1_is_vector(), bl1_sm1(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), and FLA_Conjugate().

◆ bl1_cconjmr()

void bl1_cconjmr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs 
)
24 {
25  float m1 = bl1_sm1();
26  float* a_conj;
27  int lda, inca;
28  int n_iter;
29  int n_elem_max;
30  int n_elem;
31  int j;
32 
33  // Return early if possible.
34  if ( bl1_zero_dim2( m, n ) ) return;
35 
36  // We initialize for column-major.
37  n_iter = n;
38  n_elem_max = m;
39  lda = a_cs;
40  inca = a_rs;
41 
42  // An optimization: if A is row-major, then let's access the matrix
43  // by rows instead of by columns to increase spatial locality.
44  if ( bl1_is_row_storage( a_rs, a_cs ) )
45  {
46  bl1_swap_ints( n_iter, n_elem_max );
47  bl1_swap_ints( lda, inca );
48  bl1_toggle_uplo( uplo );
49  }
50 
51  if ( bl1_is_upper( uplo ) )
52  {
53  for ( j = 0; j < n_iter; ++j )
54  {
55  n_elem = bl1_min( j + 1, n_elem_max );
56  a_conj = ( float* )( a + j*lda ) + 1;
57 
58  bl1_sscal( n_elem,
59  &m1,
60  a_conj, 2*inca );
61  }
62  }
63  else // if ( bl1_is_lower( uplo ) )
64  {
65  for ( j = 0; j < n_iter; ++j )
66  {
67  n_elem = bl1_max( 0, n_elem_max - j );
68  a_conj = ( float* )( a + j*lda + j*inca ) + 1;
69 
70  if ( n_elem <= 0 ) break;
71 
72  bl1_sscal( n_elem,
73  &m1,
74  a_conj, 2*inca );
75  }
76  }
77 }

References bl1_is_row_storage(), bl1_is_upper(), bl1_sm1(), bl1_sscal(), and bl1_zero_dim2().

Referenced by bl1_chemm(), bl1_ctrmm(), bl1_ctrsm(), and FLA_Conjugate_r().

◆ bl1_cconjv()

void bl1_cconjv ( int  m,
scomplex *  x,
int  incx 
)
24 {
25  float m1 = bl1_sm1();
26  float* x_conj = ( float* ) x + 1;
27  int incx_conj = 2 * incx;
28 
29  bl1_sscal( m,
30  &m1,
31  x_conj, incx_conj );
32 }

References bl1_sm1(), and bl1_sscal().

Referenced by bl1_ccopymt(), bl1_ccopyv(), bl1_cgemv(), bl1_cswapmt(), bl1_zccopyv(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), and FLA_Househ2_UT_r_opc().

◆ bl1_ccopy()

void bl1_ccopy ( int  m,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)
40 {
41 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
42  cblas_ccopy( m,
43  x, incx,
44  y, incy );
45 #else
46  F77_ccopy( &m,
47  x, &incx,
48  y, &incy );
49 #endif
50 }
void F77_ccopy(int *n, scomplex *x, int *incx, scomplex *y, int *incy)
void cblas_ccopy(const int N, const void *X, const int incX, void *Y, const int incY)

References cblas_ccopy(), and F77_ccopy().

Referenced by bl1_ccopymr(), bl1_ccopymt(), bl1_ccopyv(), and FLA_SA_LU_unb().

◆ bl1_ccopymr()

void bl1_ccopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
140 {
141  scomplex* a_begin;
142  scomplex* b_begin;
143  int lda, inca;
144  int ldb, incb;
145  int n_iter;
146  int n_elem_max;
147  int n_elem;
148  int j;
149 
150  // Return early if possible.
151  if ( bl1_zero_dim2( m, n ) ) return;
152 
153  // We initialize for column-major.
154  n_iter = n;
155  n_elem_max = m;
156  lda = a_cs;
157  inca = a_rs;
158  ldb = b_cs;
159  incb = b_rs;
160 
161  // An optimization: if A and B are both row-major, then let's access the
162  // matrices by rows instead of by columns for increased spatial locality.
163  if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
164  {
165  bl1_swap_ints( n_iter, n_elem_max );
166  bl1_swap_ints( lda, inca );
167  bl1_swap_ints( ldb, incb );
168  bl1_toggle_uplo( uplo );
169  }
170 
171 
172  if ( bl1_is_upper( uplo ) )
173  {
174  for ( j = 0; j < n_iter; j++ )
175  {
176  n_elem = bl1_min( j + 1, n_elem_max );
177  a_begin = a + j*lda;
178  b_begin = b + j*ldb;
179 
180  bl1_ccopy( n_elem,
181  a_begin, inca,
182  b_begin, incb );
183  }
184  }
185  else // if ( bl1_is_lower( uplo ) )
186  {
187  for ( j = 0; j < n_iter; j++ )
188  {
189  n_elem = bl1_max( 0, n_elem_max - j );
190  a_begin = a + j*lda + j*inca;
191  b_begin = b + j*ldb + j*incb;
192 
193  if ( n_elem <= 0 ) break;
194 
195  bl1_ccopy( n_elem,
196  a_begin, inca,
197  b_begin, incb );
198  }
199  }
200 }
void bl1_ccopy(int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copy.c:39

References bl1_ccopy(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_ccreate_contigmr(), bl1_cfree_saved_contigmr(), and FLA_Copyr_external().

◆ bl1_ccopymrt()

void bl1_ccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
224 {
225  scomplex* a_begin;
226  scomplex* b_begin;
227  int lda, inca;
228  int ldb, incb;
229  int n_iter;
230  int n_elem;
231  int n_elem_max;
232  int n_elem_is_descending;
233  int j;
234  conj1_t conj;
235 
236  // Return early if possible.
237  if ( bl1_zero_dim2( m, n ) ) return;
238 
239  // Initialize variables based on storage format of B and value of uplo.
240  if ( bl1_is_col_storage( b_rs, b_cs ) )
241  {
242  if ( bl1_is_lower( uplo ) )
243  {
244  n_iter = bl1_min( m, n );
245  n_elem_max = m;
246  lda = a_cs;
247  inca = a_rs;
248  ldb = b_cs;
249  incb = b_rs;
250  n_elem_is_descending = TRUE;
251  }
252  else // if ( bl1_is_upper( uplo ) )
253  {
254  n_iter = n;
255  n_elem_max = bl1_min( m, n );
256  lda = a_cs;
257  inca = a_rs;
258  ldb = b_cs;
259  incb = b_rs;
260  n_elem_is_descending = FALSE;
261  }
262  }
263  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
264  {
265  if ( bl1_is_lower( uplo ) )
266  {
267  n_iter = m;
268  n_elem_max = bl1_min( m, n );
269  lda = a_rs;
270  inca = a_cs;
271  ldb = b_rs;
272  incb = b_cs;
273  n_elem_is_descending = FALSE;
274  }
275  else // if ( bl1_is_upper( uplo ) )
276  {
277  n_iter = bl1_min( m, n );
278  n_elem_max = n;
279  lda = a_rs;
280  inca = a_cs;
281  ldb = b_rs;
282  incb = b_cs;
283  n_elem_is_descending = TRUE;
284  }
285  }
286 
287  // Swap lda and inca if we're doing a transpose.
288  if ( bl1_does_trans( trans ) )
289  {
290  bl1_swap_ints( lda, inca );
291  }
292 
293  // Extract conj component from trans parameter.
294  conj = bl1_proj_trans1_to_conj( trans );
295 
296  // Choose the loop based on whether n_elem will be shrinking or growing
297  // with each iteration.
298  if ( n_elem_is_descending )
299  {
300  for ( j = 0; j < n_iter; j++ )
301  {
302  n_elem = n_elem_max - j;
303  a_begin = a + j*lda + j*inca;
304  b_begin = b + j*ldb + j*incb;
305 
306  bl1_ccopyv( conj,
307  n_elem,
308  a_begin, inca,
309  b_begin, incb );
310  }
311  }
312  else // if ( n_elem_is_ascending )
313  {
314  for ( j = 0; j < n_iter; j++ )
315  {
316  n_elem = bl1_min( j + 1, n_elem_max );
317  a_begin = a + j*lda;
318  b_begin = b + j*ldb;
319 
320  bl1_ccopyv( conj,
321  n_elem,
322  a_begin, inca,
323  b_begin, incb );
324  }
325  }
326 }

References bl1_ccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by bl1_chemm(), bl1_ctrmm(), bl1_ctrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().

◆ bl1_ccopymt()

void bl1_ccopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
216 {
217  scomplex* a_begin;
218  scomplex* b_begin;
219  int lda, inca;
220  int ldb, incb;
221  int n_iter;
222  int n_elem;
223  int j;
224 
225  // Return early if possible.
226  if ( bl1_zero_dim2( m, n ) ) return;
227 
228  // Handle cases where A and B are vectors to ensure that the underlying copy
229  // gets invoked only once.
230  if ( bl1_is_vector( m, n ) )
231  {
232  // Initialize with values appropriate for vectors.
233  n_iter = 1;
234  n_elem = bl1_vector_dim( m, n );
235  lda = 1; // multiplied by zero when n_iter == 1; not needed.
236  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
237  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
238  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
239  }
240  else // matrix case
241  {
242  // Initialize with optimal values for column-major storage.
243  n_iter = n;
244  n_elem = m;
245  lda = a_cs;
246  inca = a_rs;
247  ldb = b_cs;
248  incb = b_rs;
249 
250  // Handle the transposition of A.
251  if ( bl1_does_trans( trans ) )
252  {
253  bl1_swap_ints( lda, inca );
254  }
255 
256  // An optimization: if B is row-major and if A is effectively row-major
257  // after a possible transposition, then let's access the matrix by rows
258  // instead of by columns for increased spatial locality.
259  if ( bl1_is_row_storage( b_rs, b_cs ) )
260  {
261  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
262  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
263  {
264  bl1_swap_ints( n_iter, n_elem );
265  bl1_swap_ints( lda, inca );
266  bl1_swap_ints( ldb, incb );
267  }
268  }
269  }
270 
271  for ( j = 0; j < n_iter; j++ )
272  {
273  a_begin = a + j*lda;
274  b_begin = b + j*ldb;
275 
276  bl1_ccopy( n_elem,
277  a_begin, inca,
278  b_begin, incb );
279 
280  if ( bl1_does_conj( trans ) )
281  bl1_cconjv( n_elem,
282  b_begin, incb );
283  }
284 }
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23

References bl1_cconjv(), bl1_ccopy(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_ccreate_contigm(), bl1_ccreate_contigmt(), bl1_cfree_saved_contigm(), bl1_cfree_saved_contigmsr(), bl1_cgemm(), bl1_chemm(), bl1_cher2k(), bl1_csymm(), bl1_csyr2k(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_ccopyv()

void bl1_ccopyv ( conj1_t  conj,
int  m,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

◆ bl1_cdcopymr()

void bl1_cdcopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
855 {
856  scomplex* a_begin;
857  double* b_begin;
858  int lda, inca;
859  int ldb, incb;
860  int n_iter;
861  int n_elem_max;
862  int n_elem;
863  int j;
864 
865  // Return early if possible.
866  if ( bl1_zero_dim2( m, n ) ) return;
867 
868  // We initialize for column-major.
869  n_iter = n;
870  n_elem_max = m;
871  lda = a_cs;
872  inca = a_rs;
873  ldb = b_cs;
874  incb = b_rs;
875 
876  // An optimization: if B is row-major, then let's access the matrix
877  // by rows instead of by columns for increased spatial locality.
878  if ( bl1_is_row_storage( b_rs, b_cs ) )
879  {
880  bl1_swap_ints( n_iter, n_elem_max );
881  bl1_swap_ints( lda, inca );
882  bl1_swap_ints( ldb, incb );
883  bl1_toggle_uplo( uplo );
884  }
885 
886 
887  if ( bl1_is_upper( uplo ) )
888  {
889  for ( j = 0; j < n_iter; j++ )
890  {
891  n_elem = bl1_min( j + 1, n_elem_max );
892  a_begin = a + j*lda;
893  b_begin = b + j*ldb;
894 
895  bl1_cdcopyv( BLIS1_NO_CONJUGATE,
896  n_elem,
897  a_begin, inca,
898  b_begin, incb );
899  }
900  }
901  else // if ( bl1_is_lower( uplo ) )
902  {
903  for ( j = 0; j < n_iter; j++ )
904  {
905  n_elem = bl1_max( 0, n_elem_max - j );
906  a_begin = a + j*lda + j*inca;
907  b_begin = b + j*ldb + j*incb;
908 
909  if ( n_elem <= 0 ) break;
910 
911  bl1_cdcopyv( BLIS1_NO_CONJUGATE,
912  n_elem,
913  a_begin, inca,
914  b_begin, incb );
915  }
916  }
917 }
void bl1_cdcopyv(conj1_t conj, int m, scomplex *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:236

References bl1_cdcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_cdcopymrt()

void bl1_cdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
1391 {
1392  scomplex* a_begin;
1393  double* b_begin;
1394  int lda, inca;
1395  int ldb, incb;
1396  int n_iter;
1397  int n_elem;
1398  int n_elem_max;
1399  int n_elem_is_descending;
1400  int j;
1401  conj1_t conj;
1402 
1403  // Return early if possible.
1404  if ( bl1_zero_dim2( m, n ) ) return;
1405 
1406  // Initialize variables based on storage format of B and value of uplo.
1407  if ( bl1_is_col_storage( b_rs, b_cs ) )
1408  {
1409  if ( bl1_is_lower( uplo ) )
1410  {
1411  n_iter = bl1_min( m, n );
1412  n_elem_max = m;
1413  lda = a_cs;
1414  inca = a_rs;
1415  ldb = b_cs;
1416  incb = b_rs;
1417  n_elem_is_descending = TRUE;
1418  }
1419  else // if ( bl1_is_upper( uplo ) )
1420  {
1421  n_iter = n;
1422  n_elem_max = bl1_min( m, n );
1423  lda = a_cs;
1424  inca = a_rs;
1425  ldb = b_cs;
1426  incb = b_rs;
1427  n_elem_is_descending = FALSE;
1428  }
1429  }
1430  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1431  {
1432  if ( bl1_is_lower( uplo ) )
1433  {
1434  n_iter = m;
1435  n_elem_max = bl1_min( m, n );
1436  lda = a_rs;
1437  inca = a_cs;
1438  ldb = b_rs;
1439  incb = b_cs;
1440  n_elem_is_descending = FALSE;
1441  }
1442  else // if ( bl1_is_upper( uplo ) )
1443  {
1444  n_iter = bl1_min( m, n );
1445  n_elem_max = n;
1446  lda = a_rs;
1447  inca = a_cs;
1448  ldb = b_rs;
1449  incb = b_cs;
1450  n_elem_is_descending = TRUE;
1451  }
1452  }
1453 
1454  // Swap lda and inca if we're doing a transpose.
1455  if ( bl1_does_trans( trans ) )
1456  {
1457  bl1_swap_ints( lda, inca );
1458  }
1459 
1460  // Extract conj component from trans parameter.
1461  conj = bl1_proj_trans1_to_conj( trans );
1462 
1463  // Choose the loop based on whether n_elem will be shrinking or growing
1464  // with each iteration.
1465  if ( n_elem_is_descending )
1466  {
1467  for ( j = 0; j < n_iter; j++ )
1468  {
1469  n_elem = n_elem_max - j;
1470  a_begin = a + j*lda + j*inca;
1471  b_begin = b + j*ldb + j*incb;
1472 
1473  bl1_cdcopyv( conj,
1474  n_elem,
1475  a_begin, inca,
1476  b_begin, incb );
1477  }
1478  }
1479  else // if ( n_elem_is_ascending )
1480  {
1481  for ( j = 0; j < n_iter; j++ )
1482  {
1483  n_elem = bl1_min( j + 1, n_elem_max );
1484  a_begin = a + j*lda;
1485  b_begin = b + j*ldb;
1486 
1487  bl1_cdcopyv( conj,
1488  n_elem,
1489  a_begin, inca,
1490  b_begin, incb );
1491  }
1492  }
1493 }

References bl1_cdcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_cdcopymt()

void bl1_cdcopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
965 {
966  scomplex* a_begin;
967  double* b_begin;
968  int lda, inca;
969  int ldb, incb;
970  int n_iter;
971  int n_elem;
972  int j;
973  conj1_t conj;
974 
975  // Return early if possible.
976  if ( bl1_zero_dim2( m, n ) ) return;
977 
978  // Handle cases where A and B are vectors to ensure that the underlying copy
979  // gets invoked only once.
980  if ( bl1_is_vector( m, n ) )
981  {
982  // Initialize with values appropriate for vectors.
983  n_iter = 1;
984  n_elem = bl1_vector_dim( m, n );
985  lda = 1; // multiplied by zero when n_iter == 1; not needed.
986  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
987  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
988  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
989  }
990  else // matrix case
991  {
992  // Initialize with optimal values for column-major storage of B.
993  n_iter = n;
994  n_elem = m;
995  lda = a_cs;
996  inca = a_rs;
997  ldb = b_cs;
998  incb = b_rs;
999 
1000  // Handle the transposition of A.
1001  if ( bl1_does_trans( trans ) )
1002  {
1003  bl1_swap_ints( lda, inca );
1004  }
1005 
1006  // An optimization: if B is row-major, then let's access the matrix by rows
1007  // instead of by columns for increased spatial locality.
1008  if ( bl1_is_row_storage( b_rs, b_cs ) )
1009  {
1010  bl1_swap_ints( n_iter, n_elem );
1011  bl1_swap_ints( lda, inca );
1012  bl1_swap_ints( ldb, incb );
1013  }
1014  }
1015 
1016  // Extract conj component from trans parameter.
1017  conj = bl1_proj_trans1_to_conj( trans );
1018 
1019  for ( j = 0; j < n_iter; ++j )
1020  {
1021  a_begin = a + j*lda;
1022  b_begin = b + j*ldb;
1023 
1024  bl1_cdcopyv( conj,
1025  n_elem,
1026  a_begin, inca,
1027  b_begin, incb );
1028  }
1029 }

References bl1_cdcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_cdcopyv()

void bl1_cdcopyv ( conj1_t  conj,
int  m,
scomplex *  x,
int  incx,
double *  y,
int  incy 
)
237 {
238  scomplex* chi;
239  double* psi;
240  int i;
241 
242  // Return early if possible.
243  if ( bl1_zero_dim1( m ) ) return;
244 
245  // Initialize pointers.
246  chi = x;
247  psi = y;
248 
249  for ( i = 0; i < m; ++i )
250  {
251  *psi = chi->real;
252 
253  chi += incx;
254  psi += incy;
255  }
256 }
int i
Definition: bl1_axmyv2.c:145

References bl1_zero_dim1(), i, and scomplex::real.

Referenced by bl1_cdcopymr(), bl1_cdcopymrt(), and bl1_cdcopymt().

◆ bl1_cdot()

void bl1_cdot ( conj1_t  conj,
int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  rho 
)
40 {
41 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
42  if ( bl1_is_conj( conj ) )
43  {
44  cblas_cdotc_sub( n,
45  x, incx,
46  y, incy,
47  rho );
48  }
49  else // if ( !bl1_is_conj( conj ) )
50  {
51  cblas_cdotu_sub( n,
52  x, incx,
53  y, incy,
54  rho );
55  }
56 #else
57  bl1_cdot_in( conj,
58  n,
59  x, incx,
60  y, incy,
61  rho );
62 #endif
63 }
* rho
Definition: bl1_axpyv2bdotaxpy.c:322
void bl1_cdot_in(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:94
void cblas_cdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc)
void cblas_cdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu)

References bl1_cdot_in(), bl1_is_conj(), cblas_cdotc_sub(), cblas_cdotu_sub(), and rho.

Referenced by bl1_cdot2s(), bl1_cdots(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Fused_UYx_ZVx_opc_var1(), FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Sylv_hh_opc_var1(), FLA_Sylv_hn_opc_var1(), FLA_Sylv_nh_opc_var1(), FLA_Sylv_nn_opc_var1(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().

◆ bl1_cdot2s()

void bl1_cdot2s ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  beta,
scomplex *  rho 
)
40 {
41  scomplex dotxy;
42  scomplex dotyx;
43  scomplex alpha_d = *alpha;
44  scomplex alphac_d = *alpha;
45  scomplex beta_d = *beta;
46  scomplex rho_d = *rho;
47 
48  alphac_d.imag *= -1.0F;
49 
50  bl1_cdot( conj,
51  n,
52  x, incx,
53  y, incy,
54  &dotxy );
55 
56  bl1_cdot( conj,
57  n,
58  y, incy,
59  x, incx,
60  &dotyx );
61 
62  rho->real = beta_d.real * rho_d.real - beta_d.imag * rho_d.imag +
63  alpha_d.real * dotxy.real - alpha_d.imag * dotxy.imag +
64  alphac_d.real * dotyx.real - alphac_d.imag * dotyx.imag;
65  rho->imag = beta_d.real * rho_d.imag + beta_d.imag * rho_d.real +
66  alpha_d.real * dotxy.imag + alpha_d.imag * dotxy.real +
67  alphac_d.real * dotyx.imag + alphac_d.imag * dotyx.real;
68 }
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39

References bl1_cdot(), scomplex::imag, scomplex::real, and rho.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), and FLA_Lyap_n_opc_var3().

◆ bl1_cdot_in()

void bl1_cdot_in ( conj1_t  conj,
int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  rho 
)
95 {
96  scomplex* xip;
97  scomplex* yip;
98  scomplex xi;
99  scomplex yi;
100  scomplex rho_temp;
101  int i;
102 
103  rho_temp.real = 0.0F;
104  rho_temp.imag = 0.0F;
105 
106  xip = x;
107  yip = y;
108 
109  if ( bl1_is_conj( conj ) )
110  {
111  for ( i = 0; i < n; ++i )
112  {
113  xi.real = xip->real;
114  xi.imag = xip->imag;
115  yi.real = yip->real;
116  yi.imag = yip->imag;
117 
118  rho_temp.real += xi.real * yi.real - -xi.imag * yi.imag;
119  rho_temp.imag += xi.real * yi.imag + -xi.imag * yi.real;
120 
121  xip += incx;
122  yip += incy;
123  }
124  }
125  else // if ( !bl1_is_conj( conj ) )
126  {
127  for ( i = 0; i < n; ++i )
128  {
129  xi.real = xip->real;
130  xi.imag = xip->imag;
131  yi.real = yip->real;
132  yi.imag = yip->imag;
133 
134  rho_temp.real += xi.real * yi.real - xi.imag * yi.imag;
135  rho_temp.imag += xi.real * yi.imag + xi.imag * yi.real;
136 
137  xip += incx;
138  yip += incy;
139  }
140  }
141 
142  rho->real = rho_temp.real;
143  rho->imag = rho_temp.imag;
144 }

References bl1_is_conj(), i, scomplex::imag, scomplex::real, and rho.

Referenced by bl1_cdot().

◆ bl1_cdots()

void bl1_cdots ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  beta,
scomplex *  rho 
)
40 {
41  scomplex rho_orig = *rho;
42  scomplex dot_prod;
43 
44  bl1_cdot( conj,
45  n,
46  x, incx,
47  y, incy,
48  &dot_prod );
49 
50  rho->real = beta->real * rho_orig.real - beta->imag * rho_orig.imag +
51  alpha->real * dot_prod.real - alpha->imag * dot_prod.imag;
52  rho->imag = beta->real * rho_orig.imag + beta->imag * rho_orig.real +
53  alpha->real * dot_prod.imag + alpha->imag * dot_prod.real;
54 }

References bl1_cdot(), scomplex::imag, scomplex::real, and rho.

Referenced by FLA_Chol_l_opc_var1(), FLA_Chol_l_opc_var2(), FLA_Chol_u_opc_var1(), FLA_Chol_u_opc_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Hess_UT_step_opc_var5(), FLA_LU_nopiv_opc_var1(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opc_var4(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_Ttmm_l_opc_var2(), FLA_Ttmm_l_opc_var3(), FLA_Ttmm_u_opc_var2(), and FLA_Ttmm_u_opc_var3().

◆ bl1_cfnorm()

void bl1_cfnorm ( int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  norm 
)
122 {
123  scomplex* a_ij;
124  float sum;
125  int lda, inca;
126  int n_iter;
127  int n_elem;
128  int i, j;
129 
130  // Return early if possible.
131  if ( bl1_zero_dim2( m, n ) ) return;
132 
133  // Handle cases where A is a vector separately.
134  if ( bl1_is_vector( m, n ) )
135  {
136  // Initialize with values appropriate for vectors.
137  n_iter = 1;
138  n_elem = bl1_vector_dim( m, n );
139  lda = 1; // multiplied by zero when n_iter == 1; not needed.
140  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
141  }
142  else // matrix case
143  {
144  // Initialize with optimal values for column-major storage.
145  n_iter = n;
146  n_elem = m;
147  lda = a_cs;
148  inca = a_rs;
149 
150  // An optimization: if A is row-major, then let's access the matrix by
151  // rows instead of by columns for increased spatial locality.
152  if ( bl1_is_row_storage( a_rs, a_cs ) )
153  {
154  bl1_swap_ints( n_iter, n_elem );
155  bl1_swap_ints( lda, inca );
156  }
157  }
158 
159  // Initialize the accumulator variable.
160  sum = 0.0F;
161 
162  for ( j = 0; j < n_iter; j++ )
163  {
164  for ( i = 0; i < n_elem; i++ )
165  {
166  a_ij = a + i*inca + j*lda;
167  sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
168  }
169  }
170 
171  // Compute the norm and store the result.
172  *norm = ( float ) sqrt( sum );
173 }

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_Norm_frob().

◆ bl1_cinvscalm()

void bl1_cinvscalm ( conj1_t  conj,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)
170 {
171  scomplex alpha_inv;
172  scomplex* a_begin;
173  int lda, inca;
174  int n_iter;
175  int n_elem;
176  int j;
177 
178  // Return early if possible.
179  if ( bl1_zero_dim2( m, n ) ) return;
180  if ( bl1_ceq1( alpha ) ) return;
181 
182  // Handle cases where A is a vector to ensure that the underlying axpy
183  // gets invoked only once.
184  if ( bl1_is_vector( m, n ) )
185  {
186  // Initialize with values appropriate for a vector.
187  n_iter = 1;
188  n_elem = bl1_vector_dim( m, n );
189  lda = 1; // multiplied by zero when n_iter == 1; not needed.
190  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
191  }
192  else // matrix case
193  {
194  // Initialize with optimal values for column-major storage.
195  n_iter = n;
196  n_elem = m;
197  lda = a_cs;
198  inca = a_rs;
199 
200  // An optimization: if A is row-major, then let's access the matrix
201  // by rows instead of by columns to increase spatial locality.
202  if ( bl1_is_row_storage( a_rs, a_cs ) )
203  {
204  bl1_swap_ints( n_iter, n_elem );
205  bl1_swap_ints( lda, inca );
206  }
207  }
208 
209  bl1_cinvert2s( conj, alpha, &alpha_inv );
210 
211  for ( j = 0; j < n_iter; j++ )
212  {
213  a_begin = a + j*lda;
214 
215  bl1_cscal( n_elem,
216  &alpha_inv,
217  a_begin, inca );
218  }
219 }
void bl1_cinvert2s(conj1_t conj, scomplex *alpha, scomplex *beta)
Definition: bl1_invert2s.c:27

References bl1_cinvert2s(), bl1_cscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_cinvscalv()

void bl1_cinvscalv ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)

◆ bl1_cnrm2()

void bl1_cnrm2 ( int  n,
scomplex *  x,
int  incx,
float *  norm 
)
36 {
37 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
38  *norm = cblas_scnrm2( n,
39  x, incx );
40 #else
41  *norm = F77_scnrm2( &n,
42  x, &incx );
43 #endif
44 }
float F77_scnrm2(int *n, scomplex *x, int *incx)
float cblas_scnrm2(const int N, const void *X, const int incX)

References cblas_scnrm2(), and F77_scnrm2().

Referenced by FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_l_opc(), FLA_Househ3UD_UT_opc(), and FLA_Nrm2_external().

◆ bl1_cscal()

void bl1_cscal ( int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)
53 {
54 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
55  cblas_cscal( n,
56  alpha,
57  x, incx );
58 #else
59  F77_cscal( &n,
60  alpha,
61  x, &incx );
62 #endif
63 }
void F77_cscal(int *n, scomplex *alpha, scomplex *y, int *incy)
void cblas_cscal(const int N, const void *alpha, void *X, const int incX)

References cblas_cscal(), and F77_cscal().

Referenced by bl1_caxpysmt(), bl1_caxpysv(), bl1_cinvscalm(), bl1_cinvscalv(), bl1_cscalm(), bl1_cscalmr(), bl1_cscalv(), and FLA_SA_LU_unb().

◆ bl1_cscalm()

void bl1_cscalm ( conj1_t  conj,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)
170 {
171  scomplex alpha_conj;
172  scomplex* a_begin;
173  int lda, inca;
174  int n_iter;
175  int n_elem;
176  int j;
177 
178  // Return early if possible.
179  if ( bl1_zero_dim2( m, n ) ) return;
180  if ( bl1_ceq1( alpha ) ) return;
181 
182  // Handle cases where A is a vector to ensure that the underlying axpy
183  // gets invoked only once.
184  if ( bl1_is_vector( m, n ) )
185  {
186  // Initialize with values appropriate for a vector.
187  n_iter = 1;
188  n_elem = bl1_vector_dim( m, n );
189  lda = 1; // multiplied by zero when n_iter == 1; not needed.
190  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
191  }
192  else // matrix case
193  {
194  // Initialize with optimal values for column-major storage.
195  n_iter = n;
196  n_elem = m;
197  lda = a_cs;
198  inca = a_rs;
199 
200  // An optimization: if A is row-major, then let's access the matrix
201  // by rows instead of by columns to increase spatial locality.
202  if ( bl1_is_row_storage( a_rs, a_cs ) )
203  {
204  bl1_swap_ints( n_iter, n_elem );
205  bl1_swap_ints( lda, inca );
206  }
207  }
208 
209  bl1_ccopys( conj, alpha, &alpha_conj );
210 
211  for ( j = 0; j < n_iter; j++ )
212  {
213  a_begin = a + j*lda;
214 
215  bl1_cscal( n_elem,
216  &alpha_conj,
217  a_begin, inca );
218  }
219 }

References bl1_cscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_Lyap_n_opc_var4(), FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_cscalmr()

void bl1_cscalmr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)
182 {
183  scomplex* a_begin;
184  int lda, inca;
185  int n_iter;
186  int n_elem_max;
187  int n_elem;
188  int j;
189 
190  // Return early if possible.
191  if ( bl1_zero_dim2( m, n ) ) return;
192  if ( bl1_ceq1( alpha ) ) return;
193 
194  // We initialize for column-major.
195  n_iter = n;
196  n_elem_max = m;
197  lda = a_cs;
198  inca = a_rs;
199 
200  // An optimization: if A is row-major, then let's access the matrix
201  // by rows instead of by columns to increase spatial locality.
202  if ( bl1_is_row_storage( a_rs, a_cs ) )
203  {
204  bl1_swap_ints( n_iter, n_elem_max );
205  bl1_swap_ints( lda, inca );
206  bl1_toggle_uplo( uplo );
207  }
208 
209  if ( bl1_is_upper( uplo ) )
210  {
211  for ( j = 0; j < n_iter; j++ )
212  {
213  n_elem = bl1_min( j + 1, n_elem_max );
214  a_begin = a + j*lda;
215 
216  bl1_cscal( n_elem,
217  alpha,
218  a_begin, inca );
219  }
220  }
221  else // if ( bl1_is_lower( uplo ) )
222  {
223  for ( j = 0; j < n_iter; j++ )
224  {
225  n_elem = bl1_max( 0, n_elem_max - j );
226  a_begin = a + j*lda + j*inca;
227 
228  if ( n_elem <= 0 ) break;
229 
230  bl1_cscal( n_elem,
231  alpha,
232  a_begin, inca );
233  }
234  }
235 }

References bl1_cscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

◆ bl1_cscalv()

void bl1_cscalv ( conj1_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)

◆ bl1_cscopymr()

void bl1_cscopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
529 {
530  scomplex* a_begin;
531  float* b_begin;
532  int lda, inca;
533  int ldb, incb;
534  int n_iter;
535  int n_elem_max;
536  int n_elem;
537  int j;
538 
539  // Return early if possible.
540  if ( bl1_zero_dim2( m, n ) ) return;
541 
542  // We initialize for column-major.
543  n_iter = n;
544  n_elem_max = m;
545  lda = a_cs;
546  inca = a_rs;
547  ldb = b_cs;
548  incb = b_rs;
549 
550  // An optimization: if B is row-major, then let's access the matrix
551  // by rows instead of by columns for increased spatial locality.
552  if ( bl1_is_row_storage( b_rs, b_cs ) )
553  {
554  bl1_swap_ints( n_iter, n_elem_max );
555  bl1_swap_ints( lda, inca );
556  bl1_swap_ints( ldb, incb );
557  bl1_toggle_uplo( uplo );
558  }
559 
560 
561  if ( bl1_is_upper( uplo ) )
562  {
563  for ( j = 0; j < n_iter; j++ )
564  {
565  n_elem = bl1_min( j + 1, n_elem_max );
566  a_begin = a + j*lda;
567  b_begin = b + j*ldb;
568 
569  bl1_cscopyv( BLIS1_NO_CONJUGATE,
570  n_elem,
571  a_begin, inca,
572  b_begin, incb );
573  }
574  }
575  else // if ( bl1_is_lower( uplo ) )
576  {
577  for ( j = 0; j < n_iter; j++ )
578  {
579  n_elem = bl1_max( 0, n_elem_max - j );
580  a_begin = a + j*lda + j*inca;
581  b_begin = b + j*ldb + j*incb;
582 
583  if ( n_elem <= 0 ) break;
584 
585  bl1_cscopyv( BLIS1_NO_CONJUGATE,
586  n_elem,
587  a_begin, inca,
588  b_begin, incb );
589  }
590  }
591 }
void bl1_cscopyv(conj1_t conj, int m, scomplex *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:146

References bl1_cscopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_cscopymrt()

void bl1_cscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
1285 {
1286  scomplex* a_begin;
1287  float* b_begin;
1288  int lda, inca;
1289  int ldb, incb;
1290  int n_iter;
1291  int n_elem;
1292  int n_elem_max;
1293  int n_elem_is_descending;
1294  int j;
1295  conj1_t conj;
1296 
1297  // Return early if possible.
1298  if ( bl1_zero_dim2( m, n ) ) return;
1299 
1300  // Initialize variables based on storage format of B and value of uplo.
1301  if ( bl1_is_col_storage( b_rs, b_cs ) )
1302  {
1303  if ( bl1_is_lower( uplo ) )
1304  {
1305  n_iter = bl1_min( m, n );
1306  n_elem_max = m;
1307  lda = a_cs;
1308  inca = a_rs;
1309  ldb = b_cs;
1310  incb = b_rs;
1311  n_elem_is_descending = TRUE;
1312  }
1313  else // if ( bl1_is_upper( uplo ) )
1314  {
1315  n_iter = n;
1316  n_elem_max = bl1_min( m, n );
1317  lda = a_cs;
1318  inca = a_rs;
1319  ldb = b_cs;
1320  incb = b_rs;
1321  n_elem_is_descending = FALSE;
1322  }
1323  }
1324  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1325  {
1326  if ( bl1_is_lower( uplo ) )
1327  {
1328  n_iter = m;
1329  n_elem_max = bl1_min( m, n );
1330  lda = a_rs;
1331  inca = a_cs;
1332  ldb = b_rs;
1333  incb = b_cs;
1334  n_elem_is_descending = FALSE;
1335  }
1336  else // if ( bl1_is_upper( uplo ) )
1337  {
1338  n_iter = bl1_min( m, n );
1339  n_elem_max = n;
1340  lda = a_rs;
1341  inca = a_cs;
1342  ldb = b_rs;
1343  incb = b_cs;
1344  n_elem_is_descending = TRUE;
1345  }
1346  }
1347 
1348  // Swap lda and inca if we're doing a transpose.
1349  if ( bl1_does_trans( trans ) )
1350  {
1351  bl1_swap_ints( lda, inca );
1352  }
1353 
1354  // Extract conj component from trans parameter.
1355  conj = bl1_proj_trans1_to_conj( trans );
1356 
1357  // Choose the loop based on whether n_elem will be shrinking or growing
1358  // with each iteration.
1359  if ( n_elem_is_descending )
1360  {
1361  for ( j = 0; j < n_iter; j++ )
1362  {
1363  n_elem = n_elem_max - j;
1364  a_begin = a + j*lda + j*inca;
1365  b_begin = b + j*ldb + j*incb;
1366 
1367  bl1_cscopyv( conj,
1368  n_elem,
1369  a_begin, inca,
1370  b_begin, incb );
1371  }
1372  }
1373  else // if ( n_elem_is_ascending )
1374  {
1375  for ( j = 0; j < n_iter; j++ )
1376  {
1377  n_elem = bl1_min( j + 1, n_elem_max );
1378  a_begin = a + j*lda;
1379  b_begin = b + j*ldb;
1380 
1381  bl1_cscopyv( conj,
1382  n_elem,
1383  a_begin, inca,
1384  b_begin, incb );
1385  }
1386  }
1387 }

References bl1_cscopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_cscopymt()

void bl1_cscopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
629 {
630  scomplex* a_begin;
631  float* b_begin;
632  int lda, inca;
633  int ldb, incb;
634  int n_iter;
635  int n_elem;
636  int j;
637  conj1_t conj;
638 
639  // Return early if possible.
640  if ( bl1_zero_dim2( m, n ) ) return;
641 
642  // Handle cases where A and B are vectors to ensure that the underlying copy
643  // gets invoked only once.
644  if ( bl1_is_vector( m, n ) )
645  {
646  // Initialize with values appropriate for vectors.
647  n_iter = 1;
648  n_elem = bl1_vector_dim( m, n );
649  lda = 1; // multiplied by zero when n_iter == 1; not needed.
650  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
651  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
652  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
653  }
654  else // matrix case
655  {
656  // Initialize with optimal values for column-major storage of B.
657  n_iter = n;
658  n_elem = m;
659  lda = a_cs;
660  inca = a_rs;
661  ldb = b_cs;
662  incb = b_rs;
663 
664  // Handle the transposition of A.
665  if ( bl1_does_trans( trans ) )
666  {
667  bl1_swap_ints( lda, inca );
668  }
669 
670  // An optimization: if B is row-major, then let's access the matrix by rows
671  // instead of by columns for increased spatial locality.
672  if ( bl1_is_row_storage( b_rs, b_cs ) )
673  {
674  bl1_swap_ints( n_iter, n_elem );
675  bl1_swap_ints( lda, inca );
676  bl1_swap_ints( ldb, incb );
677  }
678  }
679 
680  // Extract conj component from trans parameter.
681  conj = bl1_proj_trans1_to_conj( trans );
682 
683  for ( j = 0; j < n_iter; ++j )
684  {
685  a_begin = a + j*lda;
686  b_begin = b + j*ldb;
687 
688  bl1_cscopyv( conj,
689  n_elem,
690  a_begin, inca,
691  b_begin, incb );
692  }
693 }

References bl1_cscopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_cscopyv()

void bl1_cscopyv ( conj1_t  conj,
int  m,
scomplex *  x,
int  incx,
float *  y,
int  incy 
)
147 {
148  scomplex* chi;
149  float* psi;
150  int i;
151 
152  // Return early if possible.
153  if ( bl1_zero_dim1( m ) ) return;
154 
155  // Initialize pointers.
156  chi = x;
157  psi = y;
158 
159  for ( i = 0; i < m; ++i )
160  {
161  *psi = chi->real;
162 
163  chi += incx;
164  psi += incy;
165  }
166 }

References bl1_zero_dim1(), i, and scomplex::real.

Referenced by bl1_cscopymr(), bl1_cscopymrt(), and bl1_cscopymt().

◆ bl1_csinvscalm()

void bl1_csinvscalm ( conj1_t  conj,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)
118 {
119  float alpha_inv;
120  scomplex* a_begin;
121  int lda, inca;
122  int n_iter;
123  int n_elem;
124  int j;
125 
126  // Return early if possible.
127  if ( bl1_zero_dim2( m, n ) ) return;
128  if ( bl1_seq1( alpha ) ) return;
129 
130  // Handle cases where A is a vector to ensure that the underlying axpy
131  // gets invoked only once.
132  if ( bl1_is_vector( m, n ) )
133  {
134  // Initialize with values appropriate for a vector.
135  n_iter = 1;
136  n_elem = bl1_vector_dim( m, n );
137  lda = 1; // multiplied by zero when n_iter == 1; not needed.
138  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
139  }
140  else // matrix case
141  {
142  // Initialize with optimal values for column-major storage.
143  n_iter = n;
144  n_elem = m;
145  lda = a_cs;
146  inca = a_rs;
147 
148  // An optimization: if A is row-major, then let's access the matrix
149  // by rows instead of by columns to increase spatial locality.
150  if ( bl1_is_row_storage( a_rs, a_cs ) )
151  {
152  bl1_swap_ints( n_iter, n_elem );
153  bl1_swap_ints( lda, inca );
154  }
155  }
156 
157  bl1_sinvert2s( conj, alpha, &alpha_inv );
158 
159  for ( j = 0; j < n_iter; j++ )
160  {
161  a_begin = a + j*lda;
162 
163  bl1_csscal( n_elem,
164  &alpha_inv,
165  a_begin, inca );
166  }
167 }
void bl1_csscal(int n, float *alpha, scomplex *x, int incx)
Definition: bl1_scal.c:39
void bl1_sinvert2s(conj1_t conj, float *alpha, float *beta)
Definition: bl1_invert2s.c:13

References bl1_csscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_sinvert2s(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_csinvscalv()

void bl1_csinvscalv ( conj1_t  conj,
int  n,
float *  alpha,
scomplex *  x,
int  incx 
)
40 {
41  float alpha_inv;
42 
43  if ( bl1_seq1( alpha ) ) return;
44 
45  alpha_inv = 1.0F / *alpha;
46 
47  bl1_csscal( n,
48  &alpha_inv,
49  x, incx );
50 }

References bl1_csscal().

◆ bl1_csscal()

void bl1_csscal ( int  n,
float *  alpha,
scomplex *  x,
int  incx 
)
40 {
41 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
42  cblas_csscal( n,
43  *alpha,
44  x, incx );
45 #else
46  F77_csscal( &n,
47  alpha,
48  x, &incx );
49 #endif
50 }
void F77_csscal(int *n, float *alpha, scomplex *y, int *incy)
void cblas_csscal(const int N, const float alpha, void *X, const int incX)

References cblas_csscal(), and F77_csscal().

Referenced by bl1_csinvscalm(), bl1_csinvscalv(), bl1_csscalm(), bl1_csscalmr(), and bl1_csscalv().

◆ bl1_csscalm()

void bl1_csscalm ( conj1_t  conj,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)
118 {
119  float alpha_conj;
120  scomplex* a_begin;
121  int lda, inca;
122  int n_iter;
123  int n_elem;
124  int j;
125 
126  // Return early if possible.
127  if ( bl1_zero_dim2( m, n ) ) return;
128  if ( bl1_seq1( alpha ) ) return;
129 
130  // Handle cases where A is a vector to ensure that the underlying axpy
131  // gets invoked only once.
132  if ( bl1_is_vector( m, n ) )
133  {
134  // Initialize with values appropriate for a vector.
135  n_iter = 1;
136  n_elem = bl1_vector_dim( m, n );
137  lda = 1; // multiplied by zero when n_iter == 1; not needed.
138  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
139  }
140  else // matrix case
141  {
142  // Initialize with optimal values for column-major storage.
143  n_iter = n;
144  n_elem = m;
145  lda = a_cs;
146  inca = a_rs;
147 
148  // An optimization: if A is row-major, then let's access the matrix
149  // by rows instead of by columns to increase spatial locality.
150  if ( bl1_is_row_storage( a_rs, a_cs ) )
151  {
152  bl1_swap_ints( n_iter, n_elem );
153  bl1_swap_ints( lda, inca );
154  }
155  }
156 
157  bl1_scopys( conj, alpha, &alpha_conj );
158 
159  for ( j = 0; j < n_iter; j++ )
160  {
161  a_begin = a + j*lda;
162 
163  bl1_csscal( n_elem,
164  &alpha_conj,
165  a_begin, inca );
166  }
167 }

References bl1_csscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_csscalmr()

void bl1_csscalmr ( uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)
126 {
127  scomplex* a_begin;
128  int lda, inca;
129  int n_iter;
130  int n_elem_max;
131  int n_elem;
132  int j;
133 
134  // Return early if possible.
135  if ( bl1_zero_dim2( m, n ) ) return;
136  if ( bl1_seq1( alpha ) ) return;
137 
138  // We initialize for column-major.
139  n_iter = n;
140  n_elem_max = m;
141  lda = a_cs;
142  inca = a_rs;
143 
144  // An optimization: if A is row-major, then let's access the matrix
145  // by rows instead of by columns to increase spatial locality.
146  if ( bl1_is_row_storage( a_rs, a_cs ) )
147  {
148  bl1_swap_ints( n_iter, n_elem_max );
149  bl1_swap_ints( lda, inca );
150  bl1_toggle_uplo( uplo );
151  }
152 
153  if ( bl1_is_upper( uplo ) )
154  {
155  for ( j = 0; j < n_iter; j++ )
156  {
157  n_elem = bl1_min( j + 1, n_elem_max );
158  a_begin = a + j*lda;
159 
160  bl1_csscal( n_elem,
161  alpha,
162  a_begin, inca );
163  }
164  }
165  else // if ( bl1_is_lower( uplo ) )
166  {
167  for ( j = 0; j < n_iter; j++ )
168  {
169  n_elem = bl1_max( 0, n_elem_max - j );
170  a_begin = a + j*lda + j*inca;
171 
172  if ( n_elem <= 0 ) break;
173 
174  bl1_csscal( n_elem,
175  alpha,
176  a_begin, inca );
177  }
178  }
179 }

References bl1_csscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_cher2k(), bl1_cherk(), and FLA_Scalr_external().

◆ bl1_csscalv()

void bl1_csscalv ( conj1_t  conj,
int  n,
float *  alpha,
scomplex *  x,
int  incx 
)
36 {
37  // Return early if possible.
38  if ( bl1_zero_dim1( n ) ) return;
39  if ( bl1_seq1( alpha ) ) return;
40 
41  bl1_csscal( n,
42  alpha,
43  x, incx );
44 }

References bl1_csscal(), and bl1_zero_dim1().

Referenced by bl1_csapdiagmv(), FLA_Bsvd_ext_opc_var1(), and FLA_Bsvd_v_opc_var1().

◆ bl1_cswap()

void bl1_cswap ( int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)
40 {
41 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
42  cblas_cswap( n,
43  x, incx,
44  y, incy );
45 #else
46  F77_cswap( &n,
47  x, &incx,
48  y, &incy );
49 #endif
50 }
void F77_cswap(int *n, scomplex *x, int *incx, scomplex *y, int *incy)
void cblas_cswap(const int N, void *X, const int incX, void *Y, const int incY)

References cblas_cswap(), and F77_cswap().

Referenced by bl1_cswapmt(), bl1_cswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

◆ bl1_cswapmt()

void bl1_cswapmt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
148 {
149  scomplex* a_begin;
150  scomplex* b_begin;
151  int lda, inca;
152  int ldb, incb;
153  int n_iter;
154  int n_elem;
155  int j;
156 
157  // Return early if possible.
158  if ( bl1_zero_dim2( m, n ) ) return;
159 
160  // Handle cases where A and B are vectors to ensure that the underlying copy
161  // gets invoked only once.
162  if ( bl1_is_vector( m, n ) )
163  {
164  // Initialize with values appropriate for vectors.
165  n_iter = 1;
166  n_elem = bl1_vector_dim( m, n );
167  lda = 1; // multiplied by zero when n_iter == 1; not needed.
168  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
169  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
170  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
171  }
172  else // matrix case
173  {
174  // Initialize with optimal values for column-major storage.
175  n_iter = n;
176  n_elem = m;
177  lda = a_cs;
178  inca = a_rs;
179  ldb = b_cs;
180  incb = b_rs;
181 
182  // Handle the transposition of A.
183  if ( bl1_does_trans( trans ) )
184  {
185  bl1_swap_ints( lda, inca );
186  }
187 
188  // An optimization: if B is row-major and if A is effectively row-major
189  // after a possible transposition, then let's access the matrix by rows
190  // instead of by columns for increased spatial locality.
191  if ( bl1_is_row_storage( b_rs, b_cs ) )
192  {
193  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
194  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
195  {
196  bl1_swap_ints( n_iter, n_elem );
197  bl1_swap_ints( lda, inca );
198  bl1_swap_ints( ldb, incb );
199  }
200  }
201  }
202 
203  for ( j = 0; j < n_iter; j++ )
204  {
205  a_begin = a + j*lda;
206  b_begin = b + j*ldb;
207 
208  bl1_cswap( n_elem,
209  a_begin, inca,
210  b_begin, incb );
211 
212  if ( bl1_does_conj( trans ) )
213  bl1_cconjv( n_elem,
214  a_begin, inca );
215 
216  if ( bl1_does_conj( trans ) )
217  bl1_cconjv( n_elem,
218  b_begin, incb );
219  }
220 }
void bl1_cswap(int n, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_swap.c:39

References bl1_cconjv(), bl1_cswap(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

◆ bl1_cswapv()

void bl1_cswapv ( int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)
34 {
35  // Return early if possible.
36  if ( bl1_zero_dim1( n ) ) return;
37 
38  bl1_cswap( n,
39  x, incx,
40  y, incy );
41 }

References bl1_cswap(), and bl1_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_opc(), and FLA_Sort_bsvd_ext_f_opc().

◆ bl1_czcopymr()

void bl1_czcopymr ( uplo1_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
1117 {
1118  scomplex* a_begin;
1119  dcomplex* b_begin;
1120  int lda, inca;
1121  int ldb, incb;
1122  int n_iter;
1123  int n_elem_max;
1124  int n_elem;
1125  int j;
1126 
1127  // Return early if possible.
1128  if ( bl1_zero_dim2( m, n ) ) return;
1129 
1130  // We initialize for column-major.
1131  n_iter = n;
1132  n_elem_max = m;
1133  lda = a_cs;
1134  inca = a_rs;
1135  ldb = b_cs;
1136  incb = b_rs;
1137 
1138  // An optimization: if B is row-major, then let's access the matrix
1139  // by rows instead of by columns for increased spatial locality.
1140  if ( bl1_is_row_storage( b_rs, b_cs ) )
1141  {
1142  bl1_swap_ints( n_iter, n_elem_max );
1143  bl1_swap_ints( lda, inca );
1144  bl1_swap_ints( ldb, incb );
1145  bl1_toggle_uplo( uplo );
1146  }
1147 
1148 
1149  if ( bl1_is_upper( uplo ) )
1150  {
1151  for ( j = 0; j < n_iter; j++ )
1152  {
1153  n_elem = bl1_min( j + 1, n_elem_max );
1154  a_begin = a + j*lda;
1155  b_begin = b + j*ldb;
1156 
1157  bl1_czcopyv( BLIS1_NO_CONJUGATE,
1158  n_elem,
1159  a_begin, inca,
1160  b_begin, incb );
1161  }
1162  }
1163  else // if ( bl1_is_lower( uplo ) )
1164  {
1165  for ( j = 0; j < n_iter; j++ )
1166  {
1167  n_elem = bl1_max( 0, n_elem_max - j );
1168  a_begin = a + j*lda + j*inca;
1169  b_begin = b + j*ldb + j*incb;
1170 
1171  if ( n_elem <= 0 ) break;
1172 
1173  bl1_czcopyv( BLIS1_NO_CONJUGATE,
1174  n_elem,
1175  a_begin, inca,
1176  b_begin, incb );
1177  }
1178  }
1179 }
void bl1_czcopyv(conj1_t conj, int m, scomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:304
Definition: blis_type_defs.h:138

References bl1_czcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_czcopymrt()

void bl1_czcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
1603 {
1604  scomplex* a_begin;
1605  dcomplex* b_begin;
1606  int lda, inca;
1607  int ldb, incb;
1608  int n_iter;
1609  int n_elem;
1610  int n_elem_max;
1611  int n_elem_is_descending;
1612  int j;
1613  conj1_t conj;
1614 
1615  // Return early if possible.
1616  if ( bl1_zero_dim2( m, n ) ) return;
1617 
1618  // Initialize variables based on storage format of B and value of uplo.
1619  if ( bl1_is_col_storage( b_rs, b_cs ) )
1620  {
1621  if ( bl1_is_lower( uplo ) )
1622  {
1623  n_iter = bl1_min( m, n );
1624  n_elem_max = m;
1625  lda = a_cs;
1626  inca = a_rs;
1627  ldb = b_cs;
1628  incb = b_rs;
1629  n_elem_is_descending = TRUE;
1630  }
1631  else // if ( bl1_is_upper( uplo ) )
1632  {
1633  n_iter = n;
1634  n_elem_max = bl1_min( m, n );
1635  lda = a_cs;
1636  inca = a_rs;
1637  ldb = b_cs;
1638  incb = b_rs;
1639  n_elem_is_descending = FALSE;
1640  }
1641  }
1642  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1643  {
1644  if ( bl1_is_lower( uplo ) )
1645  {
1646  n_iter = m;
1647  n_elem_max = bl1_min( m, n );
1648  lda = a_rs;
1649  inca = a_cs;
1650  ldb = b_rs;
1651  incb = b_cs;
1652  n_elem_is_descending = FALSE;
1653  }
1654  else // if ( bl1_is_upper( uplo ) )
1655  {
1656  n_iter = bl1_min( m, n );
1657  n_elem_max = n;
1658  lda = a_rs;
1659  inca = a_cs;
1660  ldb = b_rs;
1661  incb = b_cs;
1662  n_elem_is_descending = TRUE;
1663  }
1664  }
1665 
1666  // Swap lda and inca if we're doing a transpose.
1667  if ( bl1_does_trans( trans ) )
1668  {
1669  bl1_swap_ints( lda, inca );
1670  }
1671 
1672  // Extract conj component from trans parameter.
1673  conj = bl1_proj_trans1_to_conj( trans );
1674 
1675  // Choose the loop based on whether n_elem will be shrinking or growing
1676  // with each iteration.
1677  if ( n_elem_is_descending )
1678  {
1679  for ( j = 0; j < n_iter; j++ )
1680  {
1681  n_elem = n_elem_max - j;
1682  a_begin = a + j*lda + j*inca;
1683  b_begin = b + j*ldb + j*incb;
1684 
1685  bl1_czcopyv( conj,
1686  n_elem,
1687  a_begin, inca,
1688  b_begin, incb );
1689  }
1690  }
1691  else // if ( n_elem_is_ascending )
1692  {
1693  for ( j = 0; j < n_iter; j++ )
1694  {
1695  n_elem = bl1_min( j + 1, n_elem_max );
1696  a_begin = a + j*lda;
1697  b_begin = b + j*ldb;
1698 
1699  bl1_czcopyv( conj,
1700  n_elem,
1701  a_begin, inca,
1702  b_begin, incb );
1703  }
1704  }
1705 }

References bl1_czcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_czcopymt()

void bl1_czcopymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
1235 {
1236  scomplex* a_begin;
1237  dcomplex* b_begin;
1238  int lda, inca;
1239  int ldb, incb;
1240  int n_iter;
1241  int n_elem;
1242  int j;
1243  conj1_t conj;
1244 
1245  // Return early if possible.
1246  if ( bl1_zero_dim2( m, n ) ) return;
1247 
1248  // Handle cases where A and B are vectors to ensure that the underlying copy
1249  // gets invoked only once.
1250  if ( bl1_is_vector( m, n ) )
1251  {
1252  // Initialize with values appropriate for vectors.
1253  n_iter = 1;
1254  n_elem = bl1_vector_dim( m, n );
1255  lda = 1; // multiplied by zero when n_iter == 1; not needed.
1256  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1257  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1258  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
1259  }
1260  else // matrix case
1261  {
1262  // Initialize with optimal values for column-major storage of B.
1263  n_iter = n;
1264  n_elem = m;
1265  lda = a_cs;
1266  inca = a_rs;
1267  ldb = b_cs;
1268  incb = b_rs;
1269 
1270  // Handle the transposition of A.
1271  if ( bl1_does_trans( trans ) )
1272  {
1273  bl1_swap_ints( lda, inca );
1274  }
1275 
1276  // An optimization: if B is row-major, then let's access the matrix by rows
1277  // instead of by columns for increased spatial locality.
1278  if ( bl1_is_row_storage( b_rs, b_cs ) )
1279  {
1280  bl1_swap_ints( n_iter, n_elem );
1281  bl1_swap_ints( lda, inca );
1282  bl1_swap_ints( ldb, incb );
1283  }
1284  }
1285 
1286  // Extract conj component from trans parameter.
1287  conj = bl1_proj_trans1_to_conj( trans );
1288 
1289  for ( j = 0; j < n_iter; ++j )
1290  {
1291  a_begin = a + j*lda;
1292  b_begin = b + j*ldb;
1293 
1294  bl1_czcopyv( conj,
1295  n_elem,
1296  a_begin, inca,
1297  b_begin, incb );
1298  }
1299 }

References bl1_czcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_czcopyv()

void bl1_czcopyv ( conj1_t  conj,
int  m,
scomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
305 {
306  scomplex* chi;
307  dcomplex* psi;
308  int i;
309 
310  // Return early if possible.
311  if ( bl1_zero_dim1( m ) ) return;
312 
313  // Initialize pointers.
314  chi = x;
315  psi = y;
316 
317  for ( i = 0; i < m; ++i )
318  {
319  psi->real = chi->real;
320  psi->imag = chi->imag;
321 
322  chi += incx;
323  psi += incy;
324  }
325 
326  if ( bl1_is_conj( conj ) )
327  bl1_zconjv( m,
328  y, incy );
329 }
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References bl1_is_conj(), bl1_zconjv(), bl1_zero_dim1(), i, scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.

Referenced by bl1_czcopymr(), bl1_czcopymrt(), and bl1_czcopymt().

◆ bl1_damax()

void bl1_damax ( int  n,
double *  x,
int  incx,
int *  index 
)
25 {
26 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
27  *index = cblas_idamax( n,
28  x, incx );
29 #else
30  *index = F77_idamax( &n,
31  x, &incx ) - 1;
32 #endif
33 }
int F77_idamax(int *n, double *x, int *incx)
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX)

References cblas_idamax(), and F77_idamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_LU_piv_opd_var5(), and FLA_SA_LU_unb().

◆ bl1_dasum()

void bl1_dasum ( int  n,
double *  x,
int  incx,
double *  norm 
)
25 {
26 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
27  *norm = cblas_dasum( n,
28  x, incx );
29 #else
30  *norm = F77_dasum( &n,
31  x, &incx );
32 #endif
33 }
double F77_dasum(int *n, double *x, int *incx)
double cblas_dasum(const int N, const double *X, const int incX)

References cblas_dasum(), and F77_dasum().

Referenced by FLA_Asum_external().

◆ bl1_daxpy()

void bl1_daxpy ( int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy 
)
29 {
30 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
31  cblas_daxpy( n,
32  *alpha,
33  x, incx,
34  y, incy );
35 #else
36  F77_daxpy( &n,
37  alpha,
38  x, &incx,
39  y, &incy );
40 #endif
41 }
void F77_daxpy(int *n, double *alpha, double *x, int *incx, double *y, int *incy)
void cblas_daxpy(const int N, const double alpha, const double *X, const int incX, double *Y, const int incY)

References cblas_daxpy(), and F77_daxpy().

Referenced by bl1_daxpymt(), bl1_daxpysmt(), bl1_daxpysv(), and bl1_daxpyv().

◆ bl1_daxpymrt()

void bl1_daxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
121 {
122  double* a_begin;
123  double* b_begin;
124  int lda, inca;
125  int ldb, incb;
126  int n_iter;
127  int n_elem;
128  int n_elem_max;
129  int n_elem_is_descending;
130  int j;
131  conj1_t conj;
132 
133  // Return early if possible.
134  if ( bl1_zero_dim2( m, n ) ) return;
135 
136  // Initialize variables based on storage format of B and value of uplo.
137  if ( bl1_is_col_storage( b_rs, b_cs ) )
138  {
139  if ( bl1_is_lower( uplo ) )
140  {
141  n_iter = bl1_min( m, n );
142  n_elem_max = m;
143  lda = a_cs;
144  inca = a_rs;
145  ldb = b_cs;
146  incb = b_rs;
147  n_elem_is_descending = TRUE;
148  }
149  else // if ( bl1_is_upper( uplo ) )
150  {
151  n_iter = n;
152  n_elem_max = bl1_min( m, n );
153  lda = a_cs;
154  inca = a_rs;
155  ldb = b_cs;
156  incb = b_rs;
157  n_elem_is_descending = FALSE;
158  }
159  }
160  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
161  {
162  if ( bl1_is_lower( uplo ) )
163  {
164  n_iter = m;
165  n_elem_max = bl1_min( m, n );
166  lda = a_rs;
167  inca = a_cs;
168  ldb = b_rs;
169  incb = b_cs;
170  n_elem_is_descending = FALSE;
171  }
172  else // if ( bl1_is_upper( uplo ) )
173  {
174  n_iter = bl1_min( m, n );
175  n_elem_max = n;
176  lda = a_rs;
177  inca = a_cs;
178  ldb = b_rs;
179  incb = b_cs;
180  n_elem_is_descending = TRUE;
181  }
182  }
183 
184  // Swap lda and inca if we're doing a transpose.
185  if ( bl1_does_trans( trans ) )
186  {
187  bl1_swap_ints( lda, inca );
188  }
189 
190  // Extract conj component from trans parameter.
191  conj = bl1_proj_trans1_to_conj( trans );
192 
193  // Choose the loop based on whether n_elem will be shrinking or growing
194  // with each iteration.
195  if ( n_elem_is_descending )
196  {
197  for ( j = 0; j < n_iter; j++ )
198  {
199  n_elem = n_elem_max - j;
200  a_begin = a + j*lda + j*inca;
201  b_begin = b + j*ldb + j*incb;
202 
203  bl1_daxpyv( conj,
204  n_elem,
205  alpha,
206  a_begin, inca,
207  b_begin, incb );
208  }
209  }
210  else // if ( n_elem_is_ascending )
211  {
212  for ( j = 0; j < n_iter; j++ )
213  {
214  n_elem = bl1_min( j + 1, n_elem_max );
215  a_begin = a + j*lda;
216  b_begin = b + j*ldb;
217 
218  bl1_daxpyv( conj,
219  n_elem,
220  alpha,
221  a_begin, inca,
222  b_begin, incb );
223  }
224  }
225 }
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21

References bl1_daxpyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Axpyrt_external().

◆ bl1_daxpymt()

void bl1_daxpymt ( trans1_t  trans,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
82 {
83  double* a_begin;
84  double* b_begin;
85  int lda, inca;
86  int ldb, incb;
87  int n_iter;
88  int n_elem;
89  int j;
90 
91  // Return early if possible.
92  if ( bl1_zero_dim2( m, n ) ) return;
93 
94  // Handle cases where A and B are vectors to ensure that the underlying axpy
95  // gets invoked only once.
96  if ( bl1_is_vector( m, n ) )
97  {
98  // Initialize with values appropriate for vectors.
99  n_iter = 1;
100  n_elem = bl1_vector_dim( m, n );
101  lda = 1; // multiplied by zero when n_iter == 1; not needed.
102  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
103  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
104  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
105  }
106  else // matrix case
107  {
108  // Initialize with optimal values for column-major storage.
109  n_iter = n;
110  n_elem = m;
111  lda = a_cs;
112  inca = a_rs;
113  ldb = b_cs;
114  incb = b_rs;
115 
116  // Handle the transposition of A.
117  if ( bl1_does_trans( trans ) )
118  {
119  bl1_swap_ints( lda, inca );
120  }
121 
122  // An optimization: if B is row-major and if A is effectively row-major
123  // after a possible transposition, then let's access the matrices by rows
124  // instead of by columns for increased spatial locality.
125  if ( bl1_is_row_storage( b_rs, b_cs ) )
126  {
127  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
128  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
129  {
130  bl1_swap_ints( n_iter, n_elem );
131  bl1_swap_ints( lda, inca );
132  bl1_swap_ints( ldb, incb );
133  }
134  }
135  }
136 
137  for ( j = 0; j < n_iter; j++ )
138  {
139  a_begin = a + j*lda;
140  b_begin = b + j*ldb;
141 
142  bl1_daxpy( n_elem,
143  alpha,
144  a_begin, inca,
145  b_begin, incb );
146  }
147 }
void bl1_daxpy(int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpy.c:28

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_daxpysmt()

void bl1_daxpysmt ( trans1_t  trans,
int  m,
int  n,
double *  alpha0,
double *  alpha1,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  b,
int  b_rs,
int  b_cs 
)
89 {
90  double* a_begin;
91  double* b_begin;
92  double alpha_prod;
93  int lda, inca;
94  int ldb, incb;
95  int n_iter;
96  int n_elem;
97  int j;
98 
99  // Return early if possible.
100  if ( bl1_zero_dim2( m, n ) ) return;
101 
102  alpha_prod = (*alpha0) * (*alpha1);
103 
104  // Handle cases where A and B are vectors to ensure that the underlying axpy
105  // gets invoked only once.
106  if ( bl1_is_vector( m, n ) )
107  {
108  // Initialize with values appropriate for vectors.
109  n_iter = 1;
110  n_elem = bl1_vector_dim( m, n );
111  lda = 1; // multiplied by zero when n_iter == 1; not needed.
112  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
113  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
114  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
115  }
116  else // matrix case
117  {
118  // Initialize with optimal values for column-major storage.
119  n_iter = n;
120  n_elem = m;
121  lda = a_cs;
122  inca = a_rs;
123  ldb = b_cs;
124  incb = b_rs;
125 
126  // Handle the transposition of A.
127  if ( bl1_does_trans( trans ) )
128  {
129  bl1_swap_ints( lda, inca );
130  }
131 
132  // An optimization: if B is row-major and if A is effectively row-major
133  // after a possible transposition, then let's access the matrices by rows
134  // instead of by columns for increased spatial locality.
135  if ( bl1_is_row_storage( b_rs, b_cs ) )
136  {
137  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
138  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
139  {
140  bl1_swap_ints( n_iter, n_elem );
141  bl1_swap_ints( lda, inca );
142  bl1_swap_ints( ldb, incb );
143  }
144  }
145  }
146 
147  for ( j = 0; j < n_iter; j++ )
148  {
149  a_begin = a + j*lda;
150  b_begin = b + j*ldb;
151 
152  bl1_dscal( n_elem,
153  beta,
154  b_begin, incb );
155 
156  bl1_daxpy( n_elem,
157  &alpha_prod,
158  a_begin, inca,
159  b_begin, incb );
160  }
161 }
void bl1_dscal(int n, double *alpha, double *x, int incx)
Definition: bl1_scal.c:26

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_dscal(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

◆ bl1_daxpysv()

void bl1_daxpysv ( int  n,
double *  alpha0,
double *  alpha1,
double *  x,
int  incx,
double *  beta,
double *  y,
int  incy 
)
33 {
34  double alpha_prod;
35 
36  // Return early if possible.
37  if ( bl1_zero_dim1( n ) ) return;
38 
39  alpha_prod = (*alpha0) * (*alpha1);
40 
41  bl1_dscal( n,
42  beta,
43  y, incy );
44 
45  bl1_daxpy( n,
46  &alpha_prod,
47  x, incx,
48  y, incy );
49 }

References bl1_daxpy(), bl1_dscal(), and bl1_zero_dim1().

Referenced by FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().

◆ bl1_daxpyv()

void bl1_daxpyv ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy 
)
22 {
23  bl1_daxpy( n,
24  alpha,
25  x, incx,
26  y, incy );
27 }

References bl1_daxpy().

Referenced by bl1_daxpymrt(), bl1_dtrmvsx(), bl1_dtrsvsx(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_Eig_gest_il_opd_var1(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_il_opd_var4(), FLA_Eig_gest_il_opd_var5(), FLA_Eig_gest_iu_opd_var1(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_iu_opd_var4(), FLA_Eig_gest_iu_opd_var5(), FLA_Eig_gest_nl_opd_var1(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nl_opd_var4(), FLA_Eig_gest_nl_opd_var5(), FLA_Eig_gest_nu_opd_var1(), FLA_Eig_gest_nu_opd_var2(), FLA_Eig_gest_nu_opd_var4(), FLA_Eig_gest_nu_opd_var5(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), and FLA_Tridiag_UT_l_step_opd_var3().

◆ bl1_dccopymr()

void bl1_dccopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
791 {
792  double* a_begin;
793  scomplex* b_begin;
794  int lda, inca;
795  int ldb, incb;
796  int n_iter;
797  int n_elem_max;
798  int n_elem;
799  int j;
800 
801  // Return early if possible.
802  if ( bl1_zero_dim2( m, n ) ) return;
803 
804  // We initialize for column-major.
805  n_iter = n;
806  n_elem_max = m;
807  lda = a_cs;
808  inca = a_rs;
809  ldb = b_cs;
810  incb = b_rs;
811 
812  // An optimization: if B is row-major, then let's access the matrix
813  // by rows instead of by columns for increased spatial locality.
814  if ( bl1_is_row_storage( b_rs, b_cs ) )
815  {
816  bl1_swap_ints( n_iter, n_elem_max );
817  bl1_swap_ints( lda, inca );
818  bl1_swap_ints( ldb, incb );
819  bl1_toggle_uplo( uplo );
820  }
821 
822 
823  if ( bl1_is_upper( uplo ) )
824  {
825  for ( j = 0; j < n_iter; j++ )
826  {
827  n_elem = bl1_min( j + 1, n_elem_max );
828  a_begin = a + j*lda;
829  b_begin = b + j*ldb;
830 
831  bl1_dccopyv( BLIS1_NO_CONJUGATE,
832  n_elem,
833  a_begin, inca,
834  b_begin, incb );
835  }
836  }
837  else // if ( bl1_is_lower( uplo ) )
838  {
839  for ( j = 0; j < n_iter; j++ )
840  {
841  n_elem = bl1_max( 0, n_elem_max - j );
842  a_begin = a + j*lda + j*inca;
843  b_begin = b + j*ldb + j*incb;
844 
845  if ( n_elem <= 0 ) break;
846 
847  bl1_dccopyv( BLIS1_NO_CONJUGATE,
848  n_elem,
849  a_begin, inca,
850  b_begin, incb );
851  }
852  }
853 }
void bl1_dccopyv(conj1_t conj, int m, double *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:214

References bl1_dccopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_dccopymrt()

void bl1_dccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
1073 {
1074  double* a_begin;
1075  scomplex* b_begin;
1076  int lda, inca;
1077  int ldb, incb;
1078  int n_iter;
1079  int n_elem;
1080  int n_elem_max;
1081  int n_elem_is_descending;
1082  int j;
1083  conj1_t conj;
1084 
1085  // Return early if possible.
1086  if ( bl1_zero_dim2( m, n ) ) return;
1087 
1088  // Initialize variables based on storage format of B and value of uplo.
1089  if ( bl1_is_col_storage( b_rs, b_cs ) )
1090  {
1091  if ( bl1_is_lower( uplo ) )
1092  {
1093  n_iter = bl1_min( m, n );
1094  n_elem_max = m;
1095  lda = a_cs;
1096  inca = a_rs;
1097  ldb = b_cs;
1098  incb = b_rs;
1099  n_elem_is_descending = TRUE;
1100  }
1101  else // if ( bl1_is_upper( uplo ) )
1102  {
1103  n_iter = n;
1104  n_elem_max = bl1_min( m, n );
1105  lda = a_cs;
1106  inca = a_rs;
1107  ldb = b_cs;
1108  incb = b_rs;
1109  n_elem_is_descending = FALSE;
1110  }
1111  }
1112  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1113  {
1114  if ( bl1_is_lower( uplo ) )
1115  {
1116  n_iter = m;
1117  n_elem_max = bl1_min( m, n );
1118  lda = a_rs;
1119  inca = a_cs;
1120  ldb = b_rs;
1121  incb = b_cs;
1122  n_elem_is_descending = FALSE;
1123  }
1124  else // if ( bl1_is_upper( uplo ) )
1125  {
1126  n_iter = bl1_min( m, n );
1127  n_elem_max = n;
1128  lda = a_rs;
1129  inca = a_cs;
1130  ldb = b_rs;
1131  incb = b_cs;
1132  n_elem_is_descending = TRUE;
1133  }
1134  }
1135 
1136  // Swap lda and inca if we're doing a transpose.
1137  if ( bl1_does_trans( trans ) )
1138  {
1139  bl1_swap_ints( lda, inca );
1140  }
1141 
1142  // Extract conj component from trans parameter.
1143  conj = bl1_proj_trans1_to_conj( trans );
1144 
1145  // Choose the loop based on whether n_elem will be shrinking or growing
1146  // with each iteration.
1147  if ( n_elem_is_descending )
1148  {
1149  for ( j = 0; j < n_iter; j++ )
1150  {
1151  n_elem = n_elem_max - j;
1152  a_begin = a + j*lda + j*inca;
1153  b_begin = b + j*ldb + j*incb;
1154 
1155  bl1_dccopyv( conj,
1156  n_elem,
1157  a_begin, inca,
1158  b_begin, incb );
1159  }
1160  }
1161  else // if ( n_elem_is_ascending )
1162  {
1163  for ( j = 0; j < n_iter; j++ )
1164  {
1165  n_elem = bl1_min( j + 1, n_elem_max );
1166  a_begin = a + j*lda;
1167  b_begin = b + j*ldb;
1168 
1169  bl1_dccopyv( conj,
1170  n_elem,
1171  a_begin, inca,
1172  b_begin, incb );
1173  }
1174  }
1175 }

References bl1_dccopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_dccopymt()

void bl1_dccopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
899 {
900  double* a_begin;
901  scomplex* b_begin;
902  int lda, inca;
903  int ldb, incb;
904  int n_iter;
905  int n_elem;
906  int j;
907  conj1_t conj;
908 
909  // Return early if possible.
910  if ( bl1_zero_dim2( m, n ) ) return;
911 
912  // Handle cases where A and B are vectors to ensure that the underlying copy
913  // gets invoked only once.
914  if ( bl1_is_vector( m, n ) )
915  {
916  // Initialize with values appropriate for vectors.
917  n_iter = 1;
918  n_elem = bl1_vector_dim( m, n );
919  lda = 1; // multiplied by zero when n_iter == 1; not needed.
920  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
921  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
922  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
923  }
924  else // matrix case
925  {
926  // Initialize with optimal values for column-major storage of B.
927  n_iter = n;
928  n_elem = m;
929  lda = a_cs;
930  inca = a_rs;
931  ldb = b_cs;
932  incb = b_rs;
933 
934  // Handle the transposition of A.
935  if ( bl1_does_trans( trans ) )
936  {
937  bl1_swap_ints( lda, inca );
938  }
939 
940  // An optimization: if B is row-major, then let's access the matrix by rows
941  // instead of by columns for increased spatial locality.
942  if ( bl1_is_row_storage( b_rs, b_cs ) )
943  {
944  bl1_swap_ints( n_iter, n_elem );
945  bl1_swap_ints( lda, inca );
946  bl1_swap_ints( ldb, incb );
947  }
948  }
949 
950  // Extract conj component from trans parameter.
951  conj = bl1_proj_trans1_to_conj( trans );
952 
953  for ( j = 0; j < n_iter; ++j )
954  {
955  a_begin = a + j*lda;
956  b_begin = b + j*ldb;
957 
958  bl1_dccopyv( conj,
959  n_elem,
960  a_begin, inca,
961  b_begin, incb );
962  }
963 }

References bl1_dccopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_dccopyv()

void bl1_dccopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
scomplex *  y,
int  incy 
)
215 {
216  double* chi;
217  scomplex* psi;
218  int i;
219 
220  // Return early if possible.
221  if ( bl1_zero_dim1( m ) ) return;
222 
223  // Initialize pointers.
224  chi = x;
225  psi = y;
226 
227  for ( i = 0; i < m; ++i )
228  {
229  psi->real = *chi;
230  psi->imag = 0.0F;
231 
232  chi += incx;
233  psi += incy;
234  }
235 }

References bl1_zero_dim1(), i, scomplex::imag, and scomplex::real.

Referenced by bl1_dccopymr(), bl1_dccopymrt(), and bl1_dccopymt().

◆ bl1_dconjm()

void bl1_dconjm ( int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs 
)
19 {
20  return;
21 }

◆ bl1_dconjmr()

void bl1_dconjmr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs 
)
19 {
20  return;
21 }

◆ bl1_dconjv()

void bl1_dconjv ( int  m,
double *  x,
int  incx 
)

◆ bl1_dcopy()

void bl1_dcopy ( int  m,
double *  x,
int  incx,
double *  y,
int  incy 
)
27 {
28 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
29  cblas_dcopy( m,
30  x, incx,
31  y, incy );
32 #else
33  F77_dcopy( &m,
34  x, &incx,
35  y, &incy );
36 #endif
37 }
void F77_dcopy(int *n, double *x, int *incx, double *y, int *incy)
void cblas_dcopy(const int N, const double *X, const int incX, double *Y, const int incY)

References cblas_dcopy(), and F77_dcopy().

Referenced by bl1_dcopymr(), bl1_dcopymt(), bl1_dcopyv(), FLA_Obj_extract_imag_part(), FLA_Obj_extract_real_part(), and FLA_SA_LU_unb().

◆ bl1_dcopymr()

void bl1_dcopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
77 {
78  double* a_begin;
79  double* b_begin;
80  int lda, inca;
81  int ldb, incb;
82  int n_iter;
83  int n_elem_max;
84  int n_elem;
85  int j;
86 
87  // Return early if possible.
88  if ( bl1_zero_dim2( m, n ) ) return;
89 
90  // We initialize for column-major.
91  n_iter = n;
92  n_elem_max = m;
93  lda = a_cs;
94  inca = a_rs;
95  ldb = b_cs;
96  incb = b_rs;
97 
98  // An optimization: if A and B are both row-major, then let's access the
99  // matrices by rows instead of by columns for increased spatial locality.
100  if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
101  {
102  bl1_swap_ints( n_iter, n_elem_max );
103  bl1_swap_ints( lda, inca );
104  bl1_swap_ints( ldb, incb );
105  bl1_toggle_uplo( uplo );
106  }
107 
108 
109  if ( bl1_is_upper( uplo ) )
110  {
111  for ( j = 0; j < n_iter; j++ )
112  {
113  n_elem = bl1_min( j + 1, n_elem_max );
114  a_begin = a + j*lda;
115  b_begin = b + j*ldb;
116 
117  bl1_dcopy( n_elem,
118  a_begin, inca,
119  b_begin, incb );
120  }
121  }
122  else // if ( bl1_is_lower( uplo ) )
123  {
124  for ( j = 0; j < n_iter; j++ )
125  {
126  n_elem = bl1_max( 0, n_elem_max - j );
127  a_begin = a + j*lda + j*inca;
128  b_begin = b + j*ldb + j*incb;
129 
130  if ( n_elem <= 0 ) break;
131 
132  bl1_dcopy( n_elem,
133  a_begin, inca,
134  b_begin, incb );
135  }
136  }
137 }
void bl1_dcopy(int m, double *x, int incx, double *y, int incy)
Definition: bl1_copy.c:26

References bl1_dcopy(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_dcreate_contigmr(), bl1_dfree_saved_contigmr(), and FLA_Copyr_external().

◆ bl1_dcopymrt()

void bl1_dcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
119 {
120  double* a_begin;
121  double* b_begin;
122  int lda, inca;
123  int ldb, incb;
124  int n_iter;
125  int n_elem;
126  int n_elem_max;
127  int n_elem_is_descending;
128  int j;
129  conj1_t conj;
130 
131  // Return early if possible.
132  if ( bl1_zero_dim2( m, n ) ) return;
133 
134  // Initialize variables based on storage format of B and value of uplo.
135  if ( bl1_is_col_storage( b_rs, b_cs ) )
136  {
137  if ( bl1_is_lower( uplo ) )
138  {
139  n_iter = bl1_min( m, n );
140  n_elem_max = m;
141  lda = a_cs;
142  inca = a_rs;
143  ldb = b_cs;
144  incb = b_rs;
145  n_elem_is_descending = TRUE;
146  }
147  else // if ( bl1_is_upper( uplo ) )
148  {
149  n_iter = n;
150  n_elem_max = bl1_min( m, n );
151  lda = a_cs;
152  inca = a_rs;
153  ldb = b_cs;
154  incb = b_rs;
155  n_elem_is_descending = FALSE;
156  }
157  }
158  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
159  {
160  if ( bl1_is_lower( uplo ) )
161  {
162  n_iter = m;
163  n_elem_max = bl1_min( m, n );
164  lda = a_rs;
165  inca = a_cs;
166  ldb = b_rs;
167  incb = b_cs;
168  n_elem_is_descending = FALSE;
169  }
170  else // if ( bl1_is_upper( uplo ) )
171  {
172  n_iter = bl1_min( m, n );
173  n_elem_max = n;
174  lda = a_rs;
175  inca = a_cs;
176  ldb = b_rs;
177  incb = b_cs;
178  n_elem_is_descending = TRUE;
179  }
180  }
181 
182  // Swap lda and inca if we're doing a transpose.
183  if ( bl1_does_trans( trans ) )
184  {
185  bl1_swap_ints( lda, inca );
186  }
187 
188  // Extract conj component from trans parameter.
189  conj = bl1_proj_trans1_to_conj( trans );
190 
191  // Choose the loop based on whether n_elem will be shrinking or growing
192  // with each iteration.
193  if ( n_elem_is_descending )
194  {
195  for ( j = 0; j < n_iter; j++ )
196  {
197  n_elem = n_elem_max - j;
198  a_begin = a + j*lda + j*inca;
199  b_begin = b + j*ldb + j*incb;
200 
201  bl1_dcopyv( conj,
202  n_elem,
203  a_begin, inca,
204  b_begin, incb );
205  }
206  }
207  else // if ( n_elem_is_ascending )
208  {
209  for ( j = 0; j < n_iter; j++ )
210  {
211  n_elem = bl1_min( j + 1, n_elem_max );
212  a_begin = a + j*lda;
213  b_begin = b + j*ldb;
214 
215  bl1_dcopyv( conj,
216  n_elem,
217  a_begin, inca,
218  b_begin, incb );
219  }
220  }
221 }
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42

References bl1_dcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().

◆ bl1_dcopymt()

void bl1_dcopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
149 {
150  double* a_begin;
151  double* b_begin;
152  int lda, inca;
153  int ldb, incb;
154  int n_iter;
155  int n_elem;
156  int j;
157 
158  // Return early if possible.
159  if ( bl1_zero_dim2( m, n ) ) return;
160 
161  // Handle cases where A and B are vectors to ensure that the underlying copy
162  // gets invoked only once.
163  if ( bl1_is_vector( m, n ) )
164  {
165  // Initialize with values appropriate for vectors.
166  n_iter = 1;
167  n_elem = bl1_vector_dim( m, n );
168  lda = 1; // multiplied by zero when n_iter == 1; not needed.
169  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
170  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
171  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
172  }
173  else // matrix case
174  {
175  // Initialize with optimal values for column-major storage.
176  n_iter = n;
177  n_elem = m;
178  lda = a_cs;
179  inca = a_rs;
180  ldb = b_cs;
181  incb = b_rs;
182 
183  // Handle the transposition of A.
184  if ( bl1_does_trans( trans ) )
185  {
186  bl1_swap_ints( lda, inca );
187  }
188 
189  // An optimization: if B is row-major and if A is effectively row-major
190  // after a possible transposition, then let's access the matrix by rows
191  // instead of by columns for increased spatial locality.
192  if ( bl1_is_row_storage( b_rs, b_cs ) )
193  {
194  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
195  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
196  {
197  bl1_swap_ints( n_iter, n_elem );
198  bl1_swap_ints( lda, inca );
199  bl1_swap_ints( ldb, incb );
200  }
201  }
202  }
203 
204  for ( j = 0; j < n_iter; j++ )
205  {
206  a_begin = a + j*lda;
207  b_begin = b + j*ldb;
208 
209  bl1_dcopy( n_elem,
210  a_begin, inca,
211  b_begin, incb );
212  }
213 }

References bl1_dcopy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dcreate_contigm(), bl1_dcreate_contigmt(), bl1_dfree_saved_contigm(), bl1_dfree_saved_contigmsr(), bl1_dsymm(), bl1_dsyr2k(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Bsvd_v_opd_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opd_var2().

◆ bl1_dcopyv()

void bl1_dcopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
double *  y,
int  incy 
)

◆ bl1_ddcopymr()

void bl1_ddcopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
725 {
726  double* a_begin;
727  double* b_begin;
728  int lda, inca;
729  int ldb, incb;
730  int n_iter;
731  int n_elem_max;
732  int n_elem;
733  int j;
734 
735  // Return early if possible.
736  if ( bl1_zero_dim2( m, n ) ) return;
737 
738  // We initialize for column-major.
739  n_iter = n;
740  n_elem_max = m;
741  lda = a_cs;
742  inca = a_rs;
743  ldb = b_cs;
744  incb = b_rs;
745 
746  // An optimization: if B is row-major, then let's access the matrix
747  // by rows instead of by columns for increased spatial locality.
748  if ( bl1_is_row_storage( b_rs, b_cs ) )
749  {
750  bl1_swap_ints( n_iter, n_elem_max );
751  bl1_swap_ints( lda, inca );
752  bl1_swap_ints( ldb, incb );
753  bl1_toggle_uplo( uplo );
754  }
755 
756 
757  if ( bl1_is_upper( uplo ) )
758  {
759  for ( j = 0; j < n_iter; j++ )
760  {
761  n_elem = bl1_min( j + 1, n_elem_max );
762  a_begin = a + j*lda;
763  b_begin = b + j*ldb;
764 
765  bl1_dcopyv( BLIS1_NO_CONJUGATE,
766  n_elem,
767  a_begin, inca,
768  b_begin, incb );
769  }
770  }
771  else // if ( bl1_is_lower( uplo ) )
772  {
773  for ( j = 0; j < n_iter; j++ )
774  {
775  n_elem = bl1_max( 0, n_elem_max - j );
776  a_begin = a + j*lda + j*inca;
777  b_begin = b + j*ldb + j*incb;
778 
779  if ( n_elem <= 0 ) break;
780 
781  bl1_dcopyv( BLIS1_NO_CONJUGATE,
782  n_elem,
783  a_begin, inca,
784  b_begin, incb );
785  }
786  }
787 }

References bl1_dcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

◆ bl1_ddcopymrt()

void bl1_ddcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
967 {
968  double* a_begin;
969  double* b_begin;
970  int lda, inca;
971  int ldb, incb;
972  int n_iter;
973  int n_elem;
974  int n_elem_max;
975  int n_elem_is_descending;
976  int j;
977  conj1_t conj;
978 
979  // Return early if possible.
980  if ( bl1_zero_dim2( m, n ) ) return;
981 
982  // Initialize variables based on storage format of B and value of uplo.
983  if ( bl1_is_col_storage( b_rs, b_cs ) )
984  {
985  if ( bl1_is_lower( uplo ) )
986  {
987  n_iter = bl1_min( m, n );
988  n_elem_max = m;
989  lda = a_cs;
990  inca = a_rs;
991  ldb = b_cs;
992  incb = b_rs;
993  n_elem_is_descending = TRUE;
994  }
995  else // if ( bl1_is_upper( uplo ) )
996  {
997  n_iter = n;
998  n_elem_max = bl1_min( m, n );
999  lda = a_cs;
1000  inca = a_rs;
1001  ldb = b_cs;
1002  incb = b_rs;
1003  n_elem_is_descending = FALSE;
1004  }
1005  }
1006  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1007  {
1008  if ( bl1_is_lower( uplo ) )
1009  {
1010  n_iter = m;
1011  n_elem_max = bl1_min( m, n );
1012  lda = a_rs;
1013  inca = a_cs;
1014  ldb = b_rs;
1015  incb = b_cs;
1016  n_elem_is_descending = FALSE;
1017  }
1018  else // if ( bl1_is_upper( uplo ) )
1019  {
1020  n_iter = bl1_min( m, n );
1021  n_elem_max = n;
1022  lda = a_rs;
1023  inca = a_cs;
1024  ldb = b_rs;
1025  incb = b_cs;
1026  n_elem_is_descending = TRUE;
1027  }
1028  }
1029 
1030  // Swap lda and inca if we're doing a transpose.
1031  if ( bl1_does_trans( trans ) )
1032  {
1033  bl1_swap_ints( lda, inca );
1034  }
1035 
1036  // Extract conj component from trans parameter.
1037  conj = bl1_proj_trans1_to_conj( trans );
1038 
1039  // Choose the loop based on whether n_elem will be shrinking or growing
1040  // with each iteration.
1041  if ( n_elem_is_descending )
1042  {
1043  for ( j = 0; j < n_iter; j++ )
1044  {
1045  n_elem = n_elem_max - j;
1046  a_begin = a + j*lda + j*inca;
1047  b_begin = b + j*ldb + j*incb;
1048 
1049  bl1_dcopyv( conj,
1050  n_elem,
1051  a_begin, inca,
1052  b_begin, incb );
1053  }
1054  }
1055  else // if ( n_elem_is_ascending )
1056  {
1057  for ( j = 0; j < n_iter; j++ )
1058  {
1059  n_elem = bl1_min( j + 1, n_elem_max );
1060  a_begin = a + j*lda;
1061  b_begin = b + j*ldb;
1062 
1063  bl1_dcopyv( conj,
1064  n_elem,
1065  a_begin, inca,
1066  b_begin, incb );
1067  }
1068  }
1069 }

References bl1_dcopyv(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

◆ bl1_ddcopymt()

void bl1_ddcopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
831 {
832  double* a_begin;
833  double* b_begin;
834  int lda, inca;
835  int ldb, incb;
836  int n_iter;
837  int n_elem;
838  int j;
839  conj1_t conj;
840 
841  // Return early if possible.
842  if ( bl1_zero_dim2( m, n ) ) return;
843 
844  // Handle cases where A and B are vectors to ensure that the underlying copy
845  // gets invoked only once.
846  if ( bl1_is_vector( m, n ) )
847  {
848  // Initialize with values appropriate for vectors.
849  n_iter = 1;
850  n_elem = bl1_vector_dim( m, n );
851  lda = 1; // multiplied by zero when n_iter == 1; not needed.
852  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
853  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
854  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
855  }
856  else // matrix case
857  {
858  // Initialize with optimal values for column-major storage of B.
859  n_iter = n;
860  n_elem = m;
861  lda = a_cs;
862  inca = a_rs;
863  ldb = b_cs;
864  incb = b_rs;
865 
866  // Handle the transposition of A.
867  if ( bl1_does_trans( trans ) )
868  {
869  bl1_swap_ints( lda, inca );
870  }
871 
872  // An optimization: if B is row-major, then let's access the matrix by rows
873  // instead of by columns for increased spatial locality.
874  if ( bl1_is_row_storage( b_rs, b_cs ) )
875  {
876  bl1_swap_ints( n_iter, n_elem );
877  bl1_swap_ints( lda, inca );
878  bl1_swap_ints( ldb, incb );
879  }
880  }
881 
882  // Extract conj component from trans parameter.
883  conj = bl1_proj_trans1_to_conj( trans );
884 
885  for ( j = 0; j < n_iter; ++j )
886  {
887  a_begin = a + j*lda;
888  b_begin = b + j*ldb;
889 
890  bl1_dcopyv( conj,
891  n_elem,
892  a_begin, inca,
893  b_begin, incb );
894  }
895 }

References bl1_dcopyv(), bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

◆ bl1_ddot()

void bl1_ddot ( conj1_t  conj,
int  n,
double *  x,
int  incx,
double *  y,
int  incy,
double *  rho 
)

◆ bl1_ddot2s()

void bl1_ddot2s ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy,
double *  beta,
double *  rho 
)
27 {
28  double dot;
29 
30  bl1_ddot( conj,
31  n,
32  x, incx,
33  y, incy,
34  &dot );
35 
36  *rho = (*beta) * (*rho) + 2.0 * (*alpha) * dot;
37 }
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26

References bl1_ddot(), and rho.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opd_var1(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var1(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_nl_opd_var1(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nu_opd_var1(), FLA_Eig_gest_nu_opd_var2(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), and FLA_Lyap_n_opd_var3().

◆ bl1_ddots()

void bl1_ddots ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy,
double *  beta,
double *  rho 
)

◆ bl1_dfnorm()

void bl1_dfnorm ( int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  norm 
)
68 {
69  double* a_ij;
70  double sum;
71  int lda, inca;
72  int n_iter;
73  int n_elem;
74  int i, j;
75 
76  // Return early if possible.
77  if ( bl1_zero_dim2( m, n ) ) return;
78 
79  // Handle cases where A is a vector separately.
80  if ( bl1_is_vector( m, n ) )
81  {
82  // Initialize with values appropriate for vectors.
83  n_iter = 1;
84  n_elem = bl1_vector_dim( m, n );
85  lda = 1; // multiplied by zero when n_iter == 1; not needed.
86  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
87  }
88  else // matrix case
89  {
90  // Initialize with optimal values for column-major storage.
91  n_iter = n;
92  n_elem = m;
93  lda = a_cs;
94  inca = a_rs;
95 
96  // An optimization: if A is row-major, then let's access the matrix by
97  // rows instead of by columns for increased spatial locality.
98  if ( bl1_is_row_storage( a_rs, a_cs ) )
99  {
100  bl1_swap_ints( n_iter, n_elem );
101  bl1_swap_ints( lda, inca );
102  }
103  }
104 
105  // Initialize the accumulator variable.
106  sum = 0.0;
107 
108  for ( j = 0; j < n_iter; j++ )
109  {
110  for ( i = 0; i < n_elem; i++ )
111  {
112  a_ij = a + i*inca + j*lda;
113  sum += (*a_ij) * (*a_ij);
114  }
115  }
116 
117  // Compute the norm and store the result.
118  *norm = sqrt( sum );
119 }

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, and i.

Referenced by FLA_Norm_frob().

◆ bl1_dinvscalm()

void bl1_dinvscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)
66 {
67  double alpha_inv;
68  double* a_begin;
69  int lda, inca;
70  int n_iter;
71  int n_elem;
72  int j;
73 
74  // Return early if possible.
75  if ( bl1_zero_dim2( m, n ) ) return;
76  if ( bl1_deq1( alpha ) ) return;
77 
78  // Handle cases where A is a vector to ensure that the underlying scal
79  // gets invoked only once.
80  if ( bl1_is_vector( m, n ) )
81  {
82  // Initialize with values appropriate for a vector.
83  n_iter = 1;
84  n_elem = bl1_vector_dim( m, n );
85  lda = 1; // multiplied by zero when n_iter == 1; not needed.
86  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
87  }
88  else // matrix case
89  {
90  // Initialize with optimal values for column-major storage.
91  n_iter = n;
92  n_elem = m;
93  lda = a_cs;
94  inca = a_rs;
95 
96  // An optimization: if A is row-major, then let's access the matrix
97  // by rows instead of by columns to increase spatial locality.
98  if ( bl1_is_row_storage( a_rs, a_cs ) )
99  {
100  bl1_swap_ints( n_iter, n_elem );
101  bl1_swap_ints( lda, inca );
102  }
103  }
104 
105  bl1_dinvert2s( conj, alpha, &alpha_inv );
106 
107  for ( j = 0; j < n_iter; j++ )
108  {
109  a_begin = a + j*lda;
110 
111  bl1_dscal( n_elem,
112  &alpha_inv,
113  a_begin, inca );
114  }
115 }
void bl1_dinvert2s(conj1_t conj, double *alpha, double *beta)
Definition: bl1_invert2s.c:20

References bl1_dinvert2s(), bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_dinvscalv()

void bl1_dinvscalv ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx 
)

◆ bl1_dnrm2()

void bl1_dnrm2 ( int  n,
double *  x,
int  incx,
double *  norm 
)
25 {
26 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
27  *norm = cblas_dnrm2( n,
28  x, incx );
29 #else
30  *norm = F77_dnrm2( &n,
31  x, &incx );
32 #endif
33 }
double F77_dnrm2(int *n, double *x, int *incx)
double cblas_dnrm2(const int N, const double *X, const int incX)

References cblas_dnrm2(), and F77_dnrm2().

Referenced by FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_l_opd(), FLA_Househ3UD_UT_opd(), and FLA_Nrm2_external().

◆ bl1_dscal()

void bl1_dscal ( int  n,
double *  alpha,
double *  x,
int  incx 
)
27 {
28 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
29  cblas_dscal( n,
30  *alpha,
31  x, incx );
32 #else
33  F77_dscal( &n,
34  alpha,
35  x, &incx );
36 #endif
37 }
void F77_dscal(int *n, double *alpha, double *y, int *incy)
void cblas_dscal(const int N, const double alpha, double *X, const int incX)

References cblas_dscal(), and F77_dscal().

Referenced by bl1_daxpysmt(), bl1_daxpysv(), bl1_dinvscalm(), bl1_dinvscalv(), bl1_dscalm(), bl1_dscalmr(), bl1_dscalv(), bl1_zconjm(), bl1_zconjmr(), bl1_zconjv(), and FLA_SA_LU_unb().

◆ bl1_dscalm()

void bl1_dscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)
66 {
67  double alpha_conj;
68  double* a_begin;
69  int lda, inca;
70  int n_iter;
71  int n_elem;
72  int j;
73 
74  // Return early if possible.
75  if ( bl1_zero_dim2( m, n ) ) return;
76  if ( bl1_deq1( alpha ) ) return;
77 
78  // Handle cases where A is a vector to ensure that the underlying scal
79  // gets invoked only once.
80  if ( bl1_is_vector( m, n ) )
81  {
82  // Initialize with values appropriate for a vector.
83  n_iter = 1;
84  n_elem = bl1_vector_dim( m, n );
85  lda = 1; // multiplied by zero when n_iter == 1; not needed.
86  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
87  }
88  else // matrix case
89  {
90  // Initialize with optimal values for column-major storage.
91  n_iter = n;
92  n_elem = m;
93  lda = a_cs;
94  inca = a_rs;
95 
96  // An optimization: if A is row-major, then let's access the matrix
97  // by rows instead of by columns to increase spatial locality.
98  if ( bl1_is_row_storage( a_rs, a_cs ) )
99  {
100  bl1_swap_ints( n_iter, n_elem );
101  bl1_swap_ints( lda, inca );
102  }
103  }
104 
105  bl1_dcopys( conj, alpha, &alpha_conj );
106 
107  for ( j = 0; j < n_iter; j++ )
108  {
109  a_begin = a + j*lda;
110 
111  bl1_dscal( n_elem,
112  &alpha_conj,
113  a_begin, inca );
114  }
115 }

References bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_Lyap_n_opd_var4(), FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_dscalmr()

void bl1_dscalmr ( uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)
70 {
71  double* a_begin;
72  int lda, inca;
73  int n_iter;
74  int n_elem_max;
75  int n_elem;
76  int j;
77 
78  // Return early if possible.
79  if ( bl1_zero_dim2( m, n ) ) return;
80  if ( bl1_deq1( alpha ) ) return;
81 
82  // We initialize for column-major.
83  n_iter = n;
84  n_elem_max = m;
85  lda = a_cs;
86  inca = a_rs;
87 
88  // An optimization: if A is row-major, then let's access the matrix
89  // by rows instead of by columns to increase spatial locality.
90  if ( bl1_is_row_storage( a_rs, a_cs ) )
91  {
92  bl1_swap_ints( n_iter, n_elem_max );
93  bl1_swap_ints( lda, inca );
94  bl1_toggle_uplo( uplo );
95  }
96 
97  if ( bl1_is_upper( uplo ) )
98  {
99  for ( j = 0; j < n_iter; j++ )
100  {
101  n_elem = bl1_min( j + 1, n_elem_max );
102  a_begin = a + j*lda;
103 
104  bl1_dscal( n_elem,
105  alpha,
106  a_begin, inca );
107  }
108  }
109  else // if ( bl1_is_lower( uplo ) )
110  {
111  for ( j = 0; j < n_iter; j++ )
112  {
113  n_elem = bl1_max( 0, n_elem_max - j );
114  a_begin = a + j*lda + j*inca;
115 
116  if ( n_elem <= 0 ) break;
117 
118  bl1_dscal( n_elem,
119  alpha,
120  a_begin, inca );
121  }
122  }
123 }

References bl1_dscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

◆ bl1_dscalv()

void bl1_dscalv ( conj1_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx 
)

◆ bl1_dscopymr()

void bl1_dscopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
399 {
400  double* a_begin;
401  float* b_begin;
402  int lda, inca;
403  int ldb, incb;
404  int n_iter;
405  int n_elem_max;
406  int n_elem;
407  int j;
408 
409  // Return early if possible.
410  if ( bl1_zero_dim2( m, n ) ) return;
411 
412  // We initialize for column-major.
413  n_iter = n;
414  n_elem_max = m;
415  lda = a_cs;
416  inca = a_rs;
417  ldb = b_cs;
418  incb = b_rs;
419 
420  // An optimization: if B is row-major, then let's access the matrix
421  // by rows instead of by columns for increased spatial locality.
422  if ( bl1_is_row_storage( b_rs, b_cs ) )
423  {
424  bl1_swap_ints( n_iter, n_elem_max );
425  bl1_swap_ints( lda, inca );
426  bl1_swap_ints( ldb, incb );
427  bl1_toggle_uplo( uplo );
428  }
429 
430 
431  if ( bl1_is_upper( uplo ) )
432  {
433  for ( j = 0; j < n_iter; j++ )
434  {
435  n_elem = bl1_min( j + 1, n_elem_max );
436  a_begin = a + j*lda;
437  b_begin = b + j*ldb;
438 
439  bl1_dscopyv( BLIS1_NO_CONJUGATE,
440  n_elem,
441  a_begin, inca,
442  b_begin, incb );
443  }
444  }
445  else // if ( bl1_is_lower( uplo ) )
446  {
447  for ( j = 0; j < n_iter; j++ )
448  {
449  n_elem = bl1_max( 0, n_elem_max - j );
450  a_begin = a + j*lda + j*inca;
451  b_begin = b + j*ldb + j*incb;
452 
453  if ( n_elem <= 0 ) break;
454 
455  bl1_dscopyv( BLIS1_NO_CONJUGATE,
456  n_elem,
457  a_begin, inca,
458  b_begin, incb );
459  }
460  }
461 }
void bl1_dscopyv(conj1_t conj, int m, double *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:101

References bl1_dscopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_dscopymrt()

void bl1_dscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
861 {
862  double* a_begin;
863  float* b_begin;
864  int lda, inca;
865  int ldb, incb;
866  int n_iter;
867  int n_elem;
868  int n_elem_max;
869  int n_elem_is_descending;
870  int j;
871  conj1_t conj;
872 
873  // Return early if possible.
874  if ( bl1_zero_dim2( m, n ) ) return;
875 
876  // Initialize variables based on storage format of B and value of uplo.
877  if ( bl1_is_col_storage( b_rs, b_cs ) )
878  {
879  if ( bl1_is_lower( uplo ) )
880  {
881  n_iter = bl1_min( m, n );
882  n_elem_max = m;
883  lda = a_cs;
884  inca = a_rs;
885  ldb = b_cs;
886  incb = b_rs;
887  n_elem_is_descending = TRUE;
888  }
889  else // if ( bl1_is_upper( uplo ) )
890  {
891  n_iter = n;
892  n_elem_max = bl1_min( m, n );
893  lda = a_cs;
894  inca = a_rs;
895  ldb = b_cs;
896  incb = b_rs;
897  n_elem_is_descending = FALSE;
898  }
899  }
900  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
901  {
902  if ( bl1_is_lower( uplo ) )
903  {
904  n_iter = m;
905  n_elem_max = bl1_min( m, n );
906  lda = a_rs;
907  inca = a_cs;
908  ldb = b_rs;
909  incb = b_cs;
910  n_elem_is_descending = FALSE;
911  }
912  else // if ( bl1_is_upper( uplo ) )
913  {
914  n_iter = bl1_min( m, n );
915  n_elem_max = n;
916  lda = a_rs;
917  inca = a_cs;
918  ldb = b_rs;
919  incb = b_cs;
920  n_elem_is_descending = TRUE;
921  }
922  }
923 
924  // Swap lda and inca if we're doing a transpose.
925  if ( bl1_does_trans( trans ) )
926  {
927  bl1_swap_ints( lda, inca );
928  }
929 
930  // Extract conj component from trans parameter.
931  conj = bl1_proj_trans1_to_conj( trans );
932 
933  // Choose the loop based on whether n_elem will be shrinking or growing
934  // with each iteration.
935  if ( n_elem_is_descending )
936  {
937  for ( j = 0; j < n_iter; j++ )
938  {
939  n_elem = n_elem_max - j;
940  a_begin = a + j*lda + j*inca;
941  b_begin = b + j*ldb + j*incb;
942 
943  bl1_dscopyv( conj,
944  n_elem,
945  a_begin, inca,
946  b_begin, incb );
947  }
948  }
949  else // if ( n_elem_is_ascending )
950  {
951  for ( j = 0; j < n_iter; j++ )
952  {
953  n_elem = bl1_min( j + 1, n_elem_max );
954  a_begin = a + j*lda;
955  b_begin = b + j*ldb;
956 
957  bl1_dscopyv( conj,
958  n_elem,
959  a_begin, inca,
960  b_begin, incb );
961  }
962  }
963 }

References bl1_does_trans(), bl1_dscopyv(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_dscopymt()

void bl1_dscopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
495 {
496  double* a_begin;
497  float* b_begin;
498  int lda, inca;
499  int ldb, incb;
500  int n_iter;
501  int n_elem;
502  int j;
503  conj1_t conj;
504 
505  // Return early if possible.
506  if ( bl1_zero_dim2( m, n ) ) return;
507 
508  // Handle cases where A and B are vectors to ensure that the underlying copy
509  // gets invoked only once.
510  if ( bl1_is_vector( m, n ) )
511  {
512  // Initialize with values appropriate for vectors.
513  n_iter = 1;
514  n_elem = bl1_vector_dim( m, n );
515  lda = 1; // multiplied by zero when n_iter == 1; not needed.
516  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
517  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
518  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
519  }
520  else // matrix case
521  {
522  // Initialize with optimal values for column-major storage of B.
523  n_iter = n;
524  n_elem = m;
525  lda = a_cs;
526  inca = a_rs;
527  ldb = b_cs;
528  incb = b_rs;
529 
530  // Handle the transposition of A.
531  if ( bl1_does_trans( trans ) )
532  {
533  bl1_swap_ints( lda, inca );
534  }
535 
536  // An optimization: if B is row-major, then let's access the matrix by rows
537  // instead of by columns for increased spatial locality.
538  if ( bl1_is_row_storage( b_rs, b_cs ) )
539  {
540  bl1_swap_ints( n_iter, n_elem );
541  bl1_swap_ints( lda, inca );
542  bl1_swap_ints( ldb, incb );
543  }
544  }
545 
546  // Extract conj component from trans parameter.
547  conj = bl1_proj_trans1_to_conj( trans );
548 
549  for ( j = 0; j < n_iter; ++j )
550  {
551  a_begin = a + j*lda;
552  b_begin = b + j*ldb;
553 
554  bl1_dscopyv( conj,
555  n_elem,
556  a_begin, inca,
557  b_begin, incb );
558  }
559 }

References bl1_does_trans(), bl1_dscopyv(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_dscopyv()

void bl1_dscopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
float *  y,
int  incy 
)
102 {
103  double* chi;
104  float* psi;
105  int i;
106 
107  // Return early if possible.
108  if ( bl1_zero_dim1( m ) ) return;
109 
110  // Initialize pointers.
111  chi = x;
112  psi = y;
113 
114  for ( i = 0; i < m; ++i )
115  {
116  *psi = *chi;
117 
118  chi += incx;
119  psi += incy;
120  }
121 }

References bl1_zero_dim1(), and i.

Referenced by bl1_dscopymr(), bl1_dscopymrt(), and bl1_dscopymt().

◆ bl1_dswap()

void bl1_dswap ( int  n,
double *  x,
int  incx,
double *  y,
int  incy 
)
27 {
28 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
29  cblas_dswap( n,
30  x, incx,
31  y, incy );
32 #else
33  F77_dswap( &n,
34  x, &incx,
35  y, &incy );
36 #endif
37 }
void F77_dswap(int *n, double *x, int *incx, double *y, int *incy)
void cblas_dswap(const int N, double *X, const int incX, double *Y, const int incY)

References cblas_dswap(), and F77_dswap().

Referenced by bl1_dswapmt(), bl1_dswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

◆ bl1_dswapmt()

void bl1_dswapmt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
81 {
82  double* a_begin;
83  double* b_begin;
84  int lda, inca;
85  int ldb, incb;
86  int n_iter;
87  int n_elem;
88  int j;
89 
90  // Return early if possible.
91  if ( bl1_zero_dim2( m, n ) ) return;
92 
93  // Handle cases where A and B are vectors to ensure that the underlying swap
94  // gets invoked only once.
95  if ( bl1_is_vector( m, n ) )
96  {
97  // Initialize with values appropriate for vectors.
98  n_iter = 1;
99  n_elem = bl1_vector_dim( m, n );
100  lda = 1; // multiplied by zero when n_iter == 1; not needed.
101  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
102  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
103  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
104  }
105  else // matrix case
106  {
107  // Initialize with optimal values for column-major storage.
108  n_iter = n;
109  n_elem = m;
110  lda = a_cs;
111  inca = a_rs;
112  ldb = b_cs;
113  incb = b_rs;
114 
115  // Handle the transposition of A.
116  if ( bl1_does_trans( trans ) )
117  {
118  bl1_swap_ints( lda, inca );
119  }
120 
121  // An optimization: if B is row-major and if A is effectively row-major
122  // after a possible transposition, then let's access the matrix by rows
123  // instead of by columns for increased spatial locality.
124  if ( bl1_is_row_storage( b_rs, b_cs ) )
125  {
126  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
127  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
128  {
129  bl1_swap_ints( n_iter, n_elem );
130  bl1_swap_ints( lda, inca );
131  bl1_swap_ints( ldb, incb );
132  }
133  }
134  }
135 
136  for ( j = 0; j < n_iter; j++ )
137  {
138  a_begin = a + j*lda;
139  b_begin = b + j*ldb;
140 
141  bl1_dswap( n_elem,
142  a_begin, inca,
143  b_begin, incb );
144  }
145 }
void bl1_dswap(int n, double *x, int incx, double *y, int incy)
Definition: bl1_swap.c:26

References bl1_does_notrans(), bl1_does_trans(), bl1_dswap(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

◆ bl1_dswapv()

void bl1_dswapv ( int  n,
double *  x,
int  incx,
double *  y,
int  incy 
)
24 {
25  // Return early if possible.
26  if ( bl1_zero_dim1( n ) ) return;
27 
28  bl1_dswap( n,
29  x, incx,
30  y, incy );
31 }

References bl1_dswap(), and bl1_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_opd(), FLA_Sort_bsvd_ext_f_opd(), FLA_Sort_evd_b_opd(), FLA_Sort_evd_f_opd(), FLA_Sort_svd_b_opd(), and FLA_Sort_svd_f_opd().

◆ bl1_dzcopymr()

void bl1_dzcopymr ( uplo1_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
921 {
922  double* a_begin;
923  dcomplex* b_begin;
924  int lda, inca;
925  int ldb, incb;
926  int n_iter;
927  int n_elem_max;
928  int n_elem;
929  int j;
930 
931  // Return early if possible.
932  if ( bl1_zero_dim2( m, n ) ) return;
933 
934  // We initialize for column-major.
935  n_iter = n;
936  n_elem_max = m;
937  lda = a_cs;
938  inca = a_rs;
939  ldb = b_cs;
940  incb = b_rs;
941 
942  // An optimization: if B is row-major, then let's access the matrix
943  // by rows instead of by columns for increased spatial locality.
944  if ( bl1_is_row_storage( b_rs, b_cs ) )
945  {
946  bl1_swap_ints( n_iter, n_elem_max );
947  bl1_swap_ints( lda, inca );
948  bl1_swap_ints( ldb, incb );
949  bl1_toggle_uplo( uplo );
950  }
951 
952 
953  if ( bl1_is_upper( uplo ) )
954  {
955  for ( j = 0; j < n_iter; j++ )
956  {
957  n_elem = bl1_min( j + 1, n_elem_max );
958  a_begin = a + j*lda;
959  b_begin = b + j*ldb;
960 
961  bl1_dzcopyv( BLIS1_NO_CONJUGATE,
962  n_elem,
963  a_begin, inca,
964  b_begin, incb );
965  }
966  }
967  else // if ( bl1_is_lower( uplo ) )
968  {
969  for ( j = 0; j < n_iter; j++ )
970  {
971  n_elem = bl1_max( 0, n_elem_max - j );
972  a_begin = a + j*lda + j*inca;
973  b_begin = b + j*ldb + j*incb;
974 
975  if ( n_elem <= 0 ) break;
976 
977  bl1_dzcopyv( BLIS1_NO_CONJUGATE,
978  n_elem,
979  a_begin, inca,
980  b_begin, incb );
981  }
982  }
983 }
void bl1_dzcopyv(conj1_t conj, int m, double *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:259

References bl1_dzcopyv(), bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_dzcopymrt()

void bl1_dzcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
1179 {
1180  double* a_begin;
1181  dcomplex* b_begin;
1182  int lda, inca;
1183  int ldb, incb;
1184  int n_iter;
1185  int n_elem;
1186  int n_elem_max;
1187  int n_elem_is_descending;
1188  int j;
1189  conj1_t conj;
1190 
1191  // Return early if possible.
1192  if ( bl1_zero_dim2( m, n ) ) return;
1193 
1194  // Initialize variables based on storage format of B and value of uplo.
1195  if ( bl1_is_col_storage( b_rs, b_cs ) )
1196  {
1197  if ( bl1_is_lower( uplo ) )
1198  {
1199  n_iter = bl1_min( m, n );
1200  n_elem_max = m;
1201  lda = a_cs;
1202  inca = a_rs;
1203  ldb = b_cs;
1204  incb = b_rs;
1205  n_elem_is_descending = TRUE;
1206  }
1207  else // if ( bl1_is_upper( uplo ) )
1208  {
1209  n_iter = n;
1210  n_elem_max = bl1_min( m, n );
1211  lda = a_cs;
1212  inca = a_rs;
1213  ldb = b_cs;
1214  incb = b_rs;
1215  n_elem_is_descending = FALSE;
1216  }
1217  }
1218  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1219  {
1220  if ( bl1_is_lower( uplo ) )
1221  {
1222  n_iter = m;
1223  n_elem_max = bl1_min( m, n );
1224  lda = a_rs;
1225  inca = a_cs;
1226  ldb = b_rs;
1227  incb = b_cs;
1228  n_elem_is_descending = FALSE;
1229  }
1230  else // if ( bl1_is_upper( uplo ) )
1231  {
1232  n_iter = bl1_min( m, n );
1233  n_elem_max = n;
1234  lda = a_rs;
1235  inca = a_cs;
1236  ldb = b_rs;
1237  incb = b_cs;
1238  n_elem_is_descending = TRUE;
1239  }
1240  }
1241 
1242  // Swap lda and inca if we're doing a transpose.
1243  if ( bl1_does_trans( trans ) )
1244  {
1245  bl1_swap_ints( lda, inca );
1246  }
1247 
1248  // Extract conj component from trans parameter.
1249  conj = bl1_proj_trans1_to_conj( trans );
1250 
1251  // Choose the loop based on whether n_elem will be shrinking or growing
1252  // with each iteration.
1253  if ( n_elem_is_descending )
1254  {
1255  for ( j = 0; j < n_iter; j++ )
1256  {
1257  n_elem = n_elem_max - j;
1258  a_begin = a + j*lda + j*inca;
1259  b_begin = b + j*ldb + j*incb;
1260 
1261  bl1_dzcopyv( conj,
1262  n_elem,
1263  a_begin, inca,
1264  b_begin, incb );
1265  }
1266  }
1267  else // if ( n_elem_is_ascending )
1268  {
1269  for ( j = 0; j < n_iter; j++ )
1270  {
1271  n_elem = bl1_min( j + 1, n_elem_max );
1272  a_begin = a + j*lda;
1273  b_begin = b + j*ldb;
1274 
1275  bl1_dzcopyv( conj,
1276  n_elem,
1277  a_begin, inca,
1278  b_begin, incb );
1279  }
1280  }
1281 }

References bl1_does_trans(), bl1_dzcopyv(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_dzcopymt()

void bl1_dzcopymt ( trans1_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
1033 {
1034  double* a_begin;
1035  dcomplex* b_begin;
1036  int lda, inca;
1037  int ldb, incb;
1038  int n_iter;
1039  int n_elem;
1040  int j;
1041  conj1_t conj;
1042 
1043  // Return early if possible.
1044  if ( bl1_zero_dim2( m, n ) ) return;
1045 
1046  // Handle cases where A and B are vectors to ensure that the underlying copy
1047  // gets invoked only once.
1048  if ( bl1_is_vector( m, n ) )
1049  {
1050  // Initialize with values appropriate for vectors.
1051  n_iter = 1;
1052  n_elem = bl1_vector_dim( m, n );
1053  lda = 1; // multiplied by zero when n_iter == 1; not needed.
1054  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1055  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1056  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
1057  }
1058  else // matrix case
1059  {
1060  // Initialize with optimal values for column-major storage of B.
1061  n_iter = n;
1062  n_elem = m;
1063  lda = a_cs;
1064  inca = a_rs;
1065  ldb = b_cs;
1066  incb = b_rs;
1067 
1068  // Handle the transposition of A.
1069  if ( bl1_does_trans( trans ) )
1070  {
1071  bl1_swap_ints( lda, inca );
1072  }
1073 
1074  // An optimization: if B is row-major, then let's access the matrix by rows
1075  // instead of by columns for increased spatial locality.
1076  if ( bl1_is_row_storage( b_rs, b_cs ) )
1077  {
1078  bl1_swap_ints( n_iter, n_elem );
1079  bl1_swap_ints( lda, inca );
1080  bl1_swap_ints( ldb, incb );
1081  }
1082  }
1083 
1084  // Extract conj component from trans parameter.
1085  conj = bl1_proj_trans1_to_conj( trans );
1086 
1087  for ( j = 0; j < n_iter; ++j )
1088  {
1089  a_begin = a + j*lda;
1090  b_begin = b + j*ldb;
1091 
1092  bl1_dzcopyv( conj,
1093  n_elem,
1094  a_begin, inca,
1095  b_begin, incb );
1096  }
1097 }

References bl1_does_trans(), bl1_dzcopyv(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_dzcopyv()

void bl1_dzcopyv ( conj1_t  conj,
int  m,
double *  x,
int  incx,
dcomplex *  y,
int  incy 
)
260 {
261  double* chi;
262  dcomplex* psi;
263  int i;
264 
265  // Return early if possible.
266  if ( bl1_zero_dim1( m ) ) return;
267 
268  // Initialize pointers.
269  chi = x;
270  psi = y;
271 
272  for ( i = 0; i < m; ++i )
273  {
274  psi->real = *chi;
275  psi->imag = 0.0;
276 
277  chi += incx;
278  psi += incy;
279  }
280 }

References bl1_zero_dim1(), i, dcomplex::imag, and dcomplex::real.

Referenced by bl1_dzcopymr(), bl1_dzcopymrt(), and bl1_dzcopymt().

◆ bl1_icopymt()

void bl1_icopymt ( trans1_t  trans,
int  m,
int  n,
int *  a,
int  a_rs,
int  a_cs,
int *  b,
int  b_rs,
int  b_cs 
)
14 {
15  int* a_begin;
16  int* b_begin;
17  int lda, inca;
18  int ldb, incb;
19  int n_iter;
20  int n_elem;
21  int j;
22 
23  // Return early if possible.
24  if ( bl1_zero_dim2( m, n ) ) return;
25 
26  // Handle cases where A and B are vectors to ensure that the underlying copy
27  // gets invoked only once.
28  if ( bl1_is_vector( m, n ) )
29  {
30  // Initialize with values appropriate for vectors.
31  n_iter = 1;
32  n_elem = bl1_vector_dim( m, n );
33  lda = 1; // multiplied by zero when n_iter == 1; not needed.
34  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
35  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
36  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
37  }
38  else // matrix case
39  {
40  // Initialize with optimal values for column-major storage.
41  n_iter = n;
42  n_elem = m;
43  lda = a_cs;
44  inca = a_rs;
45  ldb = b_cs;
46  incb = b_rs;
47 
48  // Handle the transposition of A.
49  if ( bl1_does_trans( trans ) )
50  {
51  bl1_swap_ints( lda, inca );
52  }
53 
54  // An optimization: if B is row-major and if A is effectively row-major
55  // after a possible transposition, then let's access the matrix by rows
56  // instead of by columns for increased spatial locality.
57  if ( bl1_is_row_storage( b_rs, b_cs ) )
58  {
59  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
60  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
61  {
62  bl1_swap_ints( n_iter, n_elem );
63  bl1_swap_ints( lda, inca );
64  bl1_swap_ints( ldb, incb );
65  }
66  }
67  }
68 
69  for ( j = 0; j < n_iter; j++ )
70  {
71  a_begin = a + j*lda;
72  b_begin = b + j*ldb;
73 
74  bl1_icopyv( bl1_proj_trans1_to_conj( trans ),
75  n_elem,
76  a_begin, inca,
77  b_begin, incb );
78  }
79 }
void bl1_icopyv(conj1_t conj, int m, int *x, int incx, int *y, int incy)
Definition: bl1_copyv.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_icopyv(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_icopyv()

void bl1_icopyv ( conj1_t  conj,
int  m,
int *  x,
int  incx,
int *  y,
int  incy 
)
14 {
15  int* chi;
16  int* psi;
17  int i;
18 
19  // Return early if possible.
20  if ( bl1_zero_dim1( m ) ) return;
21 
22  // Initialize pointers.
23  chi = x;
24  psi = y;
25 
26  for ( i = 0; i < m; ++i )
27  {
28  *psi = *chi;
29 
30  chi += incx;
31  psi += incy;
32  }
33 }

References bl1_zero_dim1(), and i.

Referenced by bl1_icopymt().

◆ bl1_samax()

void bl1_samax ( int  n,
float *  x,
int  incx,
int *  index 
)
14 {
15 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16  *index = cblas_isamax( n,
17  x, incx );
18 #else
19  *index = F77_isamax( &n,
20  x, &incx ) - 1;
21 #endif
22 }
int F77_isamax(int *n, float *x, int *incx)
CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX)

References cblas_isamax(), and F77_isamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_LU_piv_ops_var5(), and FLA_SA_LU_unb().

◆ bl1_sasum()

void bl1_sasum ( int  n,
float *  x,
int  incx,
float *  norm 
)
14 {
15 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16  *norm = cblas_sasum( n,
17  x, incx );
18 #else
19  *norm = F77_sasum( &n,
20  x, &incx );
21 #endif
22 }
float F77_sasum(int *n, float *x, int *incx)
float cblas_sasum(const int N, const float *X, const int incX)

References cblas_sasum(), and F77_sasum().

Referenced by FLA_Asum_external().

◆ bl1_saxpy()

void bl1_saxpy ( int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy 
)
14 {
15 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16  cblas_saxpy( n,
17  *alpha,
18  x, incx,
19  y, incy );
20 #else
21  F77_saxpy( &n,
22  alpha,
23  x, &incx,
24  y, &incy );
25 #endif
26 }
void F77_saxpy(int *n, float *alpha, float *x, int *incx, float *y, int *incy)
void cblas_saxpy(const int N, const float alpha, const float *X, const int incX, float *Y, const int incY)

References cblas_saxpy(), and F77_saxpy().

Referenced by bl1_saxpymt(), bl1_saxpysmt(), bl1_saxpysv(), and bl1_saxpyv().

◆ bl1_saxpymrt()

void bl1_saxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
14 {
15  float* a_begin;
16  float* b_begin;
17  int lda, inca;
18  int ldb, incb;
19  int n_iter;
20  int n_elem;
21  int n_elem_max;
22  int n_elem_is_descending;
23  int j;
24  conj1_t conj;
25 
26  // Return early if possible.
27  if ( bl1_zero_dim2( m, n ) ) return;
28 
29  // Initialize variables based on storage format of B and value of uplo.
30  if ( bl1_is_col_storage( b_rs, b_cs ) )
31  {
32  if ( bl1_is_lower( uplo ) )
33  {
34  n_iter = bl1_min( m, n );
35  n_elem_max = m;
36  lda = a_cs;
37  inca = a_rs;
38  ldb = b_cs;
39  incb = b_rs;
40  n_elem_is_descending = TRUE;
41  }
42  else // if ( bl1_is_upper( uplo ) )
43  {
44  n_iter = n;
45  n_elem_max = bl1_min( m, n );
46  lda = a_cs;
47  inca = a_rs;
48  ldb = b_cs;
49  incb = b_rs;
50  n_elem_is_descending = FALSE;
51  }
52  }
53  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
54  {
55  if ( bl1_is_lower( uplo ) )
56  {
57  n_iter = m;
58  n_elem_max = bl1_min( m, n );
59  lda = a_rs;
60  inca = a_cs;
61  ldb = b_rs;
62  incb = b_cs;
63  n_elem_is_descending = FALSE;
64  }
65  else // if ( bl1_is_upper( uplo ) )
66  {
67  n_iter = bl1_min( m, n );
68  n_elem_max = n;
69  lda = a_rs;
70  inca = a_cs;
71  ldb = b_rs;
72  incb = b_cs;
73  n_elem_is_descending = TRUE;
74  }
75  }
76 
77  // Swap lda and inca if we're doing a transpose.
78  if ( bl1_does_trans( trans ) )
79  {
80  bl1_swap_ints( lda, inca );
81  }
82 
83  // Extract conj component from trans parameter.
84  conj = bl1_proj_trans1_to_conj( trans );
85 
86  // Choose the loop based on whether n_elem will be shrinking or growing
87  // with each iteration.
88  if ( n_elem_is_descending )
89  {
90  for ( j = 0; j < n_iter; j++ )
91  {
92  n_elem = n_elem_max - j;
93  a_begin = a + j*lda + j*inca;
94  b_begin = b + j*ldb + j*incb;
95 
96  bl1_saxpyv( conj,
97  n_elem,
98  alpha,
99  a_begin, inca,
100  b_begin, incb );
101  }
102  }
103  else // if ( n_elem_is_ascending )
104  {
105  for ( j = 0; j < n_iter; j++ )
106  {
107  n_elem = bl1_min( j + 1, n_elem_max );
108  a_begin = a + j*lda;
109  b_begin = b + j*ldb;
110 
111  bl1_saxpyv( conj,
112  n_elem,
113  alpha,
114  a_begin, inca,
115  b_begin, incb );
116  }
117  }
118 }
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_saxpyv(), and bl1_zero_dim2().

Referenced by FLA_Axpyrt_external().

◆ bl1_saxpymt()

void bl1_saxpymt ( trans1_t  trans,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
14 {
15  float* a_begin;
16  float* b_begin;
17  int lda, inca;
18  int ldb, incb;
19  int n_iter;
20  int n_elem;
21  int j;
22 
23  // Return early if possible.
24  if ( bl1_zero_dim2( m, n ) ) return;
25 
26  // Handle cases where A and B are vectors to ensure that the underlying axpy
27  // gets invoked only once.
28  if ( bl1_is_vector( m, n ) )
29  {
30  // Initialize with values appropriate for vectors.
31  n_iter = 1;
32  n_elem = bl1_vector_dim( m, n );
33  lda = 1; // multiplied by zero when n_iter == 1; not needed.
34  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
35  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
36  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
37  }
38  else // matrix case
39  {
40  // Initialize with optimal values for column-major storage.
41  n_iter = n;
42  n_elem = m;
43  lda = a_cs;
44  inca = a_rs;
45  ldb = b_cs;
46  incb = b_rs;
47 
48  // Handle the transposition of A.
49  if ( bl1_does_trans( trans ) )
50  {
51  bl1_swap_ints( lda, inca );
52  }
53 
54  // An optimization: if B is row-major and if A is effectively row-major
55  // after a possible transposition, then let's access the matrices by rows
56  // instead of by columns for increased spatial locality.
57  if ( bl1_is_row_storage( b_rs, b_cs ) )
58  {
59  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
60  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
61  {
62  bl1_swap_ints( n_iter, n_elem );
63  bl1_swap_ints( lda, inca );
64  bl1_swap_ints( ldb, incb );
65  }
66  }
67  }
68 
69  for ( j = 0; j < n_iter; j++ )
70  {
71  a_begin = a + j*lda;
72  b_begin = b + j*ldb;
73 
74  bl1_saxpy( n_elem,
75  alpha,
76  a_begin, inca,
77  b_begin, incb );
78  }
79 }
void bl1_saxpy(int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpy.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_saxpysmt()

void bl1_saxpysmt ( trans1_t  trans,
int  m,
int  n,
float *  alpha0,
float *  alpha1,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  b,
int  b_rs,
int  b_cs 
)
14 {
15  float* a_begin;
16  float* b_begin;
17  float alpha_prod;
18  int lda, inca;
19  int ldb, incb;
20  int n_iter;
21  int n_elem;
22  int j;
23 
24  // Return early if possible.
25  if ( bl1_zero_dim2( m, n ) ) return;
26 
27  alpha_prod = (*alpha0) * (*alpha1);
28 
29  // Handle cases where A and B are vectors to ensure that the underlying axpy
30  // gets invoked only once.
31  if ( bl1_is_vector( m, n ) )
32  {
33  // Initialize with values appropriate for vectors.
34  n_iter = 1;
35  n_elem = bl1_vector_dim( m, n );
36  lda = 1; // multiplied by zero when n_iter == 1; not needed.
37  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
38  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
39  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
40  }
41  else // matrix case
42  {
43  // Initialize with optimal values for column-major storage.
44  n_iter = n;
45  n_elem = m;
46  lda = a_cs;
47  inca = a_rs;
48  ldb = b_cs;
49  incb = b_rs;
50 
51  // Handle the transposition of A.
52  if ( bl1_does_trans( trans ) )
53  {
54  bl1_swap_ints( lda, inca );
55  }
56 
57  // An optimization: if B is row-major and if A is effectively row-major
58  // after a possible transposition, then let's access the matrices by rows
59  // instead of by columns for increased spatial locality.
60  if ( bl1_is_row_storage( b_rs, b_cs ) )
61  {
62  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
63  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
64  {
65  bl1_swap_ints( n_iter, n_elem );
66  bl1_swap_ints( lda, inca );
67  bl1_swap_ints( ldb, incb );
68  }
69  }
70  }
71 
72  for ( j = 0; j < n_iter; j++ )
73  {
74  a_begin = a + j*lda;
75  b_begin = b + j*ldb;
76 
77  bl1_sscal( n_elem,
78  beta,
79  b_begin, incb );
80 
81  bl1_saxpy( n_elem,
82  &alpha_prod,
83  a_begin, inca,
84  b_begin, incb );
85  }
86 }

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

◆ bl1_saxpysv()

void bl1_saxpysv ( int  n,
float *  alpha0,
float *  alpha1,
float *  x,
int  incx,
float *  beta,
float *  y,
int  incy 
)
14 {
15  float alpha_prod;
16 
17  // Return early if possible.
18  if ( bl1_zero_dim1( n ) ) return;
19 
20  alpha_prod = (*alpha0) * (*alpha1);
21 
22  bl1_sscal( n,
23  beta,
24  y, incy );
25 
26  bl1_saxpy( n,
27  &alpha_prod,
28  x, incx,
29  y, incy );
30 }

References bl1_saxpy(), bl1_sscal(), and bl1_zero_dim1().

Referenced by FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().

◆ bl1_saxpyv()

void bl1_saxpyv ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy 
)
14 {
15  bl1_saxpy( n,
16  alpha,
17  x, incx,
18  y, incy );
19 }

References bl1_saxpy().

Referenced by bl1_saxpymrt(), bl1_strmvsx(), bl1_strsvsx(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_il_ops_var4(), FLA_Eig_gest_il_ops_var5(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_iu_ops_var4(), FLA_Eig_gest_iu_ops_var5(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nl_ops_var4(), FLA_Eig_gest_nl_ops_var5(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Eig_gest_nu_ops_var4(), FLA_Eig_gest_nu_ops_var5(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Fused_Uhu_Yhu_Zhu_ops_var1(), FLA_Fused_UYx_ZVx_ops_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), and FLA_Tridiag_UT_l_step_ops_var3().

◆ bl1_sccopymr()

void bl1_sccopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
465 {
466  float* a_begin;
467  scomplex* b_begin;
468  int lda, inca;
469  int ldb, incb;
470  int n_iter;
471  int n_elem_max;
472  int n_elem;
473  int j;
474 
475  // Return early if possible.
476  if ( bl1_zero_dim2( m, n ) ) return;
477 
478  // We initialize for column-major.
479  n_iter = n;
480  n_elem_max = m;
481  lda = a_cs;
482  inca = a_rs;
483  ldb = b_cs;
484  incb = b_rs;
485 
486  // An optimization: if B is row-major, then let's access the matrix
487  // by rows instead of by columns for increased spatial locality.
488  if ( bl1_is_row_storage( b_rs, b_cs ) )
489  {
490  bl1_swap_ints( n_iter, n_elem_max );
491  bl1_swap_ints( lda, inca );
492  bl1_swap_ints( ldb, incb );
493  bl1_toggle_uplo( uplo );
494  }
495 
496 
497  if ( bl1_is_upper( uplo ) )
498  {
499  for ( j = 0; j < n_iter; j++ )
500  {
501  n_elem = bl1_min( j + 1, n_elem_max );
502  a_begin = a + j*lda;
503  b_begin = b + j*ldb;
504 
505  bl1_sccopyv( BLIS1_NO_CONJUGATE,
506  n_elem,
507  a_begin, inca,
508  b_begin, incb );
509  }
510  }
511  else // if ( bl1_is_lower( uplo ) )
512  {
513  for ( j = 0; j < n_iter; j++ )
514  {
515  n_elem = bl1_max( 0, n_elem_max - j );
516  a_begin = a + j*lda + j*inca;
517  b_begin = b + j*ldb + j*incb;
518 
519  if ( n_elem <= 0 ) break;
520 
521  bl1_sccopyv( BLIS1_NO_CONJUGATE,
522  n_elem,
523  a_begin, inca,
524  b_begin, incb );
525  }
526  }
527 }
void bl1_sccopyv(conj1_t conj, int m, float *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:124

References bl1_is_row_storage(), bl1_is_upper(), bl1_sccopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_sccopymrt()

void bl1_sccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
649 {
650  float* a_begin;
651  scomplex* b_begin;
652  int lda, inca;
653  int ldb, incb;
654  int n_iter;
655  int n_elem;
656  int n_elem_max;
657  int n_elem_is_descending;
658  int j;
659  conj1_t conj;
660 
661  // Return early if possible.
662  if ( bl1_zero_dim2( m, n ) ) return;
663 
664  // Initialize variables based on storage format of B and value of uplo.
665  if ( bl1_is_col_storage( b_rs, b_cs ) )
666  {
667  if ( bl1_is_lower( uplo ) )
668  {
669  n_iter = bl1_min( m, n );
670  n_elem_max = m;
671  lda = a_cs;
672  inca = a_rs;
673  ldb = b_cs;
674  incb = b_rs;
675  n_elem_is_descending = TRUE;
676  }
677  else // if ( bl1_is_upper( uplo ) )
678  {
679  n_iter = n;
680  n_elem_max = bl1_min( m, n );
681  lda = a_cs;
682  inca = a_rs;
683  ldb = b_cs;
684  incb = b_rs;
685  n_elem_is_descending = FALSE;
686  }
687  }
688  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
689  {
690  if ( bl1_is_lower( uplo ) )
691  {
692  n_iter = m;
693  n_elem_max = bl1_min( m, n );
694  lda = a_rs;
695  inca = a_cs;
696  ldb = b_rs;
697  incb = b_cs;
698  n_elem_is_descending = FALSE;
699  }
700  else // if ( bl1_is_upper( uplo ) )
701  {
702  n_iter = bl1_min( m, n );
703  n_elem_max = n;
704  lda = a_rs;
705  inca = a_cs;
706  ldb = b_rs;
707  incb = b_cs;
708  n_elem_is_descending = TRUE;
709  }
710  }
711 
712  // Swap lda and inca if we're doing a transpose.
713  if ( bl1_does_trans( trans ) )
714  {
715  bl1_swap_ints( lda, inca );
716  }
717 
718  // Extract conj component from trans parameter.
719  conj = bl1_proj_trans1_to_conj( trans );
720 
721  // Choose the loop based on whether n_elem will be shrinking or growing
722  // with each iteration.
723  if ( n_elem_is_descending )
724  {
725  for ( j = 0; j < n_iter; j++ )
726  {
727  n_elem = n_elem_max - j;
728  a_begin = a + j*lda + j*inca;
729  b_begin = b + j*ldb + j*incb;
730 
731  bl1_sccopyv( conj,
732  n_elem,
733  a_begin, inca,
734  b_begin, incb );
735  }
736  }
737  else // if ( n_elem_is_ascending )
738  {
739  for ( j = 0; j < n_iter; j++ )
740  {
741  n_elem = bl1_min( j + 1, n_elem_max );
742  a_begin = a + j*lda;
743  b_begin = b + j*ldb;
744 
745  bl1_sccopyv( conj,
746  n_elem,
747  a_begin, inca,
748  b_begin, incb );
749  }
750  }
751 }

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_sccopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_sccopymt()

void bl1_sccopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
563 {
564  float* a_begin;
565  scomplex* b_begin;
566  int lda, inca;
567  int ldb, incb;
568  int n_iter;
569  int n_elem;
570  int j;
571  conj1_t conj;
572 
573  // Return early if possible.
574  if ( bl1_zero_dim2( m, n ) ) return;
575 
576  // Handle cases where A and B are vectors to ensure that the underlying copy
577  // gets invoked only once.
578  if ( bl1_is_vector( m, n ) )
579  {
580  // Initialize with values appropriate for vectors.
581  n_iter = 1;
582  n_elem = bl1_vector_dim( m, n );
583  lda = 1; // multiplied by zero when n_iter == 1; not needed.
584  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
585  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
586  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
587  }
588  else // matrix case
589  {
590  // Initialize with optimal values for column-major storage of B.
591  n_iter = n;
592  n_elem = m;
593  lda = a_cs;
594  inca = a_rs;
595  ldb = b_cs;
596  incb = b_rs;
597 
598  // Handle the transposition of A.
599  if ( bl1_does_trans( trans ) )
600  {
601  bl1_swap_ints( lda, inca );
602  }
603 
604  // An optimization: if B is row-major, then let's access the matrix by rows
605  // instead of by columns for increased spatial locality.
606  if ( bl1_is_row_storage( b_rs, b_cs ) )
607  {
608  bl1_swap_ints( n_iter, n_elem );
609  bl1_swap_ints( lda, inca );
610  bl1_swap_ints( ldb, incb );
611  }
612  }
613 
614  // Extract conj component from trans parameter.
615  conj = bl1_proj_trans1_to_conj( trans );
616 
617  for ( j = 0; j < n_iter; ++j )
618  {
619  a_begin = a + j*lda;
620  b_begin = b + j*ldb;
621 
622  bl1_sccopyv( conj,
623  n_elem,
624  a_begin, inca,
625  b_begin, incb );
626  }
627 }

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sccopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_sccopyv()

void bl1_sccopyv ( conj1_t  conj,
int  m,
float *  x,
int  incx,
scomplex *  y,
int  incy 
)
125 {
126  float* chi;
127  scomplex* psi;
128  int i;
129 
130  // Return early if possible.
131  if ( bl1_zero_dim1( m ) ) return;
132 
133  // Initialize pointers.
134  chi = x;
135  psi = y;
136 
137  for ( i = 0; i < m; ++i )
138  {
139  psi->real = *chi;
140  psi->imag = 0.0F;
141 
142  chi += incx;
143  psi += incy;
144  }
145 }

References bl1_zero_dim1(), i, scomplex::imag, and scomplex::real.

Referenced by bl1_sccopymr(), bl1_sccopymrt(), and bl1_sccopymt().

◆ bl1_sconjm()

void bl1_sconjm ( int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs 
)
14 {
15  return;
16 }

◆ bl1_sconjmr()

void bl1_sconjmr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs 
)
14 {
15  return;
16 }

◆ bl1_sconjv()

void bl1_sconjv ( int  m,
float *  x,
int  incx 
)

◆ bl1_scopy()

void bl1_scopy ( int  m,
float *  x,
int  incx,
float *  y,
int  incy 
)
14 {
15 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16  cblas_scopy( m,
17  x, incx,
18  y, incy );
19 #else
20  F77_scopy( &m,
21  x, &incx,
22  y, &incy );
23 #endif
24 }
void F77_scopy(int *n, float *x, int *incx, float *y, int *incy)
void cblas_scopy(const int N, const float *X, const int incX, float *Y, const int incY)

References cblas_scopy(), and F77_scopy().

Referenced by bl1_scopymr(), bl1_scopymt(), bl1_scopyv(), FLA_Obj_extract_imag_part(), FLA_Obj_extract_real_part(), and FLA_SA_LU_unb().

◆ bl1_scopymr()

void bl1_scopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
14 {
15  float* a_begin;
16  float* b_begin;
17  int lda, inca;
18  int ldb, incb;
19  int n_iter;
20  int n_elem_max;
21  int n_elem;
22  int j;
23 
24  // Return early if possible.
25  if ( bl1_zero_dim2( m, n ) ) return;
26 
27  // We initialize for column-major.
28  n_iter = n;
29  n_elem_max = m;
30  lda = a_cs;
31  inca = a_rs;
32  ldb = b_cs;
33  incb = b_rs;
34 
35  // An optimization: if A and B are both row-major, then let's access the
36  // matrices by rows instead of by columns for increased spatial locality.
37  if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
38  {
39  bl1_swap_ints( n_iter, n_elem_max );
40  bl1_swap_ints( lda, inca );
41  bl1_swap_ints( ldb, incb );
42  bl1_toggle_uplo( uplo );
43  }
44 
45 
46  if ( bl1_is_upper( uplo ) )
47  {
48  for ( j = 0; j < n_iter; j++ )
49  {
50  n_elem = bl1_min( j + 1, n_elem_max );
51  a_begin = a + j*lda;
52  b_begin = b + j*ldb;
53 
54  bl1_scopy( n_elem,
55  a_begin, inca,
56  b_begin, incb );
57  }
58  }
59  else // if ( bl1_is_lower( uplo ) )
60  {
61  for ( j = 0; j < n_iter; j++ )
62  {
63  n_elem = bl1_max( 0, n_elem_max - j );
64  a_begin = a + j*lda + j*inca;
65  b_begin = b + j*ldb + j*incb;
66 
67  if ( n_elem <= 0 ) break;
68 
69  bl1_scopy( n_elem,
70  a_begin, inca,
71  b_begin, incb );
72  }
73  }
74 }
void bl1_scopy(int m, float *x, int incx, float *y, int incy)
Definition: bl1_copy.c:13

References bl1_is_row_storage(), bl1_is_upper(), bl1_scopy(), and bl1_zero_dim2().

Referenced by bl1_screate_contigmr(), bl1_sfree_saved_contigmr(), and FLA_Copyr_external().

◆ bl1_scopymrt()

void bl1_scopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
14 {
15  float* a_begin;
16  float* b_begin;
17  int lda, inca;
18  int ldb, incb;
19  int n_iter;
20  int n_elem;
21  int n_elem_max;
22  int n_elem_is_descending;
23  int j;
24  conj1_t conj;
25 
26  // Return early if possible.
27  if ( bl1_zero_dim2( m, n ) ) return;
28 
29  // Initialize variables based on storage format of B and value of uplo.
30  if ( bl1_is_col_storage( b_rs, b_cs ) )
31  {
32  if ( bl1_is_lower( uplo ) )
33  {
34  n_iter = bl1_min( m, n );
35  n_elem_max = m;
36  lda = a_cs;
37  inca = a_rs;
38  ldb = b_cs;
39  incb = b_rs;
40  n_elem_is_descending = TRUE;
41  }
42  else // if ( bl1_is_upper( uplo ) )
43  {
44  n_iter = n;
45  n_elem_max = bl1_min( m, n );
46  lda = a_cs;
47  inca = a_rs;
48  ldb = b_cs;
49  incb = b_rs;
50  n_elem_is_descending = FALSE;
51  }
52  }
53  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
54  {
55  if ( bl1_is_lower( uplo ) )
56  {
57  n_iter = m;
58  n_elem_max = bl1_min( m, n );
59  lda = a_rs;
60  inca = a_cs;
61  ldb = b_rs;
62  incb = b_cs;
63  n_elem_is_descending = FALSE;
64  }
65  else // if ( bl1_is_upper( uplo ) )
66  {
67  n_iter = bl1_min( m, n );
68  n_elem_max = n;
69  lda = a_rs;
70  inca = a_cs;
71  ldb = b_rs;
72  incb = b_cs;
73  n_elem_is_descending = TRUE;
74  }
75  }
76 
77  // Swap lda and inca if we're doing a transpose.
78  if ( bl1_does_trans( trans ) )
79  {
80  bl1_swap_ints( lda, inca );
81  }
82 
83  // Extract conj component from trans parameter.
84  conj = bl1_proj_trans1_to_conj( trans );
85 
86  // Choose the loop based on whether n_elem will be shrinking or growing
87  // with each iteration.
88  if ( n_elem_is_descending )
89  {
90  for ( j = 0; j < n_iter; j++ )
91  {
92  n_elem = n_elem_max - j;
93  a_begin = a + j*lda + j*inca;
94  b_begin = b + j*ldb + j*incb;
95 
96  bl1_scopyv( conj,
97  n_elem,
98  a_begin, inca,
99  b_begin, incb );
100  }
101  }
102  else // if ( n_elem_is_ascending )
103  {
104  for ( j = 0; j < n_iter; j++ )
105  {
106  n_elem = bl1_min( j + 1, n_elem_max );
107  a_begin = a + j*lda;
108  b_begin = b + j*ldb;
109 
110  bl1_scopyv( conj,
111  n_elem,
112  a_begin, inca,
113  b_begin, incb );
114  }
115  }
116 }
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_scopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().

◆ bl1_scopymt()

void bl1_scopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
82 {
83  float* a_begin;
84  float* b_begin;
85  int lda, inca;
86  int ldb, incb;
87  int n_iter;
88  int n_elem;
89  int j;
90 
91  // Return early if possible.
92  if ( bl1_zero_dim2( m, n ) ) return;
93 
94  // Handle cases where A and B are vectors to ensure that the underlying copy
95  // gets invoked only once.
96  if ( bl1_is_vector( m, n ) )
97  {
98  // Initialize with values appropriate for vectors.
99  n_iter = 1;
100  n_elem = bl1_vector_dim( m, n );
101  lda = 1; // multiplied by zero when n_iter == 1; not needed.
102  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
103  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
104  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
105  }
106  else // matrix case
107  {
108  // Initialize with optimal values for column-major storage.
109  n_iter = n;
110  n_elem = m;
111  lda = a_cs;
112  inca = a_rs;
113  ldb = b_cs;
114  incb = b_rs;
115 
116  // Handle the transposition of A.
117  if ( bl1_does_trans( trans ) )
118  {
119  bl1_swap_ints( lda, inca );
120  }
121 
122  // An optimization: if B is row-major and if A is effectively row-major
123  // after a possible transposition, then let's access the matrix by rows
124  // instead of by columns for increased spatial locality.
125  if ( bl1_is_row_storage( b_rs, b_cs ) )
126  {
127  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
128  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
129  {
130  bl1_swap_ints( n_iter, n_elem );
131  bl1_swap_ints( lda, inca );
132  bl1_swap_ints( ldb, incb );
133  }
134  }
135  }
136 
137  for ( j = 0; j < n_iter; j++ )
138  {
139  a_begin = a + j*lda;
140  b_begin = b + j*ldb;
141 
142  bl1_scopy( n_elem,
143  a_begin, inca,
144  b_begin, incb );
145  }
146 }

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_scopy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_screate_contigm(), bl1_screate_contigmt(), bl1_sfree_saved_contigm(), bl1_sfree_saved_contigmsr(), bl1_ssymm(), bl1_ssyr2k(), bl1_strmmsx(), bl1_strsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_scopyv()

void bl1_scopyv ( conj1_t  conj,
int  m,
float *  x,
int  incx,
float *  y,
int  incy 
)

◆ bl1_sdcopymr()

void bl1_sdcopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
335 {
336  float* a_begin;
337  double* b_begin;
338  int lda, inca;
339  int ldb, incb;
340  int n_iter;
341  int n_elem_max;
342  int n_elem;
343  int j;
344 
345  // Return early if possible.
346  if ( bl1_zero_dim2( m, n ) ) return;
347 
348  // We initialize for column-major.
349  n_iter = n;
350  n_elem_max = m;
351  lda = a_cs;
352  inca = a_rs;
353  ldb = b_cs;
354  incb = b_rs;
355 
356  // An optimization: if B is row-major, then let's access the matrix
357  // by rows instead of by columns for increased spatial locality.
358  if ( bl1_is_row_storage( b_rs, b_cs ) )
359  {
360  bl1_swap_ints( n_iter, n_elem_max );
361  bl1_swap_ints( lda, inca );
362  bl1_swap_ints( ldb, incb );
363  bl1_toggle_uplo( uplo );
364  }
365 
366 
367  if ( bl1_is_upper( uplo ) )
368  {
369  for ( j = 0; j < n_iter; j++ )
370  {
371  n_elem = bl1_min( j + 1, n_elem_max );
372  a_begin = a + j*lda;
373  b_begin = b + j*ldb;
374 
375  bl1_sdcopyv( BLIS1_NO_CONJUGATE,
376  n_elem,
377  a_begin, inca,
378  b_begin, incb );
379  }
380  }
381  else // if ( bl1_is_lower( uplo ) )
382  {
383  for ( j = 0; j < n_iter; j++ )
384  {
385  n_elem = bl1_max( 0, n_elem_max - j );
386  a_begin = a + j*lda + j*inca;
387  b_begin = b + j*ldb + j*incb;
388 
389  if ( n_elem <= 0 ) break;
390 
391  bl1_sdcopyv( BLIS1_NO_CONJUGATE,
392  n_elem,
393  a_begin, inca,
394  b_begin, incb );
395  }
396  }
397 }
void bl1_sdcopyv(conj1_t conj, int m, float *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:80

References bl1_is_row_storage(), bl1_is_upper(), bl1_sdcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_sdcopymrt()

void bl1_sdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
543 {
544  float* a_begin;
545  double* b_begin;
546  int lda, inca;
547  int ldb, incb;
548  int n_iter;
549  int n_elem;
550  int n_elem_max;
551  int n_elem_is_descending;
552  int j;
553  conj1_t conj;
554 
555  // Return early if possible.
556  if ( bl1_zero_dim2( m, n ) ) return;
557 
558  // Initialize variables based on storage format of B and value of uplo.
559  if ( bl1_is_col_storage( b_rs, b_cs ) )
560  {
561  if ( bl1_is_lower( uplo ) )
562  {
563  n_iter = bl1_min( m, n );
564  n_elem_max = m;
565  lda = a_cs;
566  inca = a_rs;
567  ldb = b_cs;
568  incb = b_rs;
569  n_elem_is_descending = TRUE;
570  }
571  else // if ( bl1_is_upper( uplo ) )
572  {
573  n_iter = n;
574  n_elem_max = bl1_min( m, n );
575  lda = a_cs;
576  inca = a_rs;
577  ldb = b_cs;
578  incb = b_rs;
579  n_elem_is_descending = FALSE;
580  }
581  }
582  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
583  {
584  if ( bl1_is_lower( uplo ) )
585  {
586  n_iter = m;
587  n_elem_max = bl1_min( m, n );
588  lda = a_rs;
589  inca = a_cs;
590  ldb = b_rs;
591  incb = b_cs;
592  n_elem_is_descending = FALSE;
593  }
594  else // if ( bl1_is_upper( uplo ) )
595  {
596  n_iter = bl1_min( m, n );
597  n_elem_max = n;
598  lda = a_rs;
599  inca = a_cs;
600  ldb = b_rs;
601  incb = b_cs;
602  n_elem_is_descending = TRUE;
603  }
604  }
605 
606  // Swap lda and inca if we're doing a transpose.
607  if ( bl1_does_trans( trans ) )
608  {
609  bl1_swap_ints( lda, inca );
610  }
611 
612  // Extract conj component from trans parameter.
613  conj = bl1_proj_trans1_to_conj( trans );
614 
615  // Choose the loop based on whether n_elem will be shrinking or growing
616  // with each iteration.
617  if ( n_elem_is_descending )
618  {
619  for ( j = 0; j < n_iter; j++ )
620  {
621  n_elem = n_elem_max - j;
622  a_begin = a + j*lda + j*inca;
623  b_begin = b + j*ldb + j*incb;
624 
625  bl1_sdcopyv( conj,
626  n_elem,
627  a_begin, inca,
628  b_begin, incb );
629  }
630  }
631  else // if ( n_elem_is_ascending )
632  {
633  for ( j = 0; j < n_iter; j++ )
634  {
635  n_elem = bl1_min( j + 1, n_elem_max );
636  a_begin = a + j*lda;
637  b_begin = b + j*ldb;
638 
639  bl1_sdcopyv( conj,
640  n_elem,
641  a_begin, inca,
642  b_begin, incb );
643  }
644  }
645 }

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_sdcopymt()

void bl1_sdcopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
429 {
430  float* a_begin;
431  double* b_begin;
432  int lda, inca;
433  int ldb, incb;
434  int n_iter;
435  int n_elem;
436  int j;
437  conj1_t conj;
438 
439  // Return early if possible.
440  if ( bl1_zero_dim2( m, n ) ) return;
441 
442  // Handle cases where A and B are vectors to ensure that the underlying copy
443  // gets invoked only once.
444  if ( bl1_is_vector( m, n ) )
445  {
446  // Initialize with values appropriate for vectors.
447  n_iter = 1;
448  n_elem = bl1_vector_dim( m, n );
449  lda = 1; // multiplied by zero when n_iter == 1; not needed.
450  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
451  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
452  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
453  }
454  else // matrix case
455  {
456  // Initialize with optimal values for column-major storage of B.
457  n_iter = n;
458  n_elem = m;
459  lda = a_cs;
460  inca = a_rs;
461  ldb = b_cs;
462  incb = b_rs;
463 
464  // Handle the transposition of A.
465  if ( bl1_does_trans( trans ) )
466  {
467  bl1_swap_ints( lda, inca );
468  }
469 
470  // An optimization: if B is row-major, then let's access the matrix by rows
471  // instead of by columns for increased spatial locality.
472  if ( bl1_is_row_storage( b_rs, b_cs ) )
473  {
474  bl1_swap_ints( n_iter, n_elem );
475  bl1_swap_ints( lda, inca );
476  bl1_swap_ints( ldb, incb );
477  }
478  }
479 
480  // Extract conj component from trans parameter.
481  conj = bl1_proj_trans1_to_conj( trans );
482 
483  for ( j = 0; j < n_iter; ++j )
484  {
485  a_begin = a + j*lda;
486  b_begin = b + j*ldb;
487 
488  bl1_sdcopyv( conj,
489  n_elem,
490  a_begin, inca,
491  b_begin, incb );
492  }
493 }

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_sdcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_sdcopyv()

void bl1_sdcopyv ( conj1_t  conj,
int  m,
float *  x,
int  incx,
double *  y,
int  incy 
)
81 {
82  float* chi;
83  double* psi;
84  int i;
85 
86  // Return early if possible.
87  if ( bl1_zero_dim1( m ) ) return;
88 
89  // Initialize pointers.
90  chi = x;
91  psi = y;
92 
93  for ( i = 0; i < m; ++i )
94  {
95  *psi = *chi;
96 
97  chi += incx;
98  psi += incy;
99  }
100 }

References bl1_zero_dim1(), and i.

Referenced by bl1_sdcopymr(), bl1_sdcopymrt(), and bl1_sdcopymt().

◆ bl1_sdot()

void bl1_sdot ( conj1_t  conj,
int  n,
float *  x,
int  incx,
float *  y,
int  incy,
float *  rho 
)

◆ bl1_sdot2s()

void bl1_sdot2s ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy,
float *  beta,
float *  rho 
)
14 {
15  float dot;
16 
17  bl1_sdot( conj,
18  n,
19  x, incx,
20  y, incy,
21  &dot );
22 
23  *rho = (*beta) * (*rho) + 2.0F * (*alpha) * dot;
24 }
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13

References bl1_sdot(), and rho.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_ops_var1(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var1(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_nl_ops_var1(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nu_ops_var1(), FLA_Eig_gest_nu_ops_var2(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), and FLA_Lyap_n_ops_var3().

◆ bl1_sdots()

void bl1_sdots ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy,
float *  beta,
float *  rho 
)

◆ bl1_sfnorm()

void bl1_sfnorm ( int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  norm 
)
14 {
15  float* a_ij;
16  float sum;
17  int lda, inca;
18  int n_iter;
19  int n_elem;
20  int i, j;
21 
22  // Return early if possible.
23  if ( bl1_zero_dim2( m, n ) ) return;
24 
25  // Handle cases where A is a vector separately.
26  if ( bl1_is_vector( m, n ) )
27  {
28  // Initialize with values appropriate for vectors.
29  n_iter = 1;
30  n_elem = bl1_vector_dim( m, n );
31  lda = 1; // multiplied by zero when n_iter == 1; not needed.
32  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
33  }
34  else // matrix case
35  {
36  // Initialize with optimal values for column-major storage.
37  n_iter = n;
38  n_elem = m;
39  lda = a_cs;
40  inca = a_rs;
41 
42  // An optimization: if A is row-major, then let's access the matrix by
43  // rows instead of by columns for increased spatial locality.
44  if ( bl1_is_row_storage( a_rs, a_cs ) )
45  {
46  bl1_swap_ints( n_iter, n_elem );
47  bl1_swap_ints( lda, inca );
48  }
49  }
50 
51  // Initialize the accumulator variable.
52  sum = 0.0F;
53 
54  for ( j = 0; j < n_iter; j++ )
55  {
56  for ( i = 0; i < n_elem; i++ )
57  {
58  a_ij = a + i*inca + j*lda;
59  sum += (*a_ij) * (*a_ij);
60  }
61  }
62 
63  // Compute the norm and store the result.
64  *norm = ( float ) sqrt( sum );
65 }

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, and i.

Referenced by FLA_Norm_frob().

◆ bl1_sinvscalm()

void bl1_sinvscalm ( conj1_t  conj,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)
14 {
15  float alpha_inv;
16  float* a_begin;
17  int lda, inca;
18  int n_iter;
19  int n_elem;
20  int j;
21 
22  // Return early if possible.
23  if ( bl1_zero_dim2( m, n ) ) return;
24  if ( bl1_seq1( alpha ) ) return;
25 
26  // Handle cases where A is a vector to ensure that the underlying scal
27  // gets invoked only once.
28  if ( bl1_is_vector( m, n ) )
29  {
30  // Initialize with values appropriate for a vector.
31  n_iter = 1;
32  n_elem = bl1_vector_dim( m, n );
33  lda = 1; // multiplied by zero when n_iter == 1; not needed.
34  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
35  }
36  else // matrix case
37  {
38  // Initialize with optimal values for column-major storage.
39  n_iter = n;
40  n_elem = m;
41  lda = a_cs;
42  inca = a_rs;
43 
44  // An optimization: if A is row-major, then let's access the matrix
45  // by rows instead of by columns to increase spatial locality.
46  if ( bl1_is_row_storage( a_rs, a_cs ) )
47  {
48  bl1_swap_ints( n_iter, n_elem );
49  bl1_swap_ints( lda, inca );
50  }
51  }
52 
53  bl1_sinvert2s( conj, alpha, &alpha_inv );
54 
55  for ( j = 0; j < n_iter; j++ )
56  {
57  a_begin = a + j*lda;
58 
59  bl1_sscal( n_elem,
60  &alpha_inv,
61  a_begin, inca );
62  }
63 }

References bl1_is_row_storage(), bl1_is_vector(), bl1_sinvert2s(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_sinvscalv()

void bl1_sinvscalv ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx 
)

◆ bl1_snrm2()

void bl1_snrm2 ( int  n,
float *  x,
int  incx,
float *  norm 
)
14 {
15 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16  *norm = cblas_snrm2( n,
17  x, incx );
18 #else
19  *norm = F77_snrm2( &n,
20  x, &incx );
21 #endif
22 }
float F77_snrm2(int *n, float *x, int *incx)
float cblas_snrm2(const int N, const float *X, const int incX)

References cblas_snrm2(), and F77_snrm2().

Referenced by FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_l_ops(), FLA_Househ3UD_UT_ops(), and FLA_Nrm2_external().

◆ bl1_sscal()

void bl1_sscal ( int  n,
float *  alpha,
float *  x,
int  incx 
)
14 {
15 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16  cblas_sscal( n,
17  *alpha,
18  x, incx );
19 #else
20  F77_sscal( &n,
21  alpha,
22  x, &incx );
23 #endif
24 }
void F77_sscal(int *n, float *alpha, float *y, int *incy)
void cblas_sscal(const int N, const float alpha, float *X, const int incX)

References cblas_sscal(), and F77_sscal().

Referenced by bl1_cconjm(), bl1_cconjmr(), bl1_cconjv(), bl1_saxpysmt(), bl1_saxpysv(), bl1_sinvscalm(), bl1_sinvscalv(), bl1_sscalm(), bl1_sscalmr(), bl1_sscalv(), and FLA_SA_LU_unb().

◆ bl1_sscalm()

void bl1_sscalm ( conj1_t  conj,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)
14 {
15  float alpha_conj;
16  float* a_begin;
17  int lda, inca;
18  int n_iter;
19  int n_elem;
20  int j;
21 
22  // Return early if possible.
23  if ( bl1_zero_dim2( m, n ) ) return;
24  if ( bl1_seq1( alpha ) ) return;
25 
26  // Handle cases where A is a vector to ensure that the underlying scal
27  // gets invoked only once.
28  if ( bl1_is_vector( m, n ) )
29  {
30  // Initialize with values appropriate for a vector.
31  n_iter = 1;
32  n_elem = bl1_vector_dim( m, n );
33  lda = 1; // multiplied by zero when n_iter == 1; not needed.
34  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
35  }
36  else // matrix case
37  {
38  // Initialize with optimal values for column-major storage.
39  n_iter = n;
40  n_elem = m;
41  lda = a_cs;
42  inca = a_rs;
43 
44  // An optimization: if A is row-major, then let's access the matrix
45  // by rows instead of by columns to increase spatial locality.
46  if ( bl1_is_row_storage( a_rs, a_cs ) )
47  {
48  bl1_swap_ints( n_iter, n_elem );
49  bl1_swap_ints( lda, inca );
50  }
51  }
52 
53  bl1_scopys( conj, alpha, &alpha_conj );
54 
55  for ( j = 0; j < n_iter; j++ )
56  {
57  a_begin = a + j*lda;
58 
59  bl1_sscal( n_elem,
60  &alpha_conj,
61  a_begin, inca );
62  }
63 }

References bl1_is_row_storage(), bl1_is_vector(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_Lyap_n_ops_var4(), FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_sscalmr()

void bl1_sscalmr ( uplo1_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)
14 {
15  float* a_begin;
16  int lda, inca;
17  int n_iter;
18  int n_elem_max;
19  int n_elem;
20  int j;
21 
22  // Return early if possible.
23  if ( bl1_zero_dim2( m, n ) ) return;
24  if ( bl1_seq1( alpha ) ) return;
25 
26  // We initialize for column-major.
27  n_iter = n;
28  n_elem_max = m;
29  lda = a_cs;
30  inca = a_rs;
31 
32  // An optimization: if A is row-major, then let's access the matrix
33  // by rows instead of by columns to increase spatial locality.
34  if ( bl1_is_row_storage( a_rs, a_cs ) )
35  {
36  bl1_swap_ints( n_iter, n_elem_max );
37  bl1_swap_ints( lda, inca );
38  bl1_toggle_uplo( uplo );
39  }
40 
41  if ( bl1_is_upper( uplo ) )
42  {
43  for ( j = 0; j < n_iter; j++ )
44  {
45  n_elem = bl1_min( j + 1, n_elem_max );
46  a_begin = a + j*lda;
47 
48  bl1_sscal( n_elem,
49  alpha,
50  a_begin, inca );
51  }
52  }
53  else // if ( bl1_is_lower( uplo ) )
54  {
55  for ( j = 0; j < n_iter; j++ )
56  {
57  n_elem = bl1_max( 0, n_elem_max - j );
58  a_begin = a + j*lda + j*inca;
59 
60  if ( n_elem <= 0 ) break;
61 
62  bl1_sscal( n_elem,
63  alpha,
64  a_begin, inca );
65  }
66  }
67 }

References bl1_is_row_storage(), bl1_is_upper(), bl1_sscal(), and bl1_zero_dim2().

Referenced by FLA_Scalr_external().

◆ bl1_sscalv()

void bl1_sscalv ( conj1_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx 
)

◆ bl1_sscopymr()

void bl1_sscopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
269 {
270  float* a_begin;
271  float* b_begin;
272  int lda, inca;
273  int ldb, incb;
274  int n_iter;
275  int n_elem_max;
276  int n_elem;
277  int j;
278 
279  // Return early if possible.
280  if ( bl1_zero_dim2( m, n ) ) return;
281 
282  // We initialize for column-major.
283  n_iter = n;
284  n_elem_max = m;
285  lda = a_cs;
286  inca = a_rs;
287  ldb = b_cs;
288  incb = b_rs;
289 
290  // An optimization: if B is row-major, then let's access the matrix
291  // by rows instead of by columns for increased spatial locality.
292  if ( bl1_is_row_storage( b_rs, b_cs ) )
293  {
294  bl1_swap_ints( n_iter, n_elem_max );
295  bl1_swap_ints( lda, inca );
296  bl1_swap_ints( ldb, incb );
297  bl1_toggle_uplo( uplo );
298  }
299 
300 
301  if ( bl1_is_upper( uplo ) )
302  {
303  for ( j = 0; j < n_iter; j++ )
304  {
305  n_elem = bl1_min( j + 1, n_elem_max );
306  a_begin = a + j*lda;
307  b_begin = b + j*ldb;
308 
309  bl1_scopyv( BLIS1_NO_CONJUGATE,
310  n_elem,
311  a_begin, inca,
312  b_begin, incb );
313  }
314  }
315  else // if ( bl1_is_lower( uplo ) )
316  {
317  for ( j = 0; j < n_iter; j++ )
318  {
319  n_elem = bl1_max( 0, n_elem_max - j );
320  a_begin = a + j*lda + j*inca;
321  b_begin = b + j*ldb + j*incb;
322 
323  if ( n_elem <= 0 ) break;
324 
325  bl1_scopyv( BLIS1_NO_CONJUGATE,
326  n_elem,
327  a_begin, inca,
328  b_begin, incb );
329  }
330  }
331 }

References bl1_is_row_storage(), bl1_is_upper(), bl1_scopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

◆ bl1_sscopymrt()

void bl1_sscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
437 {
438  float* a_begin;
439  float* b_begin;
440  int lda, inca;
441  int ldb, incb;
442  int n_iter;
443  int n_elem;
444  int n_elem_max;
445  int n_elem_is_descending;
446  int j;
447  conj1_t conj;
448 
449  // Return early if possible.
450  if ( bl1_zero_dim2( m, n ) ) return;
451 
452  // Initialize variables based on storage format of B and value of uplo.
453  if ( bl1_is_col_storage( b_rs, b_cs ) )
454  {
455  if ( bl1_is_lower( uplo ) )
456  {
457  n_iter = bl1_min( m, n );
458  n_elem_max = m;
459  lda = a_cs;
460  inca = a_rs;
461  ldb = b_cs;
462  incb = b_rs;
463  n_elem_is_descending = TRUE;
464  }
465  else // if ( bl1_is_upper( uplo ) )
466  {
467  n_iter = n;
468  n_elem_max = bl1_min( m, n );
469  lda = a_cs;
470  inca = a_rs;
471  ldb = b_cs;
472  incb = b_rs;
473  n_elem_is_descending = FALSE;
474  }
475  }
476  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
477  {
478  if ( bl1_is_lower( uplo ) )
479  {
480  n_iter = m;
481  n_elem_max = bl1_min( m, n );
482  lda = a_rs;
483  inca = a_cs;
484  ldb = b_rs;
485  incb = b_cs;
486  n_elem_is_descending = FALSE;
487  }
488  else // if ( bl1_is_upper( uplo ) )
489  {
490  n_iter = bl1_min( m, n );
491  n_elem_max = n;
492  lda = a_rs;
493  inca = a_cs;
494  ldb = b_rs;
495  incb = b_cs;
496  n_elem_is_descending = TRUE;
497  }
498  }
499 
500  // Swap lda and inca if we're doing a transpose.
501  if ( bl1_does_trans( trans ) )
502  {
503  bl1_swap_ints( lda, inca );
504  }
505 
506  // Extract conj component from trans parameter.
507  conj = bl1_proj_trans1_to_conj( trans );
508 
509  // Choose the loop based on whether n_elem will be shrinking or growing
510  // with each iteration.
511  if ( n_elem_is_descending )
512  {
513  for ( j = 0; j < n_iter; j++ )
514  {
515  n_elem = n_elem_max - j;
516  a_begin = a + j*lda + j*inca;
517  b_begin = b + j*ldb + j*incb;
518 
519  bl1_scopyv( conj,
520  n_elem,
521  a_begin, inca,
522  b_begin, incb );
523  }
524  }
525  else // if ( n_elem_is_ascending )
526  {
527  for ( j = 0; j < n_iter; j++ )
528  {
529  n_elem = bl1_min( j + 1, n_elem_max );
530  a_begin = a + j*lda;
531  b_begin = b + j*ldb;
532 
533  bl1_scopyv( conj,
534  n_elem,
535  a_begin, inca,
536  b_begin, incb );
537  }
538  }
539 }

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_scopyv(), and bl1_zero_dim2().

◆ bl1_sscopymt()

void bl1_sscopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
361 {
362  float* a_begin;
363  float* b_begin;
364  int lda, inca;
365  int ldb, incb;
366  int n_iter;
367  int n_elem;
368  int j;
369  conj1_t conj;
370 
371  // Return early if possible.
372  if ( bl1_zero_dim2( m, n ) ) return;
373 
374  // Handle cases where A and B are vectors to ensure that the underlying copy
375  // gets invoked only once.
376  if ( bl1_is_vector( m, n ) )
377  {
378  // Initialize with values appropriate for vectors.
379  n_iter = 1;
380  n_elem = bl1_vector_dim( m, n );
381  lda = 1; // multiplied by zero when n_iter == 1; not needed.
382  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
383  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
384  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
385  }
386  else // matrix case
387  {
388  // Initialize with optimal values for column-major storage of B.
389  n_iter = n;
390  n_elem = m;
391  lda = a_cs;
392  inca = a_rs;
393  ldb = b_cs;
394  incb = b_rs;
395 
396  // Handle the transposition of A.
397  if ( bl1_does_trans( trans ) )
398  {
399  bl1_swap_ints( lda, inca );
400  }
401 
402  // An optimization: if B is row-major, then let's access the matrix by rows
403  // instead of by columns for increased spatial locality.
404  if ( bl1_is_row_storage( b_rs, b_cs ) )
405  {
406  bl1_swap_ints( n_iter, n_elem );
407  bl1_swap_ints( lda, inca );
408  bl1_swap_ints( ldb, incb );
409  }
410  }
411 
412  // Extract conj component from trans parameter.
413  conj = bl1_proj_trans1_to_conj( trans );
414 
415  for ( j = 0; j < n_iter; ++j )
416  {
417  a_begin = a + j*lda;
418  b_begin = b + j*ldb;
419 
420  bl1_scopyv( conj,
421  n_elem,
422  a_begin, inca,
423  b_begin, incb );
424  }
425 }

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_scopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

◆ bl1_sswap()

void bl1_sswap ( int  n,
float *  x,
int  incx,
float *  y,
int  incy 
)
14 {
15 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
16  cblas_sswap( n,
17  x, incx,
18  y, incy );
19 #else
20  F77_sswap( &n,
21  x, &incx,
22  y, &incy );
23 #endif
24 }
void F77_sswap(int *n, float *x, int *incx, float *y, int *incy)
void cblas_sswap(const int N, float *X, const int incX, float *Y, const int incY)

References cblas_sswap(), and F77_sswap().

Referenced by bl1_sswapmt(), bl1_sswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

◆ bl1_sswapmt()

void bl1_sswapmt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
14 {
15  float* a_begin;
16  float* b_begin;
17  int lda, inca;
18  int ldb, incb;
19  int n_iter;
20  int n_elem;
21  int j;
22 
23  // Return early if possible.
24  if ( bl1_zero_dim2( m, n ) ) return;
25 
26  // Handle cases where A and B are vectors to ensure that the underlying swap
27  // gets invoked only once.
28  if ( bl1_is_vector( m, n ) )
29  {
30  // Initialize with values appropriate for vectors.
31  n_iter = 1;
32  n_elem = bl1_vector_dim( m, n );
33  lda = 1; // multiplied by zero when n_iter == 1; not needed.
34  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
35  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
36  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
37  }
38  else // matrix case
39  {
40  // Initialize with optimal values for column-major storage.
41  n_iter = n;
42  n_elem = m;
43  lda = a_cs;
44  inca = a_rs;
45  ldb = b_cs;
46  incb = b_rs;
47 
48  // Handle the transposition of A.
49  if ( bl1_does_trans( trans ) )
50  {
51  bl1_swap_ints( lda, inca );
52  }
53 
54  // An optimization: if B is row-major and if A is effectively row-major
55  // after a possible transposition, then let's access the matrix by rows
56  // instead of by columns for increased spatial locality.
57  if ( bl1_is_row_storage( b_rs, b_cs ) )
58  {
59  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
60  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
61  {
62  bl1_swap_ints( n_iter, n_elem );
63  bl1_swap_ints( lda, inca );
64  bl1_swap_ints( ldb, incb );
65  }
66  }
67  }
68 
69  for ( j = 0; j < n_iter; j++ )
70  {
71  a_begin = a + j*lda;
72  b_begin = b + j*ldb;
73 
74  bl1_sswap( n_elem,
75  a_begin, inca,
76  b_begin, incb );
77  }
78 }
void bl1_sswap(int n, float *x, int incx, float *y, int incy)
Definition: bl1_swap.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_sswap(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

◆ bl1_sswapv()

void bl1_sswapv ( int  n,
float *  x,
int  incx,
float *  y,
int  incy 
)
14 {
15  // Return early if possible.
16  if ( bl1_zero_dim1( n ) ) return;
17 
18  bl1_sswap( n,
19  x, incx,
20  y, incy );
21 }

References bl1_sswap(), and bl1_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_ops(), and FLA_Sort_bsvd_ext_f_ops().

◆ bl1_szcopymr()

void bl1_szcopymr ( uplo1_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
595 {
596  float* a_begin;
597  dcomplex* b_begin;
598  int lda, inca;
599  int ldb, incb;
600  int n_iter;
601  int n_elem_max;
602  int n_elem;
603  int j;
604 
605  // Return early if possible.
606  if ( bl1_zero_dim2( m, n ) ) return;
607 
608  // We initialize for column-major.
609  n_iter = n;
610  n_elem_max = m;
611  lda = a_cs;
612  inca = a_rs;
613  ldb = b_cs;
614  incb = b_rs;
615 
616  // An optimization: if B is row-major, then let's access the matrix
617  // by rows instead of by columns for increased spatial locality.
618  if ( bl1_is_row_storage( b_rs, b_cs ) )
619  {
620  bl1_swap_ints( n_iter, n_elem_max );
621  bl1_swap_ints( lda, inca );
622  bl1_swap_ints( ldb, incb );
623  bl1_toggle_uplo( uplo );
624  }
625 
626 
627  if ( bl1_is_upper( uplo ) )
628  {
629  for ( j = 0; j < n_iter; j++ )
630  {
631  n_elem = bl1_min( j + 1, n_elem_max );
632  a_begin = a + j*lda;
633  b_begin = b + j*ldb;
634 
635  bl1_szcopyv( BLIS1_NO_CONJUGATE,
636  n_elem,
637  a_begin, inca,
638  b_begin, incb );
639  }
640  }
641  else // if ( bl1_is_lower( uplo ) )
642  {
643  for ( j = 0; j < n_iter; j++ )
644  {
645  n_elem = bl1_max( 0, n_elem_max - j );
646  a_begin = a + j*lda + j*inca;
647  b_begin = b + j*ldb + j*incb;
648 
649  if ( n_elem <= 0 ) break;
650 
651  bl1_szcopyv( BLIS1_NO_CONJUGATE,
652  n_elem,
653  a_begin, inca,
654  b_begin, incb );
655  }
656  }
657 }
void bl1_szcopyv(conj1_t conj, int m, float *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:169

References bl1_is_row_storage(), bl1_is_upper(), bl1_szcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_szcopymrt()

void bl1_szcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
755 {
756  float* a_begin;
757  dcomplex* b_begin;
758  int lda, inca;
759  int ldb, incb;
760  int n_iter;
761  int n_elem;
762  int n_elem_max;
763  int n_elem_is_descending;
764  int j;
765  conj1_t conj;
766 
767  // Return early if possible.
768  if ( bl1_zero_dim2( m, n ) ) return;
769 
770  // Initialize variables based on storage format of B and value of uplo.
771  if ( bl1_is_col_storage( b_rs, b_cs ) )
772  {
773  if ( bl1_is_lower( uplo ) )
774  {
775  n_iter = bl1_min( m, n );
776  n_elem_max = m;
777  lda = a_cs;
778  inca = a_rs;
779  ldb = b_cs;
780  incb = b_rs;
781  n_elem_is_descending = TRUE;
782  }
783  else // if ( bl1_is_upper( uplo ) )
784  {
785  n_iter = n;
786  n_elem_max = bl1_min( m, n );
787  lda = a_cs;
788  inca = a_rs;
789  ldb = b_cs;
790  incb = b_rs;
791  n_elem_is_descending = FALSE;
792  }
793  }
794  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
795  {
796  if ( bl1_is_lower( uplo ) )
797  {
798  n_iter = m;
799  n_elem_max = bl1_min( m, n );
800  lda = a_rs;
801  inca = a_cs;
802  ldb = b_rs;
803  incb = b_cs;
804  n_elem_is_descending = FALSE;
805  }
806  else // if ( bl1_is_upper( uplo ) )
807  {
808  n_iter = bl1_min( m, n );
809  n_elem_max = n;
810  lda = a_rs;
811  inca = a_cs;
812  ldb = b_rs;
813  incb = b_cs;
814  n_elem_is_descending = TRUE;
815  }
816  }
817 
818  // Swap lda and inca if we're doing a transpose.
819  if ( bl1_does_trans( trans ) )
820  {
821  bl1_swap_ints( lda, inca );
822  }
823 
824  // Extract conj component from trans parameter.
825  conj = bl1_proj_trans1_to_conj( trans );
826 
827  // Choose the loop based on whether n_elem will be shrinking or growing
828  // with each iteration.
829  if ( n_elem_is_descending )
830  {
831  for ( j = 0; j < n_iter; j++ )
832  {
833  n_elem = n_elem_max - j;
834  a_begin = a + j*lda + j*inca;
835  b_begin = b + j*ldb + j*incb;
836 
837  bl1_szcopyv( conj,
838  n_elem,
839  a_begin, inca,
840  b_begin, incb );
841  }
842  }
843  else // if ( n_elem_is_ascending )
844  {
845  for ( j = 0; j < n_iter; j++ )
846  {
847  n_elem = bl1_min( j + 1, n_elem_max );
848  a_begin = a + j*lda;
849  b_begin = b + j*ldb;
850 
851  bl1_szcopyv( conj,
852  n_elem,
853  a_begin, inca,
854  b_begin, incb );
855  }
856  }
857 }

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_szcopymt()

void bl1_szcopymt ( trans1_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
697 {
698  float* a_begin;
699  dcomplex* b_begin;
700  int lda, inca;
701  int ldb, incb;
702  int n_iter;
703  int n_elem;
704  int j;
705  conj1_t conj;
706 
707  // Return early if possible.
708  if ( bl1_zero_dim2( m, n ) ) return;
709 
710  // Handle cases where A and B are vectors to ensure that the underlying copy
711  // gets invoked only once.
712  if ( bl1_is_vector( m, n ) )
713  {
714  // Initialize with values appropriate for vectors.
715  n_iter = 1;
716  n_elem = bl1_vector_dim( m, n );
717  lda = 1; // multiplied by zero when n_iter == 1; not needed.
718  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
719  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
720  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
721  }
722  else // matrix case
723  {
724  // Initialize with optimal values for column-major storage of B.
725  n_iter = n;
726  n_elem = m;
727  lda = a_cs;
728  inca = a_rs;
729  ldb = b_cs;
730  incb = b_rs;
731 
732  // Handle the transposition of A.
733  if ( bl1_does_trans( trans ) )
734  {
735  bl1_swap_ints( lda, inca );
736  }
737 
738  // An optimization: if B is row-major, then let's access the matrix by rows
739  // instead of by columns for increased spatial locality.
740  if ( bl1_is_row_storage( b_rs, b_cs ) )
741  {
742  bl1_swap_ints( n_iter, n_elem );
743  bl1_swap_ints( lda, inca );
744  bl1_swap_ints( ldb, incb );
745  }
746  }
747 
748  // Extract conj component from trans parameter.
749  conj = bl1_proj_trans1_to_conj( trans );
750 
751  for ( j = 0; j < n_iter; ++j )
752  {
753  a_begin = a + j*lda;
754  b_begin = b + j*ldb;
755 
756  bl1_szcopyv( conj,
757  n_elem,
758  a_begin, inca,
759  b_begin, incb );
760  }
761 }

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_szcopyv(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_szcopyv()

void bl1_szcopyv ( conj1_t  conj,
int  m,
float *  x,
int  incx,
dcomplex *  y,
int  incy 
)
170 {
171  float* chi;
172  dcomplex* psi;
173  int i;
174 
175  // Return early if possible.
176  if ( bl1_zero_dim1( m ) ) return;
177 
178  // Initialize pointers.
179  chi = x;
180  psi = y;
181 
182  for ( i = 0; i < m; ++i )
183  {
184  psi->real = *chi;
185  psi->imag = 0.0;
186 
187  chi += incx;
188  psi += incy;
189  }
190 }

References bl1_zero_dim1(), i, dcomplex::imag, and dcomplex::real.

Referenced by bl1_szcopymr(), bl1_szcopymrt(), and bl1_szcopymt().

◆ bl1_zamax()

void bl1_zamax ( int  n,
dcomplex *  x,
int  incx,
int *  index 
)
47 {
48 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
49  *index = cblas_izamax( n,
50  x, incx );
51 #else
52  *index = F77_izamax( &n,
53  x, &incx ) - 1;
54 #endif
55 }
int F77_izamax(int *n, dcomplex *x, int *incx)
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX)

References cblas_izamax(), and F77_izamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_LU_piv_opz_var5(), and FLA_SA_LU_unb().

◆ bl1_zasum()

void bl1_zasum ( int  n,
dcomplex *  x,
int  incx,
double *  norm 
)
47 {
48 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
49  *norm = cblas_dzasum( n,
50  x, incx );
51 #else
52  *norm = F77_dzasum( &n,
53  x, &incx );
54 #endif
55 }
double F77_dzasum(int *n, dcomplex *x, int *incx)
double cblas_dzasum(const int N, const void *X, const int incX)

References cblas_dzasum(), and F77_dzasum().

Referenced by FLA_Asum_external().

◆ bl1_zaxpy()

void bl1_zaxpy ( int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
59 {
60 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
61  cblas_zaxpy( n,
62  alpha,
63  x, incx,
64  y, incy );
65 #else
66  F77_zaxpy( &n,
67  alpha,
68  x, &incx,
69  y, &incy );
70 #endif
71 }
void F77_zaxpy(int *n, dcomplex *alpha, dcomplex *x, int *incx, dcomplex *y, int *incy)
void cblas_zaxpy(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY)

References cblas_zaxpy(), and F77_zaxpy().

Referenced by bl1_zaxpymt(), bl1_zaxpysmt(), bl1_zaxpysv(), and bl1_zaxpyv().

◆ bl1_zaxpymrt()

void bl1_zaxpymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
335 {
336  dcomplex* a_begin;
337  dcomplex* b_begin;
338  int lda, inca;
339  int ldb, incb;
340  int n_iter;
341  int n_elem;
342  int n_elem_max;
343  int n_elem_is_descending;
344  int j;
345  conj1_t conj;
346 
347  // Return early if possible.
348  if ( bl1_zero_dim2( m, n ) ) return;
349 
350  // Initialize variables based on storage format of B and value of uplo.
351  if ( bl1_is_col_storage( b_rs, b_cs ) )
352  {
353  if ( bl1_is_lower( uplo ) )
354  {
355  n_iter = bl1_min( m, n );
356  n_elem_max = m;
357  lda = a_cs;
358  inca = a_rs;
359  ldb = b_cs;
360  incb = b_rs;
361  n_elem_is_descending = TRUE;
362  }
363  else // if ( bl1_is_upper( uplo ) )
364  {
365  n_iter = n;
366  n_elem_max = bl1_min( m, n );
367  lda = a_cs;
368  inca = a_rs;
369  ldb = b_cs;
370  incb = b_rs;
371  n_elem_is_descending = FALSE;
372  }
373  }
374  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
375  {
376  if ( bl1_is_lower( uplo ) )
377  {
378  n_iter = m;
379  n_elem_max = bl1_min( m, n );
380  lda = a_rs;
381  inca = a_cs;
382  ldb = b_rs;
383  incb = b_cs;
384  n_elem_is_descending = FALSE;
385  }
386  else // if ( bl1_is_upper( uplo ) )
387  {
388  n_iter = bl1_min( m, n );
389  n_elem_max = n;
390  lda = a_rs;
391  inca = a_cs;
392  ldb = b_rs;
393  incb = b_cs;
394  n_elem_is_descending = TRUE;
395  }
396  }
397 
398  // Swap lda and inca if we're doing a transpose.
399  if ( bl1_does_trans( trans ) )
400  {
401  bl1_swap_ints( lda, inca );
402  }
403 
404  // Extract conj component from trans parameter.
405  conj = bl1_proj_trans1_to_conj( trans );
406 
407  // Choose the loop based on whether n_elem will be shrinking or growing
408  // with each iteration.
409  if ( n_elem_is_descending )
410  {
411  for ( j = 0; j < n_iter; j++ )
412  {
413  n_elem = n_elem_max - j;
414  a_begin = a + j*lda + j*inca;
415  b_begin = b + j*ldb + j*incb;
416 
417  bl1_zaxpyv( conj,
418  n_elem,
419  alpha,
420  a_begin, inca,
421  b_begin, incb );
422  }
423  }
424  else // if ( n_elem_is_ascending )
425  {
426  for ( j = 0; j < n_iter; j++ )
427  {
428  n_elem = bl1_min( j + 1, n_elem_max );
429  a_begin = a + j*lda;
430  b_begin = b + j*ldb;
431 
432  bl1_zaxpyv( conj,
433  n_elem,
434  alpha,
435  a_begin, inca,
436  b_begin, incb );
437  }
438  }
439 }
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zaxpyv(), and bl1_zero_dim2().

Referenced by bl1_zher2k(), bl1_zherk(), and FLA_Axpyrt_external().

◆ bl1_zaxpymt()

void bl1_zaxpymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
249 {
250  dcomplex* a_begin;
251  dcomplex* b_begin;
252  dcomplex* a_temp;
253  int inca_temp;
254  int lda, inca;
255  int ldb, incb;
256  int n_iter;
257  int n_elem;
258  int j;
259 
260  // Return early if possible.
261  if ( bl1_zero_dim2( m, n ) ) return;
262 
263  // Handle cases where A and B are vectors to ensure that the underlying axpy
264  // gets invoked only once.
265  if ( bl1_is_vector( m, n ) )
266  {
267  // Initialize with values appropriate for vectors.
268  n_iter = 1;
269  n_elem = bl1_vector_dim( m, n );
270  lda = 1; // multiplied by zero when n_iter == 1; not needed.
271  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
272  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
273  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
274  }
275  else // matrix case
276  {
277  // Initialize with optimal values for column-major storage.
278  n_iter = n;
279  n_elem = m;
280  lda = a_cs;
281  inca = a_rs;
282  ldb = b_cs;
283  incb = b_rs;
284 
285  // Handle the transposition of A.
286  if ( bl1_does_trans( trans ) )
287  {
288  bl1_swap_ints( lda, inca );
289  }
290 
291  // An optimization: if B is row-major and if A is effectively row-major
292  // after a possible transposition, then let's access the matrices by rows
293  // instead of by columns for increased spatial locality.
294  if ( bl1_is_row_storage( b_rs, b_cs ) )
295  {
296  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
297  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
298  {
299  bl1_swap_ints( n_iter, n_elem );
300  bl1_swap_ints( lda, inca );
301  bl1_swap_ints( ldb, incb );
302  }
303  }
304  }
305 
306  if ( bl1_does_conj( trans ) )
307  {
308  conj1_t conj = bl1_proj_trans1_to_conj( trans );
309 
310  a_temp = bl1_zallocv( n_elem );
311  inca_temp = 1;
312 
313  for ( j = 0; j < n_iter; j++ )
314  {
315  a_begin = a + j*lda;
316  b_begin = b + j*ldb;
317 
318  bl1_zcopyv( conj,
319  n_elem,
320  a_begin, inca,
321  a_temp, inca_temp );
322 
323  bl1_zaxpy( n_elem,
324  alpha,
325  a_temp, inca_temp,
326  b_begin, incb );
327  }
328 
329  bl1_zfree( a_temp );
330  }
331  else // if ( !bl1_does_conj( trans ) )
332  {
333  for ( j = 0; j < n_iter; j++ )
334  {
335  a_begin = a + j*lda;
336  b_begin = b + j*ldb;
337 
338  bl1_zaxpy( n_elem,
339  alpha,
340  a_begin, inca,
341  b_begin, incb );
342  }
343 
344  }
345 }
void bl1_zaxpy(int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpy.c:58
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
dcomplex * bl1_zallocv(unsigned int n_elem)
Definition: bl1_allocv.c:45
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_zaxpysmt()

void bl1_zaxpysmt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  alpha0,
dcomplex *  alpha1,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  beta,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
274 {
275  dcomplex* a_begin;
276  dcomplex* b_begin;
277  dcomplex* a_temp;
278  dcomplex alpha_prod;
279  int inca_temp;
280  int lda, inca;
281  int ldb, incb;
282  int n_iter;
283  int n_elem;
284  int j;
285 
286  // Return early if possible.
287  if ( bl1_zero_dim2( m, n ) ) return;
288 
289  alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
290  alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
291 
292  // Handle cases where A and B are vectors to ensure that the underlying axpy
293  // gets invoked only once.
294  if ( bl1_is_vector( m, n ) )
295  {
296  // Initialize with values appropriate for vectors.
297  n_iter = 1;
298  n_elem = bl1_vector_dim( m, n );
299  lda = 1; // multiplied by zero when n_iter == 1; not needed.
300  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
301  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
302  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
303  }
304  else // matrix case
305  {
306  // Initialize with optimal values for column-major storage.
307  n_iter = n;
308  n_elem = m;
309  lda = a_cs;
310  inca = a_rs;
311  ldb = b_cs;
312  incb = b_rs;
313 
314  // Handle the transposition of A.
315  if ( bl1_does_trans( trans ) )
316  {
317  bl1_swap_ints( lda, inca );
318  }
319 
320  // An optimization: if B is row-major and if A is effectively row-major
321  // after a possible transposition, then let's access the matrices by rows
322  // instead of by columns for increased spatial locality.
323  if ( bl1_is_row_storage( b_rs, b_cs ) )
324  {
325  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
326  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
327  {
328  bl1_swap_ints( n_iter, n_elem );
329  bl1_swap_ints( lda, inca );
330  bl1_swap_ints( ldb, incb );
331  }
332  }
333  }
334 
335  if ( bl1_does_conj( trans ) )
336  {
337  conj1_t conj = bl1_proj_trans1_to_conj( trans );
338 
339  a_temp = bl1_zallocv( n_elem );
340  inca_temp = 1;
341 
342  for ( j = 0; j < n_iter; j++ )
343  {
344  a_begin = a + j*lda;
345  b_begin = b + j*ldb;
346 
347  bl1_zcopyv( conj,
348  n_elem,
349  a_begin, inca,
350  a_temp, inca_temp );
351 
352  bl1_zscal( n_elem,
353  beta,
354  b_begin, incb );
355 
356  bl1_zaxpy( n_elem,
357  &alpha_prod,
358  a_temp, inca_temp,
359  b_begin, incb );
360  }
361 
362  bl1_zfree( a_temp );
363  }
364  else // if ( !bl1_does_conj( trans ) )
365  {
366  for ( j = 0; j < n_iter; j++ )
367  {
368  a_begin = a + j*lda;
369  b_begin = b + j*ldb;
370 
371  bl1_zscal( n_elem,
372  beta,
373  b_begin, incb );
374 
375  bl1_zaxpy( n_elem,
376  &alpha_prod,
377  a_begin, inca,
378  b_begin, incb );
379  }
380  }
381 }
void bl1_zscal(int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scal.c:78

References alpha1, bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), bl1_zscal(), BLIS1_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Axpys_external().

◆ bl1_zaxpysv()

void bl1_zaxpysv ( int  n,
dcomplex *  alpha0,
dcomplex *  alpha1,
dcomplex *  x,
int  incx,
dcomplex *  beta,
dcomplex *  y,
int  incy 
)
72 {
73  dcomplex alpha_prod;
74 
75  // Return early if possible.
76  if ( bl1_zero_dim1( n ) ) return;
77 
78  alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
79  alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
80 
81  bl1_zscal( n,
82  beta,
83  y, incy );
84 
85  bl1_zaxpy( n,
86  &alpha_prod,
87  x, incx,
88  y, incy );
89 }

References alpha1, bl1_zaxpy(), bl1_zero_dim1(), bl1_zscal(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().

◆ bl1_zaxpyv()

void bl1_zaxpyv ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
61 {
62  dcomplex* x_copy;
63  int incx_copy;
64 
65  // Return early if possible.
66  if ( bl1_zero_dim1( n ) ) return;
67 
68  x_copy = x;
69  incx_copy = incx;
70 
71  if ( bl1_is_conj( conj ) )
72  {
73  x_copy = bl1_zallocv( n );
74  incx_copy = 1;
75 
76  bl1_zcopyv( conj,
77  n,
78  x, incx,
79  x_copy, incx_copy );
80  }
81 
82  bl1_zaxpy( n,
83  alpha,
84  x_copy, incx_copy,
85  y, incy );
86 
87  if ( bl1_is_conj( conj ) )
88  bl1_zfree( x_copy );
89 }

References bl1_is_conj(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim1(), and bl1_zfree().

Referenced by bl1_zaxpymrt(), bl1_zgemv(), bl1_zhemv(), bl1_ztrmvsx(), bl1_ztrsvsx(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_il_opz_var4(), FLA_Eig_gest_il_opz_var5(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_iu_opz_var4(), FLA_Eig_gest_iu_opz_var5(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nl_opz_var4(), FLA_Eig_gest_nl_opz_var5(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Eig_gest_nu_opz_var4(), FLA_Eig_gest_nu_opz_var5(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().

◆ bl1_zccopymr()

void bl1_zccopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
1181 {
1182  dcomplex* a_begin;
1183  scomplex* b_begin;
1184  int lda, inca;
1185  int ldb, incb;
1186  int n_iter;
1187  int n_elem_max;
1188  int n_elem;
1189  int j;
1190 
1191  // Return early if possible.
1192  if ( bl1_zero_dim2( m, n ) ) return;
1193 
1194  // We initialize for column-major.
1195  n_iter = n;
1196  n_elem_max = m;
1197  lda = a_cs;
1198  inca = a_rs;
1199  ldb = b_cs;
1200  incb = b_rs;
1201 
1202  // An optimization: if B is row-major, then let's access the matrix
1203  // by rows instead of by columns for increased spatial locality.
1204  if ( bl1_is_row_storage( b_rs, b_cs ) )
1205  {
1206  bl1_swap_ints( n_iter, n_elem_max );
1207  bl1_swap_ints( lda, inca );
1208  bl1_swap_ints( ldb, incb );
1209  bl1_toggle_uplo( uplo );
1210  }
1211 
1212 
1213  if ( bl1_is_upper( uplo ) )
1214  {
1215  for ( j = 0; j < n_iter; j++ )
1216  {
1217  n_elem = bl1_min( j + 1, n_elem_max );
1218  a_begin = a + j*lda;
1219  b_begin = b + j*ldb;
1220 
1221  bl1_zccopyv( BLIS1_NO_CONJUGATE,
1222  n_elem,
1223  a_begin, inca,
1224  b_begin, incb );
1225  }
1226  }
1227  else // if ( bl1_is_lower( uplo ) )
1228  {
1229  for ( j = 0; j < n_iter; j++ )
1230  {
1231  n_elem = bl1_max( 0, n_elem_max - j );
1232  a_begin = a + j*lda + j*inca;
1233  b_begin = b + j*ldb + j*incb;
1234 
1235  if ( n_elem <= 0 ) break;
1236 
1237  bl1_zccopyv( BLIS1_NO_CONJUGATE,
1238  n_elem,
1239  a_begin, inca,
1240  b_begin, incb );
1241  }
1242  }
1243 }
void bl1_zccopyv(conj1_t conj, int m, dcomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:330

References bl1_is_row_storage(), bl1_is_upper(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_zccopymrt()

void bl1_zccopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
1921 {
1922  dcomplex* a_begin;
1923  scomplex* b_begin;
1924  int lda, inca;
1925  int ldb, incb;
1926  int n_iter;
1927  int n_elem;
1928  int n_elem_max;
1929  int n_elem_is_descending;
1930  int j;
1931  conj1_t conj;
1932 
1933  // Return early if possible.
1934  if ( bl1_zero_dim2( m, n ) ) return;
1935 
1936  // Initialize variables based on storage format of B and value of uplo.
1937  if ( bl1_is_col_storage( b_rs, b_cs ) )
1938  {
1939  if ( bl1_is_lower( uplo ) )
1940  {
1941  n_iter = bl1_min( m, n );
1942  n_elem_max = m;
1943  lda = a_cs;
1944  inca = a_rs;
1945  ldb = b_cs;
1946  incb = b_rs;
1947  n_elem_is_descending = TRUE;
1948  }
1949  else // if ( bl1_is_upper( uplo ) )
1950  {
1951  n_iter = n;
1952  n_elem_max = bl1_min( m, n );
1953  lda = a_cs;
1954  inca = a_rs;
1955  ldb = b_cs;
1956  incb = b_rs;
1957  n_elem_is_descending = FALSE;
1958  }
1959  }
1960  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1961  {
1962  if ( bl1_is_lower( uplo ) )
1963  {
1964  n_iter = m;
1965  n_elem_max = bl1_min( m, n );
1966  lda = a_rs;
1967  inca = a_cs;
1968  ldb = b_rs;
1969  incb = b_cs;
1970  n_elem_is_descending = FALSE;
1971  }
1972  else // if ( bl1_is_upper( uplo ) )
1973  {
1974  n_iter = bl1_min( m, n );
1975  n_elem_max = n;
1976  lda = a_rs;
1977  inca = a_cs;
1978  ldb = b_rs;
1979  incb = b_cs;
1980  n_elem_is_descending = TRUE;
1981  }
1982  }
1983 
1984  // Swap lda and inca if we're doing a transpose.
1985  if ( bl1_does_trans( trans ) )
1986  {
1987  bl1_swap_ints( lda, inca );
1988  }
1989 
1990  // Extract conj component from trans parameter.
1991  conj = bl1_proj_trans1_to_conj( trans );
1992 
1993  // Choose the loop based on whether n_elem will be shrinking or growing
1994  // with each iteration.
1995  if ( n_elem_is_descending )
1996  {
1997  for ( j = 0; j < n_iter; j++ )
1998  {
1999  n_elem = n_elem_max - j;
2000  a_begin = a + j*lda + j*inca;
2001  b_begin = b + j*ldb + j*incb;
2002 
2003  bl1_zccopyv( conj,
2004  n_elem,
2005  a_begin, inca,
2006  b_begin, incb );
2007  }
2008  }
2009  else // if ( n_elem_is_ascending )
2010  {
2011  for ( j = 0; j < n_iter; j++ )
2012  {
2013  n_elem = bl1_min( j + 1, n_elem_max );
2014  a_begin = a + j*lda;
2015  b_begin = b + j*ldb;
2016 
2017  bl1_zccopyv( conj,
2018  n_elem,
2019  a_begin, inca,
2020  b_begin, incb );
2021  }
2022  }
2023 }

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zccopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_zccopymt()

void bl1_zccopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
1301 {
1302  dcomplex* a_begin;
1303  scomplex* b_begin;
1304  int lda, inca;
1305  int ldb, incb;
1306  int n_iter;
1307  int n_elem;
1308  int j;
1309  conj1_t conj;
1310 
1311  // Return early if possible.
1312  if ( bl1_zero_dim2( m, n ) ) return;
1313 
1314  // Handle cases where A and B are vectors to ensure that the underlying copy
1315  // gets invoked only once.
1316  if ( bl1_is_vector( m, n ) )
1317  {
1318  // Initialize with values appropriate for vectors.
1319  n_iter = 1;
1320  n_elem = bl1_vector_dim( m, n );
1321  lda = 1; // multiplied by zero when n_iter == 1; not needed.
1322  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1323  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1324  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
1325  }
1326  else // matrix case
1327  {
1328  // Initialize with optimal values for column-major storage of B.
1329  n_iter = n;
1330  n_elem = m;
1331  lda = a_cs;
1332  inca = a_rs;
1333  ldb = b_cs;
1334  incb = b_rs;
1335 
1336  // Handle the transposition of A.
1337  if ( bl1_does_trans( trans ) )
1338  {
1339  bl1_swap_ints( lda, inca );
1340  }
1341 
1342  // An optimization: if B is row-major, then let's access the matrix by rows
1343  // instead of by columns for increased spatial locality.
1344  if ( bl1_is_row_storage( b_rs, b_cs ) )
1345  {
1346  bl1_swap_ints( n_iter, n_elem );
1347  bl1_swap_ints( lda, inca );
1348  bl1_swap_ints( ldb, incb );
1349  }
1350  }
1351 
1352  // Extract conj component from trans parameter.
1353  conj = bl1_proj_trans1_to_conj( trans );
1354 
1355  for ( j = 0; j < n_iter; ++j )
1356  {
1357  a_begin = a + j*lda;
1358  b_begin = b + j*ldb;
1359 
1360  bl1_zccopyv( conj,
1361  n_elem,
1362  a_begin, inca,
1363  b_begin, incb );
1364  }
1365 }

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zccopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_zccopyv()

void bl1_zccopyv ( conj1_t  conj,
int  m,
dcomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)
331 {
332  dcomplex* chi;
333  scomplex* psi;
334  int i;
335 
336  // Return early if possible.
337  if ( bl1_zero_dim1( m ) ) return;
338 
339  // Initialize pointers.
340  chi = x;
341  psi = y;
342 
343  for ( i = 0; i < m; ++i )
344  {
345  psi->real = chi->real;
346  psi->imag = chi->imag;
347 
348  chi += incx;
349  psi += incy;
350  }
351 
352  if ( bl1_is_conj( conj ) )
353  bl1_cconjv( m,
354  y, incy );
355 }

References bl1_cconjv(), bl1_is_conj(), bl1_zero_dim1(), i, scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.

Referenced by bl1_zccopymr(), bl1_zccopymrt(), and bl1_zccopymt().

◆ bl1_zconjm()

void bl1_zconjm ( int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs 
)
73 {
74  double m1 = bl1_dm1();
75  double* a_conj;
76  int lda, inca;
77  int n_iter;
78  int n_elem;
79  int j;
80 
81  // Return early if possible.
82  if ( bl1_zero_dim2( m, n ) ) return;
83 
84  // Handle cases where A is a vector to ensure that the underlying axpy
85  // gets invoked only once.
86  if ( bl1_is_vector( m, n ) )
87  {
88  // Initialize with values appropriate for a vector.
89  n_iter = 1;
90  n_elem = bl1_vector_dim( m, n );
91  lda = 1; // multiplied by zero when n_iter == 1; not needed.
92  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
93  }
94  else // matrix case
95  {
96  // Initialize with optimal values for column-major storage.
97  n_iter = n;
98  n_elem = m;
99  lda = a_cs;
100  inca = a_rs;
101 
102  // An optimization: if A is row-major, then let's access the matrix
103  // by rows instead of by columns to increase spatial locality.
104  if ( bl1_is_row_storage( a_rs, a_cs ) )
105  {
106  bl1_swap_ints( n_iter, n_elem );
107  bl1_swap_ints( lda, inca );
108  }
109  }
110 
111  for ( j = 0; j < n_iter; ++j )
112  {
113  a_conj = ( double* )( a + j*lda ) + 1;
114 
115  bl1_dscal( n_elem,
116  &m1,
117  a_conj, 2*inca );
118  }
119 }
double bl1_dm1(void)
Definition: bl1_constants.c:182

References bl1_dm1(), bl1_dscal(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), and FLA_Conjugate().

◆ bl1_zconjmr()

void bl1_zconjmr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs 
)
80 {
81  double m1 = bl1_dm1();
82  double* a_conj;
83  int lda, inca;
84  int n_iter;
85  int n_elem_max;
86  int n_elem;
87  int j;
88 
89  // Return early if possible.
90  if ( bl1_zero_dim2( m, n ) ) return;
91 
92  // We initialize for column-major.
93  n_iter = n;
94  n_elem_max = m;
95  lda = a_cs;
96  inca = a_rs;
97 
98  // An optimization: if A is row-major, then let's access the matrix
99  // by rows instead of by columns to increase spatial locality.
100  if ( bl1_is_row_storage( a_rs, a_cs ) )
101  {
102  bl1_swap_ints( n_iter, n_elem_max );
103  bl1_swap_ints( lda, inca );
104  bl1_toggle_uplo( uplo );
105  }
106 
107  if ( bl1_is_upper( uplo ) )
108  {
109  for ( j = 0; j < n_iter; ++j )
110  {
111  n_elem = bl1_min( j + 1, n_elem_max );
112  a_conj = ( double* )( a + j*lda ) + 1;
113 
114  bl1_dscal( n_elem,
115  &m1,
116  a_conj, 2*inca );
117  }
118  }
119  else // if ( bl1_is_lower( uplo ) )
120  {
121  for ( j = 0; j < n_iter; ++j )
122  {
123  n_elem = bl1_max( 0, n_elem_max - j );
124  a_conj = ( double* )( a + j*lda + j*inca ) + 1;
125 
126  if ( n_elem <= 0 ) break;
127 
128  bl1_dscal( n_elem,
129  &m1,
130  a_conj, 2*inca );
131  }
132  }
133 }

References bl1_dm1(), bl1_dscal(), bl1_is_row_storage(), bl1_is_upper(), and bl1_zero_dim2().

Referenced by bl1_zhemm(), bl1_ztrmm(), bl1_ztrsm(), and FLA_Conjugate_r().

◆ bl1_zconjv()

void bl1_zconjv ( int  m,
dcomplex *  x,
int  incx 
)
35 {
36  double m1 = bl1_dm1();
37  double* x_conj = ( double* ) x + 1;
38  int incx_conj = 2 * incx;
39 
40  bl1_dscal( m,
41  &m1,
42  x_conj, incx_conj );
43 }

References bl1_dm1(), and bl1_dscal().

Referenced by bl1_czcopyv(), bl1_zcopymt(), bl1_zcopyv(), bl1_zgemv(), bl1_zswapmt(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), and FLA_Househ2_UT_r_opz().

◆ bl1_zcopy()

void bl1_zcopy ( int  m,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
53 {
54 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
55  cblas_zcopy( m,
56  x, incx,
57  y, incy );
58 #else
59  F77_zcopy( &m,
60  x, &incx,
61  y, &incy );
62 #endif
63 }
void F77_zcopy(int *n, dcomplex *x, int *incx, dcomplex *y, int *incy)
void cblas_zcopy(const int N, const void *X, const int incX, void *Y, const int incY)

References cblas_zcopy(), and F77_zcopy().

Referenced by bl1_zcopymr(), bl1_zcopymt(), bl1_zcopyv(), and FLA_SA_LU_unb().

◆ bl1_zcopymr()

void bl1_zcopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
203 {
204  dcomplex* a_begin;
205  dcomplex* b_begin;
206  int lda, inca;
207  int ldb, incb;
208  int n_iter;
209  int n_elem_max;
210  int n_elem;
211  int j;
212 
213  // Return early if possible.
214  if ( bl1_zero_dim2( m, n ) ) return;
215 
216  // We initialize for column-major.
217  n_iter = n;
218  n_elem_max = m;
219  lda = a_cs;
220  inca = a_rs;
221  ldb = b_cs;
222  incb = b_rs;
223 
224  // An optimization: if A and B are both row-major, then let's access the
225  // matrices by rows instead of by columns for increased spatial locality.
226  if ( bl1_is_row_storage( b_rs, b_cs ) && bl1_is_row_storage( a_rs, a_cs ) )
227  {
228  bl1_swap_ints( n_iter, n_elem_max );
229  bl1_swap_ints( lda, inca );
230  bl1_swap_ints( ldb, incb );
231  bl1_toggle_uplo( uplo );
232  }
233 
234 
235  if ( bl1_is_upper( uplo ) )
236  {
237  for ( j = 0; j < n_iter; j++ )
238  {
239  n_elem = bl1_min( j + 1, n_elem_max );
240  a_begin = a + j*lda;
241  b_begin = b + j*ldb;
242 
243  bl1_zcopy( n_elem,
244  a_begin, inca,
245  b_begin, incb );
246  }
247  }
248  else // if ( bl1_is_lower( uplo ) )
249  {
250  for ( j = 0; j < n_iter; j++ )
251  {
252  n_elem = bl1_max( 0, n_elem_max - j );
253  a_begin = a + j*lda + j*inca;
254  b_begin = b + j*ldb + j*incb;
255 
256  if ( n_elem <= 0 ) break;
257 
258  bl1_zcopy( n_elem,
259  a_begin, inca,
260  b_begin, incb );
261  }
262  }
263 }
void bl1_zcopy(int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copy.c:52

References bl1_is_row_storage(), bl1_is_upper(), bl1_zcopy(), and bl1_zero_dim2().

Referenced by bl1_zcreate_contigmr(), bl1_zfree_saved_contigmr(), bl1_zfree_saved_contigmsr(), and FLA_Copyr_external().

◆ bl1_zcopymrt()

void bl1_zcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
329 {
330  dcomplex* a_begin;
331  dcomplex* b_begin;
332  int lda, inca;
333  int ldb, incb;
334  int n_iter;
335  int n_elem;
336  int n_elem_max;
337  int n_elem_is_descending;
338  int j;
339  conj1_t conj;
340 
341  // Return early if possible.
342  if ( bl1_zero_dim2( m, n ) ) return;
343 
344  // Initialize variables based on storage format of B and value of uplo.
345  if ( bl1_is_col_storage( b_rs, b_cs ) )
346  {
347  if ( bl1_is_lower( uplo ) )
348  {
349  n_iter = bl1_min( m, n );
350  n_elem_max = m;
351  lda = a_cs;
352  inca = a_rs;
353  ldb = b_cs;
354  incb = b_rs;
355  n_elem_is_descending = TRUE;
356  }
357  else // if ( bl1_is_upper( uplo ) )
358  {
359  n_iter = n;
360  n_elem_max = bl1_min( m, n );
361  lda = a_cs;
362  inca = a_rs;
363  ldb = b_cs;
364  incb = b_rs;
365  n_elem_is_descending = FALSE;
366  }
367  }
368  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
369  {
370  if ( bl1_is_lower( uplo ) )
371  {
372  n_iter = m;
373  n_elem_max = bl1_min( m, n );
374  lda = a_rs;
375  inca = a_cs;
376  ldb = b_rs;
377  incb = b_cs;
378  n_elem_is_descending = FALSE;
379  }
380  else // if ( bl1_is_upper( uplo ) )
381  {
382  n_iter = bl1_min( m, n );
383  n_elem_max = n;
384  lda = a_rs;
385  inca = a_cs;
386  ldb = b_rs;
387  incb = b_cs;
388  n_elem_is_descending = TRUE;
389  }
390  }
391 
392  // Swap lda and inca if we're doing a transpose.
393  if ( bl1_does_trans( trans ) )
394  {
395  bl1_swap_ints( lda, inca );
396  }
397 
398  // Extract conj component from trans parameter.
399  conj = bl1_proj_trans1_to_conj( trans );
400 
401  // Choose the loop based on whether n_elem will be shrinking or growing
402  // with each iteration.
403  if ( n_elem_is_descending )
404  {
405  for ( j = 0; j < n_iter; j++ )
406  {
407  n_elem = n_elem_max - j;
408  a_begin = a + j*lda + j*inca;
409  b_begin = b + j*ldb + j*incb;
410 
411  bl1_zcopyv( conj,
412  n_elem,
413  a_begin, inca,
414  b_begin, incb );
415  }
416  }
417  else // if ( n_elem_is_ascending )
418  {
419  for ( j = 0; j < n_iter; j++ )
420  {
421  n_elem = bl1_min( j + 1, n_elem_max );
422  a_begin = a + j*lda;
423  b_begin = b + j*ldb;
424 
425  bl1_zcopyv( conj,
426  n_elem,
427  a_begin, inca,
428  b_begin, incb );
429  }
430  }
431 }

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zcopyv(), and bl1_zero_dim2().

Referenced by bl1_zhemm(), bl1_ztrmm(), bl1_ztrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().

◆ bl1_zcopymt()

void bl1_zcopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
287 {
288  dcomplex* a_begin;
289  dcomplex* b_begin;
290  int lda, inca;
291  int ldb, incb;
292  int n_iter;
293  int n_elem;
294  int j;
295 
296  // Return early if possible.
297  if ( bl1_zero_dim2( m, n ) ) return;
298 
299  // Handle cases where A and B are vectors to ensure that the underlying copy
300  // gets invoked only once.
301  if ( bl1_is_vector( m, n ) )
302  {
303  // Initialize with values appropriate for vectors.
304  n_iter = 1;
305  n_elem = bl1_vector_dim( m, n );
306  lda = 1; // multiplied by zero when n_iter == 1; not needed.
307  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
308  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
309  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
310  }
311  else // matrix case
312  {
313  // Initialize with optimal values for column-major storage.
314  n_iter = n;
315  n_elem = m;
316  lda = a_cs;
317  inca = a_rs;
318  ldb = b_cs;
319  incb = b_rs;
320 
321  // Handle the transposition of A.
322  if ( bl1_does_trans( trans ) )
323  {
324  bl1_swap_ints( lda, inca );
325  }
326 
327  // An optimization: if B is row-major and if A is effectively row-major
328  // after a possible transposition, then let's access the matrix by rows
329  // instead of by columns for increased spatial locality.
330  if ( bl1_is_row_storage( b_rs, b_cs ) )
331  {
332  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
333  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
334  {
335  bl1_swap_ints( n_iter, n_elem );
336  bl1_swap_ints( lda, inca );
337  bl1_swap_ints( ldb, incb );
338  }
339  }
340  }
341 
342  for ( j = 0; j < n_iter; j++ )
343  {
344  a_begin = a + j*lda;
345  b_begin = b + j*ldb;
346 
347  bl1_zcopy( n_elem,
348  a_begin, inca,
349  b_begin, incb );
350 
351  if ( bl1_does_conj( trans ) )
352  bl1_zconjv( n_elem,
353  b_begin, incb );
354  }
355 }

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zconjv(), bl1_zcopy(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zcreate_contigm(), bl1_zcreate_contigmt(), bl1_zfree_saved_contigm(), bl1_zgemm(), bl1_zhemm(), bl1_zher2k(), bl1_zsymm(), bl1_zsyr2k(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Bsvd_v_opz_var2(), FLA_Copy_external(), FLA_Copyt_external(), and FLA_Tevd_v_opz_var2().

◆ bl1_zcopyv()

void bl1_zcopyv ( conj1_t  conj,
int  m,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

◆ bl1_zdcopymr()

void bl1_zdcopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
985 {
986  dcomplex* a_begin;
987  double* b_begin;
988  int lda, inca;
989  int ldb, incb;
990  int n_iter;
991  int n_elem_max;
992  int n_elem;
993  int j;
994 
995  // Return early if possible.
996  if ( bl1_zero_dim2( m, n ) ) return;
997 
998  // We initialize for column-major.
999  n_iter = n;
1000  n_elem_max = m;
1001  lda = a_cs;
1002  inca = a_rs;
1003  ldb = b_cs;
1004  incb = b_rs;
1005 
1006  // An optimization: if B is row-major, then let's access the matrix
1007  // by rows instead of by columns for increased spatial locality.
1008  if ( bl1_is_row_storage( b_rs, b_cs ) )
1009  {
1010  bl1_swap_ints( n_iter, n_elem_max );
1011  bl1_swap_ints( lda, inca );
1012  bl1_swap_ints( ldb, incb );
1013  bl1_toggle_uplo( uplo );
1014  }
1015 
1016 
1017  if ( bl1_is_upper( uplo ) )
1018  {
1019  for ( j = 0; j < n_iter; j++ )
1020  {
1021  n_elem = bl1_min( j + 1, n_elem_max );
1022  a_begin = a + j*lda;
1023  b_begin = b + j*ldb;
1024 
1025  bl1_zdcopyv( BLIS1_NO_CONJUGATE,
1026  n_elem,
1027  a_begin, inca,
1028  b_begin, incb );
1029  }
1030  }
1031  else // if ( bl1_is_lower( uplo ) )
1032  {
1033  for ( j = 0; j < n_iter; j++ )
1034  {
1035  n_elem = bl1_max( 0, n_elem_max - j );
1036  a_begin = a + j*lda + j*inca;
1037  b_begin = b + j*ldb + j*incb;
1038 
1039  if ( n_elem <= 0 ) break;
1040 
1041  bl1_zdcopyv( BLIS1_NO_CONJUGATE,
1042  n_elem,
1043  a_begin, inca,
1044  b_begin, incb );
1045  }
1046  }
1047 }
void bl1_zdcopyv(conj1_t conj, int m, dcomplex *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:281

References bl1_is_row_storage(), bl1_is_upper(), bl1_zdcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_zdcopymrt()

void bl1_zdcopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
1815 {
1816  dcomplex* a_begin;
1817  double* b_begin;
1818  int lda, inca;
1819  int ldb, incb;
1820  int n_iter;
1821  int n_elem;
1822  int n_elem_max;
1823  int n_elem_is_descending;
1824  int j;
1825  conj1_t conj;
1826 
1827  // Return early if possible.
1828  if ( bl1_zero_dim2( m, n ) ) return;
1829 
1830  // Initialize variables based on storage format of B and value of uplo.
1831  if ( bl1_is_col_storage( b_rs, b_cs ) )
1832  {
1833  if ( bl1_is_lower( uplo ) )
1834  {
1835  n_iter = bl1_min( m, n );
1836  n_elem_max = m;
1837  lda = a_cs;
1838  inca = a_rs;
1839  ldb = b_cs;
1840  incb = b_rs;
1841  n_elem_is_descending = TRUE;
1842  }
1843  else // if ( bl1_is_upper( uplo ) )
1844  {
1845  n_iter = n;
1846  n_elem_max = bl1_min( m, n );
1847  lda = a_cs;
1848  inca = a_rs;
1849  ldb = b_cs;
1850  incb = b_rs;
1851  n_elem_is_descending = FALSE;
1852  }
1853  }
1854  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1855  {
1856  if ( bl1_is_lower( uplo ) )
1857  {
1858  n_iter = m;
1859  n_elem_max = bl1_min( m, n );
1860  lda = a_rs;
1861  inca = a_cs;
1862  ldb = b_rs;
1863  incb = b_cs;
1864  n_elem_is_descending = FALSE;
1865  }
1866  else // if ( bl1_is_upper( uplo ) )
1867  {
1868  n_iter = bl1_min( m, n );
1869  n_elem_max = n;
1870  lda = a_rs;
1871  inca = a_cs;
1872  ldb = b_rs;
1873  incb = b_cs;
1874  n_elem_is_descending = TRUE;
1875  }
1876  }
1877 
1878  // Swap lda and inca if we're doing a transpose.
1879  if ( bl1_does_trans( trans ) )
1880  {
1881  bl1_swap_ints( lda, inca );
1882  }
1883 
1884  // Extract conj component from trans parameter.
1885  conj = bl1_proj_trans1_to_conj( trans );
1886 
1887  // Choose the loop based on whether n_elem will be shrinking or growing
1888  // with each iteration.
1889  if ( n_elem_is_descending )
1890  {
1891  for ( j = 0; j < n_iter; j++ )
1892  {
1893  n_elem = n_elem_max - j;
1894  a_begin = a + j*lda + j*inca;
1895  b_begin = b + j*ldb + j*incb;
1896 
1897  bl1_zdcopyv( conj,
1898  n_elem,
1899  a_begin, inca,
1900  b_begin, incb );
1901  }
1902  }
1903  else // if ( n_elem_is_ascending )
1904  {
1905  for ( j = 0; j < n_iter; j++ )
1906  {
1907  n_elem = bl1_min( j + 1, n_elem_max );
1908  a_begin = a + j*lda;
1909  b_begin = b + j*ldb;
1910 
1911  bl1_zdcopyv( conj,
1912  n_elem,
1913  a_begin, inca,
1914  b_begin, incb );
1915  }
1916  }
1917 }

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zdcopyv(), and bl1_zero_dim2().

Referenced by FLA_Copyrt_external().

◆ bl1_zdcopymt()

void bl1_zdcopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
1099 {
1100  dcomplex* a_begin;
1101  double* b_begin;
1102  int lda, inca;
1103  int ldb, incb;
1104  int n_iter;
1105  int n_elem;
1106  int j;
1107  conj1_t conj;
1108 
1109  // Return early if possible.
1110  if ( bl1_zero_dim2( m, n ) ) return;
1111 
1112  // Handle cases where A and B are vectors to ensure that the underlying copy
1113  // gets invoked only once.
1114  if ( bl1_is_vector( m, n ) )
1115  {
1116  // Initialize with values appropriate for vectors.
1117  n_iter = 1;
1118  n_elem = bl1_vector_dim( m, n );
1119  lda = 1; // multiplied by zero when n_iter == 1; not needed.
1120  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
1121  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
1122  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
1123  }
1124  else // matrix case
1125  {
1126  // Initialize with optimal values for column-major storage of B.
1127  n_iter = n;
1128  n_elem = m;
1129  lda = a_cs;
1130  inca = a_rs;
1131  ldb = b_cs;
1132  incb = b_rs;
1133 
1134  // Handle the transposition of A.
1135  if ( bl1_does_trans( trans ) )
1136  {
1137  bl1_swap_ints( lda, inca );
1138  }
1139 
1140  // An optimization: if B is row-major, then let's access the matrix by rows
1141  // instead of by columns for increased spatial locality.
1142  if ( bl1_is_row_storage( b_rs, b_cs ) )
1143  {
1144  bl1_swap_ints( n_iter, n_elem );
1145  bl1_swap_ints( lda, inca );
1146  bl1_swap_ints( ldb, incb );
1147  }
1148  }
1149 
1150  // Extract conj component from trans parameter.
1151  conj = bl1_proj_trans1_to_conj( trans );
1152 
1153  for ( j = 0; j < n_iter; ++j )
1154  {
1155  a_begin = a + j*lda;
1156  b_begin = b + j*ldb;
1157 
1158  bl1_zdcopyv( conj,
1159  n_elem,
1160  a_begin, inca,
1161  b_begin, incb );
1162  }
1163 }

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdcopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_zdcopyv()

void bl1_zdcopyv ( conj1_t  conj,
int  m,
dcomplex *  x,
int  incx,
double *  y,
int  incy 
)
282 {
283  dcomplex* chi;
284  double* psi;
285  int i;
286 
287  // Return early if possible.
288  if ( bl1_zero_dim1( m ) ) return;
289 
290  // Initialize pointers.
291  chi = x;
292  psi = y;
293 
294  for ( i = 0; i < m; ++i )
295  {
296  *psi = chi->real;
297 
298  chi += incx;
299  psi += incy;
300  }
301 }

References bl1_zero_dim1(), i, and dcomplex::real.

Referenced by bl1_zdcopymr(), bl1_zdcopymrt(), and bl1_zdcopymt().

◆ bl1_zdinvscalm()

void bl1_zdinvscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)
222 {
223  double alpha_inv;
224  dcomplex* a_begin;
225  int lda, inca;
226  int n_iter;
227  int n_elem;
228  int j;
229 
230  // Return early if possible.
231  if ( bl1_zero_dim2( m, n ) ) return;
232  if ( bl1_deq1( alpha ) ) return;
233 
234  // Handle cases where A is a vector to ensure that the underlying axpy
235  // gets invoked only once.
236  if ( bl1_is_vector( m, n ) )
237  {
238  // Initialize with values appropriate for a vector.
239  n_iter = 1;
240  n_elem = bl1_vector_dim( m, n );
241  lda = 1; // multiplied by zero when n_iter == 1; not needed.
242  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
243  }
244  else // matrix case
245  {
246  // Initialize with optimal values for column-major storage.
247  n_iter = n;
248  n_elem = m;
249  lda = a_cs;
250  inca = a_rs;
251 
252  // An optimization: if A is row-major, then let's access the matrix
253  // by rows instead of by columns to increase spatial locality.
254  if ( bl1_is_row_storage( a_rs, a_cs ) )
255  {
256  bl1_swap_ints( n_iter, n_elem );
257  bl1_swap_ints( lda, inca );
258  }
259  }
260 
261  bl1_dinvert2s( conj, alpha, &alpha_inv );
262 
263  for ( j = 0; j < n_iter; j++ )
264  {
265  a_begin = a + j*lda;
266 
267  bl1_zdscal( n_elem,
268  &alpha_inv,
269  a_begin, inca );
270  }
271 }
void bl1_zdscal(int n, double *alpha, dcomplex *x, int incx)
Definition: bl1_scal.c:65

References bl1_dinvert2s(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdscal(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_zdinvscalv()

void bl1_zdinvscalv ( conj1_t  conj,
int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)
66 {
67  double alpha_inv;
68 
69  if ( bl1_deq1( alpha ) ) return;
70 
71  alpha_inv = 1.0 / *alpha;
72 
73  bl1_zdscal( n,
74  &alpha_inv,
75  x, incx );
76 }

References bl1_zdscal().

◆ bl1_zdot()

void bl1_zdot ( conj1_t  conj,
int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  rho 
)
66 {
67 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
68  if ( bl1_is_conj( conj ) )
69  {
70  cblas_zdotc_sub( n,
71  x, incx,
72  y, incy,
73  rho );
74  }
75  else // if ( !bl1_is_conj( conj ) )
76  {
77  cblas_zdotu_sub( n,
78  x, incx,
79  y, incy,
80  rho );
81  }
82 #else
83  bl1_zdot_in( conj,
84  n,
85  x, incx,
86  y, incy,
87  rho );
88 #endif
89 }
void bl1_zdot_in(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:146
void cblas_zdotc_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotc)
void cblas_zdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu)

References bl1_is_conj(), bl1_zdot_in(), cblas_zdotc_sub(), cblas_zdotu_sub(), and rho.

Referenced by bl1_zdot2s(), bl1_zdots(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Dot_external(), FLA_Dotc_external(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Sylv_hh_opz_var1(), FLA_Sylv_hn_opz_var1(), FLA_Sylv_nh_opz_var1(), FLA_Sylv_nn_opz_var1(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().

◆ bl1_zdot2s()

void bl1_zdot2s ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  beta,
dcomplex *  rho 
)
71 {
72  dcomplex dotxy;
73  dcomplex dotyx;
74  dcomplex alpha_d = *alpha;
75  dcomplex alphac_d = *alpha;
76  dcomplex beta_d = *beta;
77  dcomplex rho_d = *rho;
78 
79  alphac_d.imag *= -1.0;
80 
81  bl1_zdot( conj,
82  n,
83  x, incx,
84  y, incy,
85  &dotxy );
86 
87  bl1_zdot( conj,
88  n,
89  y, incy,
90  x, incx,
91  &dotyx );
92 
93  rho->real = beta_d.real * rho_d.real - beta_d.imag * rho_d.imag +
94  alpha_d.real * dotxy.real - alpha_d.imag * dotxy.imag +
95  alphac_d.real * dotyx.real - alphac_d.imag * dotyx.imag;
96  rho->imag = beta_d.real * rho_d.imag + beta_d.imag * rho_d.real +
97  alpha_d.real * dotxy.imag + alpha_d.imag * dotxy.real +
98  alphac_d.real * dotyx.imag + alphac_d.imag * dotyx.real;
99 }
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65

References bl1_zdot(), dcomplex::imag, dcomplex::real, and rho.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), and FLA_Lyap_n_opz_var3().

◆ bl1_zdot_in()

void bl1_zdot_in ( conj1_t  conj,
int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  rho 
)
147 {
148  dcomplex* xip;
149  dcomplex* yip;
150  dcomplex xi;
151  dcomplex yi;
152  dcomplex rho_temp;
153  int i;
154 
155  rho_temp.real = 0.0;
156  rho_temp.imag = 0.0;
157 
158  xip = x;
159  yip = y;
160 
161  if ( bl1_is_conj( conj ) )
162  {
163  for ( i = 0; i < n; ++i )
164  {
165  xi.real = xip->real;
166  xi.imag = xip->imag;
167  yi.real = yip->real;
168  yi.imag = yip->imag;
169 
170  rho_temp.real += xi.real * yi.real - -xi.imag * yi.imag;
171  rho_temp.imag += xi.real * yi.imag + -xi.imag * yi.real;
172 
173  xip += incx;
174  yip += incy;
175  }
176  }
177  else // if ( !bl1_is_conj( conj ) )
178  {
179  for ( i = 0; i < n; ++i )
180  {
181  xi.real = xip->real;
182  xi.imag = xip->imag;
183  yi.real = yip->real;
184  yi.imag = yip->imag;
185 
186  rho_temp.real += xi.real * yi.real - xi.imag * yi.imag;
187  rho_temp.imag += xi.real * yi.imag + xi.imag * yi.real;
188 
189  xip += incx;
190  yip += incy;
191  }
192  }
193 
194  rho->real = rho_temp.real;
195  rho->imag = rho_temp.imag;
196 }

References bl1_is_conj(), i, dcomplex::imag, dcomplex::real, and rho.

Referenced by bl1_zdot().

◆ bl1_zdots()

void bl1_zdots ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  beta,
dcomplex *  rho 
)
57 {
58  dcomplex rho_orig = *rho;
59  dcomplex dot_prod;
60 
61  bl1_zdot( conj,
62  n,
63  x, incx,
64  y, incy,
65  &dot_prod );
66 
67  rho->real = beta->real * rho_orig.real - beta->imag * rho_orig.imag +
68  alpha->real * dot_prod.real - alpha->imag * dot_prod.imag;
69  rho->imag = beta->real * rho_orig.imag + beta->imag * rho_orig.real +
70  alpha->real * dot_prod.imag + alpha->imag * dot_prod.real;
71 }

References bl1_zdot(), dcomplex::imag, dcomplex::real, and rho.

Referenced by FLA_Chol_l_opz_var1(), FLA_Chol_l_opz_var2(), FLA_Chol_u_opz_var1(), FLA_Chol_u_opz_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Hess_UT_step_opz_var5(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_Ttmm_l_opz_var2(), FLA_Ttmm_l_opz_var3(), FLA_Ttmm_u_opz_var2(), and FLA_Ttmm_u_opz_var3().

◆ bl1_zdscal()

void bl1_zdscal ( int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)
66 {
67 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
68  cblas_zdscal( n,
69  *alpha,
70  x, incx );
71 #else
72  F77_zdscal( &n,
73  alpha,
74  x, &incx );
75 #endif
76 }
void F77_zdscal(int *n, double *alpha, dcomplex *y, int *incy)
void cblas_zdscal(const int N, const double alpha, void *X, const int incX)

References cblas_zdscal(), and F77_zdscal().

Referenced by bl1_zdinvscalm(), bl1_zdinvscalv(), bl1_zdscalm(), bl1_zdscalmr(), and bl1_zdscalv().

◆ bl1_zdscalm()

void bl1_zdscalm ( conj1_t  conj,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)
222 {
223  double alpha_conj;
224  dcomplex* a_begin;
225  int lda, inca;
226  int n_iter;
227  int n_elem;
228  int j;
229 
230  // Return early if possible.
231  if ( bl1_zero_dim2( m, n ) ) return;
232  if ( bl1_deq1( alpha ) ) return;
233 
234  // Handle cases where A is a vector to ensure that the underlying axpy
235  // gets invoked only once.
236  if ( bl1_is_vector( m, n ) )
237  {
238  // Initialize with values appropriate for a vector.
239  n_iter = 1;
240  n_elem = bl1_vector_dim( m, n );
241  lda = 1; // multiplied by zero when n_iter == 1; not needed.
242  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
243  }
244  else // matrix case
245  {
246  // Initialize with optimal values for column-major storage.
247  n_iter = n;
248  n_elem = m;
249  lda = a_cs;
250  inca = a_rs;
251 
252  // An optimization: if A is row-major, then let's access the matrix
253  // by rows instead of by columns to increase spatial locality.
254  if ( bl1_is_row_storage( a_rs, a_cs ) )
255  {
256  bl1_swap_ints( n_iter, n_elem );
257  bl1_swap_ints( lda, inca );
258  }
259  }
260 
261  bl1_dcopys( conj, alpha, &alpha_conj );
262 
263  for ( j = 0; j < n_iter; j++ )
264  {
265  a_begin = a + j*lda;
266 
267  bl1_zdscal( n_elem,
268  &alpha_conj,
269  a_begin, inca );
270  }
271 }

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zdscal(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_zdscalmr()

void bl1_zdscalmr ( uplo1_t  uplo,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)
238 {
239  dcomplex* a_begin;
240  int lda, inca;
241  int n_iter;
242  int n_elem_max;
243  int n_elem;
244  int j;
245 
246  // Return early if possible.
247  if ( bl1_zero_dim2( m, n ) ) return;
248  if ( bl1_deq1( alpha ) ) return;
249 
250  // We initialize for column-major.
251  n_iter = n;
252  n_elem_max = m;
253  lda = a_cs;
254  inca = a_rs;
255 
256  // An optimization: if A is row-major, then let's access the matrix
257  // by rows instead of by columns to increase spatial locality.
258  if ( bl1_is_row_storage( a_rs, a_cs ) )
259  {
260  bl1_swap_ints( n_iter, n_elem_max );
261  bl1_swap_ints( lda, inca );
262  bl1_toggle_uplo( uplo );
263  }
264 
265  if ( bl1_is_upper( uplo ) )
266  {
267  for ( j = 0; j < n_iter; j++ )
268  {
269  n_elem = bl1_min( j + 1, n_elem_max );
270  a_begin = a + j*lda;
271 
272  bl1_zdscal( n_elem,
273  alpha,
274  a_begin, inca );
275  }
276  }
277  else // if ( bl1_is_lower( uplo ) )
278  {
279  for ( j = 0; j < n_iter; j++ )
280  {
281  n_elem = bl1_max( 0, n_elem_max - j );
282  a_begin = a + j*lda + j*inca;
283 
284  if ( n_elem <= 0 ) break;
285 
286  bl1_zdscal( n_elem,
287  alpha,
288  a_begin, inca );
289  }
290  }
291 }

References bl1_is_row_storage(), bl1_is_upper(), bl1_zdscal(), and bl1_zero_dim2().

Referenced by bl1_zher2k(), bl1_zherk(), and FLA_Scalr_external().

◆ bl1_zdscalv()

void bl1_zdscalv ( conj1_t  conj,
int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)
62 {
63  // Return early if possible.
64  if ( bl1_zero_dim1( n ) ) return;
65  if ( bl1_deq1( alpha ) ) return;
66 
67  bl1_zdscal( n,
68  alpha,
69  x, incx );
70 }

References bl1_zdscal(), and bl1_zero_dim1().

Referenced by bl1_zdapdiagmv(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

◆ bl1_zfnorm()

void bl1_zfnorm ( int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  norm 
)
176 {
177  dcomplex* a_ij;
178  double sum;
179  int lda, inca;
180  int n_iter;
181  int n_elem;
182  int i, j;
183 
184  // Return early if possible.
185  if ( bl1_zero_dim2( m, n ) ) return;
186 
187  // Handle cases where A is a vector separately.
188  if ( bl1_is_vector( m, n ) )
189  {
190  // Initialize with values appropriate for vectors.
191  n_iter = 1;
192  n_elem = bl1_vector_dim( m, n );
193  lda = 1; // multiplied by zero when n_iter == 1; not needed.
194  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
195  }
196  else // matrix case
197  {
198  // Initialize with optimal values for column-major storage.
199  n_iter = n;
200  n_elem = m;
201  lda = a_cs;
202  inca = a_rs;
203 
204  // An optimization: if A is row-major, then let's access the matrix by
205  // rows instead of by columns for increased spatial locality.
206  if ( bl1_is_row_storage( a_rs, a_cs ) )
207  {
208  bl1_swap_ints( n_iter, n_elem );
209  bl1_swap_ints( lda, inca );
210  }
211  }
212 
213  // Initialize the accumulator variable.
214  sum = 0.0;
215 
216  for ( j = 0; j < n_iter; j++ )
217  {
218  for ( i = 0; i < n_elem; i++ )
219  {
220  a_ij = a + i*inca + j*lda;
221  sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
222  }
223  }
224 
225  // Compute the norm and store the result.
226  *norm = sqrt( sum );
227 }

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, i, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Norm_frob().

◆ bl1_zinvscalm()

void bl1_zinvscalm ( conj1_t  conj,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)
274 {
275  dcomplex alpha_inv;
276  dcomplex* a_begin;
277  int lda, inca;
278  int n_iter;
279  int n_elem;
280  int j;
281 
282  // Return early if possible.
283  if ( bl1_zero_dim2( m, n ) ) return;
284  if ( bl1_zeq1( alpha ) ) return;
285 
286  // Handle cases where A is a vector to ensure that the underlying axpy
287  // gets invoked only once.
288  if ( bl1_is_vector( m, n ) )
289  {
290  // Initialize with values appropriate for a vector.
291  n_iter = 1;
292  n_elem = bl1_vector_dim( m, n );
293  lda = 1; // multiplied by zero when n_iter == 1; not needed.
294  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
295  }
296  else // matrix case
297  {
298  // Initialize with optimal values for column-major storage.
299  n_iter = n;
300  n_elem = m;
301  lda = a_cs;
302  inca = a_rs;
303 
304  // An optimization: if A is row-major, then let's access the matrix
305  // by rows instead of by columns to increase spatial locality.
306  if ( bl1_is_row_storage( a_rs, a_cs ) )
307  {
308  bl1_swap_ints( n_iter, n_elem );
309  bl1_swap_ints( lda, inca );
310  }
311  }
312 
313  bl1_zinvert2s( conj, alpha, &alpha_inv );
314 
315  for ( j = 0; j < n_iter; j++ )
316  {
317  a_begin = a + j*lda;
318 
319  bl1_zscal( n_elem,
320  &alpha_inv,
321  a_begin, inca );
322  }
323 }
void bl1_zinvert2s(conj1_t conj, dcomplex *alpha, dcomplex *beta)
Definition: bl1_invert2s.c:44

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zinvert2s(), bl1_zscal(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

◆ bl1_zinvscalv()

void bl1_zinvscalv ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)

◆ bl1_znrm2()

void bl1_znrm2 ( int  n,
dcomplex *  x,
int  incx,
double *  norm 
)
47 {
48 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
49  *norm = cblas_dznrm2( n,
50  x, incx );
51 #else
52  *norm = F77_dznrm2( &n,
53  x, &incx );
54 #endif
55 }
double F77_dznrm2(int *n, dcomplex *x, int *incx)
double cblas_dznrm2(const int N, const void *X, const int incX)

References cblas_dznrm2(), and F77_dznrm2().

Referenced by FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_l_opz(), FLA_Househ3UD_UT_opz(), and FLA_Nrm2_external().

◆ bl1_zscal()

void bl1_zscal ( int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)
79 {
80 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
81  cblas_zscal( n,
82  alpha,
83  x, incx );
84 #else
85  F77_zscal( &n,
86  alpha,
87  x, &incx );
88 #endif
89 }
void F77_zscal(int *n, dcomplex *alpha, dcomplex *y, int *incy)
void cblas_zscal(const int N, const void *alpha, void *X, const int incX)

References cblas_zscal(), and F77_zscal().

Referenced by bl1_zaxpysmt(), bl1_zaxpysv(), bl1_zinvscalm(), bl1_zinvscalv(), bl1_zscalm(), bl1_zscalmr(), bl1_zscalv(), and FLA_SA_LU_unb().

◆ bl1_zscalm()

void bl1_zscalm ( conj1_t  conj,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)
274 {
275  dcomplex alpha_conj;
276  dcomplex* a_begin;
277  int lda, inca;
278  int n_iter;
279  int n_elem;
280  int j;
281 
282  // Return early if possible.
283  if ( bl1_zero_dim2( m, n ) ) return;
284  if ( bl1_zeq1( alpha ) ) return;
285 
286  // Handle cases where A is a vector to ensure that the underlying axpy
287  // gets invoked only once.
288  if ( bl1_is_vector( m, n ) )
289  {
290  // Initialize with values appropriate for a vector.
291  n_iter = 1;
292  n_elem = bl1_vector_dim( m, n );
293  lda = 1; // multiplied by zero when n_iter == 1; not needed.
294  inca = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, a_rs, a_cs );
295  }
296  else // matrix case
297  {
298  // Initialize with optimal values for column-major storage.
299  n_iter = n;
300  n_elem = m;
301  lda = a_cs;
302  inca = a_rs;
303 
304  // An optimization: if A is row-major, then let's access the matrix
305  // by rows instead of by columns to increase spatial locality.
306  if ( bl1_is_row_storage( a_rs, a_cs ) )
307  {
308  bl1_swap_ints( n_iter, n_elem );
309  bl1_swap_ints( lda, inca );
310  }
311  }
312 
313  bl1_zcopys( conj, alpha, &alpha_conj );
314 
315  for ( j = 0; j < n_iter; j++ )
316  {
317  a_begin = a + j*lda;
318 
319  bl1_zscal( n_elem,
320  &alpha_conj,
321  a_begin, inca );
322  }
323 }

References bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zscal(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_Lyap_n_opz_var4(), FLA_Scal_external(), and FLA_Scalc_external().

◆ bl1_zscalmr()

void bl1_zscalmr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)
294 {
295  dcomplex* a_begin;
296  int lda, inca;
297  int n_iter;
298  int n_elem_max;
299  int n_elem;
300  int j;
301 
302  // Return early if possible.
303  if ( bl1_zero_dim2( m, n ) ) return;
304  if ( bl1_zeq1( alpha ) ) return;
305 
306  // We initialize for column-major.
307  n_iter = n;
308  n_elem_max = m;
309  lda = a_cs;
310  inca = a_rs;
311 
312  // An optimization: if A is row-major, then let's access the matrix
313  // by rows instead of by columns to increase spatial locality.
314  if ( bl1_is_row_storage( a_rs, a_cs ) )
315  {
316  bl1_swap_ints( n_iter, n_elem_max );
317  bl1_swap_ints( lda, inca );
318  bl1_toggle_uplo( uplo );
319  }
320 
321  if ( bl1_is_upper( uplo ) )
322  {
323  for ( j = 0; j < n_iter; j++ )
324  {
325  n_elem = bl1_min( j + 1, n_elem_max );
326  a_begin = a + j*lda;
327 
328  bl1_zscal( n_elem,
329  alpha,
330  a_begin, inca );
331  }
332  }
333  else // if ( bl1_is_lower( uplo ) )
334  {
335  for ( j = 0; j < n_iter; j++ )
336  {
337  n_elem = bl1_max( 0, n_elem_max - j );
338  a_begin = a + j*lda + j*inca;
339 
340  if ( n_elem <= 0 ) break;
341 
342  bl1_zscal( n_elem,
343  alpha,
344  a_begin, inca );
345  }
346  }
347 }

References bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), and bl1_zscal().

Referenced by FLA_Scalr_external().

◆ bl1_zscalv()

void bl1_zscalv ( conj1_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)

◆ bl1_zscopymr()

void bl1_zscopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
659 {
660  dcomplex* a_begin;
661  float* b_begin;
662  int lda, inca;
663  int ldb, incb;
664  int n_iter;
665  int n_elem_max;
666  int n_elem;
667  int j;
668 
669  // Return early if possible.
670  if ( bl1_zero_dim2( m, n ) ) return;
671 
672  // We initialize for column-major.
673  n_iter = n;
674  n_elem_max = m;
675  lda = a_cs;
676  inca = a_rs;
677  ldb = b_cs;
678  incb = b_rs;
679 
680  // An optimization: if B is row-major, then let's access the matrix
681  // by rows instead of by columns for increased spatial locality.
682  if ( bl1_is_row_storage( b_rs, b_cs ) )
683  {
684  bl1_swap_ints( n_iter, n_elem_max );
685  bl1_swap_ints( lda, inca );
686  bl1_swap_ints( ldb, incb );
687  bl1_toggle_uplo( uplo );
688  }
689 
690 
691  if ( bl1_is_upper( uplo ) )
692  {
693  for ( j = 0; j < n_iter; j++ )
694  {
695  n_elem = bl1_min( j + 1, n_elem_max );
696  a_begin = a + j*lda;
697  b_begin = b + j*ldb;
698 
699  bl1_zscopyv( BLIS1_NO_CONJUGATE,
700  n_elem,
701  a_begin, inca,
702  b_begin, incb );
703  }
704  }
705  else // if ( bl1_is_lower( uplo ) )
706  {
707  for ( j = 0; j < n_iter; j++ )
708  {
709  n_elem = bl1_max( 0, n_elem_max - j );
710  a_begin = a + j*lda + j*inca;
711  b_begin = b + j*ldb + j*incb;
712 
713  if ( n_elem <= 0 ) break;
714 
715  bl1_zscopyv( BLIS1_NO_CONJUGATE,
716  n_elem,
717  a_begin, inca,
718  b_begin, incb );
719  }
720  }
721 }
void bl1_zscopyv(conj1_t conj, int m, dcomplex *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:191

References bl1_is_row_storage(), bl1_is_upper(), bl1_zero_dim2(), bl1_zscopyv(), and BLIS1_NO_CONJUGATE.

Referenced by FLA_Copyr_external().

◆ bl1_zscopymrt()

void bl1_zscopymrt ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
1709 {
1710  dcomplex* a_begin;
1711  float* b_begin;
1712  int lda, inca;
1713  int ldb, incb;
1714  int n_iter;
1715  int n_elem;
1716  int n_elem_max;
1717  int n_elem_is_descending;
1718  int j;
1719  conj1_t conj;
1720 
1721  // Return early if possible.
1722  if ( bl1_zero_dim2( m, n ) ) return;
1723 
1724  // Initialize variables based on storage format of B and value of uplo.
1725  if ( bl1_is_col_storage( b_rs, b_cs ) )
1726  {
1727  if ( bl1_is_lower( uplo ) )
1728  {
1729  n_iter = bl1_min( m, n );
1730  n_elem_max = m;
1731  lda = a_cs;
1732  inca = a_rs;
1733  ldb = b_cs;
1734  incb = b_rs;
1735  n_elem_is_descending = TRUE;
1736  }
1737  else // if ( bl1_is_upper( uplo ) )
1738  {
1739  n_iter = n;
1740  n_elem_max = bl1_min( m, n );
1741  lda = a_cs;
1742  inca = a_rs;
1743  ldb = b_cs;
1744  incb = b_rs;
1745  n_elem_is_descending = FALSE;
1746  }
1747  }
1748  else // if ( bl1_is_row_storage( b_rs, b_cs ) )
1749  {
1750  if ( bl1_is_lower( uplo ) )
1751  {
1752  n_iter = m;
1753  n_elem_max = bl1_min( m, n );
1754  lda = a_rs;
1755  inca = a_cs;
1756  ldb = b_rs;
1757  incb = b_cs;
1758  n_elem_is_descending = FALSE;
1759  }
1760  else // if ( bl1_is_upper( uplo ) )
1761  {
1762  n_iter = bl1_min( m, n );
1763  n_elem_max = n;
1764  lda = a_rs;
1765  inca = a_cs;
1766  ldb = b_rs;
1767  incb = b_cs;
1768  n_elem_is_descending = TRUE;
1769  }
1770  }
1771 
1772  // Swap lda and inca if we're doing a transpose.
1773  if ( bl1_does_trans( trans ) )
1774  {
1775  bl1_swap_ints( lda, inca );
1776  }
1777 
1778  // Extract conj component from trans parameter.
1779  conj = bl1_proj_trans1_to_conj( trans );
1780 
1781  // Choose the loop based on whether n_elem will be shrinking or growing
1782  // with each iteration.
1783  if ( n_elem_is_descending )
1784  {
1785  for ( j = 0; j < n_iter; j++ )
1786  {
1787  n_elem = n_elem_max - j;
1788  a_begin = a + j*lda + j*inca;
1789  b_begin = b + j*ldb + j*incb;
1790 
1791  bl1_zscopyv( conj,
1792  n_elem,
1793  a_begin, inca,
1794  b_begin, incb );
1795  }
1796  }
1797  else // if ( n_elem_is_ascending )
1798  {
1799  for ( j = 0; j < n_iter; j++ )
1800  {
1801  n_elem = bl1_min( j + 1, n_elem_max );
1802  a_begin = a + j*lda;
1803  b_begin = b + j*ldb;
1804 
1805  bl1_zscopyv( conj,
1806  n_elem,
1807  a_begin, inca,
1808  b_begin, incb );
1809  }
1810  }
1811 }

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zero_dim2(), and bl1_zscopyv().

Referenced by FLA_Copyrt_external().

◆ bl1_zscopymt()

void bl1_zscopymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
763 {
764  dcomplex* a_begin;
765  float* b_begin;
766  int lda, inca;
767  int ldb, incb;
768  int n_iter;
769  int n_elem;
770  int j;
771  conj1_t conj;
772 
773  // Return early if possible.
774  if ( bl1_zero_dim2( m, n ) ) return;
775 
776  // Handle cases where A and B are vectors to ensure that the underlying copy
777  // gets invoked only once.
778  if ( bl1_is_vector( m, n ) )
779  {
780  // Initialize with values appropriate for vectors.
781  n_iter = 1;
782  n_elem = bl1_vector_dim( m, n );
783  lda = 1; // multiplied by zero when n_iter == 1; not needed.
784  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
785  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
786  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
787  }
788  else // matrix case
789  {
790  // Initialize with optimal values for column-major storage of B.
791  n_iter = n;
792  n_elem = m;
793  lda = a_cs;
794  inca = a_rs;
795  ldb = b_cs;
796  incb = b_rs;
797 
798  // Handle the transposition of A.
799  if ( bl1_does_trans( trans ) )
800  {
801  bl1_swap_ints( lda, inca );
802  }
803 
804  // An optimization: if B is row-major, then let's access the matrix by rows
805  // instead of by columns for increased spatial locality.
806  if ( bl1_is_row_storage( b_rs, b_cs ) )
807  {
808  bl1_swap_ints( n_iter, n_elem );
809  bl1_swap_ints( lda, inca );
810  bl1_swap_ints( ldb, incb );
811  }
812  }
813 
814  // Extract conj component from trans parameter.
815  conj = bl1_proj_trans1_to_conj( trans );
816 
817  for ( j = 0; j < n_iter; ++j )
818  {
819  a_begin = a + j*lda;
820  b_begin = b + j*ldb;
821 
822  bl1_zscopyv( conj,
823  n_elem,
824  a_begin, inca,
825  b_begin, incb );
826  }
827 }

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), bl1_zscopyv(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

◆ bl1_zscopyv()

void bl1_zscopyv ( conj1_t  conj,
int  m,
dcomplex *  x,
int  incx,
float *  y,
int  incy 
)
192 {
193  dcomplex* chi;
194  float* psi;
195  int i;
196 
197  // Return early if possible.
198  if ( bl1_zero_dim1( m ) ) return;
199 
200  // Initialize pointers.
201  chi = x;
202  psi = y;
203 
204  for ( i = 0; i < m; ++i )
205  {
206  *psi = chi->real;
207 
208  chi += incx;
209  psi += incy;
210  }
211 }

References bl1_zero_dim1(), i, and dcomplex::real.

Referenced by bl1_zscopymr(), bl1_zscopymrt(), and bl1_zscopymt().

◆ bl1_zswap()

void bl1_zswap ( int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
53 {
54 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
55  cblas_zswap( n,
56  x, incx,
57  y, incy );
58 #else
59  F77_zswap( &n,
60  x, &incx,
61  y, &incy );
62 #endif
63 }
void F77_zswap(int *n, dcomplex *x, int *incx, dcomplex *y, int *incy)
void cblas_zswap(const int N, void *X, const int incX, void *Y, const int incY)

References cblas_zswap(), and F77_zswap().

Referenced by bl1_zswapmt(), bl1_zswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

◆ bl1_zswapmt()

void bl1_zswapmt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
223 {
224  dcomplex* a_begin;
225  dcomplex* b_begin;
226  int lda, inca;
227  int ldb, incb;
228  int n_iter;
229  int n_elem;
230  int j;
231 
232  // Return early if possible.
233  if ( bl1_zero_dim2( m, n ) ) return;
234 
235  // Handle cases where A and B are vectors to ensure that the underlying copy
236  // gets invoked only once.
237  if ( bl1_is_vector( m, n ) )
238  {
239  // Initialize with values appropriate for vectors.
240  n_iter = 1;
241  n_elem = bl1_vector_dim( m, n );
242  lda = 1; // multiplied by zero when n_iter == 1; not needed.
243  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
244  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
245  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
246  }
247  else // matrix case
248  {
249  // Initialize with optimal values for column-major storage.
250  n_iter = n;
251  n_elem = m;
252  lda = a_cs;
253  inca = a_rs;
254  ldb = b_cs;
255  incb = b_rs;
256 
257  // Handle the transposition of A.
258  if ( bl1_does_trans( trans ) )
259  {
260  bl1_swap_ints( lda, inca );
261  }
262 
263  // An optimization: if B is row-major and if A is effectively row-major
264  // after a possible transposition, then let's access the matrix by rows
265  // instead of by columns for increased spatial locality.
266  if ( bl1_is_row_storage( b_rs, b_cs ) )
267  {
268  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
269  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
270  {
271  bl1_swap_ints( n_iter, n_elem );
272  bl1_swap_ints( lda, inca );
273  bl1_swap_ints( ldb, incb );
274  }
275  }
276  }
277 
278  for ( j = 0; j < n_iter; j++ )
279  {
280  a_begin = a + j*lda;
281  b_begin = b + j*ldb;
282 
283  bl1_zswap( n_elem,
284  a_begin, inca,
285  b_begin, incb );
286 
287  if ( bl1_does_conj( trans ) )
288  bl1_zconjv( n_elem,
289  a_begin, inca );
290 
291  if ( bl1_does_conj( trans ) )
292  bl1_zconjv( n_elem,
293  b_begin, incb );
294  }
295 }
void bl1_zswap(int n, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_swap.c:52

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zconjv(), bl1_zero_dim2(), bl1_zswap(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

◆ bl1_zswapv()

void bl1_zswapv ( int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
44 {
45  // Return early if possible.
46  if ( bl1_zero_dim1( n ) ) return;
47 
48  bl1_zswap( n,
49  x, incx,
50  y, incy );
51 }

References bl1_zero_dim1(), and bl1_zswap().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_bsvd_ext_b_opz(), FLA_Sort_bsvd_ext_f_opz(), FLA_Sort_evd_b_opz(), FLA_Sort_evd_f_opz(), FLA_Sort_svd_b_opz(), and FLA_Sort_svd_f_opz().

◆ bl1_zzcopymr()

void bl1_zzcopymr ( uplo1_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
1247 {
1248  dcomplex* a_begin;
1249  dcomplex* b_begin;
1250  int lda, inca;
1251  int ldb, incb;
1252  int n_iter;
1253  int n_elem_max;
1254  int n_elem;
1255  int j;
1256 
1257  // Return early if possible.
1258  if ( bl1_zero_dim2( m, n ) ) return;
1259 
1260  // We initialize for column-major.
1261  n_iter = n;
1262  n_elem_max = m;
1263  lda = a_cs;
1264  inca = a_rs;
1265  ldb = b_cs;
1266  incb = b_rs;
1267 
1268  // An optimization: if B is row-major, then let's access the matrix
1269  // by rows instead of by columns for increased spatial locality.
1270  if ( bl1_is_row_storage( b_rs, b_cs ) )
1271  {
1272  bl1_swap_ints( n_iter, n_elem_max );
1273  bl1_swap_ints( lda, inca );
1274  bl1_swap_ints( ldb, incb );
1275  bl1_toggle_uplo( uplo );
1276  }
1277 
1278 
1279  if ( bl1_is_upper( uplo ) )
1280  {
1281  for ( j = 0; j < n_iter; j++ )
1282  {
1283  n_elem = bl1_min( j + 1, n_elem_max );
1284  a_begin = a + j*lda;
1285  b_begin = b + j*ldb;
1286 
1287  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1288  n_elem,
1289  a_begin, inca,
1290  b_begin, incb );
1291  }
1292  }
1293  else // if ( bl1_is_lower( uplo ) )
1294  {
1295  for ( j = 0; j < n_iter; j++ )
1296  {
1297  n_elem = bl1_max( 0, n_elem_max - j );
1298  a_begin = a + j*lda + j*inca;
1299  b_begin = b + j*ldb + j*incb;
1300 
1301  if ( n_elem <= 0 ) break;
1302 
1303  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1304  n_elem,
1305  a_begin, inca,
1306  b_begin, incb );
1307  }
1308  }
1309 }

References bl1_is_row_storage(), bl1_is_upper(), bl1_zcopyv(), bl1_zero_dim2(), and BLIS1_NO_CONJUGATE.

◆ bl1_zzcopymrt()

/*
 * bl1_zzcopymrt
 *
 * Copies one triangle of the dcomplex matrix A, optionally transposed and/or
 * conjugated as requested by trans, into the same triangle of the m x n
 * dcomplex matrix B. The work is done one column of B at a time when B is
 * column-stored and one row at a time otherwise, each via bl1_zcopyv().
 *
 *   uplo        - selects the triangle; the lower path is taken when
 *                 bl1_is_lower( uplo ) holds, the upper path otherwise.
 *   trans       - when bl1_does_trans( trans ) holds, A is addressed with
 *                 its two strides exchanged; the conjugation flag handed to
 *                 bl1_zcopyv() is bl1_proj_trans1_to_conj( trans ).
 *   m, n        - number of rows and columns of B; the routine returns
 *                 immediately when bl1_zero_dim2( m, n ) holds.
 *   a           - first element of the source matrix A.
 *   a_rs, a_cs  - row stride and column stride of A.
 *   b           - first element of the destination matrix B.
 *   b_rs, b_cs  - row stride and column stride of B.
 */
void bl1_zzcopymrt( uplo1_t uplo, trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if ( bl1_is_col_storage( b_rs, b_cs ) )
    {
        // Walk B column by column.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bl1_is_lower( uplo ) )
        {
            // Column j holds rows j..m-1, so the length shrinks with j.
            n_iter               = bl1_min( m, n );
            n_elem_max           = m;
            n_elem_is_descending = TRUE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Column j holds rows 0..j, capped at min( m, n ) elements.
            n_iter               = n;
            n_elem_max           = bl1_min( m, n );
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bl1_is_row_storage( b_rs, b_cs ) )
    {
        // Walk B row by row.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bl1_is_lower( uplo ) )
        {
            // Row j holds columns 0..j, capped at min( m, n ) elements.
            n_iter               = m;
            n_elem_max           = bl1_min( m, n );
            n_elem_is_descending = FALSE;
        }
        else // if ( bl1_is_upper( uplo ) )
        {
            // Row j holds columns j..n-1, so the length shrinks with j.
            n_iter               = bl1_min( m, n );
            n_elem_max           = n;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bl1_does_trans( trans ) )
    {
        bl1_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start on the diagonal element of vector j.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bl1_zcopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start on the first element of vector j.
            n_elem  = bl1_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bl1_zcopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}

References bl1_does_trans(), bl1_is_col_storage(), bl1_is_lower(), bl1_proj_trans1_to_conj(), bl1_zcopyv(), and bl1_zero_dim2().

◆ bl1_zzcopymt()

/*
 * bl1_zzcopymt
 *
 * Copies the dcomplex matrix A, optionally transposed and/or conjugated as
 * requested by trans, into the m x n dcomplex matrix B. The work is done as
 * a sequence of bl1_zcopyv() calls, one per column of B (or one per row when
 * B is row-stored); when bl1_is_vector( m, n ) holds a single call is made.
 *
 *   trans       - when bl1_does_trans( trans ) holds, A is addressed with
 *                 its two strides exchanged; the conjugation flag handed to
 *                 bl1_zcopyv() is bl1_proj_trans1_to_conj( trans ).
 *   m, n        - number of rows and columns of B; the routine returns
 *                 immediately when bl1_zero_dim2( m, n ) holds.
 *   a           - first element of the source matrix A.
 *   a_rs, a_cs  - row stride and column stride of A.
 *   b           - first element of the destination matrix B.
 *   b_rs, b_cs  - row stride and column stride of B.
 */
void bl1_zzcopymt( trans1_t trans, int m, int n, dcomplex* a, int a_rs, int a_cs, dcomplex* b, int b_rs, int b_cs )
{
    dcomplex* a_begin;
    dcomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj1_t   conj;

    // Return early if possible.
    if ( bl1_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bl1_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bl1_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bl1_vector_inc( trans, m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bl1_does_trans( trans ) )
        {
            bl1_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bl1_is_row_storage( b_rs, b_cs ) )
        {
            bl1_swap_ints( n_iter, n_elem );
            bl1_swap_ints( lda, inca );
            bl1_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bl1_proj_trans1_to_conj( trans );

    // Copy one full vector of n_elem elements per iteration.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bl1_zcopyv( conj,
                    n_elem,
                    a_begin, inca,
                    b_begin, incb );
    }
}

References bl1_does_trans(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zcopyv(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.