libflame  revision_anchor
Functions
blis_prototypes_level1.h File Reference

(r)

Go to the source code of this file.

Functions

void bli_samax (int n, float *x, int incx, int *index)
void bli_damax (int n, double *x, int incx, int *index)
void bli_camax (int n, scomplex *x, int incx, int *index)
void bli_zamax (int n, dcomplex *x, int incx, int *index)
void bli_sasum (int n, float *x, int incx, float *norm)
void bli_dasum (int n, double *x, int incx, double *norm)
void bli_casum (int n, scomplex *x, int incx, float *norm)
void bli_zasum (int n, dcomplex *x, int incx, double *norm)
void bli_saxpy (int n, float *alpha, float *x, int incx, float *y, int incy)
void bli_daxpy (int n, double *alpha, double *x, int incx, double *y, int incy)
void bli_caxpy (int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
void bli_zaxpy (int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
void bli_saxpyv (conj_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
void bli_daxpyv (conj_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
void bli_caxpyv (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
void bli_zaxpyv (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
void bli_saxpymt (trans_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_daxpymt (trans_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_caxpymt (trans_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zaxpymt (trans_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_saxpymrt (uplo_t uplo, trans_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_daxpymrt (uplo_t uplo, trans_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_caxpymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zaxpymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_saxpysv (int n, float *alpha0, float *alpha1, float *x, int incx, float *beta, float *y, int incy)
void bli_daxpysv (int n, double *alpha0, double *alpha1, double *x, int incx, double *beta, double *y, int incy)
void bli_caxpysv (int n, scomplex *alpha0, scomplex *alpha1, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
void bli_zaxpysv (int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
void bli_saxpysmt (trans_t trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs)
void bli_daxpysmt (trans_t trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs)
void bli_caxpysmt (trans_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs)
void bli_zaxpysmt (trans_t trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs)
void bli_sconjv (int m, float *x, int incx)
void bli_dconjv (int m, double *x, int incx)
void bli_cconjv (int m, scomplex *x, int incx)
void bli_zconjv (int m, dcomplex *x, int incx)
void bli_sconjm (int m, int n, float *a, int a_rs, int a_cs)
void bli_dconjm (int m, int n, double *a, int a_rs, int a_cs)
void bli_cconjm (int m, int n, scomplex *a, int a_rs, int a_cs)
void bli_zconjm (int m, int n, dcomplex *a, int a_rs, int a_cs)
void bli_sconjmr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs)
void bli_dconjmr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs)
void bli_cconjmr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs)
void bli_zconjmr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs)
void bli_scopy (int m, float *x, int incx, float *y, int incy)
void bli_dcopy (int m, double *x, int incx, double *y, int incy)
void bli_ccopy (int m, scomplex *x, int incx, scomplex *y, int incy)
void bli_zcopy (int m, dcomplex *x, int incx, dcomplex *y, int incy)
void bli_icopyv (conj_t conj, int m, int *x, int incx, int *y, int incy)
void bli_scopyv (conj_t conj, int m, float *x, int incx, float *y, int incy)
void bli_dcopyv (conj_t conj, int m, double *x, int incx, double *y, int incy)
void bli_ccopyv (conj_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
void bli_zcopyv (conj_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
void bli_sdcopyv (conj_t conj, int m, float *x, int incx, double *y, int incy)
void bli_dscopyv (conj_t conj, int m, double *x, int incx, float *y, int incy)
void bli_sccopyv (conj_t conj, int m, float *x, int incx, scomplex *y, int incy)
void bli_cscopyv (conj_t conj, int m, scomplex *x, int incx, float *y, int incy)
void bli_szcopyv (conj_t conj, int m, float *x, int incx, dcomplex *y, int incy)
void bli_zscopyv (conj_t conj, int m, dcomplex *x, int incx, float *y, int incy)
void bli_dccopyv (conj_t conj, int m, double *x, int incx, scomplex *y, int incy)
void bli_cdcopyv (conj_t conj, int m, scomplex *x, int incx, double *y, int incy)
void bli_dzcopyv (conj_t conj, int m, double *x, int incx, dcomplex *y, int incy)
void bli_zdcopyv (conj_t conj, int m, dcomplex *x, int incx, double *y, int incy)
void bli_czcopyv (conj_t conj, int m, scomplex *x, int incx, dcomplex *y, int incy)
void bli_zccopyv (conj_t conj, int m, dcomplex *x, int incx, scomplex *y, int incy)
void bli_scopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_dcopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_ccopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zcopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_sscopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_sdcopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dscopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_sccopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_cscopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_szcopymr (uplo_t uplo, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zscopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_ddcopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dccopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_cdcopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dzcopymr (uplo_t uplo, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zdcopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_cccopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_czcopymr (uplo_t uplo, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zccopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zzcopymr (uplo_t uplo, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_scopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_dcopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_ccopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zcopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_sscopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_sdcopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_sccopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_szcopymrt (uplo_t uplo, trans_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_dscopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_ddcopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dccopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_dzcopymrt (uplo_t uplo, trans_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_cscopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_cdcopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_cccopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_czcopymrt (uplo_t uplo, trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zscopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_zdcopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_zccopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zzcopymrt (uplo_t uplo, trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_icopymt (trans_t trans, int m, int n, int *a, int a_rs, int a_cs, int *b, int b_rs, int b_cs)
void bli_scopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_dcopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_ccopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zcopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_sscopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_sdcopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dscopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_sccopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_cscopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_szcopymt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zscopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_ddcopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dccopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_cdcopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_dzcopymt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zdcopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_cccopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_czcopymt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_zccopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zzcopymt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
void bli_cdot_in (conj_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
void bli_zdot_in (conj_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
void bli_sdot (conj_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
void bli_ddot (conj_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
void bli_cdot (conj_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
void bli_zdot (conj_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
void bli_sdots (conj_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
void bli_ddots (conj_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
void bli_cdots (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
void bli_zdots (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
void bli_sdot2s (conj_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
void bli_ddot2s (conj_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
void bli_cdot2s (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
void bli_zdot2s (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
void bli_sfnorm (int m, int n, float *a, int a_rs, int a_cs, float *norm)
void bli_dfnorm (int m, int n, double *a, int a_rs, int a_cs, double *norm)
void bli_cfnorm (int m, int n, scomplex *a, int a_rs, int a_cs, float *norm)
void bli_zfnorm (int m, int n, dcomplex *a, int a_rs, int a_cs, double *norm)
void bli_sinvscalv (conj_t conj, int n, float *alpha, float *x, int incx)
void bli_dinvscalv (conj_t conj, int n, double *alpha, double *x, int incx)
void bli_csinvscalv (conj_t conj, int n, float *alpha, scomplex *x, int incx)
void bli_cinvscalv (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx)
void bli_zdinvscalv (conj_t conj, int n, double *alpha, dcomplex *x, int incx)
void bli_zinvscalv (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
void bli_sinvscalm (conj_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
void bli_dinvscalm (conj_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
void bli_csinvscalm (conj_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
void bli_cinvscalm (conj_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
void bli_zdinvscalm (conj_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
void bli_zinvscalm (conj_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
void bli_snrm2 (int n, float *x, int incx, float *norm)
void bli_dnrm2 (int n, double *x, int incx, double *norm)
void bli_cnrm2 (int n, scomplex *x, int incx, float *norm)
void bli_znrm2 (int n, dcomplex *x, int incx, double *norm)
void bli_sscal (int n, float *alpha, float *x, int incx)
void bli_dscal (int n, double *alpha, double *x, int incx)
void bli_csscal (int n, float *alpha, scomplex *x, int incx)
void bli_cscal (int n, scomplex *alpha, scomplex *x, int incx)
void bli_zdscal (int n, double *alpha, dcomplex *x, int incx)
void bli_zscal (int n, dcomplex *alpha, dcomplex *x, int incx)
void bli_sscalv (conj_t conj, int n, float *alpha, float *x, int incx)
void bli_dscalv (conj_t conj, int n, double *alpha, double *x, int incx)
void bli_csscalv (conj_t conj, int n, float *alpha, scomplex *x, int incx)
void bli_cscalv (conj_t conj, int n, scomplex *alpha, scomplex *x, int incx)
void bli_zdscalv (conj_t conj, int n, double *alpha, dcomplex *x, int incx)
void bli_zscalv (conj_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
void bli_sscalm (conj_t conj, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
void bli_dscalm (conj_t conj, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
void bli_csscalm (conj_t conj, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
void bli_cscalm (conj_t conj, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
void bli_zdscalm (conj_t conj, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
void bli_zscalm (conj_t conj, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
void bli_sscalmr (uplo_t uplo, int m, int n, float *alpha, float *a, int a_rs, int a_cs)
void bli_dscalmr (uplo_t uplo, int m, int n, double *alpha, double *a, int a_rs, int a_cs)
void bli_csscalmr (uplo_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
void bli_cscalmr (uplo_t uplo, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs)
void bli_zdscalmr (uplo_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
void bli_zscalmr (uplo_t uplo, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs)
void bli_sswap (int n, float *x, int incx, float *y, int incy)
void bli_dswap (int n, double *x, int incx, double *y, int incy)
void bli_cswap (int n, scomplex *x, int incx, scomplex *y, int incy)
void bli_zswap (int n, dcomplex *x, int incx, dcomplex *y, int incy)
void bli_sswapv (int n, float *x, int incx, float *y, int incy)
void bli_dswapv (int n, double *x, int incx, double *y, int incy)
void bli_cswapv (int n, scomplex *x, int incx, scomplex *y, int incy)
void bli_zswapv (int n, dcomplex *x, int incx, dcomplex *y, int incy)
void bli_sswapmt (trans_t trans, int m, int n, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
void bli_dswapmt (trans_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
void bli_cswapmt (trans_t trans, int m, int n, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
void bli_zswapmt (trans_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)

Function Documentation

void bli_camax ( int  n,
scomplex *  x,
int  incx,
int *  index 
)

References cblas_icamax(), and F77_icamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_LU_piv_opc_var5(), and FLA_SA_LU_unb().

{
    // Write to *index the result of the BLAS icamax routine applied to the
    // n-element vector x with stride incx, using whichever BLAS interface
    // was selected at build time.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    *index = cblas_icamax( n, x, incx );
#else
    // The Fortran-77 result is decremented before being stored; presumably
    // this converts a 1-based index to the 0-based convention.
    *index = F77_icamax( &n, x, &incx ) - 1;
#endif
}
void bli_casum ( int  n,
scomplex *  x,
int  incx,
float *  norm 
)

References cblas_scasum(), and F77_scasum().

Referenced by FLA_Asum_external().

{
    // Write to *norm the value returned by the BLAS scasum routine for the
    // n-element vector x with stride incx. The Fortran-77 interface takes
    // its integer arguments by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    *norm = cblas_scasum( n, x, incx );
#else
    *norm = F77_scasum( &n, x, &incx );
#endif
}
void bli_caxpy ( int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References cblas_caxpy(), and F77_caxpy().

Referenced by bli_caxpymt(), bli_caxpysmt(), bli_caxpysv(), and bli_caxpyv().

{
    // Forward the arguments unchanged to the BLAS caxpy routine of the
    // interface selected at build time. alpha, x and y are passed through as
    // pointers; the Fortran-77 interface additionally takes n, incx and incy
    // by address.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    cblas_caxpy( n, alpha, x, incx, y, incy );
#else
    F77_caxpy( &n, alpha, x, &incx, y, &incy );
#endif
}
void bli_caxpymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_caxpyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by bli_cher2k(), bli_cherk(), and FLA_Axpyrt_external().

{
    scomplex* a_vec;
    scomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       iters;
    int       len;
    int       len_max;
    int       shrinking;
    int       k;
    conj_t    conj;

    // Nothing to do if either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The strides depend only on how B is stored; the iteration count, the
    // maximum vector length and whether the vector length shrinks or grows
    // additionally depend on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column storage: step between columns, run down each column.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            shrinking = TRUE;
            iters     = bli_min( m, n );
            len_max   = m;
        }
        else // upper
        {
            shrinking = FALSE;
            iters     = n;
            len_max   = bli_min( m, n );
        }
    }
    else // row storage
    {
        // Row storage: step between rows, run along each row.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            shrinking = FALSE;
            iters     = m;
            len_max   = bli_min( m, n );
        }
        else // upper
        {
            shrinking = TRUE;
            iters     = bli_min( m, n );
            len_max   = n;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation part of trans is passed on to the vector kernel.
    conj = bli_proj_trans_to_conj( trans );

    // One axpyv per column/row of the stored triangle.
    for ( k = 0; k < iters; k++ )
    {
        if ( shrinking )
        {
            // Start on the diagonal; one element fewer each iteration.
            len   = len_max - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at the leading edge; one element more each iteration,
            // capped at len_max.
            len   = bli_min( k + 1, len_max );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bli_caxpyv( conj,
                    len,
                    alpha,
                    a_vec, a_inc,
                    b_vec, b_inc );
    }
}
void bli_caxpymt ( trans_t  trans,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_callocv(), bli_caxpy(), bli_ccopyv(), bli_cfree(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_cgemm(), bli_chemm(), bli_csymm(), bli_ctrmmsx(), bli_ctrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

{
    scomplex* a_vec;
    scomplex* b_vec;
    scomplex* conj_buf;
    int       conj_buf_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       iters;
    int       len;
    int       k;
    conj_t    conj;

    // Nothing to do if either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        iters = n;
        len   = m;
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // When B is row-stored and A, after any transposition, is
        // effectively row-stored too, traverse by rows instead so that
        // both operands are accessed with better spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( iters, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector operands: a single pass covers everything, so the
        // underlying axpy is invoked only once. The leading dimensions are
        // only ever multiplied by zero here and their value is irrelevant.
        iters = 1;
        len   = bli_vector_dim( m, n );
        a_ld  = 1;
        a_inc = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld  = 1;
        b_inc = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    if ( !bli_does_conj( trans ) )
    {
        // No conjugation requested: apply axpy directly to A's vectors.
        for ( k = 0; k < iters; k++ )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_caxpy( len,
                       alpha,
                       a_vec, a_inc,
                       b_vec, b_inc );
        }

        return;
    }

    // Conjugation requested: copy each vector of A, with conj applied, into
    // a unit-stride scratch buffer and run axpy from that buffer.
    conj         = bli_proj_trans_to_conj( trans );
    conj_buf     = bli_callocv( len );
    conj_buf_inc = 1;

    for ( k = 0; k < iters; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_ccopyv( conj,
                    len,
                    a_vec,    a_inc,
                    conj_buf, conj_buf_inc );

        bli_caxpy( len,
                   alpha,
                   conj_buf, conj_buf_inc,
                   b_vec,    b_inc );
    }

    bli_cfree( conj_buf );
}
void bli_caxpysmt ( trans_t  trans,
int  m,
int  n,
scomplex *  alpha0,
scomplex *  alpha1,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  beta,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_callocv(), bli_caxpy(), bli_ccopyv(), bli_cfree(), bli_cscal(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), BLIS_NO_TRANSPOSE, scomplex::imag, and scomplex::real.

Referenced by FLA_Axpys_external().

{
    scomplex* a_vec;
    scomplex* b_vec;
    scomplex* conj_buf;
    scomplex  alpha_prod;
    int       conj_buf_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       iters;
    int       len;
    int       k;
    conj_t    conj;

    // Nothing to do if either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // alpha_prod = alpha0 * alpha1 (complex product); this single scalar is
    // what gets passed to axpy below.
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    if ( !bli_is_vector( m, n ) )
    {
        // General matrix: start from a column-by-column traversal.
        iters = n;
        len   = m;
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // When B is row-stored and A, after any transposition, is
        // effectively row-stored too, traverse by rows instead so that
        // both operands are accessed with better spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( iters, len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector operands: a single pass covers everything, so the
        // underlying scal/axpy pair is invoked only once. The leading
        // dimensions are only ever multiplied by zero here.
        iters = 1;
        len   = bli_vector_dim( m, n );
        a_ld  = 1;
        a_inc = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld  = 1;
        b_inc = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    if ( !bli_does_conj( trans ) )
    {
        // No conjugation requested: scale each vector of B by beta, then
        // add alpha_prod times the matching vector of A.
        for ( k = 0; k < iters; k++ )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_cscal( len,
                       beta,
                       b_vec, b_inc );

            bli_caxpy( len,
                       &alpha_prod,
                       a_vec, a_inc,
                       b_vec, b_inc );
        }

        return;
    }

    // Conjugation requested: copy each vector of A, with conj applied, into
    // a unit-stride scratch buffer and use that buffer as the axpy source.
    conj         = bli_proj_trans_to_conj( trans );
    conj_buf     = bli_callocv( len );
    conj_buf_inc = 1;

    for ( k = 0; k < iters; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_ccopyv( conj,
                    len,
                    a_vec,    a_inc,
                    conj_buf, conj_buf_inc );

        bli_cscal( len,
                   beta,
                   b_vec, b_inc );

        bli_caxpy( len,
                   &alpha_prod,
                   conj_buf, conj_buf_inc,
                   b_vec,    b_inc );
    }

    bli_cfree( conj_buf );
}
void bli_caxpysv ( int  n,
scomplex *  alpha0,
scomplex *  alpha1,
scomplex *  x,
int  incx,
scomplex *  beta,
scomplex *  y,
int  incy 
)

References bli_caxpy(), bli_cscal(), bli_zero_dim1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().

{
    scomplex alpha_prod;

    // Nothing to do for a zero-length vector.
    if ( bli_zero_dim1( n ) ) return;

    // alpha_prod = alpha0 * alpha1 (complex product).
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;

    // First scale y by beta in place ...
    bli_cscal( n, beta, y, incy );

    // ... then accumulate alpha_prod * x into y.
    bli_caxpy( n, &alpha_prod, x, incx, y, incy );
}
void bli_caxpyv ( conj_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bli_callocv(), bli_caxpy(), bli_ccopyv(), bli_cfree(), bli_is_conj(), and bli_zero_dim1().

Referenced by bli_caxpymrt(), bli_cgemv(), bli_chemv(), bli_ctrmvsx(), bli_ctrsvsx(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_il_opc_var4(), FLA_Eig_gest_il_opc_var5(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_iu_opc_var4(), FLA_Eig_gest_iu_opc_var5(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nl_opc_var4(), FLA_Eig_gest_nl_opc_var5(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Eig_gest_nu_opc_var4(), FLA_Eig_gest_nu_opc_var5(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), and FLA_Tridiag_UT_l_step_opc_var3().

{
    // axpy with an optional conjugation of x. When conjugation is requested,
    // x is first copied (through bli_ccopyv, which receives conj) into a
    // unit-stride temporary and the axpy runs on that copy instead.
    scomplex* x_eff    = x;
    int       incx_eff = incx;
    int       needs_temp;

    // Nothing to do for an empty vector.
    if ( bli_zero_dim1( n ) )
        return;

    needs_temp = bli_is_conj( conj );

    if ( needs_temp )
    {
        // The temporary is owned by this function and released below.
        x_eff    = bli_callocv( n );
        incx_eff = 1;

        bli_ccopyv( conj, n, x, incx, x_eff, incx_eff );
    }

    bli_caxpy( n, alpha, x_eff, incx_eff, y, incy );

    if ( needs_temp )
        bli_cfree( x_eff );
}
void bli_cccopymr ( uplo_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_ccopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

{
    // Copies the triangle of A selected by uplo into B, issuing one
    // bli_ccopyv() call per column (or per row when B is row-major).
    //   a_rs/a_cs, b_rs/b_cs: row and column strides of A and B.
    //   lda/ldb:   stride between successive vectors handed to bli_ccopyv().
    //   inca/incb: stride between elements inside one such vector.
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        // Presumably flips upper <-> lower so that the same triangle is
        // still addressed once rows and columns have traded roles.
        bli_toggle_uplo( uplo );
    }
    
    
    if ( bli_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts at its first element and carries j + 1
            // entries, capped at n_elem_max.
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // NOTE(review): BLIS_NO_TRANSPOSE looks like a trans_t constant,
            // yet bli_ccopyv() documents its first parameter as conj_t.
            // Confirm that this value is read as "no conjugation".
            bli_ccopyv( BLIS_NO_TRANSPOSE,
                        n_elem,
                        a_begin, inca, 
                        b_begin, incb );
        }
    }
    else // if ( bli_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts on the diagonal (offset j*inca / j*incb) and
            // runs to the end, so it shrinks by one element per iteration.
            n_elem  = bli_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Once the diagonal has run off the matrix there is nothing
            // left to copy in any later iteration either.
            if ( n_elem <= 0 ) break;

            bli_ccopyv( BLIS_NO_TRANSPOSE,
                        n_elem,
                        a_begin, inca, 
                        b_begin, incb );
        }
    }
}
void bli_cccopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_ccopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

{
    // Copies one triangle (chosen by uplo) of A into B, where trans may
    // request that A be read transposed and/or conjugated. The traversal
    // direction follows B's storage; each vector is copied by bli_ccopyv().
    //   lda/ldb:   stride between successive vectors.
    //   inca/incb: stride between elements inside one vector.
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj_t    conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bli_is_col_storage( b_rs, b_cs ) )
    {
        if ( bli_is_lower( uplo ) )
        {
            // Walk columns; each starts on the diagonal and shrinks.
            n_iter     = bli_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bli_is_upper( uplo ) )
        {
            // Walk columns; each starts at the top and grows.
            n_iter     = n;
            n_elem_max = bli_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        if ( bli_is_lower( uplo ) )
        {
            // Walk rows; each starts at the left edge and grows.
            n_iter     = m;
            n_elem_max = bli_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bli_is_upper( uplo ) )
        {
            // Walk rows; each starts on the diagonal and shrinks.
            n_iter     = bli_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bli_proj_trans_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Both descending setups have n_iter <= n_elem_max, so n_elem
            // stays at least 1 for every j visited here.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bli_ccopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j holds j + 1 entries until the cap n_elem_max is hit.
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bli_ccopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
void bli_cccopymt ( trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_ccopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

{
    // Copies A into the m-by-n matrix B, optionally reading A transposed
    // and/or conjugated as requested by trans. The work is split into
    // n_iter calls to bli_ccopyv(), each moving n_elem elements.
    //   lda/ldb:   stride between successive vectors.
    //   inca/incb: stride between elements inside one vector.
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj_t    conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bli_proj_trans_to_conj( trans );

    // One vector copy per iteration; conjugation (if any) is delegated to
    // bli_ccopyv() through conj.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_ccopyv( conj,
                    n_elem,
                    a_begin, inca,
                    b_begin, incb );
    }
}
void bli_cconjm ( int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_vector(), bli_sm1(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_cgemm(), and FLA_Conjugate().

{
    // Conjugates the m-by-n matrix A in place: the imaginary components are
    // scaled by m1 through bli_sscal(), one column (or row) per call.
    // m1 comes from bli_sm1(), presumably -1.0f (TODO confirm).
    float   m1 = bli_sm1();
    float*  a_conj;
    int     lda, inca;
    int     n_iter;
    int     n_elem;
    int     j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A is a vector to ensure that the underlying scal
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for a vector.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix
        // by rows instead of by columns to increase spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }

    for ( j = 0; j < n_iter; ++j )
    {
        // View the complex storage as a float array. The +1 skips the first
        // float of the element (assumed to be .real, with .imag directly
        // after it; TODO confirm scomplex layout), and the 2*inca stride
        // below advances one complex element at a time.
        a_conj = ( float* )( a + j*lda ) + 1;

        bli_sscal( n_elem,
                   &m1,
                   a_conj, 2*inca );
    }
}
void bli_cconjmr ( uplo_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_sm1(), bli_sscal(), and bli_zero_dim2().

Referenced by bli_chemm(), bli_ctrmm(), bli_ctrsm(), and FLA_Conjugate_r().

{
    // Conjugates one triangle (chosen by uplo) of the m-by-n matrix A in
    // place: the imaginary components are scaled by m1 through bli_sscal().
    // m1 comes from bli_sm1(), presumably -1.0f (TODO confirm).
    float   m1 = bli_sm1();
    float*  a_conj;
    int     lda, inca;
    int     n_iter;
    int     n_elem_max;
    int     n_elem;
    int     j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;

    // An optimization: if A is row-major, then let's access the matrix
    // by rows instead of by columns to increase spatial locality.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        // Presumably flips upper <-> lower so that the same triangle is
        // still addressed once rows and columns have traded roles.
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; ++j )
        {
            // Vector j holds j + 1 entries, capped at n_elem_max. The
            // float view + 1 targets the second float of each element
            // (assumed to be .imag; TODO confirm scomplex layout).
            n_elem = bli_min( j + 1, n_elem_max );
            a_conj = ( float* )( a + j*lda ) + 1;
    
            bli_sscal( n_elem,
                       &m1,
                       a_conj, 2*inca );
        }
    }
    else // if ( bli_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; ++j )
        {
            // Vector j starts on the diagonal and shrinks by one per step.
            n_elem = bli_max( 0, n_elem_max - j );
            a_conj = ( float* )( a + j*lda + j*inca ) + 1;
    
            // No later iteration can have elements once this one is empty.
            if ( n_elem <= 0 ) break;

            bli_sscal( n_elem,
                       &m1,
                       a_conj, 2*inca );
        }
    }
}
void bli_cconjv ( int  m,
scomplex *  x,
int  incx 
)
void bli_ccopy ( int  m,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References cblas_ccopy(), and F77_ccopy().

Referenced by bli_ccopymr(), bli_ccopymt(), bli_ccopyv(), and FLA_SA_LU_unb().

{
    // Thin dispatch to whichever BLAS ccopy binding this build selects.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 style binding: integer arguments are passed by address.
    F77_ccopy( &m, x, &incx, y, &incy );
#else
    // CBLAS binding: integer arguments are passed by value.
    cblas_ccopy( m, x, incx, y, incy );
#endif
}
void bli_ccopymr ( uplo_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_ccopy(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().

Referenced by bli_ccreate_contigmr(), bli_cfree_saved_contigmr(), and FLA_Copyr_external().

{
    // Copies the triangle of A selected by uplo into B, issuing one
    // bli_ccopy() call per column (or per row when both matrices are
    // row-major).
    //   lda/ldb:   stride between successive vectors handed to bli_ccopy().
    //   inca/incb: stride between elements inside one such vector.
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if A and B are both row-major, then let's access the
    // matrices by rows instead of by columns for increased spatial locality.
    if ( bli_is_row_storage( b_rs, b_cs ) && bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        // Presumably flips upper <-> lower so that the same triangle is
        // still addressed once rows and columns have traded roles.
        bli_toggle_uplo( uplo );
    }
    
    
    if ( bli_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts at its first element and carries j + 1
            // entries, capped at n_elem_max.
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bli_ccopy( n_elem,
                       a_begin, inca, 
                       b_begin, incb );
        }
    }
    else // if ( bli_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts on the diagonal and shrinks by one per step.
            n_elem  = bli_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // No later iteration can have elements once this one is empty.
            if ( n_elem <= 0 ) break;

            bli_ccopy( n_elem,
                       a_begin, inca, 
                       b_begin, incb );
        }
    }
}
void bli_ccopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_ccopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by bli_chemm(), bli_ctrmm(), bli_ctrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), and FLA_Lyap_n_opc_var4().

{
    // Copies one triangle (chosen by uplo) of A into B, where trans may
    // request that A be read transposed and/or conjugated. The traversal
    // direction follows B's storage; each vector is copied by bli_ccopyv().
    //   lda/ldb:   stride between successive vectors.
    //   inca/incb: stride between elements inside one vector.
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj_t    conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bli_is_col_storage( b_rs, b_cs ) )
    {
        if ( bli_is_lower( uplo ) )
        {
            // Column walk, diagonal downward: vectors shrink with j.
            n_iter     = bli_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bli_is_upper( uplo ) )
        {
            // Column walk, top down to the diagonal: vectors grow with j.
            n_iter     = n;
            n_elem_max = bli_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        if ( bli_is_lower( uplo ) )
        {
            // Row walk, left edge up to the diagonal: vectors grow with j.
            n_iter     = m;
            n_elem_max = bli_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bli_is_upper( uplo ) )
        {
            // Row walk, diagonal rightward: vectors shrink with j.
            n_iter     = bli_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bli_proj_trans_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Both descending setups have n_iter <= n_elem_max, so n_elem
            // stays at least 1 for every j visited here.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bli_ccopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j holds j + 1 entries until the cap n_elem_max is hit.
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bli_ccopyv( conj,
                        n_elem,
                        a_begin, inca,
                        b_begin, incb );
        }
    }
}
void bli_ccopymt ( trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_cconjv(), bli_ccopy(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_ccreate_contigm(), bli_ccreate_contigmt(), bli_cfree_saved_contigm(), bli_cfree_saved_contigmsr(), bli_cgemm(), bli_chemm(), bli_cher2k(), bli_csymm(), bli_csyr2k(), bli_ctrmmsx(), bli_ctrsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies A into the m-by-n matrix B, optionally reading A transposed as
    // requested by trans. Each vector is moved with bli_ccopy(); when trans
    // also asks for conjugation, bli_cconjv() is then applied to the
    // freshly written vector of B.
    //   lda/ldb:   stride between successive vectors.
    //   inca/incb: stride between elements inside one vector.
    scomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                bli_swap_ints( n_iter, n_elem );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
            }
        }
    }

    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;
        
        bli_ccopy( n_elem,
                   a_begin, inca, 
                   b_begin, incb );

        // Conjugation is applied to the destination after the plain copy,
        // so A itself is never modified.
        if ( bli_does_conj( trans ) )
            bli_cconjv( n_elem,
                        b_begin, incb );
    }
}
void bli_ccopyv ( conj_t  conj,
int  m,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)
void bli_cdcopymr ( uplo_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_cdcopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    // Copies the triangle of the single-precision complex matrix A selected
    // by uplo into the double-precision real matrix B, issuing one
    // bli_cdcopyv() call per column (or per row when B is row-major).
    //   lda/ldb:   stride between successive vectors.
    //   inca/incb: stride between elements inside one vector.
    scomplex* a_begin;
    double*   b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        // Presumably flips upper <-> lower so that the same triangle is
        // still addressed once rows and columns have traded roles.
        bli_toggle_uplo( uplo );
    }
    
    
    if ( bli_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts at its first element and carries j + 1
            // entries, capped at n_elem_max.
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // NOTE(review): BLIS_NO_TRANSPOSE looks like a trans_t constant
            // passed in a conj_t slot. bli_cdcopyv() as shown in this file
            // never reads that argument, so the value is inert here.
            bli_cdcopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
    else // if ( bli_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j starts on the diagonal and shrinks by one per step.
            n_elem  = bli_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // No later iteration can have elements once this one is empty.
            if ( n_elem <= 0 ) break;

            bli_cdcopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
}
void bli_cdcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_cdcopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies one triangle (chosen by uplo) of the single-precision complex
    // matrix A into the double-precision real matrix B, where trans may
    // request that A be read transposed. The traversal direction follows
    // B's storage; each vector is copied by bli_cdcopyv().
    //   lda/ldb:   stride between successive vectors.
    //   inca/incb: stride between elements inside one vector.
    scomplex* a_begin;
    double*   b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj_t    conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    if      ( bli_is_col_storage( b_rs, b_cs ) )
    {
        if ( bli_is_lower( uplo ) )
        {
            // Column walk, diagonal downward: vectors shrink with j.
            n_iter     = bli_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bli_is_upper( uplo ) )
        {
            // Column walk, top down to the diagonal: vectors grow with j.
            n_iter     = n;
            n_elem_max = bli_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        if ( bli_is_lower( uplo ) )
        {
            // Row walk, left edge up to the diagonal: vectors grow with j.
            n_iter     = m;
            n_elem_max = bli_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bli_is_upper( uplo ) )
        {
            // Row walk, diagonal rightward: vectors shrink with j.
            n_iter     = bli_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bli_proj_trans_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Both descending setups have n_iter <= n_elem_max, so n_elem
            // stays at least 1 for every j visited here.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;
        
            bli_cdcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Vector j holds j + 1 entries until the cap n_elem_max is hit.
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;
        
            bli_cdcopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
void bli_cdcopymt ( trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_cdcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies the single-precision complex matrix A into the m-by-n
    // double-precision real matrix B, optionally reading A transposed as
    // requested by trans. The work is split into n_iter calls to
    // bli_cdcopyv(), each moving n_elem elements.
    //   lda/ldb:   stride between successive vectors.
    //   inca/incb: stride between elements inside one vector.
    scomplex* a_begin;
    double*   b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj_t    conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bli_proj_trans_to_conj( trans );

    // One vector copy per iteration.
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_cdcopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
void bli_cdcopyv ( conj_t  conj,
int  m,
scomplex *  x,
int  incx,
double *  y,
int  incy 
)

References bli_zero_dim1(), and scomplex::real.

Referenced by bli_cdcopymr(), bli_cdcopymrt(), and bli_cdcopymt().

{
    // Writes the real part of each of the m complex elements of x into the
    // corresponding double of y. The conj argument is not consulted; only
    // the real component is ever read.
    scomplex* src = x;
    double*   dst = y;
    int       k;

    // Nothing to copy for an empty vector.
    if ( bli_zero_dim1( m ) )
        return;

    // Walk both vectors in lockstep, each with its own stride.
    for ( k = 0; k < m; ++k, src += incx, dst += incy )
        *dst = src->real;
}
void bli_cdot ( conj_t  conj,
int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  rho 
)
void bli_cdot2s ( conj_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  beta,
scomplex *  rho 
)

References bli_cdot(), scomplex::imag, and scomplex::real.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opc_var1(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var1(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var1(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var1(), FLA_Eig_gest_nu_opc_var2(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), and FLA_Lyap_n_opc_var3().

{
    // Updates rho in place as
    //   rho := beta * rho + alpha * dot( x, y ) + conj( alpha ) * dot( y, x )
    // where both dot products come from bli_cdot() with the caller's conj.
    scomplex xy;                // result of bli_cdot( x, y )
    scomplex yx;                // result of bli_cdot( y, x )
    scomplex a    = *alpha;
    scomplex a_cj = *alpha;     // becomes conj( alpha ) just below
    scomplex b    = *beta;
    scomplex r0   = *rho;       // snapshot of rho before it is overwritten

    a_cj.imag *= -1.0F;

    bli_cdot( conj, n, x, incx, y, incy, &xy );
    bli_cdot( conj, n, y, incy, x, incx, &yx );

    // Complex multiply-accumulate written out per component.
    rho->real = b.real    * r0.real - b.imag    * r0.imag +
                a.real    * xy.real - a.imag    * xy.imag +
                a_cj.real * yx.real - a_cj.imag * yx.imag;
    rho->imag = b.real    * r0.imag + b.imag    * r0.real +
                a.real    * xy.imag + a.imag    * xy.real +
                a_cj.real * yx.imag + a_cj.imag * yx.real;
}
void bli_cdot_in ( conj_t  conj,
int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  rho 
)

References bli_is_conj(), scomplex::imag, and scomplex::real.

Referenced by bli_cdot().

{
    // Accumulates the complex dot product of x and y over n elements and
    // stores it in *rho. When conj requests conjugation, the imaginary part
    // of each x element is negated before the multiply.
    scomplex* xp = x;
    scomplex* yp = y;
    scomplex  acc;
    int       k;

    acc.real = 0.0F;
    acc.imag = 0.0F;

    if ( bli_is_conj( conj ) )
    {
        // acc += conj( x[k] ) * y[k]
        for ( k = 0; k < n; ++k, xp += incx, yp += incy )
        {
            acc.real += xp->real * yp->real - -xp->imag * yp->imag;
            acc.imag += xp->real * yp->imag + -xp->imag * yp->real;
        }
    }
    else
    {
        // acc += x[k] * y[k]
        for ( k = 0; k < n; ++k, xp += incx, yp += incy )
        {
            acc.real += xp->real * yp->real - xp->imag * yp->imag;
            acc.imag += xp->real * yp->imag + xp->imag * yp->real;
        }
    }

    // Publish the result only after the whole sum is formed.
    rho->real = acc.real;
    rho->imag = acc.imag;
}
void bli_cdots ( conj_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy,
scomplex *  beta,
scomplex *  rho 
)
void bli_cfnorm ( int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  norm 
)

References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), BLIS_NO_TRANSPOSE, scomplex::imag, and scomplex::real.

Referenced by FLA_Norm_frob().

{
    scomplex* a_ij;
    float     sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // An empty matrix contributes no terms to the sum of squares. Store a
    // norm of zero rather than returning with *norm unwritten, so the
    // caller never reads an indeterminate value.
    if ( bli_zero_dim2( m, n ) )
    {
        *norm = 0.0F;
        return;
    }

    // Handle cases where A is a vector separately.
    if ( bli_is_vector( m, n ) )
    {
        // A vector is traversed in a single sweep over all of its elements.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Default traversal: one sweep per column, stepping down the rows.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        
        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0F;

    // Accumulate real^2 + imag^2 for every element visited.
    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;
            sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
        }
    }
    
    // The norm is the square root of the accumulated sum.
    *norm = ( float ) sqrt( sum );
}
void bli_cinvscalm ( conj_t  conj,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_cinvert2s(), bli_cscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    scomplex  inv_alpha;
    scomplex* a_j;
    int       n_vecs, vec_len;
    int       ld_a, inc_a;
    int       j;

    // Nothing to do for an empty operand, or when alpha tests equal to one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_ceq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A vector is handled with a single bli_cscal call. ld_a is only
        // ever multiplied by j == 0 in that case, so its value is irrelevant.
        n_vecs  = 1;
        ld_a    = 1;
        vec_len = bli_vector_dim( m, n );
        inc_a   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: by default issue one bli_cscal call per column.
        n_vecs  = n;
        vec_len = m;
        ld_a    = a_cs;
        inc_a   = a_rs;

        // For row-stored A, exchange the roles so that each call covers one
        // row instead, which gives better spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld_a, inc_a );
        }
    }

    // bli_cinvert2s fills inv_alpha from alpha and conj (presumably the
    // optionally conjugated reciprocal -- confirm against its definition).
    bli_cinvert2s( conj, alpha, &inv_alpha );

    j = 0;
    while ( j < n_vecs )
    {
        a_j = a + j*ld_a;

        bli_cscal( vec_len, &inv_alpha, a_j, inc_a );

        ++j;
    }
}
void bli_cinvscalv ( conj_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)
void bli_cnrm2 ( int  n,
scomplex *  x,
int  incx,
float *  norm 
)

References cblas_scnrm2(), and F77_scnrm2().

Referenced by FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_l_opc(), FLA_Househ3UD_UT_opc(), and FLA_Nrm2_external().

{
    // Forward to whichever scnrm2 binding the build selected. Both return
    // the result by value; it is stored through norm. The Fortran binding
    // takes its integer arguments by address.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    *norm = F77_scnrm2( &n, x, &incx );
#else
    *norm = cblas_scnrm2( n, x, incx );
#endif
}
void bli_cscal ( int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)

References cblas_cscal(), and F77_cscal().

Referenced by bli_caxpysmt(), bli_caxpysv(), bli_cinvscalm(), bli_cinvscalv(), bli_cscalm(), bli_cscalmr(), bli_cscalv(), and FLA_SA_LU_unb().

{
    // Forward to whichever cscal binding the build selected. alpha is passed
    // through as a pointer in both cases; the Fortran binding additionally
    // takes n and incx by address.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    F77_cscal( &n, alpha, x, &incx );
#else
    cblas_cscal( n, alpha, x, incx );
#endif
}
void bli_cscalm ( conj_t  conj,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_cscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_cgemm(), bli_chemm(), bli_csymm(), bli_ctrmmsx(), bli_ctrsmsx(), FLA_Lyap_h_opc_var1(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_h_opc_var4(), FLA_Lyap_n_opc_var1(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_Lyap_n_opc_var4(), FLA_Scal_external(), and FLA_Scalc_external().

{
    scomplex  alpha_used;
    scomplex* a_j;
    int       n_vecs, vec_len;
    int       ld_a, inc_a;
    int       j;

    // Nothing to do for an empty operand, or when alpha tests equal to one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_ceq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A vector is handled with a single bli_cscal call. ld_a is only
        // ever multiplied by j == 0 in that case, so its value is irrelevant.
        n_vecs  = 1;
        ld_a    = 1;
        vec_len = bli_vector_dim( m, n );
        inc_a   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: by default issue one bli_cscal call per column.
        n_vecs  = n;
        vec_len = m;
        ld_a    = a_cs;
        inc_a   = a_rs;

        // For row-stored A, exchange the roles so that each call covers one
        // row instead, which gives better spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld_a, inc_a );
        }
    }

    // bli_ccopys fills alpha_used from alpha and conj (presumably a copy
    // that is conjugated when conj asks for it -- confirm).
    bli_ccopys( conj, alpha, &alpha_used );

    j = 0;
    while ( j < n_vecs )
    {
        a_j = a + j*ld_a;

        bli_cscal( vec_len, &alpha_used, a_j, inc_a );

        ++j;
    }
}
void bli_cscalmr ( uplo_t  uplo,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_cscal(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().

Referenced by FLA_Scalr_external().

{
    scomplex* a_j;
    int       n_vecs, max_len, vec_len;
    int       ld_a, inc_a;
    int       j;

    // Nothing to do for an empty operand, or when alpha tests equal to one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_ceq1( alpha ) ) return;

    // Start out with a column-by-column traversal.
    n_vecs  = n;
    max_len = m;
    ld_a    = a_cs;
    inc_a   = a_rs;

    // For row-stored A, traverse by rows instead for better locality. The
    // uplo flag is toggled along with the swapped roles.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_vecs, max_len );
        bli_swap_ints( ld_a, inc_a );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Sweep j starts at the first element of its vector and covers
        // j + 1 elements, capped at max_len.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = bli_min( j + 1, max_len );
            a_j     = a + j*ld_a;

            bli_cscal( vec_len, alpha, a_j, inc_a );
        }
    }
    else
    {
        // Sweep j starts on the diagonal and covers the max_len - j
        // elements from there; stop once nothing is left.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = max_len - j;
            if ( vec_len <= 0 ) break;

            a_j = a + j*ld_a + j*inc_a;

            bli_cscal( vec_len, alpha, a_j, inc_a );
        }
    }
}
void bli_cscalv ( conj_t  conj,
int  n,
scomplex *  alpha,
scomplex *  x,
int  incx 
)
void bli_cscopymr ( uplo_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_cscopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    scomplex* a_j;
    float*    b_j;
    int       n_vecs, max_len, vec_len;
    int       ld_a, inc_a;
    int       ld_b, inc_b;
    int       j;

    // Nothing to copy for an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start out with a column-by-column traversal of both matrices.
    n_vecs  = n;
    max_len = m;
    ld_a    = a_cs;
    inc_a   = a_rs;
    ld_b    = b_cs;
    inc_b   = b_rs;

    // When B is row-stored, traverse by rows instead for better locality.
    // The uplo flag is toggled along with the swapped roles.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, max_len );
        bli_swap_ints( ld_a, inc_a );
        bli_swap_ints( ld_b, inc_b );
        bli_toggle_uplo( uplo );
    }

    // NOTE(review): BLIS_NO_TRANSPOSE is handed to bli_cscopyv where its
    // signature names a conj_t argument; reproduced as-is -- confirm intent.
    if ( bli_is_upper( uplo ) )
    {
        // Sweep j starts at the first element of its vector and covers
        // j + 1 elements, capped at max_len.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = bli_min( j + 1, max_len );
            a_j     = a + j*ld_a;
            b_j     = b + j*ld_b;

            bli_cscopyv( BLIS_NO_TRANSPOSE, vec_len, a_j, inc_a, b_j, inc_b );
        }
    }
    else
    {
        // Sweep j starts on the diagonal and covers the max_len - j
        // elements from there; stop once nothing is left.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = max_len - j;
            if ( vec_len <= 0 ) break;

            a_j = a + j*ld_a + j*inc_a;
            b_j = b + j*ld_b + j*inc_b;

            bli_cscopyv( BLIS_NO_TRANSPOSE, vec_len, a_j, inc_a, b_j, inc_b );
        }
    }
}
void bli_cscopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_cscopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    scomplex* a_j;
    float*    b_j;
    conj_t    conj;
    int       n_vecs, max_len, vec_len;
    int       ld_a, inc_a;
    int       ld_b, inc_b;
    int       min_mn;
    int       shrinking;
    int       j;

    // Nothing to copy for an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    min_mn = bli_min( m, n );

    // The traversal direction follows the storage of B; the sweep count,
    // the longest sweep and whether sweeps shrink or grow follow uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column traversal.
        ld_a  = a_cs;
        inc_a = a_rs;
        ld_b  = b_cs;
        inc_b = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = min_mn;
            max_len   = m;
            shrinking = TRUE;
        }
        else
        {
            n_vecs    = n;
            max_len   = min_mn;
            shrinking = FALSE;
        }
    }
    else
    {
        // Row-by-row traversal.
        ld_a  = a_rs;
        inc_a = a_cs;
        ld_b  = b_rs;
        inc_b = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = m;
            max_len   = min_mn;
            shrinking = FALSE;
        }
        else
        {
            n_vecs    = min_mn;
            max_len   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( ld_a, inc_a );
    }

    // The vector copy only needs the conj component of trans.
    conj = bli_proj_trans_to_conj( trans );

    if ( shrinking )
    {
        // Each sweep begins on the diagonal and is one element shorter
        // than the previous one.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = max_len - j;
            a_j     = a + j*ld_a + j*inc_a;
            b_j     = b + j*ld_b + j*inc_b;

            bli_cscopyv( conj, vec_len, a_j, inc_a, b_j, inc_b );
        }
    }
    else
    {
        // Each sweep begins at the start of its vector and covers j + 1
        // elements, capped at max_len.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = bli_min( j + 1, max_len );
            a_j     = a + j*ld_a;
            b_j     = b + j*ld_b;

            bli_cscopyv( conj, vec_len, a_j, inc_a, b_j, inc_b );
        }
    }
}
void bli_cscopymt ( trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_cscopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    scomplex* a_j;
    float*    b_j;
    conj_t    conj;
    int       n_vecs, vec_len;
    int       ld_a, inc_a;
    int       ld_b, inc_b;
    int       j;

    // Nothing to copy for an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // The vector copy only needs the conj component of trans.
    conj = bli_proj_trans_to_conj( trans );

    if ( bli_is_vector( m, n ) )
    {
        // Vectors are copied with a single bli_cscopyv call. The leading
        // dimensions are only multiplied by j == 0, so their values are
        // irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        ld_a    = 1;
        ld_b    = 1;
        inc_a   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        inc_b   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: by default one copy per column of B.
        n_vecs  = n;
        vec_len = m;
        ld_b    = b_cs;
        inc_b   = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            ld_a  = a_rs;
            inc_a = a_cs;
        }
        else
        {
            ld_a  = a_cs;
            inc_a = a_rs;
        }

        // When B is row-stored, copy one row per call instead, for better
        // spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld_a, inc_a );
            bli_swap_ints( ld_b, inc_b );
        }
    }

    for ( j = 0; j < n_vecs; j++ )
    {
        a_j = a + j*ld_a;
        b_j = b + j*ld_b;

        bli_cscopyv( conj, vec_len, a_j, inc_a, b_j, inc_b );
    }
}
void bli_cscopyv ( conj_t  conj,
int  m,
scomplex *  x,
int  incx,
float *  y,
int  incy 
)

References bli_zero_dim1(), and scomplex::real.

Referenced by bli_cscopymr(), bli_cscopymrt(), and bli_cscopymt().

{
    scomplex* src;
    float*    dst;
    int       k;

    // Nothing to copy for a zero-length vector.
    if ( bli_zero_dim1( m ) ) return;

    // Only the real part of each x element is stored into y; the imaginary
    // part and the conj argument are never read here.
    for ( k = 0, src = x, dst = y; k < m; ++k, src += incx, dst += incy )
    {
        *dst = src->real;
    }
}
void bli_csinvscalm ( conj_t  conj,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_csscal(), bli_is_row_storage(), bli_is_vector(), bli_sinvert2s(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    float     inv_alpha;
    scomplex* a_j;
    int       n_vecs, vec_len;
    int       ld_a, inc_a;
    int       j;

    // Nothing to do for an empty operand, or when alpha tests equal to one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A vector is handled with a single bli_csscal call. ld_a is only
        // ever multiplied by j == 0 in that case, so its value is irrelevant.
        n_vecs  = 1;
        ld_a    = 1;
        vec_len = bli_vector_dim( m, n );
        inc_a   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: by default issue one bli_csscal call per column.
        n_vecs  = n;
        vec_len = m;
        ld_a    = a_cs;
        inc_a   = a_rs;

        // For row-stored A, exchange the roles so that each call covers one
        // row instead, which gives better spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld_a, inc_a );
        }
    }

    // bli_sinvert2s fills inv_alpha from alpha and conj (presumably the
    // reciprocal of *alpha -- confirm against its definition).
    bli_sinvert2s( conj, alpha, &inv_alpha );

    j = 0;
    while ( j < n_vecs )
    {
        a_j = a + j*ld_a;

        bli_csscal( vec_len, &inv_alpha, a_j, inc_a );

        ++j;
    }
}
void bli_csinvscalv ( conj_t  conj,
int  n,
float *  alpha,
scomplex *  x,
int  incx 
)

References bli_csscal().

{
    float recip;

    // Dividing by one changes nothing, so skip the call entirely.
    if ( bli_seq1( alpha ) ) return;

    // Scale x by the reciprocal of the real scalar. The conj argument is
    // not read in this body.
    recip = 1.0F / *alpha;

    bli_csscal( n, &recip, x, incx );
}
void bli_csscal ( int  n,
float *  alpha,
scomplex *  x,
int  incx 
)

References cblas_csscal(), and F77_csscal().

Referenced by bli_csinvscalm(), bli_csinvscalv(), bli_csscalm(), bli_csscalmr(), and bli_csscalv().

{
    // Forward to whichever csscal binding the build selected. The CBLAS
    // binding receives the scalar by value (hence the dereference), while
    // the Fortran binding receives every scalar argument by address.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    F77_csscal( &n, alpha, x, &incx );
#else
    cblas_csscal( n, *alpha, x, incx );
#endif
}
void bli_csscalm ( conj_t  conj,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_csscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Scal_external(), and FLA_Scalc_external().

{
    float     alpha_used;
    scomplex* a_j;
    int       n_vecs, vec_len;
    int       ld_a, inc_a;
    int       j;

    // Nothing to do for an empty operand, or when alpha tests equal to one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A vector is handled with a single bli_csscal call. ld_a is only
        // ever multiplied by j == 0 in that case, so its value is irrelevant.
        n_vecs  = 1;
        ld_a    = 1;
        vec_len = bli_vector_dim( m, n );
        inc_a   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // General matrix: by default issue one bli_csscal call per column.
        n_vecs  = n;
        vec_len = m;
        ld_a    = a_cs;
        inc_a   = a_rs;

        // For row-stored A, exchange the roles so that each call covers one
        // row instead, which gives better spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld_a, inc_a );
        }
    }

    // bli_scopys fills alpha_used from alpha and conj (presumably a plain
    // copy of the real scalar -- confirm against its definition).
    bli_scopys( conj, alpha, &alpha_used );

    j = 0;
    while ( j < n_vecs )
    {
        a_j = a + j*ld_a;

        bli_csscal( vec_len, &alpha_used, a_j, inc_a );

        ++j;
    }
}
void bli_csscalmr ( uplo_t  uplo,
int  m,
int  n,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_csscal(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().

Referenced by bli_cher2k(), bli_cherk(), and FLA_Scalr_external().

{
    scomplex* a_j;
    int       n_vecs, max_len, vec_len;
    int       ld_a, inc_a;
    int       j;

    // Nothing to do for an empty operand, or when alpha tests equal to one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    // Start out with a column-by-column traversal.
    n_vecs  = n;
    max_len = m;
    ld_a    = a_cs;
    inc_a   = a_rs;

    // For row-stored A, traverse by rows instead for better locality. The
    // uplo flag is toggled along with the swapped roles.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_vecs, max_len );
        bli_swap_ints( ld_a, inc_a );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Sweep j starts at the first element of its vector and covers
        // j + 1 elements, capped at max_len.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = bli_min( j + 1, max_len );
            a_j     = a + j*ld_a;

            bli_csscal( vec_len, alpha, a_j, inc_a );
        }
    }
    else
    {
        // Sweep j starts on the diagonal and covers the max_len - j
        // elements from there; stop once nothing is left.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = max_len - j;
            if ( vec_len <= 0 ) break;

            a_j = a + j*ld_a + j*inc_a;

            bli_csscal( vec_len, alpha, a_j, inc_a );
        }
    }
}
void bli_csscalv ( conj_t  conj,
int  n,
float *  alpha,
scomplex *  x,
int  incx 
)

References bli_csscal(), and bli_zero_dim1().

Referenced by bli_csapdiagmv().

{
    // Skip the call for a zero-length vector or when alpha tests equal to
    // one. The conj argument is not read in this body.
    if ( bli_zero_dim1( n ) || bli_seq1( alpha ) )
    {
        return;
    }

    bli_csscal( n, alpha, x, incx );
}
void bli_cswap ( int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References cblas_cswap(), and F77_cswap().

Referenced by bli_cswapmt(), bli_cswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

{
    // Forward to whichever cswap binding the build selected. The Fortran
    // binding takes its integer arguments by address.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    F77_cswap( &n, x, &incx, y, &incy );
#else
    cblas_cswap( n, x, incx, y, incy );
#endif
}
void bli_cswapmt ( trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_cconjv(), bli_cswap(), bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

{
    scomplex* a_j;
    scomplex* b_j;
    int       n_vecs, vec_len;
    int       ld_a, inc_a;
    int       ld_b, inc_b;
    int       conj_after_swap;
    int       j;

    // Nothing to swap for an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vectors are exchanged with a single bli_cswap call. The leading
        // dimensions are only multiplied by j == 0, so their values are
        // irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        ld_a    = 1;
        ld_b    = 1;
        inc_a   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        inc_b   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: by default one swap per column.
        n_vecs  = n;
        vec_len = m;
        ld_b    = b_cs;
        inc_b   = b_rs;

        // A transposed A is accessed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            ld_a  = a_rs;
            inc_a = a_cs;
        }
        else
        {
            ld_a  = a_cs;
            inc_a = a_rs;
        }

        // Switch to a row-by-row traversal only when B is row-stored and A
        // is effectively row-stored too once the transposition is accounted
        // for; that is the case in which it improves spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                bli_swap_ints( n_vecs, vec_len );
                bli_swap_ints( ld_a, inc_a );
                bli_swap_ints( ld_b, inc_b );
            }
        }
    }

    // trans does not change inside the loop, so query its conj component once.
    conj_after_swap = bli_does_conj( trans );

    for ( j = 0; j < n_vecs; ++j )
    {
        a_j = a + j*ld_a;
        b_j = b + j*ld_b;

        bli_cswap( vec_len, a_j, inc_a, b_j, inc_b );

        // When trans carries a conjugation, both exchanged vectors are
        // conjugated in place, A's first and then B's.
        if ( conj_after_swap )
        {
            bli_cconjv( vec_len, a_j, inc_a );
            bli_cconjv( vec_len, b_j, inc_b );
        }
    }
}
void bli_cswapv ( int  n,
scomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bli_cswap(), and bli_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external().

{
    // A zero-length vector needs no call into bli_cswap.
    if ( bli_zero_dim1( n ) )
    {
        return;
    }

    bli_cswap( n, x, incx, y, incy );
}
void bli_czcopymr ( uplo_t  uplo,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_czcopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    scomplex* a_j;
    dcomplex* b_j;
    int       n_vecs, max_len, vec_len;
    int       ld_a, inc_a;
    int       ld_b, inc_b;
    int       j;

    // Nothing to copy for an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start out with a column-by-column traversal of both matrices.
    n_vecs  = n;
    max_len = m;
    ld_a    = a_cs;
    inc_a   = a_rs;
    ld_b    = b_cs;
    inc_b   = b_rs;

    // When B is row-stored, traverse by rows instead for better locality.
    // The uplo flag is toggled along with the swapped roles.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, max_len );
        bli_swap_ints( ld_a, inc_a );
        bli_swap_ints( ld_b, inc_b );
        bli_toggle_uplo( uplo );
    }

    // NOTE(review): BLIS_NO_TRANSPOSE is handed to bli_czcopyv where its
    // signature names a conj_t argument; reproduced as-is -- confirm intent.
    if ( bli_is_upper( uplo ) )
    {
        // Sweep j starts at the first element of its vector and covers
        // j + 1 elements, capped at max_len.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = bli_min( j + 1, max_len );
            a_j     = a + j*ld_a;
            b_j     = b + j*ld_b;

            bli_czcopyv( BLIS_NO_TRANSPOSE, vec_len, a_j, inc_a, b_j, inc_b );
        }
    }
    else
    {
        // Sweep j starts on the diagonal and covers the max_len - j
        // elements from there; stop once nothing is left.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = max_len - j;
            if ( vec_len <= 0 ) break;

            a_j = a + j*ld_a + j*inc_a;
            b_j = b + j*ld_b + j*inc_b;

            bli_czcopyv( BLIS_NO_TRANSPOSE, vec_len, a_j, inc_a, b_j, inc_b );
        }
    }
}
void bli_czcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_czcopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    scomplex* a_j;
    dcomplex* b_j;
    conj_t    conj;
    int       n_vecs, max_len, vec_len;
    int       ld_a, inc_a;
    int       ld_b, inc_b;
    int       min_mn;
    int       shrinking;
    int       j;

    // Nothing to copy for an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    min_mn = bli_min( m, n );

    // The traversal direction follows the storage of B; the sweep count,
    // the longest sweep and whether sweeps shrink or grow follow uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column traversal.
        ld_a  = a_cs;
        inc_a = a_rs;
        ld_b  = b_cs;
        inc_b = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = min_mn;
            max_len   = m;
            shrinking = TRUE;
        }
        else
        {
            n_vecs    = n;
            max_len   = min_mn;
            shrinking = FALSE;
        }
    }
    else
    {
        // Row-by-row traversal.
        ld_a  = a_rs;
        inc_a = a_cs;
        ld_b  = b_rs;
        inc_b = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs    = m;
            max_len   = min_mn;
            shrinking = FALSE;
        }
        else
        {
            n_vecs    = min_mn;
            max_len   = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( ld_a, inc_a );
    }

    // The vector copy only needs the conj component of trans.
    conj = bli_proj_trans_to_conj( trans );

    if ( shrinking )
    {
        // Each sweep begins on the diagonal and is one element shorter
        // than the previous one.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = max_len - j;
            a_j     = a + j*ld_a + j*inc_a;
            b_j     = b + j*ld_b + j*inc_b;

            bli_czcopyv( conj, vec_len, a_j, inc_a, b_j, inc_b );
        }
    }
    else
    {
        // Each sweep begins at the start of its vector and covers j + 1
        // elements, capped at max_len.
        for ( j = 0; j < n_vecs; ++j )
        {
            vec_len = bli_min( j + 1, max_len );
            a_j     = a + j*ld_a;
            b_j     = b + j*ld_b;

            bli_czcopyv( conj, vec_len, a_j, inc_a, b_j, inc_b );
        }
    }
}
void bli_czcopymt ( trans_t  trans,
int  m,
int  n,
scomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_czcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    scomplex* a_j;
    dcomplex* b_j;
    conj_t    conj;
    int       n_vecs, vec_len;
    int       ld_a, inc_a;
    int       ld_b, inc_b;
    int       j;

    // Nothing to copy for an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // The vector copy only needs the conj component of trans.
    conj = bli_proj_trans_to_conj( trans );

    if ( bli_is_vector( m, n ) )
    {
        // Vectors are copied with a single bli_czcopyv call. The leading
        // dimensions are only multiplied by j == 0, so their values are
        // irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        ld_a    = 1;
        ld_b    = 1;
        inc_a   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        inc_b   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // General matrix: by default one copy per column of B.
        n_vecs  = n;
        vec_len = m;
        ld_b    = b_cs;
        inc_b   = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            ld_a  = a_rs;
            inc_a = a_cs;
        }
        else
        {
            ld_a  = a_cs;
            inc_a = a_rs;
        }

        // When B is row-stored, copy one row per call instead, for better
        // spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld_a, inc_a );
            bli_swap_ints( ld_b, inc_b );
        }
    }

    for ( j = 0; j < n_vecs; j++ )
    {
        a_j = a + j*ld_a;
        b_j = b + j*ld_b;

        bli_czcopyv( conj, vec_len, a_j, inc_a, b_j, inc_b );
    }
}
void bli_czcopyv ( conj_t  conj,
int  m,
scomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References bli_is_conj(), bli_zconjv(), bli_zero_dim1(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.

Referenced by bli_czcopymr(), bli_czcopymrt(), and bli_czcopymt().

{
    // Copies m elements of the scomplex vector x (stride incx) into the
    // dcomplex vector y (stride incy), component by component, and then
    // conjugates y in place when conj requests it.
    scomplex* src;
    dcomplex* dst;
    int       k;

    // An empty vector needs no work.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( k = 0; k < m; ++k, src += incx, dst += incy )
    {
        dst->real = src->real;
        dst->imag = src->imag;
    }

    // The copy loop never conjugates; conjugation is applied to the
    // destination afterwards.
    if ( bli_is_conj( conj ) )
    {
        bli_zconjv( m,
                    y, incy );
    }
}
void bli_damax ( int  n,
double *  x,
int  incx,
int *  index 
)

References cblas_idamax(), and F77_idamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_LU_piv_opd_var5(), and FLA_SA_LU_unb().

{
    // Stores in *index the result of the underlying BLAS "idamax" routine
    // applied to the n-element vector x with stride incx. Which interface is
    // called is selected at compile time.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: arguments are passed by value and the result is stored
    // as returned.
    *index = cblas_idamax( n,
                           x, incx );
#else
    // Fortran-77 interface: scalar arguments are passed by address, and 1 is
    // subtracted from the result.
    // NOTE(review): presumably this converts a 1-based Fortran index to the
    // 0-based convention of the CBLAS path -- confirm against the BLAS docs.
    *index = F77_idamax( &n,
                         x, &incx ) - 1;
#endif
}
void bli_dasum ( int  n,
double *  x,
int  incx,
double *  norm 
)

References cblas_dasum(), and F77_dasum().

Referenced by FLA_Asum_external().

{
    // Stores in *norm the result of the underlying BLAS "dasum" routine
    // applied to the n-element vector x with stride incx. Which interface is
    // called is selected at compile time.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: arguments are passed by value.
    *norm = cblas_dasum( n,
                         x, incx );
#else
    // Fortran-77 interface: scalar arguments are passed by address.
    *norm = F77_dasum( &n,
                       x, &incx );
#endif
}
void bli_daxpy ( int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy 
)

References cblas_daxpy(), and F77_daxpy().

Referenced by bli_daxpymt(), bli_daxpysmt(), bli_daxpysv(), and bli_daxpyv().

{
    // Forwards to the underlying BLAS "daxpy" routine with scalar *alpha,
    // source vector x (stride incx) and destination vector y (stride incy).
    // Which interface is called is selected at compile time.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: alpha is dereferenced and passed by value.
    cblas_daxpy( n,
                 *alpha,
                 x, incx,
                 y, incy );
#else
    // Fortran-77 interface: scalars are passed by address, so alpha is
    // forwarded as a pointer.
    F77_daxpy( &n,
               alpha,
               x, &incx,
               y, &incy );
#endif
}
void bli_daxpymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_daxpyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by FLA_Axpyrt_external().

{
    // Walks the uplo-selected triangle of the m-by-n operands one column
    // (column-stored B) or one row (otherwise) at a time and hands each
    // segment of A and B, together with alpha, to bli_daxpyv().
    double*   a_seg;
    double*   b_seg;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       shrinking;   // TRUE when segments start on the diagonal
    int       diag_len;
    int       j;
    conj_t    conj;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    diag_len = bli_min( m, n );

    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Traverse column by column.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            // Columns of a lower triangle start on the diagonal and shrink.
            shrinking  = TRUE;
            n_iter     = diag_len;
            n_elem_max = m;
        }
        else
        {
            // Columns of an upper triangle start at the top and grow.
            shrinking  = FALSE;
            n_iter     = n;
            n_elem_max = diag_len;
        }
    }
    else
    {
        // Traverse row by row.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            // Rows of a lower triangle start at the left edge and grow.
            shrinking  = FALSE;
            n_iter     = m;
            n_elem_max = diag_len;
        }
        else
        {
            // Rows of an upper triangle start on the diagonal and shrink.
            shrinking  = TRUE;
            n_iter     = diag_len;
            n_elem_max = n;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conjugation part of trans is forwarded to the vector kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        if ( shrinking )
        {
            n_elem = n_elem_max - j;
            a_seg  = a + j*lda + j*inca;
            b_seg  = b + j*ldb + j*incb;
        }
        else
        {
            n_elem = bli_min( j + 1, n_elem_max );
            a_seg  = a + j*lda;
            b_seg  = b + j*ldb;
        }

        bli_daxpyv( conj,
                    n_elem,
                    alpha,
                    a_seg, inca,
                    b_seg, incb );
    }
}
void bli_daxpymt ( trans_t  trans,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_daxpy(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_dgemm(), bli_dsymm(), bli_dtrmmsx(), bli_dtrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

{
    // Calls bli_daxpy() once per column (or per row) of the m-by-n operands,
    // passing alpha, a slice of A read through trans, and the matching slice
    // of B. Vector-shaped operands are handled with a single call.
    double*   a_slice;
    double*   b_slice;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       k;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // One call covers the whole vector. The leading dimensions are only
        // ever multiplied by k == 0, so their values are irrelevant.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        ldb    = 1;
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Default traversal: column by column.
        n_iter = n;
        n_elem = m;
        ldb    = b_cs;
        incb   = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        // Switch to a row-by-row traversal only when B is row-stored and A,
        // after the possible transposition, is effectively row-stored too.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    for ( k = 0; k < n_iter; ++k )
    {
        a_slice = a + k*lda;
        b_slice = b + k*ldb;

        bli_daxpy( n_elem,
                   alpha,
                   a_slice, inca,
                   b_slice, incb );
    }
}
void bli_daxpysmt ( trans_t  trans,
int  m,
int  n,
double *  alpha0,
double *  alpha1,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_daxpy(), bli_does_notrans(), bli_does_trans(), bli_dscal(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

{
    // For each column (or row) of the m-by-n operands: first passes the
    // slice of B with beta to bli_dscal(), then passes the product
    // (*alpha0)*(*alpha1), the slice of A read through trans, and the same
    // slice of B to bli_daxpy().
    double*   a_slice;
    double*   b_slice;
    double    scale;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       k;

    // Empty operands: nothing to do (the scalars are not read either).
    if ( bli_zero_dim2( m, n ) ) return;

    // Fold the two alpha factors into a single scalar.
    scale = (*alpha0) * (*alpha1);

    if ( bli_is_vector( m, n ) )
    {
        // One pass covers the whole vector. The leading dimensions are only
        // ever multiplied by k == 0, so their values are irrelevant.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        ldb    = 1;
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Default traversal: column by column.
        n_iter = n;
        n_elem = m;
        ldb    = b_cs;
        incb   = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        // Switch to a row-by-row traversal only when B is row-stored and A,
        // after the possible transposition, is effectively row-stored too.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    for ( k = 0; k < n_iter; ++k )
    {
        a_slice = a + k*lda;
        b_slice = b + k*ldb;

        // Scale the slice of B first ...
        bli_dscal( n_elem,
                   beta,
                   b_slice, incb );

        // ... then add the scaled slice of A into it.
        bli_daxpy( n_elem,
                   &scale,
                   a_slice, inca,
                   b_slice, incb );
    }
}
void bli_daxpysv ( int  n,
double *  alpha0,
double *  alpha1,
double *  x,
int  incx,
double *  beta,
double *  y,
int  incy 
)

References bli_daxpy(), bli_dscal(), and bli_zero_dim1().

Referenced by FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().

{
    // Passes y with beta to bli_dscal(), then passes the product
    // (*alpha0)*(*alpha1), x and y to bli_daxpy().
    double   scale;

    // Empty vectors: nothing to do (the scalars are not read either).
    if ( bli_zero_dim1( n ) ) return;

    // Fold the two alpha factors into a single scalar.
    scale = (*alpha0) * (*alpha1);

    bli_dscal( n, beta, y, incy );

    bli_daxpy( n, &scale, x, incx, y, incy );
}
void bli_daxpyv ( conj_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy 
)
void bli_dccopymr ( uplo_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_dccopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    // Walks the uplo-selected triangle of the m-by-n operands one column (or
    // one row, when B is row-stored) at a time and hands each segment of A
    // and B to bli_dccopyv().
    double*   a_seg;
    scomplex* b_seg;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       walk_upper;
    int       j;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        // Row-stored B: traverse by rows for better spatial locality. The
        // roles of rows and columns are exchanged, so uplo is toggled too.
        n_iter     = m;
        n_elem_max = n;
        lda        = a_rs;
        inca       = a_cs;
        ldb        = b_rs;
        incb       = b_cs;
        bli_toggle_uplo( uplo );
    }
    else
    {
        // Otherwise traverse by columns.
        n_iter     = n;
        n_elem_max = m;
        lda        = a_cs;
        inca       = a_rs;
        ldb        = b_cs;
        incb       = b_rs;
    }

    walk_upper = bli_is_upper( uplo );

    for ( j = 0; j < n_iter; ++j )
    {
        if ( walk_upper )
        {
            // Segments start at the edge and grow up to n_elem_max.
            n_elem = bli_min( j + 1, n_elem_max );
            a_seg  = a + j*lda;
            b_seg  = b + j*ldb;
        }
        else
        {
            // Segments start on the diagonal and shrink; stop once nothing
            // is left below/right of the diagonal.
            n_elem = n_elem_max - j;
            if ( n_elem <= 0 ) break;

            a_seg  = a + j*lda + j*inca;
            b_seg  = b + j*ldb + j*incb;
        }

        bli_dccopyv( BLIS_NO_TRANSPOSE,
                     n_elem,
                     a_seg, inca,
                     b_seg, incb );
    }
}
void bli_dccopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_dccopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Walks the uplo-selected triangle of the m-by-n operands one column
    // (column-stored B) or one row (otherwise) at a time and hands each
    // segment of A (read through trans) and B to bli_dccopyv().
    double*   a_seg;
    scomplex* b_seg;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       shrinking;   // TRUE when segments start on the diagonal
    int       diag_len;
    int       j;
    conj_t    conj;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    diag_len = bli_min( m, n );

    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Traverse column by column.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            // Columns of a lower triangle start on the diagonal and shrink.
            shrinking  = TRUE;
            n_iter     = diag_len;
            n_elem_max = m;
        }
        else
        {
            // Columns of an upper triangle start at the top and grow.
            shrinking  = FALSE;
            n_iter     = n;
            n_elem_max = diag_len;
        }
    }
    else
    {
        // Traverse row by row.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            // Rows of a lower triangle start at the left edge and grow.
            shrinking  = FALSE;
            n_iter     = m;
            n_elem_max = diag_len;
        }
        else
        {
            // Rows of an upper triangle start on the diagonal and shrink.
            shrinking  = TRUE;
            n_iter     = diag_len;
            n_elem_max = n;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conjugation part of trans is forwarded to the vector kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        if ( shrinking )
        {
            n_elem = n_elem_max - j;
            a_seg  = a + j*lda + j*inca;
            b_seg  = b + j*ldb + j*incb;
        }
        else
        {
            n_elem = bli_min( j + 1, n_elem_max );
            a_seg  = a + j*lda;
            b_seg  = b + j*ldb;
        }

        bli_dccopyv( conj,
                     n_elem,
                     a_seg, inca,
                     b_seg, incb );
    }
}
void bli_dccopymt ( trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_dccopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Calls bli_dccopyv() once per column (or per row, when B is row-stored)
    // of the m-by-n operands, passing a slice of A read through trans and the
    // matching slice of B. Vector-shaped operands take a single call.
    double*   a_slice;
    scomplex* b_slice;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       k;
    conj_t    conj;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // One call covers the whole vector. The leading dimensions are only
        // ever multiplied by k == 0, so their values are irrelevant.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        ldb    = 1;
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            // Row-stored B: traverse by rows for better spatial locality,
            // which exchanges A's strides once more.
            n_iter = m;
            n_elem = n;
            bli_swap_ints( lda, inca );
            ldb    = b_rs;
            incb   = b_cs;
        }
        else
        {
            // Otherwise traverse by columns.
            n_iter = n;
            n_elem = m;
            ldb    = b_cs;
            incb   = b_rs;
        }
    }

    // Only the conjugation part of trans is forwarded to the vector kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_iter; ++k )
    {
        a_slice = a + k*lda;
        b_slice = b + k*ldb;

        bli_dccopyv( conj,
                     n_elem,
                     a_slice, inca,
                     b_slice, incb );
    }
}
void bli_dccopyv ( conj_t  conj,
int  m,
double *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bli_zero_dim1(), scomplex::imag, and scomplex::real.

Referenced by bli_dccopymr(), bli_dccopymrt(), and bli_dccopymt().

{
    // Writes each of the m real values of x (stride incx) into the real part
    // of the corresponding element of y (stride incy) and sets that element's
    // imaginary part to zero. The conj argument is not consulted.
    double*   src;
    scomplex* dst;
    int       k;

    // An empty vector needs no work.
    if ( bli_zero_dim1( m ) ) return;

    src = x;
    dst = y;

    for ( k = 0; k < m; ++k, src += incx, dst += incy )
    {
        dst->real = *src;
        dst->imag = 0.0F;
    }
}
void bli_dconjm ( int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs 
)
{
    // Intentionally empty: no argument is read and A is never written.
    // (Conjugating a real value leaves it unchanged.)
}
void bli_dconjmr ( uplo_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs 
)
{
    // Intentionally empty: no argument is read and A is never written.
    // (Conjugating a real value leaves it unchanged.)
}
void bli_dconjv ( int  m,
double *  x,
int  incx 
)
void bli_dcopy ( int  m,
double *  x,
int  incx,
double *  y,
int  incy 
)

References cblas_dcopy(), and F77_dcopy().

Referenced by bli_dcopymr(), bli_dcopymt(), bli_dcopyv(), and FLA_SA_LU_unb().

{
    // Forwards to the underlying BLAS "dcopy" routine with source vector x
    // (stride incx), destination vector y (stride incy) and length m. Which
    // interface is called is selected at compile time.
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS interface: arguments are passed by value.
    cblas_dcopy( m,
                 x, incx, 
                 y, incy );
#else
    // Fortran-77 interface: scalar arguments are passed by address.
    F77_dcopy( &m,
               x, &incx, 
               y, &incy );
#endif
}
void bli_dcopymr ( uplo_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_dcopy(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().

Referenced by bli_dcreate_contigmr(), bli_dfree_saved_contigmr(), and FLA_Copyr_external().

{
    // Walks the uplo-selected triangle of the m-by-n operands one column (or
    // one row, when both A and B are row-stored) at a time and hands each
    // segment of A and B to bli_dcopy().
    double*   a_seg;
    double*   b_seg;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       walk_upper;
    int       j;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_row_storage( b_rs, b_cs ) && bli_is_row_storage( a_rs, a_cs ) )
    {
        // Both operands row-stored: traverse by rows for better spatial
        // locality. The roles of rows and columns are exchanged, so uplo is
        // toggled too.
        n_iter     = m;
        n_elem_max = n;
        lda        = a_rs;
        inca       = a_cs;
        ldb        = b_rs;
        incb       = b_cs;
        bli_toggle_uplo( uplo );
    }
    else
    {
        // Otherwise traverse by columns.
        n_iter     = n;
        n_elem_max = m;
        lda        = a_cs;
        inca       = a_rs;
        ldb        = b_cs;
        incb       = b_rs;
    }

    walk_upper = bli_is_upper( uplo );

    for ( j = 0; j < n_iter; ++j )
    {
        if ( walk_upper )
        {
            // Segments start at the edge and grow up to n_elem_max.
            n_elem = bli_min( j + 1, n_elem_max );
            a_seg  = a + j*lda;
            b_seg  = b + j*ldb;
        }
        else
        {
            // Segments start on the diagonal and shrink; stop once nothing
            // is left below/right of the diagonal.
            n_elem = n_elem_max - j;
            if ( n_elem <= 0 ) break;

            a_seg  = a + j*lda + j*inca;
            b_seg  = b + j*ldb + j*incb;
        }

        bli_dcopy( n_elem,
                   a_seg, inca,
                   b_seg, incb );
    }
}
void bli_dcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_dcopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), and FLA_Lyap_n_opd_var4().

{
    // Walks the uplo-selected triangle of the m-by-n operands one column
    // (column-stored B) or one row (otherwise) at a time and hands each
    // segment of A (read through trans) and B to bli_dcopyv().
    double*   a_seg;
    double*   b_seg;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       shrinking;   // TRUE when segments start on the diagonal
    int       diag_len;
    int       j;
    conj_t    conj;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    diag_len = bli_min( m, n );

    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Traverse column by column.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            // Columns of a lower triangle start on the diagonal and shrink.
            shrinking  = TRUE;
            n_iter     = diag_len;
            n_elem_max = m;
        }
        else
        {
            // Columns of an upper triangle start at the top and grow.
            shrinking  = FALSE;
            n_iter     = n;
            n_elem_max = diag_len;
        }
    }
    else
    {
        // Traverse row by row.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            // Rows of a lower triangle start at the left edge and grow.
            shrinking  = FALSE;
            n_iter     = m;
            n_elem_max = diag_len;
        }
        else
        {
            // Rows of an upper triangle start on the diagonal and shrink.
            shrinking  = TRUE;
            n_iter     = diag_len;
            n_elem_max = n;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conjugation part of trans is forwarded to the vector kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        if ( shrinking )
        {
            n_elem = n_elem_max - j;
            a_seg  = a + j*lda + j*inca;
            b_seg  = b + j*ldb + j*incb;
        }
        else
        {
            n_elem = bli_min( j + 1, n_elem_max );
            a_seg  = a + j*lda;
            b_seg  = b + j*ldb;
        }

        bli_dcopyv( conj,
                    n_elem,
                    a_seg, inca,
                    b_seg, incb );
    }
}
void bli_dcopymt ( trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_dcopy(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_dcreate_contigm(), bli_dcreate_contigmt(), bli_dfree_saved_contigm(), bli_dfree_saved_contigmsr(), bli_dsymm(), bli_dsyr2k(), bli_dtrmmsx(), bli_dtrsmsx(), FLA_Bsvd_v_opd_var2(), FLA_Copy_external(), FLA_Copyt_external(), FLA_Tevd_v_opd_var2(), and FLA_Tevd_v_opd_var4().

{
    // Calls bli_dcopy() once per column (or per row) of the m-by-n operands,
    // passing a slice of A read through trans and the matching slice of B.
    // Vector-shaped operands are handled with a single call.
    double*   a_slice;
    double*   b_slice;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       k;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // One call covers the whole vector. The leading dimensions are only
        // ever multiplied by k == 0, so their values are irrelevant.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1;
        ldb    = 1;
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Default traversal: column by column.
        n_iter = n;
        n_elem = m;
        ldb    = b_cs;
        incb   = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            lda  = a_rs;
            inca = a_cs;
        }
        else
        {
            lda  = a_cs;
            inca = a_rs;
        }

        // Switch to a row-by-row traversal only when B is row-stored and A,
        // after the possible transposition, is effectively row-stored too.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    for ( k = 0; k < n_iter; ++k )
    {
        a_slice = a + k*lda;
        b_slice = b + k*ldb;

        bli_dcopy( n_elem,
                   a_slice, inca,
                   b_slice, incb );
    }
}
void bli_dcopyv ( conj_t  conj,
int  m,
double *  x,
int  incx,
double *  y,
int  incy 
)
void bli_ddcopymr ( uplo_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_dcopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

{
    // Walks the uplo-selected triangle of the m-by-n operands one column (or
    // one row, when B is row-stored) at a time and hands each segment of A
    // and B to bli_dcopyv().
    double*   a_seg;
    double*   b_seg;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       walk_upper;
    int       j;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        // Row-stored B: traverse by rows for better spatial locality. The
        // roles of rows and columns are exchanged, so uplo is toggled too.
        n_iter     = m;
        n_elem_max = n;
        lda        = a_rs;
        inca       = a_cs;
        ldb        = b_rs;
        incb       = b_cs;
        bli_toggle_uplo( uplo );
    }
    else
    {
        // Otherwise traverse by columns.
        n_iter     = n;
        n_elem_max = m;
        lda        = a_cs;
        inca       = a_rs;
        ldb        = b_cs;
        incb       = b_rs;
    }

    walk_upper = bli_is_upper( uplo );

    for ( j = 0; j < n_iter; ++j )
    {
        if ( walk_upper )
        {
            // Segments start at the edge and grow up to n_elem_max.
            n_elem = bli_min( j + 1, n_elem_max );
            a_seg  = a + j*lda;
            b_seg  = b + j*ldb;
        }
        else
        {
            // Segments start on the diagonal and shrink; stop once nothing
            // is left below/right of the diagonal.
            n_elem = n_elem_max - j;
            if ( n_elem <= 0 ) break;

            a_seg  = a + j*lda + j*inca;
            b_seg  = b + j*ldb + j*incb;
        }

        bli_dcopyv( BLIS_NO_TRANSPOSE,
                    n_elem,
                    a_seg, inca,
                    b_seg, incb );
    }
}
void bli_ddcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_dcopyv(), bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

{
    // Walks the uplo-selected triangle of the m-by-n operands one column
    // (column-stored B) or one row (otherwise) at a time and hands each
    // segment of A (read through trans) and B to bli_dcopyv().
    double*   a_seg;
    double*   b_seg;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       shrinking;   // TRUE when segments start on the diagonal
    int       diag_len;
    int       j;
    conj_t    conj;

    // Empty operands: nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    diag_len = bli_min( m, n );

    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Traverse column by column.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            // Columns of a lower triangle start on the diagonal and shrink.
            shrinking  = TRUE;
            n_iter     = diag_len;
            n_elem_max = m;
        }
        else
        {
            // Columns of an upper triangle start at the top and grow.
            shrinking  = FALSE;
            n_iter     = n;
            n_elem_max = diag_len;
        }
    }
    else
    {
        // Traverse row by row.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            // Rows of a lower triangle start at the left edge and grow.
            shrinking  = FALSE;
            n_iter     = m;
            n_elem_max = diag_len;
        }
        else
        {
            // Rows of an upper triangle start on the diagonal and shrink.
            shrinking  = TRUE;
            n_iter     = diag_len;
            n_elem_max = n;
        }
    }

    // A transposed A is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conjugation part of trans is forwarded to the vector kernel.
    conj = bli_proj_trans_to_conj( trans );

    for ( j = 0; j < n_iter; ++j )
    {
        if ( shrinking )
        {
            n_elem = n_elem_max - j;
            a_seg  = a + j*lda + j*inca;
            b_seg  = b + j*ldb + j*incb;
        }
        else
        {
            n_elem = bli_min( j + 1, n_elem_max );
            a_seg  = a + j*lda;
            b_seg  = b + j*ldb;
        }

        bli_dcopyv( conj,
                    n_elem,
                    a_seg, inca,
                    b_seg, incb );
    }
}
void bli_ddcopymt ( trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_dcopyv(), bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

{
    conj_t    conj_a;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // An operand with a zero dimension leaves nothing to copy.
    if ( bli_zero_dim2( m, n ) ) return;

    // Only the conjugation component of trans is handed to the vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: one bli_dcopyv() call covers the whole copy.
        // The leading dimensions are only ever multiplied by k == 0 here,
        // so their value is irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1;
        b_ld    = 1;
        a_inc   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix operands: begin by walking B one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is stored by rows, walk both operands by rows instead; the
        // intent is better spatial locality on B.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Copy one vector of A into the matching vector of B per iteration.
    for ( k = 0; k < n_vecs; ++k )
    {
        bli_dcopyv( conj_a,
                    vec_len,
                    a + k*a_ld, a_inc,
                    b + k*b_ld, b_inc );
    }
}
void bli_ddot ( conj_t  conj,
int  n,
double *  x,
int  incx,
double *  y,
int  incy,
double *  rho 
)
void bli_ddot2s ( conj_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy,
double *  beta,
double *  rho 
)
void bli_ddots ( conj_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx,
double *  y,
int  incy,
double *  beta,
double *  rho 
)
void bli_dfnorm ( int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  norm 
)

References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Norm_frob().

{
    double*   vec;
    double    elem;
    double    sum_sq;
    int       ld, inc;
    int       n_vecs;
    int       vec_len;
    int       p, q;

    // With a zero dimension we return immediately; *norm is not written.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // A vector is treated as one run of elements. ld is only ever
        // multiplied by q == 0, so its value is irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        ld      = 1;
        inc     = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // Matrix: walk column by column by default.
        n_vecs  = n;
        vec_len = m;
        ld      = a_cs;
        inc     = a_rs;

        // For row storage, walk row by row instead; the intent is better
        // spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld, inc );
        }
    }

    // Accumulate the squares of all elements.
    sum_sq = 0.0;

    for ( q = 0; q < n_vecs; q++ )
    {
        vec = a + q*ld;

        for ( p = 0; p < vec_len; p++ )
        {
            elem    = *( vec + p*inc );
            sum_sq += elem * elem;
        }
    }

    // The result is the square root of the accumulated sum.
    *norm = sqrt( sum_sq );
}
void bli_dinvscalm ( conj_t  conj,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)

References bli_dinvert2s(), bli_dscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    double    scale;
    int       ld, inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do for an empty operand or when bli_deq1() reports that
    // alpha equals one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operand: a single bli_dscal() call does all the work.
        // ld is only ever multiplied by k == 0, so its value is irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        ld      = 1;
        inc     = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // Matrix operand: scale column by column by default.
        n_vecs  = n;
        vec_len = m;
        ld      = a_cs;
        inc     = a_rs;

        // For row storage, scale row by row instead; the intent is better
        // spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld, inc );
        }
    }

    // Let bli_dinvert2s() derive the scaling factor from conj and alpha
    // once, up front, so that the loop is a plain scal.
    bli_dinvert2s( conj, alpha, &scale );

    for ( k = 0; k < n_vecs; k++ )
    {
        bli_dscal( vec_len,
                   &scale,
                   a + k*ld, inc );
    }
}
void bli_dinvscalv ( conj_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx 
)
void bli_dnrm2 ( int  n,
double *  x,
int  incx,
double *  norm 
)

References cblas_dnrm2(), and F77_dnrm2().

Referenced by FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_l_opd(), FLA_Househ3UD_UT_opd(), and FLA_Nrm2_external().

{
    // Forward to whichever BLAS binding the build selected and store the
    // returned value through norm.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // F77 binding: n and incx are passed by address.
    *norm = F77_dnrm2( &n, x, &incx );
#else
    // CBLAS binding: n and incx are passed by value.
    *norm = cblas_dnrm2( n, x, incx );
#endif
}
void bli_dscal ( int  n,
double *  alpha,
double *  x,
int  incx 
)

References cblas_dscal(), and F77_dscal().

Referenced by bli_daxpysmt(), bli_daxpysv(), bli_dinvscalm(), bli_dinvscalv(), bli_dscalm(), bli_dscalmr(), bli_dscalv(), bli_zconjm(), bli_zconjmr(), bli_zconjv(), and FLA_SA_LU_unb().

{
    // Forward to whichever BLAS binding the build selected.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // F77 binding: n and incx are passed by address; alpha is already a
    // pointer and is passed through unchanged.
    F77_dscal( &n, alpha, x, &incx );
#else
    // CBLAS binding: the scalar is passed by value, so alpha is dereferenced.
    cblas_dscal( n, *alpha, x, incx );
#endif
}
void bli_dscalm ( conj_t  conj,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)

References bli_dscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_dgemm(), bli_dsymm(), bli_dtrmmsx(), bli_dtrsmsx(), FLA_Lyap_h_opd_var1(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_h_opd_var4(), FLA_Lyap_n_opd_var1(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_Lyap_n_opd_var4(), FLA_Scal_external(), and FLA_Scalc_external().

{
    double    scale;
    int       ld, inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do for an empty operand or when bli_deq1() reports that
    // alpha equals one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operand: a single bli_dscal() call does all the work.
        // ld is only ever multiplied by k == 0, so its value is irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        ld      = 1;
        inc     = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else
    {
        // Matrix operand: scale column by column by default.
        n_vecs  = n;
        vec_len = m;
        ld      = a_cs;
        inc     = a_rs;

        // For row storage, scale row by row instead; the intent is better
        // spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( ld, inc );
        }
    }

    // Produce the local scaling factor from conj and alpha via bli_dcopys().
    bli_dcopys( conj, alpha, &scale );

    for ( k = 0; k < n_vecs; k++ )
    {
        bli_dscal( vec_len,
                   &scale,
                   a + k*ld, inc );
    }
}
void bli_dscalmr ( uplo_t  uplo,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs 
)

References bli_dscal(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().

Referenced by FLA_Scalr_external().

{
    int       ld, inc;
    int       n_vecs;
    int       len_cap;
    int       len;
    int       k;

    // Nothing to do for an empty operand or when bli_deq1() reports that
    // alpha equals one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    // Default traversal: one column per iteration.
    n_vecs  = n;
    len_cap = m;
    ld      = a_cs;
    inc     = a_rs;

    // For row storage, traverse by rows instead (intended to improve
    // spatial locality). uplo is toggled along with the stride swap.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_vecs, len_cap );
        bli_swap_ints( ld, inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k starts at offset k*ld and holds k+1 elements, capped at
        // len_cap.
        for ( k = 0; k < n_vecs; k++ )
        {
            len = k + 1;
            if ( len > len_cap ) len = len_cap;

            bli_dscal( len,
                       alpha,
                       a + k*ld, inc );
        }
    }
    else
    {
        // Vector k starts on the diagonal and holds len_cap-k elements; the
        // loop ends as soon as that count would drop to zero.
        for ( k = 0; k < n_vecs && k < len_cap; k++ )
        {
            len = len_cap - k;

            bli_dscal( len,
                       alpha,
                       a + k*ld + k*inc, inc );
        }
    }
}
void bli_dscalv ( conj_t  conj,
int  n,
double *  alpha,
double *  x,
int  incx 
)
void bli_dscopymr ( uplo_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_dscopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len_cap;
    int       len;
    int       k;

    // An operand with a zero dimension leaves nothing to copy.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one column per iteration.
    n_vecs  = n;
    len_cap = m;
    a_ld    = a_cs;
    a_inc   = a_rs;
    b_ld    = b_cs;
    b_inc   = b_rs;

    // If B is stored by rows, traverse by rows instead (intended to improve
    // spatial locality). uplo is toggled along with the stride swaps.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, len_cap );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k holds k+1 elements, capped at len_cap.
        for ( k = 0; k < n_vecs; k++ )
        {
            len = k + 1;
            if ( len > len_cap ) len = len_cap;

            bli_dscopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld, a_inc,
                         b + k*b_ld, b_inc );
        }
    }
    else
    {
        // Vector k starts on the diagonal and holds len_cap-k elements; the
        // loop ends as soon as that count would drop to zero.
        for ( k = 0; k < n_vecs && k < len_cap; k++ )
        {
            len = len_cap - k;

            bli_dscopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld + k*a_inc, a_inc,
                         b + k*b_ld + k*b_inc, b_inc );
        }
    }
}
void bli_dscopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_dscopyv(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    double*   a_vec;
    float*    b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len;
    int       len_cap;
    int       outer_dim, inner_dim;
    int       short_dim;
    int       shrinking;
    int       k;
    conj_t    conj_a;

    // An operand with a zero dimension leaves nothing to copy.
    if ( bli_zero_dim2( m, n ) ) return;

    short_dim = bli_min( m, n );

    // Choose the traversal direction from B's storage. Whether the vectors
    // shrink or grow from one iteration to the next depends on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        outer_dim = n;
        inner_dim = m;
        a_ld      = a_cs;
        a_inc     = a_rs;
        b_ld      = b_cs;
        b_inc     = b_rs;

        if ( bli_is_lower( uplo ) ) { shrinking = TRUE;  }
        else                        { shrinking = FALSE; }
    }
    else
    {
        outer_dim = m;
        inner_dim = n;
        a_ld      = a_rs;
        a_inc     = a_cs;
        b_ld      = b_rs;
        b_inc     = b_cs;

        if ( bli_is_lower( uplo ) ) { shrinking = FALSE; }
        else                        { shrinking = TRUE;  }
    }

    // Shrinking vectors: min(m,n) iterations, first vector spans inner_dim.
    // Growing vectors: outer_dim iterations, length capped at min(m,n).
    if ( shrinking )
    {
        n_vecs  = short_dim;
        len_cap = inner_dim;
    }
    else
    {
        n_vecs  = outer_dim;
        len_cap = short_dim;
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is handed to the vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        if ( shrinking )
        {
            // Start on the diagonal; one element fewer per iteration.
            len    = len_cap - k;
            a_vec += k*a_inc;
            b_vec += k*b_inc;
        }
        else
        {
            // Start at the head of the vector; one element more per
            // iteration, up to len_cap.
            len = bli_min( k + 1, len_cap );
        }

        bli_dscopyv( conj_a,
                     len,
                     a_vec, a_inc,
                     b_vec, b_inc );
    }
}
void bli_dscopymt ( trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_dscopyv(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    conj_t    conj_a;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // An operand with a zero dimension leaves nothing to copy.
    if ( bli_zero_dim2( m, n ) ) return;

    // Only the conjugation component of trans is handed to the vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: one bli_dscopyv() call covers the whole copy.
        // The leading dimensions are only ever multiplied by k == 0 here,
        // so their value is irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1;
        b_ld    = 1;
        a_inc   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix operands: begin by walking B one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is stored by rows, walk both operands by rows instead; the
        // intent is better spatial locality on B.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Copy one double vector of A into the matching float vector of B per
    // iteration.
    for ( k = 0; k < n_vecs; ++k )
    {
        bli_dscopyv( conj_a,
                     vec_len,
                     a + k*a_ld, a_inc,
                     b + k*b_ld, b_inc );
    }
}
void bli_dscopyv ( conj_t  conj,
int  m,
double *  x,
int  incx,
float *  y,
int  incy 
)

References bli_zero_dim1().

Referenced by bli_dscopymr(), bli_dscopymrt(), and bli_dscopymt().

{
    double*   src;
    float*    dst;
    int       remaining;

    // An empty vector leaves nothing to copy.
    if ( bli_zero_dim1( m ) ) return;

    src       = x;
    dst       = y;
    remaining = m;

    // Copy m elements, narrowing each double to float on assignment.
    // The conj argument is not consulted here.
    while ( remaining-- > 0 )
    {
        *dst = *src;

        src += incx;
        dst += incy;
    }
}
void bli_dswap ( int  n,
double *  x,
int  incx,
double *  y,
int  incy 
)

References cblas_dswap(), and F77_dswap().

Referenced by bli_dswapmt(), bli_dswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

{
    // Forward to whichever BLAS binding the build selected.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // F77 binding: the length and both increments are passed by address.
    F77_dswap( &n, x, &incx, y, &incy );
#else
    // CBLAS binding: the length and both increments are passed by value.
    cblas_dswap( n, x, incx, y, incy );
#endif
}
void bli_dswapmt ( trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_does_notrans(), bli_does_trans(), bli_dswap(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

{
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // An operand with a zero dimension leaves nothing to swap.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: one bli_dswap() call covers the whole exchange.
        // The leading dimensions are only ever multiplied by k == 0 here,
        // so their value is irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1;
        b_ld    = 1;
        a_inc   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix operands: begin by walking one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // Switch to a row-wise walk only when B is stored by rows and A,
        // after the possible transposition, is effectively stored by rows
        // as well (intended to improve spatial locality).
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Exchange one vector of A with the matching vector of B per iteration.
    for ( k = 0; k < n_vecs; k++ )
    {
        bli_dswap( vec_len,
                   a + k*a_ld, a_inc,
                   b + k*b_ld, b_inc );
    }
}
void bli_dswapv ( int  n,
double *  x,
int  incx,
double *  y,
int  incy 
)

References bli_dswap(), and bli_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_evd_b_opd(), FLA_Sort_evd_f_opd(), FLA_Sort_svd_b_opd(), and FLA_Sort_svd_f_opd().

{
    // An empty vector leaves nothing to swap.
    if ( bli_zero_dim1( n ) )
    {
        return;
    }

    // Otherwise forward all arguments unchanged to the swap wrapper.
    bli_dswap( n, x, incx, y, incy );
}
void bli_dzcopymr ( uplo_t  uplo,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_dzcopyv(), bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len_cap;
    int       len;
    int       k;

    // An operand with a zero dimension leaves nothing to copy.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one column per iteration.
    n_vecs  = n;
    len_cap = m;
    a_ld    = a_cs;
    a_inc   = a_rs;
    b_ld    = b_cs;
    b_inc   = b_rs;

    // If B is stored by rows, traverse by rows instead (intended to improve
    // spatial locality). uplo is toggled along with the stride swaps.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, len_cap );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k holds k+1 elements, capped at len_cap.
        for ( k = 0; k < n_vecs; k++ )
        {
            len = k + 1;
            if ( len > len_cap ) len = len_cap;

            bli_dzcopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld, a_inc,
                         b + k*b_ld, b_inc );
        }
    }
    else
    {
        // Vector k starts on the diagonal and holds len_cap-k elements; the
        // loop ends as soon as that count would drop to zero.
        for ( k = 0; k < n_vecs && k < len_cap; k++ )
        {
            len = len_cap - k;

            bli_dzcopyv( BLIS_NO_TRANSPOSE,
                         len,
                         a + k*a_ld + k*a_inc, a_inc,
                         b + k*b_ld + k*b_inc, b_inc );
        }
    }
}
void bli_dzcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_dzcopyv(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    double*   a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       len;
    int       len_cap;
    int       outer_dim, inner_dim;
    int       short_dim;
    int       shrinking;
    int       k;
    conj_t    conj_a;

    // An operand with a zero dimension leaves nothing to copy.
    if ( bli_zero_dim2( m, n ) ) return;

    short_dim = bli_min( m, n );

    // Choose the traversal direction from B's storage. Whether the vectors
    // shrink or grow from one iteration to the next depends on uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        outer_dim = n;
        inner_dim = m;
        a_ld      = a_cs;
        a_inc     = a_rs;
        b_ld      = b_cs;
        b_inc     = b_rs;

        if ( bli_is_lower( uplo ) ) { shrinking = TRUE;  }
        else                        { shrinking = FALSE; }
    }
    else
    {
        outer_dim = m;
        inner_dim = n;
        a_ld      = a_rs;
        a_inc     = a_cs;
        b_ld      = b_rs;
        b_inc     = b_cs;

        if ( bli_is_lower( uplo ) ) { shrinking = FALSE; }
        else                        { shrinking = TRUE;  }
    }

    // Shrinking vectors: min(m,n) iterations, first vector spans inner_dim.
    // Growing vectors: outer_dim iterations, length capped at min(m,n).
    if ( shrinking )
    {
        n_vecs  = short_dim;
        len_cap = inner_dim;
    }
    else
    {
        n_vecs  = outer_dim;
        len_cap = short_dim;
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is handed to the vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        if ( shrinking )
        {
            // Start on the diagonal; one element fewer per iteration.
            len    = len_cap - k;
            a_vec += k*a_inc;
            b_vec += k*b_inc;
        }
        else
        {
            // Start at the head of the vector; one element more per
            // iteration, up to len_cap.
            len = bli_min( k + 1, len_cap );
        }

        bli_dzcopyv( conj_a,
                     len,
                     a_vec, a_inc,
                     b_vec, b_inc );
    }
}
void bli_dzcopymt ( trans_t  trans,
int  m,
int  n,
double *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_dzcopyv(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    conj_t    conj_a;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // An operand with a zero dimension leaves nothing to copy.
    if ( bli_zero_dim2( m, n ) ) return;

    // Only the conjugation component of trans is handed to the vector copy.
    conj_a = bli_proj_trans_to_conj( trans );

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: one bli_dzcopyv() call covers the whole copy.
        // The leading dimensions are only ever multiplied by k == 0 here,
        // so their value is irrelevant.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1;
        b_ld    = 1;
        a_inc   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Matrix operands: begin by walking B one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is stored by rows, walk both operands by rows instead; the
        // intent is better spatial locality on B.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }

    // Copy one real vector of A into the matching complex vector of B per
    // iteration.
    for ( k = 0; k < n_vecs; ++k )
    {
        bli_dzcopyv( conj_a,
                     vec_len,
                     a + k*a_ld, a_inc,
                     b + k*b_ld, b_inc );
    }
}
void bli_dzcopyv ( conj_t  conj,
int  m,
double *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References bli_zero_dim1(), dcomplex::imag, and dcomplex::real.

Referenced by bli_dzcopymr(), bli_dzcopymrt(), and bli_dzcopymt().

{
    double*   src;
    dcomplex* dst;
    int       remaining;

    // An empty vector leaves nothing to copy.
    if ( bli_zero_dim1( m ) ) return;

    src       = x;
    dst       = y;
    remaining = m;

    // Each real source element becomes a complex value with a zero
    // imaginary part. The conj argument is not consulted here.
    while ( remaining-- > 0 )
    {
        dst->real = *src;
        dst->imag = 0.0;

        src += incx;
        dst += incy;
    }
}
void bli_icopymt ( trans_t  trans,
int  m,
int  n,
int *  a,
int  a_rs,
int  a_cs,
int *  b,
int  b_rs,
int  b_cs 
)

References bli_does_notrans(), bli_does_trans(), bli_icopyv(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    int*      a_begin;
    int*      b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj_t    conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;
        
        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major and if A is effectively row-major
        // after a possible transposition, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                bli_swap_ints( n_iter, n_elem );
                bli_swap_ints( lda, inca );
                bli_swap_ints( ldb, incb );
            }
        }
    }

    // Extract conj component from trans parameter. bli_icopyv() takes a
    // conj_t as its first argument, so the trans_t value must not be passed
    // through directly; the other copy routines project it the same way.
    conj = bli_proj_trans_to_conj( trans );

    // Copy one vector of A into the matching vector of B per iteration.
    for ( j = 0; j < n_iter; j++ )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;
        
        bli_icopyv( conj,
                    n_elem,
                    a_begin, inca, 
                    b_begin, incb );
    }
}
void bli_icopyv ( conj_t  conj,
int  m,
int *  x,
int  incx,
int *  y,
int  incy 
)

References bli_zero_dim1().

Referenced by bli_icopymt().

{
    int*      src;
    int*      dst;
    int       remaining;

    // An empty vector leaves nothing to copy.
    if ( bli_zero_dim1( m ) ) return;

    src       = x;
    dst       = y;
    remaining = m;

    // Copy m integers, stepping each operand by its own increment.
    // The conj argument is not consulted here.
    while ( remaining-- > 0 )
    {
        *dst = *src;

        src += incx;
        dst += incy;
    }
}
// Stores in *index the position returned by the BLAS isamax routine for the
// n-element float vector x with stride incx.
//
// When BLIS_ENABLE_CBLAS_INTERFACES is defined the CBLAS result is stored
// unchanged; otherwise the Fortran-77 result is reduced by one before it is
// stored (the reference BLAS ISAMAX reports a 1-based position).
void bli_samax ( int  n,
float *  x,
int  incx,
int *  index 
)

References cblas_isamax(), and F77_isamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_LU_piv_ops_var5(), and FLA_SA_LU_unb().

{
    int       pos;

#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    pos = cblas_isamax( n, x, incx );
#else
    // The Fortran interface takes its scalar arguments by address.
    pos = F77_isamax( &n, x, &incx ) - 1;
#endif

    *index = pos;
}
// Stores in *norm the value returned by the BLAS sasum routine for the
// n-element float vector x with stride incx (per the BLAS reference, the sum
// of the absolute values of the elements).
//
// BLIS_ENABLE_CBLAS_INTERFACES selects the CBLAS entry point; otherwise the
// Fortran-77 entry point is used, with scalars passed by address.
void bli_sasum ( int  n,
float *  x,
int  incx,
float *  norm 
)

References cblas_sasum(), and F77_sasum().

Referenced by FLA_Asum_external().

{
    float     total;

#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    total = cblas_sasum( n, x, incx );
#else
    total = F77_sasum( &n, x, &incx );
#endif

    *norm = total;
}
// Thin wrapper that forwards to the BLAS saxpy routine, which per the BLAS
// reference performs y := y + (*alpha) * x on n-element float vectors.
//
//   n          - number of elements.
//   alpha      - address of the scalar multiplier.
//   x, incx    - source vector and its stride.
//   y, incy    - vector updated in place and its stride.
void bli_saxpy ( int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy 
)

References cblas_saxpy(), and F77_saxpy().

Referenced by bli_saxpymt(), bli_saxpysmt(), bli_saxpysv(), and bli_saxpyv().

{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the scalar and the strides by value.
    cblas_saxpy( n, *alpha, x, incx, y, incy );
#else
    // The Fortran interface takes every scalar argument by address.
    F77_saxpy( &n, alpha, x, &incx, y, &incy );
#endif
}
// Calls bli_saxpyv once per column or row of one triangle of the m-by-n
// matrix B, pairing each segment of B with the corresponding segment of A.
// bli_saxpyv's body is not shown here; it is listed as a caller of
// bli_saxpy, so presumably each call performs the axpy update
// b_segment := b_segment + (*alpha) * a_segment.
//
//   uplo             - triangle selector: the lower-triangle traversal is
//                      used when bli_is_lower( uplo ) is true, the upper one
//                      otherwise.
//   trans            - when bli_does_trans( trans ) is true A is read with
//                      its two strides exchanged; the conj component is
//                      extracted and forwarded to bli_saxpyv.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   alpha            - scalar forwarded to bli_saxpyv.
//   a, a_rs, a_cs    - matrix A with its row and column strides.
//   b, b_rs, b_cs    - matrix B with its row and column strides.
void bli_saxpymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_saxpyv(), and bli_zero_dim2().

Referenced by FLA_Axpyrt_external().

{
    float*    a_seg;
    float*    b_seg;
    int       a_outer, a_inner;
    int       b_outer, b_inner;
    int       short_dim;
    int       n_pass;
    int       len_limit;
    int       seg_len;
    int       diag_shift;
    int       shrinking;
    int       k;
    conj_t    conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    short_dim = bli_min( m, n );

    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-major B: one pass per column, running down the rows.
        a_outer = a_cs;
        a_inner = a_rs;
        b_outer = b_cs;
        b_inner = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = short_dim;
            len_limit = m;
            shrinking = TRUE;
        }
        else
        {
            n_pass    = n;
            len_limit = short_dim;
            shrinking = FALSE;
        }
    }
    else
    {
        // Otherwise B is treated as row-major: one pass per row, running
        // across the columns.
        a_outer = a_rs;
        a_inner = a_cs;
        b_outer = b_rs;
        b_inner = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = m;
            len_limit = short_dim;
            shrinking = FALSE;
        }
        else
        {
            n_pass    = short_dim;
            len_limit = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its outer and inner strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_outer, a_inner );
    }

    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_pass; ++k )
    {
        if ( shrinking )
        {
            // Segments start on the diagonal and get one element shorter
            // with every pass.
            seg_len    = len_limit - k;
            diag_shift = k;
        }
        else
        {
            // Segments start at the edge and grow by one element per pass
            // until they reach len_limit.
            seg_len    = bli_min( k + 1, len_limit );
            diag_shift = 0;
        }

        a_seg = a + k*a_outer + diag_shift*a_inner;
        b_seg = b + k*b_outer + diag_shift*b_inner;

        bli_saxpyv( conj, seg_len, alpha, a_seg, a_inner, b_seg, b_inner );
    }
}
// Applies bli_saxpy (y := y + (*alpha) * x per the BLAS axpy contract) to the
// m-by-n matrix B one column or row at a time, taking the x operand from the
// matching column or row of A as seen through the optional transposition.
//
//   trans            - only its transposition component is consulted here
//                      (bli_does_trans / bli_does_notrans); it selects how
//                      A's strides are interpreted.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   alpha            - address of the scalar forwarded to bli_saxpy.
//   a, a_rs, a_cs    - matrix A with its row and column strides.
//   b, b_rs, b_cs    - matrix B with its row and column strides.
void bli_saxpymt ( trans_t  trans,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_saxpy(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_sgemm(), bli_ssymm(), bli_strmmsx(), bli_strsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

{
    int       a_outer, a_inner;
    int       b_outer, b_inner;
    int       n_pass;
    int       seg_len;
    int       k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands are handled with a single axpy call. The outer
        // strides are only ever multiplied by k == 0, so their value is
        // irrelevant.
        n_pass  = 1;
        seg_len = bli_vector_dim( m, n );
        a_outer = 1;
        a_inner = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_outer = 1;
        b_inner = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Start from a column-by-column traversal.
        n_pass  = n;
        seg_len = m;
        b_outer = b_cs;
        b_inner = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            a_outer = a_rs;
            a_inner = a_cs;
        }
        else
        {
            a_outer = a_cs;
            a_inner = a_rs;
        }

        // Switch to a row-by-row traversal when B is row-major and A is
        // effectively row-major once the transposition is accounted for.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_pass, seg_len );
            bli_swap_ints( a_outer, a_inner );
            bli_swap_ints( b_outer, b_inner );
        }
    }

    for ( k = 0; k < n_pass; ++k )
    {
        bli_saxpy( seg_len,
                   alpha,
                   a + k*a_outer, a_inner,
                   b + k*b_outer, b_inner );
    }
}
// For every column or row of the m-by-n matrix B, first scales it with
// bli_sscal using beta and then calls bli_saxpy with the scalar
// (*alpha0) * (*alpha1) and the matching column or row of A (seen through the
// optional transposition). bli_sscal's body is not shown here; per the BLAS
// contracts the net effect is presumably
// B := (*beta) * B + (*alpha0) * (*alpha1) * A.
//
//   trans            - only its transposition component is consulted here; it
//                      selects how A's strides are interpreted.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   alpha0, alpha1   - addresses of the two scalars whose product multiplies A.
//   a, a_rs, a_cs    - matrix A with its row and column strides.
//   beta             - address of the scalar forwarded to bli_sscal.
//   b, b_rs, b_cs    - matrix B with its row and column strides.
void bli_saxpysmt ( trans_t  trans,
int  m,
int  n,
float *  alpha0,
float *  alpha1,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_saxpy(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

{
    float*    b_seg;
    float     scale;
    int       a_outer, a_inner;
    int       b_outer, b_inner;
    int       n_pass;
    int       seg_len;
    int       k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    // Fold the two alpha scalars into one before touching B.
    scale = (*alpha0) * (*alpha1);

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands are handled with a single scal/axpy pair. The
        // outer strides are only ever multiplied by k == 0, so their value
        // is irrelevant.
        n_pass  = 1;
        seg_len = bli_vector_dim( m, n );
        a_outer = 1;
        a_inner = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_outer = 1;
        b_inner = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Start from a column-by-column traversal.
        n_pass  = n;
        seg_len = m;
        b_outer = b_cs;
        b_inner = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            a_outer = a_rs;
            a_inner = a_cs;
        }
        else
        {
            a_outer = a_cs;
            a_inner = a_rs;
        }

        // Switch to a row-by-row traversal when B is row-major and A is
        // effectively row-major once the transposition is accounted for.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_pass, seg_len );
            bli_swap_ints( a_outer, a_inner );
            bli_swap_ints( b_outer, b_inner );
        }
    }

    for ( k = 0; k < n_pass; ++k )
    {
        b_seg = b + k*b_outer;

        // Scale the segment of B first, then accumulate the scaled segment
        // of A into it.
        bli_sscal( seg_len, beta, b_seg, b_inner );

        bli_saxpy( seg_len,
                   &scale,
                   a + k*a_outer, a_inner,
                   b_seg, b_inner );
    }
}
// Scales the n-element vector y with bli_sscal using beta, then calls
// bli_saxpy with the scalar (*alpha0) * (*alpha1) and the vector x.
// bli_sscal's body is not shown here; per the BLAS contracts the net effect
// is presumably y := (*beta) * y + (*alpha0) * (*alpha1) * x.
//
//   n                - number of elements; nothing happens when
//                      bli_zero_dim1( n ) is true.
//   alpha0, alpha1   - addresses of the two scalars whose product multiplies x.
//   x, incx          - source vector and its stride.
//   beta             - address of the scalar forwarded to bli_sscal.
//   y, incy          - vector updated in place and its stride.
void bli_saxpysv ( int  n,
float *  alpha0,
float *  alpha1,
float *  x,
int  incx,
float *  beta,
float *  y,
int  incy 
)

References bli_saxpy(), bli_sscal(), and bli_zero_dim1().

Referenced by FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().

{
    float    scale;

    if ( !bli_zero_dim1( n ) )
    {
        // The product is formed before y is modified.
        scale = (*alpha0) * (*alpha1);

        bli_sscal( n, beta, y, incy );
        bli_saxpy( n, &scale, x, incx, y, incy );
    }
}
void bli_saxpyv ( conj_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy 
)
// Copies one triangle of the m-by-n float matrix A into the scomplex matrix
// B, one column or row at a time, by calling bli_sccopyv on each segment.
//
//   uplo             - triangle selector: the "upper" loop runs when
//                      bli_is_upper( uplo ) is true, the "lower" loop
//                      otherwise. uplo is toggled locally when the traversal
//                      is switched to rows.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   a, a_rs, a_cs    - source matrix A with its row and column strides.
//   b, b_rs, b_cs    - destination matrix B with its row and column strides.
//                      b is a pointer: the body offsets it and stores the
//                      result in a scomplex*.
void bli_sccopymr ( uplo_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_sccopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality.
    // Walking by rows exchanges the roles of the two dimensions and of the
    // strides, and the triangle selector is toggled to match.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_uplo( uplo );
    }

    // NOTE(review): BLIS_NO_TRANSPOSE is passed below in bli_sccopyv's conj_t
    // slot; it looks like a trans_t constant. bli_sccopyv never reads conj,
    // so the value has no effect here -- confirm whether BLIS_NO_CONJUGATE
    // was intended.
    if ( bli_is_upper( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Segments start at the edge and grow by one element per
            // iteration until they reach n_elem_max.
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bli_sccopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( bli_is_lower( uplo ) )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Segments start on the diagonal and shrink by one element per
            // iteration.
            n_elem  = bli_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            // Once a segment is empty every later one is empty as well.
            if ( n_elem <= 0 ) break;

            bli_sccopyv( BLIS_NO_TRANSPOSE,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// Copies one triangle of the float matrix A, read through an optional
// transposition, into the m-by-n scomplex matrix B by calling bli_sccopyv on
// one column or row segment at a time.
//
//   uplo             - triangle selector: the lower-triangle traversal is
//                      used when bli_is_lower( uplo ) is true, the upper one
//                      otherwise.
//   trans            - when bli_does_trans( trans ) is true A is read with
//                      its two strides exchanged; the conj component is
//                      extracted and forwarded to bli_sccopyv.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   a, a_rs, a_cs    - source matrix A with its row and column strides.
//   b, b_rs, b_cs    - destination matrix B with its row and column strides.
//                      b is a pointer: the body offsets it and stores the
//                      result in a scomplex*.
void bli_sccopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_sccopyv(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       n_elem_max;
    int       n_elem_is_descending;
    int       j;
    conj_t    conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Initialize variables based on storage format of B and value of uplo.
    // Column storage walks columns (segments run down the rows); otherwise
    // rows are walked. Segments shrink from the diagonal for lower/column and
    // upper/row, and grow from the edge for the other two combinations.
    if      ( bli_is_col_storage( b_rs, b_cs ) )
    {
        if ( bli_is_lower( uplo ) )
        {
            n_iter     = bli_min( m, n );
            n_elem_max = m;
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = TRUE;
        }
        else // if ( bli_is_upper( uplo ) )
        {
            n_iter     = n;
            n_elem_max = bli_min( m, n );
            lda        = a_cs;
            inca       = a_rs;
            ldb        = b_cs;
            incb       = b_rs;
            n_elem_is_descending = FALSE;
        }
    }
    else // if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        if ( bli_is_lower( uplo ) )
        {
            n_iter     = m;
            n_elem_max = bli_min( m, n );
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = FALSE;
        }
        else // if ( bli_is_upper( uplo ) )
        {
            n_iter     = bli_min( m, n );
            n_elem_max = n;
            lda        = a_rs;
            inca       = a_cs;
            ldb        = b_rs;
            incb       = b_cs;
            n_elem_is_descending = TRUE;
        }
    }

    // Swap lda and inca if we're doing a transpose.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Extract conj component from trans parameter.
    conj = bli_proj_trans_to_conj( trans );

    // Choose the loop based on whether n_elem will be shrinking or growing
    // with each iteration.
    if ( n_elem_is_descending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start on the diagonal element of the j-th column/row.
            n_elem  = n_elem_max - j;
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            bli_sccopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
    else // if ( n_elem_is_ascending )
    {
        for ( j = 0; j < n_iter; j++ )
        {
            // Start at the edge; the segment ends on the diagonal or at
            // n_elem_max, whichever comes first.
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            bli_sccopyv( conj,
                         n_elem,
                         a_begin, inca,
                         b_begin, incb );
        }
    }
}
// Copies the float matrix A, read through an optional transposition, into
// the m-by-n scomplex matrix B by calling bli_sccopyv on one column or row
// at a time.
//
//   trans            - when bli_does_trans( trans ) is true A is read with
//                      its two strides exchanged; the conj component is
//                      extracted and forwarded to bli_sccopyv.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   a, a_rs, a_cs    - source matrix A with its row and column strides.
//   b, b_rs, b_cs    - destination matrix B with its row and column strides.
//                      b is a pointer: the body offsets it and stores the
//                      result in a scomplex*.
void bli_sccopymt ( trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_sccopyv(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    float*    a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem;
    int       j;
    conj_t    conj;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // Handle cases where A and B are vectors to ensure that the underlying copy
    // gets invoked only once.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        ldb    = 1; // multiplied by zero when n_iter == 1; not needed.
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage of B.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        ldb    = b_cs;
        incb   = b_rs;

        // Handle the transposition of A.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // An optimization: if B is row-major, then let's access the matrix by rows
        // instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
            bli_swap_ints( ldb, incb );
        }
    }

    // Extract conj component from trans parameter.
    conj = bli_proj_trans_to_conj( trans );

    // One bli_sccopyv call per column (or per row after the swap above).
    for ( j = 0; j < n_iter; ++j )
    {
        a_begin = a + j*lda;
        b_begin = b + j*ldb;

        bli_sccopyv( conj,
                     n_elem,
                     a_begin, inca,
                     b_begin, incb );
    }
}
// Copies m float elements from the strided vector x into the strided scomplex
// vector y: each destination element receives the source value as its real
// part and 0.0F as its imaginary part.
//
//   conj - not read by this body.
//   m    - number of elements to copy; the routine returns immediately when
//          bli_zero_dim1( m ) is true.
//   x    - first source element; consecutive elements are incx floats apart.
//   y    - first destination element; consecutive elements are incy scomplex
//          objects apart. y is a pointer: the body assigns it to a scomplex*
//          and writes through it.
void bli_sccopyv ( conj_t  conj,
int  m,
float *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bli_zero_dim1(), scomplex::imag, and scomplex::real.

Referenced by bli_sccopymr(), bli_sccopymrt(), and bli_sccopymt().

{
    float*    chi;
    scomplex* psi;
    int       i;

    // Return early if possible.
    if ( bli_zero_dim1( m ) ) return;

    // Initialize pointers.
    chi = x;
    psi = y;

    for ( i = 0; i < m; ++i )
    {
        // Promote the real value to a complex number with zero imaginary
        // part.
        psi->real = *chi;
        psi->imag = 0.0F;

        chi += incx;
        psi += incy;
    }
}
// Intentionally a no-op: the body performs no work and none of the arguments
// is read or written. Presumably this exists because conjugating real (float)
// data leaves it unchanged.
void bli_sconjm ( int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs 
)
{
    // Nothing to do.
}
// Intentionally a no-op: the body performs no work and none of the arguments
// is read or written. Presumably this exists because conjugating real (float)
// data leaves it unchanged.
void bli_sconjmr ( uplo_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs 
)
{
    // Nothing to do.
}
void bli_sconjv ( int  m,
float *  x,
int  incx 
)
// Thin wrapper that forwards to the BLAS scopy routine, which per the BLAS
// reference copies the m-element float vector x into y.
//
//   m          - number of elements.
//   x, incx    - source vector and its stride.
//   y, incy    - destination vector and its stride.
void bli_scopy ( int  m,
float *  x,
int  incx,
float *  y,
int  incy 
)

References cblas_scopy(), and F77_scopy().

Referenced by bli_scopymr(), bli_scopymt(), bli_scopyv(), and FLA_SA_LU_unb().

{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS takes the length and the strides by value.
    cblas_scopy( m, x, incx, y, incy );
#else
    // The Fortran interface takes every scalar argument by address.
    F77_scopy( &m, x, &incx, y, &incy );
#endif
}
// Copies one triangle of the m-by-n float matrix A into the float matrix B,
// one column or row at a time, by calling bli_scopy on each segment.
//
//   uplo             - triangle selector: the "upper" loop runs when
//                      bli_is_upper( uplo ) is true, the "lower" loop
//                      otherwise. uplo is toggled locally when the traversal
//                      is switched to rows.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   a, a_rs, a_cs    - source matrix A with its row and column strides.
//   b, b_rs, b_cs    - destination matrix B with its row and column strides.
void bli_scopymr ( uplo_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_scopy(), and bli_zero_dim2().

Referenced by bli_screate_contigmr(), bli_sfree_saved_contigmr(), and FLA_Copyr_external().

{
    int       a_outer, a_inner;
    int       b_outer, b_inner;
    int       n_pass;
    int       len_limit;
    int       seg_len;
    int       k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one pass per column, running down the rows.
    n_pass    = n;
    len_limit = m;
    a_outer   = a_cs;
    a_inner   = a_rs;
    b_outer   = b_cs;
    b_inner   = b_rs;

    // When both matrices are row-major, walk rows instead. That exchanges
    // the roles of the two dimensions and of the strides, and the triangle
    // selector is toggled to match.
    if ( bli_is_row_storage( b_rs, b_cs ) && bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_pass, len_limit );
        bli_swap_ints( a_outer, a_inner );
        bli_swap_ints( b_outer, b_inner );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Segments start at the edge and grow by one element per pass until
        // they reach len_limit.
        for ( k = 0; k < n_pass; ++k )
        {
            seg_len = bli_min( k + 1, len_limit );

            bli_scopy( seg_len,
                       a + k*a_outer, a_inner,
                       b + k*b_outer, b_inner );
        }
    }
    else
    {
        // Segments start on the diagonal and shrink by one element per pass;
        // the loop ends as soon as a segment would be empty.
        for ( k = 0; k < n_pass && k < len_limit; ++k )
        {
            seg_len = len_limit - k;

            bli_scopy( seg_len,
                       a + k*a_outer + k*a_inner, a_inner,
                       b + k*b_outer + k*b_inner, b_inner );
        }
    }
}
// Copies one triangle of the float matrix A, read through an optional
// transposition, into the m-by-n float matrix B by calling bli_scopyv on one
// column or row segment at a time. (bli_scopyv's body is not shown here; it
// is listed as a caller of bli_scopy.)
//
//   uplo             - triangle selector: the lower-triangle traversal is
//                      used when bli_is_lower( uplo ) is true, the upper one
//                      otherwise.
//   trans            - when bli_does_trans( trans ) is true A is read with
//                      its two strides exchanged; the conj component is
//                      extracted and forwarded to bli_scopyv.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   a, a_rs, a_cs    - source matrix A with its row and column strides.
//   b, b_rs, b_cs    - destination matrix B with its row and column strides.
void bli_scopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_scopyv(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), and FLA_Lyap_n_ops_var4().

{
    float*    a_seg;
    float*    b_seg;
    int       a_outer, a_inner;
    int       b_outer, b_inner;
    int       short_dim;
    int       n_pass;
    int       len_limit;
    int       seg_len;
    int       diag_shift;
    int       shrinking;
    int       k;
    conj_t    conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    short_dim = bli_min( m, n );

    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-major B: one pass per column, running down the rows.
        a_outer = a_cs;
        a_inner = a_rs;
        b_outer = b_cs;
        b_inner = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = short_dim;
            len_limit = m;
            shrinking = TRUE;
        }
        else
        {
            n_pass    = n;
            len_limit = short_dim;
            shrinking = FALSE;
        }
    }
    else
    {
        // Otherwise B is treated as row-major: one pass per row, running
        // across the columns.
        a_outer = a_rs;
        a_inner = a_cs;
        b_outer = b_rs;
        b_inner = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = m;
            len_limit = short_dim;
            shrinking = FALSE;
        }
        else
        {
            n_pass    = short_dim;
            len_limit = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its outer and inner strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_outer, a_inner );
    }

    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_pass; ++k )
    {
        if ( shrinking )
        {
            // Segments start on the diagonal and get one element shorter
            // with every pass.
            seg_len    = len_limit - k;
            diag_shift = k;
        }
        else
        {
            // Segments start at the edge and grow by one element per pass
            // until they reach len_limit.
            seg_len    = bli_min( k + 1, len_limit );
            diag_shift = 0;
        }

        a_seg = a + k*a_outer + diag_shift*a_inner;
        b_seg = b + k*b_outer + diag_shift*b_inner;

        bli_scopyv( conj, seg_len, a_seg, a_inner, b_seg, b_inner );
    }
}
// Copies the float matrix A, read through an optional transposition, into
// the m-by-n float matrix B by calling bli_scopy on one column or row at a
// time.
//
//   trans            - only its transposition component is consulted here
//                      (bli_does_trans / bli_does_notrans); it selects how
//                      A's strides are interpreted.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   a, a_rs, a_cs    - source matrix A with its row and column strides.
//   b, b_rs, b_cs    - destination matrix B with its row and column strides.
void bli_scopymt ( trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_scopy(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_screate_contigm(), bli_screate_contigmt(), bli_sfree_saved_contigm(), bli_sfree_saved_contigmsr(), bli_ssymm(), bli_ssyr2k(), bli_strmmsx(), bli_strsmsx(), FLA_Copy_external(), and FLA_Copyt_external().

{
    int       a_outer, a_inner;
    int       b_outer, b_inner;
    int       n_pass;
    int       seg_len;
    int       k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands are handled with a single copy call. The outer
        // strides are only ever multiplied by k == 0, so their value is
        // irrelevant.
        n_pass  = 1;
        seg_len = bli_vector_dim( m, n );
        a_outer = 1;
        a_inner = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_outer = 1;
        b_inner = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }
    else
    {
        // Start from a column-by-column traversal.
        n_pass  = n;
        seg_len = m;
        b_outer = b_cs;
        b_inner = b_rs;

        // A transposed A is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            a_outer = a_rs;
            a_inner = a_cs;
        }
        else
        {
            a_outer = a_cs;
            a_inner = a_rs;
        }

        // Switch to a row-by-row traversal when B is row-major and A is
        // effectively row-major once the transposition is accounted for.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_pass, seg_len );
            bli_swap_ints( a_outer, a_inner );
            bli_swap_ints( b_outer, b_inner );
        }
    }

    for ( k = 0; k < n_pass; ++k )
    {
        bli_scopy( seg_len,
                   a + k*a_outer, a_inner,
                   b + k*b_outer, b_inner );
    }
}
void bli_scopyv ( conj_t  conj,
int  m,
float *  x,
int  incx,
float *  y,
int  incy 
)
// Copies one triangle of the m-by-n float matrix A into the double matrix B,
// one column or row at a time, by calling bli_sdcopyv on each segment.
// (bli_sdcopyv's body is not shown in this section.)
//
//   uplo             - triangle selector: the "upper" loop runs when
//                      bli_is_upper( uplo ) is true, the "lower" loop
//                      otherwise. uplo is toggled locally when the traversal
//                      is switched to rows.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   a, a_rs, a_cs    - source matrix A with its row and column strides.
//   b, b_rs, b_cs    - destination matrix B with its row and column strides.
void bli_sdcopymr ( uplo_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_sdcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    int       a_outer, a_inner;
    int       b_outer, b_inner;
    int       n_pass;
    int       len_limit;
    int       seg_len;
    int       k;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: one pass per column, running down the rows.
    n_pass    = n;
    len_limit = m;
    a_outer   = a_cs;
    a_inner   = a_rs;
    b_outer   = b_cs;
    b_inner   = b_rs;

    // When B is row-major, walk rows instead. That exchanges the roles of
    // the two dimensions and of the strides, and the triangle selector is
    // toggled to match.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_pass, len_limit );
        bli_swap_ints( a_outer, a_inner );
        bli_swap_ints( b_outer, b_inner );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Segments start at the edge and grow by one element per pass until
        // they reach len_limit.
        for ( k = 0; k < n_pass; ++k )
        {
            seg_len = bli_min( k + 1, len_limit );

            bli_sdcopyv( BLIS_NO_TRANSPOSE,
                         seg_len,
                         a + k*a_outer, a_inner,
                         b + k*b_outer, b_inner );
        }
    }
    else
    {
        // Segments start on the diagonal and shrink by one element per pass;
        // the loop ends as soon as a segment would be empty.
        for ( k = 0; k < n_pass && k < len_limit; ++k )
        {
            seg_len = len_limit - k;

            bli_sdcopyv( BLIS_NO_TRANSPOSE,
                         seg_len,
                         a + k*a_outer + k*a_inner, a_inner,
                         b + k*b_outer + k*b_inner, b_inner );
        }
    }
}
// Copies one triangle of the float matrix A, read through an optional
// transposition, into the m-by-n double matrix B by calling bli_sdcopyv on
// one column or row segment at a time. (bli_sdcopyv's body is not shown in
// this section.)
//
//   uplo             - triangle selector: the lower-triangle traversal is
//                      used when bli_is_lower( uplo ) is true, the upper one
//                      otherwise.
//   trans            - when bli_does_trans( trans ) is true A is read with
//                      its two strides exchanged; the conj component is
//                      extracted and forwarded to bli_sdcopyv.
//   m, n             - dimensions; the routine returns immediately when
//                      bli_zero_dim2( m, n ) is true.
//   a, a_rs, a_cs    - source matrix A with its row and column strides.
//   b, b_rs, b_cs    - destination matrix B with its row and column strides.
void bli_sdcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_sdcopyv(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    float*    a_seg;
    double*   b_seg;
    int       a_outer, a_inner;
    int       b_outer, b_inner;
    int       short_dim;
    int       n_pass;
    int       len_limit;
    int       seg_len;
    int       diag_shift;
    int       shrinking;
    int       k;
    conj_t    conj;

    // An empty matrix needs no work.
    if ( bli_zero_dim2( m, n ) ) return;

    short_dim = bli_min( m, n );

    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-major B: one pass per column, running down the rows.
        a_outer = a_cs;
        a_inner = a_rs;
        b_outer = b_cs;
        b_inner = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = short_dim;
            len_limit = m;
            shrinking = TRUE;
        }
        else
        {
            n_pass    = n;
            len_limit = short_dim;
            shrinking = FALSE;
        }
    }
    else
    {
        // Otherwise B is treated as row-major: one pass per row, running
        // across the columns.
        a_outer = a_rs;
        a_inner = a_cs;
        b_outer = b_rs;
        b_inner = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_pass    = m;
            len_limit = short_dim;
            shrinking = FALSE;
        }
        else
        {
            n_pass    = short_dim;
            len_limit = n;
            shrinking = TRUE;
        }
    }

    // A transposed A is read with its outer and inner strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_outer, a_inner );
    }

    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_pass; ++k )
    {
        if ( shrinking )
        {
            // Segments start on the diagonal and get one element shorter
            // with every pass.
            seg_len    = len_limit - k;
            diag_shift = k;
        }
        else
        {
            // Segments start at the edge and grow by one element per pass
            // until they reach len_limit.
            seg_len    = bli_min( k + 1, len_limit );
            diag_shift = 0;
        }

        a_seg = a + k*a_outer + diag_shift*a_inner;
        b_seg = b + k*b_outer + diag_shift*b_inner;

        bli_sdcopyv( conj, seg_len, a_seg, a_inner, b_seg, b_inner );
    }
}
void bli_sdcopymt ( trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_sdcopyv(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies the m x n single-precision operand A (optionally transposed, per
    // trans) into the double-precision operand B, one vector at a time via
    // bli_sdcopyv().
    int       n_iter, n_elem; // number of vector copies, length of each
    int       lda, inca;      // A: step between vectors / within a vector
    int       ldb, incb;      // B: step between vectors / within a vector
    int       j;
    conj_t    conj;

    // Nothing to copy when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: issue exactly one underlying copy.
        n_elem = bli_vector_dim( m, n );
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
        n_iter = 1;

        // With a single iteration these are only multiplied by j == 0, so
        // their value is irrelevant.
        lda    = 1;
        ldb    = 1;
    }
    else
    {
        // Matrix operands: start from a column-by-column traversal of B.
        ldb    = b_cs;
        incb   = b_rs;
        lda    = a_cs;
        inca   = a_rs;
        n_iter = n;
        n_elem = m;

        // A transposed A is walked with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // When B is row-major, traverse by rows instead of by columns for
        // better spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( ldb, incb );
            bli_swap_ints( lda, inca );
            bli_swap_ints( n_iter, n_elem );
        }
    }

    // Only the conj component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    j = 0;
    while ( j < n_iter )
    {
        bli_sdcopyv( conj, n_elem, a + j*lda, inca, b + j*ldb, incb );
        j++;
    }
}
void bli_sdcopyv ( conj_t  conj,
int  m,
float *  x,
int  incx,
double *  y,
int  incy 
)

References bli_zero_dim1().

Referenced by bli_sdcopymr(), bli_sdcopymrt(), and bli_sdcopymt().

{
    // Widens m single-precision elements of x (stride incx) into the
    // double-precision vector y (stride incy). The conj argument is not
    // consulted by this routine.
    int       k;

    // Nothing to copy when bli_zero_dim1() reports an empty vector.
    if ( bli_zero_dim1( m ) ) return;

    for ( k = 0; k < m; k++ )
    {
        y[ k*incy ] = ( double ) x[ k*incx ];
    }
}
void bli_sdot ( conj_t  conj,
int  n,
float *  x,
int  incx,
float *  y,
int  incy,
float *  rho 
)
void bli_sdot2s ( conj_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy,
float *  beta,
float *  rho 
)
void bli_sdots ( conj_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx,
float *  y,
int  incy,
float *  beta,
float *  rho 
)
void bli_sfnorm ( int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  norm 
)

References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Norm_frob().

{
    // Computes the Frobenius norm of the m x n single-precision operand A
    // (row stride a_rs, column stride a_cs): the square root of the sum of the
    // squares of its elements. The result is stored in *norm.
    float*    a_ij;
    float     sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // An empty operand has no elements to accumulate, so its norm is zero.
    // Store that result explicitly so the caller never reads an unwritten
    // *norm after the early return.
    if ( bli_zero_dim2( m, n ) )
    {
        *norm = 0.0F;
        return;
    }

    // Handle cases where A is a vector separately.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;
        
        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0F;

    // Accumulate the square of every element; j walks the outer (lda) stride
    // and i walks the inner (inca) stride.
    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;
            sum += (*a_ij) * (*a_ij);
        }
    }
    
    // Compute the norm and store the result.
    *norm = ( float ) sqrt( sum );
}
void bli_sinvscalm ( conj_t  conj,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_vector(), bli_sinvert2s(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Scales the m x n single-precision operand A in place, one vector at a
    // time, by the value bli_sinvert2s() derives from conj and alpha.
    float     alpha_inv;
    int       n_iter, n_elem; // number of scal calls, length of each
    int       lda, inca;      // step between vectors / within a vector
    int       j;

    // Bail out when bli_zero_dim2() or bli_seq1() reports there is no work.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operand: issue exactly one underlying scal.
        n_elem = bli_vector_dim( m, n );
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
        n_iter = 1;
        lda    = 1; // only ever multiplied by j == 0; value is irrelevant.
    }
    else
    {
        // Matrix operand: default to one scal per column.
        lda    = a_cs;
        inca   = a_rs;
        n_iter = n;
        n_elem = m;

        // When A is row-major, go row by row instead for better spatial
        // locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( lda, inca );
            bli_swap_ints( n_iter, n_elem );
        }
    }

    // Derive the scaling factor once; every vector is scaled by alpha_inv.
    bli_sinvert2s( conj, alpha, &alpha_inv );

    for ( j = 0; j < n_iter; ++j )
    {
        bli_sscal( n_elem, &alpha_inv, a + j*lda, inca );
    }
}
void bli_sinvscalv ( conj_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx 
)
void bli_snrm2 ( int  n,
float *  x,
int  incx,
float *  norm 
)

References cblas_snrm2(), and F77_snrm2().

Referenced by FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_l_ops(), FLA_Househ3UD_UT_ops(), and FLA_Nrm2_external().

{
    // Stores in *norm the value returned by the snrm2 routine of whichever
    // BLAS binding this build selects.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n and incx are passed by address.
    *norm = F77_snrm2( &n, x, &incx );
#else
    // CBLAS binding: n and incx are passed by value.
    *norm = cblas_snrm2( n, x, incx );
#endif
}
void bli_sscal ( int  n,
float *  alpha,
float *  x,
int  incx 
)

References cblas_sscal(), and F77_sscal().

Referenced by bli_cconjm(), bli_cconjmr(), bli_cconjv(), bli_saxpysmt(), bli_saxpysv(), bli_sinvscalm(), bli_sinvscalv(), bli_sscalm(), bli_sscalmr(), bli_sscalv(), and FLA_SA_LU_unb().

{
    // Forwards to the sscal routine of whichever BLAS binding this build
    // selects, operating on n elements of x with stride incx.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n and incx go by address, alpha stays a pointer.
    F77_sscal( &n, alpha, x, &incx );
#else
    // CBLAS binding: n, incx and the dereferenced alpha go by value.
    cblas_sscal( n, *alpha, x, incx );
#endif
}
void bli_sscalm ( conj_t  conj,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_vector(), bli_sscal(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_sgemm(), bli_ssymm(), bli_strmmsx(), bli_strsmsx(), FLA_Lyap_h_ops_var1(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_h_ops_var4(), FLA_Lyap_n_ops_var1(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_Lyap_n_ops_var4(), FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scales the m x n single-precision operand A in place, one vector at a
    // time, by the value bli_scopys() derives from conj and alpha.
    float     alpha_conj;
    int       n_iter, n_elem; // number of scal calls, length of each
    int       lda, inca;      // step between vectors / within a vector
    int       j;

    // Bail out when bli_zero_dim2() or bli_seq1() reports there is no work.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operand: issue exactly one underlying scal.
        n_elem = bli_vector_dim( m, n );
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
        n_iter = 1;
        lda    = 1; // only ever multiplied by j == 0; value is irrelevant.
    }
    else
    {
        // Matrix operand: default to one scal per column.
        lda    = a_cs;
        inca   = a_rs;
        n_iter = n;
        n_elem = m;

        // When A is row-major, go row by row instead for better spatial
        // locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( lda, inca );
            bli_swap_ints( n_iter, n_elem );
        }
    }

    // Derive the scaling factor once; every vector is scaled by alpha_conj.
    bli_scopys( conj, alpha, &alpha_conj );

    for ( j = 0; j < n_iter; ++j )
    {
        bli_sscal( n_elem, &alpha_conj, a + j*lda, inca );
    }
}
void bli_sscalmr ( uplo_t  uplo,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_sscal(), and bli_zero_dim2().

Referenced by FLA_Scalr_external().

{
    // Scales, in place and by alpha, the part of the m x n single-precision
    // operand A selected by uplo: growing leading segments of each vector for
    // the upper case, shrinking diagonal-anchored segments for the lower case.
    int       n_iter, n_elem_max; // number of vectors, cap on segment length
    int       lda, inca;          // step between vectors / within a vector
    int       j;

    // Bail out when bli_zero_dim2() or bli_seq1() reports there is no work.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_seq1( alpha ) ) return;

    // Start from a column-by-column traversal.
    lda        = a_cs;
    inca       = a_rs;
    n_elem_max = m;
    n_iter     = n;

    // When A is row-major, traverse by rows instead for better spatial
    // locality; uplo is toggled to go with the exchanged traversal.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_toggle_uplo( uplo );
        bli_swap_ints( lda, inca );
        bli_swap_ints( n_iter, n_elem_max );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector j contributes its first min( j + 1, n_elem_max ) elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_sscal( bli_min( j + 1, n_elem_max ), alpha, a + j*lda, inca );
        }
    }
    else
    {
        // Vector j contributes n_elem_max - j elements starting at its
        // diagonal entry; stop as soon as that count is no longer positive.
        for ( j = 0; j < n_iter && j < n_elem_max; ++j )
        {
            bli_sscal( n_elem_max - j, alpha, a + j*lda + j*inca, inca );
        }
    }
}
void bli_sscalv ( conj_t  conj,
int  n,
float *  alpha,
float *  x,
int  incx 
)
void bli_sscopymr ( uplo_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_scopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

{
    // Copies the part of the m x n single-precision operand A selected by
    // uplo into the same part of B, one vector segment at a time via
    // bli_scopyv().
    // NOTE(review): BLIS_NO_TRANSPOSE is passed as bli_scopyv()'s first
    // argument, where sibling routines pass a conj_t value — confirm intended.
    int       n_iter, n_elem_max; // number of vectors, cap on segment length
    int       lda, inca;          // A: step between vectors / within a vector
    int       ldb, incb;          // B: step between vectors / within a vector
    int       j;

    // Nothing to copy when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start from a column-by-column traversal.
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;
    n_elem_max = m;
    n_iter     = n;

    // When B is row-major, traverse by rows instead for better spatial
    // locality; uplo is toggled to go with the exchanged traversal.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_toggle_uplo( uplo );
        bli_swap_ints( ldb, incb );
        bli_swap_ints( lda, inca );
        bli_swap_ints( n_iter, n_elem_max );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector j contributes its first min( j + 1, n_elem_max ) elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_scopyv( BLIS_NO_TRANSPOSE,
                        bli_min( j + 1, n_elem_max ),
                        a + j*lda, inca,
                        b + j*ldb, incb );
        }
    }
    else
    {
        // Vector j contributes n_elem_max - j elements starting at its
        // diagonal entry; stop as soon as that count is no longer positive.
        for ( j = 0; j < n_iter && j < n_elem_max; ++j )
        {
            bli_scopyv( BLIS_NO_TRANSPOSE,
                        n_elem_max - j,
                        a + j*lda + j*inca, inca,
                        b + j*ldb + j*incb, incb );
        }
    }
}
void bli_sscopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_scopyv(), and bli_zero_dim2().

{
    // Copies the part of single-precision A (optionally transposed, per
    // trans) selected by uplo into the m x n operand B, one vector segment at
    // a time via bli_scopyv().
    int       lda, inca;            // A: step between vectors / within one
    int       ldb, incb;            // B: step between vectors / within one
    int       n_iter;               // number of vector segments to copy
    int       n_elem_max;           // cap on (or start of) segment length
    int       n_elem_is_descending; // TRUE when segments shrink with j
    int       j;
    conj_t    conj;

    // Nothing to copy when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Pick the traversal from B's storage format and from uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column traversal.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = m;
            n_elem_is_descending = TRUE;
        }
        else
        {
            n_iter               = n;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
    }
    else
    {
        // Row-by-row traversal.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = m;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
        else
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = n;
            n_elem_is_descending = TRUE;
        }
    }

    // A transposed A is walked with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conj component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    if ( n_elem_is_descending )
    {
        // Segment j starts on the diagonal and holds n_elem_max - j elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_scopyv( conj,
                        n_elem_max - j,
                        a + j*lda + j*inca, inca,
                        b + j*ldb + j*incb, incb );
        }
    }
    else
    {
        // Segment j starts at the head of vector j and holds
        // min( j + 1, n_elem_max ) elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_scopyv( conj,
                        bli_min( j + 1, n_elem_max ),
                        a + j*lda, inca,
                        b + j*ldb, incb );
        }
    }
}
void bli_sscopymt ( trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_scopyv(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

{
    // Copies the m x n single-precision operand A (optionally transposed, per
    // trans) into the single-precision operand B, one vector at a time via
    // bli_scopyv().
    int       n_iter, n_elem; // number of vector copies, length of each
    int       lda, inca;      // A: step between vectors / within a vector
    int       ldb, incb;      // B: step between vectors / within a vector
    int       j;
    conj_t    conj;

    // Nothing to copy when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: issue exactly one underlying copy.
        n_elem = bli_vector_dim( m, n );
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
        n_iter = 1;

        // With a single iteration these are only multiplied by j == 0, so
        // their value is irrelevant.
        lda    = 1;
        ldb    = 1;
    }
    else
    {
        // Matrix operands: start from a column-by-column traversal of B.
        ldb    = b_cs;
        incb   = b_rs;
        lda    = a_cs;
        inca   = a_rs;
        n_iter = n;
        n_elem = m;

        // A transposed A is walked with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // When B is row-major, traverse by rows instead of by columns for
        // better spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( ldb, incb );
            bli_swap_ints( lda, inca );
            bli_swap_ints( n_iter, n_elem );
        }
    }

    // Only the conj component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    j = 0;
    while ( j < n_iter )
    {
        bli_scopyv( conj, n_elem, a + j*lda, inca, b + j*ldb, incb );
        j++;
    }
}
void bli_sswap ( int  n,
float *  x,
int  incx,
float *  y,
int  incy 
)

References cblas_sswap(), and F77_sswap().

Referenced by bli_sswapmt(), bli_sswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

{
    // Forwards to the sswap routine of whichever BLAS binding this build
    // selects, operating on n elements of x (stride incx) and y (stride incy).
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n, incx and incy are passed by address.
    F77_sswap( &n, x, &incx, y, &incy );
#else
    // CBLAS binding: n, incx and incy are passed by value.
    cblas_sswap( n, x, incx, y, incy );
#endif
}
void bli_sswapmt ( trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_sswap(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

{
    // Exchanges the contents of the m x n single-precision operand B with
    // those of A (optionally transposed, per trans), one vector at a time via
    // bli_sswap().
    int       n_iter, n_elem; // number of vector swaps, length of each
    int       lda, inca;      // A: step between vectors / within a vector
    int       ldb, incb;      // B: step between vectors / within a vector
    int       j;

    // Nothing to swap when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: issue exactly one underlying swap.
        n_elem = bli_vector_dim( m, n );
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
        n_iter = 1;

        // With a single iteration these are only multiplied by j == 0, so
        // their value is irrelevant.
        lda    = 1;
        ldb    = 1;
    }
    else
    {
        // Matrix operands: start from a column-by-column traversal.
        ldb    = b_cs;
        incb   = b_rs;
        lda    = a_cs;
        inca   = a_rs;
        n_iter = n;
        n_elem = m;

        // A transposed A is walked with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // Switch to a row-by-row traversal only when B is row-major and A is
        // effectively row-major too once the transposition is accounted for;
        // this improves spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            if ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
                 ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) )
            {
                bli_swap_ints( ldb, incb );
                bli_swap_ints( lda, inca );
                bli_swap_ints( n_iter, n_elem );
            }
        }
    }

    j = 0;
    while ( j < n_iter )
    {
        bli_sswap( n_elem, a + j*lda, inca, b + j*ldb, incb );
        j++;
    }
}
void bli_sswapv ( int  n,
float *  x,
int  incx,
float *  y,
int  incy 
)

References bli_sswap(), and bli_zero_dim1().

Referenced by FLA_Apply_pivots_macro_external().

{
    // Swaps n elements of x (stride incx) with those of y (stride incy) by
    // delegating to bli_sswap(), unless bli_zero_dim1() reports an empty
    // vector.
    if ( bli_zero_dim1( n ) ) return;

    bli_sswap( n, x, incx, y, incy );
}
void bli_szcopymr ( uplo_t  uplo,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_szcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    // Copies the part of the m x n single-precision operand A selected by
    // uplo into the same part of the double-complex operand B, one vector
    // segment at a time via bli_szcopyv().
    // NOTE(review): BLIS_NO_TRANSPOSE is passed as bli_szcopyv()'s conj
    // argument — confirm a trans_t constant is intended there.
    int       n_iter, n_elem_max; // number of vectors, cap on segment length
    int       lda, inca;          // A: step between vectors / within a vector
    int       ldb, incb;          // B: step between vectors / within a vector
    int       j;

    // Nothing to copy when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start from a column-by-column traversal.
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;
    n_elem_max = m;
    n_iter     = n;

    // When B is row-major, traverse by rows instead for better spatial
    // locality; uplo is toggled to go with the exchanged traversal.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_toggle_uplo( uplo );
        bli_swap_ints( ldb, incb );
        bli_swap_ints( lda, inca );
        bli_swap_ints( n_iter, n_elem_max );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector j contributes its first min( j + 1, n_elem_max ) elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_szcopyv( BLIS_NO_TRANSPOSE,
                         bli_min( j + 1, n_elem_max ),
                         a + j*lda, inca,
                         b + j*ldb, incb );
        }
    }
    else
    {
        // Vector j contributes n_elem_max - j elements starting at its
        // diagonal entry; stop as soon as that count is no longer positive.
        for ( j = 0; j < n_iter && j < n_elem_max; ++j )
        {
            bli_szcopyv( BLIS_NO_TRANSPOSE,
                         n_elem_max - j,
                         a + j*lda + j*inca, inca,
                         b + j*ldb + j*incb, incb );
        }
    }
}
void bli_szcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_szcopyv(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies the part of single-precision A (optionally transposed, per
    // trans) selected by uplo into the m x n double-complex operand B, one
    // vector segment at a time via bli_szcopyv().
    int       lda, inca;            // A: step between vectors / within one
    int       ldb, incb;            // B: step between vectors / within one
    int       n_iter;               // number of vector segments to copy
    int       n_elem_max;           // cap on (or start of) segment length
    int       n_elem_is_descending; // TRUE when segments shrink with j
    int       j;
    conj_t    conj;

    // Nothing to copy when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Pick the traversal from B's storage format and from uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column traversal.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = m;
            n_elem_is_descending = TRUE;
        }
        else
        {
            n_iter               = n;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
    }
    else
    {
        // Row-by-row traversal.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = m;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
        else
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = n;
            n_elem_is_descending = TRUE;
        }
    }

    // A transposed A is walked with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conj component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    if ( n_elem_is_descending )
    {
        // Segment j starts on the diagonal and holds n_elem_max - j elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_szcopyv( conj,
                         n_elem_max - j,
                         a + j*lda + j*inca, inca,
                         b + j*ldb + j*incb, incb );
        }
    }
    else
    {
        // Segment j starts at the head of vector j and holds
        // min( j + 1, n_elem_max ) elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_szcopyv( conj,
                         bli_min( j + 1, n_elem_max ),
                         a + j*lda, inca,
                         b + j*ldb, incb );
        }
    }
}
void bli_szcopymt ( trans_t  trans,
int  m,
int  n,
float *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_szcopyv(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies the m x n single-precision operand A (optionally transposed, per
    // trans) into the double-complex operand B, one vector at a time via
    // bli_szcopyv().
    int       n_iter, n_elem; // number of vector copies, length of each
    int       lda, inca;      // A: step between vectors / within a vector
    int       ldb, incb;      // B: step between vectors / within a vector
    int       j;
    conj_t    conj;

    // Nothing to copy when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( bli_is_vector( m, n ) )
    {
        // Vector operands: issue exactly one underlying copy.
        n_elem = bli_vector_dim( m, n );
        inca   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        incb   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
        n_iter = 1;

        // With a single iteration these are only multiplied by j == 0, so
        // their value is irrelevant.
        lda    = 1;
        ldb    = 1;
    }
    else
    {
        // Matrix operands: start from a column-by-column traversal of B.
        ldb    = b_cs;
        incb   = b_rs;
        lda    = a_cs;
        inca   = a_rs;
        n_iter = n;
        n_elem = m;

        // A transposed A is walked with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( lda, inca );
        }

        // When B is row-major, traverse by rows instead of by columns for
        // better spatial locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( ldb, incb );
            bli_swap_ints( lda, inca );
            bli_swap_ints( n_iter, n_elem );
        }
    }

    // Only the conj component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    j = 0;
    while ( j < n_iter )
    {
        bli_szcopyv( conj, n_elem, a + j*lda, inca, b + j*ldb, incb );
        j++;
    }
}
void bli_szcopyv ( conj_t  conj,
int  m,
float *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References bli_zero_dim1(), dcomplex::imag, and dcomplex::real.

Referenced by bli_szcopymr(), bli_szcopymrt(), and bli_szcopymt().

{
    // Copies m single-precision elements of x (stride incx) into the real
    // parts of the double-complex vector y (stride incy) and zeroes the
    // matching imaginary parts. The conj argument is not consulted here.
    int       k;

    // Nothing to copy when bli_zero_dim1() reports an empty vector.
    if ( bli_zero_dim1( m ) ) return;

    for ( k = 0; k < m; k++ )
    {
        dcomplex* psi_k = y + k*incy;

        psi_k->real = x[ k*incx ];
        psi_k->imag = 0.0;
    }
}
void bli_zamax ( int  n,
dcomplex *  x,
int  incx,
int *  index 
)

References cblas_izamax(), and F77_izamax().

Referenced by FLA_Amax_external(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_LU_piv_opz_var5(), and FLA_SA_LU_unb().

{
    // Stores in *index the result of the izamax routine of whichever BLAS
    // binding this build selects.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n and incx are passed by address, and one is
    // subtracted from the returned value (presumably converting a 1-based
    // index to a 0-based one — confirm against the F77 routine).
    *index = F77_izamax( &n, x, &incx ) - 1;
#else
    // CBLAS binding: n and incx are passed by value; result is used as-is.
    *index = cblas_izamax( n, x, incx );
#endif
}
void bli_zasum ( int  n,
dcomplex *  x,
int  incx,
double *  norm 
)

References cblas_dzasum(), and F77_dzasum().

Referenced by FLA_Asum_external().

{
    // Stores in *norm the value returned by the dzasum routine of whichever
    // BLAS binding this build selects.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n and incx are passed by address.
    *norm = F77_dzasum( &n, x, &incx );
#else
    // CBLAS binding: n and incx are passed by value.
    *norm = cblas_dzasum( n, x, incx );
#endif
}
void bli_zaxpy ( int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References cblas_zaxpy(), and F77_zaxpy().

Referenced by bli_zaxpymt(), bli_zaxpysmt(), bli_zaxpysv(), and bli_zaxpyv().

{
    // Forwards to the zaxpy routine of whichever BLAS binding this build
    // selects, operating on n elements of x (stride incx) and y (stride incy).
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n, incx and incy are passed by address.
    F77_zaxpy( &n, alpha, x, &incx, y, &incy );
#else
    // CBLAS binding: n, incx and incy are passed by value; alpha is handed
    // over as the pointer it already is.
    cblas_zaxpy( n, alpha, x, incx, y, incy );
#endif
}
void bli_zaxpymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zaxpyv(), and bli_zero_dim2().

Referenced by bli_zher2k(), bli_zherk(), and FLA_Axpyrt_external().

{
    // Applies bli_zaxpyv() with scalar alpha from the part of double-complex
    // A (optionally transposed, per trans) selected by uplo onto the same part
    // of the m x n operand B, one vector segment at a time.
    int       lda, inca;            // A: step between vectors / within one
    int       ldb, incb;            // B: step between vectors / within one
    int       n_iter;               // number of vector segments to process
    int       n_elem_max;           // cap on (or start of) segment length
    int       n_elem_is_descending; // TRUE when segments shrink with j
    int       j;
    conj_t    conj;

    // Nothing to do when bli_zero_dim2() reports an empty operand.
    if ( bli_zero_dim2( m, n ) ) return;

    // Pick the traversal from B's storage format and from uplo.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column traversal.
        lda  = a_cs;
        inca = a_rs;
        ldb  = b_cs;
        incb = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = m;
            n_elem_is_descending = TRUE;
        }
        else
        {
            n_iter               = n;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
    }
    else
    {
        // Row-by-row traversal.
        lda  = a_rs;
        inca = a_cs;
        ldb  = b_rs;
        incb = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_iter               = m;
            n_elem_max           = bli_min( m, n );
            n_elem_is_descending = FALSE;
        }
        else
        {
            n_iter               = bli_min( m, n );
            n_elem_max           = n;
            n_elem_is_descending = TRUE;
        }
    }

    // A transposed A is walked with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( lda, inca );
    }

    // Only the conj component of trans is forwarded to the vector axpy.
    conj = bli_proj_trans_to_conj( trans );

    if ( n_elem_is_descending )
    {
        // Segment j starts on the diagonal and holds n_elem_max - j elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_zaxpyv( conj,
                        n_elem_max - j,
                        alpha,
                        a + j*lda + j*inca, inca,
                        b + j*ldb + j*incb, incb );
        }
    }
    else
    {
        // Segment j starts at the head of vector j and holds
        // min( j + 1, n_elem_max ) elements.
        for ( j = 0; j < n_iter; ++j )
        {
            bli_zaxpyv( conj,
                        bli_min( j + 1, n_elem_max ),
                        alpha,
                        a + j*lda, inca,
                        b + j*ldb, incb );
        }
    }
}
void bli_zaxpymt ( trans_t  trans,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zallocv(), bli_zaxpy(), bli_zcopyv(), bli_zero_dim2(), bli_zfree(), and BLIS_NO_TRANSPOSE.

Referenced by bli_zgemm(), bli_zhemm(), bli_zsymm(), bli_ztrmmsx(), bli_ztrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

{
    // Applies bli_zaxpy() slice by slice, pairing each column (or row) of A
    // with the matching column (or row) of B. When trans carries a
    // conjugation, each slice of A is first copied into a scratch vector via
    // bli_zcopyv() and the scratch vector is used in its place.
    dcomplex* a_vec;
    dcomplex* b_vec;
    dcomplex* a_scratch;
    conj_t    conj;
    int       a_scratch_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default walk the operands one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-major and A is effectively row-major once the
        // transposition is accounted for, walk by rows instead so that
        // consecutive accesses are adjacent in memory.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector case: a single axpy covers the whole operand.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1; // only ever multiplied by k == 0
        a_inc   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld    = 1; // only ever multiplied by k == 0
        b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    if ( !bli_does_conj( trans ) )
    {
        // No conjugation: the slices of A are handed to the axpy directly.
        for ( k = 0; k < n_vecs; k++ )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_zaxpy( vec_len,
                       alpha,
                       a_vec, a_inc,
                       b_vec, b_inc );
        }

        return;
    }

    // Conjugation requested: stage every slice of A in a unit-stride scratch
    // vector (filled by bli_zcopyv() with the conj flag) before the axpy.
    conj          = bli_proj_trans_to_conj( trans );
    a_scratch     = bli_zallocv( vec_len );
    a_scratch_inc = 1;

    for ( k = 0; k < n_vecs; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zcopyv( conj,
                    vec_len,
                    a_vec,     a_inc,
                    a_scratch, a_scratch_inc );

        bli_zaxpy( vec_len,
                   alpha,
                   a_scratch, a_scratch_inc,
                   b_vec,     b_inc );
    }

    bli_zfree( a_scratch );
}
void bli_zaxpysmt ( trans_t  trans,
int  m,
int  n,
dcomplex *  alpha0,
dcomplex *  alpha1,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  beta,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zallocv(), bli_zaxpy(), bli_zcopyv(), bli_zero_dim2(), bli_zfree(), bli_zscal(), BLIS_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Axpys_external().

{
    // For each column (or row) pair of A and B: calls bli_zscal() on the
    // slice of B with beta, then bli_zaxpy() with the complex product
    // alpha0 * alpha1 and the matching slice of A. When trans carries a
    // conjugation, the slice of A is first copied into a scratch vector via
    // bli_zcopyv().
    dcomplex* a_vec;
    dcomplex* b_vec;
    dcomplex* a_scratch;
    dcomplex  alpha_prod;
    conj_t    conj;
    int       a_scratch_inc;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // alpha_prod = alpha0 * alpha1 (complex multiplication).
    alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
    alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default walk the operands one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-major and A is effectively row-major once the
        // transposition is accounted for, walk by rows instead so that
        // consecutive accesses are adjacent in memory.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector case: a single scal/axpy pair covers the whole operand.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1; // only ever multiplied by k == 0
        a_inc   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld    = 1; // only ever multiplied by k == 0
        b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    if ( !bli_does_conj( trans ) )
    {
        // No conjugation: the slices of A are used directly.
        for ( k = 0; k < n_vecs; k++ )
        {
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_zscal( vec_len,
                       beta,
                       b_vec, b_inc );

            bli_zaxpy( vec_len,
                       &alpha_prod,
                       a_vec, a_inc,
                       b_vec, b_inc );
        }

        return;
    }

    // Conjugation requested: stage every slice of A in a unit-stride scratch
    // vector (filled by bli_zcopyv() with the conj flag) before it is used.
    conj          = bli_proj_trans_to_conj( trans );
    a_scratch     = bli_zallocv( vec_len );
    a_scratch_inc = 1;

    for ( k = 0; k < n_vecs; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zcopyv( conj,
                    vec_len,
                    a_vec,     a_inc,
                    a_scratch, a_scratch_inc );

        bli_zscal( vec_len,
                   beta,
                   b_vec, b_inc );

        bli_zaxpy( vec_len,
                   &alpha_prod,
                   a_scratch, a_scratch_inc,
                   b_vec,     b_inc );
    }

    bli_zfree( a_scratch );
}
void bli_zaxpysv ( int  n,
dcomplex *  alpha0,
dcomplex *  alpha1,
dcomplex *  x,
int  incx,
dcomplex *  beta,
dcomplex *  y,
int  incy 
)

References bli_zaxpy(), bli_zero_dim1(), bli_zscal(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().

{
    // Calls bli_zscal() on y with beta, then bli_zaxpy() on (x, y) with the
    // complex product alpha0 * alpha1.
    dcomplex scale;

    // Nothing to do for a zero-length vector.
    if ( bli_zero_dim1( n ) ) return;

    // scale = alpha0 * alpha1 (complex multiplication).
    scale.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
    scale.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;

    bli_zscal( n, beta, y, incy );

    bli_zaxpy( n, &scale, x, incx, y, incy );
}
void bli_zaxpyv ( conj_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References bli_is_conj(), bli_zallocv(), bli_zaxpy(), bli_zcopyv(), bli_zero_dim1(), and bli_zfree().

Referenced by bli_zaxpymrt(), bli_zgemv(), bli_zhemv(), bli_ztrmvsx(), bli_ztrsvsx(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_il_opz_var4(), FLA_Eig_gest_il_opz_var5(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_iu_opz_var4(), FLA_Eig_gest_iu_opz_var5(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nl_opz_var4(), FLA_Eig_gest_nl_opz_var5(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Eig_gest_nu_opz_var4(), FLA_Eig_gest_nu_opz_var5(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), and FLA_Tridiag_UT_l_step_opz_var3().

{
    // Forwards to bli_zaxpy(). When conj requests conjugation, x is first
    // copied (through bli_zcopyv() with the conj flag) into a temporary
    // unit-stride vector, and that temporary is passed to bli_zaxpy() in
    // place of x.
    dcomplex* x_tmp;

    // Nothing to do for a zero-length vector.
    if ( bli_zero_dim1( n ) ) return;

    if ( !bli_is_conj( conj ) )
    {
        // x can be used in place.
        bli_zaxpy( n,
                   alpha,
                   x, incx,
                   y, incy );
        return;
    }

    // Conjugation requested: work from a temporary copy and release it once
    // the axpy is done.
    x_tmp = bli_zallocv( n );

    bli_zcopyv( conj,
                n,
                x,     incx,
                x_tmp, 1 );

    bli_zaxpy( n,
               alpha,
               x_tmp, 1,
               y,     incy );

    bli_zfree( x_tmp );
}
void bli_zccopymr ( uplo_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_zccopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    // Copies the triangle of A selected by uplo into B, one column (or row)
    // at a time, via bli_zccopyv() (dcomplex source, scomplex destination).
    // No conjugation is requested from bli_zccopyv().
    dcomplex* a_begin;
    scomplex* b_begin;
    int       lda, inca;
    int       ldb, incb;
    int       n_iter;
    int       n_elem_max;
    int       n_elem;
    int       j;

    // Return early if possible.
    if ( bli_zero_dim2( m, n ) ) return;

    // We initialize for column-major.
    n_iter     = n;
    n_elem_max = m;
    lda        = a_cs;
    inca       = a_rs;
    ldb        = b_cs;
    incb       = b_rs;

    // An optimization: if B is row-major, then let's access the matrix
    // by rows instead of by columns for increased spatial locality. Walking
    // by rows exchanges the roles of the two dimensions and strides, so uplo
    // is toggled to match.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_iter, n_elem_max );
        bli_swap_ints( lda, inca );
        bli_swap_ints( ldb, incb );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Slice j starts at the leading edge and holds j + 1 elements,
        // capped at n_elem_max.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bli_min( j + 1, n_elem_max );
            a_begin = a + j*lda;
            b_begin = b + j*ldb;

            // bli_zccopyv() takes a conj_t as its first argument, so pass the
            // conj_t constant rather than the trans_t BLIS_NO_TRANSPOSE.
            bli_zccopyv( BLIS_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
    else // if ( bli_is_lower( uplo ) )
    {
        // Slice j starts on the diagonal and shrinks by one element per
        // iteration; stop as soon as nothing is left to copy.
        for ( j = 0; j < n_iter; j++ )
        {
            n_elem  = bli_max( 0, n_elem_max - j );
            a_begin = a + j*lda + j*inca;
            b_begin = b + j*ldb + j*incb;

            if ( n_elem <= 0 ) break;

            bli_zccopyv( BLIS_NO_CONJUGATE,
                         n_elem,
                         a_begin, inca, 
                         b_begin, incb );
        }
    }
}
void bli_zccopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zccopyv(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies the triangle of A selected by uplo into B via bli_zccopyv()
    // (dcomplex source, scomplex destination), one column or row at a time.
    // trans decides whether A's strides are exchanged and which conj value is
    // forwarded to bli_zccopyv().
    dcomplex* a_vec;
    scomplex* b_vec;
    conj_t    conj;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       vec_len_max;
    int       shrinking;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The storage of B picks the traversal direction (and hence the strides);
    // uplo then fixes how many slices there are and whether they shrink or
    // grow from one slice to the next.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs      = bli_min( m, n );
            vec_len_max = m;
            shrinking   = TRUE;
        }
        else // upper
        {
            n_vecs      = n;
            vec_len_max = bli_min( m, n );
            shrinking   = FALSE;
        }
    }
    else // B is row-stored
    {
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs      = m;
            vec_len_max = bli_min( m, n );
            shrinking   = FALSE;
        }
        else // upper
        {
            n_vecs      = bli_min( m, n );
            vec_len_max = n;
            shrinking   = TRUE;
        }
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation part of trans is passed down to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        if ( shrinking )
        {
            // Slice starts on the diagonal and loses one element per step.
            vec_len = vec_len_max - k;
            a_vec   = a + k*a_ld + k*a_inc;
            b_vec   = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Slice starts at the leading edge and gains one element per
            // step, capped at vec_len_max.
            vec_len = bli_min( k + 1, vec_len_max );
            a_vec   = a + k*a_ld;
            b_vec   = b + k*b_ld;
        }

        bli_zccopyv( conj,
                     vec_len,
                     a_vec, a_inc,
                     b_vec, b_inc );
    }
}
void bli_zccopymt ( trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zccopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies A into B via bli_zccopyv() (dcomplex source, scomplex
    // destination), one column or row at a time. trans decides whether A's
    // strides are exchanged and which conj value is forwarded.
    dcomplex* a_vec;
    scomplex* b_vec;
    conj_t    conj;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default walk B one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // When B is row-major, walk by rows instead so that consecutive
        // accesses to B are adjacent in memory.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector case: a single vector copy covers the whole operand.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1; // only ever multiplied by k == 0
        a_inc   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld    = 1; // only ever multiplied by k == 0
        b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // Only the conjugation part of trans is passed down to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zccopyv( conj,
                     vec_len,
                     a_vec, a_inc,
                     b_vec, b_inc );
    }
}
void bli_zccopyv ( conj_t  conj,
int  m,
dcomplex *  x,
int  incx,
scomplex *  y,
int  incy 
)

References bli_cconjv(), bli_is_conj(), bli_zero_dim1(), scomplex::imag, dcomplex::imag, scomplex::real, and dcomplex::real.

Referenced by bli_zccopymr(), bli_zccopymrt(), and bli_zccopymt().

{
    // Copies m elements from the dcomplex vector x into the scomplex vector
    // y, member by member, then calls bli_cconjv() on y when conj requests
    // conjugation.
    dcomplex* src;
    scomplex* dst;
    int       left;

    // Nothing to do for a zero-length vector.
    if ( bli_zero_dim1( m ) ) return;

    // Walk both vectors in lock-step, each with its own increment.
    for ( left = m, src = x, dst = y; left > 0; --left, src += incx, dst += incy )
    {
        dst->real = src->real;
        dst->imag = src->imag;
    }

    // The conjugation is applied to the destination after the copy.
    if ( bli_is_conj( conj ) )
    {
        bli_cconjv( m,
                    y, incy );
    }
}
void bli_zconjm ( int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_dm1(), bli_dscal(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_zgemm(), and FLA_Conjugate().

{
    // For every column (or row) of A, calls bli_dscal() with the value
    // returned by bli_dm1() on the doubles located one double past the start
    // of each element, i.e. with a stride of 2*inca doubles.
    // NOTE(review): this addresses the imag member only if dcomplex is laid
    // out as { real, imag } and bli_dm1() yields -1.0 -- presumably so;
    // neither is shown here.
    double  scale = bli_dm1();
    double* second_double;
    int     a_ld, a_inc;
    int     n_vecs;
    int     vec_len;
    int     k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default walk A one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;

        // When A is row-major, walk by rows instead so that consecutive
        // accesses are adjacent in memory.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
        }
    }
    else
    {
        // Vector case: a single scal covers the whole operand.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1; // only ever multiplied by k == 0
        a_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    for ( k = 0; k < n_vecs; ++k )
    {
        // Reinterpret the slice as an array of doubles and skip the first.
        second_double = ( double* )( a + k*a_ld ) + 1;

        bli_dscal( vec_len,
                   &scale,
                   second_double, 2*a_inc );
    }
}
void bli_zconjmr ( uplo_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_dm1(), bli_dscal(), bli_is_row_storage(), bli_is_upper(), and bli_zero_dim2().

Referenced by bli_zhemm(), bli_ztrmm(), bli_ztrsm(), and FLA_Conjugate_r().

{
    // Same scaling as the full-matrix variant, restricted to the triangle of
    // A selected by uplo: for each column (or row), bli_dscal() is called
    // with the value returned by bli_dm1() on the doubles located one double
    // past the start of each element (stride 2*inca doubles).
    // NOTE(review): this addresses the imag member only if dcomplex is laid
    // out as { real, imag } -- presumably so; not shown here.
    double  scale = bli_dm1();
    double* second_double;
    int     a_ld, a_inc;
    int     n_vecs;
    int     vec_len_max;
    int     vec_len;
    int     k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start from a column-by-column traversal.
    n_vecs      = n;
    vec_len_max = m;
    a_ld        = a_cs;
    a_inc       = a_rs;

    // When A is row-major, walk by rows instead. That exchanges the roles of
    // the two dimensions and strides, so uplo is toggled to match.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_vecs, vec_len_max );
        bli_swap_ints( a_ld, a_inc );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower: slice k starts on the diagonal and shrinks by one element
        // per step; stop once nothing is left.
        for ( k = 0; k < n_vecs; ++k )
        {
            vec_len = vec_len_max - k;

            if ( vec_len <= 0 ) break;

            second_double = ( double* )( a + k*a_ld + k*a_inc ) + 1;

            bli_dscal( vec_len,
                       &scale,
                       second_double, 2*a_inc );
        }
    }
    else
    {
        // Upper: slice k starts at the leading edge and holds k + 1
        // elements, capped at vec_len_max.
        for ( k = 0; k < n_vecs; ++k )
        {
            vec_len       = bli_min( k + 1, vec_len_max );
            second_double = ( double* )( a + k*a_ld ) + 1;

            bli_dscal( vec_len,
                       &scale,
                       second_double, 2*a_inc );
        }
    }
}
void bli_zconjv ( int  m,
dcomplex *  x,
int  incx 
)
void bli_zcopy ( int  m,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References cblas_zcopy(), and F77_zcopy().

Referenced by bli_zcopymr(), bli_zcopymt(), bli_zcopyv(), and FLA_SA_LU_unb().

{
    // Thin dispatch to the underlying zcopy: the Fortran-77 entry point by
    // default, or the CBLAS one when BLIS_ENABLE_CBLAS_INTERFACES is defined.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // The Fortran-77 entry point receives its integer arguments by address.
    F77_zcopy( &m,
               x, &incx,
               y, &incy );
#else
    cblas_zcopy( m,
                 x, incx,
                 y, incy );
#endif
}
void bli_zcopymr ( uplo_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_zcopy(), and bli_zero_dim2().

Referenced by bli_zcreate_contigmr(), bli_zfree_saved_contigmr(), bli_zfree_saved_contigmsr(), and FLA_Copyr_external().

{
    // Copies the triangle of A selected by uplo into B via bli_zcopy(), one
    // column (or row) at a time.
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len_max;
    int       vec_len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start from a column-by-column traversal.
    n_vecs      = n;
    vec_len_max = m;
    a_ld        = a_cs;
    a_inc       = a_rs;
    b_ld        = b_cs;
    b_inc       = b_rs;

    // When both A and B are row-major, walk by rows instead so that
    // consecutive accesses are adjacent in memory. That exchanges the roles
    // of the two dimensions and strides, so uplo is toggled to match.
    if ( bli_is_row_storage( b_rs, b_cs ) && bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_vecs, vec_len_max );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower: slice k starts on the diagonal and shrinks by one element
        // per step; stop once nothing is left.
        for ( k = 0; k < n_vecs; k++ )
        {
            vec_len = vec_len_max - k;

            if ( vec_len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bli_zcopy( vec_len,
                       a_vec, a_inc,
                       b_vec, b_inc );
        }
    }
    else
    {
        // Upper: slice k starts at the leading edge and holds k + 1
        // elements, capped at vec_len_max.
        for ( k = 0; k < n_vecs; k++ )
        {
            vec_len = bli_min( k + 1, vec_len_max );
            a_vec   = a + k*a_ld;
            b_vec   = b + k*b_ld;

            bli_zcopy( vec_len,
                       a_vec, a_inc,
                       b_vec, b_inc );
        }
    }
}
void bli_zcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zcopyv(), and bli_zero_dim2().

Referenced by bli_zhemm(), bli_ztrmm(), bli_ztrsm(), FLA_Copyrt_external(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), and FLA_Lyap_n_opz_var4().

{
    // Copies the triangle of A selected by uplo into B via bli_zcopyv(), one
    // column or row at a time. trans decides whether A's strides are
    // exchanged and which conj value is forwarded to bli_zcopyv().
    dcomplex* a_vec;
    dcomplex* b_vec;
    conj_t    conj;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       vec_len_max;
    int       shrinking;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // The storage of B picks the traversal direction (and hence the strides);
    // uplo then fixes how many slices there are and whether they shrink or
    // grow from one slice to the next.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs      = bli_min( m, n );
            vec_len_max = m;
            shrinking   = TRUE;
        }
        else // upper
        {
            n_vecs      = n;
            vec_len_max = bli_min( m, n );
            shrinking   = FALSE;
        }
    }
    else // B is row-stored
    {
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            n_vecs      = m;
            vec_len_max = bli_min( m, n );
            shrinking   = FALSE;
        }
        else // upper
        {
            n_vecs      = bli_min( m, n );
            vec_len_max = n;
            shrinking   = TRUE;
        }
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation part of trans is passed down to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        if ( shrinking )
        {
            // Slice starts on the diagonal and loses one element per step.
            vec_len = vec_len_max - k;
            a_vec   = a + k*a_ld + k*a_inc;
            b_vec   = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Slice starts at the leading edge and gains one element per
            // step, capped at vec_len_max.
            vec_len = bli_min( k + 1, vec_len_max );
            a_vec   = a + k*a_ld;
            b_vec   = b + k*b_ld;
        }

        bli_zcopyv( conj,
                    vec_len,
                    a_vec, a_inc,
                    b_vec, b_inc );
    }
}
void bli_zcopymt ( trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zconjv(), bli_zcopy(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by bli_zcreate_contigm(), bli_zcreate_contigmt(), bli_zfree_saved_contigm(), bli_zgemm(), bli_zhemm(), bli_zher2k(), bli_zsymm(), bli_zsyr2k(), bli_ztrmmsx(), bli_ztrsmsx(), FLA_Bsvd_v_opz_var2(), FLA_Copy_external(), FLA_Copyt_external(), FLA_Tevd_v_opz_var2(), and FLA_Tevd_v_opz_var4().

{
    // Copies A into B via bli_zcopy(), one column or row at a time. When
    // trans carries a conjugation, bli_zconjv() is applied to each freshly
    // written slice of B.
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len;
    int       conj_b;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: by default walk the operands one column at a time.
        n_vecs  = n;
        vec_len = m;
        a_ld    = a_cs;
        a_inc   = a_rs;
        b_ld    = b_cs;
        b_inc   = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // If B is row-major and A is effectively row-major once the
        // transposition is accounted for, walk by rows instead so that
        // consecutive accesses are adjacent in memory.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector case: a single copy covers the whole operand.
        n_vecs  = 1;
        vec_len = bli_vector_dim( m, n );
        a_ld    = 1; // only ever multiplied by k == 0
        a_inc   = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld    = 1; // only ever multiplied by k == 0
        b_inc   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // trans does not change inside the loop, so test it once.
    conj_b = bli_does_conj( trans );

    for ( k = 0; k < n_vecs; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zcopy( vec_len,
                   a_vec, a_inc,
                   b_vec, b_inc );

        // The conjugation is applied to the destination after the copy.
        if ( conj_b )
        {
            bli_zconjv( vec_len,
                        b_vec, b_inc );
        }
    }
}
void bli_zcopyv ( conj_t  conj,
int  m,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)
void bli_zdcopymr ( uplo_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_zdcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    // Copies the triangle of A selected by uplo into B via bli_zdcopyv()
    // (dcomplex source, double destination), one column (or row) at a time.
    // NOTE(review): BLIS_NO_TRANSPOSE is a trans_t constant; if
    // bli_zdcopyv()'s first parameter is a conj_t, as bli_zccopyv()'s is, a
    // conj_t constant would be the better fit -- confirm against its
    // prototype.
    dcomplex* a_vec;
    double*   b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       n_vecs;
    int       vec_len_max;
    int       vec_len;
    int       k;

    // Nothing to do when either dimension is zero.
    if ( bli_zero_dim2( m, n ) ) return;

    // Start from a column-by-column traversal.
    n_vecs      = n;
    vec_len_max = m;
    a_ld        = a_cs;
    a_inc       = a_rs;
    b_ld        = b_cs;
    b_inc       = b_rs;

    // When B is row-major, walk by rows instead so that consecutive accesses
    // to B are adjacent in memory. That exchanges the roles of the two
    // dimensions and strides, so uplo is toggled to match.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, vec_len_max );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower: slice k starts on the diagonal and shrinks by one element
        // per step; stop once nothing is left.
        for ( k = 0; k < n_vecs; k++ )
        {
            vec_len = vec_len_max - k;

            if ( vec_len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bli_zdcopyv( BLIS_NO_TRANSPOSE,
                         vec_len,
                         a_vec, a_inc,
                         b_vec, b_inc );
        }
    }
    else
    {
        // Upper: slice k starts at the leading edge and holds k + 1
        // elements, capped at vec_len_max.
        for ( k = 0; k < n_vecs; k++ )
        {
            vec_len = bli_min( k + 1, vec_len_max );
            a_vec   = a + k*a_ld;
            b_vec   = b + k*b_ld;

            bli_zdcopyv( BLIS_NO_TRANSPOSE,
                         vec_len,
                         a_vec, a_inc,
                         b_vec, b_inc );
        }
    }
}
void bli_zdcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
dcomplex a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zdcopyv(), and bli_zero_dim2().

Referenced by FLA_Copyrt_external().

{
    // Copies one triangle (chosen by uplo) of the dcomplex matrix A,
    // optionally transposed per trans, into the m-by-n double matrix B.
    // Each column or row of B is filled by a single bli_zdcopyv() call.
    dcomplex* src;
    double*   dst;
    int       a_vec_stride, a_elem_stride;
    int       b_vec_stride, b_elem_stride;
    int       n_vecs;
    int       max_len;
    int       len;
    int       diag;
    int       min_dim;
    int       b_is_col;
    int       want_lower;
    int       shrinking;
    int       k;
    conj_t    conj;

    // Nothing to do if bli_zero_dim2() reports an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    b_is_col   = bli_is_col_storage( b_rs, b_cs );
    want_lower = bli_is_lower( uplo );
    min_dim    = bli_min( m, n );

    // Traverse along B's storage direction: by columns when B is
    // column-stored, by rows otherwise. Whether the vectors shrink (start on
    // the diagonal) or grow (end on the diagonal) then follows from uplo.
    if ( b_is_col )
    {
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;
        b_vec_stride  = b_cs;
        b_elem_stride = b_rs;

        if ( want_lower )
        {
            n_vecs    = min_dim;
            max_len   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_vecs    = n;
            max_len   = min_dim;
            shrinking = FALSE;
        }
    }
    else // B is row-stored
    {
        a_vec_stride  = a_rs;
        a_elem_stride = a_cs;
        b_vec_stride  = b_rs;
        b_elem_stride = b_cs;

        if ( want_lower )
        {
            n_vecs    = m;
            max_len   = min_dim;
            shrinking = FALSE;
        }
        else // upper
        {
            n_vecs    = min_dim;
            max_len   = n;
            shrinking = TRUE;
        }
    }

    // A transposed source is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_vec_stride, a_elem_stride );
    }

    // The conjugation component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        if ( shrinking )
        {
            // Start on the diagonal and take everything after it.
            len  = max_len - k;
            diag = k;
        }
        else
        {
            // Start at the first element and stop at the diagonal.
            len  = bli_min( k + 1, max_len );
            diag = 0;
        }

        src = a + k*a_vec_stride + diag*a_elem_stride;
        dst = b + k*b_vec_stride + diag*b_elem_stride;

        bli_zdcopyv( conj,
                     len,
                     src, a_elem_stride,
                     dst, b_elem_stride );
    }
}
void bli_zdcopymt ( trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zdcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies the dcomplex matrix A, optionally transposed per trans, into
    // the m-by-n double matrix B with one bli_zdcopyv() call per column or
    // row of B.
    dcomplex* src;
    double*   dst;
    int       a_vec_stride, a_elem_stride;
    int       b_vec_stride, b_elem_stride;
    int       n_vecs;
    int       vec_len;
    int       k;
    conj_t    conj;

    // Nothing to do if bli_zero_dim2() reports an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case. Start from a column-by-column traversal of B.
        n_vecs        = n;
        vec_len       = m;
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;
        b_vec_stride  = b_cs;
        b_elem_stride = b_rs;

        // A transposed source is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_vec_stride, a_elem_stride );
        }

        // If B is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_vec_stride, a_elem_stride );
            bli_swap_ints( b_vec_stride, b_elem_stride );
        }
    }
    else
    {
        // Vector case: arrange for exactly one bli_zdcopyv() call.
        n_vecs        = 1;
        vec_len       = bli_vector_dim( m, n );
        a_vec_stride  = 1; // only ever multiplied by k == 0
        a_elem_stride = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_vec_stride  = 1; // only ever multiplied by k == 0
        b_elem_stride = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // The conjugation component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        src = a + k*a_vec_stride;
        dst = b + k*b_vec_stride;

        bli_zdcopyv( conj,
                     vec_len,
                     src, a_elem_stride,
                     dst, b_elem_stride );
    }
}
void bli_zdcopyv ( conj_t  conj,
int  m,
dcomplex *  x,
int  incx,
double *  y,
int  incy 
)

References bli_zero_dim1(), and dcomplex::real.

Referenced by bli_zdcopymr(), bli_zdcopymrt(), and bli_zdcopymt().

{
    // Stores the real component of each of the m elements of the dcomplex
    // vector x (stride incx) into the double vector y (stride incy).
    // The imaginary components are dropped and conj is not consulted.
    dcomplex* src;
    double*   dst;
    int       k;

    // Nothing to do if bli_zero_dim1() reports an empty vector.
    if ( bli_zero_dim1( m ) ) return;

    for ( k = 0, src = x, dst = y; k < m; ++k, src += incx, dst += incy )
    {
        *dst = src->real;
    }
}
void bli_zdinvscalm ( conj_t  conj,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_dinvert2s(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zdscal(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Scales every element of the m-by-n dcomplex matrix A by the value
    // that bli_dinvert2s() derives from the real scalar *alpha (presumably
    // its reciprocal -- confirm against bli_dinvert2s()), one bli_zdscal()
    // call per column or row.
    double    inv_alpha;
    dcomplex* vec;
    int       vec_stride, elem_stride;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do for an empty matrix, or when bli_deq1() says alpha is
    // one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case. Start from a column-by-column traversal.
        n_vecs      = n;
        vec_len     = m;
        vec_stride  = a_cs;
        elem_stride = a_rs;

        // If A is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( vec_stride, elem_stride );
        }
    }
    else
    {
        // Vector case: arrange for exactly one bli_zdscal() call.
        n_vecs      = 1;
        vec_len     = bli_vector_dim( m, n );
        vec_stride  = 1; // only ever multiplied by k == 0
        elem_stride = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Compute the scaling factor once, outside the loop.
    bli_dinvert2s( conj, alpha, &inv_alpha );

    for ( k = 0; k < n_vecs; ++k )
    {
        vec = a + k*vec_stride;

        bli_zdscal( vec_len,
                    &inv_alpha,
                    vec, elem_stride );
    }
}
void bli_zdinvscalv ( conj_t  conj,
int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)

References bli_zdscal().

{
    // Divides the n-element dcomplex vector x by the real scalar *alpha by
    // handing its reciprocal to bli_zdscal(). conj is not consulted.
    double reciprocal;

    if ( bli_deq1( alpha ) )
    {
        // bli_deq1() reports alpha as one: x would be unchanged.
        return;
    }

    reciprocal = 1.0 / *alpha;

    bli_zdscal( n, &reciprocal, x, incx );
}
void bli_zdot ( conj_t  conj,
int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  rho 
)
void bli_zdot2s ( conj_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  beta,
dcomplex *  rho 
)

References bli_zdot(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Dot2cs_external(), FLA_Dot2s_external(), FLA_Eig_gest_il_opz_var1(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var1(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var1(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nu_opz_var1(), FLA_Eig_gest_nu_opz_var2(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), and FLA_Lyap_n_opz_var3().

{
    // Updates rho in place:
    //   rho := beta * rho + alpha * dot( x, y ) + conj(alpha) * dot( y, x )
    // where each dot( , ) is whatever bli_zdot() computes for the given conj.
    dcomplex prod_xy;
    dcomplex prod_yx;
    dcomplex scal      = *alpha;
    dcomplex scal_conj = *alpha;
    dcomplex weight    = *beta;
    dcomplex rho_in    = *rho;
    double   new_real;
    double   new_imag;

    // Turn the second copy of alpha into its complex conjugate.
    scal_conj.imag *= -1.0;

    bli_zdot( conj, n, x, incx, y, incy, &prod_xy );
    bli_zdot( conj, n, y, incy, x, incx, &prod_yx );

    // Three complex products, accumulated component-wise. All operands are
    // local snapshots, so the result does not depend on the store order.
    new_real = weight.real    * rho_in.real  - weight.imag    * rho_in.imag +
               scal.real      * prod_xy.real - scal.imag      * prod_xy.imag +
               scal_conj.real * prod_yx.real - scal_conj.imag * prod_yx.imag;
    new_imag = weight.real    * rho_in.imag  + weight.imag    * rho_in.real +
               scal.real      * prod_xy.imag + scal.imag      * prod_xy.real +
               scal_conj.real * prod_yx.imag + scal_conj.imag * prod_yx.real;

    rho->real = new_real;
    rho->imag = new_imag;
}
void bli_zdot_in ( conj_t  conj,
int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  rho 
)

References bli_is_conj(), dcomplex::imag, and dcomplex::real.

Referenced by bli_zdot().

{
    // Inline (non-BLAS) complex dot product of the n-element vectors x and
    // y. When conj requests conjugation, each x element is conjugated before
    // it is multiplied with the matching y element. The sum is written to
    // *rho, which is 0 + 0i when the loop does not execute.
    dcomplex* xp = x;
    dcomplex* yp = y;
    double    x_re, x_im;
    double    y_re, y_im;
    double    acc_re = 0.0;
    double    acc_im = 0.0;
    int       conj_x;
    int       k;

    conj_x = bli_is_conj( conj );

    for ( k = 0; k < n; ++k )
    {
        x_re = xp->real;
        x_im = xp->imag;
        y_re = yp->real;
        y_im = yp->imag;

        if ( conj_x )
        {
            // conj(x) * y: the imaginary part of x enters negated.
            acc_re += x_re * y_re - -x_im * y_im;
            acc_im += x_re * y_im + -x_im * y_re;
        }
        else
        {
            // x * y
            acc_re += x_re * y_re - x_im * y_im;
            acc_im += x_re * y_im + x_im * y_re;
        }

        xp += incx;
        yp += incy;
    }

    rho->real = acc_re;
    rho->imag = acc_im;
}
void bli_zdots ( conj_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy,
dcomplex *  beta,
dcomplex *  rho 
)

References bli_zdot(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Chol_l_opz_var1(), FLA_Chol_l_opz_var2(), FLA_Chol_u_opz_var1(), FLA_Chol_u_opz_var2(), FLA_Dotcs_external(), FLA_Dots_external(), FLA_Hess_UT_step_opz_var5(), FLA_LU_nopiv_opz_var1(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_Ttmm_l_opz_var2(), FLA_Ttmm_l_opz_var3(), FLA_Ttmm_u_opz_var2(), and FLA_Ttmm_u_opz_var3().

{
    // Updates rho in place:
    //   rho := beta * rho + alpha * dot( x, y )
    // where dot( , ) is whatever bli_zdot() computes for the given conj.
    //
    // alpha, beta and the incoming rho are snapshotted before anything is
    // stored. Otherwise rho->imag would be computed after rho->real has
    // already been overwritten, giving a wrong result whenever alpha or
    // beta points at the same object as rho.
    dcomplex rho_orig  = *rho;
    dcomplex alpha_val = *alpha;
    dcomplex beta_val  = *beta;
    dcomplex dot_prod;

    bli_zdot( conj,
              n,
              x, incx,
              y, incy,
              &dot_prod );

    // Two complex products, accumulated component-wise.
    rho->real = beta_val.real  * rho_orig.real - beta_val.imag  * rho_orig.imag +
                alpha_val.real * dot_prod.real - alpha_val.imag * dot_prod.imag;
    rho->imag = beta_val.real  * rho_orig.imag + beta_val.imag  * rho_orig.real +
                alpha_val.real * dot_prod.imag + alpha_val.imag * dot_prod.real;
}
void bli_zdscal ( int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)

References cblas_zdscal(), and F77_zdscal().

Referenced by bli_zdinvscalm(), bli_zdinvscalv(), bli_zdscalm(), bli_zdscalmr(), and bli_zdscalv().

{
    // Thin wrapper that forwards to the BLAS zdscal routine (scale a double
    // complex vector by a real scalar) through whichever binding the build
    // selected.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n and incx are passed by address.
    F77_zdscal( &n,
                alpha,
                x, &incx );
#else
    // CBLAS binding: the scalar is passed by value.
    cblas_zdscal( n,
                  *alpha,
                  x, incx );
#endif
}
void bli_zdscalm ( conj_t  conj,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zdscal(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scales every element of the m-by-n dcomplex matrix A by the real
    // scalar *alpha (after it passes through bli_dcopys() with conj), one
    // bli_zdscal() call per column or row.
    double    scal;
    dcomplex* vec;
    int       vec_stride, elem_stride;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do for an empty matrix, or when bli_deq1() says alpha is
    // one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case. Start from a column-by-column traversal.
        n_vecs      = n;
        vec_len     = m;
        vec_stride  = a_cs;
        elem_stride = a_rs;

        // If A is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( vec_stride, elem_stride );
        }
    }
    else
    {
        // Vector case: arrange for exactly one bli_zdscal() call.
        n_vecs      = 1;
        vec_len     = bli_vector_dim( m, n );
        vec_stride  = 1; // only ever multiplied by k == 0
        elem_stride = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Obtain the scalar actually applied, once, outside the loop.
    bli_dcopys( conj, alpha, &scal );

    for ( k = 0; k < n_vecs; ++k )
    {
        vec = a + k*vec_stride;

        bli_zdscal( vec_len,
                    &scal,
                    vec, elem_stride );
    }
}
void bli_zdscalmr ( uplo_t  uplo,
int  m,
int  n,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_zdscal(), and bli_zero_dim2().

Referenced by bli_zher2k(), bli_zherk(), and FLA_Scalr_external().

{
    // Scales one triangle (chosen by uplo, diagonal included) of the m-by-n
    // dcomplex matrix A by the real scalar *alpha, one bli_zdscal() call per
    // column or row.
    dcomplex* vec;
    int       vec_stride  = a_cs;
    int       elem_stride = a_rs;
    int       n_vecs      = n; // column-major default: one pass per column
    int       max_len     = m; // ... each holding at most m elements
    int       len;
    int       k;

    // Nothing to do for an empty matrix, or when bli_deq1() says alpha is
    // one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_deq1( alpha ) ) return;

    // When A is row-major, walk it by rows instead of columns for better
    // spatial locality. Exchanging the roles of rows and columns also
    // exchanges which triangle is visited, hence the uplo toggle.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_vecs, max_len );
        bli_swap_ints( vec_stride, elem_stride );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: vector k begins on the diagonal and carries the
        // remaining max_len - k elements. Stop as soon as nothing is left.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = max_len - k;
            if ( len <= 0 ) break;

            vec = a + k*vec_stride + k*elem_stride;

            bli_zdscal( len,
                        alpha,
                        vec, elem_stride );
        }

        return;
    }

    // Upper case: vector k begins at its first element and runs up to and
    // including the diagonal, capped at max_len elements.
    for ( k = 0; k < n_vecs; ++k )
    {
        len = bli_min( k + 1, max_len );
        vec = a + k*vec_stride;

        bli_zdscal( len,
                    alpha,
                    vec, elem_stride );
    }
}
void bli_zdscalv ( conj_t  conj,
int  n,
double *  alpha,
dcomplex *  x,
int  incx 
)

References bli_zdscal(), and bli_zero_dim1().

Referenced by bli_zdapdiagmv(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

{
    // Scales the n-element dcomplex vector x by the real scalar *alpha.
    // conj is not consulted.

    // Skip the BLAS call for an empty vector, or when bli_deq1() says alpha
    // is one. The checks run in this order; the second is skipped if the
    // first already holds.
    if ( bli_zero_dim1( n ) || bli_deq1( alpha ) )
    {
        return;
    }

    bli_zdscal( n, alpha, x, incx );
}
void bli_zfnorm ( int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  norm 
)

References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), BLIS_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Norm_frob().

{
    // Computes the Frobenius norm of the m-by-n dcomplex matrix A, i.e. the
    // square root of the sum of real^2 + imag^2 over all elements, and
    // stores it in *norm.
    dcomplex* a_ij;
    double    sum;
    int       lda, inca;
    int       n_iter;
    int       n_elem;
    int       i, j;

    // An empty matrix has norm zero. Store that explicitly rather than
    // returning with *norm untouched, so the caller never reads an
    // indeterminate value.
    if ( bli_zero_dim2( m, n ) )
    {
        *norm = 0.0;
        return;
    }

    // Handle cases where A is a vector separately.
    if ( bli_is_vector( m, n ) )
    {
        // Initialize with values appropriate for vectors.
        n_iter = 1;
        n_elem = bli_vector_dim( m, n );
        lda    = 1; // multiplied by zero when n_iter == 1; not needed.
        inca   = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }
    else // matrix case
    {
        // Initialize with optimal values for column-major storage.
        n_iter = n;
        n_elem = m;
        lda    = a_cs;
        inca   = a_rs;

        // An optimization: if A is row-major, then let's access the matrix by
        // rows instead of by columns for increased spatial locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_iter, n_elem );
            bli_swap_ints( lda, inca );
        }
    }

    // Initialize the accumulator variable.
    sum = 0.0;

    // Accumulate the squared magnitude of every element. The outer loop
    // walks the slower-varying dimension chosen above.
    for ( j = 0; j < n_iter; j++ )
    {
        for ( i = 0; i < n_elem; i++ )
        {
            a_ij = a + i*inca + j*lda;
            sum += a_ij->real * a_ij->real + a_ij->imag * a_ij->imag;
        }
    }

    // Compute the norm and store the result.
    *norm = sqrt( sum );
}
void bli_zinvscalm ( conj_t  conj,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), bli_zinvert2s(), bli_zscal(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Inv_scal_external(), and FLA_Inv_scalc_external().

{
    // Scales every element of the m-by-n dcomplex matrix A by the value
    // that bli_zinvert2s() derives from the complex scalar *alpha
    // (presumably its reciprocal, conjugated per conj -- confirm against
    // bli_zinvert2s()), one bli_zscal() call per column or row.
    dcomplex  inv_alpha;
    dcomplex* vec;
    int       vec_stride, elem_stride;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do for an empty matrix, or when bli_zeq1() says alpha is
    // one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_zeq1( alpha ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case. Start from a column-by-column traversal.
        n_vecs      = n;
        vec_len     = m;
        vec_stride  = a_cs;
        elem_stride = a_rs;

        // If A is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( vec_stride, elem_stride );
        }
    }
    else
    {
        // Vector case: arrange for exactly one bli_zscal() call.
        n_vecs      = 1;
        vec_len     = bli_vector_dim( m, n );
        vec_stride  = 1; // only ever multiplied by k == 0
        elem_stride = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Compute the scaling factor once, outside the loop.
    bli_zinvert2s( conj, alpha, &inv_alpha );

    for ( k = 0; k < n_vecs; ++k )
    {
        vec = a + k*vec_stride;

        bli_zscal( vec_len,
                   &inv_alpha,
                   vec, elem_stride );
    }
}
void bli_zinvscalv ( conj_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)
void bli_znrm2 ( int  n,
dcomplex *  x,
int  incx,
double *  norm 
)

References cblas_dznrm2(), and F77_dznrm2().

Referenced by FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_l_opz(), FLA_Househ3UD_UT_opz(), and FLA_Nrm2_external().

{
    // Thin wrapper that stores in *norm the value returned by the BLAS
    // dznrm2 routine (Euclidean norm of a double complex vector), called
    // through whichever binding the build selected.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n and incx are passed by address.
    *norm = F77_dznrm2( &n,
                        x, &incx );
#else
    // CBLAS binding: n and incx are passed by value.
    *norm = cblas_dznrm2( n,
                          x, incx );
#endif
}
void bli_zscal ( int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)

References cblas_zscal(), and F77_zscal().

Referenced by bli_zaxpysmt(), bli_zaxpysv(), bli_zinvscalm(), bli_zinvscalv(), bli_zscalm(), bli_zscalmr(), bli_zscalv(), and FLA_SA_LU_unb().

{
    // Thin wrapper that forwards to the BLAS zscal routine (scale a double
    // complex vector by a double complex scalar) through whichever binding
    // the build selected. Both bindings receive alpha as a pointer.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n and incx are passed by address.
    F77_zscal( &n,
               alpha,
               x, &incx );
#else
    // CBLAS binding: n and incx are passed by value.
    cblas_zscal( n,
                 alpha,
                 x, incx );
#endif
}
void bli_zscalm ( conj_t  conj,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), bli_zscal(), and BLIS_NO_TRANSPOSE.

Referenced by bli_zgemm(), bli_zhemm(), bli_zsymm(), bli_ztrmmsx(), bli_ztrsmsx(), FLA_Lyap_h_opz_var1(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_h_opz_var4(), FLA_Lyap_n_opz_var1(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_Lyap_n_opz_var4(), FLA_Scal_external(), and FLA_Scalc_external().

{
    // Scales every element of the m-by-n dcomplex matrix A by the complex
    // scalar *alpha (after it passes through bli_zcopys() with conj,
    // presumably conjugating it when requested -- confirm), one bli_zscal()
    // call per column or row.
    dcomplex  scal;
    dcomplex* vec;
    int       vec_stride, elem_stride;
    int       n_vecs;
    int       vec_len;
    int       k;

    // Nothing to do for an empty matrix, or when bli_zeq1() says alpha is
    // one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_zeq1( alpha ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case. Start from a column-by-column traversal.
        n_vecs      = n;
        vec_len     = m;
        vec_stride  = a_cs;
        elem_stride = a_rs;

        // If A is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bli_is_row_storage( a_rs, a_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( vec_stride, elem_stride );
        }
    }
    else
    {
        // Vector case: arrange for exactly one bli_zscal() call.
        n_vecs      = 1;
        vec_len     = bli_vector_dim( m, n );
        vec_stride  = 1; // only ever multiplied by k == 0
        elem_stride = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, a_rs, a_cs );
    }

    // Obtain the scalar actually applied, once, outside the loop.
    bli_zcopys( conj, alpha, &scal );

    for ( k = 0; k < n_vecs; ++k )
    {
        vec = a + k*vec_stride;

        bli_zscal( vec_len,
                   &scal,
                   vec, elem_stride );
    }
}
void bli_zscalmr ( uplo_t  uplo,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), and bli_zscal().

Referenced by FLA_Scalr_external().

{
    // Scales one triangle (chosen by uplo, diagonal included) of the m-by-n
    // dcomplex matrix A by the complex scalar *alpha, one bli_zscal() call
    // per column or row.
    dcomplex* vec;
    int       vec_stride  = a_cs;
    int       elem_stride = a_rs;
    int       n_vecs      = n; // column-major default: one pass per column
    int       max_len     = m; // ... each holding at most m elements
    int       len;
    int       k;

    // Nothing to do for an empty matrix, or when bli_zeq1() says alpha is
    // one.
    if ( bli_zero_dim2( m, n ) ) return;
    if ( bli_zeq1( alpha ) ) return;

    // When A is row-major, walk it by rows instead of columns for better
    // spatial locality. Exchanging the roles of rows and columns also
    // exchanges which triangle is visited, hence the uplo toggle.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( n_vecs, max_len );
        bli_swap_ints( vec_stride, elem_stride );
        bli_toggle_uplo( uplo );
    }

    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: vector k begins on the diagonal and carries the
        // remaining max_len - k elements. Stop as soon as nothing is left.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = max_len - k;
            if ( len <= 0 ) break;

            vec = a + k*vec_stride + k*elem_stride;

            bli_zscal( len,
                       alpha,
                       vec, elem_stride );
        }

        return;
    }

    // Upper case: vector k begins at its first element and runs up to and
    // including the diagonal, capped at max_len elements.
    for ( k = 0; k < n_vecs; ++k )
    {
        len = bli_min( k + 1, max_len );
        vec = a + k*vec_stride;

        bli_zscal( len,
                   alpha,
                   vec, elem_stride );
    }
}
void bli_zscalv ( conj_t  conj,
int  n,
dcomplex *  alpha,
dcomplex *  x,
int  incx 
)
void bli_zscopymr ( uplo_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_zero_dim2(), bli_zscopyv(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copyr_external().

{
    // Copies one triangle (chosen by uplo, diagonal included) of the
    // dcomplex matrix A into the m-by-n float matrix B, issuing one
    // bli_zscopyv() call per column or row. bli_zscopyv() stores only the
    // real component of each source element.
    dcomplex* src;
    float*    dst;
    int       a_vec_stride  = a_cs;
    int       a_elem_stride = a_rs;
    int       b_vec_stride  = b_cs;
    int       b_elem_stride = b_rs;
    int       n_vecs        = n; // column-major default: one pass per column
    int       max_len       = m; // ... each holding at most m elements
    int       len;
    int       k;

    // Nothing to do if bli_zero_dim2() reports an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    // When B is row-major, walk the matrices by rows instead of columns for
    // better spatial locality. Exchanging the roles of rows and columns
    // also exchanges which triangle is visited, hence the uplo toggle.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( n_vecs, max_len );
        bli_swap_ints( a_vec_stride, a_elem_stride );
        bli_swap_ints( b_vec_stride, b_elem_stride );
        bli_toggle_uplo( uplo );
    }

    // NOTE(review): bli_zscopyv() declares its first parameter as conj_t,
    // but a transposition constant is passed below -- confirm this is
    // intentional.
    if ( !bli_is_upper( uplo ) )
    {
        // Lower case: vector k begins on the diagonal and carries the
        // remaining max_len - k elements. Stop as soon as nothing is left.
        for ( k = 0; k < n_vecs; ++k )
        {
            len = max_len - k;
            if ( len <= 0 ) break;

            src = a + k*a_vec_stride + k*a_elem_stride;
            dst = b + k*b_vec_stride + k*b_elem_stride;

            bli_zscopyv( BLIS_NO_TRANSPOSE,
                         len,
                         src, a_elem_stride,
                         dst, b_elem_stride );
        }

        return;
    }

    // Upper case: vector k begins at its first element and runs up to and
    // including the diagonal, capped at max_len elements.
    for ( k = 0; k < n_vecs; ++k )
    {
        len = bli_min( k + 1, max_len );
        src = a + k*a_vec_stride;
        dst = b + k*b_vec_stride;

        bli_zscopyv( BLIS_NO_TRANSPOSE,
                     len,
                     src, a_elem_stride,
                     dst, b_elem_stride );
    }
}
void bli_zscopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zero_dim2(), and bli_zscopyv().

Referenced by FLA_Copyrt_external().

{
    // Copies one triangle (chosen by uplo) of the dcomplex matrix A,
    // optionally transposed per trans, into the m-by-n float matrix B.
    // Each column or row of B is filled by a single bli_zscopyv() call.
    dcomplex* src;
    float*    dst;
    int       a_vec_stride, a_elem_stride;
    int       b_vec_stride, b_elem_stride;
    int       n_vecs;
    int       max_len;
    int       len;
    int       diag;
    int       min_dim;
    int       b_is_col;
    int       want_lower;
    int       shrinking;
    int       k;
    conj_t    conj;

    // Nothing to do if bli_zero_dim2() reports an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    b_is_col   = bli_is_col_storage( b_rs, b_cs );
    want_lower = bli_is_lower( uplo );
    min_dim    = bli_min( m, n );

    // Traverse along B's storage direction: by columns when B is
    // column-stored, by rows otherwise. Whether the vectors shrink (start on
    // the diagonal) or grow (end on the diagonal) then follows from uplo.
    if ( b_is_col )
    {
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;
        b_vec_stride  = b_cs;
        b_elem_stride = b_rs;

        if ( want_lower )
        {
            n_vecs    = min_dim;
            max_len   = m;
            shrinking = TRUE;
        }
        else // upper
        {
            n_vecs    = n;
            max_len   = min_dim;
            shrinking = FALSE;
        }
    }
    else // B is row-stored
    {
        a_vec_stride  = a_rs;
        a_elem_stride = a_cs;
        b_vec_stride  = b_rs;
        b_elem_stride = b_cs;

        if ( want_lower )
        {
            n_vecs    = m;
            max_len   = min_dim;
            shrinking = FALSE;
        }
        else // upper
        {
            n_vecs    = min_dim;
            max_len   = n;
            shrinking = TRUE;
        }
    }

    // A transposed source is read with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_vec_stride, a_elem_stride );
    }

    // The conjugation component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        if ( shrinking )
        {
            // Start on the diagonal and take everything after it.
            len  = max_len - k;
            diag = k;
        }
        else
        {
            // Start at the first element and stop at the diagonal.
            len  = bli_min( k + 1, max_len );
            diag = 0;
        }

        src = a + k*a_vec_stride + diag*a_elem_stride;
        dst = b + k*b_vec_stride + diag*b_elem_stride;

        bli_zscopyv( conj,
                     len,
                     src, a_elem_stride,
                     dst, b_elem_stride );
    }
}
void bli_zscopymt ( trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zero_dim2(), bli_zscopyv(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Copy_external(), and FLA_Copyt_external().

{
    // Copies the dcomplex matrix A, optionally transposed per trans, into
    // the m-by-n float matrix B with one bli_zscopyv() call per column or
    // row of B.
    dcomplex* src;
    float*    dst;
    int       a_vec_stride, a_elem_stride;
    int       b_vec_stride, b_elem_stride;
    int       n_vecs;
    int       vec_len;
    int       k;
    conj_t    conj;

    // Nothing to do if bli_zero_dim2() reports an empty matrix.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case. Start from a column-by-column traversal of B.
        n_vecs        = n;
        vec_len       = m;
        a_vec_stride  = a_cs;
        a_elem_stride = a_rs;
        b_vec_stride  = b_cs;
        b_elem_stride = b_rs;

        // A transposed source is read with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_vec_stride, a_elem_stride );
        }

        // If B is row-major, traverse by rows instead for better spatial
        // locality.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( n_vecs, vec_len );
            bli_swap_ints( a_vec_stride, a_elem_stride );
            bli_swap_ints( b_vec_stride, b_elem_stride );
        }
    }
    else
    {
        // Vector case: arrange for exactly one bli_zscopyv() call.
        n_vecs        = 1;
        vec_len       = bli_vector_dim( m, n );
        a_vec_stride  = 1; // only ever multiplied by k == 0
        a_elem_stride = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_vec_stride  = 1; // only ever multiplied by k == 0
        b_elem_stride = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    // The conjugation component of trans is forwarded to the vector copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < n_vecs; ++k )
    {
        src = a + k*a_vec_stride;
        dst = b + k*b_vec_stride;

        bli_zscopyv( conj,
                     vec_len,
                     src, a_elem_stride,
                     dst, b_elem_stride );
    }
}
void bli_zscopyv ( conj_t  conj,
int  m,
dcomplex *  x,
int  incx,
float *  y,
int  incy 
)

References bli_zero_dim1(), and dcomplex::real.

Referenced by bli_zscopymr(), bli_zscopymrt(), and bli_zscopymt().

{
    // Stores the real component of each of the m elements of the dcomplex
    // vector x (stride incx) into the float vector y (stride incy). The
    // assignment converts each value to float. The imaginary components are
    // dropped and conj is not consulted.
    dcomplex* src;
    float*    dst;
    int       k;

    // Nothing to do if bli_zero_dim1() reports an empty vector.
    if ( bli_zero_dim1( m ) ) return;

    for ( k = 0, src = x, dst = y; k < m; ++k, src += incx, dst += incy )
    {
        *dst = src->real;
    }
}
void bli_zswap ( int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References cblas_zswap(), and F77_zswap().

Referenced by bli_zswapmt(), bli_zswapv(), FLA_SA_Apply_pivots(), and FLA_SA_LU_unb().

{
    // Thin wrapper that forwards to the BLAS zswap routine (exchange the
    // contents of two double complex vectors) through whichever binding the
    // build selected.
#ifndef BLIS_ENABLE_CBLAS_INTERFACES
    // Fortran-77 binding: n and both increments are passed by address.
    F77_zswap( &n,
               x, &incx, 
               y, &incy );
#else
    // CBLAS binding: n and both increments are passed by value.
    cblas_zswap( n,
                 x, incx, 
                 y, incy );
#endif
}
void bli_zswapmt ( trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_conj(), bli_does_notrans(), bli_does_trans(), bli_is_col_storage(), bli_is_row_storage(), bli_is_vector(), bli_vector_dim(), bli_vector_inc(), bli_zconjv(), bli_zero_dim2(), bli_zswap(), and BLIS_NO_TRANSPOSE.

Referenced by FLA_Swap_external(), and FLA_Swapt_external().

{
    // Exchanges A (viewed through the transposition encoded in trans) with
    // the m-by-n matrix B, one row or column at a time via bli_zswap().
    // When trans carries a conjugation, both operands are conjugated after
    // each exchange.
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       vec_len;
    int       k;

    // Leave immediately when bli_zero_dim2() says there is nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: begin with a column-by-column traversal.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // Locality optimization: when B is row-major and A is effectively
        // row-major once the transposition is accounted for, traverse by
        // rows instead of by columns.
        if ( bli_is_row_storage( b_rs, b_cs ) &&
             ( ( bli_is_col_storage( a_rs, a_cs ) && bli_does_trans( trans ) ) ||
               ( bli_is_row_storage( a_rs, a_cs ) && bli_does_notrans( trans ) ) ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying swap is invoked
        // exactly once. The leading dimensions are only ever multiplied by
        // k == 0, so their values are placeholders.
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( k = 0; k < num_vecs; k++ )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zswap( vec_len, a_vec, a_inc, b_vec, b_inc );

        // Conjugate both freshly exchanged vectors, A's first and then B's.
        if ( bli_does_conj( trans ) )
        {
            bli_zconjv( vec_len, a_vec, a_inc );
            bli_zconjv( vec_len, b_vec, b_inc );
        }
    }
}
void bli_zswapv ( int  n,
dcomplex *  x,
int  incx,
dcomplex *  y,
int  incy 
)

References bli_zero_dim1(), and bli_zswap().

Referenced by FLA_Apply_pivots_macro_external(), FLA_Sort_evd_b_opz(), FLA_Sort_evd_f_opz(), FLA_Sort_svd_b_opz(), and FLA_Sort_svd_f_opz().

{
    // Exchanges the n-element strided vectors x and y through bli_zswap().
    // The call is skipped entirely when bli_zero_dim1() reports that there
    // is nothing to do.
    if ( !bli_zero_dim1( n ) )
    {
        bli_zswap( n, x, incx, y, incy );
    }
}
void bli_zzcopymr ( uplo_t  uplo,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_is_row_storage(), bli_is_upper(), bli_zcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

{
    // Copies the part of A selected by uplo into the matching part of B,
    // one row or column at a time through bli_zcopyv(). No transposition
    // is applied to A.
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       max_len;
    int       len;
    int       k;

    // Leave immediately when bli_zero_dim2() says there is nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    // Default traversal: column by column.
    num_vecs = n;
    max_len  = m;
    a_ld     = a_cs;
    a_inc    = a_rs;
    b_ld     = b_cs;
    b_inc    = b_rs;

    // Locality optimization: for a row-major B, traverse by rows instead.
    // Exchanging the roles of rows and columns also flips which triangle
    // the loops below must walk, hence the uplo toggle.
    if ( bli_is_row_storage( b_rs, b_cs ) )
    {
        bli_swap_ints( num_vecs, max_len );
        bli_swap_ints( a_ld, a_inc );
        bli_swap_ints( b_ld, b_inc );
        bli_toggle_uplo( uplo );
    }

    if ( bli_is_upper( uplo ) )
    {
        // Vector k starts at offset zero and holds k+1 elements, capped at
        // max_len.
        for ( k = 0; k < num_vecs; k++ )
        {
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;

            bli_zcopyv( BLIS_NO_TRANSPOSE, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
    else // lower
    {
        // Vector k starts on the diagonal and holds the max_len - k
        // elements from there onward; stop once nothing remains.
        for ( k = 0; k < num_vecs; k++ )
        {
            len = max_len - k;

            if ( len <= 0 ) break;

            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;

            bli_zcopyv( BLIS_NO_TRANSPOSE, len, a_vec, a_inc, b_vec, b_inc );
        }
    }
}
void bli_zzcopymrt ( uplo_t  uplo,
trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_col_storage(), bli_is_lower(), bli_proj_trans_to_conj(), bli_zcopyv(), and bli_zero_dim2().

{
    // Copies the part of A selected by uplo, viewed through the
    // transposition encoded in trans, into the matching part of the
    // m-by-n matrix B. Work proceeds one row or column at a time through
    // bli_zcopyv(), which also receives the conjugation component of trans.
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       len;
    int       max_len;
    int       len_shrinks;
    int       k;
    conj_t    conj;

    // Leave immediately when bli_zero_dim2() says there is nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    // Pick the traversal from B's storage format; within each format, uplo
    // decides how many vectors are visited, the cap on their length, and
    // whether that length shrinks or grows from one vector to the next.
    if ( bli_is_col_storage( b_rs, b_cs ) )
    {
        // Column-by-column traversal.
        a_ld  = a_cs;
        a_inc = a_rs;
        b_ld  = b_cs;
        b_inc = b_rs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs    = bli_min( m, n );
            max_len     = m;
            len_shrinks = TRUE;
        }
        else // upper
        {
            num_vecs    = n;
            max_len     = bli_min( m, n );
            len_shrinks = FALSE;
        }
    }
    else // row storage
    {
        // Row-by-row traversal.
        a_ld  = a_rs;
        a_inc = a_cs;
        b_ld  = b_rs;
        b_inc = b_cs;

        if ( bli_is_lower( uplo ) )
        {
            num_vecs    = m;
            max_len     = bli_min( m, n );
            len_shrinks = FALSE;
        }
        else // upper
        {
            num_vecs    = bli_min( m, n );
            max_len     = n;
            len_shrinks = TRUE;
        }
    }

    // A transposed A is traversed with its two strides exchanged.
    if ( bli_does_trans( trans ) )
    {
        bli_swap_ints( a_ld, a_inc );
    }

    // Only the conjugation component of trans is handed to the copy.
    conj = bli_proj_trans_to_conj( trans );

    for ( k = 0; k < num_vecs; k++ )
    {
        if ( len_shrinks )
        {
            // Start on the diagonal; one element fewer each time.
            len   = max_len - k;
            a_vec = a + k*a_ld + k*a_inc;
            b_vec = b + k*b_ld + k*b_inc;
        }
        else
        {
            // Start at offset zero; one element more each time, capped at
            // max_len.
            len   = bli_min( k + 1, max_len );
            a_vec = a + k*a_ld;
            b_vec = b + k*b_ld;
        }

        bli_zcopyv( conj, len, a_vec, a_inc, b_vec, b_inc );
    }
}
void bli_zzcopymt ( trans_t  trans,
int  m,
int  n,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)

References bli_does_trans(), bli_is_row_storage(), bli_is_vector(), bli_proj_trans_to_conj(), bli_vector_dim(), bli_vector_inc(), bli_zcopyv(), bli_zero_dim2(), and BLIS_NO_TRANSPOSE.

{
    // Copies A, viewed through the transposition encoded in trans, into
    // the m-by-n matrix B one row or column at a time through
    // bli_zcopyv(), which also receives the conjugation component of trans.
    dcomplex* a_vec;
    dcomplex* b_vec;
    int       a_ld, a_inc;
    int       b_ld, b_inc;
    int       num_vecs;
    int       vec_len;
    int       k;
    conj_t    conj;

    // Leave immediately when bli_zero_dim2() says there is nothing to do.
    if ( bli_zero_dim2( m, n ) ) return;

    // Only the conjugation component of trans is handed to the copy.
    conj = bli_proj_trans_to_conj( trans );

    if ( !bli_is_vector( m, n ) )
    {
        // Matrix case: begin with a column-by-column traversal of B.
        num_vecs = n;
        vec_len  = m;
        a_ld     = a_cs;
        a_inc    = a_rs;
        b_ld     = b_cs;
        b_inc    = b_rs;

        // A transposed A is traversed with its two strides exchanged.
        if ( bli_does_trans( trans ) )
        {
            bli_swap_ints( a_ld, a_inc );
        }

        // Locality optimization: for a row-major B, traverse by rows
        // instead of by columns.
        if ( bli_is_row_storage( b_rs, b_cs ) )
        {
            bli_swap_ints( num_vecs, vec_len );
            bli_swap_ints( a_ld, a_inc );
            bli_swap_ints( b_ld, b_inc );
        }
    }
    else
    {
        // Vector case: a single pass, so the underlying copy is invoked
        // exactly once. The leading dimensions are only ever multiplied by
        // k == 0, so their values are placeholders.
        num_vecs = 1;
        vec_len  = bli_vector_dim( m, n );
        a_ld     = 1;
        a_inc    = bli_vector_inc( trans,             m, n, a_rs, a_cs );
        b_ld     = 1;
        b_inc    = bli_vector_inc( BLIS_NO_TRANSPOSE, m, n, b_rs, b_cs );
    }

    for ( k = 0; k < num_vecs; ++k )
    {
        a_vec = a + k*a_ld;
        b_vec = b + k*b_ld;

        bli_zcopyv( conj, vec_len, a_vec, a_inc, b_vec, b_inc );
    }
}