libflame  revision_anchor
Functions
bli_gemv.c File Reference

(r)

Functions

void bli_sgemv (trans_t transa, conj_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
void bli_dgemv (trans_t transa, conj_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
void bli_cgemv (trans_t transa, conj_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
void bli_zgemv (trans_t transa, conj_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
void bli_sgemv_blas (trans_t transa, int m, int n, float *alpha, float *a, int lda, float *x, int incx, float *beta, float *y, int incy)
void bli_dgemv_blas (trans_t transa, int m, int n, double *alpha, double *a, int lda, double *x, int incx, double *beta, double *y, int incy)
void bli_cgemv_blas (trans_t transa, int m, int n, scomplex *alpha, scomplex *a, int lda, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
void bli_zgemv_blas (trans_t transa, int m, int n, dcomplex *alpha, dcomplex *a, int lda, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)

Function Documentation

void bli_cgemv ( trans_t  transa,
conj_t  conjx,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  x,
int  incx,
scomplex *  beta,
scomplex *  y,
int  incy 
)

References bli_c0(), bli_c1(), bli_callocv(), bli_caxpyv(), bli_cconjv(), bli_ccopyv(), bli_ccreate_contigm(), bli_cfree(), bli_cfree_contigm(), bli_cgemv_blas(), bli_cscalv(), bli_does_trans(), bli_is_conj(), bli_is_conjnotrans(), bli_is_row_storage(), bli_zero_dim2(), BLIS_CONJUGATE, BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.

Referenced by FLA_Accum_T_UT_fc_opc_var1(), FLA_Accum_T_UT_fr_opc_var1(), FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Apply_HUD_UT_l_opc_var1(), FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofc_var4(), FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opc_var3(), FLA_Bidiag_UT_u_step_opc_var4(), FLA_Bidiag_UT_u_step_opc_var5(), FLA_CAQR2_UT_opc_var1(), FLA_Chol_l_opc_var2(), FLA_Chol_u_opc_var2(), FLA_Eig_gest_il_opc_var2(), FLA_Eig_gest_il_opc_var3(), FLA_Eig_gest_iu_opc_var2(), FLA_Eig_gest_iu_opc_var3(), FLA_Eig_gest_nl_opc_var2(), FLA_Eig_gest_nu_opc_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_opc_var1(), FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opc_var5(), FLA_LQ_UT_opc_var2(), FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opc_var4(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), FLA_Lyap_h_opc_var2(), FLA_Lyap_h_opc_var3(), FLA_Lyap_n_opc_var2(), FLA_Lyap_n_opc_var3(), FLA_QR2_UT_opc_var1(), FLA_QR_UT_opc_var2(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opc_var3(), FLA_Ttmm_l_opc_var2(), and FLA_Ttmm_u_opc_var2().

{
    // The caller's matrix pointer and strides are kept so they can be handed
    // back to bli_cfree_contigm() once the product has been computed.
    scomplex* a_orig    = a;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    scomplex  c_zero    = bli_c0();
    scomplex  c_one     = bli_c1();
    scomplex* x_tmp;
    scomplex* ax_tmp;
    int       ld_a, inc_a;
    int       len_x;
    int       inc_x_tmp;
    int       inc_ax;
    int       use_x_tmp;

    // When bli_zero_dim2() flags the dimensions there is no product to
    // form: only apply beta to y and leave.
    if ( bli_zero_dim2( m, n ) )
    {
        int len_y = bli_does_trans( transa ) ? n : m;

        bli_cscalv( BLIS_NO_CONJUGATE, len_y, beta, y, incy );
        return;
    }

    // Switch to a temporary contiguous copy of A when one is required;
    // a, a_rs and a_cs are passed by address so they can be redirected.
    bli_ccreate_contigm( m, n,
                         a_orig, a_rs_orig, a_cs_orig,
                         &a,     &a_rs,     &a_cs );

    // Begin with the column-major reading of the strides.
    ld_a  = a_cs;
    inc_a = a_rs;

    // A row-major A is fed to the column-major BLAS by exchanging the
    // dimensions and strides and flipping the transposition parameter.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( m, n );
        bli_swap_ints( ld_a, inc_a );
        bli_toggle_trans( transa );
    }

    // x is used in place unless x must be conjugated or A is conjugated
    // without transposition. transa has its final value from here on, so
    // the decision is made once and reused when the scratch copy is freed.
    use_x_tmp = ( bli_is_conj( conjx ) || bli_is_conjnotrans( transa ) );
    x_tmp     = x;
    inc_x_tmp = incx;

    if ( use_x_tmp )
    {
        len_x = bli_does_trans( transa ) ? m : n;

        x_tmp     = bli_callocv( len_x );
        inc_x_tmp = 1;

        // Copy x into the unit-stride scratch vector, applying conjx.
        bli_ccopyv( conjx, len_x, x, incx, x_tmp, inc_x_tmp );
    }

    if ( bli_is_conjnotrans( transa ) )
    {
        // conj(A) * x is obtained as conj( A * conj(x) ), so A itself is
        // never conjugated. The intermediate product is conformal to y,
        // which has length m because no transposition takes place.
        ax_tmp = bli_callocv( m );
        inc_ax = 1;

        // Conjugate the scratch copy of x in place.
        bli_cconjv( n, x_tmp, inc_x_tmp );

        // ax_tmp := A * (scratch x)
        bli_cgemv_blas( BLIS_NO_TRANSPOSE, m, n,
                        &c_one,
                        a,      ld_a,
                        x_tmp,  inc_x_tmp,
                        &c_zero,
                        ax_tmp, inc_ax );

        // y := beta * y
        bli_cscalv( BLIS_NO_CONJUGATE, m, beta, y, incy );

        // y := y + alpha * conj( ax_tmp )
        bli_caxpyv( BLIS_CONJUGATE, m, alpha, ax_tmp, inc_ax, y, incy );

        // The intermediate product is no longer needed.
        bli_cfree( ax_tmp );
    }
    else
    {
        // notrans, trans, or conjtrans: a single BLAS call does the work.
        bli_cgemv_blas( transa, m, n,
                        alpha,
                        a,     ld_a,
                        x_tmp, inc_x_tmp,
                        beta,
                        y,     incy );
    }

    // Release the scratch copy of x if one was made above.
    if ( use_x_tmp )
    {
        bli_cfree( x_tmp );
    }

    // Release the temporary contiguous matrix.
    bli_cfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );
}
void bli_cgemv_blas ( trans_t  transa,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  lda,
scomplex *  x,
int  incx,
scomplex *  beta,
scomplex *  y,
int  incy 
)

References bli_param_map_to_netlib_trans(), cblas_cgemv(), CblasColMajor, and F77_cgemv().

Referenced by bli_cgemv().

{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS build: translate transa to the CBLAS enum and call cblas_cgemv()
    // with column-major ordering. alpha and beta are forwarded as pointers.
    enum CBLAS_TRANSPOSE trans_cblas;

    bli_param_map_to_netlib_trans( transa, &trans_cblas );

    cblas_cgemv( CblasColMajor, trans_cblas,
                 m, n,
                 alpha,
                 a, lda,
                 x, incx,
                 beta,
                 y, incy );
#else
    // Fortran-77 build: translate transa to a character code and pass the
    // integer arguments by address.
    char trans_f77;

    bli_param_map_to_netlib_trans( transa, &trans_f77 );

    F77_cgemv( &trans_f77,
               &m, &n,
               alpha,
               a, &lda,
               x, &incx,
               beta,
               y, &incy );
#endif
}
void bli_dgemv ( trans_t  transa,
conj_t  conjx,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  x,
int  incx,
double *  beta,
double *  y,
int  incy 
)

References bli_dcreate_contigm(), bli_dfree_contigm(), bli_dgemv_blas(), bli_does_trans(), bli_dscalv(), bli_is_row_storage(), bli_zero_dim2(), and BLIS_NO_CONJUGATE.

Referenced by FLA_Accum_T_UT_fc_opd_var1(), FLA_Accum_T_UT_fr_opd_var1(), FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Apply_HUD_UT_l_opd_var1(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofd_var4(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_opd_var3(), FLA_Bidiag_UT_u_step_opd_var4(), FLA_Bidiag_UT_u_step_opd_var5(), FLA_CAQR2_UT_opd_var1(), FLA_Chol_l_opd_var2(), FLA_Chol_u_opd_var2(), FLA_Eig_gest_il_opd_var2(), FLA_Eig_gest_il_opd_var3(), FLA_Eig_gest_iu_opd_var2(), FLA_Eig_gest_iu_opd_var3(), FLA_Eig_gest_nl_opd_var2(), FLA_Eig_gest_nu_opd_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_opd_var1(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_opd_var5(), FLA_LQ_UT_opd_var2(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_opd_var4(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), FLA_Lyap_h_opd_var2(), FLA_Lyap_h_opd_var3(), FLA_Lyap_n_opd_var2(), FLA_Lyap_n_opd_var3(), FLA_QR2_UT_opd_var1(), FLA_QR_UT_opd_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_opd_var3(), FLA_Ttmm_l_opd_var2(), and FLA_Ttmm_u_opd_var2().

{
    // The caller's matrix pointer and strides are kept so they can be handed
    // back to bli_dfree_contigm() at the end. conjx is not consulted in this
    // routine.
    double*   a_orig    = a;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       ld_a, inc_a;

    // When bli_zero_dim2() flags the dimensions there is no product to
    // form: only apply beta to y and leave.
    if ( bli_zero_dim2( m, n ) )
    {
        int len_y = bli_does_trans( transa ) ? n : m;

        bli_dscalv( BLIS_NO_CONJUGATE, len_y, beta, y, incy );
        return;
    }

    // Switch to a temporary contiguous copy of A when one is required;
    // a, a_rs and a_cs are passed by address so they can be redirected.
    bli_dcreate_contigm( m, n,
                         a_orig, a_rs_orig, a_cs_orig,
                         &a,     &a_rs,     &a_cs );

    // Begin with the column-major reading of the strides.
    ld_a  = a_cs;
    inc_a = a_rs;

    // A row-major A is fed to the column-major BLAS by exchanging the
    // dimensions and strides and flipping the transposition parameter.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( m, n );
        bli_swap_ints( ld_a, inc_a );
        bli_toggle_trans( transa );
    }

    // Hand the (possibly re-interpreted) problem to the BLAS wrapper.
    bli_dgemv_blas( transa, m, n,
                    alpha,
                    a, ld_a,
                    x, incx,
                    beta,
                    y, incy );

    // Release the temporary contiguous matrix.
    bli_dfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );
}
void bli_dgemv_blas ( trans_t  transa,
int  m,
int  n,
double *  alpha,
double *  a,
int  lda,
double *  x,
int  incx,
double *  beta,
double *  y,
int  incy 
)

References bli_param_map_to_netlib_trans(), cblas_dgemv(), CblasColMajor, and F77_dgemv().

Referenced by bli_dgemv().

{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS build: translate transa to the CBLAS enum and call cblas_dgemv()
    // with column-major ordering. alpha and beta are passed by value here.
    enum CBLAS_TRANSPOSE trans_cblas;

    bli_param_map_to_netlib_trans( transa, &trans_cblas );

    cblas_dgemv( CblasColMajor, trans_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 x, incx,
                 *beta,
                 y, incy );
#else
    // Fortran-77 build: translate transa to a character code and pass the
    // integer arguments by address; alpha and beta stay as pointers.
    char trans_f77;

    bli_param_map_to_netlib_trans( transa, &trans_f77 );

    F77_dgemv( &trans_f77,
               &m, &n,
               alpha,
               a, &lda,
               x, &incx,
               beta,
               y, &incy );
#endif
}
void bli_sgemv ( trans_t  transa,
conj_t  conjx,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  x,
int  incx,
float *  beta,
float *  y,
int  incy 
)

References bli_does_trans(), bli_is_row_storage(), bli_screate_contigm(), bli_sfree_contigm(), bli_sgemv_blas(), bli_sscalv(), bli_zero_dim2(), and BLIS_NO_CONJUGATE.

Referenced by FLA_Accum_T_UT_fc_ops_var1(), FLA_Accum_T_UT_fr_ops_var1(), FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Apply_HUD_UT_l_ops_var1(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofs_var4(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_ops_var3(), FLA_Bidiag_UT_u_step_ops_var4(), FLA_Bidiag_UT_u_step_ops_var5(), FLA_CAQR2_UT_ops_var1(), FLA_Chol_l_ops_var2(), FLA_Chol_u_ops_var2(), FLA_Eig_gest_il_ops_var2(), FLA_Eig_gest_il_ops_var3(), FLA_Eig_gest_iu_ops_var2(), FLA_Eig_gest_iu_ops_var3(), FLA_Eig_gest_nl_ops_var2(), FLA_Eig_gest_nu_ops_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ops_var1(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_ops_var5(), FLA_LQ_UT_ops_var2(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_ops_var4(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), FLA_Lyap_h_ops_var2(), FLA_Lyap_h_ops_var3(), FLA_Lyap_n_ops_var2(), FLA_Lyap_n_ops_var3(), FLA_QR2_UT_ops_var1(), FLA_QR_UT_ops_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), FLA_Tridiag_UT_l_step_ofs_var3(), FLA_Tridiag_UT_l_step_ops_var1(), FLA_Tridiag_UT_l_step_ops_var2(), FLA_Tridiag_UT_l_step_ops_var3(), FLA_Ttmm_l_ops_var2(), and FLA_Ttmm_u_ops_var2().

{
    // The caller's matrix pointer and strides are kept so they can be handed
    // back to bli_sfree_contigm() at the end. conjx is not consulted in this
    // routine.
    float*    a_orig    = a;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    int       ld_a, inc_a;

    // When bli_zero_dim2() flags the dimensions there is no product to
    // form: only apply beta to y and leave.
    if ( bli_zero_dim2( m, n ) )
    {
        int len_y = bli_does_trans( transa ) ? n : m;

        bli_sscalv( BLIS_NO_CONJUGATE, len_y, beta, y, incy );
        return;
    }

    // Switch to a temporary contiguous copy of A when one is required;
    // a, a_rs and a_cs are passed by address so they can be redirected.
    bli_screate_contigm( m, n,
                         a_orig, a_rs_orig, a_cs_orig,
                         &a,     &a_rs,     &a_cs );

    // Begin with the column-major reading of the strides.
    ld_a  = a_cs;
    inc_a = a_rs;

    // A row-major A is fed to the column-major BLAS by exchanging the
    // dimensions and strides and flipping the transposition parameter.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( m, n );
        bli_swap_ints( ld_a, inc_a );
        bli_toggle_trans( transa );
    }

    // Hand the (possibly re-interpreted) problem to the BLAS wrapper.
    bli_sgemv_blas( transa, m, n,
                    alpha,
                    a, ld_a,
                    x, incx,
                    beta,
                    y, incy );

    // Release the temporary contiguous matrix.
    bli_sfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );
}
void bli_sgemv_blas ( trans_t  transa,
int  m,
int  n,
float *  alpha,
float *  a,
int  lda,
float *  x,
int  incx,
float *  beta,
float *  y,
int  incy 
)

References bli_param_map_to_netlib_trans(), cblas_sgemv(), CblasColMajor, and F77_sgemv().

Referenced by bli_sgemv().

{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS build: translate transa to the CBLAS enum and call cblas_sgemv()
    // with column-major ordering. alpha and beta are passed by value here.
    enum CBLAS_TRANSPOSE trans_cblas;

    bli_param_map_to_netlib_trans( transa, &trans_cblas );

    cblas_sgemv( CblasColMajor, trans_cblas,
                 m, n,
                 *alpha,
                 a, lda,
                 x, incx,
                 *beta,
                 y, incy );
#else
    // Fortran-77 build: translate transa to a character code and pass the
    // integer arguments by address; alpha and beta stay as pointers.
    char trans_f77;

    bli_param_map_to_netlib_trans( transa, &trans_f77 );

    F77_sgemv( &trans_f77,
               &m, &n,
               alpha,
               a, &lda,
               x, &incx,
               beta,
               y, &incy );
#endif
}
void bli_zgemv ( trans_t  transa,
conj_t  conjx,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  x,
int  incx,
dcomplex *  beta,
dcomplex *  y,
int  incy 
)

References bli_does_trans(), bli_is_conj(), bli_is_conjnotrans(), bli_is_row_storage(), bli_z0(), bli_z1(), bli_zallocv(), bli_zaxpyv(), bli_zconjv(), bli_zcopyv(), bli_zcreate_contigm(), bli_zero_dim2(), bli_zfree(), bli_zfree_contigm(), bli_zgemv_blas(), bli_zscalv(), BLIS_CONJUGATE, BLIS_NO_CONJUGATE, and BLIS_NO_TRANSPOSE.

Referenced by FLA_Accum_T_UT_fc_opz_var1(), FLA_Accum_T_UT_fr_opz_var1(), FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Apply_HUD_UT_l_opz_var1(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Bidiag_UT_u_step_ofz_var4(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Bidiag_UT_u_step_opz_var3(), FLA_Bidiag_UT_u_step_opz_var4(), FLA_Bidiag_UT_u_step_opz_var5(), FLA_CAQR2_UT_opz_var1(), FLA_Chol_l_opz_var2(), FLA_Chol_u_opz_var2(), FLA_Eig_gest_il_opz_var2(), FLA_Eig_gest_il_opz_var3(), FLA_Eig_gest_iu_opz_var2(), FLA_Eig_gest_iu_opz_var3(), FLA_Eig_gest_nl_opz_var2(), FLA_Eig_gest_nu_opz_var2(), FLA_Gemv_external(), FLA_Gemvc_external(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), FLA_Hess_UT_step_ofz_var4(), FLA_Hess_UT_step_opz_var1(), FLA_Hess_UT_step_opz_var2(), FLA_Hess_UT_step_opz_var3(), FLA_Hess_UT_step_opz_var4(), FLA_Hess_UT_step_opz_var5(), FLA_LQ_UT_opz_var2(), FLA_LU_nopiv_opz_var2(), FLA_LU_nopiv_opz_var3(), FLA_LU_nopiv_opz_var4(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), FLA_Lyap_h_opz_var2(), FLA_Lyap_h_opz_var3(), FLA_Lyap_n_opz_var2(), FLA_Lyap_n_opz_var3(), FLA_QR2_UT_opz_var1(), FLA_QR_UT_opz_var2(), FLA_Tridiag_UT_l_step_ofz_var2(), FLA_Tridiag_UT_l_step_ofz_var3(), FLA_Tridiag_UT_l_step_opz_var1(), FLA_Tridiag_UT_l_step_opz_var2(), FLA_Tridiag_UT_l_step_opz_var3(), FLA_Ttmm_l_opz_var2(), and FLA_Ttmm_u_opz_var2().

{
    // The caller's matrix pointer and strides are kept so they can be handed
    // back to bli_zfree_contigm() once the product has been computed.
    dcomplex* a_orig    = a;
    int       a_rs_orig = a_rs;
    int       a_cs_orig = a_cs;
    dcomplex  z_zero    = bli_z0();
    dcomplex  z_one     = bli_z1();
    dcomplex* x_tmp;
    dcomplex* ax_tmp;
    int       ld_a, inc_a;
    int       len_x;
    int       inc_x_tmp;
    int       inc_ax;
    int       use_x_tmp;

    // When bli_zero_dim2() flags the dimensions there is no product to
    // form: only apply beta to y and leave.
    if ( bli_zero_dim2( m, n ) )
    {
        int len_y = bli_does_trans( transa ) ? n : m;

        bli_zscalv( BLIS_NO_CONJUGATE, len_y, beta, y, incy );
        return;
    }

    // Switch to a temporary contiguous copy of A when one is required;
    // a, a_rs and a_cs are passed by address so they can be redirected.
    bli_zcreate_contigm( m, n,
                         a_orig, a_rs_orig, a_cs_orig,
                         &a,     &a_rs,     &a_cs );

    // Begin with the column-major reading of the strides.
    ld_a  = a_cs;
    inc_a = a_rs;

    // A row-major A is fed to the column-major BLAS by exchanging the
    // dimensions and strides and flipping the transposition parameter.
    if ( bli_is_row_storage( a_rs, a_cs ) )
    {
        bli_swap_ints( m, n );
        bli_swap_ints( ld_a, inc_a );
        bli_toggle_trans( transa );
    }

    // x is used in place unless x must be conjugated or A is conjugated
    // without transposition. transa has its final value from here on, so
    // the decision is made once and reused when the scratch copy is freed.
    use_x_tmp = ( bli_is_conj( conjx ) || bli_is_conjnotrans( transa ) );
    x_tmp     = x;
    inc_x_tmp = incx;

    if ( use_x_tmp )
    {
        len_x = bli_does_trans( transa ) ? m : n;

        x_tmp     = bli_zallocv( len_x );
        inc_x_tmp = 1;

        // Copy x into the unit-stride scratch vector, applying conjx.
        bli_zcopyv( conjx, len_x, x, incx, x_tmp, inc_x_tmp );
    }

    if ( bli_is_conjnotrans( transa ) )
    {
        // conj(A) * x is obtained as conj( A * conj(x) ), so A itself is
        // never conjugated. The intermediate product is conformal to y,
        // which has length m because no transposition takes place.
        ax_tmp = bli_zallocv( m );
        inc_ax = 1;

        // Conjugate the scratch copy of x in place.
        bli_zconjv( n, x_tmp, inc_x_tmp );

        // ax_tmp := A * (scratch x)
        bli_zgemv_blas( BLIS_NO_TRANSPOSE, m, n,
                        &z_one,
                        a,      ld_a,
                        x_tmp,  inc_x_tmp,
                        &z_zero,
                        ax_tmp, inc_ax );

        // y := beta * y
        bli_zscalv( BLIS_NO_CONJUGATE, m, beta, y, incy );

        // y := y + alpha * conj( ax_tmp )
        bli_zaxpyv( BLIS_CONJUGATE, m, alpha, ax_tmp, inc_ax, y, incy );

        // The intermediate product is no longer needed.
        bli_zfree( ax_tmp );
    }
    else
    {
        // notrans, trans, or conjtrans: a single BLAS call does the work.
        bli_zgemv_blas( transa, m, n,
                        alpha,
                        a,     ld_a,
                        x_tmp, inc_x_tmp,
                        beta,
                        y,     incy );
    }

    // Release the scratch copy of x if one was made above.
    if ( use_x_tmp )
    {
        bli_zfree( x_tmp );
    }

    // Release the temporary contiguous matrix.
    bli_zfree_contigm( a_orig, a_rs_orig, a_cs_orig,
                       &a,     &a_rs,     &a_cs );
}
void bli_zgemv_blas ( trans_t  transa,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  lda,
dcomplex *  x,
int  incx,
dcomplex *  beta,
dcomplex *  y,
int  incy 
)

References bli_param_map_to_netlib_trans(), cblas_zgemv(), CblasColMajor, and F77_zgemv().

Referenced by bli_zgemv().

{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
    // CBLAS build: translate transa to the CBLAS enum and call cblas_zgemv()
    // with column-major ordering. alpha and beta are forwarded as pointers.
    enum CBLAS_TRANSPOSE trans_cblas;

    bli_param_map_to_netlib_trans( transa, &trans_cblas );

    cblas_zgemv( CblasColMajor, trans_cblas,
                 m, n,
                 alpha,
                 a, lda,
                 x, incx,
                 beta,
                 y, incy );
#else
    // Fortran-77 build: translate transa to a character code and pass the
    // integer arguments by address.
    char trans_f77;

    bli_param_map_to_netlib_trans( transa, &trans_f77 );

    F77_zgemv( &trans_f77,
               &m, &n,
               alpha,
               a, &lda,
               x, &incx,
               beta,
               y, &incy );
#endif
}