libflame
revision_anchor
|
Functions | |
FLA_Error | FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 (FLA_Obj alpha, FLA_Obj tau, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A, FLA_Obj up, FLA_Obj a, FLA_Obj w) |
FLA_Error | FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w) |
FLA_Error | FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w) |
FLA_Error | FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w) |
FLA_Error | FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w) |
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 | ( | int | m_A, |
int | n_A, | ||
scomplex * | buff_tau, | ||
scomplex * | buff_alpha, | ||
scomplex * | buff_u, | ||
int | inc_u, | ||
scomplex * | buff_y, | ||
int | inc_y, | ||
scomplex * | buff_z, | ||
int | inc_z, | ||
scomplex * | buff_v, | ||
int | inc_v, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
scomplex * | buff_up, | ||
int | inc_up, | ||
scomplex * | buff_a, | ||
int | inc_a, | ||
scomplex * | buff_w, | ||
int | inc_w | ||
) |
References bli_cdot(), bli_csetv(), BLIS_CONJUGATE, F77_caxpy(), FLA_MINUS_ONE, and FLA_ZERO.
Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().
{ scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO ); scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE ); scomplex minus_inv_tau; scomplex conj_psi1; scomplex conj_nu1; scomplex conj_alpha1; int i; bli_csetv( m_A, buff_0, buff_w, inc_w ); bli_cdiv3( buff_m1, buff_tau, &minus_inv_tau ); for ( i = 0; i < n_A; ++i ) { scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A; scomplex* u = buff_u; scomplex* psi1 = buff_y + (i )*inc_y; scomplex* nu1 = buff_v + (i )*inc_v; scomplex* z = buff_z; scomplex* up = buff_up; scomplex* alpha1 = buff_a + (i )*inc_a; scomplex* w = buff_w; scomplex* alpha = buff_alpha; scomplex temp1; scomplex temp2; /*------------------------------------------------------------*/ bli_ccopyconj( psi1, &conj_psi1 ); bli_cmult3( alpha, &conj_psi1, &temp1 ); bli_ccopyconj( nu1, &conj_nu1 ); bli_cmult3( alpha, &conj_nu1, &temp2 ); // bli_caxpyv( BLIS_NO_CONJUGATE, // m_A, // &temp1, // u, inc_u, // a1, rs_A ); F77_caxpy( &m_A, &temp1, u, &inc_u, a1, &rs_A ); // bli_caxpyv( BLIS_NO_CONJUGATE, // m_A, // &temp2, // z, inc_z, // a1, rs_A ); F77_caxpy( &m_A, &temp2, z, &inc_z, a1, &rs_A ); bli_cdot( BLIS_CONJUGATE, m_A, a1, rs_A, up, inc_up, psi1 ); bli_ccopyconj( psi1, &conj_psi1 ); bli_cmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 ); bli_ccopyconj( alpha1, &conj_alpha1 ); // bli_caxpyv( BLIS_NO_CONJUGATE, // m_A, // conj_alpha1, // a1, rs_A, // w, inc_w ); F77_caxpy( &m_A, &conj_alpha1, a1, &rs_A, w, &inc_w ); /*------------------------------------------------------------*/ } return FLA_SUCCESS; }
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 | ( | int | m_A, |
int | n_A, | ||
double * | buff_tau, | ||
double * | buff_alpha, | ||
double * | buff_u, | ||
int | inc_u, | ||
double * | buff_y, | ||
int | inc_y, | ||
double * | buff_z, | ||
int | inc_z, | ||
double * | buff_v, | ||
int | inc_v, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
double * | buff_up, | ||
int | inc_up, | ||
double * | buff_a, | ||
int | inc_a, | ||
double * | buff_w, | ||
int | inc_w | ||
) |
References bli_d0(), bli_daxpyv(), bli_daxpyv2b(), bli_ddot(), bli_ddotsv2(), bli_dm1(), bli_dsetv(), BLIS_CONJUGATE, and BLIS_NO_CONJUGATE.
Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().
{ double zero = bli_d0(); double minus_one = bli_dm1(); double* restrict u = buff_u; double* restrict up = buff_up; double* restrict w = buff_w; double* restrict z = buff_z; double* restrict alpha = buff_alpha; double* restrict a1; double* restrict a2; double* restrict psi1; double* restrict psi2; double* restrict alpha1; double* restrict alpha2; double* restrict nu1; double* restrict nu2; double minus_inv_tau; double alpha_conj_psi1; double alpha_conj_psi2; double alpha_conj_nu1; double alpha_conj_nu2; int i; int n_run = n_A / 2; int n_left = n_A % 2; int twocs_A = 2*cs_A; int twoinc_y = 2*inc_y; int twoinc_a = 2*inc_a; int twoinc_v = 2*inc_v; bli_dsetv( m_A, &zero, buff_w, inc_w ); bli_ddiv3( &minus_one, buff_tau, &minus_inv_tau ); a1 = buff_A; a2 = buff_A + cs_A; psi1 = buff_y; psi2 = buff_y + inc_y; alpha1 = buff_a; alpha2 = buff_a + inc_a; nu1 = buff_v; nu2 = buff_v + inc_v; for ( i = 0; i < n_run; ++i ) { /*------------------------------------------------------------*/ bli_dmult3( alpha, psi1, &alpha_conj_psi1 ); bli_dmult3( alpha, psi2, &alpha_conj_psi2 ); bli_dmult3( alpha, nu1, &alpha_conj_nu1 ); bli_dmult3( alpha, nu2, &alpha_conj_nu2 ); /* Effective computation: A = A + alpha * ( u * y' + z * v' ); y = A' * up; a = a - conj(y) / tau; w = A * conj(a); */ bli_daxpyv2b( m_A, &alpha_conj_psi1, &alpha_conj_nu1, u, inc_u, z, inc_z, a1, rs_A ); bli_daxpyv2b( m_A, &alpha_conj_psi2, &alpha_conj_nu2, u, inc_u, z, inc_z, a2, rs_A ); bli_ddotsv2( BLIS_CONJUGATE, m_A, a1, rs_A, a2, rs_A, up, inc_up, &zero, psi1, psi2 ); bli_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 ); bli_dmult4( &minus_inv_tau, psi2, alpha2, alpha2 ); bli_daxpyv2b( m_A, alpha1, alpha2, a1, rs_A, a2, rs_A, w, inc_w ); /*------------------------------------------------------------*/ a1 += twocs_A; a2 += twocs_A; psi1 += twoinc_y; psi2 += twoinc_y; alpha1 += twoinc_a; alpha2 += twoinc_a; nu1 += twoinc_v; nu2 += twoinc_v; } if ( n_left == 1 ) { double rho1; bli_dmult3( alpha, psi1, &alpha_conj_psi1 ); bli_dmult3( alpha, nu1, &alpha_conj_nu1 ); bli_daxpyv2b( m_A, &alpha_conj_psi1, &alpha_conj_nu1, u, inc_u, z, inc_z, a1, rs_A ); bli_ddot( BLIS_CONJUGATE, m_A, a1, rs_A, up, inc_up, &rho1 ); bli_dscals( &zero, psi1 ); bli_dadd3( psi1, &rho1, psi1 ); bli_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 ); bli_daxpyv( BLIS_NO_CONJUGATE, m_A, alpha1, a1, rs_A, w, inc_w ); } return FLA_SUCCESS; }
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 | ( | int | m_A, |
int | n_A, | ||
float * | buff_tau, | ||
float * | buff_alpha, | ||
float * | buff_u, | ||
int | inc_u, | ||
float * | buff_y, | ||
int | inc_y, | ||
float * | buff_z, | ||
int | inc_z, | ||
float * | buff_v, | ||
int | inc_v, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
float * | buff_up, | ||
int | inc_up, | ||
float * | buff_a, | ||
int | inc_a, | ||
float * | buff_w, | ||
int | inc_w | ||
) |
References bli_ssetv(), F77_saxpy(), F77_sdot(), FLA_MINUS_ONE, and FLA_ZERO.
Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().
{ float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO ); float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE ); float minus_inv_tau; int i; bli_ssetv( m_A, buff_0, buff_w, inc_w ); minus_inv_tau = *buff_m1 / *buff_tau; for ( i = 0; i < n_A; ++i ) { float* a1 = buff_A + (i )*cs_A + (0 )*rs_A; float* u = buff_u; float* psi1 = buff_y + (i )*inc_y; float* nu1 = buff_v + (i )*inc_v; float* z = buff_z; float* up = buff_up; float* alpha1 = buff_a + (i )*inc_a; float* w = buff_w; float* alpha = buff_alpha; float temp1; float temp2; /*------------------------------------------------------------*/ // bli_smult3( alpha, psi1, &temp1 ); temp1 = *alpha * *psi1; // bli_smult3( alpha, nu1, &temp2 ); temp2 = *alpha * *nu1; // bli_saxpyv( BLIS_NO_CONJUGATE, // m_A, // &temp1, // u, inc_u, // a1, rs_A ); F77_saxpy( &m_A, &temp1, u, &inc_u, a1, &rs_A ); // bli_saxpyv( BLIS_NO_CONJUGATE, // m_A, // &temp2, // z, inc_z, // a1, rs_A ); F77_saxpy( &m_A, &temp2, z, &inc_z, a1, &rs_A ); // bli_sdot( BLIS_CONJUGATE, // m_A, // a1, rs_A, // u, inc_u, // psi1 ); *psi1 = F77_sdot( &m_A, a1, &rs_A, up, &inc_up ); // bli_smult4( &minus_inv_tau, psi1, alpha1, alpha1 ); *alpha1 = *alpha1 + minus_inv_tau * *psi1; // bli_saxpyv( BLIS_NO_CONJUGATE, // m_A, // alpha1, // a1, rs_A, // w, inc_w ); F77_saxpy( &m_A, alpha1, a1, &rs_A, w, &inc_w ); /*------------------------------------------------------------*/ } return FLA_SUCCESS; }
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 | ( | FLA_Obj | alpha, |
FLA_Obj | tau, | ||
FLA_Obj | u, | ||
FLA_Obj | y, | ||
FLA_Obj | z, | ||
FLA_Obj | v, | ||
FLA_Obj | A, | ||
FLA_Obj | up, | ||
FLA_Obj | a, | ||
FLA_Obj | w | ||
) |
References FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().
{ /* Effective computation: A = A + alpha * ( u * y' + z * v' ); y = A' * up; a = a - conj(y) / tau; w = A * conj(a); */ FLA_Datatype datatype; int m_A, n_A; int rs_A, cs_A; int inc_u, inc_y, inc_z, inc_v; int inc_up, inc_a, inc_w; datatype = FLA_Obj_datatype( A ); m_A = FLA_Obj_length( A ); n_A = FLA_Obj_width( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); inc_u = FLA_Obj_vector_inc( u ); inc_y = FLA_Obj_vector_inc( y ); inc_z = FLA_Obj_vector_inc( z ); inc_v = FLA_Obj_vector_inc( v ); inc_up = FLA_Obj_vector_inc( up ); inc_a = FLA_Obj_vector_inc( a ); inc_w = FLA_Obj_vector_inc( w ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); float* buff_u = FLA_FLOAT_PTR( u ); float* buff_y = FLA_FLOAT_PTR( y ); float* buff_z = FLA_FLOAT_PTR( z ); float* buff_v = FLA_FLOAT_PTR( v ); float* buff_up = FLA_FLOAT_PTR( up ); float* buff_a = FLA_FLOAT_PTR( a ); float* buff_w = FLA_FLOAT_PTR( w ); float* buff_tau = FLA_FLOAT_PTR( tau ); float* buff_alpha = FLA_FLOAT_PTR( alpha ); FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1( m_A, n_A, buff_tau, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A, buff_up, inc_up, buff_a, inc_a, buff_w, inc_w ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); double* buff_u = FLA_DOUBLE_PTR( u ); double* buff_y = FLA_DOUBLE_PTR( y ); double* buff_z = FLA_DOUBLE_PTR( z ); double* buff_v = FLA_DOUBLE_PTR( v ); double* buff_up = FLA_DOUBLE_PTR( up ); double* buff_a = FLA_DOUBLE_PTR( a ); double* buff_w = FLA_DOUBLE_PTR( w ); double* buff_tau = FLA_DOUBLE_PTR( tau ); double* buff_alpha = FLA_DOUBLE_PTR( alpha ); FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1( m_A, n_A, buff_tau, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A, buff_up, inc_up, buff_a, inc_a, buff_w, inc_w ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); scomplex* buff_u = FLA_COMPLEX_PTR( u ); scomplex* buff_y = FLA_COMPLEX_PTR( y ); scomplex* buff_z = FLA_COMPLEX_PTR( z ); scomplex* buff_v = FLA_COMPLEX_PTR( v ); scomplex* buff_up = FLA_COMPLEX_PTR( up ); scomplex* buff_a = FLA_COMPLEX_PTR( a ); scomplex* buff_w = FLA_COMPLEX_PTR( w ); scomplex* buff_tau = FLA_COMPLEX_PTR( tau ); scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha ); FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1( m_A, n_A, buff_tau, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A, buff_up, inc_up, buff_a, inc_a, buff_w, inc_w ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u ); dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y ); dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z ); dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v ); dcomplex* buff_up = FLA_DOUBLE_COMPLEX_PTR( up ); dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a ); dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w ); dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau ); dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha ); FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1( m_A, n_A, buff_tau, buff_alpha, buff_u, inc_u, buff_y, inc_y, buff_z, inc_z, buff_v, inc_v, buff_A, rs_A, cs_A, buff_up, inc_up, buff_a, inc_a, buff_w, inc_w ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 | ( | int | m_A, |
int | n_A, | ||
dcomplex * | buff_tau, | ||
dcomplex * | buff_alpha, | ||
dcomplex * | buff_u, | ||
int | inc_u, | ||
dcomplex * | buff_y, | ||
int | inc_y, | ||
dcomplex * | buff_z, | ||
int | inc_z, | ||
dcomplex * | buff_v, | ||
int | inc_v, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
dcomplex * | buff_up, | ||
int | inc_up, | ||
dcomplex * | buff_a, | ||
int | inc_a, | ||
dcomplex * | buff_w, | ||
int | inc_w | ||
) |
References bli_z0(), bli_zaxpyv(), bli_zaxpyv2b(), bli_zdot(), bli_zdotsv2(), bli_zm1(), bli_zsetv(), BLIS_CONJUGATE, and BLIS_NO_CONJUGATE.
Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().
{ dcomplex zero = bli_z0(); dcomplex minus_one = bli_zm1(); dcomplex* restrict u = buff_u; dcomplex* restrict up = buff_up; dcomplex* restrict w = buff_w; dcomplex* restrict z = buff_z; dcomplex* restrict alpha = buff_alpha; dcomplex* restrict a1; dcomplex* restrict a2; dcomplex* restrict psi1; dcomplex* restrict psi2; dcomplex* restrict alpha1; dcomplex* restrict alpha2; dcomplex* restrict nu1; dcomplex* restrict nu2; dcomplex minus_inv_tau; dcomplex conj_psi1; dcomplex conj_psi2; dcomplex conj_nu1; dcomplex conj_nu2; dcomplex conj_alpha1; dcomplex conj_alpha2; dcomplex alpha_conj_psi1; dcomplex alpha_conj_psi2; dcomplex alpha_conj_nu1; dcomplex alpha_conj_nu2; int i; int n_run = n_A / 2; int n_left = n_A % 2; int twocs_A = 2*cs_A; int twoinc_y = 2*inc_y; int twoinc_a = 2*inc_a; int twoinc_v = 2*inc_v; bli_zsetv( m_A, &zero, buff_w, inc_w ); bli_zdiv3( &minus_one, buff_tau, &minus_inv_tau ); a1 = buff_A; a2 = buff_A + cs_A; psi1 = buff_y; psi2 = buff_y + inc_y; alpha1 = buff_a; alpha2 = buff_a + inc_a; nu1 = buff_v; nu2 = buff_v + inc_v; for ( i = 0; i < n_run; ++i ) { /*------------------------------------------------------------*/ bli_zcopyconj( psi1, &conj_psi1 ); bli_zcopyconj( psi2, &conj_psi2 ); bli_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 ); bli_zmult3( alpha, &conj_psi2, &alpha_conj_psi2 ); bli_zcopyconj( nu1, &conj_nu1 ); bli_zcopyconj( nu2, &conj_nu2 ); bli_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 ); bli_zmult3( alpha, &conj_nu2, &alpha_conj_nu2 ); bli_zaxpyv2b( m_A, &alpha_conj_psi1, &alpha_conj_nu1, u, inc_u, z, inc_z, a1, rs_A ); bli_zaxpyv2b( m_A, &alpha_conj_psi2, &alpha_conj_nu2, u, inc_u, z, inc_z, a2, rs_A ); bli_zdotsv2( BLIS_CONJUGATE, m_A, a1, rs_A, a2, rs_A, up, inc_up, &zero, psi1, psi2 ); bli_zcopyconj( psi1, &conj_psi1 ); bli_zcopyconj( psi2, &conj_psi2 ); bli_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 ); bli_zmult4( &minus_inv_tau, &conj_psi2, alpha2, alpha2 ); bli_zcopyconj( alpha1, &conj_alpha1 ); bli_zcopyconj( alpha2, &conj_alpha2 ); bli_zaxpyv2b( m_A, &conj_alpha1, &conj_alpha2, a1, rs_A, a2, rs_A, w, inc_w ); /*------------------------------------------------------------*/ a1 += twocs_A; a2 += twocs_A; psi1 += twoinc_y; psi2 += twoinc_y; alpha1 += twoinc_a; alpha2 += twoinc_a; nu1 += twoinc_v; nu2 += twoinc_v; } if ( n_left == 1 ) { dcomplex rho1; bli_zcopyconj( psi1, &conj_psi1 ); bli_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 ); bli_zcopyconj( nu1, &conj_nu1 ); bli_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 ); bli_zaxpyv2b( m_A, &alpha_conj_psi1, &alpha_conj_nu1, u, inc_u, z, inc_z, a1, rs_A ); bli_zdot( BLIS_CONJUGATE, m_A, a1, rs_A, up, inc_up, &rho1 ); bli_zscals( &zero, psi1 ); bli_zadd3( psi1, &rho1, psi1 ); bli_zcopyconj( psi1, &conj_psi1 ); bli_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 ); bli_zcopyconj( alpha1, &conj_alpha1 ); bli_zaxpyv( BLIS_NO_CONJUGATE, m_A, &conj_alpha1, a1, rs_A, w, inc_w ); } return FLA_SUCCESS; }