libflame
revision_anchor
|
Functions | |
FLA_Error | FLA_Fused_UZhu_ZUhu_opt_var1 (FLA_Obj delta, FLA_Obj U, FLA_Obj Z, FLA_Obj t, FLA_Obj u, FLA_Obj w) |
FLA_Error | FLA_Fused_UZhu_ZUhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w) |
FLA_Error | FLA_Fused_UZhu_ZUhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w) |
FLA_Error | FLA_Fused_UZhu_ZUhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w) |
FLA_Error | FLA_Fused_UZhu_ZUhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w) |
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 | ( | int | m_U, |
int | n_U, | ||
scomplex * | buff_delta, | ||
scomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
scomplex * | buff_Z, | ||
int | rs_Z, | ||
int | cs_Z, | ||
scomplex * | buff_t, | ||
int | inc_t, | ||
scomplex * | buff_u, | ||
int | inc_u, | ||
scomplex * | buff_w, | ||
int | inc_w | ||
) |
References bli_cdot(), BLIS_CONJUGATE, and F77_caxpy().
Referenced by FLA_Fused_UZhu_ZUhu_opt_var1() and FLA_Tridiag_UT_l_step_ofc_var3().
{
  /*
     Single-precision complex variant; handles one column of U and Z per
     iteration. For column j (u1 in U, z1 in Z):

       rho_zu = bli_cdot( BLIS_CONJUGATE, z1, u )
       rho_uu = bli_cdot( BLIS_CONJUGATE, u1, u )
       t[j]   = rho_uu            (stored before any scaling)
       rho_zu, rho_uu are passed to bli_cscals together with delta
       w is updated by two F77_caxpy calls: rho_zu with u1, rho_uu with z1

     NOTE(review): bli_cscals presumably scales its second argument by
     *delta (the real single-precision variant does the scaling with an
     explicit multiply) -- confirm against the macro definition.
  */
  scomplex* delta = buff_delta;
  scomplex* u     = buff_u;
  scomplex* w     = buff_w;
  int       j;

  for ( j = 0; j < n_U; ++j )
  {
    /* Start of column j in U and Z, and the j-th entry of t. */
    scomplex* u1   = &buff_U[ j*cs_U ];
    scomplex* z1   = &buff_Z[ j*cs_Z ];
    scomplex* tau1 = &buff_t[ j*inc_t ];
    scomplex  rho_zu;
    scomplex  rho_uu;

    bli_cdot( BLIS_CONJUGATE,
              m_U,
              z1, rs_Z,
              u,  inc_u,
              &rho_zu );

    bli_cdot( BLIS_CONJUGATE,
              m_U,
              u1, rs_U,
              u,  inc_u,
              &rho_uu );

    /* t receives the unscaled dot product. */
    *tau1 = rho_uu;

    bli_cscals( delta, &rho_zu );
    bli_cscals( delta, &rho_uu );

    /* Fortran-style BLAS calls: every scalar is passed by address. */
    F77_caxpy( &m_U, &rho_zu, u1, &rs_U, w, &inc_w );
    F77_caxpy( &m_U, &rho_uu, z1, &rs_Z, w, &inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 | ( | int | m_U, |
int | n_U, | ||
double * | buff_delta, | ||
double * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
double * | buff_Z, | ||
int | rs_Z, | ||
int | cs_Z, | ||
double * | buff_t, | ||
int | inc_t, | ||
double * | buff_u, | ||
int | inc_u, | ||
double * | buff_w, | ||
int | inc_w | ||
) |
References bli_d0(), bli_daxpyv(), bli_daxpyv2b(), bli_ddot(), bli_ddotaxpy(), bli_ddotsv2(), bli_ddotv2axpyv2b(), BLIS_CONJUGATE, and BLIS_NO_CONJUGATE.
Referenced by FLA_Fused_UZhu_ZUhu_opt_var1() and FLA_Tridiag_UT_l_step_ofd_var3().
{
  /*
     Double-precision real variant. Columns of U and Z are processed two at
     a time with fused kernels; a single trailing column (odd n_U) is
     handled separately afterwards.

     buff_delta is never read in this routine. The coefficients handed to
     the axpy-type kernels are instead passed through bli_dneg1 first.
     NOTE(review): bli_dneg1 presumably negates in place, which would make
     this variant agree with the delta-scaled variants only for
     delta == -1 -- confirm with the callers.

     Column pointers are derived inside the loops from the column index, so
     no pointer is ever formed for a column that is not actually processed.
  */
  double  zero    = bli_d0();
  int     n_pairs = n_U / 2;
  int     n_tail  = n_U % 2;
  double* u       = buff_u;
  double* w       = buff_w;
  int     i;

  for ( i = 0; i < n_pairs; ++i )
  {
    /* Columns 2i and 2i+1 of U and Z, and entries 2i and 2i+1 of t. */
    double* u1   = buff_U + ( 2*i )*cs_U;
    double* u2   = u1 + cs_U;
    double* z1   = buff_Z + ( 2*i )*cs_Z;
    double* z2   = z1 + cs_Z;
    double* tau1 = buff_t + ( 2*i )*inc_t;
    double* tau2 = tau1 + inc_t;

    double  rho_z1u;
    double  rho_z2u;
    double  rho_u1u;
    double  rho_u2u;

    /* Two dot products against u in one pass (columns z1, z2). */
    bli_ddotsv2( BLIS_CONJUGATE,
                 m_U,
                 z1, rs_Z,
                 z2, rs_Z,
                 u,  inc_u,
                 &zero,
                 &rho_z1u,
                 &rho_z2u );

    bli_dneg1( &rho_z1u );
    bli_dneg1( &rho_z2u );

    /* Fused kernel: takes rho_z1u/rho_z2u as inputs, fills
       rho_u1u/rho_u2u, and updates w using columns u1, u2. */
    bli_ddotv2axpyv2b( m_U,
                       u1, rs_U,
                       u2, rs_U,
                       u,  inc_u,
                       &rho_z1u,
                       &rho_z2u,
                       &rho_u1u,
                       &rho_u2u,
                       w,  inc_w );

    /* t receives the values before bli_dneg1 is applied to them. */
    *tau1 = rho_u1u;
    *tau2 = rho_u2u;

    bli_dneg1( &rho_u1u );
    bli_dneg1( &rho_u2u );

    bli_daxpyv2b( m_U,
                  &rho_u1u,
                  &rho_u2u,
                  z1, rs_Z,
                  z2, rs_Z,
                  w,  inc_w );
  }

  /* n_tail is 1 exactly when n_U is positive and odd. */
  if ( n_tail > 0 )
  {
    /* The one column left after the paired sweep: index 2*n_pairs. */
    double* u1   = buff_U + ( 2*n_pairs )*cs_U;
    double* z1   = buff_Z + ( 2*n_pairs )*cs_Z;
    double* tau1 = buff_t + ( 2*n_pairs )*inc_t;

    double  rho_z1u;
    double  rho_u1u;

    bli_ddot( BLIS_CONJUGATE,
              m_U,
              z1, rs_Z,
              u,  inc_u,
              &rho_z1u );
    bli_dneg1( &rho_z1u );

    bli_ddotaxpy( m_U,
                  u1, rs_U,
                  u,  inc_u,
                  &rho_z1u,
                  &rho_u1u,
                  w,  inc_w );

    *tau1 = rho_u1u;

    bli_dneg1( &rho_u1u );

    bli_daxpyv( BLIS_NO_CONJUGATE,
                m_U,
                &rho_u1u,
                z1, rs_Z,
                w,  inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 | ( | int | m_U, |
int | n_U, | ||
float * | buff_delta, | ||
float * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
float * | buff_Z, | ||
int | rs_Z, | ||
int | cs_Z, | ||
float * | buff_t, | ||
int | inc_t, | ||
float * | buff_u, | ||
int | inc_u, | ||
float * | buff_w, | ||
int | inc_w | ||
) |
References F77_saxpy() and F77_sdot().
Referenced by FLA_Fused_UZhu_ZUhu_opt_var1() and FLA_Tridiag_UT_l_step_ofs_var3().
{
  /*
     Single-precision real variant; handles one column of U and Z per
     iteration using Fortran-style BLAS calls (scalars passed by address).
     For column j (u1 in U, z1 in Z):

       rho_zu = sdot( z1, u )        rho_uu = sdot( u1, u )
       t[j]   = rho_uu               (stored before scaling)
       w     += ( delta * rho_zu ) * u1  via saxpy
       w     += ( delta * rho_uu ) * z1  via saxpy
  */
  float* delta = buff_delta;
  float* u     = buff_u;
  float* w     = buff_w;
  int    j;

  for ( j = 0; j < n_U; ++j )
  {
    /* Start of column j in U and Z, and the j-th entry of t. */
    float* u1   = &buff_U[ j*cs_U ];
    float* z1   = &buff_Z[ j*cs_Z ];
    float* tau1 = &buff_t[ j*inc_t ];
    float  rho_zu;
    float  rho_uu;

    rho_zu = F77_sdot( &m_U, z1, &rs_Z, u, &inc_u );
    rho_uu = F77_sdot( &m_U, u1, &rs_U, u, &inc_u );

    /* t receives the unscaled dot product. */
    *tau1 = rho_uu;

    /* delta is re-read on every pass, exactly as each column is scaled. */
    rho_zu = rho_zu * ( *delta );
    rho_uu = rho_uu * ( *delta );

    F77_saxpy( &m_U, &rho_zu, u1, &rs_U, w, &inc_w );
    F77_saxpy( &m_U, &rho_uu, z1, &rs_Z, w, &inc_w );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 | ( | FLA_Obj | delta, |
FLA_Obj | U, | ||
FLA_Obj | Z, | ||
FLA_Obj | t, | ||
FLA_Obj | u, | ||
FLA_Obj | w | ||
) |
References FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().
{
  /*
     Effective computation:
       w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
       t = U' u;

     Object-level front end: queries the dimensions and strides of the
     operands, then forwards the raw buffers to the kernel matching the
     datatype of U. Every kernel takes its vector arguments in the order
     ( t, inc_t ), ( u, inc_u ), ( w, inc_w ).

     Always returns FLA_SUCCESS; the kernels' return values are not
     inspected.
  */
  FLA_Datatype datatype;
  int          m_U, n_U;
  int          rs_U, cs_U;
  int          rs_Z, cs_Z;
  int          inc_u, inc_w, inc_t;

  /* The datatype and dimensions are taken from U alone. */
  datatype = FLA_Obj_datatype( U );

  m_U      = FLA_Obj_length( U );
  n_U      = FLA_Obj_width( U );

  rs_U     = FLA_Obj_row_stride( U );
  cs_U     = FLA_Obj_col_stride( U );

  rs_Z     = FLA_Obj_row_stride( Z );
  cs_Z     = FLA_Obj_col_stride( Z );

  inc_u    = FLA_Obj_vector_inc( u );
  inc_w    = FLA_Obj_vector_inc( w );
  inc_t    = FLA_Obj_vector_inc( t );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_U     = FLA_FLOAT_PTR( U );
      float* buff_Z     = FLA_FLOAT_PTR( Z );
      float* buff_t     = FLA_FLOAT_PTR( t );
      float* buff_u     = FLA_FLOAT_PTR( u );
      float* buff_w     = FLA_FLOAT_PTR( w );
      float* buff_delta = FLA_FLOAT_PTR( delta );

      FLA_Fused_UZhu_ZUhu_ops_var1( m_U,
                                    n_U,
                                    buff_delta,
                                    buff_U, rs_U, cs_U,
                                    buff_Z, rs_Z, cs_Z,
                                    buff_t, inc_t,
                                    buff_u, inc_u,
                                    buff_w, inc_w );
      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_U     = FLA_DOUBLE_PTR( U );
      double* buff_Z     = FLA_DOUBLE_PTR( Z );
      double* buff_t     = FLA_DOUBLE_PTR( t );
      double* buff_u     = FLA_DOUBLE_PTR( u );
      double* buff_w     = FLA_DOUBLE_PTR( w );
      double* buff_delta = FLA_DOUBLE_PTR( delta );

      FLA_Fused_UZhu_ZUhu_opd_var1( m_U,
                                    n_U,
                                    buff_delta,
                                    buff_U, rs_U, cs_U,
                                    buff_Z, rs_Z, cs_Z,
                                    buff_t, inc_t,
                                    buff_u, inc_u,
                                    buff_w, inc_w );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_U     = FLA_COMPLEX_PTR( U );
      scomplex* buff_Z     = FLA_COMPLEX_PTR( Z );
      scomplex* buff_t     = FLA_COMPLEX_PTR( t );
      scomplex* buff_u     = FLA_COMPLEX_PTR( u );
      scomplex* buff_w     = FLA_COMPLEX_PTR( w );
      scomplex* buff_delta = FLA_COMPLEX_PTR( delta );

      /* t must precede u here, as in the other three cases: the kernel
         writes its dot products through the first vector and reads the
         second one as input. */
      FLA_Fused_UZhu_ZUhu_opc_var1( m_U,
                                    n_U,
                                    buff_delta,
                                    buff_U, rs_U, cs_U,
                                    buff_Z, rs_Z, cs_Z,
                                    buff_t, inc_t,
                                    buff_u, inc_u,
                                    buff_w, inc_w );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_U     = FLA_DOUBLE_COMPLEX_PTR( U );
      dcomplex* buff_Z     = FLA_DOUBLE_COMPLEX_PTR( Z );
      dcomplex* buff_t     = FLA_DOUBLE_COMPLEX_PTR( t );
      dcomplex* buff_u     = FLA_DOUBLE_COMPLEX_PTR( u );
      dcomplex* buff_w     = FLA_DOUBLE_COMPLEX_PTR( w );
      dcomplex* buff_delta = FLA_DOUBLE_COMPLEX_PTR( delta );

      FLA_Fused_UZhu_ZUhu_opz_var1( m_U,
                                    n_U,
                                    buff_delta,
                                    buff_U, rs_U, cs_U,
                                    buff_Z, rs_Z, cs_Z,
                                    buff_t, inc_t,
                                    buff_u, inc_u,
                                    buff_w, inc_w );
      break;
    }

    default:
      /* Any other datatype: nothing is computed and FLA_SUCCESS is still
         returned, as before. */
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 | ( | int | m_U, |
int | n_U, | ||
dcomplex * | buff_delta, | ||
dcomplex * | buff_U, | ||
int | rs_U, | ||
int | cs_U, | ||
dcomplex * | buff_Z, | ||
int | rs_Z, | ||
int | cs_Z, | ||
dcomplex * | buff_t, | ||
int | inc_t, | ||
dcomplex * | buff_u, | ||
int | inc_u, | ||
dcomplex * | buff_w, | ||
int | inc_w | ||
) |
References bli_zaxpyv(), bli_zdot(), bli_zdotaxpy(), BLIS_CONJUGATE, and BLIS_NO_CONJUGATE.
Referenced by FLA_Fused_UZhu_ZUhu_opt_var1() and FLA_Tridiag_UT_l_step_ofz_var3().
{
  /*
     Double-precision complex variant; handles one column of U and Z per
     iteration.

     Effective computation (as stated for this family of routines):
       w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
       t = U' u;

     buff_delta is never read in this routine. The coefficients handed to
     the axpy-type kernels are instead passed through bli_zneg1 first.
     NOTE(review): bli_zneg1 presumably negates in place, which would make
     this variant agree with the delta-scaled variants only for
     delta == -1 -- confirm with the callers.

     The loop covers every column, so no remainder step is needed, and
     column pointers are derived from the column index so that none is
     formed for a column that is not processed.
  */
  dcomplex* u = buff_u;
  dcomplex* w = buff_w;
  int       j;

  for ( j = 0; j < n_U; ++j )
  {
    /* Start of column j in U and Z, and the j-th entry of t. */
    dcomplex* u1   = buff_U + j*cs_U;
    dcomplex* z1   = buff_Z + j*cs_Z;
    dcomplex* tau1 = buff_t + j*inc_t;

    dcomplex  rho_z1u;
    dcomplex  rho_u1u;

    bli_zdot( BLIS_CONJUGATE,
              m_U,
              z1, rs_Z,
              u,  inc_u,
              &rho_z1u );
    bli_zneg1( &rho_z1u );

    /* Fused kernel: takes rho_z1u as input, fills rho_u1u, and updates w
       using column u1. */
    bli_zdotaxpy( m_U,
                  u1, rs_U,
                  u,  inc_u,
                  &rho_z1u,
                  &rho_u1u,
                  w,  inc_w );

    /* t receives the value before bli_zneg1 is applied to it. */
    *tau1 = rho_u1u;

    bli_zneg1( &rho_u1u );

    bli_zaxpyv( BLIS_NO_CONJUGATE,
                m_U,
                &rho_u1u,
                z1, rs_Z,
                w,  inc_w );
  }

  return FLA_SUCCESS;
}