libflame  revision_anchor
Functions
FLA_Bidiag_UT_u_fus_var3.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_ofu_var3 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_ofu_var3()

FLA_Error FLA_Bidiag_UT_u_ofu_var3 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14 {
15  return FLA_Bidiag_UT_u_step_ofu_var3( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_ofu_var3(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:18

References FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofc_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var3 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)
927 {
928  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
929  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
930  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
931 
932  scomplex alpha12;
933  scomplex minus_conj_alpha12;
934  scomplex psi11_minus_alpha12;
935  scomplex minus_inv_tau11;
936  scomplex minus_upsilon11;
937  scomplex minus_conj_nu11;
938  scomplex minus_conj_psi11;
939  scomplex minus_zeta11;
940  scomplex beta;
941  int i;
942 
943  // b_alg = FLA_Obj_length( T );
944  int b_alg = m_TS;
945 
946  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
947  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
948  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
949  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
950  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
951  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
952  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
953  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
954  scomplex* buff_ap = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
955  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
956  scomplex* buff_up = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
957  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
958  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
959  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
960  int inc_w = 1;
961  int inc_ap = 1;
962  int inc_u = 1;
963  int inc_up = 1;
964  int inc_v = 1;
965  int inc_y = 1;
966  int inc_z = 1;
967 
968  for ( i = 0; i < b_alg; ++i )
969  {
970  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
971  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
972  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
973  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
974  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
975  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
976  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
977 
978  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
979  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
980 
981  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
982  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
983 
984  scomplex* w21 = buff_w + (i+1)*inc_w;
985 
986  scomplex* a12p = buff_ap + (i+1)*inc_ap;
987 
988  scomplex* upsilon11 = buff_u + (i )*inc_u;
989  scomplex* u21 = buff_u + (i+1)*inc_u;
990 
991  scomplex* u21p = buff_up + (i+1)*inc_up;
992 
993  scomplex* nu11 = buff_v + (i )*inc_v;
994  scomplex* v21 = buff_v + (i+1)*inc_v;
995 
996  scomplex* psi11 = buff_y + (i )*inc_y;
997  scomplex* y21 = buff_y + (i+1)*inc_y;
998 
999  scomplex* zeta11 = buff_z + (i )*inc_z;
1000  scomplex* z21 = buff_z + (i+1)*inc_z;
1001 
1002  scomplex* a12p_t = a12p + (0 )*inc_ap;
1003  scomplex* a12p_b = a12p + (1 )*inc_ap;
1004 
1005  scomplex* v21_t = v21 + (0 )*inc_v;
1006  scomplex* v21_b = v21 + (1 )*inc_v;
1007 
1008  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1009  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1010 
1011  scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1012 
1013  int m_ahead = m_A - i - 1;
1014  int n_ahead = n_A - i - 1;
1015  int m_behind = i;
1016  int n_behind = i;
1017 
1018  /*------------------------------------------------------------*/
1019 
1020  if ( m_behind > 0 )
1021  {
1022  // FLA_Copy( upsilon11, minus_upsilon11 );
1023  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1024  bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );
1025 
1026  // FLA_Copy( zeta11, minus_zeta11 );
1027  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1028  bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );
1029 
1030  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1031  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1032  bl1_ccopyconj( psi11, &minus_conj_psi11 );
1033  bl1_cscals( buff_m1, &minus_conj_psi11 );
1034 
1035  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1036  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1037  bl1_ccopyconj( nu11, &minus_conj_nu11 );
1038  bl1_cscals( buff_m1, &minus_conj_nu11 );
1039 
1040  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1041  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1042  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1043  1,
1044  &minus_conj_psi11,
1045  upsilon11, 1,
1046  alpha11, 1 );
1047  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1048  1,
1049  &minus_conj_nu11,
1050  zeta11, 1,
1051  alpha11, 1 );
1052 
1053  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1054  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1055  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1056  m_ahead,
1057  &minus_conj_psi11,
1058  u21, inc_u,
1059  a21, rs_A );
1060  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1061  m_ahead,
1062  &minus_conj_nu11,
1063  z21, inc_z,
1064  a21, rs_A );
1065 
1066  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1067  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1068  bl1_caxpyv( BLIS1_CONJUGATE,
1069  n_ahead,
1070  &minus_upsilon11,
1071  y21, inc_y,
1072  a12t, cs_A );
1073  bl1_caxpyv( BLIS1_CONJUGATE,
1074  n_ahead,
1075  &minus_zeta11,
1076  v21, inc_v,
1077  a12t, cs_A );
1078  }
1079 
1080  // FLA_Househ2_UT( FLA_LEFT,
1081  // alpha11,
1082  // a21, tau11 );
1083  // FLA_Copy( a21, u21p );
1084  FLA_Househ2_UT_l_opc( m_ahead,
1085  alpha11,
1086  a21, rs_A,
1087  tau11 );
1088  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1089  m_ahead,
1090  a21, rs_A,
1091  u21p, inc_up );
1092 
1093  if ( n_ahead > 0 )
1094  {
1095  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1096  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1097  bl1_cdiv3( buff_m1, tau11, &minus_inv_tau11 );
1098 
1099  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1100  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1101  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1102  n_ahead,
1103  a12t, cs_A,
1104  a12p, inc_ap );
1105  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1106  n_ahead,
1107  &minus_inv_tau11,
1108  a12t, cs_A,
1109  a12p, inc_ap );
1110  }
1111 
1112  if ( m_behind > 0 && n_ahead > 0 )
1113  {
1114  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1115  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1116  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1117  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1118  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1119  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1( m_ahead,
1120  n_ahead,
1121  tau11,
1122  buff_m1,
1123  u21, inc_u,
1124  y21, inc_y,
1125  z21, inc_z,
1126  v21, inc_v,
1127  A22, rs_A, cs_A,
1128  u21p, inc_up,
1129  a12p, inc_ap,
1130  w21, inc_w );
1131 
1132 
1133  }
1134  else if ( n_ahead > 0 )
1135  {
1136  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1137  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1138  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1139  FLA_Fused_Ahx_Axpy_Ax_opc_var1( m_ahead,
1140  n_ahead,
1141  tau11,
1142  buff_0,
1143  A22, rs_A, cs_A,
1144  u21p, inc_up,
1145  a12p, inc_ap,
1146  y21, inc_y,
1147  w21, inc_w );
1148  }
1149 
1150  if ( n_ahead > 0 )
1151  {
1152  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1153  bl1_caxpyv( BLIS1_CONJUGATE,
1154  n_ahead,
1155  buff_1,
1156  a12t, cs_A,
1157  y21, inc_y );
1158 
1159  // FLA_Househ2s_UT( FLA_RIGHT,
1160  // a12p_t,
1161  // a12p_b,
1162  // alpha12, psi11_minus_alpha12, sigma11 );
1163  FLA_Househ2s_UT_r_opc( n_ahead - 1,
1164  a12p_t,
1165  a12p_b, inc_ap,
1166  &alpha12,
1167  &psi11_minus_alpha12,
1168  sigma11 );
1169 
1170  // FLA_Copy( a12p, v21 );
1171  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1172  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1173  // FLA_Conjugate( v21_b );
1174  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1175  n_ahead,
1176  a12p, inc_ap,
1177  v21, inc_v );
1178  bl1_cmult4( buff_m1, &alpha12, v21_t, v21_t );
1179  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1180  n_ahead,
1181  &psi11_minus_alpha12,
1182  v21, inc_v );
1183  bl1_cconjv( n_ahead - 1,
1184  v21_b, inc_v );
1185 
1186  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1187  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1188  *a12t_l = alpha12;
1189  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1190  n_ahead - 1,
1191  v21_b, inc_v,
1192  a12t_r, cs_A );
1193  }
1194 
1195  // FLA_Copy( u21p, u21 );
1196  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1197  m_ahead,
1198  u21p, inc_up,
1199  u21, inc_u );
1200 
1201  if ( n_ahead > 0 )
1202  {
1203  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1204  // FLA_Scal( FLA_MINUS_ONE, beta );
1205  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1206  bl1_cdot( BLIS1_CONJUGATE,
1207  n_ahead,
1208  y21, inc_y,
1209  v21, inc_v,
1210  &beta );
1211  bl1_cscals( &minus_inv_tau11, &beta );
1212 
1213  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1214  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1215  bl1_ccopyconj( &alpha12, &minus_conj_alpha12 );
1216  bl1_cneg1( &minus_conj_alpha12 );
1217 
1218  // FLA_Copy( w21, z21 );
1219  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1220  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1221  // FLA_Axpy( beta, u21, z21 );
1222  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1223  m_ahead,
1224  w21, inc_w,
1225  z21, inc_z );
1226  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1227  m_ahead,
1228  &minus_conj_alpha12,
1229  A22_l, rs_A,
1230  z21, inc_z );
1231  bl1_cinvscalv( BLIS1_CONJUGATE,
1232  m_ahead,
1233  &psi11_minus_alpha12,
1234  z21, inc_z );
1235  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1236  m_ahead,
1237  &beta,
1238  u21, inc_u,
1239  z21, inc_z );
1240 
1241  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1242  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1243  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1244  n_ahead,
1245  tau11,
1246  y21, inc_y );
1247  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
1248  m_ahead,
1249  sigma11,
1250  z21, inc_z );
1251 
1252  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1253  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
1254  BLIS1_NO_CONJUGATE,
1255  m_behind,
1256  n_ahead,
1257  buff_1,
1258  A02, rs_A, cs_A,
1259  v21, inc_v,
1260  buff_0,
1261  s01, rs_S );
1262  }
1263 
1264  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1265  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1266  bl1_ccopyv( BLIS1_CONJUGATE,
1267  n_behind,
1268  a10t, cs_A,
1269  t01, rs_T );
1270  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1271  BLIS1_NO_CONJUGATE,
1272  m_ahead,
1273  n_behind,
1274  buff_1,
1275  A20, rs_A, cs_A,
1276  u21, inc_u,
1277  buff_1,
1278  t01, rs_T );
1279 
1280  if ( m_behind + 1 == b_alg && n_ahead > 0 )
1281  {
1282  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1283  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1284  bl1_cger( BLIS1_NO_CONJUGATE,
1285  BLIS1_CONJUGATE,
1286  m_ahead,
1287  n_ahead,
1288  buff_m1,
1289  u21, inc_u,
1290  y21, inc_y,
1291  A22, rs_A, cs_A );
1292  bl1_cger( BLIS1_NO_CONJUGATE,
1293  BLIS1_CONJUGATE,
1294  m_ahead,
1295  n_ahead,
1296  buff_m1,
1297  z21, inc_z,
1298  v21, inc_v,
1299  A22, rs_A, cs_A );
1300  }
1301 
1302  /*------------------------------------------------------------*/
1303 
1304  }
1305 
1306  // FLA_Obj_free( &w );
1307  // FLA_Obj_free( &ap );
1308  // FLA_Obj_free( &u );
1309  // FLA_Obj_free( &up );
1310  // FLA_Obj_free( &v );
1311  // FLA_Obj_free( &y );
1312  // FLA_Obj_free( &z );
1313  FLA_free( buff_w );
1314  FLA_free( buff_ap );
1315  FLA_free( buff_u );
1316  FLA_free( buff_up );
1317  FLA_free( buff_v );
1318  FLA_free( buff_y );
1319  FLA_free( buff_z );
1320 
1321  return FLA_SUCCESS;
1322 }
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:424
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_beta, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_u, int inc_u, scomplex *buff_a, int inc_a, scomplex *buff_y, int inc_y, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:322
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2s_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *alpha, scomplex *chi_1_minus_alpha, scomplex *tau)
Definition: FLA_Househ2s_UT.c:589
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cconjv(int m, scomplex *x, int incx)
Definition: bl1_conjv.c:23
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
scomplex
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_cconjv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2s_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofd_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var3 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)
522 {
523  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
524  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
525  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
526 
527  double alpha12;
528  double minus_conj_alpha12;
529  double psi11_minus_alpha12;
530  double minus_inv_tau11;
531  double minus_upsilon11;
532  double minus_conj_nu11;
533  double minus_conj_psi11;
534  double minus_zeta11;
535  double beta;
536  int i;
537 
538  // b_alg = FLA_Obj_length( T );
539  int b_alg = m_TS;
540 
541  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
542  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
543  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
544  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
545  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
546  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
547  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
548  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
549  double* buff_ap = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
550  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
551  double* buff_up = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
552  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
553  double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
554  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
555  int inc_w = 1;
556  int inc_ap = 1;
557  int inc_u = 1;
558  int inc_up = 1;
559  int inc_v = 1;
560  int inc_y = 1;
561  int inc_z = 1;
562 
563  for ( i = 0; i < b_alg; ++i )
564  {
565  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
566  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
567  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
568  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
569  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
570  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
571  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
572 
573  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
574  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
575 
576  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
577  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
578 
579  double* w21 = buff_w + (i+1)*inc_w;
580 
581  double* a12p = buff_ap + (i+1)*inc_ap;
582 
583  double* upsilon11 = buff_u + (i )*inc_u;
584  double* u21 = buff_u + (i+1)*inc_u;
585 
586  double* u21p = buff_up + (i+1)*inc_up;
587 
588  double* nu11 = buff_v + (i )*inc_v;
589  double* v21 = buff_v + (i+1)*inc_v;
590 
591  double* psi11 = buff_y + (i )*inc_y;
592  double* y21 = buff_y + (i+1)*inc_y;
593 
594  double* zeta11 = buff_z + (i )*inc_z;
595  double* z21 = buff_z + (i+1)*inc_z;
596 
597  double* a12p_t = a12p + (0 )*inc_ap;
598  double* a12p_b = a12p + (1 )*inc_ap;
599 
600  double* v21_t = v21 + (0 )*inc_v;
601  double* v21_b = v21 + (1 )*inc_v;
602 
603  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
604  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
605 
606  double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
607 
608  int m_ahead = m_A - i - 1;
609  int n_ahead = n_A - i - 1;
610  int m_behind = i;
611  int n_behind = i;
612 
613  /*------------------------------------------------------------*/
614 
615  if ( m_behind > 0 )
616  {
617  // FLA_Copy( upsilon11, minus_upsilon11 );
618  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
619  bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );
620 
621  // FLA_Copy( zeta11, minus_zeta11 );
622  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
623  bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );
624 
625  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
626  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
627  bl1_dcopyconj( psi11, &minus_conj_psi11 );
628  bl1_dscals( buff_m1, &minus_conj_psi11 );
629 
630  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
631  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
632  bl1_dcopyconj( nu11, &minus_conj_nu11 );
633  bl1_dscals( buff_m1, &minus_conj_nu11 );
634 
635  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
636  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
637  bl1_daxpyv( BLIS1_NO_CONJUGATE,
638  1,
639  &minus_conj_psi11,
640  upsilon11, 1,
641  alpha11, 1 );
642  bl1_daxpyv( BLIS1_NO_CONJUGATE,
643  1,
644  &minus_conj_nu11,
645  zeta11, 1,
646  alpha11, 1 );
647 
648  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
649  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
650  bl1_daxpyv( BLIS1_NO_CONJUGATE,
651  m_ahead,
652  &minus_conj_psi11,
653  u21, inc_u,
654  a21, rs_A );
655  bl1_daxpyv( BLIS1_NO_CONJUGATE,
656  m_ahead,
657  &minus_conj_nu11,
658  z21, inc_z,
659  a21, rs_A );
660 
661  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
662  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
663  bl1_daxpyv( BLIS1_CONJUGATE,
664  n_ahead,
665  &minus_upsilon11,
666  y21, inc_y,
667  a12t, cs_A );
668  bl1_daxpyv( BLIS1_CONJUGATE,
669  n_ahead,
670  &minus_zeta11,
671  v21, inc_v,
672  a12t, cs_A );
673  }
674 
675  // FLA_Househ2_UT( FLA_LEFT,
676  // alpha11,
677  // a21, tau11 );
678  // FLA_Copy( a21, u21p );
679  FLA_Househ2_UT_l_opd( m_ahead,
680  alpha11,
681  a21, rs_A,
682  tau11 );
683  bl1_dcopyv( BLIS1_NO_CONJUGATE,
684  m_ahead,
685  a21, rs_A,
686  u21p, inc_up );
687 
688  if ( n_ahead > 0 )
689  {
690  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
691  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
692  bl1_ddiv3( buff_m1, tau11, &minus_inv_tau11 );
693 
694  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
695  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
696  bl1_dcopyv( BLIS1_NO_CONJUGATE,
697  n_ahead,
698  a12t, cs_A,
699  a12p, inc_ap );
700  bl1_daxpyv( BLIS1_NO_CONJUGATE,
701  n_ahead,
702  &minus_inv_tau11,
703  a12t, cs_A,
704  a12p, inc_ap );
705  }
706 
707  if ( m_behind > 0 && n_ahead > 0 )
708  {
709  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
710  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
711  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
712  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
713  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
714  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1( m_ahead,
715  n_ahead,
716  tau11,
717  buff_m1,
718  u21, inc_u,
719  y21, inc_y,
720  z21, inc_z,
721  v21, inc_v,
722  A22, rs_A, cs_A,
723  u21p, inc_up,
724  a12p, inc_ap,
725  w21, inc_w );
726 
727 
728  }
729  else if ( n_ahead > 0 )
730  {
731  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
732  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
733  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
734  FLA_Fused_Ahx_Axpy_Ax_opd_var1( m_ahead,
735  n_ahead,
736  tau11,
737  buff_0,
738  A22, rs_A, cs_A,
739  u21p, inc_up,
740  a12p, inc_ap,
741  y21, inc_y,
742  w21, inc_w );
743  }
744 
745  if ( n_ahead > 0 )
746  {
747  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
748  bl1_daxpyv( BLIS1_CONJUGATE,
749  n_ahead,
750  buff_1,
751  a12t, cs_A,
752  y21, inc_y );
753 
754  // FLA_Househ2s_UT( FLA_RIGHT,
755  // a12p_t,
756  // a12p_b,
757  // alpha12, psi11_minus_alpha12, sigma11 );
758  FLA_Househ2s_UT_r_opd( n_ahead - 1,
759  a12p_t,
760  a12p_b, inc_ap,
761  &alpha12,
762  &psi11_minus_alpha12,
763  sigma11 );
764 
765  // FLA_Copy( a12p, v21 );
766  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
767  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
768  // FLA_Conjugate( v21_b );
769  bl1_dcopyv( BLIS1_NO_CONJUGATE,
770  n_ahead,
771  a12p, inc_ap,
772  v21, inc_v );
773  bl1_dmult4( buff_m1, &alpha12, v21_t, v21_t );
774  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
775  n_ahead,
776  &psi11_minus_alpha12,
777  v21, inc_v );
778  bl1_dconjv( n_ahead - 1,
779  v21_b, inc_v );
780 
781  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
782  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
783  *a12t_l = alpha12;
784  bl1_dcopyv( BLIS1_NO_CONJUGATE,
785  n_ahead - 1,
786  v21_b, inc_v,
787  a12t_r, cs_A );
788  }
789 
790  // FLA_Copy( u21p, u21 );
791  bl1_dcopyv( BLIS1_NO_CONJUGATE,
792  m_ahead,
793  u21p, inc_up,
794  u21, inc_u );
795 
796  if ( n_ahead > 0 )
797  {
798  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
799  // FLA_Scal( FLA_MINUS_ONE, beta );
800  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
801  bl1_ddot( BLIS1_CONJUGATE,
802  n_ahead,
803  y21, inc_y,
804  v21, inc_v,
805  &beta );
806  bl1_dscals( &minus_inv_tau11, &beta );
807 
808  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
809  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
810  bl1_dcopyconj( &alpha12, &minus_conj_alpha12 );
811  bl1_dneg1( &minus_conj_alpha12 );
812 
813  // FLA_Copy( w21, z21 );
814  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
815  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
816  // FLA_Axpy( beta, u21, z21 );
817  bl1_dcopyv( BLIS1_NO_CONJUGATE,
818  m_ahead,
819  w21, inc_w,
820  z21, inc_z );
821  bl1_daxpyv( BLIS1_NO_CONJUGATE,
822  m_ahead,
823  &minus_conj_alpha12,
824  A22_l, rs_A,
825  z21, inc_z );
826  bl1_dinvscalv( BLIS1_CONJUGATE,
827  m_ahead,
828  &psi11_minus_alpha12,
829  z21, inc_z );
830  bl1_daxpyv( BLIS1_NO_CONJUGATE,
831  m_ahead,
832  &beta,
833  u21, inc_u,
834  z21, inc_z );
835 
836  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
837  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
838  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
839  n_ahead,
840  tau11,
841  y21, inc_y );
842  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
843  m_ahead,
844  sigma11,
845  z21, inc_z );
846 
847  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
848  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
849  BLIS1_NO_CONJUGATE,
850  m_behind,
851  n_ahead,
852  buff_1,
853  A02, rs_A, cs_A,
854  v21, inc_v,
855  buff_0,
856  s01, rs_S );
857  }
858 
859  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
860  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
861  bl1_dcopyv( BLIS1_CONJUGATE,
862  n_behind,
863  a10t, cs_A,
864  t01, rs_T );
865  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
866  BLIS1_NO_CONJUGATE,
867  m_ahead,
868  n_behind,
869  buff_1,
870  A20, rs_A, cs_A,
871  u21, inc_u,
872  buff_1,
873  t01, rs_T );
874 
875  if ( m_behind + 1 == b_alg && n_ahead > 0 )
876  {
877  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
878  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
879  bl1_dger( BLIS1_NO_CONJUGATE,
880  BLIS1_CONJUGATE,
881  m_ahead,
882  n_ahead,
883  buff_m1,
884  u21, inc_u,
885  y21, inc_y,
886  A22, rs_A, cs_A );
887  bl1_dger( BLIS1_NO_CONJUGATE,
888  BLIS1_CONJUGATE,
889  m_ahead,
890  n_ahead,
891  buff_m1,
892  z21, inc_z,
893  v21, inc_v,
894  A22, rs_A, cs_A );
895  }
896 
897  /*------------------------------------------------------------*/
898 
899  }
900 
901  // FLA_Obj_free( &w );
902  // FLA_Obj_free( &ap );
903  // FLA_Obj_free( &u );
904  // FLA_Obj_free( &up );
905  // FLA_Obj_free( &v );
906  // FLA_Obj_free( &y );
907  // FLA_Obj_free( &z );
908  FLA_free( buff_w );
909  FLA_free( buff_ap );
910  FLA_free( buff_u );
911  FLA_free( buff_up );
912  FLA_free( buff_v );
913  FLA_free( buff_y );
914  FLA_free( buff_z );
915 
916  return FLA_SUCCESS;
917 }
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_beta, double *buff_A, int rs_A, int cs_A, double *buff_u, int inc_u, double *buff_a, int inc_a, double *buff_y, int inc_y, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:207
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:267
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
FLA_Error FLA_Househ2s_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *alpha, double *chi_1_minus_alpha, double *tau)
Definition: FLA_Househ2s_UT.c:572
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_dconjv(int m, double *x, int incx)
Definition: bl1_conjv.c:18
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26

References bl1_daxpyv(), bl1_dconjv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2s_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofs_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var3 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124 
125  float alpha12;
126  float minus_conj_alpha12;
127  float psi11_minus_alpha12;
128  float minus_inv_tau11;
129  float minus_upsilon11;
130  float minus_conj_nu11;
131  float minus_conj_psi11;
132  float minus_zeta11;
133  float beta;
134  int i;
135 
136  // b_alg = FLA_Obj_length( T );
137  int b_alg = m_TS;
138 
139  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
140  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
141  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
142  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
143  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
144  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
145  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
146  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
147  float* buff_ap = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
148  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149  float* buff_up = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
150  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
151  float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
152  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
153  int inc_w = 1;
154  int inc_ap = 1;
155  int inc_u = 1;
156  int inc_up = 1;
157  int inc_v = 1;
158  int inc_y = 1;
159  int inc_z = 1;
160 
161  for ( i = 0; i < b_alg; ++i )
162  {
163  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170 
171  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
172  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
173 
174  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
175  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
176 
177  float* w21 = buff_w + (i+1)*inc_w;
178 
179  float* a12p = buff_ap + (i+1)*inc_ap;
180 
181  float* upsilon11 = buff_u + (i )*inc_u;
182  float* u21 = buff_u + (i+1)*inc_u;
183 
184  float* u21p = buff_up + (i+1)*inc_up;
185 
186  float* nu11 = buff_v + (i )*inc_v;
187  float* v21 = buff_v + (i+1)*inc_v;
188 
189  float* psi11 = buff_y + (i )*inc_y;
190  float* y21 = buff_y + (i+1)*inc_y;
191 
192  float* zeta11 = buff_z + (i )*inc_z;
193  float* z21 = buff_z + (i+1)*inc_z;
194 
195  float* a12p_t = a12p + (0 )*inc_ap;
196  float* a12p_b = a12p + (1 )*inc_ap;
197 
198  float* v21_t = v21 + (0 )*inc_v;
199  float* v21_b = v21 + (1 )*inc_v;
200 
201  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
202  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
203 
204  float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
205 
206  int m_ahead = m_A - i - 1;
207  int n_ahead = n_A - i - 1;
208  int m_behind = i;
209  int n_behind = i;
210 
211  /*------------------------------------------------------------*/
212 
213  if ( m_behind > 0 )
214  {
215  // FLA_Copy( upsilon11, minus_upsilon11 );
216  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
217  bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );
218 
219  // FLA_Copy( zeta11, minus_zeta11 );
220  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
221  bl1_smult3( buff_m1, zeta11, &minus_zeta11 );
222 
223  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
224  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
225  bl1_scopyconj( psi11, &minus_conj_psi11 );
226  bl1_sscals( buff_m1, &minus_conj_psi11 );
227 
228  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
229  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
230  bl1_scopyconj( nu11, &minus_conj_nu11 );
231  bl1_sscals( buff_m1, &minus_conj_nu11 );
232 
233  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_upsilon11, psi11, alpha11 );
234  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_zeta11, nu11, alpha11 );
236  1,
237  &minus_upsilon11,
238  psi11, 1,
239  alpha11, 1 );
241  1,
242  &minus_zeta11,
243  nu11, 1,
244  alpha11, 1 );
245 
246  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
247  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
249  m_ahead,
250  &minus_conj_psi11,
251  u21, inc_u,
252  a21, rs_A );
254  m_ahead,
255  &minus_conj_nu11,
256  z21, inc_z,
257  a21, rs_A );
258 
259  // FLA_Axpyt( FLA_TRANSPOSE, minus_upsilon11, y21, a12t );
260  // FLA_Axpyt( FLA_TRANSPOSE, minus_zeta11, v21, a12t );
262  n_ahead,
263  &minus_upsilon11,
264  y21, inc_y,
265  a12t, cs_A );
267  n_ahead,
268  &minus_zeta11,
269  v21, inc_v,
270  a12t, cs_A );
271  }
272 
273  // FLA_Househ2_UT( FLA_LEFT,
274  // alpha11,
275  // a21, tau11 );
276  // FLA_Copy( a21, u21p );
277  FLA_Househ2_UT_l_ops( m_ahead,
278  alpha11,
279  a21, rs_A,
280  tau11 );
282  m_ahead,
283  a21, rs_A,
284  u21p, inc_up );
285 
286  if ( n_ahead > 0 )
287  {
288  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
289  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
290  bl1_sdiv3( buff_m1, tau11, &minus_inv_tau11 );
291 
292  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
293  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
295  n_ahead,
296  a12t, cs_A,
297  a12p, inc_ap );
299  n_ahead,
300  &minus_inv_tau11,
301  a12t, cs_A,
302  a12p, inc_ap );
303  }
304 
305  if ( m_behind > 0 && n_ahead > 0 )
306  {
307  // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
308  // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
309  // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
310  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
311  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
312  FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1( m_ahead,
313  n_ahead,
314  tau11,
315  buff_m1,
316  u21, inc_u,
317  y21, inc_y,
318  z21, inc_z,
319  v21, inc_v,
320  A22, rs_A, cs_A,
321  u21p, inc_up,
322  a12p, inc_ap,
323  w21, inc_w );
324 
325 
326  }
327  else if ( n_ahead > 0 )
328  {
329  // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
330  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
331  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
332  FLA_Fused_Ahx_Axpy_Ax_ops_var1( m_ahead,
333  n_ahead,
334  tau11,
335  buff_0,
336  A22, rs_A, cs_A,
337  u21p, inc_up,
338  a12p, inc_ap,
339  y21, inc_y,
340  w21, inc_w );
341  }
342 
343  if ( n_ahead > 0 )
344  {
345  // FLA_Axpyt( FLA_TRANSPOSE, FLA_ONE, a12t, y21 );
347  n_ahead,
348  buff_1,
349  a12t, cs_A,
350  y21, inc_y );
351 
352  // FLA_Househ2s_UT( FLA_RIGHT,
353  // a12p_t,
354  // a12p_b,
355  // alpha12, psi11_minus_alpha12, sigma11 );
356  FLA_Househ2s_UT_r_ops( n_ahead - 1,
357  a12p_t,
358  a12p_b, inc_ap,
359  &alpha12,
360  &psi11_minus_alpha12,
361  sigma11 );
362 
363  // FLA_Copy( a12p, v21 );
364  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
365  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
367  n_ahead,
368  a12p, inc_ap,
369  v21, inc_v );
370  bl1_smult4( buff_m1, &alpha12, v21_t, v21_t );
372  n_ahead,
373  &psi11_minus_alpha12,
374  v21, inc_v );
375 
376  // FLA_Copy( alpha12, a12t_l );
377  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
378  *a12t_l = alpha12;
380  n_ahead - 1,
381  v21_b, inc_v,
382  a12t_r, cs_A );
383  }
384 
385  // FLA_Copy( u21p, u21 );
387  m_ahead,
388  u21p, inc_up,
389  u21, inc_u );
390 
391  if ( n_ahead > 0 )
392  {
393  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
394  // FLA_Scal( FLA_MINUS_ONE, beta );
395  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
397  n_ahead,
398  y21, inc_y,
399  v21, inc_v,
400  &beta );
401  bl1_sscals( &minus_inv_tau11, &beta );
402 
403  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
404  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
405  bl1_scopyconj( &alpha12, &minus_conj_alpha12 );
406  bl1_sneg1( &minus_conj_alpha12 );
407 
408  // FLA_Copy( w21, z21 );
409  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
410  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
411  // FLA_Axpy( beta, u21, z21 );
413  m_ahead,
414  w21, inc_w,
415  z21, inc_z );
417  m_ahead,
418  &minus_conj_alpha12,
419  A22_l, rs_A,
420  z21, inc_z );
422  m_ahead,
423  &psi11_minus_alpha12,
424  z21, inc_z );
426  m_ahead,
427  &beta,
428  u21, inc_u,
429  z21, inc_z );
430 
431  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
432  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
434  n_ahead,
435  tau11,
436  y21, inc_y );
438  m_ahead,
439  sigma11,
440  z21, inc_z );
441 
442  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
445  m_behind,
446  n_ahead,
447  buff_1,
448  A02, rs_A, cs_A,
449  v21, inc_v,
450  buff_0,
451  s01, rs_S );
452  }
453 
454  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
455  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
457  n_behind,
458  a10t, cs_A,
459  t01, rs_T );
462  m_ahead,
463  n_behind,
464  buff_1,
465  A20, rs_A, cs_A,
466  u21, inc_u,
467  buff_1,
468  t01, rs_T );
469 
470  if ( m_behind + 1 == b_alg && n_ahead > 0 )
471  {
472  // FLA_Ger( FLA_MINUS_ONE, u21, y21, A22 );
473  // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
476  m_ahead,
477  n_ahead,
478  buff_m1,
479  u21, inc_u,
480  y21, inc_y,
481  A22, rs_A, cs_A );
484  m_ahead,
485  n_ahead,
486  buff_m1,
487  z21, inc_z,
488  v21, inc_v,
489  A22, rs_A, cs_A );
490  }
491 
492  /*------------------------------------------------------------*/
493 
494  }
495 
496  // FLA_Obj_free( &w );
497  // FLA_Obj_free( &ap );
498  // FLA_Obj_free( &u );
499  // FLA_Obj_free( &up );
500  // FLA_Obj_free( &v );
501  // FLA_Obj_free( &y );
502  // FLA_Obj_free( &z );
503  FLA_free( buff_w );
504  FLA_free( buff_ap );
505  FLA_free( buff_u );
506  FLA_free( buff_up );
507  FLA_free( buff_v );
508  FLA_free( buff_y );
509  FLA_free( buff_z );
510 
511  return FLA_SUCCESS;
512 }
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:170
FLA_Error FLA_Fused_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_beta, float *buff_A, int rs_A, int cs_A, float *buff_u, int inc_u, float *buff_a, int inc_a, float *buff_y, int inc_y, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:143
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2s_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *alpha, float *chi_1_minus_alpha, float *tau)
Definition: FLA_Househ2s_UT.c:555
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2s_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofu_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
50  FLA_Bidiag_UT_u_step_ofs_var3( m_A,
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
66  FLA_Bidiag_UT_u_step_ofd_var3( m_A,
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
82  FLA_Bidiag_UT_u_step_ofc_var3( m_A,
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
98  FLA_Bidiag_UT_u_step_ofz_var3( m_A,
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_ofd_var3(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:516
FLA_Error FLA_Bidiag_UT_u_step_ofz_var3(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:1326
FLA_Error FLA_Bidiag_UT_u_step_ofs_var3(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:114
FLA_Error FLA_Bidiag_UT_u_step_ofc_var3(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var3.c:921
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_ofc_var3(), FLA_Bidiag_UT_u_step_ofd_var3(), FLA_Bidiag_UT_u_step_ofs_var3(), FLA_Bidiag_UT_u_step_ofz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var3(), and FLA_Bidiag_UT_u_ofu_var3().

◆ FLA_Bidiag_UT_u_step_ofz_var3()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var3 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)
1332 {
1333  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1334  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1335  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1336 
1337  dcomplex alpha12;
1338  dcomplex minus_conj_alpha12;
1339  dcomplex psi11_minus_alpha12;
1340  dcomplex minus_inv_tau11;
1341  dcomplex minus_upsilon11;
1342  dcomplex minus_conj_nu11;
1343  dcomplex minus_conj_psi11;
1344  dcomplex minus_zeta11;
1345  dcomplex beta;
1346  int i;
1347 
1348  // b_alg = FLA_Obj_length( T );
1349  int b_alg = m_TS;
1350 
1351  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1352  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &ap );
1353  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1354  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &up );
1355  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
1356  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
1357  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1358  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1359  dcomplex* buff_ap = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1360  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1361  dcomplex* buff_up = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1362  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1363  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
1364  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1365  int inc_w = 1;
1366  int inc_ap = 1;
1367  int inc_u = 1;
1368  int inc_up = 1;
1369  int inc_v = 1;
1370  int inc_y = 1;
1371  int inc_z = 1;
1372 
1373  for ( i = 0; i < b_alg; ++i )
1374  {
1375  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1376  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1377  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1378  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1379  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1380  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1381  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1382 
1383  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1384  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1385 
1386  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
1387  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
1388 
1389  dcomplex* w21 = buff_w + (i+1)*inc_w;
1390 
1391  dcomplex* a12p = buff_ap + (i+1)*inc_ap;
1392 
1393  dcomplex* upsilon11 = buff_u + (i )*inc_u;
1394  dcomplex* u21 = buff_u + (i+1)*inc_u;
1395 
1396  dcomplex* u21p = buff_up + (i+1)*inc_up;
1397 
1398  dcomplex* nu11 = buff_v + (i )*inc_v;
1399  dcomplex* v21 = buff_v + (i+1)*inc_v;
1400 
1401  dcomplex* psi11 = buff_y + (i )*inc_y;
1402  dcomplex* y21 = buff_y + (i+1)*inc_y;
1403 
1404  dcomplex* zeta11 = buff_z + (i )*inc_z;
1405  dcomplex* z21 = buff_z + (i+1)*inc_z;
1406 
1407  dcomplex* a12p_t = a12p + (0 )*inc_ap;
1408  dcomplex* a12p_b = a12p + (1 )*inc_ap;
1409 
1410  dcomplex* v21_t = v21 + (0 )*inc_v;
1411  dcomplex* v21_b = v21 + (1 )*inc_v;
1412 
1413  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
1414  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
1415 
1416  dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
1417 
1418  int m_ahead = m_A - i - 1;
1419  int n_ahead = n_A - i - 1;
1420  int m_behind = i;
1421  int n_behind = i;
1422 
1423  /*------------------------------------------------------------*/
1424 
1425  if ( m_behind > 0 )
1426  {
1427  // FLA_Copy( upsilon11, minus_upsilon11 );
1428  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
1429  bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );
1430 
1431  // FLA_Copy( zeta11, minus_zeta11 );
1432  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
1433  bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );
1434 
1435  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, psi11, minus_conj_psi11 );
1436  // FLA_Scal( FLA_MINUS_ONE, minus_conj_psi11 );
1437  bl1_zcopyconj( psi11, &minus_conj_psi11 );
1438  bl1_zscals( buff_m1, &minus_conj_psi11 );
1439 
1440  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, nu11, minus_conj_nu11 );
1441  // FLA_Scal( FLA_MINUS_ONE, minus_conj_nu11 );
1442  bl1_zcopyconj( nu11, &minus_conj_nu11 );
1443  bl1_zscals( buff_m1, &minus_conj_nu11 );
1444 
1445  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, upsilon11, alpha11 );
1446  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, zeta11, alpha11 );
1448  1,
1449  &minus_conj_psi11,
1450  upsilon11, 1,
1451  alpha11, 1 );
1453  1,
1454  &minus_conj_nu11,
1455  zeta11, 1,
1456  alpha11, 1 );
1457 
1458  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi11, u21, a21 );
1459  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_nu11, z21, a21 );
1461  m_ahead,
1462  &minus_conj_psi11,
1463  u21, inc_u,
1464  a21, rs_A );
1466  m_ahead,
1467  &minus_conj_nu11,
1468  z21, inc_z,
1469  a21, rs_A );
1470 
1471  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon11, y21, a12t );
1472  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta11, v21, a12t );
1474  n_ahead,
1475  &minus_upsilon11,
1476  y21, inc_y,
1477  a12t, cs_A );
1479  n_ahead,
1480  &minus_zeta11,
1481  v21, inc_v,
1482  a12t, cs_A );
1483  }
1484 
1485  // FLA_Househ2_UT( FLA_LEFT,
1486  // alpha11,
1487  // a21, tau11 );
1488  // FLA_Copy( a21, u21p );
1489  FLA_Househ2_UT_l_opz( m_ahead,
1490  alpha11,
1491  a21, rs_A,
1492  tau11 );
1494  m_ahead,
1495  a21, rs_A,
1496  u21p, inc_up );
1497 
1498  if ( n_ahead > 0 )
1499  {
1500  // FLA_Copy( FLA_MINUS_ONE, minus_inv_tau11 );
1501  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, minus_inv_tau11 );
1502  bl1_zdiv3( buff_m1, tau11, &minus_inv_tau11 );
1503 
1504  // FLA_Copyt( FLA_TRANSPOSE, a12t, a12p );
1505  // FLA_Axpyt( FLA_TRANSPOSE, minus_inv_tau11, a12t, a12p );
1507  n_ahead,
1508  a12t, cs_A,
1509  a12p, inc_ap );
1511  n_ahead,
1512  &minus_inv_tau11,
1513  a12t, cs_A,
1514  a12p, inc_ap );
1515  }
1516 
1517  if ( m_behind > 0 && n_ahead > 0 )
1518  {
1519  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1520  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1521  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1522  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1523  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1524  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1( m_ahead,
1525  n_ahead,
1526  tau11,
1527  buff_m1,
1528  u21, inc_u,
1529  y21, inc_y,
1530  z21, inc_z,
1531  v21, inc_v,
1532  A22, rs_A, cs_A,
1533  u21p, inc_up,
1534  a12p, inc_ap,
1535  w21, inc_w );
1536 
1537 
1538  }
1539  else if ( n_ahead > 0 )
1540  {
1541  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, u21p, FLA_ZERO, y21 );
1542  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_inv_tau11, y21, a12p );
1543  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a12p, FLA_ZERO, w21 );
1544  FLA_Fused_Ahx_Axpy_Ax_opz_var1( m_ahead,
1545  n_ahead,
1546  tau11,
1547  buff_0,
1548  A22, rs_A, cs_A,
1549  u21p, inc_up,
1550  a12p, inc_ap,
1551  y21, inc_y,
1552  w21, inc_w );
1553  }
1554 
1555  if ( n_ahead > 0 )
1556  {
1557  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_ONE, a12t, y21 );
1559  n_ahead,
1560  buff_1,
1561  a12t, cs_A,
1562  y21, inc_y );
1563 
1564  // FLA_Househ2s_UT( FLA_RIGHT,
1565  // a12p_t,
1566  // a12p_b,
1567  // alpha12, psi11_minus_alpha12, sigma11 );
1568  FLA_Househ2s_UT_r_opz( n_ahead - 1,
1569  a12p_t,
1570  a12p_b, inc_ap,
1571  &alpha12,
1572  &psi11_minus_alpha12,
1573  sigma11 );
1574 
1575  // FLA_Copy( a12p, v21 );
1576  // FLA_Mult_add( FLA_MINUS_ONE, alpha12, v21_t );
1577  // FLA_Inv_scalc( FLA_NO_CONJUGATE, psi11_minus_alpha12, v21 );
1578  // FLA_Conjugate( v21_b );
1580  n_ahead,
1581  a12p, inc_ap,
1582  v21, inc_v );
1583  bl1_zmult4( buff_m1, &alpha12, v21_t, v21_t );
1585  n_ahead,
1586  &psi11_minus_alpha12,
1587  v21, inc_v );
1588  bl1_zconjv( n_ahead - 1,
1589  v21_b, inc_v );
1590 
1591  // FLA_Copyt( FLA_NO_TRANSPOSE, alpha12, a12t_l );
1592  // FLA_Copyt( FLA_TRANSPOSE, v21_b, a12t_r );
1593  *a12t_l = alpha12;
1595  n_ahead - 1,
1596  v21_b, inc_v,
1597  a12t_r, cs_A );
1598  }
1599 
1600  // FLA_Copy( u21p, u21 );
1602  m_ahead,
1603  u21p, inc_up,
1604  u21, inc_u );
1605 
1606  if ( n_ahead > 0 )
1607  {
1608  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
1609  // FLA_Scal( FLA_MINUS_ONE, beta );
1610  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, beta );
1612  n_ahead,
1613  y21, inc_y,
1614  v21, inc_v,
1615  &beta );
1616  bl1_zscals( &minus_inv_tau11, &beta );
1617 
1618  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha12, minus_conj_alpha12 );
1619  // FLA_Scal( FLA_MINUS_ONE, minus_conj_alpha12 );
1620  bl1_zcopyconj( &alpha12, &minus_conj_alpha12 );
1621  bl1_zneg1( &minus_conj_alpha12 );
1622 
1623  // FLA_Copy( w21, z21 );
1624  // FLA_Axpy( minus_conj_alpha12, A22_l, z21 );
1625  // FLA_Inv_scalc( FLA_CONJUGATE, psi11_minus_alpha12, z21 );
1626  // FLA_Axpy( beta, u21, z21 );
1628  m_ahead,
1629  w21, inc_w,
1630  z21, inc_z );
1632  m_ahead,
1633  &minus_conj_alpha12,
1634  A22_l, rs_A,
1635  z21, inc_z );
1637  m_ahead,
1638  &psi11_minus_alpha12,
1639  z21, inc_z );
1641  m_ahead,
1642  &beta,
1643  u21, inc_u,
1644  z21, inc_z );
1645 
1646  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
1647  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
1649  n_ahead,
1650  tau11,
1651  y21, inc_y );
1653  m_ahead,
1654  sigma11,
1655  z21, inc_z );
1656 
1657  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
1660  m_behind,
1661  n_ahead,
1662  buff_1,
1663  A02, rs_A, cs_A,
1664  v21, inc_v,
1665  buff_0,
1666  s01, rs_S );
1667  }
1668 
1669  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a10t, t01 );
1670  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, u21, FLA_ONE, t01 );
1672  n_behind,
1673  a10t, cs_A,
1674  t01, rs_T );
1677  m_ahead,
1678  n_behind,
1679  buff_1,
1680  A20, rs_A, cs_A,
1681  u21, inc_u,
1682  buff_1,
1683  t01, rs_T );
1684 
1685  if ( m_behind + 1 == b_alg && n_ahead > 0 )
1686  {
1687  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u21, y21, A22 );
1688  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
1691  m_ahead,
1692  n_ahead,
1693  buff_m1,
1694  u21, inc_u,
1695  y21, inc_y,
1696  A22, rs_A, cs_A );
1699  m_ahead,
1700  n_ahead,
1701  buff_m1,
1702  z21, inc_z,
1703  v21, inc_v,
1704  A22, rs_A, cs_A );
1705  }
1706 
1707  /*------------------------------------------------------------*/
1708 
1709  }
1710 
1711  // FLA_Obj_free( &w );
1712  // FLA_Obj_free( &ap );
1713  // FLA_Obj_free( &u );
1714  // FLA_Obj_free( &up );
1715  // FLA_Obj_free( &v );
1716  // FLA_Obj_free( &y );
1717  // FLA_Obj_free( &z );
1718  FLA_free( buff_w );
1719  FLA_free( buff_ap );
1720  FLA_free( buff_u );
1721  FLA_free( buff_up );
1722  FLA_free( buff_v );
1723  FLA_free( buff_y );
1724  FLA_free( buff_z );
1725 
1726  return FLA_SUCCESS;
1727 }
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:523
FLA_Error FLA_Fused_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_beta, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_u, int inc_u, dcomplex *buff_a, int inc_a, dcomplex *buff_y, int inc_y, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Axpy_Ax_opt_var1.c:390
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2s_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *alpha, dcomplex *chi_1_minus_alpha, dcomplex *tau)
Definition: FLA_Househ2s_UT.c:610
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zconjv(int m, dcomplex *x, int incx)
Definition: bl1_conjv.c:34
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78

References bl1_zaxpyv(), bl1_zconjv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), bl1_zscals(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2s_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var3().