libflame  revision_anchor
Functions
FLA_Hess_UT_opt_var4.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_opt_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var4 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var4 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var4 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var4 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_opt_var4()

FLA_Error FLA_Hess_UT_opt_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Y );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_opt_var4( A, Y, Z, T );
22 
23  FLA_Obj_free( &Y );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }
FLA_Error FLA_Hess_UT_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var4.c:29
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:286
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
Definition: FLA_type_defs.h:159

References FLA_Hess_UT_step_opt_var4(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_step_opc_var4()

FLA_Error FLA_Hess_UT_step_opc_var4 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
858 {
859  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
860  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
861  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
862  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
863 
864  scomplex first_elem, last_elem;
865  scomplex dot_product;
866  scomplex beta, conj_beta;
867  scomplex inv_tau11;
868  scomplex minus_inv_tau11;
869  int i;
870 
871  // b_alg = FLA_Obj_length( T );
872  int b_alg = m_T;
873 
874  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
875  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
876  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
877  scomplex* buff_d = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
878  scomplex* buff_e = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
879  scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
880  int inc_d = 1;
881  int inc_e = 1;
882  int inc_f = 1;
883 
884  // FLA_Set( FLA_ZERO, Y );
885  // FLA_Set( FLA_ZERO, Z );
886  bl1_csetm( m_A,
887  b_alg,
888  buff_0,
889  buff_Y, rs_Y, cs_Y );
890  bl1_csetm( m_A,
891  b_alg,
892  buff_0,
893  buff_Z, rs_Z, cs_Z );
894 
895  for ( i = 0; i < b_alg; ++i )
896  {
897  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
898  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
899  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
900  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
901  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
902  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
903  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
904 
905  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
906  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
907  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
908 
909  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
910  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
911  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
912 
913  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
914  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
915 
916  scomplex* d0 = buff_d + (0 )*inc_d;
917 
918  scomplex* e0 = buff_e + (0 )*inc_e;
919 
920  scomplex* f0 = buff_f + (0 )*inc_f;
921 
922  scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
923 
924  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
925  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
926 
927  scomplex* ABL = a10t;
928  scomplex* ZBL = z10t;
929 
930  scomplex* a2 = alpha11;
931 
932  int m_ahead = m_A - i - 1;
933  int n_ahead = m_A - i - 1;
934  int m_behind = i;
935  int n_behind = i;
936 
937  /*------------------------------------------------------------*/
938 
939  if ( m_behind > 0 )
940  {
941  // FLA_Copy( a10t_r, last_elem );
942  // FLA_Set( FLA_ONE, a10t_r );
943  last_elem = *a10t_r;
944  *a10t_r = *buff_1;
945  }
946 
947  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
948  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
949  bl1_cgemv( BLIS1_NO_TRANSPOSE,
950  BLIS1_CONJUGATE,
951  m_ahead + 1,
952  n_behind,
953  buff_m1,
954  ABL, rs_A, cs_A,
955  y10t, cs_Y,
956  buff_1,
957  a2, rs_A );
958  bl1_cgemv( BLIS1_NO_TRANSPOSE,
959  BLIS1_CONJUGATE,
960  m_ahead + 1,
961  n_behind,
962  buff_m1,
963  ZBL, rs_Z, cs_Z,
964  a10t, cs_A,
965  buff_1,
966  a2, rs_A );
967 
968  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
969  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
970  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
971  BLIS1_NO_CONJUGATE,
972  m_ahead,
973  n_behind,
974  buff_m1,
975  Y20, rs_Y, cs_Y,
976  a10t, cs_A,
977  buff_1,
978  a12t, cs_A );
979  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
980  BLIS1_NO_CONJUGATE,
981  m_ahead,
982  n_behind,
983  buff_m1,
984  A20, rs_A, cs_A,
985  z10t, cs_Z,
986  buff_1,
987  a12t, cs_A );
988 
989  if ( m_behind > 0 )
990  {
991  // FLA_Copy( last_elem, a10t_r );
992  *a10t_r = last_elem;
993  }
994 
995  if ( m_ahead > 0 )
996  {
997  // FLA_Househ2_UT( FLA_LEFT,
998  // a21_t,
999  // a21_b, tau11 );
1000  FLA_Househ2_UT_l_opc( m_ahead - 1,
1001  a21_t,
1002  a21_b, rs_A,
1003  tau11 );
1004 
1005  // FLA_Set( FLA_ONE, inv_tau11 );
1006  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1007  // FLA_Copy( inv_tau11, minus_inv_tau11 );
1008  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1009  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
1010  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
1011 
1012  // FLA_Copy( a21_t, first_elem );
1013  // FLA_Set( FLA_ONE, a21_t );
1014  first_elem = *a21_t;
1015  *a21_t = *buff_1;
1016 
1017  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1018  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1019  BLIS1_NO_CONJUGATE,
1020  m_ahead,
1021  n_ahead,
1022  buff_1,
1023  A22, rs_A, cs_A,
1024  a21, rs_A,
1025  buff_0,
1026  y21, rs_Y );
1027 
1028  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1029  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1030  BLIS1_NO_CONJUGATE,
1031  m_ahead,
1032  n_ahead,
1033  buff_1,
1034  A22, rs_A, cs_A,
1035  a21, rs_A,
1036  buff_0,
1037  z21, rs_Z );
1038 
1039  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1040  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1041  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1042  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1043  BLIS1_NO_CONJUGATE,
1044  m_ahead,
1045  n_behind,
1046  buff_1,
1047  A20, rs_A, cs_A,
1048  a21, rs_A,
1049  buff_0,
1050  d0, inc_d );
1051  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1052  BLIS1_NO_CONJUGATE,
1053  m_ahead,
1054  n_behind,
1055  buff_1,
1056  Y20, rs_Y, cs_Y,
1057  a21, rs_A,
1058  buff_0,
1059  e0, inc_e );
1060  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1061  BLIS1_NO_CONJUGATE,
1062  m_ahead,
1063  n_behind,
1064  buff_1,
1065  Z20, rs_Z, cs_Z,
1066  a21, rs_A,
1067  buff_0,
1068  f0, inc_f );
1069 
1070  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1071  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1072  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1073  BLIS1_NO_CONJUGATE,
1074  m_ahead,
1075  n_behind,
1076  buff_m1,
1077  Y20, rs_Y, cs_Y,
1078  d0, inc_d,
1079  buff_1,
1080  y21, rs_Y );
1081  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1082  BLIS1_NO_CONJUGATE,
1083  m_ahead,
1084  n_behind,
1085  buff_m1,
1086  A20, rs_A, cs_A,
1087  f0, inc_f,
1088  buff_1,
1089  y21, rs_Y );
1090 
1091  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1092  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1093  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1094  BLIS1_NO_CONJUGATE,
1095  m_ahead,
1096  n_behind,
1097  buff_m1,
1098  A20, rs_A, cs_A,
1099  e0, inc_e,
1100  buff_1,
1101  z21, rs_Z );
1102  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1103  BLIS1_NO_CONJUGATE,
1104  m_ahead,
1105  n_behind,
1106  buff_m1,
1107  Z20, rs_Z, cs_Z,
1108  d0, inc_d,
1109  buff_1,
1110  z21, rs_Z );
1111 
1112  // FLA_Copy( d0, t01 );
1113  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1114  n_behind,
1115  d0, inc_d,
1116  t01, rs_T );
1117 
1118  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1119  // FLA_Inv_scal( FLA_TWO, beta );
1120  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1121  bl1_cdot( BLIS1_CONJUGATE,
1122  m_ahead,
1123  a21, rs_A,
1124  z21, rs_Z,
1125  &beta );
1126  bl1_cinvscals( buff_2, &beta );
1127  bl1_ccopyconj( &beta, &conj_beta );
1128 
1129  // FLA_Scal( minus_inv_tau11, conj_beta );
1130  // FLA_Axpy( conj_beta, a21, y21 );
1131  // FLA_Scal( inv_tau11, y21 );
1132  bl1_cscals( &minus_inv_tau11, &conj_beta );
1133  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1134  m_ahead,
1135  &conj_beta,
1136  a21, rs_A,
1137  y21, rs_Y );
1138  bl1_cscalv( BLIS1_NO_CONJUGATE,
1139  m_ahead,
1140  &inv_tau11,
1141  y21, rs_Y );
1142 
1143  // FLA_Scal( minus_inv_tau11, beta );
1144  // FLA_Axpy( beta, a21, z21 );
1145  // FLA_Scal( inv_tau11, z21 );
1146  bl1_cscals( &minus_inv_tau11, &beta );
1147  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1148  m_ahead,
1149  &beta,
1150  a21, rs_A,
1151  z21, rs_Z );
1152  bl1_cscalv( BLIS1_NO_CONJUGATE,
1153  m_ahead,
1154  &inv_tau11,
1155  z21, rs_Z );
1156 
1157  // FLA_Dot( a12t, a21, dot_product );
1158  // FLA_Scal( minus_inv_tau11, dot_product );
1159  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1160  bl1_cdot( BLIS1_NO_CONJUGATE,
1161  m_ahead,
1162  a12t, cs_A,
1163  a21, rs_A,
1164  &dot_product );
1165  bl1_cscals( &minus_inv_tau11, &dot_product );
1166  bl1_caxpyv( BLIS1_CONJUGATE,
1167  m_ahead,
1168  &dot_product,
1169  a21, rs_A,
1170  a12t, cs_A );
1171 
1172  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1173  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1174  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1175  BLIS1_NO_CONJUGATE,
1176  m_behind,
1177  n_ahead,
1178  buff_1,
1179  A02, rs_A, cs_A,
1180  a21, rs_A,
1181  buff_0,
1182  e0, inc_e );
1183  bl1_cger( BLIS1_NO_CONJUGATE,
1184  BLIS1_CONJUGATE,
1185  m_behind,
1186  n_ahead,
1187  &minus_inv_tau11,
1188  e0, inc_e,
1189  a21, rs_A,
1190  A02, rs_A, cs_A );
1191 
1192  // FLA_Copy( first_elem, a21_t );
1193  *a21_t = first_elem;
1194  }
1195 
1196  /*------------------------------------------------------------*/
1197 
1198  }
1199 
1200  // FLA_Obj_free( &d );
1201  // FLA_Obj_free( &e );
1202  // FLA_Obj_free( &f );
1203  FLA_free( buff_d );
1204  FLA_free( buff_e );
1205  FLA_free( buff_f );
1206 
1207  return FLA_SUCCESS;
1208 }
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opd_var4()

FLA_Error FLA_Hess_UT_step_opd_var4 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
498 {
499  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
500  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
501  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
502  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
503 
504  double first_elem, last_elem;
505  double dot_product;
506  double beta, conj_beta;
507  double inv_tau11;
508  double minus_inv_tau11;
509  int i;
510 
511  // b_alg = FLA_Obj_length( T );
512  int b_alg = m_T;
513 
514  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
515  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
516  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
517  double* buff_d = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
518  double* buff_e = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
519  double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
520  int inc_d = 1;
521  int inc_e = 1;
522  int inc_f = 1;
523 
524  // FLA_Set( FLA_ZERO, Y );
525  // FLA_Set( FLA_ZERO, Z );
526  bl1_dsetm( m_A,
527  b_alg,
528  buff_0,
529  buff_Y, rs_Y, cs_Y );
530  bl1_dsetm( m_A,
531  b_alg,
532  buff_0,
533  buff_Z, rs_Z, cs_Z );
534 
535  for ( i = 0; i < b_alg; ++i )
536  {
537  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
538  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
539  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
540  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
541  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
542  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
543  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
544 
545  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
546  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
547  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
548 
549  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
550  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
551  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
552 
553  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
554  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
555 
556  double* d0 = buff_d + (0 )*inc_d;
557 
558  double* e0 = buff_e + (0 )*inc_e;
559 
560  double* f0 = buff_f + (0 )*inc_f;
561 
562  double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
563 
564  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
565  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
566 
567  double* ABL = a10t;
568  double* ZBL = z10t;
569 
570  double* a2 = alpha11;
571 
572  int m_ahead = m_A - i - 1;
573  int n_ahead = m_A - i - 1;
574  int m_behind = i;
575  int n_behind = i;
576 
577  /*------------------------------------------------------------*/
578 
579  if ( m_behind > 0 )
580  {
581  // FLA_Copy( a10t_r, last_elem );
582  // FLA_Set( FLA_ONE, a10t_r );
583  last_elem = *a10t_r;
584  *a10t_r = *buff_1;
585  }
586 
587  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
588  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
589  bl1_dgemv( BLIS1_NO_TRANSPOSE,
590  BLIS1_CONJUGATE,
591  m_ahead + 1,
592  n_behind,
593  buff_m1,
594  ABL, rs_A, cs_A,
595  y10t, cs_Y,
596  buff_1,
597  a2, rs_A );
598  bl1_dgemv( BLIS1_NO_TRANSPOSE,
599  BLIS1_CONJUGATE,
600  m_ahead + 1,
601  n_behind,
602  buff_m1,
603  ZBL, rs_Z, cs_Z,
604  a10t, cs_A,
605  buff_1,
606  a2, rs_A );
607 
608  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
609  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
610  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
611  BLIS1_NO_CONJUGATE,
612  m_ahead,
613  n_behind,
614  buff_m1,
615  Y20, rs_Y, cs_Y,
616  a10t, cs_A,
617  buff_1,
618  a12t, cs_A );
619  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
620  BLIS1_NO_CONJUGATE,
621  m_ahead,
622  n_behind,
623  buff_m1,
624  A20, rs_A, cs_A,
625  z10t, cs_Z,
626  buff_1,
627  a12t, cs_A );
628 
629  if ( m_behind > 0 )
630  {
631  // FLA_Copy( last_elem, a10t_r );
632  *a10t_r = last_elem;
633  }
634 
635  if ( m_ahead > 0 )
636  {
637  // FLA_Househ2_UT( FLA_LEFT,
638  // a21_t,
639  // a21_b, tau11 );
640  FLA_Househ2_UT_l_opd( m_ahead - 1,
641  a21_t,
642  a21_b, rs_A,
643  tau11 );
644 
645  // FLA_Set( FLA_ONE, inv_tau11 );
646  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
647  // FLA_Copy( inv_tau11, minus_inv_tau11 );
648  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
649  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
650  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
651 
652  // FLA_Copy( a21_t, first_elem );
653  // FLA_Set( FLA_ONE, a21_t );
654  first_elem = *a21_t;
655  *a21_t = *buff_1;
656 
657  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
658  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
659  BLIS1_NO_CONJUGATE,
660  m_ahead,
661  n_ahead,
662  buff_1,
663  A22, rs_A, cs_A,
664  a21, rs_A,
665  buff_0,
666  y21, rs_Y );
667 
668  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
669  bl1_dgemv( BLIS1_NO_TRANSPOSE,
670  BLIS1_NO_CONJUGATE,
671  m_ahead,
672  n_ahead,
673  buff_1,
674  A22, rs_A, cs_A,
675  a21, rs_A,
676  buff_0,
677  z21, rs_Z );
678 
679  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
680  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
681  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
682  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
683  BLIS1_NO_CONJUGATE,
684  m_ahead,
685  n_behind,
686  buff_1,
687  A20, rs_A, cs_A,
688  a21, rs_A,
689  buff_0,
690  d0, inc_d );
691  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
692  BLIS1_NO_CONJUGATE,
693  m_ahead,
694  n_behind,
695  buff_1,
696  Y20, rs_Y, cs_Y,
697  a21, rs_A,
698  buff_0,
699  e0, inc_e );
700  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
701  BLIS1_NO_CONJUGATE,
702  m_ahead,
703  n_behind,
704  buff_1,
705  Z20, rs_Z, cs_Z,
706  a21, rs_A,
707  buff_0,
708  f0, inc_f );
709 
710  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
711  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
712  bl1_dgemv( BLIS1_NO_TRANSPOSE,
713  BLIS1_NO_CONJUGATE,
714  m_ahead,
715  n_behind,
716  buff_m1,
717  Y20, rs_Y, cs_Y,
718  d0, inc_d,
719  buff_1,
720  y21, rs_Y );
721  bl1_dgemv( BLIS1_NO_TRANSPOSE,
722  BLIS1_NO_CONJUGATE,
723  m_ahead,
724  n_behind,
725  buff_m1,
726  A20, rs_A, cs_A,
727  f0, inc_f,
728  buff_1,
729  y21, rs_Y );
730 
731  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
732  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
733  bl1_dgemv( BLIS1_NO_TRANSPOSE,
734  BLIS1_NO_CONJUGATE,
735  m_ahead,
736  n_behind,
737  buff_m1,
738  A20, rs_A, cs_A,
739  e0, inc_e,
740  buff_1,
741  z21, rs_Z );
742  bl1_dgemv( BLIS1_NO_TRANSPOSE,
743  BLIS1_NO_CONJUGATE,
744  m_ahead,
745  n_behind,
746  buff_m1,
747  Z20, rs_Z, cs_Z,
748  d0, inc_d,
749  buff_1,
750  z21, rs_Z );
751 
752  // FLA_Copy( d0, t01 );
753  bl1_dcopyv( BLIS1_NO_CONJUGATE,
754  n_behind,
755  d0, inc_d,
756  t01, rs_T );
757 
758  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
759  // FLA_Inv_scal( FLA_TWO, beta );
760  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
761  bl1_ddot( BLIS1_CONJUGATE,
762  m_ahead,
763  a21, rs_A,
764  z21, rs_Z,
765  &beta );
766  bl1_dinvscals( buff_2, &beta );
767  bl1_dcopyconj( &beta, &conj_beta );
768 
769  // FLA_Scal( minus_inv_tau11, conj_beta );
770  // FLA_Axpy( conj_beta, a21, y21 );
771  // FLA_Scal( inv_tau11, y21 );
772  bl1_dscals( &minus_inv_tau11, &conj_beta );
773  bl1_daxpyv( BLIS1_NO_CONJUGATE,
774  m_ahead,
775  &conj_beta,
776  a21, rs_A,
777  y21, rs_Y );
778  bl1_dscalv( BLIS1_NO_CONJUGATE,
779  m_ahead,
780  &inv_tau11,
781  y21, rs_Y );
782 
783  // FLA_Scal( minus_inv_tau11, beta );
784  // FLA_Axpy( beta, a21, z21 );
785  // FLA_Scal( inv_tau11, z21 );
786  bl1_dscals( &minus_inv_tau11, &beta );
787  bl1_daxpyv( BLIS1_NO_CONJUGATE,
788  m_ahead,
789  &beta,
790  a21, rs_A,
791  z21, rs_Z );
792  bl1_dscalv( BLIS1_NO_CONJUGATE,
793  m_ahead,
794  &inv_tau11,
795  z21, rs_Z );
796 
797  // FLA_Dot( a12t, a21, dot_product );
798  // FLA_Scal( minus_inv_tau11, dot_product );
799  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
800  bl1_ddot( BLIS1_NO_CONJUGATE,
801  m_ahead,
802  a12t, cs_A,
803  a21, rs_A,
804  &dot_product );
805  bl1_dscals( &minus_inv_tau11, &dot_product );
806  bl1_daxpyv( BLIS1_CONJUGATE,
807  m_ahead,
808  &dot_product,
809  a21, rs_A,
810  a12t, cs_A );
811 
812  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
813  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
814  bl1_dgemv( BLIS1_NO_TRANSPOSE,
815  BLIS1_NO_CONJUGATE,
816  m_behind,
817  n_ahead,
818  buff_1,
819  A02, rs_A, cs_A,
820  a21, rs_A,
821  buff_0,
822  e0, inc_e );
823  bl1_dger( BLIS1_NO_CONJUGATE,
824  BLIS1_CONJUGATE,
825  m_behind,
826  n_ahead,
827  &minus_inv_tau11,
828  e0, inc_e,
829  a21, rs_A,
830  A02, rs_A, cs_A );
831 
832  // FLA_Copy( first_elem, a21_t );
833  *a21_t = first_elem;
834  }
835 
836  /*------------------------------------------------------------*/
837 
838  }
839 
840  // FLA_Obj_free( &d );
841  // FLA_Obj_free( &e );
842  // FLA_Obj_free( &f );
843  FLA_free( buff_d );
844  FLA_free( buff_e );
845  FLA_free( buff_f );
846 
847  return FLA_SUCCESS;
848 }
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_ops_var4()

FLA_Error FLA_Hess_UT_step_ops_var4 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
138 {
139  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
140  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
141  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
142  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
143 
144  float first_elem, last_elem;
145  float dot_product;
146  float beta, conj_beta;
147  float inv_tau11;
148  float minus_inv_tau11;
149  int i;
150 
151  // b_alg = FLA_Obj_length( T );
152  int b_alg = m_T;
153 
154  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
155  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
156  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
157  float* buff_d = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
158  float* buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
159  float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
160  int inc_d = 1;
161  int inc_e = 1;
162  int inc_f = 1;
163 
164  // FLA_Set( FLA_ZERO, Y );
165  // FLA_Set( FLA_ZERO, Z );
166  bl1_ssetm( m_A,
167  b_alg,
168  buff_0,
169  buff_Y, rs_Y, cs_Y );
170  bl1_ssetm( m_A,
171  b_alg,
172  buff_0,
173  buff_Z, rs_Z, cs_Z );
174 
175  for ( i = 0; i < b_alg; ++i )
176  {
177  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
178  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
179  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
182  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
183  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
184 
185  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
186  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
187  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
188 
189  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
190  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
191  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
192 
193  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
194  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
195 
196  float* d0 = buff_d + (0 )*inc_d;
197 
198  float* e0 = buff_e + (0 )*inc_e;
199 
200  float* f0 = buff_f + (0 )*inc_f;
201 
202  float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
203 
204  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
205  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
206 
207  float* ABL = a10t;
208  float* ZBL = z10t;
209 
210  float* a2 = alpha11;
211 
212  int m_ahead = m_A - i - 1;
213  int n_ahead = m_A - i - 1;
214  int m_behind = i;
215  int n_behind = i;
216 
217  /*------------------------------------------------------------*/
218 
219  if ( m_behind > 0 )
220  {
221  // FLA_Copy( a10t_r, last_elem );
222  // FLA_Set( FLA_ONE, a10t_r );
223  last_elem = *a10t_r;
224  *a10t_r = *buff_1;
225  }
226 
227  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
228  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
229  bl1_sgemv( BLIS1_NO_TRANSPOSE,
230  BLIS1_CONJUGATE,
231  m_ahead + 1,
232  n_behind,
233  buff_m1,
234  ABL, rs_A, cs_A,
235  y10t, cs_Y,
236  buff_1,
237  a2, rs_A );
238  bl1_sgemv( BLIS1_NO_TRANSPOSE,
239  BLIS1_CONJUGATE,
240  m_ahead + 1,
241  n_behind,
242  buff_m1,
243  ZBL, rs_Z, cs_Z,
244  a10t, cs_A,
245  buff_1,
246  a2, rs_A );
247 
248  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
249  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
250  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
251  BLIS1_NO_CONJUGATE,
252  m_ahead,
253  n_behind,
254  buff_m1,
255  Y20, rs_Y, cs_Y,
256  a10t, cs_A,
257  buff_1,
258  a12t, cs_A );
259  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
260  BLIS1_NO_CONJUGATE,
261  m_ahead,
262  n_behind,
263  buff_m1,
264  A20, rs_A, cs_A,
265  z10t, cs_Z,
266  buff_1,
267  a12t, cs_A );
268 
269  if ( m_behind > 0 )
270  {
271  // FLA_Copy( last_elem, a10t_r );
272  *a10t_r = last_elem;
273  }
274 
275  if ( m_ahead > 0 )
276  {
277  // FLA_Househ2_UT( FLA_LEFT,
278  // a21_t,
279  // a21_b, tau11 );
280  FLA_Househ2_UT_l_ops( m_ahead - 1,
281  a21_t,
282  a21_b, rs_A,
283  tau11 );
284 
285  // FLA_Set( FLA_ONE, inv_tau11 );
286  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
287  // FLA_Copy( inv_tau11, minus_inv_tau11 );
288  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
289  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
290  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
291 
292  // FLA_Copy( a21_t, first_elem );
293  // FLA_Set( FLA_ONE, a21_t );
294  first_elem = *a21_t;
295  *a21_t = *buff_1;
296 
297  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
298  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
299  BLIS1_NO_CONJUGATE,
300  m_ahead,
301  n_ahead,
302  buff_1,
303  A22, rs_A, cs_A,
304  a21, rs_A,
305  buff_0,
306  y21, rs_Y );
307 
308  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
309  bl1_sgemv( BLIS1_NO_TRANSPOSE,
310  BLIS1_NO_CONJUGATE,
311  m_ahead,
312  n_ahead,
313  buff_1,
314  A22, rs_A, cs_A,
315  a21, rs_A,
316  buff_0,
317  z21, rs_Z );
318 
319  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
320  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
321  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
322  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
323  BLIS1_NO_CONJUGATE,
324  m_ahead,
325  n_behind,
326  buff_1,
327  A20, rs_A, cs_A,
328  a21, rs_A,
329  buff_0,
330  d0, inc_d );
331  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
332  BLIS1_NO_CONJUGATE,
333  m_ahead,
334  n_behind,
335  buff_1,
336  Y20, rs_Y, cs_Y,
337  a21, rs_A,
338  buff_0,
339  e0, inc_e );
340  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
341  BLIS1_NO_CONJUGATE,
342  m_ahead,
343  n_behind,
344  buff_1,
345  Z20, rs_Z, cs_Z,
346  a21, rs_A,
347  buff_0,
348  f0, inc_f );
349 
350  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
351  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
352  bl1_sgemv( BLIS1_NO_TRANSPOSE,
353  BLIS1_NO_CONJUGATE,
354  m_ahead,
355  n_behind,
356  buff_m1,
357  Y20, rs_Y, cs_Y,
358  d0, inc_d,
359  buff_1,
360  y21, rs_Y );
361  bl1_sgemv( BLIS1_NO_TRANSPOSE,
362  BLIS1_NO_CONJUGATE,
363  m_ahead,
364  n_behind,
365  buff_m1,
366  A20, rs_A, cs_A,
367  f0, inc_f,
368  buff_1,
369  y21, rs_Y );
370 
371  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
372  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
373  bl1_sgemv( BLIS1_NO_TRANSPOSE,
374  BLIS1_NO_CONJUGATE,
375  m_ahead,
376  n_behind,
377  buff_m1,
378  A20, rs_A, cs_A,
379  e0, inc_e,
380  buff_1,
381  z21, rs_Z );
382  bl1_sgemv( BLIS1_NO_TRANSPOSE,
383  BLIS1_NO_CONJUGATE,
384  m_ahead,
385  n_behind,
386  buff_m1,
387  Z20, rs_Z, cs_Z,
388  d0, inc_d,
389  buff_1,
390  z21, rs_Z );
391 
392  // FLA_Copy( d0, t01 );
393  bl1_scopyv( BLIS1_NO_CONJUGATE,
394  n_behind,
395  d0, inc_d,
396  t01, rs_T );
397 
398  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
399  // FLA_Inv_scal( FLA_TWO, beta );
400  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
401  bl1_sdot( BLIS1_CONJUGATE,
402  m_ahead,
403  a21, rs_A,
404  z21, rs_Z,
405  &beta );
406  bl1_sinvscals( buff_2, &beta );
407  bl1_scopyconj( &beta, &conj_beta );
408 
409  // FLA_Scal( minus_inv_tau11, conj_beta );
410  // FLA_Axpy( conj_beta, a21, y21 );
411  // FLA_Scal( inv_tau11, y21 );
412  bl1_sscals( &minus_inv_tau11, &conj_beta );
413  bl1_saxpyv( BLIS1_NO_CONJUGATE,
414  m_ahead,
415  &conj_beta,
416  a21, rs_A,
417  y21, rs_Y );
418  bl1_sscalv( BLIS1_NO_CONJUGATE,
419  m_ahead,
420  &inv_tau11,
421  y21, rs_Y );
422 
423  // FLA_Scal( minus_inv_tau11, beta );
424  // FLA_Axpy( beta, a21, z21 );
425  // FLA_Scal( inv_tau11, z21 );
426  bl1_sscals( &minus_inv_tau11, &beta );
427  bl1_saxpyv( BLIS1_NO_CONJUGATE,
428  m_ahead,
429  &beta,
430  a21, rs_A,
431  z21, rs_Z );
432  bl1_sscalv( BLIS1_NO_CONJUGATE,
433  m_ahead,
434  &inv_tau11,
435  z21, rs_Z );
436 
437  // FLA_Dot( a12t, a21, dot_product );
438  // FLA_Scal( minus_inv_tau11, dot_product );
439  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
440  bl1_sdot( BLIS1_NO_CONJUGATE,
441  m_ahead,
442  a12t, cs_A,
443  a21, rs_A,
444  &dot_product );
445  bl1_sscals( &minus_inv_tau11, &dot_product );
446  bl1_saxpyv( BLIS1_CONJUGATE,
447  m_ahead,
448  &dot_product,
449  a21, rs_A,
450  a12t, cs_A );
451 
452  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
453  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
454  bl1_sgemv( BLIS1_NO_TRANSPOSE,
455  BLIS1_NO_CONJUGATE,
456  m_behind,
457  n_ahead,
458  buff_1,
459  A02, rs_A, cs_A,
460  a21, rs_A,
461  buff_0,
462  e0, inc_e );
463  bl1_sger( BLIS1_NO_CONJUGATE,
464  BLIS1_CONJUGATE,
465  m_behind,
466  n_ahead,
467  &minus_inv_tau11,
468  e0, inc_e,
469  a21, rs_A,
470  A02, rs_A, cs_A );
471 
472  // FLA_Copy( first_elem, a21_t );
473  *a21_t = first_elem;
474  }
475 
476  /*------------------------------------------------------------*/
477 
478  }
479 
480  // FLA_Obj_free( &d );
481  // FLA_Obj_free( &e );
482  // FLA_Obj_free( &f );
483  FLA_free( buff_d );
484  FLA_free( buff_e );
485  FLA_free( buff_f );
486 
487  return FLA_SUCCESS;
488 }
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opt_var4()

FLA_Error FLA_Hess_UT_step_opt_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30 {
31  FLA_Datatype datatype;
32  int m_A, m_T;
33  int rs_A, cs_A;
34  int rs_Y, cs_Y;
35  int rs_Z, cs_Z;
36  int rs_T, cs_T;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  m_A = FLA_Obj_length( A );
41  m_T = FLA_Obj_length( T );
42 
43  rs_A = FLA_Obj_row_stride( A );
44  cs_A = FLA_Obj_col_stride( A );
45 
46  rs_Y = FLA_Obj_row_stride( Y );
47  cs_Y = FLA_Obj_col_stride( Y );
48 
49  rs_Z = FLA_Obj_row_stride( Z );
50  cs_Z = FLA_Obj_col_stride( Z );
51 
52  rs_T = FLA_Obj_row_stride( T );
53  cs_T = FLA_Obj_col_stride( T );
54 
55 
56  switch ( datatype )
57  {
58  case FLA_FLOAT:
59  {
60  float* buff_A = FLA_FLOAT_PTR( A );
61  float* buff_Y = FLA_FLOAT_PTR( Y );
62  float* buff_Z = FLA_FLOAT_PTR( Z );
63  float* buff_T = FLA_FLOAT_PTR( T );
64 
65  FLA_Hess_UT_step_ops_var4( m_A,
66  m_T,
67  buff_A, rs_A, cs_A,
68  buff_Y, rs_Y, cs_Y,
69  buff_Z, rs_Z, cs_Z,
70  buff_T, rs_T, cs_T );
71 
72  break;
73  }
74 
75  case FLA_DOUBLE:
76  {
77  double* buff_A = FLA_DOUBLE_PTR( A );
78  double* buff_Y = FLA_DOUBLE_PTR( Y );
79  double* buff_Z = FLA_DOUBLE_PTR( Z );
80  double* buff_T = FLA_DOUBLE_PTR( T );
81 
82  FLA_Hess_UT_step_opd_var4( m_A,
83  m_T,
84  buff_A, rs_A, cs_A,
85  buff_Y, rs_Y, cs_Y,
86  buff_Z, rs_Z, cs_Z,
87  buff_T, rs_T, cs_T );
88 
89  break;
90  }
91 
92  case FLA_COMPLEX:
93  {
94  scomplex* buff_A = FLA_COMPLEX_PTR( A );
95  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
96  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
97  scomplex* buff_T = FLA_COMPLEX_PTR( T );
98 
99  FLA_Hess_UT_step_opc_var4( m_A,
100  m_T,
101  buff_A, rs_A, cs_A,
102  buff_Y, rs_Y, cs_Y,
103  buff_Z, rs_Z, cs_Z,
104  buff_T, rs_T, cs_T );
105 
106  break;
107  }
108 
109  case FLA_DOUBLE_COMPLEX:
110  {
111  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
112  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
113  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
114  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
115 
116  FLA_Hess_UT_step_opz_var4( m_A,
117  m_T,
118  buff_A, rs_A, cs_A,
119  buff_Y, rs_Y, cs_Y,
120  buff_Z, rs_Z, cs_Z,
121  buff_T, rs_T, cs_T );
122 
123  break;
124  }
125  }
126 
127  return FLA_SUCCESS;
128 }
FLA_Error FLA_Hess_UT_step_opc_var4(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var4.c:852
FLA_Error FLA_Hess_UT_step_opz_var4(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var4.c:1212
FLA_Error FLA_Hess_UT_step_ops_var4(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var4.c:132
FLA_Error FLA_Hess_UT_step_opd_var4(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var4.c:492
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var4(), and FLA_Hess_UT_opt_var4().

◆ FLA_Hess_UT_step_opz_var4()

FLA_Error FLA_Hess_UT_step_opz_var4 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
1218 {
1219  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
1220  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1221  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1222  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1223 
1224  dcomplex first_elem, last_elem;
1225  dcomplex dot_product;
1226  dcomplex beta, conj_beta;
1227  dcomplex inv_tau11;
1228  dcomplex minus_inv_tau11;
1229  int i;
1230 
1231  // b_alg = FLA_Obj_length( T );
1232  int b_alg = m_T;
1233 
1234  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
1235  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
1236  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1237  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1238  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1239  dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1240  int inc_d = 1;
1241  int inc_e = 1;
1242  int inc_f = 1;
1243 
1244  // FLA_Set( FLA_ZERO, Y );
1245  // FLA_Set( FLA_ZERO, Z );
1246  bl1_zsetm( m_A,
1247  b_alg,
1248  buff_0,
1249  buff_Y, rs_Y, cs_Y );
1250  bl1_zsetm( m_A,
1251  b_alg,
1252  buff_0,
1253  buff_Z, rs_Z, cs_Z );
1254 
1255  for ( i = 0; i < b_alg; ++i )
1256  {
1257  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1258  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1259  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1260  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1261  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1262  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1263  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1264 
1265  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1266  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1267  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1268 
1269  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1270  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1271  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1272 
1273  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1274  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1275 
1276  dcomplex* d0 = buff_d + (0 )*inc_d;
1277 
1278  dcomplex* e0 = buff_e + (0 )*inc_e;
1279 
1280  dcomplex* f0 = buff_f + (0 )*inc_f;
1281 
1282  dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
1283 
1284  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1285  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1286 
1287  dcomplex* ABL = a10t;
1288  dcomplex* ZBL = z10t;
1289 
1290  dcomplex* a2 = alpha11;
1291 
1292  int m_ahead = m_A - i - 1;
1293  int n_ahead = m_A - i - 1;
1294  int m_behind = i;
1295  int n_behind = i;
1296 
1297  /*------------------------------------------------------------*/
1298 
1299  if ( m_behind > 0 )
1300  {
1301  // FLA_Copy( a10t_r, last_elem );
1302  // FLA_Set( FLA_ONE, a10t_r );
1303  last_elem = *a10t_r;
1304  *a10t_r = *buff_1;
1305  }
1306 
1307  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1308  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
1309  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1310  BLIS1_CONJUGATE,
1311  m_ahead + 1,
1312  n_behind,
1313  buff_m1,
1314  ABL, rs_A, cs_A,
1315  y10t, cs_Y,
1316  buff_1,
1317  a2, rs_A );
1318  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1319  BLIS1_CONJUGATE,
1320  m_ahead + 1,
1321  n_behind,
1322  buff_m1,
1323  ZBL, rs_Z, cs_Z,
1324  a10t, cs_A,
1325  buff_1,
1326  a2, rs_A );
1327 
1328  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1329  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
1330  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1331  BLIS1_NO_CONJUGATE,
1332  m_ahead,
1333  n_behind,
1334  buff_m1,
1335  Y20, rs_Y, cs_Y,
1336  a10t, cs_A,
1337  buff_1,
1338  a12t, cs_A );
1339  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1340  BLIS1_NO_CONJUGATE,
1341  m_ahead,
1342  n_behind,
1343  buff_m1,
1344  A20, rs_A, cs_A,
1345  z10t, cs_Z,
1346  buff_1,
1347  a12t, cs_A );
1348 
1349  if ( m_behind > 0 )
1350  {
1351  // FLA_Copy( last_elem, a10t_r );
1352  *a10t_r = last_elem;
1353  }
1354 
1355  if ( m_ahead > 0 )
1356  {
1357  // FLA_Househ2_UT( FLA_LEFT,
1358  // a21_t,
1359  // a21_b, tau11 );
1360  FLA_Househ2_UT_l_opz( m_ahead - 1,
1361  a21_t,
1362  a21_b, rs_A,
1363  tau11 );
1364 
1365  // FLA_Set( FLA_ONE, inv_tau11 );
1366  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1367  // FLA_Copy( inv_tau11, minus_inv_tau11 );
1368  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1369  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
1370  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
1371 
1372  // FLA_Copy( a21_t, first_elem );
1373  // FLA_Set( FLA_ONE, a21_t );
1374  first_elem = *a21_t;
1375  *a21_t = *buff_1;
1376 
1377  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1378  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1379  BLIS1_NO_CONJUGATE,
1380  m_ahead,
1381  n_ahead,
1382  buff_1,
1383  A22, rs_A, cs_A,
1384  a21, rs_A,
1385  buff_0,
1386  y21, rs_Y );
1387 
1388  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1389  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1390  BLIS1_NO_CONJUGATE,
1391  m_ahead,
1392  n_ahead,
1393  buff_1,
1394  A22, rs_A, cs_A,
1395  a21, rs_A,
1396  buff_0,
1397  z21, rs_Z );
1398 
1399  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1400  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1401  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1402  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1403  BLIS1_NO_CONJUGATE,
1404  m_ahead,
1405  n_behind,
1406  buff_1,
1407  A20, rs_A, cs_A,
1408  a21, rs_A,
1409  buff_0,
1410  d0, inc_d );
1411  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1412  BLIS1_NO_CONJUGATE,
1413  m_ahead,
1414  n_behind,
1415  buff_1,
1416  Y20, rs_Y, cs_Y,
1417  a21, rs_A,
1418  buff_0,
1419  e0, inc_e );
1420  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1421  BLIS1_NO_CONJUGATE,
1422  m_ahead,
1423  n_behind,
1424  buff_1,
1425  Z20, rs_Z, cs_Z,
1426  a21, rs_A,
1427  buff_0,
1428  f0, inc_f );
1429 
1430  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1431  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1432  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1433  BLIS1_NO_CONJUGATE,
1434  m_ahead,
1435  n_behind,
1436  buff_m1,
1437  Y20, rs_Y, cs_Y,
1438  d0, inc_d,
1439  buff_1,
1440  y21, rs_Y );
1441  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1442  BLIS1_NO_CONJUGATE,
1443  m_ahead,
1444  n_behind,
1445  buff_m1,
1446  A20, rs_A, cs_A,
1447  f0, inc_f,
1448  buff_1,
1449  y21, rs_Y );
1450 
1451  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1452  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1453  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1454  BLIS1_NO_CONJUGATE,
1455  m_ahead,
1456  n_behind,
1457  buff_m1,
1458  A20, rs_A, cs_A,
1459  e0, inc_e,
1460  buff_1,
1461  z21, rs_Z );
1462  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1463  BLIS1_NO_CONJUGATE,
1464  m_ahead,
1465  n_behind,
1466  buff_m1,
1467  Z20, rs_Z, cs_Z,
1468  d0, inc_d,
1469  buff_1,
1470  z21, rs_Z );
1471 
1472  // FLA_Copy( d0, t01 );
1473  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1474  n_behind,
1475  d0, inc_d,
1476  t01, rs_T );
1477 
1478  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1479  // FLA_Inv_scal( FLA_TWO, beta );
1480  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1481  bl1_zdot( BLIS1_CONJUGATE,
1482  m_ahead,
1483  a21, rs_A,
1484  z21, rs_Z,
1485  &beta );
1486  bl1_zinvscals( buff_2, &beta );
1487  bl1_zcopyconj( &beta, &conj_beta );
1488 
1489  // FLA_Scal( minus_inv_tau11, conj_beta );
1490  // FLA_Axpy( conj_beta, a21, y21 );
1491  // FLA_Scal( inv_tau11, y21 );
1492  bl1_zscals( &minus_inv_tau11, &conj_beta );
1493  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1494  m_ahead,
1495  &conj_beta,
1496  a21, rs_A,
1497  y21, rs_Y );
1498  bl1_zscalv( BLIS1_NO_CONJUGATE,
1499  m_ahead,
1500  &inv_tau11,
1501  y21, rs_Y );
1502 
1503  // FLA_Scal( minus_inv_tau11, beta );
1504  // FLA_Axpy( beta, a21, z21 );
1505  // FLA_Scal( inv_tau11, z21 );
1506  bl1_zscals( &minus_inv_tau11, &beta );
1507  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1508  m_ahead,
1509  &beta,
1510  a21, rs_A,
1511  z21, rs_Z );
1512  bl1_zscalv( BLIS1_NO_CONJUGATE,
1513  m_ahead,
1514  &inv_tau11,
1515  z21, rs_Z );
1516 
1517  // FLA_Dot( a12t, a21, dot_product );
1518  // FLA_Scal( minus_inv_tau11, dot_product );
1519  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1520  bl1_zdot( BLIS1_NO_CONJUGATE,
1521  m_ahead,
1522  a12t, cs_A,
1523  a21, rs_A,
1524  &dot_product );
1525  bl1_zscals( &minus_inv_tau11, &dot_product );
1526  bl1_zaxpyv( BLIS1_CONJUGATE,
1527  m_ahead,
1528  &dot_product,
1529  a21, rs_A,
1530  a12t, cs_A );
1531 
1532  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1533  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1534  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1535  BLIS1_NO_CONJUGATE,
1536  m_behind,
1537  n_ahead,
1538  buff_1,
1539  A02, rs_A, cs_A,
1540  a21, rs_A,
1541  buff_0,
1542  e0, inc_e );
1543  bl1_zger( BLIS1_NO_CONJUGATE,
1544  BLIS1_CONJUGATE,
1545  m_behind,
1546  n_ahead,
1547  &minus_inv_tau11,
1548  e0, inc_e,
1549  a21, rs_A,
1550  A02, rs_A, cs_A );
1551 
1552  // FLA_Copy( first_elem, a21_t );
1553  *a21_t = first_elem;
1554  }
1555 
1556  /*------------------------------------------------------------*/
1557 
1558  }
1559 
1560  // FLA_Obj_free( &d );
1561  // FLA_Obj_free( &e );
1562  // FLA_Obj_free( &f );
1563  FLA_free( buff_d );
1564  FLA_free( buff_e );
1565  FLA_free( buff_f );
1566 
1567  return FLA_SUCCESS;
1568 }
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().