libflame  revision_anchor
Functions
FLA_Hess_UT_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_opt_var3()

FLA_Error FLA_Hess_UT_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_opt_var3( A, T );
16 }
FLA_Error FLA_Hess_UT_step_opt_var3(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var3.c:18

References FLA_Hess_UT_step_opt_var3().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_step_opc_var3()

FLA_Error FLA_Hess_UT_step_opc_var3 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
807 {
808  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
809  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
810  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
811  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
812 
813  scomplex first_elem;
814  scomplex dot_product;
815  scomplex beta, conj_beta;
816  scomplex inv_tau11;
817  scomplex minus_inv_tau11;
818  scomplex minus_upsilon1, minus_conj_upsilon1;
819  scomplex minus_psi1, minus_conj_psi1;
820  scomplex minus_zeta1;
821  int i;
822 
823  // b_alg = FLA_Obj_length( T );
824  int b_alg = m_T;
825 
826  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
827  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
828  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
829  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
830  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
831  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
832  scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
833  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
834  scomplex* buff_v = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
835  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
836  int inc_u = 1;
837  int inc_y = 1;
838  int inc_z = 1;
839  int inc_v = 1;
840  int inc_w = 1;
841 
842  // Initialize some variables (only to prevent compiler warnings).
843  first_elem = *buff_0;
844  minus_inv_tau11 = *buff_0;
845 
846  for ( i = 0; i < b_alg; ++i )
847  {
848  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
849  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
850  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
851  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
852  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
853  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
854 
855  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
856  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
857 
858  scomplex* upsilon1 = buff_u + (i )*inc_u;
859  scomplex* u2 = buff_u + (i+1)*inc_u;
860 
861  scomplex* y0 = buff_y + (0 )*inc_y;
862  scomplex* psi1 = buff_y + (i )*inc_y;
863  scomplex* y2 = buff_y + (i+1)*inc_y;
864 
865  scomplex* zeta1 = buff_z + (i )*inc_z;
866  scomplex* z2 = buff_z + (i+1)*inc_z;
867 
868  scomplex* v2 = buff_v + (i+1)*inc_v;
869 
870  scomplex* w2 = buff_w + (i+1)*inc_w;
871 
872  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
873  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
874 
875  int m_ahead = m_A - i - 1;
876  int n_ahead = m_A - i - 1;
877  int m_behind = i;
878  int n_behind = i;
879 
880  /*------------------------------------------------------------*/
881 
882  if ( m_behind > 0 )
883  {
884  // FLA_Copy( upsilon1, minus_upsilon1 );
885  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
886  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
887  bl1_cmult3( buff_m1, upsilon1, &minus_upsilon1 );
888  bl1_ccopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
889 
890  // FLA_Copy( psi1, minus_psi1 );
891  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
892  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
893  bl1_cmult3( buff_m1, psi1, &minus_psi1 );
894  bl1_ccopyconj( &minus_psi1, &minus_conj_psi1 );
895 
896  // FLA_Copy( zeta1, minus_zeta1 );
897  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
898  bl1_cmult3( buff_m1, zeta1, &minus_zeta1 );
899 
900  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
901  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
902  bl1_caxpyv( BLIS1_CONJUGATE,
903  1,
904  &minus_upsilon1,
905  psi1, 1,
906  alpha11, 1 );
907  bl1_caxpyv( BLIS1_CONJUGATE,
908  1,
909  &minus_zeta1,
910  upsilon1, 1,
911  alpha11, 1 );
912 
913  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
914  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
915  bl1_caxpyv( BLIS1_CONJUGATE,
916  m_ahead,
917  &minus_upsilon1,
918  y2, inc_y,
919  a12t, cs_A );
920  bl1_caxpyv( BLIS1_CONJUGATE,
921  m_ahead,
922  &minus_zeta1,
923  u2, inc_u,
924  a12t, cs_A );
925 
926  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
927  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
928  bl1_caxpyv( BLIS1_NO_CONJUGATE,
929  m_ahead,
930  &minus_conj_psi1,
931  u2, inc_u,
932  a21, rs_A );
933  bl1_caxpyv( BLIS1_NO_CONJUGATE,
934  m_ahead,
935  &minus_conj_upsilon1,
936  z2, inc_z,
937  a21, rs_A );
938  }
939 
940  if ( m_ahead > 0 )
941  {
942  // FLA_Househ2_UT( FLA_LEFT,
943  // a21_t,
944  // a21_b, tau11 );
945  FLA_Househ2_UT_l_opc( m_ahead - 1,
946  a21_t,
947  a21_b, rs_A,
948  tau11 );
949 
950  // FLA_Set( FLA_ONE, inv_tau11 );
951  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
952  // FLA_Copy( inv_tau11, minus_inv_tau11 );
953  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
954  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
955  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
956 
957  // FLA_Copy( a21_t, first_elem );
958  // FLA_Set( FLA_ONE, a21_t );
959  first_elem = *a21_t;
960  *a21_t = *buff_1;
961  }
962 
963  if ( m_behind > 0 )
964  {
965  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
966  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
967  bl1_cger( BLIS1_NO_CONJUGATE,
968  BLIS1_CONJUGATE,
969  m_ahead,
970  n_ahead,
971  buff_m1,
972  u2, inc_u,
973  y2, inc_y,
974  A22, rs_A, cs_A );
975  bl1_cger( BLIS1_NO_CONJUGATE,
976  BLIS1_CONJUGATE,
977  m_ahead,
978  n_ahead,
979  buff_m1,
980  z2, inc_z,
981  u2, inc_u,
982  A22, rs_A, cs_A );
983  }
984 
985  if ( m_ahead > 0 )
986  {
987  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
988  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
989  BLIS1_NO_CONJUGATE,
990  m_ahead,
991  n_ahead,
992  buff_1,
993  A22, rs_A, cs_A,
994  a21, rs_A,
995  buff_0,
996  v2, inc_v );
997 
998  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
999  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1000  BLIS1_NO_CONJUGATE,
1001  m_ahead,
1002  n_ahead,
1003  buff_1,
1004  A22, rs_A, cs_A,
1005  a21, rs_A,
1006  buff_0,
1007  w2, inc_w );
1008 
1009  // FLA_Copy( a21, u2 );
1010  // FLA_Copy( v2, y2 );
1011  // FLA_Copy( w2, z2 );
1012  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1013  m_ahead,
1014  a21, rs_A,
1015  u2, inc_u );
1016  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1017  m_ahead,
1018  v2, inc_v,
1019  y2, inc_y );
1020  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1021  m_ahead,
1022  w2, inc_w,
1023  z2, inc_z );
1024 
1025  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1026  // FLA_Inv_scal( FLA_TWO, beta );
1027  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1028  bl1_cdot( BLIS1_CONJUGATE,
1029  m_ahead,
1030  a21, rs_A,
1031  z2, inc_z,
1032  &beta );
1033  bl1_cinvscals( buff_2, &beta );
1034  bl1_ccopyconj( &beta, &conj_beta );
1035 
1036  // FLA_Scal( minus_inv_tau11, conj_beta );
1037  // FLA_Axpy( conj_beta, a21, y2 );
1038  // FLA_Scal( inv_tau11, y2 );
1039  bl1_cscals( &minus_inv_tau11, &conj_beta );
1040  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1041  m_ahead,
1042  &conj_beta,
1043  a21, rs_A,
1044  y2, inc_y );
1045  bl1_cscalv( BLIS1_NO_CONJUGATE,
1046  m_ahead,
1047  &inv_tau11,
1048  y2, inc_y );
1049 
1050  // FLA_Scal( minus_inv_tau11, beta );
1051  // FLA_Axpy( beta, a21, z2 );
1052  // FLA_Scal( inv_tau11, z2 );
1053  bl1_cscals( &minus_inv_tau11, &beta );
1054  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1055  m_ahead,
1056  &beta,
1057  a21, rs_A,
1058  z2, inc_z );
1059  bl1_cscalv( BLIS1_NO_CONJUGATE,
1060  m_ahead,
1061  &inv_tau11,
1062  z2, inc_z );
1063 
1064  // FLA_Dot( a12t, a21, dot_product );
1065  // FLA_Scal( minus_inv_tau11, dot_product );
1066  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1067  bl1_cdot( BLIS1_NO_CONJUGATE,
1068  m_ahead,
1069  a12t, cs_A,
1070  a21, rs_A,
1071  &dot_product );
1072  bl1_cscals( &minus_inv_tau11, &dot_product );
1073  bl1_caxpyv( BLIS1_CONJUGATE,
1074  m_ahead,
1075  &dot_product,
1076  a21, rs_A,
1077  a12t, cs_A );
1078 
1079  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1080  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1081  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1082  BLIS1_NO_CONJUGATE,
1083  m_behind,
1084  n_ahead,
1085  buff_1,
1086  A02, rs_A, cs_A,
1087  a21, rs_A,
1088  buff_0,
1089  y0, inc_y );
1090  bl1_cger( BLIS1_NO_CONJUGATE,
1091  BLIS1_CONJUGATE,
1092  m_behind,
1093  n_ahead,
1094  &minus_inv_tau11,
1095  y0, inc_y,
1096  a21, rs_A,
1097  A02, rs_A, cs_A );
1098 
1099  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1100  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1101  BLIS1_NO_CONJUGATE,
1102  m_ahead,
1103  n_behind,
1104  buff_1,
1105  A20, rs_A, cs_A,
1106  a21, rs_A,
1107  buff_0,
1108  t01, rs_T );
1109 
1110  // FLA_Copy( first_elem, a21_t );
1111  *a21_t = first_elem;
1112  }
1113 
1114  if ( m_behind + 1 == b_alg && m_ahead > 0 )
1115  {
1116  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1117  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1118  bl1_cger( BLIS1_NO_CONJUGATE,
1119  BLIS1_CONJUGATE,
1120  m_ahead,
1121  n_ahead,
1122  buff_m1,
1123  u2, inc_u,
1124  y2, inc_y,
1125  A22, rs_A, cs_A );
1126  bl1_cger( BLIS1_NO_CONJUGATE,
1127  BLIS1_CONJUGATE,
1128  m_ahead,
1129  n_ahead,
1130  buff_m1,
1131  z2, inc_z,
1132  u2, inc_u,
1133  A22, rs_A, cs_A );
1134  }
1135 
1136  /*------------------------------------------------------------*/
1137 
1138  }
1139 
1140  // FLA_Obj_free( &u );
1141  // FLA_Obj_free( &y );
1142  // FLA_Obj_free( &z );
1143  // FLA_Obj_free( &v );
1144  // FLA_Obj_free( &w );
1145  FLA_free( buff_u );
1146  FLA_free( buff_y );
1147  FLA_free( buff_z );
1148  FLA_free( buff_v );
1149  FLA_free( buff_w );
1150 
1151  return FLA_SUCCESS;
1152 }
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
double *restrict zeta1
Definition: bl1_axmyv2.c:142
double *restrict psi1
Definition: bl1_axmyv2.c:139
int i
Definition: bl1_axmyv2.c:145
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
scomplex
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opd_var3()

FLA_Error FLA_Hess_UT_step_opd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
454 {
455  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
456  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
457  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
458  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
459 
460  double first_elem;
461  double dot_product;
462  double beta, conj_beta;
463  double inv_tau11;
464  double minus_inv_tau11;
465  double minus_upsilon1, minus_conj_upsilon1;
466  double minus_psi1, minus_conj_psi1;
467  double minus_zeta1;
468  int i;
469 
470  // b_alg = FLA_Obj_length( T );
471  int b_alg = m_T;
472 
473  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
474  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
475  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
476  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
477  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
478  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
479  double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
480  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
481  double* buff_v = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
482  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
483  int inc_u = 1;
484  int inc_y = 1;
485  int inc_z = 1;
486  int inc_v = 1;
487  int inc_w = 1;
488 
489  // Initialize some variables (only to prevent compiler warnings).
490  first_elem = *buff_0;
491  minus_inv_tau11 = *buff_0;
492 
493  for ( i = 0; i < b_alg; ++i )
494  {
495  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
496  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
497  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
498  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
499  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
500  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
501 
502  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
503  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
504 
505  double* upsilon1 = buff_u + (i )*inc_u;
506  double* u2 = buff_u + (i+1)*inc_u;
507 
508  double* y0 = buff_y + (0 )*inc_y;
509  double* psi1 = buff_y + (i )*inc_y;
510  double* y2 = buff_y + (i+1)*inc_y;
511 
512  double* zeta1 = buff_z + (i )*inc_z;
513  double* z2 = buff_z + (i+1)*inc_z;
514 
515  double* v2 = buff_v + (i+1)*inc_v;
516 
517  double* w2 = buff_w + (i+1)*inc_w;
518 
519  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
520  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
521 
522  int m_ahead = m_A - i - 1;
523  int n_ahead = m_A - i - 1;
524  int m_behind = i;
525  int n_behind = i;
526 
527  /*------------------------------------------------------------*/
528 
529  if ( m_behind > 0 )
530  {
531  // FLA_Copy( upsilon1, minus_upsilon1 );
532  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
533  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
534  bl1_dmult3( buff_m1, upsilon1, &minus_upsilon1 );
535  bl1_dcopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
536 
537  // FLA_Copy( psi1, minus_psi1 );
538  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
539  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
540  bl1_dmult3( buff_m1, psi1, &minus_psi1 );
541  bl1_dcopyconj( &minus_psi1, &minus_conj_psi1 );
542 
543  // FLA_Copy( zeta1, minus_zeta1 );
544  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
545  bl1_dmult3( buff_m1, zeta1, &minus_zeta1 );
546 
547  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
548  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
549  bl1_daxpyv( BLIS1_CONJUGATE,
550  1,
551  &minus_upsilon1,
552  psi1, 1,
553  alpha11, 1 );
554  bl1_daxpyv( BLIS1_CONJUGATE,
555  1,
556  &minus_zeta1,
557  upsilon1, 1,
558  alpha11, 1 );
559 
560  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
561  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
562  bl1_daxpyv( BLIS1_CONJUGATE,
563  m_ahead,
564  &minus_upsilon1,
565  y2, inc_y,
566  a12t, cs_A );
567  bl1_daxpyv( BLIS1_CONJUGATE,
568  m_ahead,
569  &minus_zeta1,
570  u2, inc_u,
571  a12t, cs_A );
572 
573  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
574  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
575  bl1_daxpyv( BLIS1_NO_CONJUGATE,
576  m_ahead,
577  &minus_conj_psi1,
578  u2, inc_u,
579  a21, rs_A );
580  bl1_daxpyv( BLIS1_NO_CONJUGATE,
581  m_ahead,
582  &minus_conj_upsilon1,
583  z2, inc_z,
584  a21, rs_A );
585  }
586 
587  if ( m_ahead > 0 )
588  {
589  // FLA_Househ2_UT( FLA_LEFT,
590  // a21_t,
591  // a21_b, tau11 );
592  FLA_Househ2_UT_l_opd( m_ahead - 1,
593  a21_t,
594  a21_b, rs_A,
595  tau11 );
596 
597  // FLA_Set( FLA_ONE, inv_tau11 );
598  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
599  // FLA_Copy( inv_tau11, minus_inv_tau11 );
600  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
601  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
602  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
603 
604  // FLA_Copy( a21_t, first_elem );
605  // FLA_Set( FLA_ONE, a21_t );
606  first_elem = *a21_t;
607  *a21_t = *buff_1;
608  }
609 
610  if ( m_behind > 0 )
611  {
612  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
613  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
614  bl1_dger( BLIS1_NO_CONJUGATE,
615  BLIS1_CONJUGATE,
616  m_ahead,
617  n_ahead,
618  buff_m1,
619  u2, inc_u,
620  y2, inc_y,
621  A22, rs_A, cs_A );
622  bl1_dger( BLIS1_NO_CONJUGATE,
623  BLIS1_CONJUGATE,
624  m_ahead,
625  n_ahead,
626  buff_m1,
627  z2, inc_z,
628  u2, inc_u,
629  A22, rs_A, cs_A );
630  }
631 
632  if ( m_ahead > 0 )
633  {
634  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
635  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
636  BLIS1_NO_CONJUGATE,
637  m_ahead,
638  n_ahead,
639  buff_1,
640  A22, rs_A, cs_A,
641  a21, rs_A,
642  buff_0,
643  v2, inc_v );
644 
645  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
646  bl1_dgemv( BLIS1_NO_TRANSPOSE,
647  BLIS1_NO_CONJUGATE,
648  m_ahead,
649  n_ahead,
650  buff_1,
651  A22, rs_A, cs_A,
652  a21, rs_A,
653  buff_0,
654  w2, inc_w );
655 
656  // FLA_Copy( a21, u2 );
657  // FLA_Copy( v2, y2 );
658  // FLA_Copy( w2, z2 );
659  bl1_dcopyv( BLIS1_NO_CONJUGATE,
660  m_ahead,
661  a21, rs_A,
662  u2, inc_u );
663  bl1_dcopyv( BLIS1_NO_CONJUGATE,
664  m_ahead,
665  v2, inc_v,
666  y2, inc_y );
667  bl1_dcopyv( BLIS1_NO_CONJUGATE,
668  m_ahead,
669  w2, inc_w,
670  z2, inc_z );
671 
672  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
673  // FLA_Inv_scal( FLA_TWO, beta );
674  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
675  bl1_ddot( BLIS1_CONJUGATE,
676  m_ahead,
677  a21, rs_A,
678  z2, inc_z,
679  &beta );
680  bl1_dinvscals( buff_2, &beta );
681  bl1_dcopyconj( &beta, &conj_beta );
682 
683  // FLA_Scal( minus_inv_tau11, conj_beta );
684  // FLA_Axpy( conj_beta, a21, y2 );
685  // FLA_Scal( inv_tau11, y2 );
686  bl1_dscals( &minus_inv_tau11, &conj_beta );
687  bl1_daxpyv( BLIS1_NO_CONJUGATE,
688  m_ahead,
689  &conj_beta,
690  a21, rs_A,
691  y2, inc_y );
692  bl1_dscalv( BLIS1_NO_CONJUGATE,
693  m_ahead,
694  &inv_tau11,
695  y2, inc_y );
696 
697  // FLA_Scal( minus_inv_tau11, beta );
698  // FLA_Axpy( beta, a21, z2 );
699  // FLA_Scal( inv_tau11, z2 );
700  bl1_dscals( &minus_inv_tau11, &beta );
701  bl1_daxpyv( BLIS1_NO_CONJUGATE,
702  m_ahead,
703  &beta,
704  a21, rs_A,
705  z2, inc_z );
706  bl1_dscalv( BLIS1_NO_CONJUGATE,
707  m_ahead,
708  &inv_tau11,
709  z2, inc_z );
710 
711  // FLA_Dot( a12t, a21, dot_product );
712  // FLA_Scal( minus_inv_tau11, dot_product );
713  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
714  bl1_ddot( BLIS1_NO_CONJUGATE,
715  m_ahead,
716  a12t, cs_A,
717  a21, rs_A,
718  &dot_product );
719  bl1_dscals( &minus_inv_tau11, &dot_product );
720  bl1_daxpyv( BLIS1_CONJUGATE,
721  m_ahead,
722  &dot_product,
723  a21, rs_A,
724  a12t, cs_A );
725 
726  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
727  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
728  bl1_dgemv( BLIS1_NO_TRANSPOSE,
729  BLIS1_NO_CONJUGATE,
730  m_behind,
731  n_ahead,
732  buff_1,
733  A02, rs_A, cs_A,
734  a21, rs_A,
735  buff_0,
736  y0, inc_y );
737  bl1_dger( BLIS1_NO_CONJUGATE,
738  BLIS1_CONJUGATE,
739  m_behind,
740  n_ahead,
741  &minus_inv_tau11,
742  y0, inc_y,
743  a21, rs_A,
744  A02, rs_A, cs_A );
745 
746  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
747  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
748  BLIS1_NO_CONJUGATE,
749  m_ahead,
750  n_behind,
751  buff_1,
752  A20, rs_A, cs_A,
753  a21, rs_A,
754  buff_0,
755  t01, rs_T );
756 
757  // FLA_Copy( first_elem, a21_t );
758  *a21_t = first_elem;
759  }
760 
761  if ( m_behind + 1 == b_alg && m_ahead > 0 )
762  {
763  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
764  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
765  bl1_dger( BLIS1_NO_CONJUGATE,
766  BLIS1_CONJUGATE,
767  m_ahead,
768  n_ahead,
769  buff_m1,
770  u2, inc_u,
771  y2, inc_y,
772  A22, rs_A, cs_A );
773  bl1_dger( BLIS1_NO_CONJUGATE,
774  BLIS1_CONJUGATE,
775  m_ahead,
776  n_ahead,
777  buff_m1,
778  z2, inc_z,
779  u2, inc_u,
780  A22, rs_A, cs_A );
781  }
782 
783  /*------------------------------------------------------------*/
784 
785  }
786 
787  // FLA_Obj_free( &u );
788  // FLA_Obj_free( &y );
789  // FLA_Obj_free( &z );
790  // FLA_Obj_free( &v );
791  // FLA_Obj_free( &w );
792  FLA_free( buff_u );
793  FLA_free( buff_y );
794  FLA_free( buff_z );
795  FLA_free( buff_v );
796  FLA_free( buff_w );
797 
798  return FLA_SUCCESS;
799 }
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_ops_var3()

FLA_Error FLA_Hess_UT_step_ops_var3 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float dot_product;
109  float beta, conj_beta;
110  float inv_tau11;
111  float minus_inv_tau11;
112  float minus_upsilon1, minus_conj_upsilon1;
113  float minus_psi1, minus_conj_psi1;
114  float minus_zeta1;
115  int i;
116 
117  // b_alg = FLA_Obj_length( T );
118  int b_alg = m_T;
119 
120  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
121  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
122  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
123  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
124  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
125  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
126  float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
127  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
128  float* buff_v = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
129  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
130  int inc_u = 1;
131  int inc_y = 1;
132  int inc_z = 1;
133  int inc_v = 1;
134  int inc_w = 1;
135 
136  // Initialize some variables (only to prevent compiler warnings).
137  first_elem = *buff_0;
138  minus_inv_tau11 = *buff_0;
139 
140  for ( i = 0; i < b_alg; ++i )
141  {
142  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
143  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
144  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
145  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
146  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
147  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
148 
149  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
150  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
151 
152  float* upsilon1 = buff_u + (i )*inc_u;
153  float* u2 = buff_u + (i+1)*inc_u;
154 
155  float* y0 = buff_y + (0 )*inc_y;
156  float* psi1 = buff_y + (i )*inc_y;
157  float* y2 = buff_y + (i+1)*inc_y;
158 
159  float* zeta1 = buff_z + (i )*inc_z;
160  float* z2 = buff_z + (i+1)*inc_z;
161 
162  float* v2 = buff_v + (i+1)*inc_v;
163 
164  float* w2 = buff_w + (i+1)*inc_w;
165 
166  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
167  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
168 
169  int m_ahead = m_A - i - 1;
170  int n_ahead = m_A - i - 1;
171  int m_behind = i;
172  int n_behind = i;
173 
174  /*------------------------------------------------------------*/
175 
176  if ( m_behind > 0 )
177  {
178  // FLA_Copy( upsilon1, minus_upsilon1 );
179  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
180  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
181  bl1_smult3( buff_m1, upsilon1, &minus_upsilon1 );
182  bl1_scopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
183 
184  // FLA_Copy( psi1, minus_psi1 );
185  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
186  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
187  bl1_smult3( buff_m1, psi1, &minus_psi1 );
188  bl1_scopyconj( &minus_psi1, &minus_conj_psi1 );
189 
190  // FLA_Copy( zeta1, minus_zeta1 );
191  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
192  bl1_smult3( buff_m1, zeta1, &minus_zeta1 );
193 
194  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
195  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
196  bl1_saxpyv( BLIS1_CONJUGATE,
197  1,
198  &minus_upsilon1,
199  psi1, 1,
200  alpha11, 1 );
201  bl1_saxpyv( BLIS1_CONJUGATE,
202  1,
203  &minus_zeta1,
204  upsilon1, 1,
205  alpha11, 1 );
206 
207  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
208  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
209  bl1_saxpyv( BLIS1_CONJUGATE,
210  m_ahead,
211  &minus_upsilon1,
212  y2, inc_y,
213  a12t, cs_A );
214  bl1_saxpyv( BLIS1_CONJUGATE,
215  m_ahead,
216  &minus_zeta1,
217  u2, inc_u,
218  a12t, cs_A );
219 
220  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
221  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
222  bl1_saxpyv( BLIS1_NO_CONJUGATE,
223  m_ahead,
224  &minus_conj_psi1,
225  u2, inc_u,
226  a21, rs_A );
227  bl1_saxpyv( BLIS1_NO_CONJUGATE,
228  m_ahead,
229  &minus_conj_upsilon1,
230  z2, inc_z,
231  a21, rs_A );
232  }
233 
234  if ( m_ahead > 0 )
235  {
236  // FLA_Househ2_UT( FLA_LEFT,
237  // a21_t,
238  // a21_b, tau11 );
239  FLA_Househ2_UT_l_ops( m_ahead - 1,
240  a21_t,
241  a21_b, rs_A,
242  tau11 );
243 
244  // FLA_Set( FLA_ONE, inv_tau11 );
245  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
246  // FLA_Copy( inv_tau11, minus_inv_tau11 );
247  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
248  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
249  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
250 
251  // FLA_Copy( a21_t, first_elem );
252  // FLA_Set( FLA_ONE, a21_t );
253  first_elem = *a21_t;
254  *a21_t = *buff_1;
255  }
256 
257  if ( m_behind > 0 )
258  {
259  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
260  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
261  bl1_sger( BLIS1_NO_CONJUGATE,
262  BLIS1_CONJUGATE,
263  m_ahead,
264  n_ahead,
265  buff_m1,
266  u2, inc_u,
267  y2, inc_y,
268  A22, rs_A, cs_A );
269  bl1_sger( BLIS1_NO_CONJUGATE,
270  BLIS1_CONJUGATE,
271  m_ahead,
272  n_ahead,
273  buff_m1,
274  z2, inc_z,
275  u2, inc_u,
276  A22, rs_A, cs_A );
277  }
278 
279  if ( m_ahead > 0 )
280  {
281  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
282  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
283  BLIS1_NO_CONJUGATE,
284  m_ahead,
285  n_ahead,
286  buff_1,
287  A22, rs_A, cs_A,
288  a21, rs_A,
289  buff_0,
290  v2, inc_v );
291 
292  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
293  bl1_sgemv( BLIS1_NO_TRANSPOSE,
294  BLIS1_NO_CONJUGATE,
295  m_ahead,
296  n_ahead,
297  buff_1,
298  A22, rs_A, cs_A,
299  a21, rs_A,
300  buff_0,
301  w2, inc_w );
302 
303  // FLA_Copy( a21, u2 );
304  // FLA_Copy( v2, y2 );
305  // FLA_Copy( w2, z2 );
306  bl1_scopyv( BLIS1_NO_CONJUGATE,
307  m_ahead,
308  a21, rs_A,
309  u2, inc_u );
310  bl1_scopyv( BLIS1_NO_CONJUGATE,
311  m_ahead,
312  v2, inc_v,
313  y2, inc_y );
314  bl1_scopyv( BLIS1_NO_CONJUGATE,
315  m_ahead,
316  w2, inc_w,
317  z2, inc_z );
318 
319  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
320  // FLA_Inv_scal( FLA_TWO, beta );
321  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
322  bl1_sdot( BLIS1_CONJUGATE,
323  m_ahead,
324  a21, rs_A,
325  z2, inc_z,
326  &beta );
327  bl1_sinvscals( buff_2, &beta );
328  bl1_scopyconj( &beta, &conj_beta );
329 
330  // FLA_Scal( minus_inv_tau11, conj_beta );
331  // FLA_Axpy( conj_beta, a21, y2 );
332  // FLA_Scal( inv_tau11, y2 );
333  bl1_sscals( &minus_inv_tau11, &conj_beta );
334  bl1_saxpyv( BLIS1_NO_CONJUGATE,
335  m_ahead,
336  &conj_beta,
337  a21, rs_A,
338  y2, inc_y );
339  bl1_sscalv( BLIS1_NO_CONJUGATE,
340  m_ahead,
341  &inv_tau11,
342  y2, inc_y );
343 
344  // FLA_Scal( minus_inv_tau11, beta );
345  // FLA_Axpy( beta, a21, z2 );
346  // FLA_Scal( inv_tau11, z2 );
347  bl1_sscals( &minus_inv_tau11, &beta );
348  bl1_saxpyv( BLIS1_NO_CONJUGATE,
349  m_ahead,
350  &beta,
351  a21, rs_A,
352  z2, inc_z );
353  bl1_sscalv( BLIS1_NO_CONJUGATE,
354  m_ahead,
355  &inv_tau11,
356  z2, inc_z );
357 
358  // FLA_Dot( a12t, a21, dot_product );
359  // FLA_Scal( minus_inv_tau11, dot_product );
360  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
361  bl1_sdot( BLIS1_NO_CONJUGATE,
362  m_ahead,
363  a12t, cs_A,
364  a21, rs_A,
365  &dot_product );
366  bl1_sscals( &minus_inv_tau11, &dot_product );
367  bl1_saxpyv( BLIS1_CONJUGATE,
368  m_ahead,
369  &dot_product,
370  a21, rs_A,
371  a12t, cs_A );
372 
373  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
374  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
375  bl1_sgemv( BLIS1_NO_TRANSPOSE,
376  BLIS1_NO_CONJUGATE,
377  m_behind,
378  n_ahead,
379  buff_1,
380  A02, rs_A, cs_A,
381  a21, rs_A,
382  buff_0,
383  y0, inc_y );
384  bl1_sger( BLIS1_NO_CONJUGATE,
385  BLIS1_CONJUGATE,
386  m_behind,
387  n_ahead,
388  &minus_inv_tau11,
389  y0, inc_y,
390  a21, rs_A,
391  A02, rs_A, cs_A );
392 
393  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
394  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
395  BLIS1_NO_CONJUGATE,
396  m_ahead,
397  n_behind,
398  buff_1,
399  A20, rs_A, cs_A,
400  a21, rs_A,
401  buff_0,
402  t01, rs_T );
403 
404  // FLA_Copy( first_elem, a21_t );
405  *a21_t = first_elem;
406  }
407 
408  if ( m_behind + 1 == b_alg && m_ahead > 0 )
409  {
410  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
411  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
412  bl1_sger( BLIS1_NO_CONJUGATE,
413  BLIS1_CONJUGATE,
414  m_ahead,
415  n_ahead,
416  buff_m1,
417  u2, inc_u,
418  y2, inc_y,
419  A22, rs_A, cs_A );
420  bl1_sger( BLIS1_NO_CONJUGATE,
421  BLIS1_CONJUGATE,
422  m_ahead,
423  n_ahead,
424  buff_m1,
425  z2, inc_z,
426  u2, inc_u,
427  A22, rs_A, cs_A );
428  }
429 
430  /*------------------------------------------------------------*/
431 
432  }
433 
434  // FLA_Obj_free( &u );
435  // FLA_Obj_free( &y );
436  // FLA_Obj_free( &z );
437  // FLA_Obj_free( &v );
438  // FLA_Obj_free( &w );
439  FLA_free( buff_u );
440  FLA_free( buff_y );
441  FLA_free( buff_z );
442  FLA_free( buff_v );
443  FLA_free( buff_w );
444 
445  return FLA_SUCCESS;
446 }
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opt_var3()

/*
 * FLA_Hess_UT_step_opt_var3
 *
 * Object-level front end for the datatype-specific "step" kernels of this
 * file.  It queries the datatype, length and row/column strides of the
 * objects and forwards the raw buffers to the matching kernel:
 *
 *   FLA_FLOAT           -> FLA_Hess_UT_step_ops_var3()
 *   FLA_DOUBLE          -> FLA_Hess_UT_step_opd_var3()
 *   FLA_COMPLEX         -> FLA_Hess_UT_step_opc_var3()
 *   FLA_DOUBLE_COMPLEX  -> FLA_Hess_UT_step_opz_var3()
 *
 * Parameters:
 *   A  Object whose datatype selects the kernel; its length (m_A), strides
 *      and buffer are handed to the kernel.
 *   T  Object whose length (m_T), strides and buffer are handed to the
 *      kernel.  Its buffer is accessed with A's datatype.
 *
 * Returns:
 *   FLA_SUCCESS, unconditionally.  The value returned by the kernel is not
 *   inspected, and a datatype matching none of the cases results in no work
 *   being done.
 */
FLA_Error FLA_Hess_UT_step_opt_var3( FLA_Obj A, FLA_Obj T )
{
  FLA_Datatype datatype;
  int          m_A, m_T;
  int          rs_A, cs_A;
  int          rs_T, cs_T;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  m_T      = FLA_Obj_length( T );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  rs_T     = FLA_Obj_row_stride( T );
  cs_T     = FLA_Obj_col_stride( T );


  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_T = FLA_FLOAT_PTR( T );

      FLA_Hess_UT_step_ops_var3( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_T = FLA_DOUBLE_PTR( T );

      FLA_Hess_UT_step_opd_var3( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_T = FLA_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opc_var3( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opz_var3( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Hess_UT_step_opz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var3.c:1156
FLA_Error FLA_Hess_UT_step_opd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var3.c:450
FLA_Error FLA_Hess_UT_step_opc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var3.c:803
FLA_Error FLA_Hess_UT_step_ops_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var3.c:97
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var3(), and FLA_Hess_UT_opt_var3().

◆ FLA_Hess_UT_step_opz_var3()

/*
 * FLA_Hess_UT_step_opz_var3
 *
 * Double-precision complex kernel operating directly on raw buffers.  It
 * runs b_alg = m_T iterations over the m_A x m_A matrix stored in buff_A.
 * In iteration i it:
 *
 *   1. (i > 0) applies the rank-2 update built from the work vectors u, y, z
 *      of the previous iteration to alpha11, a12t, a21 and A22;
 *   2. calls FLA_Househ2_UT_l_opz() on column i below the diagonal (a21),
 *      storing tau in T(i,i);
 *   3. with the first element of a21 temporarily set to one, forms
 *      v2 = A22^H a21 and w2 = A22 a21, derives the new u2, y2, z2 from
 *      them, and updates a12t and A02 from the right;
 *   4. writes t01 = A20^H a21 into column i of T above the diagonal;
 *   5. in the final iteration, applies the pending rank-2 update to A22
 *      immediately, since no later iteration will do so.
 *
 * NOTE(review): judging by the name this is one blocked step of a reduction
 * to Hessenberg form with UT-accumulated Householder transforms -- confirm
 * against the libflame documentation.
 *
 * Parameters:
 *   m_A            Dimension of the square matrix in buff_A; also the length
 *                  of each of the five temporary work vectors.
 *   m_T            Number of iterations to perform (b_alg).
 *   buff_A         Matrix buffer, updated in place.  Element (r,c) is at
 *                  buff_A + c*cs_A + r*rs_A.
 *   rs_A, cs_A     Row and column strides of buff_A.
 *   buff_T         Buffer receiving tau11 on the diagonal and t01 above it.
 *                  Element (r,c) is at buff_T + c*cs_T + r*rs_T.
 *   rs_T, cs_T     Row and column strides of buff_T.
 *
 * Returns:
 *   FLA_SUCCESS, unconditionally.
 *
 * Memory:
 *   Five work vectors of m_A elements are obtained with FLA_malloc() and
 *   released with FLA_free() before returning.
 */
FLA_Error FLA_Hess_UT_step_opz_var3( int m_A,
                                     int m_T,
                                     dcomplex* buff_A, int rs_A, int cs_A,
                                     dcomplex* buff_T, int rs_T, int cs_T )
{
  dcomplex* buff_2  = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  first_elem;
  dcomplex  dot_product;
  dcomplex  beta, conj_beta;
  dcomplex  inv_tau11;
  dcomplex  minus_inv_tau11;
  dcomplex  minus_upsilon1, minus_conj_upsilon1;
  dcomplex  minus_psi1, minus_conj_psi1;
  dcomplex  minus_zeta1;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_T;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_u  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;
  int       inc_v  = 1;
  int       inc_w  = 1;

  // Initialize some variables (only to prevent compiler warnings).
  first_elem      = *buff_0;
  minus_inv_tau11 = *buff_0;

  for ( i = 0; i < b_alg; ++i )
  {
    // Partition A around the current diagonal element (i,i).
    dcomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    dcomplex* t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    dcomplex* upsilon1 = buff_u + (i  )*inc_u;
    dcomplex* u2       = buff_u + (i+1)*inc_u;

    dcomplex* y0       = buff_y + (0  )*inc_y;
    dcomplex* psi1     = buff_y + (i  )*inc_y;
    dcomplex* y2       = buff_y + (i+1)*inc_y;

    dcomplex* zeta1    = buff_z + (i  )*inc_z;
    dcomplex* z2       = buff_z + (i+1)*inc_z;

    dcomplex* v2       = buff_v + (i+1)*inc_v;

    dcomplex* w2       = buff_w + (i+1)*inc_w;

    // Top element and remainder of a21.
    dcomplex* a21_t    = a21    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a21_b    = a21    + (0  )*cs_A + (1  )*rs_A;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = m_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // Apply the previous iteration's rank-2 update to alpha11, a12t and
    // a21.  u, y and z hold no data yet when i == 0.
    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon1, minus_upsilon1 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
      bl1_zmult3( buff_m1, upsilon1, &minus_upsilon1 );
      bl1_zcopyconj( &minus_upsilon1, &minus_conj_upsilon1 );

      // FLA_Copy( psi1, minus_psi1 );
      // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
      bl1_zmult3( buff_m1, psi1, &minus_psi1 );
      bl1_zcopyconj( &minus_psi1, &minus_conj_psi1 );

      // FLA_Copy( zeta1, minus_zeta1 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
      bl1_zmult3( buff_m1, zeta1, &minus_zeta1 );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  1,
                  &minus_upsilon1,
                  psi1, 1,
                  alpha11, 1 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  1,
                  &minus_zeta1,
                  upsilon1, 1,
                  alpha11, 1 );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &minus_upsilon1,
                  y2, inc_y,
                  a12t, cs_A );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &minus_zeta1,
                  u2, inc_u,
                  a12t, cs_A );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi1,
                  u2, inc_u,
                  a21, rs_A );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_upsilon1,
                  z2, inc_z,
                  a21, rs_A );
    }

    if ( m_ahead > 0 )
    {
      // FLA_Househ2_UT( FLA_LEFT,
      //                 a21_t,
      //                 a21_b, tau11 );
      FLA_Househ2_UT_l_opz( m_ahead - 1,
                            a21_t,
                            a21_b, rs_A,
                            tau11 );

      // FLA_Set( FLA_ONE, inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
      // FLA_Copy( inv_tau11, minus_inv_tau11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
      bl1_zdiv3( buff_1, tau11, &inv_tau11 );
      bl1_zneg2( &inv_tau11, &minus_inv_tau11 );

      // Save the top element of a21 and overwrite it with one so that a21
      // can be passed as a whole vector below; it is restored at the end
      // of the second m_ahead > 0 section.
      // FLA_Copy( a21_t, first_elem );
      // FLA_Set( FLA_ONE, a21_t );
      first_elem = *a21_t;
      *a21_t = *buff_1;
    }

    // Apply the previous iteration's rank-2 update to A22.
    if ( m_behind > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u2, inc_u,
                y2, inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z2, inc_z,
                u2, inc_u,
                A22, rs_A, cs_A );
    }

    if ( m_ahead > 0 )
    {
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 v2, inc_v );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 w2, inc_w );

      // FLA_Copy( a21, u2 );
      // FLA_Copy( v2, y2 );
      // FLA_Copy( w2, z2 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  u2, inc_u );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  v2, inc_v,
                  y2, inc_y );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w2, inc_w,
                  z2, inc_z );

      // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
      // FLA_Inv_scal( FLA_TWO, beta );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
      bl1_zdot( BLIS1_CONJUGATE,
                m_ahead,
                a21, rs_A,
                z2, inc_z,
                &beta );
      bl1_zinvscals( buff_2, &beta );
      bl1_zcopyconj( &beta, &conj_beta );

      // FLA_Scal( minus_inv_tau11, conj_beta );
      // FLA_Axpy( conj_beta, a21, y2 );
      // FLA_Scal( inv_tau11, y2 );
      bl1_zscals( &minus_inv_tau11, &conj_beta );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &conj_beta,
                  a21, rs_A,
                  y2, inc_y );
      bl1_zscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  y2, inc_y );

      // FLA_Scal( minus_inv_tau11, beta );
      // FLA_Axpy( beta, a21, z2 );
      // FLA_Scal( inv_tau11, z2 );
      bl1_zscals( &minus_inv_tau11, &beta );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  a21, rs_A,
                  z2, inc_z );
      bl1_zscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  z2, inc_z );

      // FLA_Dot( a12t, a21, dot_product );
      // FLA_Scal( minus_inv_tau11, dot_product );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
      bl1_zdot( BLIS1_NO_CONJUGATE,
                m_ahead,
                a12t, cs_A,
                a21, rs_A,
                &dot_product );
      bl1_zscals( &minus_inv_tau11, &dot_product );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &dot_product,
                  a21, rs_A,
                  a12t, cs_A );

      // y0 (the leading m_behind entries of y) is used here as scratch
      // space for A02 * a21.
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 y0, inc_y );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_behind,
                n_ahead,
                &minus_inv_tau11,
                y0, inc_y,
                a21, rs_A,
                A02, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 t01, rs_T );

      // Restore the element that was replaced by one above.
      // FLA_Copy( first_elem, a21_t );
      *a21_t = first_elem;
    }

    // Last iteration: no later iteration will apply the rank-2 update just
    // formed, so apply it to A22 now.
    if ( m_behind + 1 == b_alg && m_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u2, inc_u,
                y2, inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z2, inc_z,
                u2, inc_u,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &u );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &w );
  FLA_free( buff_u );
  FLA_free( buff_y );
  FLA_free( buff_z );
  FLA_free( buff_v );
  FLA_free( buff_w );

  return FLA_SUCCESS;
}
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().