libflame  revision_anchor
Functions
FLA_Hess_UT_fus_var4.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_ofu_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var4 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var4 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var4 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var4 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_ofu_var4()

FLA_Error FLA_Hess_UT_ofu_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Y );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_ofu_var4( A, Y, Z, T );
22 
23  FLA_Obj_free( &Y );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }
FLA_Error FLA_Hess_UT_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_fus_var4.c:29
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:286
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
int FLA_Error
Definition: FLA_type_defs.h:47
Definition: FLA_type_defs.h:159

References FLA_Hess_UT_step_ofu_var4(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

◆ FLA_Hess_UT_step_ofc_var4()

FLA_Error FLA_Hess_UT_step_ofc_var4 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
688 {
689  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
690  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
691  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
692  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
693 
694  scomplex first_elem, last_elem;
695  scomplex dot_product;
696  scomplex beta, conj_beta;
697  scomplex inv_tau11;
698  scomplex minus_inv_tau11;
699  int i;
700 
701  // b_alg = FLA_Obj_length( T );
702  int b_alg = m_T;
703 
704  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
705  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
706  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
707  scomplex* buff_e = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
708  int inc_e = 1;
709 
710  // FLA_Set( FLA_ZERO, Y );
711  // FLA_Set( FLA_ZERO, Z );
712  bl1_csetm( m_A,
713  b_alg,
714  buff_0,
715  buff_Y, rs_Y, cs_Y );
716  bl1_csetm( m_A,
717  b_alg,
718  buff_0,
719  buff_Z, rs_Z, cs_Z );
720 
721  for ( i = 0; i < b_alg; ++i )
722  {
723  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
724  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
725  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
726  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
727  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
728  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
729  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
730 
731  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
732  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
733  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
734 
735  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
736  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
737  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
738 
739  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
740  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
741 
742  scomplex* e0 = buff_e + (0 )*inc_e;
743 
744  scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
745 
746  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
747  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
748 
749  scomplex* ABL = a10t;
750  scomplex* ZBL = z10t;
751 
752  scomplex* a2 = alpha11;
753 
754  int m_ahead = m_A - i - 1;
755  int n_ahead = m_A - i - 1;
756  int m_behind = i;
757  int n_behind = i;
758 
759  /*------------------------------------------------------------*/
760 
761  if ( m_behind > 0 )
762  {
763  // FLA_Copy( a10t_r, last_elem );
764  // FLA_Set( FLA_ONE, a10t_r );
765  last_elem = *a10t_r;
766  *a10t_r = *buff_1;
767  }
768 
769  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
770  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
771  bl1_cgemv( BLIS1_NO_TRANSPOSE,
772  BLIS1_CONJUGATE,
773  m_ahead + 1,
774  n_behind,
775  buff_m1,
776  ABL, rs_A, cs_A,
777  y10t, cs_Y,
778  buff_1,
779  a2, rs_A );
780  bl1_cgemv( BLIS1_NO_TRANSPOSE,
781  BLIS1_CONJUGATE,
782  m_ahead + 1,
783  n_behind,
784  buff_m1,
785  ZBL, rs_Z, cs_Z,
786  a10t, cs_A,
787  buff_1,
788  a2, rs_A );
789 
790  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
791  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
792  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
793  BLIS1_NO_CONJUGATE,
794  m_ahead,
795  n_behind,
796  buff_m1,
797  Y20, rs_Y, cs_Y,
798  a10t, cs_A,
799  buff_1,
800  a12t, cs_A );
801  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
802  BLIS1_NO_CONJUGATE,
803  m_ahead,
804  n_behind,
805  buff_m1,
806  A20, rs_A, cs_A,
807  z10t, cs_Z,
808  buff_1,
809  a12t, cs_A );
810 
811  if ( m_behind > 0 )
812  {
813  // FLA_Copy( last_elem, a10t_r );
814  *a10t_r = last_elem;
815  }
816 
817  if ( m_ahead > 0 )
818  {
819  // FLA_Househ2_UT( FLA_LEFT,
820  // a21_t,
821  // a21_b, tau11 );
822  FLA_Househ2_UT_l_opc( m_ahead - 1,
823  a21_t,
824  a21_b, rs_A,
825  tau11 );
826 
827  // FLA_Set( FLA_ONE, inv_tau11 );
828  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
829  // FLA_Copy( inv_tau11, minus_inv_tau11 );
830  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
831  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
832  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
833 
834  // FLA_Copy( a21_t, first_elem );
835  // FLA_Set( FLA_ONE, a21_t );
836  first_elem = *a21_t;
837  *a21_t = *buff_1;
838 
839  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
840  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
841  FLA_Fused_Ahx_Ax_opc_var1( m_ahead,
842  n_ahead,
843  A22, rs_A, cs_A,
844  a21, rs_A,
845  y21, rs_Y,
846  z21, rs_Z );
847 
848  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
849  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
850  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
851  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
852  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
853  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
854  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
855  // FLA_Copy( d0, t01 );
856  FLA_Fused_Uhu_Yhu_Zhu_opc_var1( m_ahead,
857  n_behind,
858  buff_m1,
859  A20, rs_A, cs_A,
860  Y20, rs_Y, cs_Y,
861  Z20, rs_Z, cs_Z,
862  t01, rs_T,
863  a21, rs_A,
864  y21, rs_Y,
865  z21, rs_Z );
866 
867  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
868  // FLA_Inv_scal( FLA_TWO, beta );
869  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
870  bl1_cdot( BLIS1_CONJUGATE,
871  m_ahead,
872  a21, rs_A,
873  z21, rs_Z,
874  &beta );
875  bl1_cinvscals( buff_2, &beta );
876  bl1_ccopyconj( &beta, &conj_beta );
877 
878  // FLA_Scal( minus_inv_tau11, conj_beta );
879  // FLA_Axpy( conj_beta, a21, y21 );
880  // FLA_Scal( inv_tau11, y21 );
881  bl1_cscals( &minus_inv_tau11, &conj_beta );
882  bl1_caxpyv( BLIS1_NO_CONJUGATE,
883  m_ahead,
884  &conj_beta,
885  a21, rs_A,
886  y21, rs_Y );
887  bl1_cscalv( BLIS1_NO_CONJUGATE,
888  m_ahead,
889  &inv_tau11,
890  y21, rs_Y );
891 
892  // FLA_Scal( minus_inv_tau11, beta );
893  // FLA_Axpy( beta, a21, z21 );
894  // FLA_Scal( inv_tau11, z21 );
895  bl1_cscals( &minus_inv_tau11, &beta );
896  bl1_caxpyv( BLIS1_NO_CONJUGATE,
897  m_ahead,
898  &beta,
899  a21, rs_A,
900  z21, rs_Z );
901  bl1_cscalv( BLIS1_NO_CONJUGATE,
902  m_ahead,
903  &inv_tau11,
904  z21, rs_Z );
905 
906  // FLA_Dot( a12t, a21, dot_product );
907  // FLA_Scal( minus_inv_tau11, dot_product );
908  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
909  bl1_cdot( BLIS1_NO_CONJUGATE,
910  m_ahead,
911  a12t, cs_A,
912  a21, rs_A,
913  &dot_product );
914  bl1_cscals( &minus_inv_tau11, &dot_product );
915  bl1_caxpyv( BLIS1_CONJUGATE,
916  m_ahead,
917  &dot_product,
918  a21, rs_A,
919  a12t, cs_A );
920 
921  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
922  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
923  bl1_cgemv( BLIS1_NO_TRANSPOSE,
924  BLIS1_NO_CONJUGATE,
925  m_behind,
926  n_ahead,
927  buff_1,
928  A02, rs_A, cs_A,
929  a21, rs_A,
930  buff_0,
931  e0, inc_e );
932  bl1_cger( BLIS1_NO_CONJUGATE,
933  BLIS1_CONJUGATE,
934  m_behind,
935  n_ahead,
936  &minus_inv_tau11,
937  e0, inc_e,
938  a21, rs_A,
939  A02, rs_A, cs_A );
940 
941  // FLA_Copy( first_elem, a21_t );
942  *a21_t = first_elem;
943  }
944 
945  /*------------------------------------------------------------*/
946 
947  }
948 
949  // FLA_Obj_free( &e );
950  FLA_free( buff_e );
951 
952  return FLA_SUCCESS;
953 }
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z)
Definition: FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:398
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofd_var4()

FLA_Error FLA_Hess_UT_step_ofd_var4 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
413 {
414  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
415  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
416  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
417  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
418 
419  double first_elem, last_elem;
420  double dot_product;
421  double beta, conj_beta;
422  double inv_tau11;
423  double minus_inv_tau11;
424  int i;
425 
426  // b_alg = FLA_Obj_length( T );
427  int b_alg = m_T;
428 
429  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
430  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
431  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
432  double* buff_e = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
433  int inc_e = 1;
434 
435  // FLA_Set( FLA_ZERO, Y );
436  // FLA_Set( FLA_ZERO, Z );
437  bl1_dsetm( m_A,
438  b_alg,
439  buff_0,
440  buff_Y, rs_Y, cs_Y );
441  bl1_dsetm( m_A,
442  b_alg,
443  buff_0,
444  buff_Z, rs_Z, cs_Z );
445 
446  for ( i = 0; i < b_alg; ++i )
447  {
448  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
449  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
450  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
451  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
452  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
453  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
454  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
455 
456  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
457  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
458  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
459 
460  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
461  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
462  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
463 
464  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
465  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
466 
467  double* e0 = buff_e + (0 )*inc_e;
468 
469  double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
470 
471  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
472  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
473 
474  double* ABL = a10t;
475  double* ZBL = z10t;
476 
477  double* a2 = alpha11;
478 
479  int m_ahead = m_A - i - 1;
480  int n_ahead = m_A - i - 1;
481  int m_behind = i;
482  int n_behind = i;
483 
484  /*------------------------------------------------------------*/
485 
486  if ( m_behind > 0 )
487  {
488  // FLA_Copy( a10t_r, last_elem );
489  // FLA_Set( FLA_ONE, a10t_r );
490  last_elem = *a10t_r;
491  *a10t_r = *buff_1;
492  }
493 
494  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
495  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
496  bl1_dgemv( BLIS1_NO_TRANSPOSE,
497  BLIS1_CONJUGATE,
498  m_ahead + 1,
499  n_behind,
500  buff_m1,
501  ABL, rs_A, cs_A,
502  y10t, cs_Y,
503  buff_1,
504  a2, rs_A );
505  bl1_dgemv( BLIS1_NO_TRANSPOSE,
506  BLIS1_CONJUGATE,
507  m_ahead + 1,
508  n_behind,
509  buff_m1,
510  ZBL, rs_Z, cs_Z,
511  a10t, cs_A,
512  buff_1,
513  a2, rs_A );
514 
515  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
516  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
517  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
518  BLIS1_NO_CONJUGATE,
519  m_ahead,
520  n_behind,
521  buff_m1,
522  Y20, rs_Y, cs_Y,
523  a10t, cs_A,
524  buff_1,
525  a12t, cs_A );
526  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
527  BLIS1_NO_CONJUGATE,
528  m_ahead,
529  n_behind,
530  buff_m1,
531  A20, rs_A, cs_A,
532  z10t, cs_Z,
533  buff_1,
534  a12t, cs_A );
535 
536  if ( m_behind > 0 )
537  {
538  // FLA_Copy( last_elem, a10t_r );
539  *a10t_r = last_elem;
540  }
541 
542  if ( m_ahead > 0 )
543  {
544  // FLA_Househ2_UT( FLA_LEFT,
545  // a21_t,
546  // a21_b, tau11 );
547  FLA_Househ2_UT_l_opd( m_ahead - 1,
548  a21_t,
549  a21_b, rs_A,
550  tau11 );
551 
552  // FLA_Set( FLA_ONE, inv_tau11 );
553  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
554  // FLA_Copy( inv_tau11, minus_inv_tau11 );
555  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
556  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
557  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
558 
559  // FLA_Copy( a21_t, first_elem );
560  // FLA_Set( FLA_ONE, a21_t );
561  first_elem = *a21_t;
562  *a21_t = *buff_1;
563 
564  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
565  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
566  FLA_Fused_Ahx_Ax_opd_var1( m_ahead,
567  n_ahead,
568  A22, rs_A, cs_A,
569  a21, rs_A,
570  y21, rs_Y,
571  z21, rs_Z );
572 
573  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
574  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
575  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
576  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
577  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
578  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
579  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
580  // FLA_Copy( d0, t01 );
581  FLA_Fused_Uhu_Yhu_Zhu_opd_var1( m_ahead,
582  n_behind,
583  buff_m1,
584  A20, rs_A, cs_A,
585  Y20, rs_Y, cs_Y,
586  Z20, rs_Z, cs_Z,
587  t01, rs_T,
588  a21, rs_A,
589  y21, rs_Y,
590  z21, rs_Z );
591 
592  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
593  // FLA_Inv_scal( FLA_TWO, beta );
594  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
595  bl1_ddot( BLIS1_CONJUGATE,
596  m_ahead,
597  a21, rs_A,
598  z21, rs_Z,
599  &beta );
600  bl1_dinvscals( buff_2, &beta );
601  bl1_dcopyconj( &beta, &conj_beta );
602 
603  // FLA_Scal( minus_inv_tau11, conj_beta );
604  // FLA_Axpy( conj_beta, a21, y21 );
605  // FLA_Scal( inv_tau11, y21 );
606  bl1_dscals( &minus_inv_tau11, &conj_beta );
607  bl1_daxpyv( BLIS1_NO_CONJUGATE,
608  m_ahead,
609  &conj_beta,
610  a21, rs_A,
611  y21, rs_Y );
612  bl1_dscalv( BLIS1_NO_CONJUGATE,
613  m_ahead,
614  &inv_tau11,
615  y21, rs_Y );
616 
617  // FLA_Scal( minus_inv_tau11, beta );
618  // FLA_Axpy( beta, a21, z21 );
619  // FLA_Scal( inv_tau11, z21 );
620  bl1_dscals( &minus_inv_tau11, &beta );
621  bl1_daxpyv( BLIS1_NO_CONJUGATE,
622  m_ahead,
623  &beta,
624  a21, rs_A,
625  z21, rs_Z );
626  bl1_dscalv( BLIS1_NO_CONJUGATE,
627  m_ahead,
628  &inv_tau11,
629  z21, rs_Z );
630 
631  // FLA_Dot( a12t, a21, dot_product );
632  // FLA_Scal( minus_inv_tau11, dot_product );
633  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
634  bl1_ddot( BLIS1_NO_CONJUGATE,
635  m_ahead,
636  a12t, cs_A,
637  a21, rs_A,
638  &dot_product );
639  bl1_dscals( &minus_inv_tau11, &dot_product );
640  bl1_daxpyv( BLIS1_CONJUGATE,
641  m_ahead,
642  &dot_product,
643  a21, rs_A,
644  a12t, cs_A );
645 
646  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
647  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
648  bl1_dgemv( BLIS1_NO_TRANSPOSE,
649  BLIS1_NO_CONJUGATE,
650  m_behind,
651  n_ahead,
652  buff_1,
653  A02, rs_A, cs_A,
654  a21, rs_A,
655  buff_0,
656  e0, inc_e );
657  bl1_dger( BLIS1_NO_CONJUGATE,
658  BLIS1_CONJUGATE,
659  m_behind,
660  n_ahead,
661  &minus_inv_tau11,
662  e0, inc_e,
663  a21, rs_A,
664  A02, rs_A, cs_A );
665 
666  // FLA_Copy( first_elem, a21_t );
667  *a21_t = first_elem;
668  }
669 
670  /*------------------------------------------------------------*/
671 
672  }
673 
674  // FLA_Obj_free( &e );
675  FLA_free( buff_e );
676 
677  return FLA_SUCCESS;
678 }
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z)
Definition: FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:270
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofs_var4()

FLA_Error FLA_Hess_UT_step_ofs_var4 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
138 {
139  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
140  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
141  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
142  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
143 
144  float first_elem, last_elem;
145  float dot_product;
146  float beta, conj_beta;
147  float inv_tau11;
148  float minus_inv_tau11;
149  int i;
150 
151  // b_alg = FLA_Obj_length( T );
152  int b_alg = m_T;
153 
154  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
155  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
156  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
157  float* buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
158  int inc_e = 1;
159 
160  // FLA_Set( FLA_ZERO, Y );
161  // FLA_Set( FLA_ZERO, Z );
162  bl1_ssetm( m_A,
163  b_alg,
164  buff_0,
165  buff_Y, rs_Y, cs_Y );
166  bl1_ssetm( m_A,
167  b_alg,
168  buff_0,
169  buff_Z, rs_Z, cs_Z );
170 
171  for ( i = 0; i < b_alg; ++i )
172  {
173  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
174  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
175  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
176  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
177  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
178  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
179  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
180 
181  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
182  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
183  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
184 
185  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
186  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
187  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
188 
189  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
190  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
191 
192  float* e0 = buff_e + (0 )*inc_e;
193 
194  float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
195 
196  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
197  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
198 
199  float* ABL = a10t;
200  float* ZBL = z10t;
201 
202  float* a2 = alpha11;
203 
204  int m_ahead = m_A - i - 1;
205  int n_ahead = m_A - i - 1;
206  int m_behind = i;
207  int n_behind = i;
208 
209  /*------------------------------------------------------------*/
210 
211  if ( m_behind > 0 )
212  {
213  // FLA_Copy( a10t_r, last_elem );
214  // FLA_Set( FLA_ONE, a10t_r );
215  last_elem = *a10t_r;
216  *a10t_r = *buff_1;
217  }
218 
219  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
220  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
221  bl1_sgemv( BLIS1_NO_TRANSPOSE,
222  BLIS1_CONJUGATE,
223  m_ahead + 1,
224  n_behind,
225  buff_m1,
226  ABL, rs_A, cs_A,
227  y10t, cs_Y,
228  buff_1,
229  a2, rs_A );
230  bl1_sgemv( BLIS1_NO_TRANSPOSE,
231  BLIS1_CONJUGATE,
232  m_ahead + 1,
233  n_behind,
234  buff_m1,
235  ZBL, rs_Z, cs_Z,
236  a10t, cs_A,
237  buff_1,
238  a2, rs_A );
239 
240  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
241  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
242  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
243  BLIS1_NO_CONJUGATE,
244  m_ahead,
245  n_behind,
246  buff_m1,
247  Y20, rs_Y, cs_Y,
248  a10t, cs_A,
249  buff_1,
250  a12t, cs_A );
251  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
252  BLIS1_NO_CONJUGATE,
253  m_ahead,
254  n_behind,
255  buff_m1,
256  A20, rs_A, cs_A,
257  z10t, cs_Z,
258  buff_1,
259  a12t, cs_A );
260 
261  if ( m_behind > 0 )
262  {
263  // FLA_Copy( last_elem, a10t_r );
264  *a10t_r = last_elem;
265  }
266 
267  if ( m_ahead > 0 )
268  {
269  // FLA_Househ2_UT( FLA_LEFT,
270  // a21_t,
271  // a21_b, tau11 );
272  FLA_Househ2_UT_l_ops( m_ahead - 1,
273  a21_t,
274  a21_b, rs_A,
275  tau11 );
276 
277  // FLA_Set( FLA_ONE, inv_tau11 );
278  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
279  // FLA_Copy( inv_tau11, minus_inv_tau11 );
280  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
281  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
282  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
283 
284  // FLA_Copy( a21_t, first_elem );
285  // FLA_Set( FLA_ONE, a21_t );
286  first_elem = *a21_t;
287  *a21_t = *buff_1;
288 
289  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
290  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
291  FLA_Fused_Ahx_Ax_ops_var1( m_ahead,
292  n_ahead,
293  A22, rs_A, cs_A,
294  a21, rs_A,
295  y21, rs_Y,
296  z21, rs_Z );
297 
298  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
299  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
300  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
301  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
302  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
303  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
304  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
305  // FLA_Copy( d0, t01 );
306  FLA_Fused_Uhu_Yhu_Zhu_ops_var1( m_ahead,
307  n_behind,
308  buff_m1,
309  A20, rs_A, cs_A,
310  Y20, rs_Y, cs_Y,
311  Z20, rs_Z, cs_Z,
312  t01, rs_T,
313  a21, rs_A,
314  y21, rs_Y,
315  z21, rs_Z );
316 
317  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
318  // FLA_Inv_scal( FLA_TWO, beta );
319  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
320  bl1_sdot( BLIS1_CONJUGATE,
321  m_ahead,
322  a21, rs_A,
323  z21, rs_Z,
324  &beta );
325  bl1_sinvscals( buff_2, &beta );
326  bl1_scopyconj( &beta, &conj_beta );
327 
328  // FLA_Scal( minus_inv_tau11, conj_beta );
329  // FLA_Axpy( conj_beta, a21, y21 );
330  // FLA_Scal( inv_tau11, y21 );
331  bl1_sscals( &minus_inv_tau11, &conj_beta );
332  bl1_saxpyv( BLIS1_NO_CONJUGATE,
333  m_ahead,
334  &conj_beta,
335  a21, rs_A,
336  y21, rs_Y );
337  bl1_sscalv( BLIS1_NO_CONJUGATE,
338  m_ahead,
339  &inv_tau11,
340  y21, rs_Y );
341 
342  // FLA_Scal( minus_inv_tau11, beta );
343  // FLA_Axpy( beta, a21, z21 );
344  // FLA_Scal( inv_tau11, z21 );
345  bl1_sscals( &minus_inv_tau11, &beta );
346  bl1_saxpyv( BLIS1_NO_CONJUGATE,
347  m_ahead,
348  &beta,
349  a21, rs_A,
350  z21, rs_Z );
351  bl1_sscalv( BLIS1_NO_CONJUGATE,
352  m_ahead,
353  &inv_tau11,
354  z21, rs_Z );
355 
356  // FLA_Dot( a12t, a21, dot_product );
357  // FLA_Scal( minus_inv_tau11, dot_product );
358  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
359  bl1_sdot( BLIS1_NO_CONJUGATE,
360  m_ahead,
361  a12t, cs_A,
362  a21, rs_A,
363  &dot_product );
364  bl1_sscals( &minus_inv_tau11, &dot_product );
365  bl1_saxpyv( BLIS1_CONJUGATE,
366  m_ahead,
367  &dot_product,
368  a21, rs_A,
369  a12t, cs_A );
370 
371  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
372  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
373  bl1_sgemv( BLIS1_NO_TRANSPOSE,
374  BLIS1_NO_CONJUGATE,
375  m_behind,
376  n_ahead,
377  buff_1,
378  A02, rs_A, cs_A,
379  a21, rs_A,
380  buff_0,
381  e0, inc_e );
382  bl1_sger( BLIS1_NO_CONJUGATE,
383  BLIS1_CONJUGATE,
384  m_behind,
385  n_ahead,
386  &minus_inv_tau11,
387  e0, inc_e,
388  a21, rs_A,
389  A02, rs_A, cs_A );
390 
391  // FLA_Copy( first_elem, a21_t );
392  *a21_t = first_elem;
393  }
394 
395  /*------------------------------------------------------------*/
396 
397  }
398 
399  // FLA_Obj_free( &e );
400  FLA_free( buff_e );
401 
402  return FLA_SUCCESS;
403 }
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:116
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z)
Definition: FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:156
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Uhu_Yhu_Zhu_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofu_var4()

FLA_Error FLA_Hess_UT_step_ofu_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30 {
31  FLA_Datatype datatype;
32  int m_A, m_T;
33  int rs_A, cs_A;
34  int rs_Y, cs_Y;
35  int rs_Z, cs_Z;
36  int rs_T, cs_T;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  m_A = FLA_Obj_length( A );
41  m_T = FLA_Obj_length( T );
42 
43  rs_A = FLA_Obj_row_stride( A );
44  cs_A = FLA_Obj_col_stride( A );
45 
46  rs_Y = FLA_Obj_row_stride( Y );
47  cs_Y = FLA_Obj_col_stride( Y );
48 
49  rs_Z = FLA_Obj_row_stride( Z );
50  cs_Z = FLA_Obj_col_stride( Z );
51 
52  rs_T = FLA_Obj_row_stride( T );
53  cs_T = FLA_Obj_col_stride( T );
54 
55 
56  switch ( datatype )
57  {
58  case FLA_FLOAT:
59  {
60  float* buff_A = FLA_FLOAT_PTR( A );
61  float* buff_Y = FLA_FLOAT_PTR( Y );
62  float* buff_Z = FLA_FLOAT_PTR( Z );
63  float* buff_T = FLA_FLOAT_PTR( T );
64 
65  FLA_Hess_UT_step_ofs_var4( m_A,
66  m_T,
67  buff_A, rs_A, cs_A,
68  buff_Y, rs_Y, cs_Y,
69  buff_Z, rs_Z, cs_Z,
70  buff_T, rs_T, cs_T );
71 
72  break;
73  }
74 
75  case FLA_DOUBLE:
76  {
77  double* buff_A = FLA_DOUBLE_PTR( A );
78  double* buff_Y = FLA_DOUBLE_PTR( Y );
79  double* buff_Z = FLA_DOUBLE_PTR( Z );
80  double* buff_T = FLA_DOUBLE_PTR( T );
81 
82  FLA_Hess_UT_step_ofd_var4( m_A,
83  m_T,
84  buff_A, rs_A, cs_A,
85  buff_Y, rs_Y, cs_Y,
86  buff_Z, rs_Z, cs_Z,
87  buff_T, rs_T, cs_T );
88 
89  break;
90  }
91 
92  case FLA_COMPLEX:
93  {
94  scomplex* buff_A = FLA_COMPLEX_PTR( A );
95  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
96  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
97  scomplex* buff_T = FLA_COMPLEX_PTR( T );
98 
99  FLA_Hess_UT_step_ofc_var4( m_A,
100  m_T,
101  buff_A, rs_A, cs_A,
102  buff_Y, rs_Y, cs_Y,
103  buff_Z, rs_Z, cs_Z,
104  buff_T, rs_T, cs_T );
105 
106  break;
107  }
108 
109  case FLA_DOUBLE_COMPLEX:
110  {
111  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
112  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
113  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
114  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
115 
116  FLA_Hess_UT_step_ofz_var4( m_A,
117  m_T,
118  buff_A, rs_A, cs_A,
119  buff_Y, rs_Y, cs_Y,
120  buff_Z, rs_Z, cs_Z,
121  buff_T, rs_T, cs_T );
122 
123  break;
124  }
125  }
126 
127  return FLA_SUCCESS;
128 }
FLA_Error FLA_Hess_UT_step_ofz_var4(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var4.c:957
FLA_Error FLA_Hess_UT_step_ofs_var4(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var4.c:132
FLA_Error FLA_Hess_UT_step_ofd_var4(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var4.c:407
FLA_Error FLA_Hess_UT_step_ofc_var4(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var4.c:682
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blf_var4(), and FLA_Hess_UT_ofu_var4().

◆ FLA_Hess_UT_step_ofz_var4()

/*
 * FLA_Hess_UT_step_ofz_var4
 *
 * Double-precision complex (dcomplex) kernel that runs m_T iterations of the
 * loop below over the m_A x m_A matrix stored in buff_A.  In iteration i the
 * routine
 *   - applies the updates accumulated in the first i columns of Y and Z to
 *     the current column of A (alpha11 and below) and to the row a12t,
 *   - computes a Householder transform from a21 with FLA_Househ2_UT_l_opz,
 *     writing its scalar to the diagonal element tau11 of T,
 *   - fills column i of Y and Z (y21, z21) and column i of T above the
 *     diagonal (t01),
 *   - updates A02 with a rank-one correction.
 * NOTE(review): judging by the name this is one blocked step of a reduction
 * to upper Hessenberg form with UT-transform accumulation -- confirm against
 * the calling blocked variant.
 *
 * Parameters
 *   m_A                 order of A; also the row count of Y and Z that is
 *                       cleared on entry.
 *   m_T                 number of iterations to perform (used as b_alg);
 *                       also the number of leading columns of Y and Z that
 *                       are cleared on entry.
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A
 *                       (updated in place).
 *   buff_Y, rs_Y, cs_Y  base pointer and strides of workspace Y (overwritten).
 *   buff_Z, rs_Z, cs_Z  base pointer and strides of workspace Z (overwritten).
 *   buff_T, rs_T, cs_T  base pointer and strides of T (columns 0..m_T-1 are
 *                       written on and above the diagonal).
 *
 * Returns FLA_SUCCESS.
 *
 * A temporary vector of m_A elements is obtained from FLA_malloc() and
 * released with FLA_free() before returning.
 */
FLA_Error FLA_Hess_UT_step_ofz_var4( int m_A,
                                     int m_T,
                                     dcomplex* buff_A, int rs_A, int cs_A,
                                     dcomplex* buff_Y, int rs_Y, int cs_Y,
                                     dcomplex* buff_Z, int rs_Z, int cs_Z,
                                     dcomplex* buff_T, int rs_T, int cs_T )
{
  dcomplex* buff_2  = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  first_elem, last_elem;
  dcomplex  dot_product;
  dcomplex  beta, conj_beta;
  dcomplex  inv_tau11;
  dcomplex  minus_inv_tau11;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_T;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
  // Only the vector e needs separate storage in this variant.
  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_e  = 1;

  // FLA_Set( FLA_ZERO, Y );
  // FLA_Set( FLA_ZERO, Z );
  bl1_zsetm( m_A,
             b_alg,
             buff_0,
             buff_Y, rs_Y, cs_Y );
  bl1_zsetm( m_A,
             b_alg,
             buff_0,
             buff_Z, rs_Z, cs_Z );

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A, Y, Z and T relative to the current diagonal element (i,i).
    dcomplex* a10t    = buff_A + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20     = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* alpha11 = buff_A + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21     = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02     = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t    = buff_A + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22     = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    dcomplex* y10t    = buff_Y + (0  )*cs_Y + (i  )*rs_Y;
    dcomplex* Y20     = buff_Y + (0  )*cs_Y + (i+1)*rs_Y;
    dcomplex* y21     = buff_Y + (i  )*cs_Y + (i+1)*rs_Y;

    dcomplex* z10t    = buff_Z + (0  )*cs_Z + (i  )*rs_Z;
    dcomplex* Z20     = buff_Z + (0  )*cs_Z + (i+1)*rs_Z;
    dcomplex* z21     = buff_Z + (i  )*cs_Z + (i+1)*rs_Z;

    dcomplex* t01     = buff_T + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11   = buff_T + (i  )*cs_T + (i  )*rs_T;

    dcomplex* e0      = buff_e + (0  )*inc_e;

    // Last element of a10t; only dereferenced when m_behind > 0.
    dcomplex* a10t_r  = a10t   + (i-1)*cs_A + (0  )*rs_A;

    // Top element of a21 and the remainder below it.
    dcomplex* a21_t   = a21    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a21_b   = a21    + (0  )*cs_A + (1  )*rs_A;

    // ABL/ZBL: rows i..m_A-1 of the first i columns of A and Z.
    dcomplex* ABL     = a10t;
    dcomplex* ZBL     = z10t;

    // a2: column i of A from the diagonal element downward.
    dcomplex* a2      = alpha11;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = m_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    if ( m_behind > 0 )
    {
      // Temporarily replace the last element of a10t with one; the saved
      // value is put back once the four products below are done.
      // NOTE(review): presumably the implicit unit entry of the previous
      // Householder vector -- confirm.
      // FLA_Copy( a10t_r, last_elem );
      // FLA_Set( FLA_ONE, a10t_r );
      last_elem = *a10t_r;
      *a10t_r   = *buff_1;
    }

    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
    // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ABL,  rs_A, cs_A,
               y10t, cs_Y,
               buff_1,
               a2,   rs_A );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_CONJUGATE,
               m_ahead + 1,
               n_behind,
               buff_m1,
               ZBL,  rs_Z, cs_Z,
               a10t, cs_A,
               buff_1,
               a2,   rs_A );

    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
    // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
    bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_m1,
               Y20,  rs_Y, cs_Y,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );
    bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               n_behind,
               buff_m1,
               A20,  rs_A, cs_A,
               z10t, cs_Z,
               buff_1,
               a12t, cs_A );

    if ( m_behind > 0 )
    {
      // FLA_Copy( last_elem, a10t_r );
      *a10t_r = last_elem;
    }

    if ( m_ahead > 0 )
    {
      // FLA_Househ2_UT( FLA_LEFT,
      //                 a21_t,
      //                 a21_b, tau11 );
      FLA_Househ2_UT_l_opz( m_ahead - 1,
                            a21_t,
                            a21_b, rs_A,
                            tau11 );

      // FLA_Set( FLA_ONE, inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
      // FLA_Copy( inv_tau11, minus_inv_tau11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
      bl1_zdiv3( buff_1, tau11, &inv_tau11 );
      bl1_zneg2( &inv_tau11, &minus_inv_tau11 );

      // Temporarily set the top element of a21 to one; the saved value is
      // written back at the end of this branch.
      // FLA_Copy( a21_t, first_elem );
      // FLA_Set( FLA_ONE, a21_t );
      first_elem = *a21_t;
      *a21_t     = *buff_1;

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE,   FLA_ONE, A22, a21, FLA_ZERO, z21 );
      FLA_Fused_Ahx_Ax_opz_var1( m_ahead,
                                 n_ahead,
                                 A22, rs_A, cs_A,
                                 a21, rs_A,
                                 y21, rs_Y,
                                 z21, rs_Z );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
      // FLA_Copy( d0, t01 );
      FLA_Fused_Uhu_Yhu_Zhu_opz_var1( m_ahead,
                                      n_behind,
                                      buff_m1,
                                      A20, rs_A, cs_A,
                                      Y20, rs_Y, cs_Y,
                                      Z20, rs_Z, cs_Z,
                                      t01, rs_T,
                                      a21, rs_A,
                                      y21, rs_Y,
                                      z21, rs_Z );

      // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
      // FLA_Inv_scal( FLA_TWO, beta );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
      bl1_zdot( BLIS1_CONJUGATE,
                m_ahead,
                a21, rs_A,
                z21, rs_Z,
                &beta );
      bl1_zinvscals( buff_2, &beta );
      bl1_zcopyconj( &beta, &conj_beta );

      // FLA_Scal( minus_inv_tau11, conj_beta );
      // FLA_Axpy( conj_beta, a21, y21 );
      // FLA_Scal( inv_tau11, y21 );
      bl1_zscals( &minus_inv_tau11, &conj_beta );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &conj_beta,
                  a21, rs_A,
                  y21, rs_Y );
      bl1_zscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  y21, rs_Y );

      // FLA_Scal( minus_inv_tau11, beta );
      // FLA_Axpy( beta, a21, z21 );
      // FLA_Scal( inv_tau11, z21 );
      bl1_zscals( &minus_inv_tau11, &beta );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  a21, rs_A,
                  z21, rs_Z );
      bl1_zscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  z21, rs_Z );

      // FLA_Dot( a12t, a21, dot_product );
      // FLA_Scal( minus_inv_tau11, dot_product );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
      bl1_zdot( BLIS1_NO_CONJUGATE,
                m_ahead,
                a12t, cs_A,
                a21,  rs_A,
                &dot_product );
      bl1_zscals( &minus_inv_tau11, &dot_product );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &dot_product,
                  a21,  rs_A,
                  a12t, cs_A );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 e0,  inc_e );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_behind,
                n_ahead,
                &minus_inv_tau11,
                e0,  inc_e,
                a21, rs_A,
                A02, rs_A, cs_A );

      // FLA_Copy( first_elem, a21_t );
      *a21_t = first_elem;
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &e );
  FLA_free( buff_e );

  return FLA_SUCCESS;
}
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z)
Definition: FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:500
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Uhu_Yhu_Zhu_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().