libflame  revision_anchor
Functions
FLA_Hess_UT_fus_var2.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_ofu_var2()

FLA_Error FLA_Hess_UT_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_ofu_var2( A, T );
16 }
FLA_Error FLA_Hess_UT_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_fus_var2.c:18

References FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofc_var2()

FLA_Error FLA_Hess_UT_step_ofc_var2 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
497 {
498  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
499  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
500  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
501  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
502 
503  scomplex first_elem;
504  scomplex dot_product;
505  scomplex beta, conj_beta;
506  scomplex inv_tau11;
507  scomplex minus_inv_tau11;
508  int i;
509 
510  // b_alg = FLA_Obj_length( T );
511  int b_alg = m_T;
512 
513  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
514  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
515  scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
516  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
517  int inc_y = 1;
518  int inc_z = 1;
519 
520  for ( i = 0; i < b_alg; ++i )
521  {
522  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
523  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
524  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
525  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
526  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
527 
528  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
529  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
530 
531  scomplex* y0 = buff_y + (0 )*inc_y;
532  scomplex* y2 = buff_y + (i+1)*inc_y;
533 
534  scomplex* z2 = buff_z + (i+1)*inc_z;
535 
536  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
537  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
538 
539  int m_ahead = m_A - i - 1;
540  int n_ahead = m_A - i - 1;
541  int m_behind = i;
542  int n_behind = i;
543 
544  /*------------------------------------------------------------*/
545 
546  if ( m_ahead > 0 )
547  {
548  // FLA_Househ2_UT( FLA_LEFT,
549  // a21_t,
550  // a21_b, tau11 );
551  FLA_Househ2_UT_l_opc( m_ahead - 1,
552  a21_t,
553  a21_b, rs_A,
554  tau11 );
555 
556  // FLA_Set( FLA_ONE, inv_tau11 );
557  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
558  // FLA_Copy( inv_tau11, minus_inv_tau11 );
559  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
560  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
561  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
562 
563  // FLA_Copy( a21_t, first_elem );
564  // FLA_Set( FLA_ONE, a21_t );
565  first_elem = *a21_t;
566  *a21_t = *buff_1;
567 
568  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
569  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
570  FLA_Fused_Ahx_Ax_opc_var1( m_ahead,
571  n_ahead,
572  A22, rs_A, cs_A,
573  a21, rs_A,
574  y2, inc_y,
575  z2, inc_z );
576 
577  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
578  // FLA_Inv_scal( FLA_TWO, beta );
579  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
580 bl1_cdot( BLIS1_CONJUGATE,
581 m_ahead,
582  a21, rs_A,
583  z2, inc_z,
584  &beta );
585  bl1_cinvscals( buff_2, &beta );
586  bl1_ccopyconj( &beta, &conj_beta );
587 
588  // FLA_Scal( minus_inv_tau11, conj_beta );
589  // FLA_Axpy( conj_beta, a21, y2 );
590  // FLA_Scal( inv_tau11, y2 );
591  bl1_cscals( &minus_inv_tau11, &conj_beta );
592 bl1_caxpyv( BLIS1_NO_CONJUGATE,
593 m_ahead,
594  &conj_beta,
595  a21, rs_A,
596  y2, inc_y );
597 bl1_cscalv( BLIS1_NO_CONJUGATE,
598 m_ahead,
599  &inv_tau11,
600  y2, inc_y );
601 
602  // FLA_Scal( minus_inv_tau11, beta );
603  // FLA_Axpy( beta, a21, z2 );
604  // FLA_Scal( inv_tau11, z2 );
605  bl1_cscals( &minus_inv_tau11, &beta );
606 bl1_caxpyv( BLIS1_NO_CONJUGATE,
607 m_ahead,
608  &beta,
609  a21, rs_A,
610  z2, inc_z );
611 bl1_cscalv( BLIS1_NO_CONJUGATE,
612 m_ahead,
613  &inv_tau11,
614  z2, inc_z );
615 
616  // FLA_Dot( a12t, a21, dot_product );
617  // FLA_Scal( minus_inv_tau11, dot_product );
618  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
619 bl1_cdot( BLIS1_NO_CONJUGATE,
620 m_ahead,
621  a12t, cs_A,
622  a21, rs_A,
623  &dot_product );
624  bl1_cscals( &minus_inv_tau11, &dot_product );
625 bl1_caxpyv( BLIS1_CONJUGATE,
626 m_ahead,
627  &dot_product,
628  a21, rs_A,
629  a12t, cs_A );
630 
631  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
632  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
633 bl1_cgemv( BLIS1_NO_TRANSPOSE,
634 BLIS1_NO_CONJUGATE,
635 m_behind,
636  n_ahead,
637  buff_1,
638  A02, rs_A, cs_A,
639  a21, rs_A,
640  buff_0,
641  y0, inc_y );
642 bl1_cger( BLIS1_NO_CONJUGATE,
643 BLIS1_CONJUGATE,
644 m_behind,
645  n_ahead,
646  &minus_inv_tau11,
647  y0, inc_y,
648  a21, rs_A,
649  A02, rs_A, cs_A );
650 
651  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
652  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
653  FLA_Fused_Gerc2_opc_var1( m_ahead,
654  n_ahead,
655  buff_m1,
656  a21, rs_A,
657  y2, inc_y,
658  z2, inc_z,
659  a21, rs_A,
660  A22, rs_A, cs_A );
661 
662  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
663 bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
664 BLIS1_NO_CONJUGATE,
665 m_ahead,
666  n_behind,
667  buff_1,
668  A20, rs_A, cs_A,
669  a21, rs_A,
670  buff_0,
671  t01, rs_T );
672 
673  // FLA_Copy( first_elem, a21_t );
674  *a21_t = first_elem;
675  }
676 
677  /*------------------------------------------------------------*/
678 
679  }
680 
681  // FLA_Obj_free( &y );
682  // FLA_Obj_free( &z );
683  FLA_free( buff_y );
684  FLA_free( buff_z );
685 
686  return FLA_SUCCESS;
687 }
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:241
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofd_var2()

FLA_Error FLA_Hess_UT_step_ofd_var2 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
299 {
300  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
301  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
302  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
303  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
304 
305  double first_elem;
306  double dot_product;
307  double beta, conj_beta;
308  double inv_tau11;
309  double minus_inv_tau11;
310  int i;
311 
312  // b_alg = FLA_Obj_length( T );
313  int b_alg = m_T;
314 
315  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
316  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
317  double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
318  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
319  int inc_y = 1;
320  int inc_z = 1;
321 
322  for ( i = 0; i < b_alg; ++i )
323  {
324  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
325  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
326  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
327  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
328  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
329 
330  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
331  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
332 
333  double* y0 = buff_y + (0 )*inc_y;
334  double* y2 = buff_y + (i+1)*inc_y;
335 
336  double* z2 = buff_z + (i+1)*inc_z;
337 
338  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
339  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
340 
341  int m_ahead = m_A - i - 1;
342  int n_ahead = m_A - i - 1;
343  int m_behind = i;
344  int n_behind = i;
345 
346  /*------------------------------------------------------------*/
347 
348  if ( m_ahead > 0 )
349  {
350  // FLA_Househ2_UT( FLA_LEFT,
351  // a21_t,
352  // a21_b, tau11 );
353  FLA_Househ2_UT_l_opd( m_ahead - 1,
354  a21_t,
355  a21_b, rs_A,
356  tau11 );
357 
358  // FLA_Set( FLA_ONE, inv_tau11 );
359  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
360  // FLA_Copy( inv_tau11, minus_inv_tau11 );
361  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
362  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
363  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
364 
365  // FLA_Copy( a21_t, first_elem );
366  // FLA_Set( FLA_ONE, a21_t );
367  first_elem = *a21_t;
368  *a21_t = *buff_1;
369 
370  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
371  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
372  FLA_Fused_Ahx_Ax_opd_var1( m_ahead,
373  n_ahead,
374  A22, rs_A, cs_A,
375  a21, rs_A,
376  y2, inc_y,
377  z2, inc_z );
378 
379  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
380  // FLA_Inv_scal( FLA_TWO, beta );
381  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
382 bl1_ddot( BLIS1_CONJUGATE,
383 m_ahead,
384  a21, rs_A,
385  z2, inc_z,
386  &beta );
387  bl1_dinvscals( buff_2, &beta );
388  bl1_dcopyconj( &beta, &conj_beta );
389 
390  // FLA_Scal( minus_inv_tau11, conj_beta );
391  // FLA_Axpy( conj_beta, a21, y2 );
392  // FLA_Scal( inv_tau11, y2 );
393  bl1_dscals( &minus_inv_tau11, &conj_beta );
394 bl1_daxpyv( BLIS1_NO_CONJUGATE,
395 m_ahead,
396  &conj_beta,
397  a21, rs_A,
398  y2, inc_y );
399 bl1_dscalv( BLIS1_NO_CONJUGATE,
400 m_ahead,
401  &inv_tau11,
402  y2, inc_y );
403 
404  // FLA_Scal( minus_inv_tau11, beta );
405  // FLA_Axpy( beta, a21, z2 );
406  // FLA_Scal( inv_tau11, z2 );
407  bl1_dscals( &minus_inv_tau11, &beta );
408 bl1_daxpyv( BLIS1_NO_CONJUGATE,
409 m_ahead,
410  &beta,
411  a21, rs_A,
412  z2, inc_z );
413 bl1_dscalv( BLIS1_NO_CONJUGATE,
414 m_ahead,
415  &inv_tau11,
416  z2, inc_z );
417 
418  // FLA_Dot( a12t, a21, dot_product );
419  // FLA_Scal( minus_inv_tau11, dot_product );
420  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
421 bl1_ddot( BLIS1_NO_CONJUGATE,
422 m_ahead,
423  a12t, cs_A,
424  a21, rs_A,
425  &dot_product );
426  bl1_dscals( &minus_inv_tau11, &dot_product );
427 bl1_daxpyv( BLIS1_CONJUGATE,
428 m_ahead,
429  &dot_product,
430  a21, rs_A,
431  a12t, cs_A );
432 
433  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
434  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
435 bl1_dgemv( BLIS1_NO_TRANSPOSE,
436 BLIS1_NO_CONJUGATE,
437 m_behind,
438  n_ahead,
439  buff_1,
440  A02, rs_A, cs_A,
441  a21, rs_A,
442  buff_0,
443  y0, inc_y );
444 bl1_dger( BLIS1_NO_CONJUGATE,
445 BLIS1_CONJUGATE,
446 m_behind,
447  n_ahead,
448  &minus_inv_tau11,
449  y0, inc_y,
450  a21, rs_A,
451  A02, rs_A, cs_A );
452 
453  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
454  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
455  FLA_Fused_Gerc2_opd_var1( m_ahead,
456  n_ahead,
457  buff_m1,
458  a21, rs_A,
459  y2, inc_y,
460  z2, inc_z,
461  a21, rs_A,
462  A22, rs_A, cs_A );
463 
464  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
465 bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
466 BLIS1_NO_CONJUGATE,
467 m_ahead,
468  n_behind,
469  buff_1,
470  A20, rs_A, cs_A,
471  a21, rs_A,
472  buff_0,
473  t01, rs_T );
474 
475  // FLA_Copy( first_elem, a21_t );
476  *a21_t = first_elem;
477  }
478 
479  /*------------------------------------------------------------*/
480 
481  }
482 
483  // FLA_Obj_free( &y );
484  // FLA_Obj_free( &z );
485  FLA_free( buff_y );
486  FLA_free( buff_z );
487 
488  return FLA_SUCCESS;
489 }
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofs_var2()

FLA_Error FLA_Hess_UT_step_ofs_var2 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float dot_product;
109  float beta, conj_beta;
110  float inv_tau11;
111  float minus_inv_tau11;
112  int i;
113 
114  // b_alg = FLA_Obj_length( T );
115  int b_alg = m_T;
116 
117  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
118  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
119  float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
120  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
121  int inc_y = 1;
122  int inc_z = 1;
123 
124  for ( i = 0; i < b_alg; ++i )
125  {
126  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
127  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
128  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
129  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
130  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
131 
132  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
133  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
134 
135  float* y0 = buff_y + (0 )*inc_y;
136  float* y2 = buff_y + (i+1)*inc_y;
137 
138  float* z2 = buff_z + (i+1)*inc_z;
139 
140  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
141  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
142 
143  int m_ahead = m_A - i - 1;
144  int n_ahead = m_A - i - 1;
145  int m_behind = i;
146  int n_behind = i;
147 
148  /*------------------------------------------------------------*/
149 
150  if ( m_ahead > 0 )
151  {
152  // FLA_Househ2_UT( FLA_LEFT,
153  // a21_t,
154  // a21_b, tau11 );
155  FLA_Househ2_UT_l_ops( m_ahead - 1,
156  a21_t,
157  a21_b, rs_A,
158  tau11 );
159 
160  // FLA_Set( FLA_ONE, inv_tau11 );
161  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
162  // FLA_Copy( inv_tau11, minus_inv_tau11 );
163  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
164  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
165  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
166 
167  // FLA_Copy( a21_t, first_elem );
168  // FLA_Set( FLA_ONE, a21_t );
169  first_elem = *a21_t;
170  *a21_t = *buff_1;
171 
172  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
173  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
174  FLA_Fused_Ahx_Ax_ops_var1( m_ahead,
175  n_ahead,
176  A22, rs_A, cs_A,
177  a21, rs_A,
178  y2, inc_y,
179  z2, inc_z );
180 
181  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
182  // FLA_Inv_scal( FLA_TWO, beta );
183  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
184 bl1_sdot( BLIS1_CONJUGATE,
185 m_ahead,
186  a21, rs_A,
187  z2, inc_z,
188  &beta );
189  bl1_sinvscals( buff_2, &beta );
190  bl1_scopyconj( &beta, &conj_beta );
191 
192  // FLA_Scal( minus_inv_tau11, conj_beta );
193  // FLA_Axpy( conj_beta, a21, y2 );
194  // FLA_Scal( inv_tau11, y2 );
195  bl1_sscals( &minus_inv_tau11, &conj_beta );
196 bl1_saxpyv( BLIS1_NO_CONJUGATE,
197 m_ahead,
198  &conj_beta,
199  a21, rs_A,
200  y2, inc_y );
201 bl1_sscalv( BLIS1_NO_CONJUGATE,
202 m_ahead,
203  &inv_tau11,
204  y2, inc_y );
205 
206  // FLA_Scal( minus_inv_tau11, beta );
207  // FLA_Axpy( beta, a21, z2 );
208  // FLA_Scal( inv_tau11, z2 );
209  bl1_sscals( &minus_inv_tau11, &beta );
210 bl1_saxpyv( BLIS1_NO_CONJUGATE,
211 m_ahead,
212  &beta,
213  a21, rs_A,
214  z2, inc_z );
215 bl1_sscalv( BLIS1_NO_CONJUGATE,
216 m_ahead,
217  &inv_tau11,
218  z2, inc_z );
219 
220  // FLA_Dot( a12t, a21, dot_product );
221  // FLA_Scal( minus_inv_tau11, dot_product );
222  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
223 bl1_sdot( BLIS1_NO_CONJUGATE,
224 m_ahead,
225  a12t, cs_A,
226  a21, rs_A,
227  &dot_product );
228  bl1_sscals( &minus_inv_tau11, &dot_product );
229 bl1_saxpyv( BLIS1_CONJUGATE,
230 m_ahead,
231  &dot_product,
232  a21, rs_A,
233  a12t, cs_A );
234 
235  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
236  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
237 bl1_sgemv( BLIS1_NO_TRANSPOSE,
238 BLIS1_NO_CONJUGATE,
239 m_behind,
240  n_ahead,
241  buff_1,
242  A02, rs_A, cs_A,
243  a21, rs_A,
244  buff_0,
245  y0, inc_y );
246 bl1_sger( BLIS1_NO_CONJUGATE,
247 BLIS1_CONJUGATE,
248 m_behind,
249  n_ahead,
250  &minus_inv_tau11,
251  y0, inc_y,
252  a21, rs_A,
253  A02, rs_A, cs_A );
254 
255  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
256  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
257  FLA_Fused_Gerc2_ops_var1( m_ahead,
258  n_ahead,
259  buff_m1,
260  a21, rs_A,
261  y2, inc_y,
262  z2, inc_z,
263  a21, rs_A,
264  A22, rs_A, cs_A );
265 
266  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
267 bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
268 BLIS1_NO_CONJUGATE,
269 m_ahead,
270  n_behind,
271  buff_1,
272  A20, rs_A, cs_A,
273  a21, rs_A,
274  buff_0,
275  t01, rs_T );
276 
277  // FLA_Copy( first_elem, a21_t );
278  *a21_t = first_elem;
279  }
280 
281  /*------------------------------------------------------------*/
282 
283  }
284 
285  // FLA_Obj_free( &y );
286  // FLA_Obj_free( &z );
287  FLA_free( buff_y );
288  FLA_free( buff_z );
289 
290  return FLA_SUCCESS;
291 }
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:116
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofu_var2()

FLA_Error FLA_Hess_UT_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
44  FLA_Hess_UT_step_ofs_var2( m_A,
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
57  FLA_Hess_UT_step_ofd_var2( m_A,
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
70  FLA_Hess_UT_step_ofc_var2( m_A,
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
83  FLA_Hess_UT_step_ofz_var2( m_A,
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Hess_UT_step_ofd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var2.c:295
FLA_Error FLA_Hess_UT_step_ofs_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var2.c:97
FLA_Error FLA_Hess_UT_step_ofc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var2.c:493
FLA_Error FLA_Hess_UT_step_ofz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var2.c:691
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blf_var2(), and FLA_Hess_UT_ofu_var2().

◆ FLA_Hess_UT_step_ofz_var2()

FLA_Error FLA_Hess_UT_step_ofz_var2 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
695 {
696  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
697  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
698  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
699  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
700 
701  dcomplex first_elem;
702  dcomplex dot_product;
703  dcomplex beta, conj_beta;
704  dcomplex inv_tau11;
705  dcomplex minus_inv_tau11;
706  int i;
707 
708  // b_alg = FLA_Obj_length( T );
709  int b_alg = m_T;
710 
711  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
712  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
713  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
714  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
715  int inc_y = 1;
716  int inc_z = 1;
717 
718  for ( i = 0; i < b_alg; ++i )
719  {
720  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
721  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
722  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
723  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
724  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
725 
726  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
727  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
728 
729  dcomplex* y0 = buff_y + (0 )*inc_y;
730  dcomplex* y2 = buff_y + (i+1)*inc_y;
731 
732  dcomplex* z2 = buff_z + (i+1)*inc_z;
733 
734  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
735  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
736 
737  int m_ahead = m_A - i - 1;
738  int n_ahead = m_A - i - 1;
739  int m_behind = i;
740  int n_behind = i;
741 
742  /*------------------------------------------------------------*/
743 
744  if ( m_ahead > 0 )
745  {
746  // FLA_Househ2_UT( FLA_LEFT,
747  // a21_t,
748  // a21_b, tau11 );
749  FLA_Househ2_UT_l_opz( m_ahead - 1,
750  a21_t,
751  a21_b, rs_A,
752  tau11 );
753 
754  // FLA_Set( FLA_ONE, inv_tau11 );
755  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
756  // FLA_Copy( inv_tau11, minus_inv_tau11 );
757  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
758  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
759  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
760 
761  // FLA_Copy( a21_t, first_elem );
762  // FLA_Set( FLA_ONE, a21_t );
763  first_elem = *a21_t;
764  *a21_t = *buff_1;
765 
766  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
767  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
768  FLA_Fused_Ahx_Ax_opz_var1( m_ahead,
769  n_ahead,
770  A22, rs_A, cs_A,
771  a21, rs_A,
772  y2, inc_y,
773  z2, inc_z );
774 
775  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
776  // FLA_Inv_scal( FLA_TWO, beta );
777  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
778 bl1_zdot( BLIS1_CONJUGATE,
779 m_ahead,
780  a21, rs_A,
781  z2, inc_z,
782  &beta );
783  bl1_zinvscals( buff_2, &beta );
784  bl1_zcopyconj( &beta, &conj_beta );
785 
786  // FLA_Scal( minus_inv_tau11, conj_beta );
787  // FLA_Axpy( conj_beta, a21, y2 );
788  // FLA_Scal( inv_tau11, y2 );
789  bl1_zscals( &minus_inv_tau11, &conj_beta );
790 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
791 m_ahead,
792  &conj_beta,
793  a21, rs_A,
794  y2, inc_y );
795 bl1_zscalv( BLIS1_NO_CONJUGATE,
796 m_ahead,
797  &inv_tau11,
798  y2, inc_y );
799 
800  // FLA_Scal( minus_inv_tau11, beta );
801  // FLA_Axpy( beta, a21, z2 );
802  // FLA_Scal( inv_tau11, z2 );
803  bl1_zscals( &minus_inv_tau11, &beta );
804 bl1_zaxpyv( BLIS1_NO_CONJUGATE,
805 m_ahead,
806  &beta,
807  a21, rs_A,
808  z2, inc_z );
809 bl1_zscalv( BLIS1_NO_CONJUGATE,
810 m_ahead,
811  &inv_tau11,
812  z2, inc_z );
813 
814  // FLA_Dot( a12t, a21, dot_product );
815  // FLA_Scal( minus_inv_tau11, dot_product );
816  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
817 bl1_zdot( BLIS1_NO_CONJUGATE,
818 m_ahead,
819  a12t, cs_A,
820  a21, rs_A,
821  &dot_product );
822  bl1_zscals( &minus_inv_tau11, &dot_product );
823 bl1_zaxpyv( BLIS1_CONJUGATE,
824 m_ahead,
825  &dot_product,
826  a21, rs_A,
827  a12t, cs_A );
828 
829  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
830  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
831 bl1_zgemv( BLIS1_NO_TRANSPOSE,
832 BLIS1_NO_CONJUGATE,
833 m_behind,
834  n_ahead,
835  buff_1,
836  A02, rs_A, cs_A,
837  a21, rs_A,
838  buff_0,
839  y0, inc_y );
840 bl1_zger( BLIS1_NO_CONJUGATE,
841 BLIS1_CONJUGATE,
842 m_behind,
843  n_ahead,
844  &minus_inv_tau11,
845  y0, inc_y,
846  a21, rs_A,
847  A02, rs_A, cs_A );
848 
849  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
850  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
851  FLA_Fused_Gerc2_opz_var1( m_ahead,
852  n_ahead,
853  buff_m1,
854  a21, rs_A,
855  y2, inc_y,
856  z2, inc_z,
857  a21, rs_A,
858  A22, rs_A, cs_A );
859 
860  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
861 bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
862 BLIS1_NO_CONJUGATE,
863 m_ahead,
864  n_behind,
865  buff_1,
866  A20, rs_A, cs_A,
867  a21, rs_A,
868  buff_0,
869  t01, rs_T );
870 
871  // FLA_Copy( first_elem, a21_t );
872  *a21_t = first_elem;
873  }
874 
875  /*------------------------------------------------------------*/
876 
877  }
878 
879  // FLA_Obj_free( &y );
880  // FLA_Obj_free( &z );
881  FLA_free( buff_y );
882  FLA_free( buff_z );
883 
884  return FLA_SUCCESS;
885 }
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().