libflame  revision_anchor
Functions
FLA_Hess_UT_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Hess_UT_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Hess_UT_opt_var2()

FLA_Error FLA_Hess_UT_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_opt_var2( A, T );
16 }
FLA_Error FLA_Hess_UT_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var2.c:18

References FLA_Hess_UT_step_opt_var2().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_step_opc_var2()

FLA_Error FLA_Hess_UT_step_opc_var2 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
539 {
540  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
541  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
542  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
543  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
544 
545  scomplex first_elem;
546  scomplex dot_product;
547  scomplex beta, conj_beta;
548  scomplex inv_tau11;
549  scomplex minus_inv_tau11;
550  int i;
551 
552  // b_alg = FLA_Obj_length( T );
553  int b_alg = m_T;
554 
555  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
556  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
557  scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
558  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
559  int inc_y = 1;
560  int inc_z = 1;
561 
562  for ( i = 0; i < b_alg; ++i )
563  {
564  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
565  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
566  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
567  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
568  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
569 
570  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
571  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
572 
573  scomplex* y0 = buff_y + (0 )*inc_y;
574  scomplex* y2 = buff_y + (i+1)*inc_y;
575 
576  scomplex* z2 = buff_z + (i+1)*inc_z;
577 
578  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
579  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
580 
581  int m_ahead = m_A - i - 1;
582  int n_ahead = m_A - i - 1;
583  int m_behind = i;
584  int n_behind = i;
585 
586  /*------------------------------------------------------------*/
587 
588  if ( m_ahead > 0 )
589  {
590  // FLA_Househ2_UT( FLA_LEFT,
591  // a21_t,
592  // a21_b, tau11 );
593  FLA_Househ2_UT_l_opc( m_ahead - 1,
594  a21_t,
595  a21_b, rs_A,
596  tau11 );
597 
598  // FLA_Set( FLA_ONE, inv_tau11 );
599  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
600  // FLA_Copy( inv_tau11, minus_inv_tau11 );
601  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
602  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
603  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
604 
605  // FLA_Copy( a21_t, first_elem );
606  // FLA_Set( FLA_ONE, a21_t );
607  first_elem = *a21_t;
608  *a21_t = *buff_1;
609 
610  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
613  m_ahead,
614  n_ahead,
615  buff_1,
616  A22, rs_A, cs_A,
617  a21, rs_A,
618  buff_0,
619  y2, inc_y );
620 
621  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
624  m_ahead,
625  n_ahead,
626  buff_1,
627  A22, rs_A, cs_A,
628  a21, rs_A,
629  buff_0,
630  z2, inc_z );
631 
632  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
633  // FLA_Inv_scal( FLA_TWO, beta );
634  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
636  m_ahead,
637  a21, rs_A,
638  z2, inc_z,
639  &beta );
640  bl1_cinvscals( buff_2, &beta );
641  bl1_ccopyconj( &beta, &conj_beta );
642 
643  // FLA_Scal( minus_inv_tau11, conj_beta );
644  // FLA_Axpy( conj_beta, a21, y2 );
645  // FLA_Scal( inv_tau11, y2 );
646  bl1_cscals( &minus_inv_tau11, &conj_beta );
648  m_ahead,
649  &conj_beta,
650  a21, rs_A,
651  y2, inc_y );
653  m_ahead,
654  &inv_tau11,
655  y2, inc_y );
656 
657  // FLA_Scal( minus_inv_tau11, beta );
658  // FLA_Axpy( beta, a21, z2 );
659  // FLA_Scal( inv_tau11, z2 );
660  bl1_cscals( &minus_inv_tau11, &beta );
662  m_ahead,
663  &beta,
664  a21, rs_A,
665  z2, inc_z );
667  m_ahead,
668  &inv_tau11,
669  z2, inc_z );
670 
671  // FLA_Dot( a12t, a21, dot_product );
672  // FLA_Scal( minus_inv_tau11, dot_product );
673  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
675  m_ahead,
676  a12t, cs_A,
677  a21, rs_A,
678  &dot_product );
679  bl1_cscals( &minus_inv_tau11, &dot_product );
681  m_ahead,
682  &dot_product,
683  a21, rs_A,
684  a12t, cs_A );
685 
686  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
687  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
690  m_behind,
691  n_ahead,
692  buff_1,
693  A02, rs_A, cs_A,
694  a21, rs_A,
695  buff_0,
696  y0, inc_y );
699  m_behind,
700  n_ahead,
701  &minus_inv_tau11,
702  y0, inc_y,
703  a21, rs_A,
704  A02, rs_A, cs_A );
705 
706  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
707  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
710  m_ahead,
711  n_ahead,
712  buff_m1,
713  a21, rs_A,
714  y2, inc_y,
715  A22, rs_A, cs_A );
718  m_ahead,
719  n_ahead,
720  buff_m1,
721  z2, inc_z,
722  a21, rs_A,
723  A22, rs_A, cs_A );
724 
725  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
728  m_ahead,
729  n_behind,
730  buff_1,
731  A20, rs_A, cs_A,
732  a21, rs_A,
733  buff_0,
734  t01, rs_T );
735 
736  // FLA_Copy( first_elem, a21_t );
737  *a21_t = first_elem;
738  }
739 
740  /*------------------------------------------------------------*/
741 
742  }
743 
744  // FLA_Obj_free( &y );
745  // FLA_Obj_free( &z );
746  FLA_free( buff_y );
747  FLA_free( buff_z );
748 
749  return FLA_SUCCESS;
750 }
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
scomplex
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opd_var2()

FLA_Error FLA_Hess_UT_step_opd_var2 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
320 {
321  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
322  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
323  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
324  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
325 
326  double first_elem;
327  double dot_product;
328  double beta, conj_beta;
329  double inv_tau11;
330  double minus_inv_tau11;
331  int i;
332 
333  // b_alg = FLA_Obj_length( T );
334  int b_alg = m_T;
335 
336  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
337  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
338  double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
339  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
340  int inc_y = 1;
341  int inc_z = 1;
342 
343  for ( i = 0; i < b_alg; ++i )
344  {
345  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
346  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
347  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
348  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
349  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
350 
351  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
352  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
353 
354  double* y0 = buff_y + (0 )*inc_y;
355  double* y2 = buff_y + (i+1)*inc_y;
356 
357  double* z2 = buff_z + (i+1)*inc_z;
358 
359  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
360  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
361 
362  int m_ahead = m_A - i - 1;
363  int n_ahead = m_A - i - 1;
364  int m_behind = i;
365  int n_behind = i;
366 
367  /*------------------------------------------------------------*/
368 
369  if ( m_ahead > 0 )
370  {
371  // FLA_Househ2_UT( FLA_LEFT,
372  // a21_t,
373  // a21_b, tau11 );
374  FLA_Househ2_UT_l_opd( m_ahead - 1,
375  a21_t,
376  a21_b, rs_A,
377  tau11 );
378 
379  // FLA_Set( FLA_ONE, inv_tau11 );
380  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
381  // FLA_Copy( inv_tau11, minus_inv_tau11 );
382  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
383  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
384  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
385 
386  // FLA_Copy( a21_t, first_elem );
387  // FLA_Set( FLA_ONE, a21_t );
388  first_elem = *a21_t;
389  *a21_t = *buff_1;
390 
391  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
394  m_ahead,
395  n_ahead,
396  buff_1,
397  A22, rs_A, cs_A,
398  a21, rs_A,
399  buff_0,
400  y2, inc_y );
401 
402  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
405  m_ahead,
406  n_ahead,
407  buff_1,
408  A22, rs_A, cs_A,
409  a21, rs_A,
410  buff_0,
411  z2, inc_z );
412 
413  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
414  // FLA_Inv_scal( FLA_TWO, beta );
415  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
417  m_ahead,
418  a21, rs_A,
419  z2, inc_z,
420  &beta );
421  bl1_dinvscals( buff_2, &beta );
422  bl1_dcopyconj( &beta, &conj_beta );
423 
424  // FLA_Scal( minus_inv_tau11, conj_beta );
425  // FLA_Axpy( conj_beta, a21, y2 );
426  // FLA_Scal( inv_tau11, y2 );
427  bl1_dscals( &minus_inv_tau11, &conj_beta );
429  m_ahead,
430  &conj_beta,
431  a21, rs_A,
432  y2, inc_y );
434  m_ahead,
435  &inv_tau11,
436  y2, inc_y );
437 
438  // FLA_Scal( minus_inv_tau11, beta );
439  // FLA_Axpy( beta, a21, z2 );
440  // FLA_Scal( inv_tau11, z2 );
441  bl1_dscals( &minus_inv_tau11, &beta );
443  m_ahead,
444  &beta,
445  a21, rs_A,
446  z2, inc_z );
448  m_ahead,
449  &inv_tau11,
450  z2, inc_z );
451 
452  // FLA_Dot( a12t, a21, dot_product );
453  // FLA_Scal( minus_inv_tau11, dot_product );
454  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
456  m_ahead,
457  a12t, cs_A,
458  a21, rs_A,
459  &dot_product );
460  bl1_dscals( &minus_inv_tau11, &dot_product );
462  m_ahead,
463  &dot_product,
464  a21, rs_A,
465  a12t, cs_A );
466 
467  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
468  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
471  m_behind,
472  n_ahead,
473  buff_1,
474  A02, rs_A, cs_A,
475  a21, rs_A,
476  buff_0,
477  y0, inc_y );
480  m_behind,
481  n_ahead,
482  &minus_inv_tau11,
483  y0, inc_y,
484  a21, rs_A,
485  A02, rs_A, cs_A );
486 
487  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
488  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
491  m_ahead,
492  n_ahead,
493  buff_m1,
494  a21, rs_A,
495  y2, inc_y,
496  A22, rs_A, cs_A );
499  m_ahead,
500  n_ahead,
501  buff_m1,
502  z2, inc_z,
503  a21, rs_A,
504  A22, rs_A, cs_A );
505 
506  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
509  m_ahead,
510  n_behind,
511  buff_1,
512  A20, rs_A, cs_A,
513  a21, rs_A,
514  buff_0,
515  t01, rs_T );
516 
517  // FLA_Copy( first_elem, a21_t );
518  *a21_t = first_elem;
519  }
520 
521  /*------------------------------------------------------------*/
522 
523  }
524 
525  // FLA_Obj_free( &y );
526  // FLA_Obj_free( &z );
527  FLA_free( buff_y );
528  FLA_free( buff_z );
529 
530  return FLA_SUCCESS;
531 }
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_ops_var2()

FLA_Error FLA_Hess_UT_step_ops_var2 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float dot_product;
109  float beta, conj_beta;
110  float inv_tau11;
111  float minus_inv_tau11;
112  int i;
113 
114  // b_alg = FLA_Obj_length( T );
115  int b_alg = m_T;
116 
117  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
118  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
119  float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
120  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
121  int inc_y = 1;
122  int inc_z = 1;
123 
124  for ( i = 0; i < b_alg; ++i )
125  {
126  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
127  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
128  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
129  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
130  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
131 
132  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
133  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
134 
135  float* y0 = buff_y + (0 )*inc_y;
136  float* y2 = buff_y + (i+1)*inc_y;
137 
138  float* z2 = buff_z + (i+1)*inc_z;
139 
140  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
141  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
142 
143  int m_ahead = m_A - i - 1;
144  int n_ahead = m_A - i - 1;
145  int m_behind = i;
146  int n_behind = i;
147 
148  /*------------------------------------------------------------*/
149 
150  if ( m_ahead > 0 )
151  {
152  // FLA_Househ2_UT( FLA_LEFT,
153  // a21_t,
154  // a21_b, tau11 );
155  FLA_Househ2_UT_l_ops( m_ahead - 1,
156  a21_t,
157  a21_b, rs_A,
158  tau11 );
159 
160  // FLA_Set( FLA_ONE, inv_tau11 );
161  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
162  // FLA_Copy( inv_tau11, minus_inv_tau11 );
163  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
164  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
165  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
166 
167  // FLA_Copy( a21_t, first_elem );
168  // FLA_Set( FLA_ONE, a21_t );
169  first_elem = *a21_t;
170  *a21_t = *buff_1;
171 
172  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
175  m_ahead,
176  n_ahead,
177  buff_1,
178  A22, rs_A, cs_A,
179  a21, rs_A,
180  buff_0,
181  y2, inc_y );
182 
183  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
186  m_ahead,
187  n_ahead,
188  buff_1,
189  A22, rs_A, cs_A,
190  a21, rs_A,
191  buff_0,
192  z2, inc_z );
193 
194  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
195  // FLA_Inv_scal( FLA_TWO, beta );
196  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
198  m_ahead,
199  a21, rs_A,
200  z2, inc_z,
201  &beta );
202  bl1_sinvscals( buff_2, &beta );
203  bl1_scopyconj( &beta, &conj_beta );
204 
205  // FLA_Scal( minus_inv_tau11, conj_beta );
206  // FLA_Axpy( conj_beta, a21, y2 );
207  // FLA_Scal( inv_tau11, y2 );
208  bl1_sscals( &minus_inv_tau11, &conj_beta );
210  m_ahead,
211  &conj_beta,
212  a21, rs_A,
213  y2, inc_y );
215  m_ahead,
216  &inv_tau11,
217  y2, inc_y );
218 
219  // FLA_Scal( minus_inv_tau11, beta );
220  // FLA_Axpy( beta, a21, z2 );
221  // FLA_Scal( inv_tau11, z2 );
222  bl1_sscals( &minus_inv_tau11, &beta );
224  m_ahead,
225  &beta,
226  a21, rs_A,
227  z2, inc_z );
229  m_ahead,
230  &inv_tau11,
231  z2, inc_z );
232 
233  // FLA_Dot( a12t, a21, dot_product );
234  // FLA_Scal( minus_inv_tau11, dot_product );
235  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
237  m_ahead,
238  a12t, cs_A,
239  a21, rs_A,
240  &dot_product );
241  bl1_sscals( &minus_inv_tau11, &dot_product );
243  m_ahead,
244  &dot_product,
245  a21, rs_A,
246  a12t, cs_A );
247 
248  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
249  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
252  m_behind,
253  n_ahead,
254  buff_1,
255  A02, rs_A, cs_A,
256  a21, rs_A,
257  buff_0,
258  y0, inc_y );
261  m_behind,
262  n_ahead,
263  &minus_inv_tau11,
264  y0, inc_y,
265  a21, rs_A,
266  A02, rs_A, cs_A );
267 
268  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
269  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
272  m_ahead,
273  n_ahead,
274  buff_m1,
275  a21, rs_A,
276  y2, inc_y,
277  A22, rs_A, cs_A );
280  m_ahead,
281  n_ahead,
282  buff_m1,
283  z2, inc_z,
284  a21, rs_A,
285  A22, rs_A, cs_A );
286 
287  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
290  m_ahead,
291  n_behind,
292  buff_1,
293  A20, rs_A, cs_A,
294  a21, rs_A,
295  buff_0,
296  t01, rs_T );
297 
298  // FLA_Copy( first_elem, a21_t );
299  *a21_t = first_elem;
300  }
301 
302  /*------------------------------------------------------------*/
303 
304  }
305 
306  // FLA_Obj_free( &y );
307  // FLA_Obj_free( &z );
308  FLA_free( buff_y );
309  FLA_free( buff_z );
310 
311  return FLA_SUCCESS;
312 }
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opt_var2()

FLA_Error FLA_Hess_UT_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
44  FLA_Hess_UT_step_ops_var2( m_A,
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
57  FLA_Hess_UT_step_opd_var2( m_A,
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
70  FLA_Hess_UT_step_opc_var2( m_A,
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
83  FLA_Hess_UT_step_opz_var2( m_A,
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Hess_UT_step_opc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var2.c:535
FLA_Error FLA_Hess_UT_step_opd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var2.c:316
FLA_Error FLA_Hess_UT_step_opz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var2.c:754
FLA_Error FLA_Hess_UT_step_ops_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var2.c:97
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dcomplex
Definition: blis_type_defs.h:138

References FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var2(), and FLA_Hess_UT_opt_var2().

◆ FLA_Hess_UT_step_opz_var2()

FLA_Error FLA_Hess_UT_step_opz_var2 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
758 {
759  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
760  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
761  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
762  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
763 
764  dcomplex first_elem;
765  dcomplex dot_product;
766  dcomplex beta, conj_beta;
767  dcomplex inv_tau11;
768  dcomplex minus_inv_tau11;
769  int i;
770 
771  // b_alg = FLA_Obj_length( T );
772  int b_alg = m_T;
773 
774  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
775  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
776  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
777  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
778  int inc_y = 1;
779  int inc_z = 1;
780 
781  for ( i = 0; i < b_alg; ++i )
782  {
783  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
784  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
785  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
786  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
787  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
788 
789  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
790  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
791 
792  dcomplex* y0 = buff_y + (0 )*inc_y;
793  dcomplex* y2 = buff_y + (i+1)*inc_y;
794 
795  dcomplex* z2 = buff_z + (i+1)*inc_z;
796 
797  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
798  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
799 
800  int m_ahead = m_A - i - 1;
801  int n_ahead = m_A - i - 1;
802  int m_behind = i;
803  int n_behind = i;
804 
805  /*------------------------------------------------------------*/
806 
807  if ( m_ahead > 0 )
808  {
809  // FLA_Househ2_UT( FLA_LEFT,
810  // a21_t,
811  // a21_b, tau11 );
812  FLA_Househ2_UT_l_opz( m_ahead - 1,
813  a21_t,
814  a21_b, rs_A,
815  tau11 );
816 
817  // FLA_Set( FLA_ONE, inv_tau11 );
818  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
819  // FLA_Copy( inv_tau11, minus_inv_tau11 );
820  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
821  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
822  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
823 
824  // FLA_Copy( a21_t, first_elem );
825  // FLA_Set( FLA_ONE, a21_t );
826  first_elem = *a21_t;
827  *a21_t = *buff_1;
828 
829  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
832  m_ahead,
833  n_ahead,
834  buff_1,
835  A22, rs_A, cs_A,
836  a21, rs_A,
837  buff_0,
838  y2, inc_y );
839 
840  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
843  m_ahead,
844  n_ahead,
845  buff_1,
846  A22, rs_A, cs_A,
847  a21, rs_A,
848  buff_0,
849  z2, inc_z );
850 
851  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
852  // FLA_Inv_scal( FLA_TWO, beta );
853  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
855  m_ahead,
856  a21, rs_A,
857  z2, inc_z,
858  &beta );
859  bl1_zinvscals( buff_2, &beta );
860  bl1_zcopyconj( &beta, &conj_beta );
861 
862  // FLA_Scal( minus_inv_tau11, conj_beta );
863  // FLA_Axpy( conj_beta, a21, y2 );
864  // FLA_Scal( inv_tau11, y2 );
865  bl1_zscals( &minus_inv_tau11, &conj_beta );
867  m_ahead,
868  &conj_beta,
869  a21, rs_A,
870  y2, inc_y );
872  m_ahead,
873  &inv_tau11,
874  y2, inc_y );
875 
876  // FLA_Scal( minus_inv_tau11, beta );
877  // FLA_Axpy( beta, a21, z2 );
878  // FLA_Scal( inv_tau11, z2 );
879  bl1_zscals( &minus_inv_tau11, &beta );
881  m_ahead,
882  &beta,
883  a21, rs_A,
884  z2, inc_z );
886  m_ahead,
887  &inv_tau11,
888  z2, inc_z );
889 
890  // FLA_Dot( a12t, a21, dot_product );
891  // FLA_Scal( minus_inv_tau11, dot_product );
892  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
894  m_ahead,
895  a12t, cs_A,
896  a21, rs_A,
897  &dot_product );
898  bl1_zscals( &minus_inv_tau11, &dot_product );
900  m_ahead,
901  &dot_product,
902  a21, rs_A,
903  a12t, cs_A );
904 
905  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
906  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
909  m_behind,
910  n_ahead,
911  buff_1,
912  A02, rs_A, cs_A,
913  a21, rs_A,
914  buff_0,
915  y0, inc_y );
918  m_behind,
919  n_ahead,
920  &minus_inv_tau11,
921  y0, inc_y,
922  a21, rs_A,
923  A02, rs_A, cs_A );
924 
925  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
926  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
929  m_ahead,
930  n_ahead,
931  buff_m1,
932  a21, rs_A,
933  y2, inc_y,
934  A22, rs_A, cs_A );
937  m_ahead,
938  n_ahead,
939  buff_m1,
940  z2, inc_z,
941  a21, rs_A,
942  A22, rs_A, cs_A );
943 
944  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
947  m_ahead,
948  n_behind,
949  buff_1,
950  A20, rs_A, cs_A,
951  a21, rs_A,
952  buff_0,
953  t01, rs_T );
954 
955  // FLA_Copy( first_elem, a21_t );
956  *a21_t = first_elem;
957  }
958 
959  /*------------------------------------------------------------*/
960 
961  }
962 
963  // FLA_Obj_free( &y );
964  // FLA_Obj_free( &z );
965  FLA_free( buff_y );
966  FLA_free( buff_z );
967 
968  return FLA_SUCCESS;
969 }
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().