libflame  revision_anchor
Functions
FLA_Tridiag_UT_l_fus_var2.c File Reference

(r)

Functions

FLA_Error FLA_Tridiag_UT_l_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_Tridiag_UT_l_ofu_var2()

FLA_Error FLA_Tridiag_UT_l_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Tridiag_UT_l_step_ofu_var2( A, T );
16 }
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:18

References FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofc_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var2 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
557 {
558  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
559  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
560  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
561  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
562 
563  scomplex first_elem;
564  scomplex beta;
565  scomplex inv_tau11;
566  scomplex minus_inv_tau11;
567  scomplex minus_upsilon11, minus_conj_upsilon11;
568  scomplex minus_zeta11, minus_conj_zeta11;
569  int i;
570 
571  // b_alg = FLA_Obj_length( T );
572  int b_alg = m_T;
573 
574  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
575  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
576  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
577  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
578  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
579  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
580  int inc_u = 1;
581  int inc_z = 1;
582  int inc_w = 1;
583 
584  // Initialize some variables (only to prevent compiler warnings).
585  first_elem = *buff_0;
586  minus_inv_tau11 = *buff_0;
587 
588  for ( i = 0; i < b_alg; ++i )
589  {
590  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
591  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
592  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
593  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
594 
595  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
596  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
597 
598  scomplex* upsilon11= buff_u + (i )*inc_u;
599  scomplex* u21 = buff_u + (i+1)*inc_u;
600 
601  scomplex* zeta11 = buff_z + (i )*inc_z;
602  scomplex* z21 = buff_z + (i+1)*inc_z;
603 
604  scomplex* w21 = buff_w + (i+1)*inc_w;
605 
606  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
607  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
608 
609  int m_ahead = m_A - i - 1;
610  int m_behind = i;
611  int n_behind = i;
612 
613  /*------------------------------------------------------------*/
614 
615  if ( m_behind > 0 )
616  {
617  // FLA_Copy( upsilon11, minus_upsilon11 );
618  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
619  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
620  bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );
621  bl1_ccopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
622 
623  // FLA_Copy( zeta11, minus_zeta11 );
624  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
625  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
626  bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );
627  bl1_ccopyconj( &minus_zeta11, &minus_conj_zeta11 );
628 
629  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
630  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
632  1,
633  &minus_upsilon11,
634  zeta11, 1,
635  alpha11, 1 );
637  1,
638  &minus_zeta11,
639  upsilon11, 1,
640  alpha11, 1 );
641 
642  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
643  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
645  m_ahead,
646  &minus_conj_zeta11,
647  u21, inc_u,
648  a21, rs_A );
650  m_ahead,
651  &minus_conj_upsilon11,
652  z21, inc_z,
653  a21, rs_A );
654  }
655 
656  if ( m_ahead > 0 )
657  {
658  // FLA_Househ2_UT( FLA_LEFT,
659  // a21_t,
660  // a21_b, tau11 );
661  FLA_Househ2_UT_l_opc( m_ahead - 1,
662  a21_t,
663  a21_b, rs_A,
664  tau11 );
665 
666  // FLA_Set( FLA_ONE, inv_tau11 );
667  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
668  // FLA_Copy( inv_tau11, minus_inv_tau11 );
669  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
670  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
671  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
672 
673  // FLA_Copy( a21_t, first_elem );
674  // FLA_Set( FLA_ONE, a21_t );
675  first_elem = *a21_t;
676  *a21_t = *buff_1;
677  }
678 
679  if ( m_behind > 0 && m_ahead > 0 )
680  {
681  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
682  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
683  FLA_Fused_Her2_Ax_l_opc_var1( m_ahead,
684  buff_m1,
685  u21, inc_u,
686  z21, inc_z,
687  A22, rs_A, cs_A,
688  a21, rs_A,
689  w21, inc_w );
690  }
691  else if ( m_ahead > 0 )
692  {
693  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
696  m_ahead,
697  buff_1,
698  A22, rs_A, cs_A,
699  a21, rs_A,
700  buff_0,
701  w21, inc_w );
702  }
703 
704  if ( m_ahead > 0 )
705  {
706  // FLA_Copy( a21, u21 );
707  // FLA_Copy( w21, z21 );
709  m_ahead,
710  a21, rs_A,
711  u21, inc_u );
713  m_ahead,
714  w21, inc_w,
715  z21, inc_z );
716 
717  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
718  // FLA_Inv_scal( FLA_TWO, beta );
720  m_ahead,
721  a21, rs_A,
722  z21, inc_z,
723  &beta );
724  bl1_cinvscals( buff_2, &beta );
725 
726  // FLA_Scal( minus_inv_tau11, beta );
727  // FLA_Axpy( beta, a21, z21 );
728  // FLA_Scal( inv_tau11, z21 );
729  bl1_cscals( &minus_inv_tau11, &beta );
731  m_ahead,
732  &beta,
733  a21, rs_A,
734  z21, inc_z );
736  m_ahead,
737  &inv_tau11,
738  z21, inc_z );
739 
740  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
743  m_ahead,
744  n_behind,
745  buff_1,
746  A20, rs_A, cs_A,
747  a21, rs_A,
748  buff_0,
749  t01, rs_T );
750 
751  // FLA_Copy( first_elem, a21_t );
752  *a21_t = first_elem;
753  }
754 
755  if ( m_behind + 1 == b_alg && m_ahead > 0 )
756  {
757  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
760  m_ahead,
761  buff_m1,
762  u21, inc_u,
763  z21, inc_z,
764  A22, rs_A, cs_A );
765  }
766 
767  /*------------------------------------------------------------*/
768 
769  }
770 
771  // FLA_Obj_free( &u );
772  // FLA_Obj_free( &z );
773  // FLA_Obj_free( &w );
774  FLA_free( buff_u );
775  FLA_free( buff_z );
776  FLA_free( buff_w );
777 
778  return FLA_SUCCESS;
779 }
FLA_Error FLA_Fused_Her2_Ax_l_opc_var1(int m_A, scomplex *buff_beta, scomplex *buff_u, int inc_u, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:329
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_hemv.c:35
void bl1_cher2(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_her2.c:33
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
@ BLIS1_LOWER_TRIANGULAR
Definition: blis_type_defs.h:62
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofd_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofd_var2 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
329 {
330  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
331  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
332  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
333  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
334 
335  double first_elem;
336  double beta;
337  double inv_tau11;
338  double minus_inv_tau11;
339  double minus_upsilon11, minus_conj_upsilon11;
340  double minus_zeta11, minus_conj_zeta11;
341  int i;
342 
343  // b_alg = FLA_Obj_length( T );
344  int b_alg = m_T;
345 
346  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
347  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
348  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
349  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
350  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
351  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
352  int inc_u = 1;
353  int inc_z = 1;
354  int inc_w = 1;
355 
356  // Initialize some variables (only to prevent compiler warnings).
357  first_elem = *buff_0;
358  minus_inv_tau11 = *buff_0;
359 
360  for ( i = 0; i < b_alg; ++i )
361  {
362  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
363  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
364  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
365  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
366 
367  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
368  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
369 
370  double* upsilon11= buff_u + (i )*inc_u;
371  double* u21 = buff_u + (i+1)*inc_u;
372 
373  double* zeta11 = buff_z + (i )*inc_z;
374  double* z21 = buff_z + (i+1)*inc_z;
375 
376  double* w21 = buff_w + (i+1)*inc_w;
377 
378  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
379  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
380 
381  int m_ahead = m_A - i - 1;
382  int m_behind = i;
383  int n_behind = i;
384 
385  /*------------------------------------------------------------*/
386 
387  if ( m_behind > 0 )
388  {
389  // FLA_Copy( upsilon11, minus_upsilon11 );
390  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
391  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
392  bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );
393  bl1_dcopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
394 
395  // FLA_Copy( zeta11, minus_zeta11 );
396  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
397  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
398  bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );
399  bl1_dcopyconj( &minus_zeta11, &minus_conj_zeta11 );
400 
401  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
402  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
404  1,
405  &minus_upsilon11,
406  zeta11, 1,
407  alpha11, 1 );
409  1,
410  &minus_zeta11,
411  upsilon11, 1,
412  alpha11, 1 );
413 
414  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
415  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
417  m_ahead,
418  &minus_conj_zeta11,
419  u21, inc_u,
420  a21, rs_A );
422  m_ahead,
423  &minus_conj_upsilon11,
424  z21, inc_z,
425  a21, rs_A );
426  }
427 
428  if ( m_ahead > 0 )
429  {
430  // FLA_Househ2_UT( FLA_LEFT,
431  // a21_t,
432  // a21_b, tau11 );
433  FLA_Househ2_UT_l_opd( m_ahead - 1,
434  a21_t,
435  a21_b, rs_A,
436  tau11 );
437 
438  // FLA_Set( FLA_ONE, inv_tau11 );
439  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
440  // FLA_Copy( inv_tau11, minus_inv_tau11 );
441  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
442  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
443  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
444 
445  // FLA_Copy( a21_t, first_elem );
446  // FLA_Set( FLA_ONE, a21_t );
447  first_elem = *a21_t;
448  *a21_t = *buff_1;
449  }
450 
451  if ( m_behind > 0 && m_ahead > 0 )
452  {
453  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
454  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
455  FLA_Fused_Her2_Ax_l_opd_var1( m_ahead,
456  buff_m1,
457  u21, inc_u,
458  z21, inc_z,
459  A22, rs_A, cs_A,
460  a21, rs_A,
461  w21, inc_w );
462  }
463  else if ( m_ahead > 0 )
464  {
465  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
467  m_ahead,
468  buff_1,
469  A22, rs_A, cs_A,
470  a21, rs_A,
471  buff_0,
472  w21, inc_w );
473  }
474 
475  if ( m_ahead > 0 )
476  {
477  // FLA_Copy( a21, u21 );
478  // FLA_Copy( w21, z21 );
480  m_ahead,
481  a21, rs_A,
482  u21, inc_u );
484  m_ahead,
485  w21, inc_w,
486  z21, inc_z );
487 
488  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
489  // FLA_Inv_scal( FLA_TWO, beta );
491  m_ahead,
492  a21, rs_A,
493  z21, inc_z,
494  &beta );
495  bl1_dinvscals( buff_2, &beta );
496 
497  // FLA_Scal( minus_inv_tau11, beta );
498  // FLA_Axpy( beta, a21, z21 );
499  // FLA_Scal( inv_tau11, z21 );
500  bl1_dscals( &minus_inv_tau11, &beta );
502  m_ahead,
503  &beta,
504  a21, rs_A,
505  z21, inc_z );
507  m_ahead,
508  &inv_tau11,
509  z21, inc_z );
510 
511  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
514  m_ahead,
515  n_behind,
516  buff_1,
517  A20, rs_A, cs_A,
518  a21, rs_A,
519  buff_0,
520  t01, rs_T );
521 
522  // FLA_Copy( first_elem, a21_t );
523  *a21_t = first_elem;
524  }
525 
526  if ( m_behind + 1 == b_alg && m_ahead > 0 )
527  {
528  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
530  m_ahead,
531  buff_m1,
532  u21, inc_u,
533  z21, inc_z,
534  A22, rs_A, cs_A );
535  }
536 
537  /*------------------------------------------------------------*/
538 
539  }
540 
541  // FLA_Obj_free( &u );
542  // FLA_Obj_free( &z );
543  // FLA_Obj_free( &w );
544  FLA_free( buff_u );
545  FLA_free( buff_z );
546  FLA_free( buff_w );
547 
548  return FLA_SUCCESS;
549 }
FLA_Error FLA_Fused_Her2_Ax_l_opd_var1(int m_A, double *buff_beta, double *buff_u, int inc_u, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:246
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_symv.c:56
void bl1_dsyr2(uplo1_t uplo, int m, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_syr2.c:58

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Her2_Ax_l_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofs_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofs_var2 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float beta;
109  float inv_tau11;
110  float minus_inv_tau11;
111  float minus_upsilon11, minus_conj_upsilon11;
112  float minus_zeta11, minus_conj_zeta11;
113  int i;
114 
115  // b_alg = FLA_Obj_length( T );
116  int b_alg = m_T;
117 
118  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
119  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
120  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
121  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
122  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
123  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
124  int inc_u = 1;
125  int inc_z = 1;
126  int inc_w = 1;
127 
128  // Initialize some variables (only to prevent compiler warnings).
129  first_elem = *buff_0;
130  minus_inv_tau11 = *buff_0;
131 
132  for ( i = 0; i < b_alg; ++i )
133  {
134  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
135  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
136  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
137  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
138 
139  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
140  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
141 
142  float* upsilon11= buff_u + (i )*inc_u;
143  float* u21 = buff_u + (i+1)*inc_u;
144 
145  float* zeta11 = buff_z + (i )*inc_z;
146  float* z21 = buff_z + (i+1)*inc_z;
147 
148  float* w21 = buff_w + (i+1)*inc_w;
149 
150  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
151  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
152 
153  int m_ahead = m_A - i - 1;
154  int m_behind = i;
155  int n_behind = i;
156 
157  /*------------------------------------------------------------*/
158 
159  if ( m_behind > 0 )
160  {
161  // FLA_Copy( upsilon11, minus_upsilon11 );
162  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
163  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
164  bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );
165  bl1_scopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
166 
167  // FLA_Copy( zeta11, minus_zeta11 );
168  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
169  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
170  bl1_smult3( buff_m1, zeta11, &minus_zeta11 );
171  bl1_scopyconj( &minus_zeta11, &minus_conj_zeta11 );
172 
173  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
174  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
176  1,
177  &minus_upsilon11,
178  zeta11, 1,
179  alpha11, 1 );
181  1,
182  &minus_zeta11,
183  upsilon11, 1,
184  alpha11, 1 );
185 
186  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
187  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
189  m_ahead,
190  &minus_conj_zeta11,
191  u21, inc_u,
192  a21, rs_A );
194  m_ahead,
195  &minus_conj_upsilon11,
196  z21, inc_z,
197  a21, rs_A );
198  }
199 
200  if ( m_ahead > 0 )
201  {
202  // FLA_Househ2_UT( FLA_LEFT,
203  // a21_t,
204  // a21_b, tau11 );
205  FLA_Househ2_UT_l_ops( m_ahead - 1,
206  a21_t,
207  a21_b, rs_A,
208  tau11 );
209 
210  // FLA_Set( FLA_ONE, inv_tau11 );
211  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
212  // FLA_Copy( inv_tau11, minus_inv_tau11 );
213  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
214  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
215  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
216 
217  // FLA_Copy( a21_t, first_elem );
218  // FLA_Set( FLA_ONE, a21_t );
219  first_elem = *a21_t;
220  *a21_t = *buff_1;
221  }
222 
223  if ( m_behind > 0 && m_ahead > 0 )
224  {
225  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
226  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
227  FLA_Fused_Her2_Ax_l_ops_var1( m_ahead,
228  buff_m1,
229  u21, inc_u,
230  z21, inc_z,
231  A22, rs_A, cs_A,
232  a21, rs_A,
233  w21, inc_w );
234  }
235  else if ( m_ahead > 0 )
236  {
237  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
239  m_ahead,
240  buff_1,
241  A22, rs_A, cs_A,
242  a21, rs_A,
243  buff_0,
244  w21, inc_w );
245  }
246 
247  if ( m_ahead > 0 )
248  {
249  // FLA_Copy( a21, u21 );
250  // FLA_Copy( w21, z21 );
252  m_ahead,
253  a21, rs_A,
254  u21, inc_u );
256  m_ahead,
257  w21, inc_w,
258  z21, inc_z );
259 
260  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
261  // FLA_Inv_scal( FLA_TWO, beta );
263  m_ahead,
264  a21, rs_A,
265  z21, inc_z,
266  &beta );
267  bl1_sinvscals( buff_2, &beta );
268 
269  // FLA_Scal( minus_inv_tau11, beta );
270  // FLA_Axpy( beta, a21, z21 );
271  // FLA_Scal( inv_tau11, z21 );
272  bl1_sscals( &minus_inv_tau11, &beta );
274  m_ahead,
275  &beta,
276  a21, rs_A,
277  z21, inc_z );
279  m_ahead,
280  &inv_tau11,
281  z21, inc_z );
282 
283  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
286  m_ahead,
287  n_behind,
288  buff_1,
289  A20, rs_A, cs_A,
290  a21, rs_A,
291  buff_0,
292  t01, rs_T );
293 
294  // FLA_Copy( first_elem, a21_t );
295  *a21_t = first_elem;
296  }
297 
298  if ( m_behind + 1 == b_alg && m_ahead > 0 )
299  {
300  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
302  m_ahead,
303  buff_m1,
304  u21, inc_u,
305  z21, inc_z,
306  A22, rs_A, cs_A );
307  }
308 
309  /*------------------------------------------------------------*/
310 
311  }
312 
313  // FLA_Obj_free( &u );
314  // FLA_Obj_free( &z );
315  // FLA_Obj_free( &w );
316  FLA_free( buff_u );
317  FLA_free( buff_z );
318  FLA_free( buff_w );
319 
320  return FLA_SUCCESS;
321 }
FLA_Error FLA_Fused_Her2_Ax_l_ops_var1(int m_A, float *buff_beta, float *buff_u, int inc_u, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:126
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_symv.c:13
void bl1_ssyr2(uplo1_t uplo, int m, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_syr2.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofu_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
44  FLA_Tridiag_UT_l_step_ofs_var2( m_A,
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
57  FLA_Tridiag_UT_l_step_ofd_var2( m_A,
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
70  FLA_Tridiag_UT_l_step_ofc_var2( m_A,
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
83  FLA_Tridiag_UT_l_step_ofz_var2( m_A,
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:325
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:97
FLA_Error FLA_Tridiag_UT_l_step_ofz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:783
FLA_Error FLA_Tridiag_UT_l_step_ofc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:553
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), and FLA_Tridiag_UT_l_step_ofz_var2().

Referenced by FLA_Tridiag_UT_l_blf_var2(), and FLA_Tridiag_UT_l_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofz_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var2 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
787 {
788  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
789  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
790  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
791  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
792 
793  dcomplex first_elem;
794  dcomplex beta;
795  dcomplex inv_tau11;
796  dcomplex minus_inv_tau11;
797  dcomplex minus_upsilon11, minus_conj_upsilon11;
798  dcomplex minus_zeta11, minus_conj_zeta11;
799  int i;
800 
801  // b_alg = FLA_Obj_length( T );
802  int b_alg = m_T;
803 
804  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
805  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
806  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
807  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
808  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
809  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
810  int inc_u = 1;
811  int inc_z = 1;
812  int inc_w = 1;
813 
814  // Initialize some variables (only to prevent compiler warnings).
815  first_elem = *buff_0;
816  minus_inv_tau11 = *buff_0;
817 
818  for ( i = 0; i < b_alg; ++i )
819  {
820  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
821  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
822  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
823  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
824 
825  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
826  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
827 
828  dcomplex* upsilon11= buff_u + (i )*inc_u;
829  dcomplex* u21 = buff_u + (i+1)*inc_u;
830 
831  dcomplex* zeta11 = buff_z + (i )*inc_z;
832  dcomplex* z21 = buff_z + (i+1)*inc_z;
833 
834  dcomplex* w21 = buff_w + (i+1)*inc_w;
835 
836  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
837  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
838 
839  int m_ahead = m_A - i - 1;
840  int m_behind = i;
841  int n_behind = i;
842 
843  /*------------------------------------------------------------*/
844 
845  if ( m_behind > 0 )
846  {
847  // FLA_Copy( upsilon11, minus_upsilon11 );
848  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
849  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
850  bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );
851  bl1_zcopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
852 
853  // FLA_Copy( zeta11, minus_zeta11 );
854  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
855  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
856  bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );
857  bl1_zcopyconj( &minus_zeta11, &minus_conj_zeta11 );
858 
859  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
860  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
862  1,
863  &minus_upsilon11,
864  zeta11, 1,
865  alpha11, 1 );
867  1,
868  &minus_zeta11,
869  upsilon11, 1,
870  alpha11, 1 );
871 
872  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
873  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
875  m_ahead,
876  &minus_conj_zeta11,
877  u21, inc_u,
878  a21, rs_A );
880  m_ahead,
881  &minus_conj_upsilon11,
882  z21, inc_z,
883  a21, rs_A );
884  }
885 
886  if ( m_ahead > 0 )
887  {
888  // FLA_Househ2_UT( FLA_LEFT,
889  // a21_t,
890  // a21_b, tau11 );
891  FLA_Househ2_UT_l_opz( m_ahead - 1,
892  a21_t,
893  a21_b, rs_A,
894  tau11 );
895 
896  // FLA_Set( FLA_ONE, inv_tau11 );
897  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
898  // FLA_Copy( inv_tau11, minus_inv_tau11 );
899  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
900  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
901  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
902 
903  // FLA_Copy( a21_t, first_elem );
904  // FLA_Set( FLA_ONE, a21_t );
905  first_elem = *a21_t;
906  *a21_t = *buff_1;
907  }
908 
909  if ( m_behind > 0 && m_ahead > 0 )
910  {
911  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
912  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
913  FLA_Fused_Her2_Ax_l_opz_var1( m_ahead,
914  buff_m1,
915  u21, inc_u,
916  z21, inc_z,
917  A22, rs_A, cs_A,
918  a21, rs_A,
919  w21, inc_w );
920  }
921  else if ( m_ahead > 0 )
922  {
923  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
926  m_ahead,
927  buff_1,
928  A22, rs_A, cs_A,
929  a21, rs_A,
930  buff_0,
931  w21, inc_w );
932  }
933 
934  if ( m_ahead > 0 )
935  {
936  // FLA_Copy( a21, u21 );
937  // FLA_Copy( w21, z21 );
939  m_ahead,
940  a21, rs_A,
941  u21, inc_u );
943  m_ahead,
944  w21, inc_w,
945  z21, inc_z );
946 
947  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
948  // FLA_Inv_scal( FLA_TWO, beta );
950  m_ahead,
951  a21, rs_A,
952  z21, inc_z,
953  &beta );
954  bl1_zinvscals( buff_2, &beta );
955 
956  // FLA_Scal( minus_inv_tau11, beta );
957  // FLA_Axpy( beta, a21, z21 );
958  // FLA_Scal( inv_tau11, z21 );
959  bl1_zscals( &minus_inv_tau11, &beta );
961  m_ahead,
962  &beta,
963  a21, rs_A,
964  z21, inc_z );
966  m_ahead,
967  &inv_tau11,
968  z21, inc_z );
969 
970  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
973  m_ahead,
974  n_behind,
975  buff_1,
976  A20, rs_A, cs_A,
977  a21, rs_A,
978  buff_0,
979  t01, rs_T );
980 
981  // FLA_Copy( first_elem, a21_t );
982  *a21_t = first_elem;
983  }
984 
985  if ( m_behind + 1 == b_alg && m_ahead > 0 )
986  {
987  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
990  m_ahead,
991  buff_m1,
992  u21, inc_u,
993  z21, inc_z,
994  A22, rs_A, cs_A );
995  }
996 
997  /*------------------------------------------------------------*/
998 
999  }
1000 
1001  // FLA_Obj_free( &u );
1002  // FLA_Obj_free( &z );
1003  // FLA_Obj_free( &w );
1004  FLA_free( buff_u );
1005  FLA_free( buff_z );
1006  FLA_free( buff_w );
1007 
1008  return FLA_SUCCESS;
1009 }
FLA_Error FLA_Fused_Her2_Ax_l_opz_var1(int m_A, dcomplex *buff_beta, dcomplex *buff_u, int inc_u, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:450
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_hemv.c:134
void bl1_zher2(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_her2.c:121
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().