libflame  revision_anchor
Functions
FLA_Bidiag_UT_u_fus_var2.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_ofu_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_ofu_var2()

FLA_Error FLA_Bidiag_UT_u_ofu_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14 {
15  return FLA_Bidiag_UT_u_step_ofu_var2( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_ofu_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:18

References FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofc_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex buff_A,
int  rs_A,
int  cs_A,
scomplex buff_T,
int  rs_T,
int  cs_T,
scomplex buff_S,
int  rs_S,
int  cs_S 
)
520 {
521  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
522  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
523  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
524 
525  scomplex beta;
526  int i;
527 
528  // b_alg = FLA_Obj_length( T );
529  int b_alg = m_TS;
530 
531  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
532  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
533  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
534  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
535  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
536  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
537  int inc_v = 1;
538  int inc_y = 1;
539  int inc_z = 1;
540 
541  for ( i = 0; i < b_alg; ++i )
542  {
543  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
544  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
545  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
546  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
547  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
548  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
549  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
550 
551  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
552  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
553 
554  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
555  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
556 
557  scomplex* v21 = buff_v + (i+1)*inc_v;
558 
559  scomplex* y21 = buff_y + (i+1)*inc_y;
560 
561  scomplex* z21 = buff_z + (i+1)*inc_z;
562 
563  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
564  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
565 
566  scomplex* v21_t = v21 + (0 )*inc_v;
567  scomplex* v21_b = v21 + (1 )*inc_v;
568 
569  int m_ahead = m_A - i - 1;
570  int n_ahead = n_A - i - 1;
571  int m_behind = i;
572  int n_behind = i;
573 
574  /*------------------------------------------------------------*/
575 
576  // FLA_Househ2_UT( FLA_LEFT,
577  // alpha11,
578  // a21, tau11 );
579  FLA_Househ2_UT_l_opc( m_ahead,
580  alpha11,
581  a21, rs_A,
582  tau11 );
583 
584  if ( n_ahead > 0 )
585  {
586  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
587  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
589  n_ahead,
590  a12t, cs_A,
591  y21, inc_y );
594  m_ahead,
595  n_ahead,
596  buff_1,
597  A22, rs_A, cs_A,
598  a21, rs_A,
599  buff_1,
600  y21, inc_y );
601 
602  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
604  n_ahead,
605  tau11,
606  y21, inc_y );
607 
608  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
610  n_ahead,
611  buff_m1,
612  y21, inc_y,
613  a12t, cs_A );
614 
615  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
616  FLA_Househ2_UT_r_opc( n_ahead - 1,
617  a12t_l,
618  a12t_r, cs_A,
619  sigma11 );
620 
621  // FLA_Set( FLA_ONE, v21_t );
622  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
623  *v21_t = *buff_1;
625  n_ahead - 1,
626  a12t_r, cs_A,
627  v21_b, inc_y );
628 
629  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
630  // FLA_Scal( FLA_MINUS_ONE, beta );
632  n_ahead,
633  y21, inc_y,
634  v21, inc_v,
635  &beta );
636  bl1_cneg1( &beta );
637 
638  // FLA_Copy( a21, z21 );
639  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
640  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
642  m_ahead,
643  a21, rs_A,
644  z21, inc_z );
647  m_ahead,
648  n_ahead,
649  buff_1,
650  A22, rs_A, cs_A,
651  v21, inc_v,
652  &beta,
653  z21, inc_z );
655  m_ahead,
656  sigma11,
657  z21, inc_z );
658 
659  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
660  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
661  FLA_Fused_Gerc2_opc_var1( m_ahead,
662  n_ahead,
663  buff_m1,
664  a21, rs_A,
665  y21, inc_y,
666  z21, inc_z,
667  v21, inc_v,
668  A22, rs_A, cs_A );
669 
670  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
673  m_behind,
674  n_ahead,
675  buff_1,
676  A02, rs_A, cs_A,
677  v21, inc_v,
678  buff_0,
679  s01, rs_S );
680  }
681 
682  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
683  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
685  n_behind,
686  a10t, cs_A,
687  t01, rs_T );
690  m_ahead,
691  n_behind,
692  buff_1,
693  A20, rs_A, cs_A,
694  a21, rs_A,
695  buff_1,
696  t01, rs_T );
697 
698  /*------------------------------------------------------------*/
699 
700  }
701 
702  // FLA_Obj_free( &v );
703  // FLA_Obj_free( &y );
704  // FLA_Obj_free( &z );
705  FLA_free( buff_v );
706  FLA_free( buff_y );
707  FLA_free( buff_z );
708 
709  return FLA_SUCCESS;
710 }
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:241
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofd_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)
320 {
321  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
322  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
323  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
324 
325  double beta;
326  int i;
327 
328  // b_alg = FLA_Obj_length( T );
329  int b_alg = m_TS;
330 
331  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
332  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
333  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
334  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
335  double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
336  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
337  int inc_v = 1;
338  int inc_y = 1;
339  int inc_z = 1;
340 
341  for ( i = 0; i < b_alg; ++i )
342  {
343  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
344  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
345  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
346  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
347  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
348  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
349  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
350 
351  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
352  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
353 
354  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
355  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
356 
357  double* v21 = buff_v + (i+1)*inc_v;
358 
359  double* y21 = buff_y + (i+1)*inc_y;
360 
361  double* z21 = buff_z + (i+1)*inc_z;
362 
363  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
364  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
365 
366  double* v21_t = v21 + (0 )*inc_v;
367  double* v21_b = v21 + (1 )*inc_v;
368 
369  int m_ahead = m_A - i - 1;
370  int n_ahead = n_A - i - 1;
371  int m_behind = i;
372  int n_behind = i;
373 
374  /*------------------------------------------------------------*/
375 
376  // FLA_Househ2_UT( FLA_LEFT,
377  // alpha11,
378  // a21, tau11 );
379  FLA_Househ2_UT_l_opd( m_ahead,
380  alpha11,
381  a21, rs_A,
382  tau11 );
383 
384  if ( n_ahead > 0 )
385  {
386  // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
387  // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
389  n_ahead,
390  a12t, cs_A,
391  y21, inc_y );
394  m_ahead,
395  n_ahead,
396  buff_1,
397  A22, rs_A, cs_A,
398  a21, rs_A,
399  buff_1,
400  y21, inc_y );
401 
402  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
404  n_ahead,
405  tau11,
406  y21, inc_y );
407 
408  // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
410  n_ahead,
411  buff_m1,
412  y21, inc_y,
413  a12t, cs_A );
414 
415  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
416  FLA_Househ2_UT_r_opd( n_ahead - 1,
417  a12t_l,
418  a12t_r, cs_A,
419  sigma11 );
420 
421  // FLA_Set( FLA_ONE, v21_t );
422  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
423  *v21_t = *buff_1;
425  n_ahead - 1,
426  a12t_r, cs_A,
427  v21_b, inc_y );
428 
429  // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
430  // FLA_Scal( FLA_MINUS_ONE, beta );
432  n_ahead,
433  v21, inc_v,
434  y21, inc_y,
435  &beta );
436  bl1_dneg1( &beta );
437 
438  // FLA_Copy( a21, z21 );
439  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
440  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
442  m_ahead,
443  a21, rs_A,
444  z21, inc_z );
447  m_ahead,
448  n_ahead,
449  buff_1,
450  A22, rs_A, cs_A,
451  v21, inc_v,
452  &beta,
453  z21, inc_z );
455  m_ahead,
456  sigma11,
457  z21, inc_z );
458 
459  // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
460  // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
461  FLA_Fused_Gerc2_opd_var1( m_ahead,
462  n_ahead,
463  buff_m1,
464  a21, rs_A,
465  y21, inc_y,
466  z21, inc_z,
467  v21, inc_v,
468  A22, rs_A, cs_A );
469 
470  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
473  m_behind,
474  n_ahead,
475  buff_1,
476  A02, rs_A, cs_A,
477  v21, inc_v,
478  buff_0,
479  s01, rs_S );
480  }
481 
482  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
483  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
485  n_behind,
486  a10t, cs_A,
487  t01, rs_T );
490  m_ahead,
491  n_behind,
492  buff_1,
493  A20, rs_A, cs_A,
494  a21, rs_A,
495  buff_1,
496  t01, rs_T );
497 
498  /*------------------------------------------------------------*/
499 
500  }
501 
502  // FLA_Obj_free( &v );
503  // FLA_Obj_free( &y );
504  // FLA_Obj_free( &z );
505  FLA_free( buff_v );
506  FLA_free( buff_y );
507  FLA_free( buff_z );
508 
509  return FLA_SUCCESS;
510 }
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
@ BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofs_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofs_var2 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124 
125  float beta;
126  int i;
127 
128  // b_alg = FLA_Obj_length( T );
129  int b_alg = m_TS;
130 
131  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
132  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
133  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
134  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
135  float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
136  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
137  int inc_v = 1;
138  int inc_y = 1;
139  int inc_z = 1;
140 
141  for ( i = 0; i < b_alg; ++i )
142  {
143  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
144  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
145  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
146  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
147  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
148  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150 
151  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
152  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
153 
154  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
155  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
156 
157  float* v21 = buff_v + (i+1)*inc_v;
158 
159  float* y21 = buff_y + (i+1)*inc_y;
160 
161  float* z21 = buff_z + (i+1)*inc_z;
162 
163  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
164  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
165 
166  float* v21_t = v21 + (0 )*inc_v;
167  float* v21_b = v21 + (1 )*inc_v;
168 
169  int m_ahead = m_A - i - 1;
170  int n_ahead = n_A - i - 1;
171  int m_behind = i;
172  int n_behind = i;
173 
174  /*------------------------------------------------------------*/
175 
176  // FLA_Househ2_UT( FLA_LEFT,
177  // alpha11,
178  // a21, tau11 );
179  FLA_Househ2_UT_l_ops( m_ahead,
180  alpha11,
181  a21, rs_A,
182  tau11 );
183 
184  if ( n_ahead > 0 )
185  {
186  // FLA_Copyt( FLA_TRANSPOSE, a12t, y21 );
187  // FLA_Gemvc( FLA_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
189  n_ahead,
190  a12t, cs_A,
191  y21, inc_y );
194  m_ahead,
195  n_ahead,
196  buff_1,
197  A22, rs_A, cs_A,
198  a21, rs_A,
199  buff_1,
200  y21, inc_y );
201 
202  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
204  n_ahead,
205  tau11,
206  y21, inc_y );
207 
208  // FLA_Axpyt( FLA_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
210  n_ahead,
211  buff_m1,
212  y21, inc_y,
213  a12t, cs_A );
214 
215  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
216  FLA_Househ2_UT_r_ops( n_ahead - 1,
217  a12t_l,
218  a12t_r, cs_A,
219  sigma11 );
220 
221  // FLA_Set( FLA_ONE, v21_t );
222  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
223  *v21_t = *buff_1;
225  n_ahead - 1,
226  a12t_r, cs_A,
227  v21_b, inc_y );
228 
229  // FLA_Dotc( FLA_CONJUGATE, v21, y21, beta );
230  // FLA_Scal( FLA_MINUS_ONE, beta );
232  n_ahead,
233  v21, inc_v,
234  y21, inc_y,
235  &beta );
236  bl1_sneg1( &beta );
237 
238  // FLA_Copy( a21, z21 );
239  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
240  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
242  m_ahead,
243  a21, rs_A,
244  z21, inc_z );
247  m_ahead,
248  n_ahead,
249  buff_1,
250  A22, rs_A, cs_A,
251  v21, inc_v,
252  &beta,
253  z21, inc_z );
255  m_ahead,
256  sigma11,
257  z21, inc_z );
258 
259  // FLA_Ger( FLA_MINUS_ONE, a21, y21, A22 );
260  // FLA_Ger( FLA_MINUS_ONE, z21, v21, A22 );
261  FLA_Fused_Gerc2_ops_var1( m_ahead,
262  n_ahead,
263  buff_m1,
264  a21, rs_A,
265  y21, inc_y,
266  z21, inc_z,
267  v21, inc_v,
268  A22, rs_A, cs_A );
269 
270  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
273  m_behind,
274  n_ahead,
275  buff_1,
276  A02, rs_A, cs_A,
277  v21, inc_v,
278  buff_0,
279  s01, rs_S );
280  }
281 
282  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
283  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
285  n_behind,
286  a10t, cs_A,
287  t01, rs_T );
290  m_ahead,
291  n_behind,
292  buff_1,
293  A20, rs_A, cs_A,
294  a21, rs_A,
295  buff_1,
296  t01, rs_T );
297 
298  /*------------------------------------------------------------*/
299 
300  }
301 
302  // FLA_Obj_free( &v );
303  // FLA_Obj_free( &y );
304  // FLA_Obj_free( &z );
305  FLA_free( buff_v );
306  FLA_free( buff_y );
307  FLA_free( buff_z );
308 
309  return FLA_SUCCESS;
310 }
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofu_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_ofs_var2(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:114
FLA_Error FLA_Bidiag_UT_u_step_ofc_var2(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:514
FLA_Error FLA_Bidiag_UT_u_step_ofz_var2(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:714
FLA_Error FLA_Bidiag_UT_u_step_ofd_var2(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_fus_var2.c:314
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_ofc_var2(), FLA_Bidiag_UT_u_step_ofd_var2(), FLA_Bidiag_UT_u_step_ofs_var2(), FLA_Bidiag_UT_u_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blf_var2(), and FLA_Bidiag_UT_u_ofu_var2().

◆ FLA_Bidiag_UT_u_step_ofz_var2()

FLA_Error FLA_Bidiag_UT_u_step_ofz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex buff_A,
int  rs_A,
int  cs_A,
dcomplex buff_T,
int  rs_T,
int  cs_T,
dcomplex buff_S,
int  rs_S,
int  cs_S 
)
720 {
721  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
722  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
723  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
724 
725  dcomplex beta;
726  int i;
727 
728  // b_alg = FLA_Obj_length( T );
729  int b_alg = m_TS;
730 
731  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
732  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
733  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
734  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
735  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
736  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
737  int inc_v = 1;
738  int inc_y = 1;
739  int inc_z = 1;
740 
741  for ( i = 0; i < b_alg; ++i )
742  {
743  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
744  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
745  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
746  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
747  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
748  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
749  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
750 
751  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
752  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
753 
754  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
755  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
756 
757  dcomplex* v21 = buff_v + (i+1)*inc_v;
758 
759  dcomplex* y21 = buff_y + (i+1)*inc_y;
760 
761  dcomplex* z21 = buff_z + (i+1)*inc_z;
762 
763  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
764  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
765 
766  dcomplex* v21_t = v21 + (0 )*inc_v;
767  dcomplex* v21_b = v21 + (1 )*inc_v;
768 
769  int m_ahead = m_A - i - 1;
770  int n_ahead = n_A - i - 1;
771  int m_behind = i;
772  int n_behind = i;
773 
774  /*------------------------------------------------------------*/
775 
776  // FLA_Househ2_UT( FLA_LEFT,
777  // alpha11,
778  // a21, tau11 );
779  FLA_Househ2_UT_l_opz( m_ahead,
780  alpha11,
781  a21, rs_A,
782  tau11 );
783 
784  if ( n_ahead > 0 )
785  {
786  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
787  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
789  n_ahead,
790  a12t, cs_A,
791  y21, inc_y );
794  m_ahead,
795  n_ahead,
796  buff_1,
797  A22, rs_A, cs_A,
798  a21, rs_A,
799  buff_1,
800  y21, inc_y );
801 
802  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
804  n_ahead,
805  tau11,
806  y21, inc_y );
807 
808  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
810  n_ahead,
811  buff_m1,
812  y21, inc_y,
813  a12t, cs_A );
814 
815  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
816  FLA_Househ2_UT_r_opz( n_ahead - 1,
817  a12t_l,
818  a12t_r, cs_A,
819  sigma11 );
820 
821  // FLA_Set( FLA_ONE, v21_t );
822  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
823  *v21_t = *buff_1;
825  n_ahead - 1,
826  a12t_r, cs_A,
827  v21_b, inc_y );
828 
829  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
830  // FLA_Scal( FLA_MINUS_ONE, beta );
832  n_ahead,
833  y21, inc_y,
834  v21, inc_v,
835  &beta );
836  bl1_zneg1( &beta );
837 
838  // FLA_Copy( a21, z21 );
839  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
840  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
842  m_ahead,
843  a21, rs_A,
844  z21, inc_z );
847  m_ahead,
848  n_ahead,
849  buff_1,
850  A22, rs_A, cs_A,
851  v21, inc_v,
852  &beta,
853  z21, inc_z );
855  m_ahead,
856  sigma11,
857  z21, inc_z );
858 
859  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
860  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
861  FLA_Fused_Gerc2_opz_var1( m_ahead,
862  n_ahead,
863  buff_m1,
864  a21, rs_A,
865  y21, inc_y,
866  z21, inc_z,
867  v21, inc_v,
868  A22, rs_A, cs_A );
869 
870  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
873  m_behind,
874  n_ahead,
875  buff_1,
876  A02, rs_A, cs_A,
877  v21, inc_v,
878  buff_0,
879  s01, rs_S );
880  }
881 
882  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
883  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
885  n_behind,
886  a10t, cs_A,
887  t01, rs_T );
890  m_ahead,
891  n_behind,
892  buff_1,
893  A20, rs_A, cs_A,
894  a21, rs_A,
895  buff_1,
896  t01, rs_T );
897 
898  /*------------------------------------------------------------*/
899 
900  }
901 
902  // FLA_Obj_free( &v );
903  // FLA_Obj_free( &y );
904  // FLA_Obj_free( &z );
905  FLA_free( buff_v );
906  FLA_free( buff_y );
907  FLA_free( buff_z );
908 
909  return FLA_SUCCESS;
910 }
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_ofu_var2().