libflame  revision_anchor
Functions
FLA_Bidiag_UT_u_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_opt_var2 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var2 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var2 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var2 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var2 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var2 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_opt_var2()

FLA_Error FLA_Bidiag_UT_u_opt_var2 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14 {
15  return FLA_Bidiag_UT_u_step_opt_var2( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_opt_var2(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:18

References FLA_Bidiag_UT_u_step_opt_var2().

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_step_opc_var2()

FLA_Error FLA_Bidiag_UT_u_step_opc_var2 ( int  m_A,
int  n_A,
int  m_TS,
scomplex buff_A,
int  rs_A,
int  cs_A,
scomplex buff_T,
int  rs_T,
int  cs_T,
scomplex buff_S,
int  rs_S,
int  cs_S 
)
536 {
537  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
538  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
539  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
540 
541  scomplex beta;
542  int i;
543 
544  // b_alg = FLA_Obj_length( T );
545  int b_alg = m_TS;
546 
547  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
548  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
549  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
550  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
551  scomplex* buff_y = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
552  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
553  int inc_v = 1;
554  int inc_y = 1;
555  int inc_z = 1;
556 
557  for ( i = 0; i < b_alg; ++i )
558  {
559  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
560  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
561  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
562  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
563  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
564  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
565  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
566 
567  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
568  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
569 
570  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
571  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
572 
573  scomplex* v21 = buff_v + (i+1)*inc_v;
574 
575  scomplex* y21 = buff_y + (i+1)*inc_y;
576 
577  scomplex* z21 = buff_z + (i+1)*inc_z;
578 
579  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
580  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
581 
582  scomplex* v21_t = v21 + (0 )*inc_v;
583  scomplex* v21_b = v21 + (1 )*inc_v;
584 
585  int m_ahead = m_A - i - 1;
586  int n_ahead = n_A - i - 1;
587  int m_behind = i;
588  int n_behind = i;
589 
590  /*------------------------------------------------------------*/
591 
592  // FLA_Househ2_UT( FLA_LEFT,
593  // alpha11,
594  // a21, tau11 );
595  FLA_Househ2_UT_l_opc( m_ahead,
596  alpha11,
597  a21, rs_A,
598  tau11 );
599 
600  if ( n_ahead > 0 )
601  {
602  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
603  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
605  n_ahead,
606  a12t, cs_A,
607  y21, inc_y );
610  m_ahead,
611  n_ahead,
612  buff_1,
613  A22, rs_A, cs_A,
614  a21, rs_A,
615  buff_1,
616  y21, inc_y );
617 
618  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
620  n_ahead,
621  tau11,
622  y21, inc_y );
623 
624  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
626  n_ahead,
627  buff_m1,
628  y21, inc_y,
629  a12t, cs_A );
630 
631  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
632  FLA_Househ2_UT_r_opc( n_ahead - 1,
633  a12t_l,
634  a12t_r, cs_A,
635  sigma11 );
636 
637  // FLA_Set( FLA_ONE, v21_t );
638  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
639  *v21_t = *buff_1;
641  n_ahead - 1,
642  a12t_r, cs_A,
643  v21_b, inc_y );
644 
645  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
646  // FLA_Scal( FLA_MINUS_ONE, beta );
648  n_ahead,
649  y21, inc_y,
650  v21, inc_v,
651  &beta );
652  bl1_cneg1( &beta );
653 
654  // FLA_Copy( a21, z21 );
655  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
656  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
658  m_ahead,
659  a21, rs_A,
660  z21, inc_z );
663  m_ahead,
664  n_ahead,
665  buff_1,
666  A22, rs_A, cs_A,
667  v21, inc_v,
668  &beta,
669  z21, inc_z );
671  m_ahead,
672  sigma11,
673  z21, inc_z );
674 
675  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
676  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
679  m_ahead,
680  n_ahead,
681  buff_m1,
682  a21, rs_A,
683  y21, inc_y,
684  A22, rs_A, cs_A );
687  m_ahead,
688  n_ahead,
689  buff_m1,
690  z21, inc_z,
691  v21, inc_v,
692  A22, rs_A, cs_A );
693 
694  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
697  m_behind,
698  n_ahead,
699  buff_1,
700  A02, rs_A, cs_A,
701  v21, inc_v,
702  buff_0,
703  s01, rs_S );
704  }
705 
706  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
707  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
709  n_behind,
710  a10t, cs_A,
711  t01, rs_T );
714  m_ahead,
715  n_behind,
716  buff_1,
717  A20, rs_A, cs_A,
718  a21, rs_A,
719  buff_1,
720  t01, rs_T );
721 
722  /*------------------------------------------------------------*/
723 
724  }
725 
726  // FLA_Obj_free( &v );
727  // FLA_Obj_free( &y );
728  // FLA_Obj_free( &z );
729  FLA_free( buff_v );
730  FLA_free( buff_y );
731  FLA_free( buff_z );
732 
733  return FLA_SUCCESS;
734 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_opd_var2()

FLA_Error FLA_Bidiag_UT_u_step_opd_var2 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)
328 {
329  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
330  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
331  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
332 
333  double beta;
334  int i;
335 
336  // b_alg = FLA_Obj_length( T );
337  int b_alg = m_TS;
338 
339  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
340  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
341  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
342  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
343  double* buff_y = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
344  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
345  int inc_v = 1;
346  int inc_y = 1;
347  int inc_z = 1;
348 
349  for ( i = 0; i < b_alg; ++i )
350  {
351  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
352  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
353  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
354  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
355  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
356  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
357  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
358 
359  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
360  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
361 
362  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
363  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
364 
365  double* v21 = buff_v + (i+1)*inc_v;
366 
367  double* y21 = buff_y + (i+1)*inc_y;
368 
369  double* z21 = buff_z + (i+1)*inc_z;
370 
371  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
372  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
373 
374  double* v21_t = v21 + (0 )*inc_v;
375  double* v21_b = v21 + (1 )*inc_v;
376 
377  int m_ahead = m_A - i - 1;
378  int n_ahead = n_A - i - 1;
379  int m_behind = i;
380  int n_behind = i;
381 
382  /*------------------------------------------------------------*/
383 
384  // FLA_Househ2_UT( FLA_LEFT,
385  // alpha11,
386  // a21, tau11 );
387  FLA_Househ2_UT_l_opd( m_ahead,
388  alpha11,
389  a21, rs_A,
390  tau11 );
391 
392  if ( n_ahead > 0 )
393  {
394  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
395  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
397  n_ahead,
398  a12t, cs_A,
399  y21, inc_y );
402  m_ahead,
403  n_ahead,
404  buff_1,
405  A22, rs_A, cs_A,
406  a21, rs_A,
407  buff_1,
408  y21, inc_y );
409 
410  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
412  n_ahead,
413  tau11,
414  y21, inc_y );
415 
416  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
418  n_ahead,
419  buff_m1,
420  y21, inc_y,
421  a12t, cs_A );
422 
423  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
424  FLA_Househ2_UT_r_opd( n_ahead - 1,
425  a12t_l,
426  a12t_r, cs_A,
427  sigma11 );
428 
429  // FLA_Set( FLA_ONE, v21_t );
430  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
431  *v21_t = *buff_1;
433  n_ahead - 1,
434  a12t_r, cs_A,
435  v21_b, inc_y );
436 
437  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
438  // FLA_Scal( FLA_MINUS_ONE, beta );
440  n_ahead,
441  y21, inc_y,
442  v21, inc_v,
443  &beta );
444  bl1_dneg1( &beta );
445 
446  // FLA_Copy( a21, z21 );
447  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
448  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
450  m_ahead,
451  a21, rs_A,
452  z21, inc_z );
455  m_ahead,
456  n_ahead,
457  buff_1,
458  A22, rs_A, cs_A,
459  v21, inc_v,
460  &beta,
461  z21, inc_z );
463  m_ahead,
464  sigma11,
465  z21, inc_z );
466 
467  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
468  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
471  m_ahead,
472  n_ahead,
473  buff_m1,
474  a21, rs_A,
475  y21, inc_y,
476  A22, rs_A, cs_A );
479  m_ahead,
480  n_ahead,
481  buff_m1,
482  z21, inc_z,
483  v21, inc_v,
484  A22, rs_A, cs_A );
485 
486  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
489  m_behind,
490  n_ahead,
491  buff_1,
492  A02, rs_A, cs_A,
493  v21, inc_v,
494  buff_0,
495  s01, rs_S );
496  }
497 
498  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
499  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
501  n_behind,
502  a10t, cs_A,
503  t01, rs_T );
506  m_ahead,
507  n_behind,
508  buff_1,
509  A20, rs_A, cs_A,
510  a21, rs_A,
511  buff_1,
512  t01, rs_T );
513 
514  /*------------------------------------------------------------*/
515 
516  }
517 
518  // FLA_Obj_free( &v );
519  // FLA_Obj_free( &y );
520  // FLA_Obj_free( &z );
521  FLA_free( buff_v );
522  FLA_free( buff_y );
523  FLA_free( buff_z );
524 
525  return FLA_SUCCESS;
526 }
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_ops_var2()

FLA_Error FLA_Bidiag_UT_u_step_ops_var2 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
124 
125  float beta;
126  int i;
127 
128  // b_alg = FLA_Obj_length( T );
129  int b_alg = m_TS;
130 
131  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
132  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
133  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
134  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
135  float* buff_y = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
136  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
137  int inc_v = 1;
138  int inc_y = 1;
139  int inc_z = 1;
140 
141  for ( i = 0; i < b_alg; ++i )
142  {
143  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
144  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
145  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
146  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
147  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
148  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150 
151  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
152  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
153 
154  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
155  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
156 
157  float* v21 = buff_v + (i+1)*inc_v;
158 
159  float* y21 = buff_y + (i+1)*inc_y;
160 
161  float* z21 = buff_z + (i+1)*inc_z;
162 
163  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
164  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
165 
166  float* v21_t = v21 + (0 )*inc_v;
167  float* v21_b = v21 + (1 )*inc_v;
168 
169  int m_ahead = m_A - i - 1;
170  int n_ahead = n_A - i - 1;
171  int m_behind = i;
172  int n_behind = i;
173 
174  /*------------------------------------------------------------*/
175 
176  // FLA_Househ2_UT( FLA_LEFT,
177  // alpha11,
178  // a21, tau11 );
179  FLA_Househ2_UT_l_ops( m_ahead,
180  alpha11,
181  a21, rs_A,
182  tau11 );
183 
184  if ( n_ahead > 0 )
185  {
186  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
187  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
189  n_ahead,
190  a12t, cs_A,
191  y21, inc_y );
194  m_ahead,
195  n_ahead,
196  buff_1,
197  A22, rs_A, cs_A,
198  a21, rs_A,
199  buff_1,
200  y21, inc_y );
201 
202  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
204  n_ahead,
205  tau11,
206  y21, inc_y );
207 
208  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
210  n_ahead,
211  buff_m1,
212  y21, inc_y,
213  a12t, cs_A );
214 
215  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
216  FLA_Househ2_UT_r_ops( n_ahead - 1,
217  a12t_l,
218  a12t_r, cs_A,
219  sigma11 );
220 
221  // FLA_Set( FLA_ONE, v21_t );
222  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
223  *v21_t = *buff_1;
225  n_ahead - 1,
226  a12t_r, cs_A,
227  v21_b, inc_y );
228 
229  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
230  // FLA_Scal( FLA_MINUS_ONE, beta );
232  n_ahead,
233  y21, inc_y,
234  v21, inc_v,
235  &beta );
236  bl1_sneg1( &beta );
237 
238  // FLA_Copy( a21, z21 );
239  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
240  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
242  m_ahead,
243  a21, rs_A,
244  z21, inc_z );
247  m_ahead,
248  n_ahead,
249  buff_1,
250  A22, rs_A, cs_A,
251  v21, inc_v,
252  &beta,
253  z21, inc_z );
255  m_ahead,
256  sigma11,
257  z21, inc_z );
258 
259  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
260  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
263  m_ahead,
264  n_ahead,
265  buff_m1,
266  a21, rs_A,
267  y21, inc_y,
268  A22, rs_A, cs_A );
271  m_ahead,
272  n_ahead,
273  buff_m1,
274  z21, inc_z,
275  v21, inc_v,
276  A22, rs_A, cs_A );
277 
278  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
281  m_behind,
282  n_ahead,
283  buff_1,
284  A02, rs_A, cs_A,
285  v21, inc_v,
286  buff_0,
287  s01, rs_S );
288  }
289 
290  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
291  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
293  n_behind,
294  a10t, cs_A,
295  t01, rs_T );
298  m_ahead,
299  n_behind,
300  buff_1,
301  A20, rs_A, cs_A,
302  a21, rs_A,
303  buff_1,
304  t01, rs_T );
305 
306  /*------------------------------------------------------------*/
307 
308  }
309 
310  // FLA_Obj_free( &v );
311  // FLA_Obj_free( &y );
312  // FLA_Obj_free( &z );
313  FLA_free( buff_v );
314  FLA_free( buff_y );
315  FLA_free( buff_z );
316 
317  return FLA_SUCCESS;
318 }
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().

◆ FLA_Bidiag_UT_u_step_opt_var2()

FLA_Error FLA_Bidiag_UT_u_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_opz_var2(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:738
FLA_Error FLA_Bidiag_UT_u_step_opc_var2(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:530
FLA_Error FLA_Bidiag_UT_u_step_opd_var2(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:322
FLA_Error FLA_Bidiag_UT_u_step_ops_var2(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var2.c:114
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_opc_var2(), FLA_Bidiag_UT_u_step_opd_var2(), FLA_Bidiag_UT_u_step_ops_var2(), FLA_Bidiag_UT_u_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var2(), and FLA_Bidiag_UT_u_opt_var2().

◆ FLA_Bidiag_UT_u_step_opz_var2()

FLA_Error FLA_Bidiag_UT_u_step_opz_var2 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex buff_A,
int  rs_A,
int  cs_A,
dcomplex buff_T,
int  rs_T,
int  cs_T,
dcomplex buff_S,
int  rs_S,
int  cs_S 
)
744 {
745  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
746  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
747  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
748 
749  dcomplex beta;
750  int i;
751 
752  // b_alg = FLA_Obj_length( T );
753  int b_alg = m_TS;
754 
755  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
756  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
757  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
758  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
759  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
760  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
761  int inc_v = 1;
762  int inc_y = 1;
763  int inc_z = 1;
764 
765  for ( i = 0; i < b_alg; ++i )
766  {
767  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
768  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
769  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
770  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
771  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
772  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
773  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
774 
775  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
776  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
777 
778  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
779  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
780 
781  dcomplex* v21 = buff_v + (i+1)*inc_v;
782 
783  dcomplex* y21 = buff_y + (i+1)*inc_y;
784 
785  dcomplex* z21 = buff_z + (i+1)*inc_z;
786 
787  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
788  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
789 
790  dcomplex* v21_t = v21 + (0 )*inc_v;
791  dcomplex* v21_b = v21 + (1 )*inc_v;
792 
793  int m_ahead = m_A - i - 1;
794  int n_ahead = n_A - i - 1;
795  int m_behind = i;
796  int n_behind = i;
797 
798  /*------------------------------------------------------------*/
799 
800  // FLA_Househ2_UT( FLA_LEFT,
801  // alpha11,
802  // a21, tau11 );
803  FLA_Househ2_UT_l_opz( m_ahead,
804  alpha11,
805  a21, rs_A,
806  tau11 );
807 
808  if ( n_ahead > 0 )
809  {
810  // FLA_Copyt( FLA_CONJ_TRANSPOSE, a12t, y21 );
811  // FLA_Gemvc( FLA_CONJ_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, a21, FLA_ONE, y21 );
813  n_ahead,
814  a12t, cs_A,
815  y21, inc_y );
818  m_ahead,
819  n_ahead,
820  buff_1,
821  A22, rs_A, cs_A,
822  a21, rs_A,
823  buff_1,
824  y21, inc_y );
825 
826  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, y21 );
828  n_ahead,
829  tau11,
830  y21, inc_y );
831 
832  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, FLA_MINUS_ONE, y21, a12t );
834  n_ahead,
835  buff_m1,
836  y21, inc_y,
837  a12t, cs_A );
838 
839  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
840  FLA_Househ2_UT_r_opz( n_ahead - 1,
841  a12t_l,
842  a12t_r, cs_A,
843  sigma11 );
844 
845  // FLA_Set( FLA_ONE, v21_t );
846  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
847  *v21_t = *buff_1;
849  n_ahead - 1,
850  a12t_r, cs_A,
851  v21_b, inc_y );
852 
853  // FLA_Dotc( FLA_CONJUGATE, y21, v21, beta );
854  // FLA_Scal( FLA_MINUS_ONE, beta );
856  n_ahead,
857  y21, inc_y,
858  v21, inc_v,
859  &beta );
860  bl1_zneg1( &beta );
861 
862  // FLA_Copy( a21, z21 );
863  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A22, v21, beta, z21 );
864  // FLA_Inv_scalc( FLA_NO_CONJUGATE, sigma11, z21 );
866  m_ahead,
867  a21, rs_A,
868  z21, inc_z );
871  m_ahead,
872  n_ahead,
873  buff_1,
874  A22, rs_A, cs_A,
875  v21, inc_v,
876  &beta,
877  z21, inc_z );
879  m_ahead,
880  sigma11,
881  z21, inc_z );
882 
883  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y21, A22 );
884  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z21, v21, A22 );
887  m_ahead,
888  n_ahead,
889  buff_m1,
890  a21, rs_A,
891  y21, inc_y,
892  A22, rs_A, cs_A );
895  m_ahead,
896  n_ahead,
897  buff_m1,
898  z21, inc_z,
899  v21, inc_v,
900  A22, rs_A, cs_A );
901 
902  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
905  m_behind,
906  n_ahead,
907  buff_1,
908  A02, rs_A, cs_A,
909  v21, inc_v,
910  buff_0,
911  s01, rs_S );
912  }
913 
914  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
915  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
917  n_behind,
918  a10t, cs_A,
919  t01, rs_T );
922  m_ahead,
923  n_behind,
924  buff_1,
925  A20, rs_A, cs_A,
926  a21, rs_A,
927  buff_1,
928  t01, rs_T );
929 
930  /*------------------------------------------------------------*/
931 
932  }
933 
934  // FLA_Obj_free( &v );
935  // FLA_Obj_free( &y );
936  // FLA_Obj_free( &z );
937  FLA_free( buff_v );
938  FLA_free( buff_y );
939  FLA_free( buff_z );
940 
941  return FLA_SUCCESS;
942 }
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zinvscalv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var2().