libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
FLA_Error | FLA_QR2_UT_blk_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qr2ut_t *cntl) |
FLA_Error | FLA_QR2_UT_blk_var2 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qr2ut_t *cntl) |
FLA_Error | FLA_QR2_UT_unb_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T) |
FLA_Error | FLA_QR2_UT_opt_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T) |
FLA_Error | FLA_QR2_UT_ops_var1 (int m_UT, int m_D, float *U, int rs_U, int cs_U, float *D, int rs_D, int cs_D, float *T, int rs_T, int cs_T) |
FLA_Error | FLA_QR2_UT_opd_var1 (int m_UT, int m_D, double *U, int rs_U, int cs_U, double *D, int rs_D, int cs_D, double *T, int rs_T, int cs_T) |
FLA_Error | FLA_QR2_UT_opc_var1 (int m_UT, int m_D, scomplex *U, int rs_U, int cs_U, scomplex *D, int rs_D, int cs_D, scomplex *T, int rs_T, int cs_T) |
FLA_Error | FLA_QR2_UT_opz_var1 (int m_UT, int m_D, dcomplex *U, int rs_U, int cs_U, dcomplex *D, int rs_D, int cs_D, dcomplex *T, int rs_T, int cs_T) |
FLA_Error FLA_QR2_UT_blk_var1 ( FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qr2ut_t * cntl )
References FLA_Axpy_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_internal(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR2_UT_internal(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Trsm_internal().
Referenced by FLA_QR2_UT_internal().
{
  // Blocked variant 1.
  //
  // Walks down the diagonal of U in b x b blocks (U11) together with the
  // matching column panels of D (D1) and T (T1).  Each step factors the
  // current panel through FLA_QR2_UT_internal() and then updates the
  // columns to its right (U12, D2).  The not-yet-visited right part of T
  // is borrowed as workspace (W12) for that update.
  //
  // Always returns FLA_SUCCESS; return values of the sub-operations are
  // not inspected here.

  FLA_Obj UTL,   UTR,      U00, U01, U02,
          UBL,   UBR,      U10, U11, U12,
                           U20, U21, U22;

  FLA_Obj DL,    DR,       D0,  D1,  D2;

  FLA_Obj TL,    TR,       T0,  T1,  W12;

  // Top/bottom splits of the current T panel and of the workspace.
  FLA_Obj T1T,   T1B;
  FLA_Obj W12T,  W12B;

  dim_t   b_alg, b, mn_left;

  // The algorithmic blocksize is implied by the number of rows of T.
  b_alg = FLA_Obj_length( T );

  FLA_Part_2x2( U,    &UTL, &UTR,
                      &UBL, &UBR,     0, 0, FLA_TL );

  FLA_Part_1x2( D,    &DL,  &DR,      0, FLA_LEFT );

  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );

  while ( FLA_Obj_min_dim( UBR ) > 0 )
  {
    // Do not step past what is left of U.
    mn_left = FLA_Obj_min_dim( UBR );
    b       = min( b_alg, mn_left );

    FLA_Repart_2x2_to_3x3( UTL, /**/ UTR,       &U00, /**/ &U01, &U02,
                        /* ************* */   /* ******************** */
                                                &U10, /**/ &U11, &U12,
                           UBL, /**/ UBR,       &U20, /**/ &U21, &U22,
                           b, b, FLA_BR );

    FLA_Repart_1x2_to_1x3( DL,  /**/ DR,        &D0, /**/ &D1, &D2,
                           b, FLA_RIGHT );

    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &W12,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    // T1T = top b rows of T1.
    FLA_Part_2x1( T1,   &T1T,
                        &T1B,    b, FLA_TOP );

    // [ U11,
    //   D1,  T1T ] = FLA_QR2_UT( U11,
    //                            D1,  T1T );
    FLA_QR2_UT_internal( U11,
                         D1, T1T,
                         FLA_Cntl_sub_qr2ut( cntl ) );

    // Nothing to update once the panel reaches the last columns of U.
    if ( FLA_Obj_width( U12 ) > 0 )
    {
      // W12T = top b rows of the workspace.
      FLA_Part_2x1( W12,  &W12T,
                          &W12B,   b, FLA_TOP );

      // W12T = inv( triu( T1T ) )' * ( U12 + D1' * D2 );
      //
      //   step 1:  W12T = U12
      FLA_Copy_internal( U12, W12T,
                         FLA_Cntl_sub_copy( cntl ) );

      //   step 2:  W12T = W12T + D1' * D2
      FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
                         FLA_ONE, D1, D2, FLA_ONE, W12T,
                         FLA_Cntl_sub_gemm1( cntl ) );

      //   step 3:  W12T = inv( triu( T1T ) )' * W12T
      FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                         FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
                         FLA_ONE, T1T, W12T,
                         FLA_Cntl_sub_trsm( cntl ) );

      // U12 = U12 - W12T;
      FLA_Axpy_internal( FLA_MINUS_ONE, W12T, U12,
                         FLA_Cntl_sub_axpy( cntl ) );

      // D2 = D2 - D1 * W12T;
      FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
                         FLA_MINUS_ONE, D1, W12T, FLA_ONE, D2,
                         FLA_Cntl_sub_gemm2( cntl ) );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR,       U00, U01, /**/ U02,
                                                     U10, U11, /**/ U12,
                            /* ************** */  /* ****************** */
                              &UBL, /**/ &UBR,       U20, U21, /**/ U22,
                              FLA_TL );

    FLA_Cont_with_1x3_to_1x2( &DL,  /**/ &DR,        D0, D1, /**/ D2,
                              FLA_LEFT );

    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ W12,
                              FLA_LEFT );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_QR2_UT_blk_var2 ( FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qr2ut_t * cntl )
References FLA_Cont_with_3x1_to_2x1(), FLA_Determine_blocksize(), FLA_Obj_length(), FLA_Part_2x1(), FLA_QR2_UT_internal(), and FLA_Repart_2x1_to_3x1().
Referenced by FLA_QR2_UT_internal().
{
  // Blocked variant 2.
  //
  // Marches through D and T from top to bottom in row panels of height b
  // and invokes FLA_QR2_UT_internal() on the whole of U together with the
  // current panels D1 and T1.
  //
  // Always returns FLA_SUCCESS; the return value of the sub-operation is
  // not inspected here.

  FLA_Obj DT,              D0,
          DB,              D1,
                           D2;

  FLA_Obj TT,              T0,
          TB,              T1,
                           T2;

  dim_t   b;

  // D itself is never repartitioned, so its length is fixed for the loop.
  dim_t   m_D_total = FLA_Obj_length( D );

  FLA_Part_2x1( D,    &DT,
                      &DB,            0, FLA_TOP );

  FLA_Part_2x1( T,    &TT,
                      &TB,            0, FLA_TOP );

  while ( FLA_Obj_length( DT ) < m_D_total )
  {
    // Panel height, chosen from the control tree's blocksize and what is
    // left in DB.
    b = FLA_Determine_blocksize( DB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x1_to_3x1( DT,                &D0,
                        /* ** */            /* ****** */
                                              &D1,
                           DB,                &D2,        b, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( TT,                &T0,
                        /* ** */            /* ****** */
                                              &T1,
                           TB,                &T2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    // [ U,
    //   D1, T1 ] = FLA_QR2_UT( U,
    //                          D1, T1 );
    FLA_QR2_UT_internal( U,
                         D1, T1,
                         FLA_Cntl_sub_qr2ut( cntl ) );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x1_to_2x1( &DT,                D0,
                                                  D1,
                            /* ** */           /* ****** */
                              &DB,                D2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &TT,                T0,
                                                  T1,
                            /* ** */           /* ****** */
                              &TB,                T2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_QR2_UT_opc_var1 ( int m_UT, int m_D, scomplex * U, int rs_U, int cs_U, scomplex * D, int rs_D, int cs_D, scomplex * T, int rs_T, int cs_T )
References bli_cgemv(), BLIS_CONJ_TRANSPOSE, BLIS_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_ONE, and FLA_ZERO.
Referenced by FLA_QR2_UT_opt_var1().
{
  // Optimized unblocked variant 1, scomplex kernel.
  //
  // Operates directly on raw buffers: for each of the m_UT diagonal
  // positions of U it builds a Householder transform from (upsilon11, d1),
  // applies it to the columns to the right, and fills in column i of T.
  //
  // NOTE(review): buff_U, buff_D and buff_T are the matrix base pointers;
  // the prototype listing names these parameters U, D and T -- presumably
  // the definition uses the buff_ names; confirm.

  // Scalars 1 and 0 as scomplex values, handed to gemv below.
  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );

  // D0 always begins at the first column of D; only its width (mn_behind)
  // changes from one iteration to the next.
  scomplex* D0     = buff_D;

  int       i;

  for ( i = 0; i < m_UT; ++i )
  {
    // Diagonal element (i,i) of U and the start of row i to its right.
    scomplex* upsilon11 = buff_U + i*cs_U + i*rs_U;
    scomplex* u12t      = upsilon11 + cs_U;

    // Column i of D and the first column after it.
    scomplex* d1        = buff_D + i*cs_D;
    scomplex* D2        = d1 + cs_D;

    // Top of column i of T and its diagonal element (i,i).
    scomplex* t01       = buff_T + i*cs_T;
    scomplex* tau11     = t01 + i*rs_T;

    // Number of columns after / before column i.
    int       mn_ahead  = m_UT - i - 1;
    int       mn_behind = i;

    //------------------------------------------------------------//

    // FLA_Househ2_UT( FLA_LEFT,
    //                 upsilon11,
    //                 d1, tau11 );
    FLA_Househ2_UT_l_opc( m_D,
                          upsilon11,
                          d1, rs_D,
                          tau11 );

    // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
    //                                       D2 );
    FLA_Apply_H2_UT_l_opc_var1( m_D,
                                mn_ahead,
                                tau11,
                                d1, rs_D,
                                u12t, cs_U,
                                D2, rs_D, cs_D );

    // t01 = D0' * d1;
    bli_cgemv( BLIS_CONJ_TRANSPOSE,
               BLIS_NO_CONJUGATE,
               m_D,
               mn_behind,
               buff_1,
               D0, rs_D, cs_D,
               d1, rs_D,
               buff_0,
               t01, rs_T );

    //------------------------------------------------------------//
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_QR2_UT_opd_var1 ( int m_UT, int m_D, double * U, int rs_U, int cs_U, double * D, int rs_D, int cs_D, double * T, int rs_T, int cs_T )
References bli_dgemv(), BLIS_CONJ_TRANSPOSE, BLIS_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_ONE, and FLA_ZERO.
Referenced by FLA_QR2_UT_opt_var1().
{
  // Optimized unblocked variant 1, double kernel.
  //
  // Operates directly on raw buffers: for each of the m_UT diagonal
  // positions of U it builds a Householder transform from (upsilon11, d1),
  // applies it to the columns to the right, and fills in column i of T.
  //
  // NOTE(review): buff_U, buff_D and buff_T are the matrix base pointers;
  // the prototype listing names these parameters U, D and T -- presumably
  // the definition uses the buff_ names; confirm.

  // Scalars 1 and 0 as double values, handed to gemv below.
  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );

  // D0 always begins at the first column of D; only its width (mn_behind)
  // changes from one iteration to the next.
  double* D0     = buff_D;

  int     i;

  for ( i = 0; i < m_UT; ++i )
  {
    // Diagonal element (i,i) of U and the start of row i to its right.
    double* upsilon11 = buff_U + i*cs_U + i*rs_U;
    double* u12t      = upsilon11 + cs_U;

    // Column i of D and the first column after it.
    double* d1        = buff_D + i*cs_D;
    double* D2        = d1 + cs_D;

    // Top of column i of T and its diagonal element (i,i).
    double* t01       = buff_T + i*cs_T;
    double* tau11     = t01 + i*rs_T;

    // Number of columns after / before column i.
    int     mn_ahead  = m_UT - i - 1;
    int     mn_behind = i;

    //------------------------------------------------------------//

    // FLA_Househ2_UT( FLA_LEFT,
    //                 upsilon11,
    //                 d1, tau11 );
    FLA_Househ2_UT_l_opd( m_D,
                          upsilon11,
                          d1, rs_D,
                          tau11 );

    // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
    //                                       D2 );
    FLA_Apply_H2_UT_l_opd_var1( m_D,
                                mn_ahead,
                                tau11,
                                d1, rs_D,
                                u12t, cs_U,
                                D2, rs_D, cs_D );

    // t01 = D0' * d1;
    bli_dgemv( BLIS_CONJ_TRANSPOSE,
               BLIS_NO_CONJUGATE,
               m_D,
               mn_behind,
               buff_1,
               D0, rs_D, cs_D,
               d1, rs_D,
               buff_0,
               t01, rs_T );

    //------------------------------------------------------------//
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_QR2_UT_ops_var1 ( int m_UT, int m_D, float * U, int rs_U, int cs_U, float * D, int rs_D, int cs_D, float * T, int rs_T, int cs_T )
References bli_sgemv(), BLIS_CONJ_TRANSPOSE, BLIS_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_ONE, and FLA_ZERO.
Referenced by FLA_QR2_UT_opt_var1().
{
  // Optimized unblocked variant 1, float kernel.
  //
  // Operates directly on raw buffers: for each of the m_UT diagonal
  // positions of U it builds a Householder transform from (upsilon11, d1),
  // applies it to the columns to the right, and fills in column i of T.
  //
  // NOTE(review): buff_U, buff_D and buff_T are the matrix base pointers;
  // the prototype listing names these parameters U, D and T -- presumably
  // the definition uses the buff_ names; confirm.

  // Scalars 1 and 0 as float values, handed to gemv below.
  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );

  // D0 always begins at the first column of D; only its width (mn_behind)
  // changes from one iteration to the next.
  float* D0     = buff_D;

  int    i;

  for ( i = 0; i < m_UT; ++i )
  {
    // Diagonal element (i,i) of U and the start of row i to its right.
    float* upsilon11 = buff_U + i*cs_U + i*rs_U;
    float* u12t      = upsilon11 + cs_U;

    // Column i of D and the first column after it.
    float* d1        = buff_D + i*cs_D;
    float* D2        = d1 + cs_D;

    // Top of column i of T and its diagonal element (i,i).
    float* t01       = buff_T + i*cs_T;
    float* tau11     = t01 + i*rs_T;

    // Number of columns after / before column i.
    int    mn_ahead  = m_UT - i - 1;
    int    mn_behind = i;

    //------------------------------------------------------------//

    // FLA_Househ2_UT( FLA_LEFT,
    //                 upsilon11,
    //                 d1, tau11 );
    FLA_Househ2_UT_l_ops( m_D,
                          upsilon11,
                          d1, rs_D,
                          tau11 );

    // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
    //                                       D2 );
    FLA_Apply_H2_UT_l_ops_var1( m_D,
                                mn_ahead,
                                tau11,
                                d1, rs_D,
                                u12t, cs_U,
                                D2, rs_D, cs_D );

    // t01 = D0' * d1;
    bli_sgemv( BLIS_CONJ_TRANSPOSE,
               BLIS_NO_CONJUGATE,
               m_D,
               mn_behind,
               buff_1,
               D0, rs_D, cs_D,
               d1, rs_D,
               buff_0,
               t01, rs_T );

    //------------------------------------------------------------//
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_QR2_UT_opt_var1 ( FLA_Obj U, FLA_Obj D, FLA_Obj T )
References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_QR2_UT_opc_var1(), FLA_QR2_UT_opd_var1(), FLA_QR2_UT_ops_var1(), and FLA_QR2_UT_opz_var1().
Referenced by FLA_QR2_UT_internal().
{
  // Optimized unblocked variant 1, object front end.
  //
  // Pulls dimensions, strides and buffer pointers out of the U, D and T
  // objects and forwards them to the kernel matching the datatype of U.
  // The datatypes of D and T are not examined here.

  FLA_Datatype datatype = FLA_Obj_datatype( U );

  // Row counts of U and D.
  int          m_UT     = FLA_Obj_length( U );
  int          m_D      = FLA_Obj_length( D );

  // Row and column strides of each operand.
  int          rs_U     = FLA_Obj_row_stride( U );
  int          cs_U     = FLA_Obj_col_stride( U );

  int          rs_D     = FLA_Obj_row_stride( D );
  int          cs_D     = FLA_Obj_col_stride( D );

  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );

  // There is no default case: a datatype other than the four below does
  // no work and still reaches the FLA_SUCCESS return.
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_U = FLA_FLOAT_PTR( U );
      float* buff_D = FLA_FLOAT_PTR( D );
      float* buff_T = FLA_FLOAT_PTR( T );

      FLA_QR2_UT_ops_var1( m_UT,
                           m_D,
                           buff_U, rs_U, cs_U,
                           buff_D, rs_D, cs_D,
                           buff_T, rs_T, cs_T );
      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_U = FLA_DOUBLE_PTR( U );
      double* buff_D = FLA_DOUBLE_PTR( D );
      double* buff_T = FLA_DOUBLE_PTR( T );

      FLA_QR2_UT_opd_var1( m_UT,
                           m_D,
                           buff_U, rs_U, cs_U,
                           buff_D, rs_D, cs_D,
                           buff_T, rs_T, cs_T );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_U = FLA_COMPLEX_PTR( U );
      scomplex* buff_D = FLA_COMPLEX_PTR( D );
      scomplex* buff_T = FLA_COMPLEX_PTR( T );

      FLA_QR2_UT_opc_var1( m_UT,
                           m_D,
                           buff_U, rs_U, cs_U,
                           buff_D, rs_D, cs_D,
                           buff_T, rs_T, cs_T );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
      dcomplex* buff_D = FLA_DOUBLE_COMPLEX_PTR( D );
      dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );

      FLA_QR2_UT_opz_var1( m_UT,
                           m_D,
                           buff_U, rs_U, cs_U,
                           buff_D, rs_D, cs_D,
                           buff_T, rs_T, cs_T );
      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_QR2_UT_opz_var1 ( int m_UT, int m_D, dcomplex * U, int rs_U, int cs_U, dcomplex * D, int rs_D, int cs_D, dcomplex * T, int rs_T, int cs_T )
References bli_zgemv(), BLIS_CONJ_TRANSPOSE, BLIS_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_ONE, and FLA_ZERO.
Referenced by FLA_QR2_UT_opt_var1().
{
  // Optimized unblocked variant 1, dcomplex kernel.
  //
  // Operates directly on raw buffers: for each of the m_UT diagonal
  // positions of U it builds a Householder transform from (upsilon11, d1),
  // applies it to the columns to the right, and fills in column i of T.
  //
  // NOTE(review): buff_U, buff_D and buff_T are the matrix base pointers;
  // the prototype listing names these parameters U, D and T -- presumably
  // the definition uses the buff_ names; confirm.

  // Scalars 1 and 0 as dcomplex values, handed to gemv below.
  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );

  // D0 always begins at the first column of D; only its width (mn_behind)
  // changes from one iteration to the next.
  dcomplex* D0     = buff_D;

  int       i;

  for ( i = 0; i < m_UT; ++i )
  {
    // Diagonal element (i,i) of U and the start of row i to its right.
    dcomplex* upsilon11 = buff_U + i*cs_U + i*rs_U;
    dcomplex* u12t      = upsilon11 + cs_U;

    // Column i of D and the first column after it.
    dcomplex* d1        = buff_D + i*cs_D;
    dcomplex* D2        = d1 + cs_D;

    // Top of column i of T and its diagonal element (i,i).
    dcomplex* t01       = buff_T + i*cs_T;
    dcomplex* tau11     = t01 + i*rs_T;

    // Number of columns after / before column i.
    int       mn_ahead  = m_UT - i - 1;
    int       mn_behind = i;

    //------------------------------------------------------------//

    // FLA_Househ2_UT( FLA_LEFT,
    //                 upsilon11,
    //                 d1, tau11 );
    FLA_Househ2_UT_l_opz( m_D,
                          upsilon11,
                          d1, rs_D,
                          tau11 );

    // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
    //                                       D2 );
    FLA_Apply_H2_UT_l_opz_var1( m_D,
                                mn_ahead,
                                tau11,
                                d1, rs_D,
                                u12t, cs_U,
                                D2, rs_D, cs_D );

    // t01 = D0' * d1;
    bli_zgemv( BLIS_CONJ_TRANSPOSE,
               BLIS_NO_CONJUGATE,
               m_D,
               mn_behind,
               buff_1,
               D0, rs_D, cs_D,
               d1, rs_D,
               buff_0,
               t01, rs_T );

    //------------------------------------------------------------//
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_QR2_UT_unb_var1 ( FLA_Obj U, FLA_Obj D, FLA_Obj T )
References FLA_Apply_H2_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_ZERO.
Referenced by FLA_QR2_UT_internal().
{
  // Unblocked variant 1, expressed with FLA_Obj views.
  //
  // Advances one diagonal element of U at a time, along with the matching
  // column of D and the matching diagonal element / column of T.
  //
  // Always returns FLA_SUCCESS.

  FLA_Obj UTL,   UTR,      U00,  u01,       U02,
          UBL,   UBR,      u10t, upsilon11, u12t,
                           U20,  u21,       U22;

  FLA_Obj DL,    DR,       D0,   d1,        D2;

  FLA_Obj TTL,   TTR,      T00,  t01,       T02,
          TBL,   TBR,      t10t, tau11,     t12t,
                           T20,  t21,       T22;

  FLA_Part_2x2( U,    &UTL, &UTR,
                      &UBL, &UBR,     0, 0, FLA_TL );

  FLA_Part_1x2( D,    &DL,  &DR,      0, FLA_LEFT );

  FLA_Part_2x2( T,    &TTL, &TTR,
                      &TBL, &TBR,     0, 0, FLA_TL );

  while ( FLA_Obj_min_dim( UBR ) > 0 )
  {
    FLA_Repart_2x2_to_3x3( UTL, /**/ UTR,       &U00,  /**/ &u01,       &U02,
                        /* ************* */   /* ************************** */
                                                &u10t, /**/ &upsilon11, &u12t,
                           UBL, /**/ UBR,       &U20,  /**/ &u21,       &U22,
                           1, 1, FLA_BR );

    FLA_Repart_1x2_to_1x3( DL,  /**/ DR,        &D0, /**/ &d1, &D2,
                           1, FLA_RIGHT );

    FLA_Repart_2x2_to_3x3( TTL, /**/ TTR,       &T00,  /**/ &t01,   &T02,
                        /* ************* */   /* ************************ */
                                                &t10t, /**/ &tau11, &t12t,
                           TBL, /**/ TBR,       &T20,  /**/ &t21,   &T22,
                           1, 1, FLA_BR );

    /*------------------------------------------------------------*/

    // Build a Householder transform H from upsilon11 and d1.  Applied
    // from the left to the column vector made of upsilon11 stacked on d1,
    // H annihilates the entries in d1 (and updates upsilon11).  H is
    // represented afterwards by tau11 together with d1.
    FLA_Househ2_UT( FLA_LEFT,
                    upsilon11,
                    d1, tau11 );

    // / u12t \ = H / u12t \
    // \  D2  /     \  D2  /
    //
    // where H is formed from tau11 and d1.
    FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
                                          D2 );

    // t01 = D0' * d1;
    FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D0, d1, FLA_ZERO, t01 );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR,       U00,  u01,       /**/ U02,
                                                     u10t, upsilon11, /**/ u12t,
                            /* ************** */  /* ************************ */
                              &UBL, /**/ &UBR,       U20,  u21,       /**/ U22,
                              FLA_TL );

    FLA_Cont_with_1x3_to_1x2( &DL,  /**/ &DR,        D0, d1, /**/ D2,
                              FLA_LEFT );

    FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR,       T00,  t01,   /**/ T02,
                                                     t10t, tau11, /**/ t12t,
                            /* ************** */  /* ********************** */
                              &TBL, /**/ &TBR,       T20,  t21,   /**/ T22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}