libflame revision_anchor
|
Go to the source code of this file.
Functions | |
FLA_Error | FLASH_UDdate_UT_inc (FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W) |
FLA_Error | FLA_UDdate_UT_inc_blk_var1 (FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W, fla_uddateutinc_t *cntl) |
FLA_Error | FLASH_UDdate_UT_inc_create_hier_matrices (FLA_Obj R_flat, FLA_Obj C_flat, FLA_Obj D_flat, dim_t depth, dim_t *b_flash, dim_t b_alg, FLA_Obj *R, FLA_Obj *C, FLA_Obj *D, FLA_Obj *T, FLA_Obj *W) |
dim_t | FLASH_UDdate_UT_inc_determine_alg_blocksize (FLA_Obj R) |
FLA_Error | FLASH_UDdate_UT_inc_update_rhs (FLA_Obj T, FLA_Obj bR, FLA_Obj C, FLA_Obj bC, FLA_Obj D, FLA_Obj bD) |
FLA_Error | FLASH_UDdate_UT_inc_solve (FLA_Obj R, FLA_Obj bR, FLA_Obj x) |
FLA_Error FLA_UDdate_UT_inc_blk_var1 | ( | FLA_Obj | R, |
FLA_Obj | C, | ||
FLA_Obj | D, | ||
FLA_Obj | T, | ||
FLA_Obj | W, | ||
fla_uddateutinc_t * | cntl | ||
) |
References FLA_Apply_QUD_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_UDdate_UT_internal().
Referenced by FLASH_UDdate_UT_inc().
{ FLA_Obj RTL, RTR, R00, R01, R02, RBL, RBR, R10, R11, R12, R20, R21, R22; FLA_Obj CL, CR, C0, C1, C2; FLA_Obj DL, DR, D0, D1, D2; FLA_Obj TL, TR, T0, T1, T2; FLA_Obj WTL, WTR, W00, W01, W02, WBL, WBR, W10, W11, W12, W20, W21, W22; dim_t b; FLA_Part_2x2( R, &RTL, &RTR, &RBL, &RBR, 0, 0, FLA_TL ); FLA_Part_1x2( C, &CL, &CR, 0, FLA_LEFT ); FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT ); FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT ); FLA_Part_2x2( W, &WTL, &WTR, &WBL, &WBR, 0, 0, FLA_TL ); while ( FLA_Obj_min_dim( RBR ) > 0 ){ b = FLA_Determine_blocksize( RBR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x2_to_3x3( RTL, /**/ RTR, &R00, /**/ &R01, &R02, /* ************* */ /* ******************** */ &R10, /**/ &R11, &R12, RBL, /**/ RBR, &R20, /**/ &R21, &R22, b, b, FLA_BR ); FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, /**/ &C1, &C2, b, FLA_RIGHT ); FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2, b, FLA_RIGHT ); FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2, b, FLA_RIGHT ); FLA_Repart_2x2_to_3x3( WTL, /**/ WTR, &W00, /**/ &W01, &W02, /* ************* */ /* ******************** */ &W10, /**/ &W11, &W12, WBL, /**/ WBR, &W20, /**/ &W21, &W22, b, b, FLA_BR ); /*------------------------------------------------------------*/ /* Perform an up/downdate of the upper triangular factor R11 via up/downdating UT Householder transformations: [ R11, ... C1, ... D1, T1 ] = FLA_UDdate_UT( R11, ... C1, ... D1, T1 ); by updating R11 in such a way that removes the contributions of the rows in D1 while simultaneously adding new contributions to the factorization from the rows of C1. Note that C1 and D1 are also updated in the process. */ FLA_UDdate_UT_internal( R11, C1, D1, T1, FLA_Cntl_sub_uddateut( cntl ) ); if ( FLA_Obj_width( R12 ) > 0 ) { /* Apply Q' to R12, C2, and D2 from the left: / R12 \ / R12 \ | C2 | = Q' * | C2 | \ D2 / \ D2 / where Q is formed from C1, D1, and T1. */ FLA_Apply_QUD_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, T1, W12, R12, C1, C2, D1, D2, FLA_Cntl_sub_apqudut( cntl ) ); } /*------------------------------------------------------------*/ FLA_Cont_with_3x3_to_2x2( &RTL, /**/ &RTR, R00, R01, /**/ R02, R10, R11, /**/ R12, /* ************** */ /* ****************** */ &RBL, /**/ &RBR, R20, R21, /**/ R22, FLA_TL ); FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, C1, /**/ C2, FLA_LEFT ); FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2, FLA_LEFT ); FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2, FLA_LEFT ); FLA_Cont_with_3x3_to_2x2( &WTL, /**/ &WTR, W00, W01, /**/ W02, W10, W11, /**/ W12, /* ************** */ /* ****************** */ &WBL, /**/ &WBR, W20, W21, /**/ W22, FLA_TL ); } return FLA_SUCCESS; }
References FLA_Check_error_level(), FLA_UDdate_UT_inc_blk_var1(), FLA_UDdate_UT_inc_check(), FLASH_Queue_begin(), and FLASH_Queue_end().
{ FLA_Error r_val; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_UDdate_UT_inc_check( R, C, D, T, W ); // Begin a parallel region. FLASH_Queue_begin(); // Invoke FLA_QR_UT_inc_blk_var1() with the standard control tree. r_val = FLA_UDdate_UT_inc_blk_var1( R, C, D, T, W, flash_uddateutinc_cntl ); // End the parallel region. FLASH_Queue_end(); return r_val; }
FLA_Error FLASH_UDdate_UT_inc_create_hier_matrices | ( | FLA_Obj | R_flat, |
FLA_Obj | C_flat, | ||
FLA_Obj | D_flat, | ||
dim_t | depth, | ||
dim_t * | b_flash, | ||
dim_t | b_alg, | ||
FLA_Obj * | R, | ||
FLA_Obj * | C, | ||
FLA_Obj * | D, | ||
FLA_Obj * | T, | ||
FLA_Obj * | W | ||
) |
References FLA_Abort(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_width(), FLA_Print_message(), FLASH_Obj_create_ext(), FLASH_Obj_create_hier_copy_of_flat(), and FLASH_UDdate_UT_inc_determine_alg_blocksize().
{ FLA_Datatype datatype; dim_t m_T, n_T; dim_t m_W, n_W; dim_t m_C; dim_t m_D; // *** The current UDdate_UT_inc algorithm implemented assumes that // the matrix has a hierarchical depth of 1. We check for that here // because we anticipate that we'll use a more general algorithm in the // future, and we don't want to forget to remove the constraint. *** if ( depth != 1 ) { FLA_Print_message( "FLASH_UDdate_UT_inc() currently only supports matrices of depth 1", __FILE__, __LINE__ ); FLA_Abort(); } // Create hierarchical copy of matrices R_flat, C_flat, and D_flat. FLASH_Obj_create_hier_copy_of_flat( R_flat, depth, b_flash, R ); FLASH_Obj_create_hier_copy_of_flat( C_flat, depth, b_flash, C ); FLASH_Obj_create_hier_copy_of_flat( D_flat, depth, b_flash, D ); // Query the datatype of matrix R_flat. datatype = FLA_Obj_datatype( R_flat ); // If the user passed in zero for b_alg, then we need to set the // algorithmic (inner) blocksize to a reasonable default value. if ( b_alg == 0 ) { b_alg = FLASH_UDdate_UT_inc_determine_alg_blocksize( *R ); } // Determine the element (not scalar) dimensions of the new hierarchical // matrix T. By using the element dimensions, we will probably allocate // more storage than we actually need (at the bottom and right edge cases) // but this is simpler than computing the exact amount and the excess // storage is usually small in practice. n_T = FLA_Obj_width( *R ); m_C = FLA_Obj_length( *C ); m_D = FLA_Obj_length( *D ); m_T = max( m_C, m_D ); // Create hierarchical matrix T, with element dimensions conformal to the // the larger of C and D, where each block is b_alg-by-b_flash. FLASH_Obj_create_ext( datatype, m_T * b_alg, n_T * b_flash[0], depth, &b_alg, b_flash, T ); // Determine the element (not scalar) dimensions of the new hierarchical // matrix W. The element length and width will be identical to that of R. // Once again, we will probably allocate excess storage, but we consider // this to be small. m_W = FLA_Obj_length( *R ); n_W = FLA_Obj_width( *R ); // Create hierarchical matrix W, with element dimensions conformal to R, // where each block is b_alg-by-b_flash. FLASH_Obj_create_ext( datatype, m_W * b_alg, n_W * b_flash[0], depth, &b_alg, b_flash, W ); return FLA_SUCCESS; }
References FLA_Obj_length().
Referenced by FLASH_UDdate_UT_inc_create_hier_matrices().
{ dim_t b_alg; dim_t b_flash; // Acquire the storage blocksize. b_flash = FLA_Obj_length( *FLASH_OBJ_PTR_AT( R ) ); // Scale the storage blocksize by a pre-defined scalar to arrive at a // reasonable algorithmic blocksize, but make sure it's at least 1. b_alg = ( dim_t ) max( ( double ) b_flash * FLA_UDDATE_INNER_TO_OUTER_B_RATIO, 1 ); return b_alg; }
References FLA_Check_error_level(), FLA_ONE, FLA_UDdate_UT_inc_solve_check(), FLASH_Copy(), and FLASH_Trsm().
{ // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_UDdate_UT_inc_solve_check( R, bR, x ); // Copy the contents of bR to x so that after the triangular solve, the // solution resides in x (and bR is preserved). FLASH_Copy( bR, x ); // Perform a triangular solve with R the right-hand side. FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, R, x ); return FLA_SUCCESS; }
FLA_Error FLASH_UDdate_UT_inc_update_rhs | ( | FLA_Obj | T, |
FLA_Obj | bR, | ||
FLA_Obj | C, | ||
FLA_Obj | bC, | ||
FLA_Obj | D, | ||
FLA_Obj | bD | ||
) |
References FLA_Check_error_level(), FLA_UDdate_UT_inc_update_rhs_check(), FLASH_Apply_QUD_UT_inc(), FLASH_Apply_QUD_UT_inc_create_workspace(), FLASH_Obj_create_copy_of(), and FLASH_Obj_free().
{ FLA_Obj W; FLA_Obj bC_copy; FLA_Obj bD_copy; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_UDdate_UT_inc_update_rhs_check( T, bR, C, bC, D, bD ); // Create hierarchical workspace. FLASH_Apply_QUD_UT_inc_create_workspace( T, bR, &W ); // Make temporary copies of the bC and bD right-hand side objects so we // don't destory their original contents. FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bC, &bC_copy ); FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bD, &bD_copy ); // Apply the updowndating Q' incrementally to the right-hand sides. FLASH_Apply_QUD_UT_inc( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, T, W, bR, C, bC_copy, D, bD_copy ); // Free the temporary objects. FLASH_Obj_free( &bC_copy ); FLASH_Obj_free( &bD_copy ); // Free the workspace object. FLASH_Obj_free( &W ); return FLA_SUCCESS; }