libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
FLA_Error | FLASH_CAQR_UT_inc (dim_t p, FLA_Obj A, FLA_Obj ATW, FLA_Obj R, FLA_Obj RTW) |
FLA_Error | FLASH_CAQR_UT_inc_noopt (dim_t p, FLA_Obj A, FLA_Obj ATW, FLA_Obj R, FLA_Obj RTW) |
FLA_Error | FLASH_CAQR_UT_inc_create_hier_matrices (dim_t p, FLA_Obj A_flat, dim_t depth, dim_t *b_flash, dim_t b_alg, FLA_Obj *A, FLA_Obj *ATW, FLA_Obj *R, FLA_Obj *RTW) |
dim_t | FLASH_CAQR_UT_inc_determine_alg_blocksize (FLA_Obj A) |
FLA_Error | FLASH_CAQR_UT_inc_adjust_views (FLA_Obj A, FLA_Obj TW) |
void | FLA_CAQR_UT_inc_init_structure (dim_t p, dim_t nb_part, FLA_Obj R) |
dim_t | FLA_CAQR_UT_inc_compute_blocks_per_part (dim_t p, FLA_Obj A) |
FLA_Error | FLA_CAQR_UT_inc_factorize_panels (dim_t nb_part, FLA_Obj A, FLA_Obj ATW) |
FLA_Error | FLA_CAQR_UT_inc_copy_triangles (dim_t nb_part, FLA_Obj A, FLA_Obj R) |
FLA_Error | FLA_CAQR_UT_inc_blk_var1 (FLA_Obj R, FLA_Obj TW, fla_caqrutinc_t *cntl) |
FLA_Error | FLASH_CAQR_UT_inc_solve (dim_t p, FLA_Obj A, FLA_Obj ATW, FLA_Obj R, FLA_Obj RTW, FLA_Obj B, FLA_Obj X) |
FLA_Error FLA_CAQR_UT_inc_blk_var1 | ( | FLA_Obj | R, |
FLA_Obj | TW, | ||
fla_caqrutinc_t * | cntl | ||
) |
References FLA_Apply_CAQ2_UT_internal(), FLA_CAQR2_UT_internal(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_2x2(), and FLA_Repart_2x2_to_3x3().
Referenced by FLASH_CAQR_UT_inc_noopt().
{ FLA_Obj ATL, ATR, A00, A01, A02, ABL, ABR, A10, A11, A12, A20, A21, A22; FLA_Obj TTL, WTR, T00, W01, W02, TBL, TBR, T10, T11, W12, T20, T21, T22; dim_t b; FLA_Part_2x2( A, &ATL, &ATR, &ABL, &ABR, 0, 0, FLA_TL ); FLA_Part_2x2( TW, &TTL, &WTR, &TBL, &TBR, 0, 0, FLA_TL ); while ( FLA_Obj_min_dim( ABR ) > 0 ){ b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) ); FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, /* ************* */ /* ******************** */ &A10, /**/ &A11, &A12, ABL, /**/ ABR, &A20, /**/ &A21, &A22, b, b, FLA_BR ); FLA_Repart_2x2_to_3x3( TTL, /**/ WTR, &T00, /**/ &W01, &W02, /* ************* */ /* ******************** */ &T10, /**/ &T11, &W12, TBL, /**/ TBR, &T20, /**/ &T21, &T22, b, b, FLA_BR ); /*------------------------------------------------------------*/ FLA_CAQR2_UT_internal( A11, A21, T21, FLA_Cntl_sub_caqr2ut( cntl ) ); if ( FLA_Obj_width( A12 ) > 0 ) { FLA_Apply_CAQ2_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, A21, T21, W12, A12, A22, FLA_Cntl_sub_apcaq2ut( cntl ) ); } /*------------------------------------------------------------*/ FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, A10, A11, /**/ A12, /* ************** */ /* ****************** */ &ABL, /**/ &ABR, A20, A21, /**/ A22, FLA_TL ); FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &WTR, T00, W01, /**/ W02, T10, T11, /**/ W12, /* ************** */ /* ****************** */ &TBL, /**/ &TBR, T20, T21, /**/ T22, FLA_TL ); } return FLA_SUCCESS; }
References FLA_Obj_length().
Referenced by FLASH_Apply_CAQ_UT_inc(), FLASH_CAQR_UT_inc_create_hier_matrices(), and FLASH_CAQR_UT_inc_noopt().
{ dim_t nb_part; dim_t nb_left; dim_t num_blocks; // Query the element (not scalar) length of A. num_blocks = FLA_Obj_length( A ); // Compute the number of blocks per partitions. nb_part = num_blocks / p; nb_left = num_blocks % p; // If there are leftover blocks, increase nb_part by one. if ( nb_left > 0 ) nb_part += 1; return nb_part; }
FLA_Error FLA_CAQR_UT_inc_copy_triangles | ( | dim_t | nb_part, |
FLA_Obj | A, | ||
FLA_Obj | R | ||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), and FLASH_Copyr().
Referenced by FLASH_CAQR_UT_inc_noopt().
{ FLA_Obj AT, A0, AB, A1, A2; FLA_Obj RT, R0, RB, R1, R2; dim_t b; FLA_Part_2x1( A, &AT, &AB, 0, FLA_TOP ); FLA_Part_2x1( R, &RT, &RB, 0, FLA_TOP ); while ( FLA_Obj_length( AB ) > 0 ){ b = min( nb_part, FLA_Obj_length( AB ) ); FLA_Repart_2x1_to_3x1( AT, &A0, /* ** */ /* ** */ &A1, AB, &A2, b, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( RT, &R0, /* ** */ /* ** */ &R1, RB, &R2, b, FLA_BOTTOM ); /*------------------------------------------------------------*/ // Copy the individual upper triangles in A into R. FLASH_Copyr( FLA_UPPER_TRIANGULAR, A1, R1 ); /*------------------------------------------------------------*/ FLA_Cont_with_3x1_to_2x1( &AT, A0, A1, /* ** */ /* ** */ &AB, A2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &RT, R0, R1, /* ** */ /* ** */ &RB, R2, FLA_TOP ); } return FLA_SUCCESS; }
FLA_Error FLA_CAQR_UT_inc_factorize_panels | ( | dim_t | nb_part, |
FLA_Obj | A, | ||
FLA_Obj | ATW | ||
) |
References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), and FLASH_QR_UT_inc().
Referenced by FLASH_CAQR_UT_inc_noopt().
{ FLA_Obj AT, A0, AB, A1, A2; FLA_Obj TWT, TW0, TWB, TW1, TW2; dim_t b; FLA_Part_2x1( A, &AT, &AB, 0, FLA_TOP ); FLA_Part_2x1( TW, &TWT, &TWB, 0, FLA_TOP ); while ( FLA_Obj_length( AB ) > 0 ){ b = min( nb_part, FLA_Obj_length( AB ) ); FLA_Repart_2x1_to_3x1( AT, &A0, /* ** */ /* ** */ &A1, AB, &A2, b, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( TWT, &TW0, /* ** */ /* ** */ &TW1, TWB, &TW2, b, FLA_BOTTOM ); /*------------------------------------------------------------*/ // Perform an incremental QR factorization on A1, writing triangular // block Householder factors to T in TW1. FLASH_QR_UT_inc( A1, TW1 ); /*------------------------------------------------------------*/ FLA_Cont_with_3x1_to_2x1( &AT, A0, A1, /* ** */ /* ** */ &AB, A2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &TWT, TW0, TW1, /* ** */ /* ** */ &TWB, TW2, FLA_TOP ); } return FLA_SUCCESS; }
void FLA_CAQR_UT_inc_init_structure | ( | dim_t | p, |
dim_t | nb_part, | ||
FLA_Obj | R | ||
) |
References FLA_Obj_view::base, FLA_Obj_buffer_at_view(), FLA_Obj_col_stride(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), and FLA_Obj_struct::uplo.
Referenced by FLASH_CAQR_UT_inc_create_hier_matrices().
{ dim_t m, n; dim_t rs, cs; dim_t i, j, ip; FLA_Obj* buff_R; m = FLA_Obj_length( R ); n = FLA_Obj_width( R ); rs = FLA_Obj_row_stride( R ); cs = FLA_Obj_col_stride( R ); buff_R = FLA_Obj_buffer_at_view( R ); // Fill in R by row panels. for ( ip = 0; ip < p; ++ip ) { FLA_Obj* buff_R1 = buff_R + (ip*nb_part)*rs; int m_behind = ip*nb_part; int m_ahead = m - m_behind; int m_cur = min( nb_part, m_ahead ); int n_cur = n; // Iterate across columns for the current panel. for ( j = 0; j < n_cur; ++j ) { FLA_Obj* rho = buff_R1 + j*cs; // Mark the above-diagonal blocks as full. for ( i = 0; i < j; ++i ) { rho->base->uplo = FLA_FULL_MATRIX; rho += rs; } // Mark the diagonal block as triangular. rho->base->uplo = FLA_UPPER_TRIANGULAR; rho += rs; // Mark the below-diagonal blocks as zero. for ( i = j + 1; i < m_cur; ++i ) { rho->base->uplo = FLA_ZERO_MATRIX; rho += rs; } } } }
References FLASH_CAQR_UT_inc_noopt().
{ FLA_Error r_val; //if ( FLASH_Queue_stack_depth() == 0 ) // r_val = FLASH_CAQR_UT_inc_opt1( A, ATW, R, RTW ); //else r_val = FLASH_CAQR_UT_inc_noopt( p, A, ATW, R, RTW ); return r_val; }
References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLASH_Obj_scalar_width(), FLASH_Obj_scalar_width_tl(), FLA_Obj_view::m, FLA_Obj_view::m_inner, FLA_Obj_view::n, and FLA_Obj_view::n_inner.
Referenced by FLASH_CAQR_UT_inc_create_hier_matrices().
{ dim_t b_flash; dim_t n, n_last; // We can query b_flash as the width of the top-left element of TW. b_flash = FLASH_Obj_scalar_width_tl( TW ); // Query the element (not scalar) n dimension of A. n = FLA_Obj_width( A ); // If the bottom-right-most block along the diagonal is a partial block, // adjust the view of the corresponding T block. n_last = FLASH_Obj_scalar_width( A ) % b_flash; if ( n_last > 0 ) { FLA_Obj TWTL, TWTR, TWBL, TWBR; FLA_Obj TWL, TWR; FLA_Obj TWT, TW0, TWB, TW1, TW2; FLA_Obj* TW1p; FLA_Part_2x2( TW, &TWTL, &TWTR, &TWBL, &TWBR, n-1, n-1, FLA_TL ); FLA_Part_2x1( TWBR, &TWT, &TWB, 0, FLA_TOP ); while ( FLA_Obj_length( TWB ) > 0 ) { FLA_Repart_2x1_to_3x1( TWT, &TW0, /* *** */ /* *** */ &TW1, TWB, &TW2, 1, FLA_BOTTOM ); // ----------------------------------------------------------- TW1p = FLASH_OBJ_PTR_AT( TW1 ); FLA_Part_1x2( *TW1p, &TWL, &TWR, n_last, FLA_LEFT ); *TW1p = TWL; TW1p->m_inner = TW1p->m; TW1p->n_inner = TW1p->n; // ----------------------------------------------------------- FLA_Cont_with_3x1_to_2x1( &TWT, TW0, TW1, /* *** */ /* *** */ &TWB, TW2, FLA_TOP ); } } return FLA_SUCCESS; }
FLA_Error FLASH_CAQR_UT_inc_create_hier_matrices | ( | dim_t | p, |
FLA_Obj | A_flat, | ||
dim_t | depth, | ||
dim_t * | b_flash, | ||
dim_t | b_alg, | ||
FLA_Obj * | A, | ||
FLA_Obj * | ATW, | ||
FLA_Obj * | R, | ||
FLA_Obj * | RTW | ||
) |
References FLA_Abort(), FLA_CAQR_UT_inc_compute_blocks_per_part(), FLA_CAQR_UT_inc_init_structure(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_width(), FLA_Print_message(), FLASH_CAQR_UT_inc_adjust_views(), FLASH_CAQR_UT_inc_determine_alg_blocksize(), FLASH_Obj_create_conf_to(), FLASH_Obj_create_ext(), and FLASH_Obj_create_hier_copy_of_flat().
{ FLA_Datatype datatype; dim_t m, n; dim_t nb_part; // *** The current CAQR_UT_inc algorithm implemented assumes that // the matrix has a hierarchical depth of 1. if ( depth != 1 ) { FLA_Print_message( "FLASH_CAQR_UT_inc() currently only supports matrices of depth 1", __FILE__, __LINE__ ); FLA_Abort(); } // Create hierarchical copy of matrix A_flat. FLASH_Obj_create_hier_copy_of_flat( A_flat, depth, b_flash, A ); // Create hierarchical copy of matrix A_flat. FLASH_Obj_create_conf_to( FLA_NO_TRANSPOSE, *A, R ); // Query the datatype of matrix A_flat. datatype = FLA_Obj_datatype( A_flat ); // If the user passed in zero for b_alg, then we need to set the // algorithmic (inner) blocksize to a reasonable default value. if ( b_alg == 0 ) { b_alg = FLASH_CAQR_UT_inc_determine_alg_blocksize( *A ); } // Query the element (not scalar) dimensions of the new hierarchical // matrix. This is done so we can create T with full blocks for the // bottom and right "edge cases" of A. m = FLA_Obj_length( *A ); n = FLA_Obj_width( *A ); // Create hierarchical matrices T and W for both A and R. T is lower // triangular where each block is b_alg-by-b_flash and W is strictly // upper triangular where each block is b_alg-by-b_flash. So we can // create them simultaneously as part of the same hierarchical matrix. FLASH_Obj_create_ext( datatype, m * b_alg, n * b_flash[0], depth, &b_alg, b_flash, ATW ); FLASH_Obj_create_ext( datatype, m * b_alg, n * b_flash[0], depth, &b_alg, b_flash, RTW ); // If the bottom-right-most block along the diagonal is a partial block, // adjust the view of the corresponding T block. FLASH_CAQR_UT_inc_adjust_views( *A, *ATW ); FLASH_CAQR_UT_inc_adjust_views( *A, *RTW ); // Compute the partition length from the number of partitions. nb_part = FLA_CAQR_UT_inc_compute_blocks_per_part( p, *A ); // Encode block structure (upper tri, full, or zero) into blocks of R. FLA_CAQR_UT_inc_init_structure( p, nb_part, *R ); return FLA_SUCCESS; }
References FLA_Obj_length().
Referenced by FLASH_CAQR_UT_inc_create_hier_matrices().
{ dim_t b_alg; dim_t b_flash; // Acquire the storage blocksize. b_flash = FLA_Obj_length( *FLASH_OBJ_PTR_AT( A ) ); // Scale the storage blocksize by a pre-defined scalar to arrive at a // reasonable algorithmic blocksize, but make sure it's at least 1. b_alg = ( dim_t ) max( ( double ) b_flash * FLA_CAQR_INNER_TO_OUTER_B_RATIO, 1 ); return b_alg; }
References FLA_CAQR_UT_inc_blk_var1(), FLA_CAQR_UT_inc_check(), FLA_CAQR_UT_inc_compute_blocks_per_part(), FLA_CAQR_UT_inc_copy_triangles(), FLA_CAQR_UT_inc_factorize_panels(), FLA_Check_error_level(), FLASH_Queue_begin(), and FLASH_Queue_end().
Referenced by FLASH_CAQR_UT_inc().
{ FLA_Error r_val = FLA_SUCCESS; dim_t nb_part; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_CAQR_UT_inc_check( p, A, ATW, R, RTW ); // Compute the partition length from the number of partitions. nb_part = FLA_CAQR_UT_inc_compute_blocks_per_part( p, A ); // Begin a parallel region. FLASH_Queue_begin(); // Perform incremental QR's on each of the p partitions. FLA_CAQR_UT_inc_factorize_panels( nb_part, A, ATW ); // Copy the triangles of A into R. FLA_CAQR_UT_inc_copy_triangles( nb_part, A, R ); // Perform an incremental CAQR on the resulting upper triangular R's in A. FLA_CAQR_UT_inc_blk_var1( R, RTW, flash_caqrutinc_cntl ); // End the parallel region. FLASH_Queue_end(); return r_val; }
FLA_Error FLASH_CAQR_UT_inc_solve | ( | dim_t | p, |
FLA_Obj | A, | ||
FLA_Obj | ATW, | ||
FLA_Obj | R, | ||
FLA_Obj | RTW, | ||
FLA_Obj | B, | ||
FLA_Obj | X | ||
) |
References FLA_CAQR_UT_inc_solve_check(), FLA_Check_error_level(), FLA_ONE, FLASH_Apply_CAQ_UT_inc(), FLASH_Apply_CAQ_UT_inc_create_workspace(), FLASH_Copy(), FLASH_Obj_create_copy_of(), FLASH_Obj_free(), FLASH_Obj_scalar_width(), FLASH_Part_create_2x1(), FLASH_Part_free_2x1(), and FLASH_Trsm().
{ FLA_Obj W, Y; FLA_Obj RT, RB; FLA_Obj YT, YB; // Check parameters. if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_CAQR_UT_inc_solve_check( p, A, ATW, R, RTW, B, X ); FLASH_Apply_CAQ_UT_inc_create_workspace( p, RTW, B, &W ); FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &Y ); // Create a temporary hierarchical view of only the top n-by-n part of R in // case m > n so that RT captures the upper triangle. We do the same for Y // to ensure conformality. FLASH_Part_create_2x1( R, &RT, &RB, FLASH_Obj_scalar_width( R ), FLA_TOP ); FLASH_Part_create_2x1( Y, &YT, &YB, FLASH_Obj_scalar_width( R ), FLA_TOP ); FLASH_Apply_CAQ_UT_inc( p, FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE, A, ATW, R, RTW, W, Y ); FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, RT, YT ); FLASH_Copy( YT, X ); // Free the temporary hierarchical views. FLASH_Part_free_2x1( &RT, &RB ); FLASH_Part_free_2x1( &YT, &YB ); FLASH_Obj_free( &Y ); FLASH_Obj_free( &W ); return FLA_SUCCESS; }