libflame revision_anchor
Functions
FLA_UDdate_UT_inc.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLASH_UDdate_UT_inc (FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W)
FLA_Error FLA_UDdate_UT_inc_blk_var1 (FLA_Obj R, FLA_Obj C, FLA_Obj D, FLA_Obj T, FLA_Obj W, fla_uddateutinc_t *cntl)
FLA_Error FLASH_UDdate_UT_inc_create_hier_matrices (FLA_Obj R_flat, FLA_Obj C_flat, FLA_Obj D_flat, dim_t depth, dim_t *b_flash, dim_t b_alg, FLA_Obj *R, FLA_Obj *C, FLA_Obj *D, FLA_Obj *T, FLA_Obj *W)
dim_t FLASH_UDdate_UT_inc_determine_alg_blocksize (FLA_Obj R)
FLA_Error FLASH_UDdate_UT_inc_update_rhs (FLA_Obj T, FLA_Obj bR, FLA_Obj C, FLA_Obj bC, FLA_Obj D, FLA_Obj bD)
FLA_Error FLASH_UDdate_UT_inc_solve (FLA_Obj R, FLA_Obj bR, FLA_Obj x)

Function Documentation

FLA_Error FLA_UDdate_UT_inc_blk_var1 ( FLA_Obj  R,
FLA_Obj  C,
FLA_Obj  D,
FLA_Obj  T,
FLA_Obj  W,
fla_uddateutinc_t cntl 
)

References FLA_Apply_QUD_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_UDdate_UT_internal().

Referenced by FLASH_UDdate_UT_inc().

{
  FLA_Obj RTL,   RTR,      R00, R01, R02, 
          RBL,   RBR,      R10, R11, R12,
                           R20, R21, R22;

  FLA_Obj CL,    CR,       C0,  C1,  C2;

  FLA_Obj DL,    DR,       D0,  D1,  D2;

  FLA_Obj TL,    TR,       T0,  T1,  T2;

  FLA_Obj WTL,   WTR,      W00, W01, W02, 
          WBL,   WBR,      W10, W11, W12,
                           W20, W21, W22;

  dim_t b;

  FLA_Part_2x2( R,    &RTL, &RTR,
                      &RBL, &RBR,     0, 0, FLA_TL );

  FLA_Part_1x2( C,    &CL,  &CR,      0, FLA_LEFT );

  FLA_Part_1x2( D,    &DL,  &DR,      0, FLA_LEFT );

  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );

  FLA_Part_2x2( W,    &WTL, &WTR,
                      &WBL, &WBR,     0, 0, FLA_TL );

  while ( FLA_Obj_min_dim( RBR ) > 0 ){

    b = FLA_Determine_blocksize( RBR, FLA_BR, FLA_Cntl_blocksize( cntl ) );

    FLA_Repart_2x2_to_3x3( RTL, /**/ RTR,       &R00, /**/ &R01, &R02,
                        /* ************* */   /* ******************** */
                                                &R10, /**/ &R11, &R12,
                           RBL, /**/ RBR,       &R20, /**/ &R21, &R22,
                           b, b, FLA_BR );

    FLA_Repart_1x2_to_1x3( CL,  /**/ CR,        &C0, /**/ &C1, &C2,
                           b, FLA_RIGHT );

    FLA_Repart_1x2_to_1x3( DL,  /**/ DR,        &D0, /**/ &D1, &D2,
                           b, FLA_RIGHT );

    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &T2,
                           b, FLA_RIGHT );

    FLA_Repart_2x2_to_3x3( WTL, /**/ WTR,       &W00, /**/ &W01, &W02,
                        /* ************* */   /* ******************** */
                                                &W10, /**/ &W11, &W12,
                           WBL, /**/ WBR,       &W20, /**/ &W21, &W22,
                           b, b, FLA_BR );

    /*------------------------------------------------------------*/

    /*
       Perform an up/downdate of the upper triangular factor R11 via 
       up/downdating UT Householder transformations:

         [ R11, ...
           C1,  ...
           D1, T1 ] = FLA_UDdate_UT( R11, ...
                                     C1, ...
                                     D1, T1 );

       by updating R11 in such a way that removes the contributions of
       the rows in D1 while simultaneously adding new contributions to
       the factorization from the rows of C1. Note that C1 and D1 are
       also updated in the process.
    */

    FLA_UDdate_UT_internal( R11,
                            C1,
                            D1, T1,
                            FLA_Cntl_sub_uddateut( cntl ) );



    if ( FLA_Obj_width( R12 ) > 0 )
    {
      /*
           Apply Q' to R12, C2, and D2 from the left:
  
             / R12 \          / R12 \
             | C2  |  =  Q' * | C2  |
             \ D2  /          \ D2  /
  
           where Q is formed from C1, D1, and T1.
      */

      FLA_Apply_QUD_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                                 T1, W12,
                                     R12,
                                 C1, C2,
                                 D1, D2, FLA_Cntl_sub_apqudut( cntl ) );
    }


    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &RTL, /**/ &RTR,       R00, R01, /**/ R02,
                                                     R10, R11, /**/ R12,
                            /* ************** */  /* ****************** */
                              &RBL, /**/ &RBR,       R20, R21, /**/ R22,
                              FLA_TL );

    FLA_Cont_with_1x3_to_1x2( &CL,  /**/ &CR,        C0, C1, /**/ C2,
                              FLA_LEFT );

    FLA_Cont_with_1x3_to_1x2( &DL,  /**/ &DR,        D0, D1, /**/ D2,
                              FLA_LEFT );

    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ T2,
                              FLA_LEFT );

    FLA_Cont_with_3x3_to_2x2( &WTL, /**/ &WTR,       W00, W01, /**/ W02,
                                                     W10, W11, /**/ W12,
                            /* ************** */  /* ****************** */
                              &WBL, /**/ &WBR,       W20, W21, /**/ W22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
FLA_Error FLASH_UDdate_UT_inc ( FLA_Obj  R,
FLA_Obj  C,
FLA_Obj  D,
FLA_Obj  T,
FLA_Obj  W 
)

References FLA_Check_error_level(), FLA_UDdate_UT_inc_blk_var1(), FLA_UDdate_UT_inc_check(), FLASH_Queue_begin(), and FLASH_Queue_end().

{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_UDdate_UT_inc_check( R, C, D, T, W );

  // Begin a parallel region.
  FLASH_Queue_begin();

  // Invoke FLA_QR_UT_inc_blk_var1() with the standard control tree.
  r_val = FLA_UDdate_UT_inc_blk_var1( R, C, D, T, W, flash_uddateutinc_cntl );

  // End the parallel region.
  FLASH_Queue_end();

  return r_val;
}
FLA_Error FLASH_UDdate_UT_inc_create_hier_matrices ( FLA_Obj  R_flat,
FLA_Obj  C_flat,
FLA_Obj  D_flat,
dim_t  depth,
dim_t b_flash,
dim_t  b_alg,
FLA_Obj R,
FLA_Obj C,
FLA_Obj D,
FLA_Obj T,
FLA_Obj W 
)

References FLA_Abort(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_width(), FLA_Print_message(), FLASH_Obj_create_ext(), FLASH_Obj_create_hier_copy_of_flat(), and FLASH_UDdate_UT_inc_determine_alg_blocksize().

{
    FLA_Datatype datatype;
    dim_t        m_T, n_T;
    dim_t        m_W, n_W;
    dim_t        m_C;
    dim_t        m_D;
    
    // *** The current UDdate_UT_inc algorithm implemented assumes that
    // the matrix has a hierarchical depth of 1. We check for that here
    // because we anticipate that we'll use a more general algorithm in the
    // future, and we don't want to forget to remove the constraint. ***
    if ( depth != 1 )
    {
       FLA_Print_message( "FLASH_UDdate_UT_inc() currently only supports matrices of depth 1",
                          __FILE__, __LINE__ );
       FLA_Abort();
    }

    // Create hierarchical copy of matrices R_flat, C_flat, and D_flat.
    FLASH_Obj_create_hier_copy_of_flat( R_flat, depth, b_flash, R );
    FLASH_Obj_create_hier_copy_of_flat( C_flat, depth, b_flash, C );
    FLASH_Obj_create_hier_copy_of_flat( D_flat, depth, b_flash, D );

    // Query the datatype of matrix R_flat.
    datatype = FLA_Obj_datatype( R_flat );
    
    // If the user passed in zero for b_alg, then we need to set the
    // algorithmic (inner) blocksize to a reasonable default value.
    if ( b_alg == 0 )
    {
        b_alg = FLASH_UDdate_UT_inc_determine_alg_blocksize( *R );
    }

    // Determine the element (not scalar) dimensions of the new hierarchical
    // matrix T. By using the element dimensions, we will probably allocate
    // more storage than we actually need (at the bottom and right edge cases)
    // but this is simpler than computing the exact amount and the excess
    // storage is usually small in practice.
    n_T = FLA_Obj_width( *R );
    m_C = FLA_Obj_length( *C );
    m_D = FLA_Obj_length( *D );
    m_T = max( m_C, m_D );

    // Create hierarchical matrix T, with element dimensions conformal to the
    // the larger of C and D, where each block is b_alg-by-b_flash.
    FLASH_Obj_create_ext( datatype, m_T * b_alg, n_T * b_flash[0], 
                          depth, &b_alg, b_flash, 
                          T );

    // Determine the element (not scalar) dimensions of the new hierarchical
    // matrix W. The element length and width will be identical to that of R.
    // Once again, we will probably allocate excess storage, but we consider
    // this to be small.
    m_W = FLA_Obj_length( *R );
    n_W = FLA_Obj_width( *R );
       
    // Create hierarchical matrix W, with element dimensions conformal to R,
    // where each block is b_alg-by-b_flash.
    FLASH_Obj_create_ext( datatype, m_W * b_alg, n_W * b_flash[0], 
                          depth, &b_alg, b_flash, 
                          W );

    return FLA_SUCCESS;
}
dim_t FLASH_UDdate_UT_inc_determine_alg_blocksize ( FLA_Obj  R)

References FLA_Obj_length().

Referenced by FLASH_UDdate_UT_inc_create_hier_matrices().

{
    dim_t b_alg;
    dim_t b_flash;

    // Acquire the storage blocksize.
    b_flash = FLA_Obj_length( *FLASH_OBJ_PTR_AT( R ) );

    // Scale the storage blocksize by a pre-defined scalar to arrive at a
    // reasonable algorithmic blocksize, but make sure it's at least 1.
    b_alg = ( dim_t ) max( ( double ) b_flash * FLA_UDDATE_INNER_TO_OUTER_B_RATIO, 1 );

    return b_alg;
}
FLA_Error FLASH_UDdate_UT_inc_solve ( FLA_Obj  R,
FLA_Obj  bR,
FLA_Obj  x 
)

References FLA_Check_error_level(), FLA_ONE, FLA_UDdate_UT_inc_solve_check(), FLASH_Copy(), and FLASH_Trsm().

{
    // Check parameters.
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
        FLA_UDdate_UT_inc_solve_check( R, bR, x );

    // Copy the contents of bR to x so that after the triangular solve, the
    // solution resides in x (and bR is preserved).
    FLASH_Copy( bR, x );
    
    // Perform a triangular solve with R the right-hand side.
    FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR,
                FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                FLA_ONE, R, x );

    return FLA_SUCCESS;
}
FLA_Error FLASH_UDdate_UT_inc_update_rhs ( FLA_Obj  T,
FLA_Obj  bR,
FLA_Obj  C,
FLA_Obj  bC,
FLA_Obj  D,
FLA_Obj  bD 
)

References FLA_Check_error_level(), FLA_UDdate_UT_inc_update_rhs_check(), FLASH_Apply_QUD_UT_inc(), FLASH_Apply_QUD_UT_inc_create_workspace(), FLASH_Obj_create_copy_of(), and FLASH_Obj_free().

{
    FLA_Obj W;
    FLA_Obj bC_copy;
    FLA_Obj bD_copy;

    // Check parameters.
    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
        FLA_UDdate_UT_inc_update_rhs_check( T, bR, C, bC, D, bD );

    // Create hierarchical workspace.
    FLASH_Apply_QUD_UT_inc_create_workspace( T, bR, &W );
    
    // Make temporary copies of the bC and bD right-hand side objects so we
    // don't destory their original contents.
    FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bC, &bC_copy );
    FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, bD, &bD_copy );

    // Apply the updowndating Q' incrementally to the right-hand sides.
    FLASH_Apply_QUD_UT_inc( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                            T, W,
                               bR,
                            C, bC_copy,
                            D, bD_copy );

    // Free the temporary objects.
    FLASH_Obj_free( &bC_copy );
    FLASH_Obj_free( &bD_copy );

    // Free the workspace object.
    FLASH_Obj_free( &W );

    return FLA_SUCCESS;
}