libflame  revision_anchor
Functions
FLA_Apply_CAQ_UT_inc.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLASH_Apply_CAQ_UT_inc (dim_t p, FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj ATW, FLA_Obj R, FLA_Obj RTW, FLA_Obj W, FLA_Obj B)
FLA_Error FLA_Apply_CAQ_UT_inc_apply_panels (dim_t nb_part, FLA_Obj A, FLA_Obj ATW, FLA_Obj W, FLA_Obj B)
FLA_Error FLASH_Apply_CAQ_UT_inc_create_workspace (dim_t p, FLA_Obj TW, FLA_Obj B, FLA_Obj *W)
FLA_Error FLA_Apply_CAQ_UT_inc_internal (FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj R, FLA_Obj TW, FLA_Obj W1, FLA_Obj B, fla_apcaqutinc_t *cntl)
FLA_Error FLA_Apply_CAQ_UT_inc_lhfc (FLA_Obj R, FLA_Obj TW, FLA_Obj W1, FLA_Obj B, fla_apcaqutinc_t *cntl)

Function Documentation

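FLA_Error FLA_Apply_CAQ_UT_inc_apply_panels ( dim_t  nb_part,
FLA_Obj  A,
FLA_Obj  ATW,
FLA_Obj  W,
FLA_Obj  B 
)

References FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_2x1(), FLA_Repart_2x1_to_3x1(), and FLASH_Apply_Q_UT_inc().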

Referenced by FLASH_Apply_CAQ_UT_inc().

{
  // Panel-application loop: walks A, TW, W and B from top to bottom in
  // lock step and applies the incremental Q of each row partition of A
  // to the matching row partition of B.
  //
  // Names used from the enclosing function: nb_part (rows of A, TW and B
  // consumed per step), A, TW, W, B.
  // NOTE(review): the prototype list names the third argument ATW while
  // this body refers to TW -- confirm against the .c source.
  //
  // Always returns FLA_SUCCESS.

  // For each operand: the part already processed (_top), the part still
  // pending (_bot), and the three-way split used inside the loop.
  FLA_Obj A_top,  A_bot,  A_above,  A_cur,  A_below;
  FLA_Obj TW_top, TW_bot, TW_above, TW_cur, TW_below;
  FLA_Obj W_top,  W_bot,  W_above,  W_cur,  W_below;
  FLA_Obj B_top,  B_bot,  B_above,  B_cur,  B_below;

  dim_t rows_left;   // length of the pending part of A
  dim_t step_rows;   // rows of A/TW/B taken in the current step

  // Start with empty top parts: everything is still pending.
  FLA_Part_2x1( A,  &A_top,  &A_bot,  0, FLA_TOP );
  FLA_Part_2x1( TW, &TW_top, &TW_bot, 0, FLA_TOP );
  FLA_Part_2x1( W,  &W_top,  &W_bot,  0, FLA_TOP );
  FLA_Part_2x1( B,  &B_top,  &B_bot,  0, FLA_TOP );

  for ( rows_left = FLA_Obj_length( A_bot );
        rows_left > 0;
        rows_left = FLA_Obj_length( A_bot ) )
  {
    // The last step may be shorter than nb_part.
    step_rows = min( nb_part, rows_left );

    // Expose the current partition of A, TW and B.
    FLA_Repart_2x1_to_3x1( A_top,  &A_above,  &A_cur,  A_bot,  &A_below,
                           step_rows, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( TW_top, &TW_above, &TW_cur, TW_bot, &TW_below,
                           step_rows, FLA_BOTTOM );

    // W advances by a single row per step, independent of step_rows.
    // The original note describes each row of W as a row panel of
    // b_alg x b_flash blocks.
    // NOTE(review): presumably W has one row per partition (not nb_part
    // rows, as the original note put it) -- confirm against the
    // workspace-creation routine.
    FLA_Repart_2x1_to_3x1( W_top,  &W_above,  &W_cur,  W_bot,  &W_below,
                           1, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( B_top,  &B_above,  &B_cur,  B_bot,  &B_below,
                           step_rows, FLA_BOTTOM );

    // Apply the incremental Q's stored in the current partition of A
    // (with its TW partition and workspace row) to the current partition
    // of the right-hand side B.
    FLASH_Apply_Q_UT_inc( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
                          A_cur, TW_cur, W_cur, B_cur );

    // Fold the current partitions into the processed (top) parts.
    FLA_Cont_with_3x1_to_2x1( &A_top,  A_above,  A_cur,  &A_bot,  A_below,  FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &TW_top, TW_above, TW_cur, &TW_bot, TW_below, FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &W_top,  W_above,  W_cur,  &W_bot,  W_below,  FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &B_top,  B_above,  B_cur,  &B_bot,  B_below,  FLA_TOP );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_CAQ_UT_inc_internal ( FLA_Side  side,
FLA_Trans  trans,
FLA_Direct  direct,
FLA_Store  storev,
FLA_Obj  R,
FLA_Obj  TW,
FLA_Obj  W1,
FLA_Obj  B,
fla_apcaqutinc_t *  cntl 
)

References FLA_Apply_CAQ_UT_inc_internal_check(), FLA_Apply_CAQ_UT_inc_lhfc(), and FLA_Check_error_level().

Referenced by FLASH_Apply_CAQ_UT_inc().

{
    // Dispatches on (side, trans, direct, storev).
    //
    // Only one combination has an implementation: left side, transpose or
    // conjugate-transpose, forward direction, columnwise storage, which is
    // forwarded to FLA_Apply_CAQ_UT_inc_lhfc(). Every other combination of
    // recognised values reports FLA_NOT_YET_IMPLEMENTED through
    // FLA_Check_error_code(). If any argument holds a value outside the
    // recognised set, nothing is dispatched and FLA_SUCCESS is returned.
    //
    // Returns the result of the lhfc variant when it runs, FLA_SUCCESS
    // otherwise.
    //
    // NOTE(review): the rendered prototype names the workspace parameter W1,
    // but this body refers to W -- confirm against the .c source.
    FLA_Error status = FLA_SUCCESS;

    // Parameter checking happens only at the full error-checking level.
    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_Apply_CAQ_UT_inc_internal_check( side, trans, direct, storev, R, TW, W, B, cntl );

    // Unrecognised value in any slot: fall through without dispatching.
    if ( ( side   != FLA_LEFT         && side   != FLA_RIGHT ) ||
         ( trans  != FLA_NO_TRANSPOSE && trans  != FLA_TRANSPOSE &&
           trans  != FLA_CONJ_TRANSPOSE ) ||
         ( direct != FLA_FORWARD      && direct != FLA_BACKWARD ) ||
         ( storev != FLA_COLUMNWISE   && storev != FLA_ROWWISE ) )
    {
        return status;
    }

    // All four values are recognised here, so "not FLA_NO_TRANSPOSE" means
    // FLA_TRANSPOSE or FLA_CONJ_TRANSPOSE.
    if ( side   == FLA_LEFT    && trans  != FLA_NO_TRANSPOSE &&
         direct == FLA_FORWARD && storev == FLA_COLUMNWISE )
    {
        status = FLA_Apply_CAQ_UT_inc_lhfc( R, TW, W, B, cntl );
    }
    else
    {
        FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
    }

    return status;
}
FLA_Error FLASH_Apply_CAQ_UT_inc ( dim_t  p,
FLA_Side  side,
FLA_Trans  trans,
FLA_Direct  direct,
FLA_Store  storev,
FLA_Obj  A,
FLA_Obj  ATW,
FLA_Obj  R,
FLA_Obj  RTW,
FLA_Obj  W,
FLA_Obj  B 
)

References FLA_Apply_CAQ_UT_inc_apply_panels(), FLA_Apply_CAQ_UT_inc_check(), FLA_Apply_CAQ_UT_inc_internal(), FLA_CAQR_UT_inc_compute_blocks_per_part(), FLA_Check_error_level(), FLA_Part_2x1(), FLASH_Queue_begin(), and FLASH_Queue_end().

Referenced by FLASH_CAQR_UT_inc_solve().

{
  // Two-stage application of the CAQR Q factor to B:
  //   1. apply the Q's of the individual incremental QR factorizations
  //      (stored in A / ATW) panel by panel;
  //   2. apply the Q from the factorization of the upper triangular R's
  //      (stored in R / RTW).
  // Both stages are issued between FLASH_Queue_begin() and
  // FLASH_Queue_end(). Returns the value produced by stage 2.
  FLA_Obj   W_first, W_rest;
  dim_t     blocks_per_part;
  FLA_Error status;

  // Validate the arguments unless error checking is below the minimum level.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Apply_CAQ_UT_inc_check( side, trans, direct, storev, A, ATW, R, RTW, W, B );
  }

  // Translate the number of partitions p into a partition length.
  blocks_per_part = FLA_CAQR_UT_inc_compute_blocks_per_part( p, R );

  // ---- begin parallel region ----
  FLASH_Queue_begin();

  // Stage 1: per-partition incremental Q's.
  FLA_Apply_CAQ_UT_inc_apply_panels( blocks_per_part, A, ATW, W, B );

  // Stage 2 is handed only the first row of the workspace W.
  FLA_Part_2x1( W, &W_first, &W_rest, 1, FLA_TOP );

  // Stage 2: Q from the factorization of the stacked R's.
  status = FLA_Apply_CAQ_UT_inc_internal( side, trans, direct, storev,
                                          R, RTW, W_first, B,
                                          flash_apcaqutinc_cntl );

  FLASH_Queue_end();
  // ---- end parallel region ----

  return status;
}

FLA_Error FLASH_Apply_CAQ_UT_inc_create_workspace ( dim_t  p,
FLA_Obj  TW,
FLA_Obj  B,
FLA_Obj *  W 
)

References FLA_Abort(), FLA_Obj_datatype(), FLA_Obj_width(), FLA_Print_message(), FLASH_Obj_create_ext(), FLASH_Obj_depth(), FLASH_Obj_scalar_length_tl(), and FLASH_Obj_scalar_width_tl().

Referenced by FLASH_CAQR_UT_inc_solve().

{
    // Creates the hierarchical workspace matrix W used when applying the
    // CAQR Q factor. W takes its datatype, depth and block sizes from TW,
    // gets p block rows (one panel per factorized subproblem), and as many
    // block columns as B is wide.
    //
    // Names used from the enclosing function: p, TW, B, W (output).
    // Aborts if TW does not have hierarchical depth 1; otherwise returns
    // FLA_SUCCESS.
    dim_t        levels;
    FLA_Datatype dt;
    dim_t        alg_blk;     // algorithmic block size
    dim_t        store_blk;   // storage block size
    dim_t        w_rows;      // element length of W
    dim_t        w_cols;      // element width of W

    // The Apply_CAQ_UT_inc algorithm as currently implemented assumes a
    // hierarchical depth of exactly 1.
    levels = FLASH_Obj_depth( TW );
    if ( levels != 1 )
    {
       FLA_Print_message( "FLASH_Apply_CAQ_UT_inc() currently only supports matrices of depth 1",
                          __FILE__, __LINE__ );
       FLA_Abort();
    }

    // W shares the datatype of TW.
    dt = FLA_Obj_datatype( TW );

    // The length of the top-left element of TW is the algorithmic block
    // size used throughout the Apply_CAQ_UT_inc algorithm; its width is
    // the storage block size.
    alg_blk   = FLASH_Obj_scalar_length_tl( TW );
    store_blk = FLASH_Obj_scalar_width_tl( TW );

    // One panel of W for each of the p factorized subproblems.
    w_rows = p;

    // Element (not scalar) width of the right-hand side B, so that W is
    // created with full blocks even for the right "edge cases" of B.
    w_cols = FLA_Obj_width( B );

    // Build W with scalar dimensions (w_rows * alg_blk) x (w_cols * store_blk).
    FLASH_Obj_create_ext( dt, w_rows * alg_blk, w_cols * store_blk,
                          levels, &alg_blk, &store_blk,
                          W );

    return FLA_SUCCESS;
}