libflame revision_anchor
Functions
FLA_Bidiag_UT_u_blk_var4.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_blk_var4 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)

Function Documentation

FLA_Error FLA_Bidiag_UT_u_blk_var4 ( FLA_Obj A, FLA_Obj TU, FLA_Obj TV )

References FLA_Bidiag_UT_u_step_opt_var4(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Gemm_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_set_to_scalar(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Triangularize(), and FLA_ZERO.

Referenced by FLA_Bidiag_UT_u().

{
  /*
   * Blocked driver, variant 4 (presumably of a bidiagonal reduction, judging
   * by the routine name -- confirm against the libflame documentation).
   *
   * The loop walks down the diagonal of A in steps of at most b_alg.  In each
   * step it calls FLA_Bidiag_UT_u_step_opt_var4() on the trailing submatrix
   * ABR and then, if A22 still has rows, updates
   *     A22 = A22 - U2 * Y2' - Z2 * V2'.
   *
   * A       matrix that is partitioned and updated in place.
   * TU, TV  partitioned left-to-right in panels of b columns; the top-left
   *         b-by-b part of the current panel is handed to the step routine.
   *         FLA_Obj_length( TU ) is used as the algorithmic block size.
   *
   * U, V, Y, Z are temporary workspaces created and freed inside this
   * function.  The only return statement returns FLA_SUCCESS.
   */

  /* Partition views of A (2x2 outer, 3x3 repartitioned). */
  FLA_Obj  ATL,   ATR,      A00, A01, A02, 
           ABL,   ABR,      A10, A11, A12,
                            A20, A21, A22;
  /* Partition views of the workspaces U, V, Y, Z (2x1 outer, 3x1 repartitioned). */
  FLA_Obj  UT,              U0,
           UB,              U1,
                            U2;
  FLA_Obj  VT,              V0,
           VB,              V1,
                            V2;
  FLA_Obj  YT,              Y0,
           YB,              Y1,
                            Y2; 
  FLA_Obj  ZT,              Z0,
           ZB,              Z1,
                            Z2;
  /* Partition views of TU and TV (1x2 outer, 1x3 repartitioned). */
  FLA_Obj  TUL,   TUR,      TU0, TU1, TU2;
  FLA_Obj  TVL,   TVR,      TV0, TV1, TV2;

  /* Workspace objects owned by this function. */
  FLA_Obj  U, V, Y, Z;
  /* Per-iteration sub-views: the left b columns / top b rows of the
     bottom partitions, and the part of those below the first b rows. */
  FLA_Obj  ABR_l, ABR_t;
  FLA_Obj  UB_l, U2_l;
  FLA_Obj  VB_l, V2_l;
  FLA_Obj  YB_l, Y2_l;
  FLA_Obj  ZB_l, Z2_l;
  FLA_Obj  TU1_tl;
  FLA_Obj  TV1_tl;
  /* Receivers for partition outputs that are not used afterwards. */
  FLA_Obj  none, none2, none3;
  FLA_Obj  VB_tl,
           VB_bl;
  FLA_Datatype datatype_A;
  dim_t        m_A, n_A;
  dim_t        b_alg, b;

  /* The algorithmic block size is taken from the length of TU. */
  b_alg      = FLA_Obj_length( TU );

  datatype_A = FLA_Obj_datatype( A );
  m_A        = FLA_Obj_length( A );
  n_A        = FLA_Obj_width( A );

  /* Workspaces share A's datatype and have b_alg columns each:
     U and Z have m_A rows, V and Y have n_A rows. */
  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &V );
  FLA_Obj_create( datatype_A, n_A, b_alg, 0, 0, &Y );
  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );

  /* Initial partitioning: the top-left / top / left parts are requested
     with size 0. */
  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,   0, 0, FLA_TL );
  FLA_Part_2x1( U,    &UT,
                      &UB,            0, FLA_TOP );
  FLA_Part_2x1( V,    &VT,
                      &VB,            0, FLA_TOP );
  FLA_Part_2x1( Y,    &YT,
                      &YB,            0, FLA_TOP );
  FLA_Part_2x1( Z,    &ZT,
                      &ZB,            0, FLA_TOP );
  FLA_Part_1x2( TU,   &TUL, &TUR,      0, FLA_LEFT ); 
  FLA_Part_1x2( TV,   &TVL, &TVR,      0, FLA_LEFT ); 

  /* Iterate while the smaller dimension of ABR is nonzero. */
  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    /* Block size for this step: b_alg, clipped to what remains in ABR. */
    b = min( FLA_Obj_min_dim( ABR ), b_alg );

    /* Split a b-sized block (A11, U1, V1, Y1, Z1, TU1, TV1) off the
       bottom / right partitions. */
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00, /**/ &A01, &A02,
                        /* ************* */   /* ******************** */
                                                &A10, /**/ &A11, &A12,
                           ABL, /**/ ABR,       &A20, /**/ &A21, &A22,
                           b, b, FLA_BR );
    FLA_Repart_2x1_to_3x1( UT,                &U0,
                        /* ** */            /* ** */
                                              &U1,
                           UB,                &U2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( VT,                &V0,
                        /* ** */            /* ** */
                                              &V1,
                           VB,                &V2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( YT,                &Y0,
                        /* ** */            /* ** */
                                              &Y1,
                           YB,                &Y2,        b, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( ZT,                &Z0,
                        /* ** */            /* ** */
                                              &Z1,
                           ZB,                &Z2,        b, FLA_BOTTOM );
    FLA_Repart_1x2_to_1x3( TUL, /**/ TUR,       &TU0, /**/ &TU1, &TU2,
                           b, FLA_RIGHT );
    FLA_Repart_1x2_to_1x3( TVL, /**/ TVR,       &TV0, /**/ &TV1, &TV2,
                           b, FLA_RIGHT );

    /*------------------------------------------------------------*/

    /* Top-left b-by-b part of the current TU and TV panels; the other
       three quadrants are discarded. */
    FLA_Part_2x2( TU1,     &TU1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    FLA_Part_2x2( TV1,     &TV1_tl, &none,   
                           &none2,  &none3,   b, b, FLA_TL ); 

    /* Left b columns and top b rows of ABR. */
    FLA_Part_1x2( ABR,    &ABR_l, &none,    b, FLA_LEFT );
    FLA_Part_2x1( ABR,    &ABR_t,
                          &none,            b, FLA_TOP );

    /* Left b columns of each workspace's bottom partition (the workspaces
       were created b_alg columns wide, and b may be smaller). */
    FLA_Part_1x2( UB,     &UB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( VB,     &VB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( YB,     &YB_l,  &none,    b, FLA_LEFT );
    FLA_Part_1x2( ZB,     &ZB_l,  &none,    b, FLA_LEFT );

    /* Drop the top b rows of those column panels; the remainders
       (U2_l, V2_l, Y2_l, Z2_l) are the operands of the A22 update below. */
    FLA_Part_2x1( UB_l,   &none,
                          &U2_l,            b, FLA_TOP );
    FLA_Part_2x1( VB_l,   &none,
                          &V2_l,            b, FLA_TOP );
    FLA_Part_2x1( YB_l,   &none, 
                          &Y2_l,            b, FLA_TOP );
    FLA_Part_2x1( ZB_l,   &none,
                          &Z2_l,            b, FLA_TOP );

    /* Step routine on the trailing submatrix.  The commented-out calls are
       alternative step implementations (unb / ofu) with the same argument
       list. */
    // [ ABR, YB, ZB, TU1, TV1 ] = FLA_Bidiag_UT_u_step_unb_var4( ABR, TU1, TV1, b );
    //FLA_Bidiag_UT_u_step_unb_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
    //FLA_Bidiag_UT_u_step_ofu_var4( ABR, YB, ZB, TU1_tl, TV1_tl );
    FLA_Bidiag_UT_u_step_opt_var4( ABR, YB, ZB, TU1_tl, TV1_tl );

    /* The trailing update is skipped when A22 has no rows.
       NOTE(review): only the row count of A22 is tested here; confirm that
       a zero-width A22 is handled by the calls inside this branch. */
    if ( FLA_Obj_length( A22 ) > 0 )
    {
      // Build UB from the left b columns of ABR; the triangularize call
      // is applied to UB_l itself with FLA_LOWER_TRIANGULAR / FLA_UNIT_DIAG.
      FLA_Copy( ABR_l, UB_l );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_l );

      // Build VB from the transposed top b rows of ABR, with explicit unit
      // subdiagonal and zeros: the first row of VB_l is set to zero and only
      // the rows below it are triangularized with a unit diagonal.
      FLA_Copyt( FLA_TRANSPOSE, ABR_t, VB_l );
      FLA_Part_2x1( VB_l,   &VB_tl,
                            &VB_bl,            1, FLA_TOP );
      FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, VB_bl );
      FLA_Obj_set_to_scalar( FLA_ZERO, VB_tl );

      // A22 = A22 - U2 * Y2' - Z2 * V2';
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                         FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
      FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
                         FLA_MINUS_ONE, Z2_l, V2_l, FLA_ONE, A22 );
    }

    /*------------------------------------------------------------*/

    /* Merge the block just processed into the top / left partitions so
       that the next iteration sees a smaller ABR. */
    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00, A01, /**/ A02,
                                                     A10, A11, /**/ A12,
                            /* ************** */  /* ****************** */
                              &ABL, /**/ &ABR,       A20, A21, /**/ A22,
                              FLA_TL );
    FLA_Cont_with_3x1_to_2x1( &UT,                U0,
                                                  U1,
                            /* ** */           /* ** */
                              &UB,                U2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &VT,                V0,
                                                  V1,
                            /* ** */           /* ** */
                              &VB,                V2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &YT,                Y0,
                                                  Y1,
                            /* ** */           /* ** */
                              &YB,                Y2,     FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &ZT,                Z0,
                                                  Z1,
                            /* ** */           /* ** */
                              &ZB,                Z2,     FLA_TOP );
    FLA_Cont_with_1x3_to_1x2( &TUL, /**/ &TUR,       TU0, TU1, /**/ TU2,
                              FLA_LEFT );
    FLA_Cont_with_1x3_to_1x2( &TVL, /**/ &TVR,       TV0, TV1, /**/ TV2,
                              FLA_LEFT );
  }

  /* Release the workspaces created at the top of the function. */
  FLA_Obj_free( &U );
  FLA_Obj_free( &V );
  FLA_Obj_free( &Y );
  FLA_Obj_free( &Z );

  return FLA_SUCCESS;
}