libflame revision_anchor
Functions
FLA_Bidiag_UT.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Bidiag_UT (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
FLA_Error FLA_Bidiag_UT_internal (FLA_Obj A, FLA_Obj TU, FLA_Obj TV, fla_bidiagut_t *cntl)
FLA_Error FLA_Bidiag_UT_l (FLA_Obj A, FLA_Obj TU, FLA_Obj TV, fla_bidiagut_t *cntl)
FLA_Error FLA_Bidiag_UT_u (FLA_Obj A, FLA_Obj TU, FLA_Obj TV, fla_bidiagut_t *cntl)
FLA_Error FLA_Bidiag_UT_create_T (FLA_Obj A, FLA_Obj *TU, FLA_Obj *TV)
FLA_Error FLA_Bidiag_UT_recover_tau (FLA_Obj TU, FLA_Obj TV, FLA_Obj tu, FLA_Obj tv)
FLA_Error FLA_Bidiag_UT_extract_diagonals (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Bidiag_UT_u_extract_diagonals (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Bidiag_UT_l_extract_diagonals (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Bidiag_UT_realify (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Bidiag_UT_l_realify_unb (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Bidiag_UT_l_realify_opt (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Bidiag_UT_u_realify_unb (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Bidiag_UT_u_realify_opt (FLA_Obj A, FLA_Obj d, FLA_Obj e)

Function Documentation

FLA_Error FLA_Bidiag_UT ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)

References FLA_Bidiag_UT_check(), FLA_Bidiag_UT_internal(), and FLA_Check_error_level().

{
  FLA_Error r_val;

  // Check parameters.
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Bidiag_UT_check( A, TU, TV );

  // Invoke FLA_Bidiag_UT_internal() with the standard control tree.
  r_val = FLA_Bidiag_UT_internal( A, TU, TV, fla_bidiagut_cntl_leaf );

  return r_val;
}
FLA_Error FLA_Bidiag_UT_create_T ( FLA_Obj  A,
FLA_Obj TU,
FLA_Obj TV 
)

References FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), and FLA_Query_blocksize().

{
  FLA_Datatype datatype;
  dim_t        b_alg, k;
  dim_t        rs_T, cs_T;

  // Query the datatype of A.
  datatype = FLA_Obj_datatype( A );

  // Query the blocksize from the library.
  b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN );

  // Scale the blocksize by a pre-set global constant.
  b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_BIDIAG_INNER_TO_OUTER_B_RATIO );

  // Query the minimum dimension of A.
  k = FLA_Obj_min_dim( A );

  // Figure out whether TU and TV should be row-major or column-major.
  if ( FLA_Obj_row_stride( A ) == 1 )
  {
    rs_T = 1;
    cs_T = b_alg;
  }
  else // if ( FLA_Obj_col_stride( A ) == 1 )
  {
    rs_T = k;
    cs_T = 1;
  }

  // Create two b_alg x k matrices to hold the block Householder transforms
  // that will be accumulated within the bidiagonal reduction algorithm.
  FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, TU );
  FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, TV );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_extract_diagonals ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)
FLA_Error FLA_Bidiag_UT_internal ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV,
fla_bidiagut_t cntl 
)

References FLA_Bidiag_UT_internal_check(), FLA_Bidiag_UT_u(), FLA_Check_error_level(), FLA_Obj_length(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT().

{
    FLA_Error r_val = FLA_SUCCESS;
    
    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_Bidiag_UT_internal_check( A, TU, TV, cntl );

    if ( FLA_Obj_length( A ) >= FLA_Obj_width( A ) )
    {
        r_val = FLA_Bidiag_UT_u( A, TU, TV, cntl );
    }
    else // if ( FLA_Obj_length( A ) < FLA_Obj_width( A ) )
    {
        //r_val = FLA_Bidiag_UT_l( A, TU, TV, cntl );
        FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
    }

    return r_val;
}
FLA_Error FLA_Bidiag_UT_l ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV,
fla_bidiagut_t cntl 
)
FLA_Error FLA_Bidiag_UT_l_extract_diagonals ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), dcomplex::real, and scomplex::real.

Referenced by FLA_Bidiag_UT_extract_diagonals().

{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_d;
  int          inc_e;
  int          i;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );

  inc_e    = FLA_Obj_vector_inc( e );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float*    buff_A = FLA_FLOAT_PTR( A );
      float*    buff_d = FLA_FLOAT_PTR( d );
      float*    buff_e = FLA_FLOAT_PTR( e );

      for ( i = 0; i < m_A; ++i )
      {
        float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        float*    a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        float*    delta1   = buff_d + (i  )*inc_d;
        float*    epsilon1 = buff_e + (i  )*inc_e;

        int       m_ahead  = m_A - i - 1;

        // delta1 = alpha11;
        *delta1 = *alpha11;

        // epsilon1 = a21_t;
        if ( m_ahead > 0 )
          *epsilon1 = *a21_t;
      }

      break;
    }

    case FLA_DOUBLE:
    {
      double*   buff_A = FLA_DOUBLE_PTR( A );
      double*   buff_d = FLA_DOUBLE_PTR( d );
      double*   buff_e = FLA_DOUBLE_PTR( e );

      for ( i = 0; i < m_A; ++i )
      {
        double*   alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        double*   a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        double*   delta1   = buff_d + (i  )*inc_d;
        double*   epsilon1 = buff_e + (i  )*inc_e;

        int       m_ahead  = m_A - i - 1;

        // delta1 = alpha11;
        *delta1 = *alpha11;

        // epsilon1 = a21_t;
        if ( m_ahead > 0 )
          *epsilon1 = *a21_t;
      }

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      float*    buff_d = FLA_FLOAT_PTR( d );
      float*    buff_e = FLA_FLOAT_PTR( e );

      for ( i = 0; i < m_A; ++i )
      {
        scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        scomplex* a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        float*    delta1   = buff_d + (i  )*inc_d;
        float*    epsilon1 = buff_e + (i  )*inc_e;

        int       m_ahead  = m_A - i - 1;

        // delta1 = alpha11;
        *delta1 = alpha11->real;

        // epsilon1 = a21_t;
        if ( m_ahead > 0 )
          *epsilon1 = a21_t->real;
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      double*   buff_d = FLA_DOUBLE_PTR( d );
      double*   buff_e = FLA_DOUBLE_PTR( e );

      for ( i = 0; i < m_A; ++i )
      {
        dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        dcomplex* a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        double*   delta1   = buff_d + (i  )*inc_d;
        double*   epsilon1 = buff_e + (i  )*inc_e;

        int       m_ahead  = m_A - i - 1;

        // delta1 = alpha11;
        *delta1 = alpha11->real;

        // epsilon1 = a21_t;
        if ( m_ahead > 0 )
          *epsilon1 = a21_t->real;
      }

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_l_realify_opt ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References bli_dsetv(), bli_ssetv(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_ONE, FLA_ZERO, dcomplex::imag, and scomplex::imag.

Referenced by FLA_Bidiag_UT_realify().

{
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          min_m_n;
  int          rs_A, cs_A;
  int          inc_d;
  int          inc_e;
  int          i;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );
  min_m_n  = FLA_Obj_min_dim( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );

  inc_e    = FLA_Obj_vector_inc( e );


  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_d = FLA_FLOAT_PTR( d );
      float* buff_e = FLA_FLOAT_PTR( e );
      float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );

      bli_ssetv( min_m_n,
                 buff_1,
                 buff_d, inc_d );

      bli_ssetv( min_m_n,
                 buff_1,
                 buff_e, inc_e );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_d = FLA_DOUBLE_PTR( d );
      double* buff_e = FLA_DOUBLE_PTR( e );
      double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );

      bli_dsetv( min_m_n,
                 buff_1,
                 buff_d, inc_d );

      bli_dsetv( min_m_n,
                 buff_1,
                 buff_e, inc_e );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_d = FLA_COMPLEX_PTR( d );
      scomplex* buff_e = FLA_COMPLEX_PTR( e );
      scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
      float*    buff_0 = FLA_FLOAT_PTR( FLA_ZERO );

      for ( i = 0; i < min_m_n; ++i )
      {
        scomplex* a10t_r   = buff_A + (i-1)*cs_A + (i  )*rs_A;
        scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        scomplex* a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        scomplex* delta1   = buff_d + (i  )*inc_d;
        scomplex* epsilon1 = buff_e + (i  )*inc_e;
        scomplex  absv;

        int       m_ahead  = m_A - i - 1;
        int       m_behind = i;

        if ( m_behind == 0 )
        {
          // FLA_Obj_set_to_scalar( FLA_ONE, delta1 );
          *delta1 = *buff_1;
        }
        else
        {
          // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
          // FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
          // FLA_Absolute_value( absv );
          // FLA_Inv_scal( absv, delta1 );
          bli_ccopys( BLIS_CONJUGATE, a10t_r, delta1 );
          bli_cabsval2( a10t_r, &absv );
          bli_cinvscals( &absv, delta1 );

          // FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
          // FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );
          bli_cscals( delta1, a10t_r );
          a10t_r->imag = *buff_0;

          // FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
          bli_cscals( delta1, alpha11 );
        }

        // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
        // FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
        // FLA_Absolute_value( absv );
        // FLA_Inv_scal( absv, epsilon1 );
        bli_ccopys( BLIS_CONJUGATE, alpha11, epsilon1 );
        bli_cabsval2( alpha11, &absv );
        bli_cinvscals( &absv, epsilon1 );

        // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
        // FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
        bli_cscals( epsilon1, alpha11 );
        alpha11->imag = *buff_0;

        if ( m_ahead > 0 )
        {
          // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
          bli_cscals( epsilon1, a21_t );
        }
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
      dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e );
      dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
      double*   buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );

      for ( i = 0; i < min_m_n; ++i )
      {
        dcomplex* a10t_r   = buff_A + (i-1)*cs_A + (i  )*rs_A;
        dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        dcomplex* a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        dcomplex* delta1   = buff_d + (i  )*inc_d;
        dcomplex* epsilon1 = buff_e + (i  )*inc_e;
        dcomplex  absv;

        int       m_ahead  = m_A - i - 1;
        int       m_behind = i;

        if ( m_behind == 0 )
        {
          // FLA_Obj_set_to_scalar( FLA_ONE, delta1 );
          *delta1 = *buff_1;
        }
        else
        {
          // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
          // FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
          // FLA_Absolute_value( absv );
          // FLA_Inv_scal( absv, delta1 );
          bli_zcopys( BLIS_CONJUGATE, a10t_r, delta1 );
          bli_zabsval2( a10t_r, &absv );
          bli_zinvscals( &absv, delta1 );

          // FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
          // FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );
          bli_zscals( delta1, a10t_r );
          a10t_r->imag = *buff_0;

          // FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
          bli_zscals( delta1, alpha11 );
        }

        // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
        // FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
        // FLA_Absolute_value( absv );
        // FLA_Inv_scal( absv, epsilon1 );
        bli_zcopys( BLIS_CONJUGATE, alpha11, epsilon1 );
        bli_zabsval2( alpha11, &absv );
        bli_zinvscals( &absv, epsilon1 );

        // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
        // FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
        bli_zscals( epsilon1, alpha11 );
        alpha11->imag = *buff_0;

        if ( m_ahead > 0 )
        {
          // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
          bli_zscals( epsilon1, a21_t );
        }
      }

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_l_realify_unb ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_set_imag_part(), FLA_Obj_set_to_scalar(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), and FLA_ZERO.

{
  FLA_Obj ATL,   ATR,      A00,  a01,     A02, 
          ABL,   ABR,      a10t, alpha11, a12t,
                           A20,  a21,     A22;

  FLA_Obj dT,              d0,
          dB,              delta1,
                           d2;

  FLA_Obj eT,              e0,
          eB,              epsilon1,
                           e2;

  FLA_Obj a10t_l, a10t_r;

  FLA_Obj a21_t,
          a21_b;

  FLA_Obj absv;


  FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  FLA_Part_2x1( d,    &dT,
                      &dB,            0, FLA_TOP );

  FLA_Part_2x1( e,    &eT,
                      &eB,            0, FLA_TOP );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_2x1_to_3x1( dT,                &d0, 
                        /* ** */            /* ****** */
                                              &delta1, 
                           dB,                &d2,        1, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( eT,                &e0, 
                        /* ** */            /* ******** */
                                              &epsilon1, 
                           eB,                &e2,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    if ( FLA_Obj_width( a10t ) == 0 )
    {
      // delta1 = 1;
      FLA_Obj_set_to_scalar( FLA_ONE, delta1 );
    }
    else
    {
      FLA_Part_1x2( a10t,   &a10t_l, &a10t_r,    1, FLA_RIGHT );

      // delta1 = conj(a10t_r) / abs(a10t_r); 
      FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
      FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
      FLA_Absolute_value( absv );
      FLA_Inv_scal( absv, delta1 );

      // a10t_r = delta1 * a10t_r;
      // a10t_r.imag = 0;
      FLA_Scalc( FLA_NO_CONJUGATE, delta1, a10t_r );
      FLA_Obj_set_imag_part( FLA_ZERO, a10t_r );

      // alpha11 = delta1 * alpha11;
      FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
    }

    // epsilon1 = conj(alpha11) / abs(alpha11);
    FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, epsilon1 );
    FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
    FLA_Absolute_value( absv );
    FLA_Inv_scal( absv, epsilon1 );

    // alpha11 = epsilon1 * alpha11;
    // alpha11.imag = 0;
    FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
    FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );

    if ( FLA_Obj_length( a21 ) > 0 )
    {
      FLA_Part_2x1( a21,   &a21_t,
                           &a21_b,    1, FLA_TOP );

      // a21_t = epsilon1 * a21_t;
      FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a21_t );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &dT,                d0, 
                                                  delta1, 
                            /* ** */           /* ****** */
                              &dB,                d2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &eT,                e0, 
                                                  epsilon1, 
                            /* ** */           /* ******** */
                              &eB,                e2,     FLA_TOP );
  }

  FLA_Obj_free( &absv );

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_realify ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References FLA_Bidiag_UT_l_realify_opt(), FLA_Bidiag_UT_realify_check(), FLA_Bidiag_UT_u_realify_opt(), FLA_Check_error_level(), FLA_Obj_is_real(), FLA_Obj_length(), and FLA_Obj_width().

{
  FLA_Error r_val = FLA_SUCCESS;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Bidiag_UT_realify_check( A, d, e );

  if ( FLA_Obj_is_real( A ) ) return FLA_SUCCESS;

  if ( FLA_Obj_length( A ) < FLA_Obj_width( A ) )
    //r_val = FLA_Bidiag_UT_l_realify_unb( A, d, e );
    r_val = FLA_Bidiag_UT_l_realify_opt( A, d, e );
  else
    //r_val = FLA_Bidiag_UT_u_realify_unb( A, d, e );
    r_val = FLA_Bidiag_UT_u_realify_opt( A, d, e );

  return r_val;
}
FLA_Error FLA_Bidiag_UT_recover_tau ( FLA_Obj  TU,
FLA_Obj  TV,
FLA_Obj  tu,
FLA_Obj  tv 
)
FLA_Error FLA_Bidiag_UT_u ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV,
fla_bidiagut_t cntl 
)

References FLA_Bidiag_UT_u_blk_var1(), FLA_Bidiag_UT_u_blk_var2(), FLA_Bidiag_UT_u_blk_var3(), FLA_Bidiag_UT_u_blk_var4(), FLA_Bidiag_UT_u_blk_var5(), FLA_Bidiag_UT_u_opt_var1(), FLA_Bidiag_UT_u_opt_var2(), FLA_Bidiag_UT_u_opt_var3(), FLA_Bidiag_UT_u_opt_var4(), FLA_Bidiag_UT_u_opt_var5(), FLA_Bidiag_UT_u_unb_var1(), FLA_Bidiag_UT_u_unb_var2(), FLA_Bidiag_UT_u_unb_var3(), FLA_Bidiag_UT_u_unb_var4(), and FLA_Bidiag_UT_u_unb_var5().

Referenced by FLA_Bidiag_UT_internal().

{
    FLA_Error r_val = FLA_SUCCESS;
    
    if      ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 )
    {
        r_val = FLA_Bidiag_UT_u_unb_var1( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
    {
        r_val = FLA_Bidiag_UT_u_unb_var2( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT3 )
    {
        r_val = FLA_Bidiag_UT_u_unb_var3( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT4 )
    {
        r_val = FLA_Bidiag_UT_u_unb_var4( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT5 )
    {
        r_val = FLA_Bidiag_UT_u_unb_var5( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
    {
        r_val = FLA_Bidiag_UT_u_opt_var1( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
    {
        r_val = FLA_Bidiag_UT_u_opt_var2( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT3 )
    {
        r_val = FLA_Bidiag_UT_u_opt_var3( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT4 )
    {
        r_val = FLA_Bidiag_UT_u_opt_var4( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT5 )
    {
        r_val = FLA_Bidiag_UT_u_opt_var5( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
    {
        r_val = FLA_Bidiag_UT_u_blk_var1( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
    {
        r_val = FLA_Bidiag_UT_u_blk_var2( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
    {
        r_val = FLA_Bidiag_UT_u_blk_var3( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT4 )
    {
        r_val = FLA_Bidiag_UT_u_blk_var4( A, TU, TV );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT5 )
    {
        r_val = FLA_Bidiag_UT_u_blk_var5( A, TU, TV );
    }
    else
    {
        FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
    }

    return r_val;
}
FLA_Error FLA_Bidiag_UT_u_extract_diagonals ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), dcomplex::real, and scomplex::real.

Referenced by FLA_Bidiag_UT_extract_diagonals().

{
  FLA_Datatype datatype;
  int          n_A;
  int          rs_A, cs_A;
  int          inc_d;
  int          inc_e;
  int          i;

  datatype = FLA_Obj_datatype( A );

  n_A      = FLA_Obj_width( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );

  inc_e    = FLA_Obj_vector_inc( e );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float*    buff_A = FLA_FLOAT_PTR( A );
      float*    buff_d = FLA_FLOAT_PTR( d );
      float*    buff_e = FLA_FLOAT_PTR( e );

      for ( i = 0; i < n_A; ++i )
      {
        float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        float*    a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        float*    delta1   = buff_d + (i  )*inc_d;
        float*    epsilon1 = buff_e + (i  )*inc_e;

        int       n_ahead  = n_A - i - 1;

        // delta1 = alpha11;
        *delta1 = *alpha11;

        // epsilon1 = a12t_l;
        if ( n_ahead > 0 )
          *epsilon1 = *a12t_l;
      }

      break;
    }

    case FLA_DOUBLE:
    {
      double*   buff_A = FLA_DOUBLE_PTR( A );
      double*   buff_d = FLA_DOUBLE_PTR( d );
      double*   buff_e = FLA_DOUBLE_PTR( e );

      for ( i = 0; i < n_A; ++i )
      {
        double*   alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        double*   a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        double*   delta1   = buff_d + (i  )*inc_d;
        double*   epsilon1 = buff_e + (i  )*inc_e;

        int       n_ahead  = n_A - i - 1;

        // delta1 = alpha11;
        *delta1 = *alpha11;

        // epsilon1 = a12t_l;
        if ( n_ahead > 0 )
          *epsilon1 = *a12t_l;
      }

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      float*    buff_d = FLA_FLOAT_PTR( d );
      float*    buff_e = FLA_FLOAT_PTR( e );

      for ( i = 0; i < n_A; ++i )
      {
        scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        scomplex* a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        float*    delta1   = buff_d + (i  )*inc_d;
        float*    epsilon1 = buff_e + (i  )*inc_e;

        int       n_ahead  = n_A - i - 1;

        // delta1 = alpha11;
        *delta1 = alpha11->real;

        // epsilon1 = a12t_l;
        if ( n_ahead > 0 )
          *epsilon1 = a12t_l->real;
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      double*   buff_d = FLA_DOUBLE_PTR( d );
      double*   buff_e = FLA_DOUBLE_PTR( e );

      for ( i = 0; i < n_A; ++i )
      {
        dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        dcomplex* a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        double*   delta1   = buff_d + (i  )*inc_d;
        double*   epsilon1 = buff_e + (i  )*inc_e;

        int       n_ahead  = n_A - i - 1;

        // delta1 = alpha11;
        *delta1 = alpha11->real;

        // epsilon1 = a12t_l;
        if ( n_ahead > 0 )
          *epsilon1 = a12t_l->real;
      }

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_realify_opt ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References bli_dsetv(), bli_ssetv(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_ONE, FLA_ZERO, dcomplex::imag, and scomplex::imag.

Referenced by FLA_Bidiag_UT_realify().

{
  FLA_Datatype datatype;
  int          m_A, n_A;
  int          min_m_n;
  int          rs_A, cs_A;
  int          inc_d;
  int          inc_e;
  int          i;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );
  n_A      = FLA_Obj_width( A );
  min_m_n  = FLA_Obj_min_dim( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );

  inc_e    = FLA_Obj_vector_inc( e );


  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_d = FLA_FLOAT_PTR( d );
      float* buff_e = FLA_FLOAT_PTR( e );
      float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );

      bli_ssetv( min_m_n,
                 buff_1,
                 buff_d, inc_d );

      bli_ssetv( min_m_n,
                 buff_1,
                 buff_e, inc_e );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_d = FLA_DOUBLE_PTR( d );
      double* buff_e = FLA_DOUBLE_PTR( e );
      double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );

      bli_dsetv( min_m_n,
                 buff_1,
                 buff_d, inc_d );

      bli_dsetv( min_m_n,
                 buff_1,
                 buff_e, inc_e );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_d = FLA_COMPLEX_PTR( d );
      scomplex* buff_e = FLA_COMPLEX_PTR( e );
      scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
      float*    buff_0 = FLA_FLOAT_PTR( FLA_ZERO );

      for ( i = 0; i < min_m_n; ++i )
      {
        scomplex* a01_b    = buff_A + (i  )*cs_A + (i-1)*rs_A;
        scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        scomplex* a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        scomplex* delta1   = buff_d + (i  )*inc_d;
        scomplex* epsilon1 = buff_e + (i  )*inc_e;
        scomplex  absv;

        int       n_ahead  = n_A - i - 1;
        int       n_behind = i;

        if ( n_behind == 0 )
        {
          // FLA_Obj_set_to_scalar( FLA_ONE, epsilon1 );
          *epsilon1 = *buff_1;
        }
        else
        {
          // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
          // FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
          // FLA_Absolute_value( absv );
          // FLA_Inv_scal( absv, epsilon1 );
          bli_ccopys( BLIS_CONJUGATE, a01_b, epsilon1 );
          bli_cabsval2( a01_b, &absv );
          bli_cinvscals( &absv, epsilon1 );

          // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
          // FLA_Obj_set_imag_part( FLA_ZERO, a01_b );
          bli_cscals( epsilon1, a01_b );
          a01_b->imag = *buff_0;

          // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
          bli_cscals( epsilon1, alpha11 );
        }

        // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
        // FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
        // FLA_Absolute_value( absv );
        // FLA_Inv_scal( absv, delta1 );
        bli_ccopys( BLIS_CONJUGATE, alpha11, delta1 );
        bli_cabsval2( alpha11, &absv );
        bli_cinvscals( &absv, delta1 );

        // FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
        // FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
        bli_cscals( delta1, alpha11 );
        alpha11->imag = *buff_0;

        if ( n_ahead > 0 )
        {
          // FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
          bli_cscals( delta1, a12t_l );
        }
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
      dcomplex* buff_e = FLA_DOUBLE_COMPLEX_PTR( e );
      dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
      double*   buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );

      for ( i = 0; i < min_m_n; ++i )
      {
        dcomplex* a01_b    = buff_A + (i  )*cs_A + (i-1)*rs_A;
        dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        dcomplex* a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        dcomplex* delta1   = buff_d + (i  )*inc_d;
        dcomplex* epsilon1 = buff_e + (i  )*inc_e;
        dcomplex  absv;

        int       n_ahead  = n_A - i - 1;
        int       n_behind = i;

        if ( n_behind == 0 )
        {
          // FLA_Obj_set_to_scalar( FLA_ONE, epsilon1 );
          *epsilon1 = *buff_1;
        }
        else
        {
          // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
          // FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
          // FLA_Absolute_value( absv );
          // FLA_Inv_scal( absv, epsilon1 );
          bli_zcopys( BLIS_CONJUGATE, a01_b, epsilon1 );
          bli_zabsval2( a01_b, &absv );
          bli_zinvscals( &absv, epsilon1 );

          // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
          // FLA_Obj_set_imag_part( FLA_ZERO, a01_b );
          bli_zscals( epsilon1, a01_b );
          a01_b->imag = *buff_0;

          // FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
          bli_zscals( epsilon1, alpha11 );
        }

        // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
        // FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
        // FLA_Absolute_value( absv );
        // FLA_Inv_scal( absv, delta1 );
        bli_zcopys( BLIS_CONJUGATE, alpha11, delta1 );
        bli_zabsval2( alpha11, &absv );
        bli_zinvscals( &absv, delta1 );

        // FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
        // FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );
        bli_zscals( delta1, alpha11 );
        alpha11->imag = *buff_0;

        if ( n_ahead > 0 )
        {
          // FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
          bli_zscals( delta1, a12t_l );
        }
      }

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Bidiag_UT_u_realify_unb ( FLA_Obj  A,
FLA_Obj  d,
FLA_Obj  e 
)

References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_set_imag_part(), FLA_Obj_set_to_scalar(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), and FLA_ZERO.

{
  FLA_Obj ATL,   ATR,      A00,  a01,     A02, 
          ABL,   ABR,      a10t, alpha11, a12t,
                           A20,  a21,     A22;

  FLA_Obj dT,              d0,
          dB,              delta1,
                           d2;

  FLA_Obj eT,              e0,
          eB,              epsilon1,
                           e2;

  FLA_Obj a01_t,
          a01_b;

  FLA_Obj a12t_l, a12t_r;

  FLA_Obj absv;


  FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     0, 0, FLA_TL );

  FLA_Part_2x1( d,    &dT,
                      &dB,            0, FLA_TOP );

  FLA_Part_2x1( e,    &eT,
                      &eB,            0, FLA_TOP );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_2x1_to_3x1( dT,                &d0, 
                        /* ** */            /* ****** */
                                              &delta1, 
                           dB,                &d2,        1, FLA_BOTTOM );

    FLA_Repart_2x1_to_3x1( eT,                &e0, 
                        /* ** */            /* ******** */
                                              &epsilon1, 
                           eB,                &e2,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    if ( FLA_Obj_length( a01 ) == 0 )
    {
      // epsilon1 = 1;
      FLA_Obj_set_to_scalar( FLA_ONE, epsilon1 );
    }
    else
    {
      FLA_Part_2x1( a01,   &a01_t,
                           &a01_b,    1, FLA_BOTTOM );

      // epsilon1 = conj(a01_b) / abs(a01_b); 
      FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, epsilon1 );
      FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
      FLA_Absolute_value( absv );
      FLA_Inv_scal( absv, epsilon1 );

      // a01_b = epsilon1 * a01_b;
      // a01_b.imag = 0;
      FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, a01_b );
      FLA_Obj_set_imag_part( FLA_ZERO, a01_b );

      // alpha11 = epsilon1 * alpha11;
      FLA_Scalc( FLA_NO_CONJUGATE, epsilon1, alpha11 );
    }

    // delta1 = conj(alpha11) / abs(alpha11);
    FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, alpha11, delta1 );
    FLA_Copyt( FLA_NO_TRANSPOSE, alpha11, absv );
    FLA_Absolute_value( absv );
    FLA_Inv_scal( absv, delta1 );

    // alpha11 = delta1 * alpha11;
    // alpha11.imag = 0;
    FLA_Scalc( FLA_NO_CONJUGATE, delta1, alpha11 );
    FLA_Obj_set_imag_part( FLA_ZERO, alpha11 );

    if ( FLA_Obj_width( a12t ) > 0 )
    {
      FLA_Part_1x2( a12t,   &a12t_l, &a12t_r,    1, FLA_LEFT );

      // a12t_l = delta1 * a12t_l;
      FLA_Scalc( FLA_NO_CONJUGATE, delta1, a12t_l );
    }

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &dT,                d0, 
                                                  delta1, 
                            /* ** */           /* ****** */
                              &dB,                d2,     FLA_TOP );

    FLA_Cont_with_3x1_to_2x1( &eT,                e0, 
                                                  epsilon1, 
                            /* ** */           /* ******** */
                              &eB,                e2,     FLA_TOP );
  }

  FLA_Obj_free( &absv );

  return FLA_SUCCESS;
}