libflame  revision_anchor
Functions
FLA_Tridiag_UT.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Tridiag_UT (FLA_Uplo uplo, FLA_Obj A, FLA_Obj T)
FLA_Error FLA_Tridiag_UT_internal (FLA_Uplo uplo, FLA_Obj A, FLA_Obj T, fla_tridiagut_t *cntl)
FLA_Error FLA_Tridiag_UT_l (FLA_Obj A, FLA_Obj T, fla_tridiagut_t *cntl)
FLA_Error FLA_Tridiag_UT_u (FLA_Obj A, FLA_Obj T, fla_tridiagut_t *cntl)
FLA_Error FLA_Tridiag_UT_create_T (FLA_Obj A, FLA_Obj *T)
FLA_Error FLA_Tridiag_UT_recover_tau (FLA_Obj T, FLA_Obj t)
FLA_Error FLA_Tridiag_UT_extract_diagonals (FLA_Uplo uplo, FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Tridiag_UT_l_extract_diagonals (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Tridiag_UT_u_extract_diagonals (FLA_Obj A, FLA_Obj d, FLA_Obj e)
FLA_Error FLA_Tridiag_UT_realify (FLA_Uplo uplo, FLA_Obj A, FLA_Obj d)
FLA_Error FLA_Tridiag_UT_l_realify_unb (FLA_Obj A, FLA_Obj d)
FLA_Error FLA_Tridiag_UT_l_realify_opt (FLA_Obj A, FLA_Obj d)
FLA_Error FLA_Tridiag_UT_u_realify_unb (FLA_Obj A, FLA_Obj d)
FLA_Error FLA_Tridiag_UT_u_realify_opt (FLA_Obj A, FLA_Obj d)
FLA_Error FLA_Tridiag_UT_shift_U (FLA_Uplo uplo, FLA_Obj A)
FLA_Error FLA_Tridiag_UT_shift_U_l_ops (int m_A, float *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Tridiag_UT_shift_U_u_ops (int m_A, float *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Tridiag_UT_shift_U_l_opd (int m_A, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Tridiag_UT_shift_U_u_opd (int m_A, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Tridiag_UT_shift_U_l_opc (int m_A, scomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Tridiag_UT_shift_U_u_opc (int m_A, scomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Tridiag_UT_shift_U_l_opz (int m_A, dcomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Tridiag_UT_shift_U_u_opz (int m_A, dcomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Tridiag_UT_form_Q (FLA_Uplo uplo, FLA_Obj A, FLA_Obj T)
FLA_Error FLA_Tridiag_UT_form_Q_l_blk_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj W)
FLA_Error FLA_Tridiag_UT_form_Q_u_blk_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj W)
FLA_Error FLA_Tridiag_UT_form_Q_l_opt_var1 (FLA_Obj A, FLA_Obj T)
FLA_Error FLA_Tridiag_UT_form_Q_l_ops_var1 (int m_A, int n_AT, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
FLA_Error FLA_Tridiag_UT_form_Q_l_opd_var1 (int m_A, int n_AT, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
FLA_Error FLA_Tridiag_UT_form_Q_l_opc_var1 (int m_A, int n_AT, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
FLA_Error FLA_Tridiag_UT_form_Q_l_opz_var1 (int m_A, int n_AT, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)

Function Documentation

References FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_min_dim(), FLA_Obj_row_stride(), and FLA_Query_blocksize().

Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().

{
  FLA_Datatype datatype;
  dim_t        b_alg, k;
  dim_t        rs_T, cs_T;

  // Query the datatype of A.
  datatype = FLA_Obj_datatype( A );

  // Query the blocksize from the library.
  b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN );

  // Scale the blocksize by a pre-set global constant.
  b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_TRIDIAG_INNER_TO_OUTER_B_RATIO );

  // Query the minimum dimension of A.
  k = FLA_Obj_min_dim( A );

  // Figure out whether T should be row-major or column-major.
  if ( FLA_Obj_row_stride( A ) == 1 )
  {
    rs_T = 1;
    cs_T = b_alg;
  }
  else // if ( FLA_Obj_col_stride( A ) == 1 )
  {
    rs_T = k;
    cs_T = 1;
  }

  // Create a b_alg x k matrix to hold the block Householder transforms that
  // will be accumulated within the tridiagonal reduction algorithm.
  FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, T );

  return FLA_SUCCESS;
}

References FLA_Check_error_level(), FLA_Tridiag_UT_extract_diagonals_check(), and FLA_Tridiag_UT_l_extract_diagonals().

Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().

{
  FLA_Error r_val = FLA_SUCCESS;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Tridiag_UT_extract_diagonals_check( uplo, A, d, e );

  if ( uplo == FLA_LOWER_TRIANGULAR )
    r_val = FLA_Tridiag_UT_l_extract_diagonals( A, d, e );
  //else
  //  FLA_Tridiag_UT_u_extract_diagonals( A, d, e );

  return r_val;
}

References FLA_Check_error_level(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_QR_UT_form_Q(), FLA_Tridiag_UT_form_Q_check(), and FLA_Tridiag_UT_shift_U().

Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().

{
    FLA_Error r_val = FLA_SUCCESS;
    FLA_Obj   ATL, ATR,
              ABL, ABR;
    FLA_Obj   TL,  TR;

    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
        FLA_Tridiag_UT_form_Q_check( uplo, A, T );

    // Shift the Householder vectors one row/column towards the diagonal.
    FLA_Tridiag_UT_shift_U( uplo, A );

    FLA_Part_2x2( A,    &ATL, &ATR,
                        &ABL, &ABR,    1, 1, FLA_TL );
    FLA_Part_1x2( T,    &TL, &TR,     1, FLA_RIGHT );

    if ( uplo == FLA_LOWER_TRIANGULAR )
    {
        FLA_QR_UT_form_Q( ABR, TL, ABR );
    }
    else // if ( uplo == FLA_UPPER_TRIANGULAR )
    {
        //FLA_LQ_UT_form_Q( A, T, A );
    }

    return r_val;
}
FLA_Error FLA_Tridiag_UT_form_Q_l_opc_var1 ( int  m_A,
int  n_AT,
scomplex buff_A,
int  rs_A,
int  cs_A,
scomplex buff_T,
int  rs_T,
int  cs_T 
)
FLA_Error FLA_Tridiag_UT_form_Q_l_opd_var1 ( int  m_A,
int  n_AT,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
FLA_Error FLA_Tridiag_UT_form_Q_l_ops_var1 ( int  m_A,
int  n_AT,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
FLA_Error FLA_Tridiag_UT_form_Q_l_opz_var1 ( int  m_A,
int  n_AT,
dcomplex buff_A,
int  rs_A,
int  cs_A,
dcomplex buff_T,
int  rs_T,
int  cs_T 
)

References FLA_Check_error_level(), FLA_Tridiag_UT_internal_check(), FLA_Tridiag_UT_l(), and FLA_Tridiag_UT_u().

Referenced by FLA_Tridiag_UT().

{
    FLA_Error r_val = FLA_SUCCESS;
    
    if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
        FLA_Tridiag_UT_internal_check( uplo, A, T, cntl );

    if ( uplo == FLA_LOWER_TRIANGULAR )
    {
        r_val = FLA_Tridiag_UT_l( A, T, cntl );
    }
    else // if ( uplo == FLA_UPPER_TRIANGULAR )
    {
        r_val = FLA_Tridiag_UT_u( A, T, cntl );
    }

    return r_val;
}

References FLA_Tridiag_UT_l_blf_var2(), FLA_Tridiag_UT_l_blf_var3(), FLA_Tridiag_UT_l_blk_var1(), FLA_Tridiag_UT_l_blk_var2(), FLA_Tridiag_UT_l_blk_var3(), FLA_Tridiag_UT_l_opt_var1(), FLA_Tridiag_UT_l_opt_var2(), FLA_Tridiag_UT_l_opt_var3(), FLA_Tridiag_UT_l_unb_var1(), FLA_Tridiag_UT_l_unb_var2(), and FLA_Tridiag_UT_l_unb_var3().

Referenced by FLA_Tridiag_UT_internal().

{
    FLA_Error r_val = FLA_SUCCESS;
    
    if      ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 )
    {
        r_val = FLA_Tridiag_UT_l_unb_var1( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
    {
        r_val = FLA_Tridiag_UT_l_unb_var2( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT3 )
    {
        r_val = FLA_Tridiag_UT_l_unb_var3( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
    {
        r_val = FLA_Tridiag_UT_l_opt_var1( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
    {
        r_val = FLA_Tridiag_UT_l_opt_var2( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT3 )
    {
        r_val = FLA_Tridiag_UT_l_opt_var3( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
    {
        r_val = FLA_Tridiag_UT_l_blk_var1( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
    {
        r_val = FLA_Tridiag_UT_l_blk_var2( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
    {
        r_val = FLA_Tridiag_UT_l_blk_var3( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLK_FUS_VARIANT2 )
    {
        r_val = FLA_Tridiag_UT_l_blf_var2( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLK_FUS_VARIANT3 )
    {
        r_val = FLA_Tridiag_UT_l_blf_var3( A, T );
    }
    else
    {
        FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
    }

    return r_val;
}

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), scomplex::real, and dcomplex::real.

Referenced by FLA_Tridiag_UT_extract_diagonals().

{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_d;
  int          inc_e;
  int          i;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );

  inc_e    = FLA_Obj_vector_inc( e );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float*    buff_A = FLA_FLOAT_PTR( A );
      float*    buff_d = FLA_FLOAT_PTR( d );
      float*    buff_e = FLA_FLOAT_PTR( e );

      for ( i = 0; i < m_A; ++i )
      {
        float*    alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        float*    a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        float*    delta1   = buff_d + (i  )*inc_d;
        float*    epsilon1 = buff_e + (i  )*inc_e;

        int       m_ahead  = m_A - i - 1;

        // delta1 = alpha11;
        *delta1 = *alpha11;

        // epsilon1 = a21_t;
        if ( m_ahead > 0 )
          *epsilon1 = *a21_t;
      }

      break;
    }

    case FLA_DOUBLE:
    {
      double*   buff_A = FLA_DOUBLE_PTR( A );
      double*   buff_d = FLA_DOUBLE_PTR( d );
      double*   buff_e = FLA_DOUBLE_PTR( e );

      for ( i = 0; i < m_A; ++i )
      {
        double*   alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        double*   a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        double*   delta1   = buff_d + (i  )*inc_d;
        double*   epsilon1 = buff_e + (i  )*inc_e;

        int       m_ahead  = m_A - i - 1;

        // delta1 = alpha11;
        *delta1 = *alpha11;

        // epsilon1 = a21_t;
        if ( m_ahead > 0 )
          *epsilon1 = *a21_t;
      }

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      float*    buff_d = FLA_FLOAT_PTR( d );
      float*    buff_e = FLA_FLOAT_PTR( e );

      for ( i = 0; i < m_A; ++i )
      {
        scomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        scomplex* a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        float*    delta1   = buff_d + (i  )*inc_d;
        float*    epsilon1 = buff_e + (i  )*inc_e;

        int       m_ahead  = m_A - i - 1;

        // delta1 = alpha11;
        *delta1 = alpha11->real;

        // epsilon1 = a21_t;
        if ( m_ahead > 0 )
          *epsilon1 = a21_t->real;
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      double*   buff_d = FLA_DOUBLE_PTR( d );
      double*   buff_e = FLA_DOUBLE_PTR( e );

      for ( i = 0; i < m_A; ++i )
      {
        dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
        dcomplex* a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        double*   delta1   = buff_d + (i  )*inc_d;
        double*   epsilon1 = buff_e + (i  )*inc_e;

        int       m_ahead  = m_A - i - 1;

        // delta1 = alpha11;
        *delta1 = alpha11->real;

        // epsilon1 = a21_t;
        if ( m_ahead > 0 )
          *epsilon1 = a21_t->real;
      }

      break;
    }
  }

  return FLA_SUCCESS;
}

References bli_csetv(), bli_dsetv(), bli_ssetv(), bli_zsetv(), BLIS_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_ONE.

Referenced by FLA_Tridiag_UT_realify().

{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_d;
  int          i;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );


  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_d = FLA_FLOAT_PTR( d );
      float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );

      bli_ssetv( m_A,
                 buff_1,
                 buff_d, inc_d );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_d = FLA_DOUBLE_PTR( d );
      double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );

      bli_dsetv( m_A,
                 buff_1,
                 buff_d, inc_d );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_d = FLA_COMPLEX_PTR( d );
      scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );

      bli_csetv( 1,
                 buff_1,
                 buff_d, inc_d );

      for ( i = 1; i < m_A; ++i )
      {
        scomplex* a10t_r   = buff_A + (i-1)*cs_A + (i  )*rs_A;
        scomplex* a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        scomplex* delta1   = buff_d + (i  )*inc_d;
        scomplex  absv;
        scomplex  conj_delta1;

        int       m_ahead  = m_A - i - 1;

        // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
        // FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
        // FLA_Absolute_value( absv );
        // FLA_Inv_scal( absv, delta1 );
        bli_ccopys( BLIS_CONJUGATE, a10t_r, delta1 );
        bli_cabsval2( a10t_r, &absv );
        bli_cinvscals( &absv, delta1 );

        // FLA_Copyt( FLA_NO_TRANSPOSE, absv, a10t_r );
        // FLA_Scalc( FLA_CONJUGATE, delta1, a21_t );
        *a10t_r = absv;
        if ( m_ahead > 0 )
        {
          bli_ccopyconj( delta1, &conj_delta1 );
          bli_cscals( &conj_delta1, a21_t );
        }
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
      dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );

      bli_zsetv( 1,
                 buff_1,
                 buff_d, inc_d );

      for ( i = 1; i < m_A; ++i )
      {
        dcomplex* a10t_r   = buff_A + (i-1)*cs_A + (i  )*rs_A;
        dcomplex* a21_t    = buff_A + (i  )*cs_A + (i+1)*rs_A;
        dcomplex* delta1   = buff_d + (i  )*inc_d;
        dcomplex  absv;
        dcomplex  conj_delta1;

        int       m_ahead  = m_A - i - 1;

        // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
        // FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
        // FLA_Absolute_value( absv );
        // FLA_Inv_scal( absv, delta1 );
        bli_zcopys( BLIS_CONJUGATE, a10t_r, delta1 );
        bli_zabsval2( a10t_r, &absv );
        bli_zinvscals( &absv, delta1 );

        // FLA_Copyt( FLA_NO_TRANSPOSE, absv, a10t_r );
        // FLA_Scalc( FLA_CONJUGATE, delta1, a21_t );
        *a10t_r = absv;
        if ( m_ahead > 0 )
        {
          bli_zcopyconj( delta1, &conj_delta1 );
          bli_zscals( &conj_delta1, a21_t );
        }
      }

      break;
    }
  }

  return FLA_SUCCESS;
}

References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), and FLA_Set().

{
  FLA_Obj ATL,   ATR,      A00,  a01,     A02, 
          ABL,   ABR,      a10t, alpha11, a12t,
                           A20,  a21,     A22;

  FLA_Obj dT,              d0,
          dB,              delta1,
                           d2;

  FLA_Obj a10t_l, a10t_r;

  FLA_Obj a21_t,
          a21_b;

  FLA_Obj absv;


  FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     1, 1, FLA_TL );

  FLA_Part_2x1( d,    &dT,
                      &dB,            1, FLA_TOP );

  // Set first element of vector d to one.
  FLA_Set( FLA_ONE, dT );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_2x1_to_3x1( dT,                &d0, 
                        /* ** */            /* ****** */
                                              &delta1, 
                           dB,                &d2,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    FLA_Part_1x2( a10t,   &a10t_l, &a10t_r,    1, FLA_RIGHT );

    FLA_Part_2x1( a21,    &a21_t,
                          &a21_b,              1, FLA_TOP );

    // delta1 = conj(a10t_r) / abs(a10t_r);
    FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a10t_r, delta1 );
    FLA_Copyt( FLA_NO_TRANSPOSE, a10t_r, absv );
    FLA_Absolute_value( absv );
    FLA_Inv_scal( absv, delta1 );

    // a10t_r  = delta1 * a10t_r;
    //         = abs(a10t_r);
    // alpha11 = delta1 * alpha11 * conj(delta1);
    //         = alpha11;
    // a21_t   = a21_t * conj(delta1);
    FLA_Copyt( FLA_NO_TRANSPOSE, absv, a10t_r );
    FLA_Scalc( FLA_CONJUGATE, delta1, a21_t );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &dT,                d0, 
                                                  delta1, 
                            /* ** */           /* ****** */
                              &dB,                d2,     FLA_TOP );
  }

  FLA_Obj_free( &absv );

  return FLA_SUCCESS;
}

References FLA_Check_error_level(), FLA_Obj_is_real(), FLA_ONE, FLA_Set(), FLA_Tridiag_UT_l_realify_opt(), FLA_Tridiag_UT_realify_check(), and FLA_Tridiag_UT_u_realify_opt().

Referenced by FLA_Hevd_lv_unb_var1(), and FLA_Hevd_lv_unb_var2().

{
  FLA_Error r_val = FLA_SUCCESS;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Tridiag_UT_realify_check( uplo, A, d );

  if ( FLA_Obj_is_real( A ) )
  {
    FLA_Set( FLA_ONE, d );
    return FLA_SUCCESS;
  }

  if ( uplo == FLA_LOWER_TRIANGULAR )
    //r_val = FLA_Tridiag_UT_l_realify_unb( A, d );
    r_val = FLA_Tridiag_UT_l_realify_opt( A, d );
  else
    //r_val = FLA_Tridiag_UT_u_realify_unb( A, d );
    r_val = FLA_Tridiag_UT_u_realify_opt( A, d );

  return r_val;
}

References FLA_Check_error_level(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Tridiag_UT_recover_tau_check(), and FLA_Tridiag_UT_recover_tau_submatrix().

{
  FLA_Obj TL,    TR,       T0,  T1,  T2;

  FLA_Obj tT,              t0,
          tB,              t1,
                           t2;

  dim_t b_alg, b;

  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
    FLA_Tridiag_UT_recover_tau_check( T, t );

  b_alg = FLA_Obj_length( T );

  FLA_Part_1x2( T,    &TL,  &TR,      0, FLA_LEFT );

  FLA_Part_2x1( t,    &tT, 
                      &tB,            0, FLA_TOP );

  while ( FLA_Obj_length( tT ) < FLA_Obj_length( t ) ){

    b = min( FLA_Obj_length( tB ), b_alg );

    FLA_Repart_1x2_to_1x3( TL,  /**/ TR,        &T0, /**/ &T1, &T2,
                           b, FLA_RIGHT );

    FLA_Repart_2x1_to_3x1( tT,                &t0, 
                        /* ** */            /* ** */
                                              &t1, 
                           tB,                &t2,        b, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    FLA_Tridiag_UT_recover_tau_submatrix( T1, t1 );

    /*------------------------------------------------------------*/

    FLA_Cont_with_1x3_to_1x2( &TL,  /**/ &TR,        T0, T1, /**/ T2,
                              FLA_LEFT );

    FLA_Cont_with_3x1_to_2x1( &tT,                t0, 
                                                  t1, 
                            /* ** */           /* ** */
                              &tB,                t2,     FLA_TOP );
  }

  return FLA_SUCCESS;
}

References FLA_Check_error_level(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_shift_U_check(), FLA_Tridiag_UT_shift_U_l_opc(), FLA_Tridiag_UT_shift_U_l_opd(), FLA_Tridiag_UT_shift_U_l_ops(), and FLA_Tridiag_UT_shift_U_l_opz().

Referenced by FLA_Tridiag_UT_form_Q().

{
    FLA_Datatype datatype;
    int          m_A;
    int          rs_A, cs_A;

    datatype = FLA_Obj_datatype( A );
    m_A      = FLA_Obj_length( A );
    rs_A     = FLA_Obj_row_stride( A );
    cs_A     = FLA_Obj_col_stride( A );

    if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
        FLA_Tridiag_UT_shift_U_check( uplo, A );

    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_A = ( float* ) FLA_FLOAT_PTR( A );

            if ( uplo == FLA_LOWER_TRIANGULAR )
                FLA_Tridiag_UT_shift_U_l_ops( m_A,
                                              buff_A, rs_A, cs_A );
            //else // if ( uplo == FLA_UPPER_TRIANGULAR )
            //  FLA_Tridiag_UT_shift_U_u_ops( m_A,
            //                                buff_A, rs_A, cs_A );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_A = ( double* ) FLA_DOUBLE_PTR( A );

            if ( uplo == FLA_LOWER_TRIANGULAR )
                FLA_Tridiag_UT_shift_U_l_opd( m_A,
                                              buff_A, rs_A, cs_A );
            //else // if ( uplo == FLA_UPPER_TRIANGULAR )
            //  FLA_Tridiag_UT_shift_U_u_opd( m_A,
            //                                buff_A, rs_A, cs_A );

            break;
        }

        case FLA_COMPLEX:
        {
            scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

            if ( uplo == FLA_LOWER_TRIANGULAR )
                FLA_Tridiag_UT_shift_U_l_opc( m_A,
                                              buff_A, rs_A, cs_A );
            //else // if ( uplo == FLA_UPPER_TRIANGULAR )
            //  FLA_Tridiag_UT_shift_U_u_opc( m_A,
            //                                buff_A, rs_A, cs_A );

            break;
        }

        case FLA_DOUBLE_COMPLEX:
        {
            dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

            if ( uplo == FLA_LOWER_TRIANGULAR )
                FLA_Tridiag_UT_shift_U_l_opz( m_A,
                                              buff_A, rs_A, cs_A );
            //else // if ( uplo == FLA_UPPER_TRIANGULAR )
            //  FLA_Tridiag_UT_shift_U_u_opz( m_A,
            //                                buff_A, rs_A, cs_A );

            break;
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_l_opc ( int  m_A,
scomplex buff_A,
int  rs_A,
int  cs_A 
)

References bli_c0(), bli_c1(), bli_ccopyv(), bli_csetv(), and BLIS_NO_CONJUGATE.

Referenced by FLA_Tridiag_UT_shift_U().

{
    scomplex* a00  = buff_A;
    scomplex* a10  = buff_A + rs_A;
    scomplex  zero = bli_c0();
    scomplex  one  = bli_c1();
    int       j;

    for ( j = m_A - 1; j > 0; --j )
    {
        scomplex* alpha01  = buff_A + (j  )*cs_A + (0  )*rs_A;
        scomplex* alpha11  = buff_A + (j  )*cs_A + (j  )*rs_A;
        scomplex* a20      = buff_A + (j-1)*cs_A + (j+1)*rs_A;
        scomplex* a21      = buff_A + (j  )*cs_A + (j+1)*rs_A;

        int       m_ahead  = m_A - j - 1;

        *alpha01 = zero;
        *alpha11 = one;
        bli_ccopyv( BLIS_NO_CONJUGATE,
                    m_ahead,
                    a20, rs_A,
                    a21, rs_A );
    }

    *a00 = one;
    bli_csetv( m_A - 1,
               &zero,
               a10, rs_A );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_l_opd ( int  m_A,
double *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_d0(), bli_d1(), bli_dcopyv(), bli_dsetv(), and BLIS_NO_CONJUGATE.

Referenced by FLA_Tridiag_UT_shift_U().

{
    double* a00  = buff_A;
    double* a10  = buff_A + rs_A;
    double  zero = bli_d0();
    double  one  = bli_d1();
    int     j;

    for ( j = m_A - 1; j > 0; --j )
    {
        double*   alpha01  = buff_A + (j  )*cs_A + (0  )*rs_A;
        double*   alpha11  = buff_A + (j  )*cs_A + (j  )*rs_A;
        double*   a20      = buff_A + (j-1)*cs_A + (j+1)*rs_A;
        double*   a21      = buff_A + (j  )*cs_A + (j+1)*rs_A;

        int       m_ahead  = m_A - j - 1;

        *alpha01 = zero;
        *alpha11 = one;
        bli_dcopyv( BLIS_NO_CONJUGATE,
                    m_ahead,
                    a20, rs_A,
                    a21, rs_A );
    }

    *a00 = one;
    bli_dsetv( m_A - 1,
               &zero,
               a10, rs_A );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_l_ops ( int  m_A,
float *  buff_A,
int  rs_A,
int  cs_A 
)

References bli_s0(), bli_s1(), bli_scopyv(), bli_ssetv(), and BLIS_NO_CONJUGATE.

Referenced by FLA_Tridiag_UT_shift_U().

{
    float*  a00  = buff_A;
    float*  a10  = buff_A + rs_A;
    float   zero = bli_s0();
    float   one  = bli_s1();
    int     j;

    for ( j = m_A - 1; j > 0; --j )
    {
        float*    alpha01  = buff_A + (j  )*cs_A + (0  )*rs_A;
        float*    alpha11  = buff_A + (j  )*cs_A + (j  )*rs_A;
        float*    a20      = buff_A + (j-1)*cs_A + (j+1)*rs_A;
        float*    a21      = buff_A + (j  )*cs_A + (j+1)*rs_A;

        int       m_ahead  = m_A - j - 1;

        *alpha01 = zero;
        *alpha11 = one;
        bli_scopyv( BLIS_NO_CONJUGATE,
                    m_ahead,
                    a20, rs_A,
                    a21, rs_A );
    }

    *a00 = one;
    bli_ssetv( m_A - 1,
               &zero,
               a10, rs_A );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_l_opz ( int  m_A,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

References bli_z0(), bli_z1(), bli_zcopyv(), bli_zsetv(), and BLIS_NO_CONJUGATE.

Referenced by FLA_Tridiag_UT_shift_U().

{
    dcomplex* a00  = buff_A;
    dcomplex* a10  = buff_A + rs_A;
    dcomplex  zero = bli_z0();
    dcomplex  one  = bli_z1();
    int       j;

    for ( j = m_A - 1; j > 0; --j )
    {
        dcomplex* alpha01  = buff_A + (j  )*cs_A + (0  )*rs_A;
        dcomplex* alpha11  = buff_A + (j  )*cs_A + (j  )*rs_A;
        dcomplex* a20      = buff_A + (j-1)*cs_A + (j+1)*rs_A;
        dcomplex* a21      = buff_A + (j  )*cs_A + (j+1)*rs_A;

        int       m_ahead  = m_A - j - 1;

        *alpha01 = zero;
        *alpha11 = one;
        bli_zcopyv( BLIS_NO_CONJUGATE,
                    m_ahead,
                    a20, rs_A,
                    a21, rs_A );
    }

    *a00 = one;
    bli_zsetv( m_A - 1,
               &zero,
               a10, rs_A );

    return FLA_SUCCESS;
}
FLA_Error FLA_Tridiag_UT_shift_U_u_opc ( int  m_A,
scomplex buff_A,
int  rs_A,
int  cs_A 
)
FLA_Error FLA_Tridiag_UT_shift_U_u_opd ( int  m_A,
double *  buff_A,
int  rs_A,
int  cs_A 
)
FLA_Error FLA_Tridiag_UT_shift_U_u_ops ( int  m_A,
float *  buff_A,
int  rs_A,
int  cs_A 
)
FLA_Error FLA_Tridiag_UT_shift_U_u_opz ( int  m_A,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

Referenced by FLA_Tridiag_UT_internal().

{
    FLA_Error r_val = FLA_SUCCESS;
    
/*
    if      ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT1 )
    {
        r_val = FLA_Tridiag_UT_u_unb_var1( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT2 )
    {
        r_val = FLA_Tridiag_UT_u_unb_var2( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNBLOCKED_VARIANT3 )
    {
        r_val = FLA_Tridiag_UT_u_unb_var3( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT1 )
    {
        r_val = FLA_Tridiag_UT_u_opt_var1( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT2 )
    {
        r_val = FLA_Tridiag_UT_u_opt_var2( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_UNB_OPT_VARIANT3 )
    {
        r_val = FLA_Tridiag_UT_u_opt_var3( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
    {
        r_val = FLA_Tridiag_UT_u_blk_var1( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
    {
        r_val = FLA_Tridiag_UT_u_blk_var2( A, T );
    }
    else if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
    {
        r_val = FLA_Tridiag_UT_u_blk_var3( A, T );
    }
    else
*/
    {
        FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
    }

    return r_val;
}

References bli_csetv(), bli_dsetv(), bli_ssetv(), bli_zsetv(), BLIS_CONJUGATE, FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_ONE.

Referenced by FLA_Tridiag_UT_realify().

{
  FLA_Datatype datatype;
  int          m_A;
  int          rs_A, cs_A;
  int          inc_d;
  int          i;

  datatype = FLA_Obj_datatype( A );

  m_A      = FLA_Obj_length( A );

  rs_A     = FLA_Obj_row_stride( A );
  cs_A     = FLA_Obj_col_stride( A );

  inc_d    = FLA_Obj_vector_inc( d );


  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_d = FLA_FLOAT_PTR( d );
      float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );

      bli_ssetv( m_A,
                 buff_1,
                 buff_d, inc_d );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_d = FLA_DOUBLE_PTR( d );
      double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );

      bli_dsetv( m_A,
                 buff_1,
                 buff_d, inc_d );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_d = FLA_COMPLEX_PTR( d );
      scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );

      bli_csetv( 1,
                 buff_1,
                 buff_d, inc_d );

      for ( i = 1; i < m_A; ++i )
      {
        scomplex* a01_b    = buff_A + (i  )*cs_A + (i-1)*rs_A;
        scomplex* a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        scomplex* delta1   = buff_d + (i  )*inc_d;
        scomplex  absv;
        scomplex  conj_delta1;

        int       m_ahead  = m_A - i - 1;

        // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, delta1 );
        // FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
        // FLA_Absolute_value( absv );
        // FLA_Inv_scal( absv, delta1 );
        bli_ccopys( BLIS_CONJUGATE, a01_b, delta1 );
        bli_cabsval2( a01_b, &absv );
        bli_cinvscals( &absv, delta1 );

        // FLA_Copyt( FLA_NO_TRANSPOSE, absv, a01_b );
        // FLA_Scalc( FLA_CONJUGATE, delta1, a12t_l );
        *a01_b = absv;
        if ( m_ahead > 0 )
        {
          bli_ccopyconj( delta1, &conj_delta1 );
          bli_cscals( &conj_delta1, a12t_l );
        }
      }

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_d = FLA_DOUBLE_COMPLEX_PTR( d );
      dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );

      bli_zsetv( 1,
                 buff_1,
                 buff_d, inc_d );

      for ( i = 1; i < m_A; ++i )
      {
        dcomplex* a01_b    = buff_A + (i  )*cs_A + (i-1)*rs_A;
        dcomplex* a12t_l   = buff_A + (i+1)*cs_A + (i  )*rs_A;
        dcomplex* delta1   = buff_d + (i  )*inc_d;
        dcomplex  absv;
        dcomplex  conj_delta1;

        int       m_ahead  = m_A - i - 1;

        // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, delta1 );
        // FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
        // FLA_Absolute_value( absv );
        // FLA_Inv_scal( absv, delta1 );
        bli_zcopys( BLIS_CONJUGATE, a01_b, delta1 );
        bli_zabsval2( a01_b, &absv );
        bli_zinvscals( &absv, delta1 );

        // FLA_Copyt( FLA_NO_TRANSPOSE, absv, a01_b );
        // FLA_Scalc( FLA_CONJUGATE, delta1, a12t_l );
        *a01_b = absv;
        if ( m_ahead > 0 )
        {
          bli_zcopyconj( delta1, &conj_delta1 );
          bli_zscals( &conj_delta1, a12t_l );
        }
      }

      break;
    }
  }

  return FLA_SUCCESS;
}

References FLA_Absolute_value(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt(), FLA_Inv_scal(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scalc(), and FLA_Set().

{
  FLA_Obj ATL,   ATR,      A00,  a01,     A02, 
          ABL,   ABR,      a10t, alpha11, a12t,
                           A20,  a21,     A22;

  FLA_Obj dT,              d0,
          dB,              delta1,
                           d2;

  FLA_Obj a01_t,
          a01_b;

  FLA_Obj a12t_l, a12t_r;


  FLA_Obj absv;


  FLA_Obj_create( FLA_Obj_datatype( A ), 1, 1, 0, 0, &absv );

  FLA_Part_2x2( A,    &ATL, &ATR,
                      &ABL, &ABR,     1, 1, FLA_TL );

  FLA_Part_2x1( d,    &dT,
                      &dB,            1, FLA_TOP );

  // Set first element of vector d to one.
  FLA_Set( FLA_ONE, dT );

  while ( FLA_Obj_min_dim( ABR ) > 0 )
  {
    FLA_Repart_2x2_to_3x3( ATL, /**/ ATR,       &A00,  /**/ &a01,     &A02,
                        /* ************* */   /* ************************** */
                                                &a10t, /**/ &alpha11, &a12t,
                           ABL, /**/ ABR,       &A20,  /**/ &a21,     &A22,
                           1, 1, FLA_BR );

    FLA_Repart_2x1_to_3x1( dT,                &d0, 
                        /* ** */            /* ****** */
                                              &delta1, 
                           dB,                &d2,        1, FLA_BOTTOM );

    /*------------------------------------------------------------*/

    FLA_Part_2x1( a01,    &a01_t,
                          &a01_b,              1, FLA_BOTTOM );

    FLA_Part_1x2( a12t,   &a12t_l, &a12t_r,    1, FLA_LEFT );

    // delta1 = conj(a01_b) / abs(a01_b);
    FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, a01_b, delta1 );
    FLA_Copyt( FLA_NO_TRANSPOSE, a01_b, absv );
    FLA_Absolute_value( absv );
    FLA_Inv_scal( absv, delta1 );

    // a01_b   = delta1 * a01_b;
    //         = abs(a01_b);
    // alpha11 = delta1 * alpha11 * conj(delta1);
    //         = alpha11;
    // a12t_l  = a12t_l * conj(delta1);
    FLA_Copyt( FLA_NO_TRANSPOSE, absv, a01_b );
    FLA_Scalc( FLA_CONJUGATE, delta1, a12t_l );

    /*------------------------------------------------------------*/

    FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR,       A00,  a01,     /**/ A02,
                                                     a10t, alpha11, /**/ a12t,
                            /* ************** */  /* ************************ */
                              &ABL, /**/ &ABR,       A20,  a21,     /**/ A22,
                              FLA_TL );

    FLA_Cont_with_3x1_to_2x1( &dT,                d0, 
                                                  delta1, 
                            /* ** */           /* ****** */
                              &dB,                d2,     FLA_TOP );
  }

  FLA_Obj_free( &absv );

  return FLA_SUCCESS;
}