libflame  revision_anchor
Functions
FLA_Bsvd_v.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Bsvd_compute_shift (FLA_Obj tol, FLA_Obj sminl, FLA_Obj smax, FLA_Obj d, FLA_Obj e, FLA_Obj shift)
FLA_Error FLA_Bsvd_compute_shift_ops (int m_A, float tol, float sminl, float smax, float *buff_d, int inc_d, float *buff_e, int inc_e, float *shift)
FLA_Error FLA_Bsvd_compute_shift_opd (int m_A, double tol, double sminl, double smax, double *buff_d, int inc_d, double *buff_e, int inc_e, double *shift)
FLA_Error FLA_Bsvd_compute_tol_thresh (FLA_Obj tolmul, FLA_Obj maxit, FLA_Obj d, FLA_Obj e, FLA_Obj tol, FLA_Obj thresh)
FLA_Error FLA_Bsvd_compute_tol_thresh_ops (int m_A, float tolmul, float maxit, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
FLA_Error FLA_Bsvd_compute_tol_thresh_opd (int m_A, double tolmul, double maxit, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
FLA_Error FLA_Bsvd_find_converged (FLA_Obj tol, FLA_Obj d, FLA_Obj e, FLA_Obj sminl)
FLA_Error FLA_Bsvd_find_converged_ops (int m_A, float tol, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sminl)
FLA_Error FLA_Bsvd_find_converged_opd (int m_A, double tol, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sminl)
FLA_Error FLA_Bsvd_find_max_min (FLA_Obj d, FLA_Obj e, FLA_Obj smax, FLA_Obj smin)
FLA_Error FLA_Bsvd_find_max_min_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *smax, float *smin)
FLA_Error FLA_Bsvd_find_max_min_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *smax, double *smin)
FLA_Error FLA_Bsvd_find_submatrix_ops (int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
FLA_Error FLA_Bsvd_find_submatrix_opd (int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
FLA_Error FLA_Bsvd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj U, FLA_Obj V, dim_t b_alg)
FLA_Error FLA_Bsvd_v_ops_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
FLA_Error FLA_Bsvd_v_opd_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
FLA_Error FLA_Bsvd_v_opc_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
FLA_Error FLA_Bsvd_v_opz_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
FLA_Error FLA_Bsvd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj RG, FLA_Obj RH, FLA_Obj W, FLA_Obj U, FLA_Obj V, dim_t b_alg)
FLA_Error FLA_Bsvd_v_ops_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
FLA_Error FLA_Bsvd_v_opd_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
FLA_Error FLA_Bsvd_v_opc_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
FLA_Error FLA_Bsvd_v_opz_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)

Function Documentation

FLA_Error FLA_Bsvd_compute_shift ( FLA_Obj  tol,
FLA_Obj  sminl,
FLA_Obj  smax,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  shift 
)

References FLA_Bsvd_compute_shift_opd(), FLA_Bsvd_compute_shift_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().

{
    FLA_Datatype datatype;
    int          m_A;
    int          inc_d;
    int          inc_e;

    datatype = FLA_Obj_datatype( d );

    m_A      = FLA_Obj_vector_dim( d );

    inc_d    = FLA_Obj_vector_inc( d );
    inc_e    = FLA_Obj_vector_inc( e );

    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_tol       = FLA_FLOAT_PTR( tol );
            float*    buff_sminl     = FLA_FLOAT_PTR( sminl );
            float*    buff_smax      = FLA_FLOAT_PTR( smax );
            float*    buff_d         = FLA_FLOAT_PTR( d );
            float*    buff_e         = FLA_FLOAT_PTR( e );
            float*    buff_shift     = FLA_FLOAT_PTR( shift );

            FLA_Bsvd_compute_shift_ops( m_A,
                                        *buff_tol,
                                        *buff_sminl,
                                        *buff_smax,
                                        buff_d, inc_d,
                                        buff_e, inc_e,
                                        buff_shift );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_tol       = FLA_DOUBLE_PTR( tol );
            double*   buff_sminl     = FLA_DOUBLE_PTR( sminl );
            double*   buff_smax      = FLA_DOUBLE_PTR( smax );
            double*   buff_d         = FLA_DOUBLE_PTR( d );
            double*   buff_e         = FLA_DOUBLE_PTR( e );
            double*   buff_shift     = FLA_DOUBLE_PTR( shift );

            FLA_Bsvd_compute_shift_opd( m_A,
                                        *buff_tol,
                                        *buff_sminl,
                                        *buff_smax,
                                        buff_d, inc_d,
                                        buff_e, inc_e,
                                        buff_shift );

            break;
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_compute_shift_opd ( int  m_A,
double  tol,
double  sminl,
double  smax,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  shift 
)

References FLA_Mach_params_opd(), and FLA_Sv_2x2_opd().

Referenced by FLA_Bsvd_compute_shift(), and FLA_Bsvd_sinval_v_opd_var1().

{
    double  hndrth = 0.01;
    double  eps;
    double* d_first;
    double* e_last;
    double* d_last_m1;
    double* d_last;
    double  sll, temp;

    eps = FLA_Mach_params_opd( FLA_MACH_EPS );

    d_first   = buff_d + (0    )*inc_d;
    e_last    = buff_e + (m_A-2)*inc_e;
    d_last_m1 = buff_d + (m_A-2)*inc_d;
    d_last    = buff_d + (m_A-1)*inc_d;

    // If the shift would ruin relative accuracy, set it to zero.
    if ( m_A * tol * ( sminl / smax ) <= max( eps, hndrth * tol ) )
    {
#ifdef PRINTF
printf( "FLA_Bsvd_compute_shift_opd: shift would ruin accuracy; setting shift to 0.\n" );
printf( "                   m_A = %d     \n", m_A );
printf( "                   tol = %20.15e\n", tol );
printf( "                 sminl = %20.15e\n", sminl );
printf( "                  smax = %20.15e\n", smax );
printf( "                   LHS = %20.15e\n", m_A * tol * ( sminl / smax ) );
printf( "      max(eps,0.01*tol)= %20.15e\n", max( eps, hndrth * tol ) );
#endif
        *shift = 0.0;
    }
    else
    {
        // Compute the shift from the last 2x2 matrix.
        FLA_Sv_2x2_opd( d_last_m1,
                        e_last,
                        d_last,
                        shift,
                        &temp );

        sll = fabs( *d_first );

        // Check if the shift is negligible; if so, set it to zero.
        if ( sll > 0.0 )
        {
            temp = ( *shift / sll );
            if ( temp * temp < eps )
            {
#ifdef PRINTF
printf( "FLA_Bsvd_compute_shift_opd: shift is negligible; setting shift to 0.\n" );
#endif
                *shift = 0.0;
            }
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_compute_shift_ops ( int  m_A,
float  tol,
float  sminl,
float  smax,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  shift 
)

Referenced by FLA_Bsvd_compute_shift().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_compute_tol_thresh ( FLA_Obj  tolmul,
FLA_Obj  maxit,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  tol,
FLA_Obj  thresh 
)

References FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().

{
    FLA_Datatype datatype;
    int          n_A;
    int          inc_d;
    int          inc_e;

    datatype = FLA_Obj_datatype( d );

    n_A      = FLA_Obj_vector_dim( d );

    inc_d    = FLA_Obj_vector_inc( d );
    inc_e    = FLA_Obj_vector_inc( e );
    

    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_tolmul = FLA_FLOAT_PTR( tolmul );
            float*    buff_maxitr = FLA_FLOAT_PTR( maxitr );
            float*    buff_d      = FLA_FLOAT_PTR( d );
            float*    buff_e      = FLA_FLOAT_PTR( e );
            float*    buff_tol    = FLA_FLOAT_PTR( tol );
            float*    buff_thresh = FLA_FLOAT_PTR( thresh );

            FLA_Bsvd_compute_tol_thresh_ops( n_A,
                                             *buff_tolmul,
                                             *buff_maxitr,
                                             buff_d, inc_d,
                                             buff_e, inc_e,
                                             buff_tol,
                                             buff_thresh );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_tolmul = FLA_DOUBLE_PTR( tolmul );
            double*   buff_maxitr = FLA_DOUBLE_PTR( maxitr );
            double*   buff_d      = FLA_DOUBLE_PTR( d );
            double*   buff_e      = FLA_DOUBLE_PTR( e );
            double*   buff_tol    = FLA_DOUBLE_PTR( tol );
            double*   buff_thresh = FLA_DOUBLE_PTR( thresh );

            FLA_Bsvd_compute_tol_thresh_opd( n_A,
                                             *buff_tolmul,
                                             *buff_maxitr,
                                             buff_d, inc_d,
                                             buff_e, inc_e,
                                             buff_tol,
                                             buff_thresh );

            break;
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_compute_tol_thresh_opd ( int  m_A,
double  tolmul,
double  maxit,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  tol,
double *  thresh 
)

References bli_d0(), and FLA_Mach_params_opd().

Referenced by FLA_Bsvd_compute_tol_thresh(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

{
    double zero = bli_d0();
    double smin;
    double eps, unfl;
    double mu;
    int    i;

    // Query machine epsilon and the safe minimum.
    eps  = FLA_Mach_params_opd( FLA_MACH_EPS );
    unfl = FLA_Mach_params_opd( FLA_MACH_SFMIN );

    // Compute tol as the product of machine epsilon and tolmul.
    *tol = tolmul * eps;

    // Compute the approximate maximum singular value.
    // NOT NEEDED unless we're supporting absolute accuracy.
    //FLA_Bsvd_sinval_find_max( n_A,
    //                          buff_d, inc_d,
    //                          buff_e, inc_e,
    //                          &smax );

    // Compute the approximate minimum singular value.
    smin = fabs( *buff_d );

    // Skip the accumulation of smin if the first element is zero.
    if ( smin != zero )
    {
        mu = smin;
        for ( i = 1; i < n_A; ++i )
        {
            double* epsilon1 = buff_e + (i-1)*inc_e;
            double* delta2   = buff_d + (i  )*inc_d;

            mu   = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) );
            smin = min( smin, mu );

            // Stop early if we encountered a zero.
            if ( smin == zero ) break;
        }
    }

    // Compute thresh either in terms of tol or as a function of the
    // maximum total number of iterations, the problem size, and the
    // safe minimum.
    smin = smin / sqrt( ( double ) n_A );
    *thresh = max( *tol * smin, maxitr * n_A * n_A * unfl );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_compute_tol_thresh_ops ( int  m_A,
float  tolmul,
float  maxit,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  tol,
float *  thresh 
)

Referenced by FLA_Bsvd_compute_tol_thresh().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}

References FLA_Bsvd_find_converged_opd(), FLA_Bsvd_find_converged_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().

{
    FLA_Datatype datatype;
    int          m_A;
    int          inc_d;
    int          inc_e;

    datatype = FLA_Obj_datatype( d );

    m_A      = FLA_Obj_vector_dim( d );

    inc_d    = FLA_Obj_vector_inc( d );
    inc_e    = FLA_Obj_vector_inc( e );
    

    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_tol    = FLA_FLOAT_PTR( tol );
            float*    buff_d      = FLA_FLOAT_PTR( d );
            float*    buff_e      = FLA_FLOAT_PTR( e );
            float*    buff_sminl  = FLA_FLOAT_PTR( sminl );

            FLA_Bsvd_find_converged_ops( m_A,
                                         *buff_tol,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_sminl );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_tol    = FLA_DOUBLE_PTR( tol );
            double*   buff_d      = FLA_DOUBLE_PTR( d );
            double*   buff_e      = FLA_DOUBLE_PTR( e );
            double*   buff_sminl  = FLA_DOUBLE_PTR( sminl );

            FLA_Bsvd_find_converged_opd( m_A,
                                         *buff_tol,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_sminl );

            break;
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_find_converged_opd ( int  m_A,
double  tol,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  sminl 
)

Referenced by FLA_Bsvd_find_converged(), and FLA_Bsvd_sinval_v_opd_var1().

{
    double* epsilon_last;
    double* delta_last;
    double  mu;
    int     i;

    epsilon_last = buff_e + (m_A-2)*inc_e;
    delta_last   = buff_d + (m_A-1)*inc_d;

    // Check convergence at the bottom of the matrix first.
    if ( MAC_Bsvd_sinval_is_converged_opd( tol, *delta_last, *epsilon_last ) )
    {
        //*epsilon_last = 0.0;
        *sminl = 0.0;
        return m_A - 2;
    }

    // If the last element is not yet converged, check interior elements.
    // Also, accumulate sminl for later use when it comes time to check
    // the shift.

    mu     = fabs( *buff_d );
    *sminl = mu;

    for ( i = 0; i < m_A - 1; ++i )
    {
        double* epsilon1 = buff_e + (i  )*inc_e;
        double* delta2   = buff_d + (i+1)*inc_d;

        // Check convergence of epsilon1 against the value of mu accumulated
        // so far.
        if ( MAC_Bsvd_sinval_is_converged_opd( tol, mu, *epsilon1 ) )
        {
//printf( "FLA_Bsvd_sinval_find_converged: Split occurred in col %d\n", i );
//printf( "FLA_Bsvd_sinval_find_converged: mu      alpha12 = %23.19e %23.19e\n", mu, *epsilon1 );
//printf( "FLA_Bsvd_sinval_find_converged:         alpha22 =         %43.19e\n", *delta2 );
            //*epsilon1 = 0.0;
            //return FLA_FAILURE;
            return i;
        }

        // Update mu and sminl.
        mu     = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) );
        *sminl = min( *sminl, mu );
    }

    // Return with no convergence found.
    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_find_converged_ops ( int  m_A,
float  tol,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  sminl 
)

Referenced by FLA_Bsvd_find_converged().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_find_max_min_opd ( int  m_A,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  smax,
double *  smin 
)

Referenced by FLA_Bsvd_find_max(), and FLA_Bsvd_sinval_v_opd_var1().

{
    double smax_cand;
    double smin_cand;
    int    i;

    smax_cand = fabs( buff_d[ (m_A-1)*inc_d ] );
    smin_cand = smax_cand;

    for ( i = 0; i < m_A - 1; ++i )
    {
        double abs_di = fabs( buff_d[ i*inc_d ] );
        double abs_ei = fabs( buff_e[ i*inc_e ] );

        // Track the minimum element.
        smin_cand = min( smin_cand, abs_di );

        // Track the maximum element.
        smax_cand = max( smax_cand, abs_di );
        smax_cand = max( smax_cand, abs_ei );
    }

    // Save the results of the search.
    *smax = smax_cand;
    *smin = smin_cand;

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_find_max_min_ops ( int  m_A,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  smax,
float *  smin 
)

Referenced by FLA_Bsvd_find_max().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_find_submatrix_opd ( int  mn_A,
int  ij_begin,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
int *  ijTL,
int *  ijBR 
)

References bli_d0().

Referenced by FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

{
    double rzero = bli_d0();
    int    ij_tl;
    int    ij_br;

    // Search for the first non-zero superdiagonal element starting at
    // the index specified by ij_begin.
    for ( ij_tl = ij_begin; ij_tl < mn_A - 1; ++ij_tl )
    {
        double* e1 = buff_e + (ij_tl  )*inc_e;

        // If we find a non-zero element, record it and break out of this
        // loop.
        if ( *e1 != rzero )
        {
#ifdef PRINTF
printf( "FLA_Bsvd_find_submatrix_opd: found non-zero superdiagonal element\n" );
printf( "                             e[%3d] = %22.19e\n", ij_tl, *e1 );
#endif
            *ijTL = ij_tl;
            break;
        }
    }

    // If ij_tl was incremented all the way up to mn_A - 1, then we didn't
    // find any non-zeros.
    if ( ij_tl == mn_A - 1 )
    {
#ifdef PRINTF
printf( "FLA_Bsvd_find_submatrix_opd: no submatrices found.\n" );
#endif
        return FLA_FAILURE;
    }

    // If we've gotten this far, then a non-zero superdiagonal element was
    // found. Now we must walk the remaining portion of the superdiagonal
    // to find the first zero element, or if one is not found, we simply
    // use the last element of the superdiagonal.
    for ( ij_br = ij_tl; ij_br < mn_A - 1; ++ij_br )
    {
        double* e1 = buff_e + (ij_br  )*inc_e;

        // If we find a zero element, record it and break out of this
        // loop.
        if ( *e1 == rzero )
        {
#ifdef PRINTF
printf( "FLA_Bsvd_find_submatrix_opd: found zero superdiagonal element\n" );
printf( "                             e[%3d] = %22.19e\n", ij_br, *e1 );
#endif
            break;
        }
    }

    // If a zero element was found, then ij_br should hold the index of
    // that element. If a zero element was not found, then ij_br should
    // hold mn_A - 1. Either way, we save the value and return success.
    *ijBR = ij_br;

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_find_submatrix_ops ( int  mn_A,
int  ij_begin,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
int *  ijTL,
int *  ijBR 
)
{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_v_opc_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)

Referenced by FLA_Bsvd_v_opt_var1().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_v_opc_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_RG,
int  rs_RG,
int  cs_RG,
float *  buff_RH,
int  rs_RH,
int  cs_RH,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)

Referenced by FLA_Bsvd_v_opt_var2().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_v_opd_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)

References bli_d0(), bli_dm1(), bli_dscalv(), bli_z1(), bli_zsetm(), BLIS_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().

Referenced by FLA_Bsvd_v_opt_var1().

{
    dcomplex  one        = bli_z1();
    double    rzero      = bli_d0();

    int       maxitr     = 6;

    double    eps;
    double    tolmul;
    double    tol;
    double    thresh;

    dcomplex* G;
    dcomplex* H;
    double*   d1;
    double*   e1;
    int       r_val;
    int       done;
    int       m_GH_sweep_max;
    int       ij_begin;
    int       ijTL, ijBR;
    int       m_A11;
    int       n_iter_perf;
    int       n_UV_apply;
    int       total_deflations;
    int       n_deflations;
    int       n_iter_prev;
    int       n_iter_perf_sweep_max;

    // Compute some convergence constants.
    eps    = FLA_Mach_params_opd( FLA_MACH_EPS );
    tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
    FLA_Bsvd_compute_tol_thresh_opd( min_m_n,
                                     tolmul,
                                     maxitr,
                                     buff_d, inc_d,
                                     buff_e, inc_e,
                                     &tol,
                                     &thresh );
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: tolmul = %12.6e\n", tolmul );
printf( "FLA_Bsvd_v_opd_var1: tol    = %12.6e\n", tol );
printf( "FLA_Bsvd_v_opd_var1: thresh = %12.6e\n", thresh );
#endif

    // Initialize our completion flag.
    done = FALSE;

    // Initialize a counter that holds the maximum number of rows of G
    // and H that we would need to initialize for the next sweep.
    m_GH_sweep_max = min_m_n - 1;

    // Initialize a counter for the total number of iterations performed.
    n_iter_prev = 0;

    // Iterate until the matrix has completely deflated.
    for ( total_deflations = 0; done != TRUE; )
    {

        // Initialize G and H to contain only identity rotations.
        bli_zsetm( m_GH_sweep_max,
                   n_GH,
                   &one,
                   buff_G, rs_G, cs_G );
        bli_zsetm( m_GH_sweep_max,
                   n_GH,
                   &one,
                   buff_H, rs_H, cs_H );

        // Keep track of the maximum number of iterations performed in the
        // current sweep. This is used when applying the sweep's Givens
        // rotations.
        n_iter_perf_sweep_max = 0;

        // Perform a sweep: Move through the matrix and perform a bidiagonal
        // SVD on each non-zero submatrix that is encountered. During the
        // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
        for ( ij_begin = 0; ij_begin < min_m_n;  )
        {

#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Bsvd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif

            // Search for the first submatrix along the diagonal that is
            // bounded by zeroes (or endpoints of the matrix). If no
            // submatrix is found (ie: if the entire superdiagonal is zero
            // then FLA_FAILURE is returned. This function also inspects
            // superdiagonal elements for proximity to zero. If a given
            // element is close enough to zero, then it is deemed
            // converged and manually set to zero.
            r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
                                                 ij_begin,
                                                 buff_d, inc_d,
                                                 buff_e, inc_e,
                                                 &ijTL,
                                                 &ijBR );

            // Verify that a submatrix was found. If one was not found,
            // then we are done with the current sweep. Furthermore, if
            // a submatrix was not found AND we began our search at the
            // beginning of the matrix (ie: ij_begin == 0), then the
            // matrix has completely deflated and so we are done with
            // Francis step iteration.
            if ( r_val == FLA_FAILURE )
            {
                if ( ij_begin == 0 )
                {
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: superdiagonal is completely zero.\n" );
printf( "FLA_Bsvd_v_opd_var1: Francis iteration is done!\n" );
#endif
                    done = TRUE;
                }

                // Break out of the current sweep so we can apply the last
                // remaining Givens rotations.
                break;
            }

            // If we got this far, then:
            //   (a) ijTL refers to the index of the first non-zero
            //       superdiagonal along the diagonal, and
            //   (b) ijBR refers to either:
            //       - the first zero element that occurs after ijTL, or
            //       - the the last diagonal element.
            // Note that ijTL and ijBR also correspond to the first and
            // last diagonal elements of the submatrix of interest. Thus,
            // we may compute the dimension of this submatrix as:
            m_A11 = ijBR - ijTL + 1;

#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: ij_begin = %d\n", ij_begin );
printf( "FLA_Bsvd_v_opd_var1: ijTL     = %d\n", ijTL );
printf( "FLA_Bsvd_v_opd_var1: ijBR     = %d\n", ijBR );
printf( "FLA_Bsvd_v_opd_var1: m_A11    = %d\n", m_A11 );
#endif

            // Adjust ij_begin, which gets us ready for the next submatrix
            // search in the current sweep.
            ij_begin = ijBR + 1;

            // Index to the submatrices upon which we will operate.
            d1 = buff_d + ijTL * inc_d;
            e1 = buff_e + ijTL * inc_e;
            G  = buff_G + ijTL * rs_G;
            H  = buff_H + ijTL * rs_H;

            // Search for a batch of singular values, recursing on deflated
            // subproblems whenever a split occurs. Iteration continues as
            // long as
            //   (a) there is still matrix left to operate on, and
            //   (b) the number of iterations performed in this batch is
            //       less than n_G.
            // If/when either of the two above conditions fails to hold,
            // the function returns.
            n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
                                                        n_GH,
                                                        ijTL,
                                                        tol,
                                                        thresh,
                                                        d1, inc_d,
                                                        e1, inc_e,
                                                        G,  rs_G, cs_G,
                                                        H,  rs_H, cs_H,
                                                        &n_iter_perf );

            // Record the number of deflations that were observed.
            total_deflations += n_deflations;

            // Update the maximum number of iterations performed in the
            // current sweep.
            n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );

#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: deflations observed       = %d\n", n_deflations );
printf( "FLA_Bsvd_v_opd_var1: total deflations observed = %d\n", total_deflations );
printf( "FLA_Bsvd_v_opd_var1: num iterations performed  = %d\n", n_iter_perf );
#endif

            // Store the most recent value of ijBR in m_G_sweep_max.
            // When the sweep is done, this value will contain the minimum
            // number of rows of G and H we can apply and safely include all
            // non-identity rotations that were computed during the
            // singular value searches.
            m_GH_sweep_max = ijBR;

            // Make sure we haven't exceeded our maximum iteration count.
            if ( n_iter_prev >= n_iter_max * min_m_n )
            {
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
#endif
                FLA_Abort();
                //return FLA_FAILURE;
            }
        }

        // The sweep is complete. Now we must apply the Givens rotations
        // that were accumulated during the sweep.

        // Recall that the number of columns of U and V to which we apply
        // rotations is one more than the number of rotations.
        n_UV_apply = m_GH_sweep_max + 1;
        
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
printf( "FLA_Bsvd_v_opd_var1: m_U = %d\n", m_U );
printf( "FLA_Bsvd_v_opd_var1: m_V = %d\n", m_V );
printf( "FLA_Bsvd_v_opd_var1: napp= %d\n", n_UV_apply );
#endif
        // Apply the Givens rotations. Note that we only apply k sets of
        // rotations, where k = n_iter_perf_sweep_max. Also note that we only
        // apply to n_UV_apply columns of U and V since this is the most we
        // need to touch given the most recent value stored to m_GH_sweep_max.
        //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
                                 m_U,
                                 n_UV_apply,
                                 buff_G, rs_G, cs_G,
                                 buff_U, rs_U, cs_U,
                                 b_alg );
        //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
                                 m_V,
                                 n_UV_apply,
                                 buff_H, rs_H, cs_H,
                                 buff_V, rs_V, cs_V,
                                 b_alg );

        // Increment the total number of iterations previously performed.
        n_iter_prev += n_iter_perf_sweep_max;

#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
#endif
    }

    // Make all the singular values positive.
    {
        int    i;
        double minus_one = bli_dm1();

        for ( i = 0; i < min_m_n; ++i )
        {
            if ( buff_d[ (i  )*inc_d ] < rzero )
            {
                buff_d[ (i  )*inc_d ] = -buff_d[ (i  )*inc_d ];
                
                // Scale the right singular vectors.
                bli_dscalv( BLIS_NO_CONJUGATE,
                            m_V,
                            &minus_one,
                            buff_V + (i  )*cs_V, rs_V );
            }
        }
    }

    return n_iter_prev;
}
FLA_Error FLA_Bsvd_v_opd_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_RG,
int  rs_RG,
int  cs_RG,
double *  buff_RH,
int  rs_RH,
int  cs_RH,
double *  buff_W,
int  rs_W,
int  cs_W,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)

References bli_d0(), bli_d1(), bli_dcopymt(), bli_dgemm(), bli_dident(), bli_dm1(), bli_dscalv(), bli_z1(), bli_zsetm(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().

Referenced by FLA_Bsvd_v_opt_var2().

{
    dcomplex  one        = bli_z1();
    double    rone       = bli_d1();
    double    rzero      = bli_d0();

    int       maxitr     = 6;

    double    eps;
    double    tolmul;
    double    tol;
    double    thresh;

    dcomplex* G;
    dcomplex* H;
    double*   d1;
    double*   e1;
    int       r_val;
    int       done;
    int       m_GH_sweep_max;
    int       ij_begin;
    int       ijTL, ijBR;
    int       m_A11;
    int       n_iter_perf;
    int       n_UV_apply;
    int       total_deflations;
    int       n_deflations;
    int       n_iter_prev;
    int       n_iter_perf_sweep_max;

    // Compute some convergence constants.
    eps    = FLA_Mach_params_opd( FLA_MACH_EPS );
    tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
    FLA_Bsvd_compute_tol_thresh_opd( min_m_n,
                                     tolmul,
                                     maxitr,
                                     buff_d, inc_d,
                                     buff_e, inc_e,
                                     &tol,
                                     &thresh );

    // Initialize our completion flag.
    done = FALSE;

    // Initialize a counter that holds the maximum number of rows of G
    // and H that we would need to initialize for the next sweep.
    m_GH_sweep_max = min_m_n - 1;

    // Initialize a counter for the total number of iterations performed.
    n_iter_prev = 0;

    // Initialize RG and RH to identity.
    bli_dident( min_m_n,
                buff_RG, rs_RG, cs_RG );
    bli_dident( min_m_n,
                buff_RH, rs_RH, cs_RH );

    // Iterate until the matrix has completely deflated.
    for ( total_deflations = 0; done != TRUE; )
    {

        // Initialize G and H to contain only identity rotations.
        bli_zsetm( m_GH_sweep_max,
                   n_GH,
                   &one,
                   buff_G, rs_G, cs_G );
        bli_zsetm( m_GH_sweep_max,
                   n_GH,
                   &one,
                   buff_H, rs_H, cs_H );

        // Keep track of the maximum number of iterations performed in the
        // current sweep. This is used when applying the sweep's Givens
        // rotations.
        n_iter_perf_sweep_max = 0;

        // Perform a sweep: Move through the matrix and perform a bidiagonal
        // SVD on each non-zero submatrix that is encountered. During the
        // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
        for ( ij_begin = 0; ij_begin < min_m_n;  )
        {

#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Bsvd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif

            // Search for the first submatrix along the diagonal that is
            // bounded by zeroes (or endpoints of the matrix). If no
            // submatrix is found (ie: if the entire superdiagonal is zero
            // then FLA_FAILURE is returned. This function also inspects
            // superdiagonal elements for proximity to zero. If a given
            // element is close enough to zero, then it is deemed
            // converged and manually set to zero.
            r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
                                                 ij_begin,
                                                 buff_d, inc_d,
                                                 buff_e, inc_e,
                                                 &ijTL,
                                                 &ijBR );

            // Verify that a submatrix was found. If one was not found,
            // then we are done with the current sweep. Furthermore, if
            // a submatrix was not found AND we began our search at the
            // beginning of the matrix (ie: ij_begin == 0), then the
            // matrix has completely deflated and so we are done with
            // Francis step iteration.
            if ( r_val == FLA_FAILURE )
            {
                if ( ij_begin == 0 )
                {
#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: superdiagonal is completely zero.\n" );
printf( "FLA_Bsvd_v_opd_var2: Francis iteration is done!\n" );
#endif
                    done = TRUE;
                }

                // Break out of the current sweep so we can apply the last
                // remaining Givens rotations.
                break;
            }

            // If we got this far, then:
            //   (a) ijTL refers to the index of the first non-zero
            //       superdiagonal along the diagonal, and
            //   (b) ijBR refers to either:
            //       - the first zero element that occurs after ijTL, or
            //       - the the last diagonal element.
            // Note that ijTL and ijBR also correspond to the first and
            // last diagonal elements of the submatrix of interest. Thus,
            // we may compute the dimension of this submatrix as:
            m_A11 = ijBR - ijTL + 1;

#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: ij_begin = %d\n", ij_begin );
printf( "FLA_Bsvd_v_opd_var2: ijTL     = %d\n", ijTL );
printf( "FLA_Bsvd_v_opd_var2: ijBR     = %d\n", ijBR );
printf( "FLA_Bsvd_v_opd_var2: m_A11    = %d\n", m_A11 );
#endif

            // Adjust ij_begin, which gets us ready for the next submatrix
            // search in the current sweep.
            ij_begin = ijBR + 1;

            // Index to the submatrices upon which we will operate.
            d1 = buff_d + ijTL * inc_d;
            e1 = buff_e + ijTL * inc_e;
            G  = buff_G + ijTL * rs_G;
            H  = buff_H + ijTL * rs_H;

            // Search for a batch of singular values, recursing on deflated
            // subproblems whenever possible. A new singular value search is
            // performed as long as
            //   (a) there is still matrix left to operate on, and
            //   (b) the number of iterations performed in this batch is
            //       less than n_G.
            // If/when either of the two above conditions fails to hold,
            // the function returns.
            n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
                                                        n_GH,
                                                        ijTL,
                                                        tol,
                                                        thresh,
                                                        d1, inc_d,
                                                        e1, inc_e,
                                                        G,  rs_G, cs_G,
                                                        H,  rs_H, cs_H,
                                                        &n_iter_perf );

            // Record the number of deflations that occurred.
            total_deflations += n_deflations;

            // Update the maximum number of iterations performed in the
            // current sweep.
            n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );

#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: deflations observed       = %d\n", n_deflations );
printf( "FLA_Bsvd_v_opd_var2: total deflations observed = %d\n", total_deflations );
printf( "FLA_Bsvd_v_opd_var2: num iterations performed  = %d\n", n_iter_perf );
#endif

            // Store the most recent value of ijBR in m_G_sweep_max.
            // When the sweep is done, this value will contain the minimum
            // number of rows of G and H we can apply and safely include all
            // non-identity rotations that were computed during the
            // singular value searches.
            m_GH_sweep_max = ijBR;

        }

        // The sweep is complete. Now we must apply the Givens rotations
        // that were accumulated during the sweep.

        // Recall that the number of columns of U and V to which we apply
        // rotations is one more than the number of rotations.
        n_UV_apply = m_GH_sweep_max + 1;


#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
printf( "FLA_Bsvd_v_opd_var2: m_U = %d\n", m_U );
printf( "FLA_Bsvd_v_opd_var2: m_V = %d\n", m_V );
printf( "FLA_Bsvd_v_opd_var2: napp= %d\n", n_UV_apply );
#endif

        // Apply the Givens rotations. Note that we only apply k sets of
        // rotations, where k = n_iter_perf_sweep_max. Also note that we only
        // apply to n_UV_apply columns of U and V since this is the most we
        // need to touch given the most recent value stored to m_GH_sweep_max.
        //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
                                  min_m_n,
                                  n_UV_apply,
                                  n_iter_prev,
                                  buff_G,  rs_G,  cs_G,
                                  buff_RG, rs_RG, cs_RG,
                                  b_alg );
        //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
                                  min_m_n,
                                  n_UV_apply,
                                  n_iter_prev,
                                  buff_H,  rs_H,  cs_H,
                                  buff_RH, rs_RH, cs_RH,
                                  b_alg );

        // Increment the total number of iterations previously performed.
        n_iter_prev += n_iter_perf_sweep_max;

#ifdef PRINTF
printf( "FLA_Bsvd_v_opd_var2: total number of iterations performed: %d\n", n_iter_prev );
#endif
    }

    // Copy the contents of Q to temporary storage.
    bli_dcopymt( BLIS_NO_TRANSPOSE,
                 m_U,
                 m_V,
                 buff_U, rs_U, cs_U,
                 buff_W, rs_W, cs_W );
// W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!

    // Multiply U by R, overwriting U.
    bli_dgemm( BLIS_NO_TRANSPOSE,
               BLIS_NO_TRANSPOSE,
               m_U,
               m_V,
               m_V,
               &rone,
               ( double* )buff_W,  rs_W,  cs_W,
                          buff_RG, rs_RG, cs_RG,
               &rzero,
               ( double* )buff_U,  rs_U,  cs_U );

    bli_dcopymt( BLIS_NO_TRANSPOSE,
                 m_V,
                 m_V,
                 buff_V, rs_V, cs_V,
                 buff_W, rs_W, cs_W );

    // Multiply V by R, overwriting V.
    bli_dgemm( BLIS_NO_TRANSPOSE,
               BLIS_NO_TRANSPOSE,
               m_V,
               m_V,
               m_V,
               &rone,
               ( double* )buff_W,  rs_W,  cs_W,
                          buff_RH, rs_RH, cs_RH,
               &rzero,
               ( double* )buff_V,  rs_V,  cs_V );

    // Make all the singular values positive.
    {
        int    i;
        double minus_one = bli_dm1();

        for ( i = 0; i < min_m_n; ++i )
        {
            if ( buff_d[ (i  )*inc_d ] < rzero )
            {
                buff_d[ (i  )*inc_d ] = -buff_d[ (i  )*inc_d ];
                
                // Scale the right singular vectors.
                bli_dscalv( BLIS_NO_CONJUGATE,
                            m_V,
                            &minus_one,
                            buff_V + (i  )*cs_V, rs_V );
            }
        }
    }

    return n_iter_prev;
}
FLA_Error FLA_Bsvd_v_ops_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)

Referenced by FLA_Bsvd_v_opt_var1().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_v_ops_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_RG,
int  rs_RG,
int  cs_RG,
float *  buff_RH,
int  rs_RH,
int  cs_RH,
float *  buff_W,
int  rs_W,
int  cs_W,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)

Referenced by FLA_Bsvd_v_opt_var2().

{
    FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

    return FLA_SUCCESS;
}
FLA_Error FLA_Bsvd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)

References FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_ops_var1(), FLA_Bsvd_v_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Svd_uv_unb_var1().

{
    FLA_Error    r_val = FLA_SUCCESS;
    FLA_Datatype datatype;
    int          m_U, m_V, n_GH;
    int          inc_d;
    int          inc_e;
    int          rs_G, cs_G;
    int          rs_H, cs_H;
    int          rs_U, cs_U;
    int          rs_V, cs_V;

    datatype = FLA_Obj_datatype( U );

    m_U        = FLA_Obj_length( U );
    m_V        = FLA_Obj_length( V );
    n_GH       = FLA_Obj_width( G );

    inc_d      = FLA_Obj_vector_inc( d );
    inc_e      = FLA_Obj_vector_inc( e );
    
    rs_G       = FLA_Obj_row_stride( G );
    cs_G       = FLA_Obj_col_stride( G );

    rs_H       = FLA_Obj_row_stride( H );
    cs_H       = FLA_Obj_col_stride( H );

    rs_U       = FLA_Obj_row_stride( U );
    cs_U       = FLA_Obj_col_stride( U );

    rs_V       = FLA_Obj_row_stride( V );
    cs_V       = FLA_Obj_col_stride( V );


    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_d = FLA_FLOAT_PTR( d );
            float*    buff_e = FLA_FLOAT_PTR( e );
            scomplex* buff_G = FLA_COMPLEX_PTR( G );
            scomplex* buff_H = FLA_COMPLEX_PTR( H );
            float*    buff_U = FLA_FLOAT_PTR( U );
            float*    buff_V = FLA_FLOAT_PTR( V );

            r_val = FLA_Bsvd_v_ops_var1( min( m_U, m_V ),
                                                     m_U,
                                         m_V,
                                         n_GH,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_H, rs_H, cs_H,
                                         buff_U, rs_U, cs_U,
                                         buff_V, rs_V, cs_V,
                                         b_alg );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_d = FLA_DOUBLE_PTR( d );
            double*   buff_e = FLA_DOUBLE_PTR( e );
            dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
            dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
            double*   buff_U = FLA_DOUBLE_PTR( U );
            double*   buff_V = FLA_DOUBLE_PTR( V );

            r_val = FLA_Bsvd_v_opd_var1( min( m_U, m_V ),
                                                     m_U,
                                         m_V,
                                         n_GH,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_H, rs_H, cs_H,
                                         buff_U, rs_U, cs_U,
                                         buff_V, rs_V, cs_V,
                                         b_alg );

            break;
        }

        case FLA_COMPLEX:
        {
            float*    buff_d = FLA_FLOAT_PTR( d );
            float*    buff_e = FLA_FLOAT_PTR( e );
            scomplex* buff_G = FLA_COMPLEX_PTR( G );
            scomplex* buff_H = FLA_COMPLEX_PTR( H );
            scomplex* buff_U = FLA_COMPLEX_PTR( U );
            scomplex* buff_V = FLA_COMPLEX_PTR( V );

            r_val = FLA_Bsvd_v_opc_var1( min( m_U, m_V ),
                                                     m_U,
                                         m_V,
                                         n_GH,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_H, rs_H, cs_H,
                                         buff_U, rs_U, cs_U,
                                         buff_V, rs_V, cs_V,
                                         b_alg );

            break;
        }

        case FLA_DOUBLE_COMPLEX:
        {
            double*   buff_d = FLA_DOUBLE_PTR( d );
            double*   buff_e = FLA_DOUBLE_PTR( e );
            dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
            dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
            dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
            dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );

            r_val = FLA_Bsvd_v_opz_var1( min( m_U, m_V ),
                                                     m_U,
                                         m_V,
                                         n_GH,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_H, rs_H, cs_H,
                                         buff_U, rs_U, cs_U,
                                         buff_V, rs_V, cs_V,
                                         b_alg );

            break;
        }
    }

    return r_val;
}
FLA_Error FLA_Bsvd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  RG,
FLA_Obj  RH,
FLA_Obj  W,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)

References FLA_Bsvd_v_opc_var2(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_ops_var2(), FLA_Bsvd_v_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Svd_uv_unb_var2().

{
    FLA_Error    r_val = FLA_SUCCESS;
    FLA_Datatype datatype;
    int          m_U, m_V, n_GH;
    int          inc_d;
    int          inc_e;
    int          rs_G, cs_G;
    int          rs_H, cs_H;
    int          rs_RG, cs_RG;
    int          rs_RH, cs_RH;
    int          rs_W, cs_W;
    int          rs_U, cs_U;
    int          rs_V, cs_V;

    datatype = FLA_Obj_datatype( U );

    m_U        = FLA_Obj_length( U );
    m_V        = FLA_Obj_length( V );
    n_GH       = FLA_Obj_width( G );

    inc_d      = FLA_Obj_vector_inc( d );
    inc_e      = FLA_Obj_vector_inc( e );
    
    rs_G       = FLA_Obj_row_stride( G );
    cs_G       = FLA_Obj_col_stride( G );

    rs_H       = FLA_Obj_row_stride( H );
    cs_H       = FLA_Obj_col_stride( H );

    rs_RG      = FLA_Obj_row_stride( RG );
    cs_RG      = FLA_Obj_col_stride( RG );

    rs_RH      = FLA_Obj_row_stride( RH );
    cs_RH      = FLA_Obj_col_stride( RH );

    rs_W       = FLA_Obj_row_stride( W );
    cs_W       = FLA_Obj_col_stride( W );

    rs_U       = FLA_Obj_row_stride( U );
    cs_U       = FLA_Obj_col_stride( U );

    rs_V       = FLA_Obj_row_stride( V );
    cs_V       = FLA_Obj_col_stride( V );


    switch ( datatype )
    {
        case FLA_FLOAT:
        {
            float*    buff_d = FLA_FLOAT_PTR( d );
            float*    buff_e = FLA_FLOAT_PTR( e );
            scomplex* buff_G = FLA_COMPLEX_PTR( G );
            scomplex* buff_H = FLA_COMPLEX_PTR( H );
            float*    buff_RG = FLA_FLOAT_PTR( RG );
            float*    buff_RH = FLA_FLOAT_PTR( RH );
            float*    buff_W = FLA_FLOAT_PTR( W );
            float*    buff_U = FLA_FLOAT_PTR( U );
            float*    buff_V = FLA_FLOAT_PTR( V );

            r_val = FLA_Bsvd_v_ops_var2( min( m_U, m_V ),
                                                     m_U,
                                         m_V,
                                         n_GH,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_H, rs_H, cs_H,
                                         buff_RG, rs_RG, cs_RG,
                                         buff_RH, rs_RH, cs_RH,
                                         buff_W, rs_W, cs_W,
                                         buff_U, rs_U, cs_U,
                                         buff_V, rs_V, cs_V,
                                         b_alg );

            break;
        }

        case FLA_DOUBLE:
        {
            double*   buff_d = FLA_DOUBLE_PTR( d );
            double*   buff_e = FLA_DOUBLE_PTR( e );
            dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
            dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
            double*   buff_RG = FLA_DOUBLE_PTR( RG );
            double*   buff_RH = FLA_DOUBLE_PTR( RH );
            double*   buff_W = FLA_DOUBLE_PTR( W );
            double*   buff_U = FLA_DOUBLE_PTR( U );
            double*   buff_V = FLA_DOUBLE_PTR( V );

            r_val = FLA_Bsvd_v_opd_var2( min( m_U, m_V ),
                                                     m_U,
                                         m_V,
                                         n_GH,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_H, rs_H, cs_H,
                                         buff_RG, rs_RG, cs_RG,
                                         buff_RH, rs_RH, cs_RH,
                                         buff_W, rs_W, cs_W,
                                         buff_U, rs_U, cs_U,
                                         buff_V, rs_V, cs_V,
                                         b_alg );

            break;
        }

        case FLA_COMPLEX:
        {
            float*    buff_d = FLA_FLOAT_PTR( d );
            float*    buff_e = FLA_FLOAT_PTR( e );
            scomplex* buff_G = FLA_COMPLEX_PTR( G );
            scomplex* buff_H = FLA_COMPLEX_PTR( H );
            float*    buff_RG = FLA_FLOAT_PTR( RG );
            float*    buff_RH = FLA_FLOAT_PTR( RH );
            scomplex* buff_W = FLA_COMPLEX_PTR( W );
            scomplex* buff_U = FLA_COMPLEX_PTR( U );
            scomplex* buff_V = FLA_COMPLEX_PTR( V );

            r_val = FLA_Bsvd_v_opc_var2( min( m_U, m_V ),
                                                     m_U,
                                         m_V,
                                         n_GH,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_H, rs_H, cs_H,
                                         buff_RG, rs_RG, cs_RG,
                                         buff_RH, rs_RH, cs_RH,
                                         buff_W, rs_W, cs_W,
                                         buff_U, rs_U, cs_U,
                                         buff_V, rs_V, cs_V,
                                         b_alg );

            break;
        }

        case FLA_DOUBLE_COMPLEX:
        {
            double*   buff_d = FLA_DOUBLE_PTR( d );
            double*   buff_e = FLA_DOUBLE_PTR( e );
            dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
            dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
            double*   buff_RG = FLA_DOUBLE_PTR( RG );
            double*   buff_RH = FLA_DOUBLE_PTR( RH );
            dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
            dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
            dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );

            r_val = FLA_Bsvd_v_opz_var2( min( m_U, m_V ),
                                                     m_U,
                                         m_V,
                                         n_GH,
                                         n_iter_max,
                                         buff_d, inc_d,
                                         buff_e, inc_e,
                                         buff_G, rs_G, cs_G,
                                         buff_H, rs_H, cs_H,
                                         buff_RG, rs_RG, cs_RG,
                                         buff_RH, rs_RH, cs_RH,
                                         buff_W, rs_W, cs_W,
                                         buff_U, rs_U, cs_U,
                                         buff_V, rs_V, cs_V,
                                         b_alg );

            break;
        }
    }

    return r_val;
}
FLA_Error FLA_Bsvd_v_opz_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)

References bli_d0(), bli_dm1(), bli_z1(), bli_zdscalv(), bli_zsetm(), BLIS_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().

Referenced by FLA_Bsvd_v_opt_var1().

{
    dcomplex  one        = bli_z1();
    double    rzero      = bli_d0();

    int       maxitr     = 6;

    double    eps;
    double    tolmul;
    double    tol;
    double    thresh;

    dcomplex* G;
    dcomplex* H;
    double*   d1;
    double*   e1;
    int       r_val;
    int       done;
    int       m_GH_sweep_max;
    int       ij_begin;
    int       ijTL, ijBR;
    int       m_A11;
    int       n_iter_perf;
    int       n_UV_apply;
    int       total_deflations;
    int       n_deflations;
    int       n_iter_prev;
    int       n_iter_perf_sweep_max;

    // Compute some convergence constants.
    eps    = FLA_Mach_params_opd( FLA_MACH_EPS );
    tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
    FLA_Bsvd_compute_tol_thresh_opd( min_m_n,
                                     tolmul,
                                     maxitr,
                                     buff_d, inc_d,
                                     buff_e, inc_e,
                                     &tol,
                                     &thresh );
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: tolmul = %12.6e\n", tolmul );
printf( "FLA_Bsvd_v_opz_var1: tol    = %12.6e\n", tol );
printf( "FLA_Bsvd_v_opz_var1: thresh = %12.6e\n", thresh );
#endif

    // Initialize our completion flag.
    done = FALSE;

    // Initialize a counter that holds the maximum number of rows of G
    // and H that we would need to initialize for the next sweep.
    m_GH_sweep_max = min_m_n - 1;

    // Initialize a counter for the total number of iterations performed.
    n_iter_prev = 0;

    // Iterate until the matrix has completely deflated.
    for ( total_deflations = 0; done != TRUE; )
    {

        // Initialize G and H to contain only identity rotations.
        bli_zsetm( m_GH_sweep_max,
                   n_GH,
                   &one,
                   buff_G, rs_G, cs_G );
        bli_zsetm( m_GH_sweep_max,
                   n_GH,
                   &one,
                   buff_H, rs_H, cs_H );

        // Keep track of the maximum number of iterations performed in the
        // current sweep. This is used when applying the sweep's Givens
        // rotations.
        n_iter_perf_sweep_max = 0;

        // Perform a sweep: Move through the matrix and perform a bidiagonal
        // SVD on each non-zero submatrix that is encountered. During the
        // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
        for ( ij_begin = 0; ij_begin < min_m_n;  )
        {

#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Bsvd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif

            // Search for the first submatrix along the diagonal that is
            // bounded by zeroes (or endpoints of the matrix). If no
            // submatrix is found (ie: if the entire superdiagonal is zero
            // then FLA_FAILURE is returned. This function also inspects
            // superdiagonal elements for proximity to zero. If a given
            // element is close enough to zero, then it is deemed
            // converged and manually set to zero.
            r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
                                                 ij_begin,
                                                 buff_d, inc_d,
                                                 buff_e, inc_e,
                                                 &ijTL,
                                                 &ijBR );

            // Verify that a submatrix was found. If one was not found,
            // then we are done with the current sweep. Furthermore, if
            // a submatrix was not found AND we began our search at the
            // beginning of the matrix (ie: ij_begin == 0), then the
            // matrix has completely deflated and so we are done with
            // Francis step iteration.
            if ( r_val == FLA_FAILURE )
            {
                if ( ij_begin == 0 )
                {
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: superdiagonal is completely zero.\n" );
printf( "FLA_Bsvd_v_opz_var1: Francis iteration is done!\n" );
#endif
                    done = TRUE;
                }

                // Break out of the current sweep so we can apply the last
                // remaining Givens rotations.
                break;
            }

            // If we got this far, then:
            //   (a) ijTL refers to the index of the first non-zero
            //       superdiagonal along the diagonal, and
            //   (b) ijBR refers to either:
            //       - the first zero element that occurs after ijTL, or
            //       - the the last diagonal element.
            // Note that ijTL and ijBR also correspond to the first and
            // last diagonal elements of the submatrix of interest. Thus,
            // we may compute the dimension of this submatrix as:
            m_A11 = ijBR - ijTL + 1;

#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: ij_begin = %d\n", ij_begin );
printf( "FLA_Bsvd_v_opz_var1: ijTL     = %d\n", ijTL );
printf( "FLA_Bsvd_v_opz_var1: ijBR     = %d\n", ijBR );
printf( "FLA_Bsvd_v_opz_var1: m_A11    = %d\n", m_A11 );
#endif

            // Adjust ij_begin, which gets us ready for the next submatrix
            // search in the current sweep.
            ij_begin = ijBR + 1;

            // Index to the submatrices upon which we will operate.
            d1 = buff_d + ijTL * inc_d;
            e1 = buff_e + ijTL * inc_e;
            G  = buff_G + ijTL * rs_G;
            H  = buff_H + ijTL * rs_H;

            // Search for a batch of singular values, recursing on deflated
            // subproblems whenever a split occurs. Iteration continues as
            // long as
            //   (a) there is still matrix left to operate on, and
            //   (b) the number of iterations performed in this batch is
            //       less than n_G.
            // If/when either of the two above conditions fails to hold,
            // the function returns.
            n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
                                                        n_GH,
                                                        ijTL,
                                                        tol,
                                                        thresh,
                                                        d1, inc_d,
                                                        e1, inc_e,
                                                        G,  rs_G, cs_G,
                                                        H,  rs_H, cs_H,
                                                        &n_iter_perf );

            // Record the number of deflations that were observed.
            total_deflations += n_deflations;

            // Update the maximum number of iterations performed in the
            // current sweep.
            n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );

#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: deflations observed       = %d\n", n_deflations );
printf( "FLA_Bsvd_v_opz_var1: total deflations observed = %d\n", total_deflations );
printf( "FLA_Bsvd_v_opz_var1: num iterations performed  = %d\n", n_iter_perf );
#endif

            // Store the most recent value of ijBR in m_G_sweep_max.
            // When the sweep is done, this value will contain the minimum
            // number of rows of G and H we can apply and safely include all
            // non-identity rotations that were computed during the
            // singular value searches.
            m_GH_sweep_max = ijBR;

            // Make sure we haven't exceeded our maximum iteration count.
            if ( n_iter_prev >= n_iter_max * min_m_n )
            {
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
#endif
                FLA_Abort();
                //return FLA_FAILURE;
            }
        }

        // The sweep is complete. Now we must apply the Givens rotations
        // that were accumulated during the sweep.

        // Recall that the number of columns of U and V to which we apply
        // rotations is one more than the number of rotations.
        n_UV_apply = m_GH_sweep_max + 1;
        
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
printf( "FLA_Bsvd_v_opz_var1: m_U = %d\n", m_U );
printf( "FLA_Bsvd_v_opz_var1: m_V = %d\n", m_V );
printf( "FLA_Bsvd_v_opz_var1: napp= %d\n", n_UV_apply );
#endif
        // Apply the Givens rotations. Note that we only apply k sets of
        // rotations, where k = n_iter_perf_sweep_max. Also note that we only
        // apply to n_UV_apply columns of U and V since this is the most we
        // need to touch given the most recent value stored to m_GH_sweep_max.
        //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
                                 m_U,
                                 n_UV_apply,
                                 buff_G, rs_G, cs_G,
                                 buff_U, rs_U, cs_U,
                                 b_alg );
        //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
                                 m_V,
                                 n_UV_apply,
                                 buff_H, rs_H, cs_H,
                                 buff_V, rs_V, cs_V,
                                 b_alg );

        // Increment the total number of iterations previously performed.
        n_iter_prev += n_iter_perf_sweep_max;

#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
#endif
    }

    // Make all the singular values positive.
    {
        int    i;
        double minus_one = bli_dm1();

        for ( i = 0; i < min_m_n; ++i )
        {
            if ( buff_d[ (i  )*inc_d ] < rzero )
            {
                buff_d[ (i  )*inc_d ] = -buff_d[ (i  )*inc_d ];
                
                // Scale the right singular vectors.
                bli_zdscalv( BLIS_NO_CONJUGATE,
                             m_V,
                             &minus_one,
                             buff_V + (i  )*cs_V, rs_V );
            }
        }
    }

    return n_iter_prev;
}
FLA_Error FLA_Bsvd_v_opz_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_RG,
int  rs_RG,
int  cs_RG,
double *  buff_RH,
int  rs_RH,
int  cs_RH,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)

References bli_d0(), bli_d1(), bli_dgemm(), bli_dident(), bli_dm1(), bli_z1(), bli_zcopymt(), bli_zdscalv(), bli_zsetm(), BLIS_NO_CONJUGATE, BLIS_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), and FLA_Mach_params_opd().

Referenced by FLA_Bsvd_v_opt_var2().

{
    dcomplex  one        = bli_z1();
    double    rone       = bli_d1();
    double    rzero      = bli_d0();

    int       maxitr     = 6;

    double    eps;
    double    tolmul;
    double    tol;
    double    thresh;

    dcomplex* G;
    dcomplex* H;
    double*   d1;
    double*   e1;
    int       r_val;
    int       done;
    int       m_GH_sweep_max;
    int       ij_begin;
    int       ijTL, ijBR;
    int       m_A11;
    int       n_iter_perf;
    int       n_UV_apply;
    int       total_deflations;
    int       n_deflations;
    int       n_iter_prev;
    int       n_iter_perf_sweep_max;

    // Compute some convergence constants.
    eps    = FLA_Mach_params_opd( FLA_MACH_EPS );
    tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
    FLA_Bsvd_compute_tol_thresh_opd( min_m_n,
                                     tolmul,
                                     maxitr,
                                     buff_d, inc_d,
                                     buff_e, inc_e,
                                     &tol,
                                     &thresh );

    // Initialize our completion flag.
    done = FALSE;

    // Initialize a counter that holds the maximum number of rows of G
    // and H that we would need to initialize for the next sweep.
    m_GH_sweep_max = min_m_n - 1;

    // Initialize a counter for the total number of iterations performed.
    n_iter_prev = 0;

    // Initialize RG and RH to identity.
    bli_dident( min_m_n,
                buff_RG, rs_RG, cs_RG );
    bli_dident( min_m_n,
                buff_RH, rs_RH, cs_RH );

    // Iterate until the matrix has completely deflated.
    for ( total_deflations = 0; done != TRUE; )
    {

        // Initialize G and H to contain only identity rotations.
        bli_zsetm( m_GH_sweep_max,
                   n_GH,
                   &one,
                   buff_G, rs_G, cs_G );
        bli_zsetm( m_GH_sweep_max,
                   n_GH,
                   &one,
                   buff_H, rs_H, cs_H );

        // Keep track of the maximum number of iterations performed in the
        // current sweep. This is used when applying the sweep's Givens
        // rotations.
        n_iter_perf_sweep_max = 0;

        // Perform a sweep: Move through the matrix and perform a bidiagonal
        // SVD on each non-zero submatrix that is encountered. During the
        // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
        for ( ij_begin = 0; ij_begin < min_m_n;  )
        {

#ifdef PRINTF
if ( ij_begin == 0 )
printf( "FLA_Bsvd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
#endif

            // Search for the first submatrix along the diagonal that is
            // bounded by zeroes (or endpoints of the matrix). If no
            // submatrix is found (ie: if the entire superdiagonal is zero
            // then FLA_FAILURE is returned. This function also inspects
            // superdiagonal elements for proximity to zero. If a given
            // element is close enough to zero, then it is deemed
            // converged and manually set to zero.
            r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
                                                 ij_begin,
                                                 buff_d, inc_d,
                                                 buff_e, inc_e,
                                                 &ijTL,
                                                 &ijBR );

            // Verify that a submatrix was found. If one was not found,
            // then we are done with the current sweep. Furthermore, if
            // a submatrix was not found AND we began our search at the
            // beginning of the matrix (ie: ij_begin == 0), then the
            // matrix has completely deflated and so we are done with
            // Francis step iteration.
            if ( r_val == FLA_FAILURE )
            {
                if ( ij_begin == 0 )
                {
#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: superdiagonal is completely zero.\n" );
printf( "FLA_Bsvd_v_opz_var2: Francis iteration is done!\n" );
#endif
                    done = TRUE;
                }

                // Break out of the current sweep so we can apply the last
                // remaining Givens rotations.
                break;
            }

            // If we got this far, then:
            //   (a) ijTL refers to the index of the first non-zero
            //       superdiagonal along the diagonal, and
            //   (b) ijBR refers to either:
            //       - the first zero element that occurs after ijTL, or
            //       - the the last diagonal element.
            // Note that ijTL and ijBR also correspond to the first and
            // last diagonal elements of the submatrix of interest. Thus,
            // we may compute the dimension of this submatrix as:
            m_A11 = ijBR - ijTL + 1;

#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: ij_begin = %d\n", ij_begin );
printf( "FLA_Bsvd_v_opz_var2: ijTL     = %d\n", ijTL );
printf( "FLA_Bsvd_v_opz_var2: ijBR     = %d\n", ijBR );
printf( "FLA_Bsvd_v_opz_var2: m_A11    = %d\n", m_A11 );
#endif

            // Adjust ij_begin, which gets us ready for the next submatrix
            // search in the current sweep.
            ij_begin = ijBR + 1;

            // Index to the submatrices upon which we will operate.
            d1 = buff_d + ijTL * inc_d;
            e1 = buff_e + ijTL * inc_e;
            G  = buff_G + ijTL * rs_G;
            H  = buff_H + ijTL * rs_H;

            // Search for a batch of singular values, recursing on deflated
            // subproblems whenever possible. A new singular value search is
            // performed as long as
            //   (a) there is still matrix left to operate on, and
            //   (b) the number of iterations performed in this batch is
            //       less than n_G.
            // If/when either of the two above conditions fails to hold,
            // the function returns.
            n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
                                                        n_GH,
                                                        ijTL,
                                                        tol,
                                                        thresh,
                                                        d1, inc_d,
                                                        e1, inc_e,
                                                        G,  rs_G, cs_G,
                                                        H,  rs_H, cs_H,
                                                        &n_iter_perf );

            // Record the number of deflations that occurred.
            total_deflations += n_deflations;

            // Update the maximum number of iterations performed in the
            // current sweep.
            n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );

#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: deflations observed       = %d\n", n_deflations );
printf( "FLA_Bsvd_v_opz_var2: total deflations observed = %d\n", total_deflations );
printf( "FLA_Bsvd_v_opz_var2: num iterations performed  = %d\n", n_iter_perf );
#endif

            // Store the most recent value of ijBR in m_G_sweep_max.
            // When the sweep is done, this value will contain the minimum
            // number of rows of G and H we can apply and safely include all
            // non-identity rotations that were computed during the
            // singular value searches.
            m_GH_sweep_max = ijBR;

        }

        // The sweep is complete. Now we must apply the Givens rotations
        // that were accumulated during the sweep.

        // Recall that the number of columns of U and V to which we apply
        // rotations is one more than the number of rotations.
        n_UV_apply = m_GH_sweep_max + 1;


#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
printf( "FLA_Bsvd_v_opz_var2: m_U = %d\n", m_U );
printf( "FLA_Bsvd_v_opz_var2: m_V = %d\n", m_V );
printf( "FLA_Bsvd_v_opz_var2: napp= %d\n", n_UV_apply );
#endif

        // Apply the Givens rotations. Note that we only apply k sets of
        // rotations, where k = n_iter_perf_sweep_max. Also note that we only
        // apply to n_UV_apply columns of U and V since this is the most we
        // need to touch given the most recent value stored to m_GH_sweep_max.
        //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
                                  min_m_n,
                                  n_UV_apply,
                                  n_iter_prev,
                                  buff_G,  rs_G,  cs_G,
                                  buff_RG, rs_RG, cs_RG,
                                  b_alg );
        //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
        FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
        //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
                                  min_m_n,
                                  n_UV_apply,
                                  n_iter_prev,
                                  buff_H,  rs_H,  cs_H,
                                  buff_RH, rs_RH, cs_RH,
                                  b_alg );

        // Increment the total number of iterations previously performed.
        n_iter_prev += n_iter_perf_sweep_max;

#ifdef PRINTF
printf( "FLA_Bsvd_v_opz_var2: total number of iterations performed: %d\n", n_iter_prev );
#endif
    }

    // Copy the contents of Q to temporary storage.
    bli_zcopymt( BLIS_NO_TRANSPOSE,
                 m_U,
                 m_V,
                 buff_U, rs_U, cs_U,
                 buff_W, rs_W, cs_W );
// W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!

    // Multiply U by R, overwriting U.
    bli_dgemm( BLIS_NO_TRANSPOSE,
               BLIS_NO_TRANSPOSE,
               2*m_U,
               m_V,
               m_V,
               &rone,
               ( double* )buff_W,  rs_W,  2*cs_W,
                          buff_RG, rs_RG,   cs_RG,
               &rzero,
               ( double* )buff_U,  rs_U,  2*cs_U );

    bli_zcopymt( BLIS_NO_TRANSPOSE,
                 m_V,
                 m_V,
                 buff_V, rs_V, cs_V,
                 buff_W, rs_W, cs_W );

    // Multiply V by R, overwriting V.
    bli_dgemm( BLIS_NO_TRANSPOSE,
               BLIS_NO_TRANSPOSE,
               2*m_V,
               m_V,
               m_V,
               &rone,
               ( double* )buff_W,  rs_W,  2*cs_W,
                          buff_RH, rs_RH,   cs_RH,
               &rzero,
               ( double* )buff_V,  rs_V,  2*cs_V );

    // Make all the singular values positive.
    {
        int    i;
        double minus_one = bli_dm1();

        for ( i = 0; i < min_m_n; ++i )
        {
            if ( buff_d[ (i  )*inc_d ] < rzero )
            {
                buff_d[ (i  )*inc_d ] = -buff_d[ (i  )*inc_d ];
                
                // Scale the right singular vectors.
                bli_zdscalv( BLIS_NO_CONJUGATE,
                             m_V,
                             &minus_one,
                             buff_V + (i  )*cs_V, rs_V );
            }
        }
    }

    return n_iter_prev;
}