libflame  revision_anchor
Functions
FLA_Bsvd_v.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Bsvd_compute_shift (FLA_Obj tol, FLA_Obj sminl, FLA_Obj smax, FLA_Obj d, FLA_Obj e, FLA_Obj shift)
 
FLA_Error FLA_Bsvd_compute_shift_ops (int m_A, float tol, float sminl, float smax, float *buff_d, int inc_d, float *buff_e, int inc_e, float *shift)
 
FLA_Error FLA_Bsvd_compute_shift_opd (int m_A, double tol, double sminl, double smax, double *buff_d, int inc_d, double *buff_e, int inc_e, double *shift)
 
FLA_Error FLA_Bsvd_compute_tol_thresh (FLA_Obj tolmul, FLA_Obj maxit, FLA_Obj d, FLA_Obj e, FLA_Obj tol, FLA_Obj thresh)
 
FLA_Error FLA_Bsvd_compute_tol_thresh_ops (int m_A, float tolmul, float maxit, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
 
FLA_Error FLA_Bsvd_compute_tol_thresh_opd (int m_A, double tolmul, double maxit, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
 
FLA_Error FLA_Bsvd_find_converged (FLA_Obj tol, FLA_Obj d, FLA_Obj e, FLA_Obj sminl)
 
FLA_Error FLA_Bsvd_find_converged_ops (int m_A, float tol, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sminl)
 
FLA_Error FLA_Bsvd_find_converged_opd (int m_A, double tol, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sminl)
 
FLA_Error FLA_Bsvd_find_max_min (FLA_Obj d, FLA_Obj e, FLA_Obj smax, FLA_Obj smin)
 
FLA_Error FLA_Bsvd_find_max_min_ops (int m_A, float *buff_d, int inc_d, float *buff_e, int inc_e, float *smax, float *smin)
 
FLA_Error FLA_Bsvd_find_max_min_opd (int m_A, double *buff_d, int inc_d, double *buff_e, int inc_e, double *smax, double *smin)
 
FLA_Error FLA_Bsvd_find_submatrix_ops (int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
 
FLA_Error FLA_Bsvd_find_submatrix_opd (int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
 
FLA_Error FLA_Bsvd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj U, FLA_Obj V, dim_t b_alg)
 
FLA_Error FLA_Bsvd_v_ops_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opd_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opc_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opz_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj RG, FLA_Obj RH, FLA_Obj W, FLA_Obj U, FLA_Obj V, dim_t b_alg)
 
FLA_Error FLA_Bsvd_v_ops_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opd_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opc_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opz_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
 

Function Documentation

◆ FLA_Bsvd_compute_shift()

FLA_Error FLA_Bsvd_compute_shift ( FLA_Obj  tol,
FLA_Obj  sminl,
FLA_Obj  smax,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  shift 
)
14 {
15  FLA_Datatype datatype;
16  int m_A;
17  int inc_d;
18  int inc_e;
19 
20  datatype = FLA_Obj_datatype( d );
21 
22  m_A = FLA_Obj_vector_dim( d );
23 
24  inc_d = FLA_Obj_vector_inc( d );
25  inc_e = FLA_Obj_vector_inc( e );
26 
27  switch ( datatype )
28  {
29  case FLA_FLOAT:
30  {
31  float* buff_tol = FLA_FLOAT_PTR( tol );
32  float* buff_sminl = FLA_FLOAT_PTR( sminl );
33  float* buff_smax = FLA_FLOAT_PTR( smax );
34  float* buff_d = FLA_FLOAT_PTR( d );
35  float* buff_e = FLA_FLOAT_PTR( e );
36  float* buff_shift = FLA_FLOAT_PTR( shift );
37 
39  *buff_tol,
40  *buff_sminl,
41  *buff_smax,
42  buff_d, inc_d,
43  buff_e, inc_e,
44  buff_shift );
45 
46  break;
47  }
48 
49  case FLA_DOUBLE:
50  {
51  double* buff_tol = FLA_DOUBLE_PTR( tol );
52  double* buff_sminl = FLA_DOUBLE_PTR( sminl );
53  double* buff_smax = FLA_DOUBLE_PTR( smax );
54  double* buff_d = FLA_DOUBLE_PTR( d );
55  double* buff_e = FLA_DOUBLE_PTR( e );
56  double* buff_shift = FLA_DOUBLE_PTR( shift );
57 
59  *buff_tol,
60  *buff_sminl,
61  *buff_smax,
62  buff_d, inc_d,
63  buff_e, inc_e,
64  buff_shift );
65 
66  break;
67  }
68  }
69 
70  return FLA_SUCCESS;
71 }
FLA_Error FLA_Bsvd_compute_shift_opd(int m_A, double tol, double sminl, double smax, double *buff_d, int inc_d, double *buff_e, int inc_e, double *shift)
Definition: FLA_Bsvd_compute_shift.c:130
FLA_Error FLA_Bsvd_compute_shift_ops(int m_A, float tol, float sminl, float smax, float *buff_d, int inc_d, float *buff_e, int inc_e, float *shift)
Definition: FLA_Bsvd_compute_shift.c:75
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition: FLA_Query.c:137
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49

References FLA_Bsvd_compute_shift_opd(), FLA_Bsvd_compute_shift_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().

◆ FLA_Bsvd_compute_shift_opd()

FLA_Error FLA_Bsvd_compute_shift_opd ( int  m_A,
double  tol,
double  sminl,
double  smax,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  shift 
)
137 {
138  double hndrth = 0.01;
139  double eps;
140  double* d_first;
141  double* e_last;
142  double* d_last_m1;
143  double* d_last;
144  double sll, temp;
145 
146  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
147 
148  d_first = buff_d + (0 )*inc_d;
149  e_last = buff_e + (m_A-2)*inc_e;
150  d_last_m1 = buff_d + (m_A-2)*inc_d;
151  d_last = buff_d + (m_A-1)*inc_d;
152 
153  // If the shift would ruin relative accuracy, set it to zero.
154  if ( m_A * tol * ( sminl / smax ) <= max( eps, hndrth * tol ) )
155  {
156 #ifdef PRINTF
157  printf( "FLA_Bsvd_compute_shift_opd: shift would ruin accuracy; setting shift to 0.\n" );
158  printf( " m_A = %d \n", m_A );
159  printf( " tol = %20.15e\n", tol );
160  printf( " sminl = %20.15e\n", sminl );
161  printf( " smax = %20.15e\n", smax );
162  printf( " LHS = %20.15e\n", m_A * tol * ( sminl / smax ) );
163  printf( " max(eps,0.01*tol)= %20.15e\n", max( eps, hndrth * tol ) );
164 #endif
165  *shift = 0.0;
166  }
167  else
168  {
169  // Compute the shift from the last 2x2 matrix.
170  FLA_Sv_2x2_opd( d_last_m1,
171  e_last,
172  d_last,
173  shift,
174  &temp );
175 
176  sll = fabs( *d_first );
177 
178  // Check if the shift is negligible; if so, set it to zero.
179  if ( sll > 0.0 )
180  {
181  temp = ( *shift / sll );
182  if ( temp * temp < eps )
183  {
184 #ifdef PRINTF
185  printf( "FLA_Bsvd_compute_shift_opd: shift is negligible; setting shift to 0.\n" );
186 #endif
187  *shift = 0.0;
188  }
189  }
190  }
191 
192  return FLA_SUCCESS;
193 }
double FLA_Mach_params_opd(FLA_Machval machval)
Definition: FLA_Mach_params.c:74
FLA_Error FLA_Sv_2x2_opd(double *alpha11, double *alpha12, double *alpha22, double *sigma1, double *sigma2)
Definition: FLA_Sv_2x2.c:166
dcomplex temp
Definition: bl1_axpyv2b.c:301

References FLA_Mach_params_opd(), FLA_Sv_2x2_opd(), and temp.

Referenced by FLA_Bsvd_compute_shift(), and FLA_Bsvd_sinval_v_opd_var1().

◆ FLA_Bsvd_compute_shift_ops()

FLA_Error FLA_Bsvd_compute_shift_ops ( int  m_A,
float  tol,
float  sminl,
float  smax,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  shift 
)
82 {
83  float hndrth = 0.01;
84  float eps;
85  float* d_first;
86  float* e_last;
87  float* d_last_m1;
88  float* d_last;
89  float sll, temp;
90 
91  eps = FLA_Mach_params_ops( FLA_MACH_EPS );
92 
93  d_first = buff_d + (0 )*inc_d;
94  e_last = buff_e + (m_A-2)*inc_e;
95  d_last_m1 = buff_d + (m_A-2)*inc_d;
96  d_last = buff_d + (m_A-1)*inc_d;
97 
98  // If the shift would ruin relative accuracy, set it to zero.
99  if ( m_A * tol * ( sminl / smax ) <= max( eps, hndrth * tol ) )
100  {
101  *shift = 0.0;
102  }
103  else
104  {
105  // Compute the shift from the last 2x2 matrix.
106  FLA_Sv_2x2_ops( d_last_m1,
107  e_last,
108  d_last,
109  shift,
110  &temp );
111 
112  sll = fabsf( *d_first );
113 
114  // Check if the shift is negligible; if so, set it to zero.
115  if ( sll > 0.0F )
116  {
117  temp = ( *shift / sll );
118  if ( temp * temp < eps )
119  {
120  *shift = 0.0F;
121  }
122  }
123  }
124 
125  return FLA_SUCCESS;
126 }
float FLA_Mach_params_ops(FLA_Machval machval)
Definition: FLA_Mach_params.c:47
FLA_Error FLA_Sv_2x2_ops(float *alpha11, float *alpha12, float *alpha22, float *sigma1, float *sigma2)
Definition: FLA_Sv_2x2.c:83

References FLA_Mach_params_ops(), FLA_Sv_2x2_ops(), and temp.

Referenced by FLA_Bsvd_compute_shift(), and FLA_Bsvd_sinval_v_ops_var1().

◆ FLA_Bsvd_compute_tol_thresh()

FLA_Error FLA_Bsvd_compute_tol_thresh ( FLA_Obj  tolmul,
FLA_Obj  maxit,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  tol,
FLA_Obj  thresh 
)
16 {
17  FLA_Datatype datatype;
18  int n_A;
19  int inc_d;
20  int inc_e;
21 
22  datatype = FLA_Obj_datatype( d );
23 
24  n_A = FLA_Obj_vector_dim( d );
25 
26  inc_d = FLA_Obj_vector_inc( d );
27  inc_e = FLA_Obj_vector_inc( e );
28 
29 
30  switch ( datatype )
31  {
32  case FLA_FLOAT:
33  {
34  float* buff_tolmul = FLA_FLOAT_PTR( tolmul );
35  float* buff_maxitr = FLA_FLOAT_PTR( maxitr );
36  float* buff_d = FLA_FLOAT_PTR( d );
37  float* buff_e = FLA_FLOAT_PTR( e );
38  float* buff_tol = FLA_FLOAT_PTR( tol );
39  float* buff_thresh = FLA_FLOAT_PTR( thresh );
40 
42  *buff_tolmul,
43  *buff_maxitr,
44  buff_d, inc_d,
45  buff_e, inc_e,
46  buff_tol,
47  buff_thresh );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_tolmul = FLA_DOUBLE_PTR( tolmul );
55  double* buff_maxitr = FLA_DOUBLE_PTR( maxitr );
56  double* buff_d = FLA_DOUBLE_PTR( d );
57  double* buff_e = FLA_DOUBLE_PTR( e );
58  double* buff_tol = FLA_DOUBLE_PTR( tol );
59  double* buff_thresh = FLA_DOUBLE_PTR( thresh );
60 
62  *buff_tolmul,
63  *buff_maxitr,
64  buff_d, inc_d,
65  buff_e, inc_e,
66  buff_tol,
67  buff_thresh );
68 
69  break;
70  }
71  }
72 
73  return FLA_SUCCESS;
74 }
FLA_Error FLA_Bsvd_compute_tol_thresh_opd(int n_A, double tolmul, double maxitr, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:137
FLA_Error FLA_Bsvd_compute_tol_thresh_ops(int n_A, float tolmul, float maxitr, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:78

References FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().

◆ FLA_Bsvd_compute_tol_thresh_opd()

FLA_Error FLA_Bsvd_compute_tol_thresh_opd ( int  m_A,
double  tolmul,
double  maxit,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  tol,
double *  thresh 
)
144 {
145  double zero = bl1_d0();
146  double smin;
147  double eps, unfl;
148  double mu;
149  int i;
150 
151  // Query machine epsilon and the safe minimum.
152  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
153  unfl = FLA_Mach_params_opd( FLA_MACH_SFMIN );
154 
155  // Compute tol as the product of machine epsilon and tolmul.
156  *tol = tolmul * eps;
157 
158  // Compute the approximate maximum singular value.
159  // NOT NEEDED unless we're supporting absolute accuracy.
160  //FLA_Bsvd_sinval_find_max( n_A,
161  // buff_d, inc_d,
162  // buff_e, inc_e,
163  // &smax );
164 
165  // Compute the approximate minimum singular value.
166  smin = fabs( *buff_d );
167 
168  // Skip the accumulation of smin if the first element is zero.
169  if ( smin != zero )
170  {
171  mu = smin;
172  for ( i = 1; i < n_A; ++i )
173  {
174  double* epsilon1 = buff_e + (i-1)*inc_e;
175  double* delta2 = buff_d + (i )*inc_d;
176 
177  mu = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) );
178  smin = min( smin, mu );
179 
180  // Stop early if we encountered a zero.
181  if ( smin == zero ) break;
182  }
183  }
184 
185  // Compute thresh either in terms of tol or as a function of the
186  // maximum total number of iterations, the problem size, and the
187  // safe minimum.
188  smin = smin / sqrt( ( double ) n_A );
189  *thresh = max( *tol * smin, maxitr * n_A * n_A * unfl );
190 
191  return FLA_SUCCESS;
192 }
int i
Definition: bl1_axmyv2.c:145
double bl1_d0(void)
Definition: bl1_constants.c:118

References bl1_d0(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_compute_tol_thresh(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

◆ FLA_Bsvd_compute_tol_thresh_ops()

FLA_Error FLA_Bsvd_compute_tol_thresh_ops ( int  m_A,
float  tolmul,
float  maxit,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  tol,
float *  thresh 
)
85 {
86  float zero = bl1_s0();
87  float smin;
88  float eps, unfl;
89  float mu;
90  int i;
91 
92  // Query machine epsilon and the safe minimum.
93  eps = FLA_Mach_params_ops( FLA_MACH_EPS );
94  unfl = FLA_Mach_params_ops( FLA_MACH_SFMIN );
95 
96  // Compute tol as the product of machine epsilon and tolmul.
97  *tol = tolmul * eps;
98 
99  // Compute the approximate maximum singular value.
100  // NOT NEEDED unless we're supporting absolute accuracy.
101  //FLA_Bsvd_sinval_find_max( n_A,
102  // buff_d, inc_d,
103  // buff_e, inc_e,
104  // &smax );
105 
106  // Compute the approximate minimum singular value.
107  smin = fabsf( *buff_d );
108 
109  // Skip the accumulation of smin if the first element is zero.
110  if ( smin != zero )
111  {
112  mu = smin;
113  for ( i = 1; i < n_A; ++i )
114  {
115  float* epsilon1 = buff_e + (i-1)*inc_e;
116  float* delta2 = buff_d + (i )*inc_d;
117 
118  mu = fabsf( *delta2 ) * ( mu / ( mu + fabsf( *epsilon1 ) ) );
119  smin = min( smin, mu );
120 
121  // Stop early if we encountered a zero.
122  if ( smin == zero ) break;
123  }
124  }
125 
126  // Compute thresh either in terms of tol or as a function of the
127  // maximum total number of iterations, the problem size, and the
128  // safe minimum.
129  smin = smin / sqrtf( ( float ) n_A );
130  *thresh = max( *tol * smin, maxitr * n_A * n_A * unfl );
131 
132  return FLA_SUCCESS;
133 }
float bl1_s0(void)
Definition: bl1_constants.c:111

References bl1_s0(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_compute_tol_thresh(), FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), and FLA_Bsvd_v_ops_var1().

◆ FLA_Bsvd_find_converged()

FLA_Error FLA_Bsvd_find_converged ( FLA_Obj  tol,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  sminl 
)
14 {
15  FLA_Datatype datatype;
16  int m_A;
17  int inc_d;
18  int inc_e;
19 
20  datatype = FLA_Obj_datatype( d );
21 
22  m_A = FLA_Obj_vector_dim( d );
23 
24  inc_d = FLA_Obj_vector_inc( d );
25  inc_e = FLA_Obj_vector_inc( e );
26 
27 
28  switch ( datatype )
29  {
30  case FLA_FLOAT:
31  {
32  float* buff_tol = FLA_FLOAT_PTR( tol );
33  float* buff_d = FLA_FLOAT_PTR( d );
34  float* buff_e = FLA_FLOAT_PTR( e );
35  float* buff_sminl = FLA_FLOAT_PTR( sminl );
36 
38  *buff_tol,
39  buff_d, inc_d,
40  buff_e, inc_e,
41  buff_sminl );
42 
43  break;
44  }
45 
46  case FLA_DOUBLE:
47  {
48  double* buff_tol = FLA_DOUBLE_PTR( tol );
49  double* buff_d = FLA_DOUBLE_PTR( d );
50  double* buff_e = FLA_DOUBLE_PTR( e );
51  double* buff_sminl = FLA_DOUBLE_PTR( sminl );
52 
54  *buff_tol,
55  buff_d, inc_d,
56  buff_e, inc_e,
57  buff_sminl );
58 
59  break;
60  }
61  }
62 
63  return FLA_SUCCESS;
64 }
FLA_Error FLA_Bsvd_find_converged_ops(int m_A, float tol, float *buff_d, int inc_d, float *buff_e, int inc_e, float *sminl)
Definition: FLA_Bsvd_find_converged.c:68
FLA_Error FLA_Bsvd_find_converged_opd(int m_A, double tol, double *buff_d, int inc_d, double *buff_e, int inc_e, double *sminl)
Definition: FLA_Bsvd_find_converged.c:117

References FLA_Bsvd_find_converged_opd(), FLA_Bsvd_find_converged_ops(), FLA_Obj_datatype(), FLA_Obj_vector_dim(), and FLA_Obj_vector_inc().

◆ FLA_Bsvd_find_converged_opd()

FLA_Error FLA_Bsvd_find_converged_opd ( int  m_A,
double  tol,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  sminl 
)
122 {
123  double* epsilon_last;
124  double* delta_last;
125  double mu;
126  int i;
127 
128  epsilon_last = buff_e + (m_A-2)*inc_e;
129  delta_last = buff_d + (m_A-1)*inc_d;
130 
131  // Check convergence at the bottom of the matrix first.
132  if ( MAC_Bsvd_sinval_is_converged_opd( tol, *delta_last, *epsilon_last ) )
133  {
134  //*epsilon_last = 0.0;
135  *sminl = 0.0;
136  return m_A - 2;
137  }
138 
139  // If the last element is not yet converged, check interior elements.
140  // Also, accumulate sminl for later use when it comes time to check
141  // the shift.
142 
143  mu = fabs( *buff_d );
144  *sminl = mu;
145 
146  for ( i = 0; i < m_A - 1; ++i )
147  {
148  double* epsilon1 = buff_e + (i )*inc_e;
149  double* delta2 = buff_d + (i+1)*inc_d;
150 
151  // Check convergence of epsilon1 against the value of mu accumulated
152  // so far.
153  if ( MAC_Bsvd_sinval_is_converged_opd( tol, mu, *epsilon1 ) )
154  {
155 //printf( "FLA_Bsvd_sinval_find_converged: Split occurred in col %d\n", i );
156 //printf( "FLA_Bsvd_sinval_find_converged: mu alpha12 = %23.19e %23.19e\n", mu, *epsilon1 );
157 //printf( "FLA_Bsvd_sinval_find_converged: alpha22 = %43.19e\n", *delta2 );
158  //*epsilon1 = 0.0;
159  //return FLA_FAILURE;
160  return i;
161  }
162 
163  // Update mu and sminl.
164  mu = fabs( *delta2 ) * ( mu / ( mu + fabs( *epsilon1 ) ) );
165  *sminl = min( *sminl, mu );
166  }
167 
168  // Return with no convergence found.
169  return FLA_SUCCESS;
170 }

References i.

Referenced by FLA_Bsvd_find_converged(), and FLA_Bsvd_sinval_v_opd_var1().

◆ FLA_Bsvd_find_converged_ops()

FLA_Error FLA_Bsvd_find_converged_ops ( int  m_A,
float  tol,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  sminl 
)
73 {
74  float* epsilon_last;
75  float* delta_last;
76  float mu;
77  int i;
78 
79  epsilon_last = buff_e + (m_A-2)*inc_e;
80  delta_last = buff_d + (m_A-1)*inc_d;
81 
82  // Check convergence at the bottom of the matrix first.
83  if ( MAC_Bsvd_sinval_is_converged_ops( tol, *delta_last, *epsilon_last ) )
84  {
85  *sminl = 0.0F;
86  return m_A - 2;
87  }
88 
89  // If the last element is not yet converged, check interior elements.
90  // Also, accumulate sminl for later use when it comes time to check
91  // the shift.
92 
93  mu = fabsf( *buff_d );
94  *sminl = mu;
95 
96  for ( i = 0; i < m_A - 1; ++i )
97  {
98  float* epsilon1 = buff_e + (i )*inc_e;
99  float* delta2 = buff_d + (i+1)*inc_d;
100 
101  // Check convergence of epsilon1 against the value of mu accumulated
102  // so far.
103  if ( MAC_Bsvd_sinval_is_converged_ops( tol, mu, *epsilon1 ) )
104  {
105  return i;
106  }
107 
108  // Update mu and sminl.
109  mu = fabsf( *delta2 ) * ( mu / ( mu + fabsf( *epsilon1 ) ) );
110  *sminl = min( *sminl, mu );
111  }
112 
113  // Return with no convergence found.
114  return FLA_SUCCESS;
115 }

References i.

Referenced by FLA_Bsvd_find_converged(), and FLA_Bsvd_sinval_v_ops_var1().

◆ FLA_Bsvd_find_max_min()

FLA_Error FLA_Bsvd_find_max_min ( FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  smax,
FLA_Obj  smin 
)

◆ FLA_Bsvd_find_max_min_opd()

FLA_Error FLA_Bsvd_find_max_min_opd ( int  m_A,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  smax,
double *  smin 
)
108 {
109  double smax_cand;
110  double smin_cand;
111  int i;
112 
113  smax_cand = fabs( buff_d[ (m_A-1)*inc_d ] );
114  smin_cand = smax_cand;
115 
116  for ( i = 0; i < m_A - 1; ++i )
117  {
118  double abs_di = fabs( buff_d[ i*inc_d ] );
119  double abs_ei = fabs( buff_e[ i*inc_e ] );
120 
121  // Track the minimum element.
122  smin_cand = min( smin_cand, abs_di );
123 
124  // Track the maximum element.
125  smax_cand = max( smax_cand, abs_di );
126  smax_cand = max( smax_cand, abs_ei );
127  }
128 
129  // Save the results of the search.
130  *smax = smax_cand;
131  *smin = smin_cand;
132 
133  return FLA_SUCCESS;
134 }

References i.

Referenced by FLA_Bsvd_find_max(), and FLA_Bsvd_sinval_v_opd_var1().

◆ FLA_Bsvd_find_max_min_ops()

FLA_Error FLA_Bsvd_find_max_min_ops ( int  m_A,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  smax,
float *  smin 
)
73 {
74  float smax_cand;
75  float smin_cand;
76  int i;
77 
78  smax_cand = fabsf( buff_d[ (m_A-1)*inc_d ] );
79  smin_cand = smax_cand;
80 
81  for ( i = 0; i < m_A - 1; ++i )
82  {
83  float abs_di = fabsf( buff_d[ i*inc_d ] );
84  float abs_ei = fabsf( buff_e[ i*inc_e ] );
85 
86  // Track the minimum element.
87  smin_cand = min( smin_cand, abs_di );
88 
89  // Track the maximum element.
90  smax_cand = max( smax_cand, abs_di );
91  smax_cand = max( smax_cand, abs_ei );
92  }
93 
94  // Save the results of the search.
95  *smax = smax_cand;
96  *smin = smin_cand;
97 
98  return FLA_SUCCESS;
99 }

References i.

Referenced by FLA_Bsvd_find_max(), and FLA_Bsvd_sinval_v_ops_var1().

◆ FLA_Bsvd_find_submatrix_opd()

FLA_Error FLA_Bsvd_find_submatrix_opd ( int  mn_A,
int  ij_begin,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
int *  ijTL,
int *  ijBR 
)
79 {
80  double rzero = bl1_d0();
81  int ij_tl;
82  int ij_br;
83 
84  // Search for the first non-zero superdiagonal element starting at
85  // the index specified by ij_begin.
86  for ( ij_tl = ij_begin; ij_tl < mn_A - 1; ++ij_tl )
87  {
88  double* e1 = buff_e + (ij_tl )*inc_e;
89 
90  // If we find a non-zero element, record it and break out of this
91  // loop.
92  if ( *e1 != rzero )
93  {
94 #ifdef PRINTF
95  printf( "FLA_Bsvd_find_submatrix_opd: found non-zero superdiagonal element\n" );
96  printf( " e[%3d] = %22.19e\n", ij_tl, *e1 );
97 #endif
98  *ijTL = ij_tl;
99  break;
100  }
101  }
102 
103  // If ij_tl was incremented all the way up to mn_A - 1, then we didn't
104  // find any non-zeros.
105  if ( ij_tl == mn_A - 1 )
106  {
107 #ifdef PRINTF
108  printf( "FLA_Bsvd_find_submatrix_opd: no submatrices found.\n" );
109 #endif
110  return FLA_FAILURE;
111  }
112 
113  // If we've gotten this far, then a non-zero superdiagonal element was
114  // found. Now we must walk the remaining portion of the superdiagonal
115  // to find the first zero element, or if one is not found, we simply
116  // use the last element of the superdiagonal.
117  for ( ij_br = ij_tl; ij_br < mn_A - 1; ++ij_br )
118  {
119  double* e1 = buff_e + (ij_br )*inc_e;
120 
121  // If we find a zero element, record it and break out of this
122  // loop.
123  if ( *e1 == rzero )
124  {
125 #ifdef PRINTF
126  printf( "FLA_Bsvd_find_submatrix_opd: found zero superdiagonal element\n" );
127  printf( " e[%3d] = %22.19e\n", ij_br, *e1 );
128 #endif
129  break;
130  }
131  }
132 
133  // If a zero element was found, then ij_br should hold the index of
134  // that element. If a zero element was not found, then ij_br should
135  // hold mn_A - 1. Either way, we save the value and return success.
136  *ijBR = ij_br;
137 
138  return FLA_SUCCESS;
139 }

References bl1_d0().

Referenced by FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var1(), and FLA_Bsvd_v_opz_var2().

◆ FLA_Bsvd_find_submatrix_ops()

FLA_Error FLA_Bsvd_find_submatrix_ops ( int  mn_A,
int  ij_begin,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
int *  ijTL,
int *  ijBR 
)
20 {
21  float rzero = bl1_s0();
22  int ij_tl;
23  int ij_br;
24 
25  // Search for the first non-zero superdiagonal element starting at
26  // the index specified by ij_begin.
27  for ( ij_tl = ij_begin; ij_tl < mn_A - 1; ++ij_tl )
28  {
29  float* e1 = buff_e + (ij_tl )*inc_e;
30 
31  // If we find a non-zero element, record it and break out of this
32  // loop.
33  if ( *e1 != rzero )
34  {
35  *ijTL = ij_tl;
36  break;
37  }
38  }
39 
40  // If ij_tl was incremented all the way up to mn_A - 1, then we didn't
41  // find any non-zeros.
42  if ( ij_tl == mn_A - 1 )
43  {
44  return FLA_FAILURE;
45  }
46 
47  // If we've gotten this far, then a non-zero superdiagonal element was
48  // found. Now we must walk the remaining portion of the superdiagonal
49  // to find the first zero element, or if one is not found, we simply
50  // use the last element of the superdiagonal.
51  for ( ij_br = ij_tl; ij_br < mn_A - 1; ++ij_br )
52  {
53  float* e1 = buff_e + (ij_br )*inc_e;
54 
55  // If we find a zero element, record it and break out of this
56  // loop.
57  if ( *e1 == rzero )
58  {
59  break;
60  }
61  }
62 
63  // If a zero element was found, then ij_br should hold the index of
64  // that element. If a zero element was not found, then ij_br should
65  // hold mn_A - 1. Either way, we save the value and return success.
66  *ijBR = ij_br;
67 
68  return FLA_SUCCESS;
69 }

References bl1_s0().

Referenced by FLA_Bsvd_ext_opc_var1(), FLA_Bsvd_ext_ops_var1(), FLA_Bsvd_v_opc_var1(), and FLA_Bsvd_v_ops_var1().

◆ FLA_Bsvd_v_opc_var1()

FLA_Error FLA_Bsvd_v_opc_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
686 {
687  scomplex one = bl1_c1();
688  float rzero = bl1_s0();
689 
690  int maxitr = 6;
691 
692  float eps;
693  float tolmul;
694  float tol;
695  float thresh;
696 
697  scomplex* G;
698  scomplex* H;
699  float* d1;
700  float* e1;
701  int r_val;
702  int done;
703  int m_GH_sweep_max;
704  int ij_begin;
705  int ijTL, ijBR;
706  int m_A11;
707  int n_iter_perf;
708  int n_UV_apply;
709  int total_deflations;
710  int n_deflations;
711  int n_iter_prev;
712  int n_iter_perf_sweep_max;
713 
714  // Compute some convergence constants.
715  eps = FLA_Mach_params_ops( FLA_MACH_EPS );
716  tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
718  tolmul,
719  maxitr,
720  buff_d, inc_d,
721  buff_e, inc_e,
722  &tol,
723  &thresh );
724 
725  // Initialize our completion flag.
726  done = FALSE;
727 
728  // Initialize a counter that holds the maximum number of rows of G
729  // and H that we would need to initialize for the next sweep.
730  m_GH_sweep_max = min_m_n - 1;
731 
732  // Initialize a counter for the total number of iterations performed.
733  n_iter_prev = 0;
734 
735  // Iterate until the matrix has completely deflated.
736  for ( total_deflations = 0; done != TRUE; )
737  {
738 
739  // Initialize G and H to contain only identity rotations.
740  bl1_csetm( m_GH_sweep_max,
741  n_GH,
742  &one,
743  buff_G, rs_G, cs_G );
744  bl1_csetm( m_GH_sweep_max,
745  n_GH,
746  &one,
747  buff_H, rs_H, cs_H );
748 
749  // Keep track of the maximum number of iterations performed in the
750  // current sweep. This is used when applying the sweep's Givens
751  // rotations.
752  n_iter_perf_sweep_max = 0;
753 
754  // Perform a sweep: Move through the matrix and perform a bidiagonal
755  // SVD on each non-zero submatrix that is encountered. During the
756  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
757  for ( ij_begin = 0; ij_begin < min_m_n; )
758  {
759  // Search for the first submatrix along the diagonal that is
760  // bounded by zeroes (or endpoints of the matrix). If no
761  // submatrix is found (ie: if the entire superdiagonal is zero
762  // then FLA_FAILURE is returned. This function also inspects
763  // superdiagonal elements for proximity to zero. If a given
764  // element is close enough to zero, then it is deemed
765  // converged and manually set to zero.
766  r_val = FLA_Bsvd_find_submatrix_ops( min_m_n,
767  ij_begin,
768  buff_d, inc_d,
769  buff_e, inc_e,
770  &ijTL,
771  &ijBR );
772 
773  // Verify that a submatrix was found. If one was not found,
774  // then we are done with the current sweep. Furthermore, if
775  // a submatrix was not found AND we began our search at the
776  // beginning of the matrix (ie: ij_begin == 0), then the
777  // matrix has completely deflated and so we are done with
778  // Francis step iteration.
779  if ( r_val == FLA_FAILURE )
780  {
781  if ( ij_begin == 0 )
782  {
783  done = TRUE;
784  }
785 
786  // Break out of the current sweep so we can apply the last
787  // remaining Givens rotations.
788  break;
789  }
790 
791  // If we got this far, then:
792  // (a) ijTL refers to the index of the first non-zero
793  // superdiagonal along the diagonal, and
794  // (b) ijBR refers to either:
795  // - the first zero element that occurs after ijTL, or
796  // - the the last diagonal element.
797  // Note that ijTL and ijBR also correspond to the first and
798  // last diagonal elements of the submatrix of interest. Thus,
799  // we may compute the dimension of this submatrix as:
800  m_A11 = ijBR - ijTL + 1;
801 
802  // Adjust ij_begin, which gets us ready for the next submatrix
803  // search in the current sweep.
804  ij_begin = ijBR + 1;
805 
806  // Index to the submatrices upon which we will operate.
807  d1 = buff_d + ijTL * inc_d;
808  e1 = buff_e + ijTL * inc_e;
809  G = buff_G + ijTL * rs_G;
810  H = buff_H + ijTL * rs_H;
811 
812  // Search for a batch of singular values, recursing on deflated
813  // subproblems whenever a split occurs. Iteration continues as
814  // long as
815  // (a) there is still matrix left to operate on, and
816  // (b) the number of iterations performed in this batch is
817  // less than n_G.
818  // If/when either of the two above conditions fails to hold,
819  // the function returns.
820  n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11,
821  n_GH,
822  ijTL,
823  tol,
824  thresh,
825  d1, inc_d,
826  e1, inc_e,
827  G, rs_G, cs_G,
828  H, rs_H, cs_H,
829  &n_iter_perf );
830 
831  // Record the number of deflations that were observed.
832  total_deflations += n_deflations;
833 
834  // Update the maximum number of iterations performed in the
835  // current sweep.
836  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
837 
838  // Store the most recent value of ijBR in m_G_sweep_max.
839  // When the sweep is done, this value will contain the minimum
840  // number of rows of G and H we can apply and safely include all
841  // non-identity rotations that were computed during the
842  // singular value searches.
843  m_GH_sweep_max = ijBR;
844 
845  // Make sure we haven't exceeded our maximum iteration count.
846  if ( n_iter_prev >= n_iter_max * min_m_n )
847  {
848  FLA_Abort();
849  //return FLA_FAILURE;
850  }
851  }
852 
853  // The sweep is complete. Now we must apply the Givens rotations
854  // that were accumulated during the sweep.
855 
856  // Recall that the number of columns of U and V to which we apply
857  // rotations is one more than the number of rotations.
858  n_UV_apply = m_GH_sweep_max + 1;
859 
860  // Apply the Givens rotations. Note that we only apply k sets of
861  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
862  // apply to n_UV_apply columns of U and V since this is the most we
863  // need to touch given the most recent value stored to m_GH_sweep_max.
864  //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
865  FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
866  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
867  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
868  m_U,
869  n_UV_apply,
870  buff_G, rs_G, cs_G,
871  buff_U, rs_U, cs_U,
872  b_alg );
873  //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
874  FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
875  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
876  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
877  m_V,
878  n_UV_apply,
879  buff_H, rs_H, cs_H,
880  buff_V, rs_V, cs_V,
881  b_alg );
882 
883  // Increment the total number of iterations previously performed.
884  n_iter_prev += n_iter_perf_sweep_max;
885  }
886 
887  // Make all the singular values positive.
888  {
889  int i;
890  float minus_one = bl1_sm1();
891 
892  for ( i = 0; i < min_m_n; ++i )
893  {
894  if ( buff_d[ (i )*inc_d ] < rzero )
895  {
896  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
897 
898  // Scale the right singular vectors.
900  m_V,
901  &minus_one,
902  buff_V + (i )*cs_V, rs_V );
903  }
904  }
905  }
906 
907  return FLA_SUCCESS;
908 }
FLA_Error FLA_Apply_G_rf_blc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:157
FLA_Error FLA_Bsvd_find_submatrix_ops(int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:14
FLA_Error FLA_Bsvd_iteracc_v_ops_var1(int m_A, int n_GH, int ijTL, float tol, float thresh, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:13
void FLA_Abort(void)
Definition: FLA_Error.c:248
void bl1_csscalv(conj1_t conj, int n, float *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:35
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
float bl1_sm1(void)
Definition: bl1_constants.c:175
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_c1(), bl1_csetm(), bl1_csscalv(), bl1_s0(), bl1_sm1(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blc_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opc_var2()

FLA_Error FLA_Bsvd_v_opc_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_RG,
int  rs_RG,
int  cs_RG,
float *  buff_RH,
int  rs_RH,
int  cs_RH,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
544 {
545  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
546 
547  return FLA_SUCCESS;
548 }

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_opd_var1()

FLA_Error FLA_Bsvd_v_opd_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
409 {
410  dcomplex one = bl1_z1();
411  double rzero = bl1_d0();
412 
413  int maxitr = 6;
414 
415  double eps;
416  double tolmul;
417  double tol;
418  double thresh;
419 
420  dcomplex* G;
421  dcomplex* H;
422  double* d1;
423  double* e1;
424  int r_val;
425  int done;
426  int m_GH_sweep_max;
427  int ij_begin;
428  int ijTL, ijBR;
429  int m_A11;
430  int n_iter_perf;
431  int n_UV_apply;
432  int total_deflations;
433  int n_deflations;
434  int n_iter_prev;
435  int n_iter_perf_sweep_max;
436 
437  // Compute some convergence constants.
438  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
439  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
441  tolmul,
442  maxitr,
443  buff_d, inc_d,
444  buff_e, inc_e,
445  &tol,
446  &thresh );
447 #ifdef PRINTF
448  printf( "FLA_Bsvd_v_opd_var1: tolmul = %12.6e\n", tolmul );
449  printf( "FLA_Bsvd_v_opd_var1: tol = %12.6e\n", tol );
450  printf( "FLA_Bsvd_v_opd_var1: thresh = %12.6e\n", thresh );
451 #endif
452 
453  // Initialize our completion flag.
454  done = FALSE;
455 
456  // Initialize a counter that holds the maximum number of rows of G
457  // and H that we would need to initialize for the next sweep.
458  m_GH_sweep_max = min_m_n - 1;
459 
460  // Initialize a counter for the total number of iterations performed.
461  n_iter_prev = 0;
462 
463  // Iterate until the matrix has completely deflated.
464  for ( total_deflations = 0; done != TRUE; )
465  {
466 
467  // Initialize G and H to contain only identity rotations.
468  bl1_zsetm( m_GH_sweep_max,
469  n_GH,
470  &one,
471  buff_G, rs_G, cs_G );
472  bl1_zsetm( m_GH_sweep_max,
473  n_GH,
474  &one,
475  buff_H, rs_H, cs_H );
476 
477  // Keep track of the maximum number of iterations performed in the
478  // current sweep. This is used when applying the sweep's Givens
479  // rotations.
480  n_iter_perf_sweep_max = 0;
481 
482  // Perform a sweep: Move through the matrix and perform a bidiagonal
483  // SVD on each non-zero submatrix that is encountered. During the
484  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
485  for ( ij_begin = 0; ij_begin < min_m_n; )
486  {
487 
488 #ifdef PRINTF
489  if ( ij_begin == 0 )
490  printf( "FLA_Bsvd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
491 #endif
492 
493  // Search for the first submatrix along the diagonal that is
494  // bounded by zeroes (or endpoints of the matrix). If no
495  // submatrix is found (ie: if the entire superdiagonal is zero
496  // then FLA_FAILURE is returned. This function also inspects
497  // superdiagonal elements for proximity to zero. If a given
498  // element is close enough to zero, then it is deemed
499  // converged and manually set to zero.
500  r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
501  ij_begin,
502  buff_d, inc_d,
503  buff_e, inc_e,
504  &ijTL,
505  &ijBR );
506 
507  // Verify that a submatrix was found. If one was not found,
508  // then we are done with the current sweep. Furthermore, if
509  // a submatrix was not found AND we began our search at the
510  // beginning of the matrix (ie: ij_begin == 0), then the
511  // matrix has completely deflated and so we are done with
512  // Francis step iteration.
513  if ( r_val == FLA_FAILURE )
514  {
515  if ( ij_begin == 0 )
516  {
517 #ifdef PRINTF
518  printf( "FLA_Bsvd_v_opd_var1: superdiagonal is completely zero.\n" );
519  printf( "FLA_Bsvd_v_opd_var1: Francis iteration is done!\n" );
520 #endif
521  done = TRUE;
522  }
523 
524  // Break out of the current sweep so we can apply the last
525  // remaining Givens rotations.
526  break;
527  }
528 
529  // If we got this far, then:
530  // (a) ijTL refers to the index of the first non-zero
531  // superdiagonal along the diagonal, and
532  // (b) ijBR refers to either:
533  // - the first zero element that occurs after ijTL, or
534  // - the the last diagonal element.
535  // Note that ijTL and ijBR also correspond to the first and
536  // last diagonal elements of the submatrix of interest. Thus,
537  // we may compute the dimension of this submatrix as:
538  m_A11 = ijBR - ijTL + 1;
539 
540 #ifdef PRINTF
541  printf( "FLA_Bsvd_v_opd_var1: ij_begin = %d\n", ij_begin );
542  printf( "FLA_Bsvd_v_opd_var1: ijTL = %d\n", ijTL );
543  printf( "FLA_Bsvd_v_opd_var1: ijBR = %d\n", ijBR );
544  printf( "FLA_Bsvd_v_opd_var1: m_A11 = %d\n", m_A11 );
545 #endif
546 
547  // Adjust ij_begin, which gets us ready for the next submatrix
548  // search in the current sweep.
549  ij_begin = ijBR + 1;
550 
551  // Index to the submatrices upon which we will operate.
552  d1 = buff_d + ijTL * inc_d;
553  e1 = buff_e + ijTL * inc_e;
554  G = buff_G + ijTL * rs_G;
555  H = buff_H + ijTL * rs_H;
556 
557  // Search for a batch of singular values, recursing on deflated
558  // subproblems whenever a split occurs. Iteration continues as
559  // long as
560  // (a) there is still matrix left to operate on, and
561  // (b) the number of iterations performed in this batch is
562  // less than n_G.
563  // If/when either of the two above conditions fails to hold,
564  // the function returns.
565  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
566  n_GH,
567  ijTL,
568  tol,
569  thresh,
570  d1, inc_d,
571  e1, inc_e,
572  G, rs_G, cs_G,
573  H, rs_H, cs_H,
574  &n_iter_perf );
575 
576  // Record the number of deflations that were observed.
577  total_deflations += n_deflations;
578 
579  // Update the maximum number of iterations performed in the
580  // current sweep.
581  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
582 
583 #ifdef PRINTF
584  printf( "FLA_Bsvd_v_opd_var1: deflations observed = %d\n", n_deflations );
585  printf( "FLA_Bsvd_v_opd_var1: total deflations observed = %d\n", total_deflations );
586  printf( "FLA_Bsvd_v_opd_var1: num iterations performed = %d\n", n_iter_perf );
587 #endif
588 
589  // Store the most recent value of ijBR in m_G_sweep_max.
590  // When the sweep is done, this value will contain the minimum
591  // number of rows of G and H we can apply and safely include all
592  // non-identity rotations that were computed during the
593  // singular value searches.
594  m_GH_sweep_max = ijBR;
595 
596  // Make sure we haven't exceeded our maximum iteration count.
597  if ( n_iter_prev >= n_iter_max * min_m_n )
598  {
599 #ifdef PRINTF
600  printf( "FLA_Bsvd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
601 #endif
602  FLA_Abort();
603  //return FLA_FAILURE;
604  }
605  }
606 
607  // The sweep is complete. Now we must apply the Givens rotations
608  // that were accumulated during the sweep.
609 
610  // Recall that the number of columns of U and V to which we apply
611  // rotations is one more than the number of rotations.
612  n_UV_apply = m_GH_sweep_max + 1;
613 
614 #ifdef PRINTF
615  printf( "FLA_Bsvd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
616  printf( "FLA_Bsvd_v_opd_var1: m_U = %d\n", m_U );
617  printf( "FLA_Bsvd_v_opd_var1: m_V = %d\n", m_V );
618  printf( "FLA_Bsvd_v_opd_var1: napp= %d\n", n_UV_apply );
619 #endif
620  // Apply the Givens rotations. Note that we only apply k sets of
621  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
622  // apply to n_UV_apply columns of U and V since this is the most we
623  // need to touch given the most recent value stored to m_GH_sweep_max.
624  //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
625  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
626  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
627  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
628  m_U,
629  n_UV_apply,
630  buff_G, rs_G, cs_G,
631  buff_U, rs_U, cs_U,
632  b_alg );
633  //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
634  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
635  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
636  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
637  m_V,
638  n_UV_apply,
639  buff_H, rs_H, cs_H,
640  buff_V, rs_V, cs_V,
641  b_alg );
642 
643  // Increment the total number of iterations previously performed.
644  n_iter_prev += n_iter_perf_sweep_max;
645 
646 #ifdef PRINTF
647  printf( "FLA_Bsvd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
648 #endif
649  }
650 
651  // Make all the singular values positive.
652  {
653  int i;
654  double minus_one = bl1_dm1();
655 
656  for ( i = 0; i < min_m_n; ++i )
657  {
658  if ( buff_d[ (i )*inc_d ] < rzero )
659  {
660  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
661 
662  // Scale the right singular vectors.
664  m_V,
665  &minus_one,
666  buff_V + (i )*cs_V, rs_V );
667  }
668  }
669  }
670 
671  return n_iter_prev;
672 }
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:128
FLA_Error FLA_Bsvd_find_submatrix_opd(int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:73
FLA_Error FLA_Bsvd_iteracc_v_opd_var1(int m_A, int n_GH, int ijTL, double tol, double thresh, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:176
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
double bl1_dm1(void)
Definition: bl1_constants.c:182
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
Definition: blis_type_defs.h:138

References bl1_d0(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opd_var2()

FLA_Error FLA_Bsvd_v_opd_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_RG,
int  rs_RG,
int  cs_RG,
double *  buff_RH,
int  rs_RH,
int  cs_RH,
double *  buff_W,
int  rs_W,
int  cs_W,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
229 {
230  dcomplex one = bl1_z1();
231  double rone = bl1_d1();
232  double rzero = bl1_d0();
233 
234  int maxitr = 6;
235 
236  double eps;
237  double tolmul;
238  double tol;
239  double thresh;
240 
241  dcomplex* G;
242  dcomplex* H;
243  double* d1;
244  double* e1;
245  int r_val;
246  int done;
247  int m_GH_sweep_max;
248  int ij_begin;
249  int ijTL, ijBR;
250  int m_A11;
251  int n_iter_perf;
252  int n_UV_apply;
253  int total_deflations;
254  int n_deflations;
255  int n_iter_prev;
256  int n_iter_perf_sweep_max;
257 
258  // Compute some convergence constants.
259  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
260  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
262  tolmul,
263  maxitr,
264  buff_d, inc_d,
265  buff_e, inc_e,
266  &tol,
267  &thresh );
268 
269  // Initialize our completion flag.
270  done = FALSE;
271 
272  // Initialize a counter that holds the maximum number of rows of G
273  // and H that we would need to initialize for the next sweep.
274  m_GH_sweep_max = min_m_n - 1;
275 
276  // Initialize a counter for the total number of iterations performed.
277  n_iter_prev = 0;
278 
279  // Initialize RG and RH to identity.
280  bl1_dident( min_m_n,
281  buff_RG, rs_RG, cs_RG );
282  bl1_dident( min_m_n,
283  buff_RH, rs_RH, cs_RH );
284 
285  // Iterate until the matrix has completely deflated.
286  for ( total_deflations = 0; done != TRUE; )
287  {
288 
289  // Initialize G and H to contain only identity rotations.
290  bl1_zsetm( m_GH_sweep_max,
291  n_GH,
292  &one,
293  buff_G, rs_G, cs_G );
294  bl1_zsetm( m_GH_sweep_max,
295  n_GH,
296  &one,
297  buff_H, rs_H, cs_H );
298 
299  // Keep track of the maximum number of iterations performed in the
300  // current sweep. This is used when applying the sweep's Givens
301  // rotations.
302  n_iter_perf_sweep_max = 0;
303 
304  // Perform a sweep: Move through the matrix and perform a bidiagonal
305  // SVD on each non-zero submatrix that is encountered. During the
306  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
307  for ( ij_begin = 0; ij_begin < min_m_n; )
308  {
309 
310 #ifdef PRINTF
311 if ( ij_begin == 0 )
312 printf( "FLA_Bsvd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
313 #endif
314 
315  // Search for the first submatrix along the diagonal that is
316  // bounded by zeroes (or endpoints of the matrix). If no
317  // submatrix is found (ie: if the entire superdiagonal is zero
318  // then FLA_FAILURE is returned. This function also inspects
319  // superdiagonal elements for proximity to zero. If a given
320  // element is close enough to zero, then it is deemed
321  // converged and manually set to zero.
322  r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
323  ij_begin,
324  buff_d, inc_d,
325  buff_e, inc_e,
326  &ijTL,
327  &ijBR );
328 
329  // Verify that a submatrix was found. If one was not found,
330  // then we are done with the current sweep. Furthermore, if
331  // a submatrix was not found AND we began our search at the
332  // beginning of the matrix (ie: ij_begin == 0), then the
333  // matrix has completely deflated and so we are done with
334  // Francis step iteration.
335  if ( r_val == FLA_FAILURE )
336  {
337  if ( ij_begin == 0 )
338  {
339 #ifdef PRINTF
340 printf( "FLA_Bsvd_v_opd_var2: superdiagonal is completely zero.\n" );
341 printf( "FLA_Bsvd_v_opd_var2: Francis iteration is done!\n" );
342 #endif
343  done = TRUE;
344  }
345 
346  // Break out of the current sweep so we can apply the last
347  // remaining Givens rotations.
348  break;
349  }
350 
351  // If we got this far, then:
352  // (a) ijTL refers to the index of the first non-zero
353  // superdiagonal along the diagonal, and
354  // (b) ijBR refers to either:
355  // - the first zero element that occurs after ijTL, or
356  // - the the last diagonal element.
357  // Note that ijTL and ijBR also correspond to the first and
358  // last diagonal elements of the submatrix of interest. Thus,
359  // we may compute the dimension of this submatrix as:
360  m_A11 = ijBR - ijTL + 1;
361 
362 #ifdef PRINTF
363 printf( "FLA_Bsvd_v_opd_var2: ij_begin = %d\n", ij_begin );
364 printf( "FLA_Bsvd_v_opd_var2: ijTL = %d\n", ijTL );
365 printf( "FLA_Bsvd_v_opd_var2: ijBR = %d\n", ijBR );
366 printf( "FLA_Bsvd_v_opd_var2: m_A11 = %d\n", m_A11 );
367 #endif
368 
369  // Adjust ij_begin, which gets us ready for the next submatrix
370  // search in the current sweep.
371  ij_begin = ijBR + 1;
372 
373  // Index to the submatrices upon which we will operate.
374  d1 = buff_d + ijTL * inc_d;
375  e1 = buff_e + ijTL * inc_e;
376  G = buff_G + ijTL * rs_G;
377  H = buff_H + ijTL * rs_H;
378 
379  // Search for a batch of singular values, recursing on deflated
380  // subproblems whenever possible. A new singular value search is
381  // performed as long as
382  // (a) there is still matrix left to operate on, and
383  // (b) the number of iterations performed in this batch is
384  // less than n_G.
385  // If/when either of the two above conditions fails to hold,
386  // the function returns.
387  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
388  n_GH,
389  ijTL,
390  tol,
391  thresh,
392  d1, inc_d,
393  e1, inc_e,
394  G, rs_G, cs_G,
395  H, rs_H, cs_H,
396  &n_iter_perf );
397 
398  // Record the number of deflations that occurred.
399  total_deflations += n_deflations;
400 
401  // Update the maximum number of iterations performed in the
402  // current sweep.
403  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
404 
405 #ifdef PRINTF
406 printf( "FLA_Bsvd_v_opd_var2: deflations observed = %d\n", n_deflations );
407 printf( "FLA_Bsvd_v_opd_var2: total deflations observed = %d\n", total_deflations );
408 printf( "FLA_Bsvd_v_opd_var2: num iterations performed = %d\n", n_iter_perf );
409 #endif
410 
411  // Store the most recent value of ijBR in m_G_sweep_max.
412  // When the sweep is done, this value will contain the minimum
413  // number of rows of G and H we can apply and safely include all
414  // non-identity rotations that were computed during the
415  // singular value searches.
416  m_GH_sweep_max = ijBR;
417 
418  }
419 
420  // The sweep is complete. Now we must apply the Givens rotations
421  // that were accumulated during the sweep.
422 
423  // Recall that the number of columns of U and V to which we apply
424  // rotations is one more than the number of rotations.
425  n_UV_apply = m_GH_sweep_max + 1;
426 
427 
428 #ifdef PRINTF
429 printf( "FLA_Bsvd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
430 printf( "FLA_Bsvd_v_opd_var2: m_U = %d\n", m_U );
431 printf( "FLA_Bsvd_v_opd_var2: m_V = %d\n", m_V );
432 printf( "FLA_Bsvd_v_opd_var2: napp= %d\n", n_UV_apply );
433 #endif
434 
435  // Apply the Givens rotations. Note that we only apply k sets of
436  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
437  // apply to n_UV_apply columns of U and V since this is the most we
438  // need to touch given the most recent value stored to m_GH_sweep_max.
439  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
440  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
441  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
442  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
443  min_m_n,
444  n_UV_apply,
445  n_iter_prev,
446  buff_G, rs_G, cs_G,
447  buff_RG, rs_RG, cs_RG,
448  b_alg );
449  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
450  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
451  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
452  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
453  min_m_n,
454  n_UV_apply,
455  n_iter_prev,
456  buff_H, rs_H, cs_H,
457  buff_RH, rs_RH, cs_RH,
458  b_alg );
459 
460  // Increment the total number of iterations previously performed.
461  n_iter_prev += n_iter_perf_sweep_max;
462 
463 #ifdef PRINTF
464 printf( "FLA_Bsvd_v_opd_var2: total number of iterations performed: %d\n", n_iter_prev );
465 #endif
466  }
467 
468  // Copy the contents of Q to temporary storage.
470  m_U,
471  m_V,
472  buff_U, rs_U, cs_U,
473  buff_W, rs_W, cs_W );
474 // W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
475 
476  // Multiply U by R, overwriting U.
479  m_U,
480  m_V,
481  m_V,
482  &rone,
483  ( double* )buff_W, rs_W, cs_W,
484  buff_RG, rs_RG, cs_RG,
485  &rzero,
486  ( double* )buff_U, rs_U, cs_U );
487 
489  m_V,
490  m_V,
491  buff_V, rs_V, cs_V,
492  buff_W, rs_W, cs_W );
493 
494  // Multiply V by R, overwriting V.
497  m_V,
498  m_V,
499  m_V,
500  &rone,
501  ( double* )buff_W, rs_W, cs_W,
502  buff_RH, rs_RH, cs_RH,
503  &rzero,
504  ( double* )buff_V, rs_V, cs_V );
505 
506  // Make all the singular values positive.
507  {
508  int i;
509  double minus_one = bl1_dm1();
510 
511  for ( i = 0; i < min_m_n; ++i )
512  {
513  if ( buff_d[ (i )*inc_d ] < rzero )
514  {
515  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
516 
517  // Scale the right singular vectors.
519  m_V,
520  &minus_one,
521  buff_V + (i )*cs_V, rs_V );
522  }
523  }
524  }
525 
526  return n_iter_prev;
527 }
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3b.c:135
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_gemm.c:274
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition: bl1_ident.c:32
double bl1_d1(void)
Definition: bl1_constants.c:54
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_ops_var1()

FLA_Error FLA_Bsvd_v_ops_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
169 {
170  scomplex one = bl1_c1();
171  float rzero = bl1_s0();
172 
173  int maxitr = 6;
174 
175  float eps;
176  float tolmul;
177  float tol;
178  float thresh;
179 
180  scomplex* G;
181  scomplex* H;
182  float* d1;
183  float* e1;
184  int r_val;
185  int done;
186  int m_GH_sweep_max;
187  int ij_begin;
188  int ijTL, ijBR;
189  int m_A11;
190  int n_iter_perf;
191  int n_UV_apply;
192  int total_deflations;
193  int n_deflations;
194  int n_iter_prev;
195  int n_iter_perf_sweep_max;
196 
197  // Compute some convergence constants.
198  eps = FLA_Mach_params_ops( FLA_MACH_EPS );
199  tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
201  tolmul,
202  maxitr,
203  buff_d, inc_d,
204  buff_e, inc_e,
205  &tol,
206  &thresh );
207 
208  // Initialize our completion flag.
209  done = FALSE;
210 
211  // Initialize a counter that holds the maximum number of rows of G
212  // and H that we would need to initialize for the next sweep.
213  m_GH_sweep_max = min_m_n - 1;
214 
215  // Initialize a counter for the total number of iterations performed.
216  n_iter_prev = 0;
217 
218  // Iterate until the matrix has completely deflated.
219  for ( total_deflations = 0; done != TRUE; )
220  {
221 
222  // Initialize G and H to contain only identity rotations.
223  bl1_csetm( m_GH_sweep_max,
224  n_GH,
225  &one,
226  buff_G, rs_G, cs_G );
227  bl1_csetm( m_GH_sweep_max,
228  n_GH,
229  &one,
230  buff_H, rs_H, cs_H );
231 
232  // Keep track of the maximum number of iterations performed in the
233  // current sweep. This is used when applying the sweep's Givens
234  // rotations.
235  n_iter_perf_sweep_max = 0;
236 
237  // Perform a sweep: Move through the matrix and perform a bidiagonal
238  // SVD on each non-zero submatrix that is encountered. During the
239  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
240  for ( ij_begin = 0; ij_begin < min_m_n; )
241  {
242 
243  // Search for the first submatrix along the diagonal that is
244  // bounded by zeroes (or endpoints of the matrix). If no
245  // submatrix is found (ie: if the entire superdiagonal is zero
246  // then FLA_FAILURE is returned. This function also inspects
247  // superdiagonal elements for proximity to zero. If a given
248  // element is close enough to zero, then it is deemed
249  // converged and manually set to zero.
250  r_val = FLA_Bsvd_find_submatrix_ops( min_m_n,
251  ij_begin,
252  buff_d, inc_d,
253  buff_e, inc_e,
254  &ijTL,
255  &ijBR );
256 
257  // Verify that a submatrix was found. If one was not found,
258  // then we are done with the current sweep. Furthermore, if
259  // a submatrix was not found AND we began our search at the
260  // beginning of the matrix (ie: ij_begin == 0), then the
261  // matrix has completely deflated and so we are done with
262  // Francis step iteration.
263  if ( r_val == FLA_FAILURE )
264  {
265  if ( ij_begin == 0 )
266  {
267  done = TRUE;
268  }
269 
270  // Break out of the current sweep so we can apply the last
271  // remaining Givens rotations.
272  break;
273  }
274 
275  // If we got this far, then:
276  // (a) ijTL refers to the index of the first non-zero
277  // superdiagonal along the diagonal, and
278  // (b) ijBR refers to either:
279  // - the first zero element that occurs after ijTL, or
280  // - the the last diagonal element.
281  // Note that ijTL and ijBR also correspond to the first and
282  // last diagonal elements of the submatrix of interest. Thus,
283  // we may compute the dimension of this submatrix as:
284  m_A11 = ijBR - ijTL + 1;
285 
286  // Adjust ij_begin, which gets us ready for the next submatrix
287  // search in the current sweep.
288  ij_begin = ijBR + 1;
289 
290  // Index to the submatrices upon which we will operate.
291  d1 = buff_d + ijTL * inc_d;
292  e1 = buff_e + ijTL * inc_e;
293  G = buff_G + ijTL * rs_G;
294  H = buff_H + ijTL * rs_H;
295 
296  // Search for a batch of singular values, recursing on deflated
297  // subproblems whenever a split occurs. Iteration continues as
298  // long as
299  // (a) there is still matrix left to operate on, and
300  // (b) the number of iterations performed in this batch is
301  // less than n_G.
302  // If/when either of the two above conditions fails to hold,
303  // the function returns.
304  n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11,
305  n_GH,
306  ijTL,
307  tol,
308  thresh,
309  d1, inc_d,
310  e1, inc_e,
311  G, rs_G, cs_G,
312  H, rs_H, cs_H,
313  &n_iter_perf );
314 
315  // Record the number of deflations that were observed.
316  total_deflations += n_deflations;
317 
318  // Update the maximum number of iterations performed in the
319  // current sweep.
320  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
321 
322  // Store the most recent value of ijBR in m_G_sweep_max.
323  // When the sweep is done, this value will contain the minimum
324  // number of rows of G and H we can apply and safely include all
325  // non-identity rotations that were computed during the
326  // singular value searches.
327  m_GH_sweep_max = ijBR;
328 
329  // Make sure we haven't exceeded our maximum iteration count.
330  if ( n_iter_prev >= n_iter_max * min_m_n )
331  {
332  FLA_Abort();
333  //return FLA_FAILURE;
334  }
335  }
336 
337  // The sweep is complete. Now we must apply the Givens rotations
338  // that were accumulated during the sweep.
339 
340  // Recall that the number of columns of U and V to which we apply
341  // rotations is one more than the number of rotations.
342  n_UV_apply = m_GH_sweep_max + 1;
343 
344  // Apply the Givens rotations. Note that we only apply k sets of
345  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
346  // apply to n_UV_apply columns of U and V since this is the most we
347  // need to touch given the most recent value stored to m_GH_sweep_max.
348  //FLA_Apply_G_rf_bls_var5( n_iter_perf_sweep_max,
349  FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
350  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
351  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
352  m_U,
353  n_UV_apply,
354  buff_G, rs_G, cs_G,
355  buff_U, rs_U, cs_U,
356  b_alg );
357  //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
358  FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
359  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
360  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
361  m_V,
362  n_UV_apply,
363  buff_H, rs_H, cs_H,
364  buff_V, rs_V, cs_V,
365  b_alg );
366 
367  // Increment the total number of iterations previously performed.
368  n_iter_prev += n_iter_perf_sweep_max;
369 
370  }
371 
372  // Make all the singular values positive.
373  {
374  int i;
375  float minus_one = bl1_sm1();
376 
377  for ( i = 0; i < min_m_n; ++i )
378  {
379  if ( buff_d[ (i )*inc_d ] < rzero )
380  {
381  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
382 
383  // Scale the right singular vectors.
385  m_V,
386  &minus_one,
387  buff_V + (i )*cs_V, rs_V );
388  }
389  }
390  }
391 
392  return n_iter_prev;
393 }
FLA_Error FLA_Apply_G_rf_bls_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:99
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13

References bl1_c1(), bl1_csetm(), bl1_s0(), bl1_sm1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bls_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_ops_var2()

FLA_Error FLA_Bsvd_v_ops_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_RG,
int  rs_RG,
int  cs_RG,
float *  buff_RH,
int  rs_RH,
int  cs_RH,
float *  buff_W,
int  rs_W,
int  cs_W,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
206 {
207  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
208 
209  return FLA_SUCCESS;
210 }

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_opt_var1()

FLA_Error FLA_Bsvd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)
16 {
17  FLA_Error r_val = FLA_SUCCESS;
18  FLA_Datatype datatype;
19  int m_U, m_V, n_GH;
20  int inc_d;
21  int inc_e;
22  int rs_G, cs_G;
23  int rs_H, cs_H;
24  int rs_U, cs_U;
25  int rs_V, cs_V;
26 
27  datatype = FLA_Obj_datatype( U );
28 
29  m_U = FLA_Obj_length( U );
30  m_V = FLA_Obj_length( V );
31  n_GH = FLA_Obj_width( G );
32 
33  inc_d = FLA_Obj_vector_inc( d );
34  inc_e = FLA_Obj_vector_inc( e );
35 
36  rs_G = FLA_Obj_row_stride( G );
37  cs_G = FLA_Obj_col_stride( G );
38 
39  rs_H = FLA_Obj_row_stride( H );
40  cs_H = FLA_Obj_col_stride( H );
41 
42  rs_U = FLA_Obj_row_stride( U );
43  cs_U = FLA_Obj_col_stride( U );
44 
45  rs_V = FLA_Obj_row_stride( V );
46  cs_V = FLA_Obj_col_stride( V );
47 
48 
49  switch ( datatype )
50  {
51  case FLA_FLOAT:
52  {
53  float* buff_d = FLA_FLOAT_PTR( d );
54  float* buff_e = FLA_FLOAT_PTR( e );
55  scomplex* buff_G = FLA_COMPLEX_PTR( G );
56  scomplex* buff_H = FLA_COMPLEX_PTR( H );
57  float* buff_U = FLA_FLOAT_PTR( U );
58  float* buff_V = FLA_FLOAT_PTR( V );
59 
60  r_val = FLA_Bsvd_v_ops_var1( min( m_U, m_V ),
61  m_U,
62  m_V,
63  n_GH,
64  n_iter_max,
65  buff_d, inc_d,
66  buff_e, inc_e,
67  buff_G, rs_G, cs_G,
68  buff_H, rs_H, cs_H,
69  buff_U, rs_U, cs_U,
70  buff_V, rs_V, cs_V,
71  b_alg );
72 
73  break;
74  }
75 
76  case FLA_DOUBLE:
77  {
78  double* buff_d = FLA_DOUBLE_PTR( d );
79  double* buff_e = FLA_DOUBLE_PTR( e );
80  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
81  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
82  double* buff_U = FLA_DOUBLE_PTR( U );
83  double* buff_V = FLA_DOUBLE_PTR( V );
84 
85  r_val = FLA_Bsvd_v_opd_var1( min( m_U, m_V ),
86  m_U,
87  m_V,
88  n_GH,
89  n_iter_max,
90  buff_d, inc_d,
91  buff_e, inc_e,
92  buff_G, rs_G, cs_G,
93  buff_H, rs_H, cs_H,
94  buff_U, rs_U, cs_U,
95  buff_V, rs_V, cs_V,
96  b_alg );
97 
98  break;
99  }
100 
101  case FLA_COMPLEX:
102  {
103  float* buff_d = FLA_FLOAT_PTR( d );
104  float* buff_e = FLA_FLOAT_PTR( e );
105  scomplex* buff_G = FLA_COMPLEX_PTR( G );
106  scomplex* buff_H = FLA_COMPLEX_PTR( H );
107  scomplex* buff_U = FLA_COMPLEX_PTR( U );
108  scomplex* buff_V = FLA_COMPLEX_PTR( V );
109 
110  r_val = FLA_Bsvd_v_opc_var1( min( m_U, m_V ),
111  m_U,
112  m_V,
113  n_GH,
114  n_iter_max,
115  buff_d, inc_d,
116  buff_e, inc_e,
117  buff_G, rs_G, cs_G,
118  buff_H, rs_H, cs_H,
119  buff_U, rs_U, cs_U,
120  buff_V, rs_V, cs_V,
121  b_alg );
122 
123  break;
124  }
125 
126  case FLA_DOUBLE_COMPLEX:
127  {
128  double* buff_d = FLA_DOUBLE_PTR( d );
129  double* buff_e = FLA_DOUBLE_PTR( e );
130  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
131  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
132  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
133  dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );
134 
135  r_val = FLA_Bsvd_v_opz_var1( min( m_U, m_V ),
136  m_U,
137  m_V,
138  n_GH,
139  n_iter_max,
140  buff_d, inc_d,
141  buff_e, inc_e,
142  buff_G, rs_G, cs_G,
143  buff_H, rs_H, cs_H,
144  buff_U, rs_U, cs_U,
145  buff_V, rs_V, cs_V,
146  b_alg );
147 
148  break;
149  }
150  }
151 
152  return r_val;
153 }
FLA_Error FLA_Bsvd_v_opd_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var1.c:397
FLA_Error FLA_Bsvd_v_opc_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var1.c:674
FLA_Error FLA_Bsvd_v_opz_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var1.c:912
FLA_Error FLA_Bsvd_v_ops_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var1.c:157
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
int FLA_Error
Definition: FLA_type_defs.h:47

References FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_ops_var1(), FLA_Bsvd_v_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Svd_uv_unb_var1().

◆ FLA_Bsvd_v_opt_var2()

FLA_Error FLA_Bsvd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  RG,
FLA_Obj  RH,
FLA_Obj  W,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)
14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_U, m_V, n_GH;
18  int inc_d;
19  int inc_e;
20  int rs_G, cs_G;
21  int rs_H, cs_H;
22  int rs_RG, cs_RG;
23  int rs_RH, cs_RH;
24  int rs_W, cs_W;
25  int rs_U, cs_U;
26  int rs_V, cs_V;
27 
28  datatype = FLA_Obj_datatype( U );
29 
30  m_U = FLA_Obj_length( U );
31  m_V = FLA_Obj_length( V );
32  n_GH = FLA_Obj_width( G );
33 
34  inc_d = FLA_Obj_vector_inc( d );
35  inc_e = FLA_Obj_vector_inc( e );
36 
37  rs_G = FLA_Obj_row_stride( G );
38  cs_G = FLA_Obj_col_stride( G );
39 
40  rs_H = FLA_Obj_row_stride( H );
41  cs_H = FLA_Obj_col_stride( H );
42 
43  rs_RG = FLA_Obj_row_stride( RG );
44  cs_RG = FLA_Obj_col_stride( RG );
45 
46  rs_RH = FLA_Obj_row_stride( RH );
47  cs_RH = FLA_Obj_col_stride( RH );
48 
49  rs_W = FLA_Obj_row_stride( W );
50  cs_W = FLA_Obj_col_stride( W );
51 
52  rs_U = FLA_Obj_row_stride( U );
53  cs_U = FLA_Obj_col_stride( U );
54 
55  rs_V = FLA_Obj_row_stride( V );
56  cs_V = FLA_Obj_col_stride( V );
57 
58 
59  switch ( datatype )
60  {
61  case FLA_FLOAT:
62  {
63  float* buff_d = FLA_FLOAT_PTR( d );
64  float* buff_e = FLA_FLOAT_PTR( e );
65  scomplex* buff_G = FLA_COMPLEX_PTR( G );
66  scomplex* buff_H = FLA_COMPLEX_PTR( H );
67  float* buff_RG = FLA_FLOAT_PTR( RG );
68  float* buff_RH = FLA_FLOAT_PTR( RH );
69  float* buff_W = FLA_FLOAT_PTR( W );
70  float* buff_U = FLA_FLOAT_PTR( U );
71  float* buff_V = FLA_FLOAT_PTR( V );
72 
73  r_val = FLA_Bsvd_v_ops_var2( min( m_U, m_V ),
74  m_U,
75  m_V,
76  n_GH,
77  n_iter_max,
78  buff_d, inc_d,
79  buff_e, inc_e,
80  buff_G, rs_G, cs_G,
81  buff_H, rs_H, cs_H,
82  buff_RG, rs_RG, cs_RG,
83  buff_RH, rs_RH, cs_RH,
84  buff_W, rs_W, cs_W,
85  buff_U, rs_U, cs_U,
86  buff_V, rs_V, cs_V,
87  b_alg );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE:
93  {
94  double* buff_d = FLA_DOUBLE_PTR( d );
95  double* buff_e = FLA_DOUBLE_PTR( e );
96  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
98  double* buff_RG = FLA_DOUBLE_PTR( RG );
99  double* buff_RH = FLA_DOUBLE_PTR( RH );
100  double* buff_W = FLA_DOUBLE_PTR( W );
101  double* buff_U = FLA_DOUBLE_PTR( U );
102  double* buff_V = FLA_DOUBLE_PTR( V );
103 
104  r_val = FLA_Bsvd_v_opd_var2( min( m_U, m_V ),
105  m_U,
106  m_V,
107  n_GH,
108  n_iter_max,
109  buff_d, inc_d,
110  buff_e, inc_e,
111  buff_G, rs_G, cs_G,
112  buff_H, rs_H, cs_H,
113  buff_RG, rs_RG, cs_RG,
114  buff_RH, rs_RH, cs_RH,
115  buff_W, rs_W, cs_W,
116  buff_U, rs_U, cs_U,
117  buff_V, rs_V, cs_V,
118  b_alg );
119 
120  break;
121  }
122 
123  case FLA_COMPLEX:
124  {
125  float* buff_d = FLA_FLOAT_PTR( d );
126  float* buff_e = FLA_FLOAT_PTR( e );
127  scomplex* buff_G = FLA_COMPLEX_PTR( G );
128  scomplex* buff_H = FLA_COMPLEX_PTR( H );
129  float* buff_RG = FLA_FLOAT_PTR( RG );
130  float* buff_RH = FLA_FLOAT_PTR( RH );
131  scomplex* buff_W = FLA_COMPLEX_PTR( W );
132  scomplex* buff_U = FLA_COMPLEX_PTR( U );
133  scomplex* buff_V = FLA_COMPLEX_PTR( V );
134 
135  r_val = FLA_Bsvd_v_opc_var2( min( m_U, m_V ),
136  m_U,
137  m_V,
138  n_GH,
139  n_iter_max,
140  buff_d, inc_d,
141  buff_e, inc_e,
142  buff_G, rs_G, cs_G,
143  buff_H, rs_H, cs_H,
144  buff_RG, rs_RG, cs_RG,
145  buff_RH, rs_RH, cs_RH,
146  buff_W, rs_W, cs_W,
147  buff_U, rs_U, cs_U,
148  buff_V, rs_V, cs_V,
149  b_alg );
150 
151  break;
152  }
153 
154  case FLA_DOUBLE_COMPLEX:
155  {
156  double* buff_d = FLA_DOUBLE_PTR( d );
157  double* buff_e = FLA_DOUBLE_PTR( e );
158  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
159  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
160  double* buff_RG = FLA_DOUBLE_PTR( RG );
161  double* buff_RH = FLA_DOUBLE_PTR( RH );
162  dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
163  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
164  dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );
165 
166  r_val = FLA_Bsvd_v_opz_var2( min( m_U, m_V ),
167  m_U,
168  m_V,
169  n_GH,
170  n_iter_max,
171  buff_d, inc_d,
172  buff_e, inc_e,
173  buff_G, rs_G, cs_G,
174  buff_H, rs_H, cs_H,
175  buff_RG, rs_RG, cs_RG,
176  buff_RH, rs_RH, cs_RH,
177  buff_W, rs_W, cs_W,
178  buff_U, rs_U, cs_U,
179  buff_V, rs_V, cs_V,
180  b_alg );
181 
182  break;
183  }
184  }
185 
186  return r_val;
187 }
FLA_Error FLA_Bsvd_v_opd_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var2.c:214
FLA_Error FLA_Bsvd_v_ops_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var2.c:191
FLA_Error FLA_Bsvd_v_opc_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var2.c:529
FLA_Error FLA_Bsvd_v_opz_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var2.c:550

References FLA_Bsvd_v_opc_var2(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_ops_var2(), FLA_Bsvd_v_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Svd_uv_unb_var2().

◆ FLA_Bsvd_v_opz_var1()

FLA_Error FLA_Bsvd_v_opz_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
924 {
925  dcomplex one = bl1_z1();
926  double rzero = bl1_d0();
927 
928  int maxitr = 6;
929 
930  double eps;
931  double tolmul;
932  double tol;
933  double thresh;
934 
935  dcomplex* G;
936  dcomplex* H;
937  double* d1;
938  double* e1;
939  int r_val;
940  int done;
941  int m_GH_sweep_max;
942  int ij_begin;
943  int ijTL, ijBR;
944  int m_A11;
945  int n_iter_perf;
946  int n_UV_apply;
947  int total_deflations;
948  int n_deflations;
949  int n_iter_prev;
950  int n_iter_perf_sweep_max;
951 
952  // Compute some convergence constants.
953  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
954  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
956  tolmul,
957  maxitr,
958  buff_d, inc_d,
959  buff_e, inc_e,
960  &tol,
961  &thresh );
962 #ifdef PRINTF
963  printf( "FLA_Bsvd_v_opz_var1: tolmul = %12.6e\n", tolmul );
964  printf( "FLA_Bsvd_v_opz_var1: tol = %12.6e\n", tol );
965  printf( "FLA_Bsvd_v_opz_var1: thresh = %12.6e\n", thresh );
966 #endif
967 
968  // Initialize our completion flag.
969  done = FALSE;
970 
971  // Initialize a counter that holds the maximum number of rows of G
972  // and H that we would need to initialize for the next sweep.
973  m_GH_sweep_max = min_m_n - 1;
974 
975  // Initialize a counter for the total number of iterations performed.
976  n_iter_prev = 0;
977 
978  // Iterate until the matrix has completely deflated.
979  for ( total_deflations = 0; done != TRUE; )
980  {
981 
982  // Initialize G and H to contain only identity rotations.
983  bl1_zsetm( m_GH_sweep_max,
984  n_GH,
985  &one,
986  buff_G, rs_G, cs_G );
987  bl1_zsetm( m_GH_sweep_max,
988  n_GH,
989  &one,
990  buff_H, rs_H, cs_H );
991 
992  // Keep track of the maximum number of iterations performed in the
993  // current sweep. This is used when applying the sweep's Givens
994  // rotations.
995  n_iter_perf_sweep_max = 0;
996 
997  // Perform a sweep: Move through the matrix and perform a bidiagonal
998  // SVD on each non-zero submatrix that is encountered. During the
999  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
1000  for ( ij_begin = 0; ij_begin < min_m_n; )
1001  {
1002 
1003 #ifdef PRINTF
1004  if ( ij_begin == 0 )
1005  printf( "FLA_Bsvd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
1006 #endif
1007 
1008  // Search for the first submatrix along the diagonal that is
1009  // bounded by zeroes (or endpoints of the matrix). If no
1010  // submatrix is found (ie: if the entire superdiagonal is zero
1011  // then FLA_FAILURE is returned. This function also inspects
1012  // superdiagonal elements for proximity to zero. If a given
1013  // element is close enough to zero, then it is deemed
1014  // converged and manually set to zero.
1015  r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
1016  ij_begin,
1017  buff_d, inc_d,
1018  buff_e, inc_e,
1019  &ijTL,
1020  &ijBR );
1021 
1022  // Verify that a submatrix was found. If one was not found,
1023  // then we are done with the current sweep. Furthermore, if
1024  // a submatrix was not found AND we began our search at the
1025  // beginning of the matrix (ie: ij_begin == 0), then the
1026  // matrix has completely deflated and so we are done with
1027  // Francis step iteration.
1028  if ( r_val == FLA_FAILURE )
1029  {
1030  if ( ij_begin == 0 )
1031  {
1032 #ifdef PRINTF
1033  printf( "FLA_Bsvd_v_opz_var1: superdiagonal is completely zero.\n" );
1034  printf( "FLA_Bsvd_v_opz_var1: Francis iteration is done!\n" );
1035 #endif
1036  done = TRUE;
1037  }
1038 
1039  // Break out of the current sweep so we can apply the last
1040  // remaining Givens rotations.
1041  break;
1042  }
1043 
1044  // If we got this far, then:
1045  // (a) ijTL refers to the index of the first non-zero
1046  // superdiagonal along the diagonal, and
1047  // (b) ijBR refers to either:
1048  // - the first zero element that occurs after ijTL, or
1049  // - the the last diagonal element.
1050  // Note that ijTL and ijBR also correspond to the first and
1051  // last diagonal elements of the submatrix of interest. Thus,
1052  // we may compute the dimension of this submatrix as:
1053  m_A11 = ijBR - ijTL + 1;
1054 
1055 #ifdef PRINTF
1056  printf( "FLA_Bsvd_v_opz_var1: ij_begin = %d\n", ij_begin );
1057  printf( "FLA_Bsvd_v_opz_var1: ijTL = %d\n", ijTL );
1058  printf( "FLA_Bsvd_v_opz_var1: ijBR = %d\n", ijBR );
1059  printf( "FLA_Bsvd_v_opz_var1: m_A11 = %d\n", m_A11 );
1060 #endif
1061 
1062  // Adjust ij_begin, which gets us ready for the next submatrix
1063  // search in the current sweep.
1064  ij_begin = ijBR + 1;
1065 
1066  // Index to the submatrices upon which we will operate.
1067  d1 = buff_d + ijTL * inc_d;
1068  e1 = buff_e + ijTL * inc_e;
1069  G = buff_G + ijTL * rs_G;
1070  H = buff_H + ijTL * rs_H;
1071 
1072  // Search for a batch of singular values, recursing on deflated
1073  // subproblems whenever a split occurs. Iteration continues as
1074  // long as
1075  // (a) there is still matrix left to operate on, and
1076  // (b) the number of iterations performed in this batch is
1077  // less than n_G.
1078  // If/when either of the two above conditions fails to hold,
1079  // the function returns.
1080  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
1081  n_GH,
1082  ijTL,
1083  tol,
1084  thresh,
1085  d1, inc_d,
1086  e1, inc_e,
1087  G, rs_G, cs_G,
1088  H, rs_H, cs_H,
1089  &n_iter_perf );
1090 
1091  // Record the number of deflations that were observed.
1092  total_deflations += n_deflations;
1093 
1094  // Update the maximum number of iterations performed in the
1095  // current sweep.
1096  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
1097 
1098 #ifdef PRINTF
1099  printf( "FLA_Bsvd_v_opz_var1: deflations observed = %d\n", n_deflations );
1100  printf( "FLA_Bsvd_v_opz_var1: total deflations observed = %d\n", total_deflations );
1101  printf( "FLA_Bsvd_v_opz_var1: num iterations performed = %d\n", n_iter_perf );
1102 #endif
1103 
1104  // Store the most recent value of ijBR in m_G_sweep_max.
1105  // When the sweep is done, this value will contain the minimum
1106  // number of rows of G and H we can apply and safely include all
1107  // non-identity rotations that were computed during the
1108  // singular value searches.
1109  m_GH_sweep_max = ijBR;
1110 
1111  // Make sure we haven't exceeded our maximum iteration count.
1112  if ( n_iter_prev >= n_iter_max * min_m_n )
1113  {
1114 #ifdef PRINTF
1115  printf( "FLA_Bsvd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
1116 #endif
1117  FLA_Abort();
1118  //return FLA_FAILURE;
1119  }
1120  }
1121 
1122  // The sweep is complete. Now we must apply the Givens rotations
1123  // that were accumulated during the sweep.
1124 
1125  // Recall that the number of columns of U and V to which we apply
1126  // rotations is one more than the number of rotations.
1127  n_UV_apply = m_GH_sweep_max + 1;
1128 
1129 #ifdef PRINTF
1130  printf( "FLA_Bsvd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
1131  printf( "FLA_Bsvd_v_opz_var1: m_U = %d\n", m_U );
1132  printf( "FLA_Bsvd_v_opz_var1: m_V = %d\n", m_V );
1133  printf( "FLA_Bsvd_v_opz_var1: napp= %d\n", n_UV_apply );
1134 #endif
1135  // Apply the Givens rotations. Note that we only apply k sets of
1136  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
1137  // apply to n_UV_apply columns of U and V since this is the most we
1138  // need to touch given the most recent value stored to m_GH_sweep_max.
1139  //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
1140  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
1141  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
1142  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
1143  m_U,
1144  n_UV_apply,
1145  buff_G, rs_G, cs_G,
1146  buff_U, rs_U, cs_U,
1147  b_alg );
1148  //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
1149  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
1150  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
1151  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
1152  m_V,
1153  n_UV_apply,
1154  buff_H, rs_H, cs_H,
1155  buff_V, rs_V, cs_V,
1156  b_alg );
1157 
1158  // Increment the total number of iterations previously performed.
1159  n_iter_prev += n_iter_perf_sweep_max;
1160 
1161 #ifdef PRINTF
1162  printf( "FLA_Bsvd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
1163 #endif
1164  }
1165 
1166  // Make all the singular values positive.
1167  {
1168  int i;
1169  double minus_one = bl1_dm1();
1170 
1171  for ( i = 0; i < min_m_n; ++i )
1172  {
1173  if ( buff_d[ (i )*inc_d ] < rzero )
1174  {
1175  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
1176 
1177  // Scale the right singular vectors.
1179  m_V,
1180  &minus_one,
1181  buff_V + (i )*cs_V, rs_V );
1182  }
1183  }
1184  }
1185 
1186  return n_iter_prev;
1187 }
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:186
void bl1_zdscalv(conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:61

References bl1_d0(), bl1_dm1(), bl1_z1(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opz_var2()

FLA_Error FLA_Bsvd_v_opz_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_RG,
int  rs_RG,
int  cs_RG,
double *  buff_RH,
int  rs_RH,
int  cs_RH,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
565 {
566  dcomplex one = bl1_z1();
567  double rone = bl1_d1();
568  double rzero = bl1_d0();
569 
570  int maxitr = 6;
571 
572  double eps;
573  double tolmul;
574  double tol;
575  double thresh;
576 
577  dcomplex* G;
578  dcomplex* H;
579  double* d1;
580  double* e1;
581  int r_val;
582  int done;
583  int m_GH_sweep_max;
584  int ij_begin;
585  int ijTL, ijBR;
586  int m_A11;
587  int n_iter_perf;
588  int n_UV_apply;
589  int total_deflations;
590  int n_deflations;
591  int n_iter_prev;
592  int n_iter_perf_sweep_max;
593 
594  // Compute some convergence constants.
595  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
596  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
598  tolmul,
599  maxitr,
600  buff_d, inc_d,
601  buff_e, inc_e,
602  &tol,
603  &thresh );
604 
605  // Initialize our completion flag.
606  done = FALSE;
607 
608  // Initialize a counter that holds the maximum number of rows of G
609  // and H that we would need to initialize for the next sweep.
610  m_GH_sweep_max = min_m_n - 1;
611 
612  // Initialize a counter for the total number of iterations performed.
613  n_iter_prev = 0;
614 
615  // Initialize RG and RH to identity.
616  bl1_dident( min_m_n,
617  buff_RG, rs_RG, cs_RG );
618  bl1_dident( min_m_n,
619  buff_RH, rs_RH, cs_RH );
620 
621  // Iterate until the matrix has completely deflated.
622  for ( total_deflations = 0; done != TRUE; )
623  {
624 
625  // Initialize G and H to contain only identity rotations.
626  bl1_zsetm( m_GH_sweep_max,
627  n_GH,
628  &one,
629  buff_G, rs_G, cs_G );
630  bl1_zsetm( m_GH_sweep_max,
631  n_GH,
632  &one,
633  buff_H, rs_H, cs_H );
634 
635  // Keep track of the maximum number of iterations performed in the
636  // current sweep. This is used when applying the sweep's Givens
637  // rotations.
638  n_iter_perf_sweep_max = 0;
639 
640  // Perform a sweep: Move through the matrix and perform a bidiagonal
641  // SVD on each non-zero submatrix that is encountered. During the
642  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
643  for ( ij_begin = 0; ij_begin < min_m_n; )
644  {
645 
646 #ifdef PRINTF
647 if ( ij_begin == 0 )
648 printf( "FLA_Bsvd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
649 #endif
650 
651  // Search for the first submatrix along the diagonal that is
652  // bounded by zeroes (or endpoints of the matrix). If no
653  // submatrix is found (ie: if the entire superdiagonal is zero
654  // then FLA_FAILURE is returned. This function also inspects
655  // superdiagonal elements for proximity to zero. If a given
656  // element is close enough to zero, then it is deemed
657  // converged and manually set to zero.
658  r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
659  ij_begin,
660  buff_d, inc_d,
661  buff_e, inc_e,
662  &ijTL,
663  &ijBR );
664 
665  // Verify that a submatrix was found. If one was not found,
666  // then we are done with the current sweep. Furthermore, if
667  // a submatrix was not found AND we began our search at the
668  // beginning of the matrix (ie: ij_begin == 0), then the
669  // matrix has completely deflated and so we are done with
670  // Francis step iteration.
671  if ( r_val == FLA_FAILURE )
672  {
673  if ( ij_begin == 0 )
674  {
675 #ifdef PRINTF
676 printf( "FLA_Bsvd_v_opz_var2: superdiagonal is completely zero.\n" );
677 printf( "FLA_Bsvd_v_opz_var2: Francis iteration is done!\n" );
678 #endif
679  done = TRUE;
680  }
681 
682  // Break out of the current sweep so we can apply the last
683  // remaining Givens rotations.
684  break;
685  }
686 
687  // If we got this far, then:
688  // (a) ijTL refers to the index of the first non-zero
689  // superdiagonal along the diagonal, and
690  // (b) ijBR refers to either:
691  // - the first zero element that occurs after ijTL, or
692  // - the the last diagonal element.
693  // Note that ijTL and ijBR also correspond to the first and
694  // last diagonal elements of the submatrix of interest. Thus,
695  // we may compute the dimension of this submatrix as:
696  m_A11 = ijBR - ijTL + 1;
697 
698 #ifdef PRINTF
699 printf( "FLA_Bsvd_v_opz_var2: ij_begin = %d\n", ij_begin );
700 printf( "FLA_Bsvd_v_opz_var2: ijTL = %d\n", ijTL );
701 printf( "FLA_Bsvd_v_opz_var2: ijBR = %d\n", ijBR );
702 printf( "FLA_Bsvd_v_opz_var2: m_A11 = %d\n", m_A11 );
703 #endif
704 
705  // Adjust ij_begin, which gets us ready for the next submatrix
706  // search in the current sweep.
707  ij_begin = ijBR + 1;
708 
709  // Index to the submatrices upon which we will operate.
710  d1 = buff_d + ijTL * inc_d;
711  e1 = buff_e + ijTL * inc_e;
712  G = buff_G + ijTL * rs_G;
713  H = buff_H + ijTL * rs_H;
714 
715  // Search for a batch of singular values, recursing on deflated
716  // subproblems whenever possible. A new singular value search is
717  // performed as long as
718  // (a) there is still matrix left to operate on, and
719  // (b) the number of iterations performed in this batch is
720  // less than n_G.
721  // If/when either of the two above conditions fails to hold,
722  // the function returns.
723  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
724  n_GH,
725  ijTL,
726  tol,
727  thresh,
728  d1, inc_d,
729  e1, inc_e,
730  G, rs_G, cs_G,
731  H, rs_H, cs_H,
732  &n_iter_perf );
733 
734  // Record the number of deflations that occurred.
735  total_deflations += n_deflations;
736 
737  // Update the maximum number of iterations performed in the
738  // current sweep.
739  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
740 
741 #ifdef PRINTF
742 printf( "FLA_Bsvd_v_opz_var2: deflations observed = %d\n", n_deflations );
743 printf( "FLA_Bsvd_v_opz_var2: total deflations observed = %d\n", total_deflations );
744 printf( "FLA_Bsvd_v_opz_var2: num iterations performed = %d\n", n_iter_perf );
745 #endif
746 
747  // Store the most recent value of ijBR in m_G_sweep_max.
748  // When the sweep is done, this value will contain the minimum
749  // number of rows of G and H we can apply and safely include all
750  // non-identity rotations that were computed during the
751  // singular value searches.
752  m_GH_sweep_max = ijBR;
753 
754  }
755 
756  // The sweep is complete. Now we must apply the Givens rotations
757  // that were accumulated during the sweep.
758 
759  // Recall that the number of columns of U and V to which we apply
760  // rotations is one more than the number of rotations.
761  n_UV_apply = m_GH_sweep_max + 1;
762 
763 
764 #ifdef PRINTF
765 printf( "FLA_Bsvd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
766 printf( "FLA_Bsvd_v_opz_var2: m_U = %d\n", m_U );
767 printf( "FLA_Bsvd_v_opz_var2: m_V = %d\n", m_V );
768 printf( "FLA_Bsvd_v_opz_var2: napp= %d\n", n_UV_apply );
769 #endif
770 
771  // Apply the Givens rotations. Note that we only apply k sets of
772  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
773  // apply to n_UV_apply columns of U and V since this is the most we
774  // need to touch given the most recent value stored to m_GH_sweep_max.
775  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
776  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
777  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
778  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
779  min_m_n,
780  n_UV_apply,
781  n_iter_prev,
782  buff_G, rs_G, cs_G,
783  buff_RG, rs_RG, cs_RG,
784  b_alg );
785  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
786  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
787  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
788  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
789  min_m_n,
790  n_UV_apply,
791  n_iter_prev,
792  buff_H, rs_H, cs_H,
793  buff_RH, rs_RH, cs_RH,
794  b_alg );
795 
796  // Increment the total number of iterations previously performed.
797  n_iter_prev += n_iter_perf_sweep_max;
798 
799 #ifdef PRINTF
800 printf( "FLA_Bsvd_v_opz_var2: total number of iterations performed: %d\n", n_iter_prev );
801 #endif
802  }
803 
804  // Copy the contents of Q to temporary storage.
806  m_U,
807  m_V,
808  buff_U, rs_U, cs_U,
809  buff_W, rs_W, cs_W );
810 // W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
811 
812  // Multiply U by R, overwriting U.
815  2*m_U,
816  m_V,
817  m_V,
818  &rone,
819  ( double* )buff_W, rs_W, 2*cs_W,
820  buff_RG, rs_RG, cs_RG,
821  &rzero,
822  ( double* )buff_U, rs_U, 2*cs_U );
823 
825  m_V,
826  m_V,
827  buff_V, rs_V, cs_V,
828  buff_W, rs_W, cs_W );
829 
830  // Multiply V by R, overwriting V.
833  2*m_V,
834  m_V,
835  m_V,
836  &rone,
837  ( double* )buff_W, rs_W, 2*cs_W,
838  buff_RH, rs_RH, cs_RH,
839  &rzero,
840  ( double* )buff_V, rs_V, 2*cs_V );
841 
842  // Make all the singular values positive.
843  {
844  int i;
845  double minus_one = bl1_dm1();
846 
847  for ( i = 0; i < min_m_n; ++i )
848  {
849  if ( buff_d[ (i )*inc_d ] < rzero )
850  {
851  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
852 
853  // Scale the right singular vectors.
855  m_V,
856  &minus_one,
857  buff_V + (i )*cs_V, rs_V );
858  }
859  }
860  }
861 
862  return n_iter_prev;
863 }
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_z1(), bl1_zcopymt(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var2().