libflame  revision_anchor
Functions
FLA_Bsvd_v_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Bsvd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj RG, FLA_Obj RH, FLA_Obj W, FLA_Obj U, FLA_Obj V, dim_t b_alg)
 
FLA_Error FLA_Bsvd_v_ops_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opd_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opc_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opz_var2 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
 

Function Documentation

◆ FLA_Bsvd_v_opc_var2()

FLA_Error FLA_Bsvd_v_opc_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_RG,
int  rs_RG,
int  cs_RG,
float *  buff_RH,
int  rs_RH,
int  cs_RH,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
544 {
545  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
546 
547  return FLA_SUCCESS;
548 }

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_opd_var2()

FLA_Error FLA_Bsvd_v_opd_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_RG,
int  rs_RG,
int  cs_RG,
double *  buff_RH,
int  rs_RH,
int  cs_RH,
double *  buff_W,
int  rs_W,
int  cs_W,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
229 {
230  dcomplex one = bl1_z1();
231  double rone = bl1_d1();
232  double rzero = bl1_d0();
233 
234  int maxitr = 6;
235 
236  double eps;
237  double tolmul;
238  double tol;
239  double thresh;
240 
241  dcomplex* G;
242  dcomplex* H;
243  double* d1;
244  double* e1;
245  int r_val;
246  int done;
247  int m_GH_sweep_max;
248  int ij_begin;
249  int ijTL, ijBR;
250  int m_A11;
251  int n_iter_perf;
252  int n_UV_apply;
253  int total_deflations;
254  int n_deflations;
255  int n_iter_prev;
256  int n_iter_perf_sweep_max;
257 
258  // Compute some convergence constants.
259  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
260  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
262  tolmul,
263  maxitr,
264  buff_d, inc_d,
265  buff_e, inc_e,
266  &tol,
267  &thresh );
268 
269  // Initialize our completion flag.
270  done = FALSE;
271 
272  // Initialize a counter that holds the maximum number of rows of G
273  // and H that we would need to initialize for the next sweep.
274  m_GH_sweep_max = min_m_n - 1;
275 
276  // Initialize a counter for the total number of iterations performed.
277  n_iter_prev = 0;
278 
279  // Initialize RG and RH to identity.
280  bl1_dident( min_m_n,
281  buff_RG, rs_RG, cs_RG );
282  bl1_dident( min_m_n,
283  buff_RH, rs_RH, cs_RH );
284 
285  // Iterate until the matrix has completely deflated.
286  for ( total_deflations = 0; done != TRUE; )
287  {
288 
289  // Initialize G and H to contain only identity rotations.
290  bl1_zsetm( m_GH_sweep_max,
291  n_GH,
292  &one,
293  buff_G, rs_G, cs_G );
294  bl1_zsetm( m_GH_sweep_max,
295  n_GH,
296  &one,
297  buff_H, rs_H, cs_H );
298 
299  // Keep track of the maximum number of iterations performed in the
300  // current sweep. This is used when applying the sweep's Givens
301  // rotations.
302  n_iter_perf_sweep_max = 0;
303 
304  // Perform a sweep: Move through the matrix and perform a bidiagonal
305  // SVD on each non-zero submatrix that is encountered. During the
306  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
307  for ( ij_begin = 0; ij_begin < min_m_n; )
308  {
309 
310 #ifdef PRINTF
311 if ( ij_begin == 0 )
312 printf( "FLA_Bsvd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
313 #endif
314 
315  // Search for the first submatrix along the diagonal that is
316  // bounded by zeroes (or endpoints of the matrix). If no
317  // submatrix is found (ie: if the entire superdiagonal is zero
318  // then FLA_FAILURE is returned. This function also inspects
319  // superdiagonal elements for proximity to zero. If a given
320  // element is close enough to zero, then it is deemed
321  // converged and manually set to zero.
322  r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
323  ij_begin,
324  buff_d, inc_d,
325  buff_e, inc_e,
326  &ijTL,
327  &ijBR );
328 
329  // Verify that a submatrix was found. If one was not found,
330  // then we are done with the current sweep. Furthermore, if
331  // a submatrix was not found AND we began our search at the
332  // beginning of the matrix (ie: ij_begin == 0), then the
333  // matrix has completely deflated and so we are done with
334  // Francis step iteration.
335  if ( r_val == FLA_FAILURE )
336  {
337  if ( ij_begin == 0 )
338  {
339 #ifdef PRINTF
340 printf( "FLA_Bsvd_v_opd_var2: superdiagonal is completely zero.\n" );
341 printf( "FLA_Bsvd_v_opd_var2: Francis iteration is done!\n" );
342 #endif
343  done = TRUE;
344  }
345 
346  // Break out of the current sweep so we can apply the last
347  // remaining Givens rotations.
348  break;
349  }
350 
351  // If we got this far, then:
352  // (a) ijTL refers to the index of the first non-zero
353  // superdiagonal along the diagonal, and
354  // (b) ijBR refers to either:
355  // - the first zero element that occurs after ijTL, or
356  // - the the last diagonal element.
357  // Note that ijTL and ijBR also correspond to the first and
358  // last diagonal elements of the submatrix of interest. Thus,
359  // we may compute the dimension of this submatrix as:
360  m_A11 = ijBR - ijTL + 1;
361 
362 #ifdef PRINTF
363 printf( "FLA_Bsvd_v_opd_var2: ij_begin = %d\n", ij_begin );
364 printf( "FLA_Bsvd_v_opd_var2: ijTL = %d\n", ijTL );
365 printf( "FLA_Bsvd_v_opd_var2: ijBR = %d\n", ijBR );
366 printf( "FLA_Bsvd_v_opd_var2: m_A11 = %d\n", m_A11 );
367 #endif
368 
369  // Adjust ij_begin, which gets us ready for the next submatrix
370  // search in the current sweep.
371  ij_begin = ijBR + 1;
372 
373  // Index to the submatrices upon which we will operate.
374  d1 = buff_d + ijTL * inc_d;
375  e1 = buff_e + ijTL * inc_e;
376  G = buff_G + ijTL * rs_G;
377  H = buff_H + ijTL * rs_H;
378 
379  // Search for a batch of singular values, recursing on deflated
380  // subproblems whenever possible. A new singular value search is
381  // performed as long as
382  // (a) there is still matrix left to operate on, and
383  // (b) the number of iterations performed in this batch is
384  // less than n_G.
385  // If/when either of the two above conditions fails to hold,
386  // the function returns.
387  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
388  n_GH,
389  ijTL,
390  tol,
391  thresh,
392  d1, inc_d,
393  e1, inc_e,
394  G, rs_G, cs_G,
395  H, rs_H, cs_H,
396  &n_iter_perf );
397 
398  // Record the number of deflations that occurred.
399  total_deflations += n_deflations;
400 
401  // Update the maximum number of iterations performed in the
402  // current sweep.
403  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
404 
405 #ifdef PRINTF
406 printf( "FLA_Bsvd_v_opd_var2: deflations observed = %d\n", n_deflations );
407 printf( "FLA_Bsvd_v_opd_var2: total deflations observed = %d\n", total_deflations );
408 printf( "FLA_Bsvd_v_opd_var2: num iterations performed = %d\n", n_iter_perf );
409 #endif
410 
411  // Store the most recent value of ijBR in m_G_sweep_max.
412  // When the sweep is done, this value will contain the minimum
413  // number of rows of G and H we can apply and safely include all
414  // non-identity rotations that were computed during the
415  // singular value searches.
416  m_GH_sweep_max = ijBR;
417 
418  }
419 
420  // The sweep is complete. Now we must apply the Givens rotations
421  // that were accumulated during the sweep.
422 
423  // Recall that the number of columns of U and V to which we apply
424  // rotations is one more than the number of rotations.
425  n_UV_apply = m_GH_sweep_max + 1;
426 
427 
428 #ifdef PRINTF
429 printf( "FLA_Bsvd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
430 printf( "FLA_Bsvd_v_opd_var2: m_U = %d\n", m_U );
431 printf( "FLA_Bsvd_v_opd_var2: m_V = %d\n", m_V );
432 printf( "FLA_Bsvd_v_opd_var2: napp= %d\n", n_UV_apply );
433 #endif
434 
435  // Apply the Givens rotations. Note that we only apply k sets of
436  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
437  // apply to n_UV_apply columns of U and V since this is the most we
438  // need to touch given the most recent value stored to m_GH_sweep_max.
439  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
440  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
441  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
442  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
443  min_m_n,
444  n_UV_apply,
445  n_iter_prev,
446  buff_G, rs_G, cs_G,
447  buff_RG, rs_RG, cs_RG,
448  b_alg );
449  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
450  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
451  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
452  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
453  min_m_n,
454  n_UV_apply,
455  n_iter_prev,
456  buff_H, rs_H, cs_H,
457  buff_RH, rs_RH, cs_RH,
458  b_alg );
459 
460  // Increment the total number of iterations previously performed.
461  n_iter_prev += n_iter_perf_sweep_max;
462 
463 #ifdef PRINTF
464 printf( "FLA_Bsvd_v_opd_var2: total number of iterations performed: %d\n", n_iter_prev );
465 #endif
466  }
467 
468  // Copy the contents of Q to temporary storage.
470  m_U,
471  m_V,
472  buff_U, rs_U, cs_U,
473  buff_W, rs_W, cs_W );
474 // W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
475 
476  // Multiply U by R, overwriting U.
479  m_U,
480  m_V,
481  m_V,
482  &rone,
483  ( double* )buff_W, rs_W, cs_W,
484  buff_RG, rs_RG, cs_RG,
485  &rzero,
486  ( double* )buff_U, rs_U, cs_U );
487 
489  m_V,
490  m_V,
491  buff_V, rs_V, cs_V,
492  buff_W, rs_W, cs_W );
493 
494  // Multiply V by R, overwriting V.
497  m_V,
498  m_V,
499  m_V,
500  &rone,
501  ( double* )buff_W, rs_W, cs_W,
502  buff_RH, rs_RH, cs_RH,
503  &rzero,
504  ( double* )buff_V, rs_V, cs_V );
505 
506  // Make all the singular values positive.
507  {
508  int i;
509  double minus_one = bl1_dm1();
510 
511  for ( i = 0; i < min_m_n; ++i )
512  {
513  if ( buff_d[ (i )*inc_d ] < rzero )
514  {
515  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
516 
517  // Scale the right singular vectors.
519  m_V,
520  &minus_one,
521  buff_V + (i )*cs_V, rs_V );
522  }
523  }
524  }
525 
526  return n_iter_prev;
527 }
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3b.c:135
FLA_Error FLA_Bsvd_compute_tol_thresh_opd(int n_A, double tolmul, double maxitr, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:137
FLA_Error FLA_Bsvd_find_submatrix_opd(int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:73
FLA_Error FLA_Bsvd_iteracc_v_opd_var1(int m_A, int n_GH, int ijTL, double tol, double thresh, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:176
double FLA_Mach_params_opd(FLA_Machval machval)
Definition: FLA_Mach_params.c:74
int i
Definition: bl1_axmyv2.c:145
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_gemm.c:274
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
double bl1_dm1(void)
Definition: bl1_constants.c:182
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition: bl1_ident.c:32
double bl1_d1(void)
Definition: bl1_constants.c:54
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:138

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_ops_var2()

FLA_Error FLA_Bsvd_v_ops_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_RG,
int  rs_RG,
int  cs_RG,
float *  buff_RH,
int  rs_RH,
int  cs_RH,
float *  buff_W,
int  rs_W,
int  cs_W,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
206 {
207  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
208 
209  return FLA_SUCCESS;
210 }

Referenced by FLA_Bsvd_v_opt_var2().

◆ FLA_Bsvd_v_opt_var2()

FLA_Error FLA_Bsvd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  RG,
FLA_Obj  RH,
FLA_Obj  W,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)
14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_U, m_V, n_GH;
18  int inc_d;
19  int inc_e;
20  int rs_G, cs_G;
21  int rs_H, cs_H;
22  int rs_RG, cs_RG;
23  int rs_RH, cs_RH;
24  int rs_W, cs_W;
25  int rs_U, cs_U;
26  int rs_V, cs_V;
27 
28  datatype = FLA_Obj_datatype( U );
29 
30  m_U = FLA_Obj_length( U );
31  m_V = FLA_Obj_length( V );
32  n_GH = FLA_Obj_width( G );
33 
34  inc_d = FLA_Obj_vector_inc( d );
35  inc_e = FLA_Obj_vector_inc( e );
36 
37  rs_G = FLA_Obj_row_stride( G );
38  cs_G = FLA_Obj_col_stride( G );
39 
40  rs_H = FLA_Obj_row_stride( H );
41  cs_H = FLA_Obj_col_stride( H );
42 
43  rs_RG = FLA_Obj_row_stride( RG );
44  cs_RG = FLA_Obj_col_stride( RG );
45 
46  rs_RH = FLA_Obj_row_stride( RH );
47  cs_RH = FLA_Obj_col_stride( RH );
48 
49  rs_W = FLA_Obj_row_stride( W );
50  cs_W = FLA_Obj_col_stride( W );
51 
52  rs_U = FLA_Obj_row_stride( U );
53  cs_U = FLA_Obj_col_stride( U );
54 
55  rs_V = FLA_Obj_row_stride( V );
56  cs_V = FLA_Obj_col_stride( V );
57 
58 
59  switch ( datatype )
60  {
61  case FLA_FLOAT:
62  {
63  float* buff_d = FLA_FLOAT_PTR( d );
64  float* buff_e = FLA_FLOAT_PTR( e );
65  scomplex* buff_G = FLA_COMPLEX_PTR( G );
66  scomplex* buff_H = FLA_COMPLEX_PTR( H );
67  float* buff_RG = FLA_FLOAT_PTR( RG );
68  float* buff_RH = FLA_FLOAT_PTR( RH );
69  float* buff_W = FLA_FLOAT_PTR( W );
70  float* buff_U = FLA_FLOAT_PTR( U );
71  float* buff_V = FLA_FLOAT_PTR( V );
72 
73  r_val = FLA_Bsvd_v_ops_var2( min( m_U, m_V ),
74  m_U,
75  m_V,
76  n_GH,
77  n_iter_max,
78  buff_d, inc_d,
79  buff_e, inc_e,
80  buff_G, rs_G, cs_G,
81  buff_H, rs_H, cs_H,
82  buff_RG, rs_RG, cs_RG,
83  buff_RH, rs_RH, cs_RH,
84  buff_W, rs_W, cs_W,
85  buff_U, rs_U, cs_U,
86  buff_V, rs_V, cs_V,
87  b_alg );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE:
93  {
94  double* buff_d = FLA_DOUBLE_PTR( d );
95  double* buff_e = FLA_DOUBLE_PTR( e );
96  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
98  double* buff_RG = FLA_DOUBLE_PTR( RG );
99  double* buff_RH = FLA_DOUBLE_PTR( RH );
100  double* buff_W = FLA_DOUBLE_PTR( W );
101  double* buff_U = FLA_DOUBLE_PTR( U );
102  double* buff_V = FLA_DOUBLE_PTR( V );
103 
104  r_val = FLA_Bsvd_v_opd_var2( min( m_U, m_V ),
105  m_U,
106  m_V,
107  n_GH,
108  n_iter_max,
109  buff_d, inc_d,
110  buff_e, inc_e,
111  buff_G, rs_G, cs_G,
112  buff_H, rs_H, cs_H,
113  buff_RG, rs_RG, cs_RG,
114  buff_RH, rs_RH, cs_RH,
115  buff_W, rs_W, cs_W,
116  buff_U, rs_U, cs_U,
117  buff_V, rs_V, cs_V,
118  b_alg );
119 
120  break;
121  }
122 
123  case FLA_COMPLEX:
124  {
125  float* buff_d = FLA_FLOAT_PTR( d );
126  float* buff_e = FLA_FLOAT_PTR( e );
127  scomplex* buff_G = FLA_COMPLEX_PTR( G );
128  scomplex* buff_H = FLA_COMPLEX_PTR( H );
129  float* buff_RG = FLA_FLOAT_PTR( RG );
130  float* buff_RH = FLA_FLOAT_PTR( RH );
131  scomplex* buff_W = FLA_COMPLEX_PTR( W );
132  scomplex* buff_U = FLA_COMPLEX_PTR( U );
133  scomplex* buff_V = FLA_COMPLEX_PTR( V );
134 
135  r_val = FLA_Bsvd_v_opc_var2( min( m_U, m_V ),
136  m_U,
137  m_V,
138  n_GH,
139  n_iter_max,
140  buff_d, inc_d,
141  buff_e, inc_e,
142  buff_G, rs_G, cs_G,
143  buff_H, rs_H, cs_H,
144  buff_RG, rs_RG, cs_RG,
145  buff_RH, rs_RH, cs_RH,
146  buff_W, rs_W, cs_W,
147  buff_U, rs_U, cs_U,
148  buff_V, rs_V, cs_V,
149  b_alg );
150 
151  break;
152  }
153 
154  case FLA_DOUBLE_COMPLEX:
155  {
156  double* buff_d = FLA_DOUBLE_PTR( d );
157  double* buff_e = FLA_DOUBLE_PTR( e );
158  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
159  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
160  double* buff_RG = FLA_DOUBLE_PTR( RG );
161  double* buff_RH = FLA_DOUBLE_PTR( RH );
162  dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
163  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
164  dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );
165 
166  r_val = FLA_Bsvd_v_opz_var2( min( m_U, m_V ),
167  m_U,
168  m_V,
169  n_GH,
170  n_iter_max,
171  buff_d, inc_d,
172  buff_e, inc_e,
173  buff_G, rs_G, cs_G,
174  buff_H, rs_H, cs_H,
175  buff_RG, rs_RG, cs_RG,
176  buff_RH, rs_RH, cs_RH,
177  buff_W, rs_W, cs_W,
178  buff_U, rs_U, cs_U,
179  buff_V, rs_V, cs_V,
180  b_alg );
181 
182  break;
183  }
184  }
185 
186  return r_val;
187 }
FLA_Error FLA_Bsvd_v_opd_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var2.c:214
FLA_Error FLA_Bsvd_v_ops_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var2.c:191
FLA_Error FLA_Bsvd_v_opc_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_RG, int rs_RG, int cs_RG, float *buff_RH, int rs_RH, int cs_RH, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var2.c:529
FLA_Error FLA_Bsvd_v_opz_var2(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_RG, int rs_RG, int cs_RG, double *buff_RH, int rs_RH, int cs_RH, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var2.c:550
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:133

References FLA_Bsvd_v_opc_var2(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_ops_var2(), FLA_Bsvd_v_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Svd_uv_unb_var2().

◆ FLA_Bsvd_v_opz_var2()

FLA_Error FLA_Bsvd_v_opz_var2 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_RG,
int  rs_RG,
int  cs_RG,
double *  buff_RH,
int  rs_RH,
int  cs_RH,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
565 {
566  dcomplex one = bl1_z1();
567  double rone = bl1_d1();
568  double rzero = bl1_d0();
569 
570  int maxitr = 6;
571 
572  double eps;
573  double tolmul;
574  double tol;
575  double thresh;
576 
577  dcomplex* G;
578  dcomplex* H;
579  double* d1;
580  double* e1;
581  int r_val;
582  int done;
583  int m_GH_sweep_max;
584  int ij_begin;
585  int ijTL, ijBR;
586  int m_A11;
587  int n_iter_perf;
588  int n_UV_apply;
589  int total_deflations;
590  int n_deflations;
591  int n_iter_prev;
592  int n_iter_perf_sweep_max;
593 
594  // Compute some convergence constants.
595  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
596  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
598  tolmul,
599  maxitr,
600  buff_d, inc_d,
601  buff_e, inc_e,
602  &tol,
603  &thresh );
604 
605  // Initialize our completion flag.
606  done = FALSE;
607 
608  // Initialize a counter that holds the maximum number of rows of G
609  // and H that we would need to initialize for the next sweep.
610  m_GH_sweep_max = min_m_n - 1;
611 
612  // Initialize a counter for the total number of iterations performed.
613  n_iter_prev = 0;
614 
615  // Initialize RG and RH to identity.
616  bl1_dident( min_m_n,
617  buff_RG, rs_RG, cs_RG );
618  bl1_dident( min_m_n,
619  buff_RH, rs_RH, cs_RH );
620 
621  // Iterate until the matrix has completely deflated.
622  for ( total_deflations = 0; done != TRUE; )
623  {
624 
625  // Initialize G and H to contain only identity rotations.
626  bl1_zsetm( m_GH_sweep_max,
627  n_GH,
628  &one,
629  buff_G, rs_G, cs_G );
630  bl1_zsetm( m_GH_sweep_max,
631  n_GH,
632  &one,
633  buff_H, rs_H, cs_H );
634 
635  // Keep track of the maximum number of iterations performed in the
636  // current sweep. This is used when applying the sweep's Givens
637  // rotations.
638  n_iter_perf_sweep_max = 0;
639 
640  // Perform a sweep: Move through the matrix and perform a bidiagonal
641  // SVD on each non-zero submatrix that is encountered. During the
642  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
643  for ( ij_begin = 0; ij_begin < min_m_n; )
644  {
645 
646 #ifdef PRINTF
647 if ( ij_begin == 0 )
648 printf( "FLA_Bsvd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
649 #endif
650 
651  // Search for the first submatrix along the diagonal that is
652  // bounded by zeroes (or endpoints of the matrix). If no
653  // submatrix is found (ie: if the entire superdiagonal is zero
654  // then FLA_FAILURE is returned. This function also inspects
655  // superdiagonal elements for proximity to zero. If a given
656  // element is close enough to zero, then it is deemed
657  // converged and manually set to zero.
658  r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
659  ij_begin,
660  buff_d, inc_d,
661  buff_e, inc_e,
662  &ijTL,
663  &ijBR );
664 
665  // Verify that a submatrix was found. If one was not found,
666  // then we are done with the current sweep. Furthermore, if
667  // a submatrix was not found AND we began our search at the
668  // beginning of the matrix (ie: ij_begin == 0), then the
669  // matrix has completely deflated and so we are done with
670  // Francis step iteration.
671  if ( r_val == FLA_FAILURE )
672  {
673  if ( ij_begin == 0 )
674  {
675 #ifdef PRINTF
676 printf( "FLA_Bsvd_v_opz_var2: superdiagonal is completely zero.\n" );
677 printf( "FLA_Bsvd_v_opz_var2: Francis iteration is done!\n" );
678 #endif
679  done = TRUE;
680  }
681 
682  // Break out of the current sweep so we can apply the last
683  // remaining Givens rotations.
684  break;
685  }
686 
687  // If we got this far, then:
688  // (a) ijTL refers to the index of the first non-zero
689  // superdiagonal along the diagonal, and
690  // (b) ijBR refers to either:
691  // - the first zero element that occurs after ijTL, or
692  // - the the last diagonal element.
693  // Note that ijTL and ijBR also correspond to the first and
694  // last diagonal elements of the submatrix of interest. Thus,
695  // we may compute the dimension of this submatrix as:
696  m_A11 = ijBR - ijTL + 1;
697 
698 #ifdef PRINTF
699 printf( "FLA_Bsvd_v_opz_var2: ij_begin = %d\n", ij_begin );
700 printf( "FLA_Bsvd_v_opz_var2: ijTL = %d\n", ijTL );
701 printf( "FLA_Bsvd_v_opz_var2: ijBR = %d\n", ijBR );
702 printf( "FLA_Bsvd_v_opz_var2: m_A11 = %d\n", m_A11 );
703 #endif
704 
705  // Adjust ij_begin, which gets us ready for the next submatrix
706  // search in the current sweep.
707  ij_begin = ijBR + 1;
708 
709  // Index to the submatrices upon which we will operate.
710  d1 = buff_d + ijTL * inc_d;
711  e1 = buff_e + ijTL * inc_e;
712  G = buff_G + ijTL * rs_G;
713  H = buff_H + ijTL * rs_H;
714 
715  // Search for a batch of singular values, recursing on deflated
716  // subproblems whenever possible. A new singular value search is
717  // performed as long as
718  // (a) there is still matrix left to operate on, and
719  // (b) the number of iterations performed in this batch is
720  // less than n_G.
721  // If/when either of the two above conditions fails to hold,
722  // the function returns.
723  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
724  n_GH,
725  ijTL,
726  tol,
727  thresh,
728  d1, inc_d,
729  e1, inc_e,
730  G, rs_G, cs_G,
731  H, rs_H, cs_H,
732  &n_iter_perf );
733 
734  // Record the number of deflations that occurred.
735  total_deflations += n_deflations;
736 
737  // Update the maximum number of iterations performed in the
738  // current sweep.
739  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
740 
741 #ifdef PRINTF
742 printf( "FLA_Bsvd_v_opz_var2: deflations observed = %d\n", n_deflations );
743 printf( "FLA_Bsvd_v_opz_var2: total deflations observed = %d\n", total_deflations );
744 printf( "FLA_Bsvd_v_opz_var2: num iterations performed = %d\n", n_iter_perf );
745 #endif
746 
747  // Store the most recent value of ijBR in m_G_sweep_max.
748  // When the sweep is done, this value will contain the minimum
749  // number of rows of G and H we can apply and safely include all
750  // non-identity rotations that were computed during the
751  // singular value searches.
752  m_GH_sweep_max = ijBR;
753 
754  }
755 
756  // The sweep is complete. Now we must apply the Givens rotations
757  // that were accumulated during the sweep.
758 
759  // Recall that the number of columns of U and V to which we apply
760  // rotations is one more than the number of rotations.
761  n_UV_apply = m_GH_sweep_max + 1;
762 
763 
764 #ifdef PRINTF
765 printf( "FLA_Bsvd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
766 printf( "FLA_Bsvd_v_opz_var2: m_U = %d\n", m_U );
767 printf( "FLA_Bsvd_v_opz_var2: m_V = %d\n", m_V );
768 printf( "FLA_Bsvd_v_opz_var2: napp= %d\n", n_UV_apply );
769 #endif
770 
771  // Apply the Givens rotations. Note that we only apply k sets of
772  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
773  // apply to n_UV_apply columns of U and V since this is the most we
774  // need to touch given the most recent value stored to m_GH_sweep_max.
775  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
776  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
777  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
778  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
779  min_m_n,
780  n_UV_apply,
781  n_iter_prev,
782  buff_G, rs_G, cs_G,
783  buff_RG, rs_RG, cs_RG,
784  b_alg );
785  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
786  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
787  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
788  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
789  min_m_n,
790  n_UV_apply,
791  n_iter_prev,
792  buff_H, rs_H, cs_H,
793  buff_RH, rs_RH, cs_RH,
794  b_alg );
795 
796  // Increment the total number of iterations previously performed.
797  n_iter_prev += n_iter_perf_sweep_max;
798 
799 #ifdef PRINTF
800 printf( "FLA_Bsvd_v_opz_var2: total number of iterations performed: %d\n", n_iter_prev );
801 #endif
802  }
803 
804  // Copy the contents of Q to temporary storage.
806  m_U,
807  m_V,
808  buff_U, rs_U, cs_U,
809  buff_W, rs_W, cs_W );
810 // W needs to be max_m_n-by-min_m_n!!!!!!!!!!!!!!!
811 
812  // Multiply U by R, overwriting U.
815  2*m_U,
816  m_V,
817  m_V,
818  &rone,
819  ( double* )buff_W, rs_W, 2*cs_W,
820  buff_RG, rs_RG, cs_RG,
821  &rzero,
822  ( double* )buff_U, rs_U, 2*cs_U );
823 
825  m_V,
826  m_V,
827  buff_V, rs_V, cs_V,
828  buff_W, rs_W, cs_W );
829 
830  // Multiply V by R, overwriting V.
833  2*m_V,
834  m_V,
835  m_V,
836  &rone,
837  ( double* )buff_W, rs_W, 2*cs_W,
838  buff_RH, rs_RH, cs_RH,
839  &rzero,
840  ( double* )buff_V, rs_V, 2*cs_V );
841 
842  // Make all the singular values positive.
843  {
844  int i;
845  double minus_one = bl1_dm1();
846 
847  for ( i = 0; i < min_m_n; ++i )
848  {
849  if ( buff_d[ (i )*inc_d ] < rzero )
850  {
851  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
852 
853  // Scale the right singular vectors.
855  m_V,
856  &minus_one,
857  buff_V + (i )*cs_V, rs_V );
858  }
859  }
860  }
861 
862  return n_iter_prev;
863 }
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286
void bl1_zdscalv(conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:61

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_dm1(), bl1_z1(), bl1_zcopymt(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Apply_G_rf_bld_var3b(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var2().