libflame  revision_anchor
Functions
FLA_Bsvd_v_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Bsvd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Obj U, FLA_Obj V, dim_t b_alg)
 
FLA_Error FLA_Bsvd_v_ops_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opd_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opc_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
 
FLA_Error FLA_Bsvd_v_opz_var1 (int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
 

Function Documentation

◆ FLA_Bsvd_v_opc_var1()

FLA_Error FLA_Bsvd_v_opc_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
scomplex buff_U,
int  rs_U,
int  cs_U,
scomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
686 {
687  scomplex one = bl1_c1();
688  float rzero = bl1_s0();
689 
690  int maxitr = 6;
691 
692  float eps;
693  float tolmul;
694  float tol;
695  float thresh;
696 
697  scomplex* G;
698  scomplex* H;
699  float* d1;
700  float* e1;
701  int r_val;
702  int done;
703  int m_GH_sweep_max;
704  int ij_begin;
705  int ijTL, ijBR;
706  int m_A11;
707  int n_iter_perf;
708  int n_UV_apply;
709  int total_deflations;
710  int n_deflations;
711  int n_iter_prev;
712  int n_iter_perf_sweep_max;
713 
714  // Compute some convergence constants.
715  eps = FLA_Mach_params_ops( FLA_MACH_EPS );
716  tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
718  tolmul,
719  maxitr,
720  buff_d, inc_d,
721  buff_e, inc_e,
722  &tol,
723  &thresh );
724 
725  // Initialize our completion flag.
726  done = FALSE;
727 
728  // Initialize a counter that holds the maximum number of rows of G
729  // and H that we would need to initialize for the next sweep.
730  m_GH_sweep_max = min_m_n - 1;
731 
732  // Initialize a counter for the total number of iterations performed.
733  n_iter_prev = 0;
734 
735  // Iterate until the matrix has completely deflated.
736  for ( total_deflations = 0; done != TRUE; )
737  {
738 
739  // Initialize G and H to contain only identity rotations.
740  bl1_csetm( m_GH_sweep_max,
741  n_GH,
742  &one,
743  buff_G, rs_G, cs_G );
744  bl1_csetm( m_GH_sweep_max,
745  n_GH,
746  &one,
747  buff_H, rs_H, cs_H );
748 
749  // Keep track of the maximum number of iterations performed in the
750  // current sweep. This is used when applying the sweep's Givens
751  // rotations.
752  n_iter_perf_sweep_max = 0;
753 
754  // Perform a sweep: Move through the matrix and perform a bidiagonal
755  // SVD on each non-zero submatrix that is encountered. During the
756  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
757  for ( ij_begin = 0; ij_begin < min_m_n; )
758  {
759  // Search for the first submatrix along the diagonal that is
760  // bounded by zeroes (or endpoints of the matrix). If no
761  // submatrix is found (ie: if the entire superdiagonal is zero
762  // then FLA_FAILURE is returned. This function also inspects
763  // superdiagonal elements for proximity to zero. If a given
764  // element is close enough to zero, then it is deemed
765  // converged and manually set to zero.
766  r_val = FLA_Bsvd_find_submatrix_ops( min_m_n,
767  ij_begin,
768  buff_d, inc_d,
769  buff_e, inc_e,
770  &ijTL,
771  &ijBR );
772 
773  // Verify that a submatrix was found. If one was not found,
774  // then we are done with the current sweep. Furthermore, if
775  // a submatrix was not found AND we began our search at the
776  // beginning of the matrix (ie: ij_begin == 0), then the
777  // matrix has completely deflated and so we are done with
778  // Francis step iteration.
779  if ( r_val == FLA_FAILURE )
780  {
781  if ( ij_begin == 0 )
782  {
783  done = TRUE;
784  }
785 
786  // Break out of the current sweep so we can apply the last
787  // remaining Givens rotations.
788  break;
789  }
790 
791  // If we got this far, then:
792  // (a) ijTL refers to the index of the first non-zero
793  // superdiagonal along the diagonal, and
794  // (b) ijBR refers to either:
795  // - the first zero element that occurs after ijTL, or
796  // - the the last diagonal element.
797  // Note that ijTL and ijBR also correspond to the first and
798  // last diagonal elements of the submatrix of interest. Thus,
799  // we may compute the dimension of this submatrix as:
800  m_A11 = ijBR - ijTL + 1;
801 
802  // Adjust ij_begin, which gets us ready for the next submatrix
803  // search in the current sweep.
804  ij_begin = ijBR + 1;
805 
806  // Index to the submatrices upon which we will operate.
807  d1 = buff_d + ijTL * inc_d;
808  e1 = buff_e + ijTL * inc_e;
809  G = buff_G + ijTL * rs_G;
810  H = buff_H + ijTL * rs_H;
811 
812  // Search for a batch of singular values, recursing on deflated
813  // subproblems whenever a split occurs. Iteration continues as
814  // long as
815  // (a) there is still matrix left to operate on, and
816  // (b) the number of iterations performed in this batch is
817  // less than n_G.
818  // If/when either of the two above conditions fails to hold,
819  // the function returns.
820  n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11,
821  n_GH,
822  ijTL,
823  tol,
824  thresh,
825  d1, inc_d,
826  e1, inc_e,
827  G, rs_G, cs_G,
828  H, rs_H, cs_H,
829  &n_iter_perf );
830 
831  // Record the number of deflations that were observed.
832  total_deflations += n_deflations;
833 
834  // Update the maximum number of iterations performed in the
835  // current sweep.
836  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
837 
838  // Store the most recent value of ijBR in m_G_sweep_max.
839  // When the sweep is done, this value will contain the minimum
840  // number of rows of G and H we can apply and safely include all
841  // non-identity rotations that were computed during the
842  // singular value searches.
843  m_GH_sweep_max = ijBR;
844 
845  // Make sure we haven't exceeded our maximum iteration count.
846  if ( n_iter_prev >= n_iter_max * min_m_n )
847  {
848  FLA_Abort();
849  //return FLA_FAILURE;
850  }
851  }
852 
853  // The sweep is complete. Now we must apply the Givens rotations
854  // that were accumulated during the sweep.
855 
856  // Recall that the number of columns of U and V to which we apply
857  // rotations is one more than the number of rotations.
858  n_UV_apply = m_GH_sweep_max + 1;
859 
860  // Apply the Givens rotations. Note that we only apply k sets of
861  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
862  // apply to n_UV_apply columns of U and V since this is the most we
863  // need to touch given the most recent value stored to m_GH_sweep_max.
864  //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
865  FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
866  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
867  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
868  m_U,
869  n_UV_apply,
870  buff_G, rs_G, cs_G,
871  buff_U, rs_U, cs_U,
872  b_alg );
873  //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
874  FLA_Apply_G_rf_blc_var3( n_iter_perf_sweep_max,
875  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
876  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
877  m_V,
878  n_UV_apply,
879  buff_H, rs_H, cs_H,
880  buff_V, rs_V, cs_V,
881  b_alg );
882 
883  // Increment the total number of iterations previously performed.
884  n_iter_prev += n_iter_perf_sweep_max;
885  }
886 
887  // Make all the singular values positive.
888  {
889  int i;
890  float minus_one = bl1_sm1();
891 
892  for ( i = 0; i < min_m_n; ++i )
893  {
894  if ( buff_d[ (i )*inc_d ] < rzero )
895  {
896  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
897 
898  // Scale the right singular vectors.
900  m_V,
901  &minus_one,
902  buff_V + (i )*cs_V, rs_V );
903  }
904  }
905  }
906 
907  return FLA_SUCCESS;
908 }
FLA_Error FLA_Apply_G_rf_blc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:157
FLA_Error FLA_Bsvd_compute_tol_thresh_ops(int n_A, float tolmul, float maxitr, float *buff_d, int inc_d, float *buff_e, int inc_e, float *tol, float *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:78
FLA_Error FLA_Bsvd_find_submatrix_ops(int mn_A, int ij_begin, float *buff_d, int inc_d, float *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:14
FLA_Error FLA_Bsvd_iteracc_v_ops_var1(int m_A, int n_GH, int ijTL, float tol, float thresh, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:13
void FLA_Abort(void)
Definition: FLA_Error.c:248
float FLA_Mach_params_ops(FLA_Machval machval)
Definition: FLA_Mach_params.c:47
int i
Definition: bl1_axmyv2.c:145
void bl1_csscalv(conj1_t conj, int n, float *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:35
float bl1_s0(void)
Definition: bl1_constants.c:111
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
float bl1_sm1(void)
Definition: bl1_constants.c:175
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_c1(), bl1_csetm(), bl1_csscalv(), bl1_s0(), bl1_sm1(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blc_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opd_var1()

FLA_Error FLA_Bsvd_v_opd_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
409 {
410  dcomplex one = bl1_z1();
411  double rzero = bl1_d0();
412 
413  int maxitr = 6;
414 
415  double eps;
416  double tolmul;
417  double tol;
418  double thresh;
419 
420  dcomplex* G;
421  dcomplex* H;
422  double* d1;
423  double* e1;
424  int r_val;
425  int done;
426  int m_GH_sweep_max;
427  int ij_begin;
428  int ijTL, ijBR;
429  int m_A11;
430  int n_iter_perf;
431  int n_UV_apply;
432  int total_deflations;
433  int n_deflations;
434  int n_iter_prev;
435  int n_iter_perf_sweep_max;
436 
437  // Compute some convergence constants.
438  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
439  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
441  tolmul,
442  maxitr,
443  buff_d, inc_d,
444  buff_e, inc_e,
445  &tol,
446  &thresh );
447 #ifdef PRINTF
448  printf( "FLA_Bsvd_v_opd_var1: tolmul = %12.6e\n", tolmul );
449  printf( "FLA_Bsvd_v_opd_var1: tol = %12.6e\n", tol );
450  printf( "FLA_Bsvd_v_opd_var1: thresh = %12.6e\n", thresh );
451 #endif
452 
453  // Initialize our completion flag.
454  done = FALSE;
455 
456  // Initialize a counter that holds the maximum number of rows of G
457  // and H that we would need to initialize for the next sweep.
458  m_GH_sweep_max = min_m_n - 1;
459 
460  // Initialize a counter for the total number of iterations performed.
461  n_iter_prev = 0;
462 
463  // Iterate until the matrix has completely deflated.
464  for ( total_deflations = 0; done != TRUE; )
465  {
466 
467  // Initialize G and H to contain only identity rotations.
468  bl1_zsetm( m_GH_sweep_max,
469  n_GH,
470  &one,
471  buff_G, rs_G, cs_G );
472  bl1_zsetm( m_GH_sweep_max,
473  n_GH,
474  &one,
475  buff_H, rs_H, cs_H );
476 
477  // Keep track of the maximum number of iterations performed in the
478  // current sweep. This is used when applying the sweep's Givens
479  // rotations.
480  n_iter_perf_sweep_max = 0;
481 
482  // Perform a sweep: Move through the matrix and perform a bidiagonal
483  // SVD on each non-zero submatrix that is encountered. During the
484  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
485  for ( ij_begin = 0; ij_begin < min_m_n; )
486  {
487 
488 #ifdef PRINTF
489  if ( ij_begin == 0 )
490  printf( "FLA_Bsvd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
491 #endif
492 
493  // Search for the first submatrix along the diagonal that is
494  // bounded by zeroes (or endpoints of the matrix). If no
495  // submatrix is found (ie: if the entire superdiagonal is zero
496  // then FLA_FAILURE is returned. This function also inspects
497  // superdiagonal elements for proximity to zero. If a given
498  // element is close enough to zero, then it is deemed
499  // converged and manually set to zero.
500  r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
501  ij_begin,
502  buff_d, inc_d,
503  buff_e, inc_e,
504  &ijTL,
505  &ijBR );
506 
507  // Verify that a submatrix was found. If one was not found,
508  // then we are done with the current sweep. Furthermore, if
509  // a submatrix was not found AND we began our search at the
510  // beginning of the matrix (ie: ij_begin == 0), then the
511  // matrix has completely deflated and so we are done with
512  // Francis step iteration.
513  if ( r_val == FLA_FAILURE )
514  {
515  if ( ij_begin == 0 )
516  {
517 #ifdef PRINTF
518  printf( "FLA_Bsvd_v_opd_var1: superdiagonal is completely zero.\n" );
519  printf( "FLA_Bsvd_v_opd_var1: Francis iteration is done!\n" );
520 #endif
521  done = TRUE;
522  }
523 
524  // Break out of the current sweep so we can apply the last
525  // remaining Givens rotations.
526  break;
527  }
528 
529  // If we got this far, then:
530  // (a) ijTL refers to the index of the first non-zero
531  // superdiagonal along the diagonal, and
532  // (b) ijBR refers to either:
533  // - the first zero element that occurs after ijTL, or
534  // - the the last diagonal element.
535  // Note that ijTL and ijBR also correspond to the first and
536  // last diagonal elements of the submatrix of interest. Thus,
537  // we may compute the dimension of this submatrix as:
538  m_A11 = ijBR - ijTL + 1;
539 
540 #ifdef PRINTF
541  printf( "FLA_Bsvd_v_opd_var1: ij_begin = %d\n", ij_begin );
542  printf( "FLA_Bsvd_v_opd_var1: ijTL = %d\n", ijTL );
543  printf( "FLA_Bsvd_v_opd_var1: ijBR = %d\n", ijBR );
544  printf( "FLA_Bsvd_v_opd_var1: m_A11 = %d\n", m_A11 );
545 #endif
546 
547  // Adjust ij_begin, which gets us ready for the next submatrix
548  // search in the current sweep.
549  ij_begin = ijBR + 1;
550 
551  // Index to the submatrices upon which we will operate.
552  d1 = buff_d + ijTL * inc_d;
553  e1 = buff_e + ijTL * inc_e;
554  G = buff_G + ijTL * rs_G;
555  H = buff_H + ijTL * rs_H;
556 
557  // Search for a batch of singular values, recursing on deflated
558  // subproblems whenever a split occurs. Iteration continues as
559  // long as
560  // (a) there is still matrix left to operate on, and
561  // (b) the number of iterations performed in this batch is
562  // less than n_G.
563  // If/when either of the two above conditions fails to hold,
564  // the function returns.
565  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
566  n_GH,
567  ijTL,
568  tol,
569  thresh,
570  d1, inc_d,
571  e1, inc_e,
572  G, rs_G, cs_G,
573  H, rs_H, cs_H,
574  &n_iter_perf );
575 
576  // Record the number of deflations that were observed.
577  total_deflations += n_deflations;
578 
579  // Update the maximum number of iterations performed in the
580  // current sweep.
581  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
582 
583 #ifdef PRINTF
584  printf( "FLA_Bsvd_v_opd_var1: deflations observed = %d\n", n_deflations );
585  printf( "FLA_Bsvd_v_opd_var1: total deflations observed = %d\n", total_deflations );
586  printf( "FLA_Bsvd_v_opd_var1: num iterations performed = %d\n", n_iter_perf );
587 #endif
588 
589  // Store the most recent value of ijBR in m_G_sweep_max.
590  // When the sweep is done, this value will contain the minimum
591  // number of rows of G and H we can apply and safely include all
592  // non-identity rotations that were computed during the
593  // singular value searches.
594  m_GH_sweep_max = ijBR;
595 
596  // Make sure we haven't exceeded our maximum iteration count.
597  if ( n_iter_prev >= n_iter_max * min_m_n )
598  {
599 #ifdef PRINTF
600  printf( "FLA_Bsvd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
601 #endif
602  FLA_Abort();
603  //return FLA_FAILURE;
604  }
605  }
606 
607  // The sweep is complete. Now we must apply the Givens rotations
608  // that were accumulated during the sweep.
609 
610  // Recall that the number of columns of U and V to which we apply
611  // rotations is one more than the number of rotations.
612  n_UV_apply = m_GH_sweep_max + 1;
613 
614 #ifdef PRINTF
615  printf( "FLA_Bsvd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
616  printf( "FLA_Bsvd_v_opd_var1: m_U = %d\n", m_U );
617  printf( "FLA_Bsvd_v_opd_var1: m_V = %d\n", m_V );
618  printf( "FLA_Bsvd_v_opd_var1: napp= %d\n", n_UV_apply );
619 #endif
620  // Apply the Givens rotations. Note that we only apply k sets of
621  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
622  // apply to n_UV_apply columns of U and V since this is the most we
623  // need to touch given the most recent value stored to m_GH_sweep_max.
624  //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
625  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
626  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
627  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
628  m_U,
629  n_UV_apply,
630  buff_G, rs_G, cs_G,
631  buff_U, rs_U, cs_U,
632  b_alg );
633  //FLA_Apply_G_rf_bld_var5( n_iter_perf_sweep_max,
634  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
635  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
636  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
637  m_V,
638  n_UV_apply,
639  buff_H, rs_H, cs_H,
640  buff_V, rs_V, cs_V,
641  b_alg );
642 
643  // Increment the total number of iterations previously performed.
644  n_iter_prev += n_iter_perf_sweep_max;
645 
646 #ifdef PRINTF
647  printf( "FLA_Bsvd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
648 #endif
649  }
650 
651  // Make all the singular values positive.
652  {
653  int i;
654  double minus_one = bl1_dm1();
655 
656  for ( i = 0; i < min_m_n; ++i )
657  {
658  if ( buff_d[ (i )*inc_d ] < rzero )
659  {
660  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
661 
662  // Scale the right singular vectors.
664  m_V,
665  &minus_one,
666  buff_V + (i )*cs_V, rs_V );
667  }
668  }
669  }
670 
671  return n_iter_prev;
672 }
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:128
FLA_Error FLA_Bsvd_compute_tol_thresh_opd(int n_A, double tolmul, double maxitr, double *buff_d, int inc_d, double *buff_e, int inc_e, double *tol, double *thresh)
Definition: FLA_Bsvd_compute_tol_thresh.c:137
FLA_Error FLA_Bsvd_find_submatrix_opd(int mn_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Bsvd_find_submatrix.c:73
FLA_Error FLA_Bsvd_iteracc_v_opd_var1(int m_A, int n_GH, int ijTL, double tol, double thresh, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, int *n_iter_perf)
Definition: FLA_Bsvd_iteracc_v_opt_var1.c:176
double FLA_Mach_params_opd(FLA_Machval machval)
Definition: FLA_Mach_params.c:74
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
double bl1_dm1(void)
Definition: bl1_constants.c:182
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
Definition: blis_type_defs.h:138

References bl1_d0(), bl1_dm1(), bl1_dscalv(), bl1_z1(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_ops_var1()

FLA_Error FLA_Bsvd_v_ops_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_H,
int  rs_H,
int  cs_H,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
169 {
170  scomplex one = bl1_c1();
171  float rzero = bl1_s0();
172 
173  int maxitr = 6;
174 
175  float eps;
176  float tolmul;
177  float tol;
178  float thresh;
179 
180  scomplex* G;
181  scomplex* H;
182  float* d1;
183  float* e1;
184  int r_val;
185  int done;
186  int m_GH_sweep_max;
187  int ij_begin;
188  int ijTL, ijBR;
189  int m_A11;
190  int n_iter_perf;
191  int n_UV_apply;
192  int total_deflations;
193  int n_deflations;
194  int n_iter_prev;
195  int n_iter_perf_sweep_max;
196 
197  // Compute some convergence constants.
198  eps = FLA_Mach_params_ops( FLA_MACH_EPS );
199  tolmul = max( 10.0F, min( 100.0F, powf( eps, -0.125F ) ) );
201  tolmul,
202  maxitr,
203  buff_d, inc_d,
204  buff_e, inc_e,
205  &tol,
206  &thresh );
207 
208  // Initialize our completion flag.
209  done = FALSE;
210 
211  // Initialize a counter that holds the maximum number of rows of G
212  // and H that we would need to initialize for the next sweep.
213  m_GH_sweep_max = min_m_n - 1;
214 
215  // Initialize a counter for the total number of iterations performed.
216  n_iter_prev = 0;
217 
218  // Iterate until the matrix has completely deflated.
219  for ( total_deflations = 0; done != TRUE; )
220  {
221 
222  // Initialize G and H to contain only identity rotations.
223  bl1_csetm( m_GH_sweep_max,
224  n_GH,
225  &one,
226  buff_G, rs_G, cs_G );
227  bl1_csetm( m_GH_sweep_max,
228  n_GH,
229  &one,
230  buff_H, rs_H, cs_H );
231 
232  // Keep track of the maximum number of iterations performed in the
233  // current sweep. This is used when applying the sweep's Givens
234  // rotations.
235  n_iter_perf_sweep_max = 0;
236 
237  // Perform a sweep: Move through the matrix and perform a bidiagonal
238  // SVD on each non-zero submatrix that is encountered. During the
239  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
240  for ( ij_begin = 0; ij_begin < min_m_n; )
241  {
242 
243  // Search for the first submatrix along the diagonal that is
244  // bounded by zeroes (or endpoints of the matrix). If no
245  // submatrix is found (ie: if the entire superdiagonal is zero
246  // then FLA_FAILURE is returned. This function also inspects
247  // superdiagonal elements for proximity to zero. If a given
248  // element is close enough to zero, then it is deemed
249  // converged and manually set to zero.
250  r_val = FLA_Bsvd_find_submatrix_ops( min_m_n,
251  ij_begin,
252  buff_d, inc_d,
253  buff_e, inc_e,
254  &ijTL,
255  &ijBR );
256 
257  // Verify that a submatrix was found. If one was not found,
258  // then we are done with the current sweep. Furthermore, if
259  // a submatrix was not found AND we began our search at the
260  // beginning of the matrix (ie: ij_begin == 0), then the
261  // matrix has completely deflated and so we are done with
262  // Francis step iteration.
263  if ( r_val == FLA_FAILURE )
264  {
265  if ( ij_begin == 0 )
266  {
267  done = TRUE;
268  }
269 
270  // Break out of the current sweep so we can apply the last
271  // remaining Givens rotations.
272  break;
273  }
274 
275  // If we got this far, then:
276  // (a) ijTL refers to the index of the first non-zero
277  // superdiagonal along the diagonal, and
278  // (b) ijBR refers to either:
279  // - the first zero element that occurs after ijTL, or
280  // - the the last diagonal element.
281  // Note that ijTL and ijBR also correspond to the first and
282  // last diagonal elements of the submatrix of interest. Thus,
283  // we may compute the dimension of this submatrix as:
284  m_A11 = ijBR - ijTL + 1;
285 
286  // Adjust ij_begin, which gets us ready for the next submatrix
287  // search in the current sweep.
288  ij_begin = ijBR + 1;
289 
290  // Index to the submatrices upon which we will operate.
291  d1 = buff_d + ijTL * inc_d;
292  e1 = buff_e + ijTL * inc_e;
293  G = buff_G + ijTL * rs_G;
294  H = buff_H + ijTL * rs_H;
295 
296  // Search for a batch of singular values, recursing on deflated
297  // subproblems whenever a split occurs. Iteration continues as
298  // long as
299  // (a) there is still matrix left to operate on, and
300  // (b) the number of iterations performed in this batch is
301  // less than n_G.
302  // If/when either of the two above conditions fails to hold,
303  // the function returns.
304  n_deflations = FLA_Bsvd_iteracc_v_ops_var1( m_A11,
305  n_GH,
306  ijTL,
307  tol,
308  thresh,
309  d1, inc_d,
310  e1, inc_e,
311  G, rs_G, cs_G,
312  H, rs_H, cs_H,
313  &n_iter_perf );
314 
315  // Record the number of deflations that were observed.
316  total_deflations += n_deflations;
317 
318  // Update the maximum number of iterations performed in the
319  // current sweep.
320  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
321 
322  // Store the most recent value of ijBR in m_G_sweep_max.
323  // When the sweep is done, this value will contain the minimum
324  // number of rows of G and H we can apply and safely include all
325  // non-identity rotations that were computed during the
326  // singular value searches.
327  m_GH_sweep_max = ijBR;
328 
329  // Make sure we haven't exceeded our maximum iteration count.
330  if ( n_iter_prev >= n_iter_max * min_m_n )
331  {
332  FLA_Abort();
333  //return FLA_FAILURE;
334  }
335  }
336 
337  // The sweep is complete. Now we must apply the Givens rotations
338  // that were accumulated during the sweep.
339 
340  // Recall that the number of columns of U and V to which we apply
341  // rotations is one more than the number of rotations.
342  n_UV_apply = m_GH_sweep_max + 1;
343 
344  // Apply the Givens rotations. Note that we only apply k sets of
345  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
346  // apply to n_UV_apply columns of U and V since this is the most we
347  // need to touch given the most recent value stored to m_GH_sweep_max.
348  //FLA_Apply_G_rf_bls_var5( n_iter_perf_sweep_max,
349  FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
350  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
351  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
352  m_U,
353  n_UV_apply,
354  buff_G, rs_G, cs_G,
355  buff_U, rs_U, cs_U,
356  b_alg );
357  //FLA_Apply_G_rf_blc_var5( n_iter_perf_sweep_max,
358  FLA_Apply_G_rf_bls_var3( n_iter_perf_sweep_max,
359  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
360  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
361  m_V,
362  n_UV_apply,
363  buff_H, rs_H, cs_H,
364  buff_V, rs_V, cs_V,
365  b_alg );
366 
367  // Increment the total number of iterations previously performed.
368  n_iter_prev += n_iter_perf_sweep_max;
369 
370  }
371 
372  // Make all the singular values positive.
373  {
374  int i;
375  float minus_one = bl1_sm1();
376 
377  for ( i = 0; i < min_m_n; ++i )
378  {
379  if ( buff_d[ (i )*inc_d ] < rzero )
380  {
381  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
382 
383  // Scale the right singular vectors.
385  m_V,
386  &minus_one,
387  buff_V + (i )*cs_V, rs_V );
388  }
389  }
390  }
391 
392  return n_iter_prev;
393 }
FLA_Error FLA_Apply_G_rf_bls_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:99
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13

References bl1_c1(), bl1_csetm(), bl1_s0(), bl1_sm1(), bl1_sscalv(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_bls_var3(), FLA_Bsvd_compute_tol_thresh_ops(), FLA_Bsvd_find_submatrix_ops(), FLA_Bsvd_iteracc_v_ops_var1(), FLA_Mach_params_ops(), and i.

Referenced by FLA_Bsvd_v_opt_var1().

◆ FLA_Bsvd_v_opt_var1()

FLA_Error FLA_Bsvd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  H,
FLA_Obj  U,
FLA_Obj  V,
dim_t  b_alg 
)
16 {
17  FLA_Error r_val = FLA_SUCCESS;
18  FLA_Datatype datatype;
19  int m_U, m_V, n_GH;
20  int inc_d;
21  int inc_e;
22  int rs_G, cs_G;
23  int rs_H, cs_H;
24  int rs_U, cs_U;
25  int rs_V, cs_V;
26 
27  datatype = FLA_Obj_datatype( U );
28 
29  m_U = FLA_Obj_length( U );
30  m_V = FLA_Obj_length( V );
31  n_GH = FLA_Obj_width( G );
32 
33  inc_d = FLA_Obj_vector_inc( d );
34  inc_e = FLA_Obj_vector_inc( e );
35 
36  rs_G = FLA_Obj_row_stride( G );
37  cs_G = FLA_Obj_col_stride( G );
38 
39  rs_H = FLA_Obj_row_stride( H );
40  cs_H = FLA_Obj_col_stride( H );
41 
42  rs_U = FLA_Obj_row_stride( U );
43  cs_U = FLA_Obj_col_stride( U );
44 
45  rs_V = FLA_Obj_row_stride( V );
46  cs_V = FLA_Obj_col_stride( V );
47 
48 
49  switch ( datatype )
50  {
51  case FLA_FLOAT:
52  {
53  float* buff_d = FLA_FLOAT_PTR( d );
54  float* buff_e = FLA_FLOAT_PTR( e );
55  scomplex* buff_G = FLA_COMPLEX_PTR( G );
56  scomplex* buff_H = FLA_COMPLEX_PTR( H );
57  float* buff_U = FLA_FLOAT_PTR( U );
58  float* buff_V = FLA_FLOAT_PTR( V );
59 
60  r_val = FLA_Bsvd_v_ops_var1( min( m_U, m_V ),
61  m_U,
62  m_V,
63  n_GH,
64  n_iter_max,
65  buff_d, inc_d,
66  buff_e, inc_e,
67  buff_G, rs_G, cs_G,
68  buff_H, rs_H, cs_H,
69  buff_U, rs_U, cs_U,
70  buff_V, rs_V, cs_V,
71  b_alg );
72 
73  break;
74  }
75 
76  case FLA_DOUBLE:
77  {
78  double* buff_d = FLA_DOUBLE_PTR( d );
79  double* buff_e = FLA_DOUBLE_PTR( e );
80  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
81  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
82  double* buff_U = FLA_DOUBLE_PTR( U );
83  double* buff_V = FLA_DOUBLE_PTR( V );
84 
85  r_val = FLA_Bsvd_v_opd_var1( min( m_U, m_V ),
86  m_U,
87  m_V,
88  n_GH,
89  n_iter_max,
90  buff_d, inc_d,
91  buff_e, inc_e,
92  buff_G, rs_G, cs_G,
93  buff_H, rs_H, cs_H,
94  buff_U, rs_U, cs_U,
95  buff_V, rs_V, cs_V,
96  b_alg );
97 
98  break;
99  }
100 
101  case FLA_COMPLEX:
102  {
103  float* buff_d = FLA_FLOAT_PTR( d );
104  float* buff_e = FLA_FLOAT_PTR( e );
105  scomplex* buff_G = FLA_COMPLEX_PTR( G );
106  scomplex* buff_H = FLA_COMPLEX_PTR( H );
107  scomplex* buff_U = FLA_COMPLEX_PTR( U );
108  scomplex* buff_V = FLA_COMPLEX_PTR( V );
109 
110  r_val = FLA_Bsvd_v_opc_var1( min( m_U, m_V ),
111  m_U,
112  m_V,
113  n_GH,
114  n_iter_max,
115  buff_d, inc_d,
116  buff_e, inc_e,
117  buff_G, rs_G, cs_G,
118  buff_H, rs_H, cs_H,
119  buff_U, rs_U, cs_U,
120  buff_V, rs_V, cs_V,
121  b_alg );
122 
123  break;
124  }
125 
126  case FLA_DOUBLE_COMPLEX:
127  {
128  double* buff_d = FLA_DOUBLE_PTR( d );
129  double* buff_e = FLA_DOUBLE_PTR( e );
130  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
131  dcomplex* buff_H = FLA_DOUBLE_COMPLEX_PTR( H );
132  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
133  dcomplex* buff_V = FLA_DOUBLE_COMPLEX_PTR( V );
134 
135  r_val = FLA_Bsvd_v_opz_var1( min( m_U, m_V ),
136  m_U,
137  m_V,
138  n_GH,
139  n_iter_max,
140  buff_d, inc_d,
141  buff_e, inc_e,
142  buff_G, rs_G, cs_G,
143  buff_H, rs_H, cs_H,
144  buff_U, rs_U, cs_U,
145  buff_V, rs_V, cs_V,
146  b_alg );
147 
148  break;
149  }
150  }
151 
152  return r_val;
153 }
FLA_Error FLA_Bsvd_v_opd_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, double *buff_U, int rs_U, int cs_U, double *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var1.c:397
FLA_Error FLA_Bsvd_v_opc_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var1.c:674
FLA_Error FLA_Bsvd_v_opz_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_H, int rs_H, int cs_H, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var1.c:912
FLA_Error FLA_Bsvd_v_ops_var1(int min_m_n, int m_U, int m_V, int n_GH, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_H, int rs_H, int cs_H, float *buff_U, int rs_U, int cs_U, float *buff_V, int rs_V, int cs_V, int b_alg)
Definition: FLA_Bsvd_v_opt_var1.c:157
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
int FLA_Datatype
Definition: FLA_type_defs.h:49

References FLA_Bsvd_v_opc_var1(), FLA_Bsvd_v_opd_var1(), FLA_Bsvd_v_ops_var1(), FLA_Bsvd_v_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Svd_uv_unb_var1().

◆ FLA_Bsvd_v_opz_var1()

FLA_Error FLA_Bsvd_v_opz_var1 ( int  min_m_n,
int  m_U,
int  m_V,
int  n_GH,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_H,
int  rs_H,
int  cs_H,
dcomplex buff_U,
int  rs_U,
int  cs_U,
dcomplex buff_V,
int  rs_V,
int  cs_V,
int  b_alg 
)
924 {
925  dcomplex one = bl1_z1();
926  double rzero = bl1_d0();
927 
928  int maxitr = 6;
929 
930  double eps;
931  double tolmul;
932  double tol;
933  double thresh;
934 
935  dcomplex* G;
936  dcomplex* H;
937  double* d1;
938  double* e1;
939  int r_val;
940  int done;
941  int m_GH_sweep_max;
942  int ij_begin;
943  int ijTL, ijBR;
944  int m_A11;
945  int n_iter_perf;
946  int n_UV_apply;
947  int total_deflations;
948  int n_deflations;
949  int n_iter_prev;
950  int n_iter_perf_sweep_max;
951 
952  // Compute some convergence constants.
953  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
954  tolmul = max( 10.0, min( 100.0, pow( eps, -0.125 ) ) );
956  tolmul,
957  maxitr,
958  buff_d, inc_d,
959  buff_e, inc_e,
960  &tol,
961  &thresh );
962 #ifdef PRINTF
963  printf( "FLA_Bsvd_v_opz_var1: tolmul = %12.6e\n", tolmul );
964  printf( "FLA_Bsvd_v_opz_var1: tol = %12.6e\n", tol );
965  printf( "FLA_Bsvd_v_opz_var1: thresh = %12.6e\n", thresh );
966 #endif
967 
968  // Initialize our completion flag.
969  done = FALSE;
970 
971  // Initialize a counter that holds the maximum number of rows of G
972  // and H that we would need to initialize for the next sweep.
973  m_GH_sweep_max = min_m_n - 1;
974 
975  // Initialize a counter for the total number of iterations performed.
976  n_iter_prev = 0;
977 
978  // Iterate until the matrix has completely deflated.
979  for ( total_deflations = 0; done != TRUE; )
980  {
981 
982  // Initialize G and H to contain only identity rotations.
983  bl1_zsetm( m_GH_sweep_max,
984  n_GH,
985  &one,
986  buff_G, rs_G, cs_G );
987  bl1_zsetm( m_GH_sweep_max,
988  n_GH,
989  &one,
990  buff_H, rs_H, cs_H );
991 
992  // Keep track of the maximum number of iterations performed in the
993  // current sweep. This is used when applying the sweep's Givens
994  // rotations.
995  n_iter_perf_sweep_max = 0;
996 
997  // Perform a sweep: Move through the matrix and perform a bidiagonal
998  // SVD on each non-zero submatrix that is encountered. During the
999  // first time through, ijTL will be 0 and ijBR will be min_m_n - 1.
1000  for ( ij_begin = 0; ij_begin < min_m_n; )
1001  {
1002 
1003 #ifdef PRINTF
1004  if ( ij_begin == 0 )
1005  printf( "FLA_Bsvd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
1006 #endif
1007 
1008  // Search for the first submatrix along the diagonal that is
1009  // bounded by zeroes (or endpoints of the matrix). If no
1010  // submatrix is found (ie: if the entire superdiagonal is zero
1011  // then FLA_FAILURE is returned. This function also inspects
1012  // superdiagonal elements for proximity to zero. If a given
1013  // element is close enough to zero, then it is deemed
1014  // converged and manually set to zero.
1015  r_val = FLA_Bsvd_find_submatrix_opd( min_m_n,
1016  ij_begin,
1017  buff_d, inc_d,
1018  buff_e, inc_e,
1019  &ijTL,
1020  &ijBR );
1021 
1022  // Verify that a submatrix was found. If one was not found,
1023  // then we are done with the current sweep. Furthermore, if
1024  // a submatrix was not found AND we began our search at the
1025  // beginning of the matrix (ie: ij_begin == 0), then the
1026  // matrix has completely deflated and so we are done with
1027  // Francis step iteration.
1028  if ( r_val == FLA_FAILURE )
1029  {
1030  if ( ij_begin == 0 )
1031  {
1032 #ifdef PRINTF
1033  printf( "FLA_Bsvd_v_opz_var1: superdiagonal is completely zero.\n" );
1034  printf( "FLA_Bsvd_v_opz_var1: Francis iteration is done!\n" );
1035 #endif
1036  done = TRUE;
1037  }
1038 
1039  // Break out of the current sweep so we can apply the last
1040  // remaining Givens rotations.
1041  break;
1042  }
1043 
1044  // If we got this far, then:
1045  // (a) ijTL refers to the index of the first non-zero
1046  // superdiagonal along the diagonal, and
1047  // (b) ijBR refers to either:
1048  // - the first zero element that occurs after ijTL, or
1049  // - the the last diagonal element.
1050  // Note that ijTL and ijBR also correspond to the first and
1051  // last diagonal elements of the submatrix of interest. Thus,
1052  // we may compute the dimension of this submatrix as:
1053  m_A11 = ijBR - ijTL + 1;
1054 
1055 #ifdef PRINTF
1056  printf( "FLA_Bsvd_v_opz_var1: ij_begin = %d\n", ij_begin );
1057  printf( "FLA_Bsvd_v_opz_var1: ijTL = %d\n", ijTL );
1058  printf( "FLA_Bsvd_v_opz_var1: ijBR = %d\n", ijBR );
1059  printf( "FLA_Bsvd_v_opz_var1: m_A11 = %d\n", m_A11 );
1060 #endif
1061 
1062  // Adjust ij_begin, which gets us ready for the next submatrix
1063  // search in the current sweep.
1064  ij_begin = ijBR + 1;
1065 
1066  // Index to the submatrices upon which we will operate.
1067  d1 = buff_d + ijTL * inc_d;
1068  e1 = buff_e + ijTL * inc_e;
1069  G = buff_G + ijTL * rs_G;
1070  H = buff_H + ijTL * rs_H;
1071 
1072  // Search for a batch of singular values, recursing on deflated
1073  // subproblems whenever a split occurs. Iteration continues as
1074  // long as
1075  // (a) there is still matrix left to operate on, and
1076  // (b) the number of iterations performed in this batch is
1077  // less than n_G.
1078  // If/when either of the two above conditions fails to hold,
1079  // the function returns.
1080  n_deflations = FLA_Bsvd_iteracc_v_opd_var1( m_A11,
1081  n_GH,
1082  ijTL,
1083  tol,
1084  thresh,
1085  d1, inc_d,
1086  e1, inc_e,
1087  G, rs_G, cs_G,
1088  H, rs_H, cs_H,
1089  &n_iter_perf );
1090 
1091  // Record the number of deflations that were observed.
1092  total_deflations += n_deflations;
1093 
1094  // Update the maximum number of iterations performed in the
1095  // current sweep.
1096  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
1097 
1098 #ifdef PRINTF
1099  printf( "FLA_Bsvd_v_opz_var1: deflations observed = %d\n", n_deflations );
1100  printf( "FLA_Bsvd_v_opz_var1: total deflations observed = %d\n", total_deflations );
1101  printf( "FLA_Bsvd_v_opz_var1: num iterations performed = %d\n", n_iter_perf );
1102 #endif
1103 
1104  // Store the most recent value of ijBR in m_G_sweep_max.
1105  // When the sweep is done, this value will contain the minimum
1106  // number of rows of G and H we can apply and safely include all
1107  // non-identity rotations that were computed during the
1108  // singular value searches.
1109  m_GH_sweep_max = ijBR;
1110 
1111  // Make sure we haven't exceeded our maximum iteration count.
1112  if ( n_iter_prev >= n_iter_max * min_m_n )
1113  {
1114 #ifdef PRINTF
1115  printf( "FLA_Bsvd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
1116 #endif
1117  FLA_Abort();
1118  //return FLA_FAILURE;
1119  }
1120  }
1121 
1122  // The sweep is complete. Now we must apply the Givens rotations
1123  // that were accumulated during the sweep.
1124 
1125  // Recall that the number of columns of U and V to which we apply
1126  // rotations is one more than the number of rotations.
1127  n_UV_apply = m_GH_sweep_max + 1;
1128 
1129 #ifdef PRINTF
1130  printf( "FLA_Bsvd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
1131  printf( "FLA_Bsvd_v_opz_var1: m_U = %d\n", m_U );
1132  printf( "FLA_Bsvd_v_opz_var1: m_V = %d\n", m_V );
1133  printf( "FLA_Bsvd_v_opz_var1: napp= %d\n", n_UV_apply );
1134 #endif
1135  // Apply the Givens rotations. Note that we only apply k sets of
1136  // rotations, where k = n_iter_perf_sweep_max. Also note that we only
1137  // apply to n_UV_apply columns of U and V since this is the most we
1138  // need to touch given the most recent value stored to m_GH_sweep_max.
1139  //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
1140  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
1141  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
1142  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
1143  m_U,
1144  n_UV_apply,
1145  buff_G, rs_G, cs_G,
1146  buff_U, rs_U, cs_U,
1147  b_alg );
1148  //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
1149  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
1150  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
1151  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
1152  m_V,
1153  n_UV_apply,
1154  buff_H, rs_H, cs_H,
1155  buff_V, rs_V, cs_V,
1156  b_alg );
1157 
1158  // Increment the total number of iterations previously performed.
1159  n_iter_prev += n_iter_perf_sweep_max;
1160 
1161 #ifdef PRINTF
1162  printf( "FLA_Bsvd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
1163 #endif
1164  }
1165 
1166  // Make all the singular values positive.
1167  {
1168  int i;
1169  double minus_one = bl1_dm1();
1170 
1171  for ( i = 0; i < min_m_n; ++i )
1172  {
1173  if ( buff_d[ (i )*inc_d ] < rzero )
1174  {
1175  buff_d[ (i )*inc_d ] = -buff_d[ (i )*inc_d ];
1176 
1177  // Scale the right singular vectors.
1179  m_V,
1180  &minus_one,
1181  buff_V + (i )*cs_V, rs_V );
1182  }
1183  }
1184  }
1185 
1186  return n_iter_prev;
1187 }
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:186
void bl1_zdscalv(conj1_t conj, int n, double *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:61

References bl1_d0(), bl1_dm1(), bl1_z1(), bl1_zdscalv(), bl1_zsetm(), BLIS1_NO_CONJUGATE, FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Bsvd_compute_tol_thresh_opd(), FLA_Bsvd_find_submatrix_opd(), FLA_Bsvd_iteracc_v_opd_var1(), FLA_Mach_params_opd(), and i.

Referenced by FLA_Bsvd_v_opt_var1().