libflame  revision_anchor
Functions
FLA_Svd_ext.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Svd_ext_u_unb_var1 (FLA_Svd_type jobu, FLA_Svd_type jobv, dim_t n_iter_max, FLA_Obj A, FLA_Obj s, FLA_Obj V, FLA_Obj U, dim_t k_accum, dim_t b_alg)
 

Function Documentation

◆ FLA_Svd_ext_u_unb_var1()

FLA_Error FLA_Svd_ext_u_unb_var1 ( FLA_Svd_type  jobu,
FLA_Svd_type  jobv,
dim_t  n_iter_max,
FLA_Obj  A,
FLA_Obj  s,
FLA_Obj  V,
FLA_Obj  U,
dim_t  k_accum,
dim_t  b_alg 
)
19 {
20  FLA_Error r_val = FLA_SUCCESS;
21  FLA_Datatype dt;
22  FLA_Datatype dt_real;
23  FLA_Datatype dt_comp;
24  FLA_Obj scale, T, S, rL, rR, d, e, G, H, C; // C is dummy.
25  dim_t m_A, n_A, min_m_n;
26  dim_t n_GH;
27  double crossover_ratio = 17.0 / 9.0;
28  FLA_Bool u_is_formed = FALSE,
29  v_is_formed = FALSE;
30  int apply_scale;
31 
32  n_GH = k_accum;
33 
34  m_A = FLA_Obj_length( A );
35  n_A = FLA_Obj_width( A );
36  min_m_n = min( m_A, n_A );
37  dt = FLA_Obj_datatype( A );
38  dt_real = FLA_Obj_datatype_proj_to_real( A );
39  dt_comp = FLA_Obj_datatype_proj_to_complex( A );
40 
41  // Create matrices to hold block Householder transformations.
42  FLA_Bidiag_UT_create_T( A, &T, &S );
43 
44  // Create vectors to hold the realifying scalars.
45  if ( FLA_Obj_is_complex( A ) )
46  {
47  FLA_Obj_create( dt, min_m_n, 1, 0, 0, &rL );
48  FLA_Obj_create( dt, min_m_n, 1, 0, 0, &rR );
49  }
50 
51  // Create vectors to hold the diagonal and sub-diagonal.
52  FLA_Obj_create( dt_real, min_m_n, 1, 0, 0, &d );
53  FLA_Obj_create( dt_real, min_m_n-1, 1, 0, 0, &e );
54 
55  // Create matrices to hold the left and right Givens scalars.
56  FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &G );
57  FLA_Obj_create( dt_comp, min_m_n-1, n_GH, 0, 0, &H );
58 
59  // Create a real scaling factor.
60  FLA_Obj_create( dt_real, 1, 1, 0, 0, &scale );
61 
62  // Scale matrix A if necessary.
63  FLA_Max_abs_value( A, scale );
64  apply_scale =
65  ( FLA_Obj_gt( scale, FLA_OVERFLOW_SQUARE_THRES ) == TRUE ) -
66  ( FLA_Obj_lt( scale, FLA_UNDERFLOW_SQUARE_THRES ) == TRUE );
67 
68  if ( apply_scale )
69  FLA_Scal( apply_scale > 0 ? FLA_SAFE_MIN : FLA_SAFE_INV_MIN, A );
70 
71  if ( m_A < crossover_ratio * n_A )
72  {
73  // Reduce the matrix to bidiagonal form.
74  // Apply scalars to rotate elements on the superdiagonal to the real domain.
75  // Extract the diagonal and superdiagonal from A.
76  FLA_Bidiag_UT( A, T, S );
77  if ( FLA_Obj_is_complex( A ) )
78  FLA_Bidiag_UT_realify( A, rL, rR );
80 
81  // Form U and V.
82  if ( u_is_formed == FALSE )
83  {
84  switch ( jobu )
85  {
86  case FLA_SVD_VECTORS_MIN_OVERWRITE:
87  if ( jobv != FLA_SVD_VECTORS_NONE )
88  FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, A, S, FLA_NO_TRANSPOSE, V );
89  v_is_formed = TRUE; // For this case, V should be formed here.
90  U = A;
91  case FLA_SVD_VECTORS_ALL:
92  case FLA_SVD_VECTORS_MIN_COPY:
93  FLA_Bidiag_UT_form_U_ext( FLA_UPPER_TRIANGULAR, A, T, FLA_NO_TRANSPOSE, U );
94  u_is_formed = TRUE;
95  break;
96  case FLA_SVD_VECTORS_NONE:
97  // Do nothing
98  break;
99  }
100  }
101  if ( v_is_formed == FALSE )
102  {
103  if ( jobv == FLA_SVD_VECTORS_MIN_OVERWRITE )
104  {
105  FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, A, S, FLA_CONJ_TRANSPOSE, A );
106  v_is_formed = TRUE; /* and */
107  V = A; // This V is actually V^H.
108 
109  // V^H -> V
110  FLA_Obj_flip_base( &V );
111  FLA_Obj_flip_view( &V );
112  if ( FLA_Obj_is_complex( A ) )
113  FLA_Conjugate( V );
114  }
115  else if ( jobv != FLA_SVD_VECTORS_NONE )
116  {
117  FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, A, S, FLA_NO_TRANSPOSE, V );
118  v_is_formed = TRUE;
119  }
120  }
121 
122  // For complex matrices, apply realification transformation.
123  if ( FLA_Obj_is_complex( A ) && jobu != FLA_SVD_VECTORS_NONE )
124  {
125  FLA_Obj UL, UR;
126  FLA_Part_1x2( U, &UL, &UR, min_m_n, FLA_LEFT );
127  FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, rL, UL );
128  }
129  if ( FLA_Obj_is_complex( A ) && jobv != FLA_SVD_VECTORS_NONE )
130  {
131  FLA_Obj VL, VR;
132  FLA_Part_1x2( V, &VL, &VR, min_m_n, FLA_LEFT );
133  FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL );
134  }
135 
136  // Perform a singular value decomposition on the upper bidiagonal matrix.
137  r_val = FLA_Bsvd_ext_opt_var1( n_iter_max,
138  d, e, G, H,
139  jobu, U, jobv, V,
140  FALSE, C, // C is not referenced
141  b_alg );
142  }
143  else // if ( crossover_ratio * n_A <= m_A )
144  {
145  FLA_Obj TQ, R;
146  FLA_Obj AT,
147  AB;
148 
149  // Perform a QR factorization on A.
150  FLA_QR_UT_create_T( A, &TQ );
151  FLA_QR_UT( A, TQ );
152 
153  // Set the lower triangle of R to zero and then copy the upper
154  // triangle of A to R.
155  FLA_Part_2x1( A, &AT,
156  &AB, n_A, FLA_TOP );
157  FLA_Obj_create( dt, n_A, n_A, 0, 0, &R );
158  FLA_Setr( FLA_LOWER_TRIANGULAR, FLA_ZERO, R );
159  FLA_Copyr( FLA_UPPER_TRIANGULAR, AT, R );
160 
161  // Form U; if necessary overwrite on A.
162  if ( u_is_formed == FALSE )
163  {
164  switch ( jobu )
165  {
166  case FLA_SVD_VECTORS_MIN_OVERWRITE:
167  U = A;
168  case FLA_SVD_VECTORS_ALL:
169  case FLA_SVD_VECTORS_MIN_COPY:
170  FLA_QR_UT_form_Q( A, TQ, U );
171  u_is_formed = TRUE;
172  break;
173  case FLA_SVD_VECTORS_NONE:
174  // Do nothing
175  break;
176  }
177  }
178  FLA_Obj_free( &TQ );
179 
180  // Reduce the matrix to bidiagonal form.
181  // Apply scalars to rotate elements on the superdiagonal to the real domain.
182  // Extract the diagonal and superdiagonal from A.
183  FLA_Bidiag_UT( R, T, S );
184  if ( FLA_Obj_is_complex( R ) )
185  FLA_Bidiag_UT_realify( R, rL, rR );
187 
188  if ( v_is_formed == FALSE )
189  {
190  if ( jobv == FLA_SVD_VECTORS_MIN_OVERWRITE )
191  {
192  FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, R, S, FLA_CONJ_TRANSPOSE, AT );
193  v_is_formed = TRUE; /* and */
194  V = AT; // This V is actually V^H.
195 
196  // V^H -> V
197  FLA_Obj_flip_base( &V );
198  FLA_Obj_flip_view( &V );
199  if ( FLA_Obj_is_complex( A ) )
200  FLA_Conjugate( V );
201  }
202  else if ( jobv != FLA_SVD_VECTORS_NONE )
203  {
204  FLA_Bidiag_UT_form_V_ext( FLA_UPPER_TRIANGULAR, R, S, FLA_NO_TRANSPOSE, V );
205  v_is_formed = TRUE;
206  }
207  }
208 
209  // Apply householder vectors U in R.
210  FLA_Bidiag_UT_form_U_ext( FLA_UPPER_TRIANGULAR, R, T, FLA_NO_TRANSPOSE, R );
211 
212  // Apply the realifying scalars in rL and rR to U and V, respectively.
213  if ( FLA_Obj_is_complex( A ) && jobu != FLA_SVD_VECTORS_NONE )
214  {
215  FLA_Obj RL, RR;
216  FLA_Part_1x2( R, &RL, &RR, min_m_n, FLA_LEFT );
217  FLA_Apply_diag_matrix( FLA_RIGHT, FLA_CONJUGATE, rL, RL );
218  }
219  if ( FLA_Obj_is_complex( A ) && jobv != FLA_SVD_VECTORS_NONE )
220  {
221  FLA_Obj VL, VR;
222  FLA_Part_1x2( V, &VL, &VR, min_m_n, FLA_LEFT );
223  FLA_Apply_diag_matrix( FLA_RIGHT, FLA_NO_CONJUGATE, rR, VL );
224  }
225 
226  // Perform a singular value decomposition on the bidiagonal matrix.
227  r_val = FLA_Bsvd_ext_opt_var1( n_iter_max,
228  d, e, G, H,
229  jobu, R, jobv, V,
230  FALSE, C,
231  b_alg );
232 
233  // Multiply R into U, storing the result in A and then copying back
234  // to U.
235  if ( jobu != FLA_SVD_VECTORS_NONE )
236  {
237  FLA_Obj UL, UR;
238  FLA_Part_1x2( U, &UL, &UR, min_m_n, FLA_LEFT );
239 
240  if ( jobu == FLA_SVD_VECTORS_MIN_OVERWRITE ||
241  jobv == FLA_SVD_VECTORS_MIN_OVERWRITE )
242  {
243  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, UL, &C );
244  FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
245  FLA_ONE, UL, R, FLA_ZERO, C );
246  FLA_Copy( C, UL );
247  FLA_Obj_free( &C );
248  }
249  else
250  {
251  FLA_Gemm( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
252  FLA_ONE, UL, R, FLA_ZERO, A );
253  FLA_Copy( A, UL );
254  }
255  }
256  FLA_Obj_free( &R );
257  }
258 
259  // Copy the converged eigenvalues to the output vector.
260  FLA_Copy( d, s );
261 
262  // No sort is required as it is applied on FLA_Bsvd.
263 
264  if ( apply_scale )
265  FLA_Scal( apply_scale < 0 ? FLA_SAFE_MIN : FLA_SAFE_INV_MIN, s );
266 
267  // When V is overwritten, flip it again.
268  if ( jobv == FLA_SVD_VECTORS_MIN_OVERWRITE )
269  {
270  // Always apply conjugation first wrt dimensions used; then, flip base.
271  if ( FLA_Obj_is_complex( V ) )
272  FLA_Conjugate( V );
273  FLA_Obj_flip_base( &V );
274  }
275 
276  FLA_Obj_free( &scale );
277  FLA_Obj_free( &T );
278  FLA_Obj_free( &S );
279 
280  if ( FLA_Obj_is_complex( A ) )
281  {
282  FLA_Obj_free( &rL );
283  FLA_Obj_free( &rR );
284  }
285 
286  FLA_Obj_free( &d );
287  FLA_Obj_free( &e );
288  FLA_Obj_free( &G );
289  FLA_Obj_free( &H );
290 
291  return r_val;
292 }
FLA_Error FLA_Bidiag_UT(FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
Definition: FLA_Bidiag_UT.c:17
FLA_Error FLA_Bidiag_UT_form_V_ext(FLA_Uplo uplo, FLA_Obj A, FLA_Obj S, FLA_Trans transv, FLA_Obj V)
Definition: FLA_Bidiag_UT_form_V_ext.c:13
FLA_Error FLA_Bidiag_UT_extract_real_diagonals(FLA_Obj A, FLA_Obj d, FLA_Obj e)
Definition: FLA_Bidiag_UT_extract_real_diagonals.c:13
FLA_Error FLA_Bidiag_UT_form_U_ext(FLA_Uplo uplo, FLA_Obj A, FLA_Obj T, FLA_Trans transu, FLA_Obj U)
Definition: FLA_Bidiag_UT_form_U_ext.c:13
FLA_Error FLA_Bidiag_UT_create_T(FLA_Obj A, FLA_Obj *TU, FLA_Obj *TV)
Definition: FLA_Bidiag_UT_create_T.c:13
FLA_Error FLA_Bidiag_UT_realify(FLA_Obj A, FLA_Obj d, FLA_Obj e)
Definition: FLA_Bidiag_UT_realify.c:13
FLA_Error FLA_Bsvd_ext_opt_var1(dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj H, FLA_Svd_type jobu, FLA_Obj U, FLA_Svd_type jobv, FLA_Obj V, FLA_Bool apply_Uh2C, FLA_Obj C, dim_t b_alg)
Definition: FLA_Bsvd_ext_opt_var1.c:13
FLA_Error FLA_QR_UT_form_Q(FLA_Obj A, FLA_Obj T, FLA_Obj Q)
Definition: FLA_QR_UT_form_Q.c:13
FLA_Error FLA_QR_UT_create_T(FLA_Obj A, FLA_Obj *T)
Definition: FLA_QR_UT_create_T.c:13
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Copyr(FLA_Uplo uplo, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyr.c:15
FLA_Error FLA_Gemm(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm.c:15
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_SAFE_INV_MIN
Definition: FLA_Init.c:29
FLA_Obj FLA_UNDERFLOW_SQUARE_THRES
Definition: FLA_Init.c:33
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Obj FLA_SAFE_MIN
Definition: FLA_Init.c:27
FLA_Obj FLA_OVERFLOW_SQUARE_THRES
Definition: FLA_Init.c:34
FLA_Error FLA_QR_UT(FLA_Obj A, FLA_Obj T)
Definition: FLA_QR_UT.c:15
FLA_Bool FLA_Obj_gt(FLA_Obj A, FLA_Obj B)
Definition: FLA_Query.c:658
FLA_Datatype FLA_Obj_datatype_proj_to_complex(FLA_Obj A)
Definition: FLA_Query.c:37
FLA_Error FLA_Obj_flip_view(FLA_Obj *obj)
Definition: FLA_Obj.c:669
FLA_Error FLA_Obj_flip_base(FLA_Obj *obj)
Definition: FLA_Obj.c:647
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Bool FLA_Obj_lt(FLA_Obj A, FLA_Obj B)
Definition: FLA_Query.c:813
FLA_Datatype FLA_Obj_datatype_proj_to_real(FLA_Obj A)
Definition: FLA_Query.c:23
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Bool FLA_Obj_is_complex(FLA_Obj A)
Definition: FLA_Query.c:324
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:286
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
int FLA_Datatype
Definition: FLA_type_defs.h:49
unsigned long dim_t
Definition: FLA_type_defs.h:71
int FLA_Bool
Definition: FLA_type_defs.h:46
FLA_Error FLA_Setr(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Setr.c:13
FLA_Error FLA_Conjugate(FLA_Obj A)
Definition: FLA_Conjugate.c:13
FLA_Error FLA_Max_abs_value(FLA_Obj A, FLA_Obj amax)
Definition: FLA_Max_abs_value.c:13
FLA_Error FLA_Apply_diag_matrix(FLA_Side side, FLA_Conj conj, FLA_Obj x, FLA_Obj A)
Definition: FLA_Apply_diag_matrix.c:13
Definition: FLA_type_defs.h:159

References FLA_Apply_diag_matrix(), FLA_Bidiag_UT(), FLA_Bidiag_UT_create_T(), FLA_Bidiag_UT_extract_real_diagonals(), FLA_Bidiag_UT_form_U_ext(), FLA_Bidiag_UT_form_V_ext(), FLA_Bidiag_UT_realify(), FLA_Bsvd_ext_opt_var1(), FLA_Conjugate(), FLA_Copy(), FLA_Copyr(), FLA_Gemm(), FLA_Max_abs_value(), FLA_Obj_create(), FLA_Obj_create_conf_to(), FLA_Obj_datatype(), FLA_Obj_datatype_proj_to_complex(), FLA_Obj_datatype_proj_to_real(), FLA_Obj_flip_base(), FLA_Obj_flip_view(), FLA_Obj_free(), FLA_Obj_gt(), FLA_Obj_is_complex(), FLA_Obj_length(), FLA_Obj_lt(), FLA_Obj_width(), FLA_ONE, FLA_OVERFLOW_SQUARE_THRES, FLA_Part_1x2(), FLA_Part_2x1(), FLA_QR_UT(), FLA_QR_UT_create_T(), FLA_QR_UT_form_Q(), FLA_SAFE_INV_MIN, FLA_SAFE_MIN, FLA_Scal(), FLA_Setr(), FLA_UNDERFLOW_SQUARE_THRES, and FLA_ZERO.

Referenced by FLA_Svd(), and FLA_Svd_ext().