libflame  revision_anchor
Functions
FLA_Tevd_v_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Tevd_v_opt_var2 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj R, FLA_Obj W, FLA_Obj U, dim_t b_alg)
 
FLA_Error FLA_Tevd_v_ops_var2 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opd_var2 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opc_var2 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opz_var2 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
 

Function Documentation

◆ FLA_Tevd_v_opc_var2()

FLA_Error FLA_Tevd_v_opc_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_R,
int  rs_R,
int  cs_R,
scomplex buff_W,
int  rs_W,
int  cs_W,
scomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
416 {
417  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
418 
419  return FLA_SUCCESS;
420 }

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_opd_var2()

FLA_Error FLA_Tevd_v_opd_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_R,
int  rs_R,
int  cs_R,
double *  buff_W,
int  rs_W,
int  cs_W,
double *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
181 {
182  dcomplex one = bl1_z1();
183  double rone = bl1_d1();
184  double rzero = bl1_d0();
185 
186  dcomplex* G;
187  double* d1;
188  double* e1;
189  int r_val;
190  int done;
191  int m_G_sweep_max;
192  int ij_begin;
193  int ijTL, ijBR;
194  int m_A11;
195  int n_iter_perf;
196  int n_U_apply;
197  int total_deflations;
198  int n_deflations;
199  int n_iter_prev;
200  int n_iter_perf_sweep_max;
201 
202  // Initialize our completion flag.
203  done = FALSE;
204 
205  // Initialize a counter that holds the maximum number of rows of G
206  // that we would need to initialize for the next sweep.
207  m_G_sweep_max = m_A - 1;
208 
209  // Initialize a counter for the total number of iterations performed.
210  n_iter_prev = 0;
211 
212  // Initialize R to identity.
213  bl1_dident( m_A,
214  buff_R, rs_R, cs_R );
215 
216  // Iterate until the matrix has completely deflated.
217  for ( total_deflations = 0; done != TRUE; )
218  {
219 
220  // Initialize G to contain only identity rotations.
221  bl1_zsetm( m_G_sweep_max,
222  n_G,
223  &one,
224  buff_G, rs_G, cs_G );
225 
226  // Keep track of the maximum number of iterations performed in the
227  // current sweep. This is used when applying the sweep's Givens
228  // rotations.
229  n_iter_perf_sweep_max = 0;
230 
231  // Perform a sweep: Move through the matrix and perform a tridiagonal
232  // EVD on each non-zero submatrix that is encountered. During the
233  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
234  for ( ij_begin = 0; ij_begin < m_A; )
235  {
236 
237 #ifdef PRINTF
238 if ( ij_begin == 0 )
239 printf( "FLA_Tevd_v_opd_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
240 #endif
241 
242  // Search for the first submatrix along the diagonal that is
243  // bounded by zeroes (or endpoints of the matrix). If no
244  // submatrix is found (ie: if the entire subdiagonal is zero
245  // then FLA_FAILURE is returned. This function also inspects
246  // subdiagonal elements for proximity to zero. If a given
247  // element is close enough to zero, then it is deemed
248  // converged and manually set to zero.
249  r_val = FLA_Tevd_find_submatrix_opd( m_A,
250  ij_begin,
251  buff_d, inc_d,
252  buff_e, inc_e,
253  &ijTL,
254  &ijBR );
255 
256  // Verify that a submatrix was found. If one was not found,
257  // then we are done with the current sweep. Furthermore, if
258  // a submatrix was not found AND we began our search at the
259  // beginning of the matrix (ie: ij_begin == 0), then the
260  // matrix has completely deflated and so we are done with
261  // Francis step iteration.
262  if ( r_val == FLA_FAILURE )
263  {
264  if ( ij_begin == 0 )
265  {
266 #ifdef PRINTF
267 printf( "FLA_Tevd_v_opd_var2: subdiagonal is completely zero.\n" );
268 printf( "FLA_Tevd_v_opd_var2: Francis iteration is done!\n" );
269 #endif
270  done = TRUE;
271  }
272 
273  // Break out of the current sweep so we can apply the last
274  // remaining Givens rotations.
275  break;
276  }
277 
278  // If we got this far, then:
279  // (a) ijTL refers to the index of the first non-zero
280  // subdiagonal along the diagonal, and
281  // (b) ijBR refers to either:
282  // - the first zero element that occurs after ijTL, or
283  // - the the last diagonal element.
284  // Note that ijTL and ijBR also correspond to the first and
285  // last diagonal elements of the submatrix of interest. Thus,
286  // we may compute the dimension of this submatrix as:
287  m_A11 = ijBR - ijTL + 1;
288 
289 #ifdef PRINTF
290 printf( "FLA_Tevd_v_opd_var2: ij_begin = %d\n", ij_begin );
291 printf( "FLA_Tevd_v_opd_var2: ijTL = %d\n", ijTL );
292 printf( "FLA_Tevd_v_opd_var2: ijBR = %d\n", ijBR );
293 printf( "FLA_Tevd_v_opd_var2: m_A11 = %d\n", m_A11 );
294 #endif
295 
296  // Adjust ij_begin, which gets us ready for the next subproblem, if
297  // there is one.
298  ij_begin = ijBR + 1;
299 
300  // Index to the submatrices upon which we will operate.
301  d1 = buff_d + ijTL * inc_d;
302  e1 = buff_e + ijTL * inc_e;
303  G = buff_G + ijTL * rs_G;
304 
305  // Search for a batch of eigenvalues, recursing on deflated
306  // subproblems whenever a split occurs. Iteration continues
307  // as long as
308  // (a) there is still matrix left to operate on, and
309  // (b) the number of iterations performed in this batch is
310  // less than n_G.
311  // If/when either of the two above conditions fails to hold,
312  // the function returns.
313  n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
314  n_G,
315  ijTL,
316  d1, inc_d,
317  e1, inc_e,
318  G, rs_G, cs_G,
319  &n_iter_perf );
320 
321  // Record the number of deflations that we observed.
322  total_deflations += n_deflations;
323 
324  // Update the maximum number of iterations performed in the
325  // current sweep.
326  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
327 
328 #ifdef PRINTF
329 printf( "FLA_Tevd_v_opd_var2: deflations observed = %d\n", n_deflations );
330 printf( "FLA_Tevd_v_opd_var2: total deflations observed = %d\n", total_deflations );
331 printf( "FLA_Tevd_v_opd_var2: num iterations = %d\n", n_iter_perf );
332 #endif
333 
334  // Store the most recent value of ijBR in m_G_sweep_max.
335  // When the sweep is done, this value will contain the minimum
336  // number of rows of G we can apply and safely include all
337  // non-identity rotations that were computed during the
338  // eigenvalue searches.
339  m_G_sweep_max = ijBR;
340 
341  // Make sure we haven't exceeded our maximum iteration count.
342  if ( n_iter_prev >= m_A * n_iter_max )
343  {
344 #ifdef PRINTF
345 printf( "FLA_Tevd_v_opd_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
346 #endif
347  FLA_Abort();
348  //return FLA_FAILURE;
349  }
350  }
351 
352  // The sweep is complete. Now we must apply the Givens rotations
353  // that were accumulated during the sweep.
354 
355 
356  // Recall that the number of columns of U to which we apply
357  // rotations is one more than the number of rotations.
358  n_U_apply = m_G_sweep_max + 1;
359 
360  // Apply the Givens rotations that were computed as part of
361  // the previous batch of iterations.
362  //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
363  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
364  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
365  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
366  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
367  m_U,
368  n_U_apply,
369  n_iter_prev,
370  buff_G, rs_G, cs_G,
371  buff_R, rs_R, cs_R,
372  b_alg );
373 
374 #ifdef PRINTF
375 printf( "FLA_Tevd_v_opd_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
376 #endif
377 
378  // Increment the total number of iterations previously performed.
379  n_iter_prev += n_iter_perf_sweep_max;
380  }
381 
382  // Copy the contents of Q to temporary storage.
384  m_A,
385  m_A,
386  buff_U, rs_U, cs_U,
387  buff_W, rs_W, cs_W );
388 
389 
390  // Multiply Q by R, overwriting U.
393  m_A,
394  m_A,
395  m_A,
396  &rone,
397  ( double* )buff_W, rs_W, cs_W,
398  buff_R, rs_R, cs_R,
399  &rzero,
400  ( double* )buff_U, rs_U, cs_U );
401 
402  return n_iter_prev;
403 }
FLA_Error FLA_Apply_G_rf_bld_var3b(int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3b.c:135
FLA_Error FLA_Tevd_iteracc_v_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition: FLA_Tevd_iteracc_v_opt_var1.c:26
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Tevd_find_submatrix.c:28
void FLA_Abort(void)
Definition: FLA_Error.c:248
void bl1_dcopymt(trans1_t trans, int m, int n, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:148
void bl1_dgemm(trans1_t transa, trans1_t transb, int m, int k, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_gemm.c:274
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_dident(int m, double *a, int a_rs, int a_cs)
Definition: bl1_ident.c:32
double bl1_d1(void)
Definition: bl1_constants.c:54
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
Definition: blis_type_defs.h:138

References bl1_d0(), bl1_d1(), bl1_dcopymt(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_ops_var2()

FLA_Error FLA_Tevd_v_ops_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_R,
int  rs_R,
int  cs_R,
float *  buff_W,
int  rs_W,
int  cs_W,
float *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
162 {
163  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
164 
165  return FLA_SUCCESS;
166 }

Referenced by FLA_Tevd_v_opt_var2().

◆ FLA_Tevd_v_opt_var2()

FLA_Error FLA_Tevd_v_opt_var2 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  R,
FLA_Obj  W,
FLA_Obj  U,
dim_t  b_alg 
)
14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_A, m_U, n_G;
18  int inc_d;
19  int inc_e;
20  int rs_G, cs_G;
21  int rs_R, cs_R;
22  int rs_U, cs_U;
23  int rs_W, cs_W;
24 
25  datatype = FLA_Obj_datatype( U );
26 
27  m_A = FLA_Obj_vector_dim( d );
28  m_U = FLA_Obj_length( U );
29  n_G = FLA_Obj_width( G );
30 
31  inc_d = FLA_Obj_vector_inc( d );
32  inc_e = FLA_Obj_vector_inc( e );
33 
34  rs_G = FLA_Obj_row_stride( G );
35  cs_G = FLA_Obj_col_stride( G );
36 
37  rs_R = FLA_Obj_row_stride( R );
38  cs_R = FLA_Obj_col_stride( R );
39 
40  rs_W = FLA_Obj_row_stride( W );
41  cs_W = FLA_Obj_col_stride( W );
42 
43  rs_U = FLA_Obj_row_stride( U );
44  cs_U = FLA_Obj_col_stride( U );
45 
46 
47  switch ( datatype )
48  {
49  case FLA_FLOAT:
50  {
51  float* buff_d = FLA_FLOAT_PTR( d );
52  float* buff_e = FLA_FLOAT_PTR( e );
53  scomplex* buff_G = FLA_COMPLEX_PTR( G );
54  float* buff_R = FLA_FLOAT_PTR( R );
55  float* buff_W = FLA_FLOAT_PTR( W );
56  float* buff_U = FLA_FLOAT_PTR( U );
57 
58  r_val = FLA_Tevd_v_ops_var2( m_A,
59  m_U,
60  n_G,
61  n_iter_max,
62  buff_d, inc_d,
63  buff_e, inc_e,
64  buff_G, rs_G, cs_G,
65  buff_R, rs_R, cs_R,
66  buff_W, rs_W, cs_W,
67  buff_U, rs_U, cs_U,
68  b_alg );
69 
70  break;
71  }
72 
73  case FLA_DOUBLE:
74  {
75  double* buff_d = FLA_DOUBLE_PTR( d );
76  double* buff_e = FLA_DOUBLE_PTR( e );
77  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
78  double* buff_R = FLA_DOUBLE_PTR( R );
79  double* buff_W = FLA_DOUBLE_PTR( W );
80  double* buff_U = FLA_DOUBLE_PTR( U );
81 
82  r_val = FLA_Tevd_v_opd_var2( m_A,
83  m_U,
84  n_G,
85  n_iter_max,
86  buff_d, inc_d,
87  buff_e, inc_e,
88  buff_G, rs_G, cs_G,
89  buff_R, rs_R, cs_R,
90  buff_W, rs_W, cs_W,
91  buff_U, rs_U, cs_U,
92  b_alg );
93 
94  break;
95  }
96 
97  case FLA_COMPLEX:
98  {
99  float* buff_d = FLA_FLOAT_PTR( d );
100  float* buff_e = FLA_FLOAT_PTR( e );
101  scomplex* buff_G = FLA_COMPLEX_PTR( G );
102  float* buff_R = FLA_FLOAT_PTR( R );
103  scomplex* buff_W = FLA_COMPLEX_PTR( W );
104  scomplex* buff_U = FLA_COMPLEX_PTR( U );
105 
106  r_val = FLA_Tevd_v_opc_var2( m_A,
107  m_U,
108  n_G,
109  n_iter_max,
110  buff_d, inc_d,
111  buff_e, inc_e,
112  buff_G, rs_G, cs_G,
113  buff_R, rs_R, cs_R,
114  buff_W, rs_W, cs_W,
115  buff_U, rs_U, cs_U,
116  b_alg );
117 
118  break;
119  }
120 
121  case FLA_DOUBLE_COMPLEX:
122  {
123  double* buff_d = FLA_DOUBLE_PTR( d );
124  double* buff_e = FLA_DOUBLE_PTR( e );
125  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
126  double* buff_R = FLA_DOUBLE_PTR( R );
127  dcomplex* buff_W = FLA_DOUBLE_COMPLEX_PTR( W );
128  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
129 
130  r_val = FLA_Tevd_v_opz_var2( m_A,
131  m_U,
132  n_G,
133  n_iter_max,
134  buff_d, inc_d,
135  buff_e, inc_e,
136  buff_G, rs_G, cs_G,
137  buff_R, rs_R, cs_R,
138  buff_W, rs_W, cs_W,
139  buff_U, rs_U, cs_U,
140  b_alg );
141 
142  break;
143  }
144  }
145 
146  return r_val;
147 }
FLA_Error FLA_Tevd_v_opc_var2(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, scomplex *buff_W, int rs_W, int cs_W, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var2.c:405
FLA_Error FLA_Tevd_v_ops_var2(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_R, int rs_R, int cs_R, float *buff_W, int rs_W, int cs_W, float *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var2.c:151
FLA_Error FLA_Tevd_v_opd_var2(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, double *buff_W, int rs_W, int cs_W, double *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var2.c:170
FLA_Error FLA_Tevd_v_opz_var2(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_R, int rs_R, int cs_R, dcomplex *buff_W, int rs_W, int cs_W, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var2.c:424
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition: FLA_Query.c:137
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:133

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_ops_var2(), and FLA_Tevd_v_opz_var2().

Referenced by FLA_Hevd_lv_unb_var2().

◆ FLA_Tevd_v_opz_var2()

FLA_Error FLA_Tevd_v_opz_var2 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_R,
int  rs_R,
int  cs_R,
dcomplex buff_W,
int  rs_W,
int  cs_W,
dcomplex buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
435 {
436  dcomplex one = bl1_z1();
437  double rone = bl1_d1();
438  double rzero = bl1_d0();
439 
440  dcomplex* G;
441  double* d1;
442  double* e1;
443  int r_val;
444  int done;
445  int m_G_sweep_max;
446  int ij_begin;
447  int ijTL, ijBR;
448  int m_A11;
449  int n_iter_perf;
450  int n_U_apply;
451  int total_deflations;
452  int n_deflations;
453  int n_iter_prev;
454  int n_iter_perf_sweep_max;
455 
456  // Initialize our completion flag.
457  done = FALSE;
458 
459  // Initialize a counter that holds the maximum number of rows of G
460  // that we would need to initialize for the next sweep.
461  m_G_sweep_max = m_A - 1;
462 
463  // Initialize a counter for the total number of iterations performed.
464  n_iter_prev = 0;
465 
466  // Initialize R to identity.
467  bl1_dident( m_A,
468  buff_R, rs_R, cs_R );
469 
470  // Iterate until the matrix has completely deflated.
471  for ( total_deflations = 0; done != TRUE; )
472  {
473 
474  // Initialize G to contain only identity rotations.
475  bl1_zsetm( m_G_sweep_max,
476  n_G,
477  &one,
478  buff_G, rs_G, cs_G );
479 
480  // Keep track of the maximum number of iterations performed in the
481  // current sweep. This is used when applying the sweep's Givens
482  // rotations.
483  n_iter_perf_sweep_max = 0;
484 
485  // Perform a sweep: Move through the matrix and perform a tridiagonal
486  // EVD on each non-zero submatrix that is encountered. During the
487  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
488  for ( ij_begin = 0; ij_begin < m_A; )
489  {
490 
491 #ifdef PRINTF
492 if ( ij_begin == 0 )
493 printf( "FLA_Tevd_v_opz_var2: beginning new sweep (ij_begin = %d)\n", ij_begin );
494 #endif
495 
496  // Search for the first submatrix along the diagonal that is
497  // bounded by zeroes (or endpoints of the matrix). If no
498  // submatrix is found (ie: if the entire subdiagonal is zero
499  // then FLA_FAILURE is returned. This function also inspects
500  // subdiagonal elements for proximity to zero. If a given
501  // element is close enough to zero, then it is deemed
502  // converged and manually set to zero.
503  r_val = FLA_Tevd_find_submatrix_opd( m_A,
504  ij_begin,
505  buff_d, inc_d,
506  buff_e, inc_e,
507  &ijTL,
508  &ijBR );
509 
510  // Verify that a submatrix was found. If one was not found,
511  // then we are done with the current sweep. Furthermore, if
512  // a submatrix was not found AND we began our search at the
513  // beginning of the matrix (ie: ij_begin == 0), then the
514  // matrix has completely deflated and so we are done with
515  // Francis step iteration.
516  if ( r_val == FLA_FAILURE )
517  {
518  if ( ij_begin == 0 )
519  {
520 #ifdef PRINTF
521 printf( "FLA_Tevd_v_opz_var2: subdiagonal is completely zero.\n" );
522 printf( "FLA_Tevd_v_opz_var2: Francis iteration is done!\n" );
523 #endif
524  done = TRUE;
525  }
526 
527  // Break out of the current sweep so we can apply the last
528  // remaining Givens rotations.
529  break;
530  }
531 
532  // If we got this far, then:
533  // (a) ijTL refers to the index of the first non-zero
534  // subdiagonal along the diagonal, and
535  // (b) ijBR refers to either:
536  // - the first zero element that occurs after ijTL, or
537  // - the the last diagonal element.
538  // Note that ijTL and ijBR also correspond to the first and
539  // last diagonal elements of the submatrix of interest. Thus,
540  // we may compute the dimension of this submatrix as:
541  m_A11 = ijBR - ijTL + 1;
542 
543 #ifdef PRINTF
544 printf( "FLA_Tevd_v_opz_var2: ij_begin = %d\n", ij_begin );
545 printf( "FLA_Tevd_v_opz_var2: ijTL = %d\n", ijTL );
546 printf( "FLA_Tevd_v_opz_var2: ijBR = %d\n", ijBR );
547 printf( "FLA_Tevd_v_opz_var2: m_A11 = %d\n", m_A11 );
548 #endif
549 
550  // Adjust ij_begin, which gets us ready for the next subproblem, if
551  // there is one.
552  ij_begin = ijBR + 1;
553 
554  // Index to the submatrices upon which we will operate.
555  d1 = buff_d + ijTL * inc_d;
556  e1 = buff_e + ijTL * inc_e;
557  G = buff_G + ijTL * rs_G;
558 
559  // Search for a batch of eigenvalues, recursing on deflated
560  // subproblems whenever a split occurs. Iteration continues
561  // as long as
562  // (a) there is still matrix left to operate on, and
563  // (b) the number of iterations performed in this batch is
564  // less than n_G.
565  // If/when either of the two above conditions fails to hold,
566  // the function returns.
567  n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
568  n_G,
569  ijTL,
570  d1, inc_d,
571  e1, inc_e,
572  G, rs_G, cs_G,
573  &n_iter_perf );
574 
575  // Record the number of deflations that we observed.
576  total_deflations += n_deflations;
577 
578  // Update the maximum number of iterations performed in the
579  // current sweep.
580  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
581 
582 #ifdef PRINTF
583 printf( "FLA_Tevd_v_opz_var2: deflations observed = %d\n", n_deflations );
584 printf( "FLA_Tevd_v_opz_var2: total deflations observed = %d\n", total_deflations );
585 printf( "FLA_Tevd_v_opz_var2: num iterations = %d\n", n_iter_perf );
586 #endif
587 
588  // Store the most recent value of ijBR in m_G_sweep_max.
589  // When the sweep is done, this value will contain the minimum
590  // number of rows of G we can apply and safely include all
591  // non-identity rotations that were computed during the
592  // eigenvalue searches.
593  m_G_sweep_max = ijBR;
594 
595  // Make sure we haven't exceeded our maximum iteration count.
596  if ( n_iter_prev >= m_A * n_iter_max )
597  {
598 #ifdef PRINTF
599 printf( "FLA_Tevd_v_opz_var2: reached maximum total number of iterations: %d\n", n_iter_prev );
600 #endif
601  FLA_Abort();
602  //return FLA_FAILURE;
603  }
604  }
605 
606  // The sweep is complete. Now we must apply the Givens rotations
607  // that were accumulated during the sweep.
608 
609 
610  // Recall that the number of columns of U to which we apply
611  // rotations is one more than the number of rotations.
612  n_U_apply = m_G_sweep_max + 1;
613 
614  // Apply the Givens rotations that were computed as part of
615  // the previous batch of iterations.
616  //FLA_Apply_G_rf_bld_var8b( n_iter_perf_sweep_max,
617  //FLA_Apply_G_rf_bld_var5b( n_iter_perf_sweep_max,
618  FLA_Apply_G_rf_bld_var3b( n_iter_perf_sweep_max,
619  //FLA_Apply_G_rf_bld_var9b( n_iter_perf_sweep_max,
620  //FLA_Apply_G_rf_bld_var6b( n_iter_perf_sweep_max,
621  m_U,
622  n_U_apply,
623  n_iter_prev,
624  buff_G, rs_G, cs_G,
625  buff_R, rs_R, cs_R,
626  b_alg );
627 
628 #ifdef PRINTF
629 printf( "FLA_Tevd_v_opz_var2: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
630 #endif
631 
632  // Increment the total number of iterations previously performed.
633  n_iter_prev += n_iter_perf_sweep_max;
634  }
635 
636  // Copy the contents of Q to temporary storage.
638  m_A,
639  m_A,
640  buff_U, rs_U, cs_U,
641  buff_W, rs_W, cs_W );
642 
643 
644  // Multiply Q by R, overwriting U.
647  2*m_A,
648  m_A,
649  m_A,
650  &rone,
651  ( double* )buff_W, rs_W, 2*cs_W,
652  buff_R, rs_R, cs_R,
653  &rzero,
654  ( double* )buff_U, rs_U, 2*cs_U );
655 
656  return n_iter_prev;
657 }
void bl1_zcopymt(trans1_t trans, int m, int n, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_copymt.c:286

References bl1_d0(), bl1_d1(), bl1_dgemm(), bl1_dident(), bl1_z1(), bl1_zcopymt(), bl1_zsetm(), BLIS1_NO_TRANSPOSE, FLA_Abort(), FLA_Apply_G_rf_bld_var3b(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var2().