libflame  revision_anchor
Functions
FLA_Tevd_v_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Tevd_v_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj U, dim_t b_alg)
 
FLA_Error FLA_Tevd_v_ops_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opd_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opc_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
 
FLA_Error FLA_Tevd_v_opz_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
 

Function Documentation

◆ FLA_Tevd_v_opc_var1()

FLA_Error FLA_Tevd_v_opc_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
374 {
375  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
376 
377  return FLA_SUCCESS;
378 }

Referenced by FLA_Tevd_v_opt_var1().

◆ FLA_Tevd_v_opd_var1()

FLA_Error FLA_Tevd_v_opd_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
153 {
154  dcomplex one = bl1_z1();
155 
156  dcomplex* G;
157  double* d1;
158  double* e1;
159  int r_val;
160  int done;
161  int m_G_sweep_max;
162  int ij_begin;
163  int ijTL, ijBR;
164  int m_A11;
165  int n_iter_perf;
166  int n_U_apply;
167  int total_deflations;
168  int n_deflations;
169  int n_iter_prev;
170  int n_iter_perf_sweep_max;
171 
172  // Initialize our completion flag.
173  done = FALSE;
174 
175  // Initialize a counter that holds the maximum number of rows of G
176  // that we would need to initialize for the next sweep.
177  m_G_sweep_max = m_A - 1;
178 
179  // Initialize a counter for the total number of iterations performed.
180  n_iter_prev = 0;
181 
182  // Iterate until the matrix has completely deflated.
183  for ( total_deflations = 0; done != TRUE; )
184  {
185  // Initialize G to contain only identity rotations.
186  bl1_zsetm( m_G_sweep_max,
187  n_G,
188  &one,
189  buff_G, rs_G, cs_G );
190 
191  // Keep track of the maximum number of iterations performed in the
192  // current sweep. This is used when applying the sweep's Givens
193  // rotations.
194  n_iter_perf_sweep_max = 0;
195 
196  // Perform a sweep: Move through the matrix and perform a tridiagonal
197  // EVD on each non-zero submatrix that is encountered. During the
198  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
199  for ( ij_begin = 0; ij_begin < m_A; )
200  {
201 
202 #ifdef PRINTF
203 if ( ij_begin == 0 )
204 printf( "FLA_Tevd_v_opd_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
205 #endif
206 
207  // Search for the first submatrix along the diagonal that is
208  // bounded by zeroes (or endpoints of the matrix). If no
209  // submatrix is found (ie: if the entire subdiagonal is zero),
210  // then FLA_FAILURE is returned. This function also inspects
211  // subdiagonal elements for proximity to zero. If a given
212  // element is close enough to zero, then it is deemed
213  // converged and manually set to zero.
214  r_val = FLA_Tevd_find_submatrix_opd( m_A,
215  ij_begin,
216  buff_d, inc_d,
217  buff_e, inc_e,
218  &ijTL,
219  &ijBR );
220 
221  // Verify that a submatrix was found. If one was not found,
222  // then we are done with the current sweep. Furthermore, if
223  // a submatrix was not found AND we began our search at the
224  // beginning of the matrix (ie: ij_begin == 0), then the
225  // matrix has completely deflated and so we are done with
226  // Francis step iteration.
227  if ( r_val == FLA_FAILURE )
228  {
229  if ( ij_begin == 0 )
230  {
231 #ifdef PRINTF
232 printf( "FLA_Tevd_v_opd_var1: subdiagonal is completely zero.\n" );
233 printf( "FLA_Tevd_v_opd_var1: Francis iteration is done!\n" );
234 #endif
235  done = TRUE;
236  }
237 
238  // Break out of the current sweep so we can apply the last
239  // remaining Givens rotations.
240  break;
241  }
242 
243  // If we got this far, then:
244  // (a) ijTL refers to the index of the first non-zero
245  // subdiagonal along the diagonal, and
246  // (b) ijBR refers to either:
247  // - the first zero element that occurs after ijTL, or
248  // - the last diagonal element.
249  // Note that ijTL and ijBR also correspond to the first and
250  // last diagonal elements of the submatrix of interest. Thus,
251  // we may compute the dimension of this submatrix as:
252  m_A11 = ijBR - ijTL + 1;
253 
254 #ifdef PRINTF
255 printf( "FLA_Tevd_v_opd_var1: ij_begin = %d\n", ij_begin );
256 printf( "FLA_Tevd_v_opd_var1: ijTL = %d\n", ijTL );
257 printf( "FLA_Tevd_v_opd_var1: ijBR = %d\n", ijBR );
258 printf( "FLA_Tevd_v_opd_var1: m_A11 = %d\n", m_A11 );
259 #endif
260 
261  // Adjust ij_begin, which gets us ready for the next submatrix
262  // search in the current sweep.
263  ij_begin = ijBR + 1;
264 
265  // Index to the submatrices upon which we will operate.
266  d1 = buff_d + ijTL * inc_d;
267  e1 = buff_e + ijTL * inc_e;
268  G = buff_G + ijTL * rs_G;
269 
270  // Search for a batch of eigenvalues, recursing on deflated
271  // subproblems whenever a split occurs. Iteration continues
272  // as long as:
273  // (a) there is still matrix left to operate on, and
274  // (b) the number of iterations performed in this batch is
275  // less than n_G.
276  // If/when either of the two above conditions fails to hold,
277  // the function returns.
278  n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
279  n_G,
280  ijTL,
281  d1, inc_d,
282  e1, inc_e,
283  G, rs_G, cs_G,
284  &n_iter_perf );
285 
286  // Record the number of deflations that were observed.
287  total_deflations += n_deflations;
288 
289  // Update the maximum number of iterations performed in the
290  // current sweep.
291  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
292 
293 #ifdef PRINTF
294 printf( "FLA_Tevd_v_opd_var1: deflations observed = %d\n", n_deflations );
295 printf( "FLA_Tevd_v_opd_var1: total deflations observed = %d\n", total_deflations );
296 printf( "FLA_Tevd_v_opd_var1: num iterations performed = %d\n", n_iter_perf );
297 #endif
298 
299  // Store the most recent value of ijBR in m_G_sweep_max.
300  // When the sweep is done, this value will contain the minimum
301  // number of rows of G we can apply and safely include all
302  // non-identity rotations that were computed during the
303  // eigenvalue searches.
304  m_G_sweep_max = ijBR;
305 
306  // Make sure we haven't exceeded our maximum iteration count.
307  if ( n_iter_prev >= m_A * n_iter_max )
308  {
309 #ifdef PRINTF
310 printf( "FLA_Tevd_v_opd_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
311 #endif
312  FLA_Abort();
313  //return FLA_FAILURE;
314  }
315  }
316 
317  // The sweep is complete. Now we must apply the Givens rotations
318  // that were accumulated during the sweep.
319 
320  // Recall that the number of columns of U to which we apply
321  // rotations is one more than the number of rotations.
322  n_U_apply = m_G_sweep_max + 1;
323 
324 #ifdef PRINTF
325 printf( "FLA_Tevd_v_opd_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
326 #endif
327 
328  // Apply the Givens rotations. Note that we optimize the scope
329  // of the operation in two ways:
330  // 1. We only apply k sets of Givens rotations, where
331  // k = n_iter_perf_sweep_max. We could simply always apply
332  // n_G sets of rotations since G is initialized to contain
333  // identity rotations in every element, but we do this to
334  // save a little bit of time.
335  // 2. We only apply to the first n_U_apply columns of U since
336  // this is the most we need to touch given the ijBR index
337  // bound of the last submatrix found in the previous sweep.
338  // Similar to above, we could simply always perform the
339  // application on all m_A columns of U, but instead we apply
340  // only to the first n_U_apply columns to save time.
341  //FLA_Apply_G_rf_bld_var1( n_iter_perf_sweep_max,
342  //FLA_Apply_G_rf_bld_var2( n_iter_perf_sweep_max,
343  FLA_Apply_G_rf_bld_var3( n_iter_perf_sweep_max,
344  //FLA_Apply_G_rf_bld_var9( n_iter_perf_sweep_max,
345  //FLA_Apply_G_rf_bld_var6( n_iter_perf_sweep_max,
346  m_U,
347  n_U_apply,
348  buff_G, rs_G, cs_G,
349  buff_U, rs_U, cs_U,
350  b_alg );
351 
352 
353 
354  // Increment the total number of iterations previously performed.
355  n_iter_prev += n_iter_perf_sweep_max;
356 
357 #ifdef PRINTF
358 printf( "FLA_Tevd_v_opd_var1: total number of iterations performed: %d\n", n_iter_prev );
359 #endif
360  }
361 
362  return n_iter_prev;
363 }
FLA_Error FLA_Apply_G_rf_bld_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:128
FLA_Error FLA_Tevd_iteracc_v_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition: FLA_Tevd_iteracc_v_opt_var1.c:26
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Tevd_find_submatrix.c:28
void FLA_Abort(void)
Definition: FLA_Error.c:248
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78
Definition: blis_type_defs.h:138

References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_bld_var3(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var1().

◆ FLA_Tevd_v_ops_var1()

FLA_Error FLA_Tevd_v_ops_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
136 {
137  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
138 
139  return FLA_SUCCESS;
140 }

Referenced by FLA_Tevd_v_opt_var1().

◆ FLA_Tevd_v_opt_var1()

FLA_Error FLA_Tevd_v_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  U,
dim_t  b_alg 
)
14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_A, m_U, n_G;
18  int inc_d;
19  int inc_e;
20  int rs_G, cs_G;
21  int rs_U, cs_U;
22 
23  datatype = FLA_Obj_datatype( U );
24 
25  m_A = FLA_Obj_vector_dim( d );
26  m_U = FLA_Obj_length( U );
27  n_G = FLA_Obj_width( G );
28 
29  inc_d = FLA_Obj_vector_inc( d );
30  inc_e = FLA_Obj_vector_inc( e );
31 
32  rs_G = FLA_Obj_row_stride( G );
33  cs_G = FLA_Obj_col_stride( G );
34 
35  rs_U = FLA_Obj_row_stride( U );
36  cs_U = FLA_Obj_col_stride( U );
37 
38 
39  switch ( datatype )
40  {
41  case FLA_FLOAT:
42  {
43  float* buff_d = FLA_FLOAT_PTR( d );
44  float* buff_e = FLA_FLOAT_PTR( e );
45  scomplex* buff_G = FLA_COMPLEX_PTR( G );
46  float* buff_U = FLA_FLOAT_PTR( U );
47 
48  r_val = FLA_Tevd_v_ops_var1( m_A,
49  m_U,
50  n_G,
51  n_iter_max,
52  buff_d, inc_d,
53  buff_e, inc_e,
54  buff_G, rs_G, cs_G,
55  buff_U, rs_U, cs_U,
56  b_alg );
57 
58  break;
59  }
60 
61  case FLA_DOUBLE:
62  {
63  double* buff_d = FLA_DOUBLE_PTR( d );
64  double* buff_e = FLA_DOUBLE_PTR( e );
65  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
66  double* buff_U = FLA_DOUBLE_PTR( U );
67 
68  r_val = FLA_Tevd_v_opd_var1( m_A,
69  m_U,
70  n_G,
71  n_iter_max,
72  buff_d, inc_d,
73  buff_e, inc_e,
74  buff_G, rs_G, cs_G,
75  buff_U, rs_U, cs_U,
76  b_alg );
77 
78  break;
79  }
80 
81  case FLA_COMPLEX:
82  {
83  float* buff_d = FLA_FLOAT_PTR( d );
84  float* buff_e = FLA_FLOAT_PTR( e );
85  scomplex* buff_G = FLA_COMPLEX_PTR( G );
86  scomplex* buff_U = FLA_COMPLEX_PTR( U );
87 
88  r_val = FLA_Tevd_v_opc_var1( m_A,
89  m_U,
90  n_G,
91  n_iter_max,
92  buff_d, inc_d,
93  buff_e, inc_e,
94  buff_G, rs_G, cs_G,
95  buff_U, rs_U, cs_U,
96  b_alg );
97 
98  break;
99  }
100 
101  case FLA_DOUBLE_COMPLEX:
102  {
103  double* buff_d = FLA_DOUBLE_PTR( d );
104  double* buff_e = FLA_DOUBLE_PTR( e );
105  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
106  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
107 
108  r_val = FLA_Tevd_v_opz_var1( m_A,
109  m_U,
110  n_G,
111  n_iter_max,
112  buff_d, inc_d,
113  buff_e, inc_e,
114  buff_G, rs_G, cs_G,
115  buff_U, rs_U, cs_U,
116  b_alg );
117 
118  break;
119  }
120  }
121 
122  return r_val;
123 }
FLA_Error FLA_Tevd_v_opz_var1(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var1.c:380
FLA_Error FLA_Tevd_v_opd_var1(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G, double *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var1.c:144
FLA_Error FLA_Tevd_v_ops_var1(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, float *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var1.c:127
FLA_Error FLA_Tevd_v_opc_var1(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_U, int rs_U, int cs_U, int b_alg)
Definition: FLA_Tevd_v_opt_var1.c:365
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition: FLA_Query.c:137
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:133

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_v_opc_var1(), FLA_Tevd_v_opd_var1(), FLA_Tevd_v_ops_var1(), and FLA_Tevd_v_opz_var1().

Referenced by FLA_Hevd_lv_unb_var1().

◆ FLA_Tevd_v_opz_var1()

FLA_Error FLA_Tevd_v_opz_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_U,
int  rs_U,
int  cs_U,
int  b_alg 
)
389 {
390  dcomplex one = bl1_z1();
391 
392  dcomplex* G;
393  double* d1;
394  double* e1;
395  int r_val;
396  int done;
397  int m_G_sweep_max;
398  int ij_begin;
399  int ijTL, ijBR;
400  int m_A11;
401  int n_iter_perf;
402  int n_U_apply;
403  int total_deflations;
404  int n_deflations;
405  int n_iter_prev;
406  int n_iter_perf_sweep_max;
407 
408  // Initialize our completion flag.
409  done = FALSE;
410 
411  // Initialize a counter that holds the maximum number of rows of G
412  // that we would need to initialize for the next sweep.
413  m_G_sweep_max = m_A - 1;
414 
415  // Initialize a counter for the total number of iterations performed.
416  n_iter_prev = 0;
417 
418  // Iterate until the matrix has completely deflated.
419  for ( total_deflations = 0; done != TRUE; )
420  {
421 
422  // Initialize G to contain only identity rotations.
423  bl1_zsetm( m_G_sweep_max,
424  n_G,
425  &one,
426  buff_G, rs_G, cs_G );
427 
428  // Keep track of the maximum number of iterations performed in the
429  // current sweep. This is used when applying the sweep's Givens
430  // rotations.
431  n_iter_perf_sweep_max = 0;
432 
433  // Perform a sweep: Move through the matrix and perform a tridiagonal
434  // EVD on each non-zero submatrix that is encountered. During the
435  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
436  for ( ij_begin = 0; ij_begin < m_A; )
437  {
438 
439 #ifdef PRINTF
440 if ( ij_begin == 0 )
441 printf( "FLA_Tevd_v_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
442 #endif
443 
444  // Search for the first submatrix along the diagonal that is
445  // bounded by zeroes (or endpoints of the matrix). If no
446  // submatrix is found (ie: if the entire subdiagonal is zero),
447  // then FLA_FAILURE is returned. This function also inspects
448  // subdiagonal elements for proximity to zero. If a given
449  // element is close enough to zero, then it is deemed
450  // converged and manually set to zero.
451  r_val = FLA_Tevd_find_submatrix_opd( m_A,
452  ij_begin,
453  buff_d, inc_d,
454  buff_e, inc_e,
455  &ijTL,
456  &ijBR );
457 
458  // Verify that a submatrix was found. If one was not found,
459  // then we are done with the current sweep. Furthermore, if
460  // a submatrix was not found AND we began our search at the
461  // beginning of the matrix (ie: ij_begin == 0), then the
462  // matrix has completely deflated and so we are done with
463  // Francis step iteration.
464  if ( r_val == FLA_FAILURE )
465  {
466  if ( ij_begin == 0 )
467  {
468 #ifdef PRINTF
469 printf( "FLA_Tevd_v_opz_var1: subdiagonal is completely zero.\n" );
470 printf( "FLA_Tevd_v_opz_var1: Francis iteration is done!\n" );
471 #endif
472  done = TRUE;
473  }
474 
475  // Break out of the current sweep so we can apply the last
476  // remaining Givens rotations.
477  break;
478  }
479 
480  // If we got this far, then:
481  // (a) ijTL refers to the index of the first non-zero
482  // subdiagonal along the diagonal, and
483  // (b) ijBR refers to either:
484  // - the first zero element that occurs after ijTL, or
485  // - the last diagonal element.
486  // Note that ijTL and ijBR also correspond to the first and
487  // last diagonal elements of the submatrix of interest. Thus,
488  // we may compute the dimension of this submatrix as:
489  m_A11 = ijBR - ijTL + 1;
490 
491 #ifdef PRINTF
492 printf( "FLA_Tevd_v_opz_var1: ij_begin = %d\n", ij_begin );
493 printf( "FLA_Tevd_v_opz_var1: ijTL = %d\n", ijTL );
494 printf( "FLA_Tevd_v_opz_var1: ijBR = %d\n", ijBR );
495 printf( "FLA_Tevd_v_opz_var1: m_A11 = %d\n", m_A11 );
496 #endif
497 
498  // Adjust ij_begin, which gets us ready for the next submatrix
499  // search in the current sweep.
500  ij_begin = ijBR + 1;
501 
502  // Index to the submatrices upon which we will operate.
503  d1 = buff_d + ijTL * inc_d;
504  e1 = buff_e + ijTL * inc_e;
505  G = buff_G + ijTL * rs_G;
506 
507  // Search for a batch of eigenvalues, recursing on deflated
508  // subproblems whenever a split occurs. Iteration continues
509  // as long as:
510  // (a) there is still matrix left to operate on, and
511  // (b) the number of iterations performed in this batch is
512  // less than n_G.
513  // If/when either of the two above conditions fails to hold,
514  // the function returns.
515  n_deflations = FLA_Tevd_iteracc_v_opd_var1( m_A11,
516  n_G,
517  ijTL,
518  d1, inc_d,
519  e1, inc_e,
520  G, rs_G, cs_G,
521  &n_iter_perf );
522 
523  // Record the number of deflations that were observed.
524  total_deflations += n_deflations;
525 
526  // Update the maximum number of iterations performed in the
527  // current sweep.
528  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
529 
530 #ifdef PRINTF
531 printf( "FLA_Tevd_v_opz_var1: deflations observed = %d\n", n_deflations );
532 printf( "FLA_Tevd_v_opz_var1: total deflations observed = %d\n", total_deflations );
533 printf( "FLA_Tevd_v_opz_var1: num iterations performed = %d\n", n_iter_perf );
534 #endif
535 
536  // Store the most recent value of ijBR in m_G_sweep_max.
537  // When the sweep is done, this value will contain the minimum
538  // number of rows of G we can apply and safely include all
539  // non-identity rotations that were computed during the
540  // eigenvalue searches.
541  m_G_sweep_max = ijBR;
542 
543  // Make sure we haven't exceeded our maximum iteration count.
544  if ( n_iter_prev >= m_A * n_iter_max )
545  {
546 #ifdef PRINTF
547 printf( "FLA_Tevd_v_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
548 #endif
549  FLA_Abort();
550  //return FLA_FAILURE;
551  }
552  }
553 
554  // The sweep is complete. Now we must apply the Givens rotations
555  // that were accumulated during the sweep.
556 
557  // Recall that the number of columns of U to which we apply
558  // rotations is one more than the number of rotations.
559  n_U_apply = m_G_sweep_max + 1;
560 
561 #ifdef PRINTF
562 printf( "FLA_Tevd_v_opz_var1: applying %d sets of Givens rotations\n", n_iter_perf_sweep_max );
563 #endif
564 
565  // Apply the Givens rotations. Note that we optimize the scope
566  // of the operation in two ways:
567  // 1. We only apply k sets of Givens rotations, where
568  // k = n_iter_perf_sweep_max. We could simply always apply
569  // n_G sets of rotations since G is initialized to contain
570  // identity rotations in every element, but we do this to
571  // save a little bit of time.
572  // 2. We only apply to the first n_U_apply columns of U since
573  // this is the most we need to touch given the ijBR index
574  // bound of the last submatrix found in the previous sweep.
575  // Similar to above, we could simply always perform the
576  // application on all m_A columns of U, but instead we apply
577  // only to the first n_U_apply columns to save time.
578  //FLA_Apply_G_rf_blz_var5( n_iter_perf_sweep_max,
579  FLA_Apply_G_rf_blz_var3( n_iter_perf_sweep_max,
580  //FLA_Apply_G_rf_blz_var9( n_iter_perf_sweep_max,
581  //FLA_Apply_G_rf_blz_var6( n_iter_perf_sweep_max,
582  m_U,
583  n_U_apply,
584  buff_G, rs_G, cs_G,
585  buff_U, rs_U, cs_U,
586  b_alg );
587 
588  // Increment the total number of iterations previously performed.
589  n_iter_prev += n_iter_perf_sweep_max;
590 
591 #ifdef PRINTF
592 printf( "FLA_Tevd_v_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
593 #endif
594  }
595 
596  return n_iter_prev;
597 }
FLA_Error FLA_Apply_G_rf_blz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
Definition: FLA_Apply_G_rf_blk_var3.c:186

References bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Apply_G_rf_blz_var3(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_v_opd_var1().

Referenced by FLA_Tevd_v_opt_var1().