libflame  revision_anchor
Functions
FLA_Tevd_n_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Tevd_n_opt_var1 (dim_t n_iter_max, FLA_Obj d, FLA_Obj e, FLA_Obj G, FLA_Obj U)
 
FLA_Error FLA_Tevd_n_ops_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G)
 
FLA_Error FLA_Tevd_n_opd_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G)
 
FLA_Error FLA_Tevd_n_opc_var1 (int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G)
 
FLA_Error FLA_Tevd_n_opz_var1 (int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G)
 

Function Documentation

◆ FLA_Tevd_n_opc_var1()

FLA_Error FLA_Tevd_n_opc_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G 
)
162 {
163  return FLA_SUCCESS;
164 }

Referenced by FLA_Tevd_n_opt_var1().

◆ FLA_Tevd_n_opd_var1()

FLA_Error FLA_Tevd_n_opd_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G 
)
151 {
152  return FLA_SUCCESS;
153 }

Referenced by FLA_Tevd_n_opt_var1().

◆ FLA_Tevd_n_ops_var1()

FLA_Error FLA_Tevd_n_ops_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
scomplex buff_G,
int  rs_G,
int  cs_G 
)
138 {
139  return FLA_SUCCESS;
140 }

Referenced by FLA_Tevd_n_opt_var1().

◆ FLA_Tevd_n_opt_var1()

FLA_Error FLA_Tevd_n_opt_var1 ( dim_t  n_iter_max,
FLA_Obj  d,
FLA_Obj  e,
FLA_Obj  G,
FLA_Obj  U 
)
14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_A, m_U, n_G;
18  int inc_d;
19  int inc_e;
20  int rs_G, cs_G;
21 
22  datatype = FLA_Obj_datatype( U );
23 
24  m_A = FLA_Obj_vector_dim( d );
25  m_U = FLA_Obj_vector_dim( d );
26  n_G = FLA_Obj_width( G );
27 
28  inc_d = FLA_Obj_vector_inc( d );
29  inc_e = FLA_Obj_vector_inc( e );
30 
31  rs_G = FLA_Obj_row_stride( G );
32  cs_G = FLA_Obj_col_stride( G );
33 
34 /*
35 FLA_Obj de, deL, deR, deLT, deLB;
36 FLA_Obj_create( FLA_DOUBLE, m_A, 2, 0, 0, &de );
37 FLA_Part_1x2( de, &deL, &deR, 1, FLA_LEFT );
38 FLA_Part_2x1( deL, &deLT,
39  &deLB, 1, FLA_BOTTOM );
40 FLA_Copy( d, deR );
41 FLA_Copy( e, deLT );
42 FLA_Set( FLA_ZERO, deLB );
43 //FLA_Obj_show( "de", de, "%21.17e", "" );
44 */
45 
46  switch ( datatype )
47  {
48  case FLA_FLOAT:
49  {
50  float* buff_d = FLA_FLOAT_PTR( d );
51  float* buff_e = FLA_FLOAT_PTR( e );
52  scomplex* buff_G = FLA_COMPLEX_PTR( G );
53 
54  r_val = FLA_Tevd_n_ops_var1( m_A,
55  m_U,
56  n_G,
57  n_iter_max,
58  buff_d, inc_d,
59  buff_e, inc_e,
60  buff_G, rs_G, cs_G );
61 
62  break;
63  }
64 
65  case FLA_DOUBLE:
66  {
67  double* buff_d = FLA_DOUBLE_PTR( d );
68  double* buff_e = FLA_DOUBLE_PTR( e );
69  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
70 
71  r_val = FLA_Tevd_n_opd_var1( m_A,
72  m_U,
73  n_G,
74  n_iter_max,
75  buff_d, inc_d,
76  buff_e, inc_e,
77  buff_G, rs_G, cs_G );
78 
79  break;
80  }
81 
82  case FLA_COMPLEX:
83  {
84  float* buff_d = FLA_FLOAT_PTR( d );
85  float* buff_e = FLA_FLOAT_PTR( e );
86  scomplex* buff_G = FLA_COMPLEX_PTR( G );
87 
88  r_val = FLA_Tevd_n_opc_var1( m_A,
89  m_U,
90  n_G,
91  n_iter_max,
92  buff_d, inc_d,
93  buff_e, inc_e,
94  buff_G, rs_G, cs_G );
95 
96  break;
97  }
98 
99  case FLA_DOUBLE_COMPLEX:
100  {
101  double* buff_d = FLA_DOUBLE_PTR( d );
102  double* buff_e = FLA_DOUBLE_PTR( e );
103  dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G );
104 
105  r_val = FLA_Tevd_n_opz_var1( m_A,
106  m_U,
107  n_G,
108  n_iter_max,
109  buff_d, inc_d,
110  buff_e, inc_e,
111  buff_G, rs_G, cs_G );
112 
113  break;
114  }
115  }
116 /*
117 FLA_Copy( d, deR );
118 FLA_Copy( e, deLT );
119 FLA_Set( FLA_ZERO, deLB );
120 FLA_Sort( FLA_FORWARD, deR );
121 FLA_Obj_show( "de after", de, "%21.17e", "" );
122 double eps = FLA_Mach_params_opd( FLA_MACH_EPS );
123 printf( "epsilon = %21.17e\n", eps );
124 FLA_Obj_free( &de );
125 */
126  return r_val;
127 }
FLA_Error FLA_Tevd_n_opd_var1(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G)
Definition: FLA_Tevd_n_opt_var1.c:144
FLA_Error FLA_Tevd_n_opc_var1(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G)
Definition: FLA_Tevd_n_opt_var1.c:155
FLA_Error FLA_Tevd_n_opz_var1(int m_A, int m_U, int n_G, int n_iter_max, double *buff_d, int inc_d, double *buff_e, int inc_e, dcomplex *buff_G, int rs_G, int cs_G)
Definition: FLA_Tevd_n_opt_var1.c:168
FLA_Error FLA_Tevd_n_ops_var1(int m_A, int m_U, int n_G, int n_iter_max, float *buff_d, int inc_d, float *buff_e, int inc_e, scomplex *buff_G, int rs_G, int cs_G)
Definition: FLA_Tevd_n_opt_var1.c:131
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
dim_t FLA_Obj_vector_dim(FLA_Obj obj)
Definition: FLA_Query.c:137
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Error
Definition: FLA_type_defs.h:47
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138
Definition: blis_type_defs.h:133

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), FLA_Obj_width(), FLA_Tevd_n_opc_var1(), FLA_Tevd_n_opd_var1(), FLA_Tevd_n_ops_var1(), and FLA_Tevd_n_opz_var1().

◆ FLA_Tevd_n_opz_var1()

FLA_Error FLA_Tevd_n_opz_var1 ( int  m_A,
int  m_U,
int  n_G,
int  n_iter_max,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
dcomplex buff_G,
int  rs_G,
int  cs_G 
)
175 {
176  dcomplex one = bl1_z1();
177  double rone = bl1_d1();
178 
179  double eps;
180  double eps2;
181  double safmin;
182  double ssfmin;
183  double safmax;
184  double ssfmax;
185 
186  dcomplex* G;
187  double* d1;
188  double* e1;
189  int r_val;
190  int done;
191  int m_G_sweep_max;
192  int ij_begin;
193  int ijTL, ijBR;
194  int m_A11;
195  int n_iter_perf;
196  int total_deflations;
197  int n_deflations;
198  int n_iter_prev;
199  int n_iter_perf_sweep_max;
200 
201  // Initialize some numerical constants.
202  eps = FLA_Mach_params_opd( FLA_MACH_EPS );
203  eps2 = FLA_Mach_params_opd( FLA_MACH_EPS2 );
204  safmin = FLA_Mach_params_opd( FLA_MACH_SFMIN );
205  safmax = rone / safmin;
206  ssfmax = sqrt( safmax ) / 3.0;
207  ssfmin = sqrt( safmin ) / eps2;
208 
209  // Initialize our completion flag.
210  done = FALSE;
211 
212  // Initialize a counter that holds the maximum number of rows of G
213  // that we would need to initialize for the next sweep.
214  m_G_sweep_max = m_A - 1;
215 
216  // Initialize a counter for the total number of iterations performed.
217  n_iter_prev = 0;
218 
219  // Iterate until the matrix has completely deflated.
220  for ( total_deflations = 0; done != TRUE; )
221  {
222 
223  // Initialize G to contain only identity rotations.
224  bl1_zsetm( m_G_sweep_max,
225  n_G,
226  &one,
227  buff_G, rs_G, cs_G );
228 
229  // Keep track of the maximum number of iterations performed in the
230  // current sweep. This is used when applying the sweep's Givens
231  // rotations.
232  n_iter_perf_sweep_max = 0;
233 
234  // Perform a sweep: Move through the matrix and perform a tridiagonal
235  // EVD on each non-zero submatrix that is encountered. During the
236  // first time through, ijTL will be 0 and ijBR will be m_A - 1.
237  for ( ij_begin = 0; ij_begin < m_A; )
238  {
239 
240 #ifdef PRINTF
241 if ( ij_begin == 0 )
242 printf( "FLA_Tevd_n_opz_var1: beginning new sweep (ij_begin = %d)\n", ij_begin );
243 #endif
244 
245  // Search for the first submatrix along the diagonal that is
246  // bounded by zeroes (or endpoints of the matrix). If no
247  // submatrix is found (ie: if the entire subdiagonal is zero
248  // then FLA_FAILURE is returned. This function also inspects
249  // subdiagonal elements for proximity to zero. If a given
250  // element is close enough to zero, then it is deemed
251  // converged and manually set to zero.
252  r_val = FLA_Tevd_find_submatrix_opd( m_A,
253  ij_begin,
254  buff_d, inc_d,
255  buff_e, inc_e,
256  &ijTL,
257  &ijBR );
258 
259  // Verify that a submatrix was found. If one was not found,
260  // then we are done with the current sweep. Furthermore, if
261  // a submatrix was not found AND we began our search at the
262  // beginning of the matrix (ie: ij_begin == 0), then the
263  // matrix has completely deflated and so we are done with
264  // Francis step iteration.
265  if ( r_val == FLA_FAILURE )
266  {
267  if ( ij_begin == 0 )
268  {
269 #ifdef PRINTF
270 printf( "FLA_Tevd_n_opz_var1: subdiagonal is completely zero.\n" );
271 printf( "FLA_Tevd_n_opz_var1: Francis iteration is done!\n" );
272 #endif
273  done = TRUE;
274  }
275 
276  // Break out of the current sweep so we can apply the last
277  // remaining Givens rotations.
278  break;
279  }
280 
281  // If we got this far, then:
282  // (a) ijTL refers to the index of the first non-zero
283  // subdiagonal along the diagonal, and
284  // (b) ijBR refers to either:
285  // - the first zero element that occurs after ijTL, or
286  // - the the last diagonal element.
287  // Note that ijTL and ijBR also correspond to the first and
288  // last diagonal elements of the submatrix of interest. Thus,
289  // we may compute the dimension of this submatrix as:
290  m_A11 = ijBR - ijTL + 1;
291 
292 #ifdef PRINTF
293 printf( "FLA_Tevd_n_opz_var1: ij_begin = %d\n", ij_begin );
294 printf( "FLA_Tevd_n_opz_var1: ijTL = %d\n", ijTL );
295 printf( "FLA_Tevd_n_opz_var1: ijBR = %d\n", ijBR );
296 printf( "FLA_Tevd_n_opz_var1: m_A11 = %d\n", m_A11 );
297 #endif
298 
299  // Adjust ij_begin, which gets us ready for the next submatrix
300  // search in the current sweep.
301  ij_begin = ijBR + 1;
302 
303  // Index to the submatrices upon which we will operate.
304  d1 = buff_d + ijTL * inc_d;
305  e1 = buff_e + ijTL * inc_e;
306  G = buff_G + ijTL * rs_G;
307 
308  // Compute the 1-norm (which equals the infinity norm since the
309  // matrix is tridiagonal and symmetric) and perform appropriate
310  // scaling if necessary.
311 /*
312  FLA_Norm1_tridiag( m_A11,
313  d1, inc_d,
314  e1, inc_e,
315  &norm );
316 */
317 
318  // Search for a batch of eigenvalues, recursing on deflated
319  // subproblems whenever a split occurs. Iteration continues
320  // as long as:
321  // (a) there is still matrix left to operate on, and
322  // (b) the number of iterations performed in this batch is
323  // less than n_G.
324  // If/when either of the two above conditions fails to hold,
325  // the function returns.
326  n_deflations = FLA_Tevd_iteracc_n_opd_var1( m_A11,
327  n_G,
328  ijTL,
329  d1, inc_d,
330  e1, inc_e,
331  &n_iter_perf );
332 
333  // Record the number of deflations that were observed.
334  total_deflations += n_deflations;
335 
336  // Update the maximum number of iterations performed in the
337  // current sweep.
338  n_iter_perf_sweep_max = max( n_iter_perf_sweep_max, n_iter_perf );
339 
340 #ifdef PRINTF
341 printf( "FLA_Tevd_n_opz_var1: deflations observed = %d\n", n_deflations );
342 printf( "FLA_Tevd_n_opz_var1: total deflations observed = %d\n", total_deflations );
343 printf( "FLA_Tevd_n_opz_var1: num iterations performed = %d\n", n_iter_perf );
344 #endif
345 
346  // Store the most recent value of ijBR in m_G_sweep_max.
347  // When the sweep is done, this value will contain the minimum
348  // number of rows of G we can apply and safely include all
349  // non-identity rotations that were computed during the
350  // eigenvalue searches.
351  m_G_sweep_max = ijBR;
352 
353  // Make sure we haven't exceeded our maximum iteration count.
354  if ( n_iter_prev >= m_A * n_iter_max )
355  {
356 #ifdef PRINTF
357 printf( "FLA_Tevd_n_opz_var1: reached maximum total number of iterations: %d\n", n_iter_prev );
358 #endif
359  FLA_Abort();
360  //return FLA_FAILURE;
361  }
362  }
363 
364  // The sweep is complete.
365 
366  // Increment the total number of iterations previously performed.
367  n_iter_prev += n_iter_perf_sweep_max;
368 
369 #ifdef PRINTF
370 printf( "FLA_Tevd_n_opz_var1: total number of iterations performed: %d\n", n_iter_prev );
371 #endif
372  }
373 
374  //return FLA_SUCCESS;
375  return n_iter_prev;
376 }
FLA_Error FLA_Tevd_iteracc_n_opd_var1(int m_A, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, int *n_iter_perf)
Definition: FLA_Tevd_iteracc_n_opt_var1.c:25
FLA_Error FLA_Tevd_find_submatrix_opd(int m_A, int ij_begin, double *buff_d, int inc_d, double *buff_e, int inc_e, int *ijTL, int *ijBR)
Definition: FLA_Tevd_find_submatrix.c:28
void FLA_Abort(void)
Definition: FLA_Error.c:248
double FLA_Mach_params_opd(FLA_Machval machval)
Definition: FLA_Mach_params.c:74
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
double bl1_d1(void)
Definition: bl1_constants.c:54
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78

References bl1_d1(), bl1_z1(), bl1_zsetm(), FLA_Abort(), FLA_Mach_params_opd(), FLA_Tevd_find_submatrix_opd(), and FLA_Tevd_iteracc_n_opd_var1().

Referenced by FLA_Tevd_n_opt_var1().