libflame  revision_anchor
Functions
FLA_Tevd_iteracc_v_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_Tevd_iteracc_v_ops_var3 (int m_A, int m_U, int n_G, int ijTL, float *buff_d, int inc_d, float *buff_e, int inc_e, float *buff_l, int inc_l, int *buff_ls, int inc_ls, float *buff_pu, int inc_pu, scomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
 
FLA_Error FLA_Tevd_iteracc_v_opd_var3 (int m_A, int m_U, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, double *buff_l, int inc_l, int *buff_ls, int inc_ls, double *buff_pu, int inc_pu, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
 

Function Documentation

◆ FLA_Tevd_iteracc_v_opd_var3()

FLA_Error FLA_Tevd_iteracc_v_opd_var3 ( int  m_A,
int  m_U,
int  n_G,
int  ijTL,
double *  buff_d,
int  inc_d,
double *  buff_e,
int  inc_e,
double *  buff_l,
int  inc_l,
int *  buff_ls,
int  inc_ls,
double *  buff_pu,
int  inc_pu,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
int *  n_iter_perf 
)
41 {
42  FLA_Error r_val;
43  int i, k;
44  int k_iter = 0;
45  int n_deflations = 0;
46  //double pshift;
47  //double eps = FLA_Mach_params_opd( FLA_MACH_EPS );
48  //double safmin = FLA_Mach_params_opd( FLA_MACH_SFMIN );
49 
50  // Iterate from back to front until all that is left is a 2x2.
51  for ( i = m_A - 1; i > 1; --i )
52  {
53  dcomplex* G1 = buff_G + (k_iter)*cs_G;
54  int m_ATL = i + 1;
55  int k_left = n_G - k_iter;
56 
57  /*------------------------------------------------------------*/
58 
59  // Search for an eigenvalue of ATL submatrix until
60  // (a) deflation occurs, or
61  // (b) we perform the maximum number of additional iterations
62  // that are allowed within the current sweep
63  // (ie: n_G - k_iter).
64 
65  r_val = FLA_Tevd_eigval_v_opd_var3( m_ATL,
66  m_U,
67  k_left,
68  //1,
69  G1, rs_G, cs_G,
70  buff_d, inc_d,
71  buff_e, inc_e,
72  buff_l, inc_l,
73  buff_ls, inc_ls,
74  buff_pu, inc_pu,
75  &k );
76 /*
77  FLA_Tevd_find_perfshift_opd( m_ATL,
78  m_U,
79  buff_d, inc_d,
80  buff_e, inc_e,
81  buff_l, inc_l,
82  buff_ls, inc_ls,
83  buff_pu, inc_pu,
84  &pshift );
85 
86  for ( k = 0; k < k_left; ++k )
87  {
88  // Mark the shift as used.
89  //pshift = *(buff_l + ij_shift * inc_l);
90  //*(buff_ls + ij_shift * inc_ls) = 0;
91  //printf( "using pshift %22.15e\n", pshift );
92  //printf( "using pshift %f\n", pshift );
93 
94  r_val = FLA_Tevd_francis_v_opd_var1( m_ATL,
95  &pshift,
96  g1, rs_G,
97  buff_d, inc_d,
98  buff_e, inc_e );
99  g1 += cs_G;
100 
101  // Check for internal deflation.
102  if ( r_val != FLA_SUCCESS )
103  {
104 //#ifdef PRINTF
105 // printf( "FLA_Tevd_eigval_v_opt_var1: Internal deflation in col %d, eig %d\n", r_val, m_A - 1 );
106 // printf( "FLA_Tevd_eigval_v_opt_var1: alpha11 = %23.19e\n", buff_d[r_val*inc_d] );
107 // printf( "FLA_Tevd_eigval_v_opt_var1: alpha21 alpha22 = %23.19e %23.19e\n", buff_e[r_val*inc_e], buff_d[(r_val+1)*inc_d] );
108 //#endif
109 
110  //printf( "found internal deflation in column %d\n", r_val );
111  // Set the off-diagonal element to zero.
112  buff_e[ r_val*inc_e ] = 0.0;
113  break;
114  }
115  else
116  {
117  double e_last = buff_e[ (m_ATL-2)*inc_e ];
118  double d_last_m1 = buff_d[ (m_ATL-2)*inc_d ];
119  double d_last = buff_d[ (m_ATL-1)*inc_d ];
120  r_val = i;
121 
122  if ( MAC_Tevd_eigval_converged_opd( eps, safmin, d_last_m1, e_last, d_last ) )
123  {
124  //printf( "zeroing %22.15e\n", buff_e[ (m_ATL-2)*inc_e ] );
125  buff_e[ (m_ATL-2)*inc_e ] = 0.0;
126  break;
127  }
128  }
129  }
130 */
131 
132  // If the eigenvalue search did not result in any deflation, return.
133  if ( r_val == FLA_FAILURE && k_iter == n_G )
134  {
135 #ifdef PRINTF
136  printf( "FLA_Tevd_iteracc_v_opd_var1: failed to converge (m_A11 = %d) after %2d iters k_total=%d/%d\n", i, k, k_iter, n_G );
137 #endif
138  *n_iter_perf = k_iter;
139  return n_deflations;
140  }
141 
142  // Update local counters according to the results of the eigenvalue
143  // search.
144  k_iter += k;
145  n_deflations += 1;
146 
147 
148 #ifdef PRINTF
149  if ( r_val == i )
150  printf( "FLA_Tevd_iteracc_v_opd_var3: found eig %22.15e in col %3d (n=%d) after %2d iters k_total=%d/%d\n", buff_d[ r_val*inc_d ], ijTL+r_val, m_ATL, k, k_iter, n_G );
151  else
152  printf( "FLA_Tevd_iteracc_v_opd_var3: split occurred in col %3d (n=%d) after %2d iters k_total=%d/%d\n", ijTL+r_val, m_ATL, k, k_iter, n_G );
153 #endif
154 
155  // If the most recent eigenvalue search put us at our limit
156  // for accumulated Givens rotation sets, return.
157  if ( k_iter == n_G )
158  {
159  *n_iter_perf = k_iter;
160  return n_deflations;
161  }
162 
163 
164  // If r_val != i, then a split occurred somewhere within submatrix
165  // ATL. Therefore, we must recurse with two subproblems.
166  if ( r_val != i )
167  {
168  int m_TLr = r_val + 1;
169  int m_BRr = m_ATL - m_TLr;
170  int ijTLr = 0;
171  int ijBRr = m_TLr;
172  int n_Gr = n_G - k_iter;
173  double* dTL = buff_d + (0 )*inc_d;
174  double* eTL = buff_e + (0 )*inc_e;
175  double* puTL = buff_pu+ (0 )*inc_pu;
176  dcomplex* GT = buff_G + (0 )*rs_G + (k_iter)*cs_G;
177  double* dBR = buff_d + (ijBRr)*inc_d;
178  double* eBR = buff_e + (ijBRr)*inc_e;
179  double* puBR = buff_pu+ (ijBRr)*inc_pu;
180  dcomplex* GB = buff_G + (ijBRr)*rs_G + (k_iter)*cs_G;
181 
182  int n_deflationsTL;
183  int n_deflationsBR;
184  int n_iter_perfTL;
185  int n_iter_perfBR;
186 
187 #ifdef PRINTF
188 printf( "FLA_Tevd_iteracc_v_opd_var3: Internal deflation in col %d\n", ijTL+r_val );
189 printf( "FLA_Tevd_iteracc_v_opd_var3: alpha11 = %23.19e\n", buff_d[r_val*inc_d] );
190 printf( "FLA_Tevd_iteracc_v_opd_var3: alpha21 alpha22 = %23.19e %23.19e\n", buff_e[r_val*inc_e], buff_d[(r_val+1)*inc_d] );
191 #endif
192 #ifdef PRINTF
193 printf( "FLA_Tevd_iteracc_v_opd_var3: recursing: m_TLr m_BRr: %d %d\n", m_TLr, m_BRr );
194 printf( "FLA_Tevd_iteracc_v_opd_var3: ijTLr ijBRr: %d %d\n", ijTLr, ijBRr );
195 printf( "FLA_Tevd_iteracc_v_opd_var3: GB(0,0) i,j: %d %d\n", ijTL + m_TLr+1, k_iter );
196 #endif
197  n_deflationsTL = FLA_Tevd_iteracc_v_opd_var3( m_TLr,
198  m_U,
199  n_Gr,
200  ijTL + ijTLr,
201  dTL, inc_d,
202  eTL, inc_e,
203  buff_l, inc_l,
204  buff_ls, inc_ls,
205  puTL, inc_pu,
206  GT, rs_G, cs_G,
207  &n_iter_perfTL );
208  n_deflationsBR = FLA_Tevd_iteracc_v_opd_var3( m_BRr,
209  m_U,
210  n_Gr,
211  ijTL + ijBRr,
212  dBR, inc_d,
213  eBR, inc_e,
214  buff_l, inc_l,
215  buff_ls, inc_ls,
216  puBR, inc_pu,
217  GB, rs_G, cs_G,
218  &n_iter_perfBR );
219 
220  *n_iter_perf = k_iter + max( n_iter_perfTL, n_iter_perfBR );
221 
222 #ifdef PRINTF
223 printf( "FLA_Tevd_iteracc_v_opd_var3: num deflations: %d = (prev:%d, TL:%d, BR:%d)\n", n_deflations + n_deflationsTL + n_deflationsBR, n_deflations, n_deflationsTL, n_deflationsBR );
224 printf( "FLA_Tevd_iteracc_v_opd_var3: num iterations: %d = (prev:%d, TL:%d, BR:%d)\n", *n_iter_perf, k_iter, n_iter_perfTL, n_iter_perfBR );
225 #endif
226  return n_deflations + n_deflationsTL + n_deflationsBR;
227  }
228 
229  /*------------------------------------------------------------*/
230  }
231 
232  // Skip 1x1 matrices (and submatrices) entirely.
233  if ( m_A > 1 )
234  {
235  dcomplex* g1 = buff_G + (k_iter)*cs_G;
236 
237  double* alpha11 = buff_d + (0 )*inc_d;
238  double* alpha21 = buff_e + (0 )*inc_e;
239  double* alpha22 = buff_d + (1 )*inc_d;
240  double lambda1;
241  double lambda2;
242 
243  double gamma;
244  double sigma;
245 
246  // Find the eigenvalue decomposition of the remaining (or only) 2x2
247  // submatrix.
248  FLA_Hevv_2x2_opd( alpha11,
249  alpha21,
250  alpha22,
251  &lambda1,
252  &lambda2,
253  &gamma,
254  &sigma );
255 
256  // Store the eigenvalues.
257  *alpha11 = lambda1;
258  *alpha22 = lambda2;
259 
260  // Zero out the remaining subdiagonal element.
261  *alpha21 = 0.0;
262 
263  // Store the rotation.
264  g1[0].real = gamma;
265  g1[0].imag = sigma;
266 
267 
268  // Update the local counters.
269  k_iter += 1;
270  n_deflations += 1;
271 
272 #ifdef PRINTF
273 printf( "FLA_Tevd_iteracc_v_opd_var3: Hevv eig %22.15e in col %3d (n=%d) after %2d iters k_total=%d/%d\n", buff_d[ 1*inc_d ], ijTL+1, 2, 1, k_iter, n_G );
274 printf( "FLA_Tevd_iteracc_v_opd_var3: Hevv eig %22.15e in col %3d (n=%d) after %2d iters k_total=%d/%d\n", buff_d[ 0*inc_d ], ijTL+0, 2, 0, k_iter, n_G );
275 #endif
276  }
277 
278 
279  *n_iter_perf = k_iter;
280  return n_deflations;
281 }
FLA_Error FLA_Tevd_eigval_v_opd_var3(int m_A, int m_U, int n_G, dcomplex *buff_G, int rs_G, int cs_G, double *buff_d, int inc_d, double *buff_e, int inc_e, double *buff_l, int inc_l, int *buff_ls, int inc_ls, double *buff_pu, int inc_pu, int *n_iter)
Definition: FLA_Tevd_eigval_v_opt_var3.c:30
FLA_Error FLA_Tevd_iteracc_v_opd_var3(int m_A, int m_U, int n_G, int ijTL, double *buff_d, int inc_d, double *buff_e, int inc_e, double *buff_l, int inc_l, int *buff_ls, int inc_ls, double *buff_pu, int inc_pu, dcomplex *buff_G, int rs_G, int cs_G, int *n_iter_perf)
Definition: FLA_Tevd_iteracc_v_opt_var3.c:30
int FLA_Error
Definition: FLA_type_defs.h:47
FLA_Error FLA_Hevv_2x2_opd(double *alpha11, double *alpha21, double *alpha22, double *lambda1, double *lambda2, double *gamma1, double *sigma1)
Definition: FLA_Hevv_2x2.c:249
int i
Definition: bl1_axmyv2.c:145
dcomplex
Definition: blis_type_defs.h:138
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References FLA_Hevv_2x2_opd(), FLA_Tevd_eigval_v_opd_var3(), FLA_Tevd_iteracc_v_opd_var3(), i, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Tevd_iteracc_v_opd_var3().

◆ FLA_Tevd_iteracc_v_ops_var3()

FLA_Error FLA_Tevd_iteracc_v_ops_var3 ( int  m_A,
int  m_U,
int  n_G,
int  ijTL,
float *  buff_d,
int  inc_d,
float *  buff_e,
int  inc_e,
float *  buff_l,
int  inc_l,
int *  buff_ls,
int  inc_ls,
float *  buff_pu,
int  inc_pu,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
int *  n_iter_perf 
)
24 {
25  return FLA_SUCCESS;
26 }