libflame  revision_anchor
Functions
FLA_Tridiag_UT_l.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Tridiag_UT_l_blk_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_unb_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_unb_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_blk_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_blf_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_blk_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_blf_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_unb_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_unb_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_opt_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ops_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_opz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_ofu_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3 (FLA_Obj A, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Tridiag_UT_l_step_ofz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Fused_Her2_Ax_l_opt_var1 (FLA_Obj alpha, FLA_Obj u, FLA_Obj z, FLA_Obj A, FLA_Obj x, FLA_Obj w)
 
FLA_Error FLA_Fused_Her2_Ax_l_ops_var1 (int m_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opd_var1 (int m_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opc_var1 (int m_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Her2_Ax_l_opz_var1 (int m_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 (FLA_Obj delta, FLA_Obj U, FLA_Obj Z, FLA_Obj t, FLA_Obj u, FLA_Obj w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Her2_Ax_l_opc_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opc_var1 ( int  m_A,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_x,
int  inc_x,
scomplex *  buff_w,
int  inc_w 
)
336 {
337  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
338  int i;
339 
340  bl1_csetv( m_A,
341  buff_0,
342  buff_w, inc_w );
343 
344  for ( i = 0; i < m_A; ++i )
345  {
346  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
347  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
348 
349  scomplex* upsilon1 = buff_u + (i )*inc_u;
350  scomplex* u2 = buff_u + (i+1)*inc_u;
351 
352  scomplex* zeta1 = buff_z + (i )*inc_z;
353  scomplex* z2 = buff_z + (i+1)*inc_z;
354 
355  scomplex* chi1 = buff_x + (i )*inc_x;
356  scomplex* x2 = buff_x + (i+1)*inc_x;
357 
358  scomplex* omega1 = buff_w + (i )*inc_w;
359  scomplex* w2 = buff_w + (i+1)*inc_w;
360 
361  // scomplex* beta = buff_beta;
362 
363  scomplex minus_conj_upsilon1;
364  scomplex minus_conj_zeta1;
365  scomplex temp;
366 
367  int m_ahead = m_A - i - 1;
368 
369  /*------------------------------------------------------------*/
370 
371  // bl1_ccopyconj( zeta1, &conj_zeta1 );
372  // bl1_cmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
373  // bl1_cmult3( &minus_conj_zeta1, upsilon1, &temp );
374  // bl1_cadd3( &temp, alpha11, alpha11 );
375 
376  //bl1_ccopyconj( upsilon1, &conj_upsilon1 );
377  //bl1_cmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
378  //bl1_cmult3( &minus_conj_upsilon1, zeta1, &temp );
379  //bl1_cadd3( &temp, alpha11, alpha11 );
380  minus_conj_zeta1.real = - zeta1->real;
381  minus_conj_zeta1.imag = - -zeta1->imag;
382  minus_conj_upsilon1.real = - upsilon1->real;
383  minus_conj_upsilon1.imag = - -upsilon1->imag;
384 
385  alpha11->real -= zeta1->real * upsilon1->real - -zeta1->imag * upsilon1->imag +
386  zeta1->real * upsilon1->real - zeta1->imag * -upsilon1->imag;
387  alpha11->imag -= -zeta1->imag * upsilon1->real + zeta1->real * upsilon1->imag +
388  zeta1->imag * upsilon1->real + zeta1->real * -upsilon1->imag;
389 
390  bl1_caxpyv( BLIS1_NO_CONJUGATE,
391  m_ahead,
392  &minus_conj_zeta1,
393  u2, inc_u,
394  a21, rs_A );
395 /*
396  F77_caxpy( &m_ahead,
397  &minus_conj_zeta1,
398  u2, &inc_u,
399  a21, &rs_A );
400 */
401 
402 
403  bl1_caxpyv( BLIS1_NO_CONJUGATE,
404  m_ahead,
405  &minus_conj_upsilon1,
406  z2, inc_z,
407  a21, rs_A );
408 /*
409  F77_caxpy( &m_ahead,
410  &minus_conj_upsilon1,
411  z2, &inc_z,
412  a21, &rs_A );
413 */
414 
415  // bl1_cmult3( alpha11, chi1, &temp );
416  // bl1_cadd3( &temp, omega1, omega1 );
417  omega1->real += alpha11->real * chi1->real - alpha11->imag * chi1->imag;
418  omega1->imag += alpha11->imag * chi1->real + alpha11->real * chi1->imag;
419 
420  bl1_cdot( BLIS1_CONJUGATE,
421  m_ahead,
422  a21, rs_A,
423  x2, inc_x,
424  &temp );
425  // bl1_cadd3( &temp, omega1, omega1 );
426  omega1->real += temp.real;
427  omega1->imag += temp.imag;
428 
429  bl1_caxpyv( BLIS1_NO_CONJUGATE,
430  m_ahead,
431  chi1,
432  a21, rs_A,
433  w2, inc_w );
434 /*
435  F77_caxpy( &m_ahead,
436  chi1,
437  a21, &rs_A,
438  w2, &inc_w );
439 */
440 
441  /*------------------------------------------------------------*/
442 
443  }
444 
445  return FLA_SUCCESS;
446 }
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
double *restrict zeta1
Definition: bl1_axmyv2.c:142
int i
Definition: bl1_axmyv2.c:145
chi1
Definition: bl1_axmyv2.c:366
dcomplex temp
Definition: bl1_axpyv2b.c:301
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139
Definition: blis_type_defs.h:133
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, scomplex::imag, dcomplex::imag, omega1, scomplex::real, dcomplex::real, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofc_var2().

◆ FLA_Fused_Her2_Ax_l_opd_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opd_var1 ( int  m_A,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_z,
int  inc_z,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_x,
int  inc_x,
double *  buff_w,
int  inc_w 
)
253 {
254  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
255  int i;
256 
257  bl1_dsetv( m_A,
258  buff_0,
259  buff_w, inc_w );
260 
261  for ( i = 0; i < m_A; ++i )
262  {
263  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
264  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
265 
266  double* upsilon1 = buff_u + (i )*inc_u;
267  double* u2 = buff_u + (i+1)*inc_u;
268 
269  double* zeta1 = buff_z + (i )*inc_z;
270  double* z2 = buff_z + (i+1)*inc_z;
271 
272  double* chi1 = buff_x + (i )*inc_x;
273  double* x2 = buff_x + (i+1)*inc_x;
274 
275  double* omega1 = buff_w + (i )*inc_w;
276  double* w2 = buff_w + (i+1)*inc_w;
277 
278  // double* beta = buff_beta;
279 
280  double minus_conj_upsilon1;
281  double minus_conj_zeta1;
282  double temp;
283 
284  int m_ahead = m_A - i - 1;
285 
286  /*------------------------------------------------------------*/
287 
288  // bl1_dcopyconj( zeta1, &conj_zeta1 );
289  // bl1_dmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
290  // bl1_dmult3( &minus_conj_zeta1, upsilon1, &temp );
291  // bl1_dadd3( &temp, alpha11, alpha11 );
292 
293  //bl1_dcopyconj( upsilon1, &conj_upsilon1 );
294  //bl1_dmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
295  //bl1_dmult3( &minus_conj_upsilon1, zeta1, &temp );
296  //bl1_dadd3( &temp, alpha11, alpha11 );
297  minus_conj_zeta1 = - *zeta1;
298  minus_conj_upsilon1 = - *upsilon1;
299 
300  *alpha11 -= 2.0 * *zeta1 * *upsilon1;
301 
302  // bl1_dmult3( alpha11, chi1, &temp );
303  // bl1_dadd3( &temp, omega1, omega1 );
304  *omega1 += *alpha11 * *chi1;
305 
306  bl1_daxpyv2bdotaxpy( m_ahead,
307  &minus_conj_zeta1,
308  u2, inc_u,
309  &minus_conj_upsilon1,
310  z2, inc_z,
311  a21, rs_A,
312  x2, inc_x,
313  chi1,
314  &temp,
315  w2, inc_w );
316 
317  // bl1_dadd3( &temp, omega1, omega1 );
318  *omega1 += temp;
319 
320  /*------------------------------------------------------------*/
321 
322  }
323 
324  return FLA_SUCCESS;
325 }
void bl1_daxpyv2bdotaxpy(int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition: bl1_axpyv2bdotaxpy.c:36
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39

References bl1_daxpyv2bdotaxpy(), bl1_dsetv(), chi1, FLA_ZERO, i, omega1, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofd_var2().

◆ FLA_Fused_Her2_Ax_l_ops_var1()

FLA_Error FLA_Fused_Her2_Ax_l_ops_var1 ( int  m_A,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_z,
int  inc_z,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_x,
int  inc_x,
float *  buff_w,
int  inc_w 
)
133 {
134  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
135  int i;
136 
137  bl1_ssetv( m_A,
138  buff_0,
139  buff_w, inc_w );
140 
141  for ( i = 0; i < m_A; ++i )
142  {
143  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
144  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
145 
146  float* upsilon1 = buff_u + (i )*inc_u;
147  float* u2 = buff_u + (i+1)*inc_u;
148 
149  float* zeta1 = buff_z + (i )*inc_z;
150  float* z2 = buff_z + (i+1)*inc_z;
151 
152  float* chi1 = buff_x + (i )*inc_x;
153  float* x2 = buff_x + (i+1)*inc_x;
154 
155  float* omega1 = buff_w + (i )*inc_w;
156  float* w2 = buff_w + (i+1)*inc_w;
157 
158  // float* beta = buff_beta;
159 
160  float minus_conj_upsilon1;
161  float minus_conj_zeta1;
162  float temp;
163 
164  int m_ahead = m_A - i - 1;
165 
166  /*------------------------------------------------------------*/
167 
168  // bl1_scopyconj( zeta1, &conj_zeta1 );
169  // bl1_smult3( beta, &conj_zeta1, &minus_conj_zeta1 );
170  // bl1_smult3( &minus_conj_zeta1, upsilon1, &temp );
171  // bl1_sadd3( &temp, alpha11, alpha11 );
172 
173  //bl1_scopyconj( upsilon1, &conj_upsilon1 );
174  //bl1_smult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
175  //bl1_smult3( &minus_conj_upsilon1, zeta1, &temp );
176  //bl1_sadd3( &temp, alpha11, alpha11 );
177  minus_conj_zeta1 = - *zeta1;
178  minus_conj_upsilon1 = - *upsilon1;
179 
180  *alpha11 -= 2.0F * *zeta1 * *upsilon1;
181 
182  bl1_saxpyv( BLIS1_NO_CONJUGATE,
183  m_ahead,
184  &minus_conj_zeta1,
185  u2, inc_u,
186  a21, rs_A );
187 /*
188  F77_saxpy( &m_ahead,
189  &minus_conj_zeta1,
190  u2, &inc_u,
191  a21, &rs_A );
192 */
193 
194 
195  bl1_saxpyv( BLIS1_NO_CONJUGATE,
196  m_ahead,
197  &minus_conj_upsilon1,
198  z2, inc_z,
199  a21, rs_A );
200 /*
201  F77_saxpy( &m_ahead,
202  &minus_conj_upsilon1,
203  z2, &inc_z,
204  a21, &rs_A );
205 */
206 
207  // bl1_smult3( alpha11, chi1, &temp );
208  // bl1_sadd3( &temp, omega1, omega1 );
209  *omega1 += *alpha11 * *chi1;
210 
211  bl1_sdot( BLIS1_CONJUGATE,
212  m_ahead,
213  a21, rs_A,
214  x2, inc_x,
215  &temp );
216 /*
217  temp = F77_sdot( &m_ahead,
218  a21, &rs_A,
219  x2, &inc_x );
220 */
221 
222  // bl1_sadd3( &temp, omega1, omega1 );
223  *omega1 += temp;
224 
225  bl1_saxpyv( BLIS1_NO_CONJUGATE,
226  m_ahead,
227  chi1,
228  a21, rs_A,
229  w2, inc_w );
230 /*
231  F77_saxpy( &m_ahead,
232  chi1,
233  a21, &rs_A,
234  w2, &inc_w );
235 */
236 
237  /*------------------------------------------------------------*/
238 
239  }
240 
241  return FLA_SUCCESS;
242 }
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, omega1, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofs_var2().

◆ FLA_Fused_Her2_Ax_l_opt_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  u,
FLA_Obj  z,
FLA_Obj  A,
FLA_Obj  x,
FLA_Obj  w 
)
14 {
15 /*
16  Effective computation:
17  A = A + beta * ( u * z' + z * u' );
18  w = A * x;
19 */
20  FLA_Datatype datatype;
21  int m_A;
22  int rs_A, cs_A;
23  int inc_u, inc_z, inc_x, inc_w;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  inc_u = FLA_Obj_vector_inc( u );
33  inc_z = FLA_Obj_vector_inc( z );
34  inc_x = FLA_Obj_vector_inc( x );
35  inc_w = FLA_Obj_vector_inc( w );
36 
37 
38  switch ( datatype )
39  {
40  case FLA_FLOAT:
41  {
42  float* buff_A = FLA_FLOAT_PTR( A );
43  float* buff_u = FLA_FLOAT_PTR( u );
44  float* buff_z = FLA_FLOAT_PTR( z );
45  float* buff_x = FLA_FLOAT_PTR( x );
46  float* buff_w = FLA_FLOAT_PTR( w );
47  float* buff_beta = FLA_FLOAT_PTR( beta );
48 
49  FLA_Fused_Her2_Ax_l_ops_var1( m_A,
50  buff_beta,
51  buff_u, inc_u,
52  buff_z, inc_z,
53  buff_A, rs_A, cs_A,
54  buff_x, inc_x,
55  buff_w, inc_w );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_u = FLA_DOUBLE_PTR( u );
64  double* buff_z = FLA_DOUBLE_PTR( z );
65  double* buff_x = FLA_DOUBLE_PTR( x );
66  double* buff_w = FLA_DOUBLE_PTR( w );
67  double* buff_beta = FLA_DOUBLE_PTR( beta );
68 
69  FLA_Fused_Her2_Ax_l_opd_var1( m_A,
70  buff_beta,
71  buff_u, inc_u,
72  buff_z, inc_z,
73  buff_A, rs_A, cs_A,
74  buff_x, inc_x,
75  buff_w, inc_w );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_A = FLA_COMPLEX_PTR( A );
83  scomplex* buff_u = FLA_COMPLEX_PTR( u );
84  scomplex* buff_z = FLA_COMPLEX_PTR( z );
85  scomplex* buff_x = FLA_COMPLEX_PTR( x );
86  scomplex* buff_w = FLA_COMPLEX_PTR( w );
87  scomplex* buff_beta = FLA_COMPLEX_PTR( beta );
88 
89  FLA_Fused_Her2_Ax_l_opc_var1( m_A,
90  buff_beta,
91  buff_u, inc_u,
92  buff_z, inc_z,
93  buff_A, rs_A, cs_A,
94  buff_x, inc_x,
95  buff_w, inc_w );
96 
97  break;
98  }
99 
100  case FLA_DOUBLE_COMPLEX:
101  {
102  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
103  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
104  dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
105  dcomplex* buff_x = FLA_DOUBLE_COMPLEX_PTR( x );
106  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
107  dcomplex* buff_beta = FLA_DOUBLE_COMPLEX_PTR( beta );
108 
109  FLA_Fused_Her2_Ax_l_opz_var1( m_A,
110  buff_beta,
111  buff_u, inc_u,
112  buff_z, inc_z,
113  buff_A, rs_A, cs_A,
114  buff_x, inc_x,
115  buff_w, inc_w );
116 
117  break;
118  }
119  }
120 
121  return FLA_SUCCESS;
122 }
FLA_Error FLA_Fused_Her2_Ax_l_opc_var1(int m_A, scomplex *buff_beta, scomplex *buff_u, int inc_u, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:329
FLA_Error FLA_Fused_Her2_Ax_l_opd_var1(int m_A, double *buff_beta, double *buff_u, int inc_u, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:246
FLA_Error FLA_Fused_Her2_Ax_l_opz_var1(int m_A, dcomplex *buff_beta, dcomplex *buff_u, int inc_u, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:450
FLA_Error FLA_Fused_Her2_Ax_l_ops_var1(int m_A, float *buff_beta, float *buff_u, int inc_u, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_w, int inc_w)
Definition: FLA_Fused_Her2_Ax_l_opt_var1.c:126
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Fused_Her2_Ax_l_opd_var1(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_vector_inc().

◆ FLA_Fused_Her2_Ax_l_opz_var1()

FLA_Error FLA_Fused_Her2_Ax_l_opz_var1 ( int  m_A,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_x,
int  inc_x,
dcomplex *  buff_w,
int  inc_w 
)
457 {
458  dcomplex zero = bl1_z0();
459  int i;
460 
461  bl1_zsetv( m_A,
462  &zero,
463  buff_w, inc_w );
464 
465  for ( i = 0; i < m_A; ++i )
466  {
467  dcomplex* restrict alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
468  dcomplex* restrict a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
469 
470  dcomplex* restrict upsilon1 = buff_u + (i )*inc_u;
471  dcomplex* restrict u2 = buff_u + (i+1)*inc_u;
472 
473  dcomplex* restrict zeta1 = buff_z + (i )*inc_z;
474  dcomplex* restrict z2 = buff_z + (i+1)*inc_z;
475 
476  dcomplex* restrict chi1 = buff_x + (i )*inc_x;
477  dcomplex* restrict x2 = buff_x + (i+1)*inc_x;
478 
479  dcomplex* restrict omega1 = buff_w + (i )*inc_w;
480  dcomplex* restrict w2 = buff_w + (i+1)*inc_w;
481 
482  //dcomplex* restrict beta = buff_beta;
483 
484  dcomplex minus_conj_upsilon1;
485  dcomplex minus_conj_zeta1;
486  dcomplex temp;
487 
488  dcomplex ze1;
489  dcomplex up1;
490  dcomplex a11;
491  dcomplex om1;
492  dcomplex ch1;
493 
494  int m_ahead = m_A - i - 1;
495 
496  /*------------------------------------------------------------*/
497 
498  // bl1_zcopyconj( zeta1, &conj_zeta1 );
499  // bl1_zmult3( beta, &conj_zeta1, &minus_conj_zeta1 );
500  // bl1_zmult3( &minus_conj_zeta1, upsilon1, &temp );
501  // bl1_zadd3( &temp, alpha11, alpha11 );
502 
503  //bl1_zcopyconj( upsilon1, &conj_upsilon1 );
504  //bl1_zmult3( beta, &conj_upsilon1, &minus_conj_upsilon1 );
505  //bl1_zmult3( &minus_conj_upsilon1, zeta1, &temp );
506  //bl1_zadd3( &temp, alpha11, alpha11 );
507  minus_conj_zeta1.real = - zeta1->real;
508  minus_conj_zeta1.imag = - -zeta1->imag;
509  minus_conj_upsilon1.real = - upsilon1->real;
510  minus_conj_upsilon1.imag = - -upsilon1->imag;
511 
512  ze1 = *zeta1;
513  up1 = *upsilon1;
514  a11 = *alpha11;
515  om1 = *omega1;
516  ch1 = *chi1;
517 
518  //alpha11->real -= zeta1->real * upsilon1->real - -zeta1->imag * upsilon1->imag +
519  // zeta1->real * upsilon1->real - zeta1->imag * -upsilon1->imag;
520  //alpha11->imag -= -zeta1->imag * upsilon1->real + zeta1->real * upsilon1->imag +
521  // zeta1->imag * upsilon1->real + zeta1->real * -upsilon1->imag;
522  a11.real -= ze1.real * up1.real - -ze1.imag * up1.imag +
523  up1.real * ze1.real - -up1.imag * ze1.imag;
524  a11.imag -= ze1.real * up1.imag + -ze1.imag * up1.real +
525  up1.real * ze1.imag + -up1.imag * ze1.real;
526 
527  // bl1_zmult3( alpha11, chi1, &temp );
528  // bl1_zadd3( &temp, omega1, omega1 );
529  //omega1->real += alpha11->real * chi1->real - alpha11->imag * chi1->imag;
530  //omega1->imag += alpha11->imag * chi1->real + alpha11->real * chi1->imag;
531  om1.real += a11.real * ch1.real - a11.imag * ch1.imag;
532  om1.imag += a11.imag * ch1.real + a11.real * ch1.imag;
533 
534  *alpha11 = a11;
535  *omega1 = om1;
536 
537 /*
538  bl1_zaxpyv2bdotaxpy( m_ahead,
539  &minus_conj_zeta1,
540  u2, inc_u,
541  &minus_conj_upsilon1,
542  z2, inc_z,
543  a21, rs_A,
544  x2, inc_x,
545  chi1,
546  &temp,
547  w2, inc_w );
548 */
549 
550  bl1_zaxpyv2b( m_ahead,
551  &minus_conj_zeta1,
552  &minus_conj_upsilon1,
553  u2, inc_u,
554  z2, inc_z,
555  a21, rs_A );
556 
557  bl1_zdotaxpy( m_ahead,
558  a21, rs_A,
559  x2, inc_x,
560  chi1,
561  &temp,
562  w2, inc_w );
563 
564 
565  // bl1_zadd3( &temp, omega1, omega1 );
566  omega1->real += temp.real;
567  omega1->imag += temp.imag;
568 
569  /*------------------------------------------------------------*/
570 
571  }
572 
573  return FLA_SUCCESS;
574 }
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition: bl1_dotaxpy.c:258
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotaxpy(), bl1_zsetv(), chi1, i, dcomplex::imag, omega1, dcomplex::real, temp, upsilon1, and zeta1.

Referenced by FLA_Fused_Her2_Ax_l_opt_var1(), and FLA_Tridiag_UT_l_step_ofz_var2().

◆ FLA_Fused_UZhu_ZUhu_opc_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1 ( int  m_U,
int  n_U,
scomplex *  buff_delta,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_t,
int  inc_t,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_w,
int  inc_w 
)
419 {
420  int i;
421 
422  for ( i = 0; i < n_U; ++i )
423  {
424  scomplex* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
425  scomplex* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
426  scomplex* delta = buff_delta;
427  scomplex* tau1 = buff_t + (i )*inc_t;
428  scomplex* u = buff_u;
429  scomplex* w = buff_w;
430  scomplex alpha;
431  scomplex beta;
432 
433  /*------------------------------------------------------------*/
434 
435  bl1_cdot( BLIS1_CONJUGATE,
436  m_U,
437  z1, rs_Z,
438  u, inc_u,
439  &alpha );
440 
441  bl1_cdot( BLIS1_CONJUGATE,
442  m_U,
443  u1, rs_U,
444  u, inc_u,
445  &beta );
446 
447  *tau1 = beta;
448 
449  bl1_cscals( delta, &alpha );
450  bl1_cscals( delta, &beta );
451 
452  bl1_caxpyv( BLIS1_NO_CONJUGATE,
453  m_U,
454  &alpha,
455  u1, rs_U,
456  w, inc_w );
457 /*
458  F77_caxpy( &m_U,
459  &alpha,
460  u1, &rs_U,
461  w, &inc_w );
462 */
463 
464  bl1_caxpyv( BLIS1_NO_CONJUGATE,
465  m_U,
466  &beta,
467  z1, rs_U,
468  w, inc_w );
469 /*
470  F77_caxpy( &m_U,
471  &beta,
472  z1, &rs_Z,
473  w, &inc_w );
474 */
475 
476  /*------------------------------------------------------------*/
477 
478  }
479 
480  return FLA_SUCCESS;
481 }
double *restrict z1
Definition: bl1_dotsv2.c:148

References bl1_caxpyv(), bl1_cdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofc_var3().

◆ FLA_Fused_UZhu_ZUhu_opd_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1 ( int  m_U,
int  n_U,
double *  buff_delta,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_t,
int  inc_t,
double *  buff_u,
int  inc_u,
double *  buff_w,
int  inc_w 
)
230 {
231  double zero = bl1_d0();
232 
233  int n_run = n_U / 2;
234  int n_left = n_U % 2;
235  int step_u = 2*cs_U;
236  int step_z = 2*cs_Z;
237  int step_tau = 2*inc_t;
238  int i;
239 
240  double* u = buff_u;
241  double* w = buff_w;
242  //double* delta = buff_delta;
243 
244  double* u1;
245  double* u2;
246  double* u3;
247  double* z1;
248  double* z2;
249  double* z3;
250  double* tau1;
251  double* tau2;
252  double* tau3;
253 
254  u1 = buff_U;
255  u2 = buff_U + cs_U;
256  u3 = buff_U + 2*cs_U;
257  z1 = buff_Z;
258  z2 = buff_Z + cs_Z;
259  z3 = buff_Z + 2*cs_Z;
260  tau1 = buff_t;
261  tau2 = buff_t + inc_t;
262  tau3 = buff_t + 2*inc_t;
263 
264  for ( i = 0; i < n_run; ++i )
265  {
266  double rho_z1u;
267  double rho_z2u;
268  //double rho_z3u;
269  double rho_u1u;
270  double rho_u2u;
271  //double rho_u3u;
272 
273  /*------------------------------------------------------------*/
274 /*
275  bl1_ddotsv3( BLIS1_CONJUGATE,
276  m_U,
277  z1, rs_Z,
278  z2, rs_Z,
279  z3, rs_Z,
280  u, inc_u,
281  &zero,
282  &rho_z1u,
283  &rho_z2u,
284  &rho_z3u );
285  bl1_dneg1( &rho_z1u );
286  bl1_dneg1( &rho_z2u );
287  bl1_dneg1( &rho_z3u );
288 
289  bl1_ddotv2axpyv2b( m_U,
290  u1, rs_U,
291  u2, rs_U,
292  u, inc_u,
293  &rho_z1u,
294  &rho_z2u,
295  &rho_u1u,
296  &rho_u2u,
297  w, inc_w );
298  bl1_ddotaxpy( m_U,
299  u3, rs_U,
300  u, inc_u,
301  &rho_z3u,
302  &rho_u3u,
303  w, inc_w );
304 
305  *tau1 = rho_u1u;
306  *tau2 = rho_u2u;
307  *tau3 = rho_u3u;
308 
309  bl1_dneg1( &rho_u1u );
310  bl1_dneg1( &rho_u2u );
311  bl1_dneg1( &rho_u3u );
312 
313  bl1_daxpyv3b( m_U,
314  &rho_u1u,
315  &rho_u2u,
316  &rho_u3u,
317  z1, rs_Z,
318  z2, rs_Z,
319  z3, rs_Z,
320  w, inc_w );
321 */
322  bl1_ddotsv2( BLIS1_CONJUGATE,
323  m_U,
324  z1, rs_Z,
325  z2, rs_Z,
326  u, inc_u,
327  &zero,
328  &rho_z1u,
329  &rho_z2u );
330  bl1_dneg1( &rho_z1u );
331  bl1_dneg1( &rho_z2u );
332 
333  bl1_ddotv2axpyv2b( m_U,
334  u1, rs_U,
335  u2, rs_U,
336  u, inc_u,
337  &rho_z1u,
338  &rho_z2u,
339  &rho_u1u,
340  &rho_u2u,
341  w, inc_w );
342 
343  *tau1 = rho_u1u;
344  *tau2 = rho_u2u;
345 
346  bl1_dneg1( &rho_u1u );
347  bl1_dneg1( &rho_u2u );
348 
349  bl1_daxpyv2b( m_U,
350  &rho_u1u,
351  &rho_u2u,
352  z1, rs_Z,
353  z2, rs_Z,
354  w, inc_w );
355 
356 
357  /*------------------------------------------------------------*/
358 
359  u1 += step_u;
360  u2 += step_u;
361  u3 += step_u;
362  z1 += step_z;
363  z2 += step_z;
364  z3 += step_z;
365  tau1 += step_tau;
366  tau2 += step_tau;
367  tau3 += step_tau;
368  }
369 
370  if ( n_left > 0 )
371  {
372  for ( i = 0; i < n_left; ++i )
373  {
374  double rho_z1u;
375  double rho_u1u;
376 
377  bl1_ddot( BLIS1_CONJUGATE,
378  m_U,
379  z1, rs_Z,
380  u, inc_u,
381  &rho_z1u );
382  bl1_dneg1( &rho_z1u );
383 
384  bl1_ddotaxpy( m_U,
385  u1, rs_U,
386  u, inc_u,
387  &rho_z1u,
388  &rho_u1u,
389  w, inc_w );
390 
391  *tau1 = rho_u1u;
392 
393  bl1_dneg1( &rho_u1u );
394  bl1_daxpyv( BLIS1_NO_CONJUGATE,
395  m_U,
396  &rho_u1u,
397  z1, rs_Z,
398  w, inc_w );
399 
400  u1 += cs_U;
401  z1 += cs_Z;
402  tau1 += inc_t;
403  }
404  }
405 
406  return FLA_SUCCESS;
407 }
int n_left
Definition: bl1_axmyv2.c:149
int n_run
Definition: bl1_axmyv2.c:148
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition: bl1_axpyv2b.c:31
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_ddotaxpy(int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition: bl1_dotaxpy.c:31
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition: bl1_dotsv2.c:35
void bl1_ddotv2axpyv2b(int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w)
Definition: bl1_dotv2axpyv2b.c:36
double bl1_d0(void)
Definition: bl1_constants.c:118

References bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotaxpy(), bl1_ddotsv2(), bl1_ddotv2axpyv2b(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofd_var3().

◆ FLA_Fused_UZhu_ZUhu_ops_var1()

/*
  Single-precision real kernel for the fused update

    w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
    t = U' u;

  The columns of U and Z are processed one at a time. For column i:
    alpha = z1' u,  beta = u1' u,  t[i] = beta (stored before scaling),
    w += (delta * alpha) * u1,  w += (delta * beta) * z1.

  m_U            length of every vector handed to the dot/axpy calls.
  n_U            number of columns of U and Z that are visited.
  buff_delta     pointer to the scalar delta.
  buff_U/rs_U/cs_U, buff_Z/rs_Z/cs_Z
                 matrix buffers with their row and column strides.
  buff_t/inc_t   output vector; element i receives u1' u.
  buff_u/inc_u   input vector u.
  buff_w/inc_w   vector w, updated in place.

  Always returns FLA_SUCCESS.

  Note: z1 is a column of Z, so both the dot product and the axpy that read
  it must step with rs_Z; stepping with rs_U is only correct when U and Z
  happen to share a row stride.
*/
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1 ( int  m_U,
int  n_U,
float *  buff_delta,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_t,
int  inc_t,
float *  buff_u,
int  inc_u,
float *  buff_w,
int  inc_w 
)
144 {
145  int i;
146 
147  for ( i = 0; i < n_U; ++i )
148  {
149  float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
150  float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
151  float* delta = buff_delta;
152  float* tau1 = buff_t + (i )*inc_t;
153  float* u = buff_u;
154  float* w = buff_w;
155  float alpha;
156  float beta;
157 
158  /*------------------------------------------------------------*/
159 
160  bl1_sdot( BLIS1_CONJUGATE,
161  m_U,
162  z1, rs_Z,
163  u, inc_u,
164  &alpha );
165 /*
166  alpha = F77_sdot( &m_U,
167  z1, &rs_Z,
168  u, &inc_u );
169 */
170 
171  bl1_sdot( BLIS1_CONJUGATE,
172  m_U,
173  u1, rs_U,
174  u, inc_u,
175  &beta );
176 /*
177  beta = F77_sdot( &m_U,
178  u1, &rs_U,
179  u, &inc_u );
180 */
181 
182  *tau1 = beta;
183 
184  // bl1_sscals( delta, &alpha );
185  // bl1_sscals( delta, &beta );
186  alpha *= *delta;
187  beta *= *delta;
188 
189  bl1_saxpyv( BLIS1_NO_CONJUGATE,
190  m_U,
191  &alpha,
192  u1, rs_U,
193  w, inc_w );
194 /*
195  F77_saxpy( &m_U,
196  &alpha,
197  u1, &rs_U,
198  w, &inc_w );
199 */
200 
201  bl1_saxpyv( BLIS1_NO_CONJUGATE,
202  m_U,
203  &beta,
204  z1, rs_Z,
205  w, inc_w );
206 /*
207  F77_saxpy( &m_U,
208  &beta,
209  z1, &rs_Z,
210  w, &inc_w );
211 */
212 
213  /*------------------------------------------------------------*/
214 
215  }
216 
217  return FLA_SUCCESS;
218 }

References bl1_saxpyv(), bl1_sdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofs_var3().

◆ FLA_Fused_UZhu_ZUhu_opt_var1()

/*
  Object-level front end for the fused update

    w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
    t = U' u;

  Reads the dimensions and strides of U, the strides of Z and the vector
  increments of u, w and t, then dispatches on the datatype of U to the
  matching typed kernel (ops/opd/opc/opz). Every kernel takes its trailing
  vector arguments in the order (t, inc_t), (u, inc_u), (w, inc_w); all four
  cases must pass them in that order.

  delta   scalar object.
  U, Z    matrix objects; m_U and n_U are taken from U only.
  t       vector object that receives U' u.
  u       input vector object.
  w       vector object updated in place.

  Always returns FLA_SUCCESS; a datatype that matches none of the four
  cases falls through the switch without doing any work.
*/
FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1 ( FLA_Obj  delta,
FLA_Obj  U,
FLA_Obj  Z,
FLA_Obj  t,
FLA_Obj  u,
FLA_Obj  w 
)
14 {
15 /*
16  Effective computation:
17  w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
18  t = U' u;
19 */
20  FLA_Datatype datatype;
21  int m_U, n_U;
22  int rs_U, cs_U;
23  int rs_Z, cs_Z;
24  int inc_u, inc_w, inc_t;
25 
26  datatype = FLA_Obj_datatype( U );
27 
28  m_U = FLA_Obj_length( U );
29  n_U = FLA_Obj_width( U );
30 
31  rs_U = FLA_Obj_row_stride( U );
32  cs_U = FLA_Obj_col_stride( U );
33 
34  rs_Z = FLA_Obj_row_stride( Z );
35  cs_Z = FLA_Obj_col_stride( Z );
36 
37  inc_u = FLA_Obj_vector_inc( u );
38 
39  inc_w = FLA_Obj_vector_inc( w );
40 
41  inc_t = FLA_Obj_vector_inc( t );
42 
43 
44  switch ( datatype )
45  {
46  case FLA_FLOAT:
47  {
48  float* buff_U = FLA_FLOAT_PTR( U );
49  float* buff_Z = FLA_FLOAT_PTR( Z );
50  float* buff_t = FLA_FLOAT_PTR( t );
51  float* buff_u = FLA_FLOAT_PTR( u );
52  float* buff_w = FLA_FLOAT_PTR( w );
53  float* buff_delta = FLA_FLOAT_PTR( delta );
54 
55  FLA_Fused_UZhu_ZUhu_ops_var1( m_U,
56  n_U,
57  buff_delta,
58  buff_U, rs_U, cs_U,
59  buff_Z, rs_Z, cs_Z,
60  buff_t, inc_t,
61  buff_u, inc_u,
62  buff_w, inc_w );
63 
64  break;
65  }
66 
67  case FLA_DOUBLE:
68  {
69  double* buff_U = FLA_DOUBLE_PTR( U );
70  double* buff_Z = FLA_DOUBLE_PTR( Z );
71  double* buff_t = FLA_DOUBLE_PTR( t );
72  double* buff_u = FLA_DOUBLE_PTR( u );
73  double* buff_w = FLA_DOUBLE_PTR( w );
74  double* buff_delta = FLA_DOUBLE_PTR( delta );
75 
76  FLA_Fused_UZhu_ZUhu_opd_var1( m_U,
77  n_U,
78  buff_delta,
79  buff_U, rs_U, cs_U,
80  buff_Z, rs_Z, cs_Z,
81  buff_t, inc_t,
82  buff_u, inc_u,
83  buff_w, inc_w );
84 
85  break;
86  }
87 
88  case FLA_COMPLEX:
89  {
90  scomplex* buff_U = FLA_COMPLEX_PTR( U );
91  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
92  scomplex* buff_t = FLA_COMPLEX_PTR( t );
93  scomplex* buff_u = FLA_COMPLEX_PTR( u );
94  scomplex* buff_w = FLA_COMPLEX_PTR( w );
95  scomplex* buff_delta = FLA_COMPLEX_PTR( delta );
96 
97  FLA_Fused_UZhu_ZUhu_opc_var1( m_U,
98  n_U,
99  buff_delta,
100  buff_U, rs_U, cs_U,
101  buff_Z, rs_Z, cs_Z,
102  buff_t, inc_t,
103  buff_u, inc_u,
104  buff_w, inc_w );
105 
106  break;
107  }
108 
109  case FLA_DOUBLE_COMPLEX:
110  {
111  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
112  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
113  dcomplex* buff_t = FLA_DOUBLE_COMPLEX_PTR( t );
114  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
115  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
116  dcomplex* buff_delta = FLA_DOUBLE_COMPLEX_PTR( delta );
117 
118  FLA_Fused_UZhu_ZUhu_opz_var1( m_U,
119  n_U,
120  buff_delta,
121  buff_U, rs_U, cs_U,
122  buff_Z, rs_Z, cs_Z,
123  buff_t, inc_t,
124  buff_u, inc_u,
125  buff_w, inc_w );
126 
127  break;
128  }
129  }
130 
131  return FLA_SUCCESS;
132 }
FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_w, int inc_w)
Definition: FLA_Fused_UZhu_ZUhu_opt_var1.c:222
FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_UZhu_ZUhu_opt_var1.c:411
FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_w, int inc_w)
Definition: FLA_Fused_UZhu_ZUhu_opt_var1.c:136
FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_UZhu_ZUhu_opt_var1.c:485
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123

References FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

◆ FLA_Fused_UZhu_ZUhu_opz_var1()

FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1 ( int  m_U,
int  n_U,
dcomplex *  buff_delta,
dcomplex *  buff_U,
int  rs_U,
int  cs_U,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_t,
int  inc_t,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_w,
int  inc_w 
)
493 {
494  //dcomplex zero = bl1_z0();
495 
496  int n_run = n_U / 1;
497  int n_left = n_U % 1;
498  int step_u = 1*cs_U;
499  int step_z = 1*cs_Z;
500  int step_tau = 1*inc_t;
501  int i;
502 
503  dcomplex* u = buff_u;
504  dcomplex* w = buff_w;
505  //dcomplex* delta = buff_delta;
506 
507  dcomplex* u1;
508  dcomplex* u2;
509  dcomplex* z1;
510  dcomplex* z2;
511  dcomplex* tau1;
512  dcomplex* tau2;
513 
514  u1 = buff_U;
515  u2 = buff_U + cs_U;
516  z1 = buff_Z;
517  z2 = buff_Z + cs_Z;
518  tau1 = buff_t;
519  tau2 = buff_t + inc_t;
520 
521  for ( i = 0; i < n_run; ++i )
522  {
523  dcomplex rho_z1u;
524  //dcomplex rho_z2u;
525  dcomplex rho_u1u;
526  //dcomplex rho_u2u;
527 
528  /*------------------------------------------------------------*/
529 
530 /*
531  Effective computation:
532  w = w + delta * ( U ( Z' u ) + Z ( U' u ) );
533 */
534 
535 /*
536  bl1_zdotsv2( BLIS1_CONJUGATE,
537  m_U,
538  z1, rs_Z,
539  u1, rs_U,
540  u, inc_u,
541  &zero,
542  &rho_z1u,
543  &rho_u1u );
544 
545  *tau1 = rho_u1u;
546 
547  //bl1_zscals( delta, &rho_z1u );
548  //bl1_zscals( delta, &rho_u1u );
549  bl1_zneg1( &rho_z1u );
550  bl1_zneg1( &rho_u1u );
551 
552  bl1_zaxpyv2b( m_U,
553  &rho_z1u,
554  &rho_u1u,
555  u1, rs_U,
556  z1, rs_Z,
557  w, inc_w );
558 */
559 /*
560  bl1_zdotsv2( BLIS1_CONJUGATE,
561  m_U,
562  z1, rs_Z,
563  z2, rs_Z,
564  u, inc_u,
565  &zero,
566  &rho_z1u,
567  &rho_z2u );
568  bl1_zneg1( &rho_z1u );
569  bl1_zneg1( &rho_z2u );
570 
571  bl1_zdotv2axpyv2b( m_U,
572  u1, rs_U,
573  u2, rs_U,
574  u, inc_u,
575  &rho_z1u,
576  &rho_z2u,
577  &rho_u1u,
578  &rho_u2u,
579  w, inc_w );
580 
581  *tau1 = rho_u1u;
582  *tau2 = rho_u2u;
583 
584  bl1_zneg1( &rho_u1u );
585  bl1_zneg1( &rho_u2u );
586 
587  bl1_zaxpyv2b( m_U,
588  &rho_u1u,
589  &rho_u2u,
590  z1, rs_Z,
591  z2, rs_Z,
592  w, inc_w );
593 */
595  m_U,
596  z1, rs_Z,
597  u, inc_u,
598  &rho_z1u );
599  bl1_zneg1( &rho_z1u );
600 
601  bl1_zdotaxpy( m_U,
602  u1, rs_U,
603  u, inc_u,
604  &rho_z1u,
605  &rho_u1u,
606  w, inc_w );
607 
608  *tau1 = rho_u1u;
609 
610  bl1_zneg1( &rho_u1u );
611 
613  m_U,
614  &rho_u1u,
615  z1, rs_Z,
616  w, inc_w );
617 
618  /*------------------------------------------------------------*/
619 
620  u1 += step_u;
621  u2 += step_u;
622  z1 += step_z;
623  z2 += step_z;
624  tau1 += step_tau;
625  tau2 += step_tau;
626  }
627 
628  if ( n_left == 1 )
629  {
630  dcomplex rho_z1u;
631  dcomplex rho_u1u;
632 
634  m_U,
635  z1, rs_Z,
636  u, inc_u,
637  &rho_z1u );
638  bl1_zneg1( &rho_z1u );
639 
640  bl1_zdotaxpy( m_U,
641  u1, rs_U,
642  u, inc_u,
643  &rho_z1u,
644  &rho_u1u,
645  w, inc_w );
646 
647  *tau1 = rho_u1u;
648 
649  bl1_zneg1( &rho_u1u );
651  m_U,
652  &rho_u1u,
653  z1, rs_Z,
654  w, inc_w );
655  }
656 
657  return FLA_SUCCESS;
658 }
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65

References bl1_zaxpyv(), bl1_zdot(), bl1_zdotaxpy(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, and z1.

Referenced by FLA_Fused_UZhu_ZUhu_opt_var1(), and FLA_Tridiag_UT_l_step_ofz_var3().

◆ FLA_Tridiag_UT_l_blf_var2()

FLA_Error FLA_Tridiag_UT_l_blf_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj TL, TR, T0, T1, T2;
19 
20  FLA_Obj T1_tl;
21  FLA_Obj none, none2, none3;
22  dim_t b_alg, b;
23 
24  b_alg = FLA_Obj_length( T );
25 
26  FLA_Part_2x2( A, &ATL, &ATR,
27  &ABL, &ABR, 0, 0, FLA_TL );
28  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
29 
30  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
31  {
32  b = min( FLA_Obj_length( ABR ), b_alg );
33 
34  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
35  /* ************* */ /* ******************** */
36  &A10, /**/ &A11, &A12,
37  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
38  b, b, FLA_BR );
39  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
40  b, FLA_RIGHT );
41 
42  /*------------------------------------------------------------*/
43 
44  FLA_Part_2x2( T1, &T1_tl, &none,
45  &none2, &none3, b, b, FLA_TL );
46 
47  // [ ABR, T1 ] = FLA_Tridiag_UT_l_step_unb_var2( ABR, T1, b );
48  //FLA_Tridiag_UT_l_step_unb_var2( ABR, T1_tl );
49  FLA_Tridiag_UT_l_step_ofu_var2( ABR, T1_tl );
50  //FLA_Tridiag_UT_l_step_opt_var2( ABR, T1_tl );
51 
52  /*------------------------------------------------------------*/
53 
54  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
55  A10, A11, /**/ A12,
56  /* ************** */ /* ****************** */
57  &ABL, /**/ &ABR, A20, A21, /**/ A22,
58  FLA_TL );
59  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
60  FLA_LEFT );
61  }
62 
63  return FLA_SUCCESS;
64 }
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
unsigned long dim_t
Definition: FLA_type_defs.h:71
Definition: FLA_type_defs.h:159

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Tridiag_UT_l_step_ofu_var2().

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_blf_var3()

FLA_Error FLA_Tridiag_UT_l_blf_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj ZT, Z0,
22  ZB, Z1,
23  Z2;
24  FLA_Obj TL, TR, T0, T1, T2;
25 
26  FLA_Obj U, Z;
27  FLA_Obj ABR_l;
28  FLA_Obj UB_l, U2_l;
29  FLA_Obj ZB_l, Z2_l;
30  FLA_Obj T1_tl;
31  FLA_Obj none, none2, none3;
32  FLA_Obj UB_tl,
33  UB_bl;
34  FLA_Datatype datatype_A;
35  dim_t m_A;
36  dim_t b_alg, b, bb;
37 
38  b_alg = FLA_Obj_length( T );
39 
40  datatype_A = FLA_Obj_datatype( A );
41  m_A = FLA_Obj_length( A );
42 
43  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45 
46  FLA_Part_2x2( A, &ATL, &ATR,
47  &ABL, &ABR, 0, 0, FLA_TL );
48  FLA_Part_2x1( U, &UT,
49  &UB, 0, FLA_TOP );
50  FLA_Part_2x1( Z, &ZT,
51  &ZB, 0, FLA_TOP );
52  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53 
54  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55  {
56  b = min( FLA_Obj_length( ABR ), b_alg );
57 
58  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59  /* ************* */ /* ******************** */
60  &A10, /**/ &A11, &A12,
61  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62  b, b, FLA_BR );
63  FLA_Repart_2x1_to_3x1( UT, &U0,
64  /* ** */ /* ** */
65  &U1,
66  UB, &U2, b, FLA_BOTTOM );
67  FLA_Repart_2x1_to_3x1( ZT, &Z0,
68  /* ** */ /* ** */
69  &Z1,
70  ZB, &Z2, b, FLA_BOTTOM );
71  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72  b, FLA_RIGHT );
73 
74  /*------------------------------------------------------------*/
75 
76  FLA_Part_2x2( T1, &T1_tl, &none,
77  &none2, &none3, b, b, FLA_TL );
78 
79  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80 
81  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
82  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
83  FLA_Part_1x2( ZB, &ZB_l, &none, bb, FLA_LEFT );
84 
85  FLA_Part_2x1( UB_l, &none,
86  &U2_l, b, FLA_TOP );
87  FLA_Part_2x1( ZB_l, &none,
88  &Z2_l, b, FLA_TOP );
89 
90  // [ ABR, ZB, T1 ] = FLA_Tridiag_UT_l_step_unb_var3( ABR, ZB, T1, b );
91  //FLA_Tridiag_UT_l_step_unb_var3( ABR, ZB, T1_tl );
92  FLA_Tridiag_UT_l_step_ofu_var3( ABR, ZB, T1_tl );
93  //FLA_Tridiag_UT_l_step_opt_var3( ABR, ZB, T1_tl );
94 
95  if ( FLA_Obj_length( A22 ) > 0 )
96  {
97  // Build UB from ABR, with explicit unit subdiagonal and zeros.
98  FLA_Copy_external( ABR_l, UB_l );
99  FLA_Part_2x1( UB_l, &UB_tl,
100  &UB_bl, 1, FLA_TOP );
101  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
102  FLA_Set( FLA_ZERO, UB_tl );
103 
104  // A22 = A22 - U2 * Z2' - Z2 * U2';
105  FLA_Her2k_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
106  FLA_MINUS_ONE, U2_l, Z2_l, FLA_ONE, A22 );
107  }
108 
109  /*------------------------------------------------------------*/
110 
111  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
112  A10, A11, /**/ A12,
113  /* ************** */ /* ****************** */
114  &ABL, /**/ &ABR, A20, A21, /**/ A22,
115  FLA_TL );
116  FLA_Cont_with_3x1_to_2x1( &UT, U0,
117  U1,
118  /* ** */ /* ** */
119  &UB, U2, FLA_TOP );
120  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
121  Z1,
122  /* ** */ /* ** */
123  &ZB, Z2, FLA_TOP );
124  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
125  FLA_LEFT );
126  }
127 
128  FLA_Obj_free( &U );
129  FLA_Obj_free( &Z );
130 
131  return FLA_SUCCESS;
132 }
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:27
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy_external.c:13
FLA_Error FLA_Her2k_external(FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Her2k_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition: FLA_Triangularize.c:13
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Her2k_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Tridiag_UT_l_step_ofu_var3(), and FLA_ZERO.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_blk_var1()

FLA_Error FLA_Tridiag_UT_l_blk_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj TL, TR, T0, T1, T2;
19 
20  FLA_Obj T1_tl;
21  FLA_Obj none, none2, none3;
22  dim_t b_alg, b;
23 
24  b_alg = FLA_Obj_length( T );
25 
26  FLA_Part_2x2( A, &ATL, &ATR,
27  &ABL, &ABR, 0, 0, FLA_TL );
28  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
29 
30  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
31  {
32  b = min( FLA_Obj_length( ABR ), b_alg );
33 
34  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
35  /* ************* */ /* ******************** */
36  &A10, /**/ &A11, &A12,
37  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
38  b, b, FLA_BR );
39  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
40  b, FLA_RIGHT );
41 
42  /*------------------------------------------------------------*/
43 
44  FLA_Part_2x2( T1, &T1_tl, &none,
45  &none2, &none3, b, b, FLA_TL );
46 
47  // [ ABR, T1 ] = FLA_Tridiag_UT_l_step_unb_var1( ABR, T1, b );
48  //FLA_Tridiag_UT_l_step_unb_var1( ABR, T1_tl );
49  //FLA_Tridiag_UT_l_step_ofu_var1( ABR, T1_tl );
50  FLA_Tridiag_UT_l_step_opt_var1( ABR, T1_tl );
51 
52  /*------------------------------------------------------------*/
53 
54  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
55  A10, A11, /**/ A12,
56  /* ************** */ /* ****************** */
57  &ABL, /**/ &ABR, A20, A21, /**/ A22,
58  FLA_TL );
59  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
60  FLA_LEFT );
61  }
62 
63  return FLA_SUCCESS;
64 }
FLA_Error FLA_Tridiag_UT_l_step_opt_var1(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Tridiag_UT_l_step_opt_var1().

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_blk_var2()

FLA_Error FLA_Tridiag_UT_l_blk_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj TL, TR, T0, T1, T2;
19 
20  FLA_Obj T1_tl;
21  FLA_Obj none, none2, none3;
22  dim_t b_alg, b;
23 
24  b_alg = FLA_Obj_length( T );
25 
26  FLA_Part_2x2( A, &ATL, &ATR,
27  &ABL, &ABR, 0, 0, FLA_TL );
28  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
29 
30  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
31  {
32  b = min( FLA_Obj_length( ABR ), b_alg );
33 
34  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
35  /* ************* */ /* ******************** */
36  &A10, /**/ &A11, &A12,
37  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
38  b, b, FLA_BR );
39  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
40  b, FLA_RIGHT );
41 
42  /*------------------------------------------------------------*/
43 
44  FLA_Part_2x2( T1, &T1_tl, &none,
45  &none2, &none3, b, b, FLA_TL );
46 
47  // [ ABR, T1 ] = FLA_Tridiag_UT_l_step_unb_var2( ABR, T1, b );
48  //FLA_Tridiag_UT_l_step_unb_var2( ABR, T1_tl );
49  //FLA_Tridiag_UT_l_step_ofu_var2( ABR, T1_tl );
50  FLA_Tridiag_UT_l_step_opt_var2( ABR, T1_tl );
51 
52  /*------------------------------------------------------------*/
53 
54  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
55  A10, A11, /**/ A12,
56  /* ************** */ /* ****************** */
57  &ABL, /**/ &ABR, A20, A21, /**/ A22,
58  FLA_TL );
59  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
60  FLA_LEFT );
61  }
62 
63  return FLA_SUCCESS;
64 }
FLA_Error FLA_Tridiag_UT_l_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_opt_var2.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Obj_length(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Tridiag_UT_l_step_opt_var2().

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_blk_var3()

FLA_Error FLA_Tridiag_UT_l_blk_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj ZT, Z0,
22  ZB, Z1,
23  Z2;
24  FLA_Obj TL, TR, T0, T1, T2;
25 
26  FLA_Obj U, Z;
27  FLA_Obj ABR_l;
28  FLA_Obj UB_l, U2_l;
29  FLA_Obj ZB_l, Z2_l;
30  FLA_Obj T1_tl;
31  FLA_Obj none, none2, none3;
32  FLA_Obj UB_tl,
33  UB_bl;
34  FLA_Datatype datatype_A;
35  dim_t m_A;
36  dim_t b_alg, b, bb;
37 
38  b_alg = FLA_Obj_length( T );
39 
40  datatype_A = FLA_Obj_datatype( A );
41  m_A = FLA_Obj_length( A );
42 
43  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45 
46  FLA_Part_2x2( A, &ATL, &ATR,
47  &ABL, &ABR, 0, 0, FLA_TL );
48  FLA_Part_2x1( U, &UT,
49  &UB, 0, FLA_TOP );
50  FLA_Part_2x1( Z, &ZT,
51  &ZB, 0, FLA_TOP );
52  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53 
54  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55  {
56  b = min( FLA_Obj_length( ABR ), b_alg );
57 
58  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59  /* ************* */ /* ******************** */
60  &A10, /**/ &A11, &A12,
61  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62  b, b, FLA_BR );
63  FLA_Repart_2x1_to_3x1( UT, &U0,
64  /* ** */ /* ** */
65  &U1,
66  UB, &U2, b, FLA_BOTTOM );
67  FLA_Repart_2x1_to_3x1( ZT, &Z0,
68  /* ** */ /* ** */
69  &Z1,
70  ZB, &Z2, b, FLA_BOTTOM );
71  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72  b, FLA_RIGHT );
73 
74  /*------------------------------------------------------------*/
75 
76  FLA_Part_2x2( T1, &T1_tl, &none,
77  &none2, &none3, b, b, FLA_TL );
78 
79  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80 
81  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
82  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
83  FLA_Part_1x2( ZB, &ZB_l, &none, bb, FLA_LEFT );
84 
85  FLA_Part_2x1( UB_l, &none,
86  &U2_l, b, FLA_TOP );
87  FLA_Part_2x1( ZB_l, &none,
88  &Z2_l, b, FLA_TOP );
89 
90  // [ ABR, ZB, T1 ] = FLA_Tridiag_UT_l_step_unb_var3( ABR, ZB, T1, b );
91  //FLA_Tridiag_UT_l_step_unb_var3( ABR, ZB, T1_tl );
92  //FLA_Tridiag_UT_l_step_ofu_var3( ABR, ZB, T1_tl );
93  FLA_Tridiag_UT_l_step_opt_var3( ABR, ZB, T1_tl );
94 
95  if ( FLA_Obj_length( A22 ) > 0 )
96  {
97  // Build UB from ABR, with explicit unit subdiagonal and zeros.
98  FLA_Copy_external( ABR_l, UB_l );
99  FLA_Part_2x1( UB_l, &UB_tl,
100  &UB_bl, 1, FLA_TOP );
101  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
102  FLA_Set( FLA_ZERO, UB_tl );
103 
104  // A22 = A22 - U2 * Z2' - Z2 * U2';
105  FLA_Her2k_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
106  FLA_MINUS_ONE, U2_l, Z2_l, FLA_ONE, A22 );
107  }
108 
109  /*------------------------------------------------------------*/
110 
111  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
112  A10, A11, /**/ A12,
113  /* ************** */ /* ****************** */
114  &ABL, /**/ &ABR, A20, A21, /**/ A22,
115  FLA_TL );
116  FLA_Cont_with_3x1_to_2x1( &UT, U0,
117  U1,
118  /* ** */ /* ** */
119  &UB, U2, FLA_TOP );
120  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
121  Z1,
122  /* ** */ /* ** */
123  &ZB, Z2, FLA_TOP );
124  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
125  FLA_LEFT );
126  }
127 
128  FLA_Obj_free( &U );
129  FLA_Obj_free( &Z );
130 
131  return FLA_SUCCESS;
132 }
FLA_Error FLA_Tridiag_UT_l_step_opt_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_opt_var3.c:27

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Her2k_external(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Tridiag_UT_l_step_opt_var3(), and FLA_ZERO.

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_ofu_var1()

FLA_Error FLA_Tridiag_UT_l_ofu_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Tridiag_UT_l_ofu_var2()

FLA_Error FLA_Tridiag_UT_l_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Tridiag_UT_l_step_ofu_var2( A, T );
16 }
FLA_Error FLA_Tridiag_UT_l_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:18

References FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_ofu_var3()

FLA_Error FLA_Tridiag_UT_l_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
19 
20  r_val = FLA_Tridiag_UT_l_step_ofu_var3( A, Z, T );
21 
22  FLA_Obj_free( &Z );
23 
24  return r_val;
25 }
FLA_Error FLA_Tridiag_UT_l_step_ofu_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:27
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:286
int FLA_Error
Definition: FLA_type_defs.h:47

References FLA_Obj_create_conf_to(), FLA_Obj_free(), and FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_opt_var1()

FLA_Error FLA_Tridiag_UT_l_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Tridiag_UT_l_step_opt_var1( A, T );
16 }
FLA_Error FLA_Tridiag_UT_l_step_opt_var1(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:18

References FLA_Tridiag_UT_l_step_opt_var1().

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_opt_var2()

FLA_Error FLA_Tridiag_UT_l_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Tridiag_UT_l_step_opt_var2( A, T );
16 }
FLA_Error FLA_Tridiag_UT_l_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_opt_var2.c:18

References FLA_Tridiag_UT_l_step_opt_var2().

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_opt_var3()

FLA_Error FLA_Tridiag_UT_l_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
19 
20  r_val = FLA_Tridiag_UT_l_step_opt_var3( A, Z, T );
21 
22  FLA_Obj_free( &Z );
23 
24  return r_val;
25 }
FLA_Error FLA_Tridiag_UT_l_step_opt_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_opt_var3.c:27

References FLA_Obj_create_conf_to(), FLA_Obj_free(), and FLA_Tridiag_UT_l_step_opt_var3().

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_step_ofc_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var1 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Tridiag_UT_l_step_ofc_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofc_var2 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
557 {
558  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
559  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
560  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
561  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
562 
563  scomplex first_elem;
564  scomplex beta;
565  scomplex inv_tau11;
566  scomplex minus_inv_tau11;
567  scomplex minus_upsilon11, minus_conj_upsilon11;
568  scomplex minus_zeta11, minus_conj_zeta11;
569  int i;
570 
571  // b_alg = FLA_Obj_length( T );
572  int b_alg = m_T;
573 
574  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
575  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
576  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
577  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
578  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
579  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
580  int inc_u = 1;
581  int inc_z = 1;
582  int inc_w = 1;
583 
584  // Initialize some variables (only to prevent compiler warnings).
585  first_elem = *buff_0;
586  minus_inv_tau11 = *buff_0;
587 
588  for ( i = 0; i < b_alg; ++i )
589  {
590  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
591  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
592  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
593  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
594 
595  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
596  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
597 
598  scomplex* upsilon11= buff_u + (i )*inc_u;
599  scomplex* u21 = buff_u + (i+1)*inc_u;
600 
601  scomplex* zeta11 = buff_z + (i )*inc_z;
602  scomplex* z21 = buff_z + (i+1)*inc_z;
603 
604  scomplex* w21 = buff_w + (i+1)*inc_w;
605 
606  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
607  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
608 
609  int m_ahead = m_A - i - 1;
610  int m_behind = i;
611  int n_behind = i;
612 
613  /*------------------------------------------------------------*/
614 
615  if ( m_behind > 0 )
616  {
617  // FLA_Copy( upsilon11, minus_upsilon11 );
618  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
619  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
620  bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );
621  bl1_ccopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
622 
623  // FLA_Copy( zeta11, minus_zeta11 );
624  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
625  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
626  bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );
627  bl1_ccopyconj( &minus_zeta11, &minus_conj_zeta11 );
628 
629  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
630  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
632  1,
633  &minus_upsilon11,
634  zeta11, 1,
635  alpha11, 1 );
637  1,
638  &minus_zeta11,
639  upsilon11, 1,
640  alpha11, 1 );
641 
642  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
643  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
645  m_ahead,
646  &minus_conj_zeta11,
647  u21, inc_u,
648  a21, rs_A );
650  m_ahead,
651  &minus_conj_upsilon11,
652  z21, inc_z,
653  a21, rs_A );
654  }
655 
656  if ( m_ahead > 0 )
657  {
658  // FLA_Househ2_UT( FLA_LEFT,
659  // a21_t,
660  // a21_b, tau11 );
661  FLA_Househ2_UT_l_opc( m_ahead - 1,
662  a21_t,
663  a21_b, rs_A,
664  tau11 );
665 
666  // FLA_Set( FLA_ONE, inv_tau11 );
667  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
668  // FLA_Copy( inv_tau11, minus_inv_tau11 );
669  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
670  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
671  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
672 
673  // FLA_Copy( a21_t, first_elem );
674  // FLA_Set( FLA_ONE, a21_t );
675  first_elem = *a21_t;
676  *a21_t = *buff_1;
677  }
678 
679  if ( m_behind > 0 && m_ahead > 0 )
680  {
681  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
682  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
684  buff_m1,
685  u21, inc_u,
686  z21, inc_z,
687  A22, rs_A, cs_A,
688  a21, rs_A,
689  w21, inc_w );
690  }
691  else if ( m_ahead > 0 )
692  {
693  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
696  m_ahead,
697  buff_1,
698  A22, rs_A, cs_A,
699  a21, rs_A,
700  buff_0,
701  w21, inc_w );
702  }
703 
704  if ( m_ahead > 0 )
705  {
706  // FLA_Copy( a21, u21 );
707  // FLA_Copy( w21, z21 );
709  m_ahead,
710  a21, rs_A,
711  u21, inc_u );
713  m_ahead,
714  w21, inc_w,
715  z21, inc_z );
716 
717  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
718  // FLA_Inv_scal( FLA_TWO, beta );
720  m_ahead,
721  a21, rs_A,
722  z21, inc_z,
723  &beta );
724  bl1_cinvscals( buff_2, &beta );
725 
726  // FLA_Scal( minus_inv_tau11, beta );
727  // FLA_Axpy( beta, a21, z21 );
728  // FLA_Scal( inv_tau11, z21 );
729  bl1_cscals( &minus_inv_tau11, &beta );
731  m_ahead,
732  &beta,
733  a21, rs_A,
734  z21, inc_z );
736  m_ahead,
737  &inv_tau11,
738  z21, inc_z );
739 
740  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
743  m_ahead,
744  n_behind,
745  buff_1,
746  A20, rs_A, cs_A,
747  a21, rs_A,
748  buff_0,
749  t01, rs_T );
750 
751  // FLA_Copy( first_elem, a21_t );
752  *a21_t = first_elem;
753  }
754 
755  if ( m_behind + 1 == b_alg && m_ahead > 0 )
756  {
757  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
760  m_ahead,
761  buff_m1,
762  u21, inc_u,
763  z21, inc_z,
764  A22, rs_A, cs_A );
765  }
766 
767  /*------------------------------------------------------------*/
768 
769  }
770 
771  // FLA_Obj_free( &u );
772  // FLA_Obj_free( &z );
773  // FLA_Obj_free( &w );
774  FLA_free( buff_u );
775  FLA_free( buff_z );
776  FLA_free( buff_w );
777 
778  return FLA_SUCCESS;
779 }
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_chemv(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_hemv.c:35
void bl1_cher2(uplo1_t uplo, conj1_t conj, int m, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_her2.c:33
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
@ BLIS1_LOWER_TRIANGULAR
Definition: blis_type_defs.h:62
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Her2_Ax_l_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofc_var3()

/*
 * FLA_Tridiag_UT_l_step_ofc_var3 -- single-precision complex (scomplex) kernel.
 *
 * Zeroes the m_A x m_T region of Z, then runs b_alg = m_T iterations.
 * Iteration i (m_behind = n_behind = i, m_ahead = m_A - i - 1):
 *   1. Temporarily replaces the last entry of row a10t with one, applies the
 *      two updates to the column a2 (alpha11 and everything below it) that
 *      the retained FLA_Gemvc comments describe (ABL with z10t, ZBL with
 *      a10t), then restores the saved entry.
 *   2. If m_ahead > 0: calls FLA_Househ2_UT_l_opc() on a21 (tau11 is the
 *      (i,i) entry of T), computes inv_tau11 = 1 / tau11 and its negation,
 *      temporarily sets the first entry of a21 to one, forms z21 in column i
 *      of Z, passes t01 (column i of T starting at row 0) to the fused
 *      update, rescales z21, and finally restores the first entry of a21.
 *
 * Parameters:
 *   m_A                 row count of A used to derive m_ahead.
 *   m_T                 number of iterations (b_alg).
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A.
 *   buff_Z, rs_Z, cs_Z  base pointer and strides of Z (overwritten).
 *   buff_T, rs_T, cs_T  base pointer and strides of T.
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): in this listing the opening line(s) of most multi-line calls
 * are absent (the embedded source line numbers skip, e.g. 530 -> 533); the
 * "References" entry that follows the listing names the callees.
 */
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
469 {
470  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
471  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
472  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
473  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
474 
475  scomplex first_elem, last_elem;
476  scomplex beta;
477  scomplex inv_tau11;
478  scomplex minus_inv_tau11;
479  int i;
480 
481  // b_alg = FLA_Obj_length( T );
482  int b_alg = m_T;
483 
484  // FLA_Set( FLA_ZERO, Z );
485  bl1_csetm( m_A,
486  b_alg,
487  buff_0,
488  buff_Z, rs_Z, cs_Z );
489 
490  for ( i = 0; i < b_alg; ++i )
491  {
492  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
493  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
494  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
495  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
496  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
497 
498  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
499  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
500  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
501 
502  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
503  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
504 
505  scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
506 
507  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
508  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
509 
510  scomplex* ABL = a10t;
511  scomplex* ZBL = z10t;
512 
513  scomplex* a2 = alpha11;
514 
515  int m_ahead = m_A - i - 1;
516  int m_behind = i;
517  int n_behind = i;
518 
519  /*------------------------------------------------------------*/
520 
521  if ( m_behind > 0 )
522  {
523  // FLA_Copy( a10t_r, last_elem );
524  // FLA_Set( FLA_ONE, a10t_r );
525  last_elem = *a10t_r;
526  *a10t_r = *buff_1;
527  }
528 
529  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
530  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
533  m_ahead + 1,
534  n_behind,
535  buff_m1,
536  ABL, rs_A, cs_A,
537  z10t, cs_Z,
538  buff_1,
539  a2, rs_A );
542  m_ahead + 1,
543  n_behind,
544  buff_m1,
545  ZBL, rs_Z, cs_Z,
546  a10t, cs_A,
547  buff_1,
548  a2, rs_A );
549 
550  if ( m_behind > 0 )
551  {
552  // FLA_Copy( last_elem, a10t_r );
553  *a10t_r = last_elem;
554  }
555 
556  if ( m_ahead > 0 )
557  {
558  // FLA_Househ2_UT( FLA_LEFT,
559  // a21_t,
560  // a21_b, tau11 );
561  FLA_Househ2_UT_l_opc( m_ahead - 1,
562  a21_t,
563  a21_b, rs_A,
564  tau11 );
565 
566  // FLA_Set( FLA_ONE, inv_tau11 );
567  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
568  // FLA_Copy( inv_tau11, minus_inv_tau11 );
569  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
570  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
571  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
572 
573  // FLA_Copy( a21_t, first_elem );
574  // FLA_Set( FLA_ONE, a21_t );
575  first_elem = *a21_t;
576  *a21_t = *buff_1;
577 
578  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
581  m_ahead,
582  buff_1,
583  A22, rs_A, cs_A,
584  a21, rs_A,
585  buff_0,
586  z21, rs_Z );
587 
588  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
589  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
590  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
591  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
592  // FLA_Copy( d01, t01 );
594  n_behind,
595  buff_m1,
596  A20, rs_A, cs_A,
597  Z20, rs_Z, cs_Z,
598  t01, rs_T,
599  a21, rs_A,
600  z21, rs_Z );
601 
602  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
603  // FLA_Inv_scal( FLA_TWO, beta );
605  m_ahead,
606  a21, rs_A,
607  z21, rs_Z,
608  &beta );
609  bl1_cinvscals( buff_2, &beta );
610 
611  // FLA_Scal( minus_inv_tau11, beta );
612  // FLA_Axpy( beta, a21, z21 );
613  // FLA_Scal( inv_tau11, z21 );
614  bl1_cscals( &minus_inv_tau11, &beta );
616  m_ahead,
617  &beta,
618  a21, rs_A,
619  z21, rs_Z );
621  m_ahead,
622  &inv_tau11,
623  z21, rs_Z );
624 
625  // FLA_Copy( first_elem, a21_t );
626  *a21_t = first_elem;
627  }
628 
629  /*------------------------------------------------------------*/
630 
631  }
632 
633  return FLA_SUCCESS;
634 }
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofd_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofd_var1 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Tridiag_UT_l_step_ofd_var2()

/*
 * FLA_Tridiag_UT_l_step_ofd_var2 -- double-precision real kernel.
 *
 * Runs b_alg = m_T iterations. Three work vectors (u, z, w) of m_A elements
 * each are obtained from FLA_malloc() on entry and released with FLA_free()
 * before returning.
 *
 * Iteration i (m_behind = n_behind = i, m_ahead = m_A - i - 1):
 *   - if i > 0: forms the negated copies of upsilon11 and zeta11 and, per the
 *     retained FLA_Axpyt comments, applies them to alpha11 and a21;
 *   - if m_ahead > 0: calls FLA_Househ2_UT_l_opd() on a21 (tau11 is the (i,i)
 *     entry of T), computes inv_tau11 = 1 / tau11 and its negation, and
 *     temporarily overwrites the first entry of a21 with one;
 *   - computes w21 from A22 and a21; when i > 0 the retained comments show
 *     this combined with the rank-2 update of A22 by u21 and z21;
 *   - if m_ahead > 0: copies a21 into u21 and w21 into z21, rescales z21 with
 *     beta and inv_tau11, fills t01 (column i of T from row 0), and restores
 *     the first entry of a21;
 *   - on the last iteration (i + 1 == b_alg) with m_ahead > 0: applies the
 *     remaining rank-2 update to A22.
 *
 * Parameters:
 *   m_A                 row count of A; also the length of the work vectors.
 *   m_T                 number of iterations (b_alg).
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A.
 *   buff_T, rs_T, cs_T  base pointer and strides of T.
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): in this listing the opening line(s) of most multi-line calls
 * are absent (the embedded source line numbers skip, e.g. 402 -> 404); the
 * "References" entry that follows the listing names the callees.
 */
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
329 {
330  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
331  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
332  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
333  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
334 
335  double first_elem;
336  double beta;
337  double inv_tau11;
338  double minus_inv_tau11;
339  double minus_upsilon11, minus_conj_upsilon11;
340  double minus_zeta11, minus_conj_zeta11;
341  int i;
342 
343  // b_alg = FLA_Obj_length( T );
344  int b_alg = m_T;
345 
346  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
347  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
348  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
// NOTE(review): the FLA_malloc() results below are used without a NULL check.
349  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
350  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
351  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
352  int inc_u = 1;
353  int inc_z = 1;
354  int inc_w = 1;
355 
356  // Initialize some variables (only to prevent compiler warnings).
357  first_elem = *buff_0;
358  minus_inv_tau11 = *buff_0;
359 
360  for ( i = 0; i < b_alg; ++i )
361  {
362  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
363  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
364  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
365  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
366 
367  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
368  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
369 
370  double* upsilon11= buff_u + (i )*inc_u;
371  double* u21 = buff_u + (i+1)*inc_u;
372 
373  double* zeta11 = buff_z + (i )*inc_z;
374  double* z21 = buff_z + (i+1)*inc_z;
375 
376  double* w21 = buff_w + (i+1)*inc_w;
377 
378  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
379  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
380 
381  int m_ahead = m_A - i - 1;
382  int m_behind = i;
383  int n_behind = i;
384 
385  /*------------------------------------------------------------*/
386 
387  if ( m_behind > 0 )
388  {
389  // FLA_Copy( upsilon11, minus_upsilon11 );
390  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
391  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
392  bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );
393  bl1_dcopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
394 
395  // FLA_Copy( zeta11, minus_zeta11 );
396  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
397  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
398  bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );
399  bl1_dcopyconj( &minus_zeta11, &minus_conj_zeta11 );
400 
401  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
402  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
404  1,
405  &minus_upsilon11,
406  zeta11, 1,
407  alpha11, 1 );
409  1,
410  &minus_zeta11,
411  upsilon11, 1,
412  alpha11, 1 );
413 
414  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
415  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
417  m_ahead,
418  &minus_conj_zeta11,
419  u21, inc_u,
420  a21, rs_A );
422  m_ahead,
423  &minus_conj_upsilon11,
424  z21, inc_z,
425  a21, rs_A );
426  }
427 
428  if ( m_ahead > 0 )
429  {
430  // FLA_Househ2_UT( FLA_LEFT,
431  // a21_t,
432  // a21_b, tau11 );
433  FLA_Househ2_UT_l_opd( m_ahead - 1,
434  a21_t,
435  a21_b, rs_A,
436  tau11 );
437 
438  // FLA_Set( FLA_ONE, inv_tau11 );
439  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
440  // FLA_Copy( inv_tau11, minus_inv_tau11 );
441  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
442  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
443  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
444 
445  // FLA_Copy( a21_t, first_elem );
446  // FLA_Set( FLA_ONE, a21_t );
447  first_elem = *a21_t;
448  *a21_t = *buff_1;
449  }
450 
451  if ( m_behind > 0 && m_ahead > 0 )
452  {
453  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
454  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
456  buff_m1,
457  u21, inc_u,
458  z21, inc_z,
459  A22, rs_A, cs_A,
460  a21, rs_A,
461  w21, inc_w );
462  }
463  else if ( m_ahead > 0 )
464  {
465  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
467  m_ahead,
468  buff_1,
469  A22, rs_A, cs_A,
470  a21, rs_A,
471  buff_0,
472  w21, inc_w );
473  }
474 
475  if ( m_ahead > 0 )
476  {
477  // FLA_Copy( a21, u21 );
478  // FLA_Copy( w21, z21 );
480  m_ahead,
481  a21, rs_A,
482  u21, inc_u );
484  m_ahead,
485  w21, inc_w,
486  z21, inc_z );
487 
488  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
489  // FLA_Inv_scal( FLA_TWO, beta );
491  m_ahead,
492  a21, rs_A,
493  z21, inc_z,
494  &beta );
495  bl1_dinvscals( buff_2, &beta );
496 
497  // FLA_Scal( minus_inv_tau11, beta );
498  // FLA_Axpy( beta, a21, z21 );
499  // FLA_Scal( inv_tau11, z21 );
500  bl1_dscals( &minus_inv_tau11, &beta );
502  m_ahead,
503  &beta,
504  a21, rs_A,
505  z21, inc_z );
507  m_ahead,
508  &inv_tau11,
509  z21, inc_z );
510 
511  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
514  m_ahead,
515  n_behind,
516  buff_1,
517  A20, rs_A, cs_A,
518  a21, rs_A,
519  buff_0,
520  t01, rs_T );
521 
522  // FLA_Copy( first_elem, a21_t );
523  *a21_t = first_elem;
524  }
525 
526  if ( m_behind + 1 == b_alg && m_ahead > 0 )
527  {
528  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
530  m_ahead,
531  buff_m1,
532  u21, inc_u,
533  z21, inc_z,
534  A22, rs_A, cs_A );
535  }
536 
537  /*------------------------------------------------------------*/
538 
539  }
540 
541  // FLA_Obj_free( &u );
542  // FLA_Obj_free( &z );
543  // FLA_Obj_free( &w );
544  FLA_free( buff_u );
545  FLA_free( buff_z );
546  FLA_free( buff_w );
547 
548  return FLA_SUCCESS;
549 }
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24
void bl1_dsymv(uplo1_t uplo, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_symv.c:56
void bl1_dsyr2(uplo1_t uplo, int m, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_syr2.c:58

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Her2_Ax_l_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofd_var3()

/*
 * FLA_Tridiag_UT_l_step_ofd_var3 -- double-precision real kernel.
 *
 * Zeroes the m_A x m_T region of Z with bl1_dsetm(), then runs b_alg = m_T
 * iterations. Iteration i (m_behind = n_behind = i, m_ahead = m_A - i - 1):
 *   1. Temporarily replaces the last entry of row a10t with one, applies the
 *      two updates to the column a2 (alpha11 and everything below it) that
 *      the retained FLA_Gemvc comments describe (ABL with z10t, ZBL with
 *      a10t), then restores the saved entry.
 *   2. If m_ahead > 0: calls FLA_Househ2_UT_l_opd() on a21 (tau11 is the
 *      (i,i) entry of T), computes inv_tau11 = 1 / tau11 and its negation,
 *      temporarily sets the first entry of a21 to one, forms z21 in column i
 *      of Z, passes t01 (column i of T starting at row 0) to the fused
 *      update, rescales z21, and finally restores the first entry of a21.
 *
 * Parameters:
 *   m_A                 row count of A used to derive m_ahead.
 *   m_T                 number of iterations (b_alg).
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A.
 *   buff_Z, rs_Z, cs_Z  base pointer and strides of Z (overwritten).
 *   buff_T, rs_T, cs_T  base pointer and strides of T.
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): in this listing the opening line(s) of most multi-line calls
 * are absent (the embedded source line numbers skip, e.g. 357 -> 360); the
 * "References" entry that follows the listing names the callees.
 */
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
296 {
297  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
298  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
299  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
300  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
301 
302  double first_elem, last_elem;
303  double beta;
304  double inv_tau11;
305  double minus_inv_tau11;
306  int i;
307 
308  // b_alg = FLA_Obj_length( T );
309  int b_alg = m_T;
310 
311  // FLA_Set( FLA_ZERO, Z );
312  bl1_dsetm( m_A,
313  b_alg,
314  buff_0,
315  buff_Z, rs_Z, cs_Z );
316 
317  for ( i = 0; i < b_alg; ++i )
318  {
319  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
320  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
321  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
322  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
323  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
324 
325  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
326  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
327  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
328 
329  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
330  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
331 
332  double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
333 
334  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
335  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
336 
337  double* ABL = a10t;
338  double* ZBL = z10t;
339 
340  double* a2 = alpha11;
341 
342  int m_ahead = m_A - i - 1;
343  int m_behind = i;
344  int n_behind = i;
345 
346  /*------------------------------------------------------------*/
347 
348  if ( m_behind > 0 )
349  {
350  // FLA_Copy( a10t_r, last_elem );
351  // FLA_Set( FLA_ONE, a10t_r );
352  last_elem = *a10t_r;
353  *a10t_r = *buff_1;
354  }
355 
356  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
357  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
360  m_ahead + 1,
361  n_behind,
362  buff_m1,
363  ABL, rs_A, cs_A,
364  z10t, cs_Z,
365  buff_1,
366  a2, rs_A );
369  m_ahead + 1,
370  n_behind,
371  buff_m1,
372  ZBL, rs_Z, cs_Z,
373  a10t, cs_A,
374  buff_1,
375  a2, rs_A );
376 
377  if ( m_behind > 0 )
378  {
379  // FLA_Copy( last_elem, a10t_r );
380  *a10t_r = last_elem;
381  }
382 
383  if ( m_ahead > 0 )
384  {
385  // FLA_Househ2_UT( FLA_LEFT,
386  // a21_t,
387  // a21_b, tau11 );
388  FLA_Househ2_UT_l_opd( m_ahead - 1,
389  a21_t,
390  a21_b, rs_A,
391  tau11 );
392 
393  // FLA_Set( FLA_ONE, inv_tau11 );
394  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
395  // FLA_Copy( inv_tau11, minus_inv_tau11 );
396  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
397  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
398  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
399 
400  // FLA_Copy( a21_t, first_elem );
401  // FLA_Set( FLA_ONE, a21_t );
402  first_elem = *a21_t;
403  *a21_t = *buff_1;
404 
405  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
407  m_ahead,
408  buff_1,
409  A22, rs_A, cs_A,
410  a21, rs_A,
411  buff_0,
412  z21, rs_Z );
413 
414  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
415  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
416  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
417  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
418  // FLA_Copy( d01, t01 );
420  n_behind,
421  buff_m1,
422  A20, rs_A, cs_A,
423  Z20, rs_Z, cs_Z,
424  t01, rs_T,
425  a21, rs_A,
426  z21, rs_Z );
427 
428  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
429  // FLA_Inv_scal( FLA_TWO, beta );
431  m_ahead,
432  a21, rs_A,
433  z21, rs_Z,
434  &beta );
435  bl1_dinvscals( buff_2, &beta );
436 
437  // FLA_Scal( minus_inv_tau11, beta );
438  // FLA_Axpy( beta, a21, z21 );
439  // FLA_Scal( inv_tau11, z21 );
440  bl1_dscals( &minus_inv_tau11, &beta );
442  m_ahead,
443  &beta,
444  a21, rs_A,
445  z21, rs_Z );
447  m_ahead,
448  &inv_tau11,
449  z21, rs_Z );
450 
451  // FLA_Copy( first_elem, a21_t );
452  *a21_t = first_elem;
453  }
454 
455  /*------------------------------------------------------------*/
456 
457  }
458 
459  return FLA_SUCCESS;
460 }
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsetm(), bl1_dsymv(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofs_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofs_var1 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Tridiag_UT_l_step_ofs_var2()

/*
 * FLA_Tridiag_UT_l_step_ofs_var2 -- single-precision real kernel.
 *
 * Runs b_alg = m_T iterations. Three work vectors (u, z, w) of m_A elements
 * each are obtained from FLA_malloc() on entry and released with FLA_free()
 * before returning.
 *
 * Iteration i (m_behind = n_behind = i, m_ahead = m_A - i - 1):
 *   - if i > 0: forms the negated copies of upsilon11 and zeta11 and, per the
 *     retained FLA_Axpyt comments, applies them to alpha11 and a21;
 *   - if m_ahead > 0: calls FLA_Househ2_UT_l_ops() on a21 (tau11 is the (i,i)
 *     entry of T), computes inv_tau11 = 1 / tau11 and its negation, and
 *     temporarily overwrites the first entry of a21 with one;
 *   - computes w21 from A22 and a21; when i > 0 the retained comments show
 *     this combined with the rank-2 update of A22 by u21 and z21;
 *   - if m_ahead > 0: copies a21 into u21 and w21 into z21, rescales z21 with
 *     beta and inv_tau11, fills t01 (column i of T from row 0), and restores
 *     the first entry of a21;
 *   - on the last iteration (i + 1 == b_alg) with m_ahead > 0: applies the
 *     remaining rank-2 update to A22.
 *
 * Parameters:
 *   m_A                 row count of A; also the length of the work vectors.
 *   m_T                 number of iterations (b_alg).
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A.
 *   buff_T, rs_T, cs_T  base pointer and strides of T.
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): in this listing the opening line(s) of most multi-line calls
 * are absent (the embedded source line numbers skip, e.g. 174 -> 176); the
 * "References" entry that follows the listing names the callees.
 */
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float beta;
109  float inv_tau11;
110  float minus_inv_tau11;
111  float minus_upsilon11, minus_conj_upsilon11;
112  float minus_zeta11, minus_conj_zeta11;
113  int i;
114 
115  // b_alg = FLA_Obj_length( T );
116  int b_alg = m_T;
117 
118  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
119  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
120  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
// NOTE(review): the FLA_malloc() results below are used without a NULL check.
121  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
122  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
123  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
124  int inc_u = 1;
125  int inc_z = 1;
126  int inc_w = 1;
127 
128  // Initialize some variables (only to prevent compiler warnings).
129  first_elem = *buff_0;
130  minus_inv_tau11 = *buff_0;
131 
132  for ( i = 0; i < b_alg; ++i )
133  {
134  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
135  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
136  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
137  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
138 
139  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
140  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
141 
142  float* upsilon11= buff_u + (i )*inc_u;
143  float* u21 = buff_u + (i+1)*inc_u;
144 
145  float* zeta11 = buff_z + (i )*inc_z;
146  float* z21 = buff_z + (i+1)*inc_z;
147 
148  float* w21 = buff_w + (i+1)*inc_w;
149 
150  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
151  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
152 
153  int m_ahead = m_A - i - 1;
154  int m_behind = i;
155  int n_behind = i;
156 
157  /*------------------------------------------------------------*/
158 
159  if ( m_behind > 0 )
160  {
161  // FLA_Copy( upsilon11, minus_upsilon11 );
162  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
163  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
164  bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );
165  bl1_scopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
166 
167  // FLA_Copy( zeta11, minus_zeta11 );
168  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
169  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
170  bl1_smult3( buff_m1, zeta11, &minus_zeta11 );
171  bl1_scopyconj( &minus_zeta11, &minus_conj_zeta11 );
172 
173  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
174  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
176  1,
177  &minus_upsilon11,
178  zeta11, 1,
179  alpha11, 1 );
181  1,
182  &minus_zeta11,
183  upsilon11, 1,
184  alpha11, 1 );
185 
186  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
187  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
189  m_ahead,
190  &minus_conj_zeta11,
191  u21, inc_u,
192  a21, rs_A );
194  m_ahead,
195  &minus_conj_upsilon11,
196  z21, inc_z,
197  a21, rs_A );
198  }
199 
200  if ( m_ahead > 0 )
201  {
202  // FLA_Househ2_UT( FLA_LEFT,
203  // a21_t,
204  // a21_b, tau11 );
205  FLA_Househ2_UT_l_ops( m_ahead - 1,
206  a21_t,
207  a21_b, rs_A,
208  tau11 );
209 
210  // FLA_Set( FLA_ONE, inv_tau11 );
211  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
212  // FLA_Copy( inv_tau11, minus_inv_tau11 );
213  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
214  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
215  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
216 
217  // FLA_Copy( a21_t, first_elem );
218  // FLA_Set( FLA_ONE, a21_t );
219  first_elem = *a21_t;
220  *a21_t = *buff_1;
221  }
222 
223  if ( m_behind > 0 && m_ahead > 0 )
224  {
225  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
226  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
228  buff_m1,
229  u21, inc_u,
230  z21, inc_z,
231  A22, rs_A, cs_A,
232  a21, rs_A,
233  w21, inc_w );
234  }
235  else if ( m_ahead > 0 )
236  {
237  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
239  m_ahead,
240  buff_1,
241  A22, rs_A, cs_A,
242  a21, rs_A,
243  buff_0,
244  w21, inc_w );
245  }
246 
247  if ( m_ahead > 0 )
248  {
249  // FLA_Copy( a21, u21 );
250  // FLA_Copy( w21, z21 );
252  m_ahead,
253  a21, rs_A,
254  u21, inc_u );
256  m_ahead,
257  w21, inc_w,
258  z21, inc_z );
259 
260  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
261  // FLA_Inv_scal( FLA_TWO, beta );
263  m_ahead,
264  a21, rs_A,
265  z21, inc_z,
266  &beta );
267  bl1_sinvscals( buff_2, &beta );
268 
269  // FLA_Scal( minus_inv_tau11, beta );
270  // FLA_Axpy( beta, a21, z21 );
271  // FLA_Scal( inv_tau11, z21 );
272  bl1_sscals( &minus_inv_tau11, &beta );
274  m_ahead,
275  &beta,
276  a21, rs_A,
277  z21, inc_z );
279  m_ahead,
280  &inv_tau11,
281  z21, inc_z );
282 
283  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
286  m_ahead,
287  n_behind,
288  buff_1,
289  A20, rs_A, cs_A,
290  a21, rs_A,
291  buff_0,
292  t01, rs_T );
293 
294  // FLA_Copy( first_elem, a21_t );
295  *a21_t = first_elem;
296  }
297 
298  if ( m_behind + 1 == b_alg && m_ahead > 0 )
299  {
300  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
302  m_ahead,
303  buff_m1,
304  u21, inc_u,
305  z21, inc_z,
306  A22, rs_A, cs_A );
307  }
308 
309  /*------------------------------------------------------------*/
310 
311  }
312 
313  // FLA_Obj_free( &u );
314  // FLA_Obj_free( &z );
315  // FLA_Obj_free( &w );
316  FLA_free( buff_u );
317  FLA_free( buff_z );
318  FLA_free( buff_w );
319 
320  return FLA_SUCCESS;
321 }
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13
void bl1_ssymv(uplo1_t uplo, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_symv.c:13
void bl1_ssyr2(uplo1_t uplo, int m, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_syr2.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Fused_Her2_Ax_l_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofs_var3()

/*
 * FLA_Tridiag_UT_l_step_ofs_var3 -- single-precision real kernel.
 *
 * Zeroes the m_A x m_T region of Z with bl1_ssetm(), then runs b_alg = m_T
 * iterations. Iteration i (m_behind = n_behind = i, m_ahead = m_A - i - 1):
 *   1. Temporarily replaces the last entry of row a10t with one, applies the
 *      two updates to the column a2 (alpha11 and everything below it) that
 *      the retained FLA_Gemvc comments describe (ABL with z10t, ZBL with
 *      a10t), then restores the saved entry.
 *   2. If m_ahead > 0: calls FLA_Househ2_UT_l_ops() on a21 (tau11 is the
 *      (i,i) entry of T), computes inv_tau11 = 1 / tau11 and its negation,
 *      temporarily sets the first entry of a21 to one, forms z21 in column i
 *      of Z, passes t01 (column i of T starting at row 0) to the fused
 *      update, rescales z21, and finally restores the first entry of a21.
 *
 * Parameters:
 *   m_A                 row count of A used to derive m_ahead.
 *   m_T                 number of iterations (b_alg).
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A.
 *   buff_Z, rs_Z, cs_Z  base pointer and strides of Z (overwritten).
 *   buff_T, rs_T, cs_T  base pointer and strides of T.
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): in this listing the opening line(s) of most multi-line calls
 * are absent (the embedded source line numbers skip, e.g. 184 -> 187); the
 * "References" entry that follows the listing names the callees.
 */
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
123 {
124  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
125  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
126  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
127  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
128 
129  float first_elem, last_elem;
130  float beta;
131  float inv_tau11;
132  float minus_inv_tau11;
133  int i;
134 
135  // b_alg = FLA_Obj_length( T );
136  int b_alg = m_T;
137 
138  // FLA_Set( FLA_ZERO, Z );
139  bl1_ssetm( m_A,
140  b_alg,
141  buff_0,
142  buff_Z, rs_Z, cs_Z );
143 
144  for ( i = 0; i < b_alg; ++i )
145  {
146  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
147  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
148  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
149  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
150  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
151 
152  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
153  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
154  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
155 
156  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
157  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
158 
159  float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
160 
161  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
162  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
163 
164  float* ABL = a10t;
165  float* ZBL = z10t;
166 
167  float* a2 = alpha11;
168 
169  int m_ahead = m_A - i - 1;
170  int m_behind = i;
171  int n_behind = i;
172 
173  /*------------------------------------------------------------*/
174 
175  if ( m_behind > 0 )
176  {
177  // FLA_Copy( a10t_r, last_elem );
178  // FLA_Set( FLA_ONE, a10t_r );
179  last_elem = *a10t_r;
180  *a10t_r = *buff_1;
181  }
182 
183  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
184  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
187  m_ahead + 1,
188  n_behind,
189  buff_m1,
190  ABL, rs_A, cs_A,
191  z10t, cs_Z,
192  buff_1,
193  a2, rs_A );
196  m_ahead + 1,
197  n_behind,
198  buff_m1,
199  ZBL, rs_Z, cs_Z,
200  a10t, cs_A,
201  buff_1,
202  a2, rs_A );
203 
204  if ( m_behind > 0 )
205  {
206  // FLA_Copy( last_elem, a10t_r );
207  *a10t_r = last_elem;
208  }
209 
210  if ( m_ahead > 0 )
211  {
212  // FLA_Househ2_UT( FLA_LEFT,
213  // a21_t,
214  // a21_b, tau11 );
215  FLA_Househ2_UT_l_ops( m_ahead - 1,
216  a21_t,
217  a21_b, rs_A,
218  tau11 );
219 
220  // FLA_Set( FLA_ONE, inv_tau11 );
221  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
222  // FLA_Copy( inv_tau11, minus_inv_tau11 );
223  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
224  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
225  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
226 
227  // FLA_Copy( a21_t, first_elem );
228  // FLA_Set( FLA_ONE, a21_t );
229  first_elem = *a21_t;
230  *a21_t = *buff_1;
231 
232  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
234  m_ahead,
235  buff_1,
236  A22, rs_A, cs_A,
237  a21, rs_A,
238  buff_0,
239  z21, rs_Z );
240 
241  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
242  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
243  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
244  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
245  // FLA_Copy( d01, t01 );
247  n_behind,
248  buff_m1,
249  A20, rs_A, cs_A,
250  Z20, rs_Z, cs_Z,
251  t01, rs_T,
252  a21, rs_A,
253  z21, rs_Z );
254 
255  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
256  // FLA_Inv_scal( FLA_TWO, beta );
258  m_ahead,
259  a21, rs_A,
260  z21, rs_Z,
261  &beta );
262  bl1_sinvscals( buff_2, &beta );
263 
264  // FLA_Scal( minus_inv_tau11, beta );
265  // FLA_Axpy( beta, a21, z21 );
266  // FLA_Scal( inv_tau11, z21 );
267  bl1_sscals( &minus_inv_tau11, &beta );
269  m_ahead,
270  &beta,
271  a21, rs_A,
272  z21, rs_Z );
274  m_ahead,
275  &inv_tau11,
276  z21, rs_Z );
277 
278  // FLA_Copy( first_elem, a21_t );
279  *a21_t = first_elem;
280  }
281 
282  /*------------------------------------------------------------*/
283 
284  }
285 
286  return FLA_SUCCESS;
287 }
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssetm(), bl1_ssymv(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofu_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Tridiag_UT_l_step_ofu_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
44  FLA_Tridiag_UT_l_step_ofs_var2( m_A,
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
57  FLA_Tridiag_UT_l_step_ofd_var2( m_A,
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
70  FLA_Tridiag_UT_l_step_ofc_var2( m_A,
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
83  FLA_Tridiag_UT_l_step_ofz_var2( m_A,
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Tridiag_UT_l_step_ofd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:325
FLA_Error FLA_Tridiag_UT_l_step_ofs_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:97
FLA_Error FLA_Tridiag_UT_l_step_ofz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:783
FLA_Error FLA_Tridiag_UT_l_step_ofc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var2.c:553

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_ofc_var2(), FLA_Tridiag_UT_l_step_ofd_var2(), FLA_Tridiag_UT_l_step_ofs_var2(), and FLA_Tridiag_UT_l_step_ofz_var2().

Referenced by FLA_Tridiag_UT_l_blf_var2(), and FLA_Tridiag_UT_l_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofu_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)
28 {
29  FLA_Datatype datatype;
30  int m_A, m_T;
31  int rs_A, cs_A;
32  int rs_Z, cs_Z;
33  int rs_T, cs_T;
34 
35  datatype = FLA_Obj_datatype( A );
36 
37  m_A = FLA_Obj_length( A );
38  m_T = FLA_Obj_length( T );
39 
40  rs_A = FLA_Obj_row_stride( A );
41  cs_A = FLA_Obj_col_stride( A );
42 
43  rs_Z = FLA_Obj_row_stride( Z );
44  cs_Z = FLA_Obj_col_stride( Z );
45 
46  rs_T = FLA_Obj_row_stride( T );
47  cs_T = FLA_Obj_col_stride( T );
48 
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  float* buff_A = FLA_FLOAT_PTR( A );
55  float* buff_Z = FLA_FLOAT_PTR( Z );
56  float* buff_T = FLA_FLOAT_PTR( T );
57 
58  FLA_Tridiag_UT_l_step_ofs_var3( m_A,
59  m_T,
60  buff_A, rs_A, cs_A,
61  buff_Z, rs_Z, cs_Z,
62  buff_T, rs_T, cs_T );
63 
64  break;
65  }
66 
67  case FLA_DOUBLE:
68  {
69  double* buff_A = FLA_DOUBLE_PTR( A );
70  double* buff_Z = FLA_DOUBLE_PTR( Z );
71  double* buff_T = FLA_DOUBLE_PTR( T );
72 
73  FLA_Tridiag_UT_l_step_ofd_var3( m_A,
74  m_T,
75  buff_A, rs_A, cs_A,
76  buff_Z, rs_Z, cs_Z,
77  buff_T, rs_T, cs_T );
78 
79  break;
80  }
81 
82  case FLA_COMPLEX:
83  {
84  scomplex* buff_A = FLA_COMPLEX_PTR( A );
85  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
86  scomplex* buff_T = FLA_COMPLEX_PTR( T );
87 
88  FLA_Tridiag_UT_l_step_ofc_var3( m_A,
89  m_T,
90  buff_A, rs_A, cs_A,
91  buff_Z, rs_Z, cs_Z,
92  buff_T, rs_T, cs_T );
93 
94  break;
95  }
96 
97  case FLA_DOUBLE_COMPLEX:
98  {
99  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
100  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
101  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
102 
103  FLA_Tridiag_UT_l_step_ofz_var3( m_A,
104  m_T,
105  buff_A, rs_A, cs_A,
106  buff_Z, rs_Z, cs_Z,
107  buff_T, rs_T, cs_T );
108 
109  break;
110  }
111  }
112 
113  return FLA_SUCCESS;
114 }
FLA_Error FLA_Tridiag_UT_l_step_ofc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:464
FLA_Error FLA_Tridiag_UT_l_step_ofd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:291
FLA_Error FLA_Tridiag_UT_l_step_ofz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:638
FLA_Error FLA_Tridiag_UT_l_step_ofs_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_fus_var3.c:118

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_ofc_var3(), FLA_Tridiag_UT_l_step_ofd_var3(), FLA_Tridiag_UT_l_step_ofs_var3(), and FLA_Tridiag_UT_l_step_ofz_var3().

Referenced by FLA_Tridiag_UT_l_blf_var3(), and FLA_Tridiag_UT_l_ofu_var3().

◆ FLA_Tridiag_UT_l_step_ofz_var1()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var1 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Tridiag_UT_l_step_ofz_var2()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var2 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
787 {
788  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
789  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
790  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
791  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
792 
793  dcomplex first_elem;
794  dcomplex beta;
795  dcomplex inv_tau11;
796  dcomplex minus_inv_tau11;
797  dcomplex minus_upsilon11, minus_conj_upsilon11;
798  dcomplex minus_zeta11, minus_conj_zeta11;
799  int i;
800 
801  // b_alg = FLA_Obj_length( T );
802  int b_alg = m_T;
803 
804  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
805  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
806  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
807  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
808  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
809  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
810  int inc_u = 1;
811  int inc_z = 1;
812  int inc_w = 1;
813 
814  // Initialize some variables (only to prevent compiler warnings).
815  first_elem = *buff_0;
816  minus_inv_tau11 = *buff_0;
817 
818  for ( i = 0; i < b_alg; ++i )
819  {
820  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
821  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
822  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
823  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
824 
825  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
826  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
827 
828  dcomplex* upsilon11= buff_u + (i )*inc_u;
829  dcomplex* u21 = buff_u + (i+1)*inc_u;
830 
831  dcomplex* zeta11 = buff_z + (i )*inc_z;
832  dcomplex* z21 = buff_z + (i+1)*inc_z;
833 
834  dcomplex* w21 = buff_w + (i+1)*inc_w;
835 
836  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
837  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
838 
839  int m_ahead = m_A - i - 1;
840  int m_behind = i;
841  int n_behind = i;
842 
843  /*------------------------------------------------------------*/
844 
845  if ( m_behind > 0 )
846  {
847  // FLA_Copy( upsilon11, minus_upsilon11 );
848  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
849  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
850  bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );
851  bl1_zcopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
852 
853  // FLA_Copy( zeta11, minus_zeta11 );
854  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
855  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
856  bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );
857  bl1_zcopyconj( &minus_zeta11, &minus_conj_zeta11 );
858 
859  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
860  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
861  bl1_zaxpyv( BLIS1_CONJUGATE,
862  1,
863  &minus_upsilon11,
864  zeta11, 1,
865  alpha11, 1 );
866  bl1_zaxpyv( BLIS1_CONJUGATE,
867  1,
868  &minus_zeta11,
869  upsilon11, 1,
870  alpha11, 1 );
871 
872  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
873  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
874  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
875  m_ahead,
876  &minus_conj_zeta11,
877  u21, inc_u,
878  a21, rs_A );
879  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
880  m_ahead,
881  &minus_conj_upsilon11,
882  z21, inc_z,
883  a21, rs_A );
884  }
885 
886  if ( m_ahead > 0 )
887  {
888  // FLA_Househ2_UT( FLA_LEFT,
889  // a21_t,
890  // a21_b, tau11 );
891  FLA_Househ2_UT_l_opz( m_ahead - 1,
892  a21_t,
893  a21_b, rs_A,
894  tau11 );
895 
896  // FLA_Set( FLA_ONE, inv_tau11 );
897  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
898  // FLA_Copy( inv_tau11, minus_inv_tau11 );
899  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
900  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
901  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
902 
903  // FLA_Copy( a21_t, first_elem );
904  // FLA_Set( FLA_ONE, a21_t );
905  first_elem = *a21_t;
906  *a21_t = *buff_1;
907  }
908 
909  if ( m_behind > 0 && m_ahead > 0 )
910  {
911  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
912  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
913  FLA_Fused_Her2_Ax_l_opz_var1( m_ahead,
914  buff_m1,
915  u21, inc_u,
916  z21, inc_z,
917  A22, rs_A, cs_A,
918  a21, rs_A,
919  w21, inc_w );
920  }
921  else if ( m_ahead > 0 )
922  {
923  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
924  bl1_zhemv( BLIS1_LOWER_TRIANGULAR,
925  BLIS1_NO_CONJUGATE,
926  m_ahead,
927  buff_1,
928  A22, rs_A, cs_A,
929  a21, rs_A,
930  buff_0,
931  w21, inc_w );
932  }
933 
934  if ( m_ahead > 0 )
935  {
936  // FLA_Copy( a21, u21 );
937  // FLA_Copy( w21, z21 );
938  bl1_zcopyv( BLIS1_NO_CONJUGATE,
939  m_ahead,
940  a21, rs_A,
941  u21, inc_u );
942  bl1_zcopyv( BLIS1_NO_CONJUGATE,
943  m_ahead,
944  w21, inc_w,
945  z21, inc_z );
946 
947  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
948  // FLA_Inv_scal( FLA_TWO, beta );
949  bl1_zdot( BLIS1_CONJUGATE,
950  m_ahead,
951  a21, rs_A,
952  z21, inc_z,
953  &beta );
954  bl1_zinvscals( buff_2, &beta );
955 
956  // FLA_Scal( minus_inv_tau11, beta );
957  // FLA_Axpy( beta, a21, z21 );
958  // FLA_Scal( inv_tau11, z21 );
959  bl1_zscals( &minus_inv_tau11, &beta );
960  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
961  m_ahead,
962  &beta,
963  a21, rs_A,
964  z21, inc_z );
965  bl1_zscalv( BLIS1_NO_CONJUGATE,
966  m_ahead,
967  &inv_tau11,
968  z21, inc_z );
969 
970  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
971  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
972  BLIS1_NO_CONJUGATE,
973  m_ahead,
974  n_behind,
975  buff_1,
976  A20, rs_A, cs_A,
977  a21, rs_A,
978  buff_0,
979  t01, rs_T );
980 
981  // FLA_Copy( first_elem, a21_t );
982  *a21_t = first_elem;
983  }
984 
985  if ( m_behind + 1 == b_alg && m_ahead > 0 )
986  {
987  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
988  bl1_zher2( BLIS1_LOWER_TRIANGULAR,
989  BLIS1_NO_CONJUGATE,
990  m_ahead,
991  buff_m1,
992  u21, inc_u,
993  z21, inc_z,
994  A22, rs_A, cs_A );
995  }
996 
997  /*------------------------------------------------------------*/
998 
999  }
1000 
1001  // FLA_Obj_free( &u );
1002  // FLA_Obj_free( &z );
1003  // FLA_Obj_free( &w );
1004  FLA_free( buff_u );
1005  FLA_free( buff_z );
1006  FLA_free( buff_w );
1007 
1008  return FLA_SUCCESS;
1009 }
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
bl1_zscals(beta, rho_yz)
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zhemv(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_hemv.c:134
void bl1_zher2(uplo1_t uplo, conj1_t conj, int m, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_her2.c:121
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Her2_Ax_l_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var2().

◆ FLA_Tridiag_UT_l_step_ofz_var3()

FLA_Error FLA_Tridiag_UT_l_step_ofz_var3 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
643 {
644  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
645  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
646  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
647  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
648 
649  dcomplex first_elem, last_elem;
650  dcomplex beta;
651  dcomplex inv_tau11;
652  dcomplex minus_inv_tau11;
653  int i;
654 
655  // b_alg = FLA_Obj_length( T );
656  int b_alg = m_T;
657 
658  // FLA_Set( FLA_ZERO, Z );
659  bl1_zsetm( m_A,
660  b_alg,
661  buff_0,
662  buff_Z, rs_Z, cs_Z );
663 
664  for ( i = 0; i < b_alg; ++i )
665  {
666  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
667  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
668  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
669  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
670  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
671 
672  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
673  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
674  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
675 
676  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
677  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
678 
679  dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
680 
681  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
682  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
683 
684  dcomplex* ABL = a10t;
685  dcomplex* ZBL = z10t;
686 
687  dcomplex* a2 = alpha11;
688 
689  int m_ahead = m_A - i - 1;
690  int m_behind = i;
691  int n_behind = i;
692 
693  /*------------------------------------------------------------*/
694 
695  if ( m_behind > 0 )
696  {
697  // FLA_Copy( a10t_r, last_elem );
698  // FLA_Set( FLA_ONE, a10t_r );
699  last_elem = *a10t_r;
700  *a10t_r = *buff_1;
701  }
702 
703  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
704  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
705  bl1_zgemv( BLIS1_NO_TRANSPOSE,
706  BLIS1_CONJUGATE,
707  m_ahead + 1,
708  n_behind,
709  buff_m1,
710  ABL, rs_A, cs_A,
711  z10t, cs_Z,
712  buff_1,
713  a2, rs_A );
714  bl1_zgemv( BLIS1_NO_TRANSPOSE,
715  BLIS1_CONJUGATE,
716  m_ahead + 1,
717  n_behind,
718  buff_m1,
719  ZBL, rs_Z, cs_Z,
720  a10t, cs_A,
721  buff_1,
722  a2, rs_A );
723 
724  if ( m_behind > 0 )
725  {
726  // FLA_Copy( last_elem, a10t_r );
727  *a10t_r = last_elem;
728  }
729 
730  if ( m_ahead > 0 )
731  {
732  // FLA_Househ2_UT( FLA_LEFT,
733  // a21_t,
734  // a21_b, tau11 );
735  FLA_Househ2_UT_l_opz( m_ahead - 1,
736  a21_t,
737  a21_b, rs_A,
738  tau11 );
739 
740  // FLA_Set( FLA_ONE, inv_tau11 );
741  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
742  // FLA_Copy( inv_tau11, minus_inv_tau11 );
743  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
744  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
745  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
746 
747  // FLA_Copy( a21_t, first_elem );
748  // FLA_Set( FLA_ONE, a21_t );
749  first_elem = *a21_t;
750  *a21_t = *buff_1;
751 
752  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
753  bl1_zhemv( BLIS1_LOWER_TRIANGULAR,
754  BLIS1_NO_CONJUGATE,
755  m_ahead,
756  buff_1,
757  A22, rs_A, cs_A,
758  a21, rs_A,
759  buff_0,
760  z21, rs_Z );
761 
762  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
763  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
764  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
765  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
766  // FLA_Copy( d01, t01 );
767  FLA_Fused_UZhu_ZUhu_opz_var1( m_ahead,
768  n_behind,
769  buff_m1,
770  A20, rs_A, cs_A,
771  Z20, rs_Z, cs_Z,
772  t01, rs_T,
773  a21, rs_A,
774  z21, rs_Z );
775 
776  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
777  // FLA_Inv_scal( FLA_TWO, beta );
778  bl1_zdot( BLIS1_CONJUGATE,
779  m_ahead,
780  a21, rs_A,
781  z21, rs_Z,
782  &beta );
783  bl1_zinvscals( buff_2, &beta );
784 
785  // FLA_Scal( minus_inv_tau11, beta );
786  // FLA_Axpy( beta, a21, z21 );
787  // FLA_Scal( inv_tau11, z21 );
788  bl1_zscals( &minus_inv_tau11, &beta );
789  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
790  m_ahead,
791  &beta,
792  a21, rs_A,
793  z21, rs_Z );
794  bl1_zscalv( BLIS1_NO_CONJUGATE,
795  m_ahead,
796  &inv_tau11,
797  z21, rs_Z );
798 
799  // FLA_Copy( first_elem, a21_t );
800  *a21_t = first_elem;
801  }
802 
803  /*------------------------------------------------------------*/
804 
805  }
806 
807  return FLA_SUCCESS;
808 }
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_Fused_UZhu_ZUhu_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_ofu_var3().

◆ FLA_Tridiag_UT_l_step_opc_var1()

FLA_Error FLA_Tridiag_UT_l_step_opc_var1 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
363 {
364  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
365  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
366  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
367  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
368 
369  scomplex first_elem;
370  scomplex beta;
371  scomplex inv_tau11;
372  scomplex minus_inv_tau11;
373  int i;
374 
375  // b_alg = FLA_Obj_length( T );
376  int b_alg = m_T;
377 
378  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
379  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
380  int inc_z = 1;
381 
382  for ( i = 0; i < b_alg; ++i )
383  {
384  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
385  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
386  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
387 
388  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
389  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
390 
391  scomplex* z21 = buff_z + (i+1)*inc_z;
392 
393  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
394  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
395 
396  int m_ahead = m_A - i - 1;
397  int n_behind = i;
398 
399  /*------------------------------------------------------------*/
400 
401  if ( m_ahead > 0 )
402  {
403  // FLA_Househ2_UT( FLA_LEFT,
404  // a21_t,
405  // a21_b, tau11 );
406  FLA_Househ2_UT_l_opc( m_ahead - 1,
407  a21_t,
408  a21_b, rs_A,
409  tau11 );
410 
411  // FLA_Set( FLA_ONE, inv_tau11 );
412  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
413  // FLA_Copy( inv_tau11, minus_inv_tau11 );
414  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
415  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
416  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
417 
418  // FLA_Copy( a21_t, first_elem );
419  // FLA_Set( FLA_ONE, a21_t );
420  first_elem = *a21_t;
421  *a21_t = *buff_1;
422 
423  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
424  bl1_chemv( BLIS1_LOWER_TRIANGULAR,
425  BLIS1_NO_CONJUGATE,
426  m_ahead,
427  buff_1,
428  A22, rs_A, cs_A,
429  a21, rs_A,
430  buff_0,
431  z21, inc_z );
432 
433  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
434  // FLA_Inv_scal( FLA_TWO, beta );
435  bl1_cdot( BLIS1_CONJUGATE,
436  m_ahead,
437  a21, rs_A,
438  z21, inc_z,
439  &beta );
440  bl1_cinvscals( buff_2, &beta );
441 
442  // FLA_Scal( minus_inv_tau11, beta );
443  // FLA_Axpy( beta, a21, z21 );
444  // FLA_Scal( inv_tau11, z21 );
445  bl1_cscals( &minus_inv_tau11, &beta );
446  bl1_caxpyv( BLIS1_NO_CONJUGATE,
447  m_ahead,
448  &beta,
449  a21, rs_A,
450  z21, inc_z );
451  bl1_cscalv( BLIS1_NO_CONJUGATE,
452  m_ahead,
453  &inv_tau11,
454  z21, inc_z );
455 
456  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
457  bl1_cher2( BLIS1_LOWER_TRIANGULAR,
458  BLIS1_NO_CONJUGATE,
459  m_ahead,
460  buff_m1,
461  a21, rs_A,
462  z21, inc_z,
463  A22, rs_A, cs_A );
464 
465  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
466  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
467  BLIS1_NO_CONJUGATE,
468  m_ahead,
469  n_behind,
470  buff_1,
471  A20, rs_A, cs_A,
472  a21, rs_A,
473  buff_0,
474  t01, rs_T );
475 
476  // FLA_Copy( first_elem, a21_t );
477  *a21_t = first_elem;
478  }
479 
480  /*------------------------------------------------------------*/
481 
482  }
483 
484  // FLA_Obj_free( &z );
485  FLA_free( buff_z );
486 
487  return FLA_SUCCESS;
488 }

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

◆ FLA_Tridiag_UT_l_step_opc_var2()

FLA_Error FLA_Tridiag_UT_l_step_opc_var2 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
549 {
550  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
551  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
552  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
553  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
554 
555  scomplex first_elem;
556  scomplex beta;
557  scomplex inv_tau11;
558  scomplex minus_inv_tau11;
559  scomplex minus_upsilon11, minus_conj_upsilon11;
560  scomplex minus_zeta11, minus_conj_zeta11;
561  int i;
562 
563  // b_alg = FLA_Obj_length( T );
564  int b_alg = m_T;
565 
566  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
567  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
568  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
569  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
570  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
571  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
572  int inc_u = 1;
573  int inc_z = 1;
574  int inc_w = 1;
575 
576  // Initialize some variables (only to prevent compiler warnings).
577  first_elem = *buff_0;
578  minus_inv_tau11 = *buff_0;
579 
580  for ( i = 0; i < b_alg; ++i )
581  {
582  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
583  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
584  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
585  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
586 
587  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
588  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
589 
590  scomplex* upsilon11= buff_u + (i )*inc_u;
591  scomplex* u21 = buff_u + (i+1)*inc_u;
592 
593  scomplex* zeta11 = buff_z + (i )*inc_z;
594  scomplex* z21 = buff_z + (i+1)*inc_z;
595 
596  scomplex* w21 = buff_w + (i+1)*inc_w;
597 
598  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
599  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
600 
601  int m_ahead = m_A - i - 1;
602  int m_behind = i;
603  int n_behind = i;
604 
605  /*------------------------------------------------------------*/
606 
607  if ( m_behind > 0 )
608  {
609  // FLA_Copy( upsilon11, minus_upsilon11 );
610  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
611  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
612  bl1_cmult3( buff_m1, upsilon11, &minus_upsilon11 );
613  bl1_ccopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
614 
615  // FLA_Copy( zeta11, minus_zeta11 );
616  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
617  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
618  bl1_cmult3( buff_m1, zeta11, &minus_zeta11 );
619  bl1_ccopyconj( &minus_zeta11, &minus_conj_zeta11 );
620 
621  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
622  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
623  bl1_caxpyv( BLIS1_CONJUGATE,
624  1,
625  &minus_upsilon11,
626  zeta11, 1,
627  alpha11, 1 );
628  bl1_caxpyv( BLIS1_CONJUGATE,
629  1,
630  &minus_zeta11,
631  upsilon11, 1,
632  alpha11, 1 );
633 
634  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
635  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
636  bl1_caxpyv( BLIS1_NO_CONJUGATE,
637  m_ahead,
638  &minus_conj_zeta11,
639  u21, inc_u,
640  a21, rs_A );
641  bl1_caxpyv( BLIS1_NO_CONJUGATE,
642  m_ahead,
643  &minus_conj_upsilon11,
644  z21, inc_z,
645  a21, rs_A );
646  }
647 
648  if ( m_ahead > 0 )
649  {
650  // FLA_Househ2_UT( FLA_LEFT,
651  // a21_t,
652  // a21_b, tau11 );
653  FLA_Househ2_UT_l_opc( m_ahead - 1,
654  a21_t,
655  a21_b, rs_A,
656  tau11 );
657 
658  // FLA_Set( FLA_ONE, inv_tau11 );
659  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
660  // FLA_Copy( inv_tau11, minus_inv_tau11 );
661  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
662  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
663  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
664 
665  // FLA_Copy( a21_t, first_elem );
666  // FLA_Set( FLA_ONE, a21_t );
667  first_elem = *a21_t;
668  *a21_t = *buff_1;
669  }
670 
671  if ( m_behind > 0 )
672  {
673  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
674  bl1_cher2( BLIS1_LOWER_TRIANGULAR,
675  BLIS1_NO_CONJUGATE,
676  m_ahead,
677  buff_m1,
678  u21, inc_u,
679  z21, inc_z,
680  A22, rs_A, cs_A );
681  }
682 
683  if ( m_ahead > 0 )
684  {
685  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
686  bl1_chemv( BLIS1_LOWER_TRIANGULAR,
687  BLIS1_NO_CONJUGATE,
688  m_ahead,
689  buff_1,
690  A22, rs_A, cs_A,
691  a21, rs_A,
692  buff_0,
693  w21, inc_w );
694 
695  // FLA_Copy( a21, u21 );
696  // FLA_Copy( w21, z21 );
697  bl1_ccopyv( BLIS1_NO_CONJUGATE,
698  m_ahead,
699  a21, rs_A,
700  u21, inc_u );
701  bl1_ccopyv( BLIS1_NO_CONJUGATE,
702  m_ahead,
703  w21, inc_w,
704  z21, inc_z );
705 
706  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
707  // FLA_Inv_scal( FLA_TWO, beta );
708  bl1_cdot( BLIS1_CONJUGATE,
709  m_ahead,
710  a21, rs_A,
711  z21, inc_z,
712  &beta );
713  bl1_cinvscals( buff_2, &beta );
714 
715  // FLA_Scal( minus_inv_tau11, beta );
716  // FLA_Axpy( beta, a21, z21 );
717  // FLA_Scal( inv_tau11, z21 );
718  bl1_cscals( &minus_inv_tau11, &beta );
719  bl1_caxpyv( BLIS1_NO_CONJUGATE,
720  m_ahead,
721  &beta,
722  a21, rs_A,
723  z21, inc_z );
724  bl1_cscalv( BLIS1_NO_CONJUGATE,
725  m_ahead,
726  &inv_tau11,
727  z21, inc_z );
728 
729  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
730  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
731  BLIS1_NO_CONJUGATE,
732  m_ahead,
733  n_behind,
734  buff_1,
735  A20, rs_A, cs_A,
736  a21, rs_A,
737  buff_0,
738  t01, rs_T );
739 
740  // FLA_Copy( first_elem, a21_t );
741  *a21_t = first_elem;
742  }
743 
744  if ( m_behind + 1 == b_alg && m_ahead > 0 )
745  {
746  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
747  bl1_cher2( BLIS1_LOWER_TRIANGULAR,
748  BLIS1_NO_CONJUGATE,
749  m_ahead,
750  buff_m1,
751  u21, inc_u,
752  z21, inc_z,
753  A22, rs_A, cs_A );
754  }
755 
756  /*------------------------------------------------------------*/
757 
758  }
759 
760  // FLA_Obj_free( &u );
761  // FLA_Obj_free( &z );
762  // FLA_Obj_free( &w );
763  FLA_free( buff_u );
764  FLA_free( buff_z );
765  FLA_free( buff_w );
766 
767  return FLA_SUCCESS;
768 }

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cher2(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_opc_var3()

FLA_Error FLA_Tridiag_UT_l_step_opc_var3 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
569 {
570  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
571  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
572  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
573  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
574 
575  scomplex first_elem, last_elem;
576  scomplex beta;
577  scomplex inv_tau11;
578  scomplex minus_inv_tau11;
579  int i;
580 
581  // b_alg = FLA_Obj_length( T );
582  int b_alg = m_T;
583 
584  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
585  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
586  scomplex* buff_d = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
587  scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
588  int inc_d = 1;
589  int inc_f = 1;
590 
591  // FLA_Set( FLA_ZERO, Z );
592  bl1_csetm( m_A,
593  b_alg,
594  buff_0,
595  buff_Z, rs_Z, cs_Z );
596 
597  for ( i = 0; i < b_alg; ++i )
598  {
599  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
600  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
601  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
602  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
603  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
604 
605  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
606  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
607  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
608 
609  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
610  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
611 
612  scomplex* d01 = buff_d + (0 )*inc_d;
613 
614  scomplex* f01 = buff_f + (0 )*inc_f;
615 
616  scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
617 
618  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
619  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
620 
621  scomplex* ABL = a10t;
622  scomplex* ZBL = z10t;
623 
624  scomplex* a2 = alpha11;
625 
626  int m_ahead = m_A - i - 1;
627  int m_behind = i;
628  int n_behind = i;
629 
630  /*------------------------------------------------------------*/
631 
632  if ( m_behind > 0 )
633  {
634  // FLA_Copy( a10t_r, last_elem );
635  // FLA_Set( FLA_ONE, a10t_r );
636  last_elem = *a10t_r;
637  *a10t_r = *buff_1;
638  }
639 
640  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
641  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
642  bl1_cgemv( BLIS1_NO_TRANSPOSE,
643  BLIS1_CONJUGATE,
644  m_ahead + 1,
645  n_behind,
646  buff_m1,
647  ABL, rs_A, cs_A,
648  z10t, cs_Z,
649  buff_1,
650  a2, rs_A );
651  bl1_cgemv( BLIS1_NO_TRANSPOSE,
652  BLIS1_CONJUGATE,
653  m_ahead + 1,
654  n_behind,
655  buff_m1,
656  ZBL, rs_Z, cs_Z,
657  a10t, cs_A,
658  buff_1,
659  a2, rs_A );
660 
661  if ( m_behind > 0 )
662  {
663  // FLA_Copy( last_elem, a10t_r );
664  *a10t_r = last_elem;
665  }
666 
667  if ( m_ahead > 0 )
668  {
669  // FLA_Househ2_UT( FLA_LEFT,
670  // a21_t,
671  // a21_b, tau11 );
672  FLA_Househ2_UT_l_opc( m_ahead - 1,
673  a21_t,
674  a21_b, rs_A,
675  tau11 );
676 
677  // FLA_Set( FLA_ONE, inv_tau11 );
678  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
679  // FLA_Copy( inv_tau11, minus_inv_tau11 );
680  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
681  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
682  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
683 
684  // FLA_Copy( a21_t, first_elem );
685  // FLA_Set( FLA_ONE, a21_t );
686  first_elem = *a21_t;
687  *a21_t = *buff_1;
688 
689  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
690  bl1_chemv( BLIS1_LOWER_TRIANGULAR,
691  BLIS1_NO_CONJUGATE,
692  m_ahead,
693  buff_1,
694  A22, rs_A, cs_A,
695  a21, rs_A,
696  buff_0,
697  z21, rs_Z );
698 
699  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
700  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
701  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
702  BLIS1_NO_CONJUGATE,
703  m_ahead,
704  n_behind,
705  buff_1,
706  A20, rs_A, cs_A,
707  a21, rs_A,
708  buff_0,
709  d01, inc_d );
710  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
711  BLIS1_NO_CONJUGATE,
712  m_ahead,
713  n_behind,
714  buff_1,
715  Z20, rs_Z, cs_Z,
716  a21, rs_A,
717  buff_0,
718  f01, inc_f );
719 
720  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
721  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
722  bl1_cgemv( BLIS1_NO_TRANSPOSE,
723  BLIS1_NO_CONJUGATE,
724  m_ahead,
725  n_behind,
726  buff_m1,
727  A20, rs_A, cs_A,
728  f01, inc_f,
729  buff_1,
730  z21, rs_Z );
731  bl1_cgemv( BLIS1_NO_TRANSPOSE,
732  BLIS1_NO_CONJUGATE,
733  m_ahead,
734  n_behind,
735  buff_m1,
736  Z20, rs_Z, cs_Z,
737  d01, inc_d,
738  buff_1,
739  z21, rs_Z );
740 
741  // FLA_Copy( d01, t01 );
742  bl1_ccopyv( BLIS1_NO_CONJUGATE,
743  n_behind,
744  d01, inc_d,
745  t01, rs_T );
746 
747  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
748  // FLA_Inv_scal( FLA_TWO, beta );
749  bl1_cdot( BLIS1_CONJUGATE,
750  m_ahead,
751  a21, rs_A,
752  z21, rs_Z,
753  &beta );
754  bl1_cinvscals( buff_2, &beta );
755 
756  // FLA_Scal( minus_inv_tau11, beta );
757  // FLA_Axpy( beta, a21, z21 );
758  // FLA_Scal( inv_tau11, z21 );
759  bl1_cscals( &minus_inv_tau11, &beta );
760  bl1_caxpyv( BLIS1_NO_CONJUGATE,
761  m_ahead,
762  &beta,
763  a21, rs_A,
764  z21, rs_Z );
765  bl1_cscalv( BLIS1_NO_CONJUGATE,
766  m_ahead,
767  &inv_tau11,
768  z21, rs_Z );
769 
770  // FLA_Copy( first_elem, a21_t );
771  *a21_t = first_elem;
772  }
773 
774  /*------------------------------------------------------------*/
775 
776  }
777 
778  // FLA_Obj_free( &d );
779  // FLA_Obj_free( &f );
780  FLA_free( buff_d );
781  FLA_free( buff_f );
782 
783  return FLA_SUCCESS;
784 }

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_chemv(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_opd_var1()

/*
 * FLA_Tridiag_UT_l_step_opd_var1 -- double-precision, variant-1 step kernel
 * that works directly on raw buffers and strides.
 *
 * Runs b_alg = m_T iterations.  In iteration i, when
 * m_ahead = m_A - i - 1 is positive, it
 *   - calls FLA_Househ2_UT_l_opd() on a21 (column i of A, rows i+1 and below)
 *     with tau11 pointing at T(i,i);
 *   - saves the first element of a21, overwrites it with one while the
 *     updates run, and restores it at the end of the iteration;
 *   - builds z21 in a scratch vector and then updates A22 and t01 (the
 *     leading i entries of column i of T).  The object-level equivalent of
 *     each step is given by the inline "// FLA_...( ... )" comments.
 *
 * Parameters:
 *   m_A                 sizes the scratch vector and determines m_ahead
 *   m_T                 number of iterations (b_alg)
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A
 *   buff_T, rs_T, cs_T  base pointer, row stride and column stride of T
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * The scratch vector buff_z comes from FLA_malloc() and is released with
 * FLA_free() before returning; the FLA_malloc() result is not checked here.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers; the
 * missing lines appear to be the heads of multi-line calls.  Each gap is
 * marked below -- confirm against FLA_Tridiag_UT_l_opt_var1.c.
 */
FLA_Error FLA_Tridiag_UT_l_step_opd_var1 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
232 {
233  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
234  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
235  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
236  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
237 
238  double first_elem;
239  double beta;
240  double inv_tau11;
241  double minus_inv_tau11;
242  int i;
243 
244  // b_alg = FLA_Obj_length( T );
245  int b_alg = m_T;
246 
247  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
     // Scratch vector of m_A elements with unit stride; allocation result is not checked.
248  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
249  int inc_z = 1;
250 
251  for ( i = 0; i < b_alg; ++i )
252  {
     // Sub-views of A for this iteration: A20 starts at (i+1, 0), a21 at (i+1, i), A22 at (i+1, i+1).
253  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
254  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
255  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
256 
     // t01 starts at T(0, i); tau11 is T(i, i).
257  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
258  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
259 
260  double* z21 = buff_z + (i+1)*inc_z;
261 
     // a21_t is the first element of a21; a21_b is the element one row below it.
262  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
263  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
264 
265  int m_ahead = m_A - i - 1;
266  int n_behind = i;
267 
268  /*------------------------------------------------------------*/
269 
270  if ( m_ahead > 0 )
271  {
272  // FLA_Househ2_UT( FLA_LEFT,
273  // a21_t,
274  // a21_b, tau11 );
275  FLA_Househ2_UT_l_opd( m_ahead - 1,
276  a21_t,
277  a21_b, rs_A,
278  tau11 );
279 
280  // FLA_Set( FLA_ONE, inv_tau11 );
281  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
282  // FLA_Copy( inv_tau11, minus_inv_tau11 );
283  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
284  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
285  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
286 
287  // FLA_Copy( a21_t, first_elem );
288  // FLA_Set( FLA_ONE, a21_t );
     // The saved value is written back at the end of this branch.
289  first_elem = *a21_t;
290  *a21_t = *buff_1;
291 
292  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
     // NOTE(review): listing line 293 is absent on this page; presumably the bl1_dsymv( ... call head -- confirm in source.
294  m_ahead,
295  buff_1,
296  A22, rs_A, cs_A,
297  a21, rs_A,
298  buff_0,
299  z21, inc_z );
300 
301  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
302  // FLA_Inv_scal( FLA_TWO, beta );
     // NOTE(review): listing line 303 is absent on this page; presumably the bl1_ddot( ... call head -- confirm in source.
304  m_ahead,
305  a21, rs_A,
306  z21, inc_z,
307  &beta );
308  bl1_dinvscals( buff_2, &beta );
309 
310  // FLA_Scal( minus_inv_tau11, beta );
311  // FLA_Axpy( beta, a21, z21 );
312  // FLA_Scal( inv_tau11, z21 );
313  bl1_dscals( &minus_inv_tau11, &beta );
     // NOTE(review): listing line 314 is absent on this page; presumably the bl1_daxpyv( ... call head -- confirm in source.
315  m_ahead,
316  &beta,
317  a21, rs_A,
318  z21, inc_z );
     // NOTE(review): listing line 319 is absent on this page; presumably the bl1_dscalv( ... call head -- confirm in source.
320  m_ahead,
321  &inv_tau11,
322  z21, inc_z );
323 
324  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
     // NOTE(review): listing line 325 is absent on this page; presumably the bl1_dsyr2( ... call head -- confirm in source.
326  m_ahead,
327  buff_m1,
328  a21, rs_A,
329  z21, inc_z,
330  A22, rs_A, cs_A );
331 
332  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
     // NOTE(review): listing lines 333-334 are absent on this page; presumably the bl1_dgemv( ... call head and its leading flag arguments -- confirm in source.
335  m_ahead,
336  n_behind,
337  buff_1,
338  A20, rs_A, cs_A,
339  a21, rs_A,
340  buff_0,
341  t01, rs_T );
342 
343  // FLA_Copy( first_elem, a21_t );
344  *a21_t = first_elem;
345  }
346 
347  /*------------------------------------------------------------*/
348 
349  }
350 
351  // FLA_Obj_free( &z );
352  FLA_free( buff_z );
353 
354  return FLA_SUCCESS;
355 }

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

◆ FLA_Tridiag_UT_l_step_opd_var2()

/*
 * FLA_Tridiag_UT_l_step_opd_var2 -- double-precision, variant-2 step kernel
 * that works directly on raw buffers and strides.
 *
 * Runs b_alg = m_T iterations and keeps three scratch vectors (u, z, w) of
 * m_A elements each.  Within iteration i the visible code does, in order:
 *   1. if m_behind (= i) > 0: updates alpha11 = A(i,i) and a21 using
 *      upsilon11/zeta11 and u21/z21, i.e. values left in u and z by the
 *      previous iteration;
 *   2. if m_ahead > 0: calls FLA_Househ2_UT_l_opd() on a21 with tau11 = T(i,i),
 *      computes inv_tau11 / minus_inv_tau11, and temporarily sets the first
 *      element of a21 to one;
 *   3. if m_behind > 0: applies the Her2 update of A22 with u21 and z21
 *      (still the previous iteration's vectors);
 *   4. if m_ahead > 0: forms w21 (Hemv with A22), copies a21 into u21 and
 *      w21 into z21, adjusts z21, fills t01 and restores the first element
 *      of a21;
 *   5. on the last iteration (m_behind + 1 == b_alg) with m_ahead > 0:
 *      applies the Her2 update with the freshly computed u21 and z21.
 * The object-level equivalent of each step is given by the inline
 * "// FLA_...( ... )" comments.
 *
 * Parameters:
 *   m_A                 sizes the scratch vectors and determines m_ahead
 *   m_T                 number of iterations (b_alg)
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A
 *   buff_T, rs_T, cs_T  base pointer, row stride and column stride of T
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * The three scratch vectors come from FLA_malloc() and are released with
 * FLA_free() before returning; the FLA_malloc() results are not checked here.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers; the
 * missing lines appear to be the heads of multi-line calls.  Each gap is
 * marked below -- confirm against FLA_Tridiag_UT_l_opt_var2.c.
 */
FLA_Error FLA_Tridiag_UT_l_step_opd_var2 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
325 {
326  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
327  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
328  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
329  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
330 
331  double first_elem;
332  double beta;
333  double inv_tau11;
334  double minus_inv_tau11;
335  double minus_upsilon11, minus_conj_upsilon11;
336  double minus_zeta11, minus_conj_zeta11;
337  int i;
338 
339  // b_alg = FLA_Obj_length( T );
340  int b_alg = m_T;
341 
342  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
343  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
344  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
     // Three unit-stride scratch vectors of m_A elements; allocation results are not checked.
345  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
346  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
347  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
348  int inc_u = 1;
349  int inc_z = 1;
350  int inc_w = 1;
351 
352  // Initialize some variables (only to prevent compiler warnings).
353  first_elem = *buff_0;
354  minus_inv_tau11 = *buff_0;
355 
356  for ( i = 0; i < b_alg; ++i )
357  {
     // Sub-views of A: A20 starts at (i+1, 0), alpha11 is (i, i), a21 starts at (i+1, i), A22 at (i+1, i+1).
358  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
359  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
360  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
361  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
362 
     // t01 starts at T(0, i); tau11 is T(i, i).
363  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
364  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
365 
     // Element i and the tail starting at i+1 of the scratch vectors u and z; tail of w.
366  double* upsilon11= buff_u + (i )*inc_u;
367  double* u21 = buff_u + (i+1)*inc_u;
368 
369  double* zeta11 = buff_z + (i )*inc_z;
370  double* z21 = buff_z + (i+1)*inc_z;
371 
372  double* w21 = buff_w + (i+1)*inc_w;
373 
374  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
375  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
376 
377  int m_ahead = m_A - i - 1;
378  int m_behind = i;
379  int n_behind = i;
380 
381  /*------------------------------------------------------------*/
382 
     // Skipped on the first iteration: u and z hold no values from a previous iteration yet.
383  if ( m_behind > 0 )
384  {
385  // FLA_Copy( upsilon11, minus_upsilon11 );
386  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
387  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
388  bl1_dmult3( buff_m1, upsilon11, &minus_upsilon11 );
389  bl1_dcopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
390 
391  // FLA_Copy( zeta11, minus_zeta11 );
392  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
393  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
394  bl1_dmult3( buff_m1, zeta11, &minus_zeta11 );
395  bl1_dcopyconj( &minus_zeta11, &minus_conj_zeta11 );
396 
397  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
398  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
     // NOTE(review): listing line 399 is absent on this page; presumably a bl1_daxpyv( ... call head -- confirm in source.
400  1,
401  &minus_upsilon11,
402  zeta11, 1,
403  alpha11, 1 );
     // NOTE(review): listing line 404 is absent on this page; presumably a bl1_daxpyv( ... call head -- confirm in source.
405  1,
406  &minus_zeta11,
407  upsilon11, 1,
408  alpha11, 1 );
409 
410  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
411  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
     // NOTE(review): listing line 412 is absent on this page; presumably a bl1_daxpyv( ... call head -- confirm in source.
413  m_ahead,
414  &minus_conj_zeta11,
415  u21, inc_u,
416  a21, rs_A );
     // NOTE(review): listing line 417 is absent on this page; presumably a bl1_daxpyv( ... call head -- confirm in source.
418  m_ahead,
419  &minus_conj_upsilon11,
420  z21, inc_z,
421  a21, rs_A );
422  }
423 
424  if ( m_ahead > 0 )
425  {
426  // FLA_Househ2_UT( FLA_LEFT,
427  // a21_t,
428  // a21_b, tau11 );
429  FLA_Househ2_UT_l_opd( m_ahead - 1,
430  a21_t,
431  a21_b, rs_A,
432  tau11 );
433 
434  // FLA_Set( FLA_ONE, inv_tau11 );
435  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
436  // FLA_Copy( inv_tau11, minus_inv_tau11 );
437  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
438  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
439  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
440 
441  // FLA_Copy( a21_t, first_elem );
442  // FLA_Set( FLA_ONE, a21_t );
     // The saved value is written back in the later m_ahead > 0 branch of this iteration.
443  first_elem = *a21_t;
444  *a21_t = *buff_1;
445  }
446 
     // Uses u21/z21 before they are overwritten below, i.e. the previous iteration's vectors.
447  if ( m_behind > 0 )
448  {
449  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
     // NOTE(review): listing line 450 is absent on this page; presumably the bl1_dsyr2( ... call head -- confirm in source.
451  m_ahead,
452  buff_m1,
453  u21, inc_u,
454  z21, inc_z,
455  A22, rs_A, cs_A );
456  }
457 
458  if ( m_ahead > 0 )
459  {
460  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
     // NOTE(review): listing line 461 is absent on this page; presumably the bl1_dsymv( ... call head -- confirm in source.
462  m_ahead,
463  buff_1,
464  A22, rs_A, cs_A,
465  a21, rs_A,
466  buff_0,
467  w21, inc_w );
468 
469  // FLA_Copy( a21, u21 );
470  // FLA_Copy( w21, z21 );
     // NOTE(review): listing line 471 is absent on this page; presumably a bl1_dcopyv( ... call head -- confirm in source.
472  m_ahead,
473  a21, rs_A,
474  u21, inc_u );
     // NOTE(review): listing line 475 is absent on this page; presumably a bl1_dcopyv( ... call head -- confirm in source.
476  m_ahead,
477  w21, inc_w,
478  z21, inc_z );
479 
480  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
481  // FLA_Inv_scal( FLA_TWO, beta );
     // NOTE(review): listing line 482 is absent on this page; presumably the bl1_ddot( ... call head -- confirm in source.
483  m_ahead,
484  a21, rs_A,
485  z21, inc_z,
486  &beta );
487  bl1_dinvscals( buff_2, &beta );
488 
489  // FLA_Scal( minus_inv_tau11, beta );
490  // FLA_Axpy( beta, a21, z21 );
491  // FLA_Scal( inv_tau11, z21 );
492  bl1_dscals( &minus_inv_tau11, &beta );
     // NOTE(review): listing line 493 is absent on this page; presumably the bl1_daxpyv( ... call head -- confirm in source.
494  m_ahead,
495  &beta,
496  a21, rs_A,
497  z21, inc_z );
     // NOTE(review): listing line 498 is absent on this page; presumably the bl1_dscalv( ... call head -- confirm in source.
499  m_ahead,
500  &inv_tau11,
501  z21, inc_z );
502 
503  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
     // NOTE(review): listing lines 504-505 are absent on this page; presumably the bl1_dgemv( ... call head and its leading flag arguments -- confirm in source.
506  m_ahead,
507  n_behind,
508  buff_1,
509  A20, rs_A, cs_A,
510  a21, rs_A,
511  buff_0,
512  t01, rs_T );
513 
514  // FLA_Copy( first_elem, a21_t );
515  *a21_t = first_elem;
516  }
517 
     // Last iteration only: there is no following iteration to apply this update.
518  if ( m_behind + 1 == b_alg && m_ahead > 0 )
519  {
520  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
     // NOTE(review): listing line 521 is absent on this page; presumably the bl1_dsyr2( ... call head -- confirm in source.
522  m_ahead,
523  buff_m1,
524  u21, inc_u,
525  z21, inc_z,
526  A22, rs_A, cs_A );
527  }
528 
529  /*------------------------------------------------------------*/
530 
531  }
532 
533  // FLA_Obj_free( &u );
534  // FLA_Obj_free( &z );
535  // FLA_Obj_free( &w );
536  FLA_free( buff_u );
537  FLA_free( buff_z );
538  FLA_free( buff_w );
539 
540  return FLA_SUCCESS;
541 }

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsymv(), bl1_dsyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_opd_var3()

/*
 * FLA_Tridiag_UT_l_step_opd_var3 -- double-precision, variant-3 step kernel
 * that works directly on raw buffers and strides and takes an additional
 * workspace matrix Z.
 *
 * Z (m_A x b_alg) is first set to zero with bl1_dsetm().  Then b_alg = m_T
 * iterations run; within iteration i the visible code does, in order:
 *   1. if m_behind (= i) > 0: saves A(i, i-1) (a10t_r) and temporarily
 *      overwrites it with one;
 *   2. applies two Gemvc updates to a2, the m_ahead + 1 entries of column i
 *      of A starting at the diagonal, using ABL with z10t and ZBL with a10t;
 *   3. if m_behind > 0: restores A(i, i-1);
 *   4. if m_ahead > 0: calls FLA_Househ2_UT_l_opd() on a21 with
 *      tau11 = T(i,i), temporarily sets the first element of a21 to one,
 *      forms z21 (column i of Z below row i) from A22, A20 and Z20 via the
 *      scratch vectors d01 and f01, copies d01 into t01, adjusts z21, and
 *      restores the first element of a21.
 * The object-level equivalent of each step is given by the inline
 * "// FLA_...( ... )" comments.
 *
 * Parameters:
 *   m_A                 row count used for Z, the scratch vectors and m_ahead
 *   m_T                 number of iterations (b_alg) and column count used for Z
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A
 *   buff_Z, rs_Z, cs_Z  base pointer, row stride and column stride of Z
 *   buff_T, rs_T, cs_T  base pointer, row stride and column stride of T
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * The scratch vectors buff_d and buff_f come from FLA_malloc() and are
 * released with FLA_free() before returning; the FLA_malloc() results are
 * not checked here.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers; the
 * missing lines appear to be the heads of multi-line calls.  Each gap is
 * marked below -- confirm against FLA_Tridiag_UT_l_opt_var3.c.
 */
FLA_Error FLA_Tridiag_UT_l_step_opd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
346 {
347  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
348  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
349  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
350  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
351 
352  double first_elem, last_elem;
353  double beta;
354  double inv_tau11;
355  double minus_inv_tau11;
356  int i;
357 
358  // b_alg = FLA_Obj_length( T );
359  int b_alg = m_T;
360 
361  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
362  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
     // Two unit-stride scratch vectors of m_A elements; allocation results are not checked.
363  double* buff_d = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
364  double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
365  int inc_d = 1;
366  int inc_f = 1;
367 
368  // FLA_Set( FLA_ZERO, Z );
369  bl1_dsetm( m_A,
370  b_alg,
371  buff_0,
372  buff_Z, rs_Z, cs_Z );
373 
374  for ( i = 0; i < b_alg; ++i )
375  {
     // Sub-views of A: a10t starts at (i, 0), A20 at (i+1, 0), alpha11 is (i, i), a21 starts at (i+1, i), A22 at (i+1, i+1).
376  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
377  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
378  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
379  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
380  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
381 
     // Matching sub-views of Z.
382  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
383  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
384  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
385 
386  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
387  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
388 
389  double* d01 = buff_d + (0 )*inc_d;
390 
391  double* f01 = buff_f + (0 )*inc_f;
392 
     // Last element of row a10t, i.e. A(i, i-1).  For i == 0 the offset is -cs_A;
     // the pointer is only dereferenced inside the m_behind > 0 branches below.
393  double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
394 
395  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
396  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
397 
     // ABL / ZBL alias a10t / z10t; a2 aliases alpha11 (column i from the diagonal down).
398  double* ABL = a10t;
399  double* ZBL = z10t;
400 
401  double* a2 = alpha11;
402 
403  int m_ahead = m_A - i - 1;
404  int m_behind = i;
405  int n_behind = i;
406 
407  /*------------------------------------------------------------*/
408 
409  if ( m_behind > 0 )
410  {
411  // FLA_Copy( a10t_r, last_elem );
412  // FLA_Set( FLA_ONE, a10t_r );
413  last_elem = *a10t_r;
414  *a10t_r = *buff_1;
415  }
416 
417  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
418  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
     // NOTE(review): listing lines 419-420 are absent on this page; presumably the bl1_dgemv( ... call head and its leading flag arguments -- confirm in source.
421  m_ahead + 1,
422  n_behind,
423  buff_m1,
424  ABL, rs_A, cs_A,
425  z10t, cs_Z,
426  buff_1,
427  a2, rs_A );
     // NOTE(review): listing lines 428-429 are absent on this page; presumably the bl1_dgemv( ... call head and its leading flag arguments -- confirm in source.
430  m_ahead + 1,
431  n_behind,
432  buff_m1,
433  ZBL, rs_Z, cs_Z,
434  a10t, cs_A,
435  buff_1,
436  a2, rs_A );
437 
438  if ( m_behind > 0 )
439  {
440  // FLA_Copy( last_elem, a10t_r );
441  *a10t_r = last_elem;
442  }
443 
444  if ( m_ahead > 0 )
445  {
446  // FLA_Househ2_UT( FLA_LEFT,
447  // a21_t,
448  // a21_b, tau11 );
449  FLA_Househ2_UT_l_opd( m_ahead - 1,
450  a21_t,
451  a21_b, rs_A,
452  tau11 );
453 
454  // FLA_Set( FLA_ONE, inv_tau11 );
455  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
456  // FLA_Copy( inv_tau11, minus_inv_tau11 );
457  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
458  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
459  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
460 
461  // FLA_Copy( a21_t, first_elem );
462  // FLA_Set( FLA_ONE, a21_t );
     // The saved value is written back at the end of this branch.
463  first_elem = *a21_t;
464  *a21_t = *buff_1;
465 
466  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
     // NOTE(review): listing line 467 is absent on this page; presumably the bl1_dsymv( ... call head -- confirm in source.
468  m_ahead,
469  buff_1,
470  A22, rs_A, cs_A,
471  a21, rs_A,
472  buff_0,
473  z21, rs_Z );
474 
475  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
476  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
     // NOTE(review): listing lines 477-478 are absent on this page; presumably the bl1_dgemv( ... call head and its leading flag arguments -- confirm in source.
479  m_ahead,
480  n_behind,
481  buff_1,
482  A20, rs_A, cs_A,
483  a21, rs_A,
484  buff_0,
485  d01, inc_d );
     // NOTE(review): listing lines 486-487 are absent on this page; presumably the bl1_dgemv( ... call head and its leading flag arguments -- confirm in source.
488  m_ahead,
489  n_behind,
490  buff_1,
491  Z20, rs_Z, cs_Z,
492  a21, rs_A,
493  buff_0,
494  f01, inc_f );
495 
496  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
497  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
     // NOTE(review): listing lines 498-499 are absent on this page; presumably the bl1_dgemv( ... call head and its leading flag arguments -- confirm in source.
500  m_ahead,
501  n_behind,
502  buff_m1,
503  A20, rs_A, cs_A,
504  f01, inc_f,
505  buff_1,
506  z21, rs_Z );
     // NOTE(review): listing lines 507-508 are absent on this page; presumably the bl1_dgemv( ... call head and its leading flag arguments -- confirm in source.
509  m_ahead,
510  n_behind,
511  buff_m1,
512  Z20, rs_Z, cs_Z,
513  d01, inc_d,
514  buff_1,
515  z21, rs_Z );
516 
517  // FLA_Copy( d01, t01 );
     // NOTE(review): listing line 518 is absent on this page; presumably the bl1_dcopyv( ... call head -- confirm in source.
519  n_behind,
520  d01, inc_d,
521  t01, rs_T );
522 
523  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
524  // FLA_Inv_scal( FLA_TWO, beta );
     // NOTE(review): listing line 525 is absent on this page; presumably the bl1_ddot( ... call head -- confirm in source.
526  m_ahead,
527  a21, rs_A,
528  z21, rs_Z,
529  &beta );
530  bl1_dinvscals( buff_2, &beta );
531 
532  // FLA_Scal( minus_inv_tau11, beta );
533  // FLA_Axpy( beta, a21, z21 );
534  // FLA_Scal( inv_tau11, z21 );
535  bl1_dscals( &minus_inv_tau11, &beta );
     // NOTE(review): listing line 536 is absent on this page; presumably the bl1_daxpyv( ... call head -- confirm in source.
537  m_ahead,
538  &beta,
539  a21, rs_A,
540  z21, rs_Z );
     // NOTE(review): listing line 541 is absent on this page; presumably the bl1_dscalv( ... call head -- confirm in source.
542  m_ahead,
543  &inv_tau11,
544  z21, rs_Z );
545 
546  // FLA_Copy( first_elem, a21_t );
547  *a21_t = first_elem;
548  }
549 
550  /*------------------------------------------------------------*/
551 
552  }
553 
554  // FLA_Obj_free( &d );
555  // FLA_Obj_free( &f );
556  FLA_free( buff_d );
557  FLA_free( buff_f );
558 
559  return FLA_SUCCESS;
560 }

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dscalv(), bl1_dsetm(), bl1_dsymv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_ops_var1()

/*
 * FLA_Tridiag_UT_l_step_ops_var1 -- single-precision, variant-1 step kernel
 * that works directly on raw buffers and strides.
 *
 * Runs b_alg = m_T iterations.  In iteration i, when
 * m_ahead = m_A - i - 1 is positive, it
 *   - calls FLA_Househ2_UT_l_ops() on a21 (column i of A, rows i+1 and below)
 *     with tau11 pointing at T(i,i);
 *   - saves the first element of a21, overwrites it with one while the
 *     updates run, and restores it at the end of the iteration;
 *   - builds z21 in a scratch vector and then updates A22 and t01 (the
 *     leading i entries of column i of T).  The object-level equivalent of
 *     each step is given by the inline "// FLA_...( ... )" comments.
 *
 * Parameters:
 *   m_A                 sizes the scratch vector and determines m_ahead
 *   m_T                 number of iterations (b_alg)
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A
 *   buff_T, rs_T, cs_T  base pointer, row stride and column stride of T
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * The scratch vector buff_z comes from FLA_malloc() and is released with
 * FLA_free() before returning; the FLA_malloc() result is not checked here.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers; the
 * missing lines appear to be the heads of multi-line calls.  Each gap is
 * marked below -- confirm against FLA_Tridiag_UT_l_opt_var1.c.
 */
FLA_Error FLA_Tridiag_UT_l_step_ops_var1 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float beta;
109  float inv_tau11;
110  float minus_inv_tau11;
111  int i;
112 
113  // b_alg = FLA_Obj_length( T );
114  int b_alg = m_T;
115 
116  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
     // Scratch vector of m_A elements with unit stride; allocation result is not checked.
117  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
118  int inc_z = 1;
119 
120  for ( i = 0; i < b_alg; ++i )
121  {
     // Sub-views of A for this iteration: A20 starts at (i+1, 0), a21 at (i+1, i), A22 at (i+1, i+1).
122  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
123  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
124  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
125 
     // t01 starts at T(0, i); tau11 is T(i, i).
126  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
127  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
128 
129  float* z21 = buff_z + (i+1)*inc_z;
130 
     // a21_t is the first element of a21; a21_b is the element one row below it.
131  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
132  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
133 
134  int m_ahead = m_A - i - 1;
135  int n_behind = i;
136 
137  /*------------------------------------------------------------*/
138 
139  if ( m_ahead > 0 )
140  {
141  // FLA_Househ2_UT( FLA_LEFT,
142  // a21_t,
143  // a21_b, tau11 );
144  FLA_Househ2_UT_l_ops( m_ahead - 1,
145  a21_t,
146  a21_b, rs_A,
147  tau11 );
148 
149  // FLA_Set( FLA_ONE, inv_tau11 );
150  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
151  // FLA_Copy( inv_tau11, minus_inv_tau11 );
152  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
153  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
154  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
155 
156  // FLA_Copy( a21_t, first_elem );
157  // FLA_Set( FLA_ONE, a21_t );
     // The saved value is written back at the end of this branch.
158  first_elem = *a21_t;
159  *a21_t = *buff_1;
160 
161  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
     // NOTE(review): listing line 162 is absent on this page; presumably the bl1_ssymv( ... call head -- confirm in source.
163  m_ahead,
164  buff_1,
165  A22, rs_A, cs_A,
166  a21, rs_A,
167  buff_0,
168  z21, inc_z );
169 
170  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
171  // FLA_Inv_scal( FLA_TWO, beta );
     // NOTE(review): listing line 172 is absent on this page; presumably the bl1_sdot( ... call head -- confirm in source.
173  m_ahead,
174  a21, rs_A,
175  z21, inc_z,
176  &beta );
177  bl1_sinvscals( buff_2, &beta );
178 
179  // FLA_Scal( minus_inv_tau11, beta );
180  // FLA_Axpy( beta, a21, z21 );
181  // FLA_Scal( inv_tau11, z21 );
182  bl1_sscals( &minus_inv_tau11, &beta );
     // NOTE(review): listing line 183 is absent on this page; presumably the bl1_saxpyv( ... call head -- confirm in source.
184  m_ahead,
185  &beta,
186  a21, rs_A,
187  z21, inc_z );
     // NOTE(review): listing line 188 is absent on this page; presumably the bl1_sscalv( ... call head -- confirm in source.
189  m_ahead,
190  &inv_tau11,
191  z21, inc_z );
192 
193  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
     // NOTE(review): listing line 194 is absent on this page; presumably the bl1_ssyr2( ... call head -- confirm in source.
195  m_ahead,
196  buff_m1,
197  a21, rs_A,
198  z21, inc_z,
199  A22, rs_A, cs_A );
200 
201  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
     // NOTE(review): listing lines 202-203 are absent on this page; presumably the bl1_sgemv( ... call head and its leading flag arguments -- confirm in source.
204  m_ahead,
205  n_behind,
206  buff_1,
207  A20, rs_A, cs_A,
208  a21, rs_A,
209  buff_0,
210  t01, rs_T );
211 
212  // FLA_Copy( first_elem, a21_t );
213  *a21_t = first_elem;
214  }
215 
216  /*------------------------------------------------------------*/
217 
218  }
219 
220  // FLA_Obj_free( &z );
221  FLA_free( buff_z );
222 
223  return FLA_SUCCESS;
224 }

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

◆ FLA_Tridiag_UT_l_step_ops_var2()

/*
 * FLA_Tridiag_UT_l_step_ops_var2 -- single-precision, variant-2 step kernel
 * that works directly on raw buffers and strides.
 *
 * Runs b_alg = m_T iterations and keeps three scratch vectors (u, z, w) of
 * m_A elements each.  Within iteration i the visible code does, in order:
 *   1. if m_behind (= i) > 0: updates alpha11 = A(i,i) and a21 using
 *      upsilon11/zeta11 and u21/z21, i.e. values left in u and z by the
 *      previous iteration;
 *   2. if m_ahead > 0: calls FLA_Househ2_UT_l_ops() on a21 with tau11 = T(i,i),
 *      computes inv_tau11 / minus_inv_tau11, and temporarily sets the first
 *      element of a21 to one;
 *   3. if m_behind > 0: applies the Her2 update of A22 with u21 and z21
 *      (still the previous iteration's vectors);
 *   4. if m_ahead > 0: forms w21 (Hemv with A22), copies a21 into u21 and
 *      w21 into z21, adjusts z21, fills t01 and restores the first element
 *      of a21;
 *   5. on the last iteration (m_behind + 1 == b_alg) with m_ahead > 0:
 *      applies the Her2 update with the freshly computed u21 and z21.
 * The object-level equivalent of each step is given by the inline
 * "// FLA_...( ... )" comments.
 *
 * Parameters:
 *   m_A                 sizes the scratch vectors and determines m_ahead
 *   m_T                 number of iterations (b_alg)
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A
 *   buff_T, rs_T, cs_T  base pointer, row stride and column stride of T
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * The three scratch vectors come from FLA_malloc() and are released with
 * FLA_free() before returning; the FLA_malloc() results are not checked here.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers; the
 * missing lines appear to be the heads of multi-line calls.  Each gap is
 * marked below -- confirm against FLA_Tridiag_UT_l_opt_var2.c.
 */
FLA_Error FLA_Tridiag_UT_l_step_ops_var2 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float beta;
109  float inv_tau11;
110  float minus_inv_tau11;
111  float minus_upsilon11, minus_conj_upsilon11;
112  float minus_zeta11, minus_conj_zeta11;
113  int i;
114 
115  // b_alg = FLA_Obj_length( T );
116  int b_alg = m_T;
117 
118  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
119  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
120  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
     // Three unit-stride scratch vectors of m_A elements; allocation results are not checked.
121  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
122  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
123  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
124  int inc_u = 1;
125  int inc_z = 1;
126  int inc_w = 1;
127 
128  // Initialize some variables (only to prevent compiler warnings).
129  first_elem = *buff_0;
130  minus_inv_tau11 = *buff_0;
131 
132  for ( i = 0; i < b_alg; ++i )
133  {
     // Sub-views of A: A20 starts at (i+1, 0), alpha11 is (i, i), a21 starts at (i+1, i), A22 at (i+1, i+1).
134  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
135  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
136  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
137  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
138 
     // t01 starts at T(0, i); tau11 is T(i, i).
139  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
140  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
141 
     // Element i and the tail starting at i+1 of the scratch vectors u and z; tail of w.
142  float* upsilon11= buff_u + (i )*inc_u;
143  float* u21 = buff_u + (i+1)*inc_u;
144 
145  float* zeta11 = buff_z + (i )*inc_z;
146  float* z21 = buff_z + (i+1)*inc_z;
147 
148  float* w21 = buff_w + (i+1)*inc_w;
149 
150  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
151  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
152 
153  int m_ahead = m_A - i - 1;
154  int m_behind = i;
155  int n_behind = i;
156 
157  /*------------------------------------------------------------*/
158 
     // Skipped on the first iteration: u and z hold no values from a previous iteration yet.
159  if ( m_behind > 0 )
160  {
161  // FLA_Copy( upsilon11, minus_upsilon11 );
162  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
163  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
164  bl1_smult3( buff_m1, upsilon11, &minus_upsilon11 );
165  bl1_scopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
166 
167  // FLA_Copy( zeta11, minus_zeta11 );
168  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
169  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
170  bl1_smult3( buff_m1, zeta11, &minus_zeta11 );
171  bl1_scopyconj( &minus_zeta11, &minus_conj_zeta11 );
172 
173  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
174  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
     // NOTE(review): listing line 175 is absent on this page; presumably a bl1_saxpyv( ... call head -- confirm in source.
176  1,
177  &minus_upsilon11,
178  zeta11, 1,
179  alpha11, 1 );
     // NOTE(review): listing line 180 is absent on this page; presumably a bl1_saxpyv( ... call head -- confirm in source.
181  1,
182  &minus_zeta11,
183  upsilon11, 1,
184  alpha11, 1 );
185 
186  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
187  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
     // NOTE(review): listing line 188 is absent on this page; presumably a bl1_saxpyv( ... call head -- confirm in source.
189  m_ahead,
190  &minus_conj_zeta11,
191  u21, inc_u,
192  a21, rs_A );
     // NOTE(review): listing line 193 is absent on this page; presumably a bl1_saxpyv( ... call head -- confirm in source.
194  m_ahead,
195  &minus_conj_upsilon11,
196  z21, inc_z,
197  a21, rs_A );
198  }
199 
200  if ( m_ahead > 0 )
201  {
202  // FLA_Househ2_UT( FLA_LEFT,
203  // a21_t,
204  // a21_b, tau11 );
205  FLA_Househ2_UT_l_ops( m_ahead - 1,
206  a21_t,
207  a21_b, rs_A,
208  tau11 );
209 
210  // FLA_Set( FLA_ONE, inv_tau11 );
211  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
212  // FLA_Copy( inv_tau11, minus_inv_tau11 );
213  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
214  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
215  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
216 
217  // FLA_Copy( a21_t, first_elem );
218  // FLA_Set( FLA_ONE, a21_t );
     // The saved value is written back in the later m_ahead > 0 branch of this iteration.
219  first_elem = *a21_t;
220  *a21_t = *buff_1;
221  }
222 
     // Uses u21/z21 before they are overwritten below, i.e. the previous iteration's vectors.
223  if ( m_behind > 0 )
224  {
225  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
     // NOTE(review): listing line 226 is absent on this page; presumably the bl1_ssyr2( ... call head -- confirm in source.
227  m_ahead,
228  buff_m1,
229  u21, inc_u,
230  z21, inc_z,
231  A22, rs_A, cs_A );
232  }
233 
234  if ( m_ahead > 0 )
235  {
236  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
     // NOTE(review): listing line 237 is absent on this page; presumably the bl1_ssymv( ... call head -- confirm in source.
238  m_ahead,
239  buff_1,
240  A22, rs_A, cs_A,
241  a21, rs_A,
242  buff_0,
243  w21, inc_w );
244 
245  // FLA_Copy( a21, u21 );
246  // FLA_Copy( w21, z21 );
     // NOTE(review): listing line 247 is absent on this page; presumably a bl1_scopyv( ... call head -- confirm in source.
248  m_ahead,
249  a21, rs_A,
250  u21, inc_u );
     // NOTE(review): listing line 251 is absent on this page; presumably a bl1_scopyv( ... call head -- confirm in source.
252  m_ahead,
253  w21, inc_w,
254  z21, inc_z );
255 
256  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
257  // FLA_Inv_scal( FLA_TWO, beta );
     // NOTE(review): listing line 258 is absent on this page; presumably the bl1_sdot( ... call head -- confirm in source.
259  m_ahead,
260  a21, rs_A,
261  z21, inc_z,
262  &beta );
263  bl1_sinvscals( buff_2, &beta );
264 
265  // FLA_Scal( minus_inv_tau11, beta );
266  // FLA_Axpy( beta, a21, z21 );
267  // FLA_Scal( inv_tau11, z21 );
268  bl1_sscals( &minus_inv_tau11, &beta );
     // NOTE(review): listing line 269 is absent on this page; presumably the bl1_saxpyv( ... call head -- confirm in source.
270  m_ahead,
271  &beta,
272  a21, rs_A,
273  z21, inc_z );
     // NOTE(review): listing line 274 is absent on this page; presumably the bl1_sscalv( ... call head -- confirm in source.
275  m_ahead,
276  &inv_tau11,
277  z21, inc_z );
278 
279  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
     // NOTE(review): listing lines 280-281 are absent on this page; presumably the bl1_sgemv( ... call head and its leading flag arguments -- confirm in source.
282  m_ahead,
283  n_behind,
284  buff_1,
285  A20, rs_A, cs_A,
286  a21, rs_A,
287  buff_0,
288  t01, rs_T );
289 
290  // FLA_Copy( first_elem, a21_t );
291  *a21_t = first_elem;
292  }
293 
     // Last iteration only: there is no following iteration to apply this update.
294  if ( m_behind + 1 == b_alg && m_ahead > 0 )
295  {
296  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
     // NOTE(review): listing line 297 is absent on this page; presumably the bl1_ssyr2( ... call head -- confirm in source.
298  m_ahead,
299  buff_m1,
300  u21, inc_u,
301  z21, inc_z,
302  A22, rs_A, cs_A );
303  }
304 
305  /*------------------------------------------------------------*/
306 
307  }
308 
309  // FLA_Obj_free( &u );
310  // FLA_Obj_free( &z );
311  // FLA_Obj_free( &w );
312  FLA_free( buff_u );
313  FLA_free( buff_z );
314  FLA_free( buff_w );
315 
316  return FLA_SUCCESS;
317 }

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssymv(), bl1_ssyr2(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_ops_var3()

/*
 * FLA_Tridiag_UT_l_step_ops_var3 -- single-precision, variant-3 step kernel
 * that works directly on raw buffers and strides and takes an additional
 * workspace matrix Z.
 *
 * Z (m_A x b_alg) is first set to zero with bl1_ssetm().  Then b_alg = m_T
 * iterations run; within iteration i the visible code does, in order:
 *   1. if m_behind (= i) > 0: saves A(i, i-1) (a10t_r) and temporarily
 *      overwrites it with one;
 *   2. applies two Gemvc updates to a2, the m_ahead + 1 entries of column i
 *      of A starting at the diagonal, using ABL with z10t and ZBL with a10t;
 *   3. if m_behind > 0: restores A(i, i-1);
 *   4. if m_ahead > 0: calls FLA_Househ2_UT_l_ops() on a21 with
 *      tau11 = T(i,i), temporarily sets the first element of a21 to one,
 *      forms z21 (column i of Z below row i) from A22, A20 and Z20 via the
 *      scratch vectors d01 and f01, copies d01 into t01, adjusts z21, and
 *      restores the first element of a21.
 * The object-level equivalent of each step is given by the inline
 * "// FLA_...( ... )" comments.
 *
 * Parameters:
 *   m_A                 row count used for Z, the scratch vectors and m_ahead
 *   m_T                 number of iterations (b_alg) and column count used for Z
 *   buff_A, rs_A, cs_A  base pointer, row stride and column stride of A
 *   buff_Z, rs_Z, cs_Z  base pointer, row stride and column stride of Z
 *   buff_T, rs_T, cs_T  base pointer, row stride and column stride of T
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * The scratch vectors buff_d and buff_f come from FLA_malloc() and are
 * released with FLA_free() before returning; the FLA_malloc() results are
 * not checked here.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers; the
 * missing lines appear to be the heads of multi-line calls.  Each gap is
 * marked below -- confirm against FLA_Tridiag_UT_l_opt_var3.c.
 */
FLA_Error FLA_Tridiag_UT_l_step_ops_var3 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
123 {
124  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
125  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
126  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
127  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
128 
129  float first_elem, last_elem;
130  float beta;
131  float inv_tau11;
132  float minus_inv_tau11;
133  int i;
134 
135  // b_alg = FLA_Obj_length( T );
136  int b_alg = m_T;
137 
138  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
139  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
     // Two unit-stride scratch vectors of m_A elements; allocation results are not checked.
140  float* buff_d = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
141  float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
142  int inc_d = 1;
143  int inc_f = 1;
144 
145  // FLA_Set( FLA_ZERO, Z );
146  bl1_ssetm( m_A,
147  b_alg,
148  buff_0,
149  buff_Z, rs_Z, cs_Z );
150 
151  for ( i = 0; i < b_alg; ++i )
152  {
     // Sub-views of A: a10t starts at (i, 0), A20 at (i+1, 0), alpha11 is (i, i), a21 starts at (i+1, i), A22 at (i+1, i+1).
153  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
154  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
155  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
156  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
157  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
158 
     // Matching sub-views of Z.
159  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
160  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
161  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
162 
163  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
164  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
165 
166  float* d01 = buff_d + (0 )*inc_d;
167 
168  float* f01 = buff_f + (0 )*inc_f;
169 
     // Last element of row a10t, i.e. A(i, i-1).  For i == 0 the offset is -cs_A;
     // the pointer is only dereferenced inside the m_behind > 0 branches below.
170  float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
171 
172  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
173  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
174 
     // ABL / ZBL alias a10t / z10t; a2 aliases alpha11 (column i from the diagonal down).
175  float* ABL = a10t;
176  float* ZBL = z10t;
177 
178  float* a2 = alpha11;
179 
180  int m_ahead = m_A - i - 1;
181  int m_behind = i;
182  int n_behind = i;
183 
184  /*------------------------------------------------------------*/
185 
186  if ( m_behind > 0 )
187  {
188  // FLA_Copy( a10t_r, last_elem );
189  // FLA_Set( FLA_ONE, a10t_r );
190  last_elem = *a10t_r;
191  *a10t_r = *buff_1;
192  }
193 
194  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
195  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
     // NOTE(review): listing lines 196-197 are absent on this page; presumably the bl1_sgemv( ... call head and its leading flag arguments -- confirm in source.
198  m_ahead + 1,
199  n_behind,
200  buff_m1,
201  ABL, rs_A, cs_A,
202  z10t, cs_Z,
203  buff_1,
204  a2, rs_A );
     // NOTE(review): listing lines 205-206 are absent on this page; presumably the bl1_sgemv( ... call head and its leading flag arguments -- confirm in source.
207  m_ahead + 1,
208  n_behind,
209  buff_m1,
210  ZBL, rs_Z, cs_Z,
211  a10t, cs_A,
212  buff_1,
213  a2, rs_A );
214 
215  if ( m_behind > 0 )
216  {
217  // FLA_Copy( last_elem, a10t_r );
218  *a10t_r = last_elem;
219  }
220 
221  if ( m_ahead > 0 )
222  {
223  // FLA_Househ2_UT( FLA_LEFT,
224  // a21_t,
225  // a21_b, tau11 );
226  FLA_Househ2_UT_l_ops( m_ahead - 1,
227  a21_t,
228  a21_b, rs_A,
229  tau11 );
230 
231  // FLA_Set( FLA_ONE, inv_tau11 );
232  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
233  // FLA_Copy( inv_tau11, minus_inv_tau11 );
234  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
235  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
236  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
237 
238  // FLA_Copy( a21_t, first_elem );
239  // FLA_Set( FLA_ONE, a21_t );
     // The saved value is written back at the end of this branch.
240  first_elem = *a21_t;
241  *a21_t = *buff_1;
242 
243  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
     // NOTE(review): listing line 244 is absent on this page; presumably the bl1_ssymv( ... call head -- confirm in source.
245  m_ahead,
246  buff_1,
247  A22, rs_A, cs_A,
248  a21, rs_A,
249  buff_0,
250  z21, rs_Z );
251 
252  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
253  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
     // NOTE(review): listing lines 254-255 are absent on this page; presumably the bl1_sgemv( ... call head and its leading flag arguments -- confirm in source.
256  m_ahead,
257  n_behind,
258  buff_1,
259  A20, rs_A, cs_A,
260  a21, rs_A,
261  buff_0,
262  d01, inc_d );
     // NOTE(review): listing lines 263-264 are absent on this page; presumably the bl1_sgemv( ... call head and its leading flag arguments -- confirm in source.
265  m_ahead,
266  n_behind,
267  buff_1,
268  Z20, rs_Z, cs_Z,
269  a21, rs_A,
270  buff_0,
271  f01, inc_f );
272 
273  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
274  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
     // NOTE(review): listing lines 275-276 are absent on this page; presumably the bl1_sgemv( ... call head and its leading flag arguments -- confirm in source.
277  m_ahead,
278  n_behind,
279  buff_m1,
280  A20, rs_A, cs_A,
281  f01, inc_f,
282  buff_1,
283  z21, rs_Z );
     // NOTE(review): listing lines 284-285 are absent on this page; presumably the bl1_sgemv( ... call head and its leading flag arguments -- confirm in source.
286  m_ahead,
287  n_behind,
288  buff_m1,
289  Z20, rs_Z, cs_Z,
290  d01, inc_d,
291  buff_1,
292  z21, rs_Z );
293 
294  // FLA_Copy( d01, t01 );
     // NOTE(review): listing line 295 is absent on this page; presumably the bl1_scopyv( ... call head -- confirm in source.
296  n_behind,
297  d01, inc_d,
298  t01, rs_T );
299 
300  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
301  // FLA_Inv_scal( FLA_TWO, beta );
     // NOTE(review): listing line 302 is absent on this page; presumably the bl1_sdot( ... call head -- confirm in source.
303  m_ahead,
304  a21, rs_A,
305  z21, rs_Z,
306  &beta );
307  bl1_sinvscals( buff_2, &beta );
308 
309  // FLA_Scal( minus_inv_tau11, beta );
310  // FLA_Axpy( beta, a21, z21 );
311  // FLA_Scal( inv_tau11, z21 );
312  bl1_sscals( &minus_inv_tau11, &beta );
     // NOTE(review): listing line 313 is absent on this page; presumably the bl1_saxpyv( ... call head -- confirm in source.
314  m_ahead,
315  &beta,
316  a21, rs_A,
317  z21, rs_Z );
     // NOTE(review): listing line 318 is absent on this page; presumably the bl1_sscalv( ... call head -- confirm in source.
319  m_ahead,
320  &inv_tau11,
321  z21, rs_Z );
322 
323  // FLA_Copy( first_elem, a21_t );
324  *a21_t = first_elem;
325  }
326 
327  /*------------------------------------------------------------*/
328 
329  }
330 
331  // FLA_Obj_free( &d );
332  // FLA_Obj_free( &f );
333  FLA_free( buff_d );
334  FLA_free( buff_f );
335 
336  return FLA_SUCCESS;
337 }

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sscalv(), bl1_ssetm(), bl1_ssymv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_opt_var1()

/*
 * FLA_Tridiag_UT_l_step_opt_var1 -- object-level front end for the variant-1
 * step kernels.
 *
 * Reads the datatype of A, the lengths of A and T, and the row/column
 * strides of both objects, then switches on the datatype.  Each of the four
 * cases (FLA_FLOAT, FLA_DOUBLE, FLA_COMPLEX, FLA_DOUBLE_COMPLEX) extracts
 * typed buffer pointers for A and T and passes them, together with the
 * dimensions and strides, to a typed routine.
 *
 * Parameters:
 *   A   object supplying the datatype, m_A, strides and buffer
 *   T   object supplying m_T, strides and buffer
 *
 * Returns FLA_SUCCESS unconditionally.  The switch has no default case, so
 * any other datatype value falls through without doing any work.
 *
 * NOTE(review): the first line of each dispatch call is absent from this
 * listing (gaps at listing lines 44, 57, 70 and 83).  Per the References
 * list of this page they are presumably the calls to
 * FLA_Tridiag_UT_l_step_ops_var1 / _opd_ / _opc_ / _opz_, each starting with
 * "( m_A," -- confirm against FLA_Tridiag_UT_l_opt_var1.c.
 */
FLA_Error FLA_Tridiag_UT_l_step_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
    // NOTE(review): listing line 44 is absent on this page; presumably "FLA_Tridiag_UT_l_step_ops_var1( m_A," -- confirm in source.
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
    // NOTE(review): listing line 57 is absent on this page; presumably "FLA_Tridiag_UT_l_step_opd_var1( m_A," -- confirm in source.
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
    // NOTE(review): listing line 70 is absent on this page; presumably "FLA_Tridiag_UT_l_step_opc_var1( m_A," -- confirm in source.
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
    // NOTE(review): listing line 83 is absent on this page; presumably "FLA_Tridiag_UT_l_step_opz_var1( m_A," -- confirm in source.
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Tridiag_UT_l_step_ops_var1(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:97
FLA_Error FLA_Tridiag_UT_l_step_opd_var1(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:228
FLA_Error FLA_Tridiag_UT_l_step_opc_var1(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:359
FLA_Error FLA_Tridiag_UT_l_step_opz_var1(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var1.c:492

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var1(), FLA_Tridiag_UT_l_step_opd_var1(), FLA_Tridiag_UT_l_step_ops_var1(), and FLA_Tridiag_UT_l_step_opz_var1().

Referenced by FLA_Tridiag_UT_l_blk_var1(), and FLA_Tridiag_UT_l_opt_var1().

◆ FLA_Tridiag_UT_l_step_opt_var2()

FLA_Error FLA_Tridiag_UT_l_step_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
44  FLA_Tridiag_UT_l_step_ops_var2( m_A,
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
57  FLA_Tridiag_UT_l_step_opd_var2( m_A,
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
70  FLA_Tridiag_UT_l_step_opc_var2( m_A,
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
83  FLA_Tridiag_UT_l_step_opz_var2( m_A,
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Tridiag_UT_l_step_opc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var2.c:545
FLA_Error FLA_Tridiag_UT_l_step_opd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var2.c:321
FLA_Error FLA_Tridiag_UT_l_step_ops_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var2.c:97
FLA_Error FLA_Tridiag_UT_l_step_opz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var2.c:772

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var2(), FLA_Tridiag_UT_l_step_opd_var2(), FLA_Tridiag_UT_l_step_ops_var2(), and FLA_Tridiag_UT_l_step_opz_var2().

Referenced by FLA_Tridiag_UT_l_blk_var2(), and FLA_Tridiag_UT_l_opt_var2().

◆ FLA_Tridiag_UT_l_step_opt_var3()

FLA_Error FLA_Tridiag_UT_l_step_opt_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)
28 {
29  FLA_Datatype datatype;
30  int m_A, m_T;
31  int rs_A, cs_A;
32  int rs_Z, cs_Z;
33  int rs_T, cs_T;
34 
35  datatype = FLA_Obj_datatype( A );
36 
37  m_A = FLA_Obj_length( A );
38  m_T = FLA_Obj_length( T );
39 
40  rs_A = FLA_Obj_row_stride( A );
41  cs_A = FLA_Obj_col_stride( A );
42 
43  rs_Z = FLA_Obj_row_stride( Z );
44  cs_Z = FLA_Obj_col_stride( Z );
45 
46  rs_T = FLA_Obj_row_stride( T );
47  cs_T = FLA_Obj_col_stride( T );
48 
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  float* buff_A = FLA_FLOAT_PTR( A );
55  float* buff_Z = FLA_FLOAT_PTR( Z );
56  float* buff_T = FLA_FLOAT_PTR( T );
57 
58  FLA_Tridiag_UT_l_step_ops_var3( m_A,
59  m_T,
60  buff_A, rs_A, cs_A,
61  buff_Z, rs_Z, cs_Z,
62  buff_T, rs_T, cs_T );
63 
64  break;
65  }
66 
67  case FLA_DOUBLE:
68  {
69  double* buff_A = FLA_DOUBLE_PTR( A );
70  double* buff_Z = FLA_DOUBLE_PTR( Z );
71  double* buff_T = FLA_DOUBLE_PTR( T );
72 
73  FLA_Tridiag_UT_l_step_opd_var3( m_A,
74  m_T,
75  buff_A, rs_A, cs_A,
76  buff_Z, rs_Z, cs_Z,
77  buff_T, rs_T, cs_T );
78 
79  break;
80  }
81 
82  case FLA_COMPLEX:
83  {
84  scomplex* buff_A = FLA_COMPLEX_PTR( A );
85  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
86  scomplex* buff_T = FLA_COMPLEX_PTR( T );
87 
88  FLA_Tridiag_UT_l_step_opc_var3( m_A,
89  m_T,
90  buff_A, rs_A, cs_A,
91  buff_Z, rs_Z, cs_Z,
92  buff_T, rs_T, cs_T );
93 
94  break;
95  }
96 
97  case FLA_DOUBLE_COMPLEX:
98  {
99  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
100  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
101  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
102 
103  FLA_Tridiag_UT_l_step_opz_var3( m_A,
104  m_T,
105  buff_A, rs_A, cs_A,
106  buff_Z, rs_Z, cs_Z,
107  buff_T, rs_T, cs_T );
108 
109  break;
110  }
111  }
112 
113  return FLA_SUCCESS;
114 }
FLA_Error FLA_Tridiag_UT_l_step_opd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var3.c:341
FLA_Error FLA_Tridiag_UT_l_step_opc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var3.c:564
FLA_Error FLA_Tridiag_UT_l_step_ops_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var3.c:118
FLA_Error FLA_Tridiag_UT_l_step_opz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Tridiag_UT_l_opt_var3.c:788

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Tridiag_UT_l_step_opc_var3(), FLA_Tridiag_UT_l_step_opd_var3(), FLA_Tridiag_UT_l_step_ops_var3(), and FLA_Tridiag_UT_l_step_opz_var3().

Referenced by FLA_Tridiag_UT_l_blk_var3(), and FLA_Tridiag_UT_l_opt_var3().

◆ FLA_Tridiag_UT_l_step_opz_var1()

FLA_Error FLA_Tridiag_UT_l_step_opz_var1 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
496 {
497  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
498  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
499  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
500  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
501 
502  dcomplex first_elem;
503  dcomplex beta;
504  dcomplex inv_tau11;
505  dcomplex minus_inv_tau11;
506  int i;
507 
508  // b_alg = FLA_Obj_length( T );
509  int b_alg = m_T;
510 
511  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
512  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
513  int inc_z = 1;
514 
515  for ( i = 0; i < b_alg; ++i )
516  {
517  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
518  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
519  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
520 
521  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
522  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
523 
524  dcomplex* z21 = buff_z + (i+1)*inc_z;
525 
526  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
527  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
528 
529  int m_ahead = m_A - i - 1;
530  int n_behind = i;
531 
532  /*------------------------------------------------------------*/
533 
534  if ( m_ahead > 0 )
535  {
536  // FLA_Househ2_UT( FLA_LEFT,
537  // a21_t,
538  // a21_b, tau11 );
539  FLA_Househ2_UT_l_opz( m_ahead - 1,
540  a21_t,
541  a21_b, rs_A,
542  tau11 );
543 
544  // FLA_Set( FLA_ONE, inv_tau11 );
545  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
546  // FLA_Copy( inv_tau11, minus_inv_tau11 );
547  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
548  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
549  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
550 
551  // FLA_Copy( a21_t, first_elem );
552  // FLA_Set( FLA_ONE, a21_t );
553  first_elem = *a21_t;
554  *a21_t = *buff_1;
555 
556  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
557  bl1_zhemv( BLIS1_LOWER_TRIANGULAR,
558  BLIS1_NO_CONJUGATE,
559  m_ahead,
560  buff_1,
561  A22, rs_A, cs_A,
562  a21, rs_A,
563  buff_0,
564  z21, inc_z );
565 
566  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
567  // FLA_Inv_scal( FLA_TWO, beta );
568  bl1_zdot( BLIS1_CONJUGATE,
569  m_ahead,
570  a21, rs_A,
571  z21, inc_z,
572  &beta );
573  bl1_zinvscals( buff_2, &beta );
574 
575  // FLA_Scal( minus_inv_tau11, beta );
576  // FLA_Axpy( beta, a21, z21 );
577  // FLA_Scal( inv_tau11, z21 );
578  bl1_zscals( &minus_inv_tau11, &beta );
579  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
580  m_ahead,
581  &beta,
582  a21, rs_A,
583  z21, inc_z );
584  bl1_zscalv( BLIS1_NO_CONJUGATE,
585  m_ahead,
586  &inv_tau11,
587  z21, inc_z );
588 
589  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
590  bl1_zher2( BLIS1_LOWER_TRIANGULAR,
591  BLIS1_NO_CONJUGATE,
592  m_ahead,
593  buff_m1,
594  a21, rs_A,
595  z21, inc_z,
596  A22, rs_A, cs_A );
597 
598  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
599  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
600  BLIS1_NO_CONJUGATE,
601  m_ahead,
602  n_behind,
603  buff_1,
604  A20, rs_A, cs_A,
605  a21, rs_A,
606  buff_0,
607  t01, rs_T );
608 
609  // FLA_Copy( first_elem, a21_t );
610  *a21_t = first_elem;
611  }
612 
613  /*------------------------------------------------------------*/
614 
615  }
616 
617  // FLA_Obj_free( &z );
618  FLA_free( buff_z );
619 
620  return FLA_SUCCESS;
621 }

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var1().

◆ FLA_Tridiag_UT_l_step_opz_var2()

FLA_Error FLA_Tridiag_UT_l_step_opz_var2 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
776 {
777  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
778  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
779  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
780  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
781 
782  dcomplex first_elem;
783  dcomplex beta;
784  dcomplex inv_tau11;
785  dcomplex minus_inv_tau11;
786  dcomplex minus_upsilon11, minus_conj_upsilon11;
787  dcomplex minus_zeta11, minus_conj_zeta11;
788  int i;
789 
790  // b_alg = FLA_Obj_length( T );
791  int b_alg = m_T;
792 
793  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
794  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
795  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
796  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
797  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
798  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
799  int inc_u = 1;
800  int inc_z = 1;
801  int inc_w = 1;
802 
803  // Initialize some variables (only to prevent compiler warnings).
804  first_elem = *buff_0;
805  minus_inv_tau11 = *buff_0;
806 
807  for ( i = 0; i < b_alg; ++i )
808  {
809  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
810  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
811  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
812  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
813 
814  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
815  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
816 
817  dcomplex* upsilon11= buff_u + (i )*inc_u;
818  dcomplex* u21 = buff_u + (i+1)*inc_u;
819 
820  dcomplex* zeta11 = buff_z + (i )*inc_z;
821  dcomplex* z21 = buff_z + (i+1)*inc_z;
822 
823  dcomplex* w21 = buff_w + (i+1)*inc_w;
824 
825  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
826  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
827 
828  int m_ahead = m_A - i - 1;
829  int m_behind = i;
830  int n_behind = i;
831 
832  /*------------------------------------------------------------*/
833 
834  if ( m_behind > 0 )
835  {
836  // FLA_Copy( upsilon11, minus_upsilon11 );
837  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
838  // FLA_Copy( minus_upsilon11, minus_conj_upsilon11 );
839  bl1_zmult3( buff_m1, upsilon11, &minus_upsilon11 );
840  bl1_zcopyconj( &minus_upsilon11, &minus_conj_upsilon11 );
841 
842  // FLA_Copy( zeta11, minus_zeta11 );
843  // FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
844  // FLA_Copy( minus_zeta11, minus_conj_zeta11 );
845  bl1_zmult3( buff_m1, zeta11, &minus_zeta11 );
846  bl1_zcopyconj( &minus_zeta11, &minus_conj_zeta11 );
847 
848  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
849  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
850  bl1_zaxpyv( BLIS1_CONJUGATE,
851  1,
852  &minus_upsilon11,
853  zeta11, 1,
854  alpha11, 1 );
855  bl1_zaxpyv( BLIS1_CONJUGATE,
856  1,
857  &minus_zeta11,
858  upsilon11, 1,
859  alpha11, 1 );
860 
861  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
862  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
863  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
864  m_ahead,
865  &minus_conj_zeta11,
866  u21, inc_u,
867  a21, rs_A );
868  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
869  m_ahead,
870  &minus_conj_upsilon11,
871  z21, inc_z,
872  a21, rs_A );
873  }
874 
875  if ( m_ahead > 0 )
876  {
877  // FLA_Househ2_UT( FLA_LEFT,
878  // a21_t,
879  // a21_b, tau11 );
880  FLA_Househ2_UT_l_opz( m_ahead - 1,
881  a21_t,
882  a21_b, rs_A,
883  tau11 );
884 
885  // FLA_Set( FLA_ONE, inv_tau11 );
886  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
887  // FLA_Copy( inv_tau11, minus_inv_tau11 );
888  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
889  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
890  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
891 
892  // FLA_Copy( a21_t, first_elem );
893  // FLA_Set( FLA_ONE, a21_t );
894  first_elem = *a21_t;
895  *a21_t = *buff_1;
896  }
897 
898  if ( m_behind > 0 )
899  {
900  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
901  bl1_zher2( BLIS1_LOWER_TRIANGULAR,
902  BLIS1_NO_CONJUGATE,
903  m_ahead,
904  buff_m1,
905  u21, inc_u,
906  z21, inc_z,
907  A22, rs_A, cs_A );
908  }
909 
910  if ( m_ahead > 0 )
911  {
912  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
913  bl1_zhemv( BLIS1_LOWER_TRIANGULAR,
914  BLIS1_NO_CONJUGATE,
915  m_ahead,
916  buff_1,
917  A22, rs_A, cs_A,
918  a21, rs_A,
919  buff_0,
920  w21, inc_w );
921 
922  // FLA_Copy( a21, u21 );
923  // FLA_Copy( w21, z21 );
924  bl1_zcopyv( BLIS1_NO_CONJUGATE,
925  m_ahead,
926  a21, rs_A,
927  u21, inc_u );
928  bl1_zcopyv( BLIS1_NO_CONJUGATE,
929  m_ahead,
930  w21, inc_w,
931  z21, inc_z );
932 
933  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
934  // FLA_Inv_scal( FLA_TWO, beta );
935  bl1_zdot( BLIS1_CONJUGATE,
936  m_ahead,
937  a21, rs_A,
938  z21, inc_z,
939  &beta );
940  bl1_zinvscals( buff_2, &beta );
941 
942  // FLA_Scal( minus_inv_tau11, beta );
943  // FLA_Axpy( beta, a21, z21 );
944  // FLA_Scal( inv_tau11, z21 );
945  bl1_zscals( &minus_inv_tau11, &beta );
946  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
947  m_ahead,
948  &beta,
949  a21, rs_A,
950  z21, inc_z );
951  bl1_zscalv( BLIS1_NO_CONJUGATE,
952  m_ahead,
953  &inv_tau11,
954  z21, inc_z );
955 
956  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
957  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
958  BLIS1_NO_CONJUGATE,
959  m_ahead,
960  n_behind,
961  buff_1,
962  A20, rs_A, cs_A,
963  a21, rs_A,
964  buff_0,
965  t01, rs_T );
966 
967  // FLA_Copy( first_elem, a21_t );
968  *a21_t = first_elem;
969  }
970 
971  if ( m_behind + 1 == b_alg && m_ahead > 0 )
972  {
973  // FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
974  bl1_zher2( BLIS1_LOWER_TRIANGULAR,
975  BLIS1_NO_CONJUGATE,
976  m_ahead,
977  buff_m1,
978  u21, inc_u,
979  z21, inc_z,
980  A22, rs_A, cs_A );
981  }
982 
983  /*------------------------------------------------------------*/
984 
985  }
986 
987  // FLA_Obj_free( &u );
988  // FLA_Obj_free( &z );
989  // FLA_Obj_free( &w );
990  FLA_free( buff_u );
991  FLA_free( buff_z );
992  FLA_free( buff_w );
993 
994  return FLA_SUCCESS;
995 }

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zher2(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var2().

◆ FLA_Tridiag_UT_l_step_opz_var3()

FLA_Error FLA_Tridiag_UT_l_step_opz_var3 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
793 {
794  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
795  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
796  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
797  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
798 
799  dcomplex first_elem, last_elem;
800  dcomplex beta;
801  dcomplex inv_tau11;
802  dcomplex minus_inv_tau11;
803  int i;
804 
805  // b_alg = FLA_Obj_length( T );
806  int b_alg = m_T;
807 
808  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
809  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
810  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
811  dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
812  int inc_d = 1;
813  int inc_f = 1;
814 
815  // FLA_Set( FLA_ZERO, Z );
816  bl1_zsetm( m_A,
817  b_alg,
818  buff_0,
819  buff_Z, rs_Z, cs_Z );
820 
821  for ( i = 0; i < b_alg; ++i )
822  {
823  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
824  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
825  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
826  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
827  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
828 
829  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
830  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
831  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
832 
833  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
834  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
835 
836  dcomplex* d01 = buff_d + (0 )*inc_d;
837 
838  dcomplex* f01 = buff_f + (0 )*inc_f;
839 
840  dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
841 
842  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
843  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
844 
845  dcomplex* ABL = a10t;
846  dcomplex* ZBL = z10t;
847 
848  dcomplex* a2 = alpha11;
849 
850  int m_ahead = m_A - i - 1;
851  int m_behind = i;
852  int n_behind = i;
853 
854  /*------------------------------------------------------------*/
855 
856  if ( m_behind > 0 )
857  {
858  // FLA_Copy( a10t_r, last_elem );
859  // FLA_Set( FLA_ONE, a10t_r );
860  last_elem = *a10t_r;
861  *a10t_r = *buff_1;
862  }
863 
864  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
865  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
866  bl1_zgemv( BLIS1_NO_TRANSPOSE,
867  BLIS1_CONJUGATE,
868  m_ahead + 1,
869  n_behind,
870  buff_m1,
871  ABL, rs_A, cs_A,
872  z10t, cs_Z,
873  buff_1,
874  a2, rs_A );
875  bl1_zgemv( BLIS1_NO_TRANSPOSE,
876  BLIS1_CONJUGATE,
877  m_ahead + 1,
878  n_behind,
879  buff_m1,
880  ZBL, rs_Z, cs_Z,
881  a10t, cs_A,
882  buff_1,
883  a2, rs_A );
884 
885  if ( m_behind > 0 )
886  {
887  // FLA_Copy( last_elem, a10t_r );
888  *a10t_r = last_elem;
889  }
890 
891  if ( m_ahead > 0 )
892  {
893  // FLA_Househ2_UT( FLA_LEFT,
894  // a21_t,
895  // a21_b, tau11 );
896  FLA_Househ2_UT_l_opz( m_ahead - 1,
897  a21_t,
898  a21_b, rs_A,
899  tau11 );
900 
901  // FLA_Set( FLA_ONE, inv_tau11 );
902  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
903  // FLA_Copy( inv_tau11, minus_inv_tau11 );
904  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
905  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
906  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
907 
908  // FLA_Copy( a21_t, first_elem );
909  // FLA_Set( FLA_ONE, a21_t );
910  first_elem = *a21_t;
911  *a21_t = *buff_1;
912 
913  // FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
914  bl1_zhemv( BLIS1_LOWER_TRIANGULAR,
915  BLIS1_NO_CONJUGATE,
916  m_ahead,
917  buff_1,
918  A22, rs_A, cs_A,
919  a21, rs_A,
920  buff_0,
921  z21, rs_Z );
922 
923  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );
924  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );
925  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
926  BLIS1_NO_CONJUGATE,
927  m_ahead,
928  n_behind,
929  buff_1,
930  A20, rs_A, cs_A,
931  a21, rs_A,
932  buff_0,
933  d01, inc_d );
934  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
935  BLIS1_NO_CONJUGATE,
936  m_ahead,
937  n_behind,
938  buff_1,
939  Z20, rs_Z, cs_Z,
940  a21, rs_A,
941  buff_0,
942  f01, inc_f );
943 
944  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
945  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
946  bl1_zgemv( BLIS1_NO_TRANSPOSE,
947  BLIS1_NO_CONJUGATE,
948  m_ahead,
949  n_behind,
950  buff_m1,
951  A20, rs_A, cs_A,
952  f01, inc_f,
953  buff_1,
954  z21, rs_Z );
955  bl1_zgemv( BLIS1_NO_TRANSPOSE,
956  BLIS1_NO_CONJUGATE,
957  m_ahead,
958  n_behind,
959  buff_m1,
960  Z20, rs_Z, cs_Z,
961  d01, inc_d,
962  buff_1,
963  z21, rs_Z );
964 
965  // FLA_Copy( d01, t01 );
966  bl1_zcopyv( BLIS1_NO_CONJUGATE,
967  n_behind,
968  d01, inc_d,
969  t01, rs_T );
970 
971  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
972  // FLA_Inv_scal( FLA_TWO, beta );
973  bl1_zdot( BLIS1_CONJUGATE,
974  m_ahead,
975  a21, rs_A,
976  z21, rs_Z,
977  &beta );
978  bl1_zinvscals( buff_2, &beta );
979 
980  // FLA_Scal( minus_inv_tau11, beta );
981  // FLA_Axpy( beta, a21, z21 );
982  // FLA_Scal( inv_tau11, z21 );
983  bl1_zscals( &minus_inv_tau11, &beta );
984  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
985  m_ahead,
986  &beta,
987  a21, rs_A,
988  z21, rs_Z );
989  bl1_zscalv( BLIS1_NO_CONJUGATE,
990  m_ahead,
991  &inv_tau11,
992  z21, rs_Z );
993 
994  // FLA_Copy( first_elem, a21_t );
995  *a21_t = first_elem;
996  }
997 
998  /*------------------------------------------------------------*/
999 
1000  }
1001 
1002  // FLA_Obj_free( &d );
1003  // FLA_Obj_free( &f );
1004  FLA_free( buff_d );
1005  FLA_free( buff_f );
1006 
1007  return FLA_SUCCESS;
1008 }

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zhemv(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Tridiag_UT_l_step_opt_var3().

◆ FLA_Tridiag_UT_l_step_unb_var1()

FLA_Error FLA_Tridiag_UT_l_step_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj TTL, TTR, T00, t01, T02,
24  TBL, TBR, t10t, tau11, t12t,
25  T20, t21, T22;
26  FLA_Obj zT, z01,
27  zB, zeta11,
28  z21;
29  FLA_Obj z;
30 
31  FLA_Obj inv_tau11;
32  FLA_Obj minus_inv_tau11;
33  FLA_Obj first_elem;
34  FLA_Obj beta;
35 
36  FLA_Obj a21_t,
37  a21_b;
38 
39  FLA_Datatype datatype_A;
40  dim_t m_A;
41  dim_t b_alg;
42 
43 
44  b_alg = FLA_Obj_length( T );
45 
46  datatype_A = FLA_Obj_datatype( A );
47  m_A = FLA_Obj_length( A );
48 
49  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
50  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
51  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
52  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
53  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
54 
55  FLA_Part_2x2( A, &ATL, &ATR,
56  &ABL, &ABR, 0, 0, FLA_TL );
57  FLA_Part_2x2( T, &TTL, &TTR,
58  &TBL, &TBR, 0, 0, FLA_TL );
59  FLA_Part_2x1( z, &zT,
60  &zB, 0, FLA_TOP );
61 
62  while ( FLA_Obj_length( ATL ) < b_alg )
63  {
64  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
65  /* ************* */ /* ************************** */
66  &a10t, /**/ &alpha11, &a12t,
67  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
68  1, 1, FLA_BR );
69  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
70  /* ************* */ /* ************************ */
71  &t10t, /**/ &tau11, &t12t,
72  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
73  1, 1, FLA_BR );
74  FLA_Repart_2x1_to_3x1( zT, &z01,
75  /* ** */ /* ****** */
76  &zeta11,
77  zB, &z21, 1, FLA_BOTTOM );
78 
79  /*------------------------------------------------------------*/
80 
81  if ( FLA_Obj_length( A22 ) > 0 )
82  {
83  FLA_Part_2x1( a21, &a21_t,
84  &a21_b, 1, FLA_TOP );
85 
86  // [ u21, tau11, a21 ] = House( a21 );
87  FLA_Househ2_UT( FLA_LEFT,
88  a21_t,
89  a21_b, tau11 );
90 
91  // inv_tau11 = 1 / tau11;
92  // minus_inv_tau11 = -1 / tau11;
93  FLA_Set( FLA_ONE, inv_tau11 );
94  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
95  FLA_Copy( inv_tau11, minus_inv_tau11 );
96  FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
97 
98  // Save first element of a21_t and set it to one so we can use a21 as
99  // u21 in subsequent computations. We will restore a21_t later on.
100  FLA_Copy( a21_t, first_elem );
101  FLA_Set( FLA_ONE, a21_t );
102 
103  // z21 = A22 * u21;
104  FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
105 
106  // beta = u21' * z21 / 2;
107  FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
108  FLA_Inv_scal( FLA_TWO, beta );
109 
110  // z21 = ( z21 - beta / tau * u21 ) / tau;
111  FLA_Scal( minus_inv_tau11, beta );
112  FLA_Axpy( beta, a21, z21 );
113  FLA_Scal( inv_tau11, z21 );
114 
115  // A22 = A22 - u21 * z21' - z21 * u21';
116  FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, a21, z21, A22 );
117 
118  // t01 = U20' * u21;
119  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
120 
121  // Restore first element of a21.
122  FLA_Copy( first_elem, a21_t );
123  }
124 
125  /*------------------------------------------------------------*/
126 
127  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
128  a10t, alpha11, /**/ a12t,
129  /* ************** */ /* ************************ */
130  &ABL, /**/ &ABR, A20, a21, /**/ A22,
131  FLA_TL );
132  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
133  t10t, tau11, /**/ t12t,
134  /* ************** */ /* ********************** */
135  &TBL, /**/ &TBR, T20, t21, /**/ T22,
136  FLA_TL );
137  FLA_Cont_with_3x1_to_2x1( &zT, z01,
138  zeta11,
139  /* ** */ /* ****** */
140  &zB, z21, FLA_TOP );
141  }
142 
143  FLA_Obj_free( &inv_tau11 );
144  FLA_Obj_free( &minus_inv_tau11 );
145  FLA_Obj_free( &first_elem );
146  FLA_Obj_free( &beta );
147  FLA_Obj_free( &z );
148 
149  return FLA_SUCCESS;
150 }
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dotc.c:13
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy.c:15
FLA_Error FLA_Inv_scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scal.c:13
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc.c:13
FLA_Error FLA_Her2(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition: FLA_Her2.c:13
FLA_Error FLA_Hemv(FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Hemv.c:13
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16

References FLA_Axpy(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Dotc(), FLA_Gemv(), FLA_Hemv(), FLA_Her2(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, and FLA_ZERO.

Referenced by FLA_Tridiag_UT_l_unb_var1().

◆ FLA_Tridiag_UT_l_step_unb_var2()

FLA_Error FLA_Tridiag_UT_l_step_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj TTL, TTR, T00, t01, T02,
24  TBL, TBR, t10t, tau11, t12t,
25  T20, t21, T22;
26  FLA_Obj uT, u01,
27  uB, upsilon11,
28  u21;
29  FLA_Obj zT, z01,
30  zB, zeta11,
31  z21;
32  FLA_Obj wT, w01,
33  wB, omega11,
34  w21;
35  FLA_Obj u, z, w;
36 
37  FLA_Obj inv_tau11;
38  FLA_Obj minus_inv_tau11;
39  FLA_Obj first_elem;
40  FLA_Obj beta;
41  FLA_Obj minus_upsilon11;
42  FLA_Obj minus_conj_upsilon11;
43  FLA_Obj minus_zeta11;
44  FLA_Obj minus_conj_zeta11;
45 
46  FLA_Obj a21_t,
47  a21_b;
48 
49  FLA_Datatype datatype_A;
50  dim_t m_A;
51  dim_t b_alg;
52 
53 
54  b_alg = FLA_Obj_length( T );
55 
56  datatype_A = FLA_Obj_datatype( A );
57  m_A = FLA_Obj_length( A );
58 
59  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
60  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
61  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
62  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
63  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_upsilon11 );
64  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_upsilon11 );
65  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta11 );
66  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_zeta11 );
67  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
68  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
69  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
70 
71  FLA_Part_2x2( A, &ATL, &ATR,
72  &ABL, &ABR, 0, 0, FLA_TL );
73  FLA_Part_2x2( T, &TTL, &TTR,
74  &TBL, &TBR, 0, 0, FLA_TL );
75  FLA_Part_2x1( u, &uT,
76  &uB, 0, FLA_TOP );
77  FLA_Part_2x1( z, &zT,
78  &zB, 0, FLA_TOP );
79  FLA_Part_2x1( w, &wT,
80  &wB, 0, FLA_TOP );
81 
82  while ( FLA_Obj_length( ATL ) < b_alg )
83  {
84  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
85  /* ************* */ /* ************************** */
86  &a10t, /**/ &alpha11, &a12t,
87  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
88  1, 1, FLA_BR );
89  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
90  /* ************* */ /* ************************ */
91  &t10t, /**/ &tau11, &t12t,
92  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
93  1, 1, FLA_BR );
94  FLA_Repart_2x1_to_3x1( uT, &u01,
95  /* ** */ /* ********* */
96  &upsilon11,
97  uB, &u21, 1, FLA_BOTTOM );
98  FLA_Repart_2x1_to_3x1( zT, &z01,
99  /* ** */ /* ****** */
100  &zeta11,
101  zB, &z21, 1, FLA_BOTTOM );
102  FLA_Repart_2x1_to_3x1( wT, &w01,
103  /* ** */ /* ******* */
104  &omega11,
105  wB, &w21, 1, FLA_BOTTOM );
106 
107  /*------------------------------------------------------------*/
108 
109  if ( FLA_Obj_length( ATL ) > 0 )
110  {
111  FLA_Copy( upsilon11, minus_upsilon11 );
112  FLA_Scal( FLA_MINUS_ONE, minus_upsilon11 );
113  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, minus_conj_upsilon11 );
114 
115  FLA_Copy( zeta11, minus_zeta11 );
116  FLA_Scal( FLA_MINUS_ONE, minus_zeta11 );
117  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, minus_conj_zeta11 );
118 
119  // alpha11 = alpha11 - upsilon11 * conj(zeta11) - zeta11 * conj(upsilon11);
120  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon11, zeta11, alpha11 );
121  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta11, upsilon11, alpha11 );
122 
123  // a21 = a21 - conj(zeta11) * u21 - conj(upsilon11) * z21;
124  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_zeta11, u21, a21 );
125  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon11, z21, a21 );
126  }
127 
128  if ( FLA_Obj_length( A22 ) > 0 )
129  {
130  FLA_Part_2x1( a21, &a21_t,
131  &a21_b, 1, FLA_TOP );
132 
133  // [ x21, tau11, a21 ] = House( a21 );
134  FLA_Househ2_UT( FLA_LEFT,
135  a21_t,
136  a21_b, tau11 );
137 
138  // inv_tau11 = 1 / tau11;
139  // minus_inv_tau11 = -1 / tau11;
140  FLA_Set( FLA_ONE, inv_tau11 );
141  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
142  FLA_Copy( inv_tau11, minus_inv_tau11 );
143  FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
144 
145  // Save first element of a21_t and set it to one so we can use a21 as
146  // u21 in subsequent computations. We will restore a21_t later on.
147  FLA_Copy( a21_t, first_elem );
148  FLA_Set( FLA_ONE, a21_t );
149  }
150 
151  if ( FLA_Obj_length( ATL ) > 0 )
152  {
153  // A22 = A22 - u21 * z21' - z21 * u21';
154  FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
155  }
156 
157  if ( FLA_Obj_length( A22 ) > 0 )
158  {
159  // w21 = A22 * x21;
160  FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, w21 );
161 
162  // u21 = x21;
163  // z21 = w21;
164  FLA_Copy( a21, u21 );
165  FLA_Copy( w21, z21 );
166 
167  // beta = u21' * z21 / 2;
168  FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
169  FLA_Inv_scal( FLA_TWO, beta );
170 
171  // z21 = ( z21 - beta / tau * u21 ) / tau;
172  FLA_Scal( minus_inv_tau11, beta );
173  FLA_Axpy( beta, a21, z21 );
174  FLA_Scal( inv_tau11, z21 );
175 
176  // t01 = U20' * u21;
177  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
178 
179  // Restore first element of a21.
180  FLA_Copy( first_elem, a21_t );
181  }
182 
183  // Update A22 if this is the last iteration; this is needed when we're
184  // being called from the blocked routine so A22 is left in a valid state.
185  if ( FLA_Obj_length( ATL ) + 1 == b_alg &&
186  FLA_Obj_length( A22 ) > 0 )
187  {
188  // A22 = A22 - u21 * z21' - z21 * u21';
189  FLA_Her2( FLA_LOWER_TRIANGULAR, FLA_MINUS_ONE, u21, z21, A22 );
190  }
191 
192  /*------------------------------------------------------------*/
193 
194  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
195  a10t, alpha11, /**/ a12t,
196  /* ************** */ /* ************************ */
197  &ABL, /**/ &ABR, A20, a21, /**/ A22,
198  FLA_TL );
199  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
200  t10t, tau11, /**/ t12t,
201  /* ************** */ /* ********************** */
202  &TBL, /**/ &TBR, T20, t21, /**/ T22,
203  FLA_TL );
204  FLA_Cont_with_3x1_to_2x1( &uT, u01,
205  upsilon11,
206  /* ** */ /* ********* */
207  &uB, u21, FLA_TOP );
208  FLA_Cont_with_3x1_to_2x1( &zT, z01,
209  zeta11,
210  /* ** */ /* ****** */
211  &zB, z21, FLA_TOP );
212  FLA_Cont_with_3x1_to_2x1( &wT, w01,
213  omega11,
214  /* ** */ /* ******* */
215  &wB, w21, FLA_TOP );
216  }
217 
218  FLA_Obj_free( &inv_tau11 );
219  FLA_Obj_free( &minus_inv_tau11 );
220  FLA_Obj_free( &first_elem );
221  FLA_Obj_free( &beta );
222  FLA_Obj_free( &minus_upsilon11 );
223  FLA_Obj_free( &minus_conj_upsilon11 );
224  FLA_Obj_free( &minus_zeta11 );
225  FLA_Obj_free( &minus_conj_zeta11 );
226  FLA_Obj_free( &u );
227  FLA_Obj_free( &z );
228  FLA_Obj_free( &w );
229 
230  return FLA_SUCCESS;
231 }
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpyt.c:15
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dotc(), FLA_Gemv(), FLA_Hemv(), FLA_Her2(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, and FLA_ZERO.

Referenced by FLA_Tridiag_UT_l_unb_var2().

◆ FLA_Tridiag_UT_l_step_unb_var3()

// Unblocked "step" routine, variant 3, of the lower-triangular UT tridiagonal
// reduction (as the routine name indicates).
//
// Performs FLA_Obj_length( T ) iterations, advancing one row/column of A, Z
// and T per iteration. Each iteration:
//   1. updates the current column [ alpha11; a21 ] of A using ABL and ZBL;
//   2. if A22 is non-empty, computes a Householder transform from a21 with
//      FLA_Househ2_UT, which writes tau11;
//   3. forms z21 from A22, A20 and Z20, then rescales it with 1 / tau11;
//   4. stores t01 = A20' * u21.
//
// A: updated in place. A22 is only read (as the FLA_Hemv operand with
//    FLA_LOWER_TRIANGULAR); it is never written by this routine.
// Z: set to zero on entry, then written through z21 in each iteration.
// T: written through tau11 and t01 in each iteration.
//
// Returns FLA_SUCCESS unconditionally; the return values of the FLA_* calls
// made here are not inspected.
FLA_Error FLA_Tridiag_UT_l_step_unb_var3 ( FLA_Obj  A,
FLA_Obj  Z,
FLA_Obj  T 
)
30 {
31  FLA_Obj ATL, ATR, A00, a01, A02,
32  ABL, ABR, a10t, alpha11, a12t,
33  A20, a21, A22;
34  FLA_Obj ZTL, ZTR, Z00, z011, Z02,
35  ZBL, ZBR, z10t, zeta11, z12t,
36  Z20, z21, Z22;
37  FLA_Obj TTL, TTR, T00, t01, T02,
38  TBL, TBR, t10t, tau11, t12t,
39  T20, t21, T22;
40  FLA_Obj dT, d01,
41  dB, delta11,
42  d21;
43  FLA_Obj fT, f01,
44  fB, phi11,
45  f21;
46  FLA_Obj d, f;
47 
48  FLA_Obj inv_tau11;
49  FLA_Obj minus_inv_tau11;
50  FLA_Obj beta;
51  FLA_Obj first_elem;
52  FLA_Obj last_elem;
53 
54  FLA_Obj a10t_l, a10t_r;
55  FLA_Obj a21_t,
56  a21_b;
57  FLA_Obj a2;
58 
59  FLA_Datatype datatype_A;
60  dim_t m_A;
61  dim_t b_alg;
62 
63 
64  b_alg = FLA_Obj_length( T );  // number of iterations the loop below performs
65 
66  datatype_A = FLA_Obj_datatype( A );
67  m_A = FLA_Obj_length( A );
68 
69  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
70  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
71  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
72  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
73  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
74  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );  // scratch; only d01 is written below
75  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );  // scratch; only f01 is written below
76 
77  FLA_Set( FLA_ZERO, Z );  // any incoming contents of Z are discarded
78 
79  FLA_Part_2x2( A, &ATL, &ATR,
80  &ABL, &ABR, 0, 0, FLA_TL );
81  FLA_Part_2x2( Z, &ZTL, &ZTR,
82  &ZBL, &ZBR, 0, 0, FLA_TL );
83  FLA_Part_2x2( T, &TTL, &TTR,
84  &TBL, &TBR, 0, 0, FLA_TL );
85  FLA_Part_2x1( d, &dT,
86  &dB, 0, FLA_TOP );
87  FLA_Part_2x1( f, &fT,
88  &fB, 0, FLA_TOP );
89 
90  while ( FLA_Obj_length( ATL ) < b_alg )
91  {
92  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
93  /* ************* */ /* ************************** */
94  &a10t, /**/ &alpha11, &a12t,
95  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
96  1, 1, FLA_BR );
97  FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z011, &Z02,
98  /* ************* */ /* ************************* */
99  &z10t, /**/ &zeta11, &z12t,
100  ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
101  1, 1, FLA_BR );
102  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
103  /* ************* */ /* ************************ */
104  &t10t, /**/ &tau11, &t12t,
105  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
106  1, 1, FLA_BR );
107  FLA_Repart_2x1_to_3x1( dT, &d01,
108  /* ** */ /* ******* */
109  &delta11,
110  dB, &d21, 1, FLA_BOTTOM );
111  FLA_Repart_2x1_to_3x1( fT, &f01,
112  /* ** */ /* ***** */
113  &phi11,
114  fB, &f21, 1, FLA_BOTTOM );
115 
116  /*------------------------------------------------------------*/
117 
118  // Save the last element of a10t (a10t_r) and set it to one so we can use
119  // a10t as u10t in subsequent computations. We will restore a10t_r later on.
120  if ( FLA_Obj_length( ATL ) > 0 )
121  {
122  FLA_Part_1x2( a10t, &a10t_l, &a10t_r, 1, FLA_RIGHT );
123  FLA_Copy( a10t_r, last_elem );
124  FLA_Set( FLA_ONE, a10t_r );
125  }
126 
127  FLA_Merge_2x1( alpha11,
128  a21, &a2 );  // a2 views alpha11 stacked on top of a21
129 
130  // alpha11 = alpha11 - u10t * z10t' - z10t * u10t';
131  // a21 = a21 - U20 * z10t' - Z20 * u10t';
132  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, z10t, FLA_ONE, a2 );
133  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
134 
135  // Restore last element of a10t.
136  if ( FLA_Obj_length( ATL ) > 0 )
137  {
138  FLA_Copy( last_elem, a10t_r );
139  }
140 
141  if ( FLA_Obj_length( A22 ) > 0 )  // skipped when no trailing rows remain
142  {
143  FLA_Part_2x1( a21, &a21_t,
144  &a21_b, 1, FLA_TOP );
145 
146  // [ u21, tau11, a21 ] = House( a21 );
147  FLA_Househ2_UT( FLA_LEFT,
148  a21_t,
149  a21_b, tau11 );
150 
151  // inv_tau11 = 1 / tau11;
152  // minus_inv_tau11 = -1 / tau11;
153  FLA_Set( FLA_ONE, inv_tau11 );
154  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
155  FLA_Copy( inv_tau11, minus_inv_tau11 );
156  FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
157 
158  // Save first element of a21 (a21_t) and set it to one so a21 can act as u21.
159  FLA_Copy( a21_t, first_elem );
160  FLA_Set( FLA_ONE, a21_t );
161 
162  // z21 = A22 * u21;
163  FLA_Hemv( FLA_LOWER_TRIANGULAR, FLA_ONE, A22, a21, FLA_ZERO, z21 );
164 
165  // z21 = z21 - U20 * ( Z20' * u21 ) - Z20 * ( U20' * u21 );
166  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d01 );  // d01 = U20' * u21
167  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f01 );  // f01 = Z20' * u21
168 
169  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f01, FLA_ONE, z21 );
170  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d01, FLA_ONE, z21 );
171 
172  // t01 = U20' * u21;
173  FLA_Copy( d01, t01 );
174 
175  // beta = u21' * z21 / 2;
176  FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
177  FLA_Inv_scal( FLA_TWO, beta );
178 
179  // z21 = ( z21 - beta / tau * u21 ) / tau;
180  FLA_Scal( minus_inv_tau11, beta );
181  FLA_Axpy( beta, a21, z21 );
182  FLA_Scal( inv_tau11, z21 );
183 
184  // Restore first element of a21.
185  FLA_Copy( first_elem, a21_t );
186  }
187 
188  /*------------------------------------------------------------*/
189 
190  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
191  a10t, alpha11, /**/ a12t,
192  /* ************** */ /* ************************ */
193  &ABL, /**/ &ABR, A20, a21, /**/ A22,
194  FLA_TL );
195  FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z011, /**/ Z02,
196  z10t, zeta11, /**/ z12t,
197  /* ************** */ /* *********************** */
198  &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
199  FLA_TL );
200  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
201  t10t, tau11, /**/ t12t,
202  /* ************** */ /* ********************** */
203  &TBL, /**/ &TBR, T20, t21, /**/ T22,
204  FLA_TL );
205  FLA_Cont_with_3x1_to_2x1( &dT, d01,
206  delta11,
207  /* ** */ /* ******* */
208  &dB, d21, FLA_TOP );
209  FLA_Cont_with_3x1_to_2x1( &fT, f01,
210  phi11,
211  /* ** */ /* ***** */
212  &fB, f21, FLA_TOP );
213  }
214 
215  FLA_Obj_free( &inv_tau11 );
216  FLA_Obj_free( &minus_inv_tau11 );
217  FLA_Obj_free( &beta );
218  FLA_Obj_free( &first_elem );
219  FLA_Obj_free( &last_elem );
220  FLA_Obj_free( &d );
221  FLA_Obj_free( &f );
222 
223  return FLA_SUCCESS;
224 }
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc.c:13
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541

References FLA_Axpy(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Hemv(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, and FLA_ZERO.

Referenced by FLA_Tridiag_UT_l_unb_var3().

◆ FLA_Tridiag_UT_l_unb_var1()

// Unblocked variant 1 entry point: forwards A and T unchanged to
// FLA_Tridiag_UT_l_step_unb_var1() and returns that routine's result.
FLA_Error FLA_Tridiag_UT_l_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Tridiag_UT_l_step_unb_var1( A, T );
16 }
FLA_Error FLA_Tridiag_UT_l_step_unb_var1(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_unb_var1.c:18

References FLA_Tridiag_UT_l_step_unb_var1().

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_unb_var2()

// Unblocked variant 2 entry point: forwards A and T unchanged to
// FLA_Tridiag_UT_l_step_unb_var2() and returns that routine's result.
FLA_Error FLA_Tridiag_UT_l_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Tridiag_UT_l_step_unb_var2( A, T );
16 }
FLA_Error FLA_Tridiag_UT_l_step_unb_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_unb_var2.c:18

References FLA_Tridiag_UT_l_step_unb_var2().

Referenced by FLA_Tridiag_UT_l().

◆ FLA_Tridiag_UT_l_unb_var3()

// Unblocked variant 3 entry point. Unlike variants 1 and 2, the step routine
// takes an extra object Z, so this wrapper creates Z, calls
// FLA_Tridiag_UT_l_step_unb_var3( A, Z, T ), frees Z, and returns the step
// routine's result.
FLA_Error FLA_Tridiag_UT_l_unb_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Z;
17 
18  // Temporary Z is created from A (presumably with A's dimensions -- confirm).
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Tridiag_UT_l_step_unb_var3( A, Z, T );
22 
23  // Z is local to this wrapper; release it before returning.
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }
FLA_Error FLA_Tridiag_UT_l_step_unb_var3(FLA_Obj A, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Tridiag_UT_l_unb_var3.c:29

References FLA_Obj_create_conf_to(), FLA_Obj_free(), and FLA_Tridiag_UT_l_step_unb_var3().

Referenced by FLA_Tridiag_UT_l().