libflame  revision_anchor
Functions
FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 (FLA_Obj alpha, FLA_Obj tau, FLA_Obj u, FLA_Obj y, FLA_Obj z, FLA_Obj v, FLA_Obj A, FLA_Obj up, FLA_Obj a, FLA_Obj w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 (int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 (int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_tau,
scomplex *  buff_alpha,
scomplex *  buff_u,
int  inc_u,
scomplex *  buff_y,
int  inc_y,
scomplex *  buff_z,
int  inc_z,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_up,
int  inc_up,
scomplex *  buff_a,
int  inc_a,
scomplex *  buff_w,
int  inc_w 
)
436 {
437  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
438  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
439  scomplex minus_inv_tau;
440  scomplex conj_psi1;
441  scomplex conj_nu1;
442  scomplex conj_alpha1;
443  int i;
444 
445  bl1_csetv( m_A,
446  buff_0,
447  buff_w, inc_w );
448 
449  bl1_cdiv3( buff_m1, buff_tau, &minus_inv_tau );
450 
451  for ( i = 0; i < n_A; ++i )
452  {
453  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
454  scomplex* u = buff_u;
455  scomplex* psi1 = buff_y + (i )*inc_y;
456  scomplex* nu1 = buff_v + (i )*inc_v;
457  scomplex* z = buff_z;
458  scomplex* up = buff_up;
459  scomplex* alpha1 = buff_a + (i )*inc_a;
460  scomplex* w = buff_w;
461  scomplex* alpha = buff_alpha;
462  scomplex temp1;
463  scomplex temp2;
464 
465  /*------------------------------------------------------------*/
466 
467  bl1_ccopyconj( psi1, &conj_psi1 );
468  bl1_cmult3( alpha, &conj_psi1, &temp1 );
469 
470  bl1_ccopyconj( nu1, &conj_nu1 );
471  bl1_cmult3( alpha, &conj_nu1, &temp2 );
472 
473  bl1_caxpyv( BLIS1_NO_CONJUGATE,
474  m_A,
475  &temp1,
476  u, inc_u,
477  a1, rs_A );
478  //F77_caxpy( &m_A,
479  // &temp1,
480  // u, &inc_u,
481  // a1, &rs_A );
482 
483  bl1_caxpyv( BLIS1_NO_CONJUGATE,
484  m_A,
485  &temp2,
486  z, inc_z,
487  a1, rs_A );
488  //F77_caxpy( &m_A,
489  // &temp2,
490  // z, &inc_z,
491  // a1, &rs_A );
492 
493  bl1_cdot( BLIS1_CONJUGATE,
494  m_A,
495  a1, rs_A,
496  up, inc_up,
497  psi1 );
498 
499  bl1_ccopyconj( psi1, &conj_psi1 );
500  bl1_cmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
501 
502  bl1_ccopyconj( alpha1, &conj_alpha1 );
503 
504  bl1_caxpyv( BLIS1_NO_CONJUGATE,
505  m_A,
506  &conj_alpha1,
507  a1, rs_A,
508  w, inc_w );
509  //F77_caxpy( &m_A,
510  // &conj_alpha1,
511  // a1, &rs_A,
512  // w, &inc_w );
513 
514  /*------------------------------------------------------------*/
515 
516  }
517 
518  return FLA_SUCCESS;
519 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
double *restrict psi1
Definition: bl1_axmyv2.c:139
int i
Definition: bl1_axmyv2.c:145
double temp2
Definition: bl1_axpyv2b.c:147
double temp1
Definition: bl1_axpyv2b.c:146
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References alpha1, bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ZERO, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofc_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_tau,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_v,
int  inc_v,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_up,
int  inc_up,
double *  buff_a,
int  inc_a,
double *  buff_w,
int  inc_w 
)
279 {
280  double zero = bl1_d0();
281  double minus_one = bl1_dm1();
282  double* restrict u = buff_u;
283  double* restrict up = buff_up;
284  double* restrict w = buff_w;
285  double* restrict z = buff_z;
286  double* restrict alpha = buff_alpha;
287  double* restrict a1;
288  double* restrict a2;
289  double* restrict psi1;
290  double* restrict psi2;
291  double* restrict alpha1;
292  double* restrict alpha2;
293  double* restrict nu1;
294  double* restrict nu2;
295 
296  double minus_inv_tau;
297  double alpha_conj_psi1;
298  double alpha_conj_psi2;
299  double alpha_conj_nu1;
300  double alpha_conj_nu2;
301  int i;
302  int n_run = n_A / 2;
303  int n_left = n_A % 2;
304  int twocs_A = 2*cs_A;
305  int twoinc_y = 2*inc_y;
306  int twoinc_a = 2*inc_a;
307  int twoinc_v = 2*inc_v;
308 
309 
310  bl1_dsetv( m_A,
311  &zero,
312  buff_w, inc_w );
313 
314  bl1_ddiv3( &minus_one, buff_tau, &minus_inv_tau );
315 
316  a1 = buff_A;
317  a2 = buff_A + cs_A;
318  psi1 = buff_y;
319  psi2 = buff_y + inc_y;
320  alpha1 = buff_a;
321  alpha2 = buff_a + inc_a;
322  nu1 = buff_v;
323  nu2 = buff_v + inc_v;
324 
325  for ( i = 0; i < n_run; ++i )
326  {
327 
328  /*------------------------------------------------------------*/
329 
330  bl1_dmult3( alpha, psi1, &alpha_conj_psi1 );
331  bl1_dmult3( alpha, psi2, &alpha_conj_psi2 );
332 
333  bl1_dmult3( alpha, nu1, &alpha_conj_nu1 );
334  bl1_dmult3( alpha, nu2, &alpha_conj_nu2 );
335 
336 /*
337  Effective computation:
338  A = A + alpha * ( u * y' + z * v' );
339  y = A' * up;
340  a = a - conj(y) / tau;
341  w = A * conj(a);
342 */
343  bl1_daxpyv2b( m_A,
344  &alpha_conj_psi1,
345  &alpha_conj_nu1,
346  u, inc_u,
347  z, inc_z,
348  a1, rs_A );
349  bl1_daxpyv2b( m_A,
350  &alpha_conj_psi2,
351  &alpha_conj_nu2,
352  u, inc_u,
353  z, inc_z,
354  a2, rs_A );
355 
356 
357  bl1_ddotsv2( BLIS1_CONJUGATE,
358  m_A,
359  a1, rs_A,
360  a2, rs_A,
361  up, inc_up,
362  &zero,
363  psi1,
364  psi2 );
365 
366  bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
367  bl1_dmult4( &minus_inv_tau, psi2, alpha2, alpha2 );
368 
369  bl1_daxpyv2b( m_A,
370  alpha1,
371  alpha2,
372  a1, rs_A,
373  a2, rs_A,
374  w, inc_w );
375 
376  /*------------------------------------------------------------*/
377 
378  a1 += twocs_A;
379  a2 += twocs_A;
380  psi1 += twoinc_y;
381  psi2 += twoinc_y;
382  alpha1 += twoinc_a;
383  alpha2 += twoinc_a;
384  nu1 += twoinc_v;
385  nu2 += twoinc_v;
386  }
387 
388  if ( n_left == 1 )
389  {
390  double rho1;
391 
392  bl1_dmult3( alpha, psi1, &alpha_conj_psi1 );
393  bl1_dmult3( alpha, nu1, &alpha_conj_nu1 );
394 
395  bl1_daxpyv2b( m_A,
396  &alpha_conj_psi1,
397  &alpha_conj_nu1,
398  u, inc_u,
399  z, inc_z,
400  a1, rs_A );
401 
402  bl1_ddot( BLIS1_CONJUGATE,
403  m_A,
404  a1, rs_A,
405  up, inc_up,
406  &rho1 );
407  bl1_dscals( &zero, psi1 );
408  bl1_dadd3( psi1, &rho1, psi1 );
409 
410  bl1_dmult4( &minus_inv_tau, psi1, alpha1, alpha1 );
411 
412  bl1_daxpyv( BLIS1_NO_CONJUGATE,
413  m_A,
414  alpha1,
415  a1, rs_A,
416  w, inc_w );
417  }
418 
419  return FLA_SUCCESS;
420 }
int n_left
Definition: bl1_axmyv2.c:149
int n_run
Definition: bl1_axmyv2.c:148
int twoinc_y
Definition: bl1_axpyv2b.c:154
void bl1_daxpyv2b(int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
Definition: bl1_axpyv2b.c:31
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
double rho1
Definition: bl1_dotsv2.c:149
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition: bl1_dotsv2.c:35
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
double bl1_dm1(void)
Definition: bl1_constants.c:182
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
double bl1_d0(void)
Definition: bl1_constants.c:118

References alpha1, alpha2, bl1_d0(), bl1_daxpyv(), bl1_daxpyv2b(), bl1_ddot(), bl1_ddotsv2(), bl1_dm1(), bl1_dsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofd_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_tau,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_v,
int  inc_v,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_up,
int  inc_up,
float *  buff_a,
int  inc_a,
float *  buff_w,
int  inc_w 
)
182 {
183  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
184  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
185  float minus_inv_tau;
186  int i;
187 
188  bl1_ssetv( m_A,
189  buff_0,
190  buff_w, inc_w );
191 
192  minus_inv_tau = *buff_m1 / *buff_tau;
193 
194  for ( i = 0; i < n_A; ++i )
195  {
196  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
197  float* u = buff_u;
198  float* psi1 = buff_y + (i )*inc_y;
199  float* nu1 = buff_v + (i )*inc_v;
200  float* z = buff_z;
201  float* up = buff_up;
202  float* alpha1 = buff_a + (i )*inc_a;
203  float* w = buff_w;
204  float* alpha = buff_alpha;
205  float temp1;
206  float temp2;
207 
208  /*------------------------------------------------------------*/
209 
210  // bl1_smult3( alpha, psi1, &temp1 );
211  temp1 = *alpha * *psi1;
212 
213  // bl1_smult3( alpha, nu1, &temp2 );
214  temp2 = *alpha * *nu1;
215 
216  bl1_saxpyv( BLIS1_NO_CONJUGATE,
217  m_A,
218  &temp1,
219  u, inc_u,
220  a1, rs_A );
221  //F77_saxpy( &m_A,
222  // &temp1,
223  // u, &inc_u,
224  // a1, &rs_A );
225 
226  bl1_saxpyv( BLIS1_NO_CONJUGATE,
227  m_A,
228  &temp2,
229  z, inc_z,
230  a1, rs_A );
231  //F77_saxpy( &m_A,
232  // &temp2,
233  // z, &inc_z,
234  // a1, &rs_A );
235 
236  bl1_sdot( BLIS1_CONJUGATE,
237  m_A,
238  a1, rs_A,
239  up, inc_up,
240  psi1 );
241  //*psi1 = F77_sdot( &m_A,
242  // a1, &rs_A,
243  // up, &inc_up );
244 
245  // bl1_smult4( &minus_inv_tau, psi1, alpha1, alpha1 );
246  *alpha1 = *alpha1 + minus_inv_tau * *psi1;
247 
248  bl1_saxpyv( BLIS1_NO_CONJUGATE,
249  m_A,
250  alpha1,
251  a1, rs_A,
252  w, inc_w );
253  //F77_saxpy( &m_A,
254  // alpha1,
255  // a1, &rs_A,
256  // w, &inc_w );
257 
258  /*------------------------------------------------------------*/
259 
260  }
261 
262  return FLA_SUCCESS;
263 }
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26

References alpha1, bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, FLA_ZERO, i, psi1, temp1, and temp2.

Referenced by FLA_Bidiag_UT_u_step_ofs_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1 ( FLA_Obj  alpha,
FLA_Obj  tau,
FLA_Obj  u,
FLA_Obj  y,
FLA_Obj  z,
FLA_Obj  v,
FLA_Obj  A,
FLA_Obj  up,
FLA_Obj  a,
FLA_Obj  w 
)
14 {
15 /*
16  Effective computation:
17  A = A + alpha * ( u * y' + z * v' );
18  y = A' * up;
19  a = a - conj(y) / tau;
20  w = A * conj(a);
21 */
22  FLA_Datatype datatype;
23  int m_A, n_A;
24  int rs_A, cs_A;
25  int inc_u, inc_y, inc_z, inc_v;
26  int inc_up, inc_a, inc_w;
27 
28  datatype = FLA_Obj_datatype( A );
29 
30  m_A = FLA_Obj_length( A );
31  n_A = FLA_Obj_width( A );
32 
33  rs_A = FLA_Obj_row_stride( A );
34  cs_A = FLA_Obj_col_stride( A );
35 
36  inc_u = FLA_Obj_vector_inc( u );
37  inc_y = FLA_Obj_vector_inc( y );
38  inc_z = FLA_Obj_vector_inc( z );
39  inc_v = FLA_Obj_vector_inc( v );
40 
41  inc_up = FLA_Obj_vector_inc( up );
42  inc_a = FLA_Obj_vector_inc( a );
43  inc_w = FLA_Obj_vector_inc( w );
44 
45 
46  switch ( datatype )
47  {
48  case FLA_FLOAT:
49  {
50  float* buff_A = FLA_FLOAT_PTR( A );
51  float* buff_u = FLA_FLOAT_PTR( u );
52  float* buff_y = FLA_FLOAT_PTR( y );
53  float* buff_z = FLA_FLOAT_PTR( z );
54  float* buff_v = FLA_FLOAT_PTR( v );
55  float* buff_up = FLA_FLOAT_PTR( up );
56  float* buff_a = FLA_FLOAT_PTR( a );
57  float* buff_w = FLA_FLOAT_PTR( w );
58  float* buff_tau = FLA_FLOAT_PTR( tau );
59  float* buff_alpha = FLA_FLOAT_PTR( alpha );
60 
61  FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1( m_A,
62  n_A,
63  buff_tau,
64  buff_alpha,
65  buff_u, inc_u,
66  buff_y, inc_y,
67  buff_z, inc_z,
68  buff_v, inc_v,
69  buff_A, rs_A, cs_A,
70  buff_up, inc_up,
71  buff_a, inc_a,
72  buff_w, inc_w );
73 
74  break;
75  }
76 
77  case FLA_DOUBLE:
78  {
79  double* buff_A = FLA_DOUBLE_PTR( A );
80  double* buff_u = FLA_DOUBLE_PTR( u );
81  double* buff_y = FLA_DOUBLE_PTR( y );
82  double* buff_z = FLA_DOUBLE_PTR( z );
83  double* buff_v = FLA_DOUBLE_PTR( v );
84  double* buff_up = FLA_DOUBLE_PTR( up );
85  double* buff_a = FLA_DOUBLE_PTR( a );
86  double* buff_w = FLA_DOUBLE_PTR( w );
87  double* buff_tau = FLA_DOUBLE_PTR( tau );
88  double* buff_alpha = FLA_DOUBLE_PTR( alpha );
89 
90  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1( m_A,
91  n_A,
92  buff_tau,
93  buff_alpha,
94  buff_u, inc_u,
95  buff_y, inc_y,
96  buff_z, inc_z,
97  buff_v, inc_v,
98  buff_A, rs_A, cs_A,
99  buff_up, inc_up,
100  buff_a, inc_a,
101  buff_w, inc_w );
102 
103  break;
104  }
105 
106  case FLA_COMPLEX:
107  {
108  scomplex* buff_A = FLA_COMPLEX_PTR( A );
109  scomplex* buff_u = FLA_COMPLEX_PTR( u );
110  scomplex* buff_y = FLA_COMPLEX_PTR( y );
111  scomplex* buff_z = FLA_COMPLEX_PTR( z );
112  scomplex* buff_v = FLA_COMPLEX_PTR( v );
113  scomplex* buff_up = FLA_COMPLEX_PTR( up );
114  scomplex* buff_a = FLA_COMPLEX_PTR( a );
115  scomplex* buff_w = FLA_COMPLEX_PTR( w );
116  scomplex* buff_tau = FLA_COMPLEX_PTR( tau );
117  scomplex* buff_alpha = FLA_COMPLEX_PTR( alpha );
118 
119  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1( m_A,
120  n_A,
121  buff_tau,
122  buff_alpha,
123  buff_u, inc_u,
124  buff_y, inc_y,
125  buff_z, inc_z,
126  buff_v, inc_v,
127  buff_A, rs_A, cs_A,
128  buff_up, inc_up,
129  buff_a, inc_a,
130  buff_w, inc_w );
131 
132  break;
133  }
134 
135  case FLA_DOUBLE_COMPLEX:
136  {
137  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
138  dcomplex* buff_u = FLA_DOUBLE_COMPLEX_PTR( u );
139  dcomplex* buff_y = FLA_DOUBLE_COMPLEX_PTR( y );
140  dcomplex* buff_z = FLA_DOUBLE_COMPLEX_PTR( z );
141  dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
142  dcomplex* buff_up = FLA_DOUBLE_COMPLEX_PTR( up );
143  dcomplex* buff_a = FLA_DOUBLE_COMPLEX_PTR( a );
144  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
145  dcomplex* buff_tau = FLA_DOUBLE_COMPLEX_PTR( tau );
146  dcomplex* buff_alpha = FLA_DOUBLE_COMPLEX_PTR( alpha );
147 
148  FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1( m_A,
149  n_A,
150  buff_tau,
151  buff_alpha,
152  buff_u, inc_u,
153  buff_y, inc_y,
154  buff_z, inc_z,
155  buff_v, inc_v,
156  buff_A, rs_A, cs_A,
157  buff_up, inc_up,
158  buff_a, inc_a,
159  buff_w, inc_w );
160 
161  break;
162  }
163  }
164 
165  return FLA_SUCCESS;
166 }
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(int m_A, int n_A, float *buff_tau, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A, float *buff_up, int inc_up, float *buff_a, int inc_a, float *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:170
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_tau, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_up, int inc_up, dcomplex *buff_a, int inc_a, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:523
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(int m_A, int n_A, scomplex *buff_tau, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_up, int inc_up, scomplex *buff_a, int inc_a, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:424
FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(int m_A, int n_A, double *buff_tau, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A, double *buff_up, int inc_up, double *buff_a, int inc_a, double *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1.c:267
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Fused_Gerc2_Ahx_Axpy_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

◆ FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_tau,
dcomplex *  buff_alpha,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_up,
int  inc_up,
dcomplex *  buff_a,
int  inc_a,
dcomplex *  buff_w,
int  inc_w 
)
535 {
536  dcomplex zero = bl1_z0();
537  dcomplex minus_one = bl1_zm1();
538  dcomplex* restrict u = buff_u;
539  dcomplex* restrict up = buff_up;
540  dcomplex* restrict w = buff_w;
541  dcomplex* restrict z = buff_z;
542  dcomplex* restrict alpha = buff_alpha;
543  dcomplex* restrict a1;
544  dcomplex* restrict a2;
545  dcomplex* restrict psi1;
546  dcomplex* restrict psi2;
547  dcomplex* restrict alpha1;
548  dcomplex* restrict alpha2;
549  dcomplex* restrict nu1;
550  dcomplex* restrict nu2;
551 
552  dcomplex minus_inv_tau;
553  dcomplex conj_psi1;
554  dcomplex conj_psi2;
555  dcomplex conj_nu1;
556  dcomplex conj_nu2;
557  dcomplex conj_alpha1;
558  dcomplex conj_alpha2;
559  dcomplex alpha_conj_psi1;
560  dcomplex alpha_conj_psi2;
561  dcomplex alpha_conj_nu1;
562  dcomplex alpha_conj_nu2;
563  int i;
564  int n_run = n_A / 2;
565  int n_left = n_A % 2;
566  int twocs_A = 2*cs_A;
567  int twoinc_y = 2*inc_y;
568  int twoinc_a = 2*inc_a;
569  int twoinc_v = 2*inc_v;
570 
571 
572  bl1_zsetv( m_A,
573  &zero,
574  buff_w, inc_w );
575 
576  bl1_zdiv3( &minus_one, buff_tau, &minus_inv_tau );
577 
578  a1 = buff_A;
579  a2 = buff_A + cs_A;
580  psi1 = buff_y;
581  psi2 = buff_y + inc_y;
582  alpha1 = buff_a;
583  alpha2 = buff_a + inc_a;
584  nu1 = buff_v;
585  nu2 = buff_v + inc_v;
586 
587  for ( i = 0; i < n_run; ++i )
588  {
589 
590  /*------------------------------------------------------------*/
591 
592  bl1_zcopyconj( psi1, &conj_psi1 );
593  bl1_zcopyconj( psi2, &conj_psi2 );
594  bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );
595  bl1_zmult3( alpha, &conj_psi2, &alpha_conj_psi2 );
596 
597  bl1_zcopyconj( nu1, &conj_nu1 );
598  bl1_zcopyconj( nu2, &conj_nu2 );
599  bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );
600  bl1_zmult3( alpha, &conj_nu2, &alpha_conj_nu2 );
601 
602  bl1_zaxpyv2b( m_A,
603  &alpha_conj_psi1,
604  &alpha_conj_nu1,
605  u, inc_u,
606  z, inc_z,
607  a1, rs_A );
608  bl1_zaxpyv2b( m_A,
609  &alpha_conj_psi2,
610  &alpha_conj_nu2,
611  u, inc_u,
612  z, inc_z,
613  a2, rs_A );
614 
615 
616  bl1_zdotsv2( BLIS1_CONJUGATE,
617  m_A,
618  a1, rs_A,
619  a2, rs_A,
620  up, inc_up,
621  &zero,
622  psi1,
623  psi2 );
624 
625  bl1_zcopyconj( psi1, &conj_psi1 );
626  bl1_zcopyconj( psi2, &conj_psi2 );
627  bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
628  bl1_zmult4( &minus_inv_tau, &conj_psi2, alpha2, alpha2 );
629  bl1_zcopyconj( alpha1, &conj_alpha1 );
630  bl1_zcopyconj( alpha2, &conj_alpha2 );
631 
632  bl1_zaxpyv2b( m_A,
633  &conj_alpha1,
634  &conj_alpha2,
635  a1, rs_A,
636  a2, rs_A,
637  w, inc_w );
638 
639  /*------------------------------------------------------------*/
640 
641  a1 += twocs_A;
642  a2 += twocs_A;
643  psi1 += twoinc_y;
644  psi2 += twoinc_y;
645  alpha1 += twoinc_a;
646  alpha2 += twoinc_a;
647  nu1 += twoinc_v;
648  nu2 += twoinc_v;
649  }
650 
651  if ( n_left == 1 )
652  {
653  dcomplex rho1;
654 
655  bl1_zcopyconj( psi1, &conj_psi1 );
656  bl1_zmult3( alpha, &conj_psi1, &alpha_conj_psi1 );
657  bl1_zcopyconj( nu1, &conj_nu1 );
658  bl1_zmult3( alpha, &conj_nu1, &alpha_conj_nu1 );
659 
660  bl1_zaxpyv2b( m_A,
661  &alpha_conj_psi1,
662  &alpha_conj_nu1,
663  u, inc_u,
664  z, inc_z,
665  a1, rs_A );
666 
667  bl1_zdot( BLIS1_CONJUGATE,
668  m_A,
669  a1, rs_A,
670  up, inc_up,
671  &rho1 );
672  bl1_zscals( &zero, psi1 );
673  bl1_zadd3( psi1, &rho1, psi1 );
674 
675  bl1_zcopyconj( psi1, &conj_psi1 );
676  bl1_zmult4( &minus_inv_tau, &conj_psi1, alpha1, alpha1 );
677  bl1_zcopyconj( alpha1, &conj_alpha1 );
678 
679  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
680  m_A,
681  &conj_alpha1,
682  a1, rs_A,
683  w, inc_w );
684  }
685 
686  return FLA_SUCCESS;
687 }
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
bl1_zscals(beta, rho_yz)
void bl1_zdotsv2(conj1_t conjxy, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz)
Definition: bl1_dotsv2.c:248
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
dcomplex bl1_zm1(void)
Definition: bl1_constants.c:197

References alpha1, alpha2, bl1_z0(), bl1_zaxpyv(), bl1_zaxpyv2b(), bl1_zdot(), bl1_zdotsv2(), bl1_zm1(), bl1_zscals(), bl1_zsetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_left, n_run, psi1, rho1, and twoinc_y.

Referenced by FLA_Bidiag_UT_u_step_ofz_var3(), and FLA_Fused_Gerc2_Ahx_Axpy_Ax_opt_var1().