libflame  revision_anchor
Functions
FLA_Fused_Ahx_Ax_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Fused_Ahx_Ax_opt_var1 (FLA_Obj A, FLA_Obj x, FLA_Obj v, FLA_Obj w)
 
FLA_Error FLA_Fused_Ahx_Ax_ops_var1 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opd_var1 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
 

Function Documentation

◆ FLA_Fused_Ahx_Ax_opc_var1()

FLA_Error FLA_Fused_Ahx_Ax_opc_var1 ( int  m_A,
int  n_A,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_x,
int  inc_x,
scomplex *  buff_v,
int  inc_v,
scomplex *  buff_w,
int  inc_w 
)
262 {
263  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
264  int i;
265 
266  bl1_csetv( m_A,
267  buff_0,
268  buff_w, inc_w );
269 
270  for ( i = 0; i < n_A; ++i )
271  {
272  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
273  scomplex* nu1 = buff_v + (i )*inc_v;
274  scomplex* x = buff_x;
275  scomplex* chi1 = buff_x + (i )*inc_x;
276  scomplex* w = buff_w;
277 
278  /*------------------------------------------------------------*/
279 
280  bl1_cdot( BLIS1_CONJUGATE,
281  m_A,
282  a1, rs_A,
283  x, inc_x,
284  nu1 );
285 
286  bl1_caxpyv( BLIS1_NO_CONJUGATE,
287  m_A,
288  chi1,
289  a1, rs_A,
290  w, inc_w );
291 /*
292  F77_caxpy( &m_A,
293  chi1,
294  a1, &rs_A,
295  w, &inc_w );
296 */
297 
298  /*------------------------------------------------------------*/
299 
300  }
301 
302  return FLA_SUCCESS;
303 }
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
int i
Definition: bl1_axmyv2.c:145
chi1
Definition: bl1_axmyv2.c:366
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
scomplex
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, and i.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), and FLA_Hess_UT_step_ofc_var4().

◆ FLA_Fused_Ahx_Ax_opd_var1()

FLA_Error FLA_Fused_Ahx_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_x,
int  inc_x,
double *  buff_v,
int  inc_v,
double *  buff_w,
int  inc_w 
)
179 {
180  double zero = bl1_d0();
181  int i;
182 
183  double* restrict w = buff_w;
184  double* restrict x = buff_x;
185 
186  double* restrict a1;
187  double* restrict a2;
188  double* restrict nu1;
189  double* restrict nu2;
190  double* restrict chi1;
191  double* restrict chi2;
192 
193  int n_run = n_A / 2;
194  int n_left = n_A % 2;
195  int step_a1 = 2*cs_A;
196  int step_nu1 = 2*inc_v;
197  int step_chi1 = 2*inc_x;
198 
199  bl1_dsetv( m_A,
200  &zero,
201  buff_w, inc_w );
202 
203  a1 = buff_A;
204  a2 = buff_A + cs_A;
205  nu1 = buff_v;
206  nu2 = buff_v + inc_v;
207  chi1 = buff_x;
208  chi2 = buff_x + inc_x;
209 
210  for ( i = 0; i < n_run; ++i )
211  {
212  /*------------------------------------------------------------*/
213 
214  bl1_ddotv2axpyv2b( m_A,
215  a1, rs_A,
216  a2, rs_A,
217  x, inc_x,
218  chi1,
219  chi2,
220  nu1,
221  nu2,
222  w, inc_w );
223 
224  /*------------------------------------------------------------*/
225 
226  a1 += step_a1;
227  a2 += step_a1;
228  nu1 += step_nu1;
229  nu2 += step_nu1;
230  chi1 += step_chi1;
231  chi2 += step_chi1;
232  }
233 
234  if ( n_left > 0 )
235  {
236  for ( i = 0; i < n_left; ++i )
237  {
238  bl1_ddotaxpy( m_A,
239  a1, rs_A,
240  x, inc_x,
241  chi1,
242  nu1,
243  w, inc_w );
244 
245  a1 += rs_A;
246  nu1 += inc_v;
247  chi1 += inc_x;
248  }
249  }
250 
251  return FLA_SUCCESS;
252 }
int n_left
Definition: bl1_axmyv2.c:149
int n_run
Definition: bl1_axmyv2.c:148
double *restrict chi2
Definition: bl1_axpyv2b.c:140
void bl1_ddotaxpy(int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition: bl1_dotaxpy.c:31
void bl1_ddotv2axpyv2b(int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w)
Definition: bl1_dotv2axpyv2b.c:36
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
double bl1_d0(void)
Definition: bl1_constants.c:118

References bl1_d0(), bl1_ddotaxpy(), bl1_ddotv2axpyv2b(), bl1_dsetv(), chi1, chi2, i, n_left, and n_run.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), and FLA_Hess_UT_step_ofd_var4().

◆ FLA_Fused_Ahx_Ax_ops_var1()

FLA_Error FLA_Fused_Ahx_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_x,
int  inc_x,
float *  buff_v,
int  inc_v,
float *  buff_w,
int  inc_w 
)
122 {
123  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
124  int i;
125 
126  bl1_ssetv( m_A,
127  buff_0,
128  buff_w, inc_w );
129 
130  for ( i = 0; i < n_A; ++i )
131  {
132  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
133  float* nu1 = buff_v + (i )*inc_v;
134  float* x = buff_x;
135  float* chi1 = buff_x + (i )*inc_x;
136  float* w = buff_w;
137 
138  /*------------------------------------------------------------*/
139 
140  bl1_sdot( BLIS1_CONJUGATE,
141  m_A,
142  a1, rs_A,
143  x, inc_x,
144  nu1 );
145 /*
146  *nu1 = F77_sdot( &m_A,
147  a1, &rs_A,
148  x, &inc_x );
149 */
150 
151  bl1_saxpyv( BLIS1_NO_CONJUGATE,
152  m_A,
153  chi1,
154  a1, rs_A,
155  w, inc_w );
156 /*
157  F77_saxpy( &m_A,
158  chi1,
159  a1, &rs_A,
160  w, &inc_w );
161 */
162 
163  /*------------------------------------------------------------*/
164 
165  }
166 
167 
168  return FLA_SUCCESS;
169 }
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, and i.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), and FLA_Hess_UT_step_ofs_var4().

◆ FLA_Fused_Ahx_Ax_opt_var1()

FLA_Error FLA_Fused_Ahx_Ax_opt_var1 ( FLA_Obj  A,
FLA_Obj  x,
FLA_Obj  v,
FLA_Obj  w 
)
14 {
15 /*
16  Effective computation:
17  v = A' * x;
18  w = A * x;
19 */
20  FLA_Datatype datatype;
21  int m_A, n_A;
22  int rs_A, cs_A;
23  int inc_x, inc_v, inc_w;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  n_A = FLA_Obj_width( A );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  inc_x = FLA_Obj_vector_inc( x );
34 
35  inc_v = FLA_Obj_vector_inc( v );
36 
37  inc_w = FLA_Obj_vector_inc( w );
38 
39 
40  switch ( datatype )
41  {
42  case FLA_FLOAT:
43  {
44  float* buff_A = FLA_FLOAT_PTR( A );
45  float* buff_x = FLA_FLOAT_PTR( x );
46  float* buff_v = FLA_FLOAT_PTR( v );
47  float* buff_w = FLA_FLOAT_PTR( w );
48 
49  FLA_Fused_Ahx_Ax_ops_var1( m_A,
50  n_A,
51  buff_A, rs_A, cs_A,
52  buff_x, inc_x,
53  buff_v, inc_v,
54  buff_w, inc_w );
55 
56  break;
57  }
58 
59  case FLA_DOUBLE:
60  {
61  double* buff_A = FLA_DOUBLE_PTR( A );
62  double* buff_x = FLA_DOUBLE_PTR( x );
63  double* buff_v = FLA_DOUBLE_PTR( v );
64  double* buff_w = FLA_DOUBLE_PTR( w );
65 
66  FLA_Fused_Ahx_Ax_opd_var1( m_A,
67  n_A,
68  buff_A, rs_A, cs_A,
69  buff_x, inc_x,
70  buff_v, inc_v,
71  buff_w, inc_w );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_x = FLA_COMPLEX_PTR( x );
80  scomplex* buff_v = FLA_COMPLEX_PTR( v );
81  scomplex* buff_w = FLA_COMPLEX_PTR( w );
82 
83  FLA_Fused_Ahx_Ax_opc_var1( m_A,
84  n_A,
85  buff_A, rs_A, cs_A,
86  buff_x, inc_x,
87  buff_v, inc_v,
88  buff_w, inc_w );
89 
90  break;
91  }
92 
93  case FLA_DOUBLE_COMPLEX:
94  {
95  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
96  dcomplex* buff_x = FLA_DOUBLE_COMPLEX_PTR( x );
97  dcomplex* buff_v = FLA_DOUBLE_COMPLEX_PTR( v );
98  dcomplex* buff_w = FLA_DOUBLE_COMPLEX_PTR( w );
99 
100  FLA_Fused_Ahx_Ax_opz_var1( m_A,
101  n_A,
102  buff_A, rs_A, cs_A,
103  buff_x, inc_x,
104  buff_v, inc_v,
105  buff_w, inc_w );
106 
107  break;
108  }
109  }
110 
111  return FLA_SUCCESS;
112 }
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:116
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dcomplex
Definition: blis_type_defs.h:138

References FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

◆ FLA_Fused_Ahx_Ax_opz_var1()

FLA_Error FLA_Fused_Ahx_Ax_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_x,
int  inc_x,
dcomplex *  buff_v,
int  inc_v,
dcomplex *  buff_w,
int  inc_w 
)
313 {
314  dcomplex zero = bl1_z0();
315  int i;
316 
317  dcomplex* restrict w = buff_w;
318  dcomplex* restrict x = buff_x;
319 
320  dcomplex* restrict a1;
321  dcomplex* restrict a2;
322  dcomplex* restrict nu1;
323  dcomplex* restrict nu2;
324  dcomplex* restrict chi1;
325  dcomplex* restrict chi2;
326 
327  int n_run = n_A / 2;
328  int n_left = n_A % 2;
329  int step_a1 = 2*cs_A;
330  int step_nu1 = 2*inc_v;
331  int step_chi1 = 2*inc_x;
332 
333  bl1_zsetv( m_A,
334  &zero,
335  buff_w, inc_w );
336 
337  a1 = buff_A;
338  a2 = buff_A + cs_A;
339  nu1 = buff_v;
340  nu2 = buff_v + inc_v;
341  chi1 = buff_x;
342  chi2 = buff_x + inc_x;
343 
344  for ( i = 0; i < n_run; ++i )
345  {
346  /*------------------------------------------------------------*/
347 
348 /*
349  bl1_zdotaxpy( m_A,
350  a1, rs_A,
351  x, inc_x,
352  chi1,
353  nu1,
354  w, inc_w );
355 */
356 
357  bl1_zdotv2axpyv2b( m_A,
358  a1, rs_A,
359  a2, rs_A,
360  x, inc_x,
361  chi1,
362  chi2,
363  nu1,
364  nu2,
365  w, inc_w );
366 
367  /*------------------------------------------------------------*/
368 
369  a1 += step_a1;
370  a2 += step_a1;
371  nu1 += step_nu1;
372  nu2 += step_nu1;
373  chi1 += step_chi1;
374  chi2 += step_chi1;
375  }
376 
377  if ( n_left > 0 )
378  {
379  for ( i = 0; i < n_left; ++i )
380  {
381  bl1_zdotaxpy( m_A,
382  a1, rs_A,
383  x, inc_x,
384  chi1,
385  nu1,
386  w, inc_w );
387 
388  a1 += rs_A;
389  nu1 += inc_v;
390  chi1 += inc_x;
391  }
392  }
393 
394  return FLA_SUCCESS;
395 }
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition: bl1_dotaxpy.c:258
void bl1_zdotv2axpyv2b(int n, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *x, int inc_x, dcomplex *kappa1, dcomplex *kappa2, dcomplex *rho1, dcomplex *rho2, dcomplex *w, int inc_w)
Definition: bl1_dotv2axpyv2b.c:331
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66

References bl1_z0(), bl1_zdotaxpy(), bl1_zdotv2axpyv2b(), bl1_zsetv(), chi1, chi2, i, n_left, and n_run.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), and FLA_Hess_UT_step_ofz_var4().