libflame  revision_anchor
Functions | Variables
bl1_dotaxpy.c File Reference

(r)

Functions

void bl1_sdotaxpy (int n, float *a, int inc_a, float *x, int inc_x, float *kappa, float *rho, float *w, int inc_w)
 
void bl1_ddotaxpy (int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
 
 if (inc_a !=1||inc_x !=1||inc_w !=1)
 
 for (i=0;i< n_run;++i)
 
 if (n_left > 0)
 
void bl1_cdotaxpy (int n, scomplex *a, int inc_a, scomplex *x, int inc_x, scomplex *kappa, scomplex *rho, scomplex *w, int inc_w)
 
void bl1_zdotaxpy (int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
 

Variables

double *restrict chi1
 
double *restrict omega1 = w
 
double kappa_c = *kappa
 
double rho_c
 
int i
 
int n_pre
 
int n_run
 
int n_left
 
rho = rho_c
 
 alpha1 = a
 
rho_c real = 0.0
 
rho_c imag = 0.0
 

Function Documentation

◆ bl1_cdotaxpy()

void bl1_cdotaxpy ( int  n,
scomplex *  a,
int  inc_a,
scomplex *  x,
int  inc_x,
scomplex *  kappa,
scomplex *  rho,
scomplex *  w,
int  inc_w 
)
253 {
254  bl1_abort();
255 }
void bl1_abort(void)
Definition: bl1_abort.c:13

References bl1_abort().

◆ bl1_ddotaxpy()

void bl1_ddotaxpy ( int  n,
double *  a,
int  inc_a,
double *  x,
int  inc_x,
double *  kappa,
double *  rho,
double *  w,
int  inc_w 
)
38 {
39  double* restrict alpha1;
40  double* restrict chi1;
41  double* restrict omega1;
42  double rho_c;
43  int i;
44 
45  int n_pre;
46  int n_run;
47  int n_left;
48 
49  v2df_t k1v, rho1v;
50  v2df_t a1v, x1v, w1v;
51  v2df_t a2v, x2v, w2v;
52 
53  if ( inc_a != 1 ||
54  inc_x != 1 ||
55  inc_w != 1 ) bl1_abort();
56 
57  n_pre = 0;
58  if ( ( unsigned long ) a % 16 != 0 )
59  {
60  if ( ( unsigned long ) x % 16 == 0 ||
61  ( unsigned long ) w % 16 == 0 ) bl1_abort();
62 
63  n_pre = 1;
64  }
65 
66  n_run = ( n - n_pre ) / 4;
67  n_left = ( n - n_pre ) % 4;
68 
69  alpha1 = a;
70  chi1 = x;
71  omega1 = w;
72 
73  rho_c = 0.0;
74 
75  if ( n_pre == 1 )
76  {
77  double kappa_c = *kappa;
78  double alpha1_c = *alpha1;
79  double chi1_c = *chi1;
80  double omega1_c = *omega1;
81 
82  rho_c += alpha1_c * chi1_c;
83  omega1_c += kappa_c * alpha1_c;
84 
85  *omega1 = omega1_c;
86 
87  alpha1 += inc_a;
88  chi1 += inc_x;
89  omega1 += inc_w;
90  }
91 
92  rho1v.v = _mm_setzero_pd();
93 
94  k1v.v = _mm_loaddup_pd( ( double* )kappa );
95 
96  for ( i = 0; i < n_run; ++i )
97  {
98  a1v.v = _mm_load_pd( ( double* )alpha1 );
99  x1v.v = _mm_load_pd( ( double* )chi1 );
100  w1v.v = _mm_load_pd( ( double* )omega1 );
101 
102  a2v.v = _mm_load_pd( ( double* )(alpha1 + 2) );
103  x2v.v = _mm_load_pd( ( double* )(chi1 + 2) );
104  w2v.v = _mm_load_pd( ( double* )(omega1 + 2) );
105 
106  rho1v.v += a1v.v * x1v.v;
107  w1v.v += k1v.v * a1v.v;
108 
109  _mm_store_pd( ( double* )omega1, w1v.v );
110 
111  rho1v.v += a2v.v * x2v.v;
112  w2v.v += k1v.v * a2v.v;
113 
114  _mm_store_pd( ( double* )(omega1 + 2), w2v.v );
115 
116  alpha1 += 4;
117  chi1 += 4;
118  omega1 += 4;
119  }
120 
121  if ( n_left > 0 )
122  {
123  for ( i = 0; i < n_left; ++i )
124  {
125  double kappa_c = *kappa;
126  double alpha1_c = *alpha1;
127  double chi1_c = *chi1;
128  double omega1_c = *omega1;
129 
130  rho_c += alpha1_c * chi1_c;
131  omega1_c += kappa_c * alpha1_c;
132 
133  *omega1 = omega1_c;
134 
135  alpha1 += inc_a;
136  chi1 += inc_x;
137  omega1 += inc_w;
138  }
139  }
140 
141  rho_c += rho1v.d[0] + rho1v.d[1];
142 
143  *rho = rho_c;
144 }
double alpha1_c
Definition: bl1_axpyv2b.c:144
double *restrict omega1
Definition: bl1_dotaxpy.c:149
double *restrict chi1
Definition: bl1_dotaxpy.c:146
alpha1
Definition: bl1_dotaxpy.c:338
int n_left
Definition: bl1_dotaxpy.c:156
int n_pre
Definition: bl1_dotaxpy.c:154
double rho_c
Definition: bl1_dotaxpy.c:151
double kappa_c
Definition: bl1_dotaxpy.c:150
int n_run
Definition: bl1_dotaxpy.c:155
* rho
Definition: bl1_dotaxpy.c:242
int i
Definition: bl1_dotaxpy.c:152
v2df_t
Definition: blis_type_defs.h:117
double d[2]
Definition: blis_type_defs.h:119
__m128d v
Definition: blis_type_defs.h:118

References alpha1, alpha1_c, bl1_abort(), chi1, v2df_t::d, i, kappa_c, n_left, n_pre, n_run, omega1, rho, rho_c, and v2df_t::v.

Referenced by FLA_Fused_Ahx_Ax_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().

◆ bl1_sdotaxpy()

void bl1_sdotaxpy ( int  n,
float *  a,
int  inc_a,
float *  x,
int  inc_x,
float *  kappa,
float *  rho,
float *  w,
int  inc_w 
)
26 {
27  bl1_abort();
28 }

References bl1_abort().

◆ bl1_zdotaxpy()

void bl1_zdotaxpy ( int  n,
dcomplex *  a,
int  inc_a,
dcomplex *  x,
int  inc_x,
dcomplex *  kappa,
dcomplex *  rho,
dcomplex *  w,
int  inc_w 
)
265 {
266  dcomplex* restrict alpha1;
267  dcomplex* restrict chi1;
268  dcomplex* restrict omega1;
269  int i;
270 
271  v2df_t kappa1v, kappa1rv;
272  v2df_t rho1v;
273  v2df_t a11v, a12v;
274  v2df_t x1v, x1rv;
275  v2df_t w1v;
276  v2df_t acbc, bdad;
277  v2df_t adac, bcbd;
278 
279  alpha1 = a;
280  chi1 = x;
281  omega1 = w;
282 
283  if ( inc_a != 1 ||
284  inc_x != 1 ||
285  inc_w != 1 ) bl1_abort();
286 
287  kappa1v.v = _mm_load_pd( ( double* )kappa );
288  kappa1rv.v = _mm_shuffle_pd( kappa1v.v, kappa1v.v, _MM_SHUFFLE2 (0,1) );
289 
290  rho1v.v = _mm_setzero_pd();
291 
292  for ( i = 0; i < n; ++i )
293  {
294  //alpha_c = *alpha1;
295  a11v.v = _mm_loaddup_pd( ( double* )&(alpha1->real) );
296  a12v.v = _mm_loaddup_pd( ( double* )&(alpha1->imag) );
297 
298  //rho_c.real += alpha1_c.real * chi1_c.real - -alpha1_c.imag * chi1_c.imag;
299  //rho_c.imag += alpha1_c.real * chi1_c.imag + -alpha1_c.imag * chi1_c.real;
300  x1v.v = _mm_load_pd( ( double* )chi1 );
301  x1rv.v = _mm_shuffle_pd( x1v.v, x1v.v, _MM_SHUFFLE2 (0,1) );
302  adac.v = a11v.v * x1rv.v;
303  bcbd.v = a12v.v * x1v.v;
304  rho1v.v = rho1v.v + _mm_addsub_pd( adac.v, bcbd.v );
305 
306  //omega_c = *omega1;
307  w1v.v = _mm_load_pd( ( double* )omega1 );
308 
309  //omega1_c.real += kappa_c.real * alpha1_c.real - kappa_c.imag * alpha1_c.imag;
310  //omega1_c.imag += kappa_c.real * alpha1_c.imag + kappa_c.imag * alpha1_c.real;
311  acbc.v = kappa1v.v * a11v.v;
312  bdad.v = kappa1rv.v * a12v.v;
313  w1v.v += _mm_addsub_pd( acbc.v, bdad.v );
314 
315  //*omega1 = omega1_c;
316  _mm_store_pd( ( double* )omega1, w1v.v );
317 
318  alpha1 += 1;
319  chi1 += 1;
320  omega1 += 1;
321  }
322 
323  rho1v.v = _mm_shuffle_pd( rho1v.v, rho1v.v, _MM_SHUFFLE2 (0,1) );
324 
325  //rho->real = rho_c.real;
326  //rho->imag = rho_c.imag;
327  _mm_store_pd( ( double* )rho, rho1v.v );
328 }
dcomplex
Definition: blis_type_defs.h:138

References alpha1, bl1_abort(), chi1, i, omega1, rho, and v2df_t::v.

Referenced by FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), and FLA_Fused_UZhu_ZUhu_opz_var1().

◆ for()

for ( )
199  {
200  double alpha1_c = *alpha1;
201  double alpha2_c = *(alpha1 + 1);
202  double chi1_c = *chi1;
203  double chi2_c = *(chi1 + 1);
204  double omega1_c = *omega1;
205  double omega2_c = *(omega1 + 1);
206 
207  // rho += conj(alpha1) * chi1;
208  rho_c += alpha1_c * chi1_c;
209  rho_c += alpha2_c * chi2_c;
210 
211  // omega1 += kappa * alpha1;
212  omega1_c += kappa_c * alpha1_c;
213  omega2_c += kappa_c * alpha2_c;
214 
215  *omega1 = omega1_c;
216  *(omega1 + 1) = omega2_c;
217 
218  alpha1 += 2*inc_a;
219  chi1 += 2*inc_x;
220  omega1 += 2*inc_w;
221  }
double alpha2_c
Definition: bl1_axpyv2b.c:145

References alpha1, alpha1_c, alpha2_c, chi1, kappa_c, omega1, and rho_c.

◆ if() [1/2]

if ( inc_a != 1 || inc_x != 1 || inc_w != 1 )
183  {
184  double alpha1_c = *alpha1;
185  double chi1_c = *chi1;
186  double omega1_c = *omega1;
187 
188  rho_c += alpha1_c * chi1_c;
189  omega1_c += kappa_c * alpha1_c;
190 
191  *omega1 = omega1_c;
192 
193  alpha1 += inc_a;
194  chi1 += inc_x;
195  omega1 += inc_w;
196  }

◆ if() [2/2]

if ( n_left > 0 )
224  {
225  for ( i = 0; i < n_left; ++i )
226  {
227  double alpha1_c = *alpha1;
228  double chi1_c = *chi1;
229  double omega1_c = *omega1;
230 
231  rho_c += alpha1_c * chi1_c;
232  omega1_c += kappa_c * alpha1_c;
233 
234  *omega1 = omega1_c;
235 
236  alpha1 += inc_a;
237  chi1 += inc_x;
238  omega1 += inc_w;
239  }
240  }

References alpha1, alpha1_c, chi1, i, kappa_c, n_left, omega1, and rho_c.

Variable Documentation

◆ alpha1

alpha1 = a

Referenced by bl1_ddotaxpy(), bl1_zdotaxpy(), for(), and if().

◆ chi1

chi1
Initial value:
{
double* restrict alpha1

Referenced by bl1_ddotaxpy(), bl1_zdotaxpy(), for(), and if().

◆ i

int i

Referenced by bl1_ddotaxpy(), bl1_zdotaxpy(), and if().

◆ imag

rho imag = 0.0

◆ kappa_c

kappa_c = *kappa

Referenced by bl1_ddotaxpy(), for(), and if().

◆ n_left

int n_left

Referenced by bl1_ddotaxpy(), and if().

◆ n_pre

int n_pre

Referenced by bl1_ddotaxpy().

◆ n_run

int n_run

Referenced by bl1_ddotaxpy().

◆ omega1

omega1 = w

Referenced by bl1_ddotaxpy(), bl1_zdotaxpy(), for(), and if().

◆ real

rho real = 0.0

◆ rho

* rho = rho_c

Referenced by bl1_ddotaxpy(), and bl1_zdotaxpy().

◆ rho_c

dcomplex rho_c

Referenced by bl1_ddotaxpy(), for(), and if().