libflame  revision_anchor
Functions | Variables
bl1_axpyv2b.c File Reference

(r)

Functions

void bl1_saxpyv2b (int n, float *alpha1, float *alpha2, float *x1, int inc_x1, float *x2, int inc_x2, float *y, int inc_y)
 
void bl1_daxpyv2b (int n, double *alpha1, double *alpha2, double *x1, int inc_x1, double *x2, int inc_x2, double *y, int inc_y)
 
 for (i=0;i< n_run;++i)
 
 if (n_left==1)
 
void bl1_caxpyv2b (int n, scomplex *alpha1, scomplex *alpha2, scomplex *x1, int inc_x1, scomplex *x2, int inc_x2, scomplex *y, int inc_y)
 
void bl1_zaxpyv2b (int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
 

Variables

double *restrict chi2
 
double *restrict psi1 = y
 
double alpha1_c = *alpha1
 
double alpha2_c = *alpha2
 
double temp1
 
double temp2
 
int i
 
int n_run = n / 2
 
int n_left = n % 2
 
int twoinc_x1 = 2*inc_x1
 
int twoinc_x2 = 2*inc_x2
 
int twoinc_y = 2*inc_y
 
 chi1 = x1
 
dcomplex temp
 

Function Documentation

◆ bl1_caxpyv2b()

void bl1_caxpyv2b ( int  n,
scomplex *  alpha1,
scomplex *  alpha2,
scomplex *  x1,
int  inc_x1,
scomplex *  x2,
int  inc_x2,
scomplex *  y,
int  inc_y 
)
205 {
// This variant performs no computation: its entire body is a call to
// bl1_abort(), and none of the arguments are read.
206  bl1_abort();
207 }
void bl1_abort(void)
Definition: bl1_abort.c:13

References bl1_abort().

◆ bl1_daxpyv2b()

void bl1_daxpyv2b ( int  n,
double *  alpha1,
double *  alpha2,
double *  x1,
int  inc_x1,
double *  x2,
int  inc_x2,
double *  y,
int  inc_y 
)
38 {
// Updates y in place: y := y + alpha1 * x1 + alpha2 * x2, over n
// double-precision elements. Only unit increments are handled; any other
// increment results in a call to bl1_abort().
//
// Structure: an optional one-element scalar "peel" step, a vector loop that
// handles four elements per iteration with 2-double SSE vectors, and a
// scalar cleanup loop for the remaining elements.
39  double* restrict chi1;
40  double* restrict chi2;
41  double* restrict psi1;
42  int i;
43 
44  int n_pre;
45  int n_run;
46  int n_left;
47 
48  v2df_t a1v, a2v;
49  v2df_t x11v, x12v;
50  v2df_t x21v, x22v;
51  v2df_t y1v;
52  v2df_t y2v;
53 
// The vector loop below steps every pointer by a fixed 4 elements, so
// non-unit strides are rejected up front.
54  if ( inc_x1 != 1 ||
55  inc_x2 != 1 ||
56  inc_y != 1 ) bl1_abort();
57 
// Alignment handling: if y is not on a 16-byte boundary, one element is
// peeled off (n_pre = 1). In that case x1 and x2 must also be off a 16-byte
// boundary, otherwise bl1_abort() is called -- presumably so that all three
// pointers become 16-byte aligned after advancing by one double.
// NOTE(review): when y IS 16-byte aligned, the alignment of x1 and x2 is not
// checked here, yet the loop below uses _mm_load_pd on them -- confirm that
// callers guarantee matching alignment.
58  n_pre = 0;
59  if ( ( unsigned long ) y % 16 != 0 )
60  {
61  if ( ( unsigned long ) x1 % 16 == 0 ||
62  ( unsigned long ) x2 % 16 == 0 ) bl1_abort();
63 
64  n_pre = 1;
65  }
66 
// After the optional peel, split the remainder into groups of four (vector
// loop) plus a tail of fewer than four elements (scalar cleanup).
67  n_run = ( n - n_pre ) / 4;
68  n_left = ( n - n_pre ) % 4;
69 
70  chi1 = x1;
71  chi2 = x2;
72  psi1 = y;
73 
// Peel step: update a single element in scalar code.
// NOTE(review): this branch depends only on n_pre, not on n, so with n == 0
// and a misaligned y it would still read and write one element -- confirm
// whether callers can pass n == 0.
74  if ( n_pre == 1 )
75  {
76  double alpha1_c = *alpha1;
77  double alpha2_c = *alpha2;
78  double chi11_c = *chi1;
79  double chi12_c = *chi2;
80  double temp1;
81 
82  // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
83  temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
84  *psi1 = *psi1 + temp1;
85 
86  chi1 += inc_x1;
87  chi2 += inc_x2;
88  psi1 += inc_y;
89  }
90 
// Broadcast each scalar into both lanes of a 2-double vector.
91  a1v.v = _mm_loaddup_pd( ( double* )alpha1 );
92  a2v.v = _mm_loaddup_pd( ( double* )alpha2 );
93 
// Vector loop: each iteration loads elements [0,1] and [2,3] of x1, x2 and y,
// applies the update to both pairs, and stores the two y pairs back.
94  for ( i = 0; i < n_run; ++i )
95  {
96  x11v.v = _mm_load_pd( ( double* )chi1 );
97  x12v.v = _mm_load_pd( ( double* )chi2 );
98  y1v.v = _mm_load_pd( ( double* )psi1 );
99 
100  x21v.v = _mm_load_pd( ( double* )(chi1 + 2) );
101  x22v.v = _mm_load_pd( ( double* )(chi2 + 2) );
102  y2v.v = _mm_load_pd( ( double* )(psi1 + 2) );
103 
104  y1v.v += a1v.v * x11v.v + a2v.v * x12v.v;
105  y2v.v += a1v.v * x21v.v + a2v.v * x22v.v;
106 
107  _mm_store_pd( ( double* )psi1, y1v.v );
108  _mm_store_pd( ( double* )(psi1 + 2), y2v.v );
109 
110  //chi1 += step_x1;
111  //chi2 += step_x2;
112  //psi1 += step_y;
113  chi1 += 4;
114  chi2 += 4;
115  psi1 += 4;
116  }
117 
// Scalar cleanup: apply the same update to the n_left elements that did not
// fill a whole group of four.
118  if ( n_left > 0 )
119  {
120  double alpha1_c = *alpha1;
121  double alpha2_c = *alpha2;
122 
123  for ( i = 0; i < n_left; ++i )
124  {
125  double chi11_c = *chi1;
126  double chi12_c = *chi2;
127  double psi1_c = *psi1;
128  double temp1;
129 
130  temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
131  *psi1 = psi1_c + temp1;
132 
133  chi1 += inc_x1;
134  chi2 += inc_x2;
135  psi1 += inc_y;
136  }
137  }
138 }
int n_pre
Definition: bl1_axmyv2.c:147
chi1
Definition: bl1_axpyv2b.c:156
int n_left
Definition: bl1_axpyv2b.c:151
int n_run
Definition: bl1_axpyv2b.c:150
double *restrict psi1
Definition: bl1_axpyv2b.c:143
int i
Definition: bl1_axpyv2b.c:148
double temp1
Definition: bl1_axpyv2b.c:146
double *restrict chi2
Definition: bl1_axpyv2b.c:140
double alpha1_c
Definition: bl1_axpyv2b.c:144
double alpha2_c
Definition: bl1_axpyv2b.c:145
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
x1
Definition: bl1_dotsv2.c:374
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
v2df_t
Definition: blis_type_defs.h:117
__m128d v
Definition: blis_type_defs.h:118

References alpha1, alpha1_c, alpha2, alpha2_c, bl1_abort(), chi1, chi2, i, n_left, n_pre, n_run, psi1, temp1, v2df_t::v, and x1.

Referenced by FLA_Fused_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), and FLA_Fused_UZhu_ZUhu_opd_var1().

◆ bl1_saxpyv2b()

void bl1_saxpyv2b ( int  n,
float *  alpha1,
float *  alpha2,
float *  x1,
int  inc_x1,
float *  x2,
int  inc_x2,
float *  y,
int  inc_y 
)
26 {
// This variant performs no computation: its entire body is a call to
// bl1_abort(), and none of the arguments are read.
27  bl1_abort();
28 }

References bl1_abort().

◆ bl1_zaxpyv2b()

void bl1_zaxpyv2b ( int  n,
dcomplex *  alpha1,
dcomplex *  alpha2,
dcomplex *  x1,
int  inc_x1,
dcomplex *  x2,
int  inc_x2,
dcomplex *  y,
int  inc_y 
)
217 {
// Updates y in place: y := y + alpha1 * x1 + alpha2 * x2, over n dcomplex
// elements. Each dcomplex is accessed as one 2-double SSE vector; the
// comments below assume the layout is (real, imaginary) -- TODO confirm
// against the dcomplex definition.
// NOTE(review): every load/store here is the aligned form (_mm_load_pd /
// _mm_store_pd) and no alignment check is visible in this function --
// confirm that callers pass 16-byte-aligned alpha1, alpha2, x1, x2 and y.
218  dcomplex* restrict chi1;
219  dcomplex* restrict chi2;
220  dcomplex* restrict psi1;
221  int i;
222  v2df_t alpha1v, alpha1rv;
223  v2df_t alpha2v, alpha2rv;
224  v2df_t x11v, x12v;
225  v2df_t t1v, y1v;
226  v2df_t acbc, bdad;
227 
228  chi1 = x1;
229  chi2 = x2;
230  psi1 = y;
231 
// alphaNv holds the scalar as loaded; alphaNrv holds the same two doubles
// with the lanes swapped. Both forms are needed for the complex product.
232  alpha1v.v = _mm_load_pd( ( double* )alpha1 );
233  alpha2v.v = _mm_load_pd( ( double* )alpha2 );
234  alpha1rv.v = _mm_shuffle_pd( alpha1v.v, alpha1v.v, _MM_SHUFFLE2 (0,1) );
235  alpha2rv.v = _mm_shuffle_pd( alpha2v.v, alpha2v.v, _MM_SHUFFLE2 (0,1) );
236 
// The two branches below run the same per-element computation; they differ
// only in how far the pointers advance (a constant 1 versus the inc_*
// arguments).
237  if ( inc_x1 == 1 &&
238  inc_x2 == 1 &&
239  inc_y == 1 )
240  {
241  for ( i = 0; i < n; ++i )
242  {
// alpha1 * chi1: broadcast the low lane of chi1 into x11v and the high
// lane into x12v, then combine the two partial products with addsub
// (low lane subtracts, high lane adds).
243  x11v.v = _mm_load_pd( ( double* )chi1 );
244  x12v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (1,1) );
245  x11v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (0,0) );
246  acbc.v = alpha1v.v * x11v.v;
247  bdad.v = alpha1rv.v * x12v.v;
248  t1v.v = _mm_addsub_pd( acbc.v, bdad.v );
249 
// alpha2 * chi2, computed the same way and accumulated into t1v.
250  x11v.v = _mm_load_pd( ( double* )chi2 );
251  x12v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (1,1) );
252  x11v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (0,0) );
253  acbc.v = alpha2v.v * x11v.v;
254  bdad.v = alpha2rv.v * x12v.v;
255  t1v.v = t1v.v + _mm_addsub_pd( acbc.v, bdad.v );
256 
// psi1 += alpha1 * chi1 + alpha2 * chi2
257  y1v.v = _mm_load_pd( ( double* )psi1 );
258  y1v.v = y1v.v + t1v.v;
259  _mm_store_pd( ( double* )psi1, y1v.v );
260 
261  chi1 += 1;
262  chi2 += 1;
263  psi1 += 1;
264  }
265  }
266  else
267  {
// General-stride path: same arithmetic as above, stepping by the
// caller-supplied increments.
268  for ( i = 0; i < n; ++i )
269  {
270  x11v.v = _mm_load_pd( ( double* )chi1 );
271  x12v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (1,1) );
272  x11v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (0,0) );
273  acbc.v = alpha1v.v * x11v.v;
274  bdad.v = alpha1rv.v * x12v.v;
275  t1v.v = _mm_addsub_pd( acbc.v, bdad.v );
276 
277  x11v.v = _mm_load_pd( ( double* )chi2 );
278  x12v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (1,1) );
279  x11v.v = _mm_shuffle_pd( x11v.v, x11v.v, _MM_SHUFFLE2 (0,0) );
280  acbc.v = alpha2v.v * x11v.v;
281  bdad.v = alpha2rv.v * x12v.v;
282  t1v.v = t1v.v + _mm_addsub_pd( acbc.v, bdad.v );
283 
284  y1v.v = _mm_load_pd( ( double* )psi1 );
285  y1v.v = y1v.v + t1v.v;
286  _mm_store_pd( ( double* )psi1, y1v.v );
287 
288  chi1 += inc_x1;
289  chi2 += inc_x2;
290  psi1 += inc_y;
291  }
292  }
293 }
Definition: blis_type_defs.h:138

References alpha1, alpha2, chi1, chi2, i, psi1, v2df_t::v, and x1.

Referenced by FLA_Fused_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Axpy_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Fused_Her2_Ax_l_opz_var1(), and FLA_Fused_Uhu_Yhu_Zhu_opz_var1().

◆ for()

for ( i = 0; i < n_run; ++i )
164  {
// Loop body (the enclosing function is not shown in this listing): handles
// two elements of each vector per iteration -- the one at the current
// pointer and the one a single increment further on.
165  double chi11_c = *chi1;
166  double chi21_c = *(chi1 + inc_x1);
167  double chi12_c = *chi2;
168  double chi22_c = *(chi2 + inc_x2);
169  double psi1_c = *psi1;
170  double psi2_c = *(psi1 + inc_y);
171 
172  // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
173  // psi2 = psi2 + alpha1 * chi21 + alpha2 * chi22;
174  temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
175  temp2 = alpha1_c * chi21_c + alpha2_c * chi22_c;
176 
177  *psi1 = psi1_c + temp1;
178  *(psi1 + inc_y) = psi2_c + temp2;
179 
// Step each pointer by its twoinc_* value (documented on this page as
// 2 * the corresponding increment), i.e. past both elements just handled.
180  chi1 += twoinc_x1;
181  chi2 += twoinc_x2;
182  psi1 += twoinc_y;
183  }
double temp2
Definition: bl1_axpyv2b.c:147
int twoinc_x2
Definition: bl1_axpyv2b.c:153
int twoinc_y
Definition: bl1_axpyv2b.c:154
int twoinc_x1
Definition: bl1_axpyv2b.c:152

References alpha1_c, alpha2_c, chi1, chi2, psi1, temp1, temp2, twoinc_x1, twoinc_x2, and twoinc_y.

◆ if()

if ( n_left == 1 )
186  {
// Body of the n_left check (the enclosing function is not shown in this
// listing): applies the update to the single element at the current
// pointers. The pointers are not advanced afterwards.
187  double chi11_c = *chi1;
188  double chi12_c = *chi2;
189 
190  // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
191  temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
192 
193  *psi1 = *psi1 + temp1;
194  }

References alpha1_c, alpha2_c, chi1, chi2, psi1, and temp1.

Variable Documentation

◆ alpha1_c

dcomplex alpha1_c = *alpha1

◆ alpha2_c

dcomplex alpha2_c = *alpha2

◆ chi1

chi1 = x1

Referenced by bl1_daxpyv2b(), bl1_zaxpyv2b(), for(), and if().

◆ chi2

dcomplex *restrict chi2
Initial value:
{
double* restrict chi1

Referenced by bl1_daxpyv2b(), bl1_zaxpyv2b(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Ahx_Ax_opz_var1(), for(), and if().

◆ i

int i

Referenced by bl1_daxpyv2b(), and bl1_zaxpyv2b().

◆ n_left

int n_left = n % 2

Referenced by bl1_daxpyv2b().

◆ n_run

int n_run = n / 2

Referenced by bl1_daxpyv2b().

◆ psi1

psi1 = y

Referenced by bl1_daxpyv2b(), bl1_zaxpyv2b(), for(), and if().

◆ temp

dcomplex temp

◆ temp1

double temp1

◆ temp2

double temp2

◆ twoinc_x1

int twoinc_x1 = 2*inc_x1

Referenced by for().

◆ twoinc_x2

int twoinc_x2 = 2*inc_x2

Referenced by for().

◆ twoinc_y

int twoinc_y = 2*inc_y