libflame  revision_anchor
Functions | Variables
bl1_axpyv3b.c File Reference

(r)

Functions

void bl1_saxpyv3b (int n, float *alpha1, float *alpha2, float *alpha3, float *x1, int inc_x1, float *x2, int inc_x2, float *x3, int inc_x3, float *y, int inc_y)
 
void bl1_daxpyv3b (int n, double *alpha1, double *alpha2, double *alpha3, double *x1, int inc_x1, double *x2, int inc_x2, double *x3, int inc_x3, double *y, int inc_y)
 
 for (i=0;i< n_run;++i)
 
 if (n_left==1)
 
void bl1_caxpyv3b (int n, scomplex *alpha1, scomplex *alpha2, scomplex *alpha3, scomplex *x1, int inc_x1, scomplex *x2, int inc_x2, scomplex *x3, int inc_x3, scomplex *y, int inc_y)
 
void bl1_zaxpyv3b (int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *alpha3, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *x3, int inc_x3, dcomplex *y, int inc_y)
 

Variables

double *restrict chi2
 
double *restrict psi1 = y
 
double alpha1_c = *alpha1
 
double alpha2_c = *alpha2
 
double temp1
 
double temp2
 
int i
 
int n_run = n / 2
 
int n_left = n % 2
 
int twoinc_x1 = 2*inc_x1
 
int twoinc_x2 = 2*inc_x2
 
int twoinc_y = 2*inc_y
 
 chi1 = x1
 

Function Documentation

◆ bl1_caxpyv3b()

void bl1_caxpyv3b ( int  n,
scomplex *  alpha1,
scomplex *  alpha2,
scomplex *  alpha3,
scomplex *  x1,
int  inc_x1,
scomplex *  x2,
int  inc_x2,
scomplex *  x3,
int  inc_x3,
scomplex *  y,
int  inc_y 
)
219 {
220  bl1_abort();
221 }
void bl1_abort(void)
Definition: bl1_abort.c:13

References bl1_abort().

◆ bl1_daxpyv3b()

void bl1_daxpyv3b ( int  n,
double *  alpha1,
double *  alpha2,
double *  alpha3,
double *  x1,
int  inc_x1,
double *  x2,
int  inc_x2,
double *  x3,
int  inc_x3,
double *  y,
int  inc_y 
)
43 {
44  double* restrict chi1;
45  double* restrict chi2;
46  double* restrict chi3;
47  double* restrict psi1;
48  int i;
49 
50  int n_pre;
51  int n_run;
52  int n_left;
53 
54  v2df_t a1v, a2v, a3v;
55  v2df_t x11v, x12v, x13v;
56  v2df_t x21v, x22v, x23v;
57  v2df_t y1v;
58  v2df_t y2v;
59 
60  if ( inc_x1 != 1 ||
61  inc_x2 != 1 ||
62  inc_x3 != 1 ||
63  inc_y != 1 ) bl1_abort();
64 
65  n_pre = 0;
66  if ( ( unsigned long ) y % 16 != 0 )
67  {
68  if ( ( unsigned long ) x1 % 16 == 0 ||
69  ( unsigned long ) x2 % 16 == 0 ||
70  ( unsigned long ) x3 % 16 == 0 ) bl1_abort();
71 
72  n_pre = 1;
73  }
74 
75  n_run = ( n - n_pre ) / 4;
76  n_left = ( n - n_pre ) % 4;
77 
78  chi1 = x1;
79  chi2 = x2;
80  chi3 = x3;
81  psi1 = y;
82 
83  if ( n_pre == 1 )
84  {
85  double alpha1_c = *alpha1;
86  double alpha2_c = *alpha2;
87  double alpha3_c = *alpha3;
88  double chi11_c = *chi1;
89  double chi12_c = *chi2;
90  double chi13_c = *chi3;
91 
92  *psi1 += alpha1_c * chi11_c + alpha2_c * chi12_c + alpha3_c * chi13_c;
93 
94  chi1 += inc_x1;
95  chi2 += inc_x2;
96  chi3 += inc_x3;
97  psi1 += inc_y;
98  }
99 
100  a1v.v = _mm_loaddup_pd( ( double* )alpha1 );
101  a2v.v = _mm_loaddup_pd( ( double* )alpha2 );
102  a3v.v = _mm_loaddup_pd( ( double* )alpha3 );
103 
104  for ( i = 0; i < n_run; ++i )
105  {
106  x11v.v = _mm_load_pd( ( double* )chi1 );
107  x12v.v = _mm_load_pd( ( double* )chi2 );
108  x13v.v = _mm_load_pd( ( double* )chi3 );
109  y1v.v = _mm_load_pd( ( double* )psi1 );
110 
111  y1v.v += a1v.v * x11v.v + a2v.v * x12v.v + a3v.v * x13v.v;
112 
113  _mm_store_pd( ( double* )psi1, y1v.v );
114 
115  x21v.v = _mm_load_pd( ( double* )(chi1 + 2) );
116  x22v.v = _mm_load_pd( ( double* )(chi2 + 2) );
117  x23v.v = _mm_load_pd( ( double* )(chi3 + 2) );
118  y2v.v = _mm_load_pd( ( double* )(psi1 + 2) );
119 
120  y2v.v += a1v.v * x21v.v + a2v.v * x22v.v + a3v.v * x23v.v;
121 
122  _mm_store_pd( ( double* )(psi1 + 2), y2v.v );
123 
124  chi1 += 4;
125  chi2 += 4;
126  chi3 += 4;
127  psi1 += 4;
128  }
129 
130  if ( n_left > 0 )
131  {
132  double alpha1_c = *alpha1;
133  double alpha2_c = *alpha2;
134  double alpha3_c = *alpha3;
135 
136  for ( i = 0; i < n_left; ++i )
137  {
138  double chi11_c = *chi1;
139  double chi12_c = *chi2;
140  double chi13_c = *chi3;
141 
142  *psi1 += alpha1_c * chi11_c + alpha2_c * chi12_c + alpha3_c * chi13_c;
143 
144  chi1 += inc_x1;
145  chi2 += inc_x2;
146  chi3 += inc_x3;
147  psi1 += inc_y;
148  }
149  }
150 }
int n_pre
Definition: bl1_axmyv2.c:147
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
chi1
Definition: bl1_axpyv3b.c:168
int n_left
Definition: bl1_axpyv3b.c:163
double *restrict chi2
Definition: bl1_axpyv3b.c:152
double alpha1_c
Definition: bl1_axpyv3b.c:156
int n_run
Definition: bl1_axpyv3b.c:162
double *restrict psi1
Definition: bl1_axpyv3b.c:155
int i
Definition: bl1_axpyv3b.c:160
double alpha2_c
Definition: bl1_axpyv3b.c:157
x1
Definition: bl1_dotsv2.c:374
double *restrict alpha2
Definition: bl1_dotv2axpyv2b.c:186
v2df_t
Definition: blis_type_defs.h:117
__m128d v
Definition: blis_type_defs.h:118

References alpha1, alpha1_c, alpha2, alpha2_c, bl1_abort(), chi1, chi2, i, n_left, n_pre, n_run, psi1, v2df_t::v, and x1.

◆ bl1_saxpyv3b()

void bl1_saxpyv3b ( int  n,
float *  alpha1,
float *  alpha2,
float *  alpha3,
float *  x1,
int  inc_x1,
float *  x2,
int  inc_x2,
float *  x3,
int  inc_x3,
float *  y,
int  inc_y 
)
29 {
30  bl1_abort();
31 }

References bl1_abort().

◆ bl1_zaxpyv3b()

void bl1_zaxpyv3b ( int  n,
dcomplex *  alpha1,
dcomplex *  alpha2,
dcomplex *  alpha3,
dcomplex *  x1,
int  inc_x1,
dcomplex *  x2,
int  inc_x2,
dcomplex *  x3,
int  inc_x3,
dcomplex *  y,
int  inc_y 
)
232 {
233  bl1_abort();
234 }

References bl1_abort().

◆ for()

for ( i = 0; i < n_run; ++i )
176  {
177  double chi11_c = *chi1;
178  double chi21_c = *(chi1 + inc_x1);
179  double chi12_c = *chi2;
180  double chi22_c = *(chi2 + inc_x2);
181  double psi1_c = *psi1;
182  double psi2_c = *(psi1 + inc_y);
183 
184  // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
185  // psi2 = psi2 + alpha1 * chi21 + alpha2 * chi22;
186  temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
187  temp2 = alpha1_c * chi21_c + alpha2_c * chi22_c;
188 
189  *psi1 = psi1_c + temp1;
190  *(psi1 + inc_y) = psi2_c + temp2;
191 
192  chi1 += twoinc_x1;
193  chi2 += twoinc_x2;
194  psi1 += twoinc_y;
195  }
double temp2
Definition: bl1_axpyv3b.c:159
int twoinc_x2
Definition: bl1_axpyv3b.c:165
int twoinc_y
Definition: bl1_axpyv3b.c:166
int twoinc_x1
Definition: bl1_axpyv3b.c:164
double temp1
Definition: bl1_axpyv3b.c:158

References alpha1_c, alpha2_c, chi1, chi2, psi1, temp1, temp2, twoinc_x1, twoinc_x2, and twoinc_y.

◆ if()

if ( n_left == 1 )
198  {
199  double chi11_c = *chi1;
200  double chi12_c = *chi2;
201 
202  // psi1 = psi1 + alpha1 * chi11 + alpha2 * chi12;
203  temp1 = alpha1_c * chi11_c + alpha2_c * chi12_c;
204 
205  *psi1 = *psi1 + temp1;
206  }

References alpha1_c, alpha2_c, chi1, chi2, psi1, and temp1.

Variable Documentation

◆ alpha1_c

alpha1_c = *alpha1

Referenced by bl1_daxpyv3b(), for(), and if().

◆ alpha2_c

alpha2_c = *alpha2

Referenced by bl1_daxpyv3b(), for(), and if().

◆ chi1

chi1 = x1

Referenced by bl1_daxpyv3b(), for(), and if().

◆ chi2

chi2
Initial value:
{
double* restrict chi1

Referenced by bl1_daxpyv3b(), for(), and if().

◆ i

int i

Referenced by bl1_daxpyv3b().

◆ n_left

int n_left = n % 2

Referenced by bl1_daxpyv3b().

◆ n_run

int n_run = n / 2

Referenced by bl1_daxpyv3b().

◆ psi1

psi1 = y

Referenced by bl1_daxpyv3b(), for(), and if().

◆ temp1

double temp1

Referenced by for(), and if().

◆ temp2

double temp2

Referenced by for().

◆ twoinc_x1

int twoinc_x1 = 2*inc_x1

Referenced by for().

◆ twoinc_x2

int twoinc_x2 = 2*inc_x2

Referenced by for().

◆ twoinc_y

int twoinc_y = 2*inc_y

Referenced by for().