|
void | bl1_saxpyv2bdotaxpy (int n, float *beta, float *u, int inc_u, float *gamma, float *z, int inc_z, float *a, int inc_a, float *x, int inc_x, float *kappa, float *rho, float *w, int inc_w) |
|
void | bl1_daxpyv2bdotaxpy (int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w) |
|
| if (n_pre==1) |
|
| for (i=0;i< n_run;++i) |
|
| if (n_left > 0) |
|
void | bl1_caxpyv2bdotaxpy (int n, scomplex *beta, scomplex *u, int inc_u, scomplex *gamma, scomplex *z, int inc_z, scomplex *a, int inc_a, scomplex *x, int inc_x, scomplex *kappa, scomplex *rho, scomplex *w, int inc_w) |
|
void | bl1_zaxpyv2bdotaxpy (int n, dcomplex *beta, dcomplex *u, int inc_u, dcomplex *gamma, dcomplex *z, int inc_z, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w) |
|
void bl1_daxpyv2bdotaxpy
(
    int      n,
    double * beta,
    double * u,
    int      inc_u,
    double * gamma,
    double * z,
    int      inc_z,
    double * a,
    int      inc_a,
    double * x,
    int      inc_x,
    double * kappa,
    double * rho,
    double * w,
    int      inc_w
)
| |
49 double* restrict zeta1;
51 double* restrict chi1;
67 if ( ( unsigned long ) a % 16 != 0 )
69 if ( ( unsigned long ) u % 16 == 0 ||
70 ( unsigned long ) z % 16 == 0 ||
71 ( unsigned long ) x % 16 == 0 ||
72 ( unsigned long ) w % 16 == 0 ) bl1_abort();
96 double zeta1_c = *zeta1;
98 double chi1_c = *chi1;
115 b1v.v = _mm_loaddup_pd( ( double* )beta );
116 g1v.v = _mm_loaddup_pd( ( double* )gamma );
117 k1v.v = _mm_loaddup_pd( ( double* )kappa );
119 rhov.v = _mm_setzero_pd();
123 u1v.v = _mm_load_pd( ( double* )upsilon1 );
124 z1v.v = _mm_load_pd( ( double* )zeta1 );
125 a1v.v = _mm_load_pd( ( double* )alpha1 );
127 a1v.v += b1v.v * u1v.v + g1v.v * z1v.v;
129 u2v.v = _mm_load_pd( ( double* )(upsilon1 + 2) );
130 z2v.v = _mm_load_pd( ( double* )(zeta1 + 2) );
131 a2v.v = _mm_load_pd( ( double* )(alpha1 + 2) );
133 a2v.v += b1v.v * u2v.v + g1v.v * z2v.v;
135 x1v.v = _mm_load_pd( ( double* )chi1 );
136 x2v.v = _mm_load_pd( ( double* )(chi1 + 2) );
138 w1v.v = _mm_load_pd( ( double* )omega1 );
139 w2v.v = _mm_load_pd( ( double* )(omega1 + 2) );
141 rhov.v += a1v.v * x1v.v;
142 rhov.v += a2v.v * x2v.v;
144 w1v.v += k1v.v * a1v.v;
145 w2v.v += k1v.v * a2v.v;
147 _mm_store_pd( ( double* )alpha1, a1v.v );
148 _mm_store_pd( ( double* )(alpha1 + 2), a2v.v );
150 _mm_store_pd( ( double* )omega1, w1v.v );
151 _mm_store_pd( ( double* )(omega1 + 2), w2v.v );
161 rho_c += rhov.d[0] + rhov.d[1];
172 double zeta1_c = *zeta1;
174 double chi1_c = *chi1;
175 double omega1_c = *omega1;
double alpha1_c
Definition: bl1_axpyv2b.c:144
int n_left
Definition: bl1_axpyv2bdotaxpy.c:209
double *restrict chi1
Definition: bl1_axpyv2bdotaxpy.c:199
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225
double beta_c
Definition: bl1_axpyv2bdotaxpy.c:201
double rho_c
Definition: bl1_axpyv2bdotaxpy.c:204
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
double kappa_c
Definition: bl1_axpyv2bdotaxpy.c:203
* rho
Definition: bl1_axpyv2bdotaxpy.c:322
double *restrict zeta1
Definition: bl1_axpyv2bdotaxpy.c:195
int i
Definition: bl1_axpyv2bdotaxpy.c:205
int n_pre
Definition: bl1_axpyv2bdotaxpy.c:207
double gamma_c
Definition: bl1_axpyv2bdotaxpy.c:202
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200
int n_run
Definition: bl1_axpyv2bdotaxpy.c:208
v2df_t
Definition: blis_type_defs.h:117
double d[2]
Definition: blis_type_defs.h:119
__m128d v
Definition: blis_type_defs.h:118
References alpha1, alpha1_c, beta_c, bl1_abort(), chi1, v2df_t::d, gamma_c, i, kappa_c, n_left, n_pre, n_run, omega1, rho, rho_c, upsilon1, v2df_t::v, and zeta1.
Referenced by FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), and FLA_Fused_Her2_Ax_l_opd_var1().