libflame  revision_anchor
Functions
bl1_axpysmt.c File Reference

(r)

Functions

void bl1_saxpysmt (trans1_t trans, int m, int n, float *alpha0, float *alpha1, float *a, int a_rs, int a_cs, float *beta, float *b, int b_rs, int b_cs)
 
void bl1_daxpysmt (trans1_t trans, int m, int n, double *alpha0, double *alpha1, double *a, int a_rs, int a_cs, double *beta, double *b, int b_rs, int b_cs)
 
void bl1_caxpysmt (trans1_t trans, int m, int n, scomplex *alpha0, scomplex *alpha1, scomplex *a, int a_rs, int a_cs, scomplex *beta, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpysmt (trans1_t trans, int m, int n, dcomplex *alpha0, dcomplex *alpha1, dcomplex *a, int a_rs, int a_cs, dcomplex *beta, dcomplex *b, int b_rs, int b_cs)
 

Function Documentation

◆ bl1_caxpysmt()

void bl1_caxpysmt ( trans1_t  trans,
int  m,
int  n,
scomplex *  alpha0,
scomplex *  alpha1,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  beta,
scomplex *  b,
int  b_rs,
int  b_cs 
)
164 {
165  scomplex* a_begin;
166  scomplex* b_begin;
167  scomplex* a_temp;
168  scomplex alpha_prod;
169  int inca_temp;
170  int lda, inca;
171  int ldb, incb;
172  int n_iter;
173  int n_elem;
174  int j;
175 
176  // Return early if possible.
177  if ( bl1_zero_dim2( m, n ) ) return;
178 
179  alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
180  alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
181 
182  // Handle cases where A and B are vectors to ensure that the underlying axpy
183  // gets invoked only once.
184  if ( bl1_is_vector( m, n ) )
185  {
186  // Initialize with values appropriate for vectors.
187  n_iter = 1;
188  n_elem = bl1_vector_dim( m, n );
189  lda = 1; // multiplied by zero when n_iter == 1; not needed.
190  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
191  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
192  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
193  }
194  else // matrix case
195  {
196  // Initialize with optimal values for column-major storage.
197  n_iter = n;
198  n_elem = m;
199  lda = a_cs;
200  inca = a_rs;
201  ldb = b_cs;
202  incb = b_rs;
203 
204  // Handle the transposition of A.
205  if ( bl1_does_trans( trans ) )
206  {
207  bl1_swap_ints( lda, inca );
208  }
209 
210  // An optimization: if B is row-major and if A is effectively row-major
211  // after a possible transposition, then let's access the matrices by rows
212  // instead of by columns for increased spatial locality.
213  if ( bl1_is_row_storage( b_rs, b_cs ) )
214  {
215  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
216  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
217  {
218  bl1_swap_ints( n_iter, n_elem );
219  bl1_swap_ints( lda, inca );
220  bl1_swap_ints( ldb, incb );
221  }
222  }
223  }
224 
225  if ( bl1_does_conj( trans ) )
226  {
227  conj1_t conj = bl1_proj_trans1_to_conj( trans );
228 
229  a_temp = bl1_callocv( n_elem );
230  inca_temp = 1;
231 
232  for ( j = 0; j < n_iter; j++ )
233  {
234  a_begin = a + j*lda;
235  b_begin = b + j*ldb;
236 
237  bl1_ccopyv( conj,
238  n_elem,
239  a_begin, inca,
240  a_temp, inca_temp );
241 
242  bl1_cscal( n_elem,
243  beta,
244  b_begin, incb );
245 
246  bl1_caxpy( n_elem,
247  &alpha_prod,
248  a_temp, inca_temp,
249  b_begin, incb );
250  }
251 
252  bl1_cfree( a_temp );
253  }
254  else // if ( !bl1_does_conj( trans ) )
255  {
256  for ( j = 0; j < n_iter; j++ )
257  {
258  a_begin = a + j*lda;
259  b_begin = b + j*ldb;
260 
261  bl1_cscal( n_elem,
262  beta,
263  b_begin, incb );
264 
265  bl1_caxpy( n_elem,
266  &alpha_prod,
267  a_begin, inca,
268  b_begin, incb );
269  }
270  }
271 }
void bl1_caxpy(int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpy.c:43
double *restrict alpha1
Definition: bl1_axpyv2bdotaxpy.c:198
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cscal(int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scal.c:52
int bl1_does_notrans(trans1_t trans)
Definition: bl1_does.c:19
int bl1_does_conj(trans1_t trans)
Definition: bl1_does.c:25
int bl1_is_row_storage(int rs, int cs)
Definition: bl1_is.c:95
int bl1_is_vector(int m, int n)
Definition: bl1_is.c:106
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
conj1_t bl1_proj_trans1_to_conj(trans1_t trans)
Definition: bl1_proj.c:13
int bl1_vector_dim(int m, int n)
Definition: bl1_vector.c:13
int bl1_vector_inc(trans1_t trans, int m, int n, int rs, int cs)
Definition: bl1_vector.c:19
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
int bl1_does_trans(trans1_t trans)
Definition: bl1_does.c:13
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
scomplex * bl1_callocv(unsigned int n_elem)
Definition: bl1_allocv.c:40
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
conj1_t
Definition: blis_type_defs.h:80
scomplex
Definition: blis_type_defs.h:133
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References alpha1, bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_cscal(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), BLIS1_NO_TRANSPOSE, scomplex::imag, and scomplex::real.

Referenced by FLA_Axpys_external().

◆ bl1_daxpysmt()

void bl1_daxpysmt ( trans1_t  trans,
int  m,
int  n,
double *  alpha0,
double *  alpha1,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  b,
int  b_rs,
int  b_cs 
)
89 {
90  double* a_begin;
91  double* b_begin;
92  double alpha_prod;
93  int lda, inca;
94  int ldb, incb;
95  int n_iter;
96  int n_elem;
97  int j;
98 
99  // Return early if possible.
100  if ( bl1_zero_dim2( m, n ) ) return;
101 
102  alpha_prod = (*alpha0) * (*alpha1);
103 
104  // Handle cases where A and B are vectors to ensure that the underlying axpy
105  // gets invoked only once.
106  if ( bl1_is_vector( m, n ) )
107  {
108  // Initialize with values appropriate for vectors.
109  n_iter = 1;
110  n_elem = bl1_vector_dim( m, n );
111  lda = 1; // multiplied by zero when n_iter == 1; not needed.
112  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
113  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
114  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
115  }
116  else // matrix case
117  {
118  // Initialize with optimal values for column-major storage.
119  n_iter = n;
120  n_elem = m;
121  lda = a_cs;
122  inca = a_rs;
123  ldb = b_cs;
124  incb = b_rs;
125 
126  // Handle the transposition of A.
127  if ( bl1_does_trans( trans ) )
128  {
129  bl1_swap_ints( lda, inca );
130  }
131 
132  // An optimization: if B is row-major and if A is effectively row-major
133  // after a possible transposition, then let's access the matrices by rows
134  // instead of by columns for increased spatial locality.
135  if ( bl1_is_row_storage( b_rs, b_cs ) )
136  {
137  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
138  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
139  {
140  bl1_swap_ints( n_iter, n_elem );
141  bl1_swap_ints( lda, inca );
142  bl1_swap_ints( ldb, incb );
143  }
144  }
145  }
146 
147  for ( j = 0; j < n_iter; j++ )
148  {
149  a_begin = a + j*lda;
150  b_begin = b + j*ldb;
151 
152  bl1_dscal( n_elem,
153  beta,
154  b_begin, incb );
155 
156  bl1_daxpy( n_elem,
157  &alpha_prod,
158  a_begin, inca,
159  b_begin, incb );
160  }
161 }
void bl1_daxpy(int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpy.c:28
void bl1_dscal(int n, double *alpha, double *x, int incx)
Definition: bl1_scal.c:26

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_dscal(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

◆ bl1_saxpysmt()

void bl1_saxpysmt ( trans1_t  trans,
int  m,
int  n,
float *  alpha0,
float *  alpha1,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  b,
int  b_rs,
int  b_cs 
)
14 {
15  float* a_begin;
16  float* b_begin;
17  float alpha_prod;
18  int lda, inca;
19  int ldb, incb;
20  int n_iter;
21  int n_elem;
22  int j;
23 
24  // Return early if possible.
25  if ( bl1_zero_dim2( m, n ) ) return;
26 
27  alpha_prod = (*alpha0) * (*alpha1);
28 
29  // Handle cases where A and B are vectors to ensure that the underlying axpy
30  // gets invoked only once.
31  if ( bl1_is_vector( m, n ) )
32  {
33  // Initialize with values appropriate for vectors.
34  n_iter = 1;
35  n_elem = bl1_vector_dim( m, n );
36  lda = 1; // multiplied by zero when n_iter == 1; not needed.
37  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
38  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
39  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
40  }
41  else // matrix case
42  {
43  // Initialize with optimal values for column-major storage.
44  n_iter = n;
45  n_elem = m;
46  lda = a_cs;
47  inca = a_rs;
48  ldb = b_cs;
49  incb = b_rs;
50 
51  // Handle the transposition of A.
52  if ( bl1_does_trans( trans ) )
53  {
54  bl1_swap_ints( lda, inca );
55  }
56 
57  // An optimization: if B is row-major and if A is effectively row-major
58  // after a possible transposition, then let's access the matrices by rows
59  // instead of by columns for increased spatial locality.
60  if ( bl1_is_row_storage( b_rs, b_cs ) )
61  {
62  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
63  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
64  {
65  bl1_swap_ints( n_iter, n_elem );
66  bl1_swap_ints( lda, inca );
67  bl1_swap_ints( ldb, incb );
68  }
69  }
70  }
71 
72  for ( j = 0; j < n_iter; j++ )
73  {
74  a_begin = a + j*lda;
75  b_begin = b + j*ldb;
76 
77  bl1_sscal( n_elem,
78  beta,
79  b_begin, incb );
80 
81  bl1_saxpy( n_elem,
82  &alpha_prod,
83  a_begin, inca,
84  b_begin, incb );
85  }
86 }
void bl1_saxpy(int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpy.c:13
void bl1_sscal(int n, float *alpha, float *x, int incx)
Definition: bl1_scal.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_sscal(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by FLA_Axpys_external().

◆ bl1_zaxpysmt()

void bl1_zaxpysmt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  alpha0,
dcomplex *  alpha1,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  beta,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
274 {
275  dcomplex* a_begin;
276  dcomplex* b_begin;
277  dcomplex* a_temp;
278  dcomplex alpha_prod;
279  int inca_temp;
280  int lda, inca;
281  int ldb, incb;
282  int n_iter;
283  int n_elem;
284  int j;
285 
286  // Return early if possible.
287  if ( bl1_zero_dim2( m, n ) ) return;
288 
289  alpha_prod.real = alpha0->real * alpha1->real - alpha0->imag * alpha1->imag;
290  alpha_prod.imag = alpha0->real * alpha1->imag + alpha0->imag * alpha1->real;
291 
292  // Handle cases where A and B are vectors to ensure that the underlying axpy
293  // gets invoked only once.
294  if ( bl1_is_vector( m, n ) )
295  {
296  // Initialize with values appropriate for vectors.
297  n_iter = 1;
298  n_elem = bl1_vector_dim( m, n );
299  lda = 1; // multiplied by zero when n_iter == 1; not needed.
300  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
301  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
302  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
303  }
304  else // matrix case
305  {
306  // Initialize with optimal values for column-major storage.
307  n_iter = n;
308  n_elem = m;
309  lda = a_cs;
310  inca = a_rs;
311  ldb = b_cs;
312  incb = b_rs;
313 
314  // Handle the transposition of A.
315  if ( bl1_does_trans( trans ) )
316  {
317  bl1_swap_ints( lda, inca );
318  }
319 
320  // An optimization: if B is row-major and if A is effectively row-major
321  // after a possible transposition, then let's access the matrices by rows
322  // instead of by columns for increased spatial locality.
323  if ( bl1_is_row_storage( b_rs, b_cs ) )
324  {
325  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
326  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
327  {
328  bl1_swap_ints( n_iter, n_elem );
329  bl1_swap_ints( lda, inca );
330  bl1_swap_ints( ldb, incb );
331  }
332  }
333  }
334 
335  if ( bl1_does_conj( trans ) )
336  {
337  conj1_t conj = bl1_proj_trans1_to_conj( trans );
338 
339  a_temp = bl1_zallocv( n_elem );
340  inca_temp = 1;
341 
342  for ( j = 0; j < n_iter; j++ )
343  {
344  a_begin = a + j*lda;
345  b_begin = b + j*ldb;
346 
347  bl1_zcopyv( conj,
348  n_elem,
349  a_begin, inca,
350  a_temp, inca_temp );
351 
352  bl1_zscal( n_elem,
353  beta,
354  b_begin, incb );
355 
356  bl1_zaxpy( n_elem,
357  &alpha_prod,
358  a_temp, inca_temp,
359  b_begin, incb );
360  }
361 
362  bl1_zfree( a_temp );
363  }
364  else // if ( !bl1_does_conj( trans ) )
365  {
366  for ( j = 0; j < n_iter; j++ )
367  {
368  a_begin = a + j*lda;
369  b_begin = b + j*ldb;
370 
371  bl1_zscal( n_elem,
372  beta,
373  b_begin, incb );
374 
375  bl1_zaxpy( n_elem,
376  &alpha_prod,
377  a_begin, inca,
378  b_begin, incb );
379  }
380  }
381 }
void bl1_zaxpy(int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpy.c:58
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zscal(int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scal.c:78
dcomplex * bl1_zallocv(unsigned int n_elem)
Definition: bl1_allocv.c:45
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
dcomplex
Definition: blis_type_defs.h:138
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References alpha1, bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), bl1_zscal(), BLIS1_NO_TRANSPOSE, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Axpys_external().