libflame  revision_anchor
Functions
bl1_axpymt.c File Reference

(r)

Functions

void bl1_saxpymt (trans1_t trans, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
 
void bl1_daxpymt (trans1_t trans, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
 
void bl1_caxpymt (trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
 
void bl1_zaxpymt (trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
 

Function Documentation

◆ bl1_caxpymt()

void bl1_caxpymt ( trans1_t  trans,
int  m,
int  n,
scomplex *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
scomplex *  b,
int  b_rs,
int  b_cs 
)
150 {
151  scomplex* a_begin;
152  scomplex* b_begin;
153  scomplex* a_temp;
154  int inca_temp;
155  int lda, inca;
156  int ldb, incb;
157  int n_iter;
158  int n_elem;
159  int j;
160 
161  // Return early if possible.
162  if ( bl1_zero_dim2( m, n ) ) return;
163 
164  // Handle cases where A and B are vectors to ensure that the underlying axpy
165  // gets invoked only once.
166  if ( bl1_is_vector( m, n ) )
167  {
168  // Initialize with values appropriate for vectors.
169  n_iter = 1;
170  n_elem = bl1_vector_dim( m, n );
171  lda = 1; // multiplied by zero when n_iter == 1; not needed.
172  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
173  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
174  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
175  }
176  else // matrix case
177  {
178  // Initialize with optimal values for column-major storage.
179  n_iter = n;
180  n_elem = m;
181  lda = a_cs;
182  inca = a_rs;
183  ldb = b_cs;
184  incb = b_rs;
185 
186  // Handle the transposition of A.
187  if ( bl1_does_trans( trans ) )
188  {
189  bl1_swap_ints( lda, inca );
190  }
191 
192  // An optimization: if B is row-major and if A is effectively row-major
193  // after a possible transposition, then let's access the matrices by rows
194  // instead of by columns for increased spatial locality.
195  if ( bl1_is_row_storage( b_rs, b_cs ) )
196  {
197  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
198  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
199  {
200  bl1_swap_ints( n_iter, n_elem );
201  bl1_swap_ints( lda, inca );
202  bl1_swap_ints( ldb, incb );
203  }
204  }
205  }
206 
207  if ( bl1_does_conj( trans ) )
208  {
209  conj1_t conj = bl1_proj_trans1_to_conj( trans );
210 
211  a_temp = bl1_callocv( n_elem );
212  inca_temp = 1;
213 
214  for ( j = 0; j < n_iter; j++ )
215  {
216  a_begin = a + j*lda;
217  b_begin = b + j*ldb;
218 
219  bl1_ccopyv( conj,
220  n_elem,
221  a_begin, inca,
222  a_temp, inca_temp );
223 
224  bl1_caxpy( n_elem,
225  alpha,
226  a_temp, inca_temp,
227  b_begin, incb );
228  }
229 
230  bl1_cfree( a_temp );
231  }
232  else // if ( !bl1_does_conj( trans ) )
233  {
234  for ( j = 0; j < n_iter; j++ )
235  {
236  a_begin = a + j*lda;
237  b_begin = b + j*ldb;
238 
239  bl1_caxpy( n_elem,
240  alpha,
241  a_begin, inca,
242  b_begin, incb );
243  }
244 
245  }
246 }
void bl1_caxpy(int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpy.c:43
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
int bl1_does_notrans(trans1_t trans)
Definition: bl1_does.c:19
int bl1_does_conj(trans1_t trans)
Definition: bl1_does.c:25
int bl1_is_row_storage(int rs, int cs)
Definition: bl1_is.c:95
int bl1_is_vector(int m, int n)
Definition: bl1_is.c:106
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
conj1_t bl1_proj_trans1_to_conj(trans1_t trans)
Definition: bl1_proj.c:13
int bl1_vector_dim(int m, int n)
Definition: bl1_vector.c:13
int bl1_vector_inc(trans1_t trans, int m, int n, int rs, int cs)
Definition: bl1_vector.c:19
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
int bl1_does_trans(trans1_t trans)
Definition: bl1_does.c:13
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
scomplex * bl1_callocv(unsigned int n_elem)
Definition: bl1_allocv.c:40
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
conj1_t
Definition: blis_type_defs.h:80
scomplex
Definition: blis_type_defs.h:133

References bl1_callocv(), bl1_caxpy(), bl1_ccopyv(), bl1_cfree(), bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_cgemm(), bl1_chemm(), bl1_csymm(), bl1_ctrmmsx(), bl1_ctrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_daxpymt()

void bl1_daxpymt ( trans1_t  trans,
int  m,
int  n,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  b,
int  b_rs,
int  b_cs 
)
82 {
83  double* a_begin;
84  double* b_begin;
85  int lda, inca;
86  int ldb, incb;
87  int n_iter;
88  int n_elem;
89  int j;
90 
91  // Return early if possible.
92  if ( bl1_zero_dim2( m, n ) ) return;
93 
94  // Handle cases where A and B are vectors to ensure that the underlying axpy
95  // gets invoked only once.
96  if ( bl1_is_vector( m, n ) )
97  {
98  // Initialize with values appropriate for vectors.
99  n_iter = 1;
100  n_elem = bl1_vector_dim( m, n );
101  lda = 1; // multiplied by zero when n_iter == 1; not needed.
102  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
103  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
104  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
105  }
106  else // matrix case
107  {
108  // Initialize with optimal values for column-major storage.
109  n_iter = n;
110  n_elem = m;
111  lda = a_cs;
112  inca = a_rs;
113  ldb = b_cs;
114  incb = b_rs;
115 
116  // Handle the transposition of A.
117  if ( bl1_does_trans( trans ) )
118  {
119  bl1_swap_ints( lda, inca );
120  }
121 
122  // An optimization: if B is row-major and if A is effectively row-major
123  // after a possible transposition, then let's access the matrices by rows
124  // instead of by columns for increased spatial locality.
125  if ( bl1_is_row_storage( b_rs, b_cs ) )
126  {
127  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
128  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
129  {
130  bl1_swap_ints( n_iter, n_elem );
131  bl1_swap_ints( lda, inca );
132  bl1_swap_ints( ldb, incb );
133  }
134  }
135  }
136 
137  for ( j = 0; j < n_iter; j++ )
138  {
139  a_begin = a + j*lda;
140  b_begin = b + j*ldb;
141 
142  bl1_daxpy( n_elem,
143  alpha,
144  a_begin, inca,
145  b_begin, incb );
146  }
147 }
void bl1_daxpy(int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpy.c:28

References bl1_daxpy(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_dgemm(), bl1_dsymm(), bl1_dtrmmsx(), bl1_dtrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_saxpymt()

void bl1_saxpymt ( trans1_t  trans,
int  m,
int  n,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  b,
int  b_rs,
int  b_cs 
)
14 {
15  float* a_begin;
16  float* b_begin;
17  int lda, inca;
18  int ldb, incb;
19  int n_iter;
20  int n_elem;
21  int j;
22 
23  // Return early if possible.
24  if ( bl1_zero_dim2( m, n ) ) return;
25 
26  // Handle cases where A and B are vectors to ensure that the underlying axpy
27  // gets invoked only once.
28  if ( bl1_is_vector( m, n ) )
29  {
30  // Initialize with values appropriate for vectors.
31  n_iter = 1;
32  n_elem = bl1_vector_dim( m, n );
33  lda = 1; // multiplied by zero when n_iter == 1; not needed.
34  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
35  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
36  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
37  }
38  else // matrix case
39  {
40  // Initialize with optimal values for column-major storage.
41  n_iter = n;
42  n_elem = m;
43  lda = a_cs;
44  inca = a_rs;
45  ldb = b_cs;
46  incb = b_rs;
47 
48  // Handle the transposition of A.
49  if ( bl1_does_trans( trans ) )
50  {
51  bl1_swap_ints( lda, inca );
52  }
53 
54  // An optimization: if B is row-major and if A is effectively row-major
55  // after a possible transposition, then let's access the matrices by rows
56  // instead of by columns for increased spatial locality.
57  if ( bl1_is_row_storage( b_rs, b_cs ) )
58  {
59  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
60  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
61  {
62  bl1_swap_ints( n_iter, n_elem );
63  bl1_swap_ints( lda, inca );
64  bl1_swap_ints( ldb, incb );
65  }
66  }
67  }
68 
69  for ( j = 0; j < n_iter; j++ )
70  {
71  a_begin = a + j*lda;
72  b_begin = b + j*ldb;
73 
74  bl1_saxpy( n_elem,
75  alpha,
76  a_begin, inca,
77  b_begin, incb );
78  }
79 }
void bl1_saxpy(int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpy.c:13

References bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_saxpy(), bl1_vector_dim(), bl1_vector_inc(), bl1_zero_dim2(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_sgemm(), bl1_ssymm(), bl1_strmmsx(), bl1_strsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().

◆ bl1_zaxpymt()

void bl1_zaxpymt ( trans1_t  trans,
int  m,
int  n,
dcomplex *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
dcomplex *  b,
int  b_rs,
int  b_cs 
)
249 {
250  dcomplex* a_begin;
251  dcomplex* b_begin;
252  dcomplex* a_temp;
253  int inca_temp;
254  int lda, inca;
255  int ldb, incb;
256  int n_iter;
257  int n_elem;
258  int j;
259 
260  // Return early if possible.
261  if ( bl1_zero_dim2( m, n ) ) return;
262 
263  // Handle cases where A and B are vectors to ensure that the underlying axpy
264  // gets invoked only once.
265  if ( bl1_is_vector( m, n ) )
266  {
267  // Initialize with values appropriate for vectors.
268  n_iter = 1;
269  n_elem = bl1_vector_dim( m, n );
270  lda = 1; // multiplied by zero when n_iter == 1; not needed.
271  inca = bl1_vector_inc( trans, m, n, a_rs, a_cs );
272  ldb = 1; // multiplied by zero when n_iter == 1; not needed.
273  incb = bl1_vector_inc( BLIS1_NO_TRANSPOSE, m, n, b_rs, b_cs );
274  }
275  else // matrix case
276  {
277  // Initialize with optimal values for column-major storage.
278  n_iter = n;
279  n_elem = m;
280  lda = a_cs;
281  inca = a_rs;
282  ldb = b_cs;
283  incb = b_rs;
284 
285  // Handle the transposition of A.
286  if ( bl1_does_trans( trans ) )
287  {
288  bl1_swap_ints( lda, inca );
289  }
290 
291  // An optimization: if B is row-major and if A is effectively row-major
292  // after a possible transposition, then let's access the matrices by rows
293  // instead of by columns for increased spatial locality.
294  if ( bl1_is_row_storage( b_rs, b_cs ) )
295  {
296  if ( ( bl1_is_col_storage( a_rs, a_cs ) && bl1_does_trans( trans ) ) ||
297  ( bl1_is_row_storage( a_rs, a_cs ) && bl1_does_notrans( trans ) ) )
298  {
299  bl1_swap_ints( n_iter, n_elem );
300  bl1_swap_ints( lda, inca );
301  bl1_swap_ints( ldb, incb );
302  }
303  }
304  }
305 
306  if ( bl1_does_conj( trans ) )
307  {
308  conj1_t conj = bl1_proj_trans1_to_conj( trans );
309 
310  a_temp = bl1_zallocv( n_elem );
311  inca_temp = 1;
312 
313  for ( j = 0; j < n_iter; j++ )
314  {
315  a_begin = a + j*lda;
316  b_begin = b + j*ldb;
317 
318  bl1_zcopyv( conj,
319  n_elem,
320  a_begin, inca,
321  a_temp, inca_temp );
322 
323  bl1_zaxpy( n_elem,
324  alpha,
325  a_temp, inca_temp,
326  b_begin, incb );
327  }
328 
329  bl1_zfree( a_temp );
330  }
331  else // if ( !bl1_does_conj( trans ) )
332  {
333  for ( j = 0; j < n_iter; j++ )
334  {
335  a_begin = a + j*lda;
336  b_begin = b + j*ldb;
337 
338  bl1_zaxpy( n_elem,
339  alpha,
340  a_begin, inca,
341  b_begin, incb );
342  }
343 
344  }
345 }
void bl1_zaxpy(int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpy.c:58
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
dcomplex * bl1_zallocv(unsigned int n_elem)
Definition: bl1_allocv.c:45
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
dcomplex
Definition: blis_type_defs.h:138

References bl1_does_conj(), bl1_does_notrans(), bl1_does_trans(), bl1_is_col_storage(), bl1_is_row_storage(), bl1_is_vector(), bl1_proj_trans1_to_conj(), bl1_vector_dim(), bl1_vector_inc(), bl1_zallocv(), bl1_zaxpy(), bl1_zcopyv(), bl1_zero_dim2(), bl1_zfree(), and BLIS1_NO_TRANSPOSE.

Referenced by bl1_zgemm(), bl1_zhemm(), bl1_zsymm(), bl1_ztrmmsx(), bl1_ztrsmsx(), FLA_Axpy_external(), and FLA_Axpyt_external().