libflame  revision_anchor
Functions
bl1_herk.c File Reference

(r)

Functions

void bl1_sherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
 
void bl1_dherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
 
void bl1_cherk (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int a_rs, int a_cs, float *beta, scomplex *c, int c_rs, int c_cs)
 
void bl1_zherk (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int a_rs, int a_cs, double *beta, dcomplex *c, int c_rs, int c_cs)
 
void bl1_cherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
 
void bl1_zherk_blas (uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
 

Function Documentation

◆ bl1_cherk()

void bl1_cherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  a_rs,
int  a_cs,
float *  beta,
scomplex *  c,
int  c_rs,
int  c_cs 
)
37 {
38  uplo1_t uplo_save = uplo;
39  int m_save = m;
40  scomplex* a_save = a;
41  scomplex* c_save = c;
42  int a_rs_save = a_rs;
43  int a_cs_save = a_cs;
44  int c_rs_save = c_rs;
45  int c_cs_save = c_cs;
46  float zero_r = bl1_s0();
47  scomplex one = bl1_c1();
48  scomplex* c_conj;
49  int lda, inca;
50  int ldc, incc;
51  int ldc_conj, incc_conj;
52  int herk_needs_conj = FALSE;
53 
54  // Return early if possible.
55  if ( bl1_zero_dim2( m, k ) ) return;
56 
57  // If necessary, allocate, initialize, and use a temporary contiguous
58  // copy of each matrix rather than the original matrices.
59  bl1_ccreate_contigmt( trans,
60  m,
61  k,
62  a_save, a_rs_save, a_cs_save,
63  &a, &a_rs, &a_cs );
64 
65  bl1_ccreate_contigmr( uplo,
66  m,
67  m,
68  c_save, c_rs_save, c_cs_save,
69  &c, &c_rs, &c_cs );
70 
71  // Initialize with values assuming column-major storage.
72  lda = a_cs;
73  inca = a_rs;
74  ldc = c_cs;
75  incc = c_rs;
76 
77  // Adjust the parameters based on the storage of each matrix.
78  if ( bl1_is_col_storage( c_rs, c_cs ) )
79  {
80  if ( bl1_is_col_storage( a_rs, a_cs ) )
81  {
82  // requested operation: uplo( C_c ) += A_c * A_c'
83  // effective operation: uplo( C_c ) += A_c * A_c'
84  }
85  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
86  {
87  // requested operation: uplo( C_c ) += A_r * A_r'
88  // effective operation: uplo( C_c ) += conj( A_c' * A_c )
89  bl1_swap_ints( lda, inca );
90 
91  bl1_toggle_conjtrans( trans );
92 
93  herk_needs_conj = TRUE;
94  }
95  }
96  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
97  {
98  if ( bl1_is_col_storage( a_rs, a_cs ) )
99  {
100  // requested operation: uplo( C_r ) += A_c * A_c'
101  // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
102  bl1_swap_ints( ldc, incc );
103 
104  bl1_toggle_uplo( uplo );
105 
106  herk_needs_conj = TRUE;
107  }
108  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
109  {
110  // requested operation: uplo( C_r ) += A_r * A_r'
111  // effective operation: ~uplo( C_c ) += A_c' * A_c
112  bl1_swap_ints( ldc, incc );
113  bl1_swap_ints( lda, inca );
114 
115  bl1_toggle_uplo( uplo );
116  bl1_toggle_conjtrans( trans );
117  }
118  }
119 
120  // There are two cases where we need to perform the rank-k product and
121  // then axpy the result into C with a conjugation. We handle those two
122  // cases here.
123  if ( herk_needs_conj )
124  {
125  // We need a temporary matrix for holding the rank-k product.
126  c_conj = bl1_callocm( m, m );
127  ldc_conj = m;
128  incc_conj = 1;
129 
130  // Compute the rank-k product.
131  bl1_cherk_blas( uplo,
132  trans,
133  m,
134  k,
135  alpha,
136  a, lda,
137  &zero_r,
138  c_conj, ldc_conj );
139 
140  // Scale C by beta.
141  bl1_csscalmr( uplo,
142  m,
143  m,
144  beta,
145  c, incc, ldc );
146 
147  // And finally, accumulate the rank-k product in C_conj into C
148  // with a conjugation.
149  bl1_caxpymrt( uplo,
150  BLIS1_CONJ_NO_TRANSPOSE,
151  m,
152  m,
153  &one,
154  c_conj, incc_conj, ldc_conj,
155  c, incc, ldc );
156 
157  // Free the temporary matrix for C.
158  bl1_cfree( c_conj );
159  }
160  else
161  {
162  bl1_cherk_blas( uplo,
163  trans,
164  m,
165  k,
166  alpha,
167  a, lda,
168  beta,
169  c, ldc );
170  }
171 
172  // Free any temporary contiguous matrices, copying the result back to
173  // the original matrix.
174  bl1_cfree_contigm( a_save, a_rs_save, a_cs_save,
175  &a, &a_rs, &a_cs );
176 
177  bl1_cfree_saved_contigmr( uplo_save,
178  m_save,
179  m_save,
180  c_save, c_rs_save, c_cs_save,
181  &c, &c_rs, &c_cs );
182 }
void bl1_caxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymrt.c:227
void bl1_cherk_blas(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, scomplex *a, int lda, float *beta, scomplex *c, int ldc)
Definition: bl1_herk.c:334
void bl1_csscalmr(uplo1_t uplo, int m, int n, float *alpha, scomplex *a, int a_rs, int a_cs)
Definition: bl1_scalmr.c:125
int bl1_is_col_storage(int rs, int cs)
Definition: bl1_is.c:90
int bl1_zero_dim2(int m, int n)
Definition: bl1_is.c:118
scomplex * bl1_callocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:40
float bl1_s0(void)
Definition: bl1_constants.c:111
void bl1_cfree_contigm(scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:45
scomplex bl1_c1(void)
Definition: bl1_constants.c:61
void bl1_cfree(scomplex *p)
Definition: bl1_free.c:40
void bl1_cfree_saved_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:59
void bl1_ccreate_contigmr(uplo1_t uplo, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:77
void bl1_ccreate_contigmt(trans1_t trans_dims, int m, int n, scomplex *a_save, int a_rs_save, int a_cs_save, scomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:89
uplo1_t
Definition: blis_type_defs.h:61
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
scomplex
Definition: blis_type_defs.h:133

References bl1_c1(), bl1_callocm(), bl1_caxpymrt(), bl1_ccreate_contigmr(), bl1_ccreate_contigmt(), bl1_cfree(), bl1_cfree_contigm(), bl1_cfree_saved_contigmr(), bl1_cherk_blas(), bl1_csscalmr(), bl1_is_col_storage(), bl1_s0(), bl1_zero_dim2(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opc_var1().

◆ bl1_cherk_blas()

void bl1_cherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
scomplex *  a,
int  lda,
float *  beta,
scomplex *  c,
int  ldc 
)
335 {
336 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
337  enum CBLAS_ORDER cblas_order = CblasColMajor;
338  enum CBLAS_UPLO cblas_uplo;
339  enum CBLAS_TRANSPOSE cblas_trans;
340 
341  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
342  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
343 
344  cblas_cherk( cblas_order,
345  cblas_uplo,
346  cblas_trans,
347  m,
348  k,
349  *alpha,
350  a, lda,
351  *beta,
352  c, ldc );
353 #else
354  char blas_uplo;
355  char blas_trans;
356 
357  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
358  bl1_param_map_to_netlib_trans( trans, &blas_trans );
359 
360  F77_cherk( &blas_uplo,
361  &blas_trans,
362  &m,
363  &k,
364  alpha,
365  a, &lda,
366  beta,
367  c, &ldc );
368 #endif
369 }
void F77_cherk(char *uplo, char *transa, int *n, int *k, float *alpha, scomplex *a, int *lda, float *beta, scomplex *c, int *ldc)
CBLAS_ORDER
Definition: blis_prototypes_cblas.h:17
@ CblasColMajor
Definition: blis_prototypes_cblas.h:17
CBLAS_UPLO
Definition: blis_prototypes_cblas.h:19
CBLAS_TRANSPOSE
Definition: blis_prototypes_cblas.h:18
void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc)
void bl1_param_map_to_netlib_trans(trans1_t blis_trans, void *blas_trans)
Definition: bl1_param_map.c:15
void bl1_param_map_to_netlib_uplo(uplo1_t blis_uplo, void *blas_uplo)
Definition: bl1_param_map.c:47

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_cherk(), CblasColMajor, and F77_cherk().

Referenced by bl1_cherk().

◆ bl1_dherk()

void bl1_dherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
double *  a,
int  a_rs,
int  a_cs,
double *  beta,
double *  c,
int  c_rs,
int  c_cs 
)
25 {
26  bl1_dsyrk( uplo,
27  trans,
28  m,
29  k,
30  alpha,
31  a, a_rs, a_cs,
32  beta,
33  c, c_rs, c_cs );
34 }
void bl1_dsyrk(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, double *a, int a_rs, int a_cs, double *beta, double *c, int c_rs, int c_cs)
Definition: bl1_syrk.c:109

References bl1_dsyrk().

◆ bl1_sherk()

void bl1_sherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
float *  alpha,
float *  a,
int  a_rs,
int  a_cs,
float *  beta,
float *  c,
int  c_rs,
int  c_cs 
)
14 {
15  bl1_ssyrk( uplo,
16  trans,
17  m,
18  k,
19  alpha,
20  a, a_rs, a_cs,
21  beta,
22  c, c_rs, c_cs );
23 }
void bl1_ssyrk(uplo1_t uplo, trans1_t trans, int m, int k, float *alpha, float *a, int a_rs, int a_cs, float *beta, float *c, int c_rs, int c_cs)
Definition: bl1_syrk.c:13

References bl1_ssyrk().

◆ bl1_zherk()

void bl1_zherk ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
dcomplex *  a,
int  a_rs,
int  a_cs,
double *  beta,
dcomplex *  c,
int  c_rs,
int  c_cs 
)
185 {
186  uplo1_t uplo_save = uplo;
187  int m_save = m;
188  dcomplex* a_save = a;
189  dcomplex* c_save = c;
190  int a_rs_save = a_rs;
191  int a_cs_save = a_cs;
192  int c_rs_save = c_rs;
193  int c_cs_save = c_cs;
194  double zero_r = bl1_d0();
195  dcomplex one = bl1_z1();
196  dcomplex* c_conj;
197  int lda, inca;
198  int ldc, incc;
199  int ldc_conj, incc_conj;
200  int herk_needs_conj = FALSE;
201 
202  // Return early if possible.
203  if ( bl1_zero_dim2( m, k ) ) return;
204 
205  // If necessary, allocate, initialize, and use a temporary contiguous
206  // copy of each matrix rather than the original matrices.
207  bl1_zcreate_contigmt( trans,
208  m,
209  k,
210  a_save, a_rs_save, a_cs_save,
211  &a, &a_rs, &a_cs );
212 
213  bl1_zcreate_contigmr( uplo,
214  m,
215  m,
216  c_save, c_rs_save, c_cs_save,
217  &c, &c_rs, &c_cs );
218 
219  // Initialize with values assuming column-major storage.
220  lda = a_cs;
221  inca = a_rs;
222  ldc = c_cs;
223  incc = c_rs;
224 
225  // Adjust the parameters based on the storage of each matrix.
226  if ( bl1_is_col_storage( c_rs, c_cs ) )
227  {
228  if ( bl1_is_col_storage( a_rs, a_cs ) )
229  {
230  // requested operation: uplo( C_c ) += A_c * A_c'
231  // effective operation: uplo( C_c ) += A_c * A_c'
232  }
233  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
234  {
235  // requested operation: uplo( C_c ) += A_r * A_r'
236  // effective operation: uplo( C_c ) += conj( A_c' * A_c )
237  bl1_swap_ints( lda, inca );
238 
239  bl1_toggle_conjtrans( trans );
240 
241  herk_needs_conj = TRUE;
242  }
243  }
244  else // if ( bl1_is_row_storage( c_rs, c_cs ) )
245  {
246  if ( bl1_is_col_storage( a_rs, a_cs ) )
247  {
248  // requested operation: uplo( C_r ) += A_c * A_c'
249  // effective operation: ~uplo( C_c ) += conj( A_c * A_c' )
250  bl1_swap_ints( ldc, incc );
251 
252  bl1_toggle_uplo( uplo );
253 
254  herk_needs_conj = TRUE;
255  }
256  else // if ( bl1_is_row_storage( a_rs, a_cs ) )
257  {
258  // requested operation: uplo( C_r ) += A_r * A_r'
259  // effective operation: ~uplo( C_c ) += A_c' * A_c
260  bl1_swap_ints( ldc, incc );
261  bl1_swap_ints( lda, inca );
262 
263  bl1_toggle_uplo( uplo );
264  bl1_toggle_conjtrans( trans );
265  }
266  }
267 
268  // There are two cases where we need to perform the rank-k product and
269  // then axpy the result into C with a conjugation. We handle those two
270  // cases here.
271  if ( herk_needs_conj )
272  {
273  // We need a temporary matrix for holding the rank-k product.
274  c_conj = bl1_zallocm( m, m );
275  ldc_conj = m;
276  incc_conj = 1;
277 
278  // Compute the rank-k product.
279  bl1_zherk_blas( uplo,
280  trans,
281  m,
282  k,
283  alpha,
284  a, lda,
285  &zero_r,
286  c_conj, ldc_conj );
287 
288  // Scale C by beta.
289  bl1_zdscalmr( uplo,
290  m,
291  m,
292  beta,
293  c, incc, ldc );
294 
295  // And finally, accumulate the rank-k product in C_conj into C
296  // with a conjugation.
297  bl1_zaxpymrt( uplo,
298  BLIS1_CONJ_NO_TRANSPOSE,
299  m,
300  m,
301  &one,
302  c_conj, incc_conj, ldc_conj,
303  c, incc, ldc );
304 
305  // Free the temporary matrix for C.
306  bl1_zfree( c_conj );
307  }
308  else
309  {
310  bl1_zherk_blas( uplo,
311  trans,
312  m,
313  k,
314  alpha,
315  a, lda,
316  beta,
317  c, ldc );
318  }
319 
320  // Free any temporary contiguous matrices, copying the result back to
321  // the original matrix.
322  bl1_zfree_contigm( a_save, a_rs_save, a_cs_save,
323  &a, &a_rs, &a_cs );
324 
325  bl1_zfree_saved_contigmr( uplo_save,
326  m_save,
327  m_save,
328  c_save, c_rs_save, c_cs_save,
329  &c, &c_rs, &c_cs );
330 }
void bl1_zaxpymrt(uplo1_t uplo, trans1_t trans, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_axpymrt.c:334
void bl1_zherk_blas(uplo1_t uplo, trans1_t trans, int m, int k, double *alpha, dcomplex *a, int lda, double *beta, dcomplex *c, int ldc)
Definition: bl1_herk.c:371
void bl1_zdscalmr(uplo1_t uplo, int m, int n, double *alpha, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_scalmr.c:237
void bl1_zfree_saved_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_saved_contigmr.c:82
void bl1_zcreate_contigmt(trans1_t trans_dims, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmt.c:127
dcomplex * bl1_zallocm(unsigned int m, unsigned int n)
Definition: bl1_allocm.c:45
dcomplex bl1_z1(void)
Definition: bl1_constants.c:69
void bl1_zcreate_contigmr(uplo1_t uplo, int m, int n, dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_create_contigmr.c:109
double bl1_d0(void)
Definition: bl1_constants.c:118
void bl1_zfree(dcomplex *p)
Definition: bl1_free.c:45
void bl1_zfree_contigm(dcomplex *a_save, int a_rs_save, int a_cs_save, dcomplex **a, int *a_rs, int *a_cs)
Definition: bl1_free_contigm.c:61
dcomplex
Definition: blis_type_defs.h:138

References bl1_d0(), bl1_is_col_storage(), bl1_z1(), bl1_zallocm(), bl1_zaxpymrt(), bl1_zcreate_contigmr(), bl1_zcreate_contigmt(), bl1_zdscalmr(), bl1_zero_dim2(), bl1_zfree(), bl1_zfree_contigm(), bl1_zfree_saved_contigmr(), bl1_zherk_blas(), and BLIS1_CONJ_NO_TRANSPOSE.

Referenced by FLA_Herk_external(), and FLA_UDdate_UT_opz_var1().

◆ bl1_zherk_blas()

void bl1_zherk_blas ( uplo1_t  uplo,
trans1_t  trans,
int  m,
int  k,
double *  alpha,
dcomplex *  a,
int  lda,
double *  beta,
dcomplex *  c,
int  ldc 
)
372 {
373 #ifdef BLIS1_ENABLE_CBLAS_INTERFACES
374  enum CBLAS_ORDER cblas_order = CblasColMajor;
375  enum CBLAS_UPLO cblas_uplo;
376  enum CBLAS_TRANSPOSE cblas_trans;
377 
378  bl1_param_map_to_netlib_uplo( uplo, &cblas_uplo );
379  bl1_param_map_to_netlib_trans( trans, &cblas_trans );
380 
381  cblas_zherk( cblas_order,
382  cblas_uplo,
383  cblas_trans,
384  m,
385  k,
386  *alpha,
387  a, lda,
388  *beta,
389  c, ldc );
390 #else
391  char blas_uplo;
392  char blas_trans;
393 
394  bl1_param_map_to_netlib_uplo( uplo, &blas_uplo );
395  bl1_param_map_to_netlib_trans( trans, &blas_trans );
396 
397  F77_zherk( &blas_uplo,
398  &blas_trans,
399  &m,
400  &k,
401  alpha,
402  a, &lda,
403  beta,
404  c, &ldc );
405 #endif
406 }
void F77_zherk(char *uplo, char *transa, int *n, int *k, double *alpha, dcomplex *a, int *lda, double *beta, dcomplex *c, int *ldc)
void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc)

References bl1_param_map_to_netlib_trans(), bl1_param_map_to_netlib_uplo(), cblas_zherk(), CblasColMajor, and F77_zherk().

Referenced by bl1_zherk().