libflame  revision_anchor
Functions
FLA_Apply_G_rf_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_Apply_G_rf_opt_var2 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_Apply_G_rf_opc_var2()

FLA_Error FLA_Apply_G_rf_opc_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
343 {
344  float one = bl1_s1();
345  float zero = bl1_s0();
346  float gamma;
347  float sigma;
348  scomplex* a1;
349  scomplex* a2;
350  scomplex* g11;
351  int j, g, k;
352  int nG, nG_app;
353  int k_minus_1;
354 
355  k_minus_1 = k_G - 1;
356  nG = n_A - 1;
357 
358  // Use the simple variant when nG < (k - 1) or k == 1.
359  if ( nG < k_minus_1 || k_G == 1 )
360  {
361  FLA_Apply_G_rf_opc_var1( k_G,
362  m_A,
363  n_A,
364  buff_G, rs_G, cs_G,
365  buff_A, rs_A, cs_A );
366  return FLA_SUCCESS;
367  }
368 
369 
370  // Start-up phase.
371 
372  for ( j = 0; j < k_minus_1; ++j )
373  {
374  nG_app = j + 1;
375 
376  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
377  {
378  g11 = buff_G + (g )*rs_G + (k )*cs_G;
379  a1 = buff_A + (g )*cs_A;
380  a2 = buff_A + (g + 1)*cs_A;
381 
382  gamma = g11->real;
383  sigma = g11->imag;
384 
385  // Skip the current iteration if the rotation is identity.
386  if ( gamma == one && sigma == zero ) continue;
387 
388  MAC_Apply_G_mx2_opc( m_A,
389  &gamma,
390  &sigma,
391  a1, rs_A,
392  a2, rs_A );
393  }
394  }
395 
396  // Pipeline stage
397 
398  for ( j = k_minus_1; j < nG; ++j )
399  {
400  nG_app = k_G;
401 
402  for ( k = 0, g = j; k < nG_app; ++k, --g )
403  {
404  g11 = buff_G + (g )*rs_G + (k )*cs_G;
405  a1 = buff_A + (g )*cs_A;
406  a2 = buff_A + (g + 1)*cs_A;
407 
408  gamma = g11->real;
409  sigma = g11->imag;
410 
411  // Skip the current iteration if the rotation is identity.
412  if ( gamma == one && sigma == zero ) continue;
413 
414  MAC_Apply_G_mx2_opc( m_A,
415  &gamma,
416  &sigma,
417  a1, rs_A,
418  a2, rs_A );
419  }
420  }
421 
422  // Shutdown stage
423 
424  for ( j = nG - k_minus_1; j < nG; ++j )
425  {
426  nG_app = nG - j;
427 
428  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
429  {
430  g11 = buff_G + (g )*rs_G + (k )*cs_G;
431  a1 = buff_A + (g )*cs_A;
432  a2 = buff_A + (g + 1)*cs_A;
433 
434  gamma = g11->real;
435  sigma = g11->imag;
436 
437  // Skip the current iteration if the rotation is identity.
438  if ( gamma == one && sigma == zero ) continue;
439 
440  MAC_Apply_G_mx2_opc( m_A,
441  &gamma,
442  &sigma,
443  a1, rs_A,
444  a2, rs_A );
445  }
446  }
447 
448  return FLA_SUCCESS;
449 }
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:215
float bl1_s0(void)
Definition: bl1_constants.c:111
float bl1_s1(void)
Definition: bl1_constants.c:47
scomplex
Definition: blis_type_defs.h:133
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_opd_var2()

FLA_Error FLA_Apply_G_rf_opd_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
230 {
231  double one = bl1_d1();
232  double zero = bl1_d0();
233  double gamma;
234  double sigma;
235  double* a1;
236  double* a2;
237  dcomplex* g11;
238  int j, g, k;
239  int nG, nG_app;
240  int k_minus_1;
241 
242  k_minus_1 = k_G - 1;
243  nG = n_A - 1;
244 
245  // Use the simple variant when nG < (k - 1) or k == 1.
246  if ( nG < k_minus_1 || k_G == 1 )
247  {
248  FLA_Apply_G_rf_opd_var1( k_G,
249  m_A,
250  n_A,
251  buff_G, rs_G, cs_G,
252  buff_A, rs_A, cs_A );
253  return FLA_SUCCESS;
254  }
255 
256 
257  // Start-up phase.
258 
259  for ( j = 0; j < k_minus_1; ++j )
260  {
261  nG_app = j + 1;
262 
263  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
264  {
265  g11 = buff_G + (g )*rs_G + (k )*cs_G;
266  a1 = buff_A + (g )*cs_A;
267  a2 = buff_A + (g + 1)*cs_A;
268 
269  gamma = g11->real;
270  sigma = g11->imag;
271 
272  // Skip the current iteration if the rotation is identity.
273  if ( gamma == one && sigma == zero ) continue;
274 
275  MAC_Apply_G_mx2_opd( m_A,
276  &gamma,
277  &sigma,
278  a1, rs_A,
279  a2, rs_A );
280  }
281  }
282 
283  // Pipeline stage
284 
285  for ( j = k_minus_1; j < nG; ++j )
286  {
287  nG_app = k_G;
288 
289  for ( k = 0, g = j; k < nG_app; ++k, --g )
290  {
291  g11 = buff_G + (g )*rs_G + (k )*cs_G;
292  a1 = buff_A + (g )*cs_A;
293  a2 = buff_A + (g + 1)*cs_A;
294 
295  gamma = g11->real;
296  sigma = g11->imag;
297 
298  // Skip the current iteration if the rotation is identity.
299  if ( gamma == one && sigma == zero ) continue;
300 
301  MAC_Apply_G_mx2_opd( m_A,
302  &gamma,
303  &sigma,
304  a1, rs_A,
305  a2, rs_A );
306  }
307  }
308 
309  // Shutdown stage
310 
311  for ( j = nG - k_minus_1; j < nG; ++j )
312  {
313  nG_app = nG - j;
314 
315  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
316  {
317  g11 = buff_G + (g )*rs_G + (k )*cs_G;
318  a1 = buff_A + (g )*cs_A;
319  a2 = buff_A + (g + 1)*cs_A;
320 
321  gamma = g11->real;
322  sigma = g11->imag;
323 
324  // Skip the current iteration if the rotation is identity.
325  if ( gamma == one && sigma == zero ) continue;
326 
327  MAC_Apply_G_mx2_opd( m_A,
328  &gamma,
329  &sigma,
330  a1, rs_A,
331  a2, rs_A );
332  }
333  }
334 
335  return FLA_SUCCESS;
336 }
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:164
double bl1_d0(void)
Definition: bl1_constants.c:118
double bl1_d1(void)
Definition: bl1_constants.c:54
dcomplex
Definition: blis_type_defs.h:138
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_ops_var2()

FLA_Error FLA_Apply_G_rf_ops_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
117 {
118  float one = bl1_s1();
119  float zero = bl1_s0();
120  float gamma;
121  float sigma;
122  float* a1;
123  float* a2;
124  scomplex* g11;
125  int j, g, k;
126  int nG, nG_app;
127  int k_minus_1;
128 
129  k_minus_1 = k_G - 1;
130  nG = n_A - 1;
131 
132  // Use the simple variant when nG < (k - 1) or k == 1.
133  if ( nG < k_minus_1 || k_G == 1 )
134  {
135  FLA_Apply_G_rf_ops_var1( k_G,
136  m_A,
137  n_A,
138  buff_G, rs_G, cs_G,
139  buff_A, rs_A, cs_A );
140  return FLA_SUCCESS;
141  }
142 
143 
144  // Start-up phase.
145 
146  for ( j = 0; j < k_minus_1; ++j )
147  {
148  nG_app = j + 1;
149 
150  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
151  {
152  g11 = buff_G + (g )*rs_G + (k )*cs_G;
153  a1 = buff_A + (g )*cs_A;
154  a2 = buff_A + (g + 1)*cs_A;
155 
156  gamma = g11->real;
157  sigma = g11->imag;
158 
159  // Skip the current iteration if the rotation is identity.
160  if ( gamma == one && sigma == zero ) continue;
161 
162  MAC_Apply_G_mx2_ops( m_A,
163  &gamma,
164  &sigma,
165  a1, rs_A,
166  a2, rs_A );
167  }
168  }
169 
170  // Pipeline stage
171 
172  for ( j = k_minus_1; j < nG; ++j )
173  {
174  nG_app = k_G;
175 
176  for ( k = 0, g = j; k < nG_app; ++k, --g )
177  {
178  g11 = buff_G + (g )*rs_G + (k )*cs_G;
179  a1 = buff_A + (g )*cs_A;
180  a2 = buff_A + (g + 1)*cs_A;
181 
182  gamma = g11->real;
183  sigma = g11->imag;
184 
185  // Skip the current iteration if the rotation is identity.
186  if ( gamma == one && sigma == zero ) continue;
187 
188  MAC_Apply_G_mx2_ops( m_A,
189  &gamma,
190  &sigma,
191  a1, rs_A,
192  a2, rs_A );
193  }
194  }
195 
196  // Shutdown stage
197 
198  for ( j = nG - k_minus_1; j < nG; ++j )
199  {
200  nG_app = nG - j;
201 
202  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
203  {
204  g11 = buff_G + (g )*rs_G + (k )*cs_G;
205  a1 = buff_A + (g )*cs_A;
206  a2 = buff_A + (g + 1)*cs_A;
207 
208  gamma = g11->real;
209  sigma = g11->imag;
210 
211  // Skip the current iteration if the rotation is identity.
212  if ( gamma == one && sigma == zero ) continue;
213 
214  MAC_Apply_G_mx2_ops( m_A,
215  &gamma,
216  &sigma,
217  a1, rs_A,
218  a2, rs_A );
219  }
220  }
221 
222  return FLA_SUCCESS;
223 }
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_opt_var2()

FLA_Error FLA_Apply_G_rf_opt_var2 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ops_var2( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_opd_var2( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_opc_var2( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_opz_var2( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_ops_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var2.c:112
FLA_Error FLA_Apply_G_rf_opd_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var2.c:225
FLA_Error FLA_Apply_G_rf_opz_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var2.c:451
FLA_Error FLA_Apply_G_rf_opc_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var2.c:338
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49

References FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_opz_var2()

FLA_Error FLA_Apply_G_rf_opz_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
456 {
457  double one = bl1_d1();
458  double zero = bl1_d0();
459  double gamma;
460  double sigma;
461  dcomplex* a1;
462  dcomplex* a2;
463  dcomplex* g11;
464  int j, g, k;
465  int nG, nG_app;
466  int k_minus_1;
467 
468  k_minus_1 = k_G - 1;
469  nG = n_A - 1;
470 
471  // Use the simple variant when nG < (k - 1) or k == 1.
472  if ( nG < k_minus_1 || k_G == 1 )
473  {
474  FLA_Apply_G_rf_opz_var1( k_G,
475  m_A,
476  n_A,
477  buff_G, rs_G, cs_G,
478  buff_A, rs_A, cs_A );
479  return FLA_SUCCESS;
480  }
481 
482 
483  // Start-up phase.
484 
485  for ( j = 0; j < k_minus_1; ++j )
486  {
487  nG_app = j + 1;
488 
489  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
490  {
491  g11 = buff_G + (g )*rs_G + (k )*cs_G;
492  a1 = buff_A + (g )*cs_A;
493  a2 = buff_A + (g + 1)*cs_A;
494 
495  gamma = g11->real;
496  sigma = g11->imag;
497 
498  // Skip the current iteration if the rotation is identity.
499  if ( gamma == one && sigma == zero ) continue;
500 
501  MAC_Apply_G_mx2_opz( m_A,
502  &gamma,
503  &sigma,
504  a1, rs_A,
505  a2, rs_A );
506  }
507  }
508 
509  // Pipeline stage
510 
511  for ( j = k_minus_1; j < nG; ++j )
512  {
513  nG_app = k_G;
514 
515  for ( k = 0, g = j; k < nG_app; ++k, --g )
516  {
517  g11 = buff_G + (g )*rs_G + (k )*cs_G;
518  a1 = buff_A + (g )*cs_A;
519  a2 = buff_A + (g + 1)*cs_A;
520 
521  gamma = g11->real;
522  sigma = g11->imag;
523 
524  // Skip the current iteration if the rotation is identity.
525  if ( gamma == one && sigma == zero ) continue;
526 
527  MAC_Apply_G_mx2_opz( m_A,
528  &gamma,
529  &sigma,
530  a1, rs_A,
531  a2, rs_A );
532  }
533  }
534 
535  // Shutdown stage
536 
537  for ( j = nG - k_minus_1; j < nG; ++j )
538  {
539  nG_app = nG - j;
540 
541  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
542  {
543  g11 = buff_G + (g )*rs_G + (k )*cs_G;
544  a1 = buff_A + (g )*cs_A;
545  a2 = buff_A + (g + 1)*cs_A;
546 
547  gamma = g11->real;
548  sigma = g11->imag;
549 
550  // Skip the current iteration if the rotation is identity.
551  if ( gamma == one && sigma == zero ) continue;
552 
553  MAC_Apply_G_mx2_opz( m_A,
554  &gamma,
555  &sigma,
556  a1, rs_A,
557  a2, rs_A );
558  }
559  }
560 
561  return FLA_SUCCESS;
562 }
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().