libflame  revision_anchor
Functions
FLA_LU_nopiv_opt_var2.c File Reference

(r)

Functions

FLA_Error FLA_LU_nopiv_opt_var2 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_ops_var2 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opd_var2 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opc_var2 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opz_var2 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_LU_nopiv_opc_var2()

FLA_Error FLA_LU_nopiv_opc_var2 ( int  m_A,
int  n_A,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
242 {
243  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
244  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
245  int min_m_n = min( m_A, n_A );
246  int i;
247 
248  for ( i = 0; i < min_m_n; ++i )
249  {
250  scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
251  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
252  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
253  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
254  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
255  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
256 
257  int n_ahead = n_A - i - 1;
258  int mn_behind = i;
259 
260  /*------------------------------------------------------------*/
261 
262  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
263  bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
264  BLIS1_TRANSPOSE,
265  BLIS1_NONUNIT_DIAG,
266  mn_behind,
267  A00, rs_A, cs_A,
268  a10t, cs_A );
269 
270  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
271  bl1_cdots( BLIS1_NO_CONJUGATE,
272  mn_behind,
273  buff_m1,
274  a10t, cs_A,
275  a01, rs_A,
276  buff_1,
277  alpha11 );
278 
279  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
280  bl1_cgemv( BLIS1_TRANSPOSE,
281  BLIS1_NO_CONJUGATE,
282  mn_behind,
283  n_ahead,
284  buff_m1,
285  A02, rs_A, cs_A,
286  a10t, cs_A,
287  buff_1,
288  a12t, cs_A );
289 
290  /*------------------------------------------------------------*/
291 
292  }
293 
294  if ( m_A > n_A )
295  {
296  scomplex* ATL = buff_A;
297  scomplex* ABL = buff_A + n_A*rs_A;
298 
299  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
300  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
301  // FLA_ONE, ATL, ABL );
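302  bl1_ctrsm( BLIS1_RIGHT,
303  BLIS1_UPPER_TRIANGULAR,
304  BLIS1_NO_TRANSPOSE,
305  BLIS1_NONUNIT_DIAG,
306  m_A - n_A,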
307  n_A,
308  buff_1,
309  ATL, rs_A, cs_A,
310  ABL, rs_A, cs_A );
311  }
312 
313  return FLA_SUCCESS;
314 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
int i
Definition: bl1_axmyv2.c:145
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_ctrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:219
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trsv.c:99
@ BLIS1_UPPER_TRIANGULAR
Definition: blis_type_defs.h:63
@ BLIS1_NONUNIT_DIAG
Definition: blis_type_defs.h:74
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
@ BLIS1_RIGHT
Definition: blis_type_defs.h:69
Definition: blis_type_defs.h:133

References bl1_cdots(), bl1_cgemv(), bl1_ctrsm(), bl1_ctrsv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var2().

◆ FLA_LU_nopiv_opd_var2()

FLA_Error FLA_LU_nopiv_opd_var2 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A 
)
163 {
164  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
165  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
166  int min_m_n = min( m_A, n_A );
167  int i;
168 
169  for ( i = 0; i < min_m_n; ++i )
170  {
171  double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
172  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
173  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
174  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
175  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
176  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
177 
178  int n_ahead = n_A - i - 1;
179  int mn_behind = i;
180 
181  /*------------------------------------------------------------*/
182 
183  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
184  bl1_dtrsv( BLIS1_UPPER_TRIANGULAR,
185  BLIS1_TRANSPOSE,
186  BLIS1_NONUNIT_DIAG,
187  mn_behind,
188  A00, rs_A, cs_A,
189  a10t, cs_A );
190 
191  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
192  bl1_ddots( BLIS1_NO_CONJUGATE,
193  mn_behind,
194  buff_m1,
195  a10t, cs_A,
196  a01, rs_A,
197  buff_1,
198  alpha11 );
199 
200  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
201  bl1_dgemv( BLIS1_TRANSPOSE,
202  BLIS1_NO_CONJUGATE,
203  mn_behind,
204  n_ahead,
205  buff_m1,
206  A02, rs_A, cs_A,
207  a10t, cs_A,
208  buff_1,
209  a12t, cs_A );
210 
211  /*------------------------------------------------------------*/
212 
213  }
214 
215  if ( m_A > n_A )
216  {
217  double* ATL = buff_A;
218  double* ABL = buff_A + n_A*rs_A;
219 
220  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
221  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
222  // FLA_ONE, ATL, ABL );
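223  bl1_dtrsm( BLIS1_RIGHT,
224  BLIS1_UPPER_TRIANGULAR,
225  BLIS1_NO_TRANSPOSE,
226  BLIS1_NONUNIT_DIAG,
227  m_A - n_A,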
228  n_A,
229  buff_1,
230  ATL, rs_A, cs_A,
231  ABL, rs_A, cs_A );
232  }
233 
234  return FLA_SUCCESS;
235 }
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition: bl1_dots.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dtrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:116
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trsv.c:56

References bl1_ddots(), bl1_dgemv(), bl1_dtrsm(), bl1_dtrsv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var2().

◆ FLA_LU_nopiv_ops_var2()

FLA_Error FLA_LU_nopiv_ops_var2 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A 
)
84 {
85  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
86  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
87  int min_m_n = min( m_A, n_A );
88  int i;
89 
90  for ( i = 0; i < min_m_n; ++i )
91  {
92  float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
93  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
94  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
95  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
96  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
97  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
98 
99  int n_ahead = n_A - i - 1;
100  int mn_behind = i;
101 
102  /*------------------------------------------------------------*/
103 
104  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
105  bl1_strsv( BLIS1_UPPER_TRIANGULAR,
106  BLIS1_TRANSPOSE,
107  BLIS1_NONUNIT_DIAG,
108  mn_behind,
109  A00, rs_A, cs_A,
110  a10t, cs_A );
111 
112  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
113  bl1_sdots( BLIS1_NO_CONJUGATE,
114  mn_behind,
115  buff_m1,
116  a10t, cs_A,
117  a01, rs_A,
118  buff_1,
119  alpha11 );
120 
121  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
122  bl1_sgemv( BLIS1_TRANSPOSE,
123  BLIS1_NO_CONJUGATE,
124  mn_behind,
125  n_ahead,
126  buff_m1,
127  A02, rs_A, cs_A,
128  a10t, cs_A,
129  buff_1,
130  a12t, cs_A );
131 
132  /*------------------------------------------------------------*/
133 
134  }
135 
136  if ( m_A > n_A )
137  {
138  float* ATL = buff_A;
139  float* ABL = buff_A + n_A*rs_A;
140 
141  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
142  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
143  // FLA_ONE, ATL, ABL );
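144  bl1_strsm( BLIS1_RIGHT,
145  BLIS1_UPPER_TRIANGULAR,
146  BLIS1_NO_TRANSPOSE,
147  BLIS1_NONUNIT_DIAG,
148  m_A - n_A,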
149  n_A,
150  buff_1,
151  ATL, rs_A, cs_A,
152  ABL, rs_A, cs_A );
153  }
154 
155  return FLA_SUCCESS;
156 }
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_strsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:13
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trsv.c:13

References bl1_sdots(), bl1_sgemv(), bl1_strsm(), bl1_strsv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var2().

◆ FLA_LU_nopiv_opt_var2()

FLA_Error FLA_LU_nopiv_opt_var2 ( FLA_Obj  A)
16 {
17  FLA_Datatype datatype;
18  int m_A, n_A;
19  int rs_A, cs_A;
20 
21  datatype = FLA_Obj_datatype( A );
22 
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25  rs_A = FLA_Obj_row_stride( A );
26  cs_A = FLA_Obj_col_stride( A );
27 
28 
29  switch ( datatype )
30  {
31  case FLA_FLOAT:
32  {
33  float* buff_A = FLA_FLOAT_PTR( A );
34 
35  FLA_LU_nopiv_ops_var2( m_A,
36  n_A,
37  buff_A, rs_A, cs_A );
38 
39  break;
40  }
41 
42  case FLA_DOUBLE:
43  {
44  double* buff_A = FLA_DOUBLE_PTR( A );
45 
46  FLA_LU_nopiv_opd_var2( m_A,
47  n_A,
48  buff_A, rs_A, cs_A );
49 
50  break;
51  }
52 
53  case FLA_COMPLEX:
54  {
55  scomplex* buff_A = FLA_COMPLEX_PTR( A );
56 
57  FLA_LU_nopiv_opc_var2( m_A,
58  n_A,
59  buff_A, rs_A, cs_A );
60 
61  break;
62  }
63 
64  case FLA_DOUBLE_COMPLEX:
65  {
66  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
67 
68  FLA_LU_nopiv_opz_var2( m_A,
69  n_A,
70  buff_A, rs_A, cs_A );
71 
72  break;
73  }
74  }
75 
76  return FLA_SUCCESS;
77 }
FLA_Error FLA_LU_nopiv_opz_var2(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var2.c:318
FLA_Error FLA_LU_nopiv_opd_var2(int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var2.c:160
FLA_Error FLA_LU_nopiv_opc_var2(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var2.c:239
FLA_Error FLA_LU_nopiv_ops_var2(int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var2.c:81
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opz_var2()

FLA_Error FLA_LU_nopiv_opz_var2 ( int  m_A,
int  n_A,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
321 {
322  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
323  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
324  int min_m_n = min( m_A, n_A );
325  int i;
326 
327  for ( i = 0; i < min_m_n; ++i )
328  {
329  dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
330  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
331  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
332  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
333  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
334  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
335 
336  int n_ahead = n_A - i - 1;
337  int mn_behind = i;
338 
339  /*------------------------------------------------------------*/
340 
341  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
342  bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
343  BLIS1_TRANSPOSE,
344  BLIS1_NONUNIT_DIAG,
345  mn_behind,
346  A00, rs_A, cs_A,
347  a10t, cs_A );
348 
349  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
350  bl1_zdots( BLIS1_NO_CONJUGATE,
351  mn_behind,
352  buff_m1,
353  a10t, cs_A,
354  a01, rs_A,
355  buff_1,
356  alpha11 );
357 
358  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
359  bl1_zgemv( BLIS1_TRANSPOSE,
360  BLIS1_NO_CONJUGATE,
361  mn_behind,
362  n_ahead,
363  buff_m1,
364  A02, rs_A, cs_A,
365  a10t, cs_A,
366  buff_1,
367  a12t, cs_A );
368 
369  /*------------------------------------------------------------*/
370 
371  }
372 
373  if ( m_A > n_A )
374  {
375  dcomplex* ATL = buff_A;
376  dcomplex* ABL = buff_A + n_A*rs_A;
377 
378  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
379  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
380  // FLA_ONE, ATL, ABL );
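381  bl1_ztrsm( BLIS1_RIGHT,
382  BLIS1_UPPER_TRIANGULAR,
383  BLIS1_NO_TRANSPOSE,
384  BLIS1_NONUNIT_DIAG,
385  m_A - n_A,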
386  n_A,
387  buff_1,
388  ATL, rs_A, cs_A,
389  ABL, rs_A, cs_A );
390  }
391 
392  return FLA_SUCCESS;
393 }
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition: bl1_dots.c:56
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_ztrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:369
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trsv.c:177

References bl1_zdots(), bl1_zgemv(), bl1_ztrsm(), bl1_ztrsv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var2().