libflame  revision_anchor
Functions
FLA_LU_nopiv_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_LU_nopiv_opt_var3 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_ops_var3 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opd_var3 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opc_var3 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opz_var3 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_LU_nopiv_opc_var3()

FLA_Error FLA_LU_nopiv_opc_var3 ( int  m_A,
int  n_A,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
254 {
255  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
256  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
257  int min_m_n = min( m_A, n_A );
258  int i;
259 
260  for ( i = 0; i < min_m_n; ++i )
261  {
262  scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
263  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
264  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
265  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
266  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
267  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
268 
269  int m_ahead = m_A - i - 1;
270  int mn_behind = i;
271 
272  /*------------------------------------------------------------*/
273 
274  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
275  bl1_ctrsv( BLIS1_LOWER_TRIANGULAR,
276  BLIS1_NO_TRANSPOSE,
277  BLIS1_UNIT_DIAG,
278  mn_behind,
279  A00, rs_A, cs_A,
280  a01, rs_A );
281 
282  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
283  bl1_cdots( BLIS1_NO_CONJUGATE,
284  mn_behind,
285  buff_m1,
286  a10t, cs_A,
287  a01, rs_A,
288  buff_1,
289  alpha11 );
290 
291  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
292  bl1_cgemv( BLIS1_NO_TRANSPOSE,
293  BLIS1_NO_CONJUGATE,
294  m_ahead,
295  mn_behind,
296  buff_m1,
297  A20, rs_A, cs_A,
298  a01, rs_A,
299  buff_1,
300  a21, rs_A );
301 
302  // FLA_Inv_scal_external( alpha11, a21 );
303  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
304  m_ahead,
305  alpha11,
306  a21, rs_A );
307 
308  /*------------------------------------------------------------*/
309 
310  }
311 
312  if ( m_A < n_A )
313  {
314  scomplex* ATL = buff_A;
315  scomplex* ATR = buff_A + m_A*cs_A;
316 
317  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
318  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
319  // FLA_ONE, ATL, ATR );
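320  bl1_ctrsm( BLIS1_LEFT,
321  BLIS1_LOWER_TRIANGULAR,
322  BLIS1_NO_TRANSPOSE,
323  BLIS1_UNIT_DIAG,
324  m_A,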
325  n_A - m_A,
326  buff_1,
327  ATL, rs_A, cs_A,
328  ATR, rs_A, cs_A );
329  }
330 
331  return FLA_SUCCESS;
332 }
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
int i
Definition: bl1_axmyv2.c:145
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
void bl1_ctrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:219
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trsv.c:99
@ BLIS1_LOWER_TRIANGULAR
Definition: blis_type_defs.h:62
@ BLIS1_UNIT_DIAG
Definition: blis_type_defs.h:75
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
@ BLIS1_LEFT
Definition: blis_type_defs.h:68
scomplex
Definition: blis_type_defs.h:133

References bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), bl1_ctrsm(), bl1_ctrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var3().

◆ FLA_LU_nopiv_opd_var3()

FLA_Error FLA_LU_nopiv_opd_var3 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A 
)
169 {
170  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
171  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
172  int min_m_n = min( m_A, n_A );
173  int i;
174 
175  for ( i = 0; i < min_m_n; ++i )
176  {
177  double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
178  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
179  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
180  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
181  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
182  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
183 
184  int m_ahead = m_A - i - 1;
185  int mn_behind = i;
186 
187  /*------------------------------------------------------------*/
188 
189  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
190  bl1_dtrsv( BLIS1_LOWER_TRIANGULAR,
191  BLIS1_NO_TRANSPOSE,
192  BLIS1_UNIT_DIAG,
193  mn_behind,
194  A00, rs_A, cs_A,
195  a01, rs_A );
196 
197  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
198  bl1_ddots( BLIS1_NO_CONJUGATE,
199  mn_behind,
200  buff_m1,
201  a10t, cs_A,
202  a01, rs_A,
203  buff_1,
204  alpha11 );
205 
206  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
207  bl1_dgemv( BLIS1_NO_TRANSPOSE,
208  BLIS1_NO_CONJUGATE,
209  m_ahead,
210  mn_behind,
211  buff_m1,
212  A20, rs_A, cs_A,
213  a01, rs_A,
214  buff_1,
215  a21, rs_A );
216 
217  // FLA_Inv_scal_external( alpha11, a21 );
218  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
219  m_ahead,
220  alpha11,
221  a21, rs_A );
222 
223  /*------------------------------------------------------------*/
224 
225  }
226 
227  if ( m_A < n_A )
228  {
229  double* ATL = buff_A;
230  double* ATR = buff_A + m_A*cs_A;
231 
232  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
233  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
234  // FLA_ONE, ATL, ATR );
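235  bl1_dtrsm( BLIS1_LEFT,
236  BLIS1_LOWER_TRIANGULAR,
237  BLIS1_NO_TRANSPOSE,
238  BLIS1_UNIT_DIAG,
239  m_A,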
240  n_A - m_A,
241  buff_1,
242  ATL, rs_A, cs_A,
243  ATR, rs_A, cs_A );
244  }
245 
246  return FLA_SUCCESS;
247 }
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition: bl1_dots.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void bl1_dtrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:116
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trsv.c:56

References bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), bl1_dtrsm(), bl1_dtrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var3().

◆ FLA_LU_nopiv_ops_var3()

FLA_Error FLA_LU_nopiv_ops_var3 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A 
)
84 {
85  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
86  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
87  int min_m_n = min( m_A, n_A );
88  int i;
89 
90  for ( i = 0; i < min_m_n; ++i )
91  {
92  float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
93  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
94  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
95  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
96  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
97  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
98 
99  int m_ahead = m_A - i - 1;
100  int mn_behind = i;
101 
102  /*------------------------------------------------------------*/
103 
104  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
105  bl1_strsv( BLIS1_LOWER_TRIANGULAR,
106  BLIS1_NO_TRANSPOSE,
107  BLIS1_UNIT_DIAG,
108  mn_behind,
109  A00, rs_A, cs_A,
110  a01, rs_A );
111 
112  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
113  bl1_sdots( BLIS1_NO_CONJUGATE,
114  mn_behind,
115  buff_m1,
116  a10t, cs_A,
117  a01, rs_A,
118  buff_1,
119  alpha11 );
120 
121  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
122  bl1_sgemv( BLIS1_NO_TRANSPOSE,
123  BLIS1_NO_CONJUGATE,
124  m_ahead,
125  mn_behind,
126  buff_m1,
127  A20, rs_A, cs_A,
128  a01, rs_A,
129  buff_1,
130  a21, rs_A );
131 
132  // FLA_Inv_scal_external( alpha11, a21 );
133  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
134  m_ahead,
135  alpha11,
136  a21, rs_A );
137 
138  /*------------------------------------------------------------*/
139 
140  }
141 
142  if ( m_A < n_A )
143  {
144  float* ATL = buff_A;
145  float* ATR = buff_A + m_A*cs_A;
146 
147  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
148  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
149  // FLA_ONE, ATL, ATR );
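150  bl1_strsm( BLIS1_LEFT,
151  BLIS1_LOWER_TRIANGULAR,
152  BLIS1_NO_TRANSPOSE,
153  BLIS1_UNIT_DIAG,
154  m_A,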
155  n_A - m_A,
156  buff_1,
157  ATL, rs_A, cs_A,
158  ATR, rs_A, cs_A );
159  }
160 
161  return FLA_SUCCESS;
162 }
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void bl1_strsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:13
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trsv.c:13

References bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), bl1_strsm(), bl1_strsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var3().

◆ FLA_LU_nopiv_opt_var3()

FLA_Error FLA_LU_nopiv_opt_var3 ( FLA_Obj  A)
16 {
17  FLA_Datatype datatype;
18  int m_A, n_A;
19  int rs_A, cs_A;
20 
21  datatype = FLA_Obj_datatype( A );
22 
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25  rs_A = FLA_Obj_row_stride( A );
26  cs_A = FLA_Obj_col_stride( A );
27 
28 
29  switch ( datatype )
30  {
31  case FLA_FLOAT:
32  {
33  float* buff_A = FLA_FLOAT_PTR( A );
34 
35  FLA_LU_nopiv_ops_var3( m_A,
36  n_A,
37  buff_A, rs_A, cs_A );
38 
39  break;
40  }
41 
42  case FLA_DOUBLE:
43  {
44  double* buff_A = FLA_DOUBLE_PTR( A );
45 
46  FLA_LU_nopiv_opd_var3( m_A,
47  n_A,
48  buff_A, rs_A, cs_A );
49 
50  break;
51  }
52 
53  case FLA_COMPLEX:
54  {
55  scomplex* buff_A = FLA_COMPLEX_PTR( A );
56 
57  FLA_LU_nopiv_opc_var3( m_A,
58  n_A,
59  buff_A, rs_A, cs_A );
60 
61  break;
62  }
63 
64  case FLA_DOUBLE_COMPLEX:
65  {
66  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
67 
68  FLA_LU_nopiv_opz_var3( m_A,
69  n_A,
70  buff_A, rs_A, cs_A );
71 
72  break;
73  }
74  }
75 
76  return FLA_SUCCESS;
77 }
FLA_Error FLA_LU_nopiv_opz_var3(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var3.c:336
FLA_Error FLA_LU_nopiv_ops_var3(int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var3.c:81
FLA_Error FLA_LU_nopiv_opc_var3(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var3.c:251
FLA_Error FLA_LU_nopiv_opd_var3(int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var3.c:166
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dcomplex
Definition: blis_type_defs.h:138

References FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opz_var3()

FLA_Error FLA_LU_nopiv_opz_var3 ( int  m_A,
int  n_A,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
339 {
340  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
341  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
342  int min_m_n = min( m_A, n_A );
343  int i;
344 
345  for ( i = 0; i < min_m_n; ++i )
346  {
347  dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
348  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
349  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
350  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
351  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
352  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
353 
354  int m_ahead = m_A - i - 1;
355  int mn_behind = i;
356 
357  /*------------------------------------------------------------*/
358 
359  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
360  bl1_ztrsv( BLIS1_LOWER_TRIANGULAR,
361  BLIS1_NO_TRANSPOSE,
362  BLIS1_UNIT_DIAG,
363  mn_behind,
364  A00, rs_A, cs_A,
365  a01, rs_A );
366 
367  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
368  bl1_zdots( BLIS1_NO_CONJUGATE,
369  mn_behind,
370  buff_m1,
371  a10t, cs_A,
372  a01, rs_A,
373  buff_1,
374  alpha11 );
375 
376  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
377  bl1_zgemv( BLIS1_NO_TRANSPOSE,
378  BLIS1_NO_CONJUGATE,
379  m_ahead,
380  mn_behind,
381  buff_m1,
382  A20, rs_A, cs_A,
383  a01, rs_A,
384  buff_1,
385  a21, rs_A );
386 
387  // FLA_Inv_scal_external( alpha11, a21 );
388  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
389  m_ahead,
390  alpha11,
391  a21, rs_A );
392 
393  /*------------------------------------------------------------*/
394 
395  }
396 
397  if ( m_A < n_A )
398  {
399  dcomplex* ATL = buff_A;
400  dcomplex* ATR = buff_A + m_A*cs_A;
401 
402  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
403  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
404  // FLA_ONE, ATL, ATR );
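405  bl1_ztrsm( BLIS1_LEFT,
406  BLIS1_LOWER_TRIANGULAR,
407  BLIS1_NO_TRANSPOSE,
408  BLIS1_UNIT_DIAG,
409  m_A,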
410  n_A - m_A,
411  buff_1,
412  ATL, rs_A, cs_A,
413  ATR, rs_A, cs_A );
414  }
415 
416  return FLA_SUCCESS;
417 }
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition: bl1_dots.c:56
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
void bl1_ztrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:369
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trsv.c:177

References bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), bl1_ztrsm(), bl1_ztrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var3().