libflame  revision_anchor
Functions
FLA_LU_piv_opt_var3.c File Reference

(r)

Functions

FLA_Error FLA_LU_piv_opt_var3 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var3 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var3 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var3 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var3 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 

Function Documentation

◆ FLA_LU_piv_opc_var3()

FLA_Error FLA_LU_piv_opc_var3 ( int  m_A,
int  n_A,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
371 {
372  FLA_Error r_val = FLA_SUCCESS;
373  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
374  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
375  int min_m_n = min( m_A, n_A );
376  int i;
377 
378  for ( i = 0; i < min_m_n; ++i )
379  {
380  scomplex pivot_val = czero;
381  scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
382  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
383  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
384  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
385  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
386  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
387 
388  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
389 
390  int* p0 = buff_p;
391  int* pi1 = buff_p + i*inc_p;
392 
393  int m_ahead = m_A - i - 1;
394  int mn_behind = i;
395 
396  /*------------------------------------------------------------*/
397 
398  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
399  FLA_Apply_pivots_ln_opc_var1( 1,
400  a1, rs_A, cs_A,
401  0,
402  mn_behind - 1,
403  p0, inc_p );
404 
405  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
406  bl1_ctrsv( BLIS1_LOWER_TRIANGULAR,
407  BLIS1_NO_TRANSPOSE,
408  BLIS1_UNIT_DIAG,
409  mn_behind,
410  A00, rs_A, cs_A,
411  a01, rs_A );
412 
413  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
414  bl1_cdots( BLIS1_NO_CONJUGATE,
415  mn_behind,
416  buff_m1,
417  a10t, cs_A,
418  a01, rs_A,
419  buff_1,
420  alpha11 );
421 
422  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
423  bl1_cgemv( BLIS1_NO_TRANSPOSE,
424  BLIS1_NO_CONJUGATE,
425  m_ahead,
426  mn_behind,
427  buff_m1,
428  A20, rs_A, cs_A,
429  a01, rs_A,
430  buff_1,
431  a21, rs_A );
432 
433  // FLA_Merge_2x1( alpha11,
434  // a21, &aB1 );
435 
436  // FLA_Amax_external( aB1, pi1 );
437  bl1_camax( m_ahead + 1,
438  alpha11, rs_A,
439  pi1 );
440 
441  // If a null pivot is encountered, return the index.
442  pivot_val =*(alpha11 + *pi1);
443  if ( pivot_val.real == czero.real &&
444  pivot_val.imag == czero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
445  else
446  {
447  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
448  FLA_Apply_pivots_ln_opc_var1( 1,
449  alpha11, rs_A, cs_A,
450  0,
451  0,
452  pi1, inc_p );
453 
454  // FLA_Inv_scal_external( alpha11, a21 );
455  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
456  m_ahead,
457  alpha11,
458  a21, rs_A );
459 
460  // FLA_Merge_2x1( a10t,
461  // A20, &AB0 );
462 
463  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
464  FLA_Apply_pivots_ln_opc_var1( mn_behind,
465  a10t, rs_A, cs_A,
466  0,
467  0,
468  pi1, inc_p );
469  }
470  /*------------------------------------------------------------*/
471 
472  }
473 
474  if ( m_A < n_A )
475  {
476  scomplex* ATL = buff_A;
477  scomplex* ATR = buff_A + m_A*cs_A;
478 
479  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
480  FLA_Apply_pivots_ln_opc_var1( n_A - m_A,
481  ATR, rs_A, cs_A,
482  0,
483  m_A - 1,
484  buff_p, inc_p );
485 
486  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
487  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
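488  // FLA_ONE, ATL, ATR );
489  bl1_ctrsm( BLIS1_LEFT,
490  BLIS1_LOWER_TRIANGULAR,
491  BLIS1_NO_TRANSPOSE,
492  BLIS1_UNIT_DIAG,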
493  m_A,
494  n_A - m_A,
495  buff_1,
496  ATL, rs_A, cs_A,
497  ATR, rs_A, cs_A );
498  }
499 
500  return r_val;
501 }
FLA_Error FLA_Apply_pivots_ln_opc_var1(int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:356
const scomplex czero
Definition: FLA_Init.c:38
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
int FLA_Error
Definition: FLA_type_defs.h:47
void bl1_camax(int n, scomplex *x, int incx, int *index)
Definition: bl1_amax.c:35
int i
Definition: bl1_axmyv2.c:145
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
void bl1_ctrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:219
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trsv.c:99
@ BLIS1_LOWER_TRIANGULAR
Definition: blis_type_defs.h:62
@ BLIS1_UNIT_DIAG
Definition: blis_type_defs.h:75
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
@ BLIS1_LEFT
Definition: blis_type_defs.h:68
scomplex
Definition: blis_type_defs.h:133
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References bl1_camax(), bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), bl1_ctrsm(), bl1_ctrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, FLA_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_opd_var3()

FLA_Error FLA_LU_piv_opd_var3 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
234 {
235  FLA_Error r_val = FLA_SUCCESS;
236  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
237  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
238  int min_m_n = min( m_A, n_A );
239  int i;
240 
241  for ( i = 0; i < min_m_n; ++i )
242  {
243  double pivot_val = dzero;
244  double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
245  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
246  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
247  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
248  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
249  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
250 
251  double* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
252 
253  int* p0 = buff_p;
254  int* pi1 = buff_p + i*inc_p;
255 
256  int m_ahead = m_A - i - 1;
257  int mn_behind = i;
258 
259  /*------------------------------------------------------------*/
260 
261  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
262  FLA_Apply_pivots_ln_opd_var1( 1,
263  a1, rs_A, cs_A,
264  0,
265  mn_behind - 1,
266  p0, inc_p );
267 
268  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
269  bl1_dtrsv( BLIS1_LOWER_TRIANGULAR,
270  BLIS1_NO_TRANSPOSE,
271  BLIS1_UNIT_DIAG,
272  mn_behind,
273  A00, rs_A, cs_A,
274  a01, rs_A );
275 
276  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
277  bl1_ddots( BLIS1_NO_CONJUGATE,
278  mn_behind,
279  buff_m1,
280  a10t, cs_A,
281  a01, rs_A,
282  buff_1,
283  alpha11 );
284 
285  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
286  bl1_dgemv( BLIS1_NO_TRANSPOSE,
287  BLIS1_NO_CONJUGATE,
288  m_ahead,
289  mn_behind,
290  buff_m1,
291  A20, rs_A, cs_A,
292  a01, rs_A,
293  buff_1,
294  a21, rs_A );
295 
296  // FLA_Merge_2x1( alpha11,
297  // a21, &aB1 );
298 
299  // FLA_Amax_external( aB1, pi1 );
300  bl1_damax( m_ahead + 1,
301  alpha11, rs_A,
302  pi1 );
303 
304  // If a null pivot is encountered, return the index.
305  pivot_val =*(alpha11 + *pi1);
306  if ( pivot_val == dzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
307  else
308  {
309  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
310  FLA_Apply_pivots_ln_opd_var1( 1,
311  alpha11, rs_A, cs_A,
312  0,
313  0,
314  pi1, inc_p );
315 
316  // FLA_Inv_scal_external( alpha11, a21 );
317  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
318  m_ahead,
319  alpha11,
320  a21, rs_A );
321 
322  // FLA_Merge_2x1( a10t,
323  // A20, &AB0 );
324 
325  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
326  FLA_Apply_pivots_ln_opd_var1( mn_behind,
327  a10t, rs_A, cs_A,
328  0,
329  0,
330  pi1, inc_p );
331  }
332  /*------------------------------------------------------------*/
333 
334  }
335 
336  if ( m_A < n_A )
337  {
338  double* ATL = buff_A;
339  double* ATR = buff_A + m_A*cs_A;
340 
341  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
342  FLA_Apply_pivots_ln_opd_var1( n_A - m_A,
343  ATR, rs_A, cs_A,
344  0,
345  m_A - 1,
346  buff_p, inc_p );
347 
348  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
349  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
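350  // FLA_ONE, ATL, ATR );
351  bl1_dtrsm( BLIS1_LEFT,
352  BLIS1_LOWER_TRIANGULAR,
353  BLIS1_NO_TRANSPOSE,
354  BLIS1_UNIT_DIAG,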
355  m_A,
356  n_A - m_A,
357  buff_1,
358  ATL, rs_A, cs_A,
359  ATR, rs_A, cs_A );
360  }
361 
362  return r_val;
363 }
FLA_Error FLA_Apply_pivots_ln_opd_var1(int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:274
const double dzero
Definition: FLA_Init.c:37
void bl1_damax(int n, double *x, int incx, int *index)
Definition: bl1_amax.c:24
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition: bl1_dots.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void bl1_dtrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:116
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trsv.c:56

References bl1_damax(), bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), bl1_dtrsm(), bl1_dtrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_ops_var3()

FLA_Error FLA_LU_piv_ops_var3 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
97 {
98  FLA_Error r_val = FLA_SUCCESS;
99  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
100  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
101  int min_m_n = min( m_A, n_A );
102  int i;
103 
104  for ( i = 0; i < min_m_n; ++i )
105  {
106  float pivot_val = fzero;
107  float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
108  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
109  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
110  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
111  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
112  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
113 
114  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
115 
116  int* p0 = buff_p;
117  int* pi1 = buff_p + i*inc_p;
118 
119  int m_ahead = m_A - i - 1;
120  int mn_behind = i;
121 
122  /*------------------------------------------------------------*/
123 
124  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
125  FLA_Apply_pivots_ln_ops_var1( 1,
126  a1, rs_A, cs_A,
127  0,
128  mn_behind - 1,
129  p0, inc_p );
130 
131  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
132  bl1_strsv( BLIS1_LOWER_TRIANGULAR,
133  BLIS1_NO_TRANSPOSE,
134  BLIS1_UNIT_DIAG,
135  mn_behind,
136  A00, rs_A, cs_A,
137  a01, rs_A );
138 
139  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
140  bl1_sdots( BLIS1_NO_CONJUGATE,
141  mn_behind,
142  buff_m1,
143  a10t, cs_A,
144  a01, rs_A,
145  buff_1,
146  alpha11 );
147 
148  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
149  bl1_sgemv( BLIS1_NO_TRANSPOSE,
150  BLIS1_NO_CONJUGATE,
151  m_ahead,
152  mn_behind,
153  buff_m1,
154  A20, rs_A, cs_A,
155  a01, rs_A,
156  buff_1,
157  a21, rs_A );
158 
159  // FLA_Merge_2x1( alpha11,
160  // a21, &aB1 );
161 
162  // FLA_Amax_external( aB1, pi1 );
163  bl1_samax( m_ahead + 1,
164  alpha11, rs_A,
165  pi1 );
166 
167  // If a null pivot is encountered, return the index.
168  pivot_val = *(alpha11 + *pi1);
169  if ( pivot_val == fzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
170  else
171  {
172  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
173  FLA_Apply_pivots_ln_ops_var1( 1,
174  alpha11, rs_A, cs_A,
175  0,
176  0,
177  pi1, inc_p );
178 
179  // FLA_Inv_scal_external( alpha11, a21 );
180  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
181  m_ahead,
182  alpha11,
183  a21, rs_A );
184 
185  // FLA_Merge_2x1( a10t,
186  // A20, &AB0 );
187 
188  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
189  FLA_Apply_pivots_ln_ops_var1( mn_behind,
190  a10t, rs_A, cs_A,
191  0,
192  0,
193  pi1, inc_p );
194  }
195  /*------------------------------------------------------------*/
196 
197  }
198 
199  if ( m_A < n_A )
200  {
201  float* ATL = buff_A;
202  float* ATR = buff_A + m_A*cs_A;
203 
204  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
205  FLA_Apply_pivots_ln_ops_var1( n_A - m_A,
206  ATR, rs_A, cs_A,
207  0,
208  m_A - 1,
209  buff_p, inc_p );
210 
211  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
212  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
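213  // FLA_ONE, ATL, ATR );
214  bl1_strsm( BLIS1_LEFT,
215  BLIS1_LOWER_TRIANGULAR,
216  BLIS1_NO_TRANSPOSE,
217  BLIS1_UNIT_DIAG,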
218  m_A,
219  n_A - m_A,
220  buff_1,
221  ATL, rs_A, cs_A,
222  ATR, rs_A, cs_A );
223  }
224 
225  return r_val;
226 }
FLA_Error FLA_Apply_pivots_ln_ops_var1(int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:192
const float fzero
Definition: FLA_Init.c:36
void bl1_samax(int n, float *x, int incx, int *index)
Definition: bl1_amax.c:13
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void bl1_strsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:13
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trsv.c:13

References bl1_samax(), bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), bl1_strsm(), bl1_strsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, FLA_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_opt_var3()

FLA_Error FLA_LU_piv_opt_var3 ( FLA_Obj  A,
FLA_Obj  p 
)
16 {
17  FLA_Error r_val = FLA_SUCCESS;
18  FLA_Datatype datatype;
19  int m_A, n_A;
20  int rs_A, cs_A;
21  int inc_p;
22 
23  datatype = FLA_Obj_datatype( A );
24 
25  m_A = FLA_Obj_length( A );
26  n_A = FLA_Obj_width( A );
27  rs_A = FLA_Obj_row_stride( A );
28  cs_A = FLA_Obj_col_stride( A );
29 
30  inc_p = FLA_Obj_vector_inc( p );
31 
32 
33  switch ( datatype )
34  {
35  case FLA_FLOAT:
36  {
37  float* buff_A = FLA_FLOAT_PTR( A );
38  int* buff_p = FLA_INT_PTR( p );
39 
40  r_val = FLA_LU_piv_ops_var3( m_A,
41  n_A,
42  buff_A, rs_A, cs_A,
43  buff_p, inc_p );
44 
45  break;
46  }
47 
48  case FLA_DOUBLE:
49  {
50  double* buff_A = FLA_DOUBLE_PTR( A );
51  int* buff_p = FLA_INT_PTR( p );
52 
53  r_val = FLA_LU_piv_opd_var3( m_A,
54  n_A,
55  buff_A, rs_A, cs_A,
56  buff_p, inc_p );
57 
58  break;
59  }
60 
61  case FLA_COMPLEX:
62  {
63  scomplex* buff_A = FLA_COMPLEX_PTR( A );
64  int* buff_p = FLA_INT_PTR( p );
65 
66  r_val = FLA_LU_piv_opc_var3( m_A,
67  n_A,
68  buff_A, rs_A, cs_A,
69  buff_p, inc_p );
70 
71  break;
72  }
73 
74  case FLA_DOUBLE_COMPLEX:
75  {
76  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
77  int* buff_p = FLA_INT_PTR( p );
78 
79  r_val = FLA_LU_piv_opz_var3( m_A,
80  n_A,
81  buff_A, rs_A, cs_A,
82  buff_p, inc_p );
83 
84  break;
85  }
86  }
87 
88  return r_val;
89 }
FLA_Error FLA_LU_piv_opz_var3(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var3.c:505
FLA_Error FLA_LU_piv_opc_var3(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var3.c:367
FLA_Error FLA_LU_piv_opd_var3(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var3.c:230
FLA_Error FLA_LU_piv_ops_var3(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var3.c:93
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dcomplex
Definition: blis_type_defs.h:138

References FLA_LU_piv_opc_var3(), FLA_LU_piv_opd_var3(), FLA_LU_piv_ops_var3(), FLA_LU_piv_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opz_var3()

FLA_Error FLA_LU_piv_opz_var3 ( int  m_A,
int  n_A,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
509 {
510  FLA_Error r_val = FLA_SUCCESS;
511  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
512  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
513  int min_m_n = min( m_A, n_A );
514  int i;
515 
516  for ( i = 0; i < min_m_n; ++i )
517  {
518  dcomplex pivot_val = zzero;
519  dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
520  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
521  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
522  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
523  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
524  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
525 
526  dcomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
527 
528  int* p0 = buff_p;
529  int* pi1 = buff_p + i*inc_p;
530 
531  int m_ahead = m_A - i - 1;
532  int mn_behind = i;
533 
534  /*------------------------------------------------------------*/
535 
536  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
537  FLA_Apply_pivots_ln_opz_var1( 1,
538  a1, rs_A, cs_A,
539  0,
540  mn_behind - 1,
541  p0, inc_p );
542 
543  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
544  bl1_ztrsv( BLIS1_LOWER_TRIANGULAR,
545  BLIS1_NO_TRANSPOSE,
546  BLIS1_UNIT_DIAG,
547  mn_behind,
548  A00, rs_A, cs_A,
549  a01, rs_A );
550 
551  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
552  bl1_zdots( BLIS1_NO_CONJUGATE,
553  mn_behind,
554  buff_m1,
555  a10t, cs_A,
556  a01, rs_A,
557  buff_1,
558  alpha11 );
559 
560  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
561  bl1_zgemv( BLIS1_NO_TRANSPOSE,
562  BLIS1_NO_CONJUGATE,
563  m_ahead,
564  mn_behind,
565  buff_m1,
566  A20, rs_A, cs_A,
567  a01, rs_A,
568  buff_1,
569  a21, rs_A );
570 
571  // FLA_Merge_2x1( alpha11,
572  // a21, &aB1 );
573 
574  // FLA_Amax_external( aB1, pi1 );
575  bl1_zamax( m_ahead + 1,
576  alpha11, rs_A,
577  pi1 );
578 
579  // If a null pivot is encountered, return the index.
580  pivot_val =*(alpha11 + *pi1);
581  if ( pivot_val.real == zzero.real &&
582  pivot_val.imag == zzero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
583  else
584  {
585  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
586  FLA_Apply_pivots_ln_opz_var1( 1,
587  alpha11, rs_A, cs_A,
588  0,
589  0,
590  pi1, inc_p );
591 
592  // FLA_Inv_scal_external( alpha11, a21 );
593  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
594  m_ahead,
595  alpha11,
596  a21, rs_A );
597 
598  // FLA_Merge_2x1( a10t,
599  // A20, &AB0 );
600 
601  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
602  FLA_Apply_pivots_ln_opz_var1( mn_behind,
603  a10t, rs_A, cs_A,
604  0,
605  0,
606  pi1, inc_p );
607  }
608  /*------------------------------------------------------------*/
609 
610  }
611 
612  if ( m_A < n_A )
613  {
614  dcomplex* ATL = buff_A;
615  dcomplex* ATR = buff_A + m_A*cs_A;
616 
617  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
618  FLA_Apply_pivots_ln_opz_var1( n_A - m_A,
619  ATR, rs_A, cs_A,
620  0,
621  m_A - 1,
622  buff_p, inc_p );
623 
624  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
625  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
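626  // FLA_ONE, ATL, ATR );
627  bl1_ztrsm( BLIS1_LEFT,
628  BLIS1_LOWER_TRIANGULAR,
629  BLIS1_NO_TRANSPOSE,
630  BLIS1_UNIT_DIAG,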
631  m_A,
632  n_A - m_A,
633  buff_1,
634  ATL, rs_A, cs_A,
635  ATR, rs_A, cs_A );
636  }
637 
638  return r_val;
639 }
FLA_Error FLA_Apply_pivots_ln_opz_var1(int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:438
const dcomplex zzero
Definition: FLA_Init.c:39
void bl1_zamax(int n, dcomplex *x, int incx, int *index)
Definition: bl1_amax.c:46
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition: bl1_dots.c:56
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
void bl1_ztrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:369
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trsv.c:177
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References bl1_zamax(), bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), bl1_ztrsm(), bl1_ztrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, FLA_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var3().