libflame  revision_anchor
Functions
FLA_LU_piv_opt_var5.c File Reference

(r)

Functions

FLA_Error FLA_LU_piv_opt_var5 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var5 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var5 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var5 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var5 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 

Function Documentation

◆ FLA_LU_piv_opc_var5()

FLA_Error FLA_LU_piv_opc_var5 ( int  m_A,
int  n_A,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
243 {
244  FLA_Error r_val = FLA_SUCCESS;
245  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
246  int min_m_n = min( m_A, n_A );
247  int i;
248 
249  for ( i = 0; i < min_m_n; ++i )
250  {
251  scomplex pivot_val = czero;
252  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
253  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
254  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
255  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
256  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
257 
258  int* pi1 = buff_p + i*inc_p;
259 
260  int m_ahead = m_A - i - 1;
261  int n_ahead = n_A - i - 1;
262 
263  /*------------------------------------------------------------*/
264 
265  // FLA_Merge_2x1( alpha11,
266  // a21, &aB1 );
267 
268  // FLA_Amax_external( aB1, pi1 );
269  bl1_camax( m_ahead + 1,
270  alpha11, rs_A,
271  pi1 );
272 
273  // If a null pivot is encountered, return the index.
274  pivot_val = *(alpha11 + *pi1);
275  if ( pivot_val.real == czero.real &&
276  pivot_val.imag == czero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
277  else
278  {
279  // FLA_Merge_1x2( ABL, ABR, &AB );
280 
281  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
282  FLA_Apply_pivots_ln_opc_var1( n_A,
283  a10t, rs_A, cs_A,
284  0,
285  0,
286  pi1, inc_p );
287 
288  // FLA_Inv_scal_external( alpha11, a21 );
289  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
290  m_ahead,
291  alpha11,
292  a21, rs_A );
293  }
294 
295  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
296  bl1_cger( BLIS1_NO_CONJUGATE,
297  BLIS1_NO_CONJUGATE,
298  m_ahead,
299  n_ahead,
300  buff_m1,
301  a21, rs_A,
302  a12t, cs_A,
303  A22, rs_A, cs_A );
304 
305  /*------------------------------------------------------------*/
306 
307  }
308 
309  return r_val;
310 }
FLA_Error FLA_Apply_pivots_ln_opc_var1(int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:356
const scomplex czero
Definition: FLA_Init.c:38
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
int FLA_Error
Definition: FLA_type_defs.h:47
void bl1_camax(int n, scomplex *x, int incx, int *index)
Definition: bl1_amax.c:35
int i
Definition: bl1_axmyv2.c:145
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
scomplex
Definition: blis_type_defs.h:133
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References bl1_camax(), bl1_cger(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_opd_var5()

FLA_Error FLA_LU_piv_opd_var5 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
169 {
170  FLA_Error r_val = FLA_SUCCESS;
171  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
172  int min_m_n = min( m_A, n_A );
173  int i;
174 
175  for ( i = 0; i < min_m_n; ++i )
176  {
177  double pivot_val = dzero;
178  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
179  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
182  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
183 
184  int* pi1 = buff_p + i*inc_p;
185 
186  int m_ahead = m_A - i - 1;
187  int n_ahead = n_A - i - 1;
188 
189  /*------------------------------------------------------------*/
190 
191  // FLA_Merge_2x1( alpha11,
192  // a21, &aB1 );
193 
194  // FLA_Amax_external( aB1, pi1 );
195  bl1_damax( m_ahead + 1,
196  alpha11, rs_A,
197  pi1 );
198 
199  // If a null pivot is encountered, return the index.
200  pivot_val = *(alpha11 + *pi1);
201  if ( pivot_val == dzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
202  else
203  {
204  // FLA_Merge_1x2( ABL, ABR, &AB );
205 
206  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
207  FLA_Apply_pivots_ln_opd_var1( n_A,
208  a10t, rs_A, cs_A,
209  0,
210  0,
211  pi1, inc_p );
212 
213  // FLA_Inv_scal_external( alpha11, a21 );
214  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
215  m_ahead,
216  alpha11,
217  a21, rs_A );
218  }
219 
220  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
221  bl1_dger( BLIS1_NO_CONJUGATE,
222  BLIS1_NO_CONJUGATE,
223  m_ahead,
224  n_ahead,
225  buff_m1,
226  a21, rs_A,
227  a12t, cs_A,
228  A22, rs_A, cs_A );
229 
230  /*------------------------------------------------------------*/
231 
232  }
233 
234  return r_val;
235 }
FLA_Error FLA_Apply_pivots_ln_opd_var1(int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:274
const double dzero
Definition: FLA_Init.c:37
void bl1_damax(int n, double *x, int incx, int *index)
Definition: bl1_amax.c:24
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26

References bl1_damax(), bl1_dger(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, and i.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_ops_var5()

FLA_Error FLA_LU_piv_ops_var5 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
95 {
96  FLA_Error r_val = FLA_SUCCESS;
97  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
98  int min_m_n = min( m_A, n_A );
99  int i;
100 
101  for ( i = 0; i < min_m_n; ++i )
102  {
103  float pivot_val = fzero;
104  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
105  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
106  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
107  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
108  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
109 
110  int* pi1 = buff_p + i*inc_p;
111 
112  int m_ahead = m_A - i - 1;
113  int n_ahead = n_A - i - 1;
114 
115  /*------------------------------------------------------------*/
116 
117  // FLA_Merge_2x1( alpha11,
118  // a21, &aB1 );
119 
120  // FLA_Amax_external( aB1, pi1 );
121  bl1_samax( m_ahead + 1,
122  alpha11, rs_A,
123  pi1 );
124 
125  // If a null pivot is encountered, return the index.
126  pivot_val = *(alpha11 + *pi1);
127  if ( pivot_val == fzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
128  else
129  {
130  // FLA_Merge_1x2( ABL, ABR, &AB );
131 
132  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
133  FLA_Apply_pivots_ln_ops_var1( n_A,
134  a10t, rs_A, cs_A,
135  0,
136  0,
137  pi1, inc_p );
138 
139  // FLA_Inv_scal_external( alpha11, a21 );
140  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
141  m_ahead,
142  alpha11,
143  a21, rs_A );
144  }
145 
146  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
147  bl1_sger( BLIS1_NO_CONJUGATE,
148  BLIS1_NO_CONJUGATE,
149  m_ahead,
150  n_ahead,
151  buff_m1,
152  a21, rs_A,
153  a12t, cs_A,
154  A22, rs_A, cs_A );
155 
156  /*------------------------------------------------------------*/
157 
158  }
159 
160  return r_val;
161 }
FLA_Error FLA_Apply_pivots_ln_ops_var1(int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:192
const float fzero
Definition: FLA_Init.c:36
void bl1_samax(int n, float *x, int incx, int *index)
Definition: bl1_amax.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13

References bl1_samax(), bl1_sger(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_opt_var5()

FLA_Error FLA_LU_piv_opt_var5 ( FLA_Obj  A,
FLA_Obj  p 
)
14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_A, n_A;
18  int rs_A, cs_A;
19  int inc_p;
20 
21  datatype = FLA_Obj_datatype( A );
22 
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25  rs_A = FLA_Obj_row_stride( A );
26  cs_A = FLA_Obj_col_stride( A );
27 
28  inc_p = FLA_Obj_vector_inc( p );
29 
30 
31  switch ( datatype )
32  {
33  case FLA_FLOAT:
34  {
35  float* buff_A = FLA_FLOAT_PTR( A );
36  int* buff_p = FLA_INT_PTR( p );
37 
38  r_val = FLA_LU_piv_ops_var5( m_A,
39  n_A,
40  buff_A, rs_A, cs_A,
41  buff_p, inc_p );
42 
43  break;
44  }
45 
46  case FLA_DOUBLE:
47  {
48  double* buff_A = FLA_DOUBLE_PTR( A );
49  int* buff_p = FLA_INT_PTR( p );
50 
51  r_val = FLA_LU_piv_opd_var5( m_A,
52  n_A,
53  buff_A, rs_A, cs_A,
54  buff_p, inc_p );
55 
56  break;
57  }
58 
59  case FLA_COMPLEX:
60  {
61  scomplex* buff_A = FLA_COMPLEX_PTR( A );
62  int* buff_p = FLA_INT_PTR( p );
63 
64  r_val = FLA_LU_piv_opc_var5( m_A,
65  n_A,
66  buff_A, rs_A, cs_A,
67  buff_p, inc_p );
68 
69  break;
70  }
71 
72  case FLA_DOUBLE_COMPLEX:
73  {
74  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
75  int* buff_p = FLA_INT_PTR( p );
76 
77  r_val = FLA_LU_piv_opz_var5( m_A,
78  n_A,
79  buff_A, rs_A, cs_A,
80  buff_p, inc_p );
81 
82  break;
83  }
84  }
85 
86  return r_val;
87 }
FLA_Error FLA_LU_piv_opz_var5(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var5.c:314
FLA_Error FLA_LU_piv_opc_var5(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var5.c:239
FLA_Error FLA_LU_piv_ops_var5(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var5.c:91
FLA_Error FLA_LU_piv_opd_var5(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var5.c:165
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dcomplex
Definition: blis_type_defs.h:138

References FLA_LU_piv_opc_var5(), FLA_LU_piv_opd_var5(), FLA_LU_piv_ops_var5(), FLA_LU_piv_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opz_var5()

FLA_Error FLA_LU_piv_opz_var5 ( int  m_A,
int  n_A,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
318 {
319  FLA_Error r_val = FLA_SUCCESS;
320  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
321  int min_m_n = min( m_A, n_A );
322  int i;
323 
324  for ( i = 0; i < min_m_n; ++i )
325  {
326  dcomplex pivot_val = zzero;
327  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
328  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
329  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
330  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
331  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
332 
333  int* pi1 = buff_p + i*inc_p;
334 
335  int m_ahead = m_A - i - 1;
336  int n_ahead = n_A - i - 1;
337 
338  /*------------------------------------------------------------*/
339 
340  // FLA_Merge_2x1( alpha11,
341  // a21, &aB1 );
342 
343  // FLA_Amax_external( aB1, pi1 );
344  bl1_zamax( m_ahead + 1,
345  alpha11, rs_A,
346  pi1 );
347 
348  // If a null pivot is encountered, return the index.
349  pivot_val = *(alpha11 + *pi1);
350  if ( pivot_val.real == zzero.real &&
351  pivot_val.imag == zzero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
352  else
353  {
354  // FLA_Merge_1x2( ABL, ABR, &AB );
355 
356  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
357  FLA_Apply_pivots_ln_opz_var1( n_A,
358  a10t, rs_A, cs_A,
359  0,
360  0,
361  pi1, inc_p );
362 
363  // FLA_Inv_scal_external( alpha11, a21 );
364  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
365  m_ahead,
366  alpha11,
367  a21, rs_A );
368  }
369  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
370  bl1_zger( BLIS1_NO_CONJUGATE,
371  BLIS1_NO_CONJUGATE,
372  m_ahead,
373  n_ahead,
374  buff_m1,
375  a21, rs_A,
376  a12t, cs_A,
377  A22, rs_A, cs_A );
378 
379  /*------------------------------------------------------------*/
380 
381  }
382 
383  return r_val;
384 }
FLA_Error FLA_Apply_pivots_ln_opz_var1(int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:438
const dcomplex zzero
Definition: FLA_Init.c:39
void bl1_zamax(int n, dcomplex *x, int incx, int *index)
Definition: bl1_amax.c:46
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References bl1_zamax(), bl1_zger(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var5().