libflame
FLA_Apply_pivots_ln_opt_var1.c File Reference


Functions

FLA_Error FLA_Apply_pivots_ln_opt_var1 (FLA_Obj p, FLA_Obj A)
 
FLA_Error FLA_Apply_pivots_ln_opi_var1 (int n, int *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
 
FLA_Error FLA_Apply_pivots_ln_ops_var1 (int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
 
FLA_Error FLA_Apply_pivots_ln_opd_var1 (int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
 
FLA_Error FLA_Apply_pivots_ln_opc_var1 (int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
 
FLA_Error FLA_Apply_pivots_ln_opz_var1 (int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
 

Function Documentation

◆ FLA_Apply_pivots_ln_opc_var1()

FLA_Error FLA_Apply_pivots_ln_opc_var1 ( int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp )
{
  scomplex temp;
  scomplex* a_i_0;
  scomplex* a_pi_0;
  scomplex* a_0_j;
  scomplex* a_i_j;
  scomplex* a_pi_j;
  int i, j;
  int i_begin, i_bound, i_inc;
  int p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc = 1;
    p_inc = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc = -1;
    p_inc = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;
        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;
      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }

  return FLA_SUCCESS;
}

References i, and temp.

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), FLA_Apply_pivots_rt_opt_var1(), FLA_LU_piv_opc_var3(), FLA_LU_piv_opc_var4(), and FLA_LU_piv_opc_var5().
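The "Add i to shift from relative to absolute index" comment in the listing above reflects the pivot format these kernels expect: entry p[i] is an offset relative to row i, so the row exchanged with row i is row p[i] + i. As a minimal illustration only (the names ipiv, k, and convert_ipiv_to_relative are hypothetical and not part of this file), a 1-based, absolute LAPACK-style pivot vector could be converted to this 0-based, relative form as follows:

    // Hypothetical helper: convert a 1-based, absolute LAPACK-style pivot
    // vector (ipiv) into the 0-based, relative offsets p[i] that the
    // FLA_Apply_pivots_ln_op*_var1 kernels dereference as p[i] + i.
    void convert_ipiv_to_relative( int k, const int* ipiv, int* p )
    {
      int i;
      for ( i = 0; i < k; i++ )
        p[ i ] = ( ipiv[ i ] - 1 ) - i;
    }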

◆ FLA_Apply_pivots_ln_opd_var1()

FLA_Error FLA_Apply_pivots_ln_opd_var1 ( int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp )
{
  double temp;
  double* a_i_0;
  double* a_pi_0;
  double* a_0_j;
  double* a_i_j;
  double* a_pi_j;
  int i, j;
  int i_begin, i_bound, i_inc;
  int p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc = 1;
    p_inc = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc = -1;
    p_inc = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;
        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;
      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }

  return FLA_SUCCESS;
}

References i, and temp.

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), FLA_Apply_pivots_rt_opt_var1(), FLA_LU_piv_opd_var3(), FLA_LU_piv_opd_var4(), and FLA_LU_piv_opd_var5().
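As a usage sketch (illustrative only; it assumes the libflame headers are available and that the pivots are already in the relative format described above), the double-precision kernel can be applied directly to a column-major buffer. Here a 3-by-3 matrix is stored with a_rs = 1 and a_cs = 3, and the pivot vector { 2, 0, 0 } exchanges rows 0 and 2 while leaving row 1 in place:

    #include "FLAME.h"

    int main( void )
    {
      // 3x3 matrix in column-major order (row stride 1, column stride 3);
      // every row is filled with its own index so the swap is easy to see.
      double a[ 9 ] = { 0.0, 1.0, 2.0,    // column 0: rows 0, 1, 2
                        0.0, 1.0, 2.0,    // column 1
                        0.0, 1.0, 2.0 };  // column 2
      int    p[ 3 ] = { 2, 0, 0 };        // relative pivots: row 0 <-> row 2

      FLA_Apply_pivots_ln_opd_var1( 3, a, 1, 3, 0, 2, p, 1 );

      // Each column now reads { 2.0, 1.0, 0.0 }.
      return 0;
    }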

◆ FLA_Apply_pivots_ln_opi_var1()

FLA_Error FLA_Apply_pivots_ln_opi_var1 ( int n, int *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp )
{
  int temp;
  int* a_i_0;
  int* a_pi_0;
  int* a_0_j;
  int* a_i_j;
  int* a_pi_j;
  int i, j;
  int i_begin, i_bound, i_inc;
  int p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc = 1;
    p_inc = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc = -1;
    p_inc = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;
        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;
      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }

  return FLA_SUCCESS;
}

References i, and temp.

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), and FLA_Apply_pivots_rt_opt_var1().

◆ FLA_Apply_pivots_ln_ops_var1()

FLA_Error FLA_Apply_pivots_ln_ops_var1 ( int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp )
{
  float temp;
  float* a_i_0;
  float* a_pi_0;
  float* a_0_j;
  float* a_i_j;
  float* a_pi_j;
  int i, j;
  int i_begin, i_bound, i_inc;
  int p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc = 1;
    p_inc = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc = -1;
    p_inc = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;
        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;
      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }

  return FLA_SUCCESS;
}

References i, and temp.

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), FLA_Apply_pivots_rt_opt_var1(), FLA_LU_piv_ops_var3(), FLA_LU_piv_ops_var4(), and FLA_LU_piv_ops_var5().

◆ FLA_Apply_pivots_ln_opt_var1()

FLA_Error FLA_Apply_pivots_ln_opt_var1 ( FLA_Obj p, FLA_Obj A )
{
  FLA_Datatype datatype;
  int n_A;
  int rs_A, cs_A;
  int inc_p;
  int k1_0, k2_0;

  datatype = FLA_Obj_datatype( A );

  n_A = FLA_Obj_width( A );

  rs_A = FLA_Obj_row_stride( A );
  cs_A = FLA_Obj_col_stride( A );

  inc_p = FLA_Obj_vector_inc( p );

  // Use zero-based indices.
  k1_0 = 0;
  k2_0 = ( int ) FLA_Obj_vector_dim( p ) - 1;

  switch ( datatype )
  {
    case FLA_INT:
    {
      int* buff_A = FLA_INT_PTR( A );
      int* buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opi_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      int*   buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_ops_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      int*    buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opd_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opc_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      int*      buff_p = FLA_INT_PTR( p );

      FLA_Apply_pivots_ln_opz_var1( n_A,
                                    buff_A, rs_A, cs_A,
                                    k1_0,
                                    k2_0,
                                    buff_p, inc_p );

      break;
    }
  }

  return FLA_SUCCESS;
}

References FLA_Apply_pivots_ln_opc_var1(), FLA_Apply_pivots_ln_opd_var1(), FLA_Apply_pivots_ln_opi_var1(), FLA_Apply_pivots_ln_ops_var1(), FLA_Apply_pivots_ln_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_row_stride(), FLA_Obj_vector_dim(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_Apply_pivots_ln().
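A corresponding sketch for the object-based entry point documented above (again illustrative only; it assumes the standard libflame object routines FLA_Init, FLA_Obj_create_without_buffer, FLA_Obj_attach_buffer, FLA_Obj_free_without_buffer, and FLA_Finalize). The wrapper reads the datatype, width, strides, and pivot increment from the FLA_Obj descriptors and dispatches to the matching typed kernel, so the caller only wraps existing buffers:

    #include "FLAME.h"

    int main( void )
    {
      double a[ 9 ] = { 0.0, 1.0, 2.0,
                        0.0, 1.0, 2.0,
                        0.0, 1.0, 2.0 };   // 3x3, column-major
      int    p[ 3 ] = { 2, 0, 0 };         // relative pivots
      FLA_Obj A, pv;

      FLA_Init();

      // Wrap the existing buffers in FLA_Obj descriptors without copying.
      FLA_Obj_create_without_buffer( FLA_DOUBLE, 3, 3, &A );
      FLA_Obj_attach_buffer( a, 1, 3, &A );

      FLA_Obj_create_without_buffer( FLA_INT, 3, 1, &pv );
      FLA_Obj_attach_buffer( p, 1, 3, &pv );

      // The length of pv determines the k1_0..k2_0 range inside the wrapper.
      FLA_Apply_pivots_ln_opt_var1( pv, A );

      FLA_Obj_free_without_buffer( &A );
      FLA_Obj_free_without_buffer( &pv );
      FLA_Finalize();
      return 0;
    }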

◆ FLA_Apply_pivots_ln_opz_var1()

FLA_Error FLA_Apply_pivots_ln_opz_var1 ( int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp )
{
  dcomplex temp;
  dcomplex* a_i_0;
  dcomplex* a_pi_0;
  dcomplex* a_0_j;
  dcomplex* a_i_j;
  dcomplex* a_pi_j;
  int i, j;
  int i_begin, i_bound, i_inc;
  int p_inc;

  // Handle both positive and negative increments for the pivot vector.
  if ( incp > 0 )
  {
    i_begin = k1;
    i_bound = k2 + 1;
    i_inc = 1;
    p_inc = 1*incp;
  }
  else // if ( incp < 0 )
  {
    i_begin = k2;
    i_bound = k1 - 1;
    i_inc = -1;
    p_inc = -1*incp;
  }

  // Optimize memory accesses depending on whether A is stored in
  // column-major or row-major order. That is, for column-major
  // matrices, we interchange all the elements in a single column
  // at a time. But for row-major matrices, we perform an entire
  // row interchange before moving to the next interchange. For
  // general storage, we decide based on which stride is closer
  // to one.
  if ( a_rs == 1 || a_rs < a_cs )
  {
    for ( j = 0; j < n; j++ )
    {
      a_0_j = a + j*a_cs;

      for ( i = i_begin; i != i_bound; i += i_inc )
      {
        a_i_j  = a_0_j + ( i )*a_rs;
        // Add i to shift from relative to absolute index.
        a_pi_j = a_0_j + ( p[i*p_inc] + i )*a_rs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }
  else // if ( a_cs == 1 || a_cs < a_rs )
  {
    for ( i = i_begin; i != i_bound; i += i_inc )
    {
      a_i_0  = a + ( i )*a_rs;
      // Add i to shift from relative to absolute index.
      a_pi_0 = a + ( p[i*p_inc] + i )*a_rs;

      for ( j = 0; j < n; j++ )
      {
        a_i_j  = a_i_0 + j*a_cs;
        a_pi_j = a_pi_0 + j*a_cs;

        temp = *a_pi_j;
        *a_pi_j = *a_i_j;
        *a_i_j = temp;
      }
    }
  }

  return FLA_SUCCESS;
}

References i, and temp.

Referenced by FLA_Apply_pivots_ln_opt_var1(), FLA_Apply_pivots_lt_opt_var1(), FLA_Apply_pivots_rn_opt_var1(), FLA_Apply_pivots_rt_opt_var1(), FLA_LU_piv_opz_var3(), FLA_LU_piv_opz_var4(), and FLA_LU_piv_opz_var5().