libflame  revision_anchor
Functions
FLA_LU_piv_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_LU_piv_blk_var3 (FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
 
FLA_Error FLA_LU_piv_blk_var4 (FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
 
FLA_Error FLA_LU_piv_blk_var5 (FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
 
FLA_Error FLA_LU_piv_unb_var3 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_unb_var3b (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_unb_var4 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_unb_var5 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_opt_var3 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var3 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var3 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var3 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var3 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opt_var4 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var4 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var4 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var4 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var4 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opt_var5 (FLA_Obj A, FLA_Obj p)
 
FLA_Error FLA_LU_piv_ops_var5 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opd_var5 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opc_var5 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 
FLA_Error FLA_LU_piv_opz_var5 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
 

Function Documentation

◆ FLA_LU_piv_blk_var3()

FLA_Error FLA_LU_piv_blk_var3 ( FLA_Obj  A,
FLA_Obj  p,
fla_lu_t *  cntl 
)
16 {
17  FLA_Error r_val = FLA_SUCCESS, r_val_sub = FLA_SUCCESS;
18  FLA_Obj ATL, ATR, A00, A01, A02,
19  ABL, ABR, A10, A11, A12,
20  A20, A21, A22;
21 
22  FLA_Obj AL, AR, A0, A1, A2;
23 
24  FLA_Obj pT, p0,
25  pB, p1,
26  p2;
27 
28  FLA_Obj AB0, AB1;
29 
30  dim_t b;
31 
32 
33  FLA_Part_2x2( A, &ATL, &ATR,
34  &ABL, &ABR, 0, 0, FLA_TL );
35 
36  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
37 
38  FLA_Part_2x1( p, &pT,
39  &pB, 0, FLA_TOP );
40 
41  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
42  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
43 
44  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
45 
46  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
47  /* ************* */ /* ******************** */
48  &A10, /**/ &A11, &A12,
49  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
50  b, b, FLA_BR );
51 
52  FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, /**/ &A1, &A2,
53  b, FLA_RIGHT );
54 
55  FLA_Repart_2x1_to_3x1( pT, &p0,
56  /* ** */ /* ** */
57  &p1,
58  pB, &p2, b, FLA_BOTTOM );
59 
60  /*------------------------------------------------------------*/
61 
62  // Apply previously computed pivots
63  FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p0, A1,
64  FLA_Cntl_sub_appiv1( cntl ) );
65 
66  // A01 = trilu( A00 ) \ A01
67  FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,
68  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
69  FLA_ONE, A00, A01,
70  FLA_Cntl_sub_trsm1( cntl ) );
71 
72  // A11 = A11 - A10 * A01
73  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
74  FLA_MINUS_ONE, A10, A01, FLA_ONE, A11,
75  FLA_Cntl_sub_gemm1( cntl ) );
76 
77  // A21 = A21 - A20 * A01
78  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
79  FLA_MINUS_ONE, A20, A01, FLA_ONE, A21,
80  FLA_Cntl_sub_gemm2( cntl ) );
81 
82  // AB1 = / A11 \
83  // \ A21 /
84  FLA_Merge_2x1( A11,
85  A21, &AB1 );
86 
87  // AB1, p1 = LU_piv( AB1 )
88  r_val_sub = FLA_LU_piv_internal( AB1, p1,
89  FLA_Cntl_sub_lu( cntl ) );
90 
91  // If the unblocked algorithm returns a null pivot,
92  // update the pivot index and return it.
93  if ( r_val == FLA_SUCCESS && r_val_sub >= 0 )
94  {
95  r_val = FLA_Obj_length( A01 ) + r_val_sub;
96  }
97 
98  // AB0 = / A10 \
99  // \ A20 /
100  FLA_Merge_2x1( A10,
101  A20, &AB0 );
102 
103  // Apply pivots to previous columns
104  FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p1, AB0,
105  FLA_Cntl_sub_appiv2( cntl ) );
106 
107  /*------------------------------------------------------------*/
108 
109  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
110  A10, A11, /**/ A12,
111  /* ************** */ /* ****************** */
112  &ABL, /**/ &ABR, A20, A21, /**/ A22,
113  FLA_TL );
114 
115  FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, A1, /**/ A2,
116  FLA_LEFT );
117 
118  FLA_Cont_with_3x1_to_2x1( &pT, p0,
119  p1,
120  /* ** */ /* ** */
121  &pB, p2, FLA_TOP );
122 
123  }
124 
125  if ( FLA_Obj_width( ATR ) > 0 )
126  {
127  /* Apply pivots to untouched columns */
128  FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR,
129  FLA_Cntl_sub_appiv1( cntl ) );
130 
131  /* ATR = trilu( ATL ) \ ATR */
132  FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
133  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
134  FLA_ONE, ATL, ATR );
135  }
136 
137  return r_val;
138 }
FLA_Error FLA_Apply_pivots_internal(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A, fla_appiv_t *cntl)
Definition: FLA_Apply_pivots_internal.c:13
FLA_Error FLA_Gemm_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition: FLA_Gemm_internal.c:16
FLA_Error FLA_LU_piv_internal(FLA_Obj A, FLA_Obj p, fla_lu_t *cntl)
Definition: FLA_LU_piv_internal.c:15
FLA_Error FLA_Trsm_internal(FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition: FLA_Trsm_internal.c:16
FLA_Error FLA_Trsm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Trsm_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541
int FLA_Error
Definition: FLA_type_defs.h:47
unsigned long dim_t
Definition: FLA_type_defs.h:71
Definition: FLA_type_defs.h:159

References FLA_Apply_pivots_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_piv_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsm_internal().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_blk_var4()

FLA_Error FLA_LU_piv_blk_var4 ( FLA_Obj  A,
FLA_Obj  p,
fla_lu_t *  cntl 
)
16 {
17  FLA_Error r_val = FLA_SUCCESS, r_val_sub = FLA_SUCCESS;
18  FLA_Obj ATL, ATR, A00, A01, A02,
19  ABL, ABR, A10, A11, A12,
20  A20, A21, A22;
21 
22  FLA_Obj pT, p0,
23  pB, p1,
24  p2;
25 
26  FLA_Obj AB0, AB1, AB2;
27 
28  dim_t b;
29 
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x1( p, &pT,
35  &pB, 0, FLA_TOP );
36 
37  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
38  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
39 
40  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
41 
42  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
43  /* ************* */ /* ******************** */
44  &A10, /**/ &A11, &A12,
45  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
46  b, b, FLA_BR );
47 
48  FLA_Repart_2x1_to_3x1( pT, &p0,
49  /* ** */ /* ** */
50  &p1,
51  pB, &p2, b, FLA_BOTTOM );
52 
53  /*------------------------------------------------------------*/
54 
55  // A11 = A11 - A10 * A01
56  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
57  FLA_MINUS_ONE, A10, A01, FLA_ONE, A11,
58  FLA_Cntl_sub_gemm1( cntl ) );
59 
60  // A21 = A21 - A20 * A01
61  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
62  FLA_MINUS_ONE, A20, A01, FLA_ONE, A21,
63  FLA_Cntl_sub_gemm3( cntl ) );
64 
65  // AB1 = / A11 \
66  // \ A21 /
67  FLA_Merge_2x1( A11,
68  A21, &AB1 );
69 
70  // AB1, p1 = LU_piv( AB1 )
71  r_val_sub = FLA_LU_piv_internal( AB1, p1,
72  FLA_Cntl_sub_lu( cntl ) );
73 
74  // If the unblocked algorithm returns a null pivot,
75  // update the pivot index and return it.
76  if ( r_val == FLA_SUCCESS && r_val_sub >= 0 )
77  {
78  r_val = FLA_Obj_length( A01 ) + r_val_sub;
79  }
80 
81  // AB0 = / A10 \
82  // \ A20 /
83  FLA_Merge_2x1( A10,
84  A20, &AB0 );
85 
86  // AB2 = / A12 \
87  // \ A22 /
88  FLA_Merge_2x1( A12,
89  A22, &AB2 );
90 
91  // Apply pivots to remaining columns
92  FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p1, AB0,
93  FLA_Cntl_sub_appiv1( cntl ) );
94  FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p1, AB2,
95  FLA_Cntl_sub_appiv1( cntl ) );
96 
97  // A12 = A12 - A10 * A02
98  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
99  FLA_MINUS_ONE, A10, A02, FLA_ONE, A12,
100  FLA_Cntl_sub_gemm2( cntl ) );
101 
102  // A12 = trilu( A11 ) \ A12
103  FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,
104  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
105  FLA_ONE, A11, A12,
106  FLA_Cntl_sub_trsm1( cntl ) );
107 
108  /*------------------------------------------------------------*/
109 
110  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
111  A10, A11, /**/ A12,
112  /* ************** */ /* ****************** */
113  &ABL, /**/ &ABR, A20, A21, /**/ A22,
114  FLA_TL );
115 
116  FLA_Cont_with_3x1_to_2x1( &pT, p0,
117  p1,
118  /* ** */ /* ** */
119  &pB, p2, FLA_TOP );
120 
121  }
122 
123  return r_val;
124 }

References FLA_Apply_pivots_internal(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_piv_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Trsm_internal().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_blk_var5()

FLA_Error FLA_LU_piv_blk_var5 ( FLA_Obj  A,
FLA_Obj  p,
fla_lu_t *  cntl 
)
14 {
15  FLA_Error r_val = FLA_SUCCESS, r_val_sub = FLA_SUCCESS;
16  FLA_Obj ATL, ATR, A00, A01, A02,
17  ABL, ABR, A10, A11, A12,
18  A20, A21, A22;
19 
20  FLA_Obj pT, p0,
21  pB, p1,
22  p2;
23 
24  FLA_Obj AB0, AB1, AB2;
25 
26  dim_t b;
27 
28 
29  FLA_Part_2x2( A, &ATL, &ATR,
30  &ABL, &ABR, 0, 0, FLA_TL );
31 
32  FLA_Part_2x1( p, &pT,
33  &pB, 0, FLA_TOP );
34 
35  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
36  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
37 
38  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
39 
40  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
41  /* ************* */ /* ******************** */
42  &A10, /**/ &A11, &A12,
43  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
44  b, b, FLA_BR );
45 
46  FLA_Repart_2x1_to_3x1( pT, &p0,
47  /* ** */ /* ** */
48  &p1,
49  pB, &p2, b, FLA_BOTTOM );
50 
51  /*------------------------------------------------------------*/
52 
53  // AB1 = / A11 \
54  // \ A21 /
55  FLA_Merge_2x1( A11,
56  A21, &AB1 );
57 
58  // AB1, p1 = LU_piv( AB1 )
59  r_val_sub = FLA_LU_piv_internal( AB1, p1,
60  FLA_Cntl_sub_lu( cntl ) );
61 
62  // If the unblocked algorithm returns a null pivot,
63  // update the pivot index and return it.
64  if ( r_val == FLA_SUCCESS && r_val_sub >= 0 )
65  {
66  r_val = FLA_Obj_length( A01 ) + r_val_sub;
67  }
68 
69  // AB0 = / A10 \
70  // \ A20 /
71  FLA_Merge_2x1( A10,
72  A20, &AB0 );
73 
74  // Apply computed pivots to AB0
75  FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p1, AB0,
76  FLA_Cntl_sub_appiv1( cntl ) );
77 
78  // AB2 = / A12 \
79  // \ A22 /
80  FLA_Merge_2x1( A12,
81  A22, &AB2 );
82 
83  // Apply computed pivots to AB2
84  FLA_Apply_pivots_internal( FLA_LEFT, FLA_NO_TRANSPOSE, p1, AB2,
85  FLA_Cntl_sub_appiv1( cntl ) );
86 
87  // A12 = trilu( A11 ) \ A12
88  FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,
89  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
90  FLA_ONE, A11, A12,
91  FLA_Cntl_sub_trsm1( cntl ) );
92 
93  // A22 = A22 - A21 * A12
94  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
95  FLA_MINUS_ONE, A21, A12, FLA_ONE, A22,
96  FLA_Cntl_sub_gemm1( cntl ) );
97 
98  /*------------------------------------------------------------*/
99 
100  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
101  A10, A11, /**/ A12,
102  /* ************** */ /* ****************** */
103  &ABL, /**/ &ABR, A20, A21, /**/ A22,
104  FLA_TL );
105 
106  FLA_Cont_with_3x1_to_2x1( &pT, p0,
107  p1,
108  /* ** */ /* ** */
109  &pB, p2, FLA_TOP );
110 
111  }
112 
113  return r_val;
114 }

References FLA_Apply_pivots_internal(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_piv_internal(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Trsm_internal().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opc_var3()

FLA_Error FLA_LU_piv_opc_var3 ( int  m_A,
int  n_A,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
371 {
372  FLA_Error r_val = FLA_SUCCESS;
373  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
374  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
375  int min_m_n = min( m_A, n_A );
376  int i;
377 
378  for ( i = 0; i < min_m_n; ++i )
379  {
380  scomplex pivot_val = czero;
381  scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
382  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
383  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
384  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
385  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
386  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
387 
388  scomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
389 
390  int* p0 = buff_p;
391  int* pi1 = buff_p + i*inc_p;
392 
393  int m_ahead = m_A - i - 1;
394  int mn_behind = i;
395 
396  /*------------------------------------------------------------*/
397 
398  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
399  FLA_Apply_pivots_ln_opc_var1( 1,
400  a1, rs_A, cs_A,
401  0,
402  mn_behind - 1,
403  p0, inc_p );
404 
405  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
406  bl1_ctrsv( BLIS1_LOWER_TRIANGULAR,
407  BLIS1_NO_TRANSPOSE,
408  BLIS1_UNIT_DIAG,
409  mn_behind,
410  A00, rs_A, cs_A,
411  a01, rs_A );
412 
413  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
414  bl1_cdots( BLIS1_NO_CONJUGATE,
415  mn_behind,
416  buff_m1,
417  a10t, cs_A,
418  a01, rs_A,
419  buff_1,
420  alpha11 );
421 
422  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
423  bl1_cgemv( BLIS1_NO_TRANSPOSE,
424  BLIS1_NO_CONJUGATE,
425  m_ahead,
426  mn_behind,
427  buff_m1,
428  A20, rs_A, cs_A,
429  a01, rs_A,
430  buff_1,
431  a21, rs_A );
432 
433  // FLA_Merge_2x1( alpha11,
434  // a21, &aB1 );
435 
436  // FLA_Amax_external( aB1, pi1 );
437  bl1_camax( m_ahead + 1,
438  alpha11, rs_A,
439  pi1 );
440 
441  // If a null pivot is encountered, return the index.
442  pivot_val =*(alpha11 + *pi1);
443  if ( pivot_val.real == czero.real &&
444  pivot_val.imag == czero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
445  else
446  {
447  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
448  FLA_Apply_pivots_ln_opc_var1( 1,
449  alpha11, rs_A, cs_A,
450  0,
451  0,
452  pi1, inc_p );
453 
454  // FLA_Inv_scal_external( alpha11, a21 );
455  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
456  m_ahead,
457  alpha11,
458  a21, rs_A );
459 
460  // FLA_Merge_2x1( a10t,
461  // A20, &AB0 );
462 
463  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
464  FLA_Apply_pivots_ln_opc_var1( mn_behind,
465  a10t, rs_A, cs_A,
466  0,
467  0,
468  pi1, inc_p );
469  }
470  /*------------------------------------------------------------*/
471 
472  }
473 
474  if ( m_A < n_A )
475  {
476  scomplex* ATL = buff_A;
477  scomplex* ATR = buff_A + m_A*cs_A;
478 
479  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
480  FLA_Apply_pivots_ln_opc_var1( n_A - m_A,
481  ATR, rs_A, cs_A,
482  0,
483  m_A - 1,
484  buff_p, inc_p );
485 
486  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
487  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
488  // FLA_ONE, ATL, ATR );
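489  bl1_ctrsm( BLIS1_LEFT,
490  BLIS1_LOWER_TRIANGULAR,
491  BLIS1_NO_TRANSPOSE,
492  BLIS1_UNIT_DIAG,
493  m_A,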
494  n_A - m_A,
495  buff_1,
496  ATL, rs_A, cs_A,
497  ATR, rs_A, cs_A );
498  }
499 
500  return r_val;
501 }
FLA_Error FLA_Apply_pivots_ln_opc_var1(int n, scomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:356
const scomplex czero
Definition: FLA_Init.c:38
void bl1_camax(int n, scomplex *x, int incx, int *index)
Definition: bl1_amax.c:35
int i
Definition: bl1_axmyv2.c:145
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52
void bl1_ctrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:219
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trsv.c:99
@ BLIS1_LOWER_TRIANGULAR
Definition: blis_type_defs.h:62
@ BLIS1_UNIT_DIAG
Definition: blis_type_defs.h:75
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
@ BLIS1_LEFT
Definition: blis_type_defs.h:68
scomplex
Definition: blis_type_defs.h:133
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References bl1_camax(), bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), bl1_ctrsm(), bl1_ctrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, FLA_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_opc_var4()

FLA_Error FLA_LU_piv_opc_var4 ( int  m_A,
int  n_A,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
346 {
347  FLA_Error r_val = FLA_SUCCESS;
348  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
349  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
350  int min_m_n = min( m_A, n_A );
351  int i, is_null_pivot;
352 
353  for ( i = 0; i < min_m_n; ++i )
354  {
355  scomplex pivot_val = czero;
356  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
357  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
358  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
359  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
360  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
361  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
362  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
363 
364  int* pi1 = buff_p + i*inc_p;
365 
366  int m_ahead = m_A - i - 1;
367  int n_ahead = n_A - i - 1;
368  int mn_behind = i;
369 
370  /*------------------------------------------------------------*/
371 
372  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
373  bl1_cdots( BLIS1_NO_CONJUGATE,
374  mn_behind,
375  buff_m1,
376  a10t, cs_A,
377  a01, rs_A,
378  buff_1,
379  alpha11 );
380 
381  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
382  bl1_cgemv( BLIS1_NO_TRANSPOSE,
383  BLIS1_NO_CONJUGATE,
384  m_ahead,
385  mn_behind,
386  buff_m1,
387  A20, rs_A, cs_A,
388  a01, rs_A,
389  buff_1,
390  a21, rs_A );
391 
392  // FLA_Merge_2x1( alpha11,
393  // a21, &aB1 );
394 
395  // FLA_Amax_external( aB1, pi1 );
396  bl1_camax( m_ahead + 1,
397  alpha11, rs_A,
398  pi1 );
399 
400  // If a null pivot is encountered, return the index.
401  pivot_val =*(alpha11 + *pi1);
402 
403  is_null_pivot = (pivot_val.real == czero.real && pivot_val.imag == czero.imag);
404  if ( is_null_pivot )
405  {
406  r_val = ( r_val == FLA_SUCCESS ? i : r_val );
407  }
408  else
409  {
410  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
411  FLA_Apply_pivots_ln_opc_var1( 1,
412  alpha11, rs_A, cs_A,
413  0,
414  0,
415  pi1, inc_p );
416 
417  // FLA_Merge_2x1( a10t,
418  // A20, &AB0 );
419 
420  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
421  FLA_Apply_pivots_ln_opc_var1( mn_behind,
422  a10t, rs_A, cs_A,
423  0,
424  0,
425  pi1, inc_p );
426 
427  // FLA_Merge_2x1( a12t,
428  // A22, &AB2 );
429 
430  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
431  FLA_Apply_pivots_ln_opc_var1( n_ahead,
432  a12t, rs_A, cs_A,
433  0,
434  0,
435  pi1, inc_p );
436  }
437 
438  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
439  bl1_cgemv( BLIS1_TRANSPOSE,
440  BLIS1_NO_CONJUGATE,
441  mn_behind,
442  n_ahead,
443  buff_m1,
444  A02, rs_A, cs_A,
445  a10t, cs_A,
446  buff_1,
447  a12t, cs_A );
448 
449  if ( ! is_null_pivot )
450  {
451  // FLA_Inv_scal_external( alpha11, a21 );
452  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
453  m_ahead,
454  alpha11,
455  a21, rs_A );
456  }
457  /*------------------------------------------------------------*/
458 
459  }
460 
461  return r_val;
462 }
@ BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55

References bl1_camax(), bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, FLA_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var4().

◆ FLA_LU_piv_opc_var5()

FLA_Error FLA_LU_piv_opc_var5 ( int  m_A,
int  n_A,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
243 {
244  FLA_Error r_val = FLA_SUCCESS;
245  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
246  int min_m_n = min( m_A, n_A );
247  int i;
248 
249  for ( i = 0; i < min_m_n; ++i )
250  {
251  scomplex pivot_val = czero;
252  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
253  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
254  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
255  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
256  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
257 
258  int* pi1 = buff_p + i*inc_p;
259 
260  int m_ahead = m_A - i - 1;
261  int n_ahead = n_A - i - 1;
262 
263  /*------------------------------------------------------------*/
264 
265  // FLA_Merge_2x1( alpha11,
266  // a21, &aB1 );
267 
268  // FLA_Amax_external( aB1, pi1 );
269  bl1_camax( m_ahead + 1,
270  alpha11, rs_A,
271  pi1 );
272 
273  // If a null pivot is encountered, return the index.
274  pivot_val = *(alpha11 + *pi1);
275  if ( pivot_val.real == czero.real &&
276  pivot_val.imag == czero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
277  else
278  {
279  // FLA_Merge_1x2( ABL, ABR, &AB );
280 
281  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
282  FLA_Apply_pivots_ln_opc_var1( n_A,
283  a10t, rs_A, cs_A,
284  0,
285  0,
286  pi1, inc_p );
287 
288  // FLA_Inv_scal_external( alpha11, a21 );
289  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
290  m_ahead,
291  alpha11,
292  a21, rs_A );
293  }
294 
295  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
296  bl1_cger( BLIS1_NO_CONJUGATE,
297  BLIS1_NO_CONJUGATE,
298  m_ahead,
299  n_ahead,
300  buff_m1,
301  a21, rs_A,
302  a12t, cs_A,
303  A22, rs_A, cs_A );
304 
305  /*------------------------------------------------------------*/
306 
307  }
308 
309  return r_val;
310 }
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111

References bl1_camax(), bl1_cger(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, czero, FLA_Apply_pivots_ln_opc_var1(), FLA_MINUS_ONE, i, scomplex::imag, and scomplex::real.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_opd_var3()

FLA_Error FLA_LU_piv_opd_var3 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
234 {
235  FLA_Error r_val = FLA_SUCCESS;
236  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
237  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
238  int min_m_n = min( m_A, n_A );
239  int i;
240 
241  for ( i = 0; i < min_m_n; ++i )
242  {
243  double pivot_val = dzero;
244  double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
245  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
246  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
247  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
248  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
249  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
250 
251  double* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
252 
253  int* p0 = buff_p;
254  int* pi1 = buff_p + i*inc_p;
255 
256  int m_ahead = m_A - i - 1;
257  int mn_behind = i;
258 
259  /*------------------------------------------------------------*/
260 
261  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
262  FLA_Apply_pivots_ln_opd_var1( 1,
263  a1, rs_A, cs_A,
264  0,
265  mn_behind - 1,
266  p0, inc_p );
267 
268  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
269  bl1_dtrsv( BLIS1_LOWER_TRIANGULAR,
270  BLIS1_NO_TRANSPOSE,
271  BLIS1_UNIT_DIAG,
272  mn_behind,
273  A00, rs_A, cs_A,
274  a01, rs_A );
275 
276  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
277  bl1_ddots( BLIS1_NO_CONJUGATE,
278  mn_behind,
279  buff_m1,
280  a10t, cs_A,
281  a01, rs_A,
282  buff_1,
283  alpha11 );
284 
285  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
286  bl1_dgemv( BLIS1_NO_TRANSPOSE,
287  BLIS1_NO_CONJUGATE,
288  m_ahead,
289  mn_behind,
290  buff_m1,
291  A20, rs_A, cs_A,
292  a01, rs_A,
293  buff_1,
294  a21, rs_A );
295 
296  // FLA_Merge_2x1( alpha11,
297  // a21, &aB1 );
298 
299  // FLA_Amax_external( aB1, pi1 );
300  bl1_damax( m_ahead + 1,
301  alpha11, rs_A,
302  pi1 );
303 
304  // If a null pivot is encountered, return the index.
305  pivot_val =*(alpha11 + *pi1);
306  if ( pivot_val == dzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
307  else
308  {
309  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
310  FLA_Apply_pivots_ln_opd_var1( 1,
311  alpha11, rs_A, cs_A,
312  0,
313  0,
314  pi1, inc_p );
315 
316  // FLA_Inv_scal_external( alpha11, a21 );
317  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
318  m_ahead,
319  alpha11,
320  a21, rs_A );
321 
322  // FLA_Merge_2x1( a10t,
323  // A20, &AB0 );
324 
325  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
326  FLA_Apply_pivots_ln_opd_var1( mn_behind,
327  a10t, rs_A, cs_A,
328  0,
329  0,
330  pi1, inc_p );
331  }
332  /*------------------------------------------------------------*/
333 
334  }
335 
336  if ( m_A < n_A )
337  {
338  double* ATL = buff_A;
339  double* ATR = buff_A + m_A*cs_A;
340 
341  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
342  FLA_Apply_pivots_ln_opd_var1( n_A - m_A,
343  ATR, rs_A, cs_A,
344  0,
345  m_A - 1,
346  buff_p, inc_p );
347 
348  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
349  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
350  // FLA_ONE, ATL, ATR );
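351  bl1_dtrsm( BLIS1_LEFT,
352  BLIS1_LOWER_TRIANGULAR,
353  BLIS1_NO_TRANSPOSE,
354  BLIS1_UNIT_DIAG,
355  m_A,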
356  n_A - m_A,
357  buff_1,
358  ATL, rs_A, cs_A,
359  ATR, rs_A, cs_A );
360  }
361 
362  return r_val;
363 }
FLA_Error FLA_Apply_pivots_ln_opd_var1(int n, double *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:274
const double dzero
Definition: FLA_Init.c:37
void bl1_damax(int n, double *x, int incx, int *index)
Definition: bl1_amax.c:24
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition: bl1_dots.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26
void bl1_dtrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:116
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trsv.c:56

References bl1_damax(), bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), bl1_dtrsm(), bl1_dtrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_opd_var4()

FLA_Error FLA_LU_piv_opd_var4 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
222 {
223  FLA_Error r_val = FLA_SUCCESS;
224  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
225  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
226  int min_m_n = min( m_A, n_A );
227  int i, is_null_pivot;
228 
229  for ( i = 0; i < min_m_n; ++i )
230  {
231  double pivot_val = dzero;
232  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
233  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
234  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
235  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
236  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
237  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
238  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
239 
240  int* pi1 = buff_p + i*inc_p;
241 
242  int m_ahead = m_A - i - 1;
243  int n_ahead = n_A - i - 1;
244  int mn_behind = i;
245 
246  /*------------------------------------------------------------*/
247 
248  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
249  bl1_ddots( BLIS1_NO_CONJUGATE,
250  mn_behind,
251  buff_m1,
252  a10t, cs_A,
253  a01, rs_A,
254  buff_1,
255  alpha11 );
256 
257  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
258  bl1_dgemv( BLIS1_NO_TRANSPOSE,
259  BLIS1_NO_CONJUGATE,
260  m_ahead,
261  mn_behind,
262  buff_m1,
263  A20, rs_A, cs_A,
264  a01, rs_A,
265  buff_1,
266  a21, rs_A );
267 
268  // FLA_Merge_2x1( alpha11,
269  // a21, &aB1 );
270 
271  // FLA_Amax_external( aB1, pi1 );
272  bl1_damax( m_ahead + 1,
273  alpha11, rs_A,
274  pi1 );
275 
276  // If a null pivot is encountered, return the index.
277  pivot_val =*(alpha11 + *pi1);
278 
279  is_null_pivot = (pivot_val == dzero);
280  if ( is_null_pivot )
281  {
282  r_val = ( r_val == FLA_SUCCESS ? i : r_val );
283  }
284  else
285  {
286  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
287  FLA_Apply_pivots_ln_opd_var1( 1,
288  alpha11, rs_A, cs_A,
289  0,
290  0,
291  pi1, inc_p );
292 
293  // FLA_Merge_2x1( a10t,
294  // A20, &AB0 );
295 
296  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
297  FLA_Apply_pivots_ln_opd_var1( mn_behind,
298  a10t, rs_A, cs_A,
299  0,
300  0,
301  pi1, inc_p );
302 
303  // FLA_Merge_2x1( a12t,
304  // A22, &AB2 );
305 
306  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
307  FLA_Apply_pivots_ln_opd_var1( n_ahead,
308  a12t, rs_A, cs_A,
309  0,
310  0,
311  pi1, inc_p );
312  }
313 
314  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
315  bl1_dgemv( BLIS1_TRANSPOSE,
316  BLIS1_NO_CONJUGATE,
317  mn_behind,
318  n_ahead,
319  buff_m1,
320  A02, rs_A, cs_A,
321  a10t, cs_A,
322  buff_1,
323  a12t, cs_A );
324 
325  if ( ! is_null_pivot )
326  {
327  // FLA_Inv_scal_external( alpha11, a21 );
328  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
329  m_ahead,
330  alpha11,
331  a21, rs_A );
332  }
333  /*------------------------------------------------------------*/
334 
335  }
336 
337  return r_val;
338 }

References bl1_damax(), bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_piv_opt_var4().

◆ FLA_LU_piv_opd_var5()

FLA_Error FLA_LU_piv_opd_var5 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
169 {
170  FLA_Error r_val = FLA_SUCCESS;
171  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
172  int min_m_n = min( m_A, n_A );
173  int i;
174 
175  for ( i = 0; i < min_m_n; ++i )
176  {
177  double pivot_val = dzero;
178  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
179  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
182  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
183 
184  int* pi1 = buff_p + i*inc_p;
185 
186  int m_ahead = m_A - i - 1;
187  int n_ahead = n_A - i - 1;
188 
189  /*------------------------------------------------------------*/
190 
191  // FLA_Merge_2x1( alpha11,
192  // a21, &aB1 );
193 
194  // FLA_Amax_external( aB1, pi1 );
195  bl1_damax( m_ahead + 1,
196  alpha11, rs_A,
197  pi1 );
198 
199  // If a null pivot is encountered, return the index.
200  pivot_val = *(alpha11 + *pi1);
201  if ( pivot_val == dzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
202  else
203  {
204  // FLA_Merge_1x2( ABL, ABR, &AB );
205 
206  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
207  FLA_Apply_pivots_ln_opd_var1( n_A,
208  a10t, rs_A, cs_A,
209  0,
210  0,
211  pi1, inc_p );
212 
213  // FLA_Inv_scal_external( alpha11, a21 );
214  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
215  m_ahead,
216  alpha11,
217  a21, rs_A );
218  }
219 
220  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
221  bl1_dger( BLIS1_NO_CONJUGATE,
222  BLIS1_NO_CONJUGATE,
223  m_ahead,
224  n_ahead,
225  buff_m1,
226  a21, rs_A,
227  a12t, cs_A,
228  A22, rs_A, cs_A );
229 
230  /*------------------------------------------------------------*/
231 
232  }
233 
234  return r_val;
235 }
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62

References bl1_damax(), bl1_dger(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, dzero, FLA_Apply_pivots_ln_opd_var1(), FLA_MINUS_ONE, and i.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_ops_var3()

FLA_Error FLA_LU_piv_ops_var3 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
97 {
98  FLA_Error r_val = FLA_SUCCESS;
99  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
100  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
101  int min_m_n = min( m_A, n_A );
102  int i;
103 
104  for ( i = 0; i < min_m_n; ++i )
105  {
106  float pivot_val = fzero;
107  float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
108  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
109  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
110  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
111  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
112  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
113 
114  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
115 
116  int* p0 = buff_p;
117  int* pi1 = buff_p + i*inc_p;
118 
119  int m_ahead = m_A - i - 1;
120  int mn_behind = i;
121 
122  /*------------------------------------------------------------*/
123 
124  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
125  FLA_Apply_pivots_ln_ops_var1( 1,
126  a1, rs_A, cs_A,
127  0,
128  mn_behind - 1,
129  p0, inc_p );
130 
131  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
132  bl1_strsv( BLIS1_LOWER_TRIANGULAR,
133  BLIS1_NO_TRANSPOSE,
134  BLIS1_UNIT_DIAG,
135  mn_behind,
136  A00, rs_A, cs_A,
137  a01, rs_A );
138 
139  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
140  bl1_sdots( BLIS1_NO_CONJUGATE,
141  mn_behind,
142  buff_m1,
143  a10t, cs_A,
144  a01, rs_A,
145  buff_1,
146  alpha11 );
147 
148  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
149  bl1_sgemv( BLIS1_NO_TRANSPOSE,
150  BLIS1_NO_CONJUGATE,
151  m_ahead,
152  mn_behind,
153  buff_m1,
154  A20, rs_A, cs_A,
155  a01, rs_A,
156  buff_1,
157  a21, rs_A );
158 
159  // FLA_Merge_2x1( alpha11,
160  // a21, &aB1 );
161 
162  // FLA_Amax_external( aB1, pi1 );
163  bl1_samax( m_ahead + 1,
164  alpha11, rs_A,
165  pi1 );
166 
167  // If a null pivot is encountered, return the index.
168  pivot_val = *(alpha11 + *pi1);
169  if ( pivot_val == fzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
170  else
171  {
172  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
173  FLA_Apply_pivots_ln_ops_var1( 1,
174  alpha11, rs_A, cs_A,
175  0,
176  0,
177  pi1, inc_p );
178 
179  // FLA_Inv_scal_external( alpha11, a21 );
180  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
181  m_ahead,
182  alpha11,
183  a21, rs_A );
184 
185  // FLA_Merge_2x1( a10t,
186  // A20, &AB0 );
187 
188  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
189  FLA_Apply_pivots_ln_ops_var1( mn_behind,
190  a10t, rs_A, cs_A,
191  0,
192  0,
193  pi1, inc_p );
194  }
195  /*------------------------------------------------------------*/
196 
197  }
198 
199  if ( m_A < n_A )
200  {
201  float* ATL = buff_A;
202  float* ATR = buff_A + m_A*cs_A;
203 
204  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
205  FLA_Apply_pivots_ln_ops_var1( n_A - m_A,
206  ATR, rs_A, cs_A,
207  0,
208  m_A - 1,
209  buff_p, inc_p );
210 
211  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
212  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
213  // FLA_ONE, ATL, ATR );
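214  bl1_strsm( BLIS1_LEFT,
215  BLIS1_LOWER_TRIANGULAR,
216  BLIS1_NO_TRANSPOSE,
217  BLIS1_UNIT_DIAG,
218  m_A,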
219  n_A - m_A,
220  buff_1,
221  ATL, rs_A, cs_A,
222  ATR, rs_A, cs_A );
223  }
224 
225  return r_val;
226 }
FLA_Error FLA_Apply_pivots_ln_ops_var1(int n, float *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:192
const float fzero
Definition: FLA_Init.c:36
void bl1_samax(int n, float *x, int incx, int *index)
Definition: bl1_amax.c:13
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13
void bl1_strsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:13
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trsv.c:13

References bl1_samax(), bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), bl1_strsm(), bl1_strsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, FLA_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_ops_var4()

FLA_Error FLA_LU_piv_ops_var4 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
97 {
98  FLA_Error r_val = FLA_SUCCESS;
99  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
100  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
101  int min_m_n = min( m_A, n_A );
102  int i, is_null_pivot;
103 
104 
105  for ( i = 0; i < min_m_n; ++i )
106  {
107  float pivot_val = fzero;
108  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
109  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
110  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
111  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
112  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
113  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
114  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
115 
116  int* pi1 = buff_p + i*inc_p;
117 
118  int m_ahead = m_A - i - 1;
119  int n_ahead = n_A - i - 1;
120  int mn_behind = i;
121 
122  /*------------------------------------------------------------*/
123 
124  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
125  bl1_sdots( BLIS1_NO_CONJUGATE,
126  mn_behind,
127  buff_m1,
128  a10t, cs_A,
129  a01, rs_A,
130  buff_1,
131  alpha11 );
132 
133  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
134  bl1_sgemv( BLIS1_NO_TRANSPOSE,
135  BLIS1_NO_CONJUGATE,
136  m_ahead,
137  mn_behind,
138  buff_m1,
139  A20, rs_A, cs_A,
140  a01, rs_A,
141  buff_1,
142  a21, rs_A );
143 
144  // FLA_Merge_2x1( alpha11,
145  // a21, &aB1 );
146 
147  // FLA_Amax_external( aB1, pi1 );
148  bl1_samax( m_ahead + 1,
149  alpha11, rs_A,
150  pi1 );
151 
152  // If a null pivot is encountered, return the index.
153  pivot_val = *(alpha11 + *pi1);
154 
155  is_null_pivot = (pivot_val == fzero);
156  if ( is_null_pivot )
157  {
158  r_val = ( r_val == FLA_SUCCESS ? i : r_val );
159  }
160  else
161  {
162  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
163  FLA_Apply_pivots_ln_ops_var1( 1,
164  alpha11, rs_A, cs_A,
165  0,
166  0,
167  pi1, inc_p );
168 
169  // FLA_Merge_2x1( a10t,
170  // A20, &AB0 );
171 
172  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
173  FLA_Apply_pivots_ln_ops_var1( mn_behind,
174  a10t, rs_A, cs_A,
175  0,
176  0,
177  pi1, inc_p );
178 
179  // FLA_Merge_2x1( a12t,
180  // A22, &AB2 );
181 
182  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
183  FLA_Apply_pivots_ln_ops_var1( n_ahead,
184  a12t, rs_A, cs_A,
185  0,
186  0,
187  pi1, inc_p );
188  }
189 
190  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
191  bl1_sgemv( BLIS1_TRANSPOSE,
192  BLIS1_NO_CONJUGATE,
193  mn_behind,
194  n_ahead,
195  buff_m1,
196  A02, rs_A, cs_A,
197  a10t, cs_A,
198  buff_1,
199  a12t, cs_A );
200 
201  if ( ! is_null_pivot )
202  {
203  // FLA_Inv_scal_external( alpha11, a21 );
204  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
205  m_ahead,
206  alpha11,
207  a21, rs_A );
208  }
209  /*------------------------------------------------------------*/
210 
211  }
212 
213  return r_val;
214 }

References bl1_samax(), bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, FLA_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var4().

◆ FLA_LU_piv_ops_var5()

FLA_Error FLA_LU_piv_ops_var5 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
95 {
96  FLA_Error r_val = FLA_SUCCESS;
97  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
98  int min_m_n = min( m_A, n_A );
99  int i;
100 
101  for ( i = 0; i < min_m_n; ++i )
102  {
103  float pivot_val = fzero;
104  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
105  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
106  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
107  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
108  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
109 
110  int* pi1 = buff_p + i*inc_p;
111 
112  int m_ahead = m_A - i - 1;
113  int n_ahead = n_A - i - 1;
114 
115  /*------------------------------------------------------------*/
116 
117  // FLA_Merge_2x1( alpha11,
118  // a21, &aB1 );
119 
120  // FLA_Amax_external( aB1, pi1 );
121  bl1_samax( m_ahead + 1,
122  alpha11, rs_A,
123  pi1 );
124 
125  // If a null pivot is encountered, return the index.
126  pivot_val = *(alpha11 + *pi1);
127  if ( pivot_val == fzero ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
128  else
129  {
130  // FLA_Merge_1x2( ABL, ABR, &AB );
131 
132  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
133  FLA_Apply_pivots_ln_ops_var1( n_A,
134  a10t, rs_A, cs_A,
135  0,
136  0,
137  pi1, inc_p );
138 
139  // FLA_Inv_scal_external( alpha11, a21 );
140  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
141  m_ahead,
142  alpha11,
143  a21, rs_A );
144  }
145 
146  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
147  bl1_sger( BLIS1_NO_CONJUGATE,
148  BLIS1_NO_CONJUGATE,
149  m_ahead,
150  n_ahead,
151  buff_m1,
152  a21, rs_A,
153  a12t, cs_A,
154  A22, rs_A, cs_A );
155 
156  /*------------------------------------------------------------*/
157 
158  }
159 
160  return r_val;
161 }
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13

References bl1_samax(), bl1_sger(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_pivots_ln_ops_var1(), FLA_MINUS_ONE, fzero, and i.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_opt_var3()

FLA_Error FLA_LU_piv_opt_var3 ( FLA_Obj  A,
FLA_Obj  p 
)
16 {
17  FLA_Error r_val = FLA_SUCCESS;
18  FLA_Datatype datatype;
19  int m_A, n_A;
20  int rs_A, cs_A;
21  int inc_p;
22 
23  datatype = FLA_Obj_datatype( A );
24 
25  m_A = FLA_Obj_length( A );
26  n_A = FLA_Obj_width( A );
27  rs_A = FLA_Obj_row_stride( A );
28  cs_A = FLA_Obj_col_stride( A );
29 
30  inc_p = FLA_Obj_vector_inc( p );
31 
32 
33  switch ( datatype )
34  {
35  case FLA_FLOAT:
36  {
37  float* buff_A = FLA_FLOAT_PTR( A );
38  int* buff_p = FLA_INT_PTR( p );
39 
40  r_val = FLA_LU_piv_ops_var3( m_A,
41  n_A,
42  buff_A, rs_A, cs_A,
43  buff_p, inc_p );
44 
45  break;
46  }
47 
48  case FLA_DOUBLE:
49  {
50  double* buff_A = FLA_DOUBLE_PTR( A );
51  int* buff_p = FLA_INT_PTR( p );
52 
53  r_val = FLA_LU_piv_opd_var3( m_A,
54  n_A,
55  buff_A, rs_A, cs_A,
56  buff_p, inc_p );
57 
58  break;
59  }
60 
61  case FLA_COMPLEX:
62  {
63  scomplex* buff_A = FLA_COMPLEX_PTR( A );
64  int* buff_p = FLA_INT_PTR( p );
65 
66  r_val = FLA_LU_piv_opc_var3( m_A,
67  n_A,
68  buff_A, rs_A, cs_A,
69  buff_p, inc_p );
70 
71  break;
72  }
73 
74  case FLA_DOUBLE_COMPLEX:
75  {
76  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
77  int* buff_p = FLA_INT_PTR( p );
78 
79  r_val = FLA_LU_piv_opz_var3( m_A,
80  n_A,
81  buff_A, rs_A, cs_A,
82  buff_p, inc_p );
83 
84  break;
85  }
86  }
87 
88  return r_val;
89 }
FLA_Error FLA_LU_piv_opz_var3(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var3.c:505
FLA_Error FLA_LU_piv_opc_var3(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var3.c:367
FLA_Error FLA_LU_piv_opd_var3(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var3.c:230
FLA_Error FLA_LU_piv_ops_var3(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var3.c:93
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dcomplex
Definition: blis_type_defs.h:138

References FLA_LU_piv_opc_var3(), FLA_LU_piv_opd_var3(), FLA_LU_piv_ops_var3(), FLA_LU_piv_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opt_var4()

FLA_Error FLA_LU_piv_opt_var4 ( FLA_Obj  A,
FLA_Obj  p 
)
16 {
17  FLA_Error r_val = FLA_SUCCESS;
18  FLA_Datatype datatype;
19  int m_A, n_A;
20  int rs_A, cs_A;
21  int inc_p;
22 
23  datatype = FLA_Obj_datatype( A );
24 
25  m_A = FLA_Obj_length( A );
26  n_A = FLA_Obj_width( A );
27  rs_A = FLA_Obj_row_stride( A );
28  cs_A = FLA_Obj_col_stride( A );
29 
30  inc_p = FLA_Obj_vector_inc( p );
31 
32 
33  switch ( datatype )
34  {
35  case FLA_FLOAT:
36  {
37  float* buff_A = FLA_FLOAT_PTR( A );
38  int* buff_p = FLA_INT_PTR( p );
39 
40  r_val = FLA_LU_piv_ops_var4( m_A,
41  n_A,
42  buff_A, rs_A, cs_A,
43  buff_p, inc_p );
44 
45  break;
46  }
47 
48  case FLA_DOUBLE:
49  {
50  double* buff_A = FLA_DOUBLE_PTR( A );
51  int* buff_p = FLA_INT_PTR( p );
52 
53  r_val = FLA_LU_piv_opd_var4( m_A,
54  n_A,
55  buff_A, rs_A, cs_A,
56  buff_p, inc_p );
57 
58  break;
59  }
60 
61  case FLA_COMPLEX:
62  {
63  scomplex* buff_A = FLA_COMPLEX_PTR( A );
64  int* buff_p = FLA_INT_PTR( p );
65 
66  r_val = FLA_LU_piv_opc_var4( m_A,
67  n_A,
68  buff_A, rs_A, cs_A,
69  buff_p, inc_p );
70 
71  break;
72  }
73 
74  case FLA_DOUBLE_COMPLEX:
75  {
76  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
77  int* buff_p = FLA_INT_PTR( p );
78 
79  r_val = FLA_LU_piv_opz_var4( m_A,
80  n_A,
81  buff_A, rs_A, cs_A,
82  buff_p, inc_p );
83 
84  break;
85  }
86  }
87 
88  return r_val;
89 }
FLA_Error FLA_LU_piv_opc_var4(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var4.c:342
FLA_Error FLA_LU_piv_ops_var4(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var4.c:93
FLA_Error FLA_LU_piv_opd_var4(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var4.c:218
FLA_Error FLA_LU_piv_opz_var4(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var4.c:466

References FLA_LU_piv_opc_var4(), FLA_LU_piv_opd_var4(), FLA_LU_piv_ops_var4(), FLA_LU_piv_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opt_var5()

FLA_Error FLA_LU_piv_opt_var5 ( FLA_Obj  A,
FLA_Obj  p 
)
14 {
15  FLA_Error r_val = FLA_SUCCESS;
16  FLA_Datatype datatype;
17  int m_A, n_A;
18  int rs_A, cs_A;
19  int inc_p;
20 
21  datatype = FLA_Obj_datatype( A );
22 
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25  rs_A = FLA_Obj_row_stride( A );
26  cs_A = FLA_Obj_col_stride( A );
27 
28  inc_p = FLA_Obj_vector_inc( p );
29 
30 
31  switch ( datatype )
32  {
33  case FLA_FLOAT:
34  {
35  float* buff_A = FLA_FLOAT_PTR( A );
36  int* buff_p = FLA_INT_PTR( p );
37 
38  r_val = FLA_LU_piv_ops_var5( m_A,
39  n_A,
40  buff_A, rs_A, cs_A,
41  buff_p, inc_p );
42 
43  break;
44  }
45 
46  case FLA_DOUBLE:
47  {
48  double* buff_A = FLA_DOUBLE_PTR( A );
49  int* buff_p = FLA_INT_PTR( p );
50 
51  r_val = FLA_LU_piv_opd_var5( m_A,
52  n_A,
53  buff_A, rs_A, cs_A,
54  buff_p, inc_p );
55 
56  break;
57  }
58 
59  case FLA_COMPLEX:
60  {
61  scomplex* buff_A = FLA_COMPLEX_PTR( A );
62  int* buff_p = FLA_INT_PTR( p );
63 
64  r_val = FLA_LU_piv_opc_var5( m_A,
65  n_A,
66  buff_A, rs_A, cs_A,
67  buff_p, inc_p );
68 
69  break;
70  }
71 
72  case FLA_DOUBLE_COMPLEX:
73  {
74  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
75  int* buff_p = FLA_INT_PTR( p );
76 
77  r_val = FLA_LU_piv_opz_var5( m_A,
78  n_A,
79  buff_A, rs_A, cs_A,
80  buff_p, inc_p );
81 
82  break;
83  }
84  }
85 
86  return r_val;
87 }
FLA_Error FLA_LU_piv_opz_var5(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var5.c:314
FLA_Error FLA_LU_piv_opc_var5(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var5.c:239
FLA_Error FLA_LU_piv_ops_var5(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var5.c:91
FLA_Error FLA_LU_piv_opd_var5(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, int *buff_p, int inc_p)
Definition: FLA_LU_piv_opt_var5.c:165

References FLA_LU_piv_opc_var5(), FLA_LU_piv_opd_var5(), FLA_LU_piv_ops_var5(), FLA_LU_piv_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_opz_var3()

FLA_Error FLA_LU_piv_opz_var3 ( int  m_A,
int  n_A,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
509 {
510  FLA_Error r_val = FLA_SUCCESS;
511  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
512  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
513  int min_m_n = min( m_A, n_A );
514  int i;
515 
516  for ( i = 0; i < min_m_n; ++i )
517  {
518  dcomplex pivot_val = zzero;
519  dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
520  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
521  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
522  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
523  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
524  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
525 
526  dcomplex* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
527 
528  int* p0 = buff_p;
529  int* pi1 = buff_p + i*inc_p;
530 
531  int m_ahead = m_A - i - 1;
532  int mn_behind = i;
533 
534  /*------------------------------------------------------------*/
535 
536  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
537  FLA_Apply_pivots_ln_opz_var1( 1,
538  a1, rs_A, cs_A,
539  0,
540  mn_behind - 1,
541  p0, inc_p );
542 
543  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
544  bl1_ztrsv( BLIS1_LOWER_TRIANGULAR,
545  BLIS1_NO_TRANSPOSE,
546  BLIS1_UNIT_DIAG,
547  mn_behind,
548  A00, rs_A, cs_A,
549  a01, rs_A );
550 
551  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
552  bl1_zdots( BLIS1_NO_CONJUGATE,
553  mn_behind,
554  buff_m1,
555  a10t, cs_A,
556  a01, rs_A,
557  buff_1,
558  alpha11 );
559 
560  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
561  bl1_zgemv( BLIS1_NO_TRANSPOSE,
562  BLIS1_NO_CONJUGATE,
563  m_ahead,
564  mn_behind,
565  buff_m1,
566  A20, rs_A, cs_A,
567  a01, rs_A,
568  buff_1,
569  a21, rs_A );
570 
571  // FLA_Merge_2x1( alpha11,
572  // a21, &aB1 );
573 
574  // FLA_Amax_external( aB1, pi1 );
575  bl1_zamax( m_ahead + 1,
576  alpha11, rs_A,
577  pi1 );
578 
579  // If a null pivot is encountered, return the index.
580  pivot_val =*(alpha11 + *pi1);
581  if ( pivot_val.real == zzero.real &&
582  pivot_val.imag == zzero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
583  else
584  {
585  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
586  FLA_Apply_pivots_ln_opz_var1( 1,
587  alpha11, rs_A, cs_A,
588  0,
589  0,
590  pi1, inc_p );
591 
592  // FLA_Inv_scal_external( alpha11, a21 );
593  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
594  m_ahead,
595  alpha11,
596  a21, rs_A );
597 
598  // FLA_Merge_2x1( a10t,
599  // A20, &AB0 );
600 
601  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
602  FLA_Apply_pivots_ln_opz_var1( mn_behind,
603  a10t, rs_A, cs_A,
604  0,
605  0,
606  pi1, inc_p );
607  }
608  /*------------------------------------------------------------*/
609 
610  }
611 
612  if ( m_A < n_A )
613  {
614  dcomplex* ATL = buff_A;
615  dcomplex* ATR = buff_A + m_A*cs_A;
616 
617  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
618  FLA_Apply_pivots_ln_opz_var1( n_A - m_A,
619  ATR, rs_A, cs_A,
620  0,
621  m_A - 1,
622  buff_p, inc_p );
623 
624  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
625  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
626  // FLA_ONE, ATL, ATR );
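627  bl1_ztrsm( BLIS1_LEFT,
628  BLIS1_LOWER_TRIANGULAR,
629  BLIS1_NO_TRANSPOSE,
630  BLIS1_UNIT_DIAG,
631  m_A,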
632  n_A - m_A,
633  buff_1,
634  ATL, rs_A, cs_A,
635  ATR, rs_A, cs_A );
636  }
637 
638  return r_val;
639 }
FLA_Error FLA_Apply_pivots_ln_opz_var1(int n, dcomplex *a, int a_rs, int a_cs, int k1, int k2, int *p, int incp)
Definition: FLA_Apply_pivots_ln_opt_var1.c:438
const dcomplex zzero
Definition: FLA_Init.c:39
void bl1_zamax(int n, dcomplex *x, int incx, int *index)
Definition: bl1_amax.c:46
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition: bl1_dots.c:56
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78
void bl1_ztrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:369
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trsv.c:177
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References bl1_zamax(), bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), bl1_ztrsm(), bl1_ztrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, FLA_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var3().

◆ FLA_LU_piv_opz_var4()

FLA_Error FLA_LU_piv_opz_var4 ( int  m_A,
int  n_A,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
470 {
471  FLA_Error r_val = FLA_SUCCESS;
472  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
473  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
474  int min_m_n = min( m_A, n_A );
475  int i, is_null_pivot;
476 
477  for ( i = 0; i < min_m_n; ++i )
478  {
479  dcomplex pivot_val = zzero;
480  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
481  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
482  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
483  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
484  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
485  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
486  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
487 
488  int* pi1 = buff_p + i*inc_p;
489 
490  int m_ahead = m_A - i - 1;
491  int n_ahead = n_A - i - 1;
492  int mn_behind = i;
493 
494  /*------------------------------------------------------------*/
495 
496  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
497  bl1_zdots( BLIS1_NO_CONJUGATE,
498  mn_behind,
499  buff_m1,
500  a10t, cs_A,
501  a01, rs_A,
502  buff_1,
503  alpha11 );
504 
505  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
506  bl1_zgemv( BLIS1_NO_TRANSPOSE,
507  BLIS1_NO_CONJUGATE,
508  m_ahead,
509  mn_behind,
510  buff_m1,
511  A20, rs_A, cs_A,
512  a01, rs_A,
513  buff_1,
514  a21, rs_A );
515 
516  // FLA_Merge_2x1( alpha11,
517  // a21, &aB1 );
518 
519  // FLA_Amax_external( aB1, pi1 );
520  bl1_zamax( m_ahead + 1,
521  alpha11, rs_A,
522  pi1 );
523 
524  // If a null pivot is encountered, return the index.
525  pivot_val =*(alpha11 + *pi1);
526 
527  is_null_pivot = (pivot_val.real == zzero.real && pivot_val.imag == zzero.imag);
528  if ( is_null_pivot )
529  {
530  r_val = ( r_val == FLA_SUCCESS ? i : r_val );
531  }
532  else
533  {
534  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
535  FLA_Apply_pivots_ln_opz_var1( 1,
536  alpha11, rs_A, cs_A,
537  0,
538  0,
539  pi1, inc_p );
540 
541  // FLA_Merge_2x1( a10t,
542  // A20, &AB0 );
543 
544  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
545  FLA_Apply_pivots_ln_opz_var1( mn_behind,
546  a10t, rs_A, cs_A,
547  0,
548  0,
549  pi1, inc_p );
550 
551  // FLA_Merge_2x1( a12t,
552  // A22, &AB2 );
553 
554  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
555  FLA_Apply_pivots_ln_opz_var1( n_ahead,
556  a12t, rs_A, cs_A,
557  0,
558  0,
559  pi1, inc_p );
560  }
561 
562  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
563  bl1_zgemv( BLIS1_TRANSPOSE,
564  BLIS1_NO_CONJUGATE,
565  mn_behind,
566  n_ahead,
567  buff_m1,
568  A02, rs_A, cs_A,
569  a10t, cs_A,
570  buff_1,
571  a12t, cs_A );
572 
573  if ( ! is_null_pivot )
574  {
575  // FLA_Inv_scal_external( alpha11, a21 );
576  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
577  m_ahead,
578  alpha11,
579  a21, rs_A );
580  }
581  /*------------------------------------------------------------*/
582 
583  }
584 
585  return r_val;
586 }

References bl1_zamax(), bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, FLA_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var4().

◆ FLA_LU_piv_opz_var5()

FLA_Error FLA_LU_piv_opz_var5 ( int  m_A,
int  n_A,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int *  buff_p,
int  inc_p 
)
318 {
319  FLA_Error r_val = FLA_SUCCESS;
320  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
321  int min_m_n = min( m_A, n_A );
322  int i;
323 
324  for ( i = 0; i < min_m_n; ++i )
325  {
326  dcomplex pivot_val = zzero;
327  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
328  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
329  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
330  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
331  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
332 
333  int* pi1 = buff_p + i*inc_p;
334 
335  int m_ahead = m_A - i - 1;
336  int n_ahead = n_A - i - 1;
337 
338  /*------------------------------------------------------------*/
339 
340  // FLA_Merge_2x1( alpha11,
341  // a21, &aB1 );
342 
343  // FLA_Amax_external( aB1, pi1 );
344  bl1_zamax( m_ahead + 1,
345  alpha11, rs_A,
346  pi1 );
347 
348  // If a null pivot is encountered, return the index.
349  pivot_val = *(alpha11 + *pi1);
350  if ( pivot_val.real == zzero.real &&
351  pivot_val.imag == zzero.imag ) r_val = ( r_val == FLA_SUCCESS ? i : r_val );
352  else
353  {
354  // FLA_Merge_1x2( ABL, ABR, &AB );
355 
356  // FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB );
357  FLA_Apply_pivots_ln_opz_var1( n_A,
358  a10t, rs_A, cs_A,
359  0,
360  0,
361  pi1, inc_p );
362 
363  // FLA_Inv_scal_external( alpha11, a21 );
364  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
365  m_ahead,
366  alpha11,
367  a21, rs_A );
368  }
369  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
370  bl1_zger( BLIS1_NO_CONJUGATE,
371  BLIS1_NO_CONJUGATE,
372  m_ahead,
373  n_ahead,
374  buff_m1,
375  a21, rs_A,
376  a12t, cs_A,
377  A22, rs_A, cs_A );
378 
379  /*------------------------------------------------------------*/
380 
381  }
382 
383  return r_val;
384 }
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194

References bl1_zamax(), bl1_zger(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, FLA_Apply_pivots_ln_opz_var1(), FLA_MINUS_ONE, i, dcomplex::imag, dcomplex::real, and zzero.

Referenced by FLA_LU_piv_opt_var5().

◆ FLA_LU_piv_unb_var3()

/*
 * FLA_LU_piv_unb_var3
 *
 * Unblocked LU factorization with row pivoting (variant 3), written with the
 * FLAME partitioning calls. All work is done on the views of A and p that
 * the FLA_Part_* / FLA_Repart_* calls produce.
 *
 * Parameters
 *   A  matrix object to factor.
 *   p  pivot vector object; the entry exposed as pi1 is filled by
 *      FLA_Amax_external once per iteration.
 *
 * Per iteration, for the current column (a1; the 3x3 view presumably exposes
 * the same column as a01 / alpha11 / a21 - TODO confirm):
 *   1. apply the pivots collected so far (p0) to a1;
 *   2. a01     = trilu( A00 ) \ a01;
 *   3. alpha11 = alpha11 - a10t * a01;
 *   4. a21     = a21 - A20 * a01;
 *   5. determine the pivot of [alpha11; a21] into pi1 and apply it there;
 *   6. a21     = a21 / alpha11;
 *   7. apply pi1 to the columns on the left, [a10t; A20].
 * Columns to the right of the current one are not referenced by the loop
 * body; any that remain in ATR afterwards get all pivots plus a unit-lower
 * triangular solve with ATL.
 *
 * Returns FLA_SUCCESS unconditionally; no zero-pivot test is made before the
 * division in step 6.
 */
FLA_Error FLA_LU_piv_unb_var3 ( FLA_Obj  A,
FLA_Obj  p 
)
16 {
17  FLA_Obj ATL, ATR, A00, a01, A02,
18  ABL, ABR, a10t, alpha11, a12t,
19  A20, a21, A22;
20 
21  FLA_Obj AL, AR, A0, a1, A2;
22 
23  FLA_Obj pT, p0,
24  pB, pi1,
25  p2;
26 
27  FLA_Obj AB0, aB1;
28 
29 
// Initial splits use size 0 for ATL, AL and pT; the Repart calls in the loop
// request one row/column/pivot entry (size argument 1) per iteration.
30  FLA_Part_2x2( A, &ATL, &ATR,
31  &ABL, &ABR, 0, 0, FLA_TL );
32 
33  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
34 
35  FLA_Part_2x1( p, &pT,
36  &pB, 0, FLA_TOP );
37 
// Keep going while ATL has fewer rows AND fewer columns than A.
38  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
39  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
40 
41  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
42  /* ************* */ /* ************************** */
43  &a10t, /**/ &alpha11, &a12t,
44  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
45  1, 1, FLA_BR );
46 
47  FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, /**/ &a1, &A2,
48  1, FLA_RIGHT );
49 
50  FLA_Repart_2x1_to_3x1( pT, &p0,
51  /* ** */ /* *** */
52  &pi1,
53  pB, &p2, 1, FLA_BOTTOM );
54 
55  /*------------------------------------------------------------*/
56 
57  // Apply previously computed pivots
58  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p0, a1 );
59 
60  // a01 = trilu( A00 ) \ a01
61  FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
62 
63  // alpha11 = alpha11 - a10t * a01
64  FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
65 
66  // a21 = a21 - A20 * a01
67  FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
68 
69  // aB1 = / alpha11 \
70  // \ a21 /
71  FLA_Merge_2x1( alpha11,
72  a21, &aB1 );
73 
74  // Determine pivot index
75  FLA_Amax_external( aB1, pi1 );
76 
77  // Apply pivots to current column
78  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
79 
80  // a21 = a21 / alpha11
81  FLA_Inv_scal_external( alpha11, a21 );
82 
83  // AB0 = / a10t \
84  // \ A20 /
85  FLA_Merge_2x1( a10t,
86  A20, &AB0 );
87 
88  // Apply pivots to previous columns
89  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
90 
91  /*------------------------------------------------------------*/
92 
93  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
94  a10t, alpha11, /**/ a12t,
95  /* ************** */ /* ************************ */
96  &ABL, /**/ &ABR, A20, a21, /**/ A22,
97  FLA_TL );
98 
99  FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, a1, /**/ A2,
100  FLA_LEFT );
101 
102  FLA_Cont_with_3x1_to_2x1( &pT, p0,
103  pi1,
104  /* ** */ /* *** */
105  &pB, p2, FLA_TOP );
106 
107  }
108 
// Columns still in ATR were never the "current" column above, so they have
// seen neither the row swaps nor the triangular solve; both are applied here
// in one pass using the whole pivot vector p.
109  if ( FLA_Obj_width( ATR ) > 0 )
110  {
111  /* Apply pivots to untouched columns */
112  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, p, ATR );
113 
114  /* ATR = trilu( ATL ) \ ATR */
115  FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
116  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
117  FLA_ONE, ATL, ATR );
118  }
119 
120  return FLA_SUCCESS;
121 }
FLA_Error FLA_Amax_external(FLA_Obj x, FLA_Obj index)
Definition: FLA_Amax_external.c:13
FLA_Error FLA_Inv_scal_external(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scal_external.c:13
FLA_Error FLA_Dots_external(FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho)
Definition: FLA_Dots_external.c:13
FLA_Error FLA_Gemv_external(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv_external.c:13
FLA_Error FLA_Trsv_external(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x)
Definition: FLA_Trsv_external.c:13
FLA_Error FLA_Apply_pivots(FLA_Side side, FLA_Trans trans, FLA_Obj p, FLA_Obj A)
Definition: FLA_Apply_pivots.c:15

References FLA_Amax_external(), FLA_Apply_pivots(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_Inv_scal_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsv_external().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_unb_var3b()

/*
 * FLA_LU_piv_unb_var3b
 *
 * Unblocked LU factorization with row pivoting (variant 3b), written with
 * the FLAME partitioning calls. All work is done on the views of A and p
 * that the FLA_Part_* / FLA_Repart_* calls produce.
 *
 * Parameters
 *   A  matrix object to factor.
 *   p  pivot vector object; the entry exposed as pi1 is filled by
 *      FLA_Amax_external once per iteration.
 *
 * Per iteration:
 *   1. a01     = trilu( A00 ) \ a01;
 *   2. alpha11 = alpha11 - a10t * a01;
 *   3. a21     = a21 - A20 * a01;
 *   4. determine the pivot of [alpha11; a21] into pi1 and apply it there;
 *   5. apply pi1 to the columns on both sides, [a10t; A20] and [a12t; A22];
 *   6. a21     = a21 / alpha11.
 * Because each pivot is applied to the right-hand columns inside the loop,
 * the trailing pass only performs the unit-lower triangular solve on ATR.
 *
 * Returns FLA_SUCCESS unconditionally; no zero-pivot test is made before the
 * division in step 6.
 */
FLA_Error FLA_LU_piv_unb_var3b ( FLA_Obj  A,
FLA_Obj  p 
)
16 {
17  FLA_Obj ATL, ATR, A00, a01, A02,
18  ABL, ABR, a10t, alpha11, a12t,
19  A20, a21, A22;
20 
21  FLA_Obj pT, p0,
22  pB, pi1,
23  p2;
24 
25  FLA_Obj AB0, aB1, AB2;
26 
27 
// Initial splits use size 0 for ATL and pT; the Repart calls in the loop
// request one row/column/pivot entry (size argument 1) per iteration.
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30 
31  FLA_Part_2x1( p, &pT,
32  &pB, 0, FLA_TOP );
33 
// Keep going while ATL has fewer rows AND fewer columns than A.
34  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
35  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
38  /* ************* */ /* ************************** */
39  &a10t, /**/ &alpha11, &a12t,
40  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
41  1, 1, FLA_BR );
42 
43  FLA_Repart_2x1_to_3x1( pT, &p0,
44  /* ** */ /* *** */
45  &pi1,
46  pB, &p2, 1, FLA_BOTTOM );
47 
48  /*------------------------------------------------------------*/
49 
50  // a01 = trilu( A00 ) \ a01
51  FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
52 
53  // alpha11 = alpha11 - a10t * a01
54  FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
55 
56  // a21 = a21 - A20 * a01
57  FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
58 
59  // aB1 = / alpha11 \
60  // \ a21 /
61  FLA_Merge_2x1( alpha11,
62  a21, &aB1 );
63 
64  // Determine pivot index
65  FLA_Amax_external( aB1, pi1 );
66 
67  // Apply pivots to current column
68  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
69 
70  // AB0 = / a10t \
71  // \ A20 /
72  FLA_Merge_2x1( a10t,
73  A20, &AB0 );
74 
75  // AB2 = / a12t \
76  // \ A22 /
77  FLA_Merge_2x1( a12t,
78  A22, &AB2 );
79 
80  // Apply pivots to remaining columns
81  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
82  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
83 
84  // a21 = a21 / alpha11
85  FLA_Inv_scal_external( alpha11, a21 );
86 
87  /*------------------------------------------------------------*/
88 
89  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
90  a10t, alpha11, /**/ a12t,
91  /* ************** */ /* ************************ */
92  &ABL, /**/ &ABR, A20, a21, /**/ A22,
93  FLA_TL );
94 
95  FLA_Cont_with_3x1_to_2x1( &pT, p0,
96  pi1,
97  /* ** */ /* *** */
98  &pB, p2, FLA_TOP );
99 
100  }
101 
// Row swaps already reached ATR through AB2 inside the loop, so only the
// triangular solve is left for any columns that remain in ATR.
102  if ( FLA_Obj_width( ATR ) > 0 )
103  {
104  /* ATR = trilu( ATL ) \ ATR */
105  FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
106  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
107  FLA_ONE, ATL, ATR );
108  }
109 
110  return FLA_SUCCESS;
111 }

References FLA_Amax_external(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_Inv_scal_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsv_external().

◆ FLA_LU_piv_unb_var4()

/*
 * FLA_LU_piv_unb_var4
 *
 * Unblocked LU factorization with row pivoting (variant 4), written with the
 * FLAME partitioning calls. All work is done on the views of A and p that
 * the FLA_Part_* / FLA_Repart_* calls produce.
 *
 * Parameters
 *   A  matrix object to factor.
 *   p  pivot vector object; the entry exposed as pi1 is filled by
 *      FLA_Amax_external once per iteration.
 *
 * Per iteration:
 *   1. alpha11 = alpha11 - a10t * a01;
 *   2. a21     = a21 - A20 * a01;
 *   3. determine the pivot of [alpha11; a21] into pi1 and apply it there;
 *   4. apply pi1 to the columns on both sides, [a10t; A20] and [a12t; A22];
 *   5. a21     = a21 / alpha11;
 *   6. a12t    = a12t - a10t * A02 (issued as a transposed Gemv with A02).
 * Unlike variants 3 and 3b there is no triangular solve and no pass after
 * the loop.
 *
 * Returns FLA_SUCCESS unconditionally; no zero-pivot test is made before the
 * division in step 5.
 */
FLA_Error FLA_LU_piv_unb_var4 ( FLA_Obj  A,
FLA_Obj  p 
)
16 {
17  FLA_Obj ATL, ATR, A00, a01, A02,
18  ABL, ABR, a10t, alpha11, a12t,
19  A20, a21, A22;
20 
21  FLA_Obj pT, p0,
22  pB, pi1,
23  p2;
24 
25  FLA_Obj AB0, aB1, AB2;
26 
27 
// Initial splits use size 0 for ATL and pT; the Repart calls in the loop
// request one row/column/pivot entry (size argument 1) per iteration.
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30 
31  FLA_Part_2x1( p, &pT,
32  &pB, 0, FLA_TOP );
33 
// Keep going while ATL has fewer rows AND fewer columns than A.
34  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
35  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
38  /* ************* */ /* ************************** */
39  &a10t, /**/ &alpha11, &a12t,
40  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
41  1, 1, FLA_BR );
42 
43  FLA_Repart_2x1_to_3x1( pT, &p0,
44  /* ** */ /* *** */
45  &pi1,
46  pB, &p2, 1, FLA_BOTTOM );
47 
48  /*------------------------------------------------------------*/
49 
50  // alpha11 = alpha11 - a10t * a01
51  FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
52 
53  // a21 = a21 - A20 * a01
54  FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
55 
56  // aB1 = / alpha11 \
57  // \ a21 /
58  FLA_Merge_2x1( alpha11,
59  a21, &aB1 );
60 
61  // Determine pivot index
62  FLA_Amax_external( aB1, pi1 );
63 
64  // Apply pivots to current column
65  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
66 
67  // AB0 = / a10t \
68  // \ A20 /
69  FLA_Merge_2x1( a10t,
70  A20, &AB0 );
71 
72  // AB2 = / a12t \
73  // \ A22 /
74  FLA_Merge_2x1( a12t,
75  A22, &AB2 );
76 
77  // Apply pivots to remaining columns
78  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
79  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
80 
81  // a21 = a21 / alpha11
82  FLA_Inv_scal_external( alpha11, a21 );
83 
// This update comes after the swaps above, so it reads a10t and a12t as they
// stand once pi1 has been applied to AB0 and AB2.
84  // a12t = a12t - a10t * A02
85  FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
86 
87  /*------------------------------------------------------------*/
88 
89  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
90  a10t, alpha11, /**/ a12t,
91  /* ************** */ /* ************************ */
92  &ABL, /**/ &ABR, A20, a21, /**/ A22,
93  FLA_TL );
94 
95  FLA_Cont_with_3x1_to_2x1( &pT, p0,
96  pi1,
97  /* ** */ /* *** */
98  &pB, p2, FLA_TOP );
99 
100  }
101 
102  return FLA_SUCCESS;
103 }

References FLA_Amax_external(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_Inv_scal_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_LU_piv_internal().

◆ FLA_LU_piv_unb_var5()

/*
 * FLA_LU_piv_unb_var5
 *
 * Unblocked LU factorization with row pivoting (variant 5), written with the
 * FLAME partitioning calls. All work is done on the views of A and p that
 * the FLA_Part_* / FLA_Repart_* calls produce.
 *
 * Parameters
 *   A  matrix object to factor.
 *   p  pivot vector object; the entry exposed as pi1 is filled by
 *      FLA_Amax_external once per iteration.
 *
 * Per iteration:
 *   1. determine the pivot of [alpha11; a21] into pi1 and apply it there;
 *   2. a21 = a21 / alpha11;
 *   3. apply pi1 to the columns on the left, [a10t; A20];
 *   4. apply pi1 to the columns on the right, [a12t; A22];
 *   5. A22 = A22 - a21 * a12t (rank-1 update via FLA_Ger_external).
 * The current column is not updated from earlier columns before pivoting,
 * and nothing runs after the loop.
 *
 * Returns FLA_SUCCESS unconditionally; no zero-pivot test is made before the
 * division in step 2.
 */
FLA_Error FLA_LU_piv_unb_var5 ( FLA_Obj  A,
FLA_Obj  p 
)
16 {
17  FLA_Obj ATL, ATR, A00, a01, A02,
18  ABL, ABR, a10t, alpha11, a12t,
19  A20, a21, A22;
20 
21  FLA_Obj pT, p0,
22  pB, pi1,
23  p2;
24 
25  FLA_Obj AB0, aB1, AB2;
26 
27 
// Initial splits use size 0 for ATL and pT; the Repart calls in the loop
// request one row/column/pivot entry (size argument 1) per iteration.
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30 
31  FLA_Part_2x1( p, &pT,
32  &pB, 0, FLA_TOP );
33 
// Keep going while ATL has fewer rows AND fewer columns than A.
34  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
35  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
38  /* ************* */ /* ************************** */
39  &a10t, /**/ &alpha11, &a12t,
40  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
41  1, 1, FLA_BR );
42 
43  FLA_Repart_2x1_to_3x1( pT, &p0,
44  /* ** */ /* *** */
45  &pi1,
46  pB, &p2, 1, FLA_BOTTOM );
47 
48  /*------------------------------------------------------------*/
49 
50  // aB1 = / alpha11 \
51  // \ a21 /
52  FLA_Merge_2x1( alpha11,
53  a21, &aB1 );
54 
55  // Determine pivot index
56  FLA_Amax_external( aB1, pi1 );
57 
58  // Apply pivots to current column
59  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, aB1 );
60 
61  // a21 = a21 / alpha11
62  FLA_Inv_scal_external( alpha11, a21 );
63 
64  // AB0 = / a10t \
65  // \ A20 /
66  FLA_Merge_2x1( a10t,
67  A20, &AB0 );
68 
69  // Apply pivots to previous columns
70  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB0 );
71 
72  // AB2 = / a12t \
73  // \ A22 /
74  FLA_Merge_2x1( a12t,
75  A22, &AB2 );
76 
77  // Apply pivots to remaining columns
78  FLA_Apply_pivots( FLA_LEFT, FLA_NO_TRANSPOSE, pi1, AB2 );
79 
// The rank-1 update is issued only after pi1 has been applied to AB2, so it
// uses a12t and A22 as they stand after the swap.
80  // A22 = A22 - a21 * a12t
81  FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
82 
83  /*------------------------------------------------------------*/
84 
85  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
86  a10t, alpha11, /**/ a12t,
87  /* ************** */ /* ************************ */
88  &ABL, /**/ &ABR, A20, a21, /**/ A22,
89  FLA_TL );
90 
91  FLA_Cont_with_3x1_to_2x1( &pT, p0,
92  pi1,
93  /* ** */ /* *** */
94  &pB, p2, FLA_TOP );
95 
96  }
97 
98  return FLA_SUCCESS;
99 }
FLA_Error FLA_Ger_external(FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition: FLA_Ger_external.c:13

References FLA_Amax_external(), FLA_Apply_pivots(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Ger_external(), FLA_Inv_scal_external(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_LU_piv_internal().