libflame  revision_anchor
Functions
FLA_LU_nopiv_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_LU_nopiv_blk_var1 (FLA_Obj A, fla_lu_t *cntl)
 
FLA_Error FLA_LU_nopiv_blk_var2 (FLA_Obj A, fla_lu_t *cntl)
 
FLA_Error FLA_LU_nopiv_blk_var3 (FLA_Obj A, fla_lu_t *cntl)
 
FLA_Error FLA_LU_nopiv_blk_var4 (FLA_Obj A, fla_lu_t *cntl)
 
FLA_Error FLA_LU_nopiv_blk_var5 (FLA_Obj A, fla_lu_t *cntl)
 
FLA_Error FLA_LU_nopiv_unb_var1 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_unb_var2 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_unb_var3 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_unb_var4 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_unb_var5 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_opt_var1 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_ops_var1 (int m_A, int n_A, float *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opd_var1 (int m_A, int n_A, double *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opc_var1 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opz_var1 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opt_var2 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_ops_var2 (int m_A, int n_A, float *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opd_var2 (int m_A, int n_A, double *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opc_var2 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opz_var2 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opt_var3 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_ops_var3 (int m_A, int n_A, float *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opd_var3 (int m_A, int n_A, double *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opc_var3 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opz_var3 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opt_var4 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_ops_var4 (int m_A, int n_A, float *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opd_var4 (int m_A, int n_A, double *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opc_var4 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opz_var4 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opt_var5 (FLA_Obj A)
 
FLA_Error FLA_LU_nopiv_ops_var5 (int m_A, int n_A, float *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opd_var5 (int m_A, int n_A, double *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opc_var5 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A)
 
FLA_Error FLA_LU_nopiv_opz_var5 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A)
 

Function Documentation

◆ FLA_LU_nopiv_blk_var1()

FLA_Error FLA_LU_nopiv_blk_var1 ( FLA_Obj  A,
fla_lu_t *  cntl 
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  dim_t b;
22 
23 
24  FLA_Part_2x2( A, &ATL, &ATR,
25  &ABL, &ABR, 0, 0, FLA_TL );
26 
27  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
28  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
29 
30  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
31 
32  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
33  /* ************* */ /* ******************** */
34  &A10, /**/ &A11, &A12,
35  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
36  b, b, FLA_BR );
37 
38  /*------------------------------------------------------------*/
39 
40  // A01 = trilu( A00 ) \ A01
41  FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,
42  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
43  FLA_ONE, A00, A01,
44  FLA_Cntl_sub_trsm1( cntl ) );
45 
46  // A10 = A10 / triu( A00 )
47  FLA_Trsm_internal( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
48  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
49  FLA_ONE, A00, A10,
50  FLA_Cntl_sub_trsm2( cntl ) );
51 
52  // A11 = LU( A11 - A10 * A01 )
53  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
54  FLA_MINUS_ONE, A10, A01, FLA_ONE, A11,
55  FLA_Cntl_sub_gemm1( cntl ) );
56 
57  // A11 = LU_nopiv( A11 )
58  FLA_LU_nopiv_internal( A11, FLA_Cntl_sub_lu( cntl ) );
59 
60  /*------------------------------------------------------------*/
61 
62  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
63  A10, A11, /**/ A12,
64  /* ************** */ /* ****************** */
65  &ABL, /**/ &ABR, A20, A21, /**/ A22,
66  FLA_TL );
67  }
68 
69  if ( FLA_Obj_length( ABL ) > 0 )
70  // ABL = ABL / triu( ATL )
71  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
72  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
73  FLA_ONE, ATL, ABL );
74  else if ( FLA_Obj_width( ATR ) > 0 )
75  // ATR = trilu( ATL ) \ ATR
76  FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
77  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
78  FLA_ONE, ATL, ATR );
79 
80  return FLA_SUCCESS;
81 }
FLA_Error FLA_Gemm_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition: FLA_Gemm_internal.c:16
FLA_Error FLA_LU_nopiv_internal(FLA_Obj A, fla_lu_t *cntl)
Definition: FLA_LU_nopiv_internal.c:16
FLA_Error FLA_Trsm_internal(FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition: FLA_Trsm_internal.c:16
FLA_Error FLA_Trsm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Trsm_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234
unsigned long dim_t
Definition: FLA_type_defs.h:71
Definition: FLA_type_defs.h:159

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_nopiv_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsm_internal().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_blk_var2()

FLA_Error FLA_LU_nopiv_blk_var2 ( FLA_Obj  A,
fla_lu_t *  cntl 
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  dim_t b;
22 
23 
24  FLA_Part_2x2( A, &ATL, &ATR,
25  &ABL, &ABR, 0, 0, FLA_TL );
26 
27  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
28  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
29 
30  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
31 
32  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
33  /* ************* */ /* ******************** */
34  &A10, /**/ &A11, &A12,
35  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
36  b, b, FLA_BR );
37 
38  /*------------------------------------------------------------*/
39 
40  // A10 = A10 / triu( A00 )
41  FLA_Trsm_internal( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
42  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
43  FLA_ONE, A00, A10,
44  FLA_Cntl_sub_trsm1( cntl ) );
45 
46  // A11 = LU( A11 - A10 * A01 )
47  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
48  FLA_MINUS_ONE, A10, A01, FLA_ONE, A11,
49  FLA_Cntl_sub_gemm1( cntl ) );
50 
51  // A11 = LU_nopiv( A11 )
52  FLA_LU_nopiv_internal( A11, FLA_Cntl_sub_lu( cntl ) );
53 
54  // A12 = A12 - A10 * A02
55  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
56  FLA_MINUS_ONE, A10, A02, FLA_ONE, A12,
57  FLA_Cntl_sub_gemm2( cntl ) );
58 
59  // A12 = trilu( A11 ) \ A12
60  FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,
61  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
62  FLA_ONE, A11, A12,
63  FLA_Cntl_sub_trsm2( cntl ) );
64 
65  /*------------------------------------------------------------*/
66 
67  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
68  A10, A11, /**/ A12,
69  /* ************** */ /* ****************** */
70  &ABL, /**/ &ABR, A20, A21, /**/ A22,
71  FLA_TL );
72  }
73 
74  if ( FLA_Obj_length( ABL ) > 0 )
75  // ABL = ABL / triu( ATL )
76  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
77  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
78  FLA_ONE, ATL, ABL );
79 
80  return FLA_SUCCESS;
81 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_nopiv_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsm_internal().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_blk_var3()

FLA_Error FLA_LU_nopiv_blk_var3 ( FLA_Obj  A,
fla_lu_t *  cntl 
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  dim_t b;
22 
23 
24  FLA_Part_2x2( A, &ATL, &ATR,
25  &ABL, &ABR, 0, 0, FLA_TL );
26 
27  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
28  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
29 
30  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
31 
32  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
33  /* ************* */ /* ******************** */
34  &A10, /**/ &A11, &A12,
35  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
36  b, b, FLA_BR );
37 
38  /*------------------------------------------------------------*/
39 
40  // A01 = trilu( A00 ) \ A01
41  FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,
42  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
43  FLA_ONE, A00, A01,
44  FLA_Cntl_sub_trsm1( cntl ) );
45 
46  // A11 = LU( A11 - A10 * A01 )
47  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
48  FLA_MINUS_ONE, A10, A01, FLA_ONE, A11,
49  FLA_Cntl_sub_gemm1( cntl ) );
50 
51  // A11 = LU_nopiv( A11 )
52  FLA_LU_nopiv_internal( A11, FLA_Cntl_sub_lu( cntl ) );
53 
54  // A21 = A21 - A20 * A01
55  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
56  FLA_MINUS_ONE, A20, A01, FLA_ONE, A21,
57  FLA_Cntl_sub_gemm2( cntl ) );
58 
59  // A21 = A21 / triu( A11 )
60  FLA_Trsm_internal( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
61  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
62  FLA_ONE, A11, A21,
63  FLA_Cntl_sub_trsm2( cntl ) );
64 
65  /*------------------------------------------------------------*/
66 
67  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
68  A10, A11, /**/ A12,
69  /* ************** */ /* ****************** */
70  &ABL, /**/ &ABR, A20, A21, /**/ A22,
71  FLA_TL );
72  }
73 
74  if ( FLA_Obj_width( ATR ) > 0 )
75  /* ATR = trilu( ATL ) \ ATR */
76  FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
77  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
78  FLA_ONE, ATL, ATR );
79 
80  return FLA_SUCCESS;
81 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_nopiv_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsm_internal().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_blk_var4()

FLA_Error FLA_LU_nopiv_blk_var4 ( FLA_Obj  A,
fla_lu_t *  cntl 
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  dim_t b;
22 
23 
24  FLA_Part_2x2( A, &ATL, &ATR,
25  &ABL, &ABR, 0, 0, FLA_TL );
26 
27  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
28  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
29 
30  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
31 
32  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
33  /* ************* */ /* ******************** */
34  &A10, /**/ &A11, &A12,
35  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
36  b, b, FLA_BR );
37 
38  /*------------------------------------------------------------*/
39 
40  // A11 = LU( A11 - A10 * A01 )
41  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
42  FLA_MINUS_ONE, A10, A01, FLA_ONE, A11,
43  FLA_Cntl_sub_gemm1( cntl ) );
44 
45  // A11 = LU_nopiv( A11 )
46  FLA_LU_nopiv_internal( A11, FLA_Cntl_sub_lu( cntl ) );
47 
48  // A12 = A12 - A10 * A02
49  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
50  FLA_MINUS_ONE, A10, A02, FLA_ONE, A12,
51  FLA_Cntl_sub_gemm2( cntl ) );
52 
53  // A12 = trilu( A11 ) \ A12
54  FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,
55  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
56  FLA_ONE, A11, A12,
57  FLA_Cntl_sub_trsm1( cntl ) );
58 
59  // A21 = A21 - A20 * A01
60  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
61  FLA_MINUS_ONE, A20, A01, FLA_ONE, A21,
62  FLA_Cntl_sub_gemm3( cntl ) );
63 
64  // A21 = A21 / triu( A11 )
65  FLA_Trsm_internal( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
66  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
67  FLA_ONE, A11, A21,
68  FLA_Cntl_sub_trsm2( cntl ) );
69 
70  /*------------------------------------------------------------*/
71 
72  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
73  A10, A11, /**/ A12,
74  /* ************** */ /* ****************** */
75  &ABL, /**/ &ABR, A20, A21, /**/ A22,
76  FLA_TL );
77  }
78 
79  return FLA_SUCCESS;
80 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_nopiv_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Trsm_internal().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_blk_var5()

FLA_Error FLA_LU_nopiv_blk_var5 ( FLA_Obj  A,
fla_lu_t *  cntl 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  dim_t b;
20 
21 
22  FLA_Part_2x2( A, &ATL, &ATR,
23  &ABL, &ABR, 0, 0, FLA_TL );
24 
25  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
26  FLA_Obj_width( ATL ) < FLA_Obj_width( A )){
27 
28  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
29 
30  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
31  /* ************* */ /* ******************** */
32  &A10, /**/ &A11, &A12,
33  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
34  b, b, FLA_BR );
35 
36  /*------------------------------------------------------------*/
37 
38  // A11 = LU_nopiv( A11 )
39  FLA_LU_nopiv_internal( A11, FLA_Cntl_sub_lu( cntl ) );
40 
41  // A12 = trilu( A11 ) \ A12
42  FLA_Trsm_internal( FLA_LEFT, FLA_LOWER_TRIANGULAR,
43  FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
44  FLA_ONE, A11, A12,
45  FLA_Cntl_sub_trsm1( cntl ) );
46 
47  // A21 = A21 / triu( A11 )
48  FLA_Trsm_internal( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
49  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
50  FLA_ONE, A11, A21,
51  FLA_Cntl_sub_trsm2( cntl ) );
52 
53  // A22 = A22 - A21 * A12
54  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
55  FLA_MINUS_ONE, A21, A12, FLA_ONE, A22,
56  FLA_Cntl_sub_gemm1( cntl ) );
57 
58  /*------------------------------------------------------------*/
59 
60  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
61  A10, A11, /**/ A12,
62  /* ************** */ /* ****************** */
63  &ABL, /**/ &ABR, A20, A21, /**/ A22,
64  FLA_TL );
65  }
66 
67  return FLA_SUCCESS;
68 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_LU_nopiv_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Trsm_internal().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opc_var1()

FLA_Error FLA_LU_nopiv_opc_var1 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A 
)
266 {
267  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
268  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
269  int min_m_n = min( m_A, n_A );
270  int i;
271 
272  for ( i = 0; i < min_m_n; ++i )
273  {
274  scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
275  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
276  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
277  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
278 
279  int mn_behind = i;
280 
281  /*------------------------------------------------------------*/
282 
283  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
284  bl1_ctrsv( BLIS1_LOWER_TRIANGULAR,
285  BLIS1_NO_TRANSPOSE,
286  BLIS1_UNIT_DIAG,
287  mn_behind,
288  A00, rs_A, cs_A,
289  a01, rs_A );
290 
291  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
292  bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
293  BLIS1_TRANSPOSE,
294  BLIS1_NONUNIT_DIAG,
295  mn_behind,
296  A00, rs_A, cs_A,
297  a10t, cs_A );
298 
299  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
300  bl1_cdots( BLIS1_NO_CONJUGATE,
301  mn_behind,
302  buff_m1,
303  a10t, cs_A,
304  a01, rs_A,
305  buff_1,
306  alpha11 );
307 
308  /*------------------------------------------------------------*/
309 
310  }
311 
312  if ( m_A > n_A )
313  {
314  scomplex* ATL = buff_A;
315  scomplex* ABL = buff_A + n_A*rs_A;
316 
317  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
318  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
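319  // FLA_ONE, ATL, ABL );
320  bl1_ctrsm( BLIS1_RIGHT,
321  BLIS1_UPPER_TRIANGULAR,
322  BLIS1_NO_TRANSPOSE,
323  BLIS1_NONUNIT_DIAG,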
324  m_A - n_A,
325  n_A,
326  buff_1,
327  ATL, rs_A, cs_A,
328  ABL, rs_A, cs_A );
329  }
330  else if ( m_A < n_A )
331  {
332  scomplex* ATL = buff_A;
333  scomplex* ATR = buff_A + m_A*cs_A;
334 
335  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
336  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
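337  // FLA_ONE, ATL, ATR );
338  bl1_ctrsm( BLIS1_LEFT,
339  BLIS1_LOWER_TRIANGULAR,
340  BLIS1_NO_TRANSPOSE,
341  BLIS1_UNIT_DIAG,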
342  m_A,
343  n_A - m_A,
344  buff_1,
345  ATL, rs_A, cs_A,
346  ATR, rs_A, cs_A );
347  }
348 
349  return FLA_SUCCESS;
350 }
int i
Definition: bl1_axmyv2.c:145
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
void bl1_ctrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:219
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trsv.c:99
@ BLIS1_LOWER_TRIANGULAR
Definition: blis_type_defs.h:62
@ BLIS1_UPPER_TRIANGULAR
Definition: blis_type_defs.h:63
@ BLIS1_UNIT_DIAG
Definition: blis_type_defs.h:75
@ BLIS1_NONUNIT_DIAG
Definition: blis_type_defs.h:74
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_TRANSPOSE
Definition: blis_type_defs.h:55
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
@ BLIS1_RIGHT
Definition: blis_type_defs.h:69
@ BLIS1_LEFT
Definition: blis_type_defs.h:68
Definition: blis_type_defs.h:133

References bl1_cdots(), bl1_ctrsm(), bl1_ctrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var1().

◆ FLA_LU_nopiv_opc_var2()

FLA_Error FLA_LU_nopiv_opc_var2 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A 
)
242 {
243  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
244  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
245  int min_m_n = min( m_A, n_A );
246  int i;
247 
248  for ( i = 0; i < min_m_n; ++i )
249  {
250  scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
251  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
252  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
253  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
254  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
255  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
256 
257  int n_ahead = n_A - i - 1;
258  int mn_behind = i;
259 
260  /*------------------------------------------------------------*/
261 
262  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
263  bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
264  BLIS1_TRANSPOSE,
265  BLIS1_NONUNIT_DIAG,
266  mn_behind,
267  A00, rs_A, cs_A,
268  a10t, cs_A );
269 
270  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
271  bl1_cdots( BLIS1_NO_CONJUGATE,
272  mn_behind,
273  buff_m1,
274  a10t, cs_A,
275  a01, rs_A,
276  buff_1,
277  alpha11 );
278 
279  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
280  bl1_cgemv( BLIS1_TRANSPOSE,
281  BLIS1_NO_CONJUGATE,
282  mn_behind,
283  n_ahead,
284  buff_m1,
285  A02, rs_A, cs_A,
286  a10t, cs_A,
287  buff_1,
288  a12t, cs_A );
289 
290  /*------------------------------------------------------------*/
291 
292  }
293 
294  if ( m_A > n_A )
295  {
296  scomplex* ATL = buff_A;
297  scomplex* ABL = buff_A + n_A*rs_A;
298 
299  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
300  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
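301  // FLA_ONE, ATL, ABL );
302  bl1_ctrsm( BLIS1_RIGHT,
303  BLIS1_UPPER_TRIANGULAR,
304  BLIS1_NO_TRANSPOSE,
305  BLIS1_NONUNIT_DIAG,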
306  m_A - n_A,
307  n_A,
308  buff_1,
309  ATL, rs_A, cs_A,
310  ABL, rs_A, cs_A );
311  }
312 
313  return FLA_SUCCESS;
314 }
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125

References bl1_cdots(), bl1_cgemv(), bl1_ctrsm(), bl1_ctrsv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var2().

◆ FLA_LU_nopiv_opc_var3()

FLA_Error FLA_LU_nopiv_opc_var3 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A 
)
254 {
255  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
256  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
257  int min_m_n = min( m_A, n_A );
258  int i;
259 
260  for ( i = 0; i < min_m_n; ++i )
261  {
262  scomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
263  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
264  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
265  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
266  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
267  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
268 
269  int m_ahead = m_A - i - 1;
270  int mn_behind = i;
271 
272  /*------------------------------------------------------------*/
273 
274  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
275  bl1_ctrsv( BLIS1_LOWER_TRIANGULAR,
276  BLIS1_NO_TRANSPOSE,
277  BLIS1_UNIT_DIAG,
278  mn_behind,
279  A00, rs_A, cs_A,
280  a01, rs_A );
281 
282  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
283  bl1_cdots( BLIS1_NO_CONJUGATE,
284  mn_behind,
285  buff_m1,
286  a10t, cs_A,
287  a01, rs_A,
288  buff_1,
289  alpha11 );
290 
291  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
292  bl1_cgemv( BLIS1_NO_TRANSPOSE,
293  BLIS1_NO_CONJUGATE,
294  m_ahead,
295  mn_behind,
296  buff_m1,
297  A20, rs_A, cs_A,
298  a01, rs_A,
299  buff_1,
300  a21, rs_A );
301 
302  // FLA_Inv_scal_external( alpha11, a21 );
303  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
304  m_ahead,
305  alpha11,
306  a21, rs_A );
307 
308  /*------------------------------------------------------------*/
309 
310  }
311 
312  if ( m_A < n_A )
313  {
314  scomplex* ATL = buff_A;
315  scomplex* ATR = buff_A + m_A*cs_A;
316 
317  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
318  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
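319  // FLA_ONE, ATL, ATR );
320  bl1_ctrsm( BLIS1_LEFT,
321  BLIS1_LOWER_TRIANGULAR,
322  BLIS1_NO_TRANSPOSE,
323  BLIS1_UNIT_DIAG,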
324  m_A,
325  n_A - m_A,
326  buff_1,
327  ATL, rs_A, cs_A,
328  ATR, rs_A, cs_A );
329  }
330 
331  return FLA_SUCCESS;
332 }
void bl1_cinvscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_invscalv.c:52

References bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), bl1_ctrsm(), bl1_ctrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var3().

◆ FLA_LU_nopiv_opc_var4()

FLA_Error FLA_LU_nopiv_opc_var4 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A 
)
226 {
227  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
228  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
229  int min_m_n = min( m_A, n_A );
230  int i;
231 
232  for ( i = 0; i < min_m_n; ++i )
233  {
234  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
235  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
236  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
237  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
238  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
239  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
240  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
241 
242  int m_ahead = m_A - i - 1;
243  int n_ahead = n_A - i - 1;
244  int mn_behind = i;
245 
246  /*------------------------------------------------------------*/
247 
248  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
249  bl1_cdots( BLIS1_NO_CONJUGATE,
250  mn_behind,
251  buff_m1,
252  a10t, cs_A,
253  a01, rs_A,
254  buff_1,
255  alpha11 );
256 
257  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
258  bl1_cgemv( BLIS1_TRANSPOSE,
259  BLIS1_NO_CONJUGATE,
260  mn_behind,
261  n_ahead,
262  buff_m1,
263  A02, rs_A, cs_A,
264  a10t, cs_A,
265  buff_1,
266  a12t, cs_A );
267 
268  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
269  bl1_cgemv( BLIS1_NO_TRANSPOSE,
270  BLIS1_NO_CONJUGATE,
271  m_ahead,
272  mn_behind,
273  buff_m1,
274  A20, rs_A, cs_A,
275  a01, rs_A,
276  buff_1,
277  a21, rs_A );
278 
279  // FLA_Inv_scal_external( alpha11, a21 );
280  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
281  m_ahead,
282  alpha11,
283  a21, rs_A );
284 
285  /*------------------------------------------------------------*/
286 
287  }
288 
289  return FLA_SUCCESS;
290 }

References bl1_cdots(), bl1_cgemv(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var4().

◆ FLA_LU_nopiv_opc_var5()

FLA_Error FLA_LU_nopiv_opc_var5 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A 
)
172 {
173  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
174  int min_m_n = min( m_A, n_A );
175  int i;
176 
177  for ( i = 0; i < min_m_n; ++i )
178  {
179  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
182  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
183 
184  int m_ahead = m_A - i - 1;
185  int n_ahead = n_A - i - 1;
186 
187  /*------------------------------------------------------------*/
188 
189  // FLA_Inv_scal_external( alpha11, a21 );
190  bl1_cinvscalv( BLIS1_NO_CONJUGATE,
191  m_ahead,
192  alpha11,
193  a21, rs_A );
194 
195  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
196  bl1_cger( BLIS1_NO_CONJUGATE,
197  BLIS1_NO_CONJUGATE,
198  m_ahead,
199  n_ahead,
200  buff_m1,
201  a21, rs_A,
202  a12t, cs_A,
203  A22, rs_A, cs_A );
204 
205  /*------------------------------------------------------------*/
206 
207  }
208 
209  return FLA_SUCCESS;
210 }
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111

References bl1_cger(), bl1_cinvscalv(), BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var5().

◆ FLA_LU_nopiv_opd_var1()

FLA_Error FLA_LU_nopiv_opd_var1 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A 
)
175 {
176  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
177  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
178  int min_m_n = min( m_A, n_A );
179  int i;
180 
181  for ( i = 0; i < min_m_n; ++i )
182  {
183  double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
184  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
185  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
186  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
187 
188  int mn_behind = i;
189 
190  /*------------------------------------------------------------*/
191 
192  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
193  bl1_dtrsv( BLIS1_LOWER_TRIANGULAR,
194  BLIS1_NO_TRANSPOSE,
195  BLIS1_UNIT_DIAG,
196  mn_behind,
197  A00, rs_A, cs_A,
198  a01, rs_A );
199 
200  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
201  bl1_dtrsv( BLIS1_UPPER_TRIANGULAR,
202  BLIS1_TRANSPOSE,
203  BLIS1_NONUNIT_DIAG,
204  mn_behind,
205  A00, rs_A, cs_A,
206  a10t, cs_A );
207 
208  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
209  bl1_ddots( BLIS1_NO_CONJUGATE,
210  mn_behind,
211  buff_m1,
212  a10t, cs_A,
213  a01, rs_A,
214  buff_1,
215  alpha11 );
216 
217  /*------------------------------------------------------------*/
218 
219  }
220 
221  if ( m_A > n_A )
222  {
223  double* ATL = buff_A;
224  double* ABL = buff_A + n_A*rs_A;
225 
226  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
227  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
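228  // FLA_ONE, ATL, ABL );
229  bl1_dtrsm( BLIS1_RIGHT,
230  BLIS1_UPPER_TRIANGULAR,
231  BLIS1_NO_TRANSPOSE,
232  BLIS1_NONUNIT_DIAG,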
233  m_A - n_A,
234  n_A,
235  buff_1,
236  ATL, rs_A, cs_A,
237  ABL, rs_A, cs_A );
238  }
239  else if ( m_A < n_A )
240  {
241  double* ATL = buff_A;
242  double* ATR = buff_A + m_A*cs_A;
243 
244  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
245  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
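246  // FLA_ONE, ATL, ATR );
247  bl1_dtrsm( BLIS1_LEFT,
248  BLIS1_LOWER_TRIANGULAR,
249  BLIS1_NO_TRANSPOSE,
250  BLIS1_UNIT_DIAG,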
251  m_A,
252  n_A - m_A,
253  buff_1,
254  ATL, rs_A, cs_A,
255  ATR, rs_A, cs_A );
256  }
257 
258  return FLA_SUCCESS;
259 }
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition: bl1_dots.c:26
void bl1_dtrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:116
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trsv.c:56

References bl1_ddots(), bl1_dtrsm(), bl1_dtrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var1().

◆ FLA_LU_nopiv_opd_var2()

FLA_Error FLA_LU_nopiv_opd_var2 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A 
)
163 {
164  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
165  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
166  int min_m_n = min( m_A, n_A );
167  int i;
168 
169  for ( i = 0; i < min_m_n; ++i )
170  {
171  double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
172  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
173  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
174  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
175  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
176  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
177 
178  int n_ahead = n_A - i - 1;
179  int mn_behind = i;
180 
181  /*------------------------------------------------------------*/
182 
183  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
184  bl1_dtrsv( BLIS1_UPPER_TRIANGULAR,
185  BLIS1_TRANSPOSE,
186  BLIS1_NONUNIT_DIAG,
187  mn_behind,
188  A00, rs_A, cs_A,
189  a10t, cs_A );
190 
191  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
192  bl1_ddots( BLIS1_NO_CONJUGATE,
193  mn_behind,
194  buff_m1,
195  a10t, cs_A,
196  a01, rs_A,
197  buff_1,
198  alpha11 );
199 
200  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
201  bl1_dgemv( BLIS1_TRANSPOSE,
202  BLIS1_NO_CONJUGATE,
203  mn_behind,
204  n_ahead,
205  buff_m1,
206  A02, rs_A, cs_A,
207  a10t, cs_A,
208  buff_1,
209  a12t, cs_A );
210 
211  /*------------------------------------------------------------*/
212 
213  }
214 
215  if ( m_A > n_A )
216  {
217  double* ATL = buff_A;
218  double* ABL = buff_A + n_A*rs_A;
219 
220  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
221  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
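222  // FLA_ONE, ATL, ABL );
223  bl1_dtrsm( BLIS1_RIGHT,
224  BLIS1_UPPER_TRIANGULAR,
225  BLIS1_NO_TRANSPOSE,
226  BLIS1_NONUNIT_DIAG,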
227  m_A - n_A,
228  n_A,
229  buff_1,
230  ATL, rs_A, cs_A,
231  ABL, rs_A, cs_A );
232  }
233 
234  return FLA_SUCCESS;
235 }
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69

References bl1_ddots(), bl1_dgemv(), bl1_dtrsm(), bl1_dtrsv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var2().

◆ FLA_LU_nopiv_opd_var3()

FLA_Error FLA_LU_nopiv_opd_var3 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A 
)
169 {
170  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
171  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
172  int min_m_n = min( m_A, n_A );
173  int i;
174 
175  for ( i = 0; i < min_m_n; ++i )
176  {
177  double* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
178  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
179  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
180  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
181  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
182  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
183 
184  int m_ahead = m_A - i - 1;
185  int mn_behind = i;
186 
187  /*------------------------------------------------------------*/
188 
189  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
190  bl1_dtrsv( BLIS1_LOWER_TRIANGULAR,
191  BLIS1_NO_TRANSPOSE,
192  BLIS1_UNIT_DIAG,
193  mn_behind,
194  A00, rs_A, cs_A,
195  a01, rs_A );
196 
197  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
198  bl1_ddots( BLIS1_NO_CONJUGATE,
199  mn_behind,
200  buff_m1,
201  a10t, cs_A,
202  a01, rs_A,
203  buff_1,
204  alpha11 );
205 
206  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
207  bl1_dgemv( BLIS1_NO_TRANSPOSE,
208  BLIS1_NO_CONJUGATE,
209  m_ahead,
210  mn_behind,
211  buff_m1,
212  A20, rs_A, cs_A,
213  a01, rs_A,
214  buff_1,
215  a21, rs_A );
216 
217  // FLA_Inv_scal_external( alpha11, a21 );
218  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
219  m_ahead,
220  alpha11,
221  a21, rs_A );
222 
223  /*------------------------------------------------------------*/
224 
225  }
226 
227  if ( m_A < n_A )
228  {
229  double* ATL = buff_A;
230  double* ATR = buff_A + m_A*cs_A;
231 
232  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
233  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
234  // FLA_ONE, ATL, ATR );
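235  bl1_dtrsm( BLIS1_LEFT,
236  BLIS1_LOWER_TRIANGULAR,
237  BLIS1_NO_TRANSPOSE,
238  BLIS1_UNIT_DIAG,
239  m_A,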
240  n_A - m_A,
241  buff_1,
242  ATL, rs_A, cs_A,
243  ATR, rs_A, cs_A );
244  }
245 
246  return FLA_SUCCESS;
247 }
void bl1_dinvscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_invscalv.c:26

References bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), bl1_dtrsm(), bl1_dtrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var3().

◆ FLA_LU_nopiv_opd_var4()

FLA_Error FLA_LU_nopiv_opd_var4 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A 
)
155 {
156  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
157  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
158  int min_m_n = min( m_A, n_A );
159  int i;
160 
161  for ( i = 0; i < min_m_n; ++i )
162  {
163  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
164  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
165  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
166  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
167  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
168  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
169  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
170 
171  int m_ahead = m_A - i - 1;
172  int n_ahead = n_A - i - 1;
173  int mn_behind = i;
174 
175  /*------------------------------------------------------------*/
176 
177  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
178  bl1_ddots( BLIS1_NO_CONJUGATE,
179  mn_behind,
180  buff_m1,
181  a10t, cs_A,
182  a01, rs_A,
183  buff_1,
184  alpha11 );
185 
186  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
187  bl1_dgemv( BLIS1_TRANSPOSE,
188  BLIS1_NO_CONJUGATE,
189  mn_behind,
190  n_ahead,
191  buff_m1,
192  A02, rs_A, cs_A,
193  a10t, cs_A,
194  buff_1,
195  a12t, cs_A );
196 
197  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
198  bl1_dgemv( BLIS1_NO_TRANSPOSE,
199  BLIS1_NO_CONJUGATE,
200  m_ahead,
201  mn_behind,
202  buff_m1,
203  A20, rs_A, cs_A,
204  a01, rs_A,
205  buff_1,
206  a21, rs_A );
207 
208  // FLA_Inv_scal_external( alpha11, a21 );
209  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
210  m_ahead,
211  alpha11,
212  a21, rs_A );
213 
214  /*------------------------------------------------------------*/
215 
216  }
217 
218  return FLA_SUCCESS;
219 }

References bl1_ddots(), bl1_dgemv(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var4().

◆ FLA_LU_nopiv_opd_var5()

FLA_Error FLA_LU_nopiv_opd_var5 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A 
)
127 {
128  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
129  int min_m_n = min( m_A, n_A );
130  int i;
131 
132  for ( i = 0; i < min_m_n; ++i )
133  {
134  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
135  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
136  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
137  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
138 
139  int m_ahead = m_A - i - 1;
140  int n_ahead = n_A - i - 1;
141 
142  /*------------------------------------------------------------*/
143 
144  // FLA_Inv_scal_external( alpha11, a21 );
145  bl1_dinvscalv( BLIS1_NO_CONJUGATE,
146  m_ahead,
147  alpha11,
148  a21, rs_A );
149 
150  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
151  bl1_dger( BLIS1_NO_CONJUGATE,
152  BLIS1_NO_CONJUGATE,
153  m_ahead,
154  n_ahead,
155  buff_m1,
156  a21, rs_A,
157  a12t, cs_A,
158  A22, rs_A, cs_A );
159 
160  /*------------------------------------------------------------*/
161 
162  }
163 
164  return FLA_SUCCESS;
165 }
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62

References bl1_dger(), bl1_dinvscalv(), BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var5().

◆ FLA_LU_nopiv_ops_var1()

FLA_Error FLA_LU_nopiv_ops_var1 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A 
)
84 {
85  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
86  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
87  int min_m_n = min( m_A, n_A );
88  int i;
89 
90  for ( i = 0; i < min_m_n; ++i )
91  {
92  float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
93  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
94  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
95  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
96 
97  int mn_behind = i;
98 
99  /*------------------------------------------------------------*/
100 
101  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
102  bl1_strsv( BLIS1_LOWER_TRIANGULAR,
103  BLIS1_NO_TRANSPOSE,
104  BLIS1_UNIT_DIAG,
105  mn_behind,
106  A00, rs_A, cs_A,
107  a01, rs_A );
108 
109  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
110  bl1_strsv( BLIS1_UPPER_TRIANGULAR,
111  BLIS1_TRANSPOSE,
112  BLIS1_NONUNIT_DIAG,
113  mn_behind,
114  A00, rs_A, cs_A,
115  a10t, cs_A );
116 
117  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
118  bl1_sdots( BLIS1_NO_CONJUGATE,
119  mn_behind,
120  buff_m1,
121  a10t, cs_A,
122  a01, rs_A,
123  buff_1,
124  alpha11 );
125 
126  /*------------------------------------------------------------*/
127 
128  }
129 
130  if ( m_A > n_A )
131  {
132  float* ATL = buff_A;
133  float* ABL = buff_A + n_A*rs_A;
134 
135  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
136  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
137  // FLA_ONE, ATL, ABL );
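138  bl1_strsm( BLIS1_RIGHT,
139  BLIS1_UPPER_TRIANGULAR,
140  BLIS1_NO_TRANSPOSE,
141  BLIS1_NONUNIT_DIAG,
142  m_A - n_A,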
143  n_A,
144  buff_1,
145  ATL, rs_A, cs_A,
146  ABL, rs_A, cs_A );
147  }
148  else if ( m_A < n_A )
149  {
150  float* ATL = buff_A;
151  float* ATR = buff_A + m_A*cs_A;
152 
153  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
154  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
155  // FLA_ONE, ATL, ATR );
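156  bl1_strsm( BLIS1_LEFT,
157  BLIS1_LOWER_TRIANGULAR,
158  BLIS1_NO_TRANSPOSE,
159  BLIS1_UNIT_DIAG,
160  m_A,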
161  n_A - m_A,
162  buff_1,
163  ATL, rs_A, cs_A,
164  ATR, rs_A, cs_A );
165  }
166 
167  return FLA_SUCCESS;
168 }
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
void bl1_strsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:13
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trsv.c:13

References bl1_sdots(), bl1_strsm(), bl1_strsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var1().

◆ FLA_LU_nopiv_ops_var2()

FLA_Error FLA_LU_nopiv_ops_var2 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A 
)
84 {
85  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
86  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
87  int min_m_n = min( m_A, n_A );
88  int i;
89 
90  for ( i = 0; i < min_m_n; ++i )
91  {
92  float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
93  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
94  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
95  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
96  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
97  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
98 
99  int n_ahead = n_A - i - 1;
100  int mn_behind = i;
101 
102  /*------------------------------------------------------------*/
103 
104  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
105  bl1_strsv( BLIS1_UPPER_TRIANGULAR,
106  BLIS1_TRANSPOSE,
107  BLIS1_NONUNIT_DIAG,
108  mn_behind,
109  A00, rs_A, cs_A,
110  a10t, cs_A );
111 
112  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
113  bl1_sdots( BLIS1_NO_CONJUGATE,
114  mn_behind,
115  buff_m1,
116  a10t, cs_A,
117  a01, rs_A,
118  buff_1,
119  alpha11 );
120 
121  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
122  bl1_sgemv( BLIS1_TRANSPOSE,
123  BLIS1_NO_CONJUGATE,
124  mn_behind,
125  n_ahead,
126  buff_m1,
127  A02, rs_A, cs_A,
128  a10t, cs_A,
129  buff_1,
130  a12t, cs_A );
131 
132  /*------------------------------------------------------------*/
133 
134  }
135 
136  if ( m_A > n_A )
137  {
138  float* ATL = buff_A;
139  float* ABL = buff_A + n_A*rs_A;
140 
141  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
142  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
143  // FLA_ONE, ATL, ABL );
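144  bl1_strsm( BLIS1_RIGHT,
145  BLIS1_UPPER_TRIANGULAR,
146  BLIS1_NO_TRANSPOSE,
147  BLIS1_NONUNIT_DIAG,
148  m_A - n_A,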
149  n_A,
150  buff_1,
151  ATL, rs_A, cs_A,
152  ABL, rs_A, cs_A );
153  }
154 
155  return FLA_SUCCESS;
156 }
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13

References bl1_sdots(), bl1_sgemv(), bl1_strsm(), bl1_strsv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var2().

◆ FLA_LU_nopiv_ops_var3()

FLA_Error FLA_LU_nopiv_ops_var3 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A 
)
84 {
85  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
86  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
87  int min_m_n = min( m_A, n_A );
88  int i;
89 
90  for ( i = 0; i < min_m_n; ++i )
91  {
92  float* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
93  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
94  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
95  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
96  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
97  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
98 
99  int m_ahead = m_A - i - 1;
100  int mn_behind = i;
101 
102  /*------------------------------------------------------------*/
103 
104  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
105  bl1_strsv( BLIS1_LOWER_TRIANGULAR,
106  BLIS1_NO_TRANSPOSE,
107  BLIS1_UNIT_DIAG,
108  mn_behind,
109  A00, rs_A, cs_A,
110  a01, rs_A );
111 
112  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
113  bl1_sdots( BLIS1_NO_CONJUGATE,
114  mn_behind,
115  buff_m1,
116  a10t, cs_A,
117  a01, rs_A,
118  buff_1,
119  alpha11 );
120 
121  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
122  bl1_sgemv( BLIS1_NO_TRANSPOSE,
123  BLIS1_NO_CONJUGATE,
124  m_ahead,
125  mn_behind,
126  buff_m1,
127  A20, rs_A, cs_A,
128  a01, rs_A,
129  buff_1,
130  a21, rs_A );
131 
132  // FLA_Inv_scal_external( alpha11, a21 );
133  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
134  m_ahead,
135  alpha11,
136  a21, rs_A );
137 
138  /*------------------------------------------------------------*/
139 
140  }
141 
142  if ( m_A < n_A )
143  {
144  float* ATL = buff_A;
145  float* ATR = buff_A + m_A*cs_A;
146 
147  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
148  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
149  // FLA_ONE, ATL, ATR );
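150  bl1_strsm( BLIS1_LEFT,
151  BLIS1_LOWER_TRIANGULAR,
152  BLIS1_NO_TRANSPOSE,
153  BLIS1_UNIT_DIAG,
154  m_A,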
155  n_A - m_A,
156  buff_1,
157  ATL, rs_A, cs_A,
158  ATR, rs_A, cs_A );
159  }
160 
161  return FLA_SUCCESS;
162 }
void bl1_sinvscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_invscalv.c:13

References bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), bl1_strsm(), bl1_strsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var3().

◆ FLA_LU_nopiv_ops_var4()

FLA_Error FLA_LU_nopiv_ops_var4 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A 
)
84 {
85  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
86  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
87  int min_m_n = min( m_A, n_A );
88  int i;
89 
90  for ( i = 0; i < min_m_n; ++i )
91  {
92  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
93  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
94  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
95  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
96  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
97  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
98  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
99 
100  int m_ahead = m_A - i - 1;
101  int n_ahead = n_A - i - 1;
102  int mn_behind = i;
103 
104  /*------------------------------------------------------------*/
105 
106  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
107  bl1_sdots( BLIS1_NO_CONJUGATE,
108  mn_behind,
109  buff_m1,
110  a10t, cs_A,
111  a01, rs_A,
112  buff_1,
113  alpha11 );
114 
115  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
116  bl1_sgemv( BLIS1_TRANSPOSE,
117  BLIS1_NO_CONJUGATE,
118  mn_behind,
119  n_ahead,
120  buff_m1,
121  A02, rs_A, cs_A,
122  a10t, cs_A,
123  buff_1,
124  a12t, cs_A );
125 
126  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
127  bl1_sgemv( BLIS1_NO_TRANSPOSE,
128  BLIS1_NO_CONJUGATE,
129  m_ahead,
130  mn_behind,
131  buff_m1,
132  A20, rs_A, cs_A,
133  a01, rs_A,
134  buff_1,
135  a21, rs_A );
136 
137  // FLA_Inv_scal_external( alpha11, a21 );
138  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
139  m_ahead,
140  alpha11,
141  a21, rs_A );
142 
143  /*------------------------------------------------------------*/
144 
145  }
146 
147  return FLA_SUCCESS;
148 }

References bl1_sdots(), bl1_sgemv(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var4().

◆ FLA_LU_nopiv_ops_var5()

FLA_Error FLA_LU_nopiv_ops_var5 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A 
)
82 {
83  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
84  int min_m_n = min( m_A, n_A );
85  int i;
86 
87  for ( i = 0; i < min_m_n; ++i )
88  {
89  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
90  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
91  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
92  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
93 
94  int m_ahead = m_A - i - 1;
95  int n_ahead = n_A - i - 1;
96 
97  /*------------------------------------------------------------*/
98 
99  // FLA_Inv_scal_external( alpha11, a21 );
100  bl1_sinvscalv( BLIS1_NO_CONJUGATE,
101  m_ahead,
102  alpha11,
103  a21, rs_A );
104 
105  // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
106  bl1_sger( BLIS1_NO_CONJUGATE,
107  BLIS1_NO_CONJUGATE,
108  m_ahead,
109  n_ahead,
110  buff_m1,
111  a21, rs_A,
112  a12t, cs_A,
113  A22, rs_A, cs_A );
114 
115  /*------------------------------------------------------------*/
116 
117  }
118 
119  return FLA_SUCCESS;
120 }
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13

References bl1_sger(), bl1_sinvscalv(), BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var5().

◆ FLA_LU_nopiv_opt_var1()

FLA_Error FLA_LU_nopiv_opt_var1 ( FLA_Obj  A)
16 {
17  FLA_Datatype datatype;
18  int m_A, n_A;
19  int rs_A, cs_A;
20 
21  datatype = FLA_Obj_datatype( A );
22 
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25  rs_A = FLA_Obj_row_stride( A );
26  cs_A = FLA_Obj_col_stride( A );
27 
28 
29  switch ( datatype )
30  {
31  case FLA_FLOAT:
32  {
33  float* buff_A = FLA_FLOAT_PTR( A );
34 
35  FLA_LU_nopiv_ops_var1( m_A,
36  n_A,
37  buff_A, rs_A, cs_A );
38 
39  break;
40  }
41 
42  case FLA_DOUBLE:
43  {
44  double* buff_A = FLA_DOUBLE_PTR( A );
45 
46  FLA_LU_nopiv_opd_var1( m_A,
47  n_A,
48  buff_A, rs_A, cs_A );
49 
50  break;
51  }
52 
53  case FLA_COMPLEX:
54  {
55  scomplex* buff_A = FLA_COMPLEX_PTR( A );
56 
57  FLA_LU_nopiv_opc_var1( m_A,
58  n_A,
59  buff_A, rs_A, cs_A );
60 
61  break;
62  }
63 
64  case FLA_DOUBLE_COMPLEX:
65  {
66  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
67 
68  FLA_LU_nopiv_opz_var1( m_A,
69  n_A,
70  buff_A, rs_A, cs_A );
71 
72  break;
73  }
74  }
75 
76  return FLA_SUCCESS;
77 }
FLA_Error FLA_LU_nopiv_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var1.c:263
FLA_Error FLA_LU_nopiv_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var1.c:354
FLA_Error FLA_LU_nopiv_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var1.c:172
FLA_Error FLA_LU_nopiv_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var1.c:81
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_LU_nopiv_opc_var1(), FLA_LU_nopiv_opd_var1(), FLA_LU_nopiv_ops_var1(), FLA_LU_nopiv_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opt_var2()

FLA_Error FLA_LU_nopiv_opt_var2 ( FLA_Obj  A)
16 {
17  FLA_Datatype datatype;
18  int m_A, n_A;
19  int rs_A, cs_A;
20 
21  datatype = FLA_Obj_datatype( A );
22 
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25  rs_A = FLA_Obj_row_stride( A );
26  cs_A = FLA_Obj_col_stride( A );
27 
28 
29  switch ( datatype )
30  {
31  case FLA_FLOAT:
32  {
33  float* buff_A = FLA_FLOAT_PTR( A );
34 
35  FLA_LU_nopiv_ops_var2( m_A,
36  n_A,
37  buff_A, rs_A, cs_A );
38 
39  break;
40  }
41 
42  case FLA_DOUBLE:
43  {
44  double* buff_A = FLA_DOUBLE_PTR( A );
45 
46  FLA_LU_nopiv_opd_var2( m_A,
47  n_A,
48  buff_A, rs_A, cs_A );
49 
50  break;
51  }
52 
53  case FLA_COMPLEX:
54  {
55  scomplex* buff_A = FLA_COMPLEX_PTR( A );
56 
57  FLA_LU_nopiv_opc_var2( m_A,
58  n_A,
59  buff_A, rs_A, cs_A );
60 
61  break;
62  }
63 
64  case FLA_DOUBLE_COMPLEX:
65  {
66  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
67 
68  FLA_LU_nopiv_opz_var2( m_A,
69  n_A,
70  buff_A, rs_A, cs_A );
71 
72  break;
73  }
74  }
75 
76  return FLA_SUCCESS;
77 }
FLA_Error FLA_LU_nopiv_opz_var2(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var2.c:318
FLA_Error FLA_LU_nopiv_opd_var2(int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var2.c:160
FLA_Error FLA_LU_nopiv_opc_var2(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var2.c:239
FLA_Error FLA_LU_nopiv_ops_var2(int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var2.c:81

References FLA_LU_nopiv_opc_var2(), FLA_LU_nopiv_opd_var2(), FLA_LU_nopiv_ops_var2(), FLA_LU_nopiv_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opt_var3()

FLA_Error FLA_LU_nopiv_opt_var3 ( FLA_Obj  A)
16 {
17  FLA_Datatype datatype;
18  int m_A, n_A;
19  int rs_A, cs_A;
20 
21  datatype = FLA_Obj_datatype( A );
22 
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25  rs_A = FLA_Obj_row_stride( A );
26  cs_A = FLA_Obj_col_stride( A );
27 
28 
29  switch ( datatype )
30  {
31  case FLA_FLOAT:
32  {
33  float* buff_A = FLA_FLOAT_PTR( A );
34 
35  FLA_LU_nopiv_ops_var3( m_A,
36  n_A,
37  buff_A, rs_A, cs_A );
38 
39  break;
40  }
41 
42  case FLA_DOUBLE:
43  {
44  double* buff_A = FLA_DOUBLE_PTR( A );
45 
46  FLA_LU_nopiv_opd_var3( m_A,
47  n_A,
48  buff_A, rs_A, cs_A );
49 
50  break;
51  }
52 
53  case FLA_COMPLEX:
54  {
55  scomplex* buff_A = FLA_COMPLEX_PTR( A );
56 
57  FLA_LU_nopiv_opc_var3( m_A,
58  n_A,
59  buff_A, rs_A, cs_A );
60 
61  break;
62  }
63 
64  case FLA_DOUBLE_COMPLEX:
65  {
66  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
67 
68  FLA_LU_nopiv_opz_var3( m_A,
69  n_A,
70  buff_A, rs_A, cs_A );
71 
72  break;
73  }
74  }
75 
76  return FLA_SUCCESS;
77 }
FLA_Error FLA_LU_nopiv_opz_var3(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var3.c:336
FLA_Error FLA_LU_nopiv_ops_var3(int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var3.c:81
FLA_Error FLA_LU_nopiv_opc_var3(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var3.c:251
FLA_Error FLA_LU_nopiv_opd_var3(int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var3.c:166

References FLA_LU_nopiv_opc_var3(), FLA_LU_nopiv_opd_var3(), FLA_LU_nopiv_ops_var3(), FLA_LU_nopiv_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opt_var4()

FLA_Error FLA_LU_nopiv_opt_var4 ( FLA_Obj  A)
16 {
17  FLA_Datatype datatype;
18  int m_A, n_A;
19  int rs_A, cs_A;
20 
21  datatype = FLA_Obj_datatype( A );
22 
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25  rs_A = FLA_Obj_row_stride( A );
26  cs_A = FLA_Obj_col_stride( A );
27 
28 
29  switch ( datatype )
30  {
31  case FLA_FLOAT:
32  {
33  float* buff_A = FLA_FLOAT_PTR( A );
34 
35  FLA_LU_nopiv_ops_var4( m_A,
36  n_A,
37  buff_A, rs_A, cs_A );
38 
39  break;
40  }
41 
42  case FLA_DOUBLE:
43  {
44  double* buff_A = FLA_DOUBLE_PTR( A );
45 
46  FLA_LU_nopiv_opd_var4( m_A,
47  n_A,
48  buff_A, rs_A, cs_A );
49 
50  break;
51  }
52 
53  case FLA_COMPLEX:
54  {
55  scomplex* buff_A = FLA_COMPLEX_PTR( A );
56 
57  FLA_LU_nopiv_opc_var4( m_A,
58  n_A,
59  buff_A, rs_A, cs_A );
60 
61  break;
62  }
63 
64  case FLA_DOUBLE_COMPLEX:
65  {
66  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
67 
68  FLA_LU_nopiv_opz_var4( m_A,
69  n_A,
70  buff_A, rs_A, cs_A );
71 
72  break;
73  }
74  }
75 
76  return FLA_SUCCESS;
77 }
FLA_Error FLA_LU_nopiv_opc_var4(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var4.c:223
FLA_Error FLA_LU_nopiv_ops_var4(int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var4.c:81
FLA_Error FLA_LU_nopiv_opd_var4(int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var4.c:152
FLA_Error FLA_LU_nopiv_opz_var4(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var4.c:294

References FLA_LU_nopiv_opc_var4(), FLA_LU_nopiv_opd_var4(), FLA_LU_nopiv_ops_var4(), FLA_LU_nopiv_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opt_var5()

FLA_Error FLA_LU_nopiv_opt_var5 ( FLA_Obj  A)
14 {
15  FLA_Datatype datatype;
16  int m_A, n_A;
17  int rs_A, cs_A;
18 
19  datatype = FLA_Obj_datatype( A );
20 
21  m_A = FLA_Obj_length( A );
22  n_A = FLA_Obj_width( A );
23  rs_A = FLA_Obj_row_stride( A );
24  cs_A = FLA_Obj_col_stride( A );
25 
26 
27  switch ( datatype )
28  {
29  case FLA_FLOAT:
30  {
31  float* buff_A = FLA_FLOAT_PTR( A );
32 
33  FLA_LU_nopiv_ops_var5( m_A,
34  n_A,
35  buff_A, rs_A, cs_A );
36 
37  break;
38  }
39 
40  case FLA_DOUBLE:
41  {
42  double* buff_A = FLA_DOUBLE_PTR( A );
43 
44  FLA_LU_nopiv_opd_var5( m_A,
45  n_A,
46  buff_A, rs_A, cs_A );
47 
48  break;
49  }
50 
51  case FLA_COMPLEX:
52  {
53  scomplex* buff_A = FLA_COMPLEX_PTR( A );
54 
55  FLA_LU_nopiv_opc_var5( m_A,
56  n_A,
57  buff_A, rs_A, cs_A );
58 
59  break;
60  }
61 
62  case FLA_DOUBLE_COMPLEX:
63  {
64  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
65 
66  FLA_LU_nopiv_opz_var5( m_A,
67  n_A,
68  buff_A, rs_A, cs_A );
69 
70  break;
71  }
72  }
73 
74  return FLA_SUCCESS;
75 }
FLA_Error FLA_LU_nopiv_opd_var5(int m_A, int n_A, double *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var5.c:124
FLA_Error FLA_LU_nopiv_opz_var5(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var5.c:214
FLA_Error FLA_LU_nopiv_ops_var5(int m_A, int n_A, float *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var5.c:79
FLA_Error FLA_LU_nopiv_opc_var5(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_LU_nopiv_opt_var5.c:169

References FLA_LU_nopiv_opc_var5(), FLA_LU_nopiv_opd_var5(), FLA_LU_nopiv_ops_var5(), FLA_LU_nopiv_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_opz_var1()

FLA_Error FLA_LU_nopiv_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  A,
int  rs_A,
int  cs_A 
)
357 {
358  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
359  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
360  int min_m_n = min( m_A, n_A );
361  int i;
362 
363  for ( i = 0; i < min_m_n; ++i )
364  {
365  dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
366  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
367  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
368  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
369 
370  int mn_behind = i;
371 
372  /*------------------------------------------------------------*/
373 
374  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
375  bl1_ztrsv( BLIS1_LOWER_TRIANGULAR,
376  BLIS1_NO_TRANSPOSE,
377  BLIS1_UNIT_DIAG,
378  mn_behind,
379  A00, rs_A, cs_A,
380  a01, rs_A );
381 
382  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
383  bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
384  BLIS1_TRANSPOSE,
385  BLIS1_NONUNIT_DIAG,
386  mn_behind,
387  A00, rs_A, cs_A,
388  a10t, cs_A );
389 
390  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
391  bl1_zdots( BLIS1_NO_CONJUGATE,
392  mn_behind,
393  buff_m1,
394  a10t, cs_A,
395  a01, rs_A,
396  buff_1,
397  alpha11 );
398 
399  /*------------------------------------------------------------*/
400 
401  }
402 
403  if ( m_A > n_A )
404  {
405  dcomplex* ATL = buff_A;
406  dcomplex* ABL = buff_A + n_A*rs_A;
407 
408  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
409  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
410  // FLA_ONE, ATL, ABL );
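411  bl1_ztrsm( BLIS1_RIGHT,
412  BLIS1_UPPER_TRIANGULAR,
413  BLIS1_NO_TRANSPOSE,
414  BLIS1_NONUNIT_DIAG,
415  m_A - n_A,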
416  n_A,
417  buff_1,
418  ATL, rs_A, cs_A,
419  ABL, rs_A, cs_A );
420  }
421  else if ( m_A < n_A )
422  {
423  dcomplex* ATL = buff_A;
424  dcomplex* ATR = buff_A + m_A*cs_A;
425 
426  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
427  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
428  // FLA_ONE, ATL, ATR );
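429  bl1_ztrsm( BLIS1_LEFT,
430  BLIS1_LOWER_TRIANGULAR,
431  BLIS1_NO_TRANSPOSE,
432  BLIS1_UNIT_DIAG,
433  m_A,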
434  n_A - m_A,
435  buff_1,
436  ATL, rs_A, cs_A,
437  ATR, rs_A, cs_A );
438  }
439 
440  return FLA_SUCCESS;
441 }
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition: bl1_dots.c:56
void bl1_ztrsm(side1_t side, uplo1_t uplo, trans1_t trans, diag1_t diag, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *b, int b_rs, int b_cs)
Definition: bl1_trsm.c:369
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trsv.c:177

References bl1_zdots(), bl1_ztrsm(), bl1_ztrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var1().

◆ FLA_LU_nopiv_opz_var2()

FLA_Error FLA_LU_nopiv_opz_var2 ( int  m_A,
int  n_A,
dcomplex *  A,
int  rs_A,
int  cs_A 
)
321 {
322  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
323  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
324  int min_m_n = min( m_A, n_A );
325  int i;
326 
327  for ( i = 0; i < min_m_n; ++i )
328  {
329  dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
330  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
331  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
332  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
333  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
334  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
335 
336  int n_ahead = n_A - i - 1;
337  int mn_behind = i;
338 
339  /*------------------------------------------------------------*/
340 
341  // FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE, FLA_NONUNIT_DIAG, A00, a10t );
342  bl1_ztrsv( BLIS1_UPPER_TRIANGULAR,
343  BLIS1_TRANSPOSE,
344  BLIS1_NONUNIT_DIAG,
345  mn_behind,
346  A00, rs_A, cs_A,
347  a10t, cs_A );
348 
349  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
350  bl1_zdots( BLIS1_NO_CONJUGATE,
351  mn_behind,
352  buff_m1,
353  a10t, cs_A,
354  a01, rs_A,
355  buff_1,
356  alpha11 );
357 
358  // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
359  bl1_zgemv( BLIS1_TRANSPOSE,
360  BLIS1_NO_CONJUGATE,
361  mn_behind,
362  n_ahead,
363  buff_m1,
364  A02, rs_A, cs_A,
365  a10t, cs_A,
366  buff_1,
367  a12t, cs_A );
368 
369  /*------------------------------------------------------------*/
370 
371  }
372 
373  if ( m_A > n_A )
374  {
375  dcomplex* ATL = buff_A;
376  dcomplex* ABL = buff_A + n_A*rs_A;
377 
378  // FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
379  // FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
380  // FLA_ONE, ATL, ABL );
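381  bl1_ztrsm( BLIS1_RIGHT,
382  BLIS1_UPPER_TRIANGULAR,
383  BLIS1_NO_TRANSPOSE,
384  BLIS1_NONUNIT_DIAG,
385  m_A - n_A,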
386  n_A,
387  buff_1,
388  ATL, rs_A, cs_A,
389  ABL, rs_A, cs_A );
390  }
391 
392  return FLA_SUCCESS;
393 }
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255

References bl1_zdots(), bl1_zgemv(), bl1_ztrsm(), bl1_ztrsv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_RIGHT, BLIS1_TRANSPOSE, BLIS1_UPPER_TRIANGULAR, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var2().

◆ FLA_LU_nopiv_opz_var3()

FLA_Error FLA_LU_nopiv_opz_var3 ( int  m_A,
int  n_A,
dcomplex *  A,
int  rs_A,
int  cs_A 
)
339 {
340  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
341  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
342  int min_m_n = min( m_A, n_A );
343  int i;
344 
345  for ( i = 0; i < min_m_n; ++i )
346  {
347  dcomplex* A00 = buff_A + (0 )*cs_A + (0 )*rs_A;
348  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
349  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
350  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
351  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
352  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
353 
354  int m_ahead = m_A - i - 1;
355  int mn_behind = i;
356 
357  /*------------------------------------------------------------*/
358 
359  // FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_UNIT_DIAG, A00, a01 );
360  bl1_ztrsv( BLIS1_LOWER_TRIANGULAR,
361  BLIS1_NO_TRANSPOSE,
362  BLIS1_UNIT_DIAG,
363  mn_behind,
364  A00, rs_A, cs_A,
365  a01, rs_A );
366 
367  // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
368  bl1_zdots( BLIS1_NO_CONJUGATE,
369  mn_behind,
370  buff_m1,
371  a10t, cs_A,
372  a01, rs_A,
373  buff_1,
374  alpha11 );
375 
376  // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
377  bl1_zgemv( BLIS1_NO_TRANSPOSE,
378  BLIS1_NO_CONJUGATE,
379  m_ahead,
380  mn_behind,
381  buff_m1,
382  A20, rs_A, cs_A,
383  a01, rs_A,
384  buff_1,
385  a21, rs_A );
386 
387  // FLA_Inv_scal_external( alpha11, a21 );
388  bl1_zinvscalv( BLIS1_NO_CONJUGATE,
389  m_ahead,
390  alpha11,
391  a21, rs_A );
392 
393  /*------------------------------------------------------------*/
394 
395  }
396 
397  if ( m_A < n_A )
398  {
399  dcomplex* ATL = buff_A;
400  dcomplex* ATR = buff_A + m_A*cs_A;
401 
402  // FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
403  // FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
404  // FLA_ONE, ATL, ATR );
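405  bl1_ztrsm( BLIS1_LEFT,
406  BLIS1_LOWER_TRIANGULAR,
407  BLIS1_NO_TRANSPOSE,
408  BLIS1_UNIT_DIAG,
409  m_A,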
410  n_A - m_A,
411  buff_1,
412  ATL, rs_A, cs_A,
413  ATR, rs_A, cs_A );
414  }
415 
416  return FLA_SUCCESS;
417 }
void bl1_zinvscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_invscalv.c:78

References bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), bl1_ztrsm(), bl1_ztrsv(), BLIS1_LEFT, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_UNIT_DIAG, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var3().

◆ FLA_LU_nopiv_opz_var4()

/*
 * FLA_LU_nopiv_opz_var4
 *
 * Variant 4 of the LU-without-pivoting kernels for a raw dcomplex buffer.
 * Element (r, c) of the m_A x n_A matrix is addressed as
 * A + c*cs_A + r*rs_A, i.e. rs_A / cs_A are the row / column strides.
 *
 * For each i in [0, min(m_A, n_A)) the loop updates the current diagonal
 * element, the rest of row i and the rest of column i:
 *   alpha11 = alpha11 - a10t * a01
 *   a12t    = a12t - a10t * A02
 *   a21     = a21 - A20 * a01
 *   a21     = a21 / alpha11
 *
 * NOTE(review): alpha11 is used as a divisor without any zero check being
 * visible in this routine.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_LU_nopiv_opz_var4( int m_A,
                                 int n_A,
                                 dcomplex* A,
                                 int rs_A,
                                 int cs_A )
{
  /* The body addresses the matrix through the name buff_A. */
  dcomplex* buff_A  = A;
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
  int       min_m_n = min( m_A, n_A );
  int       i;

  for ( i = 0; i < min_m_n; ++i )
  {
    /* Views into the matrix relative to the current diagonal element. */
    dcomplex* a10t     = buff_A + (0  )*cs_A + (i  )*rs_A;
    dcomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* a01      = buff_A + (i  )*cs_A + (0  )*rs_A;
    dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;

    int       m_ahead   = m_A - i - 1;  /* rows below the diagonal element     */
    int       n_ahead   = n_A - i - 1;  /* columns right of the diagonal       */
    int       mn_behind = i;            /* rows/columns already processed      */

    /*------------------------------------------------------------*/

    // alpha11 = alpha11 - a10t * a01
    // FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );
    bl1_zdots( BLIS1_NO_CONJUGATE,
               mn_behind,
               buff_m1,
               a10t, cs_A,
               a01,  rs_A,
               buff_1,
               alpha11 );

    // a12t = a12t - a10t * A02
    // FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t, FLA_ONE, a12t );
    bl1_zgemv( BLIS1_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               mn_behind,
               n_ahead,
               buff_m1,
               A02,  rs_A, cs_A,
               a10t, cs_A,
               buff_1,
               a12t, cs_A );

    // a21 = a21 - A20 * a01
    // FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01, FLA_ONE, a21 );
    bl1_zgemv( BLIS1_NO_TRANSPOSE,
               BLIS1_NO_CONJUGATE,
               m_ahead,
               mn_behind,
               buff_m1,
               A20, rs_A, cs_A,
               a01, rs_A,
               buff_1,
               a21, rs_A );

    // a21 = a21 / alpha11
    // FLA_Inv_scal_external( alpha11, a21 );
    bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                   m_ahead,
                   alpha11,
                   a21, rs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}

References bl1_zdots(), bl1_zgemv(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_TRANSPOSE, FLA_MINUS_ONE, FLA_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var4().

◆ FLA_LU_nopiv_opz_var5()

/*
 * FLA_LU_nopiv_opz_var5
 *
 * Variant 5 of the LU-without-pivoting kernels for a raw dcomplex buffer.
 * Element (r, c) of the m_A x n_A matrix is addressed as
 * A + c*cs_A + r*rs_A, i.e. rs_A / cs_A are the row / column strides.
 *
 * For each i in [0, min(m_A, n_A)) the loop scales the column below the
 * diagonal element and applies a rank-1 update to the trailing block:
 *   a21 = a21 / alpha11
 *   A22 = A22 - a21 * a12t
 *
 * NOTE(review): alpha11 is used as a divisor without any zero check being
 * visible in this routine.
 *
 * Always returns FLA_SUCCESS.
 */
FLA_Error FLA_LU_nopiv_opz_var5( int m_A,
                                 int n_A,
                                 dcomplex* A,
                                 int rs_A,
                                 int cs_A )
{
  /* The body addresses the matrix through the name buff_A. */
  dcomplex* buff_A  = A;
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
  int       min_m_n = min( m_A, n_A );
  int       i;

  for ( i = 0; i < min_m_n; ++i )
  {
    /* Views into the matrix relative to the current diagonal element. */
    dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    int       m_ahead  = m_A - i - 1;  /* rows below the diagonal element */
    int       n_ahead  = n_A - i - 1;  /* columns right of the diagonal   */

    /*------------------------------------------------------------*/

    // a21 = a21 / alpha11
    // FLA_Inv_scal_external( alpha11, a21 );
    bl1_zinvscalv( BLIS1_NO_CONJUGATE,
                   m_ahead,
                   alpha11,
                   a21, rs_A );

    // A22 = A22 - a21 * a12t
    // FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );
    bl1_zger( BLIS1_NO_CONJUGATE,
              BLIS1_NO_CONJUGATE,
              m_ahead,
              n_ahead,
              buff_m1,
              a21,  rs_A,
              a12t, cs_A,
              A22,  rs_A, cs_A );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194

References bl1_zger(), bl1_zinvscalv(), BLIS1_NO_CONJUGATE, FLA_MINUS_ONE, and i.

Referenced by FLA_LU_nopiv_opt_var5().

◆ FLA_LU_nopiv_unb_var1()

/*
 * FLA_LU_nopiv_unb_var1
 *
 * Variant 1 of the LU-without-pivoting routines operating on a FLA_Obj.
 * A is swept from its top-left corner, exposing one row and one column per
 * iteration (the 1, 1 sizes given to the repartitioning call), until the
 * top-left quadrant spans the full length or the full width of A.
 *
 * Work done per iteration:
 *   a01     = trilu( A00 ) \ a01
 *   a10t    = a10t / triu( A00 )
 *   alpha11 = alpha11 - a10t * a01
 *
 * After the sweep, a remaining bottom-left or top-right panel is completed
 * with a triangular solve against ATL.
 *
 * Always returns FLA_SUCCESS; the results of the FLA_* calls are not
 * inspected here.
 */
FLA_Error FLA_LU_nopiv_unb_var1( FLA_Obj A )
{
  /* Quadrants of the 2x2 partitioning. */
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;

  /* Pieces of the 3x3 repartitioning. */
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  /* Start with an empty top-left quadrant. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
          FLA_Obj_width( ATL )  < FLA_Obj_width( A ) )
  {
    /* Expose the next row and column out of the bottom-right quadrant. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* a01 = trilu( A00 ) \ a01 */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_UNIT_DIAG, A00, a01 );

    /* a10t = a10t / triu( A00 ) */
    FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                       FLA_NONUNIT_DIAG, A00, a10t );

    /* alpha11 = alpha11 - a10t * a01 */
    FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );

    /* Fold the exposed row and column into the top-left quadrant. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );
  }

  if ( FLA_Obj_length( ABL ) > 0 )
  {
    /* Rows remain below ATL: ABL = ABL / triu( ATL ) */
    FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
                       FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                       FLA_ONE, ATL, ABL );
  }
  else if ( FLA_Obj_width( ATR ) > 0 )
  {
    /* Columns remain right of ATL: ATR = trilu( ATL ) \ ATR */
    FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
                       FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
                       FLA_ONE, ATL, ATR );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Dots_external(FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho)
Definition: FLA_Dots_external.c:13
FLA_Error FLA_Trsv_external(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x)
Definition: FLA_Trsv_external.c:13

References FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsv_external().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_unb_var2()

/*
 * FLA_LU_nopiv_unb_var2
 *
 * Variant 2 of the LU-without-pivoting routines operating on a FLA_Obj.
 * A is swept from its top-left corner, exposing one row and one column per
 * iteration (the 1, 1 sizes given to the repartitioning call), until the
 * top-left quadrant spans the full length or the full width of A.
 *
 * Work done per iteration:
 *   a10t    = a10t / triu( A00 )
 *   alpha11 = alpha11 - a10t * a01
 *   a12t    = a12t - a10t * A02
 *
 * After the sweep, a remaining bottom-left panel is completed with
 *   ABL = ABL / triu( ATL )
 *
 * Always returns FLA_SUCCESS; the results of the FLA_* calls are not
 * inspected here.
 */
FLA_Error FLA_LU_nopiv_unb_var2( FLA_Obj A )
{
  /* Quadrants of the 2x2 partitioning. */
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;

  /* Pieces of the 3x3 repartitioning. */
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  /* Start with an empty top-left quadrant. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
          FLA_Obj_width( ATL )  < FLA_Obj_width( A ) )
  {
    /* Expose the next row and column out of the bottom-right quadrant. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* a10t = a10t / triu( A00 ) */
    FLA_Trsv_external( FLA_UPPER_TRIANGULAR, FLA_TRANSPOSE,
                       FLA_NONUNIT_DIAG, A00, a10t );

    /* alpha11 = alpha11 - a10t * a01 */
    FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );

    /* a12t = a12t - a10t * A02 */
    FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t,
                       FLA_ONE, a12t );

    /* Fold the exposed row and column into the top-left quadrant. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );
  }

  if ( FLA_Obj_length( ABL ) > 0 )
  {
    /* Rows remain below ATL: ABL = ABL / triu( ATL ) */
    FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
                       FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
                       FLA_ONE, ATL, ABL );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Gemv_external(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv_external.c:13

References FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsv_external().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_unb_var3()

/*
 * FLA_LU_nopiv_unb_var3
 *
 * Variant 3 of the LU-without-pivoting routines operating on a FLA_Obj.
 * A is swept from its top-left corner, exposing one row and one column per
 * iteration (the 1, 1 sizes given to the repartitioning call), until the
 * top-left quadrant spans the full length or the full width of A.
 *
 * Work done per iteration:
 *   a01     = trilu( A00 ) \ a01
 *   alpha11 = alpha11 - a10t * a01
 *   a21     = a21 - A20 * a01
 *   a21     = a21 / alpha11
 *
 * After the sweep, a remaining top-right panel is completed with
 *   ATR = trilu( ATL ) \ ATR
 *
 * Always returns FLA_SUCCESS; the results of the FLA_* calls are not
 * inspected here.
 */
FLA_Error FLA_LU_nopiv_unb_var3( FLA_Obj A )
{
  /* Quadrants of the 2x2 partitioning. */
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;

  /* Pieces of the 3x3 repartitioning. */
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  /* Start with an empty top-left quadrant. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
          FLA_Obj_width( ATL )  < FLA_Obj_width( A ) )
  {
    /* Expose the next row and column out of the bottom-right quadrant. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* a01 = trilu( A00 ) \ a01 */
    FLA_Trsv_external( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE,
                       FLA_UNIT_DIAG, A00, a01 );

    /* alpha11 = alpha11 - a10t * a01 */
    FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );

    /* a21 = a21 - A20 * a01 */
    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01,
                       FLA_ONE, a21 );

    /* a21 = a21 / alpha11 */
    FLA_Inv_scal_external( alpha11, a21 );

    /* Fold the exposed row and column into the top-left quadrant. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );
  }

  if ( FLA_Obj_width( ATR ) > 0 )
  {
    /* Columns remain right of ATL: ATR = trilu( ATL ) \ ATR */
    FLA_Trsm_external( FLA_LEFT, FLA_LOWER_TRIANGULAR,
                       FLA_NO_TRANSPOSE, FLA_UNIT_DIAG,
                       FLA_ONE, ATL, ATR );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Inv_scal_external(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scal_external.c:13

References FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_Inv_scal_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_Trsv_external().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_unb_var4()

/*
 * FLA_LU_nopiv_unb_var4
 *
 * Variant 4 of the LU-without-pivoting routines operating on a FLA_Obj.
 * A is swept from its top-left corner, exposing one row and one column per
 * iteration (the 1, 1 sizes given to the repartitioning call), until the
 * top-left quadrant spans the full length or the full width of A.
 *
 * Work done per iteration:
 *   alpha11 = alpha11 - a10t * a01
 *   a12t    = a12t - a10t * A02
 *   a21     = a21 - A20 * a01
 *   a21     = a21 / alpha11
 *
 * Always returns FLA_SUCCESS; the results of the FLA_* calls are not
 * inspected here.
 */
FLA_Error FLA_LU_nopiv_unb_var4( FLA_Obj A )
{
  /* Quadrants of the 2x2 partitioning. */
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;

  /* Pieces of the 3x3 repartitioning. */
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  /* Start with an empty top-left quadrant. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
          FLA_Obj_width( ATL )  < FLA_Obj_width( A ) )
  {
    /* Expose the next row and column out of the bottom-right quadrant. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* alpha11 = alpha11 - a10t * a01 */
    FLA_Dots_external( FLA_MINUS_ONE, a10t, a01, FLA_ONE, alpha11 );

    /* a12t = a12t - a10t * A02 */
    FLA_Gemv_external( FLA_TRANSPOSE, FLA_MINUS_ONE, A02, a10t,
                       FLA_ONE, a12t );

    /* a21 = a21 - A20 * a01 */
    FLA_Gemv_external( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, a01,
                       FLA_ONE, a21 );

    /* a21 = a21 / alpha11 */
    FLA_Inv_scal_external( alpha11, a21 );

    /* Fold the exposed row and column into the top-left quadrant. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}

References FLA_Cont_with_3x3_to_2x2(), FLA_Dots_external(), FLA_Gemv_external(), FLA_Inv_scal_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_2x2(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_LU_nopiv_internal().

◆ FLA_LU_nopiv_unb_var5()

/*
 * FLA_LU_nopiv_unb_var5
 *
 * Variant 5 of the LU-without-pivoting routines operating on a FLA_Obj.
 * A is swept from its top-left corner, exposing one row and one column per
 * iteration (the 1, 1 sizes given to the repartitioning call), until the
 * top-left quadrant spans the full length or the full width of A.
 *
 * Work done per iteration:
 *   a21 = a21 / alpha11
 *   A22 = A22 - a21 * a12t
 *
 * Always returns FLA_SUCCESS; the results of the FLA_* calls are not
 * inspected here.
 */
FLA_Error FLA_LU_nopiv_unb_var5( FLA_Obj A )
{
  /* Quadrants of the 2x2 partitioning. */
  FLA_Obj ATL, ATR;
  FLA_Obj ABL, ABR;

  /* Pieces of the 3x3 repartitioning. */
  FLA_Obj A00,  a01,     A02;
  FLA_Obj a10t, alpha11, a12t;
  FLA_Obj A20,  a21,     A22;

  /* Start with an empty top-left quadrant. */
  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR,
                0, 0, FLA_TL );

  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) &&
          FLA_Obj_width( ATL )  < FLA_Obj_width( A ) )
  {
    /* Expose the next row and column out of the bottom-right quadrant. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,    &A00,  &a01,     &A02,
                                        &a10t, &alpha11, &a12t,
                           ABL, ABR,    &A20,  &a21,     &A22,
                           1, 1, FLA_BR );

    /* a21 = a21 / alpha11 */
    FLA_Inv_scal_external( alpha11, a21 );

    /* A22 = A22 - a21 * a12t */
    FLA_Ger_external( FLA_MINUS_ONE, a21, a12t, A22 );

    /* Fold the exposed row and column into the top-left quadrant. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,    A00,  a01,     A02,
                                             a10t, alpha11, a12t,
                              &ABL, &ABR,    A20,  a21,     A22,
                              FLA_TL );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Ger_external(FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition: FLA_Ger_external.c:13

References FLA_Cont_with_3x3_to_2x2(), FLA_Ger_external(), FLA_Inv_scal_external(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_width(), FLA_Part_2x2(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_LU_nopiv_internal().