libflame  revision_anchor
Functions
FLA_Sylv_hh.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Sylv_hh_blk_var1 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var2 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var3 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var4 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var5 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var6 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var7 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var8 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var9 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var10 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var11 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var12 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var13 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var14 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var15 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var16 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var17 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_blk_var18 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
 
FLA_Error FLA_Sylv_hh_opt_var1 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var2 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var3 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var4 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var5 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var6 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var7 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var8 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var9 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var10 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var11 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var12 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var13 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var14 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var15 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var16 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var17 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_opt_var18 (FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
 
FLA_Error FLA_Sylv_hh_ops_var1 (float sgn, int m_C, int n_C, float *buff_A, int rs_A, int cs_A, float *buff_B, int rs_B, int cs_B, float *buff_C, int rs_C, int cs_C, float *buff_scale, int *info)
 
FLA_Error FLA_Sylv_hh_opd_var1 (double sgn, int m_C, int n_C, double *buff_A, int rs_A, int cs_A, double *buff_B, int rs_B, int cs_B, double *buff_C, int rs_C, int cs_C, double *buff_scale, int *info)
 
FLA_Error FLA_Sylv_hh_opc_var1 (float sgn, int m_C, int n_C, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_B, int rs_B, int cs_B, scomplex *buff_C, int rs_C, int cs_C, scomplex *buff_scale, int *info)
 
FLA_Error FLA_Sylv_hh_opz_var1 (double sgn, int m_C, int n_C, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_B, int rs_B, int cs_B, dcomplex *buff_C, int rs_C, int cs_C, dcomplex *buff_scale, int *info)
 

Function Documentation

◆ FLA_Sylv_hh_blk_var1()

FLA_Error FLA_Sylv_hh_blk_var1 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C12 = sylv( A11', B22', C12 - A01' * C02 );
71  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
72  FLA_MINUS_ONE, A01, C02, FLA_ONE, C12,
73  FLA_Cntl_sub_gemm1( cntl ) );
74 
75  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
76  isgn, A11, B22, C12, scale,
77  FLA_Cntl_sub_sylv1( cntl ) );
78 
79  // C01 = sylv( A00', B11', C01 -/+ C02 * B12' );
80  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
81  FLA_NEGATE( isgn ), C02, B12, FLA_ONE, C01,
82  FLA_Cntl_sub_gemm2( cntl ) );
83 
84  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
85  isgn, A00, B11, C01, scale,
86  FLA_Cntl_sub_sylv2( cntl ) );
87 
88  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
89  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
90  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
91  FLA_Cntl_sub_gemm3( cntl ) );
92 
93  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
94  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
95  FLA_Cntl_sub_gemm4( cntl ) );
96 
97  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
98  isgn, A11, B11, C11, scale,
99  FLA_Cntl_sub_sylv3( cntl ) );
100 
101  /*------------------------------------------------------------*/
102 
103  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
104  A10, A11, /**/ A12,
105  /* ************** */ /* ****************** */
106  &ABL, /**/ &ABR, A20, A21, /**/ A22,
107  FLA_TL );
108 
109  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
110  /* ************** */ /* ****************** */
111  B10, /**/ B11, B12,
112  &BBL, /**/ &BBR, B20, /**/ B21, B22,
113  FLA_BR );
114 
115  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
116  C10, /**/ C11, C12,
117  /* ************** */ /* ****************** */
118  &CBL, /**/ &CBR, C20, /**/ C21, C22,
119  FLA_TR );
120 
121  }
122 
123  return FLA_SUCCESS;
124 }
FLA_Error FLA_Gemm_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition: FLA_Gemm_internal.c:16
FLA_Error FLA_Sylv_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale, fla_sylv_t *cntl)
Definition: FLA_Sylv_internal.c:16
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234
unsigned long dim_t
Definition: FLA_type_defs.h:71
Definition: FLA_type_defs.h:159

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var10()

FLA_Error FLA_Sylv_hh_blk_var10 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C01 = sylv( A00', B11', C01 );
71  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  isgn, A00, B11, C01, scale,
73  FLA_Cntl_sub_sylv1( cntl ) );
74 
75  // C11 = sylv( A11', B11', C11 - A01' * C01 );
76  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
77  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
78  FLA_Cntl_sub_gemm1( cntl ) );
79 
80  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
81  isgn, A11, B11, C11, scale,
82  FLA_Cntl_sub_sylv2( cntl ) );
83 
84  // C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 );
85  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
86  FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
87  FLA_Cntl_sub_gemm2( cntl ) );
88 
89  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
90  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
91  FLA_Cntl_sub_gemm3( cntl ) );
92 
93  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
94  isgn, A22, B11, C21, scale,
95  FLA_Cntl_sub_sylv3( cntl ) );
96 
97  // C20 = C20 -/+ C21 * B01';
98  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
99  FLA_NEGATE( isgn ), C21, B01, FLA_ONE, C20,
100  FLA_Cntl_sub_gemm4( cntl ) );
101 
102  // C10 = C10 -/+ C11 * B01';
103  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
104  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
105  FLA_Cntl_sub_gemm5( cntl ) );
106 
107  // C00 = C00 -/+ C01 * B01';
108  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
109  FLA_NEGATE( isgn ), C01, B01, FLA_ONE, C00,
110  FLA_Cntl_sub_gemm6( cntl ) );
111 
112  /*------------------------------------------------------------*/
113 
114  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
115  A10, A11, /**/ A12,
116  /* ************** */ /* ****************** */
117  &ABL, /**/ &ABR, A20, A21, /**/ A22,
118  FLA_TL );
119 
120  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
121  /* ************** */ /* ****************** */
122  B10, /**/ B11, B12,
123  &BBL, /**/ &BBR, B20, /**/ B21, B22,
124  FLA_BR );
125 
126  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
127  C10, /**/ C11, C12,
128  /* ************** */ /* ****************** */
129  &CBL, /**/ &CBR, C20, /**/ C21, C22,
130  FLA_TR );
131 
132  }
133 
134  return FLA_SUCCESS;
135 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var11()

FLA_Error FLA_Sylv_hh_blk_var11 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C12 = sylv( A11', B22', C12 );
71  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  isgn, A11, B22, C12, scale,
73  FLA_Cntl_sub_sylv1( cntl ) );
74 
75  // C22 = C22 - A12' * C12;
76  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
77  FLA_MINUS_ONE, A12, C12, FLA_ONE, C22,
78  FLA_Cntl_sub_gemm1( cntl ) );
79 
80  // C11 = sylv( A11', B11', C11 -/+ C12 * B12' );
81  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
82  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
83  FLA_Cntl_sub_gemm2( cntl ) );
84 
85  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
86  isgn, A11, B11, C11, scale,
87  FLA_Cntl_sub_sylv2( cntl ) );
88 
89  // C21 = C21 - A12' * C11;
90  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
91  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
92  FLA_Cntl_sub_gemm3( cntl ) );
93 
94  // C10 = sylv( A11', B00', C10 -/+ C12 * B02' -/+ C11 * B01' );
95  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
96  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
97  FLA_Cntl_sub_gemm4( cntl ) );
98 
99  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
100  FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
101  FLA_Cntl_sub_gemm5( cntl ) );
102 
103  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
104  isgn, A11, B00, C10, scale,
105  FLA_Cntl_sub_sylv3( cntl ) );
106 
107  // C20 = C20 - A12' * C10;
108  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
109  FLA_MINUS_ONE, A12, C10, FLA_ONE, C20,
110  FLA_Cntl_sub_gemm6( cntl ) );
111 
112  /*------------------------------------------------------------*/
113 
114  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
115  A10, A11, /**/ A12,
116  /* ************** */ /* ****************** */
117  &ABL, /**/ &ABR, A20, A21, /**/ A22,
118  FLA_TL );
119 
120  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
121  /* ************** */ /* ****************** */
122  B10, /**/ B11, B12,
123  &BBL, /**/ &BBR, B20, /**/ B21, B22,
124  FLA_BR );
125 
126  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
127  C10, /**/ C11, C12,
128  /* ************** */ /* ****************** */
129  &CBL, /**/ &CBR, C20, /**/ C21, C22,
130  FLA_TR );
131 
132  }
133 
134  return FLA_SUCCESS;
135 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var12()

FLA_Error FLA_Sylv_hh_blk_var12 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C11 = sylv( A11', B11', C11 - A01' * C01 );
71  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
72  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
73  FLA_Cntl_sub_gemm1( cntl ) );
74 
75  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
76  isgn, A11, B11, C11, scale,
77  FLA_Cntl_sub_sylv1( cntl ) );
78 
79  // C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 );
80  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
81  FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
82  FLA_Cntl_sub_gemm2( cntl ) );
83 
84  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
85  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
86  FLA_Cntl_sub_gemm3( cntl ) );
87 
88  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
89  isgn, A22, B11, C21, scale,
90  FLA_Cntl_sub_sylv2( cntl ) );
91 
92  // C20 = C20 -/+ C21 * B01';
93  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
94  FLA_NEGATE( isgn ), C21, B01, FLA_ONE, C20,
95  FLA_Cntl_sub_gemm4( cntl ) );
96 
97  // C10 = sylv( A11', B00', C10 - A01' * C00 -/+ C11 * B01' );
98  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
99  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
100  FLA_Cntl_sub_gemm5( cntl ) );
101 
102  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
103  FLA_MINUS_ONE, A01, C00, FLA_ONE, C10,
104  FLA_Cntl_sub_gemm6( cntl ) );
105 
106  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
107  isgn, A11, B00, C10, scale,
108  FLA_Cntl_sub_sylv3( cntl ) );
109 
110  /*------------------------------------------------------------*/
111 
112  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
113  A10, A11, /**/ A12,
114  /* ************** */ /* ****************** */
115  &ABL, /**/ &ABR, A20, A21, /**/ A22,
116  FLA_TL );
117 
118  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
119  /* ************** */ /* ****************** */
120  B10, /**/ B11, B12,
121  &BBL, /**/ &BBR, B20, /**/ B21, B22,
122  FLA_BR );
123 
124  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
125  C10, /**/ C11, C12,
126  /* ************** */ /* ****************** */
127  &CBL, /**/ &CBR, C20, /**/ C21, C22,
128  FLA_TR );
129 
130  }
131 
132  return FLA_SUCCESS;
133 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var13()

FLA_Error FLA_Sylv_hh_blk_var13 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C11 = sylv( A11', B11', C11 -/+ C12 * B12' );
71  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
73  FLA_Cntl_sub_gemm1( cntl ) );
74 
75  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
76  isgn, A11, B11, C11, scale,
77  FLA_Cntl_sub_sylv1( cntl ) );
78 
79  // C21 = sylv( A22', B11', C21 - A12' * C11 -/+ C22 * B12' );
80  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
81  FLA_NEGATE( isgn ), C22, B12, FLA_ONE, C21,
82  FLA_Cntl_sub_gemm2( cntl ) );
83 
84  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
85  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
86  FLA_Cntl_sub_gemm3( cntl ) );
87 
88  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
89  isgn, A22, B11, C21, scale,
90  FLA_Cntl_sub_sylv2( cntl ) );
91 
92  // C10 = sylv( A11', B00', C10 -/+ C12 * B02' -/+ C11 * B01' );
93  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
94  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
95  FLA_Cntl_sub_gemm4( cntl ) );
96 
97  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
98  FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
99  FLA_Cntl_sub_gemm5( cntl ) );
100 
101  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
102  isgn, A11, B00, C10, scale,
103  FLA_Cntl_sub_sylv3( cntl ) );
104 
105  // C20 = C20 - A12' * C10;
106  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
107  FLA_MINUS_ONE, A12, C10, FLA_ONE, C20,
108  FLA_Cntl_sub_gemm6( cntl ) );
109 
110  /*------------------------------------------------------------*/
111 
112  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
113  A10, A11, /**/ A12,
114  /* ************** */ /* ****************** */
115  &ABL, /**/ &ABR, A20, A21, /**/ A22,
116  FLA_TL );
117 
118  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
119  /* ************** */ /* ****************** */
120  B10, /**/ B11, B12,
121  &BBL, /**/ &BBR, B20, /**/ B21, B22,
122  FLA_BR );
123 
124  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
125  C10, /**/ C11, C12,
126  /* ************** */ /* ****************** */
127  &CBL, /**/ &CBR, C20, /**/ C21, C22,
128  FLA_TR );
129 
130  }
131 
132  return FLA_SUCCESS;
133 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var14()

FLA_Error FLA_Sylv_hh_blk_var14 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C11 = sylv( A11', B11', C11 );
71  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  isgn, A11, B11, C11, scale,
73  FLA_Cntl_sub_sylv1( cntl ) );
74 
75  // C21 = sylv( A22', B11', C21 - A12' * C11 );
76  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
77  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
78  FLA_Cntl_sub_gemm1( cntl ) );
79 
80  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
81  isgn, A22, B11, C21, scale,
82  FLA_Cntl_sub_sylv2( cntl ) );
83 
84  // C10 = sylv( A11', B00', C10 -/+ C11 * B01' );
85  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
86  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
87  FLA_Cntl_sub_gemm2( cntl ) );
88 
89  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
90  isgn, A11, B00, C10, scale,
91  FLA_Cntl_sub_sylv3( cntl ) );
92 
93  // C20 = C20 - A12' * C10 -/+ C21 * B01';
94  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
95  FLA_NEGATE( isgn ), C21, B01, FLA_ONE, C20,
96  FLA_Cntl_sub_gemm3( cntl ) );
97 
98  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
99  FLA_MINUS_ONE, A12, C10, FLA_ONE, C20,
100  FLA_Cntl_sub_gemm4( cntl ) );
101 
102  /*------------------------------------------------------------*/
103 
104  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
105  A10, A11, /**/ A12,
106  /* ************** */ /* ****************** */
107  &ABL, /**/ &ABR, A20, A21, /**/ A22,
108  FLA_TL );
109 
110  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
111  /* ************** */ /* ****************** */
112  B10, /**/ B11, B12,
113  &BBL, /**/ &BBR, B20, /**/ B21, B22,
114  FLA_BR );
115 
116  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
117  C10, /**/ C11, C12,
118  /* ************** */ /* ****************** */
119  &CBL, /**/ &CBR, C20, /**/ C21, C22,
120  FLA_TR );
121 
122  }
123 
124  return FLA_SUCCESS;
125 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var15()

FLA_Error FLA_Sylv_hh_blk_var15 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *cntl
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj CT, C0,
20  CB, C1,
21  C2;
22 
23  dim_t b;
24 
25  FLA_Part_2x2( A, &ATL, &ATR,
26  &ABL, &ABR, 0, 0, FLA_TL );
27 
28  FLA_Part_2x1( C, &CT,
29  &CB, 0, FLA_TOP );
30 
31  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
32 
33  b = FLA_Determine_blocksize( CB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
34 
35  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
36  /* ************* */ /* ******************** */
37  &A10, /**/ &A11, &A12,
38  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
39  b, b, FLA_BR );
40 
41  FLA_Repart_2x1_to_3x1( CT, &C0,
42  /* ** */ /* ** */
43  &C1,
44  CB, &C2, b, FLA_BOTTOM );
45 
46  // Loop Invariant:
47  // CT = sylv( ATL', B', CT )
48  // CB = CB
49 
50  /*------------------------------------------------------------*/
51 
52  // C1 = sylv( A11', B', C1 - A01' * C0 );
53  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
54  FLA_MINUS_ONE, A01, C0, FLA_ONE, C1,
55  FLA_Cntl_sub_gemm1( cntl ) );
56 
57  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
58  isgn, A11, B, C1, scale,
59  FLA_Cntl_sub_sylv1( cntl ) );
60 
61  /*------------------------------------------------------------*/
62 
63  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
64  A10, A11, /**/ A12,
65  /* ************** */ /* ****************** */
66  &ABL, /**/ &ABR, A20, A21, /**/ A22,
67  FLA_TL );
68 
69  FLA_Cont_with_3x1_to_2x1( &CT, C0,
70  C1,
71  /* ** */ /* ** */
72  &CB, C2, FLA_TOP );
73 
74  }
75 
76  return FLA_SUCCESS;
77 }
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76

References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var16()

FLA_Error FLA_Sylv_hh_blk_var16 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj CT, C0,
22  CB, C1,
23  C2;
24 
25  dim_t b;
26 
27  FLA_Part_2x2( A, &ATL, &ATR,
28  &ABL, &ABR, 0, 0, FLA_TL );
29 
30  FLA_Part_2x1( C, &CT,
31  &CB, 0, FLA_TOP );
32 
33  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
34 
35  b = FLA_Determine_blocksize( CB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
36 
37  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
38  /* ************* */ /* ******************** */
39  &A10, /**/ &A11, &A12,
40  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
41  b, b, FLA_BR );
42 
43  FLA_Repart_2x1_to_3x1( CT, &C0,
44  /* ** */ /* ** */
45  &C1,
46  CB, &C2, b, FLA_BOTTOM );
47 
48  // Loop Invariant:
49  // CT = sylv( ATL', B', CT )
50  // CB = CB - ATR' * sylv( ATL', B', CT )
51 
52  /*------------------------------------------------------------*/
53 
54  // C1 = sylv( A11', B', C1 );
55  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
56  isgn, A11, B, C1, scale,
57  FLA_Cntl_sub_sylv1( cntl ) );
58 
59  // C2 = C2 - A12' * C1;
60  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
61  FLA_MINUS_ONE, A12, C1, FLA_ONE, C2,
62  FLA_Cntl_sub_gemm1( cntl ) );
63 
64  /*------------------------------------------------------------*/
65 
66  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
67  A10, A11, /**/ A12,
68  /* ************** */ /* ****************** */
69  &ABL, /**/ &ABR, A20, A21, /**/ A22,
70  FLA_TL );
71 
72  FLA_Cont_with_3x1_to_2x1( &CT, C0,
73  C1,
74  /* ** */ /* ** */
75  &CB, C2, FLA_TOP );
76 
77  }
78 
79  return FLA_SUCCESS;
80 }

References FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var17()

FLA_Error FLA_Sylv_hh_blk_var17 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
14 {
15  FLA_Obj BTL, BTR, B00, B01, B02,
16  BBL, BBR, B10, B11, B12,
17  B20, B21, B22;
18 
19  FLA_Obj CL, CR, C0, C1, C2;
20 
21  dim_t b;
22 
23  FLA_Part_2x2( B, &BTL, &BTR,
24  &BBL, &BBR, 0, 0, FLA_BR );
25 
26  FLA_Part_1x2( C, &CL, &CR, 0, FLA_RIGHT );
27 
28  while ( FLA_Obj_length( BBR ) < FLA_Obj_length( B ) ){
29 
30  b = FLA_Determine_blocksize( CL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );
31 
32  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
33  &B10, &B11, /**/ &B12,
34  /* ************* */ /* ******************** */
35  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
36  b, b, FLA_TL );
37 
38  FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, &C1, /**/ &C2,
39  b, FLA_LEFT );
40 
41  // Loop Invariant:
42  // CL =
43  // CR =
44 
45  /*------------------------------------------------------------*/
46 
47  // C1 = sylv( A', B11', C1 -/+ C2 * B12' );
48  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
49  FLA_NEGATE( isgn ), C2, B12, FLA_ONE, C1,
50  FLA_Cntl_sub_gemm1( cntl ) );
51 
52  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
53  isgn, A, B11, C1, scale,
54  FLA_Cntl_sub_sylv1( cntl ) );
55 
56  /*------------------------------------------------------------*/
57 
58  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
59  /* ************** */ /* ****************** */
60  B10, /**/ B11, B12,
61  &BBL, /**/ &BBR, B20, /**/ B21, B22,
62  FLA_BR );
63 
64  FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, /**/ C1, C2,
65  FLA_RIGHT );
66  }
67 
68  return FLA_SUCCESS;
69 }
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var18()

FLA_Error FLA_Sylv_hh_blk_var18 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
14 {
15  FLA_Obj BTL, BTR, B00, B01, B02,
16  BBL, BBR, B10, B11, B12,
17  B20, B21, B22;
18 
19  FLA_Obj CL, CR, C0, C1, C2;
20 
21  dim_t b;
22 
23  FLA_Part_2x2( B, &BTL, &BTR,
24  &BBL, &BBR, 0, 0, FLA_BR );
25 
26  FLA_Part_1x2( C, &CL, &CR, 0, FLA_RIGHT );
27 
28  while ( FLA_Obj_length( BBR ) < FLA_Obj_length( B ) ){
29 
30  b = FLA_Determine_blocksize( CL, FLA_LEFT, FLA_Cntl_blocksize( cntl ) );
31 
32  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
33  &B10, &B11, /**/ &B12,
34  /* ************* */ /* ******************** */
35  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
36  b, b, FLA_TL );
37 
38  FLA_Repart_1x2_to_1x3( CL, /**/ CR, &C0, &C1, /**/ &C2,
39  b, FLA_LEFT );
40 
41  // Loop Invariant:
42  // CL =
43  // CR =
44 
45  /*------------------------------------------------------------*/
46 
47  // C1 = sylv( A', B11', C1 );
48  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
49  isgn, A, B11, C1, scale,
50  FLA_Cntl_sub_sylv1( cntl ) );
51 
52  // C0 = C0 -/+ C1 * B01';
53  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
54  FLA_NEGATE( isgn ), C1, B01, FLA_ONE, C0,
55  FLA_Cntl_sub_gemm1( cntl ) );
56 
57  /*------------------------------------------------------------*/
58 
59  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
60  /* ************** */ /* ****************** */
61  B10, /**/ B11, B12,
62  &BBL, /**/ &BBR, B20, /**/ B21, B22,
63  FLA_BR );
64 
65  FLA_Cont_with_1x3_to_1x2( &CL, /**/ &CR, C0, /**/ C1, C2,
66  FLA_RIGHT );
67  }
68 
69  return FLA_SUCCESS;
70 }

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var2()

FLA_Error FLA_Sylv_hh_blk_var2 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C12 = sylv( A11', B22', C12 );
71  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  isgn, A11, B22, C12, scale,
73  FLA_Cntl_sub_sylv1( cntl ) );
74 
75  // C22 = C22 - A12' * C12;
76  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
77  FLA_MINUS_ONE, A12, C12, FLA_ONE, C22,
78  FLA_Cntl_sub_gemm1( cntl ) );
79 
80  // C01 = sylv( A00', B11', C01 -/+ C02 * B12' );
81  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
82  FLA_NEGATE( isgn ), C02, B12, FLA_ONE, C01,
83  FLA_Cntl_sub_gemm2( cntl ) );
84 
85  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
86  isgn, A00, B11, C01, scale,
87  FLA_Cntl_sub_sylv2( cntl ) );
88 
89  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
90  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
91  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
92  FLA_Cntl_sub_gemm3( cntl ) );
93 
94  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
95  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
96  FLA_Cntl_sub_gemm4( cntl ) );
97 
98  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
99  isgn, A11, B11, C11, scale,
100  FLA_Cntl_sub_sylv3( cntl ) );
101 
102  // C21 = C21 - A12' * C11 - A02' * C01;
103  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
104  FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
105  FLA_Cntl_sub_gemm5( cntl ) );
106 
107  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
108  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
109  FLA_Cntl_sub_gemm6( cntl ) );
110 
111  /*------------------------------------------------------------*/
112 
113  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
114  A10, A11, /**/ A12,
115  /* ************** */ /* ****************** */
116  &ABL, /**/ &ABR, A20, A21, /**/ A22,
117  FLA_TL );
118 
119  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
120  /* ************** */ /* ****************** */
121  B10, /**/ B11, B12,
122  &BBL, /**/ &BBR, B20, /**/ B21, B22,
123  FLA_BR );
124 
125  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
126  C10, /**/ C11, C12,
127  /* ************** */ /* ****************** */
128  &CBL, /**/ &CBR, C20, /**/ C21, C22,
129  FLA_TR );
130 
131  }
132 
133  return FLA_SUCCESS;
134 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var3()

FLA_Error FLA_Sylv_hh_blk_var3 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C01 = sylv( A00', B11', C01 -/+ C02 * B12' );
71  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  FLA_NEGATE( isgn ), C02, B12, FLA_ONE, C01,
73  FLA_Cntl_sub_gemm1( cntl ) );
74 
75  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
76  isgn, A00, B11, C01, scale,
77  FLA_Cntl_sub_sylv1( cntl ) );
78 
79  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
80  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
81  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
82  FLA_Cntl_sub_gemm2( cntl ) );
83 
84  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
85  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
86  FLA_Cntl_sub_gemm3( cntl ) );
87 
88  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
89  isgn, A11, B11, C11, scale,
90  FLA_Cntl_sub_sylv2( cntl ) );
91 
92  // C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 -/+ C22 * B12' );
93  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
94  FLA_NEGATE( isgn ), C22, B12, FLA_ONE, C21,
95  FLA_Cntl_sub_gemm4( cntl ) );
96 
97  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
98  FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
99  FLA_Cntl_sub_gemm5( cntl ) );
100 
101  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
102  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
103  FLA_Cntl_sub_gemm6( cntl ) );
104 
105  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
106  isgn, A22, B11, C21, scale,
107  FLA_Cntl_sub_sylv3( cntl ) );
108 
109  /*------------------------------------------------------------*/
110 
111  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
112  A10, A11, /**/ A12,
113  /* ************** */ /* ****************** */
114  &ABL, /**/ &ABR, A20, A21, /**/ A22,
115  FLA_TL );
116 
117  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
118  /* ************** */ /* ****************** */
119  B10, /**/ B11, B12,
120  &BBL, /**/ &BBR, B20, /**/ B21, B22,
121  FLA_BR );
122 
123  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
124  C10, /**/ C11, C12,
125  /* ************** */ /* ****************** */
126  &CBL, /**/ &CBR, C20, /**/ C21, C22,
127  FLA_TR );
128 
129  }
130 
131  return FLA_SUCCESS;
132 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var4()

FLA_Error FLA_Sylv_hh_blk_var4 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C12 = sylv( A11', B22', C12 - A01' * C02 );
71  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
72  FLA_MINUS_ONE, A01, C02, FLA_ONE, C12,
73  FLA_Cntl_sub_gemm1( cntl ) );
74 
75  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
76  isgn, A11, B22, C12, scale,
77  FLA_Cntl_sub_sylv1( cntl ) );
78 
79  // C01 = sylv( A00', B11', C01 );
80  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
81  isgn, A00, B11, C01, scale,
82  FLA_Cntl_sub_sylv2( cntl ) );
83 
84  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
85  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
86  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
87  FLA_Cntl_sub_gemm2( cntl ) );
88 
89  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
90  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
91  FLA_Cntl_sub_gemm3( cntl ) );
92 
93  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
94  isgn, A11, B11, C11, scale,
95  FLA_Cntl_sub_sylv3( cntl ) );
96 
97  // C10 = C10 -/+ C12 * B02' -/+ C11 * B01';
98  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
99  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
100  FLA_Cntl_sub_gemm4( cntl ) );
101 
102  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
103  FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
104  FLA_Cntl_sub_gemm5( cntl ) );
105 
106  // C00 = C00 -/+ C01 * B01';
107  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
108  FLA_NEGATE( isgn ), C01, B01, FLA_ONE, C00,
109  FLA_Cntl_sub_gemm6( cntl ) );
110 
111  /*------------------------------------------------------------*/
112 
113  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
114  A10, A11, /**/ A12,
115  /* ************** */ /* ****************** */
116  &ABL, /**/ &ABR, A20, A21, /**/ A22,
117  FLA_TL );
118 
119  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
120  /* ************** */ /* ****************** */
121  B10, /**/ B11, B12,
122  &BBL, /**/ &BBR, B20, /**/ B21, B22,
123  FLA_BR );
124 
125  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
126  C10, /**/ C11, C12,
127  /* ************** */ /* ****************** */
128  &CBL, /**/ &CBR, C20, /**/ C21, C22,
129  FLA_TR );
130 
131  }
132 
133  return FLA_SUCCESS;
134 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var5()

FLA_Error FLA_Sylv_hh_blk_var5 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C12 = sylv( A11', B22', C12 - A01' * C02 );
71  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
72  FLA_MINUS_ONE, A01, C02, FLA_ONE, C12,
73  FLA_Cntl_sub_gemm1( cntl ) );
74 
75  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
76  isgn, A11, B22, C12, scale,
77  FLA_Cntl_sub_sylv1( cntl ) );
78 
79  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
80  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
81  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
82  FLA_Cntl_sub_gemm2( cntl ) );
83 
84  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
85  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
86  FLA_Cntl_sub_gemm3( cntl ) );
87 
88  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
89  isgn, A11, B11, C11, scale,
90  FLA_Cntl_sub_sylv2( cntl ) );
91 
92  // C10 = sylv( A11', B00', C10 - A01' * C00 -/+ C12 * B02' -/+ C11 * B01' );
93  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
94  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
95  FLA_Cntl_sub_gemm4( cntl ) );
96 
97  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
98  FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
99  FLA_Cntl_sub_gemm5( cntl ) );
100 
101  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
102  FLA_MINUS_ONE, A01, C00, FLA_ONE, C10,
103  FLA_Cntl_sub_gemm6( cntl ) );
104 
105  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
106  isgn, A11, B00, C10, scale,
107  FLA_Cntl_sub_sylv3( cntl ) );
108 
109  /*------------------------------------------------------------*/
110 
111  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
112  A10, A11, /**/ A12,
113  /* ************** */ /* ****************** */
114  &ABL, /**/ &ABR, A20, A21, /**/ A22,
115  FLA_TL );
116 
117  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
118  /* ************** */ /* ****************** */
119  B10, /**/ B11, B12,
120  &BBL, /**/ &BBR, B20, /**/ B21, B22,
121  FLA_BR );
122 
123  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
124  C10, /**/ C11, C12,
125  /* ************** */ /* ****************** */
126  &CBL, /**/ &CBR, C20, /**/ C21, C22,
127  FLA_TR );
128 
129  }
130 
131  return FLA_SUCCESS;
132 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var6()

FLA_Error FLA_Sylv_hh_blk_var6 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C12 = sylv( A11', B22', C12 );
71  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  isgn, A11, B22, C12, scale,
73  FLA_Cntl_sub_sylv1( cntl ) );
74 
75  // C22 = C22 - A12' * C12;
76  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
77  FLA_MINUS_ONE, A12, C12, FLA_ONE, C22,
78  FLA_Cntl_sub_gemm1( cntl ) );
79 
80  // C01 = sylv( A00', B11', C01 );
81  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
82  isgn, A00, B11, C01, scale,
83  FLA_Cntl_sub_sylv2( cntl ) );
84 
85  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
86  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
87  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
88  FLA_Cntl_sub_gemm2( cntl ) );
89 
90  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
91  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
92  FLA_Cntl_sub_gemm3( cntl ) );
93 
94  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
95  isgn, A11, B11, C11, scale,
96  FLA_Cntl_sub_sylv3( cntl ) );
97 
98  // C21 = C21 - A12' * C11 - A02' * C01;
99  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
100  FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
101  FLA_Cntl_sub_gemm4( cntl ) );
102 
103  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
104  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
105  FLA_Cntl_sub_gemm5( cntl ) );
106 
107  // C10 = C10 -/+ C12 * B02' -/+ C11 * B01';
108  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
109  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
110  FLA_Cntl_sub_gemm6( cntl ) );
111 
112  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
113  FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
114  FLA_Cntl_sub_gemm7( cntl ) );
115 
116  // C00 = C00 -/+ C01 * B01';
117  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
118  FLA_NEGATE( isgn ), C01, B01, FLA_ONE, C00,
119  FLA_Cntl_sub_gemm8( cntl ) );
120 
121  /*------------------------------------------------------------*/
122 
123  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
124  A10, A11, /**/ A12,
125  /* ************** */ /* ****************** */
126  &ABL, /**/ &ABR, A20, A21, /**/ A22,
127  FLA_TL );
128 
129  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
130  /* ************** */ /* ****************** */
131  B10, /**/ B11, B12,
132  &BBL, /**/ &BBR, B20, /**/ B21, B22,
133  FLA_BR );
134 
135  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
136  C10, /**/ C11, C12,
137  /* ************** */ /* ****************** */
138  &CBL, /**/ &CBR, C20, /**/ C21, C22,
139  FLA_TR );
140 
141  }
142 
143  return FLA_SUCCESS;
144 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var7()

FLA_Error FLA_Sylv_hh_blk_var7 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C01 = sylv( A00', B11', C01 );
71  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  isgn, A00, B11, C01, scale,
73  FLA_Cntl_sub_sylv1( cntl ) );
74 
75  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
76  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
77  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
78  FLA_Cntl_sub_gemm1( cntl ) );
79 
80  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
81  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
82  FLA_Cntl_sub_gemm2( cntl ) );
83 
84  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
85  isgn, A11, B11, C11, scale,
86  FLA_Cntl_sub_sylv2( cntl ) );
87 
88  // C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 -/+ C22 * B12' );
89  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
90  FLA_NEGATE( isgn ), C22, B12, FLA_ONE, C21,
91  FLA_Cntl_sub_gemm3( cntl ) );
92 
93  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
94  FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
95  FLA_Cntl_sub_gemm4( cntl ) );
96 
97  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
98  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
99  FLA_Cntl_sub_gemm5( cntl ) );
100 
101  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
102  isgn, A22, B11, C21, scale,
103  FLA_Cntl_sub_sylv3( cntl ) );
104 
105  // C10 = C10 -/+ C12 * B02' -/+ C11 * B01';
106  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
107  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
108  FLA_Cntl_sub_gemm6( cntl ) );
109 
110  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
111  FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
112  FLA_Cntl_sub_gemm7( cntl ) );
113 
114  // C00 = C00 -/+ C01 * B01';
115  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
116  FLA_NEGATE( isgn ), C01, B01, FLA_ONE, C00,
117  FLA_Cntl_sub_gemm8( cntl ) );
118 
119  /*------------------------------------------------------------*/
120 
121  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
122  A10, A11, /**/ A12,
123  /* ************** */ /* ****************** */
124  &ABL, /**/ &ABR, A20, A21, /**/ A22,
125  FLA_TL );
126 
127  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
128  /* ************** */ /* ****************** */
129  B10, /**/ B11, B12,
130  &BBL, /**/ &BBR, B20, /**/ B21, B22,
131  FLA_BR );
132 
133  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
134  C10, /**/ C11, C12,
135  /* ************** */ /* ****************** */
136  &CBL, /**/ &CBR, C20, /**/ C21, C22,
137  FLA_TR );
138 
139  }
140 
141  return FLA_SUCCESS;
142 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var8()

FLA_Error FLA_Sylv_hh_blk_var8 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t * cntl
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C12 = sylv( A11', B22', C12 );
71  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  isgn, A11, B22, C12, scale,
73  FLA_Cntl_sub_sylv1( cntl ) );
74 
75  // C22 = C22 - A12' * C12;
76  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
77  FLA_MINUS_ONE, A12, C12, FLA_ONE, C22,
78  FLA_Cntl_sub_gemm1( cntl ) );
79 
80  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
81  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
82  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
83  FLA_Cntl_sub_gemm2( cntl ) );
84 
85  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
86  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
87  FLA_Cntl_sub_gemm3( cntl ) );
88 
89  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
90  isgn, A11, B11, C11, scale,
91  FLA_Cntl_sub_sylv2( cntl ) );
92 
93  // C21 = C21 - A12' * C11 - A02' * C01;
94  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
95  FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
96  FLA_Cntl_sub_gemm4( cntl ) );
97 
98  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
99  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
100  FLA_Cntl_sub_gemm5( cntl ) );
101 
102  // C10 = sylv( A11', B00', C10 - A01' * C00 -/+ C12 * B02' -/+ C11 * B01' );
103  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
104  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
105  FLA_Cntl_sub_gemm6( cntl ) );
106 
107  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
108  FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
109  FLA_Cntl_sub_gemm7( cntl ) );
110 
111  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
112  FLA_MINUS_ONE, A01, C00, FLA_ONE, C10,
113  FLA_Cntl_sub_gemm8( cntl ) );
114 
115  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
116  isgn, A11, B00, C10, scale,
117  FLA_Cntl_sub_sylv3( cntl ) );
118 
119  /*------------------------------------------------------------*/
120 
121  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
122  A10, A11, /**/ A12,
123  /* ************** */ /* ****************** */
124  &ABL, /**/ &ABR, A20, A21, /**/ A22,
125  FLA_TL );
126 
127  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
128  /* ************** */ /* ****************** */
129  B10, /**/ B11, B12,
130  &BBL, /**/ &BBR, B20, /**/ B21, B22,
131  FLA_BR );
132 
133  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
134  C10, /**/ C11, C12,
135  /* ************** */ /* ****************** */
136  &CBL, /**/ &CBR, C20, /**/ C21, C22,
137  FLA_TR );
138 
139  }
140 
141  return FLA_SUCCESS;
142 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_blk_var9()

FLA_Error FLA_Sylv_hh_blk_var9 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale,
fla_sylv_t *  cntl 
)
16 {
17  FLA_Obj ATL, ATR, A00, A01, A02,
18  ABL, ABR, A10, A11, A12,
19  A20, A21, A22;
20 
21  FLA_Obj BTL, BTR, B00, B01, B02,
22  BBL, BBR, B10, B11, B12,
23  B20, B21, B22;
24 
25  FLA_Obj CTL, CTR, C00, C01, C02,
26  CBL, CBR, C10, C11, C12,
27  C20, C21, C22;
28 
29  dim_t b;
30 
31  FLA_Part_2x2( A, &ATL, &ATR,
32  &ABL, &ABR, 0, 0, FLA_TL );
33 
34  FLA_Part_2x2( B, &BTL, &BTR,
35  &BBL, &BBR, 0, 0, FLA_BR );
36 
37  FLA_Part_2x2( C, &CTL, &CTR,
38  &CBL, &CBR, 0, 0, FLA_TR );
39 
40  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) ){
41 
42  b = FLA_Determine_blocksize( CBL, FLA_BL, FLA_Cntl_blocksize( cntl ) );
43 
44  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
45  /* ************* */ /* ******************** */
46  &A10, /**/ &A11, &A12,
47  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_2x2_to_3x3( BTL, /**/ BTR, &B00, &B01, /**/ &B02,
51  &B10, &B11, /**/ &B12,
52  /* ************* */ /* ******************** */
53  BBL, /**/ BBR, &B20, &B21, /**/ &B22,
54  b, b, FLA_TL );
55 
56  FLA_Repart_2x2_to_3x3( CTL, /**/ CTR, &C00, &C01, /**/ &C02,
57  /* ************* */ /* ******************** */
58  &C10, &C11, /**/ &C12,
59  CBL, /**/ CBR, &C20, &C21, /**/ &C22,
60  b, b, FLA_BL );
61 
62  // Loop Invariant:
63  // CTL =
64  // CTR =
65  // CBL =
66  // CBR =
67 
68  /*------------------------------------------------------------*/
69 
70  // C11 = sylv( A11', B11', C11 - A01' * C01 -/+ C12 * B12' );
71  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
72  FLA_NEGATE( isgn ), C12, B12, FLA_ONE, C11,
73  FLA_Cntl_sub_gemm1( cntl ) );
74 
75  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
76  FLA_MINUS_ONE, A01, C01, FLA_ONE, C11,
77  FLA_Cntl_sub_gemm2( cntl ) );
78 
79  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
80  isgn, A11, B11, C11, scale,
81  FLA_Cntl_sub_sylv1( cntl ) );
82 
83  // C21 = sylv( A22', B11', C21 - A12' * C11 - A02' * C01 -/+ C22 * B12' );
84  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
85  FLA_NEGATE( isgn ), C22, B12, FLA_ONE, C21,
86  FLA_Cntl_sub_gemm3( cntl ) );
87 
88  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
89  FLA_MINUS_ONE, A02, C01, FLA_ONE, C21,
90  FLA_Cntl_sub_gemm4( cntl ) );
91 
92  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
93  FLA_MINUS_ONE, A12, C11, FLA_ONE, C21,
94  FLA_Cntl_sub_gemm5( cntl ) );
95 
96  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
97  isgn, A22, B11, C21, scale,
98  FLA_Cntl_sub_sylv2( cntl ) );
99 
100  // C10 = sylv( A11', B00', C10 - A01' * C00 -/+ C12 * B02' -/+ C11 * B01' );
101  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
102  FLA_NEGATE( isgn ), C11, B01, FLA_ONE, C10,
103  FLA_Cntl_sub_gemm6( cntl ) );
104 
105  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
106  FLA_NEGATE( isgn ), C12, B02, FLA_ONE, C10,
107  FLA_Cntl_sub_gemm7( cntl ) );
108 
109  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
110  FLA_MINUS_ONE, A01, C00, FLA_ONE, C10,
111  FLA_Cntl_sub_gemm8( cntl ) );
112 
113  FLA_Sylv_internal( FLA_CONJ_TRANSPOSE, FLA_CONJ_TRANSPOSE,
114  isgn, A11, B00, C10, scale,
115  FLA_Cntl_sub_sylv3( cntl ) );
116 
117  /*------------------------------------------------------------*/
118 
119  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
120  A10, A11, /**/ A12,
121  /* ************** */ /* ****************** */
122  &ABL, /**/ &ABR, A20, A21, /**/ A22,
123  FLA_TL );
124 
125  FLA_Cont_with_3x3_to_2x2( &BTL, /**/ &BTR, B00, /**/ B01, B02,
126  /* ************** */ /* ****************** */
127  B10, /**/ B11, B12,
128  &BBL, /**/ &BBR, B20, /**/ B21, B22,
129  FLA_BR );
130 
131  FLA_Cont_with_3x3_to_2x2( &CTL, /**/ &CTR, C00, /**/ C01, C02,
132  C10, /**/ C11, C12,
133  /* ************** */ /* ****************** */
134  &CBL, /**/ &CBR, C20, /**/ C21, C22,
135  FLA_TR );
136 
137  }
138 
139  return FLA_SUCCESS;
140 }

References FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Sylv_internal().

Referenced by FLA_Sylv_hh().

◆ FLA_Sylv_hh_opc_var1()

FLA_Error FLA_Sylv_hh_opc_var1 ( float  sgn,
int  m_C,
int  n_C,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_B,
int  rs_B,
int  cs_B,
scomplex *  buff_C,
int  rs_C,
int  cs_C,
scomplex *  buff_scale,
int *  info 
)
262 {
263  int l, k;
264 
265  for ( l = n_C - 1; l >= 0; l-- )
266  {
267  for ( k = 0; k < m_C; k++ )
268  {
269  scomplex* a01 = buff_A + (k )*cs_A + (0 )*rs_A;
270  scomplex* b12t = buff_B + (l+1)*cs_B + (l )*rs_B;
271  scomplex* c01 = buff_C + (l )*cs_C + (0 )*rs_C;
272  scomplex* c12t = buff_C + (l+1)*cs_C + (k )*rs_C;
273  scomplex* alpha11 = buff_A + (k )*cs_A + (k )*rs_A;
274  scomplex* beta11 = buff_B + (l )*cs_B + (l )*rs_B;
275  scomplex* ckl = buff_C + (l )*cs_C + (k )*rs_C;
276  scomplex suml;
277  scomplex sumr;
278  scomplex vec;
279  scomplex a11;
280  scomplex x11;
281 
282  int m_behind = k;
283  int n_behind = n_C - l - 1;
284 
285  /*------------------------------------------------------------*/
286 
287  bl1_cdot( BLIS1_CONJUGATE,
288  m_behind,
289  a01, rs_A,
290  c01, rs_C,
291  &suml );
292 
293  bl1_cdot( BLIS1_CONJUGATE,
294  n_behind,
295  c12t, cs_C,
296  b12t, cs_B,
297  &sumr );
298 
299  vec.real = ckl->real - ( suml.real + sgn * sumr.real );
300  vec.imag = ckl->imag - ( suml.imag + sgn * -sumr.imag );
301 
302  a11.real = alpha11->real + sgn * beta11->real;
303  a11.imag = -alpha11->imag + sgn * -beta11->imag;
304 
305  bl1_cdiv3( &vec, &a11, &x11 );
306 
307  *ckl = x11;
308 
309  /*------------------------------------------------------------*/
310 
311  }
312  }
313 
314  return FLA_SUCCESS;
315 }
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
scomplex
Definition: blis_type_defs.h:133
float imag
Definition: blis_type_defs.h:134
float real
Definition: blis_type_defs.h:134

References bl1_cdot(), BLIS1_CONJUGATE, scomplex::imag, and scomplex::real.

Referenced by FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opd_var1()

FLA_Error FLA_Sylv_hh_opd_var1 ( double  sgn,
int  m_C,
int  n_C,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_B,
int  rs_B,
int  cs_B,
double *  buff_C,
int  rs_C,
int  cs_C,
double *  buff_scale,
int *  info 
)
199 {
200  int l, k;
201 
202  for ( l = n_C - 1; l >= 0; l-- )
203  {
204  for ( k = 0; k < m_C; k++ )
205  {
206  double* a01 = buff_A + (k )*cs_A + (0 )*rs_A;
207  double* b12t = buff_B + (l+1)*cs_B + (l )*rs_B;
208  double* c01 = buff_C + (l )*cs_C + (0 )*rs_C;
209  double* c12t = buff_C + (l+1)*cs_C + (k )*rs_C;
210  double* alpha11 = buff_A + (k )*cs_A + (k )*rs_A;
211  double* beta11 = buff_B + (l )*cs_B + (l )*rs_B;
212  double* ckl = buff_C + (l )*cs_C + (k )*rs_C;
213  double suml;
214  double sumr;
215  double vec;
216  double a11;
217  double x11;
218 
219  int m_behind = k;
220  int n_behind = n_C - l - 1;
221 
222  /*------------------------------------------------------------*/
223 
224  bl1_ddot( BLIS1_CONJUGATE,
225  m_behind,
226  a01, rs_A,
227  c01, rs_C,
228  &suml );
229 
230  bl1_ddot( BLIS1_CONJUGATE,
231  n_behind,
232  c12t, cs_C,
233  b12t, cs_B,
234  &sumr );
235 
236  vec = (*ckl) - ( suml + sgn * sumr );
237 
238  a11 = (*alpha11) + sgn * (*beta11);
239 
240  bl1_ddiv3( &vec, &a11, &x11 );
241 
242  *ckl = x11;
243 
244  /*------------------------------------------------------------*/
245 
246  }
247  }
248 
249  return FLA_SUCCESS;
250 }
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26

References bl1_ddot(), and BLIS1_CONJUGATE.

Referenced by FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_ops_var1()

FLA_Error FLA_Sylv_hh_ops_var1 ( float  sgn,
int  m_C,
int  n_C,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_B,
int  rs_B,
int  cs_B,
float *  buff_C,
int  rs_C,
int  cs_C,
float *  buff_scale,
int *  info 
)
136 {
137  int l, k;
138 
139  for ( l = n_C - 1; l >= 0; l-- )
140  {
141  for ( k = 0; k < m_C; k++ )
142  {
143  float* a01 = buff_A + (k )*cs_A + (0 )*rs_A;
144  float* b12t = buff_B + (l+1)*cs_B + (l )*rs_B;
145  float* c01 = buff_C + (l )*cs_C + (0 )*rs_C;
146  float* c12t = buff_C + (l+1)*cs_C + (k )*rs_C;
147  float* alpha11 = buff_A + (k )*cs_A + (k )*rs_A;
148  float* beta11 = buff_B + (l )*cs_B + (l )*rs_B;
149  float* ckl = buff_C + (l )*cs_C + (k )*rs_C;
150  float suml;
151  float sumr;
152  float vec;
153  float a11;
154  float x11;
155 
156  int m_behind = k;
157  int n_behind = n_C - l - 1;
158 
159  /*------------------------------------------------------------*/
160 
161  bl1_sdot( BLIS1_CONJUGATE,
162  m_behind,
163  a01, rs_A,
164  c01, rs_C,
165  &suml );
166 
167  bl1_sdot( BLIS1_CONJUGATE,
168  n_behind,
169  c12t, cs_C,
170  b12t, cs_B,
171  &sumr );
172 
173  vec = (*ckl) - ( suml + sgn * sumr );
174 
175  a11 = (*alpha11) + sgn * (*beta11);
176 
177  bl1_sdiv3( &vec, &a11, &x11 );
178 
179  *ckl = x11;
180 
181  /*------------------------------------------------------------*/
182 
183  }
184  }
185 
186  return FLA_SUCCESS;
187 }
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13

References bl1_sdot(), and BLIS1_CONJUGATE.

Referenced by FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var1()

FLA_Error FLA_Sylv_hh_opt_var1 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
14 {
15  FLA_Datatype datatype;
16  int m_C, n_C;
17  int rs_A, cs_A;
18  int rs_B, cs_B;
19  int rs_C, cs_C;
20  int info;
21 
22  datatype = FLA_Obj_datatype( A );
23 
24  rs_A = FLA_Obj_row_stride( A );
25  cs_A = FLA_Obj_col_stride( A );
26 
27  rs_B = FLA_Obj_row_stride( B );
28  cs_B = FLA_Obj_col_stride( B );
29 
30  m_C = FLA_Obj_length( C );
31  n_C = FLA_Obj_width( C );
32  rs_C = FLA_Obj_row_stride( C );
33  cs_C = FLA_Obj_col_stride( C );
34 
35 
36  switch ( datatype )
37  {
38  case FLA_FLOAT:
39  {
40  int* buff_isgn = FLA_INT_PTR( isgn );
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_B = FLA_FLOAT_PTR( B );
43  float* buff_C = FLA_FLOAT_PTR( C );
44  float* buff_scale = FLA_FLOAT_PTR( scale );
45  float sgn = ( float ) *buff_isgn;
46 
47  FLA_Sylv_hh_ops_var1( sgn,
48  m_C,
49  n_C,
50  buff_A, rs_A, cs_A,
51  buff_B, rs_B, cs_B,
52  buff_C, rs_C, cs_C,
53  buff_scale,
54  &info );
55 
56  break;
57  }
58 
59  case FLA_DOUBLE:
60  {
61  int* buff_isgn = FLA_INT_PTR( isgn );
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_B = FLA_DOUBLE_PTR( B );
64  double* buff_C = FLA_DOUBLE_PTR( C );
65  double* buff_scale = FLA_DOUBLE_PTR( scale );
66  double sgn = ( double ) *buff_isgn;
67 
68  FLA_Sylv_hh_opd_var1( sgn,
69  m_C,
70  n_C,
71  buff_A, rs_A, cs_A,
72  buff_B, rs_B, cs_B,
73  buff_C, rs_C, cs_C,
74  buff_scale,
75  &info );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  int* buff_isgn = FLA_INT_PTR( isgn );
83  scomplex* buff_A = FLA_COMPLEX_PTR( A );
84  scomplex* buff_B = FLA_COMPLEX_PTR( B );
85  scomplex* buff_C = FLA_COMPLEX_PTR( C );
86  scomplex* buff_scale = FLA_COMPLEX_PTR( scale );
87  float sgn = ( float ) *buff_isgn;
88 
89  FLA_Sylv_hh_opc_var1( sgn,
90  m_C,
91  n_C,
92  buff_A, rs_A, cs_A,
93  buff_B, rs_B, cs_B,
94  buff_C, rs_C, cs_C,
95  buff_scale,
96  &info );
97 
98  break;
99  }
100 
101  case FLA_DOUBLE_COMPLEX:
102  {
103  int* buff_isgn = FLA_INT_PTR( isgn );
104  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
105  dcomplex* buff_B = FLA_DOUBLE_COMPLEX_PTR( B );
106  dcomplex* buff_C = FLA_DOUBLE_COMPLEX_PTR( C );
107  dcomplex* buff_scale = FLA_DOUBLE_COMPLEX_PTR( scale );
108  double sgn = ( double ) *buff_isgn;
109 
110  FLA_Sylv_hh_opz_var1( sgn,
111  m_C,
112  n_C,
113  buff_A, rs_A, cs_A,
114  buff_B, rs_B, cs_B,
115  buff_C, rs_C, cs_C,
116  buff_scale,
117  &info );
118 
119  break;
120  }
121  }
122 
123  return FLA_SUCCESS;
124 }
FLA_Error FLA_Sylv_hh_opz_var1(double sgn, int m_C, int n_C, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_B, int rs_B, int cs_B, dcomplex *buff_C, int rs_C, int cs_C, dcomplex *buff_scale, int *info)
Definition: FLA_Sylv_hh_opt_var1.c:319
FLA_Error FLA_Sylv_hh_opd_var1(double sgn, int m_C, int n_C, double *buff_A, int rs_A, int cs_A, double *buff_B, int rs_B, int cs_B, double *buff_C, int rs_C, int cs_C, double *buff_scale, int *info)
Definition: FLA_Sylv_hh_opt_var1.c:191
FLA_Error FLA_Sylv_hh_opc_var1(float sgn, int m_C, int n_C, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_B, int rs_B, int cs_B, scomplex *buff_C, int rs_C, int cs_C, scomplex *buff_scale, int *info)
Definition: FLA_Sylv_hh_opt_var1.c:254
FLA_Error FLA_Sylv_hh_ops_var1(float sgn, int m_C, int n_C, float *buff_A, int rs_A, int cs_A, float *buff_B, int rs_B, int cs_B, float *buff_C, int rs_C, int cs_C, float *buff_scale, int *info)
Definition: FLA_Sylv_hh_opt_var1.c:128
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dcomplex
Definition: blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_width(), FLA_Sylv_hh_opc_var1(), FLA_Sylv_hh_opd_var1(), FLA_Sylv_hh_ops_var1(), and FLA_Sylv_hh_opz_var1().

Referenced by FLA_Sylv_hh(), FLA_Sylv_hh_opt_var10(), FLA_Sylv_hh_opt_var11(), FLA_Sylv_hh_opt_var12(), FLA_Sylv_hh_opt_var13(), FLA_Sylv_hh_opt_var14(), FLA_Sylv_hh_opt_var15(), FLA_Sylv_hh_opt_var16(), FLA_Sylv_hh_opt_var17(), FLA_Sylv_hh_opt_var18(), FLA_Sylv_hh_opt_var2(), FLA_Sylv_hh_opt_var3(), FLA_Sylv_hh_opt_var4(), FLA_Sylv_hh_opt_var5(), FLA_Sylv_hh_opt_var6(), FLA_Sylv_hh_opt_var7(), FLA_Sylv_hh_opt_var8(), and FLA_Sylv_hh_opt_var9().

◆ FLA_Sylv_hh_opt_var10()

FLA_Error FLA_Sylv_hh_opt_var10 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }
FLA_Error FLA_Sylv_hh_opt_var1(FLA_Obj isgn, FLA_Obj A, FLA_Obj B, FLA_Obj C, FLA_Obj scale)
Definition: FLA_Sylv_hh_opt_var1.c:13

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var11()

FLA_Error FLA_Sylv_hh_opt_var11 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var12()

FLA_Error FLA_Sylv_hh_opt_var12 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var13()

FLA_Error FLA_Sylv_hh_opt_var13 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var14()

FLA_Error FLA_Sylv_hh_opt_var14 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var15()

FLA_Error FLA_Sylv_hh_opt_var15 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var16()

FLA_Error FLA_Sylv_hh_opt_var16 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var17()

FLA_Error FLA_Sylv_hh_opt_var17 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var18()

FLA_Error FLA_Sylv_hh_opt_var18 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var2()

FLA_Error FLA_Sylv_hh_opt_var2 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var3()

FLA_Error FLA_Sylv_hh_opt_var3 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var4()

FLA_Error FLA_Sylv_hh_opt_var4 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var5()

FLA_Error FLA_Sylv_hh_opt_var5 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var6()

FLA_Error FLA_Sylv_hh_opt_var6 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var7()

FLA_Error FLA_Sylv_hh_opt_var7 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var8()

FLA_Error FLA_Sylv_hh_opt_var8 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opt_var9()

FLA_Error FLA_Sylv_hh_opt_var9 ( FLA_Obj  isgn,
FLA_Obj  A,
FLA_Obj  B,
FLA_Obj  C,
FLA_Obj  scale 
)
16 {
17  return FLA_Sylv_hh_opt_var1( isgn, A, B, C, scale );
18 }

References FLA_Sylv_hh_opt_var1().

◆ FLA_Sylv_hh_opz_var1()

FLA_Error FLA_Sylv_hh_opz_var1 ( double  sgn,
int  m_C,
int  n_C,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_B,
int  rs_B,
int  cs_B,
dcomplex *  buff_C,
int  rs_C,
int  cs_C,
dcomplex *  buff_scale,
int *  info 
)
327 {
328  int l, k;
329 
330  for ( l = n_C - 1; l >= 0; l-- )
331  {
332  for ( k = 0; k < m_C; k++ )
333  {
334  dcomplex* a01 = buff_A + (k )*cs_A + (0 )*rs_A;
335  dcomplex* b12t = buff_B + (l+1)*cs_B + (l )*rs_B;
336  dcomplex* c01 = buff_C + (l )*cs_C + (0 )*rs_C;
337  dcomplex* c12t = buff_C + (l+1)*cs_C + (k )*rs_C;
338  dcomplex* alpha11 = buff_A + (k )*cs_A + (k )*rs_A;
339  dcomplex* beta11 = buff_B + (l )*cs_B + (l )*rs_B;
340  dcomplex* ckl = buff_C + (l )*cs_C + (k )*rs_C;
341  dcomplex suml;
342  dcomplex sumr;
343  dcomplex vec;
344  dcomplex a11;
345  dcomplex x11;
346 
347  int m_behind = k;
348  int n_behind = n_C - l - 1;
349 
350  /*------------------------------------------------------------*/
351 
352  bl1_zdot( BLIS1_CONJUGATE,
353  m_behind,
354  a01, rs_A,
355  c01, rs_C,
356  &suml );
357 
358  bl1_zdot( BLIS1_CONJUGATE,
359  n_behind,
360  c12t, cs_C,
361  b12t, cs_B,
362  &sumr );
363 
364  vec.real = ckl->real - ( suml.real + sgn * sumr.real );
365  vec.imag = ckl->imag - ( suml.imag + sgn * -sumr.imag );
366 
367  a11.real = alpha11->real + sgn * beta11->real;
368  a11.imag = -alpha11->imag + sgn * -beta11->imag;
369 
370  bl1_zdiv3( &vec, &a11, &x11 );
371 
372  *ckl = x11;
373 
374  /*------------------------------------------------------------*/
375 
376  }
377  }
378 
379  return FLA_SUCCESS;
380 }
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
double real
Definition: blis_type_defs.h:139
double imag
Definition: blis_type_defs.h:139

References bl1_zdot(), BLIS1_CONJUGATE, dcomplex::imag, and dcomplex::real.

Referenced by FLA_Sylv_hh_opt_var1().