libflame  revision_anchor
Functions
FLA_LQ_UT_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_LQ_UT_unb_var1 (FLA_Obj A, FLA_Obj t)
 
FLA_Error FLA_LQ_UT_blk_var1 (FLA_Obj A, FLA_Obj T, fla_lqut_t *cntl)
 
FLA_Error FLA_LQ_UT_opt_var1 (FLA_Obj A, FLA_Obj t)
 
FLA_Error FLA_LQ_UT_ops_var1 (int m_A, int n_A, float *A, int rs_A, int cs_A, float *t, int inc_t)
 
FLA_Error FLA_LQ_UT_opd_var1 (int m_A, int n_A, double *A, int rs_A, int cs_A, double *t, int inc_t)
 
FLA_Error FLA_LQ_UT_opc_var1 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A, scomplex *t, int inc_t)
 
FLA_Error FLA_LQ_UT_opz_var1 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A, dcomplex *t, int inc_t)
 
FLA_Error FLA_LQ_UT_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_LQ_UT_blk_var2 (FLA_Obj A, FLA_Obj T, fla_lqut_t *cntl)
 
FLA_Error FLA_LQ_UT_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_LQ_UT_ops_var2 (int m_A, int n_A, float *A, int rs_A, int cs_A, float *T, int rs_T, int cs_T)
 
FLA_Error FLA_LQ_UT_opd_var2 (int m_A, int n_A, double *A, int rs_A, int cs_A, double *T, int rs_T, int cs_T)
 
FLA_Error FLA_LQ_UT_opc_var2 (int m_A, int n_A, scomplex *A, int rs_A, int cs_A, scomplex *T, int rs_T, int cs_T)
 
FLA_Error FLA_LQ_UT_opz_var2 (int m_A, int n_A, dcomplex *A, int rs_A, int cs_A, dcomplex *T, int rs_T, int cs_T)
 
FLA_Error FLA_LQ_UT_blk_var3 (FLA_Obj A, FLA_Obj T, fla_lqut_t *cntl)
 

Function Documentation

◆ FLA_LQ_UT_blk_var1()

FLA_Error FLA_LQ_UT_blk_var1 ( FLA_Obj  A,
FLA_Obj  T,
fla_lqut_t *  cntl 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TL, TR, T0, T1, W12;
20 
21  FLA_Obj T1T, T2B;
22 
23  FLA_Obj AR1, AR2;
24 
25  dim_t b_alg, b;
26 
27  // Query the algorithmic blocksize by inspecting the length of T.
28  b_alg = FLA_Obj_length( T );
29 
30  FLA_Part_2x2( A, &ATL, &ATR,
31  &ABL, &ABR, 0, 0, FLA_TL );
32 
33  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
34 
35  while ( FLA_Obj_min_dim( ABR ) > 0 ){
36 
37  b = min( b_alg, FLA_Obj_min_dim( ABR ) );
38 
39  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
40  /* ************* */ /* ******************** */
41  &A10, /**/ &A11, &A12,
42  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
43  b, b, FLA_BR );
44 
45  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
46  b, FLA_RIGHT );
47 
48  /*------------------------------------------------------------*/
49 
50  FLA_Part_2x1( T1, &T1T,
51  &T2B, b, FLA_TOP );
52 
53  FLA_Merge_1x2( A11, A12, &AR1 );
54 
55  // Perform an LQ factorization via the UT transform on AR1:
56  //
57  // ( A11 A12 ) -> L11 QR1
58  //
59  // where:
60  // - QR1 is formed from UR1 (which is stored row-wise above the
61  // diagonal of AR1) and T11 (which is stored to the upper triangle
62  // of T11).
63  // - L11 is stored to the lower triangle of AR1.
64 
65  FLA_LQ_UT_internal( AR1, T1T,
66  FLA_Cntl_sub_lqut( cntl ));
67 
68 
69  if ( FLA_Obj_length( A21 ) > 0 )
70  {
71  FLA_Merge_1x2( A21, A22, &AR2 );
72 
73  // Apply the Householder transforms associated with UR1 and T11 to
74  // AR2:
75  //
76  // ( A21 A22 ) := ( A21 A22 ) Q1
77  //
78  // where QR1 is formed from UR1 and T11.
79 
80  FLA_Apply_Q_UT_internal( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE,
81  AR1, T1T, W12, AR2,
82  FLA_Cntl_sub_apqut( cntl ) );
83  }
84 
85  /*------------------------------------------------------------*/
86 
87  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
88  A10, A11, /**/ A12,
89  /* ************** */ /* ****************** */
90  &ABL, /**/ &ABR, A20, A21, /**/ A22,
91  FLA_TL );
92 
93  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
94  FLA_LEFT );
95  }
96 
97  return FLA_SUCCESS;
98 }
FLA_Error FLA_Apply_Q_UT_internal(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t *cntl)
Definition: FLA_Apply_Q_UT_internal.c:17
FLA_Error FLA_LQ_UT_internal(FLA_Obj A, FLA_Obj T, fla_lqut_t *cntl)
Definition: FLA_LQ_UT_internal.c:17
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Merge_1x2(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A)
Definition: FLA_View.c:562
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153
unsigned long dim_t
Definition: FLA_type_defs.h:71
Definition: FLA_type_defs.h:159

References FLA_Apply_Q_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_LQ_UT_internal(), FLA_Merge_1x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_LQ_UT_internal().

◆ FLA_LQ_UT_blk_var2()

FLA_Error FLA_LQ_UT_blk_var2 ( FLA_Obj  A,
FLA_Obj  T,
fla_lqut_t *  cntl 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TTL, TTR, T00, T01, T02,
20  TBL, TBR, T10, T11, W12,
21  T20, T21, T22;
22 
23  FLA_Obj AR1,
24  AR2;
25 
26  dim_t b;
27 
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30 
31  FLA_Part_2x2( T, &TTL, &TTR,
32  &TBL, &TBR, 0, 0, FLA_TL );
33 
34  while ( FLA_Obj_min_dim( ABR ) > 0 ){
35 
36  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
37 
38  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
39  /* ************* */ /* ******************** */
40  &A10, /**/ &A11, &A12,
41  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
42  b, b, FLA_BR );
43 
44  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &T01, &T02,
45  /* ************* */ /* ******************** */
46  &T10, /**/ &T11, &W12,
47  TBL, /**/ TBR, &T20, /**/ &T21, &T22,
48  b, b, FLA_BR );
49 
50  /*------------------------------------------------------------*/
51 
52  FLA_Merge_1x2( A11, A12, &AR1 );
53 
54  // Perform an LQ factorization via the UT transform on AR1:
55  //
56  // ( A11 A12 ) -> L11 QR1
57  //
58  // where:
59  // - QR1 is formed from UR1 (which is stored row-wise above the
60  // diagonal of AR1) and T11 (which is stored to the upper triangle
61  // of T11).
62  // - L11 is stored to the lower triangle of AR1.
63 
64  FLA_LQ_UT_internal( AR1, T11,
65  FLA_Cntl_sub_lqut( cntl ) );
66 
67 
68  if ( FLA_Obj_length( A21 ) > 0 )
69  {
70  FLA_Merge_1x2( A21, A22, &AR2 );
71 
72  // Apply the Householder transforms associated with UR1 and T11 to
73  // AR2:
74  //
75  // ( A21 A22 ) := ( A21 A22 ) Q1
76  //
77  // where QR1 is formed from UR1 and T11.
78 
79  FLA_Apply_Q_UT_internal( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE,
80  AR1, T11, W12, AR2,
81  FLA_Cntl_sub_apqut( cntl ) );
82  }
83 
84 
85  // Update T
86  //
87  // T01 = conj(A01) * U11^T + conj(A02) * U12^T;
88  //
89  // Recall: U11 = triuu( A11 );
90  // U12 = A12;
91 
92  FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, A01, T01 );
93  FLA_Trmm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
94  FLA_TRANSPOSE, FLA_UNIT_DIAG,
95  FLA_ONE, A11, T01 );
96  FLA_Gemm_external( FLA_CONJ_NO_TRANSPOSE, FLA_TRANSPOSE,
97  FLA_ONE, A02, A12, FLA_ONE, T01 );
98 
99  /*------------------------------------------------------------*/
100 
101  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
102  A10, A11, /**/ A12,
103  /* ************** */ /* ****************** */
104  &ABL, /**/ &ABR, A20, A21, /**/ A22,
105  FLA_TL );
106 
107  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, T01, /**/ T02,
108  T10, T11, /**/ W12,
109  /* ************** */ /* ****************** */
110  &TBL, /**/ &TBR, T20, T21, /**/ T22,
111  FLA_TL );
112  }
113 
114  return FLA_SUCCESS;
115 }
FLA_Error FLA_Copyt_external(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt_external.c:13
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm_external.c:13
FLA_Error FLA_Trmm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Trmm_external.c:13
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234

References FLA_Apply_Q_UT_internal(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Determine_blocksize(), FLA_Gemm_external(), FLA_LQ_UT_internal(), FLA_Merge_1x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_2x2(), FLA_Repart_2x2_to_3x3(), and FLA_Trmm_external().

Referenced by FLA_LQ_UT_internal().

◆ FLA_LQ_UT_blk_var3()

FLA_Error FLA_LQ_UT_blk_var3 ( FLA_Obj  A,
FLA_Obj  T,
fla_lqut_t *  cntl 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TWTL, TWTR, TW00, TW01, TW02,
20  TWBL, TWBR, TW10, T11, W12,
21  TW20, TW21, TW22;
22 
23  FLA_Obj AR1,
24  AR2;
25 
26  dim_t b;
27 
28  FLA_Part_2x2( A, &ATL, &ATR,
29  &ABL, &ABR, 0, 0, FLA_TL );
30 
31  FLA_Part_2x2( TW, &TWTL, &TWTR,
32  &TWBL, &TWBR, 0, 0, FLA_TL );
33 
34  while ( FLA_Obj_min_dim( ABR ) > 0 ){
35 
36  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
37 
38  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
39  /* ************* */ /* ******************** */
40  &A10, /**/ &A11, &A12,
41  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
42  b, b, FLA_BR );
43 
44  FLA_Repart_2x2_to_3x3( TWTL, /**/ TWTR, &TW00, /**/ &TW01, &TW02,
45  /* ************* */ /* ******************** */
46  &TW10, /**/ &T11, &W12,
47  TWBL, /**/ TWBR, &TW20, /**/ &TW21, &TW22,
48  b, b, FLA_BR );
49 
50  /*------------------------------------------------------------*/
51 
52  FLA_Merge_1x2( A11, A12, &AR1 );
53 
54  // Perform an LQ factorization via the UT transform on AR1:
55  //
56  // ( A11 A12 ) -> L11 QR1
57  //
58  // where:
59  // - QR1 is formed from UR1 (which is stored row-wise above the
60  // diagonal of AR1) and T11 (which is stored to the upper triangle
61  // of T11).
62  // - L11 is stored to the lower triangle of AR1.
63 
64  FLA_LQ_UT_internal( AR1, T11,
65  FLA_Cntl_sub_lqut( cntl ) );
66 
67 
68  if ( FLA_Obj_length( A21 ) > 0 )
69  {
70  FLA_Merge_1x2( A21, A22, &AR2 );
71 
72  // Apply the Householder transforms associated with UR1 and T11 to
73  // AR2:
74  //
75  // ( A21 A22 ) := ( A21 A22 ) Q1
76  //
77  // where QR1 is formed from UR1 and T11.
78 
79  FLA_Apply_Q_UT_internal( FLA_RIGHT, FLA_NO_TRANSPOSE, FLA_FORWARD, FLA_ROWWISE,
80  AR1, T11, W12, AR2,
81  FLA_Cntl_sub_apqut( cntl ) );
82  }
83 
84  /*------------------------------------------------------------*/
85 
86  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
87  A10, A11, /**/ A12,
88  /* ************** */ /* ****************** */
89  &ABL, /**/ &ABR, A20, A21, /**/ A22,
90  FLA_TL );
91 
92  FLA_Cont_with_3x3_to_2x2( &TWTL, /**/ &TWTR, TW00, TW01, /**/ TW02,
93  TW10, T11, /**/ W12,
94  /* ************** */ /* ****************** */
95  &TWBL, /**/ &TWBR, TW20, TW21, /**/ TW22,
96  FLA_TL );
97  }
98 
99  return FLA_SUCCESS;
100 }

References FLA_Apply_Q_UT_internal(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_LQ_UT_internal(), FLA_Merge_1x2(), FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Part_2x2(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_LQ_UT_internal().

◆ FLA_LQ_UT_opc_var1()

FLA_Error FLA_LQ_UT_opc_var1 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A,
scomplex *  t,
int  inc_t 
)
186 {
187  int min_m_n = min( m_A, n_A );
188  int i;
189 
190  for ( i = 0; i < min_m_n; ++i )
191  {
192  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
193  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
194  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
195  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
196 
197  scomplex* tau1 = buff_t + (i )*inc_t;
198 
199  int m_ahead = m_A - i - 1;
200  int n_ahead = n_A - i - 1;
201 
202  /*------------------------------------------------------------*/
203 
204  // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t
205  // tau1 );
206  FLA_Househ2_UT_r_opc( n_ahead,
207  alpha11,
208  a12t, cs_A,
209  tau1 );
210 
211  // FLA_Apply_H2_UT( FLA_RIGHT, tau1, a12t, a21, A22 );
212  FLA_Apply_H2_UT_r_opc_var1( m_ahead,
213  n_ahead,
214  tau1,
215  a12t, cs_A,
216  a21, rs_A,
217  A22, rs_A, cs_A );
218 
219  /*------------------------------------------------------------*/
220 
221  }
222 
223  return FLA_SUCCESS;
224 }
FLA_Error FLA_Apply_H2_UT_r_opc_var1(int n_u2h_A2, int m_a1, scomplex *tau, scomplex *u2h, int inc_u2h, scomplex *a1, int inc_a1, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:254
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
int i
Definition: bl1_axmyv2.c:145
Definition: blis_type_defs.h:133

References FLA_Apply_H2_UT_r_opc_var1(), FLA_Househ2_UT_r_opc(), and i.

Referenced by FLA_LQ_UT_opt_var1().

◆ FLA_LQ_UT_opc_var2()

FLA_Error FLA_LQ_UT_opc_var2 ( int  m_A,
int  n_A,
scomplex *  A,
int  rs_A,
int  cs_A,
scomplex *  T,
int  rs_T,
int  cs_T 
)
233 {
234  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
235  int min_m_n = min( m_A, n_A );
236  int i;
237 
238  for ( i = 0; i < min_m_n; ++i )
239  {
240  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
241  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
242  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
243  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
244  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
245  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
246 
247  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
248  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
249 
250  int m_ahead = m_A - i - 1;
251  int n_ahead = n_A - i - 1;
252  int m_behind = i;
253 
254  /*------------------------------------------------------------*/
255 
256  // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t
257  // tau11 );
258  FLA_Househ2_UT_r_opc( n_ahead,
259  alpha11,
260  a12t, cs_A,
261  tau11 );
262 
263  // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a12t, a21, A22 );
264  FLA_Apply_H2_UT_r_opc_var1( m_ahead,
265  n_ahead,
266  tau11,
267  a12t, cs_A,
268  a21, rs_A,
269  A22, rs_A, cs_A );
270 
271  // FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, a01, t01 );
272  bl1_ccopyv( BLIS1_CONJUGATE,
273  m_behind,
274  a01, rs_A,
275  t01, rs_T );
276 
277  // FLA_Gemvc_external( FLA_CONJ_NO_TRANSPOSE, FLA_NO_CONJUGATE,
278  // FLA_ONE, A02, a12t, FLA_ONE, t01 );
279  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
280  BLIS1_NO_CONJUGATE,
281  m_behind,
282  n_ahead,
283  buff_1,
284  A02, rs_A, cs_A,
285  a12t, cs_A,
286  buff_1,
287  t01, rs_T );
288 
289  /*------------------------------------------------------------*/
290 
291  }
292 
293  return FLA_SUCCESS;
294 }
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81

References bl1_ccopyv(), bl1_cgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_r_opc_var1(), FLA_Househ2_UT_r_opc(), FLA_ONE, and i.

Referenced by FLA_LQ_UT_opt_var2().

◆ FLA_LQ_UT_opd_var1()

FLA_Error FLA_LQ_UT_opd_var1 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A,
double *  t,
int  inc_t 
)
140 {
141  int min_m_n = min( m_A, n_A );
142  int i;
143 
144  for ( i = 0; i < min_m_n; ++i )
145  {
146  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
147  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
148  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
149  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
150 
151  double* tau1 = buff_t + (i )*inc_t;
152 
153  int m_ahead = m_A - i - 1;
154  int n_ahead = n_A - i - 1;
155 
156  /*------------------------------------------------------------*/
157 
158  // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t
159  // tau1 );
160  FLA_Househ2_UT_r_opd( n_ahead,
161  alpha11,
162  a12t, cs_A,
163  tau1 );
164 
165  // FLA_Apply_H2_UT( FLA_RIGHT, tau1, a12t, a21, A22 );
166  FLA_Apply_H2_UT_r_opd_var1( m_ahead,
167  n_ahead,
168  tau1,
169  a12t, cs_A,
170  a21, rs_A,
171  A22, rs_A, cs_A );
172 
173  /*------------------------------------------------------------*/
174 
175  }
176 
177  return FLA_SUCCESS;
178 }
FLA_Error FLA_Apply_H2_UT_r_opd_var1(int n_u2h_A2, int m_a1, double *tau, double *u2h, int inc_u2h, double *a1, int inc_a1, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:181
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664

References FLA_Apply_H2_UT_r_opd_var1(), FLA_Househ2_UT_r_opd(), and i.

Referenced by FLA_LQ_UT_opt_var1().

◆ FLA_LQ_UT_opd_var2()

FLA_Error FLA_LQ_UT_opd_var2 ( int  m_A,
int  n_A,
double *  A,
int  rs_A,
int  cs_A,
double *  T,
int  rs_T,
int  cs_T 
)
164 {
165  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
166  int min_m_n = min( m_A, n_A );
167  int i;
168 
169  for ( i = 0; i < min_m_n; ++i )
170  {
171  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
172  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
173  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
174  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
175  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
176  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
177 
178  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
179  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
180 
181  int m_ahead = m_A - i - 1;
182  int n_ahead = n_A - i - 1;
183  int m_behind = i;
184 
185  /*------------------------------------------------------------*/
186 
187  // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t
188  // tau11 );
189  FLA_Househ2_UT_r_opd( n_ahead,
190  alpha11,
191  a12t, cs_A,
192  tau11 );
193 
194  // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a12t, a21, A22 );
195  FLA_Apply_H2_UT_r_opd_var1( m_ahead,
196  n_ahead,
197  tau11,
198  a12t, cs_A,
199  a21, rs_A,
200  A22, rs_A, cs_A );
201 
202  // FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, a01, t01 );
203  bl1_dcopyv( BLIS1_CONJUGATE,
204  m_behind,
205  a01, rs_A,
206  t01, rs_T );
207 
208  // FLA_Gemvc_external( FLA_CONJ_NO_TRANSPOSE, FLA_NO_CONJUGATE,
209  // FLA_ONE, A02, a12t, FLA_ONE, t01 );
210  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
211  BLIS1_NO_CONJUGATE,
212  m_behind,
213  n_ahead,
214  buff_1,
215  A02, rs_A, cs_A,
216  a12t, cs_A,
217  buff_1,
218  t01, rs_T );
219 
220  /*------------------------------------------------------------*/
221 
222  }
223 
224  return FLA_SUCCESS;
225 }
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69

References bl1_dcopyv(), bl1_dgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_r_opd_var1(), FLA_Househ2_UT_r_opd(), FLA_ONE, and i.

Referenced by FLA_LQ_UT_opt_var2().

◆ FLA_LQ_UT_ops_var1()

FLA_Error FLA_LQ_UT_ops_var1 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A,
float *  t,
int  inc_t 
)
94 {
95  int min_m_n = min( m_A, n_A );
96  int i;
97 
98  for ( i = 0; i < min_m_n; ++i )
99  {
100  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
101  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
102  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
103  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
104 
105  float* tau1 = buff_t + (i )*inc_t;
106 
107  int m_ahead = m_A - i - 1;
108  int n_ahead = n_A - i - 1;
109 
110  /*------------------------------------------------------------*/
111 
112  // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t
113  // tau1 );
114  FLA_Househ2_UT_r_ops( n_ahead,
115  alpha11,
116  a12t, cs_A,
117  tau1 );
118 
119  // FLA_Apply_H2_UT( FLA_RIGHT, tau1, a12t, a21, A22 );
120  FLA_Apply_H2_UT_r_ops_var1( m_ahead,
121  n_ahead,
122  tau1,
123  a12t, cs_A,
124  a21, rs_A,
125  A22, rs_A, cs_A );
126 
127  /*------------------------------------------------------------*/
128 
129  }
130 
131  return FLA_SUCCESS;
132 }
FLA_Error FLA_Apply_H2_UT_r_ops_var1(int n_u2h_A2, int m_a1, float *tau, float *u2h, int inc_u2h, float *a1, int inc_a1, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:108
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651

References FLA_Apply_H2_UT_r_ops_var1(), FLA_Househ2_UT_r_ops(), and i.

Referenced by FLA_LQ_UT_opt_var1().

◆ FLA_LQ_UT_ops_var2()

FLA_Error FLA_LQ_UT_ops_var2 ( int  m_A,
int  n_A,
float *  A,
int  rs_A,
int  cs_A,
float *  T,
int  rs_T,
int  cs_T 
)
95 {
96  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
97  int min_m_n = min( m_A, n_A );
98  int i;
99 
100  for ( i = 0; i < min_m_n; ++i )
101  {
102  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
103  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
104  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
105  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
106  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
107  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
108 
109  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
110  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
111 
112  int m_ahead = m_A - i - 1;
113  int n_ahead = n_A - i - 1;
114  int m_behind = i;
115 
116  /*------------------------------------------------------------*/
117 
118  // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t
119  // tau11 );
120  FLA_Househ2_UT_r_ops( n_ahead,
121  alpha11,
122  a12t, cs_A,
123  tau11 );
124 
125  // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a12t, a21, A22 );
126  FLA_Apply_H2_UT_r_ops_var1( m_ahead,
127  n_ahead,
128  tau11,
129  a12t, cs_A,
130  a21, rs_A,
131  A22, rs_A, cs_A );
132 
133  // FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, a01, t01 );
134  bl1_scopyv( BLIS1_CONJUGATE,
135  m_behind,
136  a01, rs_A,
137  t01, rs_T );
138 
139  // FLA_Gemvc_external( FLA_CONJ_NO_TRANSPOSE, FLA_NO_CONJUGATE,
140  // FLA_ONE, A02, a12t, FLA_ONE, t01 );
141  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
142  BLIS1_NO_CONJUGATE,
143  m_behind,
144  n_ahead,
145  buff_1,
146  A02, rs_A, cs_A,
147  a12t, cs_A,
148  buff_1,
149  t01, rs_T );
150 
151  /*------------------------------------------------------------*/
152 
153  }
154 
155  return FLA_SUCCESS;
156 }
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13

References bl1_scopyv(), bl1_sgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_r_ops_var1(), FLA_Househ2_UT_r_ops(), FLA_ONE, and i.

Referenced by FLA_LQ_UT_opt_var2().

◆ FLA_LQ_UT_opt_var1()

FLA_Error FLA_LQ_UT_opt_var1 ( FLA_Obj  A,
FLA_Obj  t 
)
14 {
15  FLA_Datatype datatype;
16  int m_A, n_A;
17  int rs_A, cs_A;
18  int inc_t;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  m_A = FLA_Obj_length( A );
23  n_A = FLA_Obj_width( A );
24  rs_A = FLA_Obj_row_stride( A );
25  cs_A = FLA_Obj_col_stride( A );
26 
27  inc_t = FLA_Obj_vector_inc( t );
28 
29 
30  switch ( datatype )
31  {
32  case FLA_FLOAT:
33  {
34  float* buff_A = FLA_FLOAT_PTR( A );
35  float* buff_t = FLA_FLOAT_PTR( t );
36 
37  FLA_LQ_UT_ops_var1( m_A,
38  n_A,
39  buff_A, rs_A, cs_A,
40  buff_t, inc_t );
41 
42  break;
43  }
44 
45  case FLA_DOUBLE:
46  {
47  double* buff_A = FLA_DOUBLE_PTR( A );
48  double* buff_t = FLA_DOUBLE_PTR( t );
49 
50  FLA_LQ_UT_opd_var1( m_A,
51  n_A,
52  buff_A, rs_A, cs_A,
53  buff_t, inc_t );
54 
55  break;
56  }
57 
58  case FLA_COMPLEX:
59  {
60  scomplex* buff_A = FLA_COMPLEX_PTR( A );
61  scomplex* buff_t = FLA_COMPLEX_PTR( t );
62 
63  FLA_LQ_UT_opc_var1( m_A,
64  n_A,
65  buff_A, rs_A, cs_A,
66  buff_t, inc_t );
67 
68  break;
69  }
70 
71  case FLA_DOUBLE_COMPLEX:
72  {
73  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
74  dcomplex* buff_t = FLA_DOUBLE_COMPLEX_PTR( t );
75 
76  FLA_LQ_UT_opz_var1( m_A,
77  n_A,
78  buff_A, rs_A, cs_A,
79  buff_t, inc_t );
80 
81  break;
82  }
83  }
84 
85  return FLA_SUCCESS;
86 }
FLA_Error FLA_LQ_UT_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_t, int inc_t)
Definition: FLA_LQ_UT_opt_var1.c:90
FLA_Error FLA_LQ_UT_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_t, int inc_t)
Definition: FLA_LQ_UT_opt_var1.c:228
FLA_Error FLA_LQ_UT_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_t, int inc_t)
Definition: FLA_LQ_UT_opt_var1.c:136
FLA_Error FLA_LQ_UT_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_t, int inc_t)
Definition: FLA_LQ_UT_opt_var1.c:182
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
dim_t FLA_Obj_vector_inc(FLA_Obj obj)
Definition: FLA_Query.c:145
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_LQ_UT_opc_var1(), FLA_LQ_UT_opd_var1(), FLA_LQ_UT_ops_var1(), FLA_LQ_UT_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_Obj_vector_inc(), and FLA_Obj_width().

Referenced by FLA_LQ_UT_internal().

◆ FLA_LQ_UT_opt_var2()

FLA_Error FLA_LQ_UT_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Datatype datatype;
16  int m_A, n_A;
17  int rs_A, cs_A;
18  int rs_T, cs_T;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  m_A = FLA_Obj_length( A );
23  n_A = FLA_Obj_width( A );
24  rs_A = FLA_Obj_row_stride( A );
25  cs_A = FLA_Obj_col_stride( A );
26 
27  rs_T = FLA_Obj_row_stride( T );
28  cs_T = FLA_Obj_col_stride( T );
29 
30 
31  switch ( datatype )
32  {
33  case FLA_FLOAT:
34  {
35  float* buff_A = FLA_FLOAT_PTR( A );
36  float* buff_T = FLA_FLOAT_PTR( T );
37 
38  FLA_LQ_UT_ops_var2( m_A,
39  n_A,
40  buff_A, rs_A, cs_A,
41  buff_T, rs_T, cs_T );
42 
43  break;
44  }
45 
46  case FLA_DOUBLE:
47  {
48  double* buff_A = FLA_DOUBLE_PTR( A );
49  double* buff_T = FLA_DOUBLE_PTR( T );
50 
51  FLA_LQ_UT_opd_var2( m_A,
52  n_A,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T );
55 
56  break;
57  }
58 
59  case FLA_COMPLEX:
60  {
61  scomplex* buff_A = FLA_COMPLEX_PTR( A );
62  scomplex* buff_T = FLA_COMPLEX_PTR( T );
63 
64  FLA_LQ_UT_opc_var2( m_A,
65  n_A,
66  buff_A, rs_A, cs_A,
67  buff_T, rs_T, cs_T );
68 
69  break;
70  }
71 
72  case FLA_DOUBLE_COMPLEX:
73  {
74  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
75  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
76 
77  FLA_LQ_UT_opz_var2( m_A,
78  n_A,
79  buff_A, rs_A, cs_A,
80  buff_T, rs_T, cs_T );
81 
82  break;
83  }
84  }
85 
86  return FLA_SUCCESS;
87 }
FLA_Error FLA_LQ_UT_ops_var2(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_LQ_UT_opt_var2.c:91
FLA_Error FLA_LQ_UT_opc_var2(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_LQ_UT_opt_var2.c:229
FLA_Error FLA_LQ_UT_opd_var2(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_LQ_UT_opt_var2.c:160
FLA_Error FLA_LQ_UT_opz_var2(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_LQ_UT_opt_var2.c:298

References FLA_LQ_UT_opc_var2(), FLA_LQ_UT_opd_var2(), FLA_LQ_UT_ops_var2(), FLA_LQ_UT_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_LQ_UT_internal().

◆ FLA_LQ_UT_opz_var1()

FLA_Error FLA_LQ_UT_opz_var1 ( int  m_A,
int  n_A,
dcomplex *  A,
int  rs_A,
int  cs_A,
dcomplex *  t,
int  inc_t 
)
232 {
233  int min_m_n = min( m_A, n_A );
234  int i;
235 
236  for ( i = 0; i < min_m_n; ++i )
237  {
238  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
239  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
240  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
241  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
242 
243  dcomplex* tau1 = buff_t + (i )*inc_t;
244 
245  int m_ahead = m_A - i - 1;
246  int n_ahead = n_A - i - 1;
247 
248  /*------------------------------------------------------------*/
249 
250  // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t
251  // tau1 );
252  FLA_Househ2_UT_r_opz( n_ahead,
253  alpha11,
254  a12t, cs_A,
255  tau1 );
256 
257  // FLA_Apply_H2_UT( FLA_RIGHT, tau1, a12t, a21, A22 );
258  FLA_Apply_H2_UT_r_opz_var1( m_ahead,
259  n_ahead,
260  tau1,
261  a12t, cs_A,
262  a21, rs_A,
263  A22, rs_A, cs_A );
264 
265  /*------------------------------------------------------------*/
266 
267  }
268 
269  return FLA_SUCCESS;
270 }
FLA_Error FLA_Apply_H2_UT_r_opz_var1(int n_u2h_A2, int m_a1, dcomplex *tau, dcomplex *u2h, int inc_u2h, dcomplex *a1, int inc_a1, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:327
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693

References FLA_Apply_H2_UT_r_opz_var1(), FLA_Househ2_UT_r_opz(), and i.

Referenced by FLA_LQ_UT_opt_var1().

◆ FLA_LQ_UT_opz_var2()

FLA_Error FLA_LQ_UT_opz_var2 ( int  m_A,
int  n_A,
dcomplex *  A,
int  rs_A,
int  cs_A,
dcomplex *  T,
int  rs_T,
int  cs_T 
)
302 {
303  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
304  int min_m_n = min( m_A, n_A );
305  int i;
306 
307  for ( i = 0; i < min_m_n; ++i )
308  {
309  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
310  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
311  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
312  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
313  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
314  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
315 
316  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
317  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
318 
319  int m_ahead = m_A - i - 1;
320  int n_ahead = n_A - i - 1;
321  int m_behind = i;
322 
323  /*------------------------------------------------------------*/
324 
325  // FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t
326  // tau11 );
327  FLA_Househ2_UT_r_opz( n_ahead,
328  alpha11,
329  a12t, cs_A,
330  tau11 );
331 
332  // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a12t, a21, A22 );
333  FLA_Apply_H2_UT_r_opz_var1( m_ahead,
334  n_ahead,
335  tau11,
336  a12t, cs_A,
337  a21, rs_A,
338  A22, rs_A, cs_A );
339 
340  // FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, a01, t01 );
341  bl1_zcopyv( BLIS1_CONJUGATE,
342  m_behind,
343  a01, rs_A,
344  t01, rs_T );
345 
346  // FLA_Gemvc_external( FLA_CONJ_NO_TRANSPOSE, FLA_NO_CONJUGATE,
347  // FLA_ONE, A02, a12t, FLA_ONE, t01 );
348  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
349  BLIS1_NO_CONJUGATE,
350  m_behind,
351  n_ahead,
352  buff_1,
353  A02, rs_A, cs_A,
354  a12t, cs_A,
355  buff_1,
356  t01, rs_T );
357 
358  /*------------------------------------------------------------*/
359 
360  }
361 
362  return FLA_SUCCESS;
363 }
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255

References bl1_zcopyv(), bl1_zgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_r_opz_var1(), FLA_Househ2_UT_r_opz(), FLA_ONE, and i.

Referenced by FLA_LQ_UT_opt_var2().

◆ FLA_LQ_UT_unb_var1()

/*
 * FLA_LQ_UT_unb_var1 -- object-based LQ (UT Householder) sweep over A that
 * processes one diagonal element per loop iteration.
 *
 * A : matrix object; its parts are passed to FLA_Househ2_UT and
 *     FLA_Apply_H2_UT, which update them.
 * t : object that is walked left to right in step with the diagonal of A;
 *     each iteration hands one 1-wide part (tau1) to FLA_Househ2_UT as
 *     the tau argument.
 *
 * Always returns FLA_SUCCESS; the return values of the FLA_* calls made
 * here are not inspected.
 */
FLA_Error FLA_LQ_UT_unb_var1 ( FLA_Obj  A,
FLA_Obj  t 
)
14 {
15  FLA_Obj ATL, ATR, A00, a01, A02,
16  ABL, ABR, a10t, alpha11, a12t,
17  A20, a21, A22;
18 
19  FLA_Obj tLt, tRt, t0t, tau1, t2t;
20  // Initial partitioning of A and t, requested with size 0 at FLA_TL / FLA_LEFT.
21  FLA_Part_2x2( A, &ATL, &ATR,
22  &ABL, &ABR, 0, 0, FLA_TL );
23 
24  FLA_Part_1x2( t, &tLt, &tRt, 0, FLA_LEFT );
25  // Iterate for as long as ABR still has a nonzero minimum dimension.
26  while ( FLA_Obj_min_dim( ABR ) > 0 ){
27  // Repartition with a 1 x 1 request at FLA_BR, exposing alpha11 and its neighbors.
28  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
29  /* ************* */ /* ************************** */
30  &a10t, /**/ &alpha11, &a12t,
31  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
32  1, 1, FLA_BR );
33  // Expose the matching 1-wide part tau1 of t.
34  FLA_Repart_1x2_to_1x3( tLt, /**/ tRt, &t0t, /**/ &tau1, &t2t,
35  1, FLA_RIGHT );
36 
37  /*------------------------------------------------------------*/
38 
39  // Compute tau1 and u12t from alpha11 and a12t such that tau1 and u12t
40  // determine a Householder transform H such that applying H from the
41  // right to the row vector consisting of alpha11 and a12t annihilates
42  // the entries in a12t (and updates alpha11).
43  FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t,
44  tau1 );
45 
46  // ( a21 A22 ) = ( a21 A22 ) H
47  //
48  // where H is formed from tau1 and u12t (a12t is passed as the u2 argument).
49  FLA_Apply_H2_UT( FLA_RIGHT, tau1, a12t, a21, A22 );
50 
51  /*------------------------------------------------------------*/
52  // Fold the exposed parts back into the 2x2 / 1x2 partitionings (FLA_TL / FLA_LEFT).
53  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
54  a10t, alpha11, /**/ a12t,
55  /* ************** */ /* ************************ */
56  &ABL, /**/ &ABR, A20, a21, /**/ A22,
57  FLA_TL );
58 
59  FLA_Cont_with_1x3_to_1x2( &tLt, /**/ &tRt, t0t, tau1, /**/ t2t,
60  FLA_LEFT );
61  }
62 
63  return FLA_SUCCESS;
64 }
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition: FLA_Apply_H2_UT.c:13
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16

References FLA_Apply_H2_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Househ2_UT(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_LQ_UT_internal().

◆ FLA_LQ_UT_unb_var2()

/*
 * FLA_LQ_UT_unb_var2 -- object-based LQ (UT Householder) sweep over A that
 * processes one diagonal element per loop iteration and, in the same pass,
 * fills in one column of T above its diagonal.
 *
 * A : matrix object; its parts are passed to FLA_Househ2_UT and
 *     FLA_Apply_H2_UT, which update them.
 * T : matrix object partitioned in lockstep with A; each iteration passes
 *     the diagonal element tau11 to FLA_Househ2_UT as the tau argument and
 *     writes the column part t01 via FLA_Copyt_external / FLA_Gemvc_external.
 *
 * Always returns FLA_SUCCESS; the return values of the FLA_* calls made
 * here are not inspected.
 */
FLA_Error FLA_LQ_UT_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, a01, A02,
16  ABL, ABR, a10t, alpha11, a12t,
17  A20, a21, A22;
18 
19  FLA_Obj TTL, TTR, T00, t01, T02,
20  TBL, TBR, t10t, tau11, t12t,
21  T20, t21, T22;
22 
23  // Initial partitioning of A and T, both requested with size 0 x 0 at FLA_TL.
24  FLA_Part_2x2( A, &ATL, &ATR,
25  &ABL, &ABR, 0, 0, FLA_TL );
26 
27  FLA_Part_2x2( T, &TTL, &TTR,
28  &TBL, &TBR, 0, 0, FLA_TL );
29  // Iterate for as long as ABR still has a nonzero minimum dimension.
30  while ( FLA_Obj_min_dim( ABR ) > 0 ){
31  // Repartition A with a 1 x 1 request at FLA_BR, exposing alpha11 and its neighbors.
32  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
33  /* ************* */ /* ************************** */
34  &a10t, /**/ &alpha11, &a12t,
35  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
36  1, 1, FLA_BR );
37  // Repartition T the same way, exposing tau11 and t01.
38  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
39  /* ************* */ /* ************************ */
40  &t10t, /**/ &tau11, &t12t,
41  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
42  1, 1, FLA_BR );
43 
44  /*------------------------------------------------------------*/
45 
46  // Compute tau11 and u12t from alpha11 and a12t such that tau11 and u12t
47  // determine a Householder transform H such that applying H from the
48  // right to the row vector consisting of alpha11 and a12t annihilates
49  // the entries in a12t (and updates alpha11).
50  FLA_Househ2_UT( FLA_RIGHT, alpha11, a12t,
51  tau11 );
52 
53  // ( a21 A22 ) = ( a21 A22 ) H
54  //
55  // where H is formed from tau11 and u12t (a12t is passed as the u2 argument).
56  FLA_Apply_H2_UT( FLA_RIGHT, tau11, a12t, a21, A22 );
57 
58  // t01 = conj(a01) + conj(A02) * u12t^T, done in two steps: a conjugating
59  // copy of a01 into t01, then a Gemvc call with alpha = beta = FLA_ONE
60  // that accumulates the A02 * a12t product into t01.
59  FLA_Copyt_external( FLA_CONJ_NO_TRANSPOSE, a01, t01 );
60  FLA_Gemvc_external( FLA_CONJ_NO_TRANSPOSE, FLA_NO_CONJUGATE, FLA_ONE, A02, a12t, FLA_ONE, t01 );
61 
62  /*------------------------------------------------------------*/
63  // Fold the exposed parts of A and T back into their 2x2 partitionings (FLA_TL).
64  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
65  a10t, alpha11, /**/ a12t,
66  /* ************** */ /* ************************ */
67  &ABL, /**/ &ABR, A20, a21, /**/ A22,
68  FLA_TL );
69 
70  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
71  t10t, tau11, /**/ t12t,
72  /* ************** */ /* ********************** */
73  &TBL, /**/ &TBR, T20, t21, /**/ T22,
74  FLA_TL );
75 
76  }
77 
78  return FLA_SUCCESS;
79 }
FLA_Error FLA_Gemvc_external(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc_external.c:13

References FLA_Apply_H2_UT(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemvc_external(), FLA_Househ2_UT(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_2x2(), and FLA_Repart_2x2_to_3x3().

Referenced by FLA_LQ_UT_internal().