libflame  revision_anchor
Functions
FLA_QR2_UT_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_QR2_UT_blk_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qr2ut_t *cntl)
 
FLA_Error FLA_QR2_UT_blk_var2 (FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qr2ut_t *cntl)
 
FLA_Error FLA_QR2_UT_unb_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T)
 
FLA_Error FLA_QR2_UT_opt_var1 (FLA_Obj U, FLA_Obj D, FLA_Obj T)
 
FLA_Error FLA_QR2_UT_ops_var1 (int m_UT, int m_D, float *U, int rs_U, int cs_U, float *D, int rs_D, int cs_D, float *T, int rs_T, int cs_T)
 
FLA_Error FLA_QR2_UT_opd_var1 (int m_UT, int m_D, double *U, int rs_U, int cs_U, double *D, int rs_D, int cs_D, double *T, int rs_T, int cs_T)
 
FLA_Error FLA_QR2_UT_opc_var1 (int m_UT, int m_D, scomplex *U, int rs_U, int cs_U, scomplex *D, int rs_D, int cs_D, scomplex *T, int rs_T, int cs_T)
 
FLA_Error FLA_QR2_UT_opz_var1 (int m_UT, int m_D, dcomplex *U, int rs_U, int cs_U, dcomplex *D, int rs_D, int cs_D, dcomplex *T, int rs_T, int cs_T)
 

Function Documentation

◆ FLA_QR2_UT_blk_var1()

FLA_Error FLA_QR2_UT_blk_var1 ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  T,
fla_qr2ut_t *  cntl 
)
15 {
16  FLA_Obj UTL, UTR, U00, U01, U02,
17  UBL, UBR, U10, U11, U12,
18  U20, U21, U22;
19 
20  FLA_Obj DL, DR, D0, D1, D2;
21 
22  FLA_Obj TL, TR, T0, T1, W12;
23 
24  FLA_Obj W12T, W12B;
25 
26  FLA_Obj T1T, T2B;
27 
28  dim_t b_alg, b;
29 
30  // Query the algorithmic blocksize by inspecting the length of T.
31  b_alg = FLA_Obj_length( T );
32 
33  FLA_Part_2x2( U, &UTL, &UTR,
34  &UBL, &UBR, 0, 0, FLA_TL );
35 
36  FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT );
37 
38  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
39 
40  while ( FLA_Obj_min_dim( UBR ) > 0 ){
41 
42  b = min( b_alg, FLA_Obj_min_dim( UBR ) );
43 
44  FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &U01, &U02,
45  /* ************* */ /* ******************** */
46  &U10, /**/ &U11, &U12,
47  UBL, /**/ UBR, &U20, /**/ &U21, &U22,
48  b, b, FLA_BR );
49 
50  FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &D1, &D2,
51  b, FLA_RIGHT );
52 
53  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
54  b, FLA_RIGHT );
55 
56  /*------------------------------------------------------------*/
57 
58  // T1T = FLA_Top_part( T1, b );
59 
60  FLA_Part_2x1( T1, &T1T,
61  &T2B, b, FLA_TOP );
62 
63  // [ U11, ...
64  // D1, T1 ] = FLA_QR2_UT( U11
65  // D1, T1T );
66 
67  FLA_QR2_UT_internal( U11,
68  D1, T1T,
69  FLA_Cntl_sub_qr2ut( cntl ) );
70 
71 
72  if ( FLA_Obj_width( U12 ) > 0 )
73  {
74  // W12T = FLA_Top_part( W12, b );
75 
76  FLA_Part_2x1( W12, &W12T,
77  &W12B, b, FLA_TOP );
78 
79  // W12T = inv( triu( T1T ) )' * ( U12 + D1' * D2 );
80 
81  FLA_Copy_internal( U12, W12T,
82  FLA_Cntl_sub_copy( cntl ) );
83 
84  FLA_Gemm_internal( FLA_CONJ_TRANSPOSE, FLA_NO_TRANSPOSE,
85  FLA_ONE, D1, D2, FLA_ONE, W12T,
86  FLA_Cntl_sub_gemm1( cntl ) );
87 
88  FLA_Trsm_internal( FLA_LEFT, FLA_UPPER_TRIANGULAR,
89  FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
90  FLA_ONE, T1T, W12T,
91  FLA_Cntl_sub_trsm( cntl ) );
92 
93  // U12 = U12 - W12T;
94  // D2 = D2 - D1 * W12T;
95 
96  FLA_Axpy_internal( FLA_MINUS_ONE, W12T, U12,
97  FLA_Cntl_sub_axpy( cntl ) );
98 
99  FLA_Gemm_internal( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
100  FLA_MINUS_ONE, D1, W12T, FLA_ONE, D2,
101  FLA_Cntl_sub_gemm2( cntl ) );
102  }
103 
104  /*------------------------------------------------------------*/
105 
106  FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, U01, /**/ U02,
107  U10, U11, /**/ U12,
108  /* ************** */ /* ****************** */
109  &UBL, /**/ &UBR, U20, U21, /**/ U22,
110  FLA_TL );
111 
112  FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, D1, /**/ D2,
113  FLA_LEFT );
114 
115  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
116  FLA_LEFT );
117 
118  }
119 
120  return FLA_SUCCESS;
121 }
FLA_Error FLA_Axpy_internal(FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t *cntl)
Definition: FLA_Axpy_internal.c:16
FLA_Error FLA_Copy_internal(FLA_Obj A, FLA_Obj B, fla_copy_t *cntl)
Definition: FLA_Copy_internal.c:16
FLA_Error FLA_Gemm_internal(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C, fla_gemm_t *cntl)
Definition: FLA_Gemm_internal.c:16
FLA_Error FLA_QR2_UT_internal(FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qr2ut_t *cntl)
Definition: FLA_QR2_UT_internal.c:16
FLA_Error FLA_Trsm_internal(FLA_Side side, FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_trsm_t *cntl)
Definition: FLA_Trsm_internal.c:16
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153
unsigned long dim_t
Definition: FLA_type_defs.h:71
Definition: FLA_type_defs.h:159

References FLA_Axpy_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_internal(), FLA_Gemm_internal(), FLA_MINUS_ONE, FLA_Obj_length(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_QR2_UT_internal(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_Trsm_internal().

Referenced by FLA_QR2_UT_internal().

◆ FLA_QR2_UT_blk_var2()

FLA_Error FLA_QR2_UT_blk_var2 ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  T,
fla_qr2ut_t *  cntl 
)
15 {
16  FLA_Obj DT, D0,
17  DB, D1,
18  D2;
19 
20  FLA_Obj TT, T0,
21  TB, T1,
22  T2;
23 
24  dim_t b;
25 
26  FLA_Part_2x1( D, &DT,
27  &DB, 0, FLA_TOP );
28 
29  FLA_Part_2x1( T, &TT,
30  &TB, 0, FLA_TOP );
31 
32  while ( FLA_Obj_length( DT ) < FLA_Obj_length( D ) ){
33 
34  b = FLA_Determine_blocksize( DB, FLA_BOTTOM, FLA_Cntl_blocksize( cntl ) );
35 
36  FLA_Repart_2x1_to_3x1( DT, &D0,
37  /* ** */ /* ****** */
38  &D1,
39  DB, &D2, b, FLA_BOTTOM );
40 
41  FLA_Repart_2x1_to_3x1( TT, &T0,
42  /* ** */ /* ****** */
43  &T1,
44  TB, &T2, b, FLA_BOTTOM );
45 
46  /*------------------------------------------------------------*/
47 
48  // [ U, ...
49  // D1, T ] = FLA_QR2_UT( U
50  // D1, T1 );
51 
52  FLA_QR2_UT_internal( U,
53  D1, T1,
54  FLA_Cntl_sub_qr2ut( cntl ) );
55 
56  /*------------------------------------------------------------*/
57 
58  FLA_Cont_with_3x1_to_2x1( &DT, D0,
59  D1,
60  /* ** */ /* ****** */
61  &DB, D2, FLA_TOP );
62 
63  FLA_Cont_with_3x1_to_2x1( &TT, T0,
64  T1,
65  /* ** */ /* ****** */
66  &TB, T2, FLA_TOP );
67  }
68 
69  return FLA_SUCCESS;
70 }
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234

References FLA_Cont_with_3x1_to_2x1(), FLA_Determine_blocksize(), FLA_Obj_length(), FLA_Part_2x1(), FLA_QR2_UT_internal(), and FLA_Repart_2x1_to_3x1().

Referenced by FLA_QR2_UT_internal().

◆ FLA_QR2_UT_opc_var1()

FLA_Error FLA_QR2_UT_opc_var1 ( int  m_UT,
int  m_D,
scomplex *  U,
int  rs_U,
int  cs_U,
scomplex *  D,
int  rs_D,
int  cs_D,
scomplex *  T,
int  rs_T,
int  cs_T 
)
236 {
237  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
238  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
239  int i;
240 
241  for ( i = 0; i < m_UT; ++i )
242  {
243  scomplex* upsilon11 = buff_U + (i )*cs_U + (i )*rs_U;
244  scomplex* u12t = buff_U + (i+1)*cs_U + (i )*rs_U;
245 
246  scomplex* D0 = buff_D + (0 )*cs_D + (0 )*rs_D;
247  scomplex* d1 = buff_D + (i )*cs_D + (0 )*rs_D;
248  scomplex* D2 = buff_D + (i+1)*cs_D + (0 )*rs_D;
249 
250  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
251  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
252 
253  int mn_ahead = m_UT - i - 1;
254  int mn_behind = i;
255 
256  //------------------------------------------------------------//
257 
258  // FLA_Househ2_UT( FLA_LEFT,
259  // upsilon11,
260  // d1, tau11 );
261  FLA_Househ2_UT_l_opc( m_D,
262  upsilon11,
263  d1, rs_D,
264  tau11 );
265 
266  // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
267  // D2 );
268  FLA_Apply_H2_UT_l_opc_var1( m_D,
269  mn_ahead,
270  tau11,
271  d1, rs_D,
272  u12t, cs_U,
273  D2, rs_D, cs_D );
274 
275  // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D0, d1, FLA_ZERO, t01 );
276  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
277  BLIS1_NO_CONJUGATE,
278  m_D,
279  mn_behind,
280  buff_1,
281  D0, rs_D, cs_D,
282  d1, rs_D,
283  buff_0,
284  t01, rs_T );
285 
286  //------------------------------------------------------------//
287 
288  }
289 
290  return FLA_SUCCESS;
291 }
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_cgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_QR2_UT_opt_var1().

◆ FLA_QR2_UT_opd_var1()

FLA_Error FLA_QR2_UT_opd_var1 ( int  m_UT,
int  m_D,
double *  U,
int  rs_U,
int  cs_U,
double *  D,
int  rs_D,
int  cs_D,
double *  T,
int  rs_T,
int  cs_T 
)
172 {
173  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
174  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
175  int i;
176 
177  for ( i = 0; i < m_UT; ++i )
178  {
179  double* upsilon11 = buff_U + (i )*cs_U + (i )*rs_U;
180  double* u12t = buff_U + (i+1)*cs_U + (i )*rs_U;
181 
182  double* D0 = buff_D + (0 )*cs_D + (0 )*rs_D;
183  double* d1 = buff_D + (i )*cs_D + (0 )*rs_D;
184  double* D2 = buff_D + (i+1)*cs_D + (0 )*rs_D;
185 
186  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
187  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
188 
189  int mn_ahead = m_UT - i - 1;
190  int mn_behind = i;
191 
192  //------------------------------------------------------------//
193 
194  // FLA_Househ2_UT( FLA_LEFT,
195  // upsilon11,
196  // d1, tau11 );
197  FLA_Househ2_UT_l_opd( m_D,
198  upsilon11,
199  d1, rs_D,
200  tau11 );
201 
202  // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
203  // D2 );
204  FLA_Apply_H2_UT_l_opd_var1( m_D,
205  mn_ahead,
206  tau11,
207  d1, rs_D,
208  u12t, cs_U,
209  D2, rs_D, cs_D );
210 
211  // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D0, d1, FLA_ZERO, t01 );
212  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
213  BLIS1_NO_CONJUGATE,
214  m_D,
215  mn_behind,
216  buff_1,
217  D0, rs_D, cs_D,
218  d1, rs_D,
219  buff_0,
220  t01, rs_T );
221 
222  //------------------------------------------------------------//
223 
224  }
225 
226  return FLA_SUCCESS;
227 }
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:195
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69

References bl1_dgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_QR2_UT_opt_var1().

◆ FLA_QR2_UT_ops_var1()

FLA_Error FLA_QR2_UT_ops_var1 ( int  m_UT,
int  m_D,
float *  U,
int  rs_U,
int  cs_U,
float *  D,
int  rs_D,
int  cs_D,
float *  T,
int  rs_T,
int  cs_T 
)
108 {
109  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
110  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
111  int i;
112 
113  for ( i = 0; i < m_UT; ++i )
114  {
115  float* upsilon11 = buff_U + (i )*cs_U + (i )*rs_U;
116  float* u12t = buff_U + (i+1)*cs_U + (i )*rs_U;
117 
118  float* D0 = buff_D + (0 )*cs_D + (0 )*rs_D;
119  float* d1 = buff_D + (i )*cs_D + (0 )*rs_D;
120  float* D2 = buff_D + (i+1)*cs_D + (0 )*rs_D;
121 
122  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
123  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
124 
125  int mn_ahead = m_UT - i - 1;
126  int mn_behind = i;
127 
128  //------------------------------------------------------------//
129 
130  // FLA_Househ2_UT( FLA_LEFT,
131  // upsilon11,
132  // d1, tau11 );
133  FLA_Househ2_UT_l_ops( m_D,
134  upsilon11,
135  d1, rs_D,
136  tau11 );
137 
138  // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
139  // D2 );
140  FLA_Apply_H2_UT_l_ops_var1( m_D,
141  mn_ahead,
142  tau11,
143  d1, rs_D,
144  u12t, cs_U,
145  D2, rs_D, cs_D );
146 
147  // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D0, d1, FLA_ZERO, t01 );
148  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
149  BLIS1_NO_CONJUGATE,
150  m_D,
151  mn_behind,
152  buff_1,
153  D0, rs_D, cs_D,
154  d1, rs_D,
155  buff_0,
156  t01, rs_T );
157 
158  //------------------------------------------------------------//
159 
160  }
161 
162  return FLA_SUCCESS;
163 }
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13

References bl1_sgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_QR2_UT_opt_var1().

◆ FLA_QR2_UT_opt_var1()

FLA_Error FLA_QR2_UT_opt_var1 ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  T 
)
15 {
16  FLA_Datatype datatype;
17  int m_UT, m_D;
18  int rs_U, cs_U;
19  int rs_D, cs_D;
20  int rs_T, cs_T;
21 
22  datatype = FLA_Obj_datatype( U );
23 
24  m_UT = FLA_Obj_length( U );
25  m_D = FLA_Obj_length( D );
26 
27  rs_U = FLA_Obj_row_stride( U );
28  cs_U = FLA_Obj_col_stride( U );
29  rs_D = FLA_Obj_row_stride( D );
30  cs_D = FLA_Obj_col_stride( D );
31  rs_T = FLA_Obj_row_stride( T );
32  cs_T = FLA_Obj_col_stride( T );
33 
34 
35  switch ( datatype )
36  {
37  case FLA_FLOAT:
38  {
39  float* buff_U = FLA_FLOAT_PTR( U );
40  float* buff_D = FLA_FLOAT_PTR( D );
41  float* buff_T = FLA_FLOAT_PTR( T );
42 
43  FLA_QR2_UT_ops_var1( m_UT,
44  m_D,
45  buff_U, rs_U, cs_U,
46  buff_D, rs_D, cs_D,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_U = FLA_DOUBLE_PTR( U );
55  double* buff_D = FLA_DOUBLE_PTR( D );
56  double* buff_T = FLA_DOUBLE_PTR( T );
57 
58  FLA_QR2_UT_opd_var1( m_UT,
59  m_D,
60  buff_U, rs_U, cs_U,
61  buff_D, rs_D, cs_D,
62  buff_T, rs_T, cs_T );
63 
64  break;
65  }
66 
67  case FLA_COMPLEX:
68  {
69  scomplex* buff_U = FLA_COMPLEX_PTR( U );
70  scomplex* buff_D = FLA_COMPLEX_PTR( D );
71  scomplex* buff_T = FLA_COMPLEX_PTR( T );
72 
73  FLA_QR2_UT_opc_var1( m_UT,
74  m_D,
75  buff_U, rs_U, cs_U,
76  buff_D, rs_D, cs_D,
77  buff_T, rs_T, cs_T );
78 
79  break;
80  }
81 
82  case FLA_DOUBLE_COMPLEX:
83  {
84  dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
85  dcomplex* buff_D = FLA_DOUBLE_COMPLEX_PTR( D );
86  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
87 
88  FLA_QR2_UT_opz_var1( m_UT,
89  m_D,
90  buff_U, rs_U, cs_U,
91  buff_D, rs_D, cs_D,
92  buff_T, rs_T, cs_T );
93 
94  break;
95  }
96  }
97 
98  return FLA_SUCCESS;
99 }
FLA_Error FLA_QR2_UT_opz_var1(int m_UT, int m_D, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_D, int rs_D, int cs_D, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_QR2_UT_opt_var1.c:295
FLA_Error FLA_QR2_UT_opd_var1(int m_UT, int m_D, double *buff_U, int rs_U, int cs_U, double *buff_D, int rs_D, int cs_D, double *buff_T, int rs_T, int cs_T)
Definition: FLA_QR2_UT_opt_var1.c:167
FLA_Error FLA_QR2_UT_opc_var1(int m_UT, int m_D, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_D, int rs_D, int cs_D, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_QR2_UT_opt_var1.c:231
FLA_Error FLA_QR2_UT_ops_var1(int m_UT, int m_D, float *buff_U, int rs_U, int cs_U, float *buff_D, int rs_D, int cs_D, float *buff_T, int rs_T, int cs_T)
Definition: FLA_QR2_UT_opt_var1.c:103
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
Definition: blis_type_defs.h:138

References FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), FLA_QR2_UT_opc_var1(), FLA_QR2_UT_opd_var1(), FLA_QR2_UT_ops_var1(), and FLA_QR2_UT_opz_var1().

Referenced by FLA_QR2_UT_internal().

◆ FLA_QR2_UT_opz_var1()

FLA_Error FLA_QR2_UT_opz_var1 ( int  m_UT,
int  m_D,
dcomplex *  U,
int  rs_U,
int  cs_U,
dcomplex *  D,
int  rs_D,
int  cs_D,
dcomplex *  T,
int  rs_T,
int  cs_T 
)
300 {
301  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
302  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
303  int i;
304 
305  for ( i = 0; i < m_UT; ++i )
306  {
307  dcomplex* upsilon11 = buff_U + (i )*cs_U + (i )*rs_U;
308  dcomplex* u12t = buff_U + (i+1)*cs_U + (i )*rs_U;
309 
310  dcomplex* D0 = buff_D + (0 )*cs_D + (0 )*rs_D;
311  dcomplex* d1 = buff_D + (i )*cs_D + (0 )*rs_D;
312  dcomplex* D2 = buff_D + (i+1)*cs_D + (0 )*rs_D;
313 
314  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
315  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
316 
317  int mn_ahead = m_UT - i - 1;
318  int mn_behind = i;
319 
320  //------------------------------------------------------------//
321 
322  // FLA_Househ2_UT( FLA_LEFT,
323  // upsilon11,
324  // d1, tau11 );
325  FLA_Househ2_UT_l_opz( m_D,
326  upsilon11,
327  d1, rs_D,
328  tau11 );
329 
330  // FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
331  // D2 );
332  FLA_Apply_H2_UT_l_opz_var1( m_D,
333  mn_ahead,
334  tau11,
335  d1, rs_D,
336  u12t, cs_U,
337  D2, rs_D, cs_D );
338 
339  // FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D0, d1, FLA_ZERO, t01 );
340  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
341  BLIS1_NO_CONJUGATE,
342  m_D,
343  mn_behind,
344  buff_1,
345  D0, rs_D, cs_D,
346  d1, rs_D,
347  buff_0,
348  t01, rs_T );
349 
350  //------------------------------------------------------------//
351 
352  }
353 
354  return FLA_SUCCESS;
355 }
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:343
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255

References bl1_zgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_QR2_UT_opt_var1().

◆ FLA_QR2_UT_unb_var1()

FLA_Error FLA_QR2_UT_unb_var1 ( FLA_Obj  U,
FLA_Obj  D,
FLA_Obj  T 
)
15 {
16  FLA_Obj UTL, UTR, U00, u01, U02,
17  UBL, UBR, u10t, upsilon11, u12t,
18  U20, u21, U22;
19 
20  FLA_Obj DL, DR, D0, d1, D2;
21 
22  FLA_Obj TTL, TTR, T00, t01, T02,
23  TBL, TBR, t10t, tau11, t12t,
24  T20, t21, T22;
25 
26 
27  FLA_Part_2x2( U, &UTL, &UTR,
28  &UBL, &UBR, 0, 0, FLA_TL );
29 
30  FLA_Part_1x2( D, &DL, &DR, 0, FLA_LEFT );
31 
32  FLA_Part_2x2( T, &TTL, &TTR,
33  &TBL, &TBR, 0, 0, FLA_TL );
34 
35  while ( FLA_Obj_min_dim( UBR ) > 0 ){
36 
37  FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &u01, &U02,
38  /* ************* */ /* ************************** */
39  &u10t, /**/ &upsilon11, &u12t,
40  UBL, /**/ UBR, &U20, /**/ &u21, &U22,
41  1, 1, FLA_BR );
42 
43  FLA_Repart_1x2_to_1x3( DL, /**/ DR, &D0, /**/ &d1, &D2,
44  1, FLA_RIGHT );
45 
46  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
47  /* ************* */ /* ************************ */
48  &t10t, /**/ &tau11, &t12t,
49  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
50  1, 1, FLA_BR );
51 
52  /*------------------------------------------------------------*/
53 
54  // Compute tau11 and u2 from upsilon11 and d1 such that tau11 and u2
55  // determine a Householder transform H such that applying H from the
56  // left to the column vector consisting of upsilon11 and d1 annihilates
57  // the entries in d1 (and updates upsilon11).
58  FLA_Househ2_UT( FLA_LEFT,
59  upsilon11,
60  d1, tau11 );
61 
62  // / u12t \ = H / u12t \
63  // \ D2 / \ D2 /
64  //
65  // where H is formed from tau11 and d1.
66  FLA_Apply_H2_UT( FLA_LEFT, tau11, d1, u12t,
67  D2 );
68 
69  // t01 = D0' * d1;
70  FLA_Gemv_external( FLA_CONJ_TRANSPOSE, FLA_ONE, D0, d1, FLA_ZERO, t01 );
71 
72  /*------------------------------------------------------------*/
73 
74  FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, u01, /**/ U02,
75  u10t, upsilon11, /**/ u12t,
76  /* ************** */ /* ************************ */
77  &UBL, /**/ &UBR, U20, u21, /**/ U22,
78  FLA_TL );
79 
80  FLA_Cont_with_1x3_to_1x2( &DL, /**/ &DR, D0, d1, /**/ D2,
81  FLA_LEFT );
82 
83  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
84  t10t, tau11, /**/ t12t,
85  /* ************** */ /* ********************** */
86  &TBL, /**/ &TBR, T20, t21, /**/ T22,
87  FLA_TL );
88  }
89 
90  return FLA_SUCCESS;
91 }
FLA_Error FLA_Gemv_external(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv_external.c:13
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition: FLA_Apply_H2_UT.c:13
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16

References FLA_Apply_H2_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Gemv_external(), FLA_Househ2_UT(), FLA_Obj_min_dim(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), and FLA_ZERO.

Referenced by FLA_QR2_UT_internal().