libflame  revision_anchor
Functions
FLA_QR_UT_inc.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLASH_QR_UT_inc (FLA_Obj A, FLA_Obj TW)
 
FLA_Error FLASH_QR_UT_inc_noopt (FLA_Obj A, FLA_Obj TW)
 
FLA_Error FLASH_QR_UT_inc_opt1 (FLA_Obj A, FLA_Obj TW)
 
FLA_Error FLA_QR_UT_inc_blk_var1 (FLA_Obj A, FLA_Obj TW, fla_qrutinc_t *cntl)
 
FLA_Error FLA_QR_UT_inc_blk_var2 (FLA_Obj A, FLA_Obj TW, FLA_Obj U, fla_qrutinc_t *cntl)
 
FLA_Error FLASH_QR_UT_inc_create_hier_matrices (FLA_Obj A_flat, dim_t depth, dim_t *b_flash, dim_t b_alg, FLA_Obj *A, FLA_Obj *TW)
 
dim_t FLASH_QR_UT_inc_determine_alg_blocksize (FLA_Obj A)
 
FLA_Error FLASH_QR_UT_inc_solve (FLA_Obj A, FLA_Obj TW, FLA_Obj B, FLA_Obj X)
 

Function Documentation

◆ FLA_QR_UT_inc_blk_var1()

FLA_Error FLA_QR_UT_inc_blk_var1 ( FLA_Obj  A,
FLA_Obj  TW,
fla_qrutinc_t * cntl 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TTL, WTR, T00, W01, W02,
20  TBL, TBR, T10, T11, W12,
21  T20, T21, T22;
22 
23  dim_t b;
24 
25  FLA_Part_2x2( A, &ATL, &ATR,
26  &ABL, &ABR, 0, 0, FLA_TL );
27 
28  FLA_Part_2x2( TW, &TTL, &WTR,
29  &TBL, &TBR, 0, 0, FLA_TL );
30 
31  while ( FLA_Obj_min_dim( ABR ) > 0 ){
32 
33  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
34 
35  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
36  /* ************* */ /* ******************** */
37  &A10, /**/ &A11, &A12,
38  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
39  b, b, FLA_BR );
40 
41  FLA_Repart_2x2_to_3x3( TTL, /**/ WTR, &T00, /**/ &W01, &W02,
42  /* ************* */ /* ******************** */
43  &T10, /**/ &T11, &W12,
44  TBL, /**/ TBR, &T20, /**/ &T21, &T22,
45  b, b, FLA_BR );
46 
47  /*------------------------------------------------------------*/
48 
49  /*
50  Perform a QR factorization (via UT transform) on A11:
51 
52  [ A11, T11 ] = QR_UT( A11, T11 );
53 
54  where T11 refers to a single storage block that refers to an
55  b_alg-by-b row-panel of upper triangular block Householder
56  transforms. Here, b is the storage blocksize while b_alg is
57  the algorithmic blocksize used by the QR factorization.
58  Typically b_alg << b.
59 
60  */
61 
62  FLA_QR_UT_internal( A11, T11,
63  FLA_Cntl_sub_qrut( cntl ) );
64 
65 
66  if ( FLA_Obj_width( A12 ) > 0 )
67  {
68  /*
69  Apply Q^H to A12 from the left:
70 
71  A12 = Q^H * A12
72 
73  where Q is formed from A11 and T11. Note that W12 refers
74  to a row-panel of blocks where each block refers to an
75  b_alg-by-b row-panel of workspace.
76  */
77 
78  FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
79  A11, T11, W12, A12,
80  FLA_Cntl_sub_apqut( cntl ) );
81  }
82 
83 
84  /*
85  Update QR factorization of A11 with each block of A21, storing
86  block Householder transforms into corresponding blocks of T21.
87 
88  [ A11, ...
89  A21, T21 ] = QR2_UT( A11, ...
90  A21, T21 );
91  */
92 
93  FLA_QR2_UT_internal( A11,
94  A21, T21,
95  FLA_Cntl_sub_qr2ut( cntl ) );
96 
97 
98  if ( FLA_Obj_width( A12 ) > 0 )
99  {
100  /*
101  Apply Q^H to A12 and A22 from the left:
102 
103  / A12 \ = Q^H * / A12 \
104  \ A22 / \ A22 /
105 
106  where Q is formed from A21 and T21.
107  */
108 
109  FLA_Apply_Q2_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
110  A21, T21, W12, A12,
111  A22,
112  FLA_Cntl_sub_apq2ut( cntl ) );
113  }
114 
115  /*------------------------------------------------------------*/
116 
117  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118  A10, A11, /**/ A12,
119  /* ************** */ /* ****************** */
120  &ABL, /**/ &ABR, A20, A21, /**/ A22,
121  FLA_TL );
122 
123  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &WTR, T00, W01, /**/ W02,
124  T10, T11, /**/ W12,
125  /* ************** */ /* ****************** */
126  &TBL, /**/ &TBR, T20, T21, /**/ T22,
127  FLA_TL );
128 
129  }
130 
131  return FLA_SUCCESS;
132 }
FLA_Error FLA_Apply_Q2_UT_internal(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj D, FLA_Obj T, FLA_Obj W, FLA_Obj C, FLA_Obj E, fla_apq2ut_t *cntl)
Definition: FLA_Apply_Q2_UT_internal.c:17
FLA_Error FLA_Apply_Q_UT_internal(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B, fla_apqut_t *cntl)
Definition: FLA_Apply_Q_UT_internal.c:17
FLA_Error FLA_QR2_UT_internal(FLA_Obj U, FLA_Obj D, FLA_Obj T, fla_qr2ut_t *cntl)
Definition: FLA_QR2_UT_internal.c:16
FLA_Error FLA_QR_UT_internal(FLA_Obj A, FLA_Obj T, fla_qrut_t *cntl)
Definition: FLA_QR_UT_internal.c:17
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
dim_t FLA_Determine_blocksize(FLA_Obj A_unproc, FLA_Quadrant to_dir, fla_blocksize_t *cntl_blocksizes)
Definition: FLA_Blocksize.c:234
dim_t FLA_Obj_min_dim(FLA_Obj obj)
Definition: FLA_Query.c:153
unsigned long dim_t
Definition: FLA_type_defs.h:71
Definition: FLA_type_defs.h:159

References FLA_Apply_Q2_UT_internal(), FLA_Apply_Q_UT_internal(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Obj_min_dim(), FLA_Obj_width(), FLA_Part_2x2(), FLA_QR2_UT_internal(), FLA_QR_UT_internal(), and FLA_Repart_2x2_to_3x3().

Referenced by FLASH_QR_UT_inc_noopt().

◆ FLA_QR_UT_inc_blk_var2()

FLA_Error FLA_QR_UT_inc_blk_var2 ( FLA_Obj  A,
FLA_Obj  TW,
FLA_Obj  U,
fla_qrutinc_t * cntl 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18 
19  FLA_Obj TTL, WTR, T00, W01, W02,
20  TBL, TBR, T10, T11, W12,
21  T20, T21, T22;
22 
23  FLA_Obj UL, UR, U0, U11, U2;
24 
25  dim_t b;
26 
27  FLA_Part_2x2( A, &ATL, &ATR,
28  &ABL, &ABR, 0, 0, FLA_TL );
29 
30  FLA_Part_2x2( TW, &TTL, &WTR,
31  &TBL, &TBR, 0, 0, FLA_TL );
32 
33  FLA_Part_1x2( U, &UL, &UR, 0, FLA_LEFT );
34 
35  while ( FLA_Obj_min_dim( ABR ) > 0 ){
36 
37  b = FLA_Determine_blocksize( ABR, FLA_BR, FLA_Cntl_blocksize( cntl ) );
38 
39  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
40  /* ************* */ /* ******************** */
41  &A10, /**/ &A11, &A12,
42  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
43  b, b, FLA_BR );
44 
45  FLA_Repart_2x2_to_3x3( TTL, /**/ WTR, &T00, /**/ &W01, &W02,
46  /* ************* */ /* ******************** */
47  &T10, /**/ &T11, &W12,
48  TBL, /**/ TBR, &T20, /**/ &T21, &T22,
49  b, b, FLA_BR );
50 
51  FLA_Repart_1x2_to_1x3( UL, /**/ UR, &U0, /**/ &U11, &U2,
52  b, FLA_RIGHT );
53 
54  /*------------------------------------------------------------*/
55 
56  /*
57  Use U11 to hold a copy of A11 to avoid a false
58  write-after-read dependency so that FLA_QR2_UT() may proceed
59  while FLA_Apply_Q_UT() executes.
60  */
61 
62 
63  /*
64  Perform a QR factorization (via UT transform) on A11:
65 
66  [ A11, T11 ] = QR_UT( A11, T11 );
67 
68  where T11 refers to a single storage block that refers to an
69  b_alg-by-b row-panel of upper triangular block Householder
70  transforms. Here, b is the storage blocksize while b_alg is
71  the algorithmic blocksize used by the QR factorization.
72  Typically b_alg << b.
73 
74  After the factorization is complete, A11 is copied into U11.
75 
76  */
77 
78  FLA_QR_UT_copy_internal( A11, T11, U11,
79  FLA_Cntl_sub_qrut( cntl ) );
80 
81 
82  /*
83  Apply Q^H to A12 from the left:
84 
85  A12 = Q^H * A12
86 
87  where Q is formed from A11 and T11. Note that W12 refers
88  to a row-panel of blocks where each block refers to an
89  b_alg-by-b row-panel of workspace.
90  */
91 
92  FLA_Apply_Q_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
93  U11, T11, W12, A12,
94  FLA_Cntl_sub_apqut( cntl ) );
95 
96 
97  /*
98  Update QR factorization of A11 with each block of A21, storing
99  block Householder transforms into corresponding blocks of T21.
100 
101  [ A11, ...
102  A21, T21 ] = QR2_UT( A11, ...
103  A21, T21 );
104  */
105 
106  FLA_QR2_UT_internal( A11,
107  A21, T21,
108  FLA_Cntl_sub_qr2ut( cntl ) );
109 
110 
111  /*
112  Apply Q^H to A12 and A22 from the left:
113 
114  / A12 \ = Q^H * / A12 \
115  \ A22 / \ A22 /
116 
117  where Q is formed from A21 and T21.
118  */
119 
120  FLA_Apply_Q2_UT_internal( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
121  A21, T21, W12, A12,
122  A22,
123  FLA_Cntl_sub_apq2ut( cntl ) );
124 
125  /*------------------------------------------------------------*/
126 
127  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
128  A10, A11, /**/ A12,
129  /* ************** */ /* ****************** */
130  &ABL, /**/ &ABR, A20, A21, /**/ A22,
131  FLA_TL );
132 
133  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &WTR, T00, W01, /**/ W02,
134  T10, T11, /**/ W12,
135  /* ************** */ /* ****************** */
136  &TBL, /**/ &TBR, T20, T21, /**/ T22,
137  FLA_TL );
138 
139  FLA_Cont_with_1x3_to_1x2( &UL, /**/ &UR, U0, U11, /**/ U2,
140  FLA_LEFT );
141 
142  }
143 
144  return FLA_SUCCESS;
145 }
FLA_Error FLA_QR_UT_copy_internal(FLA_Obj A, FLA_Obj T, FLA_Obj U, fla_qrut_t *cntl)
Definition: FLA_QR_UT_copy_internal.c:13
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267

References FLA_Apply_Q2_UT_internal(), FLA_Apply_Q_UT_internal(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Determine_blocksize(), FLA_Obj_min_dim(), FLA_Part_1x2(), FLA_Part_2x2(), FLA_QR2_UT_internal(), FLA_QR_UT_copy_internal(), FLA_Repart_1x2_to_1x3(), and FLA_Repart_2x2_to_3x3().

Referenced by FLASH_QR_UT_inc_opt1().

◆ FLASH_QR_UT_inc()

FLA_Error FLASH_QR_UT_inc ( FLA_Obj  A,
FLA_Obj  TW 
)
14 {
15  FLA_Error r_val;
16 
17  if ( FLASH_Queue_stack_depth() == 0 )
18  r_val = FLASH_QR_UT_inc_opt1( A, TW );
19  else
20  r_val = FLASH_QR_UT_inc_noopt( A, TW );
21 
22  return r_val;
23 }
unsigned int FLASH_Queue_stack_depth(void)
Definition: FLASH_Queue.c:106
FLA_Error FLASH_QR_UT_inc_noopt(FLA_Obj A, FLA_Obj TW)
Definition: FLASH_QR_UT_inc_noopt.c:20
FLA_Error FLASH_QR_UT_inc_opt1(FLA_Obj A, FLA_Obj TW)
Definition: FLASH_QR_UT_inc_opt1.c:20
int FLA_Error
Definition: FLA_type_defs.h:47

◆ FLASH_QR_UT_inc_create_hier_matrices()

FLA_Error FLASH_QR_UT_inc_create_hier_matrices ( FLA_Obj  A_flat,
dim_t  depth,
dim_t * b_flash,
dim_t  b_alg,
FLA_Obj * A,
FLA_Obj * TW 
)
14 {
15  FLA_Datatype datatype;
16  dim_t m, n;
17  dim_t n_last;
18 
19  // *** The current QR_UT_inc algorithm implemented assumes that
20  // the matrix has a hierarchical depth of 1. We check for that here
21  // because we anticipate that we'll use a more general algorithm in the
22  // future, and we don't want to forget to remove the constraint. ***
23  if ( depth != 1 )
24  {
25  FLA_Print_message( "FLASH_QR_UT_inc() currently only supports matrices of depth 1",
26  __FILE__, __LINE__ );
27  FLA_Abort();
28  }
29 
30  // Create hierarchical copy of matrix A_flat.
31  FLASH_Obj_create_hier_copy_of_flat( A_flat, depth, b_flash, A );
32 
33  // Query the datatype of matrix A_flat.
34  datatype = FLA_Obj_datatype( A_flat );
35 
36  // If the user passed in zero for b_alg, then we need to set the
37  // algorithmic (inner) blocksize to a reasonable default value.
38  if ( b_alg == 0 )
39  {
40  b_alg = FLASH_QR_UT_inc_determine_alg_blocksize( *A );
41  }
42 
43  // Query the element (not scalar) dimensions of the new hierarchical
44  // matrix. This is done so we can create T with full blocks for the
45  // bottom and right "edge cases" of A.
46  m = FLA_Obj_length( *A );
47  n = FLA_Obj_width( *A );
48 
49  // Create hierarchical matrices T and W. T is lower triangular where
50  // each block is b_alg-by-b_flash and W is strictly upper triangular
51  // where each block is b_alg-by-b_flash. So we can create them
52  // simultaneously as part of the same hierarchical matrix.
53  FLASH_Obj_create_ext( datatype, m * b_alg, n * b_flash[0],
54  depth, &b_alg, b_flash,
55  TW );
56 
57  // If the bottom-right-most block along the diagonal is a partial block,
58  // adjust the view of the corresponding T block.
59  n_last = FLASH_Obj_scalar_width( *A ) % *b_flash;
60 
61  if ( n_last > 0 )
62  {
63  FLA_Obj TWTL, TWTR,
64  TWBL, TWBR;
65  FLA_Obj TWL, TWR;
66  FLA_Obj TWT, TW0,
67  TWB, TW1,
68  TW2;
69  FLA_Obj* TW1p;
70 
71  FLA_Part_2x2( *TW, &TWTL, &TWTR,
72  &TWBL, &TWBR, n-1, n-1, FLA_TL );
73 
74  FLA_Part_2x1( TWBR, &TWT,
75  &TWB, 0, FLA_TOP );
76 
77  while ( FLA_Obj_length( TWB ) > 0 )
78  {
79  FLA_Repart_2x1_to_3x1( TWT, &TW0,
80  /* *** */ /* *** */
81  &TW1,
82  TWB, &TW2, 1, FLA_BOTTOM );
83 
84  // -----------------------------------------------------------
85 
86  TW1p = FLASH_OBJ_PTR_AT( TW1 );
87 
88  FLA_Part_1x2( *TW1p, &TWL, &TWR, n_last, FLA_LEFT );
89 
90  *TW1p = TWL;
91  TW1p->m_inner = TW1p->m;
92  TW1p->n_inner = TW1p->n;
93 
94  // -----------------------------------------------------------
95 
96  FLA_Cont_with_3x1_to_2x1( &TWT, TW0,
97  TW1,
98  /* *** */ /* *** */
99  &TWB, TW2, FLA_TOP );
100  }
101 
102 
103  }
104 
105  return FLA_SUCCESS;
106 }
dim_t FLASH_QR_UT_inc_determine_alg_blocksize(FLA_Obj A)
Definition: FLASH_QR_UT_inc_create_hier_matrices.c:109
FLA_Error FLASH_Obj_create_ext(FLA_Datatype datatype, dim_t m, dim_t n, dim_t depth, dim_t *b_m, dim_t *b_n, FLA_Obj *H)
Definition: FLASH_Obj.c:151
FLA_Error FLASH_Obj_create_hier_copy_of_flat(FLA_Obj F, dim_t depth, dim_t *b_mn, FLA_Obj *H)
Definition: FLASH_Obj.c:591
dim_t FLASH_Obj_scalar_width(FLA_Obj H)
Definition: FLASH_View.c:641
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
void FLA_Abort(void)
Definition: FLA_Error.c:248
void FLA_Print_message(char *str, char *file, int line)
Definition: FLA_Error.c:234
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dim_t n_inner
Definition: FLA_type_defs.h:166
dim_t m
Definition: FLA_type_defs.h:163
dim_t m_inner
Definition: FLA_type_defs.h:165
dim_t n
Definition: FLA_type_defs.h:164

References FLA_Abort(), FLA_Cont_with_3x1_to_2x1(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_width(), FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Print_message(), FLA_Repart_2x1_to_3x1(), FLASH_Obj_create_ext(), FLASH_Obj_create_hier_copy_of_flat(), FLASH_Obj_scalar_width(), FLASH_QR_UT_inc_determine_alg_blocksize(), FLA_Obj_view::m, FLA_Obj_view::m_inner, FLA_Obj_view::n, and FLA_Obj_view::n_inner.

◆ FLASH_QR_UT_inc_determine_alg_blocksize()

dim_t FLASH_QR_UT_inc_determine_alg_blocksize ( FLA_Obj  A)
110 {
111  dim_t b_alg;
112  dim_t b_flash;
113 
114  // Acquire the storage blocksize.
115  b_flash = FLA_Obj_length( *FLASH_OBJ_PTR_AT( A ) );
116 
117  // Scale the storage blocksize by a pre-defined scalar to arrive at a
118  // reasonable algorithmic blocksize, but make sure it's at least 1.
119  b_alg = ( dim_t ) max( ( double ) b_flash * FLA_QR_INNER_TO_OUTER_B_RATIO, 1 );
120 
121  return b_alg;
122 }

References FLA_Obj_length().

Referenced by FLASH_QR_UT_inc_create_hier_matrices().

◆ FLASH_QR_UT_inc_noopt()

FLA_Error FLASH_QR_UT_inc_noopt ( FLA_Obj  A,
FLA_Obj  TW 
)
21 {
22  FLA_Error r_val;
23 
24  // Check parameters.
25  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
26  FLA_QR_UT_inc_check( A, TW );
27 
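28  // Begin a parallel region.
29  FLASH_Queue_begin();
30 
31  // Invoke FLA_QR_UT_inc_blk_var1() with the standard control tree.
32  r_val = FLA_QR_UT_inc_blk_var1( A, TW, flash_qrutinc_cntl );
33 
34  // End the parallel region.
35  FLASH_Queue_end();
36 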
37  return r_val;
38 }
fla_qrutinc_t * flash_qrutinc_cntl
Definition: FLASH_QR_UT_inc_cntl_init.c:18
void FLASH_Queue_begin(void)
Definition: FLASH_Queue.c:59
void FLASH_Queue_end(void)
Definition: FLASH_Queue.c:81
FLA_Error FLA_QR_UT_inc_blk_var1(FLA_Obj A, FLA_Obj TW, fla_qrutinc_t *cntl)
Definition: FLA_QR_UT_inc_blk_var1.c:13
FLA_Error FLA_QR_UT_inc_check(FLA_Obj A, FLA_Obj TW)
Definition: FLA_QR_UT_inc_check.c:13
unsigned int FLA_Check_error_level(void)
Definition: FLA_Check.c:18

References FLA_Check_error_level(), FLA_QR_UT_inc_blk_var1(), FLA_QR_UT_inc_check(), flash_qrutinc_cntl, FLASH_Queue_begin(), and FLASH_Queue_end().

Referenced by FLASH_QR_UT_inc().

◆ FLASH_QR_UT_inc_opt1()

FLA_Error FLASH_QR_UT_inc_opt1 ( FLA_Obj  A,
FLA_Obj  TW 
)
21 {
22  FLA_Error r_val;
23  FLA_Obj U;
24 
25  // Check parameters.
26  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
27  FLA_QR_UT_inc_check( A, TW );
28 
29  // Create a temporary matrix to hold copies of all of the blocks along the
30  // diagonal of A.
31  FLASH_Obj_create_diag_panel( A, &U );
32 
33  // Begin a parallel region.
34  FLASH_Queue_begin();
35 
36  // Invoke FLA_QR_UT_inc_blk_var2() with the standard control tree.
37  r_val = FLA_QR_UT_inc_blk_var2( A, TW, U, flash_qrutinc_cntl );
38 
39  // End the parallel region.
40  FLASH_Queue_end();
41 
42  // Free the temporary matrix.
43  FLASH_Obj_free( &U );
44 
45  return r_val;
46 }
fla_qrutinc_t * flash_qrutinc_cntl
Definition: FLASH_QR_UT_inc_cntl_init.c:18
void FLASH_Obj_free(FLA_Obj *H)
Definition: FLASH_Obj.c:638
FLA_Error FLASH_Obj_create_diag_panel(FLA_Obj A, FLA_Obj *U)
Definition: FLASH_Obj_create_diag_panel.c:13
FLA_Error FLA_QR_UT_inc_blk_var2(FLA_Obj A, FLA_Obj TW, FLA_Obj U, fla_qrutinc_t *cntl)
Definition: FLA_QR_UT_inc_blk_var2.c:13

References FLA_Check_error_level(), FLA_QR_UT_inc_blk_var2(), FLA_QR_UT_inc_check(), FLASH_Obj_create_diag_panel(), FLASH_Obj_free(), flash_qrutinc_cntl, FLASH_Queue_begin(), and FLASH_Queue_end().

Referenced by FLASH_QR_UT_inc().

◆ FLASH_QR_UT_inc_solve()

FLA_Error FLASH_QR_UT_inc_solve ( FLA_Obj  A,
FLA_Obj  TW,
FLA_Obj  B,
FLA_Obj  X 
)
14 {
15  FLA_Obj W, Y;
16  FLA_Obj AT, AB;
17  FLA_Obj YT, YB;
18 
19  // Check parameters.
20  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
21  FLA_QR_UT_inc_solve_check( A, TW, B, X );
22 
23  FLASH_Apply_Q_UT_inc_create_workspace( TW, B, &W );
24 
25  FLASH_Obj_create_copy_of( FLA_NO_TRANSPOSE, B, &Y );
26 
27  FLASH_Apply_Q_UT_inc( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
28  A, TW, W, Y );
29 
30  // Create a temporary hierarchical view of only the top n-by-n part of A in
31  // case m > n so that AT captures the upper triangular factor R. We do the
32  // same for Y to ensure conformality.
33  FLASH_Part_create_2x1( A, &AT,
34  &AB, FLASH_Obj_scalar_width( A ), FLA_TOP );
35  FLASH_Part_create_2x1( Y, &YT,
36  &YB, FLASH_Obj_scalar_width( A ), FLA_TOP );
37 
38  FLASH_Trsm( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
39  FLA_ONE, AT, YT );
40 
41  FLASH_Copy( YT, X );
42 
43  // Free the temporary hierarchical views.
44  FLASH_Part_free_2x1( &AT,
45  &AB );
46  FLASH_Part_free_2x1( &YT,
47  &YB );
48 
49  FLASH_Obj_free( &Y );
50  FLASH_Obj_free( &W );
51 
52  return FLA_SUCCESS;
53 }
FLA_Error FLASH_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLASH_Copy.c:15
FLA_Error FLASH_Trsm(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLASH_Trsm.c:15
FLA_Error FLASH_Apply_Q_UT_inc(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj TW, FLA_Obj W1, FLA_Obj B)
Definition: FLASH_Apply_Q_UT_inc.c:18
FLA_Error FLASH_Obj_create_copy_of(FLA_Trans trans, FLA_Obj H_cur, FLA_Obj *H_new)
Definition: FLASH_Obj.c:561
FLA_Error FLASH_Part_create_2x1(FLA_Obj A, FLA_Obj *AT, FLA_Obj *AB, dim_t n_rows, FLA_Side side)
Definition: FLASH_View.c:13
FLA_Error FLASH_Part_free_2x1(FLA_Obj *AT, FLA_Obj *AB)
Definition: FLASH_View.c:572
FLA_Error FLASH_Apply_Q_UT_inc_create_workspace(FLA_Obj TW, FLA_Obj B, FLA_Obj *W)
Definition: FLASH_Apply_Q_UT_inc_create_workspace.c:13
FLA_Error FLA_QR_UT_inc_solve_check(FLA_Obj A, FLA_Obj TW, FLA_Obj B, FLA_Obj X)
Definition: FLA_QR_UT_inc_solve_check.c:13
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18