libflame  revision_anchor
Functions
FLA_Bidiag_UT_u_opt_var1.c File Reference

(r)

Functions

FLA_Error FLA_Bidiag_UT_u_opt_var1 (FLA_Obj A, FLA_Obj TU, FLA_Obj TV)
 
FLA_Error FLA_Bidiag_UT_u_step_opt_var1 (FLA_Obj A, FLA_Obj T, FLA_Obj S)
 
FLA_Error FLA_Bidiag_UT_u_step_ops_var1 (int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opd_var1 (int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opc_var1 (int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
 
FLA_Error FLA_Bidiag_UT_u_step_opz_var1 (int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
 

Function Documentation

◆ FLA_Bidiag_UT_u_opt_var1()

FLA_Error FLA_Bidiag_UT_u_opt_var1 ( FLA_Obj  A,
FLA_Obj  TU,
FLA_Obj  TV 
)
14 {
15  return FLA_Bidiag_UT_u_step_opt_var1( A, TU, TV );
16 }
FLA_Error FLA_Bidiag_UT_u_step_opt_var1(FLA_Obj A, FLA_Obj T, FLA_Obj S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:18

References FLA_Bidiag_UT_u_step_opt_var1().

Referenced by FLA_Bidiag_UT_u().

◆ FLA_Bidiag_UT_u_step_opc_var1()

FLA_Error FLA_Bidiag_UT_u_step_opc_var1 ( int  m_A,
int  n_A,
int  m_TS,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T,
scomplex *  buff_S,
int  rs_S,
int  cs_S 
)
390 {
391  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
392  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
393 
394  int i;
395 
396  // b_alg = FLA_Obj_length( T );
397  int b_alg = m_TS;
398 
399  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
400  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
401  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
402  scomplex* buff_v = ( scomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
403  int inc_v = 1;
404 
405  for ( i = 0; i < b_alg; ++i )
406  {
407  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
408  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
409  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
410  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
411  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
412  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
413  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
414 
415  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
416  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
417 
418  scomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
419  scomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
420 
421  scomplex* v21 = buff_v + (i+1)*inc_v;
422 
423  scomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
424  scomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
425 
426  scomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
427  scomplex* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
428 
429  scomplex* v21_t = v21 + (0 )*inc_v;
430  scomplex* v21_b = v21 + (1 )*inc_v;
431 
432  int m_ahead = m_A - i - 1;
433  int n_ahead = n_A - i - 1;
434  int m_behind = i;
435  int n_behind = i;
436 
437  /*------------------------------------------------------------*/
438 
439  // FLA_Househ2_UT( FLA_LEFT,
440  // alpha11,
441  // a21, tau11 );
442  FLA_Househ2_UT_l_opc( m_ahead,
443  alpha11,
444  a21, rs_A,
445  tau11 );
446 
447  if ( n_ahead > 0 )
448  {
449  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
450  FLA_Apply_H2_UT_l_opc_var1( m_ahead,
451  n_ahead,
452  tau11,
453  a21, rs_A,
454  a12t, cs_A,
455  A22, rs_A, cs_A );
456 
457  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
458  FLA_Househ2_UT_r_opc( n_ahead - 1,
459  a12t_l,
460  a12t_r, cs_A,
461  sigma11 );
462 
463  // FLA_Set( FLA_ONE, v21_t );
464  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
465  *v21_t = *buff_1;
466  bl1_ccopyv( BLIS1_NO_CONJUGATE,
467  n_ahead - 1,
468  a12t_r, cs_A,
469  v21_b, inc_v );
470 
471  // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
472  FLA_Apply_H2_UT_r_opc_var1( m_ahead,
473  n_ahead - 1,
474  sigma11,
475  v21_b, inc_v,
476  A22_l, rs_A,
477  A22_r, rs_A, cs_A );
478 
479  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
480  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
481  BLIS1_NO_CONJUGATE,
482  m_behind,
483  n_ahead,
484  buff_1,
485  A02, rs_A, cs_A,
486  v21, inc_v,
487  buff_0,
488  s01, rs_S );
489  }
490 
491  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
492  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
493  bl1_ccopyv( BLIS1_CONJUGATE,
494  n_behind,
495  a10t, cs_A,
496  t01, rs_T );
497  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
498  BLIS1_NO_CONJUGATE,
499  m_ahead,
500  n_behind,
501  buff_1,
502  A20, rs_A, cs_A,
503  a21, rs_A,
504  buff_1,
505  t01, rs_T );
506 
507  /*------------------------------------------------------------*/
508 
509  }
510 
511  // FLA_Obj_free( &v );
512  FLA_free( buff_v );
513 
514  return FLA_SUCCESS;
515 }
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Apply_H2_UT_r_opc_var1(int n_u2h_A2, int m_a1, scomplex *tau, scomplex *u2h, int inc_u2h, scomplex *a1, int inc_a1, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:254
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_r_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:677
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
int i
Definition: bl1_axmyv2.c:145
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
scomplex
Definition: blis_type_defs.h:133

References bl1_ccopyv(), bl1_cgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_free(), FLA_Househ2_UT_l_opc(), FLA_Househ2_UT_r_opc(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_opd_var1()

FLA_Error FLA_Bidiag_UT_u_step_opd_var1 ( int  m_A,
int  n_A,
int  m_TS,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T,
double *  buff_S,
int  rs_S,
int  cs_S 
)
255 {
256  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
257  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
258 
259  int i;
260 
261  // b_alg = FLA_Obj_length( T );
262  int b_alg = m_TS;
263 
264  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
265  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
266  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
267  double* buff_v = ( double* ) FLA_malloc( n_A * sizeof( *buff_A ) );
268  int inc_v = 1;
269 
270  for ( i = 0; i < b_alg; ++i )
271  {
272  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
273  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
274  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
275  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
276  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
277  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
278  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
279 
280  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
281  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
282 
283  double* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
284  double* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
285 
286  double* v21 = buff_v + (i+1)*inc_v;
287 
288  double* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
289  double* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
290 
291  double* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
292  double* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
293 
294  double* v21_t = v21 + (0 )*inc_v;
295  double* v21_b = v21 + (1 )*inc_v;
296 
297  int m_ahead = m_A - i - 1;
298  int n_ahead = n_A - i - 1;
299  int m_behind = i;
300  int n_behind = i;
301 
302  /*------------------------------------------------------------*/
303 
304  // FLA_Househ2_UT( FLA_LEFT,
305  // alpha11,
306  // a21, tau11 );
307  FLA_Househ2_UT_l_opd( m_ahead,
308  alpha11,
309  a21, rs_A,
310  tau11 );
311 
312  if ( n_ahead > 0 )
313  {
314  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
315  FLA_Apply_H2_UT_l_opd_var1( m_ahead,
316  n_ahead,
317  tau11,
318  a21, rs_A,
319  a12t, cs_A,
320  A22, rs_A, cs_A );
321 
322  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
323  FLA_Househ2_UT_r_opd( n_ahead - 1,
324  a12t_l,
325  a12t_r, cs_A,
326  sigma11 );
327 
328  // FLA_Set( FLA_ONE, v21_t );
329  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
330  *v21_t = *buff_1;
331  bl1_dcopyv( BLIS1_NO_CONJUGATE,
332  n_ahead - 1,
333  a12t_r, cs_A,
334  v21_b, inc_v );
335 
336  // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
337  FLA_Apply_H2_UT_r_opd_var1( m_ahead,
338  n_ahead - 1,
339  sigma11,
340  v21_b, inc_v,
341  A22_l, rs_A,
342  A22_r, rs_A, cs_A );
343 
344  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
345  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
346  BLIS1_NO_CONJUGATE,
347  m_behind,
348  n_ahead,
349  buff_1,
350  A02, rs_A, cs_A,
351  v21, inc_v,
352  buff_0,
353  s01, rs_S );
354  }
355 
356  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
357  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
358  bl1_dcopyv( BLIS1_CONJUGATE,
359  n_behind,
360  a10t, cs_A,
361  t01, rs_T );
362  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
363  BLIS1_NO_CONJUGATE,
364  m_ahead,
365  n_behind,
366  buff_1,
367  A20, rs_A, cs_A,
368  a21, rs_A,
369  buff_1,
370  t01, rs_T );
371 
372  /*------------------------------------------------------------*/
373 
374  }
375 
376  // FLA_Obj_free( &v );
377  FLA_free( buff_v );
378 
379  return FLA_SUCCESS;
380 }
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:195
FLA_Error FLA_Apply_H2_UT_r_opd_var1(int n_u2h_A2, int m_a1, double *tau, double *u2h, int inc_u2h, double *a1, int inc_a1, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:181
FLA_Error FLA_Househ2_UT_r_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:664
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69

References bl1_dcopyv(), bl1_dgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_free(), FLA_Househ2_UT_l_opd(), FLA_Househ2_UT_r_opd(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_ops_var1()

FLA_Error FLA_Bidiag_UT_u_step_ops_var1 ( int  m_A,
int  n_A,
int  m_TS,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T,
float *  buff_S,
int  rs_S,
int  cs_S 
)
120 {
121  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
122  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
123 
124  int i;
125 
126  // b_alg = FLA_Obj_length( T );
127  int b_alg = m_TS;
128 
129  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
130  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
131  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
132  float* buff_v = ( float* ) FLA_malloc( n_A * sizeof( *buff_A ) );
133  int inc_v = 1;
134 
135  for ( i = 0; i < b_alg; ++i )
136  {
137  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
138  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
139  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
140  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
141  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
142  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
143  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
144 
145  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
146  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
147 
148  float* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
149  float* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
150 
151  float* v21 = buff_v + (i+1)*inc_v;
152 
153  float* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
154  float* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
155 
156  float* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
157  float* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
158 
159  float* v21_t = v21 + (0 )*inc_v;
160  float* v21_b = v21 + (1 )*inc_v;
161 
162  int m_ahead = m_A - i - 1;
163  int n_ahead = n_A - i - 1;
164  int m_behind = i;
165  int n_behind = i;
166 
167  /*------------------------------------------------------------*/
168 
169  // FLA_Househ2_UT( FLA_LEFT,
170  // alpha11,
171  // a21, tau11 );
172  FLA_Househ2_UT_l_ops( m_ahead,
173  alpha11,
174  a21, rs_A,
175  tau11 );
176 
177  if ( n_ahead > 0 )
178  {
179  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
180  FLA_Apply_H2_UT_l_ops_var1( m_ahead,
181  n_ahead,
182  tau11,
183  a21, rs_A,
184  a12t, cs_A,
185  A22, rs_A, cs_A );
186 
187  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
188  FLA_Househ2_UT_r_ops( n_ahead - 1,
189  a12t_l,
190  a12t_r, cs_A,
191  sigma11 );
192 
193  // FLA_Set( FLA_ONE, v21_t );
194  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
195  *v21_t = *buff_1;
196  bl1_scopyv( BLIS1_NO_CONJUGATE,
197  n_ahead - 1,
198  a12t_r, cs_A,
199  v21_b, inc_v );
200 
201  // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
202  FLA_Apply_H2_UT_r_ops_var1( m_ahead,
203  n_ahead - 1,
204  sigma11,
205  v21_b, inc_v,
206  A22_l, rs_A,
207  A22_r, rs_A, cs_A );
208 
209  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
210  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
211  BLIS1_NO_CONJUGATE,
212  m_behind,
213  n_ahead,
214  buff_1,
215  A02, rs_A, cs_A,
216  v21, inc_v,
217  buff_0,
218  s01, rs_S );
219  }
220 
221  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
222  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
223  bl1_scopyv( BLIS1_CONJUGATE,
224  n_behind,
225  a10t, cs_A,
226  t01, rs_T );
227  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
228  BLIS1_NO_CONJUGATE,
229  m_ahead,
230  n_behind,
231  buff_1,
232  A20, rs_A, cs_A,
233  a21, rs_A,
234  buff_1,
235  t01, rs_T );
236 
237  /*------------------------------------------------------------*/
238 
239  }
240 
241  // FLA_Obj_free( &v );
242  FLA_free( buff_v );
243 
244  return FLA_SUCCESS;
245 }
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Apply_H2_UT_r_ops_var1(int n_u2h_A2, int m_a1, float *tau, float *u2h, int inc_u2h, float *a1, int inc_a1, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:108
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
FLA_Error FLA_Househ2_UT_r_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:651
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13

References bl1_scopyv(), bl1_sgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_free(), FLA_Househ2_UT_l_ops(), FLA_Househ2_UT_r_ops(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().

◆ FLA_Bidiag_UT_u_step_opt_var1()

FLA_Error FLA_Bidiag_UT_u_step_opt_var1 ( FLA_Obj  A,
FLA_Obj  T,
FLA_Obj  S 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, n_A, m_TS;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24  int rs_S, cs_S;
25 
26  datatype = FLA_Obj_datatype( A );
27 
28  m_A = FLA_Obj_length( A );
29  n_A = FLA_Obj_width( A );
30  m_TS = FLA_Obj_length( T );
31 
32  rs_A = FLA_Obj_row_stride( A );
33  cs_A = FLA_Obj_col_stride( A );
34 
35  rs_T = FLA_Obj_row_stride( T );
36  cs_T = FLA_Obj_col_stride( T );
37 
38  rs_S = FLA_Obj_row_stride( S );
39  cs_S = FLA_Obj_col_stride( S );
40 
41 
42  switch ( datatype )
43  {
44  case FLA_FLOAT:
45  {
46  float* buff_A = FLA_FLOAT_PTR( A );
47  float* buff_T = FLA_FLOAT_PTR( T );
48  float* buff_S = FLA_FLOAT_PTR( S );
49 
50  FLA_Bidiag_UT_u_step_ops_var1( m_A,
51  n_A,
52  m_TS,
53  buff_A, rs_A, cs_A,
54  buff_T, rs_T, cs_T,
55  buff_S, rs_S, cs_S );
56 
57  break;
58  }
59 
60  case FLA_DOUBLE:
61  {
62  double* buff_A = FLA_DOUBLE_PTR( A );
63  double* buff_T = FLA_DOUBLE_PTR( T );
64  double* buff_S = FLA_DOUBLE_PTR( S );
65 
66  FLA_Bidiag_UT_u_step_opd_var1( m_A,
67  n_A,
68  m_TS,
69  buff_A, rs_A, cs_A,
70  buff_T, rs_T, cs_T,
71  buff_S, rs_S, cs_S );
72 
73  break;
74  }
75 
76  case FLA_COMPLEX:
77  {
78  scomplex* buff_A = FLA_COMPLEX_PTR( A );
79  scomplex* buff_T = FLA_COMPLEX_PTR( T );
80  scomplex* buff_S = FLA_COMPLEX_PTR( S );
81 
82  FLA_Bidiag_UT_u_step_opc_var1( m_A,
83  n_A,
84  m_TS,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T,
87  buff_S, rs_S, cs_S );
88 
89  break;
90  }
91 
92  case FLA_DOUBLE_COMPLEX:
93  {
94  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
95  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
96  dcomplex* buff_S = FLA_DOUBLE_COMPLEX_PTR( S );
97 
98  FLA_Bidiag_UT_u_step_opz_var1( m_A,
99  n_A,
100  m_TS,
101  buff_A, rs_A, cs_A,
102  buff_T, rs_T, cs_T,
103  buff_S, rs_S, cs_S );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Bidiag_UT_u_step_opz_var1(int m_A, int n_A, int m_TS, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T, dcomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:519
FLA_Error FLA_Bidiag_UT_u_step_opc_var1(int m_A, int n_A, int m_TS, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T, scomplex *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:384
FLA_Error FLA_Bidiag_UT_u_step_opd_var1(int m_A, int n_A, int m_TS, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T, double *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:249
FLA_Error FLA_Bidiag_UT_u_step_ops_var1(int m_A, int n_A, int m_TS, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T, float *buff_S, int rs_S, int cs_S)
Definition: FLA_Bidiag_UT_u_opt_var1.c:114
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
dcomplex
Definition: blis_type_defs.h:138

References FLA_Bidiag_UT_u_step_opc_var1(), FLA_Bidiag_UT_u_step_opd_var1(), FLA_Bidiag_UT_u_step_ops_var1(), FLA_Bidiag_UT_u_step_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Bidiag_UT_u_blk_var1(), and FLA_Bidiag_UT_u_opt_var1().

◆ FLA_Bidiag_UT_u_step_opz_var1()

FLA_Error FLA_Bidiag_UT_u_step_opz_var1 ( int  m_A,
int  n_A,
int  m_TS,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T,
dcomplex *  buff_S,
int  rs_S,
int  cs_S 
)
525 {
526  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
527  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
528 
529  int i;
530 
531  // b_alg = FLA_Obj_length( T );
532  int b_alg = m_TS;
533 
534  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &v );
535  // FLA_Obj_create( datatype_A, n_A, 1, 0, 0, &y );
536  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
537  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( n_A * sizeof( *buff_A ) );
538  int inc_v = 1;
539 
540  for ( i = 0; i < b_alg; ++i )
541  {
542  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
543  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
544  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
545  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
546  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
547  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
548  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
549 
550  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
551  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
552 
553  dcomplex* s01 = buff_S + (i )*cs_S + (0 )*rs_S;
554  dcomplex* sigma11 = buff_S + (i )*cs_S + (i )*rs_S;
555 
556  dcomplex* v21 = buff_v + (i+1)*inc_v;
557 
558  dcomplex* a12t_l = a12t + (0 )*cs_A + (0 )*rs_A;
559  dcomplex* a12t_r = a12t + (1 )*cs_A + (0 )*rs_A;
560 
561  dcomplex* A22_l = A22 + (0 )*cs_A + (0 )*rs_A;
562  dcomplex* A22_r = A22 + (1 )*cs_A + (0 )*rs_A;
563 
564  dcomplex* v21_t = v21 + (0 )*inc_v;
565  dcomplex* v21_b = v21 + (1 )*inc_v;
566 
567  int m_ahead = m_A - i - 1;
568  int n_ahead = n_A - i - 1;
569  int m_behind = i;
570  int n_behind = i;
571 
572  /*------------------------------------------------------------*/
573 
574  // FLA_Househ2_UT( FLA_LEFT,
575  // alpha11,
576  // a21, tau11 );
577  FLA_Househ2_UT_l_opz( m_ahead,
578  alpha11,
579  a21, rs_A,
580  tau11 );
581 
582  if ( n_ahead > 0 )
583  {
584  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21, a12t, A22 );
585  FLA_Apply_H2_UT_l_opz_var1( m_ahead,
586  n_ahead,
587  tau11,
588  a21, rs_A,
589  a12t, cs_A,
590  A22, rs_A, cs_A );
591 
592  // FLA_Househ2_UT( FLA_RIGHT, a12t_l, a12t_r, sigma11 );
593  FLA_Househ2_UT_r_opz( n_ahead - 1,
594  a12t_l,
595  a12t_r, cs_A,
596  sigma11 );
597 
598  // FLA_Set( FLA_ONE, v21_t );
599  // FLA_Copyt( FLA_TRANSPOSE, a12t_r, v21_b );
600  *v21_t = *buff_1;
601  bl1_zcopyv( BLIS1_NO_CONJUGATE,
602  n_ahead - 1,
603  a12t_r, cs_A,
604  v21_b, inc_v );
605 
606  // FLA_Apply_H2_UT( FLA_RIGHT, sigma11, v21_b, A22_l, A22_r );
607  FLA_Apply_H2_UT_r_opz_var1( m_ahead,
608  n_ahead - 1,
609  sigma11,
610  v21_b, inc_v,
611  A22_l, rs_A,
612  A22_r, rs_A, cs_A );
613 
614  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_ONE, A02, v21, FLA_ZERO, s01 );
615  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
616  BLIS1_NO_CONJUGATE,
617  m_behind,
618  n_ahead,
619  buff_1,
620  A02, rs_A, cs_A,
621  v21, inc_v,
622  buff_0,
623  s01, rs_S );
624  }
625 
626  // FLA_Copyt_external( FLA_CONJ_TRANSPOSE, a10t, t01 );
627  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ONE, t01 );
628  bl1_zcopyv( BLIS1_CONJUGATE,
629  n_behind,
630  a10t, cs_A,
631  t01, rs_T );
632  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
633  BLIS1_NO_CONJUGATE,
634  m_ahead,
635  n_behind,
636  buff_1,
637  A20, rs_A, cs_A,
638  a21, rs_A,
639  buff_1,
640  t01, rs_T );
641 
642  /*------------------------------------------------------------*/
643 
644  }
645 
646  // FLA_Obj_free( &v );
647  FLA_free( buff_v );
648 
649  return FLA_SUCCESS;
650 }
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:343
FLA_Error FLA_Apply_H2_UT_r_opz_var1(int n_u2h_A2, int m_a1, dcomplex *tau, dcomplex *u2h, int inc_u2h, dcomplex *a1, int inc_a1, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:327
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
FLA_Error FLA_Househ2_UT_r_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:693
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255

References bl1_zcopyv(), bl1_zgemv(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_free(), FLA_Househ2_UT_l_opz(), FLA_Househ2_UT_r_opz(), FLA_malloc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Bidiag_UT_u_step_opt_var1().