libflame  revision_anchor
Functions
FLA_Hess_UT_vars.h File Reference

(r)

Go to the source code of this file.

Functions

FLA_Error FLA_Hess_UT_blk_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blk_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blf_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blk_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blf_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blk_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blf_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_blk_var5 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_unb_var5 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_unb_var5 (FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_opt_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var4 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var4 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var4 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var4 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_opt_var5 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_opt_var5 (FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ops_var5 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opd_var5 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opc_var5 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_opz_var5 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_ofu_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var1 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var1 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var1 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var1 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var1 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var2 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var2 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var2 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var2 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var2 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var3 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var3 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var3 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var3 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var3 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_ofu_var4 (FLA_Obj A, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofu_var4 (FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
 
FLA_Error FLA_Hess_UT_step_ofs_var4 (int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofd_var4 (int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofc_var4 (int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Hess_UT_step_ofz_var4 (int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
 
FLA_Error FLA_Fused_Ahx_Ax_ops_var1 (int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opd_var1 (int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Ahx_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1 (int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1 (int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1 (int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1 (int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1 (int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1 (int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1 (int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z)
 
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1 (int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z)
 

Function Documentation

◆ FLA_Fused_Ahx_Ax_opc_var1()

/*
 * Fused kernel, single-precision complex, reference (unoptimized) form.
 *
 * w is first set to zero over m_A elements. Then, for each of the n_A
 * columns a1 of A (column i starts at buff_A + i*cs_A, elements rs_A apart):
 *   - v[i] receives the dot product of a1 and x over m_A elements
 *     (bl1_cdot called with BLIS1_CONJUGATE);
 *   - w is updated with x[i] * a1 (bl1_caxpyv called with
 *     BLIS1_NO_CONJUGATE).
 *
 * Parameters:
 *   m_A, n_A            length of each column / number of columns visited
 *   buff_A, rs_A, cs_A  matrix buffer with its row and column strides
 *   buff_x, inc_x       input vector and stride (read as a whole vector of
 *                       m_A elements and element-wise at index i < n_A)
 *   buff_v, inc_v       output vector and stride, one element per column
 *   buff_w, inc_w       output vector and stride, m_A elements, overwritten
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): the two call heads (function name plus conjugation flag)
 * were absent from the rendered listing this block was recovered from.
 * They are reconstructed from the bl1_cdot()/bl1_caxpyv() prototypes, the
 * argument lists that survived, and the symbols this function is
 * documented to reference. Confirm against the repository source.
 */
FLA_Error FLA_Fused_Ahx_Ax_opc_var1( int m_A,
                                     int n_A,
                                     scomplex* buff_A, int rs_A, int cs_A,
                                     scomplex* buff_x, int inc_x,
                                     scomplex* buff_v, int inc_v,
                                     scomplex* buff_w, int inc_w )
{
  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
  int       i;

  /* w := 0 before accumulating the per-column contributions. */
  bl1_csetv( m_A,
             buff_0,
             buff_w, inc_w );

  for ( i = 0; i < n_A; ++i )
  {
    scomplex* a1   = buff_A + (i  )*cs_A + (0  )*rs_A;
    scomplex* nu1  = buff_v + (i  )*inc_v;
    scomplex* x    = buff_x;
    scomplex* chi1 = buff_x + (i  )*inc_x;
    scomplex* w    = buff_w;

    /*------------------------------------------------------------*/

    /* nu1 := dot( a1, x ) */
    bl1_cdot( BLIS1_CONJUGATE,
              m_A,
              a1, rs_A,
              x,  inc_x,
              nu1 );

    /* w := w + chi1 * a1   (same update as an F77_caxpy call) */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                chi1,
                a1, rs_A,
                w,  inc_w );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
FLA_Obj FLA_ZERO
Definition: FLA_Init.c:20
int i
Definition: bl1_axmyv2.c:145
chi1
Definition: bl1_axmyv2.c:366
void bl1_caxpyv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_axpyv.c:29
void bl1_cdot(conj1_t conj, int n, scomplex *x, int incx, scomplex *y, int incy, scomplex *rho)
Definition: bl1_dot.c:39
void bl1_csetv(int m, scomplex *sigma, scomplex *x, int incx)
Definition: bl1_setv.c:52
@ BLIS1_CONJUGATE
Definition: blis_type_defs.h:82
@ BLIS1_NO_CONJUGATE
Definition: blis_type_defs.h:81
Definition: blis_type_defs.h:133

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, and i.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofc_var3(), and FLA_Hess_UT_step_ofc_var4().

◆ FLA_Fused_Ahx_Ax_opd_var1()

FLA_Error FLA_Fused_Ahx_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_x,
int  inc_x,
double *  buff_v,
int  inc_v,
double *  buff_w,
int  inc_w 
)
179 {
180  double zero = bl1_d0();
181  int i;
182 
183  double* restrict w = buff_w;
184  double* restrict x = buff_x;
185 
186  double* restrict a1;
187  double* restrict a2;
188  double* restrict nu1;
189  double* restrict nu2;
190  double* restrict chi1;
191  double* restrict chi2;
192 
193  int n_run = n_A / 2;
194  int n_left = n_A % 2;
195  int step_a1 = 2*cs_A;
196  int step_nu1 = 2*inc_v;
197  int step_chi1 = 2*inc_x;
198 
199  bl1_dsetv( m_A,
200  &zero,
201  buff_w, inc_w );
202 
203  a1 = buff_A;
204  a2 = buff_A + cs_A;
205  nu1 = buff_v;
206  nu2 = buff_v + inc_v;
207  chi1 = buff_x;
208  chi2 = buff_x + inc_x;
209 
210  for ( i = 0; i < n_run; ++i )
211  {
212  /*------------------------------------------------------------*/
213 
214  bl1_ddotv2axpyv2b( m_A,
215  a1, rs_A,
216  a2, rs_A,
217  x, inc_x,
218  chi1,
219  chi2,
220  nu1,
221  nu2,
222  w, inc_w );
223 
224  /*------------------------------------------------------------*/
225 
226  a1 += step_a1;
227  a2 += step_a1;
228  nu1 += step_nu1;
229  nu2 += step_nu1;
230  chi1 += step_chi1;
231  chi2 += step_chi1;
232  }
233 
234  if ( n_left > 0 )
235  {
236  for ( i = 0; i < n_left; ++i )
237  {
238  bl1_ddotaxpy( m_A,
239  a1, rs_A,
240  x, inc_x,
241  chi1,
242  nu1,
243  w, inc_w );
244 
245  a1 += rs_A;
246  nu1 += inc_v;
247  chi1 += inc_x;
248  }
249  }
250 
251  return FLA_SUCCESS;
252 }
int n_left
Definition: bl1_axmyv2.c:149
int n_run
Definition: bl1_axmyv2.c:148
double *restrict chi2
Definition: bl1_axpyv2b.c:140
void bl1_ddotaxpy(int n, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition: bl1_dotaxpy.c:31
void bl1_ddotv2axpyv2b(int n, double *a1, int inc_a1, double *a2, int inc_a2, double *x, int inc_x, double *kappa1, double *kappa2, double *rho1, double *rho2, double *w, int inc_w)
Definition: bl1_dotv2axpyv2b.c:36
void bl1_dsetv(int m, double *sigma, double *x, int incx)
Definition: bl1_setv.c:39
double bl1_d0(void)
Definition: bl1_constants.c:118

References bl1_d0(), bl1_ddotaxpy(), bl1_ddotv2axpyv2b(), bl1_dsetv(), chi1, chi2, i, n_left, and n_run.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofd_var3(), and FLA_Hess_UT_step_ofd_var4().

◆ FLA_Fused_Ahx_Ax_ops_var1()

FLA_Error FLA_Fused_Ahx_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_x,
int  inc_x,
float *  buff_v,
int  inc_v,
float *  buff_w,
int  inc_w 
)
122 {
123  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
124  int i;
125 
126  bl1_ssetv( m_A,
127  buff_0,
128  buff_w, inc_w );
129 
130  for ( i = 0; i < n_A; ++i )
131  {
132  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
133  float* nu1 = buff_v + (i )*inc_v;
134  float* x = buff_x;
135  float* chi1 = buff_x + (i )*inc_x;
136  float* w = buff_w;
137 
138  /*------------------------------------------------------------*/
139 
141  m_A,
142  a1, rs_A,
143  x, inc_x,
144  nu1 );
145 /*
146  *nu1 = F77_sdot( &m_A,
147  a1, &rs_A,
148  x, &inc_x );
149 */
150 
152  m_A,
153  chi1,
154  a1, rs_A,
155  w, inc_w );
156 /*
157  F77_saxpy( &m_A,
158  chi1,
159  a1, &rs_A,
160  w, &inc_w );
161 */
162 
163  /*------------------------------------------------------------*/
164 
165  }
166 
167 
168  return FLA_SUCCESS;
169 }
void bl1_saxpyv(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy)
Definition: bl1_axpyv.c:13
void bl1_sdot(conj1_t conj, int n, float *x, int incx, float *y, int incy, float *rho)
Definition: bl1_dot.c:13
void bl1_ssetv(int m, float *sigma, float *x, int incx)
Definition: bl1_setv.c:26

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, and i.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofs_var3(), and FLA_Hess_UT_step_ofs_var4().

◆ FLA_Fused_Ahx_Ax_opz_var1()

/*
 * Fused kernel, double-precision complex, processing two columns of A per
 * iteration.
 *
 * w is first set to zero over m_A elements. Columns are then consumed in
 * pairs (a1, a2) through bl1_zdotv2axpyv2b(), which is handed both
 * columns, the whole vector x, the two matching elements of x
 * (chi1, chi2), the two matching output elements of v (nu1, nu2) and w.
 * If n_A is odd, the final column goes through the single-column kernel
 * bl1_zdotaxpy().
 *
 * Parameters:
 *   m_A, n_A            length of each column / number of columns visited
 *   buff_A, rs_A, cs_A  matrix buffer with its row and column strides
 *   buff_x, inc_x       input vector and stride
 *   buff_v, inc_v       output vector and stride, one element per column
 *   buff_w, inc_w       output vector and stride, m_A elements, overwritten
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): the exact arithmetic (including any conjugation) done by
 * the two bl1_* kernels is not visible here; confirm against their
 * definitions.
 */
FLA_Error FLA_Fused_Ahx_Ax_opz_var1( int m_A,
                                     int n_A,
                                     dcomplex* buff_A, int rs_A, int cs_A,
                                     dcomplex* buff_x, int inc_x,
                                     dcomplex* buff_v, int inc_v,
                                     dcomplex* buff_w, int inc_w )
{
  dcomplex zero = bl1_z0();
  int      i;

  dcomplex* restrict w = buff_w;
  dcomplex* restrict x = buff_x;

  dcomplex* restrict a1;
  dcomplex* restrict a2;
  dcomplex* restrict nu1;
  dcomplex* restrict nu2;
  dcomplex* restrict chi1;
  dcomplex* restrict chi2;

  int n_run     = n_A / 2;   /* number of full column pairs       */
  int n_left    = n_A % 2;   /* 0 or 1 trailing column            */
  int step_a1   = 2*cs_A;    /* pointer advance per column pair   */
  int step_nu1  = 2*inc_v;
  int step_chi1 = 2*inc_x;

  /* w := 0 before accumulating the per-column contributions. */
  bl1_zsetv( m_A,
             &zero,
             buff_w, inc_w );

  a1   = buff_A;
  a2   = buff_A + cs_A;
  nu1  = buff_v;
  nu2  = buff_v + inc_v;
  chi1 = buff_x;
  chi2 = buff_x + inc_x;

  for ( i = 0; i < n_run; ++i )
  {
    /*------------------------------------------------------------*/

    bl1_zdotv2axpyv2b( m_A,
                       a1, rs_A,
                       a2, rs_A,
                       x,  inc_x,
                       chi1,
                       chi2,
                       nu1,
                       nu2,
                       w,  inc_w );

    /*------------------------------------------------------------*/

    a1   += step_a1;
    a2   += step_a1;
    nu1  += step_nu1;
    nu2  += step_nu1;
    chi1 += step_chi1;
    chi2 += step_chi1;
  }

  /* Trailing column when n_A is odd. The loop body runs at most once. */
  for ( i = 0; i < n_left; ++i )
  {
    bl1_zdotaxpy( m_A,
                  a1, rs_A,
                  x,  inc_x,
                  chi1,
                  nu1,
                  w,  inc_w );

    /* Step to the next column: this must be the column stride. The
       previous code advanced by rs_A (the row stride), which was only
       harmless because the pointer is never used again. */
    a1   += cs_A;
    nu1  += inc_v;
    chi1 += inc_x;
  }

  return FLA_SUCCESS;
}
void bl1_zdotaxpy(int n, dcomplex *a, int inc_a, dcomplex *x, int inc_x, dcomplex *kappa, dcomplex *rho, dcomplex *w, int inc_w)
Definition: bl1_dotaxpy.c:258
void bl1_zdotv2axpyv2b(int n, dcomplex *a1, int inc_a1, dcomplex *a2, int inc_a2, dcomplex *x, int inc_x, dcomplex *kappa1, dcomplex *kappa2, dcomplex *rho1, dcomplex *rho2, dcomplex *w, int inc_w)
Definition: bl1_dotv2axpyv2b.c:331
dcomplex bl1_z0(void)
Definition: bl1_constants.c:133
void bl1_zsetv(int m, dcomplex *sigma, dcomplex *x, int incx)
Definition: bl1_setv.c:66
Definition: blis_type_defs.h:138

References bl1_z0(), bl1_zdotaxpy(), bl1_zdotv2axpyv2b(), bl1_zsetv(), chi1, chi2, i, n_left, and n_run.

Referenced by FLA_Fused_Ahx_Ax_opt_var1(), FLA_Hess_UT_step_ofz_var2(), FLA_Hess_UT_step_ofz_var3(), and FLA_Hess_UT_step_ofz_var4().

◆ FLA_Fused_Gerc2_Ahx_Ax_opc_var1()

/*
 * Fused kernel, single-precision complex, reference (unoptimized) form.
 *
 * w is first set to zero over m_A elements. Then, for each of the n_A
 * columns a1 of A, in this order:
 *   1. temp1 := alpha * conj( y[i] ),  temp2 := alpha * conj( u[i] )
 *   2. a1 := a1 + temp1 * u            (column update, A is modified)
 *   3. a1 := a1 + temp2 * z
 *   4. v[i] := dot( a1, x )            using the just-updated column
 *   5. w := w + x[i] * a1
 *
 * Parameters:
 *   m_A, n_A            length of each column / number of columns visited
 *   buff_alpha          pointer to the scalar alpha
 *   buff_u, inc_u       vector u and stride (whole vector and element i)
 *   buff_y, inc_y       vector y and stride (element i only)
 *   buff_z, inc_z       vector z and stride
 *   buff_A, rs_A, cs_A  matrix buffer with its strides, updated in place
 *   buff_x, inc_x       vector x and stride (whole vector and element i)
 *   buff_v, inc_v       output vector and stride, one element per column
 *   buff_w, inc_w       output vector and stride, m_A elements, overwritten
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): the four call heads (function name plus conjugation flag)
 * were absent from the rendered listing this block was recovered from.
 * They are reconstructed from the bl1_cdot()/bl1_caxpyv() prototypes, the
 * argument lists that survived, and the symbols this function is
 * documented to reference. Confirm against the repository source.
 */
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1( int m_A,
                                           int n_A,
                                           scomplex* buff_alpha,
                                           scomplex* buff_u, int inc_u,
                                           scomplex* buff_y, int inc_y,
                                           scomplex* buff_z, int inc_z,
                                           scomplex* buff_A, int rs_A, int cs_A,
                                           scomplex* buff_x, int inc_x,
                                           scomplex* buff_v, int inc_v,
                                           scomplex* buff_w, int inc_w )
{
  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
  int       i;

  /* w := 0 before accumulating the per-column contributions. */
  bl1_csetv( m_A,
             buff_0,
             buff_w, inc_w );

  for ( i = 0; i < n_A; ++i )
  {
    scomplex* a1       = buff_A + (i  )*cs_A + (0  )*rs_A;
    scomplex* nu1      = buff_v + (i  )*inc_v;
    scomplex* x        = buff_x;
    scomplex* chi1     = buff_x + (i  )*inc_x;
    scomplex* psi1     = buff_y + (i  )*inc_y;
    scomplex* u        = buff_u;
    scomplex* upsilon1 = buff_u + (i  )*inc_u;
    scomplex* w        = buff_w;
    scomplex* z        = buff_z;
    scomplex* alpha    = buff_alpha;
    scomplex  temp1;
    scomplex  temp2;
    scomplex  conj_psi1;
    scomplex  conj_upsilon1;

    /*------------------------------------------------------------*/

    /* temp1 := alpha * conj( psi1 ) */
    bl1_ccopyconj( psi1, &conj_psi1 );
    bl1_cmult3( alpha, &conj_psi1, &temp1 );

    /* temp2 := alpha * conj( upsilon1 ) */
    bl1_ccopyconj( upsilon1, &conj_upsilon1 );
    bl1_cmult3( alpha, &conj_upsilon1, &temp2 );

    /* a1 := a1 + temp1 * u */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &temp1,
                u,  inc_u,
                a1, rs_A );

    /* a1 := a1 + temp2 * z */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                &temp2,
                z,  inc_z,
                a1, rs_A );

    /* nu1 := dot( a1, x ), computed on the updated column */
    bl1_cdot( BLIS1_CONJUGATE,
              m_A,
              a1, rs_A,
              x,  inc_x,
              nu1 );

    /* w := w + chi1 * a1 */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_A,
                chi1,
                a1, rs_A,
                w,  inc_w );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
double *restrict psi1
Definition: bl1_axmyv2.c:139
double temp2
Definition: bl1_axpyv2b.c:147
double temp1
Definition: bl1_axpyv2b.c:146
upsilon1
Definition: bl1_axpyv2bdotaxpy.c:225

References bl1_caxpyv(), bl1_cdot(), bl1_csetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofc_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_opd_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1 ( int  m_A,
int  n_A,
double *  buff_alpha,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_x,
int  inc_x,
double *  buff_v,
int  inc_v,
double *  buff_w,
int  inc_w 
)
258 {
259  double zero = bl1_d0();
260 
261  double* restrict alpha = buff_alpha;
262  double* restrict u = buff_u;
263  double* restrict z = buff_z;
264  double* restrict x = buff_x;
265  double* restrict w = buff_w;
266 
267  double* restrict a1;
268  double* restrict nu1;
269  double* restrict chi1;
270  double* restrict psi1;
271  double* restrict upsilon1;
272 
273  double alpha_psi1;
274  double alpha_upsilon1;
275 
276  int n_run = n_A / 1;
277  //int n_left = n_A % 1;
278  int step_a1 = 1*cs_A;
279  int step_nu1 = 1*inc_v;
280  int step_chi1 = 1*inc_x;
281  int step_psi1 = 1*inc_y;
282  int step_upsilon1 = 1*inc_u;
283  int i;
284 
285  bl1_dsetv( m_A,
286  &zero,
287  buff_w, inc_w );
288 
289  a1 = buff_A;
290  nu1 = buff_v;
291  chi1 = buff_x;
292  psi1 = buff_y;
293  upsilon1 = buff_u;
294 
295  for ( i = 0; i < n_run; ++i )
296  {
297  /*------------------------------------------------------------*/
298 
299  bl1_dmult3( alpha, psi1, &alpha_psi1 );
300  bl1_dmult3( alpha, upsilon1, &alpha_upsilon1 );
301 
302  bl1_daxpyv2bdotaxpy( m_A,
303  &alpha_psi1,
304  u, inc_u,
305  &alpha_upsilon1,
306  z, inc_z,
307  a1, rs_A,
308  x, inc_x,
309  chi1,
310  nu1,
311  w, inc_w );
312 
313  /*------------------------------------------------------------*/
314 
315  a1 += step_a1;
316  nu1 += step_nu1;
317  chi1 += step_chi1;
318  psi1 += step_psi1;
319  upsilon1 += step_upsilon1;
320  }
321 
322  return FLA_SUCCESS;
323 }
void bl1_daxpyv2bdotaxpy(int n, double *beta, double *u, int inc_u, double *gamma, double *z, int inc_z, double *a, int inc_a, double *x, int inc_x, double *kappa, double *rho, double *w, int inc_w)
Definition: bl1_axpyv2bdotaxpy.c:36

References bl1_d0(), bl1_daxpyv2bdotaxpy(), bl1_dsetv(), chi1, i, n_run, psi1, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofd_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_ops_var1()

FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1 ( int  m_A,
int  n_A,
float *  buff_alpha,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_x,
int  inc_x,
float *  buff_v,
int  inc_v,
float *  buff_w,
int  inc_w 
)
160 {
161  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
162  int i;
163 
164  bl1_ssetv( m_A,
165  buff_0,
166  buff_w, inc_w );
167 
168  for ( i = 0; i < n_A; ++i )
169  {
170  float* a1 = buff_A + (i )*cs_A + (0 )*rs_A;
171  float* nu1 = buff_v + (i )*inc_v;
172  float* x = buff_x;
173  float* chi1 = buff_x + (i )*inc_x;
174  float* psi1 = buff_y + (i )*inc_y;
175  float* u = buff_u;
176  float* upsilon1 = buff_u + (i )*inc_u;
177  float* w = buff_w;
178  float* z = buff_z;
179  float* alpha = buff_alpha;
180  float temp1;
181  float temp2;
182 
183  /*------------------------------------------------------------*/
184 
185  // bl1_scopyconj( psi1, &conj_psi1 );
186  // bl1_smult3( alpha, &conj_psi1, &temp1 );
187  temp1 = *alpha * *psi1;
188 
189  // bl1_scopyconj( upsilon1, &conj_upsilon1 );
190  // bl1_smult3( alpha, &conj_upsilon1, &temp2 );
191  temp2 = *alpha * *upsilon1;
192 
194  m_A,
195  &temp1,
196  u, inc_u,
197  a1, rs_A );
198 /*
199  F77_saxpy( &m_A,
200  &temp1,
201  u, &inc_u,
202  a1, &rs_A );
203 */
204 
206  m_A,
207  &temp2,
208  z, inc_z,
209  a1, rs_A );
210 /*
211  F77_saxpy( &m_A,
212  &temp2,
213  z, &inc_z,
214  a1, &rs_A );
215 */
216 
218  m_A,
219  a1, rs_A,
220  x, inc_x,
221  nu1 );
222 /*
223  *nu1 = F77_sdot( &m_A,
224  a1, &rs_A,
225  x, &inc_x );
226 */
227 
229  m_A,
230  chi1,
231  a1, rs_A,
232  w, inc_w );
233 /*
234  F77_saxpy( &m_A,
235  chi1,
236  a1, &rs_A,
237  w, &inc_w );
238 */
239  /*------------------------------------------------------------*/
240 
241  }
242 
243  return FLA_SUCCESS;
244 }

References bl1_saxpyv(), bl1_sdot(), bl1_ssetv(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, chi1, FLA_ZERO, i, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofs_var3().

◆ FLA_Fused_Gerc2_Ahx_Ax_opz_var1()

/*
 * Fused kernel, double-precision complex.
 *
 * Zeroes w (m_A elements), then walks the n_A columns of A one at a time.
 * For column i:
 *   1. temp1 := alpha * conj( y[i] ),  temp2 := alpha * conj( u[i] )
 *   2. bl1_zaxpyv2b() is called with temp1, temp2, u, z and the column
 *   3. bl1_zdotaxpy() is called with the column, x, x[i], v[i] and w
 *
 * The parameter list restores the pointer declarators that were missing
 * from the rendered signature: every buff_* argument is a dcomplex
 * pointer, as the body's assignments to dcomplex* locals require.
 *
 * Parameters:
 *   m_A, n_A            length of each column / number of columns visited
 *   buff_alpha          pointer to the scalar alpha
 *   buff_u, inc_u       vector u and stride (whole vector and element i)
 *   buff_y, inc_y       vector y and stride (element i only)
 *   buff_z, inc_z       vector z and stride
 *   buff_A, rs_A, cs_A  matrix buffer with its row and column strides
 *   buff_x, inc_x       vector x and stride (whole vector and element i)
 *   buff_v, inc_v       vector v and stride, element i passed per column
 *   buff_w, inc_w       vector w and stride, zeroed on entry
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): the arithmetic inside bl1_zaxpyv2b() and bl1_zdotaxpy()
 * is not visible here; by analogy with the single-precision variant they
 * presumably perform column := column + temp1*u + temp2*z followed by
 * v[i] := dot( column, x ) and w := w + x[i]*column. Confirm against
 * their definitions.
 */
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1( int m_A,
                                           int n_A,
                                           dcomplex* buff_alpha,
                                           dcomplex* buff_u, int inc_u,
                                           dcomplex* buff_y, int inc_y,
                                           dcomplex* buff_z, int inc_z,
                                           dcomplex* buff_A, int rs_A, int cs_A,
                                           dcomplex* buff_x, int inc_x,
                                           dcomplex* buff_v, int inc_v,
                                           dcomplex* buff_w, int inc_w )
{
  dcomplex z_zero = bl1_z0();

  /* Operands that stay fixed across all columns. */
  dcomplex* restrict scal  = buff_alpha;
  dcomplex* restrict u_vec = buff_u;
  dcomplex* restrict z_vec = buff_z;
  dcomplex* restrict x_vec = buff_x;
  dcomplex* restrict w_vec = buff_w;

  /* Cursors that move forward by one column / one element per pass. */
  dcomplex* restrict col    = buff_A;
  dcomplex* restrict v_elem = buff_v;
  dcomplex* restrict x_elem = buff_x;
  dcomplex* restrict y_elem = buff_y;
  dcomplex* restrict u_elem = buff_u;

  dcomplex scaled_y;
  dcomplex scaled_u;
  dcomplex y_conj;
  dcomplex u_conj;

  int cols_left;

  bl1_zsetv( m_A, &z_zero, buff_w, inc_w );

  for ( cols_left = n_A; cols_left > 0; --cols_left )
  {
    /* scaled_y := alpha * conj( y[i] ) */
    bl1_zcopyconj( y_elem, &y_conj );
    bl1_zmult3( scal, &y_conj, &scaled_y );

    /* scaled_u := alpha * conj( u[i] ) */
    bl1_zcopyconj( u_elem, &u_conj );
    bl1_zmult3( scal, &u_conj, &scaled_u );

    /* Two separate kernels are used here rather than one fully fused call. */
    bl1_zaxpyv2b( m_A,
                  &scaled_y,
                  &scaled_u,
                  u_vec, inc_u,
                  z_vec, inc_z,
                  col,   rs_A );
    bl1_zdotaxpy( m_A,
                  col,   rs_A,
                  x_vec, inc_x,
                  x_elem,
                  v_elem,
                  w_vec, inc_w );

    /* Advance every cursor to the next column / element. */
    col    += cs_A;
    v_elem += inc_v;
    x_elem += inc_x;
    y_elem += inc_y;
    u_elem += inc_u;
  }

  return FLA_SUCCESS;
}
void bl1_zaxpyv2b(int n, dcomplex *alpha1, dcomplex *alpha2, dcomplex *x1, int inc_x1, dcomplex *x2, int inc_x2, dcomplex *y, int inc_y)
Definition: bl1_axpyv2b.c:210

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotaxpy(), bl1_zsetv(), chi1, i, n_run, psi1, temp1, temp2, and upsilon1.

Referenced by FLA_Fused_Gerc2_Ahx_Ax_opt_var1(), and FLA_Hess_UT_step_ofz_var3().

◆ FLA_Fused_Uhu_Yhu_Zhu_opc_var1()

/*
 * Fused kernel, single-precision complex, reference (unoptimized) form.
 *
 * For each of the n_U columns (u1 of U, y1 of Y, z1 of Z), in this order:
 *   1. alpha := dot( u1, u ),  beta := dot( z1, u ),  gamma := dot( y1, u )
 *   2. t[i]  := alpha                     (stored before scaling)
 *   3. alpha, beta and gamma are each scaled by delta
 *   4. y := y + alpha * y1 + beta  * u1
 *   5. z := z + alpha * z1 + gamma * u1
 * All vector operations run over m_U elements.
 *
 * Parameters:
 *   m_U, n_U            length of each column / number of columns visited
 *   buff_delta          pointer to the scalar delta
 *   buff_U, rs_U, cs_U  matrix U with its row and column strides
 *   buff_Y, rs_Y, cs_Y  matrix Y with its row and column strides
 *   buff_Z, rs_Z, cs_Z  matrix Z with its row and column strides
 *   buff_t, inc_t       output vector and stride, one element per column
 *   buff_u, inc_u       input vector u and stride
 *   buff_y, inc_y       vector y and stride, updated in place
 *   buff_z, inc_z       vector z and stride, updated in place
 *
 * Returns FLA_SUCCESS unconditionally.
 *
 * NOTE(review): the seven call heads (function name plus conjugation
 * flag) were absent from the rendered listing this block was recovered
 * from. They are reconstructed from the bl1_cdot()/bl1_caxpyv()
 * prototypes, the argument lists that survived, and the symbols this
 * function is documented to reference. Confirm against the repository
 * source.
 */
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1( int m_U,
                                          int n_U,
                                          scomplex* buff_delta,
                                          scomplex* buff_U, int rs_U, int cs_U,
                                          scomplex* buff_Y, int rs_Y, int cs_Y,
                                          scomplex* buff_Z, int rs_Z, int cs_Z,
                                          scomplex* buff_t, int inc_t,
                                          scomplex* buff_u, int inc_u,
                                          scomplex* buff_y, int inc_y,
                                          scomplex* buff_z, int inc_z )
{
  int i;

  for ( i = 0; i < n_U; ++i )
  {
    scomplex* u1    = buff_U + (i  )*cs_U + (0  )*rs_U;
    scomplex* y1    = buff_Y + (i  )*cs_Y + (0  )*rs_Y;
    scomplex* z1    = buff_Z + (i  )*cs_Z + (0  )*rs_Z;
    scomplex* delta = buff_delta;
    scomplex* tau1  = buff_t + (i  )*inc_t;
    scomplex* u     = buff_u;
    scomplex* y     = buff_y;
    scomplex* z     = buff_z;
    scomplex  alpha;
    scomplex  beta;
    scomplex  gamma;

    /*------------------------------------------------------------*/

    /* alpha := dot( u1, u ) */
    bl1_cdot( BLIS1_CONJUGATE,
              m_U,
              u1, rs_U,
              u,  inc_u,
              &alpha );

    /* beta := dot( z1, u ) */
    bl1_cdot( BLIS1_CONJUGATE,
              m_U,
              z1, rs_Z,
              u,  inc_u,
              &beta );

    /* gamma := dot( y1, u ) */
    bl1_cdot( BLIS1_CONJUGATE,
              m_U,
              y1, rs_Y,
              u,  inc_u,
              &gamma );

    /* Record the unscaled alpha before delta is applied. */
    *tau1 = alpha;

    bl1_cscals( delta, &alpha );
    bl1_cscals( delta, &beta );
    bl1_cscals( delta, &gamma );

    /* y := y + alpha * y1 */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &alpha,
                y1, rs_Y,
                y,  inc_y );

    /* y := y + beta * u1 */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &beta,
                u1, rs_U,
                y,  inc_y );

    /* z := z + alpha * z1 */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &alpha,
                z1, rs_Z,
                z,  inc_z );

    /* z := z + gamma * u1 */
    bl1_caxpyv( BLIS1_NO_CONJUGATE,
                m_U,
                &gamma,
                u1, rs_U,
                z,  inc_z );

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
double *restrict z1
Definition: bl1_dotsv2.c:148
double *restrict y1
Definition: bl1_dotsv2.c:145

References bl1_caxpyv(), bl1_cdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofc_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_opd_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1 ( int  m_U,
int  n_U,
double *  buff_delta,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_t,
int  inc_t,
double *  buff_u,
int  inc_u,
double *  buff_y,
int  inc_y,
double *  buff_z,
int  inc_z 
)
280 {
281  double zero = bl1_d0();
282 
283  double* restrict delta = buff_delta;
284  double* restrict u = buff_u;
285  double* restrict y = buff_y;
286  double* restrict z = buff_z;
287 
288  double* restrict u1;
289  double* restrict y1;
290  double* restrict z1;
291  double* restrict upsilon1;
292  double* restrict tau1;
293 
294  double alpha;
295  double beta;
296  double gamma;
297 
298  int i;
299 
300  int n_run = n_U / 1;
301  //int n_left = n_U % 1;
302  int step_u1 = 1*cs_U;
303  int step_y1 = 1*cs_Y;
304  int step_z1 = 1*cs_Z;
305  int step_upsilon1 = 1*inc_u;
306  int step_tau1 = 1*inc_t;
307 
308  u1 = buff_U;
309  y1 = buff_Y;
310  z1 = buff_Z;
311  upsilon1 = buff_u;
312  tau1 = buff_t;
313 
314  for ( i = 0; i < n_run; ++i )
315  {
316  /*------------------------------------------------------------*/
317 
318 /*
319  bl1_ddotsv3( BLIS1_CONJUGATE,
320  m_U,
321  u1, rs_U,
322  z1, rs_Z,
323  y1, rs_Y,
324  u, inc_u,
325  &zero,
326  &alpha,
327  &beta,
328  &gamma );
329 
330  *tau1 = alpha;
331 
332  bl1_dscals( delta, &alpha );
333  bl1_dscals( delta, &beta );
334  bl1_dscals( delta, &gamma );
335 
336  bl1_daxpyv2b( m_U,
337  &alpha,
338  &beta,
339  y1, rs_Y,
340  u1, rs_U,
341  y, inc_y );
342  bl1_daxpyv2b( m_U,
343  &alpha,
344  &gamma,
345  z1, rs_Z,
346  u1, rs_U,
347  z, inc_z );
348 */
349 
351  m_U,
352  y1, rs_Y,
353  z1, rs_Z,
354  u, inc_u,
355  &zero,
356  &beta,
357  &gamma );
358 
359  bl1_ddotaxmyv2( m_U,
360  &gamma,
361  &beta,
362  u1, rs_U,
363  u, inc_u,
364  &alpha,
365  y, inc_y,
366  z, inc_z );
367 
368  *tau1 = alpha;
369 
370  bl1_dscals( delta, &alpha );
372  m_U,
373  &alpha,
374  y1, rs_Y,
375  y, inc_y );
377  m_U,
378  &alpha,
379  z1, rs_Z,
380  z, inc_z );
381 
382 
383  /*------------------------------------------------------------*/
384 
385  u1 += step_u1;
386  y1 += step_y1;
387  z1 += step_z1;
388  upsilon1 += step_upsilon1;
389  tau1 += step_tau1;
390  }
391 
392 
393  return FLA_SUCCESS;
394 }
void bl1_daxpyv(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy)
Definition: bl1_axpyv.c:21
void bl1_ddotaxmyv2(int n, double *alpha, double *beta, double *x, int inc_x, double *u, int inc_u, double *rho, double *y, int inc_y, double *z, int inc_z)
Definition: bl1_dotaxmyv2.c:34
void bl1_ddotsv2(conj1_t conjxy, int n, double *x, int inc_x, double *y, int inc_y, double *z, int inc_z, double *beta, double *rho_xz, double *rho_yz)
Definition: bl1_dotsv2.c:35

References bl1_d0(), bl1_daxpyv(), bl1_ddotaxmyv2(), bl1_ddotsv2(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, n_run, upsilon1, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofd_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_ops_var1()

FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1 ( int  m_U,
int  n_U,
float *  buff_delta,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_t,
int  inc_t,
float *  buff_u,
int  inc_u,
float *  buff_y,
int  inc_y,
float *  buff_z,
int  inc_z 
)
166 {
167  int i;
168 
169  for ( i = 0; i < n_U; ++i )
170  {
171  float* u1 = buff_U + (i )*cs_U + (0 )*rs_U;
172  float* y1 = buff_Y + (i )*cs_Y + (0 )*rs_Y;
173  float* z1 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
174  float* delta = buff_delta;
175  float* tau1 = buff_t + (i )*inc_t;
176  float* u = buff_u;
177  float* y = buff_y;
178  float* z = buff_z;
179  float alpha;
180  float beta;
181  float gamma;
182 
183  /*------------------------------------------------------------*/
184 
186  m_U,
187  u1, rs_U,
188  u, inc_u,
189  &alpha );
190  //alpha = F77_sdot( &m_U,
191  // u1, &rs_U,
192  // u, &inc_u );
193 
195  m_U,
196  z1, rs_Z,
197  u, inc_u,
198  &beta );
199  //beta = F77_sdot( &m_U,
200  // z1, &rs_Z,
201  // u, &inc_u );
202 
204  m_U,
205  y1, rs_Y,
206  u, inc_u,
207  &gamma );
208  //gamma = F77_sdot( &m_U,
209  // y1, &rs_Y,
210  // u, &inc_u );
211 
212  *tau1 = alpha;
213 
214  // bl1_sscals( delta, &alpha );
215  // bl1_sscals( delta, &beta );
216  // bl1_sscals( delta, &gamma );
217  alpha *= *delta;
218  beta *= *delta;
219  gamma *= *delta;
220 
222  m_U,
223  &alpha,
224  y1, rs_Y,
225  y, inc_y );
226  //F77_saxpy( &m_U,
227  // &alpha,
228  // y1, &rs_Y,
229  // y, &inc_y );
230 
232  m_U,
233  &beta,
234  u1, rs_U,
235  y, inc_y );
236  //F77_saxpy( &m_U,
237  // &beta,
238  // u1, &rs_U,
239  // y, &inc_y );
240 
242  m_U,
243  &alpha,
244  z1, rs_Z,
245  z, inc_z );
246  //F77_saxpy( &m_U,
247  // &alpha,
248  // z1, &rs_Z,
249  // z, &inc_z );
250 
252  m_U,
253  &gamma,
254  u1, rs_U,
255  z, inc_z );
256  //F77_saxpy( &m_U,
257  // &gamma,
258  // u1, &rs_U,
259  // z, &inc_z );
260 
261  /*------------------------------------------------------------*/
262 
263  }
264 
265  return FLA_SUCCESS;
266 }

References bl1_saxpyv(), bl1_sdot(), BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, i, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofs_var4().

◆ FLA_Fused_Uhu_Yhu_Zhu_opz_var1()

/*
 * FLA_Fused_Uhu_Yhu_Zhu_opz_var1 -- double-precision complex variant.
 *
 * Visits the n_U columns of U, Y and Z one at a time. Column i of a matrix X
 * starts at buff_X + i*cs_X and its elements are rs_X apart. For each column:
 *   1. bl1_zdotsv3 (called with BLIS1_CONJUGATE) produces three results
 *      against u in one call: alpha (paired with u1), beta (paired with z1)
 *      and gamma (paired with y1).
 *   2. alpha is stored into the current entry of t before any scaling.
 *   3. alpha, beta and gamma are each passed through bl1_zscals with delta.
 *   4. bl1_zaxpyv2b is applied with (alpha, beta, y1, u1) onto y and with
 *      (alpha, gamma, z1, u1) onto z.
 * The block comment at the end of the loop body preserves an alternative
 * formulation built on bl1_zdotsv2 / bl1_zdotaxmyv2 that is not compiled.
 *
 * Parameters:
 *   m_U, n_U           length of each column / number of columns processed
 *   buff_delta         pointer to the scalar delta
 *   buff_U, rs_U, cs_U matrix U and its row/column strides
 *   buff_Y, rs_Y, cs_Y matrix Y and its row/column strides
 *   buff_Z, rs_Z, cs_Z matrix Z and its row/column strides
 *   buff_t, inc_t      output vector t; one entry is written per column
 *   buff_u, inc_u      input vector u
 *   buff_y, inc_y      vector y, updated in place
 *   buff_z, inc_z      vector z, updated in place
 *
 * Returns FLA_SUCCESS unconditionally.
 */
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1 ( int  m_U,
int  n_U,
dcomplex *  buff_delta,
dcomplex *  buff_U,
int  rs_U,
int  cs_U,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_t,
int  inc_t,
dcomplex *  buff_u,
int  inc_u,
dcomplex *  buff_y,
int  inc_y,
dcomplex *  buff_z,
int  inc_z 
)
510 {
511  dcomplex zero = bl1_z0();
512 
513  dcomplex* restrict delta = buff_delta;
514  dcomplex* restrict u = buff_u;
515  dcomplex* restrict y = buff_y;
516  dcomplex* restrict z = buff_z;
517 
518  dcomplex* restrict u1;
519  dcomplex* restrict y1;
520  dcomplex* restrict z1;
521  dcomplex* restrict upsilon1;
522  dcomplex* restrict tau1;
523 
524  dcomplex alpha;
525  dcomplex beta;
526  dcomplex gamma;
527 
528  int i;
529 
// Unroll factor is 1, so n_run == n_U and there is no remainder loop.
530  int n_run = n_U / 1;
531  //int n_left = n_U % 1;
532  int step_u1 = 1*cs_U;
533  int step_y1 = 1*cs_Y;
534  int step_z1 = 1*cs_Z;
535  int step_upsilon1 = 1*inc_u;
536  int step_tau1 = 1*inc_t;
537 
538  u1 = buff_U;
539  y1 = buff_Y;
540  z1 = buff_Z;
// upsilon1 is advanced every iteration but never dereferenced in this body.
541  upsilon1 = buff_u;
542  tau1 = buff_t;
543 
544  for ( i = 0; i < n_run; ++i )
545  {
546  /*------------------------------------------------------------*/
547 
548 
// Three fused results against u: alpha <- u1, beta <- z1, gamma <- y1.
549  bl1_zdotsv3( BLIS1_CONJUGATE,
550  m_U,
551  u1, rs_U,
552  z1, rs_Z,
553  y1, rs_Y,
554  u, inc_u,
555  &zero,
556  &alpha,
557  &beta,
558  &gamma );
559 
// t(i) keeps the unscaled alpha; only the local copies are scaled below.
560  *tau1 = alpha;
561 
562  bl1_zscals( delta, &alpha );
563  bl1_zscals( delta, &beta );
564  bl1_zscals( delta, &gamma );
565 
// Update y from the pair (alpha, y1), (beta, u1).
566  bl1_zaxpyv2b( m_U,
567  &alpha,
568  &beta,
569  y1, rs_Y,
570  u1, rs_U,
571  y, inc_y );
// Update z from the pair (alpha, z1), (gamma, u1).
572  bl1_zaxpyv2b( m_U,
573  &alpha,
574  &gamma,
575  z1, rs_Z,
576  u1, rs_U,
577  z, inc_z );
578 
579 
580 /*
581  bl1_zdotsv2( BLIS1_CONJUGATE,
582  m_U,
583  y1, rs_Y,
584  z1, rs_Z,
585  u, inc_u,
586  &zero,
587  &beta,
588  &gamma );
589 
590  bl1_zdotaxmyv2( m_U,
591  &gamma,
592  &beta,
593  u1, rs_U,
594  u, inc_u,
595  &alpha,
596  y, inc_y,
597  z, inc_z );
598 
599  *tau1 = alpha;
600 
601  bl1_zscals( delta, &alpha );
602  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
603  m_U,
604  &alpha,
605  y1, rs_Y,
606  y, inc_y );
607  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
608  m_U,
609  &alpha,
610  z1, rs_Z,
611  z, inc_z );
612 */
613 
614  /*------------------------------------------------------------*/
615 
// Advance every per-column pointer to the next column / next entry of t.
616  u1 += step_u1;
617  y1 += step_y1;
618  z1 += step_z1;
619  upsilon1 += step_upsilon1;
620  tau1 += step_tau1;
621  }
622 
623  return FLA_SUCCESS;
624 }
bl1_zscals(beta, rho_yz)
void bl1_zdotsv3(conj1_t conjxyw, int n, dcomplex *x, int inc_x, dcomplex *y, int inc_y, dcomplex *w, int inc_w, dcomplex *z, int inc_z, dcomplex *beta, dcomplex *rho_xz, dcomplex *rho_yz, dcomplex *rho_wz)
Definition: bl1_dotsv3.c:290

References bl1_z0(), bl1_zaxpyv2b(), bl1_zdotsv3(), bl1_zscals(), BLIS1_CONJUGATE, i, n_run, upsilon1, y1, and z1.

Referenced by FLA_Fused_Uhu_Yhu_Zhu_opt_var1(), and FLA_Hess_UT_step_ofz_var4().

◆ FLA_Hess_UT_blf_var2()

/*
 * FLA_Hess_UT_blf_var2 -- blocked driver built on the fused unblocked step
 * FLA_Hess_UT_step_ofu_var2.
 *
 * A is traversed from its top-left to its bottom-right corner in steps of at
 * most b_alg rows/columns, where b_alg is the length of T. Each iteration:
 *   1. runs FLA_Hess_UT_step_ofu_var2 on the trailing block ABR together with
 *      the b x b top-left corner of the current T panel (T1);
 *   2. copies the left bb columns of ABR into the workspace U, zeroes their
 *      first row and makes the remaining rows unit lower triangular;
 *   3. if ATR has any rows, applies
 *        ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB'
 *      using the top part of Z (ZT) as scratch space.
 *
 * U and Z are m_A x b_alg temporaries created here and freed before return.
 * The return values of the FLA_* calls are not inspected; the function
 * always returns FLA_SUCCESS.
 */
FLA_Error FLA_Hess_UT_blf_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj ZT, Z0,
22  ZB, Z1,
23  Z2;
24  FLA_Obj TL, TR, T0, T1, T2;
25 
26  FLA_Obj U, Z;
27  FLA_Obj ABR_l;
28  FLA_Obj UB_l;
29  FLA_Obj T1_tl;
30  FLA_Obj WT_l;
// none/none2/none3 only receive partition outputs that are never read.
31  FLA_Obj none, none2, none3;
32  FLA_Obj UB_tl,
33  UB_bl;
34  FLA_Datatype datatype_A;
35  dim_t m_A;
36  dim_t b_alg, b, bb;
37 
// The algorithmic block size is taken from the number of rows of T.
38  b_alg = FLA_Obj_length( T );
39 
40  datatype_A = FLA_Obj_datatype( A );
41  m_A = FLA_Obj_length( A );
42 
// Workspace matrices with as many rows as A and b_alg columns.
43  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45 
// Start with empty top/left partitions; they grow by b each iteration.
46  FLA_Part_2x2( A, &ATL, &ATR,
47  &ABL, &ABR, 0, 0, FLA_TL );
48  FLA_Part_2x1( U, &UT,
49  &UB, 0, FLA_TOP );
50  FLA_Part_2x1( Z, &ZT,
51  &ZB, 0, FLA_TOP );
52  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53 
54  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55  {
// Last iteration may be shorter than b_alg.
56  b = min( FLA_Obj_length( ABR ), b_alg );
57 
58  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59  /* ************* */ /* ******************** */
60  &A10, /**/ &A11, &A12,
61  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62  b, b, FLA_BR );
63  FLA_Repart_2x1_to_3x1( UT, &U0,
64  /* ** */ /* ** */
65  &U1,
66  UB, &U2, b, FLA_BOTTOM );
67  FLA_Repart_2x1_to_3x1( ZT, &Z0,
68  /* ** */ /* ** */
69  &Z1,
70  ZB, &Z2, b, FLA_BOTTOM );
71  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72  b, FLA_RIGHT );
73 
74  /*------------------------------------------------------------*/
75 
76  FLA_Part_2x2( T1, &T1_tl, &none,
77  &none2, &none3, b, b, FLA_TL );
78 
// bb is one less than the rows left in ABR, capped at b_alg. dim_t is
// unsigned, so this relies on ABR being non-empty inside the loop.
79  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80 
// Views of the left bb columns; taken before the step, read after it.
81  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
82  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
83 
84  // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var2( ABR, T1, b );
85  //FLA_Hess_UT_step_unb_var2( ABR, T1_tl );
86  FLA_Hess_UT_step_ofu_var2( ABR, T1_tl );
87  //FLA_Hess_UT_step_opt_var2( ABR, T1_tl );
88 
89  // Build UB from ABR, with explicit unit subdiagonal and zeros.
90  FLA_Copy_external( ABR_l, UB_l );
91  FLA_Part_2x1( UB_l, &UB_tl,
92  &UB_bl, 1, FLA_TOP );
93  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
94  FLA_Set( FLA_ZERO, UB_tl );
95 
96  // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
97  if ( FLA_Obj_length( ATR ) > 0 )
98  {
99  // NOTE: We use ZT as temporary workspace.
100  FLA_Part_1x2( ZT, &WT_l, &none, bb, FLA_LEFT );
// T1_tl is re-taken as bb x bb here; it was b x b for the step above.
101  FLA_Part_2x2( T1, &T1_tl, &none,
102  &none2, &none3, bb, bb, FLA_TL );
103 
104  // WT_l = ATR * UB_l * inv( triu( T1 ) ).
105  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
106  FLA_ONE, ATR, UB_l, FLA_ZERO, WT_l );
107  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
108  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, WT_l );
109 
110  // ATR = ATR - WT_l * UB_l'
111  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
112  FLA_MINUS_ONE, WT_l, UB_l, FLA_ONE, ATR );
113  }
114 
115  /*------------------------------------------------------------*/
116 
// Fold the processed block into the top/left partitions.
117  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118  A10, A11, /**/ A12,
119  /* ************** */ /* ****************** */
120  &ABL, /**/ &ABR, A20, A21, /**/ A22,
121  FLA_TL );
122  FLA_Cont_with_3x1_to_2x1( &UT, U0,
123  U1,
124  /* ** */ /* ** */
125  &UB, U2, FLA_TOP );
126  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
127  Z1,
128  /* ** */ /* ** */
129  &ZB, Z2, FLA_TOP );
130  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
131  FLA_LEFT );
132  }
133 
134  FLA_Obj_free( &U );
135  FLA_Obj_free( &Z );
136 
137  return FLA_SUCCESS;
138 }
FLA_Error FLA_Hess_UT_step_ofu_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_fus_var2.c:18
FLA_Error FLA_Copy_external(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy_external.c:13
FLA_Error FLA_Gemm_external(FLA_Trans transa, FLA_Trans transb, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, FLA_Obj beta, FLA_Obj C)
Definition: FLA_Gemm_external.c:13
FLA_Error FLA_Trsm_external(FLA_Side side, FLA_Uplo uplo, FLA_Trans trans, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Trsm_external.c:13
FLA_Obj FLA_MINUS_ONE
Definition: FLA_Init.c:22
FLA_Obj FLA_ONE
Definition: FLA_Init.c:18
FLA_Error FLA_Cont_with_3x3_to_2x2(FLA_Obj *ATL, FLA_Obj *ATR, FLA_Obj A00, FLA_Obj A01, FLA_Obj A02, FLA_Obj A10, FLA_Obj A11, FLA_Obj A12, FLA_Obj *ABL, FLA_Obj *ABR, FLA_Obj A20, FLA_Obj A21, FLA_Obj A22, FLA_Quadrant quadrant)
Definition: FLA_View.c:304
FLA_Error FLA_Part_2x2(FLA_Obj A, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:17
FLA_Error FLA_Cont_with_3x1_to_2x1(FLA_Obj *AT, FLA_Obj A0, FLA_Obj A1, FLA_Obj *AB, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:428
FLA_Error FLA_Repart_2x1_to_3x1(FLA_Obj AT, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj AB, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:226
FLA_Error FLA_Cont_with_1x3_to_1x2(FLA_Obj *AL, FLA_Obj *AR, FLA_Obj A0, FLA_Obj A1, FLA_Obj A2, FLA_Side side)
Definition: FLA_View.c:475
FLA_Error FLA_Obj_create(FLA_Datatype datatype, dim_t m, dim_t n, dim_t rs, dim_t cs, FLA_Obj *obj)
Definition: FLA_Obj.c:55
FLA_Error FLA_Part_1x2(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:110
FLA_Error FLA_Part_2x1(FLA_Obj A, FLA_Obj *A1, FLA_Obj *A2, dim_t mb, FLA_Side side)
Definition: FLA_View.c:76
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
FLA_Error FLA_Repart_2x2_to_3x3(FLA_Obj ATL, FLA_Obj ATR, FLA_Obj *A00, FLA_Obj *A01, FLA_Obj *A02, FLA_Obj *A10, FLA_Obj *A11, FLA_Obj *A12, FLA_Obj ABL, FLA_Obj ABR, FLA_Obj *A20, FLA_Obj *A21, FLA_Obj *A22, dim_t mb, dim_t nb, FLA_Quadrant quadrant)
Definition: FLA_View.c:142
FLA_Error FLA_Repart_1x2_to_1x3(FLA_Obj AL, FLA_Obj AR, FLA_Obj *A0, FLA_Obj *A1, FLA_Obj *A2, dim_t nb, FLA_Side side)
Definition: FLA_View.c:267
FLA_Error FLA_Obj_free(FLA_Obj *obj)
Definition: FLA_Obj.c:588
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49
unsigned long dim_t
Definition: FLA_type_defs.h:71
FLA_Error FLA_Triangularize(FLA_Uplo uplo, FLA_Diag diag, FLA_Obj A)
Definition: FLA_Triangularize.c:13
FLA_Error FLA_Set(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Set.c:13
Definition: FLA_type_defs.h:159

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_ofu_var2(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), and FLA_ZERO.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blf_var3()

/*
 * FLA_Hess_UT_blf_var3 -- blocked driver built on the fused unblocked step
 * FLA_Hess_UT_step_ofu_var3.
 *
 * A is traversed from its top-left to its bottom-right corner in steps of at
 * most b_alg rows/columns, where b_alg is the length of T. Each iteration:
 *   1. runs FLA_Hess_UT_step_ofu_var3 on the trailing block ABR together with
 *      the b x b top-left corner of the current T panel (T1);
 *   2. copies the left bb columns of ABR into the workspace U, zeroes their
 *      first row and makes the remaining rows unit lower triangular;
 *   3. if ATR has any rows, applies
 *        ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB'
 *      using the top part of Z (ZT) as scratch space.
 *
 * U and Z are m_A x b_alg temporaries created here and freed before return.
 * The return values of the FLA_* calls are not inspected; the function
 * always returns FLA_SUCCESS.
 */
FLA_Error FLA_Hess_UT_blf_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj ZT, Z0,
22  ZB, Z1,
23  Z2;
24  FLA_Obj TL, TR, T0, T1, T2;
25 
26  FLA_Obj U, Z;
27  FLA_Obj ABR_l;
28  FLA_Obj UB_l;
29  FLA_Obj WT_l;
30  FLA_Obj T1_tl;
// none/none2/none3 only receive partition outputs that are never read.
31  FLA_Obj none, none2, none3;
32  FLA_Obj UB_tl,
33  UB_bl;
34  FLA_Datatype datatype_A;
35  dim_t m_A;
36  dim_t b_alg, b, bb;
37 
// The algorithmic block size is taken from the number of rows of T.
38  b_alg = FLA_Obj_length( T );
39 
40  datatype_A = FLA_Obj_datatype( A );
41  m_A = FLA_Obj_length( A );
42 
// Workspace matrices with as many rows as A and b_alg columns.
43  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45 
// Start with empty top/left partitions; they grow by b each iteration.
46  FLA_Part_2x2( A, &ATL, &ATR,
47  &ABL, &ABR, 0, 0, FLA_TL );
48  FLA_Part_2x1( U, &UT,
49  &UB, 0, FLA_TOP );
50  FLA_Part_2x1( Z, &ZT,
51  &ZB, 0, FLA_TOP );
52  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53 
54  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55  {
// Last iteration may be shorter than b_alg.
56  b = min( FLA_Obj_length( ABR ), b_alg );
57 
58  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59  /* ************* */ /* ******************** */
60  &A10, /**/ &A11, &A12,
61  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62  b, b, FLA_BR );
63  FLA_Repart_2x1_to_3x1( UT, &U0,
64  /* ** */ /* ** */
65  &U1,
66  UB, &U2, b, FLA_BOTTOM );
67  FLA_Repart_2x1_to_3x1( ZT, &Z0,
68  /* ** */ /* ** */
69  &Z1,
70  ZB, &Z2, b, FLA_BOTTOM );
71  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72  b, FLA_RIGHT );
73 
74  /*------------------------------------------------------------*/
75 
76  FLA_Part_2x2( T1, &T1_tl, &none,
77  &none2, &none3, b, b, FLA_TL );
78 
// bb is one less than the rows left in ABR, capped at b_alg. dim_t is
// unsigned, so this relies on ABR being non-empty inside the loop.
79  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80 
// Views of the left bb columns; taken before the step, read after it.
81  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
82  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
83 
84  // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var3( ABR, T1, b );
85  //FLA_Hess_UT_step_unb_var3( ABR, T1_tl );
86  FLA_Hess_UT_step_ofu_var3( ABR, T1_tl );
87  //FLA_Hess_UT_step_opt_var3( ABR, T1_tl );
88 
89  // Build UB from ABR, with explicit unit subdiagonal and zeros.
90  FLA_Copy_external( ABR_l, UB_l );
91  FLA_Part_2x1( UB_l, &UB_tl,
92  &UB_bl, 1, FLA_TOP );
93  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
94  FLA_Set( FLA_ZERO, UB_tl );
95 
96  // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
97  if ( FLA_Obj_length( ATR ) > 0 )
98  {
99  // NOTE: We use ZT as temporary workspace.
100  FLA_Part_1x2( ZT, &WT_l, &none, bb, FLA_LEFT );
// T1_tl is re-taken as bb x bb here; it was b x b for the step above.
101  FLA_Part_2x2( T1, &T1_tl, &none,
102  &none2, &none3, bb, bb, FLA_TL );
103 
104  // WT_l = ATR * UB_l * inv( triu( T1 ) ).
105  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
106  FLA_ONE, ATR, UB_l, FLA_ZERO, WT_l );
107  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
108  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, WT_l );
109 
110  // ATR = ATR - WT_l * UB_l'
111  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
112  FLA_MINUS_ONE, WT_l, UB_l, FLA_ONE, ATR );
113  }
114 
115  /*------------------------------------------------------------*/
116 
// Fold the processed block into the top/left partitions.
117  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118  A10, A11, /**/ A12,
119  /* ************** */ /* ****************** */
120  &ABL, /**/ &ABR, A20, A21, /**/ A22,
121  FLA_TL );
122  FLA_Cont_with_3x1_to_2x1( &UT, U0,
123  U1,
124  /* ** */ /* ** */
125  &UB, U2, FLA_TOP );
126  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
127  Z1,
128  /* ** */ /* ** */
129  &ZB, Z2, FLA_TOP );
130  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
131  FLA_LEFT );
132  }
133 
134  FLA_Obj_free( &U );
135  FLA_Obj_free( &Z );
136 
137  return FLA_SUCCESS;
138 }
FLA_Error FLA_Hess_UT_step_ofu_var3(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_fus_var3.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_ofu_var3(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), and FLA_ZERO.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blf_var4()

/*
 * FLA_Hess_UT_blf_var4 -- blocked driver built on the fused unblocked step
 * FLA_Hess_UT_step_ofu_var4, which additionally takes the Y and Z panels.
 *
 * A is traversed from its top-left to its bottom-right corner in steps of at
 * most b_alg rows/columns, where b_alg is the length of T. Each iteration:
 *   1. runs FLA_Hess_UT_step_ofu_var4 on ABR, YB, ZB and the b x b top-left
 *      corner of the current T panel (T1);
 *   2. copies the left bb columns of ABR into the workspace U, zeroes their
 *      first row and makes the remaining rows unit lower triangular;
 *   3. if ATR has any rows, applies
 *        ATR = ATR - ATR * UB * inv( triu( T ) ) * UB'
 *      using the top part of Z (ZT) as scratch space;
 *   4. applies the trailing update A22 = A22 - U2 * Y2' - Z2 * U2'.
 *
 * U, Y and Z are m_A x b_alg temporaries created here and freed before
 * return. The return values of the FLA_* calls are not inspected; the
 * function always returns FLA_SUCCESS.
 */
FLA_Error FLA_Hess_UT_blf_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj YT, Y0,
22  YB, Y1,
23  Y2;
24  FLA_Obj ZT, Z0,
25  ZB, Z1,
26  Z2;
27  FLA_Obj TL, TR, T0, T1, T2;
28 
29  FLA_Obj U, Y, Z;
30  FLA_Obj ABR_l;
31  FLA_Obj UB_l, U2_l;
32  FLA_Obj YB_l, Y2_l;
33  FLA_Obj ZB_l, Z2_l;
34  FLA_Obj WT_l;
35  FLA_Obj T1_tl;
// none/none2/none3 only receive partition outputs that are never read.
36  FLA_Obj none, none2, none3;
37  FLA_Obj UB_tl,
38  UB_bl;
39  FLA_Datatype datatype_A;
40  dim_t m_A;
41  dim_t b_alg, b, bb;
42 
// The algorithmic block size is taken from the number of rows of T.
43  b_alg = FLA_Obj_length( T );
44 
45  datatype_A = FLA_Obj_datatype( A );
46  m_A = FLA_Obj_length( A );
47 
// Workspace matrices with as many rows as A and b_alg columns.
48  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
49  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Y );
50  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
51 
// Start with empty top/left partitions; they grow by b each iteration.
52  FLA_Part_2x2( A, &ATL, &ATR,
53  &ABL, &ABR, 0, 0, FLA_TL );
54  FLA_Part_2x1( U, &UT,
55  &UB, 0, FLA_TOP );
56  FLA_Part_2x1( Y, &YT,
57  &YB, 0, FLA_TOP );
58  FLA_Part_2x1( Z, &ZT,
59  &ZB, 0, FLA_TOP );
60  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
61 
62  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
63  {
// Last iteration may be shorter than b_alg.
64  b = min( FLA_Obj_length( ABR ), b_alg );
65 
66  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
67  /* ************* */ /* ******************** */
68  &A10, /**/ &A11, &A12,
69  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
70  b, b, FLA_BR );
71  FLA_Repart_2x1_to_3x1( UT, &U0,
72  /* ** */ /* ** */
73  &U1,
74  UB, &U2, b, FLA_BOTTOM );
75  FLA_Repart_2x1_to_3x1( YT, &Y0,
76  /* ** */ /* ** */
77  &Y1,
78  YB, &Y2, b, FLA_BOTTOM );
79  FLA_Repart_2x1_to_3x1( ZT, &Z0,
80  /* ** */ /* ** */
81  &Z1,
82  ZB, &Z2, b, FLA_BOTTOM );
83  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
84  b, FLA_RIGHT );
85 
86  /*------------------------------------------------------------*/
87 
88  FLA_Part_2x2( T1, &T1_tl, &none,
89  &none2, &none3, b, b, FLA_TL );
90 
// bb is one less than the rows left in ABR, capped at b_alg. dim_t is
// unsigned, so this relies on ABR being non-empty inside the loop.
91  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
92 
// Views of the left bb columns; taken before the step, read after it.
93  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
94  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
95  FLA_Part_1x2( YB, &YB_l, &none, bb, FLA_LEFT );
96  FLA_Part_1x2( ZB, &ZB_l, &none, bb, FLA_LEFT );
97 
// Drop the top b rows of each panel view; the remainders (U2_l, Y2_l,
// Z2_l) are the operands of the A22 update further down.
98  FLA_Part_2x1( UB_l, &none,
99  &U2_l, b, FLA_TOP );
100  FLA_Part_2x1( YB_l, &none,
101  &Y2_l, b, FLA_TOP );
102  FLA_Part_2x1( ZB_l, &none,
103  &Z2_l, b, FLA_TOP );
104 
105  // [ ABR, YB, ZB, T1 ] = FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1, b );
106  //FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1_tl );
107  FLA_Hess_UT_step_ofu_var4( ABR, YB, ZB, T1_tl );
108  //FLA_Hess_UT_step_opt_var4( ABR, YB, ZB, T1_tl );
109 
110  // Build UB from ABR, with explicit unit subdiagonal and zeros.
111  FLA_Copy_external( ABR_l, UB_l );
112  FLA_Part_2x1( UB_l, &UB_tl,
113  &UB_bl, 1, FLA_TOP );
114  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
115  FLA_Set( FLA_ZERO, UB_tl );
116 
117  // ATR = ATR - ATR * UB * inv( triu( T ) ) * UB' );
118  if ( FLA_Obj_length( ATR ) > 0 )
119  {
120  // NOTE: We use ZT as temporary workspace.
121  FLA_Part_1x2( ZT, &WT_l, &none, bb, FLA_LEFT );
// T1_tl is re-taken as bb x bb here; it was b x b for the step above.
122  FLA_Part_2x2( T1, &T1_tl, &none,
123  &none2, &none3, bb, bb, FLA_TL );
124 
125  // WT_l = ATR * UB_l * inv( triu( T ) ).
126  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
127  FLA_ONE, ATR, UB_l, FLA_ZERO, WT_l );
128  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
129  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, WT_l );
130 
131  // ATR = ATR - WT_l * UB_l'
132  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
133  FLA_MINUS_ONE, WT_l, UB_l, FLA_ONE, ATR );
134  }
135 
136  // A22 = A22 - U2 * Y2' - Z2 * U2';
137  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
138  FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
139  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
140  FLA_MINUS_ONE, Z2_l, U2_l, FLA_ONE, A22 );
141 
142  /*------------------------------------------------------------*/
143 
// Fold the processed block into the top/left partitions.
144  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
145  A10, A11, /**/ A12,
146  /* ************** */ /* ****************** */
147  &ABL, /**/ &ABR, A20, A21, /**/ A22,
148  FLA_TL );
149  FLA_Cont_with_3x1_to_2x1( &UT, U0,
150  U1,
151  /* ** */ /* ** */
152  &UB, U2, FLA_TOP );
153  FLA_Cont_with_3x1_to_2x1( &YT, Y0,
154  Y1,
155  /* ** */ /* ** */
156  &YB, Y2, FLA_TOP );
157  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
158  Z1,
159  /* ** */ /* ** */
160  &ZB, Z2, FLA_TOP );
161  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
162  FLA_LEFT );
163  }
164 
165  FLA_Obj_free( &U );
166  FLA_Obj_free( &Y );
167  FLA_Obj_free( &Z );
168 
169  return FLA_SUCCESS;
170 }
FLA_Error FLA_Hess_UT_step_ofu_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_fus_var4.c:29

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_ofu_var4(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), and FLA_ZERO.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var1()

/*
 * FLA_Hess_UT_blk_var1 -- blocked driver built on the optimized unblocked
 * step FLA_Hess_UT_step_opt_var1.
 *
 * A is traversed from its top-left to its bottom-right corner in steps of at
 * most b_alg rows/columns, where b_alg is the length of T. Each iteration:
 *   1. runs FLA_Hess_UT_step_opt_var1 on the trailing block ABR together with
 *      the b x b top-left corner of the current T panel (T1);
 *   2. copies the left bb columns of ABR into the workspace U, zeroes their
 *      first row and makes the remaining rows unit lower triangular;
 *   3. if ATR has any rows, applies
 *        ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB'
 *      using the top part of Z (ZT) as scratch space.
 *
 * U and Z are m_A x b_alg temporaries created here and freed before return.
 * The return values of the FLA_* calls are not inspected; the function
 * always returns FLA_SUCCESS.
 */
FLA_Error FLA_Hess_UT_blk_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj ZT, Z0,
22  ZB, Z1,
23  Z2;
24  FLA_Obj TL, TR, T0, T1, T2;
25 
26  FLA_Obj U, Z;
27  FLA_Obj ABR_l;
28  FLA_Obj UB_l;
29  FLA_Obj T1_tl;
30  FLA_Obj WT_l;
// none/none2/none3 only receive partition outputs that are never read.
31  FLA_Obj none, none2, none3;
32  FLA_Obj UB_tl,
33  UB_bl;
34  FLA_Datatype datatype_A;
35  dim_t m_A;
36  dim_t b_alg, b, bb;
37 
// The algorithmic block size is taken from the number of rows of T.
38  b_alg = FLA_Obj_length( T );
39 
40  datatype_A = FLA_Obj_datatype( A );
41  m_A = FLA_Obj_length( A );
42 
// Workspace matrices with as many rows as A and b_alg columns.
43  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45 
// Start with empty top/left partitions; they grow by b each iteration.
46  FLA_Part_2x2( A, &ATL, &ATR,
47  &ABL, &ABR, 0, 0, FLA_TL );
48  FLA_Part_2x1( U, &UT,
49  &UB, 0, FLA_TOP );
50  FLA_Part_2x1( Z, &ZT,
51  &ZB, 0, FLA_TOP );
52  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53 
54  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55  {
// Last iteration may be shorter than b_alg.
56  b = min( FLA_Obj_length( ABR ), b_alg );
57 
58  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59  /* ************* */ /* ******************** */
60  &A10, /**/ &A11, &A12,
61  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62  b, b, FLA_BR );
63  FLA_Repart_2x1_to_3x1( UT, &U0,
64  /* ** */ /* ** */
65  &U1,
66  UB, &U2, b, FLA_BOTTOM );
67  FLA_Repart_2x1_to_3x1( ZT, &Z0,
68  /* ** */ /* ** */
69  &Z1,
70  ZB, &Z2, b, FLA_BOTTOM );
71  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72  b, FLA_RIGHT );
73 
74  /*------------------------------------------------------------*/
75 
76  FLA_Part_2x2( T1, &T1_tl, &none,
77  &none2, &none3, b, b, FLA_TL );
78 
// bb is one less than the rows left in ABR, capped at b_alg. dim_t is
// unsigned, so this relies on ABR being non-empty inside the loop.
79  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80 
// Views of the left bb columns; taken before the step, read after it.
81  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
82  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
83 
84  // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var1( ABR, T1, b );
85  //FLA_Hess_UT_step_unb_var1( ABR, T1_tl );
86  FLA_Hess_UT_step_opt_var1( ABR, T1_tl );
87 
88  // Build UB from ABR, with explicit unit subdiagonal and zeros.
89  FLA_Copy_external( ABR_l, UB_l );
90  FLA_Part_2x1( UB_l, &UB_tl,
91  &UB_bl, 1, FLA_TOP );
92  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
93  FLA_Set( FLA_ZERO, UB_tl );
94 
95  // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
96  if ( FLA_Obj_length( ATR ) > 0 )
97  {
98  // NOTE: We use ZT as temporary workspace.
99  FLA_Part_1x2( ZT, &WT_l, &none, bb, FLA_LEFT );
// T1_tl is re-taken as bb x bb here; it was b x b for the step above.
100  FLA_Part_2x2( T1, &T1_tl, &none,
101  &none2, &none3, bb, bb, FLA_TL );
102 
103  // WT_l = ATR * UB_l * inv( triu( T1 ) ).
104  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
105  FLA_ONE, ATR, UB_l, FLA_ZERO, WT_l );
106  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
107  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, WT_l );
108 
109  // ATR = ATR - WT_l * UB_l'
110  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
111  FLA_MINUS_ONE, WT_l, UB_l, FLA_ONE, ATR );
112  }
113 
114  /*------------------------------------------------------------*/
115 
// Fold the processed block into the top/left partitions.
116  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
117  A10, A11, /**/ A12,
118  /* ************** */ /* ****************** */
119  &ABL, /**/ &ABR, A20, A21, /**/ A22,
120  FLA_TL );
121  FLA_Cont_with_3x1_to_2x1( &UT, U0,
122  U1,
123  /* ** */ /* ** */
124  &UB, U2, FLA_TOP );
125  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
126  Z1,
127  /* ** */ /* ** */
128  &ZB, Z2, FLA_TOP );
129  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
130  FLA_LEFT );
131  }
132 
133  FLA_Obj_free( &U );
134  FLA_Obj_free( &Z );
135 
136  return FLA_SUCCESS;
137 }
FLA_Error FLA_Hess_UT_step_opt_var1(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var1.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), and FLA_ZERO.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var2()

/*
 * FLA_Hess_UT_blk_var2 -- blocked driver built on the optimized unblocked
 * step FLA_Hess_UT_step_opt_var2 (the unb and ofu alternatives are left
 * commented out next to the call).
 *
 * A is traversed from its top-left to its bottom-right corner in steps of at
 * most b_alg rows/columns, where b_alg is the length of T. Each iteration:
 *   1. runs FLA_Hess_UT_step_opt_var2 on the trailing block ABR together with
 *      the b x b top-left corner of the current T panel (T1);
 *   2. copies the left bb columns of ABR into the workspace U, zeroes their
 *      first row and makes the remaining rows unit lower triangular;
 *   3. if ATR has any rows, applies
 *        ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB'
 *      using the top part of Z (ZT) as scratch space.
 *
 * U and Z are m_A x b_alg temporaries created here and freed before return.
 * The return values of the FLA_* calls are not inspected; the function
 * always returns FLA_SUCCESS.
 */
FLA_Error FLA_Hess_UT_blk_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj ZT, Z0,
22  ZB, Z1,
23  Z2;
24  FLA_Obj TL, TR, T0, T1, T2;
25 
26  FLA_Obj U, Z;
27  FLA_Obj ABR_l;
28  FLA_Obj UB_l;
29  FLA_Obj T1_tl;
30  FLA_Obj WT_l;
// none/none2/none3 only receive partition outputs that are never read.
31  FLA_Obj none, none2, none3;
32  FLA_Obj UB_tl,
33  UB_bl;
34  FLA_Datatype datatype_A;
35  dim_t m_A;
36  dim_t b_alg, b, bb;
37 
// The algorithmic block size is taken from the number of rows of T.
38  b_alg = FLA_Obj_length( T );
39 
40  datatype_A = FLA_Obj_datatype( A );
41  m_A = FLA_Obj_length( A );
42 
// Workspace matrices with as many rows as A and b_alg columns.
43  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45 
// Start with empty top/left partitions; they grow by b each iteration.
46  FLA_Part_2x2( A, &ATL, &ATR,
47  &ABL, &ABR, 0, 0, FLA_TL );
48  FLA_Part_2x1( U, &UT,
49  &UB, 0, FLA_TOP );
50  FLA_Part_2x1( Z, &ZT,
51  &ZB, 0, FLA_TOP );
52  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53 
54  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55  {
// Last iteration may be shorter than b_alg.
56  b = min( FLA_Obj_length( ABR ), b_alg );
57 
58  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59  /* ************* */ /* ******************** */
60  &A10, /**/ &A11, &A12,
61  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62  b, b, FLA_BR );
63  FLA_Repart_2x1_to_3x1( UT, &U0,
64  /* ** */ /* ** */
65  &U1,
66  UB, &U2, b, FLA_BOTTOM );
67  FLA_Repart_2x1_to_3x1( ZT, &Z0,
68  /* ** */ /* ** */
69  &Z1,
70  ZB, &Z2, b, FLA_BOTTOM );
71  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72  b, FLA_RIGHT );
73 
74  /*------------------------------------------------------------*/
75 
76  FLA_Part_2x2( T1, &T1_tl, &none,
77  &none2, &none3, b, b, FLA_TL );
78 
// bb is one less than the rows left in ABR, capped at b_alg. dim_t is
// unsigned, so this relies on ABR being non-empty inside the loop.
79  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80 
// Views of the left bb columns; taken before the step, read after it.
81  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
82  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
83 
84  // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var2( ABR, T1, b );
85  //FLA_Hess_UT_step_unb_var2( ABR, T1_tl );
86  //FLA_Hess_UT_step_ofu_var2( ABR, T1_tl );
87  FLA_Hess_UT_step_opt_var2( ABR, T1_tl );
88 
89  // Build UB from ABR, with explicit unit subdiagonal and zeros.
90  FLA_Copy_external( ABR_l, UB_l );
91  FLA_Part_2x1( UB_l, &UB_tl,
92  &UB_bl, 1, FLA_TOP );
93  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
94  FLA_Set( FLA_ZERO, UB_tl );
95 
96  // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
97  if ( FLA_Obj_length( ATR ) > 0 )
98  {
99  // NOTE: We use ZT as temporary workspace.
100  FLA_Part_1x2( ZT, &WT_l, &none, bb, FLA_LEFT );
// T1_tl is re-taken as bb x bb here; it was b x b for the step above.
101  FLA_Part_2x2( T1, &T1_tl, &none,
102  &none2, &none3, bb, bb, FLA_TL );
103 
104  // WT_l = ATR * UB_l * inv( triu( T1 ) ).
105  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
106  FLA_ONE, ATR, UB_l, FLA_ZERO, WT_l );
107  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
108  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, WT_l );
109 
110  // ATR = ATR - WT_l * UB_l'
111  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
112  FLA_MINUS_ONE, WT_l, UB_l, FLA_ONE, ATR );
113  }
114 
115  /*------------------------------------------------------------*/
116 
// Fold the processed block into the top/left partitions.
117  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118  A10, A11, /**/ A12,
119  /* ************** */ /* ****************** */
120  &ABL, /**/ &ABR, A20, A21, /**/ A22,
121  FLA_TL );
122  FLA_Cont_with_3x1_to_2x1( &UT, U0,
123  U1,
124  /* ** */ /* ** */
125  &UB, U2, FLA_TOP );
126  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
127  Z1,
128  /* ** */ /* ** */
129  &ZB, Z2, FLA_TOP );
130  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
131  FLA_LEFT );
132  }
133 
134  FLA_Obj_free( &U );
135  FLA_Obj_free( &Z );
136 
137  return FLA_SUCCESS;
138 }
FLA_Error FLA_Hess_UT_step_opt_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var2.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var2(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), and FLA_ZERO.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var3()

FLA_Error FLA_Hess_UT_blk_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj ZT, Z0,
22  ZB, Z1,
23  Z2;
24  FLA_Obj TL, TR, T0, T1, T2;
25 
26  FLA_Obj U, Z;
27  FLA_Obj ABR_l;
28  FLA_Obj UB_l;
29  FLA_Obj WT_l;
30  FLA_Obj T1_tl;
31  FLA_Obj none, none2, none3;
32  FLA_Obj UB_tl,
33  UB_bl;
34  FLA_Datatype datatype_A;
35  dim_t m_A;
36  dim_t b_alg, b, bb;
37 
38  b_alg = FLA_Obj_length( T );
39 
40  datatype_A = FLA_Obj_datatype( A );
41  m_A = FLA_Obj_length( A );
42 
43  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
44  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
45 
46  FLA_Part_2x2( A, &ATL, &ATR,
47  &ABL, &ABR, 0, 0, FLA_TL );
48  FLA_Part_2x1( U, &UT,
49  &UB, 0, FLA_TOP );
50  FLA_Part_2x1( Z, &ZT,
51  &ZB, 0, FLA_TOP );
52  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
53 
54  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
55  {
56  b = min( FLA_Obj_length( ABR ), b_alg );
57 
58  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
59  /* ************* */ /* ******************** */
60  &A10, /**/ &A11, &A12,
61  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
62  b, b, FLA_BR );
63  FLA_Repart_2x1_to_3x1( UT, &U0,
64  /* ** */ /* ** */
65  &U1,
66  UB, &U2, b, FLA_BOTTOM );
67  FLA_Repart_2x1_to_3x1( ZT, &Z0,
68  /* ** */ /* ** */
69  &Z1,
70  ZB, &Z2, b, FLA_BOTTOM );
71  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
72  b, FLA_RIGHT );
73 
74  /*------------------------------------------------------------*/
75 
76  FLA_Part_2x2( T1, &T1_tl, &none,
77  &none2, &none3, b, b, FLA_TL );
78 
79  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
80 
81  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
82  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
83 
84  // [ ABR, T1 ] = FLA_Hess_UT_step_unb_var3( ABR, T1, b );
85  //FLA_Hess_UT_step_unb_var3( ABR, T1_tl );
86  //FLA_Hess_UT_step_ofu_var3( ABR, T1_tl );
87  FLA_Hess_UT_step_opt_var3( ABR, T1_tl );
88 
89  // Build UB from ABR, with explicit unit subdiagonal and zeros.
90  FLA_Copy_external( ABR_l, UB_l );
91  FLA_Part_2x1( UB_l, &UB_tl,
92  &UB_bl, 1, FLA_TOP );
93  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
94  FLA_Set( FLA_ZERO, UB_tl );
95 
96  // ATR = ATR - ATR * UB * inv( triu( T1 ) ) * UB' );
97  if ( FLA_Obj_length( ATR ) > 0 )
98  {
99  // NOTE: We use ZT as temporary workspace.
100  FLA_Part_1x2( ZT, &WT_l, &none, bb, FLA_LEFT );
101  FLA_Part_2x2( T1, &T1_tl, &none,
102  &none2, &none3, bb, bb, FLA_TL );
103 
104  // WT_l = ATR * UB_l * inv( triu( T1 ) ).
105  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
106  FLA_ONE, ATR, UB_l, FLA_ZERO, WT_l );
107  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
108  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, WT_l );
109 
110  // ATR = ATR - WT_l * UB_l'
111  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
112  FLA_MINUS_ONE, WT_l, UB_l, FLA_ONE, ATR );
113  }
114 
115  /*------------------------------------------------------------*/
116 
117  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
118  A10, A11, /**/ A12,
119  /* ************** */ /* ****************** */
120  &ABL, /**/ &ABR, A20, A21, /**/ A22,
121  FLA_TL );
122  FLA_Cont_with_3x1_to_2x1( &UT, U0,
123  U1,
124  /* ** */ /* ** */
125  &UB, U2, FLA_TOP );
126  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
127  Z1,
128  /* ** */ /* ** */
129  &ZB, Z2, FLA_TOP );
130  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
131  FLA_LEFT );
132  }
133 
134  FLA_Obj_free( &U );
135  FLA_Obj_free( &Z );
136 
137  return FLA_SUCCESS;
138 }
FLA_Error FLA_Hess_UT_step_opt_var3(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var3.c:18

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var3(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), and FLA_ZERO.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var4()

FLA_Error FLA_Hess_UT_blk_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj YT, Y0,
22  YB, Y1,
23  Y2;
24  FLA_Obj ZT, Z0,
25  ZB, Z1,
26  Z2;
27  FLA_Obj TL, TR, T0, T1, T2;
28 
29  FLA_Obj U, Y, Z;
30  FLA_Obj ABR_l;
31  FLA_Obj UB_l, U2_l;
32  FLA_Obj YB_l, Y2_l;
33  FLA_Obj ZB_l, Z2_l;
34  FLA_Obj WT_l;
35  FLA_Obj T1_tl;
36  FLA_Obj none, none2, none3;
37  FLA_Obj UB_tl,
38  UB_bl;
39  FLA_Datatype datatype_A;
40  dim_t m_A;
41  dim_t b_alg, b, bb;
42 
43  b_alg = FLA_Obj_length( T );
44 
45  datatype_A = FLA_Obj_datatype( A );
46  m_A = FLA_Obj_length( A );
47 
48  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
49  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Y );
50  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
51 
52  FLA_Part_2x2( A, &ATL, &ATR,
53  &ABL, &ABR, 0, 0, FLA_TL );
54  FLA_Part_2x1( U, &UT,
55  &UB, 0, FLA_TOP );
56  FLA_Part_2x1( Y, &YT,
57  &YB, 0, FLA_TOP );
58  FLA_Part_2x1( Z, &ZT,
59  &ZB, 0, FLA_TOP );
60  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
61 
62  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
63  {
64  b = min( FLA_Obj_length( ABR ), b_alg );
65 
66  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
67  /* ************* */ /* ******************** */
68  &A10, /**/ &A11, &A12,
69  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
70  b, b, FLA_BR );
71  FLA_Repart_2x1_to_3x1( UT, &U0,
72  /* ** */ /* ** */
73  &U1,
74  UB, &U2, b, FLA_BOTTOM );
75  FLA_Repart_2x1_to_3x1( YT, &Y0,
76  /* ** */ /* ** */
77  &Y1,
78  YB, &Y2, b, FLA_BOTTOM );
79  FLA_Repart_2x1_to_3x1( ZT, &Z0,
80  /* ** */ /* ** */
81  &Z1,
82  ZB, &Z2, b, FLA_BOTTOM );
83  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &T2,
84  b, FLA_RIGHT );
85 
86  /*------------------------------------------------------------*/
87 
88  FLA_Part_2x2( T1, &T1_tl, &none,
89  &none2, &none3, b, b, FLA_TL );
90 
91  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
92 
93  FLA_Part_1x2( ABR, &ABR_l, &none, bb, FLA_LEFT );
94  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
95  FLA_Part_1x2( YB, &YB_l, &none, bb, FLA_LEFT );
96  FLA_Part_1x2( ZB, &ZB_l, &none, bb, FLA_LEFT );
97 
98  FLA_Part_2x1( UB_l, &none,
99  &U2_l, b, FLA_TOP );
100  FLA_Part_2x1( YB_l, &none,
101  &Y2_l, b, FLA_TOP );
102  FLA_Part_2x1( ZB_l, &none,
103  &Z2_l, b, FLA_TOP );
104 
105  // [ ABR, YB, ZB, T1 ] = FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1, b );
106  //FLA_Hess_UT_step_unb_var4( ABR, YB, ZB, T1_tl );
107  //FLA_Hess_UT_step_ofu_var4( ABR, YB, ZB, T1_tl );
108  FLA_Hess_UT_step_opt_var4( ABR, YB, ZB, T1_tl );
109 
110  // Build UB from ABR, with explicit unit subdiagonal and zeros.
111  FLA_Copy_external( ABR_l, UB_l );
112  FLA_Part_2x1( UB_l, &UB_tl,
113  &UB_bl, 1, FLA_TOP );
114  FLA_Triangularize( FLA_LOWER_TRIANGULAR, FLA_UNIT_DIAG, UB_bl );
115  FLA_Set( FLA_ZERO, UB_tl );
116 
117  // ATR = ATR - ATR * UB * inv( triu( T ) ) * UB' );
118  if ( FLA_Obj_length( ATR ) > 0 )
119  {
120  // NOTE: We use ZT as temporary workspace.
121  FLA_Part_1x2( ZT, &WT_l, &none, bb, FLA_LEFT );
122  FLA_Part_2x2( T1, &T1_tl, &none,
123  &none2, &none3, bb, bb, FLA_TL );
124 
125  // WT_l = ATR * UB_l * inv( triu( T ) ).
126  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
127  FLA_ONE, ATR, UB_l, FLA_ZERO, WT_l );
128  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
129  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, WT_l );
130 
131  // ATR = ATR - WT_l * UB_l'
132  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
133  FLA_MINUS_ONE, WT_l, UB_l, FLA_ONE, ATR );
134  }
135 
136  // A22 = A22 - U2 * Y2' - Z2 * U2';
137  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
138  FLA_MINUS_ONE, U2_l, Y2_l, FLA_ONE, A22 );
139  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
140  FLA_MINUS_ONE, Z2_l, U2_l, FLA_ONE, A22 );
141 
142  /*------------------------------------------------------------*/
143 
144  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
145  A10, A11, /**/ A12,
146  /* ************** */ /* ****************** */
147  &ABL, /**/ &ABR, A20, A21, /**/ A22,
148  FLA_TL );
149  FLA_Cont_with_3x1_to_2x1( &UT, U0,
150  U1,
151  /* ** */ /* ** */
152  &UB, U2, FLA_TOP );
153  FLA_Cont_with_3x1_to_2x1( &YT, Y0,
154  Y1,
155  /* ** */ /* ** */
156  &YB, Y2, FLA_TOP );
157  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
158  Z1,
159  /* ** */ /* ** */
160  &ZB, Z2, FLA_TOP );
161  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ T2,
162  FLA_LEFT );
163  }
164 
165  FLA_Obj_free( &U );
166  FLA_Obj_free( &Y );
167  FLA_Obj_free( &Z );
168 
169  return FLA_SUCCESS;
170 }
FLA_Error FLA_Hess_UT_step_opt_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var4.c:29

References FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var4(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Triangularize(), FLA_Trsm_external(), and FLA_ZERO.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_blk_var5()

FLA_Error FLA_Hess_UT_blk_var5 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Obj ATL, ATR, A00, A01, A02,
16  ABL, ABR, A10, A11, A12,
17  A20, A21, A22;
18  FLA_Obj UT, U0,
19  UB, U1,
20  U2;
21  FLA_Obj ZT, Z0,
22  ZB, Z1,
23  Z2;
24  FLA_Obj TL, TR, T0, T1, W12;
25 
26  FLA_Obj U, Z;
27  FLA_Obj UB_l;
28  FLA_Obj ZB_l;
29  FLA_Obj WT_l;
30  FLA_Obj T1_tl;
31  FLA_Obj none, none2, none3;
32  FLA_Datatype datatype_A;
33  dim_t m_A;
34  dim_t b_alg, b, bb;
35 
36  b_alg = FLA_Obj_length( T );
37 
38  datatype_A = FLA_Obj_datatype( A );
39  m_A = FLA_Obj_length( A );
40 
41  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &U );
42  FLA_Obj_create( datatype_A, m_A, b_alg, 0, 0, &Z );
43 
44  FLA_Part_2x2( A, &ATL, &ATR,
45  &ABL, &ABR, 0, 0, FLA_TL );
46  FLA_Part_2x1( U, &UT,
47  &UB, 0, FLA_TOP );
48  FLA_Part_2x1( Z, &ZT,
49  &ZB, 0, FLA_TOP );
50  FLA_Part_1x2( T, &TL, &TR, 0, FLA_LEFT );
51 
52  while ( FLA_Obj_length( ATL ) < FLA_Obj_length( A ) )
53  {
54  b = min( FLA_Obj_length( ABR ), b_alg );
55 
56  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02,
57  /* ************* */ /* ******************** */
58  &A10, /**/ &A11, &A12,
59  ABL, /**/ ABR, &A20, /**/ &A21, &A22,
60  b, b, FLA_BR );
61  FLA_Repart_2x1_to_3x1( UT, &U0,
62  /* ** */ /* ** */
63  &U1,
64  UB, &U2, b, FLA_BOTTOM );
65  FLA_Repart_2x1_to_3x1( ZT, &Z0,
66  /* ** */ /* ** */
67  &Z1,
68  ZB, &Z2, b, FLA_BOTTOM );
69  FLA_Repart_1x2_to_1x3( TL, /**/ TR, &T0, /**/ &T1, &W12,
70  b, FLA_RIGHT );
71 
72  /*------------------------------------------------------------*/
73 
74  FLA_Part_2x2( T1, &T1_tl, &none,
75  &none2, &none3, b, b, FLA_TL );
76 
77  bb = min( FLA_Obj_length( ABR ) - 1, b_alg );
78 
79  FLA_Part_1x2( UB, &UB_l, &none, bb, FLA_LEFT );
80  FLA_Part_1x2( ZB, &ZB_l, &none, bb, FLA_LEFT );
81 
82  // [ ABR, UB, ZB, T1 ] = FLA_Hess_UT_step_unb_var5( ABR, UB, ZB, T1, b );
83  //FLA_Hess_UT_step_unb_var5( ABR, UB, ZB, T1_tl );
84  FLA_Hess_UT_step_opt_var5( ABR, UB, ZB, T1_tl );
85 
86  // ATR = ATR - ATR * UB * inv( triu ( T1 ) ) * UB' );
87  if ( FLA_Obj_length( ATR ) > 0 )
88  {
89  // NOTE: We use ZT as temporary workspace.
90  FLA_Part_1x2( ZT, &WT_l, &none, bb, FLA_LEFT );
91  FLA_Part_2x2( T1, &T1_tl, &none,
92  &none2, &none3, bb, bb, FLA_TL );
93 
94  // WT_l = ATR * UB_l * inv( triu( T1 ) ).
95  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
96  FLA_ONE, ATR, UB_l, FLA_ZERO, WT_l );
97  FLA_Trsm_external( FLA_RIGHT, FLA_UPPER_TRIANGULAR,
98  FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, WT_l );
99 
100  // ATR = ATR - WT_l * UB_l'
101  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_CONJ_TRANSPOSE,
102  FLA_MINUS_ONE, WT_l, UB_l, FLA_ONE, ATR );
103  }
104 
105  // / A12 \ = Q11' * / / A12 \ - / Z1 \ * inv( triu( T1 ) ) * U2' \
106  // \ A22 / \ \ A22 / \ Z2 / /
107  //
108  // where Q11 corresponds to the block Householder transformation
109  // associated with UB and T1.
110  if ( FLA_Obj_width( A12 ) > 0 )
111  {
112  FLA_Obj ABR2, ABR2_b;
113  FLA_Obj UB_b;
114 
115  // NOTE: Since A12.n > 0, we are guaranteed to not be at an edge case,
116  // namely the case where bb = b - 1 = ABR.m - 1, thus we are free to use
117  // the "full" matrix partitions in this scope block (ie: ZB instead of
118  // ZB_l).
119 
120  // W12 = U2'
121  // W12 = inv( triu( T1 ) ) * W12;
122  FLA_Copyt_external( FLA_CONJ_TRANSPOSE, U2, W12 );
123  FLA_Trsm_external( FLA_LEFT, FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE,
124  FLA_NONUNIT_DIAG, FLA_ONE, T1_tl, W12 );
125 
126  FLA_Merge_2x1( A12,
127  A22, &ABR2 );
128 
129  // / A12 \ = / A12 \ - / Z1 \ * W12
130  // \ A22 / \ A22 / \ Z2 /
131  FLA_Gemm_external( FLA_NO_TRANSPOSE, FLA_NO_TRANSPOSE,
132  FLA_MINUS_ONE, ZB, W12, FLA_ONE, ABR2 );
133 
134  // Omit the top row of UB so it has [implicit] unit diagonal, allowing us
135  // to use FLA_Apply_Q_UT() to apply the block Householder transformation
136  // corresponding to UB and T1. This trick is valid since the top row of
137  // ABR2 would normally be unchanged by the transformation (ie: multiplied
138  // by identity).
139  FLA_Part_2x1( UB, &none,
140  &UB_b, 1, FLA_TOP );
141  FLA_Part_2x1( ABR2, &none,
142  &ABR2_b, 1, FLA_TOP );
143 
144  // Apply Q11' to A12 and A22 from the left:
145  //
146  // / A12 \ = / I - / U1 \ * inv( triu( T1 ) ) * / U1 \' \' / A12 \
147  // \ A22 / \ \ U2 / \ U2 / / \ A22 /
148  //
149  FLA_Apply_Q_UT( FLA_LEFT, FLA_CONJ_TRANSPOSE, FLA_FORWARD, FLA_COLUMNWISE,
150  UB_b, T1_tl, W12, ABR2_b );
151  }
152 
153  /*------------------------------------------------------------*/
154 
155  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02,
156  A10, A11, /**/ A12,
157  /* ************** */ /* ****************** */
158  &ABL, /**/ &ABR, A20, A21, /**/ A22,
159  FLA_TL );
160  FLA_Cont_with_3x1_to_2x1( &UT, U0,
161  U1,
162  /* ** */ /* ** */
163  &UB, U2, FLA_TOP );
164  FLA_Cont_with_3x1_to_2x1( &ZT, Z0,
165  Z1,
166  /* ** */ /* ** */
167  &ZB, Z2, FLA_TOP );
168  FLA_Cont_with_1x3_to_1x2( &TL, /**/ &TR, T0, T1, /**/ W12,
169  FLA_LEFT );
170  }
171 
172  FLA_Obj_free( &U );
173  FLA_Obj_free( &Z );
174 
175  return FLA_SUCCESS;
176 }
FLA_Error FLA_Hess_UT_step_opt_var5(FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_opt_var5.c:29
FLA_Error FLA_Copyt_external(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt_external.c:13
FLA_Error FLA_Apply_Q_UT(FLA_Side side, FLA_Trans trans, FLA_Direct direct, FLA_Store storev, FLA_Obj A, FLA_Obj T, FLA_Obj W, FLA_Obj B)
Definition: FLA_Apply_Q_UT.c:16
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
FLA_Error FLA_Merge_2x1(FLA_Obj AT, FLA_Obj AB, FLA_Obj *A)
Definition: FLA_View.c:541

References FLA_Apply_Q_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copyt_external(), FLA_Gemm_external(), FLA_Hess_UT_step_opt_var5(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_Obj_width(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Trsm_external(), and FLA_ZERO.

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_ofu_var1()

FLA_Error FLA_Hess_UT_ofu_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Hess_UT_ofu_var2()

FLA_Error FLA_Hess_UT_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_ofu_var2( A, T );
16 }

References FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_ofu_var3()

FLA_Error FLA_Hess_UT_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_ofu_var3( A, T );
16 }

References FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_ofu_var4()

FLA_Error FLA_Hess_UT_ofu_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Y );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_ofu_var4( A, Y, Z, T );
22 
23  FLA_Obj_free( &Y );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }
FLA_Error FLA_Obj_create_conf_to(FLA_Trans trans, FLA_Obj old, FLA_Obj *obj)
Definition: FLA_Obj.c:286
int FLA_Error
Definition: FLA_type_defs.h:47

References FLA_Hess_UT_step_ofu_var4(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

◆ FLA_Hess_UT_opt_var1()

FLA_Error FLA_Hess_UT_opt_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_opt_var1( A, T );
16 }

References FLA_Hess_UT_step_opt_var1().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_opt_var2()

FLA_Error FLA_Hess_UT_opt_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_opt_var2( A, T );
16 }

References FLA_Hess_UT_step_opt_var2().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_opt_var3()

FLA_Error FLA_Hess_UT_opt_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_opt_var3( A, T );
16 }

References FLA_Hess_UT_step_opt_var3().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_opt_var4()

FLA_Error FLA_Hess_UT_opt_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Y );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_opt_var4( A, Y, Z, T );
22 
23  FLA_Obj_free( &Y );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }

References FLA_Hess_UT_step_opt_var4(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_opt_var5()

FLA_Error FLA_Hess_UT_opt_var5 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj U, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &U );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_opt_var5( A, U, Z, T );
22 
23  FLA_Obj_free( &U );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }

References FLA_Hess_UT_step_opt_var5(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_step_ofc_var1()

FLA_Error FLA_Hess_UT_step_ofc_var1 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Hess_UT_step_ofc_var2()

FLA_Error FLA_Hess_UT_step_ofc_var2 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
497 {
498  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
499  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
500  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
501  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
502 
503  scomplex first_elem;
504  scomplex dot_product;
505  scomplex beta, conj_beta;
506  scomplex inv_tau11;
507  scomplex minus_inv_tau11;
508  int i;
509 
510  // b_alg = FLA_Obj_length( T );
511  int b_alg = m_T;
512 
513  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
514  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
515  scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
516  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
517  int inc_y = 1;
518  int inc_z = 1;
519 
520  for ( i = 0; i < b_alg; ++i )
521  {
522  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
523  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
524  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
525  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
526  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
527 
528  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
529  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
530 
531  scomplex* y0 = buff_y + (0 )*inc_y;
532  scomplex* y2 = buff_y + (i+1)*inc_y;
533 
534  scomplex* z2 = buff_z + (i+1)*inc_z;
535 
536  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
537  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
538 
539  int m_ahead = m_A - i - 1;
540  int n_ahead = m_A - i - 1;
541  int m_behind = i;
542  int n_behind = i;
543 
544  /*------------------------------------------------------------*/
545 
546  if ( m_ahead > 0 )
547  {
548  // FLA_Househ2_UT( FLA_LEFT,
549  // a21_t,
550  // a21_b, tau11 );
551  FLA_Househ2_UT_l_opc( m_ahead - 1,
552  a21_t,
553  a21_b, rs_A,
554  tau11 );
555 
556  // FLA_Set( FLA_ONE, inv_tau11 );
557  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
558  // FLA_Copy( inv_tau11, minus_inv_tau11 );
559  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
560  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
561  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
562 
563  // FLA_Copy( a21_t, first_elem );
564  // FLA_Set( FLA_ONE, a21_t );
565  first_elem = *a21_t;
566  *a21_t = *buff_1;
567 
568  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
569  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
570  FLA_Fused_Ahx_Ax_opc_var1( m_ahead,
571  n_ahead,
572  A22, rs_A, cs_A,
573  a21, rs_A,
574  y2, inc_y,
575  z2, inc_z );
576 
577  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
578  // FLA_Inv_scal( FLA_TWO, beta );
579  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
580  bl1_cdot( BLIS1_CONJUGATE,
581  m_ahead,
582  a21, rs_A,
583  z2, inc_z,
584  &beta );
585  bl1_cinvscals( buff_2, &beta );
586  bl1_ccopyconj( &beta, &conj_beta );
587 
588  // FLA_Scal( minus_inv_tau11, conj_beta );
589  // FLA_Axpy( conj_beta, a21, y2 );
590  // FLA_Scal( inv_tau11, y2 );
591  bl1_cscals( &minus_inv_tau11, &conj_beta );
592  bl1_caxpyv( BLIS1_NO_CONJUGATE,
593  m_ahead,
594  &conj_beta,
595  a21, rs_A,
596  y2, inc_y );
597  bl1_cscalv( BLIS1_NO_CONJUGATE,
598  m_ahead,
599  &inv_tau11,
600  y2, inc_y );
601 
602  // FLA_Scal( minus_inv_tau11, beta );
603  // FLA_Axpy( beta, a21, z2 );
604  // FLA_Scal( inv_tau11, z2 );
605  bl1_cscals( &minus_inv_tau11, &beta );
606  bl1_caxpyv( BLIS1_NO_CONJUGATE,
607  m_ahead,
608  &beta,
609  a21, rs_A,
610  z2, inc_z );
611  bl1_cscalv( BLIS1_NO_CONJUGATE,
612  m_ahead,
613  &inv_tau11,
614  z2, inc_z );
615 
616  // FLA_Dot( a12t, a21, dot_product );
617  // FLA_Scal( minus_inv_tau11, dot_product );
618  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
619  bl1_cdot( BLIS1_NO_CONJUGATE,
620  m_ahead,
621  a12t, cs_A,
622  a21, rs_A,
623  &dot_product );
624  bl1_cscals( &minus_inv_tau11, &dot_product );
625  bl1_caxpyv( BLIS1_CONJUGATE,
626  m_ahead,
627  &dot_product,
628  a21, rs_A,
629  a12t, cs_A );
630 
631  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
632  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
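633  bl1_cgemv( BLIS1_NO_TRANSPOSE,
634  BLIS1_NO_CONJUGATE,
635  m_behind,
636  n_ahead,
637  buff_1,
638  A02, rs_A, cs_A,
639  a21, rs_A,
640  buff_0,
641  y0, inc_y );
642  bl1_cger( BLIS1_NO_CONJUGATE,
643  BLIS1_CONJUGATE,
644  m_behind,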
645  n_ahead,
646  &minus_inv_tau11,
647  y0, inc_y,
648  a21, rs_A,
649  A02, rs_A, cs_A );
650 
651  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
652  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
653  FLA_Fused_Gerc2_opc_var1( m_ahead,
654  n_ahead,
655  buff_m1,
656  a21, rs_A,
657  y2, inc_y,
658  z2, inc_z,
659  a21, rs_A,
660  A22, rs_A, cs_A );
661 
662  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
663  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
664  BLIS1_NO_CONJUGATE,
665  m_ahead,
666  n_behind,
667  buff_1,
668  A20, rs_A, cs_A,
669  a21, rs_A,
670  buff_0,
671  t01, rs_T );
672 
673  // FLA_Copy( first_elem, a21_t );
674  *a21_t = first_elem;
675  }
676 
677  /*------------------------------------------------------------*/
678 
679  }
680 
681  // FLA_Obj_free( &y );
682  // FLA_Obj_free( &z );
683  FLA_free( buff_y );
684  FLA_free( buff_z );
685 
686  return FLA_SUCCESS;
687 }
FLA_Error FLA_Fused_Gerc2_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_v, int inc_v, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:241
FLA_Error FLA_Fused_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:256
FLA_Obj FLA_TWO
Definition: FLA_Init.c:17
void FLA_free(void *ptr)
Definition: FLA_Memory.c:247
void * FLA_malloc(size_t size)
Definition: FLA_Memory.c:111
FLA_Error FLA_Househ2_UT_l_opc(int m_x2, scomplex *chi_1, scomplex *x2, int inc_x2, scomplex *tau)
Definition: FLA_Househ2_UT.c:390
void bl1_cgemv(trans1_t transa, conj1_t conjx, int m, int n, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_gemv.c:125
void bl1_cger(conj1_t conjx, conj1_t conjy, int m, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:111
void bl1_cscalv(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx)
Definition: bl1_scalv.c:46
@ BLIS1_NO_TRANSPOSE
Definition: blis_type_defs.h:54
@ BLIS1_CONJ_TRANSPOSE
Definition: blis_type_defs.h:57

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofc_var3()

FLA_Error FLA_Hess_UT_step_ofc_var3 ( int  m_A,
int  m_T,
scomplex * buff_A,
int  rs_A,
int  cs_A,
scomplex * buff_T,
int  rs_T,
int  cs_T 
)
753 {
754  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
755  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
756  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
757  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
758 
759  scomplex first_elem;
760  scomplex dot_product;
761  scomplex beta, conj_beta;
762  scomplex inv_tau11;
763  scomplex minus_inv_tau11;
764  scomplex minus_upsilon1, minus_conj_upsilon1;
765  scomplex minus_psi1, minus_conj_psi1;
766  scomplex minus_zeta1;
767  int i;
768 
769  // b_alg = FLA_Obj_length( T );
770  int b_alg = m_T;
771 
772  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
773  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
774  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
775  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
776  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
777  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
778  scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
779  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
780  scomplex* buff_v = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
781  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
782  int inc_u = 1;
783  int inc_y = 1;
784  int inc_z = 1;
785  int inc_v = 1;
786  int inc_w = 1;
787 
788  for ( i = 0; i < b_alg; ++i )
789  {
790  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
791  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
792  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
793  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
794  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
795  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
796 
797  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
798  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
799 
800  scomplex* upsilon1 = buff_u + (i )*inc_u;
801  scomplex* u2 = buff_u + (i+1)*inc_u;
802 
803  scomplex* y0 = buff_y + (0 )*inc_y;
804  scomplex* psi1 = buff_y + (i )*inc_y;
805  scomplex* y2 = buff_y + (i+1)*inc_y;
806 
807  scomplex* zeta1 = buff_z + (i )*inc_z;
808  scomplex* z2 = buff_z + (i+1)*inc_z;
809 
810  scomplex* v2 = buff_v + (i+1)*inc_v;
811 
812  scomplex* w2 = buff_w + (i+1)*inc_w;
813 
814  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
815  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
816 
817  int m_ahead = m_A - i - 1;
818  int n_ahead = m_A - i - 1;
819  int m_behind = i;
820  int n_behind = i;
821 
822  /*------------------------------------------------------------*/
823 
824  if ( m_behind > 0 )
825  {
826  // FLA_Copy( upsilon1, minus_upsilon1 );
827  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
828  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
829  bl1_cmult3( buff_m1, upsilon1, &minus_upsilon1 );
830  bl1_ccopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
831 
832  // FLA_Copy( psi1, minus_psi1 );
833  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
834  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
835  bl1_cmult3( buff_m1, psi1, &minus_psi1 );
836  bl1_ccopyconj( &minus_psi1, &minus_conj_psi1 );
837 
838  // FLA_Copy( zeta1, minus_zeta1 );
839  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
840  bl1_cmult3( buff_m1, zeta1, &minus_zeta1 );
841 
842  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
843  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
844  bl1_caxpyv( BLIS1_CONJUGATE,
845  1,
846  &minus_upsilon1,
847  psi1, 1,
848  alpha11, 1 );
849  bl1_caxpyv( BLIS1_CONJUGATE,
850  1,
851  &minus_zeta1,
852  upsilon1, 1,
853  alpha11, 1 );
854 
855  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
856  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
857  bl1_caxpyv( BLIS1_CONJUGATE,
858  m_ahead,
859  &minus_upsilon1,
860  y2, inc_y,
861  a12t, cs_A );
862  bl1_caxpyv( BLIS1_CONJUGATE,
863  m_ahead,
864  &minus_zeta1,
865  u2, inc_u,
866  a12t, cs_A );
867 
868  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
869  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
870  bl1_caxpyv( BLIS1_NO_CONJUGATE,
871  m_ahead,
872  &minus_conj_psi1,
873  u2, inc_u,
874  a21, rs_A );
875  bl1_caxpyv( BLIS1_NO_CONJUGATE,
876  m_ahead,
877  &minus_conj_upsilon1,
878  z2, inc_z,
879  a21, rs_A );
880  }
881 
882  if ( m_ahead > 0 )
883  {
884  // FLA_Househ2_UT( FLA_LEFT,
885  // a21_t,
886  // a21_b, tau11 );
887  FLA_Househ2_UT_l_opc( m_ahead - 1,
888  a21_t,
889  a21_b, rs_A,
890  tau11 );
891 
892  // FLA_Set( FLA_ONE, inv_tau11 );
893  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
894  // FLA_Copy( inv_tau11, minus_inv_tau11 );
895  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
896  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
897  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
898 
899  // FLA_Copy( a21_t, first_elem );
900  // FLA_Set( FLA_ONE, a21_t );
901  first_elem = *a21_t;
902  *a21_t = *buff_1;
903  }
904 
905  if ( m_behind > 0 && m_ahead > 0 )
906  {
907  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
908  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
909  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
910  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
911  FLA_Fused_Gerc2_Ahx_Ax_opc_var1( m_ahead,
912  n_ahead,
913  buff_m1,
914  u2, inc_u,
915  y2, inc_y,
916  z2, inc_z,
917  A22, rs_A, cs_A,
918  a21, rs_A,
919  v2, inc_v,
920  w2, inc_w );
921  }
922  else if ( m_ahead > 0 )
923  {
924  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
925  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
926  FLA_Fused_Ahx_Ax_opc_var1( m_ahead,
927  n_ahead,
928  A22, rs_A, cs_A,
929  a21, rs_A,
930  v2, inc_v,
931  w2, inc_w );
932  }
933 
934  if ( m_ahead > 0 )
935  {
936  // FLA_Copy( a21, u2 );
937  // FLA_Copy( v2, y2 );
938  // FLA_Copy( w2, z2 );
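939  bl1_ccopyv( BLIS1_NO_CONJUGATE,
940  m_ahead,
941  a21, rs_A,
942  u2, inc_u );
943  bl1_ccopyv( BLIS1_NO_CONJUGATE,
944  m_ahead,
945  v2, inc_v,
946  y2, inc_y );
947  bl1_ccopyv( BLIS1_NO_CONJUGATE,
948  m_ahead,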
949  w2, inc_w,
950  z2, inc_z );
951 
952  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
953  // FLA_Inv_scal( FLA_TWO, beta );
954  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
955  bl1_cdot( BLIS1_CONJUGATE,
956  m_ahead,
957  a21, rs_A,
958  z2, inc_z,
959  &beta );
960  bl1_cinvscals( buff_2, &beta );
961  bl1_ccopyconj( &beta, &conj_beta );
962 
963  // FLA_Scal( minus_inv_tau11, conj_beta );
964  // FLA_Axpy( conj_beta, a21, y2 );
965  // FLA_Scal( inv_tau11, y2 );
966  bl1_cscals( &minus_inv_tau11, &conj_beta );
967  bl1_caxpyv( BLIS1_NO_CONJUGATE,
968  m_ahead,
969  &conj_beta,
970  a21, rs_A,
971  y2, inc_y );
972  bl1_cscalv( BLIS1_NO_CONJUGATE,
973  m_ahead,
974  &inv_tau11,
975  y2, inc_y );
976 
977  // FLA_Scal( minus_inv_tau11, beta );
978  // FLA_Axpy( beta, a21, z2 );
979  // FLA_Scal( inv_tau11, z2 );
980  bl1_cscals( &minus_inv_tau11, &beta );
981  bl1_caxpyv( BLIS1_NO_CONJUGATE,
982  m_ahead,
983  &beta,
984  a21, rs_A,
985  z2, inc_z );
986  bl1_cscalv( BLIS1_NO_CONJUGATE,
987  m_ahead,
988  &inv_tau11,
989  z2, inc_z );
990 
991  // FLA_Dot( a12t, a21, dot_product );
992  // FLA_Scal( minus_inv_tau11, dot_product );
993  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
994  bl1_cdot( BLIS1_NO_CONJUGATE,
995  m_ahead,
996  a12t, cs_A,
997  a21, rs_A,
998  &dot_product );
999  bl1_cscals( &minus_inv_tau11, &dot_product );
1000  bl1_caxpyv( BLIS1_CONJUGATE,
1001  m_ahead,
1002  &dot_product,
1003  a21, rs_A,
1004  a12t, cs_A );
1005 
1006  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1007  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
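1008  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1009  BLIS1_NO_CONJUGATE,
1010  m_behind,
1011  n_ahead,
1012  buff_1,
1013  A02, rs_A, cs_A,
1014  a21, rs_A,
1015  buff_0,
1016  y0, inc_y );
1017  bl1_cger( BLIS1_NO_CONJUGATE,
1018  BLIS1_CONJUGATE,
1019  m_behind,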
1020  n_ahead,
1021  &minus_inv_tau11,
1022  y0, inc_y,
1023  a21, rs_A,
1024  A02, rs_A, cs_A );
1025 
1026  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1027  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1028  BLIS1_NO_CONJUGATE,
1029  m_ahead,
1030  n_behind,
1031  buff_1,
1032  A20, rs_A, cs_A,
1033  a21, rs_A,
1034  buff_0,
1035  t01, rs_T );
1036 
1037  // FLA_Copy( first_elem, a21_t );
1038  *a21_t = first_elem;
1039  }
1040 
1041  if ( m_behind + 1 == b_alg && m_ahead > 0 )
1042  {
1043  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1044  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1045  FLA_Fused_Gerc2_opc_var1( m_ahead,
1046  n_ahead,
1047  buff_m1,
1048  u2, inc_u,
1049  y2, inc_y,
1050  z2, inc_z,
1051  u2, inc_u,
1052  A22, rs_A, cs_A );
1053  }
1054 
1055  /*------------------------------------------------------------*/
1056 
1057  }
1058 
1059  // FLA_Obj_free( &u );
1060  // FLA_Obj_free( &y );
1061  // FLA_Obj_free( &z );
1062  // FLA_Obj_free( &v );
1063  // FLA_Obj_free( &w );
1064  FLA_free( buff_u );
1065  FLA_free( buff_y );
1066  FLA_free( buff_z );
1067  FLA_free( buff_v );
1068  FLA_free( buff_w );
1069 
1070  return FLA_SUCCESS;
1071 }
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opc_var1(int m_A, int n_A, scomplex *buff_alpha, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_x, int inc_x, scomplex *buff_v, int inc_v, scomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:327
double *restrict zeta1
Definition: bl1_axmyv2.c:142
void bl1_ccopyv(conj1_t conj, int m, scomplex *x, int incx, scomplex *y, int incy)
Definition: bl1_copyv.c:49

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_Ahx_Ax_opc_var1(), FLA_Fused_Gerc2_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofc_var4()

FLA_Error FLA_Hess_UT_step_ofc_var4 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
688 {
689  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
690  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
691  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
692  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
693 
694  scomplex first_elem, last_elem;
695  scomplex dot_product;
696  scomplex beta, conj_beta;
697  scomplex inv_tau11;
698  scomplex minus_inv_tau11;
699  int i;
700 
701  // b_alg = FLA_Obj_length( T );
702  int b_alg = m_T;
703 
704  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
705  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
706  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
707  scomplex* buff_e = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
708  int inc_e = 1;
709 
710  // FLA_Set( FLA_ZERO, Y );
711  // FLA_Set( FLA_ZERO, Z );
712  bl1_csetm( m_A,
713  b_alg,
714  buff_0,
715  buff_Y, rs_Y, cs_Y );
716  bl1_csetm( m_A,
717  b_alg,
718  buff_0,
719  buff_Z, rs_Z, cs_Z );
720 
721  for ( i = 0; i < b_alg; ++i )
722  {
723  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
724  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
725  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
726  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
727  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
728  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
729  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
730 
731  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
732  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
733  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
734 
735  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
736  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
737  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
738 
739  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
740  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
741 
742  scomplex* e0 = buff_e + (0 )*inc_e;
743 
744  scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
745 
746  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
747  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
748 
749  scomplex* ABL = a10t;
750  scomplex* ZBL = z10t;
751 
752  scomplex* a2 = alpha11;
753 
754  int m_ahead = m_A - i - 1;
755  int n_ahead = m_A - i - 1;
756  int m_behind = i;
757  int n_behind = i;
758 
759  /*------------------------------------------------------------*/
760 
761  if ( m_behind > 0 )
762  {
763  // FLA_Copy( a10t_r, last_elem );
764  // FLA_Set( FLA_ONE, a10t_r );
765  last_elem = *a10t_r;
766  *a10t_r = *buff_1;
767  }
768 
769  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
770  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
771  bl1_cgemv( BLIS1_NO_TRANSPOSE,
772  BLIS1_CONJUGATE,
773  m_ahead + 1,
774  n_behind,
775  buff_m1,
776  ABL, rs_A, cs_A,
777  y10t, cs_Y,
778  buff_1,
779  a2, rs_A );
780  bl1_cgemv( BLIS1_NO_TRANSPOSE,
781  BLIS1_CONJUGATE,
782  m_ahead + 1,
783  n_behind,
784  buff_m1,
785  ZBL, rs_Z, cs_Z,
786  a10t, cs_A,
787  buff_1,
788  a2, rs_A );
789 
790  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
791  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
792  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
793  BLIS1_NO_CONJUGATE,
794  m_ahead,
795  n_behind,
796  buff_m1,
797  Y20, rs_Y, cs_Y,
798  a10t, cs_A,
799  buff_1,
800  a12t, cs_A );
801  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
802  BLIS1_NO_CONJUGATE,
803  m_ahead,
804  n_behind,
805  buff_m1,
806  A20, rs_A, cs_A,
807  z10t, cs_Z,
808  buff_1,
809  a12t, cs_A );
810 
811  if ( m_behind > 0 )
812  {
813  // FLA_Copy( last_elem, a10t_r );
814  *a10t_r = last_elem;
815  }
816 
817  if ( m_ahead > 0 )
818  {
819  // FLA_Househ2_UT( FLA_LEFT,
820  // a21_t,
821  // a21_b, tau11 );
822  FLA_Househ2_UT_l_opc( m_ahead - 1,
823  a21_t,
824  a21_b, rs_A,
825  tau11 );
826 
827  // FLA_Set( FLA_ONE, inv_tau11 );
828  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
829  // FLA_Copy( inv_tau11, minus_inv_tau11 );
830  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
831  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
832  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
833 
834  // FLA_Copy( a21_t, first_elem );
835  // FLA_Set( FLA_ONE, a21_t );
836  first_elem = *a21_t;
837  *a21_t = *buff_1;
838 
839  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
840  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
841  FLA_Fused_Ahx_Ax_opc_var1( m_ahead,
842  n_ahead,
843  A22, rs_A, cs_A,
844  a21, rs_A,
845  y21, rs_Y,
846  z21, rs_Z );
847 
848  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
849  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
850  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
851  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
852  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
853  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
854  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
855  // FLA_Copy( d0, t01 );
856  FLA_Fused_Uhu_Yhu_Zhu_opc_var1( m_ahead,
857  n_behind,
858  buff_m1,
859  A20, rs_A, cs_A,
860  Y20, rs_Y, cs_Y,
861  Z20, rs_Z, cs_Z,
862  t01, rs_T,
863  a21, rs_A,
864  y21, rs_Y,
865  z21, rs_Z );
866 
867  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
868  // FLA_Inv_scal( FLA_TWO, beta );
869  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
870  bl1_cdot( BLIS1_CONJUGATE,
871  m_ahead,
872  a21, rs_A,
873  z21, rs_Z,
874  &beta );
875  bl1_cinvscals( buff_2, &beta );
876  bl1_ccopyconj( &beta, &conj_beta );
877 
878  // FLA_Scal( minus_inv_tau11, conj_beta );
879  // FLA_Axpy( conj_beta, a21, y21 );
880  // FLA_Scal( inv_tau11, y21 );
881  bl1_cscals( &minus_inv_tau11, &conj_beta );
882  bl1_caxpyv( BLIS1_NO_CONJUGATE,
883  m_ahead,
884  &conj_beta,
885  a21, rs_A,
886  y21, rs_Y );
887  bl1_cscalv( BLIS1_NO_CONJUGATE,
888  m_ahead,
889  &inv_tau11,
890  y21, rs_Y );
891 
892  // FLA_Scal( minus_inv_tau11, beta );
893  // FLA_Axpy( beta, a21, z21 );
894  // FLA_Scal( inv_tau11, z21 );
895  bl1_cscals( &minus_inv_tau11, &beta );
896  bl1_caxpyv( BLIS1_NO_CONJUGATE,
897  m_ahead,
898  &beta,
899  a21, rs_A,
900  z21, rs_Z );
901  bl1_cscalv( BLIS1_NO_CONJUGATE,
902  m_ahead,
903  &inv_tau11,
904  z21, rs_Z );
905 
906  // FLA_Dot( a12t, a21, dot_product );
907  // FLA_Scal( minus_inv_tau11, dot_product );
908  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
909  bl1_cdot( BLIS1_NO_CONJUGATE,
910  m_ahead,
911  a12t, cs_A,
912  a21, rs_A,
913  &dot_product );
914  bl1_cscals( &minus_inv_tau11, &dot_product );
915  bl1_caxpyv( BLIS1_CONJUGATE,
916  m_ahead,
917  &dot_product,
918  a21, rs_A,
919  a12t, cs_A );
920 
921  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
922  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
923  bl1_cgemv( BLIS1_NO_TRANSPOSE,
924  BLIS1_NO_CONJUGATE,
925  m_behind,
926  n_ahead,
927  buff_1,
928  A02, rs_A, cs_A,
929  a21, rs_A,
930  buff_0,
931  e0, inc_e );
932  bl1_cger( BLIS1_NO_CONJUGATE,
933  BLIS1_CONJUGATE,
934  m_behind,
935  n_ahead,
936  &minus_inv_tau11,
937  e0, inc_e,
938  a21, rs_A,
939  A02, rs_A, cs_A );
940 
941  // FLA_Copy( first_elem, a21_t );
942  *a21_t = first_elem;
943  }
944 
945  /*------------------------------------------------------------*/
946 
947  }
948 
949  // FLA_Obj_free( &e );
950  FLA_free( buff_e );
951 
952  return FLA_SUCCESS;
953 }
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opc_var1(int m_U, int n_U, scomplex *buff_delta, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_t, int inc_t, scomplex *buff_u, int inc_u, scomplex *buff_y, int inc_y, scomplex *buff_z, int inc_z)
Definition: FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:398
void bl1_csetm(int m, int n, scomplex *sigma, scomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:61
@ BLIS1_CONJ_NO_TRANSPOSE
Definition: blis_type_defs.h:56

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opc_var1(), FLA_Fused_Uhu_Yhu_Zhu_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofd_var1()

FLA_Error FLA_Hess_UT_step_ofd_var1 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Hess_UT_step_ofd_var2()

FLA_Error FLA_Hess_UT_step_ofd_var2 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
299 {
300  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
301  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
302  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
303  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
304 
305  double first_elem;
306  double dot_product;
307  double beta, conj_beta;
308  double inv_tau11;
309  double minus_inv_tau11;
310  int i;
311 
312  // b_alg = FLA_Obj_length( T );
313  int b_alg = m_T;
314 
315  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
316  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
317  double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
318  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
319  int inc_y = 1;
320  int inc_z = 1;
321 
322  for ( i = 0; i < b_alg; ++i )
323  {
324  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
325  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
326  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
327  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
328  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
329 
330  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
331  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
332 
333  double* y0 = buff_y + (0 )*inc_y;
334  double* y2 = buff_y + (i+1)*inc_y;
335 
336  double* z2 = buff_z + (i+1)*inc_z;
337 
338  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
339  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
340 
341  int m_ahead = m_A - i - 1;
342  int n_ahead = m_A - i - 1;
343  int m_behind = i;
344  int n_behind = i;
345 
346  /*------------------------------------------------------------*/
347 
348  if ( m_ahead > 0 )
349  {
350  // FLA_Househ2_UT( FLA_LEFT,
351  // a21_t,
352  // a21_b, tau11 );
353  FLA_Househ2_UT_l_opd( m_ahead - 1,
354  a21_t,
355  a21_b, rs_A,
356  tau11 );
357 
358  // FLA_Set( FLA_ONE, inv_tau11 );
359  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
360  // FLA_Copy( inv_tau11, minus_inv_tau11 );
361  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
362  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
363  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
364 
365  // FLA_Copy( a21_t, first_elem );
366  // FLA_Set( FLA_ONE, a21_t );
367  first_elem = *a21_t;
368  *a21_t = *buff_1;
369 
370  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
371  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
372  FLA_Fused_Ahx_Ax_opd_var1( m_ahead,
373  n_ahead,
374  A22, rs_A, cs_A,
375  a21, rs_A,
376  y2, inc_y,
377  z2, inc_z );
378 
379  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
380  // FLA_Inv_scal( FLA_TWO, beta );
381  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
382  bl1_ddot( BLIS1_CONJUGATE,
383  m_ahead,
384  a21, rs_A,
385  z2, inc_z,
386  &beta );
387  bl1_dinvscals( buff_2, &beta );
388  bl1_dcopyconj( &beta, &conj_beta );
389 
390  // FLA_Scal( minus_inv_tau11, conj_beta );
391  // FLA_Axpy( conj_beta, a21, y2 );
392  // FLA_Scal( inv_tau11, y2 );
393  bl1_dscals( &minus_inv_tau11, &conj_beta );
394  bl1_daxpyv( BLIS1_NO_CONJUGATE,
395  m_ahead,
396  &conj_beta,
397  a21, rs_A,
398  y2, inc_y );
399  bl1_dscalv( BLIS1_NO_CONJUGATE,
400  m_ahead,
401  &inv_tau11,
402  y2, inc_y );
403 
404  // FLA_Scal( minus_inv_tau11, beta );
405  // FLA_Axpy( beta, a21, z2 );
406  // FLA_Scal( inv_tau11, z2 );
407  bl1_dscals( &minus_inv_tau11, &beta );
408  bl1_daxpyv( BLIS1_NO_CONJUGATE,
409  m_ahead,
410  &beta,
411  a21, rs_A,
412  z2, inc_z );
413  bl1_dscalv( BLIS1_NO_CONJUGATE,
414  m_ahead,
415  &inv_tau11,
416  z2, inc_z );
417 
418  // FLA_Dot( a12t, a21, dot_product );
419  // FLA_Scal( minus_inv_tau11, dot_product );
420  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
421  bl1_ddot( BLIS1_NO_CONJUGATE,
422  m_ahead,
423  a12t, cs_A,
424  a21, rs_A,
425  &dot_product );
426  bl1_dscals( &minus_inv_tau11, &dot_product );
427  bl1_daxpyv( BLIS1_CONJUGATE,
428  m_ahead,
429  &dot_product,
430  a21, rs_A,
431  a12t, cs_A );
432 
433  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
434  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
435  bl1_dgemv( BLIS1_NO_TRANSPOSE,
436  BLIS1_NO_CONJUGATE,
437  m_behind,
438  n_ahead,
439  buff_1,
440  A02, rs_A, cs_A,
441  a21, rs_A,
442  buff_0,
443  y0, inc_y );
444  bl1_dger( BLIS1_NO_CONJUGATE,
445  BLIS1_CONJUGATE,
446  m_behind,
447  n_ahead,
448  &minus_inv_tau11,
449  y0, inc_y,
450  a21, rs_A,
451  A02, rs_A, cs_A );
452 
453  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
454  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
455  FLA_Fused_Gerc2_opd_var1( m_ahead,
456  n_ahead,
457  buff_m1,
458  a21, rs_A,
459  y2, inc_y,
460  z2, inc_z,
461  a21, rs_A,
462  A22, rs_A, cs_A );
463 
464  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
465  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
466  BLIS1_NO_CONJUGATE,
467  m_ahead,
468  n_behind,
469  buff_1,
470  A20, rs_A, cs_A,
471  a21, rs_A,
472  buff_0,
473  t01, rs_T );
474 
475  // FLA_Copy( first_elem, a21_t );
476  *a21_t = first_elem;
477  }
478 
479  /*------------------------------------------------------------*/
480 
481  }
482 
483  // FLA_Obj_free( &y );
484  // FLA_Obj_free( &z );
485  FLA_free( buff_y );
486  FLA_free( buff_z );
487 
488  return FLA_SUCCESS;
489 }
FLA_Error FLA_Fused_Gerc2_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_v, int inc_v, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:193
FLA_Error FLA_Fused_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:173
FLA_Error FLA_Househ2_UT_l_opd(int m_x2, double *chi_1, double *x2, int inc_x2, double *tau)
Definition: FLA_Househ2_UT.c:274
void bl1_ddot(conj1_t conj, int n, double *x, int incx, double *y, int incy, double *rho)
Definition: bl1_dot.c:26
void bl1_dgemv(trans1_t transa, conj1_t conjx, int m, int n, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_gemv.c:69
void bl1_dger(conj1_t conjx, conj1_t conjy, int m, int n, double *alpha, double *x, int incx, double *y, int incy, double *a, int a_rs, int a_cs)
Definition: bl1_ger.c:62
void bl1_dscalv(conj1_t conj, int n, double *alpha, double *x, int incx)
Definition: bl1_scalv.c:24

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofd_var3()

FLA_Error FLA_Hess_UT_step_ofd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
427 {
428  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
429  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
430  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
431  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
432 
433  double first_elem;
434  double dot_product;
435  double beta, conj_beta;
436  double inv_tau11;
437  double minus_inv_tau11;
438  double minus_upsilon1, minus_conj_upsilon1;
439  double minus_psi1, minus_conj_psi1;
440  double minus_zeta1;
441  int i;
442 
443  // b_alg = FLA_Obj_length( T );
444  int b_alg = m_T;
445 
446  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
447  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
448  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
449  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
450  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
451  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
452  double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
453  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
454  double* buff_v = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
455  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
456  int inc_u = 1;
457  int inc_y = 1;
458  int inc_z = 1;
459  int inc_v = 1;
460  int inc_w = 1;
461 
462  for ( i = 0; i < b_alg; ++i )
463  {
464  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
465  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
466  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
467  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
468  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
469  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
470 
471  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
472  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
473 
474  double* upsilon1 = buff_u + (i )*inc_u;
475  double* u2 = buff_u + (i+1)*inc_u;
476 
477  double* y0 = buff_y + (0 )*inc_y;
478  double* psi1 = buff_y + (i )*inc_y;
479  double* y2 = buff_y + (i+1)*inc_y;
480 
481  double* zeta1 = buff_z + (i )*inc_z;
482  double* z2 = buff_z + (i+1)*inc_z;
483 
484  double* v2 = buff_v + (i+1)*inc_v;
485 
486  double* w2 = buff_w + (i+1)*inc_w;
487 
488  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
489  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
490 
491  int m_ahead = m_A - i - 1;
492  int n_ahead = m_A - i - 1;
493  int m_behind = i;
494  int n_behind = i;
495 
496  /*------------------------------------------------------------*/
497 
498  if ( m_behind > 0 )
499  {
500  // FLA_Copy( upsilon1, minus_upsilon1 );
501  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
502  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
503  bl1_dmult3( buff_m1, upsilon1, &minus_upsilon1 );
504  bl1_dcopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
505 
506  // FLA_Copy( psi1, minus_psi1 );
507  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
508  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
509  bl1_dmult3( buff_m1, psi1, &minus_psi1 );
510  bl1_dcopyconj( &minus_psi1, &minus_conj_psi1 );
511 
512  // FLA_Copy( zeta1, minus_zeta1 );
513  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
514  bl1_dmult3( buff_m1, zeta1, &minus_zeta1 );
515 
516  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
517  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
518  bl1_daxpyv( BLIS1_CONJUGATE,
519  1,
520  &minus_upsilon1,
521  psi1, 1,
522  alpha11, 1 );
523  bl1_daxpyv( BLIS1_CONJUGATE,
524  1,
525  &minus_zeta1,
526  upsilon1, 1,
527  alpha11, 1 );
528 
529  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
530  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
531  bl1_daxpyv( BLIS1_CONJUGATE,
532  m_ahead,
533  &minus_upsilon1,
534  y2, inc_y,
535  a12t, cs_A );
536  bl1_daxpyv( BLIS1_CONJUGATE,
537  m_ahead,
538  &minus_zeta1,
539  u2, inc_u,
540  a12t, cs_A );
541 
542  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
543  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
544  bl1_daxpyv( BLIS1_NO_CONJUGATE,
545  m_ahead,
546  &minus_conj_psi1,
547  u2, inc_u,
548  a21, rs_A );
549  bl1_daxpyv( BLIS1_NO_CONJUGATE,
550  m_ahead,
551  &minus_conj_upsilon1,
552  z2, inc_z,
553  a21, rs_A );
554  }
555 
556  if ( m_ahead > 0 )
557  {
558  // FLA_Househ2_UT( FLA_LEFT,
559  // a21_t,
560  // a21_b, tau11 );
561  FLA_Househ2_UT_l_opd( m_ahead - 1,
562  a21_t,
563  a21_b, rs_A,
564  tau11 );
565 
566  // FLA_Set( FLA_ONE, inv_tau11 );
567  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
568  // FLA_Copy( inv_tau11, minus_inv_tau11 );
569  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
570  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
571  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
572 
573  // FLA_Copy( a21_t, first_elem );
574  // FLA_Set( FLA_ONE, a21_t );
575  first_elem = *a21_t;
576  *a21_t = *buff_1;
577  }
578 
579  if ( m_behind > 0 && m_ahead > 0 )
580  {
581  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
582  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
583  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
584  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
585  FLA_Fused_Gerc2_Ahx_Ax_opd_var1( m_ahead,
586  n_ahead,
587  buff_m1,
588  u2, inc_u,
589  y2, inc_y,
590  z2, inc_z,
591  A22, rs_A, cs_A,
592  a21, rs_A,
593  v2, inc_v,
594  w2, inc_w );
595  }
596  else if ( m_ahead > 0 )
597  {
598  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
599  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
600  FLA_Fused_Ahx_Ax_opd_var1( m_ahead,
601  n_ahead,
602  A22, rs_A, cs_A,
603  a21, rs_A,
604  v2, inc_v,
605  w2, inc_w );
606  }
607 
608  if ( m_ahead > 0 )
609  {
610  // FLA_Copy( a21, u2 );
611  // FLA_Copy( v2, y2 );
612  // FLA_Copy( w2, z2 );
613  bl1_dcopyv( BLIS1_NO_CONJUGATE,
614  m_ahead,
615  a21, rs_A,
616  u2, inc_u );
617  bl1_dcopyv( BLIS1_NO_CONJUGATE,
618  m_ahead,
619  v2, inc_v,
620  y2, inc_y );
621  bl1_dcopyv( BLIS1_NO_CONJUGATE,
622  m_ahead,
623  w2, inc_w,
624  z2, inc_z );
625 
626  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
627  // FLA_Inv_scal( FLA_TWO, beta );
628  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
629  bl1_ddot( BLIS1_CONJUGATE,
630  m_ahead,
631  a21, rs_A,
632  z2, inc_z,
633  &beta );
634  bl1_dinvscals( buff_2, &beta );
635  bl1_dcopyconj( &beta, &conj_beta );
636 
637  // FLA_Scal( minus_inv_tau11, conj_beta );
638  // FLA_Axpy( conj_beta, a21, y2 );
639  // FLA_Scal( inv_tau11, y2 );
640  bl1_dscals( &minus_inv_tau11, &conj_beta );
641  bl1_daxpyv( BLIS1_NO_CONJUGATE,
642  m_ahead,
643  &conj_beta,
644  a21, rs_A,
645  y2, inc_y );
646  bl1_dscalv( BLIS1_NO_CONJUGATE,
647  m_ahead,
648  &inv_tau11,
649  y2, inc_y );
650 
651  // FLA_Scal( minus_inv_tau11, beta );
652  // FLA_Axpy( beta, a21, z2 );
653  // FLA_Scal( inv_tau11, z2 );
654  bl1_dscals( &minus_inv_tau11, &beta );
655  bl1_daxpyv( BLIS1_NO_CONJUGATE,
656  m_ahead,
657  &beta,
658  a21, rs_A,
659  z2, inc_z );
660  bl1_dscalv( BLIS1_NO_CONJUGATE,
661  m_ahead,
662  &inv_tau11,
663  z2, inc_z );
664 
665  // FLA_Dot( a12t, a21, dot_product );
666  // FLA_Scal( minus_inv_tau11, dot_product );
667  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
668  bl1_ddot( BLIS1_NO_CONJUGATE,
669  m_ahead,
670  a12t, cs_A,
671  a21, rs_A,
672  &dot_product );
673  bl1_dscals( &minus_inv_tau11, &dot_product );
674  bl1_daxpyv( BLIS1_CONJUGATE,
675  m_ahead,
676  &dot_product,
677  a21, rs_A,
678  a12t, cs_A );
679 
680  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
681  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
682  bl1_dgemv( BLIS1_NO_TRANSPOSE,
683  BLIS1_NO_CONJUGATE,
684  m_behind,
685  n_ahead,
686  buff_1,
687  A02, rs_A, cs_A,
688  a21, rs_A,
689  buff_0,
690  y0, inc_y );
691  bl1_dger( BLIS1_NO_CONJUGATE,
692  BLIS1_CONJUGATE,
693  m_behind,
694  n_ahead,
695  &minus_inv_tau11,
696  y0, inc_y,
697  a21, rs_A,
698  A02, rs_A, cs_A );
699 
700  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
701  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
702  BLIS1_NO_CONJUGATE,
703  m_ahead,
704  n_behind,
705  buff_1,
706  A20, rs_A, cs_A,
707  a21, rs_A,
708  buff_0,
709  t01, rs_T );
710 
711  // FLA_Copy( first_elem, a21_t );
712  *a21_t = first_elem;
713  }
714 
715  if ( m_behind + 1 == b_alg && m_ahead > 0 )
716  {
717  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
718  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
719  FLA_Fused_Gerc2_opd_var1( m_ahead,
720  n_ahead,
721  buff_m1,
722  u2, inc_u,
723  y2, inc_y,
724  z2, inc_z,
725  u2, inc_u,
726  A22, rs_A, cs_A );
727  }
728 
729  /*------------------------------------------------------------*/
730 
731  }
732 
733  // FLA_Obj_free( &u );
734  // FLA_Obj_free( &y );
735  // FLA_Obj_free( &z );
736  // FLA_Obj_free( &v );
737  // FLA_Obj_free( &w );
738  FLA_free( buff_u );
739  FLA_free( buff_y );
740  FLA_free( buff_z );
741  FLA_free( buff_v );
742  FLA_free( buff_w );
743 
744  return FLA_SUCCESS;
745 }
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opd_var1(int m_A, int n_A, double *buff_alpha, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z, double *buff_A, int rs_A, int cs_A, double *buff_x, int inc_x, double *buff_v, int inc_v, double *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:248
void bl1_dcopyv(conj1_t conj, int m, double *x, int incx, double *y, int incy)
Definition: bl1_copyv.c:42

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_Ahx_Ax_opd_var1(), FLA_Fused_Gerc2_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofd_var4()

FLA_Error FLA_Hess_UT_step_ofd_var4 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
413 {
414  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
415  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
416  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
417  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
418 
419  double first_elem, last_elem;
420  double dot_product;
421  double beta, conj_beta;
422  double inv_tau11;
423  double minus_inv_tau11;
424  int i;
425 
426  // b_alg = FLA_Obj_length( T );
427  int b_alg = m_T;
428 
429  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
430  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
431  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
432  double* buff_e = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
433  int inc_e = 1;
434 
435  // FLA_Set( FLA_ZERO, Y );
436  // FLA_Set( FLA_ZERO, Z );
437  bl1_dsetm( m_A,
438  b_alg,
439  buff_0,
440  buff_Y, rs_Y, cs_Y );
441  bl1_dsetm( m_A,
442  b_alg,
443  buff_0,
444  buff_Z, rs_Z, cs_Z );
445 
446  for ( i = 0; i < b_alg; ++i )
447  {
448  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
449  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
450  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
451  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
452  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
453  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
454  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
455 
456  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
457  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
458  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
459 
460  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
461  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
462  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
463 
464  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
465  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
466 
467  double* e0 = buff_e + (0 )*inc_e;
468 
469  double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
470 
471  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
472  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
473 
474  double* ABL = a10t;
475  double* ZBL = z10t;
476 
477  double* a2 = alpha11;
478 
479  int m_ahead = m_A - i - 1;
480  int n_ahead = m_A - i - 1;
481  int m_behind = i;
482  int n_behind = i;
483 
484  /*------------------------------------------------------------*/
485 
486  if ( m_behind > 0 )
487  {
488  // FLA_Copy( a10t_r, last_elem );
489  // FLA_Set( FLA_ONE, a10t_r );
490  last_elem = *a10t_r;
491  *a10t_r = *buff_1;
492  }
493 
494  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
495  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
496  bl1_dgemv( BLIS1_NO_TRANSPOSE,
497  BLIS1_CONJUGATE,
498  m_ahead + 1,
499  n_behind,
500  buff_m1,
501  ABL, rs_A, cs_A,
502  y10t, cs_Y,
503  buff_1,
504  a2, rs_A );
505  bl1_dgemv( BLIS1_NO_TRANSPOSE,
506  BLIS1_CONJUGATE,
507  m_ahead + 1,
508  n_behind,
509  buff_m1,
510  ZBL, rs_Z, cs_Z,
511  a10t, cs_A,
512  buff_1,
513  a2, rs_A );
514 
515  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
516  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
517  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
518  BLIS1_NO_CONJUGATE,
519  m_ahead,
520  n_behind,
521  buff_m1,
522  Y20, rs_Y, cs_Y,
523  a10t, cs_A,
524  buff_1,
525  a12t, cs_A );
526  bl1_dgemv( BLIS1_CONJ_NO_TRANSPOSE,
527  BLIS1_NO_CONJUGATE,
528  m_ahead,
529  n_behind,
530  buff_m1,
531  A20, rs_A, cs_A,
532  z10t, cs_Z,
533  buff_1,
534  a12t, cs_A );
535 
536  if ( m_behind > 0 )
537  {
538  // FLA_Copy( last_elem, a10t_r );
539  *a10t_r = last_elem;
540  }
541 
542  if ( m_ahead > 0 )
543  {
544  // FLA_Househ2_UT( FLA_LEFT,
545  // a21_t,
546  // a21_b, tau11 );
547  FLA_Househ2_UT_l_opd( m_ahead - 1,
548  a21_t,
549  a21_b, rs_A,
550  tau11 );
551 
552  // FLA_Set( FLA_ONE, inv_tau11 );
553  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
554  // FLA_Copy( inv_tau11, minus_inv_tau11 );
555  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
556  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
557  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
558 
559  // FLA_Copy( a21_t, first_elem );
560  // FLA_Set( FLA_ONE, a21_t );
561  first_elem = *a21_t;
562  *a21_t = *buff_1;
563 
564  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
565  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
566  FLA_Fused_Ahx_Ax_opd_var1( m_ahead,
567  n_ahead,
568  A22, rs_A, cs_A,
569  a21, rs_A,
570  y21, rs_Y,
571  z21, rs_Z );
572 
573  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
574  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
575  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
576  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
577  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
578  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
579  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
580  // FLA_Copy( d0, t01 );
581  FLA_Fused_Uhu_Yhu_Zhu_opd_var1( m_ahead,
582  n_behind,
583  buff_m1,
584  A20, rs_A, cs_A,
585  Y20, rs_Y, cs_Y,
586  Z20, rs_Z, cs_Z,
587  t01, rs_T,
588  a21, rs_A,
589  y21, rs_Y,
590  z21, rs_Z );
591 
592  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
593  // FLA_Inv_scal( FLA_TWO, beta );
594  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
595  bl1_ddot( BLIS1_CONJUGATE,
596  m_ahead,
597  a21, rs_A,
598  z21, rs_Z,
599  &beta );
600  bl1_dinvscals( buff_2, &beta );
601  bl1_dcopyconj( &beta, &conj_beta );
602 
603  // FLA_Scal( minus_inv_tau11, conj_beta );
604  // FLA_Axpy( conj_beta, a21, y21 );
605  // FLA_Scal( inv_tau11, y21 );
606  bl1_dscals( &minus_inv_tau11, &conj_beta );
607  bl1_daxpyv( BLIS1_NO_CONJUGATE,
608  m_ahead,
609  &conj_beta,
610  a21, rs_A,
611  y21, rs_Y );
612  bl1_dscalv( BLIS1_NO_CONJUGATE,
613  m_ahead,
614  &inv_tau11,
615  y21, rs_Y );
616 
617  // FLA_Scal( minus_inv_tau11, beta );
618  // FLA_Axpy( beta, a21, z21 );
619  // FLA_Scal( inv_tau11, z21 );
620  bl1_dscals( &minus_inv_tau11, &beta );
621  bl1_daxpyv( BLIS1_NO_CONJUGATE,
622  m_ahead,
623  &beta,
624  a21, rs_A,
625  z21, rs_Z );
626  bl1_dscalv( BLIS1_NO_CONJUGATE,
627  m_ahead,
628  &inv_tau11,
629  z21, rs_Z );
630 
631  // FLA_Dot( a12t, a21, dot_product );
632  // FLA_Scal( minus_inv_tau11, dot_product );
633  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
634  bl1_ddot( BLIS1_NO_CONJUGATE,
635  m_ahead,
636  a12t, cs_A,
637  a21, rs_A,
638  &dot_product );
639  bl1_dscals( &minus_inv_tau11, &dot_product );
640  bl1_daxpyv( BLIS1_CONJUGATE,
641  m_ahead,
642  &dot_product,
643  a21, rs_A,
644  a12t, cs_A );
645 
646  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
647  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
648  bl1_dgemv( BLIS1_NO_TRANSPOSE,
649  BLIS1_NO_CONJUGATE,
650  m_behind,
651  n_ahead,
652  buff_1,
653  A02, rs_A, cs_A,
654  a21, rs_A,
655  buff_0,
656  e0, inc_e );
657  bl1_dger( BLIS1_NO_CONJUGATE,
658  BLIS1_CONJUGATE,
659  m_behind,
660  n_ahead,
661  &minus_inv_tau11,
662  e0, inc_e,
663  a21, rs_A,
664  A02, rs_A, cs_A );
665 
666  // FLA_Copy( first_elem, a21_t );
667  *a21_t = first_elem;
668  }
669 
670  /*------------------------------------------------------------*/
671 
672  }
673 
674  // FLA_Obj_free( &e );
675  FLA_free( buff_e );
676 
677  return FLA_SUCCESS;
678 }
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opd_var1(int m_U, int n_U, double *buff_delta, double *buff_U, int rs_U, int cs_U, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_t, int inc_t, double *buff_u, int inc_u, double *buff_y, int inc_y, double *buff_z, int inc_z)
Definition: FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:270
void bl1_dsetm(int m, int n, double *sigma, double *a, int a_rs, int a_cs)
Definition: bl1_setm.c:45

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opd_var1(), FLA_Fused_Uhu_Yhu_Zhu_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofs_var1()

FLA_Error FLA_Hess_UT_step_ofs_var1 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Hess_UT_step_ofs_var2()

FLA_Error FLA_Hess_UT_step_ofs_var2 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float dot_product;
109  float beta, conj_beta;
110  float inv_tau11;
111  float minus_inv_tau11;
112  int i;
113 
114  // b_alg = FLA_Obj_length( T );
115  int b_alg = m_T;
116 
117  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
118  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
119  float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
120  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
121  int inc_y = 1;
122  int inc_z = 1;
123 
124  for ( i = 0; i < b_alg; ++i )
125  {
126  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
127  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
128  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
129  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
130  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
131 
132  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
133  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
134 
135  float* y0 = buff_y + (0 )*inc_y;
136  float* y2 = buff_y + (i+1)*inc_y;
137 
138  float* z2 = buff_z + (i+1)*inc_z;
139 
140  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
141  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
142 
143  int m_ahead = m_A - i - 1;
144  int n_ahead = m_A - i - 1;
145  int m_behind = i;
146  int n_behind = i;
147 
148  /*------------------------------------------------------------*/
149 
150  if ( m_ahead > 0 )
151  {
152  // FLA_Househ2_UT( FLA_LEFT,
153  // a21_t,
154  // a21_b, tau11 );
155  FLA_Househ2_UT_l_ops( m_ahead - 1,
156  a21_t,
157  a21_b, rs_A,
158  tau11 );
159 
160  // FLA_Set( FLA_ONE, inv_tau11 );
161  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
162  // FLA_Copy( inv_tau11, minus_inv_tau11 );
163  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
164  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
165  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
166 
167  // FLA_Copy( a21_t, first_elem );
168  // FLA_Set( FLA_ONE, a21_t );
169  first_elem = *a21_t;
170  *a21_t = *buff_1;
171 
172  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
173  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
174  FLA_Fused_Ahx_Ax_ops_var1( m_ahead,
175  n_ahead,
176  A22, rs_A, cs_A,
177  a21, rs_A,
178  y2, inc_y,
179  z2, inc_z );
180 
181  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
182  // FLA_Inv_scal( FLA_TWO, beta );
183  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
184  bl1_sdot( BLIS1_CONJUGATE,
185  m_ahead,
186  a21, rs_A,
187  z2, inc_z,
188  &beta );
189  bl1_sinvscals( buff_2, &beta );
190  bl1_scopyconj( &beta, &conj_beta );
191 
192  // FLA_Scal( minus_inv_tau11, conj_beta );
193  // FLA_Axpy( conj_beta, a21, y2 );
194  // FLA_Scal( inv_tau11, y2 );
195  bl1_sscals( &minus_inv_tau11, &conj_beta );
196  bl1_saxpyv( BLIS1_NO_CONJUGATE,
197  m_ahead,
198  &conj_beta,
199  a21, rs_A,
200  y2, inc_y );
201  bl1_sscalv( BLIS1_NO_CONJUGATE,
202  m_ahead,
203  &inv_tau11,
204  y2, inc_y );
205 
206  // FLA_Scal( minus_inv_tau11, beta );
207  // FLA_Axpy( beta, a21, z2 );
208  // FLA_Scal( inv_tau11, z2 );
209  bl1_sscals( &minus_inv_tau11, &beta );
210  bl1_saxpyv( BLIS1_NO_CONJUGATE,
211  m_ahead,
212  &beta,
213  a21, rs_A,
214  z2, inc_z );
215  bl1_sscalv( BLIS1_NO_CONJUGATE,
216  m_ahead,
217  &inv_tau11,
218  z2, inc_z );
219 
220  // FLA_Dot( a12t, a21, dot_product );
221  // FLA_Scal( minus_inv_tau11, dot_product );
222  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
223  bl1_sdot( BLIS1_NO_CONJUGATE,
224  m_ahead,
225  a12t, cs_A,
226  a21, rs_A,
227  &dot_product );
228  bl1_sscals( &minus_inv_tau11, &dot_product );
229  bl1_saxpyv( BLIS1_CONJUGATE,
230  m_ahead,
231  &dot_product,
232  a21, rs_A,
233  a12t, cs_A );
234 
235  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
236  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
237  bl1_sgemv( BLIS1_NO_TRANSPOSE,
238  BLIS1_NO_CONJUGATE,
239  m_behind,
240  n_ahead,
241  buff_1,
242  A02, rs_A, cs_A,
243  a21, rs_A,
244  buff_0,
245  y0, inc_y );
246  bl1_sger( BLIS1_NO_CONJUGATE,
247  BLIS1_CONJUGATE,
248  m_behind,
249  n_ahead,
250  &minus_inv_tau11,
251  y0, inc_y,
252  a21, rs_A,
253  A02, rs_A, cs_A );
254 
255  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
256  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
257  FLA_Fused_Gerc2_ops_var1( m_ahead,
258  n_ahead,
259  buff_m1,
260  a21, rs_A,
261  y2, inc_y,
262  z2, inc_z,
263  a21, rs_A,
264  A22, rs_A, cs_A );
265 
266  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
267  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
268  BLIS1_NO_CONJUGATE,
269  m_ahead,
270  n_behind,
271  buff_1,
272  A20, rs_A, cs_A,
273  a21, rs_A,
274  buff_0,
275  t01, rs_T );
276 
277  // FLA_Copy( first_elem, a21_t );
278  *a21_t = first_elem;
279  }
280 
281  /*------------------------------------------------------------*/
282 
283  }
284 
285  // FLA_Obj_free( &y );
286  // FLA_Obj_free( &z );
287  FLA_free( buff_y );
288  FLA_free( buff_z );
289 
290  return FLA_SUCCESS;
291 }
FLA_Error FLA_Fused_Gerc2_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_v, int inc_v, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:130
FLA_Error FLA_Fused_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:116
FLA_Error FLA_Househ2_UT_l_ops(int m_x2, float *chi_1, float *x2, int inc_x2, float *tau)
Definition: FLA_Househ2_UT.c:160
void bl1_sgemv(trans1_t transa, conj1_t conjx, int m, int n, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_gemv.c:13
void bl1_sger(conj1_t conjx, conj1_t conjy, int m, int n, float *alpha, float *x, int incx, float *y, int incy, float *a, int a_rs, int a_cs)
Definition: bl1_ger.c:13
void bl1_sscalv(conj1_t conj, int n, float *alpha, float *x, int incx)
Definition: bl1_scalv.c:13

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofs_var3()

FLA_Error FLA_Hess_UT_step_ofs_var3 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float dot_product;
109  float beta, conj_beta;
110  float inv_tau11;
111  float minus_inv_tau11;
112  float minus_upsilon1, minus_conj_upsilon1;
113  float minus_psi1, minus_conj_psi1;
114  float minus_zeta1;
115  int i;
116 
117  // b_alg = FLA_Obj_length( T );
118  int b_alg = m_T;
119 
120  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
121  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
122  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
123  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
124  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
125  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
126  float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
127  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
128  float* buff_v = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
129  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
130  int inc_u = 1;
131  int inc_y = 1;
132  int inc_z = 1;
133  int inc_v = 1;
134  int inc_w = 1;
135 
136  for ( i = 0; i < b_alg; ++i )
137  {
138  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
139  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
140  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
141  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
142  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
143  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
144 
145  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
146  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
147 
148  float* upsilon1 = buff_u + (i )*inc_u;
149  float* u2 = buff_u + (i+1)*inc_u;
150 
151  float* y0 = buff_y + (0 )*inc_y;
152  float* psi1 = buff_y + (i )*inc_y;
153  float* y2 = buff_y + (i+1)*inc_y;
154 
155  float* zeta1 = buff_z + (i )*inc_z;
156  float* z2 = buff_z + (i+1)*inc_z;
157 
158  float* v2 = buff_v + (i+1)*inc_v;
159 
160  float* w2 = buff_w + (i+1)*inc_w;
161 
162  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
163  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
164 
165  int m_ahead = m_A - i - 1;
166  int n_ahead = m_A - i - 1;
167  int m_behind = i;
168  int n_behind = i;
169 
170  /*------------------------------------------------------------*/
171 
172  if ( m_behind > 0 )
173  {
174  // FLA_Copy( upsilon1, minus_upsilon1 );
175  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
176  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
177  bl1_smult3( buff_m1, upsilon1, &minus_upsilon1 );
178  bl1_scopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
179 
180  // FLA_Copy( psi1, minus_psi1 );
181  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
182  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
183  bl1_smult3( buff_m1, psi1, &minus_psi1 );
184  bl1_scopyconj( &minus_psi1, &minus_conj_psi1 );
185 
186  // FLA_Copy( zeta1, minus_zeta1 );
187  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
188  bl1_smult3( buff_m1, zeta1, &minus_zeta1 );
189 
190  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
191  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
192  bl1_saxpyv( BLIS1_CONJUGATE,
193  1,
194  &minus_upsilon1,
195  psi1, 1,
196  alpha11, 1 );
197  bl1_saxpyv( BLIS1_CONJUGATE,
198  1,
199  &minus_zeta1,
200  upsilon1, 1,
201  alpha11, 1 );
202 
203  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
204  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
205  bl1_saxpyv( BLIS1_CONJUGATE,
206  m_ahead,
207  &minus_upsilon1,
208  y2, inc_y,
209  a12t, cs_A );
210  bl1_saxpyv( BLIS1_CONJUGATE,
211  m_ahead,
212  &minus_zeta1,
213  u2, inc_u,
214  a12t, cs_A );
215 
216  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
217  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
218  bl1_saxpyv( BLIS1_NO_CONJUGATE,
219  m_ahead,
220  &minus_conj_psi1,
221  u2, inc_u,
222  a21, rs_A );
223  bl1_saxpyv( BLIS1_NO_CONJUGATE,
224  m_ahead,
225  &minus_conj_upsilon1,
226  z2, inc_z,
227  a21, rs_A );
228  }
229 
230  if ( m_ahead > 0 )
231  {
232  // FLA_Househ2_UT( FLA_LEFT,
233  // a21_t,
234  // a21_b, tau11 );
235  FLA_Househ2_UT_l_ops( m_ahead - 1,
236  a21_t,
237  a21_b, rs_A,
238  tau11 );
239 
240  // FLA_Set( FLA_ONE, inv_tau11 );
241  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
242  // FLA_Copy( inv_tau11, minus_inv_tau11 );
243  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
244  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
245  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
246 
247  // FLA_Copy( a21_t, first_elem );
248  // FLA_Set( FLA_ONE, a21_t );
249  first_elem = *a21_t;
250  *a21_t = *buff_1;
251  }
252 
253  if ( m_behind > 0 && m_ahead > 0 )
254  {
255  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
256  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
257  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
258  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
259  FLA_Fused_Gerc2_Ahx_Ax_ops_var1( m_ahead,
260  n_ahead,
261  buff_m1,
262  u2, inc_u,
263  y2, inc_y,
264  z2, inc_z,
265  A22, rs_A, cs_A,
266  a21, rs_A,
267  v2, inc_v,
268  w2, inc_w );
269  }
270  else if ( m_ahead > 0 )
271  {
272  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
273  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
274  FLA_Fused_Ahx_Ax_ops_var1( m_ahead,
275  n_ahead,
276  A22, rs_A, cs_A,
277  a21, rs_A,
278  v2, inc_v,
279  w2, inc_w );
280  }
281 
282  if ( m_ahead > 0 )
283  {
284  // FLA_Copy( a21, u2 );
285  // FLA_Copy( v2, y2 );
286  // FLA_Copy( w2, z2 );
287  bl1_scopyv( BLIS1_NO_CONJUGATE,
288  m_ahead,
289  a21, rs_A,
290  u2, inc_u );
291  bl1_scopyv( BLIS1_NO_CONJUGATE,
292  m_ahead,
293  v2, inc_v,
294  y2, inc_y );
295  bl1_scopyv( BLIS1_NO_CONJUGATE,
296  m_ahead,
297  w2, inc_w,
298  z2, inc_z );
299 
300  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
301  // FLA_Inv_scal( FLA_TWO, beta );
302  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
303  bl1_sdot( BLIS1_CONJUGATE,
304  m_ahead,
305  a21, rs_A,
306  z2, inc_z,
307  &beta );
308  bl1_sinvscals( buff_2, &beta );
309  bl1_scopyconj( &beta, &conj_beta );
310 
311  // FLA_Scal( minus_inv_tau11, conj_beta );
312  // FLA_Axpy( conj_beta, a21, y2 );
313  // FLA_Scal( inv_tau11, y2 );
314  bl1_sscals( &minus_inv_tau11, &conj_beta );
315  bl1_saxpyv( BLIS1_NO_CONJUGATE,
316  m_ahead,
317  &conj_beta,
318  a21, rs_A,
319  y2, inc_y );
320  bl1_sscalv( BLIS1_NO_CONJUGATE,
321  m_ahead,
322  &inv_tau11,
323  y2, inc_y );
324 
325  // FLA_Scal( minus_inv_tau11, beta );
326  // FLA_Axpy( beta, a21, z2 );
327  // FLA_Scal( inv_tau11, z2 );
328  bl1_sscals( &minus_inv_tau11, &beta );
329  bl1_saxpyv( BLIS1_NO_CONJUGATE,
330  m_ahead,
331  &beta,
332  a21, rs_A,
333  z2, inc_z );
334  bl1_sscalv( BLIS1_NO_CONJUGATE,
335  m_ahead,
336  &inv_tau11,
337  z2, inc_z );
338 
339  // FLA_Dot( a12t, a21, dot_product );
340  // FLA_Scal( minus_inv_tau11, dot_product );
341  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
342  bl1_sdot( BLIS1_NO_CONJUGATE,
343  m_ahead,
344  a12t, cs_A,
345  a21, rs_A,
346  &dot_product );
347  bl1_sscals( &minus_inv_tau11, &dot_product );
348  bl1_saxpyv( BLIS1_CONJUGATE,
349  m_ahead,
350  &dot_product,
351  a21, rs_A,
352  a12t, cs_A );
353 
354  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
355  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
356  bl1_sgemv( BLIS1_NO_TRANSPOSE,
357  BLIS1_NO_CONJUGATE,
358  m_behind,
359  n_ahead,
360  buff_1,
361  A02, rs_A, cs_A,
362  a21, rs_A,
363  buff_0,
364  y0, inc_y );
365  bl1_sger( BLIS1_NO_CONJUGATE,
366  BLIS1_CONJUGATE,
367  m_behind,
368  n_ahead,
369  &minus_inv_tau11,
370  y0, inc_y,
371  a21, rs_A,
372  A02, rs_A, cs_A );
373 
374  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
375  bl1_sgemv( BLIS1_CONJ_TRANSPOSE,
376  BLIS1_NO_CONJUGATE,
377  m_ahead,
378  n_behind,
379  buff_1,
380  A20, rs_A, cs_A,
381  a21, rs_A,
382  buff_0,
383  t01, rs_T );
384 
385  // FLA_Copy( first_elem, a21_t );
386  *a21_t = first_elem;
387  }
388 
389  if ( m_behind + 1 == b_alg && m_ahead > 0 )
390  {
391  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
392  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
393  FLA_Fused_Gerc2_ops_var1( m_ahead,
394  n_ahead,
395  buff_m1,
396  u2, inc_u,
397  y2, inc_y,
398  z2, inc_z,
399  u2, inc_u,
400  A22, rs_A, cs_A );
401  }
402 
403  /*------------------------------------------------------------*/
404 
405  }
406 
407  // FLA_Obj_free( &u );
408  // FLA_Obj_free( &y );
409  // FLA_Obj_free( &z );
410  // FLA_Obj_free( &v );
411  // FLA_Obj_free( &w );
412  FLA_free( buff_u );
413  FLA_free( buff_y );
414  FLA_free( buff_z );
415  FLA_free( buff_v );
416  FLA_free( buff_w );
417 
418  return FLA_SUCCESS;
419 }
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_ops_var1(int m_A, int n_A, float *buff_alpha, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z, float *buff_A, int rs_A, int cs_A, float *buff_x, int inc_x, float *buff_v, int inc_v, float *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:150
void bl1_scopyv(conj1_t conj, int m, float *x, int incx, float *y, int incy)
Definition: bl1_copyv.c:35

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_Ahx_Ax_ops_var1(), FLA_Fused_Gerc2_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofs_var4()

FLA_Error FLA_Hess_UT_step_ofs_var4 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
138 {
139  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
140  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
141  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
142  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
143 
144  float first_elem, last_elem;
145  float dot_product;
146  float beta, conj_beta;
147  float inv_tau11;
148  float minus_inv_tau11;
149  int i;
150 
151  // b_alg = FLA_Obj_length( T );
152  int b_alg = m_T;
153 
154  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
155  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
156  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
157  float* buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
158  int inc_e = 1;
159 
160  // FLA_Set( FLA_ZERO, Y );
161  // FLA_Set( FLA_ZERO, Z );
162  bl1_ssetm( m_A,
163  b_alg,
164  buff_0,
165  buff_Y, rs_Y, cs_Y );
166  bl1_ssetm( m_A,
167  b_alg,
168  buff_0,
169  buff_Z, rs_Z, cs_Z );
170 
171  for ( i = 0; i < b_alg; ++i )
172  {
173  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
174  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
175  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
176  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
177  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
178  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
179  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
180 
181  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
182  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
183  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
184 
185  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
186  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
187  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
188 
189  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
190  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
191 
192  float* e0 = buff_e + (0 )*inc_e;
193 
194  float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
195 
196  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
197  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
198 
199  float* ABL = a10t;
200  float* ZBL = z10t;
201 
202  float* a2 = alpha11;
203 
204  int m_ahead = m_A - i - 1;
205  int n_ahead = m_A - i - 1;
206  int m_behind = i;
207  int n_behind = i;
208 
209  /*------------------------------------------------------------*/
210 
211  if ( m_behind > 0 )
212  {
213  // FLA_Copy( a10t_r, last_elem );
214  // FLA_Set( FLA_ONE, a10t_r );
215  last_elem = *a10t_r;
216  *a10t_r = *buff_1;
217  }
218 
219  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
220  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
221  bl1_sgemv( BLIS1_NO_TRANSPOSE,
222  BLIS1_CONJUGATE,
223  m_ahead + 1,
224  n_behind,
225  buff_m1,
226  ABL, rs_A, cs_A,
227  y10t, cs_Y,
228  buff_1,
229  a2, rs_A );
230  bl1_sgemv( BLIS1_NO_TRANSPOSE,
231  BLIS1_CONJUGATE,
232  m_ahead + 1,
233  n_behind,
234  buff_m1,
235  ZBL, rs_Z, cs_Z,
236  a10t, cs_A,
237  buff_1,
238  a2, rs_A );
239 
240  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
241  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
242  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
243  BLIS1_NO_CONJUGATE,
244  m_ahead,
245  n_behind,
246  buff_m1,
247  Y20, rs_Y, cs_Y,
248  a10t, cs_A,
249  buff_1,
250  a12t, cs_A );
251  bl1_sgemv( BLIS1_CONJ_NO_TRANSPOSE,
252  BLIS1_NO_CONJUGATE,
253  m_ahead,
254  n_behind,
255  buff_m1,
256  A20, rs_A, cs_A,
257  z10t, cs_Z,
258  buff_1,
259  a12t, cs_A );
260 
261  if ( m_behind > 0 )
262  {
263  // FLA_Copy( last_elem, a10t_r );
264  *a10t_r = last_elem;
265  }
266 
267  if ( m_ahead > 0 )
268  {
269  // FLA_Househ2_UT( FLA_LEFT,
270  // a21_t,
271  // a21_b, tau11 );
272  FLA_Househ2_UT_l_ops( m_ahead - 1,
273  a21_t,
274  a21_b, rs_A,
275  tau11 );
276 
277  // FLA_Set( FLA_ONE, inv_tau11 );
278  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
279  // FLA_Copy( inv_tau11, minus_inv_tau11 );
280  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
281  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
282  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
283 
284  // FLA_Copy( a21_t, first_elem );
285  // FLA_Set( FLA_ONE, a21_t );
286  first_elem = *a21_t;
287  *a21_t = *buff_1;
288 
289  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
290  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
291  FLA_Fused_Ahx_Ax_ops_var1( m_ahead,
292  n_ahead,
293  A22, rs_A, cs_A,
294  a21, rs_A,
295  y21, rs_Y,
296  z21, rs_Z );
297 
298  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
299  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
300  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
301  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
302  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
303  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
304  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
305  // FLA_Copy( d0, t01 );
306  FLA_Fused_Uhu_Yhu_Zhu_ops_var1( m_ahead,
307  n_behind,
308  buff_m1,
309  A20, rs_A, cs_A,
310  Y20, rs_Y, cs_Y,
311  Z20, rs_Z, cs_Z,
312  t01, rs_T,
313  a21, rs_A,
314  y21, rs_Y,
315  z21, rs_Z );
316 
317  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
318  // FLA_Inv_scal( FLA_TWO, beta );
319  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
320  bl1_sdot( BLIS1_CONJUGATE,
321  m_ahead,
322  a21, rs_A,
323  z21, rs_Z,
324  &beta );
325  bl1_sinvscals( buff_2, &beta );
326  bl1_scopyconj( &beta, &conj_beta );
327 
328  // FLA_Scal( minus_inv_tau11, conj_beta );
329  // FLA_Axpy( conj_beta, a21, y21 );
330  // FLA_Scal( inv_tau11, y21 );
331  bl1_sscals( &minus_inv_tau11, &conj_beta );
332  bl1_saxpyv( BLIS1_NO_CONJUGATE,
333  m_ahead,
334  &conj_beta,
335  a21, rs_A,
336  y21, rs_Y );
337  bl1_sscalv( BLIS1_NO_CONJUGATE,
338  m_ahead,
339  &inv_tau11,
340  y21, rs_Y );
341 
342  // FLA_Scal( minus_inv_tau11, beta );
343  // FLA_Axpy( beta, a21, z21 );
344  // FLA_Scal( inv_tau11, z21 );
345  bl1_sscals( &minus_inv_tau11, &beta );
346  bl1_saxpyv( BLIS1_NO_CONJUGATE,
347  m_ahead,
348  &beta,
349  a21, rs_A,
350  z21, rs_Z );
351  bl1_sscalv( BLIS1_NO_CONJUGATE,
352  m_ahead,
353  &inv_tau11,
354  z21, rs_Z );
355 
356  // FLA_Dot( a12t, a21, dot_product );
357  // FLA_Scal( minus_inv_tau11, dot_product );
358  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
359  bl1_sdot( BLIS1_NO_CONJUGATE,
360  m_ahead,
361  a12t, cs_A,
362  a21, rs_A,
363  &dot_product );
364  bl1_sscals( &minus_inv_tau11, &dot_product );
365  bl1_saxpyv( BLIS1_CONJUGATE,
366  m_ahead,
367  &dot_product,
368  a21, rs_A,
369  a12t, cs_A );
370 
371  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
372  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
373  bl1_sgemv( BLIS1_NO_TRANSPOSE,
374  BLIS1_NO_CONJUGATE,
375  m_behind,
376  n_ahead,
377  buff_1,
378  A02, rs_A, cs_A,
379  a21, rs_A,
380  buff_0,
381  e0, inc_e );
382  bl1_sger( BLIS1_NO_CONJUGATE,
383  BLIS1_CONJUGATE,
384  m_behind,
385  n_ahead,
386  &minus_inv_tau11,
387  e0, inc_e,
388  a21, rs_A,
389  A02, rs_A, cs_A );
390 
391  // FLA_Copy( first_elem, a21_t );
392  *a21_t = first_elem;
393  }
394 
395  /*------------------------------------------------------------*/
396 
397  }
398 
399  // FLA_Obj_free( &e );
400  FLA_free( buff_e );
401 
402  return FLA_SUCCESS;
403 }
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_ops_var1(int m_U, int n_U, float *buff_delta, float *buff_U, int rs_U, int cs_U, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_t, int inc_t, float *buff_u, int inc_u, float *buff_y, int inc_y, float *buff_z, int inc_z)
Definition: FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:156
void bl1_ssetm(int m, int n, float *sigma, float *a, int a_rs, int a_cs)
Definition: bl1_setm.c:29

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_ops_var1(), FLA_Fused_Uhu_Yhu_Zhu_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_ofu_var1()

FLA_Error FLA_Hess_UT_step_ofu_var1 ( FLA_Obj  A,
FLA_Obj  T 
)

◆ FLA_Hess_UT_step_ofu_var2()

FLA_Error FLA_Hess_UT_step_ofu_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
44  FLA_Hess_UT_step_ofs_var2( m_A,
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
57  FLA_Hess_UT_step_ofd_var2( m_A,
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
70  FLA_Hess_UT_step_ofc_var2( m_A,
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
83  FLA_Hess_UT_step_ofz_var2( m_A,
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Hess_UT_step_ofd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var2.c:295
FLA_Error FLA_Hess_UT_step_ofs_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var2.c:97
FLA_Error FLA_Hess_UT_step_ofc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var2.c:493
FLA_Error FLA_Hess_UT_step_ofz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var2.c:691
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174

References FLA_Hess_UT_step_ofc_var2(), FLA_Hess_UT_step_ofd_var2(), FLA_Hess_UT_step_ofs_var2(), FLA_Hess_UT_step_ofz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blf_var2(), and FLA_Hess_UT_ofu_var2().

◆ FLA_Hess_UT_step_ofu_var3()

FLA_Error FLA_Hess_UT_step_ofu_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Datatype datatype;
21  int m_A, m_T;
22  int rs_A, cs_A;
23  int rs_T, cs_T;
24 
25  datatype = FLA_Obj_datatype( A );
26 
27  m_A = FLA_Obj_length( A );
28  m_T = FLA_Obj_length( T );
29 
30  rs_A = FLA_Obj_row_stride( A );
31  cs_A = FLA_Obj_col_stride( A );
32 
33  rs_T = FLA_Obj_row_stride( T );
34  cs_T = FLA_Obj_col_stride( T );
35 
36 
37  switch ( datatype )
38  {
39  case FLA_FLOAT:
40  {
41  float* buff_A = FLA_FLOAT_PTR( A );
42  float* buff_T = FLA_FLOAT_PTR( T );
43 
44  FLA_Hess_UT_step_ofs_var3( m_A,
45  m_T,
46  buff_A, rs_A, cs_A,
47  buff_T, rs_T, cs_T );
48 
49  break;
50  }
51 
52  case FLA_DOUBLE:
53  {
54  double* buff_A = FLA_DOUBLE_PTR( A );
55  double* buff_T = FLA_DOUBLE_PTR( T );
56 
57  FLA_Hess_UT_step_ofd_var3( m_A,
58  m_T,
59  buff_A, rs_A, cs_A,
60  buff_T, rs_T, cs_T );
61 
62  break;
63  }
64 
65  case FLA_COMPLEX:
66  {
67  scomplex* buff_A = FLA_COMPLEX_PTR( A );
68  scomplex* buff_T = FLA_COMPLEX_PTR( T );
69 
70  FLA_Hess_UT_step_ofc_var3( m_A,
71  m_T,
72  buff_A, rs_A, cs_A,
73  buff_T, rs_T, cs_T );
74 
75  break;
76  }
77 
78  case FLA_DOUBLE_COMPLEX:
79  {
80  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
81  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
82 
83  FLA_Hess_UT_step_ofz_var3( m_A,
84  m_T,
85  buff_A, rs_A, cs_A,
86  buff_T, rs_T, cs_T );
87 
88  break;
89  }
90  }
91 
92  return FLA_SUCCESS;
93 }
FLA_Error FLA_Hess_UT_step_ofs_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var3.c:97
FLA_Error FLA_Hess_UT_step_ofz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var3.c:1075
FLA_Error FLA_Hess_UT_step_ofd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var3.c:423
FLA_Error FLA_Hess_UT_step_ofc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var3.c:749

References FLA_Hess_UT_step_ofc_var3(), FLA_Hess_UT_step_ofd_var3(), FLA_Hess_UT_step_ofs_var3(), FLA_Hess_UT_step_ofz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blf_var3(), and FLA_Hess_UT_ofu_var3().

◆ FLA_Hess_UT_step_ofu_var4()

FLA_Error FLA_Hess_UT_step_ofu_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30 {
31  FLA_Datatype datatype;
32  int m_A, m_T;
33  int rs_A, cs_A;
34  int rs_Y, cs_Y;
35  int rs_Z, cs_Z;
36  int rs_T, cs_T;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  m_A = FLA_Obj_length( A );
41  m_T = FLA_Obj_length( T );
42 
43  rs_A = FLA_Obj_row_stride( A );
44  cs_A = FLA_Obj_col_stride( A );
45 
46  rs_Y = FLA_Obj_row_stride( Y );
47  cs_Y = FLA_Obj_col_stride( Y );
48 
49  rs_Z = FLA_Obj_row_stride( Z );
50  cs_Z = FLA_Obj_col_stride( Z );
51 
52  rs_T = FLA_Obj_row_stride( T );
53  cs_T = FLA_Obj_col_stride( T );
54 
55 
56  switch ( datatype )
57  {
58  case FLA_FLOAT:
59  {
60  float* buff_A = FLA_FLOAT_PTR( A );
61  float* buff_Y = FLA_FLOAT_PTR( Y );
62  float* buff_Z = FLA_FLOAT_PTR( Z );
63  float* buff_T = FLA_FLOAT_PTR( T );
64 
65  FLA_Hess_UT_step_ofs_var4( m_A,
66  m_T,
67  buff_A, rs_A, cs_A,
68  buff_Y, rs_Y, cs_Y,
69  buff_Z, rs_Z, cs_Z,
70  buff_T, rs_T, cs_T );
71 
72  break;
73  }
74 
75  case FLA_DOUBLE:
76  {
77  double* buff_A = FLA_DOUBLE_PTR( A );
78  double* buff_Y = FLA_DOUBLE_PTR( Y );
79  double* buff_Z = FLA_DOUBLE_PTR( Z );
80  double* buff_T = FLA_DOUBLE_PTR( T );
81 
82  FLA_Hess_UT_step_ofd_var4( m_A,
83  m_T,
84  buff_A, rs_A, cs_A,
85  buff_Y, rs_Y, cs_Y,
86  buff_Z, rs_Z, cs_Z,
87  buff_T, rs_T, cs_T );
88 
89  break;
90  }
91 
92  case FLA_COMPLEX:
93  {
94  scomplex* buff_A = FLA_COMPLEX_PTR( A );
95  scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
96  scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
97  scomplex* buff_T = FLA_COMPLEX_PTR( T );
98 
99  FLA_Hess_UT_step_ofc_var4( m_A,
100  m_T,
101  buff_A, rs_A, cs_A,
102  buff_Y, rs_Y, cs_Y,
103  buff_Z, rs_Z, cs_Z,
104  buff_T, rs_T, cs_T );
105 
106  break;
107  }
108 
109  case FLA_DOUBLE_COMPLEX:
110  {
111  dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
112  dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
113  dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
114  dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );
115 
116  FLA_Hess_UT_step_ofz_var4( m_A,
117  m_T,
118  buff_A, rs_A, cs_A,
119  buff_Y, rs_Y, cs_Y,
120  buff_Z, rs_Z, cs_Z,
121  buff_T, rs_T, cs_T );
122 
123  break;
124  }
125  }
126 
127  return FLA_SUCCESS;
128 }
FLA_Error FLA_Hess_UT_step_ofz_var4(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var4.c:957
FLA_Error FLA_Hess_UT_step_ofs_var4(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var4.c:132
FLA_Error FLA_Hess_UT_step_ofd_var4(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var4.c:407
FLA_Error FLA_Hess_UT_step_ofc_var4(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_fus_var4.c:682

References FLA_Hess_UT_step_ofc_var4(), FLA_Hess_UT_step_ofd_var4(), FLA_Hess_UT_step_ofs_var4(), FLA_Hess_UT_step_ofz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blf_var4(), and FLA_Hess_UT_ofu_var4().

◆ FLA_Hess_UT_step_ofz_var1()

FLA_Error FLA_Hess_UT_step_ofz_var1 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)

◆ FLA_Hess_UT_step_ofz_var2()

FLA_Error FLA_Hess_UT_step_ofz_var2 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
695 {
696  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
697  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
698  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
699  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
700 
701  dcomplex first_elem;
702  dcomplex dot_product;
703  dcomplex beta, conj_beta;
704  dcomplex inv_tau11;
705  dcomplex minus_inv_tau11;
706  int i;
707 
708  // b_alg = FLA_Obj_length( T );
709  int b_alg = m_T;
710 
711  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
712  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
713  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
714  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
715  int inc_y = 1;
716  int inc_z = 1;
717 
718  for ( i = 0; i < b_alg; ++i )
719  {
720  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
721  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
722  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
723  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
724  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
725 
726  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
727  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
728 
729  dcomplex* y0 = buff_y + (0 )*inc_y;
730  dcomplex* y2 = buff_y + (i+1)*inc_y;
731 
732  dcomplex* z2 = buff_z + (i+1)*inc_z;
733 
734  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
735  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
736 
737  int m_ahead = m_A - i - 1;
738  int n_ahead = m_A - i - 1;
739  int m_behind = i;
740  int n_behind = i;
741 
742  /*------------------------------------------------------------*/
743 
744  if ( m_ahead > 0 )
745  {
746  // FLA_Househ2_UT( FLA_LEFT,
747  // a21_t,
748  // a21_b, tau11 );
749  FLA_Househ2_UT_l_opz( m_ahead - 1,
750  a21_t,
751  a21_b, rs_A,
752  tau11 );
753 
754  // FLA_Set( FLA_ONE, inv_tau11 );
755  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
756  // FLA_Copy( inv_tau11, minus_inv_tau11 );
757  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
758  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
759  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
760 
761  // FLA_Copy( a21_t, first_elem );
762  // FLA_Set( FLA_ONE, a21_t );
763  first_elem = *a21_t;
764  *a21_t = *buff_1;
765 
766  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
767  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
768  FLA_Fused_Ahx_Ax_opz_var1( m_ahead,
769  n_ahead,
770  A22, rs_A, cs_A,
771  a21, rs_A,
772  y2, inc_y,
773  z2, inc_z );
774 
775  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
776  // FLA_Inv_scal( FLA_TWO, beta );
777  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
778  bl1_zdot( BLIS1_CONJUGATE,
779  m_ahead,
780  a21, rs_A,
781  z2, inc_z,
782  &beta );
783  bl1_zinvscals( buff_2, &beta );
784  bl1_zcopyconj( &beta, &conj_beta );
785 
786  // FLA_Scal( minus_inv_tau11, conj_beta );
787  // FLA_Axpy( conj_beta, a21, y2 );
788  // FLA_Scal( inv_tau11, y2 );
789  bl1_zscals( &minus_inv_tau11, &conj_beta );
790  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
791  m_ahead,
792  &conj_beta,
793  a21, rs_A,
794  y2, inc_y );
795  bl1_zscalv( BLIS1_NO_CONJUGATE,
796  m_ahead,
797  &inv_tau11,
798  y2, inc_y );
799 
800  // FLA_Scal( minus_inv_tau11, beta );
801  // FLA_Axpy( beta, a21, z2 );
802  // FLA_Scal( inv_tau11, z2 );
803  bl1_zscals( &minus_inv_tau11, &beta );
804  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
805  m_ahead,
806  &beta,
807  a21, rs_A,
808  z2, inc_z );
809  bl1_zscalv( BLIS1_NO_CONJUGATE,
810  m_ahead,
811  &inv_tau11,
812  z2, inc_z );
813 
814  // FLA_Dot( a12t, a21, dot_product );
815  // FLA_Scal( minus_inv_tau11, dot_product );
816  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
817  bl1_zdot( BLIS1_NO_CONJUGATE,
818  m_ahead,
819  a12t, cs_A,
820  a21, rs_A,
821  &dot_product );
822  bl1_zscals( &minus_inv_tau11, &dot_product );
823  bl1_zaxpyv( BLIS1_CONJUGATE,
824  m_ahead,
825  &dot_product,
826  a21, rs_A,
827  a12t, cs_A );
828 
829  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
830  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
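831  bl1_zgemv( BLIS1_NO_TRANSPOSE,
832  BLIS1_NO_CONJUGATE,
833  m_behind,
834  n_ahead,
835  buff_1,
836  A02, rs_A, cs_A,
837  a21, rs_A,
838  buff_0,
839  y0, inc_y );
840  bl1_zger( BLIS1_NO_CONJUGATE,
841  BLIS1_CONJUGATE,
842  m_behind,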
843  n_ahead,
844  &minus_inv_tau11,
845  y0, inc_y,
846  a21, rs_A,
847  A02, rs_A, cs_A );
848 
849  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
850  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
851  FLA_Fused_Gerc2_opz_var1( m_ahead,
852  n_ahead,
853  buff_m1,
854  a21, rs_A,
855  y2, inc_y,
856  z2, inc_z,
857  a21, rs_A,
858  A22, rs_A, cs_A );
859 
860  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
861  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
862  BLIS1_NO_CONJUGATE,
863  m_ahead,
864  n_behind,
865  buff_1,
866  A20, rs_A, cs_A,
867  a21, rs_A,
868  buff_0,
869  t01, rs_T );
870 
871  // FLA_Copy( first_elem, a21_t );
872  *a21_t = first_elem;
873  }
874 
875  /*------------------------------------------------------------*/
876 
877  }
878 
879  // FLA_Obj_free( &y );
880  // FLA_Obj_free( &z );
881  FLA_free( buff_y );
882  FLA_free( buff_z );
883 
884  return FLA_SUCCESS;
885 }
FLA_Error FLA_Fused_Gerc2_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_v, int inc_v, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Fused_Gerc2_opt_var1.c:306
FLA_Error FLA_Fused_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Ahx_Ax_opt_var1.c:307
FLA_Error FLA_Househ2_UT_l_opz(int m_x2, dcomplex *chi_1, dcomplex *x2, int inc_x2, dcomplex *tau)
Definition: FLA_Househ2_UT.c:521
void bl1_zaxpyv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_axpyv.c:60
void bl1_zdot(conj1_t conj, int n, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *rho)
Definition: bl1_dot.c:65
void bl1_zgemv(trans1_t transa, conj1_t conjx, int m, int n, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_gemv.c:255
void bl1_zger(conj1_t conjx, conj1_t conjy, int m, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_ger.c:194
void bl1_zscalv(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx)
Definition: bl1_scalv.c:72

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var2().

◆ FLA_Hess_UT_step_ofz_var3()

FLA_Error FLA_Hess_UT_step_ofz_var3 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
1079 {
1080  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
1081  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1082  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1083  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1084 
1085  dcomplex first_elem;
1086  dcomplex dot_product;
1087  dcomplex beta, conj_beta;
1088  dcomplex inv_tau11;
1089  dcomplex minus_inv_tau11;
1090  dcomplex minus_upsilon1, minus_conj_upsilon1;
1091  dcomplex minus_psi1, minus_conj_psi1;
1092  dcomplex minus_zeta1;
1093  int i;
1094 
1095  // b_alg = FLA_Obj_length( T );
1096  int b_alg = m_T;
1097 
1098  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
1099  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
1100  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
1101  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
1102  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
1103  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1104  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1105  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1106  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1107  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1108  int inc_u = 1;
1109  int inc_y = 1;
1110  int inc_z = 1;
1111  int inc_v = 1;
1112  int inc_w = 1;
1113 
1114  for ( i = 0; i < b_alg; ++i )
1115  {
1116  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1117  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1118  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1119  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1120  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1121  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1122 
1123  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1124  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1125 
1126  dcomplex* upsilon1 = buff_u + (i )*inc_u;
1127  dcomplex* u2 = buff_u + (i+1)*inc_u;
1128 
1129  dcomplex* y0 = buff_y + (0 )*inc_y;
1130  dcomplex* psi1 = buff_y + (i )*inc_y;
1131  dcomplex* y2 = buff_y + (i+1)*inc_y;
1132 
1133  dcomplex* zeta1 = buff_z + (i )*inc_z;
1134  dcomplex* z2 = buff_z + (i+1)*inc_z;
1135 
1136  dcomplex* v2 = buff_v + (i+1)*inc_v;
1137 
1138  dcomplex* w2 = buff_w + (i+1)*inc_w;
1139 
1140  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1141  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1142 
1143  int m_ahead = m_A - i - 1;
1144  int n_ahead = m_A - i - 1;
1145  int m_behind = i;
1146  int n_behind = i;
1147 
1148  /*------------------------------------------------------------*/
1149 
1150  if ( m_behind > 0 )
1151  {
1152  // FLA_Copy( upsilon1, minus_upsilon1 );
1153  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
1154  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
1155  bl1_zmult3( buff_m1, upsilon1, &minus_upsilon1 );
1156  bl1_zcopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
1157 
1158  // FLA_Copy( psi1, minus_psi1 );
1159  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
1160  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
1161  bl1_zmult3( buff_m1, psi1, &minus_psi1 );
1162  bl1_zcopyconj( &minus_psi1, &minus_conj_psi1 );
1163 
1164  // FLA_Copy( zeta1, minus_zeta1 );
1165  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
1166  bl1_zmult3( buff_m1, zeta1, &minus_zeta1 );
1167 
1168  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
1169  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
1170  bl1_zaxpyv( BLIS1_CONJUGATE,
1171  1,
1172  &minus_upsilon1,
1173  psi1, 1,
1174  alpha11, 1 );
1175  bl1_zaxpyv( BLIS1_CONJUGATE,
1176  1,
1177  &minus_zeta1,
1178  upsilon1, 1,
1179  alpha11, 1 );
1180 
1181  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
1182  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
1183  bl1_zaxpyv( BLIS1_CONJUGATE,
1184  m_ahead,
1185  &minus_upsilon1,
1186  y2, inc_y,
1187  a12t, cs_A );
1188  bl1_zaxpyv( BLIS1_CONJUGATE,
1189  m_ahead,
1190  &minus_zeta1,
1191  u2, inc_u,
1192  a12t, cs_A );
1193 
1194  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
1195  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
1196  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1197  m_ahead,
1198  &minus_conj_psi1,
1199  u2, inc_u,
1200  a21, rs_A );
1201  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1202  m_ahead,
1203  &minus_conj_upsilon1,
1204  z2, inc_z,
1205  a21, rs_A );
1206  }
1207 
1208  if ( m_ahead > 0 )
1209  {
1210  // FLA_Househ2_UT( FLA_LEFT,
1211  // a21_t,
1212  // a21_b, tau11 );
1213  FLA_Househ2_UT_l_opz( m_ahead - 1,
1214  a21_t,
1215  a21_b, rs_A,
1216  tau11 );
1217 
1218  // FLA_Set( FLA_ONE, inv_tau11 );
1219  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1220  // FLA_Copy( inv_tau11, minus_inv_tau11 );
1221  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1222  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
1223  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
1224 
1225  // FLA_Copy( a21_t, first_elem );
1226  // FLA_Set( FLA_ONE, a21_t );
1227  first_elem = *a21_t;
1228  *a21_t = *buff_1;
1229  }
1230 
1231  if ( m_behind > 0 && m_ahead > 0 )
1232  {
1233  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1234  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1235  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
1236  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1237  FLA_Fused_Gerc2_Ahx_Ax_opz_var1( m_ahead,
1238  n_ahead,
1239  buff_m1,
1240  u2, inc_u,
1241  y2, inc_y,
1242  z2, inc_z,
1243  A22, rs_A, cs_A,
1244  a21, rs_A,
1245  v2, inc_v,
1246  w2, inc_w );
1247  }
1248  else if ( m_ahead > 0 )
1249  {
1250  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
1251  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
1252  FLA_Fused_Ahx_Ax_opz_var1( m_ahead,
1253  n_ahead,
1254  A22, rs_A, cs_A,
1255  a21, rs_A,
1256  v2, inc_v,
1257  w2, inc_w );
1258  }
1259 
1260  if ( m_ahead > 0 )
1261  {
1262  // FLA_Copy( a21, u2 );
1263  // FLA_Copy( v2, y2 );
1264  // FLA_Copy( w2, z2 );
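1265  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1266  m_ahead,
1267  a21, rs_A,
1268  u2, inc_u );
1269  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1270  m_ahead,
1271  v2, inc_v,
1272  y2, inc_y );
1273  bl1_zcopyv( BLIS1_NO_CONJUGATE,
1274  m_ahead,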
1275  w2, inc_w,
1276  z2, inc_z );
1277 
1278  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1279  // FLA_Inv_scal( FLA_TWO, beta );
1280  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1281  bl1_zdot( BLIS1_CONJUGATE,
1282  m_ahead,
1283  a21, rs_A,
1284  z2, inc_z,
1285  &beta );
1286  bl1_zinvscals( buff_2, &beta );
1287  bl1_zcopyconj( &beta, &conj_beta );
1288 
1289  // FLA_Scal( minus_inv_tau11, conj_beta );
1290  // FLA_Axpy( conj_beta, a21, y2 );
1291  // FLA_Scal( inv_tau11, y2 );
1292  bl1_zscals( &minus_inv_tau11, &conj_beta );
1293  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1294  m_ahead,
1295  &conj_beta,
1296  a21, rs_A,
1297  y2, inc_y );
1298  bl1_zscalv( BLIS1_NO_CONJUGATE,
1299  m_ahead,
1300  &inv_tau11,
1301  y2, inc_y );
1302 
1303  // FLA_Scal( minus_inv_tau11, beta );
1304  // FLA_Axpy( beta, a21, z2 );
1305  // FLA_Scal( inv_tau11, z2 );
1306  bl1_zscals( &minus_inv_tau11, &beta );
1307  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1308  m_ahead,
1309  &beta,
1310  a21, rs_A,
1311  z2, inc_z );
1312  bl1_zscalv( BLIS1_NO_CONJUGATE,
1313  m_ahead,
1314  &inv_tau11,
1315  z2, inc_z );
1316 
1317  // FLA_Dot( a12t, a21, dot_product );
1318  // FLA_Scal( minus_inv_tau11, dot_product );
1319  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1320  bl1_zdot( BLIS1_NO_CONJUGATE,
1321  m_ahead,
1322  a12t, cs_A,
1323  a21, rs_A,
1324  &dot_product );
1325  bl1_zscals( &minus_inv_tau11, &dot_product );
1326  bl1_zaxpyv( BLIS1_CONJUGATE,
1327  m_ahead,
1328  &dot_product,
1329  a21, rs_A,
1330  a12t, cs_A );
1331 
1332  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1333  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
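1334  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1335  BLIS1_NO_CONJUGATE,
1336  m_behind,
1337  n_ahead,
1338  buff_1,
1339  A02, rs_A, cs_A,
1340  a21, rs_A,
1341  buff_0,
1342  y0, inc_y );
1343  bl1_zger( BLIS1_NO_CONJUGATE,
1344  BLIS1_CONJUGATE,
1345  m_behind,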
1346  n_ahead,
1347  &minus_inv_tau11,
1348  y0, inc_y,
1349  a21, rs_A,
1350  A02, rs_A, cs_A );
1351 
1352  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1353  bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
1354  BLIS1_NO_CONJUGATE,
1355  m_ahead,
1356  n_behind,
1357  buff_1,
1358  A20, rs_A, cs_A,
1359  a21, rs_A,
1360  buff_0,
1361  t01, rs_T );
1362 
1363  // FLA_Copy( first_elem, a21_t );
1364  *a21_t = first_elem;
1365  }
1366 
1367  if ( m_behind + 1 == b_alg && m_ahead > 0 )
1368  {
1369  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1370  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1371  FLA_Fused_Gerc2_opz_var1( m_ahead,
1372  n_ahead,
1373  buff_m1,
1374  u2, inc_u,
1375  y2, inc_y,
1376  z2, inc_z,
1377  u2, inc_u,
1378  A22, rs_A, cs_A );
1379  }
1380 
1381  /*------------------------------------------------------------*/
1382 
1383  }
1384 
1385  // FLA_Obj_free( &u );
1386  // FLA_Obj_free( &y );
1387  // FLA_Obj_free( &z );
1388  // FLA_Obj_free( &v );
1389  // FLA_Obj_free( &w );
1390  FLA_free( buff_u );
1391  FLA_free( buff_y );
1392  FLA_free( buff_z );
1393  FLA_free( buff_v );
1394  FLA_free( buff_w );
1395 
1396  return FLA_SUCCESS;
1397 }
FLA_Error FLA_Fused_Gerc2_Ahx_Ax_opz_var1(int m_A, int n_A, dcomplex *buff_alpha, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_x, int inc_x, dcomplex *buff_v, int inc_v, dcomplex *buff_w, int inc_w)
Definition: FLA_Fused_Gerc2_Ahx_Ax_opt_var1.c:421
void bl1_zcopyv(conj1_t conj, int m, dcomplex *x, int incx, dcomplex *y, int incy)
Definition: bl1_copyv.c:63

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_Ahx_Ax_opz_var1(), FLA_Fused_Gerc2_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_ofu_var3().

◆ FLA_Hess_UT_step_ofz_var4()

FLA_Error FLA_Hess_UT_step_ofz_var4 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
963 {
964  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
965  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
966  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
967  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
968 
969  dcomplex first_elem, last_elem;
970  dcomplex dot_product;
971  dcomplex beta, conj_beta;
972  dcomplex inv_tau11;
973  dcomplex minus_inv_tau11;
974  int i;
975 
976  // b_alg = FLA_Obj_length( T );
977  int b_alg = m_T;
978 
979  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
980  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
981  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
982  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
983  int inc_e = 1;
984 
985  // FLA_Set( FLA_ZERO, Y );
986  // FLA_Set( FLA_ZERO, Z );
987  bl1_zsetm( m_A,
988  b_alg,
989  buff_0,
990  buff_Y, rs_Y, cs_Y );
991  bl1_zsetm( m_A,
992  b_alg,
993  buff_0,
994  buff_Z, rs_Z, cs_Z );
995 
996  for ( i = 0; i < b_alg; ++i )
997  {
998  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
999  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1000  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1001  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1002  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1003  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1004  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1005 
1006  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1007  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1008  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1009 
1010  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1011  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1012  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1013 
1014  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1015  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1016 
1017  dcomplex* e0 = buff_e + (0 )*inc_e;
1018 
1019  dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
1020 
1021  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1022  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1023 
1024  dcomplex* ABL = a10t;
1025  dcomplex* ZBL = z10t;
1026 
1027  dcomplex* a2 = alpha11;
1028 
1029  int m_ahead = m_A - i - 1;
1030  int n_ahead = m_A - i - 1;
1031  int m_behind = i;
1032  int n_behind = i;
1033 
1034  /*------------------------------------------------------------*/
1035 
1036  if ( m_behind > 0 )
1037  {
1038  // FLA_Copy( a10t_r, last_elem );
1039  // FLA_Set( FLA_ONE, a10t_r );
1040  last_elem = *a10t_r;
1041  *a10t_r = *buff_1;
1042  }
1043 
1044  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1045  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
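1046  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1047  BLIS1_CONJUGATE,
1048  m_ahead + 1,
1049  n_behind,
1050  buff_m1,
1051  ABL, rs_A, cs_A,
1052  y10t, cs_Y,
1053  buff_1,
1054  a2, rs_A );
1055  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1056  BLIS1_CONJUGATE,
1057  m_ahead + 1,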
1058  n_behind,
1059  buff_m1,
1060  ZBL, rs_Z, cs_Z,
1061  a10t, cs_A,
1062  buff_1,
1063  a2, rs_A );
1064 
1065  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1066  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
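1067  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1068  BLIS1_NO_CONJUGATE,
1069  m_ahead,
1070  n_behind,
1071  buff_m1,
1072  Y20, rs_Y, cs_Y,
1073  a10t, cs_A,
1074  buff_1,
1075  a12t, cs_A );
1076  bl1_zgemv( BLIS1_CONJ_NO_TRANSPOSE,
1077  BLIS1_NO_CONJUGATE,
1078  m_ahead,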
1079  n_behind,
1080  buff_m1,
1081  A20, rs_A, cs_A,
1082  z10t, cs_Z,
1083  buff_1,
1084  a12t, cs_A );
1085 
1086  if ( m_behind > 0 )
1087  {
1088  // FLA_Copy( last_elem, a10t_r );
1089  *a10t_r = last_elem;
1090  }
1091 
1092  if ( m_ahead > 0 )
1093  {
1094  // FLA_Househ2_UT( FLA_LEFT,
1095  // a21_t,
1096  // a21_b, tau11 );
1097  FLA_Househ2_UT_l_opz( m_ahead - 1,
1098  a21_t,
1099  a21_b, rs_A,
1100  tau11 );
1101 
1102  // FLA_Set( FLA_ONE, inv_tau11 );
1103  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1104  // FLA_Copy( inv_tau11, minus_inv_tau11 );
1105  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1106  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
1107  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
1108 
1109  // FLA_Copy( a21_t, first_elem );
1110  // FLA_Set( FLA_ONE, a21_t );
1111  first_elem = *a21_t;
1112  *a21_t = *buff_1;
1113 
1114  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1115  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1116  FLA_Fused_Ahx_Ax_opz_var1( m_ahead,
1117  n_ahead,
1118  A22, rs_A, cs_A,
1119  a21, rs_A,
1120  y21, rs_Y,
1121  z21, rs_Z );
1122 
1123  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1124  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1125  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1126  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1127  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1128  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1129  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1130  // FLA_Copy( d0, t01 );
1131  FLA_Fused_Uhu_Yhu_Zhu_opz_var1( m_ahead,
1132  n_behind,
1133  buff_m1,
1134  A20, rs_A, cs_A,
1135  Y20, rs_Y, cs_Y,
1136  Z20, rs_Z, cs_Z,
1137  t01, rs_T,
1138  a21, rs_A,
1139  y21, rs_Y,
1140  z21, rs_Z );
1141 
1142  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1143  // FLA_Inv_scal( FLA_TWO, beta );
1144  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1145  bl1_zdot( BLIS1_CONJUGATE,
1146  m_ahead,
1147  a21, rs_A,
1148  z21, rs_Z,
1149  &beta );
1150  bl1_zinvscals( buff_2, &beta );
1151  bl1_zcopyconj( &beta, &conj_beta );
1152 
1153  // FLA_Scal( minus_inv_tau11, conj_beta );
1154  // FLA_Axpy( conj_beta, a21, y21 );
1155  // FLA_Scal( inv_tau11, y21 );
1156  bl1_zscals( &minus_inv_tau11, &conj_beta );
1157  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1158  m_ahead,
1159  &conj_beta,
1160  a21, rs_A,
1161  y21, rs_Y );
1162  bl1_zscalv( BLIS1_NO_CONJUGATE,
1163  m_ahead,
1164  &inv_tau11,
1165  y21, rs_Y );
1166 
1167  // FLA_Scal( minus_inv_tau11, beta );
1168  // FLA_Axpy( beta, a21, z21 );
1169  // FLA_Scal( inv_tau11, z21 );
1170  bl1_zscals( &minus_inv_tau11, &beta );
1171  bl1_zaxpyv( BLIS1_NO_CONJUGATE,
1172  m_ahead,
1173  &beta,
1174  a21, rs_A,
1175  z21, rs_Z );
1176  bl1_zscalv( BLIS1_NO_CONJUGATE,
1177  m_ahead,
1178  &inv_tau11,
1179  z21, rs_Z );
1180 
1181  // FLA_Dot( a12t, a21, dot_product );
1182  // FLA_Scal( minus_inv_tau11, dot_product );
1183  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1184  bl1_zdot( BLIS1_NO_CONJUGATE,
1185  m_ahead,
1186  a12t, cs_A,
1187  a21, rs_A,
1188  &dot_product );
1189  bl1_zscals( &minus_inv_tau11, &dot_product );
1190  bl1_zaxpyv( BLIS1_CONJUGATE,
1191  m_ahead,
1192  &dot_product,
1193  a21, rs_A,
1194  a12t, cs_A );
1195 
1196  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1197  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
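1198  bl1_zgemv( BLIS1_NO_TRANSPOSE,
1199  BLIS1_NO_CONJUGATE,
1200  m_behind,
1201  n_ahead,
1202  buff_1,
1203  A02, rs_A, cs_A,
1204  a21, rs_A,
1205  buff_0,
1206  e0, inc_e );
1207  bl1_zger( BLIS1_NO_CONJUGATE,
1208  BLIS1_CONJUGATE,
1209  m_behind,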
1210  n_ahead,
1211  &minus_inv_tau11,
1212  e0, inc_e,
1213  a21, rs_A,
1214  A02, rs_A, cs_A );
1215 
1216  // FLA_Copy( first_elem, a21_t );
1217  *a21_t = first_elem;
1218  }
1219 
1220  /*------------------------------------------------------------*/
1221 
1222  }
1223 
1224  // FLA_Obj_free( &e );
1225  FLA_free( buff_e );
1226 
1227  return FLA_SUCCESS;
1228 }
FLA_Error FLA_Fused_Uhu_Yhu_Zhu_opz_var1(int m_U, int n_U, dcomplex *buff_delta, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_t, int inc_t, dcomplex *buff_u, int inc_u, dcomplex *buff_y, int inc_y, dcomplex *buff_z, int inc_z)
Definition: FLA_Fused_Uhu_Yhu_Zhu_opt_var1.c:500
void bl1_zsetm(int m, int n, dcomplex *sigma, dcomplex *a, int a_rs, int a_cs)
Definition: bl1_setm.c:78

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Fused_Ahx_Ax_opz_var1(), FLA_Fused_Uhu_Yhu_Zhu_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_ofu_var4().

◆ FLA_Hess_UT_step_opc_var1()

FLA_Error FLA_Hess_UT_step_opc_var1 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
287 {
288  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
289  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
290 
291  scomplex first_elem;
292  int i;
293 
294  // b_alg = FLA_Obj_length( T );
295  int b_alg = m_T;
296 
297  for ( i = 0; i < b_alg; ++i )
298  {
299  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
300  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
301 
302  scomplex* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
303  scomplex* a21_b = buff_A + (i )*cs_A + (i+2)*rs_A;
304 
305  scomplex* A22_t = buff_A + (i+1)*cs_A + (i+1)*rs_A;
306  scomplex* A22_b = buff_A + (i+1)*cs_A + (i+2)*rs_A;
307 
308  scomplex* A2_l = buff_A + (i+1)*cs_A + (0 )*rs_A;
309  scomplex* A2_r = buff_A + (i+2)*cs_A + (0 )*rs_A;
310 
311  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
312  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
313 
314  int m_ahead = m_A - i - 1;
315  int n_ahead = m_A - i - 1;
316  int n_behind = i;
317 
318  /*------------------------------------------------------------*/
319 
320  if ( m_ahead > 0 )
321  {
322  // FLA_Househ2_UT( FLA_LEFT,
323  // a21_t,
324  // a21_b, tau11 );
325  FLA_Househ2_UT_l_opc( m_ahead - 1,
326  a21_t,
327  a21_b, rs_A,
328  tau11 );
329 
330  // FLA_Copy( a21_t, first_elem );
331  // FLA_Set( FLA_ONE, a21_t );
332  first_elem = *a21_t;
333  *a21_t = *buff_1;
334 
335  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
336  // A22_b );
337  FLA_Apply_H2_UT_l_opc_var1( m_ahead - 1,
338  n_ahead,
339  tau11,
340  a21_b, rs_A,
341  A22_t, cs_A,
342  A22_b, rs_A, cs_A );
343 
344  // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
346  n_ahead - 1,
347  tau11,
348  a21_b, rs_A,
349  A2_l, rs_A,
350  A2_r, rs_A, cs_A );
351 
352  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
353  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
354  BLIS1_NO_CONJUGATE,
355  m_ahead,
356  n_behind,
357  buff_1,
358  A20, rs_A, cs_A,
359  a21, rs_A,
360  buff_0,
361  t01, rs_T );
362 
363  // FLA_Copy( first_elem, a21_t );
364  *a21_t = first_elem;
365  }
366 
367  /*------------------------------------------------------------*/
368 
369  }
370 
371  return FLA_SUCCESS;
372 }
FLA_Error FLA_Apply_H2_UT_l_opc_var1(int m_u2_A2, int n_a1t, scomplex *tau, scomplex *u2, int inc_u2, scomplex *a1t, int inc_a1t, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:269
FLA_Error FLA_Apply_H2_UT_r_opc_var1(int n_u2h_A2, int m_a1, scomplex *tau, scomplex *u2h, int inc_u2h, scomplex *a1, int inc_a1, scomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:254

References bl1_cgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opc_var1(), FLA_Apply_H2_UT_r_opc_var1(), FLA_Househ2_UT_l_opc(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var1().

◆ FLA_Hess_UT_step_opc_var2()

FLA_Error FLA_Hess_UT_step_opc_var2 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
539 {
540  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
541  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
542  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
543  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
544 
545  scomplex first_elem;
546  scomplex dot_product;
547  scomplex beta, conj_beta;
548  scomplex inv_tau11;
549  scomplex minus_inv_tau11;
550  int i;
551 
552  // b_alg = FLA_Obj_length( T );
553  int b_alg = m_T;
554 
555  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
556  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
557  scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
558  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
559  int inc_y = 1;
560  int inc_z = 1;
561 
562  for ( i = 0; i < b_alg; ++i )
563  {
564  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
565  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
566  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
567  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
568  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
569 
570  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
571  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
572 
573  scomplex* y0 = buff_y + (0 )*inc_y;
574  scomplex* y2 = buff_y + (i+1)*inc_y;
575 
576  scomplex* z2 = buff_z + (i+1)*inc_z;
577 
578  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
579  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
580 
581  int m_ahead = m_A - i - 1;
582  int n_ahead = m_A - i - 1;
583  int m_behind = i;
584  int n_behind = i;
585 
586  /*------------------------------------------------------------*/
587 
588  if ( m_ahead > 0 )
589  {
590  // FLA_Househ2_UT( FLA_LEFT,
591  // a21_t,
592  // a21_b, tau11 );
593  FLA_Househ2_UT_l_opc( m_ahead - 1,
594  a21_t,
595  a21_b, rs_A,
596  tau11 );
597 
598  // FLA_Set( FLA_ONE, inv_tau11 );
599  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
600  // FLA_Copy( inv_tau11, minus_inv_tau11 );
601  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
602  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
603  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
604 
605  // FLA_Copy( a21_t, first_elem );
606  // FLA_Set( FLA_ONE, a21_t );
607  first_elem = *a21_t;
608  *a21_t = *buff_1;
609 
610  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
611  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
612  BLIS1_NO_CONJUGATE,
613  m_ahead,
614  n_ahead,
615  buff_1,
616  A22, rs_A, cs_A,
617  a21, rs_A,
618  buff_0,
619  y2, inc_y );
620 
621  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
622  bl1_cgemv( BLIS1_NO_TRANSPOSE,
623  BLIS1_NO_CONJUGATE,
624  m_ahead,
625  n_ahead,
626  buff_1,
627  A22, rs_A, cs_A,
628  a21, rs_A,
629  buff_0,
630  z2, inc_z );
631 
632  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
633  // FLA_Inv_scal( FLA_TWO, beta );
634  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
635  bl1_cdot( BLIS1_CONJUGATE,
636  m_ahead,
637  a21, rs_A,
638  z2, inc_z,
639  &beta );
640  bl1_cinvscals( buff_2, &beta );
641  bl1_ccopyconj( &beta, &conj_beta );
642 
643  // FLA_Scal( minus_inv_tau11, conj_beta );
644  // FLA_Axpy( conj_beta, a21, y2 );
645  // FLA_Scal( inv_tau11, y2 );
646  bl1_cscals( &minus_inv_tau11, &conj_beta );
647  bl1_caxpyv( BLIS1_NO_CONJUGATE,
648  m_ahead,
649  &conj_beta,
650  a21, rs_A,
651  y2, inc_y );
652  bl1_cscalv( BLIS1_NO_CONJUGATE,
653  m_ahead,
654  &inv_tau11,
655  y2, inc_y );
656 
657  // FLA_Scal( minus_inv_tau11, beta );
658  // FLA_Axpy( beta, a21, z2 );
659  // FLA_Scal( inv_tau11, z2 );
660  bl1_cscals( &minus_inv_tau11, &beta );
661  bl1_caxpyv( BLIS1_NO_CONJUGATE,
662  m_ahead,
663  &beta,
664  a21, rs_A,
665  z2, inc_z );
666  bl1_cscalv( BLIS1_NO_CONJUGATE,
667  m_ahead,
668  &inv_tau11,
669  z2, inc_z );
670 
671  // FLA_Dot( a12t, a21, dot_product );
672  // FLA_Scal( minus_inv_tau11, dot_product );
673  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
674  bl1_cdot( BLIS1_NO_CONJUGATE,
675  m_ahead,
676  a12t, cs_A,
677  a21, rs_A,
678  &dot_product );
679  bl1_cscals( &minus_inv_tau11, &dot_product );
680  bl1_caxpyv( BLIS1_CONJUGATE,
681  m_ahead,
682  &dot_product,
683  a21, rs_A,
684  a12t, cs_A );
685 
686  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
687  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
688  bl1_cgemv( BLIS1_NO_TRANSPOSE,
689  BLIS1_NO_CONJUGATE,
690  m_behind,
691  n_ahead,
692  buff_1,
693  A02, rs_A, cs_A,
694  a21, rs_A,
695  buff_0,
696  y0, inc_y );
697  bl1_cger( BLIS1_NO_CONJUGATE,
698  BLIS1_CONJUGATE,
699  m_behind,
700  n_ahead,
701  &minus_inv_tau11,
702  y0, inc_y,
703  a21, rs_A,
704  A02, rs_A, cs_A );
705 
706  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
707  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
708  bl1_cger( BLIS1_NO_CONJUGATE,
709  BLIS1_CONJUGATE,
710  m_ahead,
711  n_ahead,
712  buff_m1,
713  a21, rs_A,
714  y2, inc_y,
715  A22, rs_A, cs_A );
716  bl1_cger( BLIS1_NO_CONJUGATE,
717  BLIS1_CONJUGATE,
718  m_ahead,
719  n_ahead,
720  buff_m1,
721  z2, inc_z,
722  a21, rs_A,
723  A22, rs_A, cs_A );
724 
725  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
726  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
727  BLIS1_NO_CONJUGATE,
728  m_ahead,
729  n_behind,
730  buff_1,
731  A20, rs_A, cs_A,
732  a21, rs_A,
733  buff_0,
734  t01, rs_T );
735 
736  // FLA_Copy( first_elem, a21_t );
737  *a21_t = first_elem;
738  }
739 
740  /*------------------------------------------------------------*/
741 
742  }
743 
744  // FLA_Obj_free( &y );
745  // FLA_Obj_free( &z );
746  FLA_free( buff_y );
747  FLA_free( buff_z );
748 
749  return FLA_SUCCESS;
750 }

References bl1_caxpyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opc_var3()

FLA_Error FLA_Hess_UT_step_opc_var3 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
807 {
808  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
809  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
810  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
811  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
812 
813  scomplex first_elem;
814  scomplex dot_product;
815  scomplex beta, conj_beta;
816  scomplex inv_tau11;
817  scomplex minus_inv_tau11;
818  scomplex minus_upsilon1, minus_conj_upsilon1;
819  scomplex minus_psi1, minus_conj_psi1;
820  scomplex minus_zeta1;
821  int i;
822 
823  // b_alg = FLA_Obj_length( T );
824  int b_alg = m_T;
825 
826  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
827  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
828  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
829  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
830  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
831  scomplex* buff_u = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
832  scomplex* buff_y = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
833  scomplex* buff_z = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
834  scomplex* buff_v = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
835  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
836  int inc_u = 1;
837  int inc_y = 1;
838  int inc_z = 1;
839  int inc_v = 1;
840  int inc_w = 1;
841 
842  // Initialize some variables (only to prevent compiler warnings).
843  first_elem = *buff_0;
844  minus_inv_tau11 = *buff_0;
845 
846  for ( i = 0; i < b_alg; ++i )
847  {
848  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
849  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
850  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
851  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
852  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
853  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
854 
855  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
856  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
857 
858  scomplex* upsilon1 = buff_u + (i )*inc_u;
859  scomplex* u2 = buff_u + (i+1)*inc_u;
860 
861  scomplex* y0 = buff_y + (0 )*inc_y;
862  scomplex* psi1 = buff_y + (i )*inc_y;
863  scomplex* y2 = buff_y + (i+1)*inc_y;
864 
865  scomplex* zeta1 = buff_z + (i )*inc_z;
866  scomplex* z2 = buff_z + (i+1)*inc_z;
867 
868  scomplex* v2 = buff_v + (i+1)*inc_v;
869 
870  scomplex* w2 = buff_w + (i+1)*inc_w;
871 
872  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
873  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
874 
875  int m_ahead = m_A - i - 1;
876  int n_ahead = m_A - i - 1;
877  int m_behind = i;
878  int n_behind = i;
879 
880  /*------------------------------------------------------------*/
881 
882  if ( m_behind > 0 )
883  {
884  // FLA_Copy( upsilon1, minus_upsilon1 );
885  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
886  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
887  bl1_cmult3( buff_m1, upsilon1, &minus_upsilon1 );
888  bl1_ccopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
889 
890  // FLA_Copy( psi1, minus_psi1 );
891  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
892  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
893  bl1_cmult3( buff_m1, psi1, &minus_psi1 );
894  bl1_ccopyconj( &minus_psi1, &minus_conj_psi1 );
895 
896  // FLA_Copy( zeta1, minus_zeta1 );
897  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
898  bl1_cmult3( buff_m1, zeta1, &minus_zeta1 );
899 
900  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
901  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
902  bl1_caxpyv( BLIS1_CONJUGATE,
903  1,
904  &minus_upsilon1,
905  psi1, 1,
906  alpha11, 1 );
907  bl1_caxpyv( BLIS1_CONJUGATE,
908  1,
909  &minus_zeta1,
910  upsilon1, 1,
911  alpha11, 1 );
912 
913  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
914  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
915  bl1_caxpyv( BLIS1_CONJUGATE,
916  m_ahead,
917  &minus_upsilon1,
918  y2, inc_y,
919  a12t, cs_A );
920  bl1_caxpyv( BLIS1_CONJUGATE,
921  m_ahead,
922  &minus_zeta1,
923  u2, inc_u,
924  a12t, cs_A );
925 
926  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
927  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
928  bl1_caxpyv( BLIS1_NO_CONJUGATE,
929  m_ahead,
930  &minus_conj_psi1,
931  u2, inc_u,
932  a21, rs_A );
933  bl1_caxpyv( BLIS1_NO_CONJUGATE,
934  m_ahead,
935  &minus_conj_upsilon1,
936  z2, inc_z,
937  a21, rs_A );
938  }
939 
940  if ( m_ahead > 0 )
941  {
942  // FLA_Househ2_UT( FLA_LEFT,
943  // a21_t,
944  // a21_b, tau11 );
945  FLA_Househ2_UT_l_opc( m_ahead - 1,
946  a21_t,
947  a21_b, rs_A,
948  tau11 );
949 
950  // FLA_Set( FLA_ONE, inv_tau11 );
951  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
952  // FLA_Copy( inv_tau11, minus_inv_tau11 );
953  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
954  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
955  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
956 
957  // FLA_Copy( a21_t, first_elem );
958  // FLA_Set( FLA_ONE, a21_t );
959  first_elem = *a21_t;
960  *a21_t = *buff_1;
961  }
962 
963  if ( m_behind > 0 )
964  {
965  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
966  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
967  bl1_cger( BLIS1_NO_CONJUGATE,
968  BLIS1_CONJUGATE,
969  m_ahead,
970  n_ahead,
971  buff_m1,
972  u2, inc_u,
973  y2, inc_y,
974  A22, rs_A, cs_A );
975  bl1_cger( BLIS1_NO_CONJUGATE,
976  BLIS1_CONJUGATE,
977  m_ahead,
978  n_ahead,
979  buff_m1,
980  z2, inc_z,
981  u2, inc_u,
982  A22, rs_A, cs_A );
983  }
984 
985  if ( m_ahead > 0 )
986  {
987  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
988  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
989  BLIS1_NO_CONJUGATE,
990  m_ahead,
991  n_ahead,
992  buff_1,
993  A22, rs_A, cs_A,
994  a21, rs_A,
995  buff_0,
996  v2, inc_v );
997 
998  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
999  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1000  BLIS1_NO_CONJUGATE,
1001  m_ahead,
1002  n_ahead,
1003  buff_1,
1004  A22, rs_A, cs_A,
1005  a21, rs_A,
1006  buff_0,
1007  w2, inc_w );
1008 
1009  // FLA_Copy( a21, u2 );
1010  // FLA_Copy( v2, y2 );
1011  // FLA_Copy( w2, z2 );
1012  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1013  m_ahead,
1014  a21, rs_A,
1015  u2, inc_u );
1016  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1017  m_ahead,
1018  v2, inc_v,
1019  y2, inc_y );
1020  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1021  m_ahead,
1022  w2, inc_w,
1023  z2, inc_z );
1024 
1025  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
1026  // FLA_Inv_scal( FLA_TWO, beta );
1027  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1028  bl1_cdot( BLIS1_CONJUGATE,
1029  m_ahead,
1030  a21, rs_A,
1031  z2, inc_z,
1032  &beta );
1033  bl1_cinvscals( buff_2, &beta );
1034  bl1_ccopyconj( &beta, &conj_beta );
1035 
1036  // FLA_Scal( minus_inv_tau11, conj_beta );
1037  // FLA_Axpy( conj_beta, a21, y2 );
1038  // FLA_Scal( inv_tau11, y2 );
1039  bl1_cscals( &minus_inv_tau11, &conj_beta );
1040  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1041  m_ahead,
1042  &conj_beta,
1043  a21, rs_A,
1044  y2, inc_y );
1045  bl1_cscalv( BLIS1_NO_CONJUGATE,
1046  m_ahead,
1047  &inv_tau11,
1048  y2, inc_y );
1049 
1050  // FLA_Scal( minus_inv_tau11, beta );
1051  // FLA_Axpy( beta, a21, z2 );
1052  // FLA_Scal( inv_tau11, z2 );
1053  bl1_cscals( &minus_inv_tau11, &beta );
1054  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1055  m_ahead,
1056  &beta,
1057  a21, rs_A,
1058  z2, inc_z );
1059  bl1_cscalv( BLIS1_NO_CONJUGATE,
1060  m_ahead,
1061  &inv_tau11,
1062  z2, inc_z );
1063 
1064  // FLA_Dot( a12t, a21, dot_product );
1065  // FLA_Scal( minus_inv_tau11, dot_product );
1066  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1067  bl1_cdot( BLIS1_NO_CONJUGATE,
1068  m_ahead,
1069  a12t, cs_A,
1070  a21, rs_A,
1071  &dot_product );
1072  bl1_cscals( &minus_inv_tau11, &dot_product );
1073  bl1_caxpyv( BLIS1_CONJUGATE,
1074  m_ahead,
1075  &dot_product,
1076  a21, rs_A,
1077  a12t, cs_A );
1078 
1079  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
1080  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
1081  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1082  BLIS1_NO_CONJUGATE,
1083  m_behind,
1084  n_ahead,
1085  buff_1,
1086  A02, rs_A, cs_A,
1087  a21, rs_A,
1088  buff_0,
1089  y0, inc_y );
1090  bl1_cger( BLIS1_NO_CONJUGATE,
1091  BLIS1_CONJUGATE,
1092  m_behind,
1093  n_ahead,
1094  &minus_inv_tau11,
1095  y0, inc_y,
1096  a21, rs_A,
1097  A02, rs_A, cs_A );
1098 
1099  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
1100  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1101  BLIS1_NO_CONJUGATE,
1102  m_ahead,
1103  n_behind,
1104  buff_1,
1105  A20, rs_A, cs_A,
1106  a21, rs_A,
1107  buff_0,
1108  t01, rs_T );
1109 
1110  // FLA_Copy( first_elem, a21_t );
1111  *a21_t = first_elem;
1112  }
1113 
1114  if ( m_behind + 1 == b_alg && m_ahead > 0 )
1115  {
1116  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
1117  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
1118  bl1_cger( BLIS1_NO_CONJUGATE,
1119  BLIS1_CONJUGATE,
1120  m_ahead,
1121  n_ahead,
1122  buff_m1,
1123  u2, inc_u,
1124  y2, inc_y,
1125  A22, rs_A, cs_A );
1126  bl1_cger( BLIS1_NO_CONJUGATE,
1127  BLIS1_CONJUGATE,
1128  m_ahead,
1129  n_ahead,
1130  buff_m1,
1131  z2, inc_z,
1132  u2, inc_u,
1133  A22, rs_A, cs_A );
1134  }
1135 
1136  /*------------------------------------------------------------*/
1137 
1138  }
1139 
1140  // FLA_Obj_free( &u );
1141  // FLA_Obj_free( &y );
1142  // FLA_Obj_free( &z );
1143  // FLA_Obj_free( &v );
1144  // FLA_Obj_free( &w );
1145  FLA_free( buff_u );
1146  FLA_free( buff_y );
1147  FLA_free( buff_z );
1148  FLA_free( buff_v );
1149  FLA_free( buff_w );
1150 
1151  return FLA_SUCCESS;
1152 }

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opc_var4()

FLA_Error FLA_Hess_UT_step_opc_var4 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
858 {
859  scomplex* buff_2 = FLA_COMPLEX_PTR( FLA_TWO );
860  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
861  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
862  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
863 
864  scomplex first_elem, last_elem;
865  scomplex dot_product;
866  scomplex beta, conj_beta;
867  scomplex inv_tau11;
868  scomplex minus_inv_tau11;
869  int i;
870 
871  // b_alg = FLA_Obj_length( T );
872  int b_alg = m_T;
873 
874  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
875  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
876  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
877  scomplex* buff_d = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
878  scomplex* buff_e = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
879  scomplex* buff_f = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
880  int inc_d = 1;
881  int inc_e = 1;
882  int inc_f = 1;
883 
884  // FLA_Set( FLA_ZERO, Y );
885  // FLA_Set( FLA_ZERO, Z );
886  bl1_csetm( m_A,
887  b_alg,
888  buff_0,
889  buff_Y, rs_Y, cs_Y );
890  bl1_csetm( m_A,
891  b_alg,
892  buff_0,
893  buff_Z, rs_Z, cs_Z );
894 
895  for ( i = 0; i < b_alg; ++i )
896  {
897  scomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
898  scomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
899  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
900  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
901  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
902  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
903  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
904 
905  scomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
906  scomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
907  scomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
908 
909  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
910  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
911  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
912 
913  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
914  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
915 
916  scomplex* d0 = buff_d + (0 )*inc_d;
917 
918  scomplex* e0 = buff_e + (0 )*inc_e;
919 
920  scomplex* f0 = buff_f + (0 )*inc_f;
921 
922  scomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
923 
924  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
925  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
926 
927  scomplex* ABL = a10t;
928  scomplex* ZBL = z10t;
929 
930  scomplex* a2 = alpha11;
931 
932  int m_ahead = m_A - i - 1;
933  int n_ahead = m_A - i - 1;
934  int m_behind = i;
935  int n_behind = i;
936 
937  /*------------------------------------------------------------*/
938 
939  if ( m_behind > 0 )
940  {
941  // FLA_Copy( a10t_r, last_elem );
942  // FLA_Set( FLA_ONE, a10t_r );
943  last_elem = *a10t_r;
944  *a10t_r = *buff_1;
945  }
946 
947  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
948  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
949  bl1_cgemv( BLIS1_NO_TRANSPOSE,
950  BLIS1_CONJUGATE,
951  m_ahead + 1,
952  n_behind,
953  buff_m1,
954  ABL, rs_A, cs_A,
955  y10t, cs_Y,
956  buff_1,
957  a2, rs_A );
958  bl1_cgemv( BLIS1_NO_TRANSPOSE,
959  BLIS1_CONJUGATE,
960  m_ahead + 1,
961  n_behind,
962  buff_m1,
963  ZBL, rs_Z, cs_Z,
964  a10t, cs_A,
965  buff_1,
966  a2, rs_A );
967 
968  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
969  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
970  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
971  BLIS1_NO_CONJUGATE,
972  m_ahead,
973  n_behind,
974  buff_m1,
975  Y20, rs_Y, cs_Y,
976  a10t, cs_A,
977  buff_1,
978  a12t, cs_A );
979  bl1_cgemv( BLIS1_CONJ_NO_TRANSPOSE,
980  BLIS1_NO_CONJUGATE,
981  m_ahead,
982  n_behind,
983  buff_m1,
984  A20, rs_A, cs_A,
985  z10t, cs_Z,
986  buff_1,
987  a12t, cs_A );
988 
989  if ( m_behind > 0 )
990  {
991  // FLA_Copy( last_elem, a10t_r );
992  *a10t_r = last_elem;
993  }
994 
995  if ( m_ahead > 0 )
996  {
997  // FLA_Househ2_UT( FLA_LEFT,
998  // a21_t,
999  // a21_b, tau11 );
1000  FLA_Househ2_UT_l_opc( m_ahead - 1,
1001  a21_t,
1002  a21_b, rs_A,
1003  tau11 );
1004 
1005  // FLA_Set( FLA_ONE, inv_tau11 );
1006  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1007  // FLA_Copy( inv_tau11, minus_inv_tau11 );
1008  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1009  bl1_cdiv3( buff_1, tau11, &inv_tau11 );
1010  bl1_cneg2( &inv_tau11, &minus_inv_tau11 );
1011 
1012  // FLA_Copy( a21_t, first_elem );
1013  // FLA_Set( FLA_ONE, a21_t );
1014  first_elem = *a21_t;
1015  *a21_t = *buff_1;
1016 
1017  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1018  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1019  BLIS1_NO_CONJUGATE,
1020  m_ahead,
1021  n_ahead,
1022  buff_1,
1023  A22, rs_A, cs_A,
1024  a21, rs_A,
1025  buff_0,
1026  y21, rs_Y );
1027 
1028  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1029  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1030  BLIS1_NO_CONJUGATE,
1031  m_ahead,
1032  n_ahead,
1033  buff_1,
1034  A22, rs_A, cs_A,
1035  a21, rs_A,
1036  buff_0,
1037  z21, rs_Z );
1038 
1039  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1040  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1041  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1042  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1043  BLIS1_NO_CONJUGATE,
1044  m_ahead,
1045  n_behind,
1046  buff_1,
1047  A20, rs_A, cs_A,
1048  a21, rs_A,
1049  buff_0,
1050  d0, inc_d );
1051  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1052  BLIS1_NO_CONJUGATE,
1053  m_ahead,
1054  n_behind,
1055  buff_1,
1056  Y20, rs_Y, cs_Y,
1057  a21, rs_A,
1058  buff_0,
1059  e0, inc_e );
1060  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
1061  BLIS1_NO_CONJUGATE,
1062  m_ahead,
1063  n_behind,
1064  buff_1,
1065  Z20, rs_Z, cs_Z,
1066  a21, rs_A,
1067  buff_0,
1068  f0, inc_f );
1069 
1070  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1071  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1072  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1073  BLIS1_NO_CONJUGATE,
1074  m_ahead,
1075  n_behind,
1076  buff_m1,
1077  Y20, rs_Y, cs_Y,
1078  d0, inc_d,
1079  buff_1,
1080  y21, rs_Y );
1081  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1082  BLIS1_NO_CONJUGATE,
1083  m_ahead,
1084  n_behind,
1085  buff_m1,
1086  A20, rs_A, cs_A,
1087  f0, inc_f,
1088  buff_1,
1089  y21, rs_Y );
1090 
1091  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1092  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1093  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1094  BLIS1_NO_CONJUGATE,
1095  m_ahead,
1096  n_behind,
1097  buff_m1,
1098  A20, rs_A, cs_A,
1099  e0, inc_e,
1100  buff_1,
1101  z21, rs_Z );
1102  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1103  BLIS1_NO_CONJUGATE,
1104  m_ahead,
1105  n_behind,
1106  buff_m1,
1107  Z20, rs_Z, cs_Z,
1108  d0, inc_d,
1109  buff_1,
1110  z21, rs_Z );
1111 
1112  // FLA_Copy( d0, t01 );
1113  bl1_ccopyv( BLIS1_NO_CONJUGATE,
1114  n_behind,
1115  d0, inc_d,
1116  t01, rs_T );
1117 
1118  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1119  // FLA_Inv_scal( FLA_TWO, beta );
1120  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1121  bl1_cdot( BLIS1_CONJUGATE,
1122  m_ahead,
1123  a21, rs_A,
1124  z21, rs_Z,
1125  &beta );
1126  bl1_cinvscals( buff_2, &beta );
1127  bl1_ccopyconj( &beta, &conj_beta );
1128 
1129  // FLA_Scal( minus_inv_tau11, conj_beta );
1130  // FLA_Axpy( conj_beta, a21, y21 );
1131  // FLA_Scal( inv_tau11, y21 );
1132  bl1_cscals( &minus_inv_tau11, &conj_beta );
1133  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1134  m_ahead,
1135  &conj_beta,
1136  a21, rs_A,
1137  y21, rs_Y );
1138  bl1_cscalv( BLIS1_NO_CONJUGATE,
1139  m_ahead,
1140  &inv_tau11,
1141  y21, rs_Y );
1142 
1143  // FLA_Scal( minus_inv_tau11, beta );
1144  // FLA_Axpy( beta, a21, z21 );
1145  // FLA_Scal( inv_tau11, z21 );
1146  bl1_cscals( &minus_inv_tau11, &beta );
1147  bl1_caxpyv( BLIS1_NO_CONJUGATE,
1148  m_ahead,
1149  &beta,
1150  a21, rs_A,
1151  z21, rs_Z );
1152  bl1_cscalv( BLIS1_NO_CONJUGATE,
1153  m_ahead,
1154  &inv_tau11,
1155  z21, rs_Z );
1156 
1157  // FLA_Dot( a12t, a21, dot_product );
1158  // FLA_Scal( minus_inv_tau11, dot_product );
1159  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1160  bl1_cdot( BLIS1_NO_CONJUGATE,
1161  m_ahead,
1162  a12t, cs_A,
1163  a21, rs_A,
1164  &dot_product );
1165  bl1_cscals( &minus_inv_tau11, &dot_product );
1166  bl1_caxpyv( BLIS1_CONJUGATE,
1167  m_ahead,
1168  &dot_product,
1169  a21, rs_A,
1170  a12t, cs_A );
1171 
1172  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1173  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1174  bl1_cgemv( BLIS1_NO_TRANSPOSE,
1175  BLIS1_NO_CONJUGATE,
1176  m_behind,
1177  n_ahead,
1178  buff_1,
1179  A02, rs_A, cs_A,
1180  a21, rs_A,
1181  buff_0,
1182  e0, inc_e );
1183  bl1_cger( BLIS1_NO_CONJUGATE,
1184  BLIS1_CONJUGATE,
1185  m_behind,
1186  n_ahead,
1187  &minus_inv_tau11,
1188  e0, inc_e,
1189  a21, rs_A,
1190  A02, rs_A, cs_A );
1191 
1192  // FLA_Copy( first_elem, a21_t );
1193  *a21_t = first_elem;
1194  }
1195 
1196  /*------------------------------------------------------------*/
1197 
1198  }
1199 
1200  // FLA_Obj_free( &d );
1201  // FLA_Obj_free( &e );
1202  // FLA_Obj_free( &f );
1203  FLA_free( buff_d );
1204  FLA_free( buff_e );
1205  FLA_free( buff_f );
1206 
1207  return FLA_SUCCESS;
1208 }

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cgemv(), bl1_cger(), bl1_cscalv(), bl1_csetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opc_var5()

FLA_Error FLA_Hess_UT_step_opc_var5 ( int  m_A,
int  m_T,
scomplex *  buff_A,
int  rs_A,
int  cs_A,
scomplex *  buff_U,
int  rs_U,
int  cs_U,
scomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
scomplex *  buff_T,
int  rs_T,
int  cs_T 
)
644 {
645  scomplex* buff_1 = FLA_COMPLEX_PTR( FLA_ONE );
646  scomplex* buff_0 = FLA_COMPLEX_PTR( FLA_ZERO );
647  scomplex* buff_m1 = FLA_COMPLEX_PTR( FLA_MINUS_ONE );
648  int i;
649 
650  // b_alg = FLA_Obj_length( T );
651  int b_alg = m_T;
652 
653  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
654  scomplex* buff_w = ( scomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
655  int inc_w = 1;
656 
657  // FLA_Set( FLA_ZERO, U );
658  // FLA_Set( FLA_ZERO, Z );
659  bl1_csetm( m_A,
660  b_alg,
661  buff_0,
662  buff_U, rs_U, cs_U );
663  bl1_csetm( m_A,
664  b_alg,
665  buff_0,
666  buff_Z, rs_Z, cs_Z );
667 
668  for ( i = 0; i < b_alg; ++i )
669  {
670  scomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
671  scomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
672  scomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
673  scomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
674  scomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
675  scomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
676 
677  scomplex* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
678  scomplex* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
679  scomplex* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
680  scomplex* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
681 
682  scomplex* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
683  scomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
684  scomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
685  scomplex* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
686  scomplex* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
687  scomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
688 
689  scomplex* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
690  scomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
691  scomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
692 
693  scomplex* w0 = buff_w + (0 )*inc_w;
694 
695  scomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
696  scomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
697 
698  scomplex* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
699 
700  int m_ahead = m_A - i - 1;
701  int n_ahead = m_A - i - 1;
702  int m_behind = i;
703  int n_behind = i;
704 
705  /*------------------------------------------------------------*/
706 
707  if ( m_behind > 0 )
708  {
709  // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
710  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
711  // T00, w0 );
712  bl1_ccopyv( BLIS1_CONJUGATE,
713  m_behind,
714  u10t, cs_U,
715  w0, inc_w );
716  bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
717  BLIS1_NO_TRANSPOSE,
718  BLIS1_NONUNIT_DIAG,
719  m_behind,
720  T00, rs_T, cs_T,
721  w0, inc_w );
722 
723  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
724  // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
725  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
726  bl1_cgemv( BLIS1_NO_TRANSPOSE,
727  BLIS1_NO_CONJUGATE,
728  m_behind,
729  n_behind,
730  buff_m1,
731  Z00, rs_Z, cs_Z,
732  w0, inc_w,
733  buff_1,
734  a01, rs_A );
735  bl1_cdots( BLIS1_NO_CONJUGATE,
736  m_behind,
737  buff_m1,
738  z10t, cs_Z,
739  w0, inc_w,
740  buff_1,
741  alpha11 );
742  bl1_cgemv( BLIS1_NO_TRANSPOSE,
743  BLIS1_NO_CONJUGATE,
744  m_ahead,
745  n_behind,
746  buff_m1,
747  Z20, rs_Z, cs_Z,
748  w0, inc_w,
749  buff_1,
750  a21, rs_A );
751 
752  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
753  // FLA_ONE, U00, a01, FLA_ZERO, w0 );
754  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
755  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
756  bl1_ccopyv( BLIS1_NO_CONJUGATE,
757  m_behind,
758  a01, rs_A,
759  w0, inc_w );
760  bl1_ctrmv( BLIS1_LOWER_TRIANGULAR,
761  BLIS1_CONJ_TRANSPOSE,
762  BLIS1_NONUNIT_DIAG,
763  m_behind,
764  U00, rs_U, cs_U,
765  w0, inc_w );
766  bl1_caxpyv( BLIS1_CONJUGATE,
767  m_behind,
768  alpha11,
769  u10t, cs_U,
770  w0, inc_w );
771  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
772  BLIS1_NO_CONJUGATE,
773  m_ahead,
774  n_behind,
775  buff_1,
776  U20, rs_U, cs_U,
777  a21, rs_A,
778  buff_1,
779  w0, inc_w );
780 
781  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
782  // T00, w0 );
783  bl1_ctrsv( BLIS1_UPPER_TRIANGULAR,
784  BLIS1_CONJ_TRANSPOSE,
785  BLIS1_NONUNIT_DIAG,
786  m_behind,
787  T00, rs_T, cs_T,
788  w0, inc_w );
789 
790  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
791  // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
792  // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
793  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
794  bl1_ctrmvsx( BLIS1_LOWER_TRIANGULAR,
795  BLIS1_NO_TRANSPOSE,
796  BLIS1_NONUNIT_DIAG,
797  m_behind,
798  buff_m1,
799  U00, rs_U, cs_U,
800  w0, inc_w,
801  buff_1,
802  a01, rs_A );
803  bl1_cdots( BLIS1_NO_CONJUGATE,
804  m_behind,
805  buff_m1,
806  u10t, cs_U,
807  w0, inc_w,
808  buff_1,
809  alpha11 );
810  bl1_cgemv( BLIS1_NO_TRANSPOSE,
811  BLIS1_NO_CONJUGATE,
812  m_ahead,
813  n_behind,
814  buff_m1,
815  U20, rs_U, cs_U,
816  w0, inc_w,
817  buff_1,
818  a21, rs_A );
819  }
820 
821  if ( m_ahead > 0 )
822  {
823  // FLA_Househ2_UT( FLA_LEFT,
824  // a21_t,
825  // a21_b, tau11 );
826  FLA_Househ2_UT_l_opc( m_ahead - 1,
827  a21_t,
828  a21_b, rs_A,
829  tau11 );
830 
831  // FLA_Copy( a21, u21 );
832  bl1_ccopyv( BLIS1_NO_CONJUGATE,
833  m_ahead,
834  a21, rs_A,
835  u21, rs_U );
836 
837  // FLA_Set( FLA_ONE, u21_t );
838  *u21_t = *buff_1;
839 
840  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
841  // FLA_Dot( a12t, u21, zeta11 );
842  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
843  bl1_cgemv( BLIS1_NO_TRANSPOSE,
844  BLIS1_NO_CONJUGATE,
845  m_behind,
846  n_ahead,
847  buff_1,
848  A02, rs_A, cs_A,
849  u21, rs_U,
850  buff_0,
851  z01, rs_Z );
852  bl1_cdot( BLIS1_NO_CONJUGATE,
853  m_ahead,
854  a12t, cs_A,
855  u21, rs_U,
856  zeta11 );
857  bl1_cgemv( BLIS1_NO_TRANSPOSE,
858  BLIS1_NO_CONJUGATE,
859  m_ahead,
860  n_ahead,
861  buff_1,
862  A22, rs_A, cs_A,
863  u21, rs_U,
864  buff_0,
865  z21, rs_Z );
866 
867  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
868  bl1_cgemv( BLIS1_CONJ_TRANSPOSE,
869  BLIS1_NO_CONJUGATE,
870  m_ahead,
871  n_behind,
872  buff_1,
873  U20, rs_U, cs_U,
874  u21, rs_U,
875  buff_0,
876  t01, rs_T );
877  }
878 
879  /*------------------------------------------------------------*/
880 
881  }
882 
883  // FLA_Obj_free( &w );
884  FLA_free( buff_w );
885 
886  return FLA_SUCCESS;
887 }
void bl1_cdots(conj1_t conj, int n, scomplex *alpha, scomplex *x, int incx, scomplex *y, int incy, scomplex *beta, scomplex *rho)
Definition: bl1_dots.c:39
void bl1_ctrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trmv.c:99
void bl1_ctrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *alpha, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx, scomplex *beta, scomplex *y, int incy)
Definition: bl1_trmvsx.c:129
void bl1_ctrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, scomplex *a, int a_rs, int a_cs, scomplex *x, int incx)
Definition: bl1_trsv.c:99
@ BLIS1_LOWER_TRIANGULAR
Definition: blis_type_defs.h:62
@ BLIS1_UPPER_TRIANGULAR
Definition: blis_type_defs.h:63
@ BLIS1_NONUNIT_DIAG
Definition: blis_type_defs.h:74

References bl1_caxpyv(), bl1_ccopyv(), bl1_cdot(), bl1_cdots(), bl1_cgemv(), bl1_csetm(), bl1_ctrmv(), bl1_ctrmvsx(), bl1_ctrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opc(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

◆ FLA_Hess_UT_step_opd_var1()

FLA_Error FLA_Hess_UT_step_opd_var1 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
194 {
195  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
196  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
197 
198  double first_elem;
199  int i;
200 
201  // b_alg = FLA_Obj_length( T );
202  int b_alg = m_T;
203 
204  for ( i = 0; i < b_alg; ++i )
205  {
206  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
207  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
208 
209  double* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
210  double* a21_b = buff_A + (i )*cs_A + (i+2)*rs_A;
211 
212  double* A22_t = buff_A + (i+1)*cs_A + (i+1)*rs_A;
213  double* A22_b = buff_A + (i+1)*cs_A + (i+2)*rs_A;
214 
215  double* A2_l = buff_A + (i+1)*cs_A + (0 )*rs_A;
216  double* A2_r = buff_A + (i+2)*cs_A + (0 )*rs_A;
217 
218  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
219  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
220 
221  int m_ahead = m_A - i - 1;
222  int n_ahead = m_A - i - 1;
223  int n_behind = i;
224 
225  /*------------------------------------------------------------*/
226 
227  if ( m_ahead > 0 )
228  {
229  // FLA_Househ2_UT( FLA_LEFT,
230  // a21_t,
231  // a21_b, tau11 );
232  FLA_Househ2_UT_l_opd( m_ahead - 1,
233  a21_t,
234  a21_b, rs_A,
235  tau11 );
236 
237  // FLA_Copy( a21_t, first_elem );
238  // FLA_Set( FLA_ONE, a21_t );
239  first_elem = *a21_t;
240  *a21_t = *buff_1;
241 
242  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
243  // A22_b );
244  FLA_Apply_H2_UT_l_opd_var1( m_ahead - 1,
245  n_ahead,
246  tau11,
247  a21_b, rs_A,
248  A22_t, cs_A,
249  A22_b, rs_A, cs_A );
250 
251  // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
252  FLA_Apply_H2_UT_r_opd_var1( m_A,
253  n_ahead - 1,
254  tau11,
255  a21_b, rs_A,
256  A2_l, rs_A,
257  A2_r, rs_A, cs_A );
258 
259  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
260  bl1_dgemv( BLIS1_CONJ_TRANSPOSE,
261  BLIS1_NO_CONJUGATE,
262  m_ahead,
263  n_behind,
264  buff_1,
265  A20, rs_A, cs_A,
266  a21, rs_A,
267  buff_0,
268  t01, rs_T );
269 
270  // FLA_Copy( first_elem, a21_t );
271  *a21_t = first_elem;
272  }
273 
274  /*------------------------------------------------------------*/
275 
276  }
277 
278  return FLA_SUCCESS;
279 }
FLA_Error FLA_Apply_H2_UT_l_opd_var1(int m_u2_A2, int n_a1t, double *tau, double *u2, int inc_u2, double *a1t, int inc_a1t, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:195
FLA_Error FLA_Apply_H2_UT_r_opd_var1(int n_u2h_A2, int m_a1, double *tau, double *u2h, int inc_u2h, double *a1, int inc_a1, double *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:181

References bl1_dgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opd_var1(), FLA_Apply_H2_UT_r_opd_var1(), FLA_Househ2_UT_l_opd(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var1().

◆ FLA_Hess_UT_step_opd_var2()

FLA_Error FLA_Hess_UT_step_opd_var2 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
320 {
321  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
322  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
323  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
324  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
325 
326  double first_elem;
327  double dot_product;
328  double beta, conj_beta;
329  double inv_tau11;
330  double minus_inv_tau11;
331  int i;
332 
333  // b_alg = FLA_Obj_length( T );
334  int b_alg = m_T;
335 
336  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
337  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
338  double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
339  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
340  int inc_y = 1;
341  int inc_z = 1;
342 
343  for ( i = 0; i < b_alg; ++i )
344  {
345  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
346  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
347  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
348  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
349  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
350 
351  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
352  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
353 
354  double* y0 = buff_y + (0 )*inc_y;
355  double* y2 = buff_y + (i+1)*inc_y;
356 
357  double* z2 = buff_z + (i+1)*inc_z;
358 
359  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
360  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
361 
362  int m_ahead = m_A - i - 1;
363  int n_ahead = m_A - i - 1;
364  int m_behind = i;
365  int n_behind = i;
366 
367  /*------------------------------------------------------------*/
368 
369  if ( m_ahead > 0 )
370  {
371  // FLA_Househ2_UT( FLA_LEFT,
372  // a21_t,
373  // a21_b, tau11 );
374  FLA_Househ2_UT_l_opd( m_ahead - 1,
375  a21_t,
376  a21_b, rs_A,
377  tau11 );
378 
379  // FLA_Set( FLA_ONE, inv_tau11 );
380  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
381  // FLA_Copy( inv_tau11, minus_inv_tau11 );
382  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
383  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
384  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
385 
386  // FLA_Copy( a21_t, first_elem );
387  // FLA_Set( FLA_ONE, a21_t );
388  first_elem = *a21_t;
389  *a21_t = *buff_1;
390 
391  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
394  m_ahead,
395  n_ahead,
396  buff_1,
397  A22, rs_A, cs_A,
398  a21, rs_A,
399  buff_0,
400  y2, inc_y );
401 
402  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
405  m_ahead,
406  n_ahead,
407  buff_1,
408  A22, rs_A, cs_A,
409  a21, rs_A,
410  buff_0,
411  z2, inc_z );
412 
413  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
414  // FLA_Inv_scal( FLA_TWO, beta );
415  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
417  m_ahead,
418  a21, rs_A,
419  z2, inc_z,
420  &beta );
421  bl1_dinvscals( buff_2, &beta );
422  bl1_dcopyconj( &beta, &conj_beta );
423 
424  // FLA_Scal( minus_inv_tau11, conj_beta );
425  // FLA_Axpy( conj_beta, a21, y2 );
426  // FLA_Scal( inv_tau11, y2 );
427  bl1_dscals( &minus_inv_tau11, &conj_beta );
429  m_ahead,
430  &conj_beta,
431  a21, rs_A,
432  y2, inc_y );
434  m_ahead,
435  &inv_tau11,
436  y2, inc_y );
437 
438  // FLA_Scal( minus_inv_tau11, beta );
439  // FLA_Axpy( beta, a21, z2 );
440  // FLA_Scal( inv_tau11, z2 );
441  bl1_dscals( &minus_inv_tau11, &beta );
443  m_ahead,
444  &beta,
445  a21, rs_A,
446  z2, inc_z );
448  m_ahead,
449  &inv_tau11,
450  z2, inc_z );
451 
452  // FLA_Dot( a12t, a21, dot_product );
453  // FLA_Scal( minus_inv_tau11, dot_product );
454  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
456  m_ahead,
457  a12t, cs_A,
458  a21, rs_A,
459  &dot_product );
460  bl1_dscals( &minus_inv_tau11, &dot_product );
462  m_ahead,
463  &dot_product,
464  a21, rs_A,
465  a12t, cs_A );
466 
467  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
468  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
471  m_behind,
472  n_ahead,
473  buff_1,
474  A02, rs_A, cs_A,
475  a21, rs_A,
476  buff_0,
477  y0, inc_y );
480  m_behind,
481  n_ahead,
482  &minus_inv_tau11,
483  y0, inc_y,
484  a21, rs_A,
485  A02, rs_A, cs_A );
486 
487  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
488  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
491  m_ahead,
492  n_ahead,
493  buff_m1,
494  a21, rs_A,
495  y2, inc_y,
496  A22, rs_A, cs_A );
499  m_ahead,
500  n_ahead,
501  buff_m1,
502  z2, inc_z,
503  a21, rs_A,
504  A22, rs_A, cs_A );
505 
506  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
509  m_ahead,
510  n_behind,
511  buff_1,
512  A20, rs_A, cs_A,
513  a21, rs_A,
514  buff_0,
515  t01, rs_T );
516 
517  // FLA_Copy( first_elem, a21_t );
518  *a21_t = first_elem;
519  }
520 
521  /*------------------------------------------------------------*/
522 
523  }
524 
525  // FLA_Obj_free( &y );
526  // FLA_Obj_free( &z );
527  FLA_free( buff_y );
528  FLA_free( buff_z );
529 
530  return FLA_SUCCESS;
531 }

References bl1_daxpyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opd_var3()

FLA_Error FLA_Hess_UT_step_opd_var3 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_T,
int  rs_T,
int  cs_T 
)
454 {
455  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
456  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
457  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
458  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
459 
460  double first_elem;
461  double dot_product;
462  double beta, conj_beta;
463  double inv_tau11;
464  double minus_inv_tau11;
465  double minus_upsilon1, minus_conj_upsilon1;
466  double minus_psi1, minus_conj_psi1;
467  double minus_zeta1;
468  int i;
469 
470  // b_alg = FLA_Obj_length( T );
471  int b_alg = m_T;
472 
473  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
474  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
475  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
476  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
477  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
478  double* buff_u = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
479  double* buff_y = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
480  double* buff_z = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
481  double* buff_v = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
482  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
483  int inc_u = 1;
484  int inc_y = 1;
485  int inc_z = 1;
486  int inc_v = 1;
487  int inc_w = 1;
488 
489  // Initialize some variables (only to prevent compiler warnings).
490  first_elem = *buff_0;
491  minus_inv_tau11 = *buff_0;
492 
493  for ( i = 0; i < b_alg; ++i )
494  {
495  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
496  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
497  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
498  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
499  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
500  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
501 
502  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
503  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
504 
505  double* upsilon1 = buff_u + (i )*inc_u;
506  double* u2 = buff_u + (i+1)*inc_u;
507 
508  double* y0 = buff_y + (0 )*inc_y;
509  double* psi1 = buff_y + (i )*inc_y;
510  double* y2 = buff_y + (i+1)*inc_y;
511 
512  double* zeta1 = buff_z + (i )*inc_z;
513  double* z2 = buff_z + (i+1)*inc_z;
514 
515  double* v2 = buff_v + (i+1)*inc_v;
516 
517  double* w2 = buff_w + (i+1)*inc_w;
518 
519  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
520  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
521 
522  int m_ahead = m_A - i - 1;
523  int n_ahead = m_A - i - 1;
524  int m_behind = i;
525  int n_behind = i;
526 
527  /*------------------------------------------------------------*/
528 
529  if ( m_behind > 0 )
530  {
531  // FLA_Copy( upsilon1, minus_upsilon1 );
532  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
533  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
534  bl1_dmult3( buff_m1, upsilon1, &minus_upsilon1 );
535  bl1_dcopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
536 
537  // FLA_Copy( psi1, minus_psi1 );
538  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
539  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
540  bl1_dmult3( buff_m1, psi1, &minus_psi1 );
541  bl1_dcopyconj( &minus_psi1, &minus_conj_psi1 );
542 
543  // FLA_Copy( zeta1, minus_zeta1 );
544  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
545  bl1_dmult3( buff_m1, zeta1, &minus_zeta1 );
546 
547  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
548  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
550  1,
551  &minus_upsilon1,
552  psi1, 1,
553  alpha11, 1 );
555  1,
556  &minus_zeta1,
557  upsilon1, 1,
558  alpha11, 1 );
559 
560  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
561  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
563  m_ahead,
564  &minus_upsilon1,
565  y2, inc_y,
566  a12t, cs_A );
568  m_ahead,
569  &minus_zeta1,
570  u2, inc_u,
571  a12t, cs_A );
572 
573  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
574  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
576  m_ahead,
577  &minus_conj_psi1,
578  u2, inc_u,
579  a21, rs_A );
581  m_ahead,
582  &minus_conj_upsilon1,
583  z2, inc_z,
584  a21, rs_A );
585  }
586 
587  if ( m_ahead > 0 )
588  {
589  // FLA_Househ2_UT( FLA_LEFT,
590  // a21_t,
591  // a21_b, tau11 );
592  FLA_Househ2_UT_l_opd( m_ahead - 1,
593  a21_t,
594  a21_b, rs_A,
595  tau11 );
596 
597  // FLA_Set( FLA_ONE, inv_tau11 );
598  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
599  // FLA_Copy( inv_tau11, minus_inv_tau11 );
600  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
601  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
602  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
603 
604  // FLA_Copy( a21_t, first_elem );
605  // FLA_Set( FLA_ONE, a21_t );
606  first_elem = *a21_t;
607  *a21_t = *buff_1;
608  }
609 
610  if ( m_behind > 0 )
611  {
612  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
613  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
616  m_ahead,
617  n_ahead,
618  buff_m1,
619  u2, inc_u,
620  y2, inc_y,
621  A22, rs_A, cs_A );
624  m_ahead,
625  n_ahead,
626  buff_m1,
627  z2, inc_z,
628  u2, inc_u,
629  A22, rs_A, cs_A );
630  }
631 
632  if ( m_ahead > 0 )
633  {
634  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
637  m_ahead,
638  n_ahead,
639  buff_1,
640  A22, rs_A, cs_A,
641  a21, rs_A,
642  buff_0,
643  v2, inc_v );
644 
645  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
648  m_ahead,
649  n_ahead,
650  buff_1,
651  A22, rs_A, cs_A,
652  a21, rs_A,
653  buff_0,
654  w2, inc_w );
655 
656  // FLA_Copy( a21, u2 );
657  // FLA_Copy( v2, y2 );
658  // FLA_Copy( w2, z2 );
660  m_ahead,
661  a21, rs_A,
662  u2, inc_u );
664  m_ahead,
665  v2, inc_v,
666  y2, inc_y );
668  m_ahead,
669  w2, inc_w,
670  z2, inc_z );
671 
672  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
673  // FLA_Inv_scal( FLA_TWO, beta );
674  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
676  m_ahead,
677  a21, rs_A,
678  z2, inc_z,
679  &beta );
680  bl1_dinvscals( buff_2, &beta );
681  bl1_dcopyconj( &beta, &conj_beta );
682 
683  // FLA_Scal( minus_inv_tau11, conj_beta );
684  // FLA_Axpy( conj_beta, a21, y2 );
685  // FLA_Scal( inv_tau11, y2 );
686  bl1_dscals( &minus_inv_tau11, &conj_beta );
688  m_ahead,
689  &conj_beta,
690  a21, rs_A,
691  y2, inc_y );
693  m_ahead,
694  &inv_tau11,
695  y2, inc_y );
696 
697  // FLA_Scal( minus_inv_tau11, beta );
698  // FLA_Axpy( beta, a21, z2 );
699  // FLA_Scal( inv_tau11, z2 );
700  bl1_dscals( &minus_inv_tau11, &beta );
702  m_ahead,
703  &beta,
704  a21, rs_A,
705  z2, inc_z );
707  m_ahead,
708  &inv_tau11,
709  z2, inc_z );
710 
711  // FLA_Dot( a12t, a21, dot_product );
712  // FLA_Scal( minus_inv_tau11, dot_product );
713  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
715  m_ahead,
716  a12t, cs_A,
717  a21, rs_A,
718  &dot_product );
719  bl1_dscals( &minus_inv_tau11, &dot_product );
721  m_ahead,
722  &dot_product,
723  a21, rs_A,
724  a12t, cs_A );
725 
726  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
727  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
730  m_behind,
731  n_ahead,
732  buff_1,
733  A02, rs_A, cs_A,
734  a21, rs_A,
735  buff_0,
736  y0, inc_y );
739  m_behind,
740  n_ahead,
741  &minus_inv_tau11,
742  y0, inc_y,
743  a21, rs_A,
744  A02, rs_A, cs_A );
745 
746  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
749  m_ahead,
750  n_behind,
751  buff_1,
752  A20, rs_A, cs_A,
753  a21, rs_A,
754  buff_0,
755  t01, rs_T );
756 
757  // FLA_Copy( first_elem, a21_t );
758  *a21_t = first_elem;
759  }
760 
761  if ( m_behind + 1 == b_alg && m_ahead > 0 )
762  {
763  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
764  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
767  m_ahead,
768  n_ahead,
769  buff_m1,
770  u2, inc_u,
771  y2, inc_y,
772  A22, rs_A, cs_A );
775  m_ahead,
776  n_ahead,
777  buff_m1,
778  z2, inc_z,
779  u2, inc_u,
780  A22, rs_A, cs_A );
781  }
782 
783  /*------------------------------------------------------------*/
784 
785  }
786 
787  // FLA_Obj_free( &u );
788  // FLA_Obj_free( &y );
789  // FLA_Obj_free( &z );
790  // FLA_Obj_free( &v );
791  // FLA_Obj_free( &w );
792  FLA_free( buff_u );
793  FLA_free( buff_y );
794  FLA_free( buff_z );
795  FLA_free( buff_v );
796  FLA_free( buff_w );
797 
798  return FLA_SUCCESS;
799 }

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opd_var4()

FLA_Error FLA_Hess_UT_step_opd_var4 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_Y,
int  rs_Y,
int  cs_Y,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
498 {
499  double* buff_2 = FLA_DOUBLE_PTR( FLA_TWO );
500  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
501  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
502  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
503 
504  double first_elem, last_elem;
505  double dot_product;
506  double beta, conj_beta;
507  double inv_tau11;
508  double minus_inv_tau11;
509  int i;
510 
511  // b_alg = FLA_Obj_length( T );
512  int b_alg = m_T;
513 
514  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
515  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
516  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
517  double* buff_d = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
518  double* buff_e = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
519  double* buff_f = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
520  int inc_d = 1;
521  int inc_e = 1;
522  int inc_f = 1;
523 
524  // FLA_Set( FLA_ZERO, Y );
525  // FLA_Set( FLA_ZERO, Z );
526  bl1_dsetm( m_A,
527  b_alg,
528  buff_0,
529  buff_Y, rs_Y, cs_Y );
530  bl1_dsetm( m_A,
531  b_alg,
532  buff_0,
533  buff_Z, rs_Z, cs_Z );
534 
535  for ( i = 0; i < b_alg; ++i )
536  {
537  double* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
538  double* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
539  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
540  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
541  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
542  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
543  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
544 
545  double* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
546  double* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
547  double* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
548 
549  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
550  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
551  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
552 
553  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
554  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
555 
556  double* d0 = buff_d + (0 )*inc_d;
557 
558  double* e0 = buff_e + (0 )*inc_e;
559 
560  double* f0 = buff_f + (0 )*inc_f;
561 
562  double* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
563 
564  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
565  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
566 
567  double* ABL = a10t;
568  double* ZBL = z10t;
569 
570  double* a2 = alpha11;
571 
572  int m_ahead = m_A - i - 1;
573  int n_ahead = m_A - i - 1;
574  int m_behind = i;
575  int n_behind = i;
576 
577  /*------------------------------------------------------------*/
578 
579  if ( m_behind > 0 )
580  {
581  // FLA_Copy( a10t_r, last_elem );
582  // FLA_Set( FLA_ONE, a10t_r );
583  last_elem = *a10t_r;
584  *a10t_r = *buff_1;
585  }
586 
587  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
588  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
591  m_ahead + 1,
592  n_behind,
593  buff_m1,
594  ABL, rs_A, cs_A,
595  y10t, cs_Y,
596  buff_1,
597  a2, rs_A );
600  m_ahead + 1,
601  n_behind,
602  buff_m1,
603  ZBL, rs_Z, cs_Z,
604  a10t, cs_A,
605  buff_1,
606  a2, rs_A );
607 
608  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
609  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
612  m_ahead,
613  n_behind,
614  buff_m1,
615  Y20, rs_Y, cs_Y,
616  a10t, cs_A,
617  buff_1,
618  a12t, cs_A );
621  m_ahead,
622  n_behind,
623  buff_m1,
624  A20, rs_A, cs_A,
625  z10t, cs_Z,
626  buff_1,
627  a12t, cs_A );
628 
629  if ( m_behind > 0 )
630  {
631  // FLA_Copy( last_elem, a10t_r );
632  *a10t_r = last_elem;
633  }
634 
635  if ( m_ahead > 0 )
636  {
637  // FLA_Househ2_UT( FLA_LEFT,
638  // a21_t,
639  // a21_b, tau11 );
640  FLA_Househ2_UT_l_opd( m_ahead - 1,
641  a21_t,
642  a21_b, rs_A,
643  tau11 );
644 
645  // FLA_Set( FLA_ONE, inv_tau11 );
646  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
647  // FLA_Copy( inv_tau11, minus_inv_tau11 );
648  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
649  bl1_ddiv3( buff_1, tau11, &inv_tau11 );
650  bl1_dneg2( &inv_tau11, &minus_inv_tau11 );
651 
652  // FLA_Copy( a21_t, first_elem );
653  // FLA_Set( FLA_ONE, a21_t );
654  first_elem = *a21_t;
655  *a21_t = *buff_1;
656 
657  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
660  m_ahead,
661  n_ahead,
662  buff_1,
663  A22, rs_A, cs_A,
664  a21, rs_A,
665  buff_0,
666  y21, rs_Y );
667 
668  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
671  m_ahead,
672  n_ahead,
673  buff_1,
674  A22, rs_A, cs_A,
675  a21, rs_A,
676  buff_0,
677  z21, rs_Z );
678 
679  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
680  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
681  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
684  m_ahead,
685  n_behind,
686  buff_1,
687  A20, rs_A, cs_A,
688  a21, rs_A,
689  buff_0,
690  d0, inc_d );
693  m_ahead,
694  n_behind,
695  buff_1,
696  Y20, rs_Y, cs_Y,
697  a21, rs_A,
698  buff_0,
699  e0, inc_e );
702  m_ahead,
703  n_behind,
704  buff_1,
705  Z20, rs_Z, cs_Z,
706  a21, rs_A,
707  buff_0,
708  f0, inc_f );
709 
710  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
711  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
714  m_ahead,
715  n_behind,
716  buff_m1,
717  Y20, rs_Y, cs_Y,
718  d0, inc_d,
719  buff_1,
720  y21, rs_Y );
723  m_ahead,
724  n_behind,
725  buff_m1,
726  A20, rs_A, cs_A,
727  f0, inc_f,
728  buff_1,
729  y21, rs_Y );
730 
731  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
732  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
735  m_ahead,
736  n_behind,
737  buff_m1,
738  A20, rs_A, cs_A,
739  e0, inc_e,
740  buff_1,
741  z21, rs_Z );
744  m_ahead,
745  n_behind,
746  buff_m1,
747  Z20, rs_Z, cs_Z,
748  d0, inc_d,
749  buff_1,
750  z21, rs_Z );
751 
752  // FLA_Copy( d0, t01 );
754  n_behind,
755  d0, inc_d,
756  t01, rs_T );
757 
758  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
759  // FLA_Inv_scal( FLA_TWO, beta );
760  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
762  m_ahead,
763  a21, rs_A,
764  z21, rs_Z,
765  &beta );
766  bl1_dinvscals( buff_2, &beta );
767  bl1_dcopyconj( &beta, &conj_beta );
768 
769  // FLA_Scal( minus_inv_tau11, conj_beta );
770  // FLA_Axpy( conj_beta, a21, y21 );
771  // FLA_Scal( inv_tau11, y21 );
772  bl1_dscals( &minus_inv_tau11, &conj_beta );
774  m_ahead,
775  &conj_beta,
776  a21, rs_A,
777  y21, rs_Y );
779  m_ahead,
780  &inv_tau11,
781  y21, rs_Y );
782 
783  // FLA_Scal( minus_inv_tau11, beta );
784  // FLA_Axpy( beta, a21, z21 );
785  // FLA_Scal( inv_tau11, z21 );
786  bl1_dscals( &minus_inv_tau11, &beta );
788  m_ahead,
789  &beta,
790  a21, rs_A,
791  z21, rs_Z );
793  m_ahead,
794  &inv_tau11,
795  z21, rs_Z );
796 
797  // FLA_Dot( a12t, a21, dot_product );
798  // FLA_Scal( minus_inv_tau11, dot_product );
799  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
801  m_ahead,
802  a12t, cs_A,
803  a21, rs_A,
804  &dot_product );
805  bl1_dscals( &minus_inv_tau11, &dot_product );
807  m_ahead,
808  &dot_product,
809  a21, rs_A,
810  a12t, cs_A );
811 
812  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
813  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
816  m_behind,
817  n_ahead,
818  buff_1,
819  A02, rs_A, cs_A,
820  a21, rs_A,
821  buff_0,
822  e0, inc_e );
825  m_behind,
826  n_ahead,
827  &minus_inv_tau11,
828  e0, inc_e,
829  a21, rs_A,
830  A02, rs_A, cs_A );
831 
832  // FLA_Copy( first_elem, a21_t );
833  *a21_t = first_elem;
834  }
835 
836  /*------------------------------------------------------------*/
837 
838  }
839 
840  // FLA_Obj_free( &d );
841  // FLA_Obj_free( &e );
842  // FLA_Obj_free( &f );
843  FLA_free( buff_d );
844  FLA_free( buff_e );
845  FLA_free( buff_f );
846 
847  return FLA_SUCCESS;
848 }

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_dgemv(), bl1_dger(), bl1_dscalv(), bl1_dsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opd_var5()

FLA_Error FLA_Hess_UT_step_opd_var5 ( int  m_A,
int  m_T,
double *  buff_A,
int  rs_A,
int  cs_A,
double *  buff_U,
int  rs_U,
int  cs_U,
double *  buff_Z,
int  rs_Z,
int  cs_Z,
double *  buff_T,
int  rs_T,
int  cs_T 
)
391 {
392  double* buff_1 = FLA_DOUBLE_PTR( FLA_ONE );
393  double* buff_0 = FLA_DOUBLE_PTR( FLA_ZERO );
394  double* buff_m1 = FLA_DOUBLE_PTR( FLA_MINUS_ONE );
395  int i;
396 
397  // b_alg = FLA_Obj_length( T );
398  int b_alg = m_T;
399 
400  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
401  double* buff_w = ( double* ) FLA_malloc( m_A * sizeof( *buff_A ) );
402  int inc_w = 1;
403 
404  // FLA_Set( FLA_ZERO, U );
405  // FLA_Set( FLA_ZERO, Z );
406  bl1_dsetm( m_A,
407  b_alg,
408  buff_0,
409  buff_U, rs_U, cs_U );
410  bl1_dsetm( m_A,
411  b_alg,
412  buff_0,
413  buff_Z, rs_Z, cs_Z );
414 
415  for ( i = 0; i < b_alg; ++i )
416  {
417  double* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
418  double* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
419  double* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
420  double* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
421  double* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
422  double* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
423 
424  double* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
425  double* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
426  double* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
427  double* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
428 
429  double* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
430  double* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
431  double* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
432  double* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
433  double* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
434  double* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
435 
436  double* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
437  double* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
438  double* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
439 
440  double* w0 = buff_w + (0 )*inc_w;
441 
442  double* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
443  double* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
444 
445  double* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
446 
447  int m_ahead = m_A - i - 1;
448  int n_ahead = m_A - i - 1;
449  int m_behind = i;
450  int n_behind = i;
451 
452  /*------------------------------------------------------------*/
453 
454  if ( m_behind > 0 )
455  {
456  // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
457  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
458  // T00, w0 );
460  m_behind,
461  u10t, cs_U,
462  w0, inc_w );
466  m_behind,
467  T00, rs_T, cs_T,
468  w0, inc_w );
469 
470  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
471  // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
472  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
475  m_behind,
476  n_behind,
477  buff_m1,
478  Z00, rs_Z, cs_Z,
479  w0, inc_w,
480  buff_1,
481  a01, rs_A );
483  m_behind,
484  buff_m1,
485  z10t, cs_Z,
486  w0, inc_w,
487  buff_1,
488  alpha11 );
491  m_ahead,
492  n_behind,
493  buff_m1,
494  Z20, rs_Z, cs_Z,
495  w0, inc_w,
496  buff_1,
497  a21, rs_A );
498 
499  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
500  // FLA_ONE, U00, a01, FLA_ZERO, w0 );
501  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
502  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
504  m_behind,
505  a01, rs_A,
506  w0, inc_w );
510  m_behind,
511  U00, rs_U, cs_U,
512  w0, inc_w );
514  m_behind,
515  alpha11,
516  u10t, cs_U,
517  w0, inc_w );
520  m_ahead,
521  n_behind,
522  buff_1,
523  U20, rs_U, cs_U,
524  a21, rs_A,
525  buff_1,
526  w0, inc_w );
527 
528  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
529  // T00, w0 );
533  m_behind,
534  T00, rs_T, cs_T,
535  w0, inc_w );
536 
537  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
538  // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
539  // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
540  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
544  m_behind,
545  buff_m1,
546  U00, rs_U, cs_U,
547  w0, inc_w,
548  buff_1,
549  a01, rs_A );
551  m_behind,
552  buff_m1,
553  u10t, cs_U,
554  w0, inc_w,
555  buff_1,
556  alpha11 );
559  m_ahead,
560  n_behind,
561  buff_m1,
562  U20, rs_U, cs_U,
563  w0, inc_w,
564  buff_1,
565  a21, rs_A );
566  }
567 
568  if ( m_ahead > 0 )
569  {
570  // FLA_Househ2_UT( FLA_LEFT,
571  // a21_t,
572  // a21_b, tau11 );
573  FLA_Househ2_UT_l_opd( m_ahead - 1,
574  a21_t,
575  a21_b, rs_A,
576  tau11 );
577 
578  // FLA_Copy( a21, u21 );
580  m_ahead,
581  a21, rs_A,
582  u21, rs_U );
583 
584  // FLA_Set( FLA_ONE, u21_t );
585  *u21_t = *buff_1;
586 
587  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
588  // FLA_Dot( a12t, u21, zeta11 );
589  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
592  m_behind,
593  n_ahead,
594  buff_1,
595  A02, rs_A, cs_A,
596  u21, rs_U,
597  buff_0,
598  z01, rs_Z );
600  m_ahead,
601  a12t, cs_A,
602  u21, rs_U,
603  zeta11 );
606  m_ahead,
607  n_ahead,
608  buff_1,
609  A22, rs_A, cs_A,
610  u21, rs_U,
611  buff_0,
612  z21, rs_Z );
613 
614  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
617  m_ahead,
618  n_behind,
619  buff_1,
620  U20, rs_U, cs_U,
621  u21, rs_U,
622  buff_0,
623  t01, rs_T );
624  }
625 
626  /*------------------------------------------------------------*/
627 
628  }
629 
630  // FLA_Obj_free( &w );
631  FLA_free( buff_w );
632 
633  return FLA_SUCCESS;
634 }
void bl1_ddots(conj1_t conj, int n, double *alpha, double *x, int incx, double *y, int incy, double *beta, double *rho)
Definition: bl1_dots.c:26
void bl1_dtrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trmv.c:56
void bl1_dtrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *alpha, double *a, int a_rs, int a_cs, double *x, int incx, double *beta, double *y, int incy)
Definition: bl1_trmvsx.c:71
void bl1_dtrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, double *a, int a_rs, int a_cs, double *x, int incx)
Definition: bl1_trsv.c:56

References bl1_daxpyv(), bl1_dcopyv(), bl1_ddot(), bl1_ddots(), bl1_dgemv(), bl1_dsetm(), bl1_dtrmv(), bl1_dtrmvsx(), bl1_dtrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opd(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

◆ FLA_Hess_UT_step_ops_var1()

FLA_Error FLA_Hess_UT_step_ops_var1 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
103  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
104 
105  float first_elem;
106  int i;
107 
108  // b_alg = FLA_Obj_length( T );
109  int b_alg = m_T;
110 
111  for ( i = 0; i < b_alg; ++i )
112  {
113  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
114  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
115 
116  float* a21_t = buff_A + (i )*cs_A + (i+1)*rs_A;
117  float* a21_b = buff_A + (i )*cs_A + (i+2)*rs_A;
118 
119  float* A22_t = buff_A + (i+1)*cs_A + (i+1)*rs_A;
120  float* A22_b = buff_A + (i+1)*cs_A + (i+2)*rs_A;
121 
122  float* A2_l = buff_A + (i+1)*cs_A + (0 )*rs_A;
123  float* A2_r = buff_A + (i+2)*cs_A + (0 )*rs_A;
124 
125  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
126  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
127 
128  int m_ahead = m_A - i - 1;
129  int n_ahead = m_A - i - 1;
130  int n_behind = i;
131 
132  /*------------------------------------------------------------*/
133 
134  if ( m_ahead > 0 )
135  {
136  // FLA_Househ2_UT( FLA_LEFT,
137  // a21_t,
138  // a21_b, tau11 );
139  FLA_Househ2_UT_l_ops( m_ahead - 1,
140  a21_t,
141  a21_b, rs_A,
142  tau11 );
143 
144  // FLA_Copy( a21_t, first_elem );
145  // FLA_Set( FLA_ONE, a21_t );
146  first_elem = *a21_t;
147  *a21_t = *buff_1;
148 
149  // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
150  // A22_b );
151  FLA_Apply_H2_UT_l_ops_var1( m_ahead - 1,
152  n_ahead,
153  tau11,
154  a21_b, rs_A,
155  A22_t, cs_A,
156  A22_b, rs_A, cs_A );
157 
158  // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
160  n_ahead - 1,
161  tau11,
162  a21_b, rs_A,
163  A2_l, rs_A,
164  A2_r, rs_A, cs_A );
165 
166  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
169  m_ahead,
170  n_behind,
171  buff_1,
172  A20, rs_A, cs_A,
173  a21, rs_A,
174  buff_0,
175  t01, rs_T );
176 
177  // FLA_Copy( first_elem, a21_t );
178  *a21_t = first_elem;
179  }
180 
181  /*------------------------------------------------------------*/
182 
183  }
184 
185  return FLA_SUCCESS;
186 }
FLA_Error FLA_Apply_H2_UT_l_ops_var1(int m_u2_A2, int n_a1t, float *tau, float *u2, int inc_u2, float *a1t, int inc_a1t, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:121
FLA_Error FLA_Apply_H2_UT_r_ops_var1(int n_u2h_A2, int m_a1, float *tau, float *u2h, int inc_u2h, float *a1, int inc_a1, float *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:108

References bl1_sgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_ops_var1(), FLA_Apply_H2_UT_r_ops_var1(), FLA_Househ2_UT_l_ops(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var1().

◆ FLA_Hess_UT_step_ops_var2()

FLA_Error FLA_Hess_UT_step_ops_var2 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float dot_product;
109  float beta, conj_beta;
110  float inv_tau11;
111  float minus_inv_tau11;
112  int i;
113 
114  // b_alg = FLA_Obj_length( T );
115  int b_alg = m_T;
116 
117  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
118  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
119  float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
120  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
121  int inc_y = 1;
122  int inc_z = 1;
123 
124  for ( i = 0; i < b_alg; ++i )
125  {
126  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
127  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
128  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
129  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
130  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
131 
132  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
133  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
134 
135  float* y0 = buff_y + (0 )*inc_y;
136  float* y2 = buff_y + (i+1)*inc_y;
137 
138  float* z2 = buff_z + (i+1)*inc_z;
139 
140  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
141  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
142 
143  int m_ahead = m_A - i - 1;
144  int n_ahead = m_A - i - 1;
145  int m_behind = i;
146  int n_behind = i;
147 
148  /*------------------------------------------------------------*/
149 
150  if ( m_ahead > 0 )
151  {
152  // FLA_Househ2_UT( FLA_LEFT,
153  // a21_t,
154  // a21_b, tau11 );
155  FLA_Househ2_UT_l_ops( m_ahead - 1,
156  a21_t,
157  a21_b, rs_A,
158  tau11 );
159 
160  // FLA_Set( FLA_ONE, inv_tau11 );
161  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
162  // FLA_Copy( inv_tau11, minus_inv_tau11 );
163  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
164  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
165  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
166 
167  // FLA_Copy( a21_t, first_elem );
168  // FLA_Set( FLA_ONE, a21_t );
169  first_elem = *a21_t;
170  *a21_t = *buff_1;
171 
172  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
175  m_ahead,
176  n_ahead,
177  buff_1,
178  A22, rs_A, cs_A,
179  a21, rs_A,
180  buff_0,
181  y2, inc_y );
182 
183  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
186  m_ahead,
187  n_ahead,
188  buff_1,
189  A22, rs_A, cs_A,
190  a21, rs_A,
191  buff_0,
192  z2, inc_z );
193 
194  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
195  // FLA_Inv_scal( FLA_TWO, beta );
196  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
198  m_ahead,
199  a21, rs_A,
200  z2, inc_z,
201  &beta );
202  bl1_sinvscals( buff_2, &beta );
203  bl1_scopyconj( &beta, &conj_beta );
204 
205  // FLA_Scal( minus_inv_tau11, conj_beta );
206  // FLA_Axpy( conj_beta, a21, y2 );
207  // FLA_Scal( inv_tau11, y2 );
208  bl1_sscals( &minus_inv_tau11, &conj_beta );
210  m_ahead,
211  &conj_beta,
212  a21, rs_A,
213  y2, inc_y );
215  m_ahead,
216  &inv_tau11,
217  y2, inc_y );
218 
219  // FLA_Scal( minus_inv_tau11, beta );
220  // FLA_Axpy( beta, a21, z2 );
221  // FLA_Scal( inv_tau11, z2 );
222  bl1_sscals( &minus_inv_tau11, &beta );
224  m_ahead,
225  &beta,
226  a21, rs_A,
227  z2, inc_z );
229  m_ahead,
230  &inv_tau11,
231  z2, inc_z );
232 
233  // FLA_Dot( a12t, a21, dot_product );
234  // FLA_Scal( minus_inv_tau11, dot_product );
235  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
237  m_ahead,
238  a12t, cs_A,
239  a21, rs_A,
240  &dot_product );
241  bl1_sscals( &minus_inv_tau11, &dot_product );
243  m_ahead,
244  &dot_product,
245  a21, rs_A,
246  a12t, cs_A );
247 
248  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
249  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
252  m_behind,
253  n_ahead,
254  buff_1,
255  A02, rs_A, cs_A,
256  a21, rs_A,
257  buff_0,
258  y0, inc_y );
261  m_behind,
262  n_ahead,
263  &minus_inv_tau11,
264  y0, inc_y,
265  a21, rs_A,
266  A02, rs_A, cs_A );
267 
268  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
269  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
272  m_ahead,
273  n_ahead,
274  buff_m1,
275  a21, rs_A,
276  y2, inc_y,
277  A22, rs_A, cs_A );
280  m_ahead,
281  n_ahead,
282  buff_m1,
283  z2, inc_z,
284  a21, rs_A,
285  A22, rs_A, cs_A );
286 
287  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
290  m_ahead,
291  n_behind,
292  buff_1,
293  A20, rs_A, cs_A,
294  a21, rs_A,
295  buff_0,
296  t01, rs_T );
297 
298  // FLA_Copy( first_elem, a21_t );
299  *a21_t = first_elem;
300  }
301 
302  /*------------------------------------------------------------*/
303 
304  }
305 
306  // FLA_Obj_free( &y );
307  // FLA_Obj_free( &z );
308  FLA_free( buff_y );
309  FLA_free( buff_z );
310 
311  return FLA_SUCCESS;
312 }

References bl1_saxpyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_ops_var3()

FLA_Error FLA_Hess_UT_step_ops_var3 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_T,
int  rs_T,
int  cs_T 
)
101 {
102  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
103  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
104  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
105  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
106 
107  float first_elem;
108  float dot_product;
109  float beta, conj_beta;
110  float inv_tau11;
111  float minus_inv_tau11;
112  float minus_upsilon1, minus_conj_upsilon1;
113  float minus_psi1, minus_conj_psi1;
114  float minus_zeta1;
115  int i;
116 
117  // b_alg = FLA_Obj_length( T );
118  int b_alg = m_T;
119 
120  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
121  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
122  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
123  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
124  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
125  float* buff_u = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
126  float* buff_y = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
127  float* buff_z = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
128  float* buff_v = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
129  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
130  int inc_u = 1;
131  int inc_y = 1;
132  int inc_z = 1;
133  int inc_v = 1;
134  int inc_w = 1;
135 
136  // Initialize some variables (only to prevent compiler warnings).
137  first_elem = *buff_0;
138  minus_inv_tau11 = *buff_0;
139 
140  for ( i = 0; i < b_alg; ++i )
141  {
142  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
143  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
144  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
145  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
146  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
147  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
148 
149  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
150  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
151 
152  float* upsilon1 = buff_u + (i )*inc_u;
153  float* u2 = buff_u + (i+1)*inc_u;
154 
155  float* y0 = buff_y + (0 )*inc_y;
156  float* psi1 = buff_y + (i )*inc_y;
157  float* y2 = buff_y + (i+1)*inc_y;
158 
159  float* zeta1 = buff_z + (i )*inc_z;
160  float* z2 = buff_z + (i+1)*inc_z;
161 
162  float* v2 = buff_v + (i+1)*inc_v;
163 
164  float* w2 = buff_w + (i+1)*inc_w;
165 
166  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
167  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
168 
169  int m_ahead = m_A - i - 1;
170  int n_ahead = m_A - i - 1;
171  int m_behind = i;
172  int n_behind = i;
173 
174  /*------------------------------------------------------------*/
175 
176  if ( m_behind > 0 )
177  {
178  // FLA_Copy( upsilon1, minus_upsilon1 );
179  // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
180  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
181  bl1_smult3( buff_m1, upsilon1, &minus_upsilon1 );
182  bl1_scopyconj( &minus_upsilon1, &minus_conj_upsilon1 );
183 
184  // FLA_Copy( psi1, minus_psi1 );
185  // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
186  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
187  bl1_smult3( buff_m1, psi1, &minus_psi1 );
188  bl1_scopyconj( &minus_psi1, &minus_conj_psi1 );
189 
190  // FLA_Copy( zeta1, minus_zeta1 );
191  // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
192  bl1_smult3( buff_m1, zeta1, &minus_zeta1 );
193 
194  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
195  // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
197  1,
198  &minus_upsilon1,
199  psi1, 1,
200  alpha11, 1 );
202  1,
203  &minus_zeta1,
204  upsilon1, 1,
205  alpha11, 1 );
206 
207  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
208  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
210  m_ahead,
211  &minus_upsilon1,
212  y2, inc_y,
213  a12t, cs_A );
215  m_ahead,
216  &minus_zeta1,
217  u2, inc_u,
218  a12t, cs_A );
219 
220  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
221  // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
223  m_ahead,
224  &minus_conj_psi1,
225  u2, inc_u,
226  a21, rs_A );
228  m_ahead,
229  &minus_conj_upsilon1,
230  z2, inc_z,
231  a21, rs_A );
232  }
233 
234  if ( m_ahead > 0 )
235  {
236  // FLA_Househ2_UT( FLA_LEFT,
237  // a21_t,
238  // a21_b, tau11 );
239  FLA_Househ2_UT_l_ops( m_ahead - 1,
240  a21_t,
241  a21_b, rs_A,
242  tau11 );
243 
244  // FLA_Set( FLA_ONE, inv_tau11 );
245  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
246  // FLA_Copy( inv_tau11, minus_inv_tau11 );
247  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
248  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
249  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
250 
251  // FLA_Copy( a21_t, first_elem );
252  // FLA_Set( FLA_ONE, a21_t );
253  first_elem = *a21_t;
254  *a21_t = *buff_1;
255  }
256 
257  if ( m_behind > 0 )
258  {
259  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
260  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
263  m_ahead,
264  n_ahead,
265  buff_m1,
266  u2, inc_u,
267  y2, inc_y,
268  A22, rs_A, cs_A );
271  m_ahead,
272  n_ahead,
273  buff_m1,
274  z2, inc_z,
275  u2, inc_u,
276  A22, rs_A, cs_A );
277  }
278 
279  if ( m_ahead > 0 )
280  {
281  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
284  m_ahead,
285  n_ahead,
286  buff_1,
287  A22, rs_A, cs_A,
288  a21, rs_A,
289  buff_0,
290  v2, inc_v );
291 
292  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
295  m_ahead,
296  n_ahead,
297  buff_1,
298  A22, rs_A, cs_A,
299  a21, rs_A,
300  buff_0,
301  w2, inc_w );
302 
303  // FLA_Copy( a21, u2 );
304  // FLA_Copy( v2, y2 );
305  // FLA_Copy( w2, z2 );
307  m_ahead,
308  a21, rs_A,
309  u2, inc_u );
311  m_ahead,
312  v2, inc_v,
313  y2, inc_y );
315  m_ahead,
316  w2, inc_w,
317  z2, inc_z );
318 
319  // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
320  // FLA_Inv_scal( FLA_TWO, beta );
321  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
323  m_ahead,
324  a21, rs_A,
325  z2, inc_z,
326  &beta );
327  bl1_sinvscals( buff_2, &beta );
328  bl1_scopyconj( &beta, &conj_beta );
329 
330  // FLA_Scal( minus_inv_tau11, conj_beta );
331  // FLA_Axpy( conj_beta, a21, y2 );
332  // FLA_Scal( inv_tau11, y2 );
333  bl1_sscals( &minus_inv_tau11, &conj_beta );
335  m_ahead,
336  &conj_beta,
337  a21, rs_A,
338  y2, inc_y );
340  m_ahead,
341  &inv_tau11,
342  y2, inc_y );
343 
344  // FLA_Scal( minus_inv_tau11, beta );
345  // FLA_Axpy( beta, a21, z2 );
346  // FLA_Scal( inv_tau11, z2 );
347  bl1_sscals( &minus_inv_tau11, &beta );
349  m_ahead,
350  &beta,
351  a21, rs_A,
352  z2, inc_z );
354  m_ahead,
355  &inv_tau11,
356  z2, inc_z );
357 
358  // FLA_Dot( a12t, a21, dot_product );
359  // FLA_Scal( minus_inv_tau11, dot_product );
360  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
362  m_ahead,
363  a12t, cs_A,
364  a21, rs_A,
365  &dot_product );
366  bl1_sscals( &minus_inv_tau11, &dot_product );
368  m_ahead,
369  &dot_product,
370  a21, rs_A,
371  a12t, cs_A );
372 
373  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
374  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
377  m_behind,
378  n_ahead,
379  buff_1,
380  A02, rs_A, cs_A,
381  a21, rs_A,
382  buff_0,
383  y0, inc_y );
386  m_behind,
387  n_ahead,
388  &minus_inv_tau11,
389  y0, inc_y,
390  a21, rs_A,
391  A02, rs_A, cs_A );
392 
393  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
396  m_ahead,
397  n_behind,
398  buff_1,
399  A20, rs_A, cs_A,
400  a21, rs_A,
401  buff_0,
402  t01, rs_T );
403 
404  // FLA_Copy( first_elem, a21_t );
405  *a21_t = first_elem;
406  }
407 
408  if ( m_behind + 1 == b_alg && m_ahead > 0 )
409  {
410  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
411  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
414  m_ahead,
415  n_ahead,
416  buff_m1,
417  u2, inc_u,
418  y2, inc_y,
419  A22, rs_A, cs_A );
422  m_ahead,
423  n_ahead,
424  buff_m1,
425  z2, inc_z,
426  u2, inc_u,
427  A22, rs_A, cs_A );
428  }
429 
430  /*------------------------------------------------------------*/
431 
432  }
433 
434  // FLA_Obj_free( &u );
435  // FLA_Obj_free( &y );
436  // FLA_Obj_free( &z );
437  // FLA_Obj_free( &v );
438  // FLA_Obj_free( &w );
439  FLA_free( buff_u );
440  FLA_free( buff_y );
441  FLA_free( buff_z );
442  FLA_free( buff_v );
443  FLA_free( buff_w );
444 
445  return FLA_SUCCESS;
446 }

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_ops_var4()

FLA_Error FLA_Hess_UT_step_ops_var4 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_Y,
int  rs_Y,
int  cs_Y,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
138 {
139  float* buff_2 = FLA_FLOAT_PTR( FLA_TWO );
140  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
141  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
142  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
143 
144  float first_elem, last_elem;
145  float dot_product;
146  float beta, conj_beta;
147  float inv_tau11;
148  float minus_inv_tau11;
149  int i;
150 
151  // b_alg = FLA_Obj_length( T );
152  int b_alg = m_T;
153 
154  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
155  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
156  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
157  float* buff_d = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
158  float* buff_e = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
159  float* buff_f = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
160  int inc_d = 1;
161  int inc_e = 1;
162  int inc_f = 1;
163 
164  // FLA_Set( FLA_ZERO, Y );
165  // FLA_Set( FLA_ZERO, Z );
166  bl1_ssetm( m_A,
167  b_alg,
168  buff_0,
169  buff_Y, rs_Y, cs_Y );
170  bl1_ssetm( m_A,
171  b_alg,
172  buff_0,
173  buff_Z, rs_Z, cs_Z );
174 
175  for ( i = 0; i < b_alg; ++i )
176  {
177  float* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
178  float* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
179  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
180  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
181  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
182  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
183  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
184 
185  float* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
186  float* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
187  float* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
188 
189  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
190  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
191  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
192 
193  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
194  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
195 
196  float* d0 = buff_d + (0 )*inc_d;
197 
198  float* e0 = buff_e + (0 )*inc_e;
199 
200  float* f0 = buff_f + (0 )*inc_f;
201 
202  float* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
203 
204  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
205  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
206 
207  float* ABL = a10t;
208  float* ZBL = z10t;
209 
210  float* a2 = alpha11;
211 
212  int m_ahead = m_A - i - 1;
213  int n_ahead = m_A - i - 1;
214  int m_behind = i;
215  int n_behind = i;
216 
217  /*------------------------------------------------------------*/
218 
219  if ( m_behind > 0 )
220  {
221  // FLA_Copy( a10t_r, last_elem );
222  // FLA_Set( FLA_ONE, a10t_r );
223  last_elem = *a10t_r;
224  *a10t_r = *buff_1;
225  }
226 
227  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
228  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
231  m_ahead + 1,
232  n_behind,
233  buff_m1,
234  ABL, rs_A, cs_A,
235  y10t, cs_Y,
236  buff_1,
237  a2, rs_A );
240  m_ahead + 1,
241  n_behind,
242  buff_m1,
243  ZBL, rs_Z, cs_Z,
244  a10t, cs_A,
245  buff_1,
246  a2, rs_A );
247 
248  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
249  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
252  m_ahead,
253  n_behind,
254  buff_m1,
255  Y20, rs_Y, cs_Y,
256  a10t, cs_A,
257  buff_1,
258  a12t, cs_A );
261  m_ahead,
262  n_behind,
263  buff_m1,
264  A20, rs_A, cs_A,
265  z10t, cs_Z,
266  buff_1,
267  a12t, cs_A );
268 
269  if ( m_behind > 0 )
270  {
271  // FLA_Copy( last_elem, a10t_r );
272  *a10t_r = last_elem;
273  }
274 
275  if ( m_ahead > 0 )
276  {
277  // FLA_Househ2_UT( FLA_LEFT,
278  // a21_t,
279  // a21_b, tau11 );
280  FLA_Househ2_UT_l_ops( m_ahead - 1,
281  a21_t,
282  a21_b, rs_A,
283  tau11 );
284 
285  // FLA_Set( FLA_ONE, inv_tau11 );
286  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
287  // FLA_Copy( inv_tau11, minus_inv_tau11 );
288  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
289  bl1_sdiv3( buff_1, tau11, &inv_tau11 );
290  bl1_sneg2( &inv_tau11, &minus_inv_tau11 );
291 
292  // FLA_Copy( a21_t, first_elem );
293  // FLA_Set( FLA_ONE, a21_t );
294  first_elem = *a21_t;
295  *a21_t = *buff_1;
296 
297  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
300  m_ahead,
301  n_ahead,
302  buff_1,
303  A22, rs_A, cs_A,
304  a21, rs_A,
305  buff_0,
306  y21, rs_Y );
307 
308  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
311  m_ahead,
312  n_ahead,
313  buff_1,
314  A22, rs_A, cs_A,
315  a21, rs_A,
316  buff_0,
317  z21, rs_Z );
318 
319  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
320  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
321  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
324  m_ahead,
325  n_behind,
326  buff_1,
327  A20, rs_A, cs_A,
328  a21, rs_A,
329  buff_0,
330  d0, inc_d );
333  m_ahead,
334  n_behind,
335  buff_1,
336  Y20, rs_Y, cs_Y,
337  a21, rs_A,
338  buff_0,
339  e0, inc_e );
342  m_ahead,
343  n_behind,
344  buff_1,
345  Z20, rs_Z, cs_Z,
346  a21, rs_A,
347  buff_0,
348  f0, inc_f );
349 
350  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
351  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
354  m_ahead,
355  n_behind,
356  buff_m1,
357  Y20, rs_Y, cs_Y,
358  d0, inc_d,
359  buff_1,
360  y21, rs_Y );
363  m_ahead,
364  n_behind,
365  buff_m1,
366  A20, rs_A, cs_A,
367  f0, inc_f,
368  buff_1,
369  y21, rs_Y );
370 
371  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
372  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
375  m_ahead,
376  n_behind,
377  buff_m1,
378  A20, rs_A, cs_A,
379  e0, inc_e,
380  buff_1,
381  z21, rs_Z );
384  m_ahead,
385  n_behind,
386  buff_m1,
387  Z20, rs_Z, cs_Z,
388  d0, inc_d,
389  buff_1,
390  z21, rs_Z );
391 
392  // FLA_Copy( d0, t01 );
394  n_behind,
395  d0, inc_d,
396  t01, rs_T );
397 
398  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
399  // FLA_Inv_scal( FLA_TWO, beta );
400  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
402  m_ahead,
403  a21, rs_A,
404  z21, rs_Z,
405  &beta );
406  bl1_sinvscals( buff_2, &beta );
407  bl1_scopyconj( &beta, &conj_beta );
408 
409  // FLA_Scal( minus_inv_tau11, conj_beta );
410  // FLA_Axpy( conj_beta, a21, y21 );
411  // FLA_Scal( inv_tau11, y21 );
412  bl1_sscals( &minus_inv_tau11, &conj_beta );
414  m_ahead,
415  &conj_beta,
416  a21, rs_A,
417  y21, rs_Y );
419  m_ahead,
420  &inv_tau11,
421  y21, rs_Y );
422 
423  // FLA_Scal( minus_inv_tau11, beta );
424  // FLA_Axpy( beta, a21, z21 );
425  // FLA_Scal( inv_tau11, z21 );
426  bl1_sscals( &minus_inv_tau11, &beta );
428  m_ahead,
429  &beta,
430  a21, rs_A,
431  z21, rs_Z );
433  m_ahead,
434  &inv_tau11,
435  z21, rs_Z );
436 
437  // FLA_Dot( a12t, a21, dot_product );
438  // FLA_Scal( minus_inv_tau11, dot_product );
439  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
441  m_ahead,
442  a12t, cs_A,
443  a21, rs_A,
444  &dot_product );
445  bl1_sscals( &minus_inv_tau11, &dot_product );
447  m_ahead,
448  &dot_product,
449  a21, rs_A,
450  a12t, cs_A );
451 
452  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
453  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
456  m_behind,
457  n_ahead,
458  buff_1,
459  A02, rs_A, cs_A,
460  a21, rs_A,
461  buff_0,
462  e0, inc_e );
465  m_behind,
466  n_ahead,
467  &minus_inv_tau11,
468  e0, inc_e,
469  a21, rs_A,
470  A02, rs_A, cs_A );
471 
472  // FLA_Copy( first_elem, a21_t );
473  *a21_t = first_elem;
474  }
475 
476  /*------------------------------------------------------------*/
477 
478  }
479 
480  // FLA_Obj_free( &d );
481  // FLA_Obj_free( &e );
482  // FLA_Obj_free( &f );
483  FLA_free( buff_d );
484  FLA_free( buff_e );
485  FLA_free( buff_f );
486 
487  return FLA_SUCCESS;
488 }

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sgemv(), bl1_sger(), bl1_sscalv(), bl1_ssetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_ops_var5()

FLA_Error FLA_Hess_UT_step_ops_var5 ( int  m_A,
int  m_T,
float *  buff_A,
int  rs_A,
int  cs_A,
float *  buff_U,
int  rs_U,
int  cs_U,
float *  buff_Z,
int  rs_Z,
int  cs_Z,
float *  buff_T,
int  rs_T,
int  cs_T 
)
138 {
139  float* buff_1 = FLA_FLOAT_PTR( FLA_ONE );
140  float* buff_0 = FLA_FLOAT_PTR( FLA_ZERO );
141  float* buff_m1 = FLA_FLOAT_PTR( FLA_MINUS_ONE );
142  int i;
143 
144  // b_alg = FLA_Obj_length( T );
145  int b_alg = m_T;
146 
147  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
148  float* buff_w = ( float* ) FLA_malloc( m_A * sizeof( *buff_A ) );
149  int inc_w = 1;
150 
151  // FLA_Set( FLA_ZERO, U );
152  // FLA_Set( FLA_ZERO, Z );
153  bl1_ssetm( m_A,
154  b_alg,
155  buff_0,
156  buff_U, rs_U, cs_U );
157  bl1_ssetm( m_A,
158  b_alg,
159  buff_0,
160  buff_Z, rs_Z, cs_Z );
161 
162  for ( i = 0; i < b_alg; ++i )
163  {
164  float* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
165  float* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
166  float* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
167  float* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
168  float* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
169  float* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
170 
171  float* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
172  float* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
173  float* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
174  float* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
175 
176  float* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
177  float* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
178  float* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
179  float* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
180  float* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
181  float* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
182 
183  float* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
184  float* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
185  float* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
186 
187  float* w0 = buff_w + (0 )*inc_w;
188 
189  float* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
190  float* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
191 
192  float* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
193 
194  int m_ahead = m_A - i - 1;
195  int n_ahead = m_A - i - 1;
196  int m_behind = i;
197  int n_behind = i;
198 
199  /*------------------------------------------------------------*/
200 
201  if ( m_behind > 0 )
202  {
203  // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
204  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
205  // T00, w0 );
207  m_behind,
208  u10t, cs_U,
209  w0, inc_w );
213  m_behind,
214  T00, rs_T, cs_T,
215  w0, inc_w );
216 
217  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
218  // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
219  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
222  m_behind,
223  n_behind,
224  buff_m1,
225  Z00, rs_Z, cs_Z,
226  w0, inc_w,
227  buff_1,
228  a01, rs_A );
230  m_behind,
231  buff_m1,
232  z10t, cs_Z,
233  w0, inc_w,
234  buff_1,
235  alpha11 );
238  m_ahead,
239  n_behind,
240  buff_m1,
241  Z20, rs_Z, cs_Z,
242  w0, inc_w,
243  buff_1,
244  a21, rs_A );
245 
246  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
247  // FLA_ONE, U00, a01, FLA_ZERO, w0 );
248  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
249  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
251  m_behind,
252  a01, rs_A,
253  w0, inc_w );
257  m_behind,
258  U00, rs_U, cs_U,
259  w0, inc_w );
261  m_behind,
262  alpha11,
263  u10t, cs_U,
264  w0, inc_w );
267  m_ahead,
268  n_behind,
269  buff_1,
270  U20, rs_U, cs_U,
271  a21, rs_A,
272  buff_1,
273  w0, inc_w );
274 
275  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
276  // T00, w0 );
280  m_behind,
281  T00, rs_T, cs_T,
282  w0, inc_w );
283 
284  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
285  // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
286  // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
287  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
291  m_behind,
292  buff_m1,
293  U00, rs_U, cs_U,
294  w0, inc_w,
295  buff_1,
296  a01, rs_A );
298  m_behind,
299  buff_m1,
300  u10t, cs_U,
301  w0, inc_w,
302  buff_1,
303  alpha11 );
306  m_ahead,
307  n_behind,
308  buff_m1,
309  U20, rs_U, cs_U,
310  w0, inc_w,
311  buff_1,
312  a21, rs_A );
313  }
314 
315  if ( m_ahead > 0 )
316  {
317  // FLA_Househ2_UT( FLA_LEFT,
318  // a21_t,
319  // a21_b, tau11 );
320  FLA_Househ2_UT_l_ops( m_ahead - 1,
321  a21_t,
322  a21_b, rs_A,
323  tau11 );
324 
325  // FLA_Copy( a21, u21 );
327  m_ahead,
328  a21, rs_A,
329  u21, rs_U );
330 
331  // FLA_Set( FLA_ONE, u21_t );
332  *u21_t = *buff_1;
333 
334  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
335  // FLA_Dot( a12t, u21, zeta11 );
336  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
339  m_behind,
340  n_ahead,
341  buff_1,
342  A02, rs_A, cs_A,
343  u21, rs_U,
344  buff_0,
345  z01, rs_Z );
347  m_ahead,
348  a12t, cs_A,
349  u21, rs_U,
350  zeta11 );
353  m_ahead,
354  n_ahead,
355  buff_1,
356  A22, rs_A, cs_A,
357  u21, rs_U,
358  buff_0,
359  z21, rs_Z );
360 
361  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
364  m_ahead,
365  n_behind,
366  buff_1,
367  U20, rs_U, cs_U,
368  u21, rs_U,
369  buff_0,
370  t01, rs_T );
371  }
372 
373  /*------------------------------------------------------------*/
374 
375  }
376 
377  // FLA_Obj_free( &w );
378  FLA_free( buff_w );
379 
380  return FLA_SUCCESS;
381 }
void bl1_sdots(conj1_t conj, int n, float *alpha, float *x, int incx, float *y, int incy, float *beta, float *rho)
Definition: bl1_dots.c:13
void bl1_strmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trmv.c:13
void bl1_strmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *alpha, float *a, int a_rs, int a_cs, float *x, int incx, float *beta, float *y, int incy)
Definition: bl1_trmvsx.c:13
void bl1_strsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, float *a, int a_rs, int a_cs, float *x, int incx)
Definition: bl1_trsv.c:13

References bl1_saxpyv(), bl1_scopyv(), bl1_sdot(), bl1_sdots(), bl1_sgemv(), bl1_ssetm(), bl1_strmv(), bl1_strmvsx(), bl1_strsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_ops(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

◆ FLA_Hess_UT_step_opt_var1()

// FLA_Hess_UT_step_opt_var1
//
// Datatype dispatcher for variant 1 of the optimized Hess_UT step. Queries
// the datatype of A, the lengths of A and T and the row/column strides of
// both objects, then forwards the raw buffers to the type-specific kernel
// FLA_Hess_UT_step_op{s,d,c,z}_var1().
//
// Parameters:
//   A - object whose datatype selects the kernel; its length, strides and
//       buffer are passed through as m_A, rs_A/cs_A and buff_A.
//   T - object whose length, strides and buffer are passed through as m_T,
//       rs_T/cs_T and buff_T.
//
// Returns:
//   FLA_SUCCESS, unconditionally. The kernel's return value is discarded,
//   and a datatype matching none of the four cases falls through the switch
//   without any work being done.
FLA_Error FLA_Hess_UT_step_opt_var1( FLA_Obj A, FLA_Obj T )
{
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int          m_A      = FLA_Obj_length( A );
  int          m_T      = FLA_Obj_length( T );

  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );

  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_T = FLA_FLOAT_PTR( T );

      FLA_Hess_UT_step_ops_var1( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_T = FLA_DOUBLE_PTR( T );

      FLA_Hess_UT_step_opd_var1( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_T = FLA_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opc_var1( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opz_var1( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Hess_UT_step_opd_var1(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var1.c:190
FLA_Error FLA_Hess_UT_step_opz_var1(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var1.c:376
FLA_Error FLA_Hess_UT_step_opc_var1(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var1.c:283
FLA_Error FLA_Hess_UT_step_ops_var1(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var1.c:97

References FLA_Hess_UT_step_opc_var1(), FLA_Hess_UT_step_opd_var1(), FLA_Hess_UT_step_ops_var1(), FLA_Hess_UT_step_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var1(), and FLA_Hess_UT_opt_var1().

◆ FLA_Hess_UT_step_opt_var2()

// FLA_Hess_UT_step_opt_var2
//
// Datatype dispatcher for variant 2 of the optimized Hess_UT step. Queries
// the datatype of A, the lengths of A and T and the row/column strides of
// both objects, then forwards the raw buffers to the type-specific kernel
// FLA_Hess_UT_step_op{s,d,c,z}_var2().
//
// Parameters:
//   A - object whose datatype selects the kernel; its length, strides and
//       buffer are passed through as m_A, rs_A/cs_A and buff_A.
//   T - object whose length, strides and buffer are passed through as m_T,
//       rs_T/cs_T and buff_T.
//
// Returns:
//   FLA_SUCCESS, unconditionally. The kernel's return value is discarded,
//   and a datatype matching none of the four cases falls through the switch
//   without any work being done.
FLA_Error FLA_Hess_UT_step_opt_var2( FLA_Obj A, FLA_Obj T )
{
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int          m_A      = FLA_Obj_length( A );
  int          m_T      = FLA_Obj_length( T );

  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );

  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_T = FLA_FLOAT_PTR( T );

      FLA_Hess_UT_step_ops_var2( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_T = FLA_DOUBLE_PTR( T );

      FLA_Hess_UT_step_opd_var2( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_T = FLA_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opc_var2( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opz_var2( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Hess_UT_step_opc_var2(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var2.c:535
FLA_Error FLA_Hess_UT_step_opd_var2(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var2.c:316
FLA_Error FLA_Hess_UT_step_opz_var2(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var2.c:754
FLA_Error FLA_Hess_UT_step_ops_var2(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var2.c:97

References FLA_Hess_UT_step_opc_var2(), FLA_Hess_UT_step_opd_var2(), FLA_Hess_UT_step_ops_var2(), FLA_Hess_UT_step_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var2(), and FLA_Hess_UT_opt_var2().

◆ FLA_Hess_UT_step_opt_var3()

// FLA_Hess_UT_step_opt_var3
//
// Datatype dispatcher for variant 3 of the optimized Hess_UT step. Queries
// the datatype of A, the lengths of A and T and the row/column strides of
// both objects, then forwards the raw buffers to the type-specific kernel
// FLA_Hess_UT_step_op{s,d,c,z}_var3().
//
// Parameters:
//   A - object whose datatype selects the kernel; its length, strides and
//       buffer are passed through as m_A, rs_A/cs_A and buff_A.
//   T - object whose length, strides and buffer are passed through as m_T,
//       rs_T/cs_T and buff_T.
//
// Returns:
//   FLA_SUCCESS, unconditionally. The kernel's return value is discarded,
//   and a datatype matching none of the four cases falls through the switch
//   without any work being done.
FLA_Error FLA_Hess_UT_step_opt_var3( FLA_Obj A, FLA_Obj T )
{
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int          m_A      = FLA_Obj_length( A );
  int          m_T      = FLA_Obj_length( T );

  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );

  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_T = FLA_FLOAT_PTR( T );

      FLA_Hess_UT_step_ops_var3( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_T = FLA_DOUBLE_PTR( T );

      FLA_Hess_UT_step_opd_var3( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_T = FLA_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opc_var3( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opz_var3( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_T, rs_T, cs_T );

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Hess_UT_step_opz_var3(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var3.c:1156
FLA_Error FLA_Hess_UT_step_opd_var3(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var3.c:450
FLA_Error FLA_Hess_UT_step_opc_var3(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var3.c:803
FLA_Error FLA_Hess_UT_step_ops_var3(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var3.c:97

References FLA_Hess_UT_step_opc_var3(), FLA_Hess_UT_step_opd_var3(), FLA_Hess_UT_step_ops_var3(), FLA_Hess_UT_step_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var3(), and FLA_Hess_UT_opt_var3().

◆ FLA_Hess_UT_step_opt_var4()

// FLA_Hess_UT_step_opt_var4
//
// Datatype dispatcher for variant 4 of the optimized Hess_UT step. Queries
// the datatype of A, the lengths of A and T and the row/column strides of
// A, Y, Z and T, then forwards the raw buffers to the type-specific kernel
// FLA_Hess_UT_step_op{s,d,c,z}_var4().
//
// Parameters:
//   A - object whose datatype selects the kernel; its length, strides and
//       buffer are passed through as m_A, rs_A/cs_A and buff_A.
//   Y - object whose strides and buffer are passed through.
//   Z - object whose strides and buffer are passed through.
//   T - object whose length, strides and buffer are passed through as m_T,
//       rs_T/cs_T and buff_T.
//
// Only A's datatype is inspected; the buffers of Y, Z and T are read with
// the same element type as A.
//
// Returns:
//   FLA_SUCCESS, unconditionally. The kernel's return value is discarded,
//   and a datatype matching none of the four cases falls through the switch
//   without any work being done.
FLA_Error FLA_Hess_UT_step_opt_var4( FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T )
{
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int          m_A      = FLA_Obj_length( A );
  int          m_T      = FLA_Obj_length( T );

  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );

  int          rs_Y     = FLA_Obj_row_stride( Y );
  int          cs_Y     = FLA_Obj_col_stride( Y );

  int          rs_Z     = FLA_Obj_row_stride( Z );
  int          cs_Z     = FLA_Obj_col_stride( Z );

  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_Y = FLA_FLOAT_PTR( Y );
      float* buff_Z = FLA_FLOAT_PTR( Z );
      float* buff_T = FLA_FLOAT_PTR( T );

      FLA_Hess_UT_step_ops_var4( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_Y, rs_Y, cs_Y,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_Y = FLA_DOUBLE_PTR( Y );
      double* buff_Z = FLA_DOUBLE_PTR( Z );
      double* buff_T = FLA_DOUBLE_PTR( T );

      FLA_Hess_UT_step_opd_var4( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_Y, rs_Y, cs_Y,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_Y = FLA_COMPLEX_PTR( Y );
      scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
      scomplex* buff_T = FLA_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opc_var4( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_Y, rs_Y, cs_Y,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_Y = FLA_DOUBLE_COMPLEX_PTR( Y );
      dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
      dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opz_var4( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_Y, rs_Y, cs_Y,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Hess_UT_step_opc_var4(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_Y, int rs_Y, int cs_Y, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var4.c:852
FLA_Error FLA_Hess_UT_step_opz_var4(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_Y, int rs_Y, int cs_Y, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var4.c:1212
FLA_Error FLA_Hess_UT_step_ops_var4(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_Y, int rs_Y, int cs_Y, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var4.c:132
FLA_Error FLA_Hess_UT_step_opd_var4(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_Y, int rs_Y, int cs_Y, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var4.c:492

References FLA_Hess_UT_step_opc_var4(), FLA_Hess_UT_step_opd_var4(), FLA_Hess_UT_step_ops_var4(), FLA_Hess_UT_step_opz_var4(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var4(), and FLA_Hess_UT_opt_var4().

◆ FLA_Hess_UT_step_opt_var5()

// FLA_Hess_UT_step_opt_var5
//
// Datatype dispatcher for variant 5 of the optimized Hess_UT step. Queries
// the datatype of A, the lengths of A and T and the row/column strides of
// A, U, Z and T, then forwards the raw buffers to the type-specific kernel
// FLA_Hess_UT_step_op{s,d,c,z}_var5().
//
// Parameters:
//   A - object whose datatype selects the kernel; its length, strides and
//       buffer are passed through as m_A, rs_A/cs_A and buff_A.
//   U - object whose strides and buffer are passed through.
//   Z - object whose strides and buffer are passed through.
//   T - object whose length, strides and buffer are passed through as m_T,
//       rs_T/cs_T and buff_T.
//
// Only A's datatype is inspected; the buffers of U, Z and T are read with
// the same element type as A.
//
// Returns:
//   FLA_SUCCESS, unconditionally. The kernel's return value is discarded,
//   and a datatype matching none of the four cases falls through the switch
//   without any work being done.
FLA_Error FLA_Hess_UT_step_opt_var5( FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T )
{
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int          m_A      = FLA_Obj_length( A );
  int          m_T      = FLA_Obj_length( T );

  int          rs_A     = FLA_Obj_row_stride( A );
  int          cs_A     = FLA_Obj_col_stride( A );

  int          rs_U     = FLA_Obj_row_stride( U );
  int          cs_U     = FLA_Obj_col_stride( U );

  int          rs_Z     = FLA_Obj_row_stride( Z );
  int          cs_Z     = FLA_Obj_col_stride( Z );

  int          rs_T     = FLA_Obj_row_stride( T );
  int          cs_T     = FLA_Obj_col_stride( T );

  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      float* buff_A = FLA_FLOAT_PTR( A );
      float* buff_U = FLA_FLOAT_PTR( U );
      float* buff_Z = FLA_FLOAT_PTR( Z );
      float* buff_T = FLA_FLOAT_PTR( T );

      FLA_Hess_UT_step_ops_var5( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_U, rs_U, cs_U,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE:
    {
      double* buff_A = FLA_DOUBLE_PTR( A );
      double* buff_U = FLA_DOUBLE_PTR( U );
      double* buff_Z = FLA_DOUBLE_PTR( Z );
      double* buff_T = FLA_DOUBLE_PTR( T );

      FLA_Hess_UT_step_opd_var5( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_U, rs_U, cs_U,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_A = FLA_COMPLEX_PTR( A );
      scomplex* buff_U = FLA_COMPLEX_PTR( U );
      scomplex* buff_Z = FLA_COMPLEX_PTR( Z );
      scomplex* buff_T = FLA_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opc_var5( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_U, rs_U, cs_U,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A );
      dcomplex* buff_U = FLA_DOUBLE_COMPLEX_PTR( U );
      dcomplex* buff_Z = FLA_DOUBLE_COMPLEX_PTR( Z );
      dcomplex* buff_T = FLA_DOUBLE_COMPLEX_PTR( T );

      FLA_Hess_UT_step_opz_var5( m_A,
                                 m_T,
                                 buff_A, rs_A, cs_A,
                                 buff_U, rs_U, cs_U,
                                 buff_Z, rs_Z, cs_Z,
                                 buff_T, rs_T, cs_T );

      break;
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Hess_UT_step_opz_var5(int m_A, int m_T, dcomplex *buff_A, int rs_A, int cs_A, dcomplex *buff_U, int rs_U, int cs_U, dcomplex *buff_Z, int rs_Z, int cs_Z, dcomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var5.c:891
FLA_Error FLA_Hess_UT_step_opd_var5(int m_A, int m_T, double *buff_A, int rs_A, int cs_A, double *buff_U, int rs_U, int cs_U, double *buff_Z, int rs_Z, int cs_Z, double *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var5.c:385
FLA_Error FLA_Hess_UT_step_opc_var5(int m_A, int m_T, scomplex *buff_A, int rs_A, int cs_A, scomplex *buff_U, int rs_U, int cs_U, scomplex *buff_Z, int rs_Z, int cs_Z, scomplex *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var5.c:638
FLA_Error FLA_Hess_UT_step_ops_var5(int m_A, int m_T, float *buff_A, int rs_A, int cs_A, float *buff_U, int rs_U, int cs_U, float *buff_Z, int rs_Z, int cs_Z, float *buff_T, int rs_T, int cs_T)
Definition: FLA_Hess_UT_opt_var5.c:132

References FLA_Hess_UT_step_opc_var5(), FLA_Hess_UT_step_opd_var5(), FLA_Hess_UT_step_ops_var5(), FLA_Hess_UT_step_opz_var5(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), and FLA_Obj_row_stride().

Referenced by FLA_Hess_UT_blk_var5(), and FLA_Hess_UT_opt_var5().

◆ FLA_Hess_UT_step_opz_var1()

// FLA_Hess_UT_step_opz_var1
//
// Double-complex kernel for variant 1 of the Hess_UT step, operating on raw
// strided buffers. For each i in [0, m_T) with at least one row below the
// diagonal it:
//   1. generates a Householder transform from column i below the diagonal
//      (a21), writing the scalar into the diagonal entry tau11 of T;
//   2. applies that transform from the left to the trailing rows
//      (A22_t / A22_b) and from the right to the trailing columns
//      (A2_l / A2_r), the latter over all m_A rows;
//   3. stores A20^H * a21 into t01, the part of column i of T above tau11.
// While the transform is applied, the first element of a21 is temporarily
// overwritten with one and restored afterwards.
//
// Parameters:
//   m_A            - dimension of A; both row and column extents of the
//                    trailing parts are derived from it.
//   m_T            - number of steps to perform (used as b_alg).
//   buff_A         - base pointer of A, updated in place.
//   rs_A, cs_A     - row and column strides of A.
//   buff_T         - base pointer of T; column i is written in step i.
//   rs_T, cs_T     - row and column strides of T.
//
// Returns:
//   FLA_SUCCESS, unconditionally.
//
// NOTE(review): the heads of the FLA_Apply_H2_UT_r_opz_var1() and
// bl1_zgemv() calls follow the reference comments above them and this
// function's cross-reference list; confirm against the upstream file.
FLA_Error FLA_Hess_UT_step_opz_var1( int m_A,
                                     int m_T,
                                     dcomplex* buff_A, int rs_A, int cs_A,
                                     dcomplex* buff_T, int rs_T, int cs_T )
{
  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );

  dcomplex  first_elem;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_T;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the current diagonal position (i,i).
    dcomplex* A20   = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* a21   = buff_A + (i  )*cs_A + (i+1)*rs_A;

    dcomplex* a21_t = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* a21_b = buff_A + (i  )*cs_A + (i+2)*rs_A;

    dcomplex* A22_t = buff_A + (i+1)*cs_A + (i+1)*rs_A;
    dcomplex* A22_b = buff_A + (i+1)*cs_A + (i+2)*rs_A;

    dcomplex* A2_l  = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* A2_r  = buff_A + (i+2)*cs_A + (0  )*rs_A;

    // Column i of T: the part above the diagonal and the diagonal entry.
    dcomplex* t01   = buff_T + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11 = buff_T + (i  )*cs_T + (i  )*rs_T;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = m_A - i - 1;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    if ( m_ahead > 0 )
    {
      // FLA_Househ2_UT( FLA_LEFT,
      //                 a21_t,
      //                 a21_b, tau11 );
      FLA_Househ2_UT_l_opz( m_ahead - 1,
                            a21_t,
                            a21_b, rs_A,
                            tau11 );

      // FLA_Copy( a21_t, first_elem );
      // FLA_Set( FLA_ONE, a21_t );
      first_elem = *a21_t;
      *a21_t     = *buff_1;

      // FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
      //                                          A22_b );
      FLA_Apply_H2_UT_l_opz_var1( m_ahead - 1,
                                  n_ahead,
                                  tau11,
                                  a21_b, rs_A,
                                  A22_t, cs_A,
                                  A22_b, rs_A, cs_A );

      // FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
      FLA_Apply_H2_UT_r_opz_var1( m_A,
                                  n_ahead - 1,
                                  tau11,
                                  a21_b, rs_A,
                                  A2_l,  rs_A,
                                  A2_r,  rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 t01, rs_T );

      // FLA_Copy( first_elem, a21_t );
      *a21_t = first_elem;
    }

    /*------------------------------------------------------------*/

  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_H2_UT_l_opz_var1(int m_u2_A2, int n_a1t, dcomplex *tau, dcomplex *u2, int inc_u2, dcomplex *a1t, int inc_a1t, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_l_opt_var1.c:343
FLA_Error FLA_Apply_H2_UT_r_opz_var1(int n_u2h_A2, int m_a1, dcomplex *tau, dcomplex *u2h, int inc_u2h, dcomplex *a1, int inc_a1, dcomplex *A2, int rs_A2, int cs_A2)
Definition: FLA_Apply_H2_UT_r_opt_var1.c:327

References bl1_zgemv(), BLIS1_CONJ_TRANSPOSE, BLIS1_NO_CONJUGATE, FLA_Apply_H2_UT_l_opz_var1(), FLA_Apply_H2_UT_r_opz_var1(), FLA_Househ2_UT_l_opz(), FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var1().

◆ FLA_Hess_UT_step_opz_var2()

// FLA_Hess_UT_step_opz_var2
//
// Double-complex kernel for variant 2 of the Hess_UT step, operating on raw
// strided buffers. For each i in [0, m_T) with at least one row below the
// diagonal it:
//   1. generates a Householder transform from column i below the diagonal
//      (a21), writing the scalar into the diagonal entry tau11 of T;
//   2. forms y2 = A22^H * a21 and z2 = A22 * a21, then corrects and scales
//      both with beta and 1/tau11 as spelled out in the reference comments;
//   3. updates the row a12t and the block A02 with the transform;
//   4. applies two rank-1 updates to A22 (a21 with y2, and z2 with a21);
//   5. stores A20^H * a21 into t01, the part of column i of T above tau11.
// While the transform is in use, the first element of a21 is temporarily
// overwritten with one and restored afterwards.
//
// Two work vectors of m_A elements (y and z) are allocated with
// FLA_malloc() on entry and released with FLA_free() before returning.
//
// Parameters:
//   m_A            - dimension of A; both row and column extents of the
//                    trailing parts are derived from it.
//   m_T            - number of steps to perform (used as b_alg).
//   buff_A         - base pointer of A, updated in place.
//   rs_A, cs_A     - row and column strides of A.
//   buff_T         - base pointer of T; column i is written in step i.
//   rs_T, cs_T     - row and column strides of T.
//
// Returns:
//   FLA_SUCCESS, unconditionally.
//
// NOTE(review): the heads of the bl1_z* calls (routine name plus leading
// transpose/conjugation flags) follow the reference comment above each call
// and this function's cross-reference list; confirm against the upstream
// file.
FLA_Error FLA_Hess_UT_step_opz_var2( int m_A,
                                     int m_T,
                                     dcomplex* buff_A, int rs_A, int cs_A,
                                     dcomplex* buff_T, int rs_T, int cs_T )
{
  dcomplex* buff_2  = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  first_elem;
  dcomplex  dot_product;
  dcomplex  beta, conj_beta;
  dcomplex  inv_tau11;
  dcomplex  minus_inv_tau11;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_T;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_y  = 1;
  int       inc_z  = 1;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the current diagonal position (i,i).
    dcomplex* A20   = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* a21   = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02   = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t  = buff_A + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22   = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T: the part above the diagonal and the diagonal entry.
    dcomplex* t01   = buff_T + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11 = buff_T + (i  )*cs_T + (i  )*rs_T;

    // Matching views into the work vectors.
    dcomplex* y0    = buff_y + (0  )*inc_y;
    dcomplex* y2    = buff_y + (i+1)*inc_y;

    dcomplex* z2    = buff_z + (i+1)*inc_z;

    dcomplex* a21_t = a21    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a21_b = a21    + (0  )*cs_A + (1  )*rs_A;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = m_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    if ( m_ahead > 0 )
    {
      // FLA_Househ2_UT( FLA_LEFT,
      //                 a21_t,
      //                 a21_b, tau11 );
      FLA_Househ2_UT_l_opz( m_ahead - 1,
                            a21_t,
                            a21_b, rs_A,
                            tau11 );

      // FLA_Set( FLA_ONE, inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
      // FLA_Copy( inv_tau11, minus_inv_tau11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
      bl1_zdiv3( buff_1, tau11, &inv_tau11 );
      bl1_zneg2( &inv_tau11, &minus_inv_tau11 );

      // FLA_Copy( a21_t, first_elem );
      // FLA_Set( FLA_ONE, a21_t );
      first_elem = *a21_t;
      *a21_t     = *buff_1;

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 y2,  inc_y );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 z2,  inc_z );

      // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
      // FLA_Inv_scal( FLA_TWO, beta );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
      bl1_zdot( BLIS1_CONJUGATE,
                m_ahead,
                a21, rs_A,
                z2,  inc_z,
                &beta );
      bl1_zinvscals( buff_2, &beta );
      bl1_zcopyconj( &beta, &conj_beta );

      // FLA_Scal( minus_inv_tau11, conj_beta );
      // FLA_Axpy( conj_beta, a21, y2 );
      // FLA_Scal( inv_tau11, y2 );
      bl1_zscals( &minus_inv_tau11, &conj_beta );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &conj_beta,
                  a21, rs_A,
                  y2,  inc_y );
      bl1_zscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  y2, inc_y );

      // FLA_Scal( minus_inv_tau11, beta );
      // FLA_Axpy( beta, a21, z2 );
      // FLA_Scal( inv_tau11, z2 );
      bl1_zscals( &minus_inv_tau11, &beta );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  a21, rs_A,
                  z2,  inc_z );
      bl1_zscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  z2, inc_z );

      // FLA_Dot( a12t, a21, dot_product );
      // FLA_Scal( minus_inv_tau11, dot_product );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
      bl1_zdot( BLIS1_NO_CONJUGATE,
                m_ahead,
                a12t, cs_A,
                a21,  rs_A,
                &dot_product );
      bl1_zscals( &minus_inv_tau11, &dot_product );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &dot_product,
                  a21,  rs_A,
                  a12t, cs_A );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 y0,  inc_y );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_behind,
                n_ahead,
                &minus_inv_tau11,
                y0,  inc_y,
                a21, rs_A,
                A02, rs_A, cs_A );

      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                a21, rs_A,
                y2,  inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z2,  inc_z,
                a21, rs_A,
                A22, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 t01, rs_T );

      // FLA_Copy( first_elem, a21_t );
      *a21_t = first_elem;
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  FLA_free( buff_y );
  FLA_free( buff_z );

  return FLA_SUCCESS;
}

References bl1_zaxpyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var2().

◆ FLA_Hess_UT_step_opz_var3()

// FLA_Hess_UT_step_opz_var3
//
// Double-complex kernel for variant 3 of the Hess_UT step, operating on raw
// strided buffers. Unlike variant 2, the two rank-1 updates of A22 built in
// step i are not applied in step i; they are kept in the work vectors u, y
// and z and applied at the start of step i+1 (or at the end of the final
// step). Each iteration i in [0, m_T) therefore runs in this order:
//   1. if i > 0: apply the pending update to alpha11, a12t and a21, using
//      entry i (upsilon1, psi1, zeta1) and the trailing parts (u2, y2, z2)
//      of the work vectors as left by the previous iteration;
//   2. if rows remain below the diagonal: generate the Householder
//      transform from a21 into tau11 and temporarily set a21's first
//      element to one;
//   3. if i > 0: apply the pending rank-1 updates to A22;
//   4. if rows remain below the diagonal: form v2 = A22^H * a21 and
//      w2 = A22 * a21, copy a21/v2/w2 into u2/y2/z2, correct and scale y2
//      and z2, update a12t and A02, store A20^H * a21 into t01, and restore
//      a21's first element;
//   5. on the last step (i + 1 == m_T) with rows remaining: apply the
//      just-built rank-1 updates to A22 immediately, since no later
//      iteration will do so.
//
// Five work vectors of m_A elements (u, y, z, v, w) are allocated with
// FLA_malloc() on entry and released with FLA_free() before returning.
//
// Parameters:
//   m_A            - dimension of A; both row and column extents of the
//                    trailing parts are derived from it.
//   m_T            - number of steps to perform (used as b_alg).
//   buff_A         - base pointer of A, updated in place.
//   rs_A, cs_A     - row and column strides of A.
//   buff_T         - base pointer of T; column i is written in step i.
//   rs_T, cs_T     - row and column strides of T.
//
// Returns:
//   FLA_SUCCESS, unconditionally.
//
// NOTE(review): the heads of the bl1_z* calls (routine name plus leading
// transpose/conjugation flags) follow the reference comment above each call
// and this function's cross-reference list; confirm against the upstream
// file.
FLA_Error FLA_Hess_UT_step_opz_var3( int m_A,
                                     int m_T,
                                     dcomplex* buff_A, int rs_A, int cs_A,
                                     dcomplex* buff_T, int rs_T, int cs_T )
{
  dcomplex* buff_2  = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
  dcomplex* buff_1  = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
  dcomplex* buff_0  = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );

  dcomplex  first_elem;
  dcomplex  dot_product;
  dcomplex  beta, conj_beta;
  dcomplex  inv_tau11;
  dcomplex  minus_inv_tau11;
  dcomplex  minus_upsilon1, minus_conj_upsilon1;
  dcomplex  minus_psi1, minus_conj_psi1;
  dcomplex  minus_zeta1;
  int       i;

  // b_alg = FLA_Obj_length( T );
  int       b_alg = m_T;

  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
  dcomplex* buff_u = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_y = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_z = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_v = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
  int       inc_u  = 1;
  int       inc_y  = 1;
  int       inc_z  = 1;
  int       inc_v  = 1;
  int       inc_w  = 1;

  // Initialize some variables (only to prevent compiler warnings).
  first_elem      = *buff_0;
  minus_inv_tau11 = *buff_0;

  for ( i = 0; i < b_alg; ++i )
  {
    // Views into A relative to the current diagonal position (i,i).
    dcomplex* A20      = buff_A + (0  )*cs_A + (i+1)*rs_A;
    dcomplex* alpha11  = buff_A + (i  )*cs_A + (i  )*rs_A;
    dcomplex* a21      = buff_A + (i  )*cs_A + (i+1)*rs_A;
    dcomplex* A02      = buff_A + (i+1)*cs_A + (0  )*rs_A;
    dcomplex* a12t     = buff_A + (i+1)*cs_A + (i  )*rs_A;
    dcomplex* A22      = buff_A + (i+1)*cs_A + (i+1)*rs_A;

    // Column i of T: the part above the diagonal and the diagonal entry.
    dcomplex* t01      = buff_T + (i  )*cs_T + (0  )*rs_T;
    dcomplex* tau11    = buff_T + (i  )*cs_T + (i  )*rs_T;

    // Entry i and the trailing part of each work vector.
    dcomplex* upsilon1 = buff_u + (i  )*inc_u;
    dcomplex* u2       = buff_u + (i+1)*inc_u;

    dcomplex* y0       = buff_y + (0  )*inc_y;
    dcomplex* psi1     = buff_y + (i  )*inc_y;
    dcomplex* y2       = buff_y + (i+1)*inc_y;

    dcomplex* zeta1    = buff_z + (i  )*inc_z;
    dcomplex* z2       = buff_z + (i+1)*inc_z;

    dcomplex* v2       = buff_v + (i+1)*inc_v;

    dcomplex* w2       = buff_w + (i+1)*inc_w;

    dcomplex* a21_t    = a21    + (0  )*cs_A + (0  )*rs_A;
    dcomplex* a21_b    = a21    + (0  )*cs_A + (1  )*rs_A;

    int       m_ahead  = m_A - i - 1;
    int       n_ahead  = m_A - i - 1;
    int       m_behind = i;
    int       n_behind = i;

    /*------------------------------------------------------------*/

    // Apply the update left pending by the previous iteration to the
    // current diagonal element, row and column.
    if ( m_behind > 0 )
    {
      // FLA_Copy( upsilon1, minus_upsilon1 );
      // FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
      bl1_zmult3( buff_m1, upsilon1, &minus_upsilon1 );
      bl1_zcopyconj( &minus_upsilon1, &minus_conj_upsilon1 );

      // FLA_Copy( psi1, minus_psi1 );
      // FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
      bl1_zmult3( buff_m1, psi1, &minus_psi1 );
      bl1_zcopyconj( &minus_psi1, &minus_conj_psi1 );

      // FLA_Copy( zeta1, minus_zeta1 );
      // FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
      bl1_zmult3( buff_m1, zeta1, &minus_zeta1 );

      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
      // FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  1,
                  &minus_upsilon1,
                  psi1,    1,
                  alpha11, 1 );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  1,
                  &minus_zeta1,
                  upsilon1, 1,
                  alpha11,  1 );

      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &minus_upsilon1,
                  y2,   inc_y,
                  a12t, cs_A );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &minus_zeta1,
                  u2,   inc_u,
                  a12t, cs_A );

      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
      // FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_psi1,
                  u2,  inc_u,
                  a21, rs_A );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &minus_conj_upsilon1,
                  z2,  inc_z,
                  a21, rs_A );
    }

    // Generate the transform from the now up-to-date column.
    if ( m_ahead > 0 )
    {
      // FLA_Househ2_UT( FLA_LEFT,
      //                 a21_t,
      //                 a21_b, tau11 );
      FLA_Househ2_UT_l_opz( m_ahead - 1,
                            a21_t,
                            a21_b, rs_A,
                            tau11 );

      // FLA_Set( FLA_ONE, inv_tau11 );
      // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
      // FLA_Copy( inv_tau11, minus_inv_tau11 );
      // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
      bl1_zdiv3( buff_1, tau11, &inv_tau11 );
      bl1_zneg2( &inv_tau11, &minus_inv_tau11 );

      // FLA_Copy( a21_t, first_elem );
      // FLA_Set( FLA_ONE, a21_t );
      first_elem = *a21_t;
      *a21_t     = *buff_1;
    }

    // Apply the pending rank-1 updates to A22. This must happen before
    // u2/y2/z2 are overwritten with this iteration's vectors below.
    if ( m_behind > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u2,  inc_u,
                y2,  inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z2,  inc_z,
                u2,  inc_u,
                A22, rs_A, cs_A );
    }

    if ( m_ahead > 0 )
    {
      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 v2,  inc_v );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_ahead,
                 buff_1,
                 A22, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 w2,  inc_w );

      // FLA_Copy( a21, u2 );
      // FLA_Copy( v2, y2 );
      // FLA_Copy( w2, z2 );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  a21, rs_A,
                  u2,  inc_u );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  v2, inc_v,
                  y2, inc_y );
      bl1_zcopyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  w2, inc_w,
                  z2, inc_z );

      // FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
      // FLA_Inv_scal( FLA_TWO, beta );
      // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
      bl1_zdot( BLIS1_CONJUGATE,
                m_ahead,
                a21, rs_A,
                z2,  inc_z,
                &beta );
      bl1_zinvscals( buff_2, &beta );
      bl1_zcopyconj( &beta, &conj_beta );

      // FLA_Scal( minus_inv_tau11, conj_beta );
      // FLA_Axpy( conj_beta, a21, y2 );
      // FLA_Scal( inv_tau11, y2 );
      bl1_zscals( &minus_inv_tau11, &conj_beta );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &conj_beta,
                  a21, rs_A,
                  y2,  inc_y );
      bl1_zscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  y2, inc_y );

      // FLA_Scal( minus_inv_tau11, beta );
      // FLA_Axpy( beta, a21, z2 );
      // FLA_Scal( inv_tau11, z2 );
      bl1_zscals( &minus_inv_tau11, &beta );
      bl1_zaxpyv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &beta,
                  a21, rs_A,
                  z2,  inc_z );
      bl1_zscalv( BLIS1_NO_CONJUGATE,
                  m_ahead,
                  &inv_tau11,
                  z2, inc_z );

      // FLA_Dot( a12t, a21, dot_product );
      // FLA_Scal( minus_inv_tau11, dot_product );
      // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
      bl1_zdot( BLIS1_NO_CONJUGATE,
                m_ahead,
                a12t, cs_A,
                a21,  rs_A,
                &dot_product );
      bl1_zscals( &minus_inv_tau11, &dot_product );
      bl1_zaxpyv( BLIS1_CONJUGATE,
                  m_ahead,
                  &dot_product,
                  a21,  rs_A,
                  a12t, cs_A );

      // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
      bl1_zgemv( BLIS1_NO_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_behind,
                 n_ahead,
                 buff_1,
                 A02, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 y0,  inc_y );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_behind,
                n_ahead,
                &minus_inv_tau11,
                y0,  inc_y,
                a21, rs_A,
                A02, rs_A, cs_A );

      // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
      bl1_zgemv( BLIS1_CONJ_TRANSPOSE,
                 BLIS1_NO_CONJUGATE,
                 m_ahead,
                 n_behind,
                 buff_1,
                 A20, rs_A, cs_A,
                 a21, rs_A,
                 buff_0,
                 t01, rs_T );

      // FLA_Copy( first_elem, a21_t );
      *a21_t = first_elem;
    }

    // Last step: no later iteration will pick up the pending update, so
    // apply it to A22 now.
    if ( m_behind + 1 == b_alg && m_ahead > 0 )
    {
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
      // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                u2,  inc_u,
                y2,  inc_y,
                A22, rs_A, cs_A );
      bl1_zger( BLIS1_NO_CONJUGATE,
                BLIS1_CONJUGATE,
                m_ahead,
                n_ahead,
                buff_m1,
                z2,  inc_z,
                u2,  inc_u,
                A22, rs_A, cs_A );
    }

    /*------------------------------------------------------------*/

  }

  // FLA_Obj_free( &u );
  // FLA_Obj_free( &y );
  // FLA_Obj_free( &z );
  // FLA_Obj_free( &v );
  // FLA_Obj_free( &w );
  FLA_free( buff_u );
  FLA_free( buff_y );
  FLA_free( buff_z );
  FLA_free( buff_v );
  FLA_free( buff_w );

  return FLA_SUCCESS;
}

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, i, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_step_opt_var3().

◆ FLA_Hess_UT_step_opz_var4()

FLA_Error FLA_Hess_UT_step_opz_var4 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_Y,
int  rs_Y,
int  cs_Y,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
1218 {
      // Double-complex (dcomplex) kernel. Runs b_alg = m_T iterations over the
      // matrix stored at buff_A (row stride rs_A, column stride cs_A), using
      // buff_Y, buff_Z and buff_T as additional matrices with their own strides.
      // m_ahead and n_ahead are both derived from m_A, so A is indexed as a
      // square m_A-by-m_A matrix. Y and Z are zeroed on entry; three scratch
      // vectors (d, e, f) are allocated here and freed before returning.
      // Always returns FLA_SUCCESS.
      //
      // NOTE(review): the embedded source line numbers skip before most of the
      // multi-line argument lists below, so the line(s) naming the routine being
      // called appear to be missing from this listing. The FLA_* comment that
      // precedes each argument list gives the object-level equivalent.
1219  dcomplex* buff_2 = FLA_DOUBLE_COMPLEX_PTR( FLA_TWO );
1220  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
1221  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
1222  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
1223 
1224  dcomplex first_elem, last_elem;
1225  dcomplex dot_product;
1226  dcomplex beta, conj_beta;
1227  dcomplex inv_tau11;
1228  dcomplex minus_inv_tau11;
1229  int i;
1230 
1231  // b_alg = FLA_Obj_length( T );
1232  int b_alg = m_T;
1233 
      // Scratch vectors of m_A elements each, accessed with unit stride; they are
      // released by the FLA_free() calls at the end of the function.
1234  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
1235  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
1236  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
1237  dcomplex* buff_d = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1238  dcomplex* buff_e = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1239  dcomplex* buff_f = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
1240  int inc_d = 1;
1241  int inc_e = 1;
1242  int inc_f = 1;
1243 
      // Clear the m_A-by-b_alg panels Y and Z before the first iteration.
1244  // FLA_Set( FLA_ZERO, Y );
1245  // FLA_Set( FLA_ZERO, Z );
1246  bl1_zsetm( m_A,
1247  b_alg,
1248  buff_0,
1249  buff_Y, rs_Y, cs_Y );
1250  bl1_zsetm( m_A,
1251  b_alg,
1252  buff_0,
1253  buff_Z, rs_Z, cs_Z );
1254 
      // One iteration per column i. The pointers set up below address sub-blocks
      // of A, Y, Z and T relative to element (i, i).
1255  for ( i = 0; i < b_alg; ++i )
1256  {
1257  dcomplex* a10t = buff_A + (0 )*cs_A + (i )*rs_A;
1258  dcomplex* A20 = buff_A + (0 )*cs_A + (i+1)*rs_A;
1259  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
1260  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
1261  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
1262  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
1263  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
1264 
1265  dcomplex* y10t = buff_Y + (0 )*cs_Y + (i )*rs_Y;
1266  dcomplex* Y20 = buff_Y + (0 )*cs_Y + (i+1)*rs_Y;
1267  dcomplex* y21 = buff_Y + (i )*cs_Y + (i+1)*rs_Y;
1268 
1269  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
1270  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
1271  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
1272 
1273  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
1274  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
1275 
1276  dcomplex* d0 = buff_d + (0 )*inc_d;
1277 
1278  dcomplex* e0 = buff_e + (0 )*inc_e;
1279 
1280  dcomplex* f0 = buff_f + (0 )*inc_f;
1281 
      // Element (i, i-1) of A, i.e. the last entry of row a10t. In the visible
      // code it is dereferenced only inside the m_behind > 0 branches.
1282  dcomplex* a10t_r = a10t + (i-1)*cs_A + (0 )*rs_A;
1283 
1284  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
1285  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
1286 
      // ABL and ZBL start at row i, column 0 of A and Z; a2 starts at the diagonal
      // element. They are used below with m_ahead + 1 rows and n_behind columns.
1287  dcomplex* ABL = a10t;
1288  dcomplex* ZBL = z10t;
1289 
1290  dcomplex* a2 = alpha11;
1291 
1292  int m_ahead = m_A - i - 1;
1293  int n_ahead = m_A - i - 1;
1294  int m_behind = i;
1295  int n_behind = i;
1296 
1297  /*------------------------------------------------------------*/
1298 
      // Temporarily overwrite a10t_r with one; the original value is kept in
      // last_elem and written back after the four updates that follow.
1299  if ( m_behind > 0 )
1300  {
1301  // FLA_Copy( a10t_r, last_elem );
1302  // FLA_Set( FLA_ONE, a10t_r );
1303  last_elem = *a10t_r;
1304  *a10t_r = *buff_1;
1305  }
1306 
1307  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
1308  // FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
1311  m_ahead + 1,
1312  n_behind,
1313  buff_m1,
1314  ABL, rs_A, cs_A,
1315  y10t, cs_Y,
1316  buff_1,
1317  a2, rs_A );
1320  m_ahead + 1,
1321  n_behind,
1322  buff_m1,
1323  ZBL, rs_Z, cs_Z,
1324  a10t, cs_A,
1325  buff_1,
1326  a2, rs_A );
1327 
1328  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
1329  // FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
1332  m_ahead,
1333  n_behind,
1334  buff_m1,
1335  Y20, rs_Y, cs_Y,
1336  a10t, cs_A,
1337  buff_1,
1338  a12t, cs_A );
1341  m_ahead,
1342  n_behind,
1343  buff_m1,
1344  A20, rs_A, cs_A,
1345  z10t, cs_Z,
1346  buff_1,
1347  a12t, cs_A );
1348 
1349  if ( m_behind > 0 )
1350  {
1351  // FLA_Copy( last_elem, a10t_r );
1352  *a10t_r = last_elem;
1353  }
1354 
      // m_ahead == 0 only when i == m_A - 1; in that case a21 is empty and the
      // whole branch below is skipped.
1355  if ( m_ahead > 0 )
1356  {
1357  // FLA_Househ2_UT( FLA_LEFT,
1358  // a21_t,
1359  // a21_b, tau11 );
1360  FLA_Househ2_UT_l_opz( m_ahead - 1,
1361  a21_t,
1362  a21_b, rs_A,
1363  tau11 );
1364 
      // inv_tau11 = 1 / tau11 and minus_inv_tau11 = -inv_tau11, per the FLA_*
      // comments below.
1365  // FLA_Set( FLA_ONE, inv_tau11 );
1366  // FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
1367  // FLA_Copy( inv_tau11, minus_inv_tau11 );
1368  // FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
1369  bl1_zdiv3( buff_1, tau11, &inv_tau11 );
1370  bl1_zneg2( &inv_tau11, &minus_inv_tau11 );
1371 
      // Save the leading entry of a21 and overwrite it with one for the updates
      // below; it is restored from first_elem at the end of this branch.
1372  // FLA_Copy( a21_t, first_elem );
1373  // FLA_Set( FLA_ONE, a21_t );
1374  first_elem = *a21_t;
1375  *a21_t = *buff_1;
1376 
1377  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
1380  m_ahead,
1381  n_ahead,
1382  buff_1,
1383  A22, rs_A, cs_A,
1384  a21, rs_A,
1385  buff_0,
1386  y21, rs_Y );
1387 
1388  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
1391  m_ahead,
1392  n_ahead,
1393  buff_1,
1394  A22, rs_A, cs_A,
1395  a21, rs_A,
1396  buff_0,
1397  z21, rs_Z );
1398 
1399  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 );
1400  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 );
1401  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 );
1404  m_ahead,
1405  n_behind,
1406  buff_1,
1407  A20, rs_A, cs_A,
1408  a21, rs_A,
1409  buff_0,
1410  d0, inc_d );
1413  m_ahead,
1414  n_behind,
1415  buff_1,
1416  Y20, rs_Y, cs_Y,
1417  a21, rs_A,
1418  buff_0,
1419  e0, inc_e );
1422  m_ahead,
1423  n_behind,
1424  buff_1,
1425  Z20, rs_Z, cs_Z,
1426  a21, rs_A,
1427  buff_0,
1428  f0, inc_f );
1429 
1430  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
1431  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
1434  m_ahead,
1435  n_behind,
1436  buff_m1,
1437  Y20, rs_Y, cs_Y,
1438  d0, inc_d,
1439  buff_1,
1440  y21, rs_Y );
1443  m_ahead,
1444  n_behind,
1445  buff_m1,
1446  A20, rs_A, cs_A,
1447  f0, inc_f,
1448  buff_1,
1449  y21, rs_Y );
1450 
1451  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
1452  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
1455  m_ahead,
1456  n_behind,
1457  buff_m1,
1458  A20, rs_A, cs_A,
1459  e0, inc_e,
1460  buff_1,
1461  z21, rs_Z );
1464  m_ahead,
1465  n_behind,
1466  buff_m1,
1467  Z20, rs_Z, cs_Z,
1468  d0, inc_d,
1469  buff_1,
1470  z21, rs_Z );
1471 
1472  // FLA_Copy( d0, t01 );
1474  n_behind,
1475  d0, inc_d,
1476  t01, rs_T );
1477 
1478  // FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
1479  // FLA_Inv_scal( FLA_TWO, beta );
1480  // FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
1482  m_ahead,
1483  a21, rs_A,
1484  z21, rs_Z,
1485  &beta );
1486  bl1_zinvscals( buff_2, &beta );
1487  bl1_zcopyconj( &beta, &conj_beta );
1488 
1489  // FLA_Scal( minus_inv_tau11, conj_beta );
1490  // FLA_Axpy( conj_beta, a21, y21 );
1491  // FLA_Scal( inv_tau11, y21 );
1492  bl1_zscals( &minus_inv_tau11, &conj_beta );
1494  m_ahead,
1495  &conj_beta,
1496  a21, rs_A,
1497  y21, rs_Y );
1499  m_ahead,
1500  &inv_tau11,
1501  y21, rs_Y );
1502 
1503  // FLA_Scal( minus_inv_tau11, beta );
1504  // FLA_Axpy( beta, a21, z21 );
1505  // FLA_Scal( inv_tau11, z21 );
1506  bl1_zscals( &minus_inv_tau11, &beta );
1508  m_ahead,
1509  &beta,
1510  a21, rs_A,
1511  z21, rs_Z );
1513  m_ahead,
1514  &inv_tau11,
1515  z21, rs_Z );
1516 
1517  // FLA_Dot( a12t, a21, dot_product );
1518  // FLA_Scal( minus_inv_tau11, dot_product );
1519  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
1521  m_ahead,
1522  a12t, cs_A,
1523  a21, rs_A,
1524  &dot_product );
1525  bl1_zscals( &minus_inv_tau11, &dot_product );
1527  m_ahead,
1528  &dot_product,
1529  a21, rs_A,
1530  a12t, cs_A );
1531 
      // e0 is reused here as the temporary for the A02 update.
1532  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 );
1533  // FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
1536  m_behind,
1537  n_ahead,
1538  buff_1,
1539  A02, rs_A, cs_A,
1540  a21, rs_A,
1541  buff_0,
1542  e0, inc_e );
1545  m_behind,
1546  n_ahead,
1547  &minus_inv_tau11,
1548  e0, inc_e,
1549  a21, rs_A,
1550  A02, rs_A, cs_A );
1551 
1552  // FLA_Copy( first_elem, a21_t );
1553  *a21_t = first_elem;
1554  }
1555 
1556  /*------------------------------------------------------------*/
1557 
1558  }
1559 
1560  // FLA_Obj_free( &d );
1561  // FLA_Obj_free( &e );
1562  // FLA_Obj_free( &f );
1563  FLA_free( buff_d );
1564  FLA_free( buff_e );
1565  FLA_free( buff_f );
1566 
1567  return FLA_SUCCESS;
1568 }

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zgemv(), bl1_zger(), bl1_zscals(), bl1_zscalv(), bl1_zsetm(), BLIS1_CONJ_NO_TRANSPOSE, BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_TWO, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var4().

◆ FLA_Hess_UT_step_opz_var5()

FLA_Error FLA_Hess_UT_step_opz_var5 ( int  m_A,
int  m_T,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
dcomplex *  buff_U,
int  rs_U,
int  cs_U,
dcomplex *  buff_Z,
int  rs_Z,
int  cs_Z,
dcomplex *  buff_T,
int  rs_T,
int  cs_T 
)
897 {
     // Double-complex (dcomplex) kernel. Runs b_alg = m_T iterations over the
     // matrix stored at buff_A (row stride rs_A, column stride cs_A), using
     // buff_U, buff_Z and buff_T as additional matrices with their own strides.
     // m_ahead and n_ahead are both derived from m_A, so A is indexed as a
     // square m_A-by-m_A matrix. U and Z are zeroed on entry; one scratch vector
     // (w) is allocated here and freed before returning.
     // Always returns FLA_SUCCESS.
     //
     // NOTE(review): the embedded source line numbers skip before most of the
     // multi-line argument lists below, so the line(s) naming the routine being
     // called appear to be missing from this listing. The FLA_* comment that
     // precedes each group of argument lists gives the object-level equivalent.
898  dcomplex* buff_1 = FLA_DOUBLE_COMPLEX_PTR( FLA_ONE );
899  dcomplex* buff_0 = FLA_DOUBLE_COMPLEX_PTR( FLA_ZERO );
900  dcomplex* buff_m1 = FLA_DOUBLE_COMPLEX_PTR( FLA_MINUS_ONE );
901  int i;
902 
903  // b_alg = FLA_Obj_length( T );
904  int b_alg = m_T;
905 
     // Scratch vector of m_A elements, accessed with unit stride; released by
     // FLA_free() at the end of the function.
906  // FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
907  dcomplex* buff_w = ( dcomplex* ) FLA_malloc( m_A * sizeof( *buff_A ) );
908  int inc_w = 1;
909 
     // Clear the m_A-by-b_alg panels U and Z before the first iteration.
910  // FLA_Set( FLA_ZERO, U );
911  // FLA_Set( FLA_ZERO, Z );
912  bl1_zsetm( m_A,
913  b_alg,
914  buff_0,
915  buff_U, rs_U, cs_U );
916  bl1_zsetm( m_A,
917  b_alg,
918  buff_0,
919  buff_Z, rs_Z, cs_Z );
920 
     // One iteration per column i. The pointers set up below address sub-blocks
     // of A, U, Z and T relative to element (i, i).
921  for ( i = 0; i < b_alg; ++i )
922  {
923  dcomplex* a01 = buff_A + (i )*cs_A + (0 )*rs_A;
924  dcomplex* alpha11 = buff_A + (i )*cs_A + (i )*rs_A;
925  dcomplex* a21 = buff_A + (i )*cs_A + (i+1)*rs_A;
926  dcomplex* A02 = buff_A + (i+1)*cs_A + (0 )*rs_A;
927  dcomplex* a12t = buff_A + (i+1)*cs_A + (i )*rs_A;
928  dcomplex* A22 = buff_A + (i+1)*cs_A + (i+1)*rs_A;
929 
930  dcomplex* U00 = buff_U + (0 )*cs_U + (0 )*rs_U;
931  dcomplex* u10t = buff_U + (0 )*cs_U + (i )*rs_U;
932  dcomplex* U20 = buff_U + (0 )*cs_U + (i+1)*rs_U;
933  dcomplex* u21 = buff_U + (i )*cs_U + (i+1)*rs_U;
934 
935  dcomplex* Z00 = buff_Z + (0 )*cs_Z + (0 )*rs_Z;
936  dcomplex* z10t = buff_Z + (0 )*cs_Z + (i )*rs_Z;
937  dcomplex* Z20 = buff_Z + (0 )*cs_Z + (i+1)*rs_Z;
938  dcomplex* z01 = buff_Z + (i )*cs_Z + (0 )*rs_Z;
939  dcomplex* zeta11 = buff_Z + (i )*cs_Z + (i )*rs_Z;
940  dcomplex* z21 = buff_Z + (i )*cs_Z + (i+1)*rs_Z;
941 
942  dcomplex* T00 = buff_T + (0 )*cs_T + (0 )*rs_T;
943  dcomplex* t01 = buff_T + (i )*cs_T + (0 )*rs_T;
944  dcomplex* tau11 = buff_T + (i )*cs_T + (i )*rs_T;
945 
946  dcomplex* w0 = buff_w + (0 )*inc_w;
947 
948  dcomplex* a21_t = a21 + (0 )*cs_A + (0 )*rs_A;
949  dcomplex* a21_b = a21 + (0 )*cs_A + (1 )*rs_A;
950 
951  dcomplex* u21_t = u21 + (0 )*cs_U + (0 )*rs_U;
952 
953  int m_ahead = m_A - i - 1;
954  int n_ahead = m_A - i - 1;
955  int m_behind = i;
956  int n_behind = i;
957 
958  /*------------------------------------------------------------*/
959 
     // Only for i > 0: column i of A (a01, alpha11, a21) is updated using the
     // leading blocks of U, Z and T (U00/u10t/U20, Z00/z10t/Z20, T00), with w0
     // as the temporary. See the FLA_* comments for each step.
960  if ( m_behind > 0 )
961  {
962  // FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
963  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
964  // T00, w0 );
966  m_behind,
967  u10t, cs_U,
968  w0, inc_w );
972  m_behind,
973  T00, rs_T, cs_T,
974  w0, inc_w );
975 
976  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
977  // FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
978  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
981  m_behind,
982  n_behind,
983  buff_m1,
984  Z00, rs_Z, cs_Z,
985  w0, inc_w,
986  buff_1,
987  a01, rs_A );
989  m_behind,
990  buff_m1,
991  z10t, cs_Z,
992  w0, inc_w,
993  buff_1,
994  alpha11 );
997  m_ahead,
998  n_behind,
999  buff_m1,
1000  Z20, rs_Z, cs_Z,
1001  w0, inc_w,
1002  buff_1,
1003  a21, rs_A );
1004 
1005  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
1006  // FLA_ONE, U00, a01, FLA_ZERO, w0 );
1007  // FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
1008  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
1010  m_behind,
1011  a01, rs_A,
1012  w0, inc_w );
1016  m_behind,
1017  U00, rs_U, cs_U,
1018  w0, inc_w );
1020  m_behind,
1021  alpha11,
1022  u10t, cs_U,
1023  w0, inc_w );
1026  m_ahead,
1027  n_behind,
1028  buff_1,
1029  U20, rs_U, cs_U,
1030  a21, rs_A,
1031  buff_1,
1032  w0, inc_w );
1033 
1034  // FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
1035  // T00, w0 );
1039  m_behind,
1040  T00, rs_T, cs_T,
1041  w0, inc_w );
1042 
1043  // FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
1044  // FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
1045  // FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
1046  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
1050  m_behind,
1051  buff_m1,
1052  U00, rs_U, cs_U,
1053  w0, inc_w,
1054  buff_1,
1055  a01, rs_A );
1057  m_behind,
1058  buff_m1,
1059  u10t, cs_U,
1060  w0, inc_w,
1061  buff_1,
1062  alpha11 );
1065  m_ahead,
1066  n_behind,
1067  buff_m1,
1068  U20, rs_U, cs_U,
1069  w0, inc_w,
1070  buff_1,
1071  a21, rs_A );
1072  }
1073 
     // m_ahead == 0 only when i == m_A - 1; in that case a21 is empty and the
     // whole branch below is skipped.
1074  if ( m_ahead > 0 )
1075  {
1076  // FLA_Househ2_UT( FLA_LEFT,
1077  // a21_t,
1078  // a21_b, tau11 );
1079  FLA_Househ2_UT_l_opz( m_ahead - 1,
1080  a21_t,
1081  a21_b, rs_A,
1082  tau11 );
1083 
     // u21 receives a copy of a21 and then has its first entry set to one;
     // a21 itself is not written again in this branch.
1084  // FLA_Copy( a21, u21 );
1086  m_ahead,
1087  a21, rs_A,
1088  u21, rs_U );
1089 
1090  // FLA_Set( FLA_ONE, u21_t );
1091  *u21_t = *buff_1;
1092 
     // Column i of Z (z01, zeta11, z21) is formed from the three row blocks of
     // A to the right of column i (A02, a12t, A22), each applied to u21.
1093  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
1094  // FLA_Dot( a12t, u21, zeta11 );
1095  // FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
1098  m_behind,
1099  n_ahead,
1100  buff_1,
1101  A02, rs_A, cs_A,
1102  u21, rs_U,
1103  buff_0,
1104  z01, rs_Z );
1106  m_ahead,
1107  a12t, cs_A,
1108  u21, rs_U,
1109  zeta11 );
1112  m_ahead,
1113  n_ahead,
1114  buff_1,
1115  A22, rs_A, cs_A,
1116  u21, rs_U,
1117  buff_0,
1118  z21, rs_Z );
1119 
1120  // FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
1123  m_ahead,
1124  n_behind,
1125  buff_1,
1126  U20, rs_U, cs_U,
1127  u21, rs_U,
1128  buff_0,
1129  t01, rs_T );
1130  }
1131 
1132  /*------------------------------------------------------------*/
1133 
1134  }
1135 
1136  // FLA_Obj_free( &w );
1137  FLA_free( buff_w );
1138 
1139  return FLA_SUCCESS;
1140 }
void bl1_zdots(conj1_t conj, int n, dcomplex *alpha, dcomplex *x, int incx, dcomplex *y, int incy, dcomplex *beta, dcomplex *rho)
Definition: bl1_dots.c:56
void bl1_ztrmv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trmv.c:177
void bl1_ztrmvsx(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *alpha, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx, dcomplex *beta, dcomplex *y, int incy)
Definition: bl1_trmvsx.c:187
void bl1_ztrsv(uplo1_t uplo, trans1_t trans, diag1_t diag, int m, dcomplex *a, int a_rs, int a_cs, dcomplex *x, int incx)
Definition: bl1_trsv.c:177

References bl1_zaxpyv(), bl1_zcopyv(), bl1_zdot(), bl1_zdots(), bl1_zgemv(), bl1_zsetm(), bl1_ztrmv(), bl1_ztrmvsx(), bl1_ztrsv(), BLIS1_CONJ_TRANSPOSE, BLIS1_CONJUGATE, BLIS1_LOWER_TRIANGULAR, BLIS1_NO_CONJUGATE, BLIS1_NO_TRANSPOSE, BLIS1_NONUNIT_DIAG, BLIS1_UPPER_TRIANGULAR, FLA_free(), FLA_Househ2_UT_l_opz(), FLA_malloc(), FLA_MINUS_ONE, FLA_ONE, FLA_ZERO, and i.

Referenced by FLA_Hess_UT_step_opt_var5().

◆ FLA_Hess_UT_step_unb_var1()

// Object-based (FLA_Obj) step routine. Each loop iteration handles one
// row/column of A, continuing while FLA_Obj_length( ATL ) < b_alg, where
// b_alg = FLA_Obj_length( T ). When A22 is non-empty, an iteration builds a
// Householder transform from a21 (scalar stored in tau11), applies it to A22
// from the left and to A2 (A02, a12t and A22) from the right, and fills t01.
// Always returns FLA_SUCCESS.
FLA_Error FLA_Hess_UT_step_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj AL, AR, A0, a1, A2;
24  FLA_Obj TTL, TTR, T00, t01, T02,
25  TBL, TBR, t10t, tau11, t12t,
26  T20, t21, T22;
27 
28  FLA_Obj a21_t,
29  a21_b;
30 
31  FLA_Obj A22_t,
32  A22_b;
33 
34  FLA_Obj A2_l, A2_r;
35 
36  FLA_Obj first_elem;
37 
38  dim_t b_alg;
39  FLA_Datatype datatype_A;
40 
41 
42  b_alg = FLA_Obj_length( T );
43  datatype_A = FLA_Obj_datatype( A );
44 
    // 1x1 scratch object that holds the saved first entry of a21; it is freed
    // just before the function returns.
45  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
46 
    // A is tracked through two partitionings at once: a 2x2 view (ATL..ABR) and
    // a 1x2 column view (AL, AR) that supplies A2 for the right-side update.
47  FLA_Part_2x2( A, &ATL, &ATR,
48  &ABL, &ABR, 0, 0, FLA_TL );
49  FLA_Part_1x2( A, &AL, &AR, 0, FLA_LEFT );
50  FLA_Part_2x2( T, &TTL, &TTR,
51  &TBL, &TBR, 0, 0, FLA_TL );
52 
53  while ( FLA_Obj_length( ATL ) < b_alg )
54  {
55  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
56  /* ************* */ /* ************************** */
57  &a10t, /**/ &alpha11, &a12t,
58  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
59  1, 1, FLA_BR );
60 
61  FLA_Repart_1x2_to_1x3( AL, /**/ AR, &A0, /**/ &a1, &A2,
62  1, FLA_RIGHT );
63 
64  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
65  /* ************* */ /* ************************** */
66  &t10t, /**/ &tau11, &t12t,
67  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
68  1, 1, FLA_BR );
69 
70  /*------------------------------------------------------------*/
71 
    // All of the work is skipped once A22 has no rows left.
72  if ( FLA_Obj_length( A22 ) > 0 )
73  {
74  FLA_Part_2x1( a21, &a21_t,
75  &a21_b, 1, FLA_TOP );
76 
77  FLA_Part_2x1( A22, &A22_t,
78  &A22_b, 1, FLA_TOP );
79 
80  FLA_Part_1x2( A2, &A2_l, &A2_r, 1, FLA_LEFT );
81 
82  // [ u21, tau11, a21 ] = House( a21 );
83  FLA_Househ2_UT( FLA_LEFT,
84  a21_t,
85  a21_b, tau11 );
86 
87  // Save first element of a21_t and set it to one so we can use a21 as
88  // u21 in subsequent computations. We will restore a21_t later on.
89  FLA_Copy( a21_t, first_elem );
90  FLA_Set( FLA_ONE, a21_t );
91 
92  // A22 = ( I - inv( tau ) * u21 * u21' ) * A22;
93  FLA_Apply_H2_UT( FLA_LEFT, tau11, a21_b, A22_t,
94  A22_b );
95 
96  // A02 = A02 * ( I - inv( tau ) * u21 * u21' );
97  // a12t = a12t * ( I - inv( tau ) * u21 * u21' );
98  // A22 = A22 * ( I - inv( tau ) * u21 * u21' );
99  FLA_Apply_H2_UT( FLA_RIGHT, tau11, a21_b, A2_l, A2_r );
100 
101  // t01 = U20' * u21;
102  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
103 
104  // Restore first element of a21.
105  FLA_Copy( first_elem, a21_t );
106  }
107 
108  /*------------------------------------------------------------*/
109 
110  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
111  a10t, alpha11, /**/ a12t,
112  /* ************** */ /* ************************ */
113  &ABL, /**/ &ABR, A20, a21, /**/ A22,
114  FLA_TL );
115 
116  FLA_Cont_with_1x3_to_1x2( &AL, /**/ &AR, A0, a1, /**/ A2,
117  FLA_LEFT );
118 
119  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
120  t10t, tau11, /**/ t12t,
121  /* ************** */ /* ************************ */
122  &TBL, /**/ &TBR, T20, t21, /**/ T22,
123  FLA_TL );
124  }
125 
126  FLA_Obj_free( &first_elem );
127 
128  return FLA_SUCCESS;
129 }
FLA_Error FLA_Copy(FLA_Obj A, FLA_Obj B)
Definition: FLA_Copy.c:15
FLA_Error FLA_Gemv(FLA_Trans transa, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemv.c:15
FLA_Error FLA_Apply_H2_UT(FLA_Side side, FLA_Obj tau, FLA_Obj u2, FLA_Obj a1, FLA_Obj A2)
Definition: FLA_Apply_H2_UT.c:13
FLA_Error FLA_Househ2_UT(FLA_Side side, FLA_Obj chi_1, FLA_Obj x2, FLA_Obj tau)
Definition: FLA_Househ2_UT.c:16

References FLA_Apply_H2_UT(), FLA_Cont_with_1x3_to_1x2(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Gemv(), FLA_Househ2_UT(), FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_1x2_to_1x3(), FLA_Repart_2x2_to_3x3(), FLA_Set(), and FLA_ZERO.

Referenced by FLA_Hess_UT_unb_var1().

◆ FLA_Hess_UT_step_unb_var2()

// Object-based (FLA_Obj) step routine. Each loop iteration handles one
// row/column of A, continuing while FLA_Obj_length( ATL ) < b_alg, where
// b_alg = FLA_Obj_length( T ). When A22 is non-empty, an iteration builds a
// Householder transform from a21 (scalar stored in tau11), forms the vectors
// y21 and z21 in the work objects y and z, applies the transform to a12t and
// A02 from the right, updates A22 with two rank-1 updates, and fills t01.
// Always returns FLA_SUCCESS.
FLA_Error FLA_Hess_UT_step_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj TTL, TTR, T00, t01, T02,
24  TBL, TBR, t10t, tau11, t12t,
25  T20, t21, T22;
26  FLA_Obj yT, y0,
27  yB, psi1,
28  y2;
29  FLA_Obj zT, z0,
30  zB, zeta1,
31  z2;
32  FLA_Obj y, z;
33 
34  FLA_Obj inv_tau11;
35  FLA_Obj minus_inv_tau11;
36  FLA_Obj first_elem;
37  FLA_Obj beta;
38  FLA_Obj conj_beta;
39  FLA_Obj dot_product;
40 
41  FLA_Obj a21_t,
42  a21_b;
43 
44  FLA_Datatype datatype_A;
45  dim_t m_A;
46  dim_t b_alg;
47 
48 
49  b_alg = FLA_Obj_length( T );
50 
51  datatype_A = FLA_Obj_datatype( A );
52  m_A = FLA_Obj_length( A );
53 
    // Six 1x1 scalar temporaries and two m_A x 1 work vectors (y, z), all with
    // A's datatype. Every one of them is released by an FLA_Obj_free() call
    // before the function returns.
54  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
55  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
56  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
57  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
58  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta );
59  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product );
60  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
61  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
62 
    // The work vectors y and z are partitioned top/bottom alongside A, so that
    // y0/y2 and z0/z2 can be used next to A02/A22 in each iteration.
63  FLA_Part_2x2( A, &ATL, &ATR,
64  &ABL, &ABR, 0, 0, FLA_TL );
65  FLA_Part_2x2( T, &TTL, &TTR,
66  &TBL, &TBR, 0, 0, FLA_TL );
67  FLA_Part_2x1( y, &yT,
68  &yB, 0, FLA_TOP );
69  FLA_Part_2x1( z, &zT,
70  &zB, 0, FLA_TOP );
71 
72  while ( FLA_Obj_length( ATL ) < b_alg )
73  {
74  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
75  /* ************* */ /* ************************** */
76  &a10t, /**/ &alpha11, &a12t,
77  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
78  1, 1, FLA_BR );
79  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
80  /* ************* */ /* ************************** */
81  &t10t, /**/ &tau11, &t12t,
82  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
83  1, 1, FLA_BR );
84  FLA_Repart_2x1_to_3x1( yT, &y0,
85  /* ** */ /* **** */
86  &psi1,
87  yB, &y2, 1, FLA_BOTTOM );
88  FLA_Repart_2x1_to_3x1( zT, &z0,
89  /* ** */ /* ***** */
90  &zeta1,
91  zB, &z2, 1, FLA_BOTTOM );
92 
93  /*------------------------------------------------------------*/
94 
    // All of the work is skipped once A22 has no rows left.
95  if ( FLA_Obj_length( A22 ) > 0 )
96  {
97  FLA_Part_2x1( a21, &a21_t,
98  &a21_b, 1, FLA_TOP );
99 
100  // [ u21, tau11, a21 ] = House( a21 );
101  FLA_Househ2_UT( FLA_LEFT,
102  a21_t,
103  a21_b, tau11 );
104 
105  // inv_tau11 = 1 / tau11;
106  // minus_inv_tau11 = -1 / tau11;
107  FLA_Set( FLA_ONE, inv_tau11 );
108  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
109  FLA_Copy( inv_tau11, minus_inv_tau11 );
110  FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
111 
112  // Save first element of a21_t and set it to one so we can use a21 as
113  // u21 in subsequent computations. We will restore a21_t later on.
114  FLA_Copy( a21_t, first_elem );
115  FLA_Set( FLA_ONE, a21_t );
116 
117  // y21 = A22' * u21;
118  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y2 );
119 
120  // z21 = A22 * u21;
121  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z2 );
122 
123  // beta = u21' * z21 / 2;
124  // conj_beta = conj(beta);
125  FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
126  FLA_Inv_scal( FLA_TWO, beta );
127  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
128 
129  // y21' = ( y21' - beta / tau * u21' ) / tau;
130  // y21 = ( y21 - conj(beta) / tau * u21 ) / tau;
131  FLA_Scal( minus_inv_tau11, conj_beta );
132  FLA_Axpy( conj_beta, a21, y2 );
133  FLA_Scal( inv_tau11, y2 );
134 
135  // z21 = ( z21 - beta / tau * u21 ) / tau;
136  FLA_Scal( minus_inv_tau11, beta );
137  FLA_Axpy( beta, a21, z2 );
138  FLA_Scal( inv_tau11, z2 );
139 
140  // a12t = a12t * ( I - u21 * u21' / tau );
141  // = a12t - ( a12t * u21 ) * u21' / tau;
142  FLA_Dot( a12t, a21, dot_product );
143  FLA_Scal( minus_inv_tau11, dot_product );
144  FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
145 
     // y0 (the top part of the work vector y) serves as the temporary for
     // A02 * u21 here.
146  // A02 = A02 * ( I - u21 * u21' / tau );
147  // = A02 - ( A02 * u21 ) * u21' / tau;
148  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
149  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
150 
151  // A22 = A22 - u21 * y21' - z21 * u21';
152  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, a21, y2, A22 );
153  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, a21, A22 );
154 
155  // t01 = U20' * u21;
156  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
157 
158  // Restore first element of a21.
159  FLA_Copy( first_elem, a21_t );
160  }
161 
162  /*------------------------------------------------------------*/
163 
164  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
165  a10t, alpha11, /**/ a12t,
166  /* ************** */ /* ************************ */
167  &ABL, /**/ &ABR, A20, a21, /**/ A22,
168  FLA_TL );
169  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
170  t10t, tau11, /**/ t12t,
171  /* ************** */ /* ************************ */
172  &TBL, /**/ &TBR, T20, t21, /**/ T22,
173  FLA_TL );
174  FLA_Cont_with_3x1_to_2x1( &yT, y0,
175  psi1,
176  /* ** */ /* **** */
177  &yB, y2, FLA_TOP );
178  FLA_Cont_with_3x1_to_2x1( &zT, z0,
179  zeta1,
180  /* ** */ /* ***** */
181  &zB, z2, FLA_TOP );
182  }
183 
184  FLA_Obj_free( &inv_tau11 );
185  FLA_Obj_free( &minus_inv_tau11 );
186  FLA_Obj_free( &first_elem );
187  FLA_Obj_free( &beta );
188  FLA_Obj_free( &conj_beta );
189  FLA_Obj_free( &dot_product );
190  FLA_Obj_free( &y );
191  FLA_Obj_free( &z );
192 
193  return FLA_SUCCESS;
194 }
FLA_Error FLA_Scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Scal.c:15
FLA_Error FLA_Dot(FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dot.c:13
FLA_Error FLA_Dotc(FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho)
Definition: FLA_Dotc.c:13
FLA_Error FLA_Axpy(FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpy.c:15
FLA_Error FLA_Axpyt(FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B)
Definition: FLA_Axpyt.c:15
FLA_Error FLA_Inv_scal(FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scal.c:13
FLA_Error FLA_Copyt(FLA_Trans trans, FLA_Obj A, FLA_Obj B)
Definition: FLA_Copyt.c:15
FLA_Error FLA_Inv_scalc(FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A)
Definition: FLA_Inv_scalc.c:13
FLA_Error FLA_Gerc(FLA_Conj conjx, FLA_Conj conjy, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj A)
Definition: FLA_Gerc.c:13

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dotc(), FLA_Gemv(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, psi1, and zeta1.

Referenced by FLA_Hess_UT_unb_var2().

◆ FLA_Hess_UT_step_unb_var3()

// Object-based (FLA_Obj) step routine. Each loop iteration handles one
// row/column of A, continuing while FLA_Obj_length( ATL ) < b_alg, where
// b_alg = FLA_Obj_length( T ). The update
//   A22 = A22 - u21 * y21' - z21 * u21'
// is not applied in the iteration that computes u21, y21 and z21. It is
// applied in the ATL-non-empty branches of a later iteration, or at the end
// of the last iteration so that A22 is left in a valid state.
// Always returns FLA_SUCCESS.
FLA_Error FLA_Hess_UT_step_unb_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
19 {
20  FLA_Obj ATL, ATR, A00, a01, A02,
21  ABL, ABR, a10t, alpha11, a12t,
22  A20, a21, A22;
23  FLA_Obj TTL, TTR, T00, t01, T02,
24  TBL, TBR, t10t, tau11, t12t,
25  T20, t21, T22;
26  FLA_Obj uT, u0,
27  uB, upsilon1,
28  u2;
29  FLA_Obj yT, y0,
30  yB, psi1,
31  y2;
32  FLA_Obj zT, z0,
33  zB, zeta1,
34  z2;
35  FLA_Obj vT, v0,
36  vB, nu1,
37  v2;
38  FLA_Obj wT, w0,
39  wB, omega1,
40  w2;
41  FLA_Obj u, y, z, v, w;
42 
43  FLA_Obj inv_tau11;
44  FLA_Obj minus_inv_tau11;
45  FLA_Obj first_elem;
46  FLA_Obj beta;
47  FLA_Obj conj_beta;
48  FLA_Obj dot_product;
49  FLA_Obj minus_upsilon1;
50  FLA_Obj minus_conj_upsilon1;
51  FLA_Obj minus_psi1;
52  FLA_Obj minus_conj_psi1;
53  FLA_Obj minus_zeta1;
54 
55  FLA_Obj a21_t,
56  a21_b;
57 
58  FLA_Datatype datatype_A;
59  dim_t m_A;
60  dim_t b_alg;
61 
62 
63  b_alg = FLA_Obj_length( T );
64 
65  datatype_A = FLA_Obj_datatype( A );
66  m_A = FLA_Obj_length( A );
67 
    // Eleven 1x1 scalar temporaries and five m_A x 1 work vectors (u, y, z, v,
    // w), all with A's datatype. Every one of them is released by an
    // FLA_Obj_free() call before the function returns.
68  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
69  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
70  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
71  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
72  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta );
73  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product );
74  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_upsilon1 );
75  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_upsilon1 );
76  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_psi1 );
77  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_conj_psi1 );
78  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_zeta1 );
79  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &u );
80  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &y );
81  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &z );
82  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &v );
83  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
84 
85  FLA_Part_2x2( A, &ATL, &ATR,
86  &ABL, &ABR, 0, 0, FLA_TL );
87  FLA_Part_2x2( T, &TTL, &TTR,
88  &TBL, &TBR, 0, 0, FLA_TL );
89  FLA_Part_2x1( u, &uT,
90  &uB, 0, FLA_TOP );
91  FLA_Part_2x1( y, &yT,
92  &yB, 0, FLA_TOP );
93  FLA_Part_2x1( z, &zT,
94  &zB, 0, FLA_TOP );
95  FLA_Part_2x1( v, &vT,
96  &vB, 0, FLA_TOP );
97  FLA_Part_2x1( w, &wT,
98  &wB, 0, FLA_TOP );
99 
100  while ( FLA_Obj_length( ATL ) < b_alg )
101  {
102  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
103  /* ************* */ /* ************************** */
104  &a10t, /**/ &alpha11, &a12t,
105  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
106  1, 1, FLA_BR );
107  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
108  /* ************* */ /* ************************** */
109  &t10t, /**/ &tau11, &t12t,
110  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
111  1, 1, FLA_BR );
112  FLA_Repart_2x1_to_3x1( uT, &u0,
113  /* ** */ /* ******** */
114  &upsilon1,
115  uB, &u2, 1, FLA_BOTTOM );
116  FLA_Repart_2x1_to_3x1( yT, &y0,
117  /* ** */ /* **** */
118  &psi1,
119  yB, &y2, 1, FLA_BOTTOM );
120  FLA_Repart_2x1_to_3x1( zT, &z0,
121  /* ** */ /* ***** */
122  &zeta1,
123  zB, &z2, 1, FLA_BOTTOM );
124  FLA_Repart_2x1_to_3x1( vT, &v0,
125  /* ** */ /* *** */
126  &nu1,
127  vB, &v2, 1, FLA_BOTTOM );
128  FLA_Repart_2x1_to_3x1( wT, &w0,
129  /* ** */ /* ****** */
130  &omega1,
131  wB, &w2, 1, FLA_BOTTOM );
132 
133  /*------------------------------------------------------------*/
134 
     // Skipped on the first iteration (ATL empty). On later iterations this
     // applies the pending update to alpha11, a12t and a21.
     // NOTE(review): upsilon1/psi1/zeta1 and u2/y2/z2 presumably expose the
     // values written into u, y and z by the previous iteration - confirm
     // against the FLA_Repart/FLA_Cont_with semantics.
135  if ( FLA_Obj_length( ATL ) > 0 )
136  {
137  FLA_Copy( upsilon1, minus_upsilon1 );
138  FLA_Scal( FLA_MINUS_ONE, minus_upsilon1 );
139  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, minus_conj_upsilon1 );
140 
141  FLA_Copy( psi1, minus_psi1 );
142  FLA_Scal( FLA_MINUS_ONE, minus_psi1 );
143  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, minus_psi1, minus_conj_psi1 );
144 
145  FLA_Copy( zeta1, minus_zeta1 );
146  FLA_Scal( FLA_MINUS_ONE, minus_zeta1 );
147 
148  // alpha11 = alpha11 - upsilon11 * conj(psi11) - zeta11 * conj(upsilon11);
149  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_upsilon1, psi1, alpha11 );
150  FLA_Axpyt( FLA_CONJ_NO_TRANSPOSE, minus_zeta1, upsilon1, alpha11 );
151 
152  // a12t = a12t - upsilon11 * y21' - zeta11 * u21';
153  FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_upsilon1, y2, a12t );
154  FLA_Axpyt( FLA_CONJ_TRANSPOSE, minus_zeta1, u2, a12t );
155 
156  // a21 = a21 - conj(psi11) * u21 - conj(upsilon11) * z21;
157  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_psi1, u2, a21 );
158  FLA_Axpyt( FLA_NO_TRANSPOSE, minus_conj_upsilon1, z2, a21 );
159  }
160 
     // The Householder transform is computed from the a21 updated above, before
     // the pending A22 update is applied in the next branch.
161  if ( FLA_Obj_length( A22 ) > 0 )
162  {
163  FLA_Part_2x1( a21, &a21_t,
164  &a21_b, 1, FLA_TOP );
165 
166  // [ x21, tau11, a21 ] = House( a21 );
167  FLA_Househ2_UT( FLA_LEFT,
168  a21_t,
169  a21_b, tau11 );
170 
171  // inv_tau11 = 1 / tau11;
172  // minus_inv_tau11 = -1 / tau11;
173  FLA_Set( FLA_ONE, inv_tau11 );
174  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
175  FLA_Copy( inv_tau11, minus_inv_tau11 );
176  FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
177 
178  // Save first element of a21_t and set it to one so we can use a21 as
179  // u21 in subsequent computations. We will restore a21_t later on.
180  FLA_Copy( a21_t, first_elem );
181  FLA_Set( FLA_ONE, a21_t );
182  }
183 
     // Pending update of A22, applied before u2, y2 and z2 are overwritten with
     // this iteration's vectors in the branch below.
184  if ( FLA_Obj_length( ATL ) > 0 )
185  {
186  // A22 = A22 - u21 * y21' - z21 * u21';
187  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
188  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
189  }
190 
191  if ( FLA_Obj_length( A22 ) > 0 )
192  {
193  // v2 = A22' * x21;
194  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, v2 );
195 
196  // w2 = A22 * x21;
197  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, w2 );
198 
199  // u21 = x21;
200  // y21 = v2;
201  // z21 = w2;
202  FLA_Copy( a21, u2 );
203  FLA_Copy( v2, y2 );
204  FLA_Copy( w2, z2 );
205 
206  // beta = u21' * z21 / 2;
207  // conj_beta = conj(beta);
208  FLA_Dotc( FLA_CONJUGATE, a21, z2, beta );
209  FLA_Inv_scal( FLA_TWO, beta );
210  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
211 
212  // y21' = ( y21' - beta / tau * u21' ) / tau;
213  // y21 = ( y21 - conj(beta) / tau * u21 ) / tau;
214  FLA_Scal( minus_inv_tau11, conj_beta );
215  FLA_Axpy( conj_beta, a21, y2 );
216  FLA_Scal( inv_tau11, y2 );
217 
218  // z21 = ( z21 - beta / tau * u21 ) / tau;
219  FLA_Scal( minus_inv_tau11, beta );
220  FLA_Axpy( beta, a21, z2 );
221  FLA_Scal( inv_tau11, z2 );
222 
223  // a12t = a12t * ( I - u21 * u21' / tau );
224  // = a12t - ( a12t * u21 ) * u21' / tau;
225  FLA_Dot( a12t, a21, dot_product );
226  FLA_Scal( minus_inv_tau11, dot_product );
227  FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
228 
     // y0 (the top part of the work vector y) serves as the temporary for
     // A02 * u21 here.
229  // A02 = A02 * ( I - u21 * u21' / tau );
230  // = A02 - ( A02 * u21 ) * u21' / tau;
231  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, y0 );
232  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, y0, a21, A02 );
233 
234  // t01 = U20' * u21;
235  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, t01 );
236 
237  // Restore first element of a21.
238  FLA_Copy( first_elem, a21_t );
239  }
240 
241  // Update A22 if this is the last iteration; this is needed when we're
242  // being called from the blocked routine so A22 is left in a valid state.
243  if ( FLA_Obj_length( ATL ) + 1 == b_alg &&
244  FLA_Obj_length( A22 ) > 0 )
245  {
246  // A22 = A22 - u21 * y21' - z21 * u21';
247  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, u2, y2, A22 );
248  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, FLA_MINUS_ONE, z2, u2, A22 );
249  }
250 
251  /*------------------------------------------------------------*/
252 
253  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
254  a10t, alpha11, /**/ a12t,
255  /* ************** */ /* ************************ */
256  &ABL, /**/ &ABR, A20, a21, /**/ A22,
257  FLA_TL );
258  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
259  t10t, tau11, /**/ t12t,
260  /* ************** */ /* ************************ */
261  &TBL, /**/ &TBR, T20, t21, /**/ T22,
262  FLA_TL );
263  FLA_Cont_with_3x1_to_2x1( &uT, u0,
264  upsilon1,
265  /* ** */ /* ******** */
266  &uB, u2, FLA_TOP );
267  FLA_Cont_with_3x1_to_2x1( &yT, y0,
268  psi1,
269  /* ** */ /* **** */
270  &yB, y2, FLA_TOP );
271  FLA_Cont_with_3x1_to_2x1( &zT, z0,
272  zeta1,
273  /* ** */ /* ***** */
274  &zB, z2, FLA_TOP );
275  FLA_Cont_with_3x1_to_2x1( &vT, v0,
276  nu1,
277  /* ** */ /* *** */
278  &vB, v2, FLA_TOP );
279  FLA_Cont_with_3x1_to_2x1( &wT, w0,
280  omega1,
281  /* ** */ /* ****** */
282  &wB, w2, FLA_TOP );
283  }
284 
285  FLA_Obj_free( &inv_tau11 );
286  FLA_Obj_free( &minus_inv_tau11 );
287  FLA_Obj_free( &first_elem );
288  FLA_Obj_free( &beta );
289  FLA_Obj_free( &conj_beta );
290  FLA_Obj_free( &dot_product );
291  FLA_Obj_free( &minus_upsilon1 );
292  FLA_Obj_free( &minus_conj_upsilon1 );
293  FLA_Obj_free( &minus_psi1 );
294  FLA_Obj_free( &minus_conj_psi1 );
295  FLA_Obj_free( &minus_zeta1 );
296  FLA_Obj_free( &u );
297  FLA_Obj_free( &y );
298  FLA_Obj_free( &z );
299  FLA_Obj_free( &v );
300  FLA_Obj_free( &w );
301 
302  return FLA_SUCCESS;
303 }
double *restrict omega1
Definition: bl1_axpyv2bdotaxpy.c:200

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dotc(), FLA_Gemv(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, FLA_ZERO, omega1, psi1, upsilon1, and zeta1.

Referenced by FLA_Hess_UT_unb_var3().

◆ FLA_Hess_UT_step_unb_var4()

// Performs b_alg = FLA_Obj_length( T ) iterations of the variant-4 unblocked
// reduction step on A (a Hessenberg reduction, going by the routine name).
//
// Each iteration first applies the updates accumulated so far in Y and Z to
// the current column ( alpha11, a21 ) and row ( a12t ) of A. While A22 is
// non-empty it then computes a Householder transform for a21 with
// FLA_Househ2_UT(), stores the resulting scalar in tau11 and U20' * u21 in
// t01 (both parts of T), writes the new y21 / z21 columns into Y and Z, and
// applies the transform from the right to a12t and A02.
//
// A    - matrix updated in place; a21 doubles as the Householder vector u21
//        (its first element is temporarily set to one and then restored).
// Y, Z - caller-provided workspaces; both are set to zero on entry.
// T    - receives tau11 and t01; its length sets the number of iterations.
//
// Returns FLA_SUCCESS unconditionally; the results of the FLA_* calls made
// here are not inspected.
FLA_Error FLA_Hess_UT_step_unb_var4 ( FLA_Obj  A,
FLA_Obj  Y,
FLA_Obj  Z,
FLA_Obj  T 
)
30 {
31  FLA_Obj ATL, ATR, A00, a01, A02,
32  ABL, ABR, a10t, alpha11, a12t,
33  A20, a21, A22;
34  FLA_Obj YTL, YTR, Y00, y01, Y02,
35  YBL, YBR, y10t, psi11, y12t,
36  Y20, y21, Y22;
37  FLA_Obj ZTL, ZTR, Z00, z01, Z02,
38  ZBL, ZBR, z10t, zeta11, z12t,
39  Z20, z21, Z22;
40  FLA_Obj TTL, TTR, T00, t01, T02,
41  TBL, TBR, t10t, tau11, t12t,
42  T20, t21, T22;
43  FLA_Obj dT, d0,
44  dB, delta1,
45  d2;
46  FLA_Obj eT, e0,
47  eB, epsilon1,
48  e2;
49  FLA_Obj fT, f0,
50  fB, phi1,
51  f2;
52  FLA_Obj d, e, f;
53 
54  FLA_Obj inv_tau11;
55  FLA_Obj minus_inv_tau11;
56  FLA_Obj first_elem;
57  FLA_Obj last_elem;
58  FLA_Obj beta;
59  FLA_Obj conj_beta;
60  FLA_Obj dot_product;
61 
62  FLA_Obj a10t_l, a10t_r;
63  FLA_Obj a21_t,
64  a21_b;
65  FLA_Obj a2;
66 
67  FLA_Datatype datatype_A;
68  dim_t m_A;
69  dim_t b_alg;
70 
71 
72  b_alg = FLA_Obj_length( T );
73 
74  datatype_A = FLA_Obj_datatype( A );
75  m_A = FLA_Obj_length( A );
76 
77  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &inv_tau11 );
78  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &minus_inv_tau11 );
79  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &first_elem );
80  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &last_elem );
81  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &beta );
82  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &conj_beta );
83  FLA_Obj_create( datatype_A, 1, 1, 0, 0, &dot_product );
84  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &d );
85  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &e );
86  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &f );
87 
88  FLA_Set( FLA_ZERO, Y );
89  FLA_Set( FLA_ZERO, Z );
90 
91  FLA_Part_2x2( A, &ATL, &ATR,
92  &ABL, &ABR, 0, 0, FLA_TL );
93  FLA_Part_2x2( Y, &YTL, &YTR,
94  &YBL, &YBR, 0, 0, FLA_TL );
95  FLA_Part_2x2( Z, &ZTL, &ZTR,
96  &ZBL, &ZBR, 0, 0, FLA_TL );
97  FLA_Part_2x2( T, &TTL, &TTR,
98  &TBL, &TBR, 0, 0, FLA_TL );
99  FLA_Part_2x1( d, &dT,
100  &dB, 0, FLA_TOP );
101  FLA_Part_2x1( e, &eT,
102  &eB, 0, FLA_TOP );
103  FLA_Part_2x1( f, &fT,
104  &fB, 0, FLA_TOP );
105 
106  while ( FLA_Obj_length( ATL ) < b_alg )
107  {
108  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
109  /* ************* */ /* ************************** */
110  &a10t, /**/ &alpha11, &a12t,
111  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
112  1, 1, FLA_BR );
113  FLA_Repart_2x2_to_3x3( YTL, /**/ YTR, &Y00, /**/ &y01, &Y02,
114  /* ************* */ /* ************************ */
115  &y10t, /**/ &psi11, &y12t,
116  YBL, /**/ YBR, &Y20, /**/ &y21, &Y22,
117  1, 1, FLA_BR );
118  FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
119  /* ************* */ /* ************************* */
120  &z10t, /**/ &zeta11, &z12t,
121  ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
122  1, 1, FLA_BR );
123  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
124  /* ************* */ /* ************************** */
125  &t10t, /**/ &tau11, &t12t,
126  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
127  1, 1, FLA_BR );
128  FLA_Repart_2x1_to_3x1( dT, &d0,
129  /* ** */ /* ****** */
130  &delta1,
131  dB, &d2, 1, FLA_BOTTOM );
132  FLA_Repart_2x1_to_3x1( eT, &e0,
133  /* ** */ /* ******** */
134  &epsilon1,
135  eB, &e2, 1, FLA_BOTTOM );
136  FLA_Repart_2x1_to_3x1( fT, &f0,
137  /* ** */ /* **** */
138  &phi1,
139  fB, &f2, 1, FLA_BOTTOM );
140 
141  /*------------------------------------------------------------*/
142 
143  // Save the last element of a10t (the 1x1 part a10t_r) and set it to one so
144  // we can use a10t as u10t in subsequent computations. It is restored below.
145  if ( FLA_Obj_length( ATL ) > 0 )
146  {
147  FLA_Part_1x2( a10t, &a10t_l, &a10t_r, 1, FLA_RIGHT );
148  FLA_Copy( a10t_r, last_elem );
149  FLA_Set( FLA_ONE, a10t_r );
150  }
151 
152  FLA_Merge_2x1( alpha11,
153  a21, &a2 ); // a2 = [ alpha11; a21 ], so each Gemvc below updates both at once.
154 
155  // alpha11 = alpha11 - u10t * y10t' - z10t * u10t';
156  // a21 = a21 - U20 * y10t' - Z20 * u10t';
157  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ABL, y10t, FLA_ONE, a2 );
158  FLA_Gemvc( FLA_NO_TRANSPOSE, FLA_CONJUGATE, FLA_MINUS_ONE, ZBL, a10t, FLA_ONE, a2 );
159 
160  // a12t = a12t - u10t * Y20' - z10t * U20';
161  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, a10t, FLA_ONE, a12t );
162  FLA_Gemv( FLA_CONJ_NO_TRANSPOSE, FLA_MINUS_ONE, A20, z10t, FLA_ONE, a12t );
163 
164  // Restore last element of a10t.
165  if ( FLA_Obj_length( ATL ) > 0 )
166  {
167  FLA_Copy( last_elem, a10t_r );
168  }
169 
170  if ( FLA_Obj_length( A22 ) > 0 )
171  {
172  FLA_Part_2x1( a21, &a21_t,
173  &a21_b, 1, FLA_TOP );
174 
175  // [ u21, tau11, a21 ] = House( a21 );
176  FLA_Househ2_UT( FLA_LEFT,
177  a21_t,
178  a21_b, tau11 );
179 
180  // inv_tau11 = 1 / tau11;
181  // minus_inv_tau11 = -1 / tau11;
182  FLA_Set( FLA_ONE, inv_tau11 );
183  FLA_Inv_scalc( FLA_NO_CONJUGATE, tau11, inv_tau11 );
184  FLA_Copy( inv_tau11, minus_inv_tau11 );
185  FLA_Scal( FLA_MINUS_ONE, minus_inv_tau11 );
186 
187  // Save the first element of a21 (a21_t) and set it to one so a21 can be used as u21.
188  FLA_Copy( a21_t, first_elem );
189  FLA_Set( FLA_ONE, a21_t );
190 
191  // y21 = A22' * u21;
192  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, y21 );
193 
194  // z21 = A22 * u21;
195  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, a21, FLA_ZERO, z21 );
196 
197  // y21 = y21 - Y20 * ( U20' * u21 ) - U20 * ( Z20' * u21 );
198  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, A20, a21, FLA_ZERO, d0 ); // d0 = U20' * u21
199  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Y20, a21, FLA_ZERO, e0 ); // e0 = Y20' * u21
200  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, Z20, a21, FLA_ZERO, f0 ); // f0 = Z20' * u21
201 
202  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Y20, d0, FLA_ONE, y21 );
203  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, f0, FLA_ONE, y21 );
204 
205  // t01 = U20' * u21;
206  FLA_Copy( d0, t01 );
207 
208  // z21 = z21 - U20 * ( Y20' * u21 ) - Z20 * ( U20' * u21 );
209  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, A20, e0, FLA_ONE, z21 );
210  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, d0, FLA_ONE, z21 );
211 
212  // beta = u21' * z21 / 2;
213  // conj_beta = conj(beta);
214  FLA_Dotc( FLA_CONJUGATE, a21, z21, beta );
215  FLA_Inv_scal( FLA_TWO, beta );
216  FLA_Copyt( FLA_CONJ_NO_TRANSPOSE, beta, conj_beta );
217 
218  // y21' = ( y21' - beta / tau * u21' ) / tau;
219  // y21 = ( y21 - conj(beta) / tau * u21 ) / tau;
220  FLA_Scal( minus_inv_tau11, conj_beta ); // conj_beta now holds -conj(beta) / tau
221  FLA_Axpy( conj_beta, a21, y21 );
222  FLA_Scal( inv_tau11, y21 );
223 
224  // z21 = ( z21 - beta / tau * u21 ) / tau;
225  FLA_Scal( minus_inv_tau11, beta ); // beta now holds -beta / tau
226  FLA_Axpy( beta, a21, z21 );
227  FLA_Scal( inv_tau11, z21 );
228 
229  // a12t = a12t * ( I - u21 * u21' / tau );
230  // = a12t - ( a12t * u21 ) * u21' / tau;
231  FLA_Dot( a12t, a21, dot_product );
232  FLA_Scal( minus_inv_tau11, dot_product );
233  FLA_Axpyt( FLA_CONJ_TRANSPOSE, dot_product, a21, a12t );
234 
235  // A02 = A02 * ( I - u21 * u21' / tau );
236  // = A02 - ( A02 * u21 ) * u21' / tau;
237  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, a21, FLA_ZERO, e0 ); // e0 is reused here as scratch for A02 * u21
238  FLA_Gerc( FLA_NO_CONJUGATE, FLA_CONJUGATE, minus_inv_tau11, e0, a21, A02 );
239 
240  // Restore first element of a21.
241  FLA_Copy( first_elem, a21_t );
242  }
243 
244  /*------------------------------------------------------------*/
245 
246  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
247  a10t, alpha11, /**/ a12t,
248  /* ************** */ /* ************************ */
249  &ABL, /**/ &ABR, A20, a21, /**/ A22,
250  FLA_TL );
251  FLA_Cont_with_3x3_to_2x2( &YTL, /**/ &YTR, Y00, y01, /**/ Y02,
252  y10t, psi11, /**/ y12t,
253  /* ************** */ /* ********************** */
254  &YBL, /**/ &YBR, Y20, y21, /**/ Y22,
255  FLA_TL );
256  FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
257  z10t, zeta11, /**/ z12t,
258  /* ************** */ /* *********************** */
259  &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
260  FLA_TL );
261  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
262  t10t, tau11, /**/ t12t,
263  /* ************** */ /* ************************ */
264  &TBL, /**/ &TBR, T20, t21, /**/ T22,
265  FLA_TL );
266  FLA_Cont_with_3x1_to_2x1( &dT, d0,
267  delta1,
268  /* ** */ /* ****** */
269  &dB, d2, FLA_TOP );
270  FLA_Cont_with_3x1_to_2x1( &eT, e0,
271  epsilon1,
272  /* ** */ /* ******** */
273  &eB, e2, FLA_TOP );
274  FLA_Cont_with_3x1_to_2x1( &fT, f0,
275  phi1,
276  /* ** */ /* **** */
277  &fB, f2, FLA_TOP );
278  }
279 
280  FLA_Obj_free( &inv_tau11 );
281  FLA_Obj_free( &minus_inv_tau11 );
282  FLA_Obj_free( &first_elem );
283  FLA_Obj_free( &last_elem );
284  FLA_Obj_free( &beta );
285  FLA_Obj_free( &conj_beta );
286  FLA_Obj_free( &dot_product );
287  FLA_Obj_free( &d );
288  FLA_Obj_free( &e );
289  FLA_Obj_free( &f );
290 
291  return FLA_SUCCESS;
292 }
FLA_Error FLA_Gemvc(FLA_Trans transa, FLA_Conj conjx, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Gemvc.c:13

References FLA_Axpy(), FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dotc(), FLA_Gemv(), FLA_Gemvc(), FLA_Gerc(), FLA_Househ2_UT(), FLA_Inv_scal(), FLA_Inv_scalc(), FLA_Merge_2x1(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_1x2(), FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Scal(), FLA_Set(), FLA_TWO, and FLA_ZERO.

Referenced by FLA_Hess_UT_unb_var4().

◆ FLA_Hess_UT_step_unb_var5()

// Performs b_alg = FLA_Obj_length( T ) iterations of the variant-5 unblocked
// reduction step on A (a Hessenberg reduction, going by the routine name).
//
// In every iteration after the first, the current column ( a01, alpha11, a21 )
// is updated from the previously stored parts of U, Z and T00, using two
// triangular solves with T00 and the m_A x 1 workspace w. Then, while a21 is
// non-empty, a Householder transform is computed for a21 with
// FLA_Househ2_UT(), a copy of a21 with its first element set to one is stored
// in u21, the products of A02 / a12t / A22 with u21 are stored in the current
// column of Z, and t01 = U20' * u21 is stored in T.
//
// A    - matrix updated in place.
// U, Z - caller-provided workspaces; both are set to zero on entry.
// T    - receives tau11 and t01; its length sets the number of iterations.
//
// Returns FLA_SUCCESS unconditionally; the results of the FLA_* calls made
// here are not inspected.
FLA_Error FLA_Hess_UT_step_unb_var5 ( FLA_Obj  A,
FLA_Obj  U,
FLA_Obj  Z,
FLA_Obj  T 
)
30 {
31  FLA_Obj ATL, ATR, A00, a01, A02,
32  ABL, ABR, a10t, alpha11, a12t,
33  A20, a21, A22;
34  FLA_Obj UTL, UTR, U00, u01, U02,
35  UBL, UBR, u10t, upsilon11, u12t,
36  U20, u21, U22;
37  FLA_Obj ZTL, ZTR, Z00, z01, Z02,
38  ZBL, ZBR, z10t, zeta11, z12t,
39  Z20, z21, Z22;
40  FLA_Obj TTL, TTR, T00, t01, T02,
41  TBL, TBR, t10t, tau11, t12t,
42  T20, t21, T22;
43  FLA_Obj wT, w0,
44  wB, omega1,
45  w2;
46  FLA_Obj w;
47 
48  FLA_Obj a21_t,
49  a21_b;
50  FLA_Obj u21_t,
51  u21_b;
52 
53  FLA_Datatype datatype_A;
54  dim_t m_A;
55  dim_t b_alg;
56 
57 
58  b_alg = FLA_Obj_length( T );
59 
60  datatype_A = FLA_Obj_datatype( A );
61  m_A = FLA_Obj_length( A );
62 
63  FLA_Obj_create( datatype_A, m_A, 1, 0, 0, &w );
64 
65  FLA_Set( FLA_ZERO, U );
66  FLA_Set( FLA_ZERO, Z );
67 
68  FLA_Part_2x2( A, &ATL, &ATR,
69  &ABL, &ABR, 0, 0, FLA_TL );
70  FLA_Part_2x2( U, &UTL, &UTR,
71  &UBL, &UBR, 0, 0, FLA_TL );
72  FLA_Part_2x2( Z, &ZTL, &ZTR,
73  &ZBL, &ZBR, 0, 0, FLA_TL );
74  FLA_Part_2x2( T, &TTL, &TTR,
75  &TBL, &TBR, 0, 0, FLA_TL );
76  FLA_Part_2x1( w, &wT,
77  &wB, 0, FLA_TOP );
78 
79  while ( FLA_Obj_length( ATL ) < b_alg )
80  {
81  FLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &a01, &A02,
82  /* ************* */ /* ************************** */
83  &a10t, /**/ &alpha11, &a12t,
84  ABL, /**/ ABR, &A20, /**/ &a21, &A22,
85  1, 1, FLA_BR );
86  FLA_Repart_2x2_to_3x3( UTL, /**/ UTR, &U00, /**/ &u01, &U02,
87  /* ************* */ /* **************************** */
88  &u10t, /**/ &upsilon11, &u12t,
89  UBL, /**/ UBR, &U20, /**/ &u21, &U22,
90  1, 1, FLA_BR );
91  FLA_Repart_2x2_to_3x3( ZTL, /**/ ZTR, &Z00, /**/ &z01, &Z02,
92  /* ************* */ /* ************************* */
93  &z10t, /**/ &zeta11, &z12t,
94  ZBL, /**/ ZBR, &Z20, /**/ &z21, &Z22,
95  1, 1, FLA_BR );
96  FLA_Repart_2x2_to_3x3( TTL, /**/ TTR, &T00, /**/ &t01, &T02,
97  /* ************* */ /* ************************** */
98  &t10t, /**/ &tau11, &t12t,
99  TBL, /**/ TBR, &T20, /**/ &t21, &T22,
100  1, 1, FLA_BR );
101  FLA_Repart_2x1_to_3x1( wT, &w0,
102  /* ** */ /* ****** */
103  &omega1,
104  wB, &w2, 1, FLA_BOTTOM );
105 
106  /*------------------------------------------------------------*/
107 
108  if ( FLA_Obj_length( ATL ) > 0 ) // Skipped on the first iteration, when ATL is still 0 x 0.
109  {
110  // w0 = inv( triu( T00 ) ) * u10t';
111  FLA_Copyt( FLA_CONJ_TRANSPOSE, u10t, w0 );
112  FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
113  T00, w0 );
114 
115  // a01 = a01 - Z00 * w0;
116  // alpha11 = alpha11 - z10t * w0;
117  // a21 = a21 - Z20 * w0;
118  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z00, w0, FLA_ONE, a01 );
119  FLA_Dots( FLA_MINUS_ONE, z10t, w0, FLA_ONE, alpha11 );
120  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, Z20, w0, FLA_ONE, a21 );
121 
122  // w0 = inv( triu( T00 ) )' * ( U00' * a01 + u10t' * alpha11 + U20' * a21 );
123  FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
124  FLA_ONE, U00, a01, FLA_ZERO, w0 );
125  FLA_Axpyt( FLA_CONJ_TRANSPOSE, alpha11, u10t, w0 );
126  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, a21, FLA_ONE, w0 );
127 
128  FLA_Trsv( FLA_UPPER_TRIANGULAR, FLA_CONJ_TRANSPOSE, FLA_NONUNIT_DIAG,
129  T00, w0 );
130 
131  // a01 = a01 - U00 * w0;
132  // alpha11 = alpha11 - u10t * w0;
133  // a21 = a21 - U20 * w0;
134  FLA_Trmvsx( FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_NONUNIT_DIAG,
135  FLA_MINUS_ONE, U00, w0, FLA_ONE, a01 );
136  FLA_Dots( FLA_MINUS_ONE, u10t, w0, FLA_ONE, alpha11 );
137  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_MINUS_ONE, U20, w0, FLA_ONE, a21 );
138  }
139 
140  if ( FLA_Obj_length( a21 ) > 0 ) // No Householder step when nothing lies below alpha11.
141  {
142  FLA_Part_2x1( a21, &a21_t,
143  &a21_b, 1, FLA_TOP );
144 
145  // [ u21, tau11, a21 ] = House( a21 );
146  FLA_Househ2_UT( FLA_LEFT,
147  a21_t,
148  a21_b, tau11 );
149 
150  // u21 := a21;
151  FLA_Copy( a21, u21 );
152 
153  // Explicitly set the first element of the Householder vector so we
154  // can use it in regular computations.
155  FLA_Part_2x1( u21, &u21_t,
156  &u21_b, 1, FLA_TOP );
157  FLA_Set( FLA_ONE, u21_t );
158 
159  // z01 = A02 * u21;
160  // zeta11 = a12t * u21;
161  // z21 = A22 * u21;
162  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A02, u21, FLA_ZERO, z01 );
163  FLA_Dot( a12t, u21, zeta11 );
164  FLA_Gemv( FLA_NO_TRANSPOSE, FLA_ONE, A22, u21, FLA_ZERO, z21 );
165 
166  // t01 = U20' * u21;
167  FLA_Gemv( FLA_CONJ_TRANSPOSE, FLA_ONE, U20, u21, FLA_ZERO, t01 );
168  }
169 
170  /*------------------------------------------------------------*/
171 
172  FLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, a01, /**/ A02,
173  a10t, alpha11, /**/ a12t,
174  /* ************** */ /* ************************ */
175  &ABL, /**/ &ABR, A20, a21, /**/ A22,
176  FLA_TL );
177  FLA_Cont_with_3x3_to_2x2( &UTL, /**/ &UTR, U00, u01, /**/ U02,
178  u10t, upsilon11, /**/ u12t,
179  /* ************** */ /* ************************** */
180  &UBL, /**/ &UBR, U20, u21, /**/ U22,
181  FLA_TL );
182  FLA_Cont_with_3x3_to_2x2( &ZTL, /**/ &ZTR, Z00, z01, /**/ Z02,
183  z10t, zeta11, /**/ z12t,
184  /* ************** */ /* *********************** */
185  &ZBL, /**/ &ZBR, Z20, z21, /**/ Z22,
186  FLA_TL );
187  FLA_Cont_with_3x3_to_2x2( &TTL, /**/ &TTR, T00, t01, /**/ T02,
188  t10t, tau11, /**/ t12t,
189  /* ************** */ /* ************************ */
190  &TBL, /**/ &TBR, T20, t21, /**/ T22,
191  FLA_TL );
192  FLA_Cont_with_3x1_to_2x1( &wT, w0,
193  omega1,
194  /* ** */ /* ****** */
195  &wB, w2, FLA_TOP );
196  }
197 
198  FLA_Obj_free( &w );
199 
200  return FLA_SUCCESS;
201 }
FLA_Error FLA_Dots(FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho)
Definition: FLA_Dots.c:13
FLA_Error FLA_Trmvsx(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj alpha, FLA_Obj A, FLA_Obj x, FLA_Obj beta, FLA_Obj y)
Definition: FLA_Trmvsx.c:13
FLA_Error FLA_Trsv(FLA_Uplo uplo, FLA_Trans transa, FLA_Diag diag, FLA_Obj A, FLA_Obj x)
Definition: FLA_Trsv.c:15

References FLA_Axpyt(), FLA_Cont_with_3x1_to_2x1(), FLA_Cont_with_3x3_to_2x2(), FLA_Copy(), FLA_Copyt(), FLA_Dot(), FLA_Dots(), FLA_Gemv(), FLA_Househ2_UT(), FLA_MINUS_ONE, FLA_Obj_create(), FLA_Obj_datatype(), FLA_Obj_free(), FLA_Obj_length(), FLA_ONE, FLA_Part_2x1(), FLA_Part_2x2(), FLA_Repart_2x1_to_3x1(), FLA_Repart_2x2_to_3x3(), FLA_Set(), FLA_Trmvsx(), FLA_Trsv(), FLA_ZERO, and omega1.

Referenced by FLA_Hess_UT_unb_var5().

◆ FLA_Hess_UT_unb_var1()

// Unblocked variant 1 entry point: forwards A and T unchanged to
// FLA_Hess_UT_step_unb_var1() and returns that routine's result.
FLA_Error FLA_Hess_UT_unb_var1 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_unb_var1( A, T );
16 }
FLA_Error FLA_Hess_UT_step_unb_var1(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_unb_var1.c:18

References FLA_Hess_UT_step_unb_var1().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_unb_var2()

// Unblocked variant 2 entry point: forwards A and T unchanged to
// FLA_Hess_UT_step_unb_var2() and returns that routine's result.
FLA_Error FLA_Hess_UT_unb_var2 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_unb_var2( A, T );
16 }
FLA_Error FLA_Hess_UT_step_unb_var2(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_unb_var2.c:18

References FLA_Hess_UT_step_unb_var2().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_unb_var3()

// Unblocked variant 3 entry point: forwards A and T unchanged to
// FLA_Hess_UT_step_unb_var3() and returns that routine's result.
FLA_Error FLA_Hess_UT_unb_var3 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  return FLA_Hess_UT_step_unb_var3( A, T );
16 }
FLA_Error FLA_Hess_UT_step_unb_var3(FLA_Obj A, FLA_Obj T)
Definition: FLA_Hess_UT_unb_var3.c:18

References FLA_Hess_UT_step_unb_var3().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_unb_var4()

// Unblocked variant 4 entry point. Creates two temporary workspaces, Y and Z,
// conformal to A (no transposition), runs FLA_Hess_UT_step_unb_var4() on
// A, Y, Z and T, frees both workspaces, and returns the step routine's result.
FLA_Error FLA_Hess_UT_unb_var4 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj Y, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Y );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_unb_var4( A, Y, Z, T );
22 
23  FLA_Obj_free( &Y );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }
FLA_Error FLA_Hess_UT_step_unb_var4(FLA_Obj A, FLA_Obj Y, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_unb_var4.c:29

References FLA_Hess_UT_step_unb_var4(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

Referenced by FLA_Hess_UT_internal().

◆ FLA_Hess_UT_unb_var5()

// Unblocked variant 5 entry point. Creates two temporary workspaces, U and Z,
// conformal to A (no transposition), runs FLA_Hess_UT_step_unb_var5() on
// A, U, Z and T, frees both workspaces, and returns the step routine's result.
FLA_Error FLA_Hess_UT_unb_var5 ( FLA_Obj  A,
FLA_Obj  T 
)
14 {
15  FLA_Error r_val;
16  FLA_Obj U, Z;
17 
18  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &U );
19  FLA_Obj_create_conf_to( FLA_NO_TRANSPOSE, A, &Z );
20 
21  r_val = FLA_Hess_UT_step_unb_var5( A, U, Z, T );
22 
23  FLA_Obj_free( &U );
24  FLA_Obj_free( &Z );
25 
26  return r_val;
27 }
FLA_Error FLA_Hess_UT_step_unb_var5(FLA_Obj A, FLA_Obj U, FLA_Obj Z, FLA_Obj T)
Definition: FLA_Hess_UT_unb_var5.c:29

References FLA_Hess_UT_step_unb_var5(), FLA_Obj_create_conf_to(), and FLA_Obj_free().

Referenced by FLA_Hess_UT_internal().