libflame
FLA_Apply_G_rf.h File Reference


Go to the source code of this file.

Functions

FLA_Error FLA_Apply_G_rf_opt_var1 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var1 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var1 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var2 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var2 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var2 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var3 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var3 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var3 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var4 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var4 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var4 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var5 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var5 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var5 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var6 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var6 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var6 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var7 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var7 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var7 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var8 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var8 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var8 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_opt_var9 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ops_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_opz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asm_var9 (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var9 (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var3b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var3b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var3b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var3b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var3b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var3b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var5b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var5b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var5b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var5b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var5b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var5b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var6b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var6b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var6b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var6b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var6b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var6b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var8b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var8b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var8b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var8b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var8b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var8b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bhs_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bhd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bhc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bhz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_asm_var9b (FLA_Obj G, FLA_Obj A)
 
FLA_Error FLA_Apply_G_rf_ass_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asd_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asc_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_asz_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
 
FLA_Error FLA_Apply_G_rf_blk_var9b (FLA_Obj G, FLA_Obj A, dim_t b_alg)
 
FLA_Error FLA_Apply_G_rf_bls_var9b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_bld_var9b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blc_var9b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg)
 
FLA_Error FLA_Apply_G_rf_blz_var9b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg)
 

Function Documentation
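
These routines apply k_G sweeps of Givens rotations from the right ("rf") to an m_A x n_A matrix A. Each rotation is packed into one element of buff_G, with its two real parameters stored as gamma in the real part and sigma in the imaginary part; the j-th rotation of a sweep acts on columns j and j+1 of A, and entries equal to (1, 0) are treated as identity rotations and skipped. The actual column updates are performed by MAC_Apply_G_mx* kernels that are not shown on this page; the sketch below is a hedged plain-C reference for the double-precision real case, assuming the conventional plane-rotation update (the kernels' exact sign convention and vectorization may differ).

/* Hedged reference sketch (not libflame code): apply one rotation,
   stored as (gamma, sigma), to two length-m columns a1 and a2 with
   unit row stride. MAC_Apply_G_mx2_asd plays this role in the
   listings below; its exact convention is an assumption here. */
static void apply_one_rotation_d( int m, double gamma, double sigma,
                                  double* a1, double* a2 )
{
    int i;
    for ( i = 0; i < m; ++i )
    {
        double temp1 = a1[ i ];
        double temp2 = a2[ i ];
        a1[ i ] =  gamma * temp1 + sigma * temp2;
        a2[ i ] = -sigma * temp1 + gamma * temp2;
    }
}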

◆ FLA_Apply_G_rf_asc_var1()

FLA_Error FLA_Apply_G_rf_asc_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
220 {
221  float one = bl1_s1();
222  float zero = bl1_s0();
223  int nG_app = n_A - 1;
224  int l, j;
225  float gamma;
226  float sigma;
227  scomplex* a1;
228  scomplex* a2;
229  scomplex* g1;
230  scomplex* g11;
231 
232  g1 = buff_G;
233 
234  for ( l = 0; l < k_G; ++l )
235  {
236  a1 = buff_A;
237  a2 = buff_A + cs_A;
238  g11 = g1;
239 
240  for ( j = 0; j < nG_app; ++j )
241  {
242  gamma = g11->real;
243  sigma = g11->imag;
244 
245  // Skip the current iteration if the rotation is identity.
246  if ( gamma != one || sigma != zero )
247  {
248  MAC_Apply_G_mx2_asc( m_A,
249  &gamma,
250  &sigma,
251  a1, 1,
252  a2, 1 );
253  }
254 
255  a1 += cs_A;
256  a2 += cs_A;
257  g11 += rs_G;
258  }
259 
260  g1 += cs_G;
261  }
262 
263  return FLA_SUCCESS;
264 }

References bl1_s0(), bl1_s1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_asm_var1(), and FLA_Apply_G_rf_blc_var1().
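
A minimal usage sketch for this variant, assuming column-major storage (unit row stride) for both A and G and that the prototypes listed above are visible through the usual libflame headers; the wrapper itself is hypothetical.

/* Hypothetical wrapper, for illustration only. Each column of G holds one
   sweep of n_A - 1 rotations (gamma in .real, sigma in .imag); entries
   equal to (1, 0) are skipped as identity rotations by the variant above. */
void apply_G_sweeps_c( int m_A, int n_A, int k_G,
                       scomplex* A, int ldA,   /* rs_A = 1, cs_A = ldA          */
                       scomplex* G, int ldG )  /* rs_G = 1, cs_G = ldG >= n_A-1 */
{
    FLA_Apply_G_rf_asc_var1( k_G, m_A, n_A,
                             G, 1, ldG,
                             A, 1, ldA );
}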

◆ FLA_Apply_G_rf_asc_var2()

FLA_Error FLA_Apply_G_rf_asc_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
344 {
345  float one = bl1_s1();
346  float zero = bl1_s0();
347  float gamma;
348  float sigma;
349  scomplex* a1;
350  scomplex* a2;
351  scomplex* g11;
352  int j, g, k;
353  int nG, nG_app;
354  int k_minus_1;
355 
356  k_minus_1 = k_G - 1;
357  nG = n_A - 1;
358 
359  // Use the simple variant for nG < (k - 1) or k == 1.
360  if ( nG < k_minus_1 || k_G == 1 )
361  {
362  FLA_Apply_G_rf_asc_var1( k_G,
363  m_A,
364  n_A,
365  buff_G, rs_G, cs_G,
366  buff_A, rs_A, cs_A );
367  return FLA_SUCCESS;
368  }
369 
370 
371  // Start-up phase.
372 
373  for ( j = 0; j < k_minus_1; ++j )
374  {
375  nG_app = j + 1;
376 
377  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
378  {
379  g11 = buff_G + (g )*rs_G + (k )*cs_G;
380  a1 = buff_A + (g )*cs_A;
381  a2 = buff_A + (g + 1)*cs_A;
382 
383  gamma = g11->real;
384  sigma = g11->imag;
385 
386  // Skip the current iteration if the rotation is identity.
387  if ( gamma == one && sigma == zero ) continue;
388 
389  MAC_Apply_G_mx2_asc( m_A,
390  &gamma,
391  &sigma,
392  a1, 1,
393  a2, 1 );
394  }
395  }
396 
397  // Pipeline stage
398 
399  for ( j = k_minus_1; j < nG; ++j )
400  {
401  nG_app = k_G;
402 
403  for ( k = 0, g = j; k < nG_app; ++k, --g )
404  {
405  g11 = buff_G + (g )*rs_G + (k )*cs_G;
406  a1 = buff_A + (g )*cs_A;
407  a2 = buff_A + (g + 1)*cs_A;
408 
409  gamma = g11->real;
410  sigma = g11->imag;
411 
412  // Skip the current iteration if the rotation is identity.
413  if ( gamma == one && sigma == zero ) continue;
414 
415  MAC_Apply_G_mx2_asc( m_A,
416  &gamma,
417  &sigma,
418  a1, 1,
419  a2, 1 );
420  }
421  }
422 
423  // Shutdown stage
424 
425  for ( j = nG - k_minus_1; j < nG; ++j )
426  {
427  nG_app = nG - j;
428 
429  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
430  {
431  g11 = buff_G + (g )*rs_G + (k )*cs_G;
432  a1 = buff_A + (g )*cs_A;
433  a2 = buff_A + (g + 1)*cs_A;
434 
435  gamma = g11->real;
436  sigma = g11->imag;
437 
438  // Skip the current iteration if the rotation is identity.
439  if ( gamma == one && sigma == zero ) continue;
440 
441  MAC_Apply_G_mx2_asc( m_A,
442  &gamma,
443  &sigma,
444  a1, 1,
445  a2, 1 );
446  }
447  }
448 
449  return FLA_SUCCESS;
450 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_blc_var2().
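
The start-up, pipeline, and shutdown loops above traverse the (sweep k, rotation g) grid along anti-diagonal wavefronts, so that later sweeps begin before earlier sweeps have finished. The stand-alone program below simply re-traces those loop bounds and prints the visiting order; the sizes are arbitrary example values and no actual updates are performed.

#include <stdio.h>

int main( void )
{
    int k_G = 3, n_A = 8;              /* example sizes (assumed) */
    int nG  = n_A - 1;
    int k_minus_1 = k_G - 1;
    int j, k, g;

    for ( j = 0; j < k_minus_1; ++j )                       /* start-up  */
        for ( k = 0, g = j; k <= j; ++k, --g )
            printf( "startup:  k=%d g=%d\n", k, g );

    for ( j = k_minus_1; j < nG; ++j )                      /* pipeline  */
        for ( k = 0, g = j; k < k_G; ++k, --g )
            printf( "pipeline: k=%d g=%d\n", k, g );

    for ( j = nG - k_minus_1; j < nG; ++j )                 /* shutdown  */
        for ( k = k_G - (nG - j), g = nG - 1; k < k_G; ++k, --g )
            printf( "shutdown: k=%d g=%d\n", k, g );

    return 0;
}

Each (k, g) pair is visited exactly once, in an order consistent with the sweep ordering of the simple variant.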

◆ FLA_Apply_G_rf_asc_var3()

FLA_Error FLA_Apply_G_rf_asc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
1024 {
1025  float one = bl1_s1();
1026  float zero = bl1_s0();
1027  float gamma23_k1;
1028  float sigma23_k1;
1029  float gamma34_k1;
1030  float sigma34_k1;
1031  float gamma12_k2;
1032  float sigma12_k2;
1033  float gamma23_k2;
1034  float sigma23_k2;
1035  scomplex* a1;
1036  scomplex* a2;
1037  scomplex* a3;
1038  scomplex* a4;
1039  scomplex* g23_k1;
1040  scomplex* g34_k1;
1041  scomplex* g12_k2;
1042  scomplex* g23_k2;
1043  int i, j, g, k;
1044  int nG, nG_app;
1045  int n_iter;
1046  int n_left;
1047  int k_minus_1;
1048  int n_fuse;
1049  int k_fuse;
1050  int is_ident23_k1, is_ident34_k1;
1051  int is_ident12_k2, is_ident23_k2;
1052  int has_ident;
1053 
1054  k_minus_1 = k_G - 1;
1055  nG = n_A - 1;
1056  n_fuse = 2;
1057  k_fuse = 2;
1058 
1059  // Use the simple variant for nG < 2(k - 1) or k == 1.
1060  if ( nG < 2*k_minus_1 || k_G == 1 )
1061  {
1062  FLA_Apply_G_rf_asc_var1( k_G,
1063  m_A,
1064  n_A,
1065  buff_G, rs_G, cs_G,
1066  buff_A, rs_A, cs_A );
1067  return FLA_SUCCESS;
1068  }
1069 
1070 
1071  // Start-up phase.
1072 
1073  for ( j = -1; j < k_minus_1; j += n_fuse )
1074  {
1075  nG_app = j + 2;
1076  n_iter = nG_app / k_fuse;
1077  //n_iter = nG_app % k_fuse;
1078  n_left = 1;
1079 
1080  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1081  {
1082  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1083  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1084  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1085  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1086  a1 = buff_A + (g - 1)*cs_A;
1087  a2 = buff_A + (g )*cs_A;
1088  a3 = buff_A + (g + 1)*cs_A;
1089  a4 = buff_A + (g + 2)*cs_A;
1090 
1091  gamma23_k1 = g23_k1->real;
1092  sigma23_k1 = g23_k1->imag;
1093  gamma34_k1 = g34_k1->real;
1094  sigma34_k1 = g34_k1->imag;
1095  gamma12_k2 = g12_k2->real;
1096  sigma12_k2 = g12_k2->imag;
1097  gamma23_k2 = g23_k2->real;
1098  sigma23_k2 = g23_k2->imag;
1099 
1100  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1101  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1102  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1103  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1104  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1105  is_ident12_k2 || is_ident23_k2 );
1106 
1107  if ( has_ident )
1108  {
1109  // Apply to pairs of columns as needed.
1110 
1111  if ( !is_ident23_k1 )
1112  MAC_Apply_G_mx2_asc( m_A,
1113  &gamma23_k1,
1114  &sigma23_k1,
1115  a2, 1,
1116  a3, 1 );
1117 
1118  if ( !is_ident34_k1 )
1119  MAC_Apply_G_mx2_asc( m_A,
1120  &gamma34_k1,
1121  &sigma34_k1,
1122  a3, 1,
1123  a4, 1 );
1124 
1125  if ( !is_ident12_k2 )
1126  MAC_Apply_G_mx2_asc( m_A,
1127  &gamma12_k2,
1128  &sigma12_k2,
1129  a1, 1,
1130  a2, 1 );
1131 
1132  if ( !is_ident23_k2 )
1133  MAC_Apply_G_mx2_asc( m_A,
1134  &gamma23_k2,
1135  &sigma23_k2,
1136  a2, 1,
1137  a3, 1 );
1138  }
1139  else
1140  {
1141  // Apply to all four columns.
1142 
1143  MAC_Apply_G_mx4s_asc( m_A,
1144  &gamma23_k1,
1145  &sigma23_k1,
1146  &gamma34_k1,
1147  &sigma34_k1,
1148  &gamma12_k2,
1149  &sigma12_k2,
1150  &gamma23_k2,
1151  &sigma23_k2,
1152  a1, 1,
1153  a2, 1,
1154  a3, 1,
1155  a4, 1 );
1156  }
1157  }
1158 
1159  if ( n_left == 1 )
1160  {
1161  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1162  a3 = buff_A + (g + 1)*cs_A;
1163  a4 = buff_A + (g + 2)*cs_A;
1164 
1165  gamma34_k1 = g34_k1->real;
1166  sigma34_k1 = g34_k1->imag;
1167 
1168  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1169 
1170  if ( !is_ident34_k1 )
1171  MAC_Apply_G_mx2_asc( m_A,
1172  &gamma34_k1,
1173  &sigma34_k1,
1174  a3, 1,
1175  a4, 1 );
1176  }
1177  }
1178 
1179  // Pipeline stage
1180 
1181  for ( ; j < nG - 1; j += n_fuse )
1182  {
1183  nG_app = k_G;
1184  n_iter = nG_app / k_fuse;
1185  n_left = nG_app % k_fuse;
1186 
1187  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1188  {
1189  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1190  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1191  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1192  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1193  a1 = buff_A + (g - 1)*cs_A;
1194  a2 = buff_A + (g )*cs_A;
1195  a3 = buff_A + (g + 1)*cs_A;
1196  a4 = buff_A + (g + 2)*cs_A;
1197 
1198  gamma23_k1 = g23_k1->real;
1199  sigma23_k1 = g23_k1->imag;
1200  gamma34_k1 = g34_k1->real;
1201  sigma34_k1 = g34_k1->imag;
1202  gamma12_k2 = g12_k2->real;
1203  sigma12_k2 = g12_k2->imag;
1204  gamma23_k2 = g23_k2->real;
1205  sigma23_k2 = g23_k2->imag;
1206 
1207  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1208  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1209  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1210  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1211  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1212  is_ident12_k2 || is_ident23_k2 );
1213 
1214  if ( has_ident )
1215  {
1216  // Apply to pairs of columns as needed.
1217 
1218  if ( !is_ident23_k1 )
1219  MAC_Apply_G_mx2_asc( m_A,
1220  &gamma23_k1,
1221  &sigma23_k1,
1222  a2, 1,
1223  a3, 1 );
1224 
1225  if ( !is_ident34_k1 )
1226  MAC_Apply_G_mx2_asc( m_A,
1227  &gamma34_k1,
1228  &sigma34_k1,
1229  a3, 1,
1230  a4, 1 );
1231 
1232  if ( !is_ident12_k2 )
1233  MAC_Apply_G_mx2_asc( m_A,
1234  &gamma12_k2,
1235  &sigma12_k2,
1236  a1, 1,
1237  a2, 1 );
1238 
1239  if ( !is_ident23_k2 )
1240  MAC_Apply_G_mx2_asc( m_A,
1241  &gamma23_k2,
1242  &sigma23_k2,
1243  a2, 1,
1244  a3, 1 );
1245  }
1246  else
1247  {
1248  // Apply to all four columns.
1249 
1250  MAC_Apply_G_mx4s_asc( m_A,
1251  &gamma23_k1,
1252  &sigma23_k1,
1253  &gamma34_k1,
1254  &sigma34_k1,
1255  &gamma12_k2,
1256  &sigma12_k2,
1257  &gamma23_k2,
1258  &sigma23_k2,
1259  a1, 1,
1260  a2, 1,
1261  a3, 1,
1262  a4, 1 );
1263  }
1264  }
1265 
1266  if ( n_left == 1 )
1267  {
1268  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1269  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1270  a2 = buff_A + (g )*cs_A;
1271  a3 = buff_A + (g + 1)*cs_A;
1272  a4 = buff_A + (g + 2)*cs_A;
1273 
1274  gamma23_k1 = g23_k1->real;
1275  sigma23_k1 = g23_k1->imag;
1276  gamma34_k1 = g34_k1->real;
1277  sigma34_k1 = g34_k1->imag;
1278 
1279  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1280  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1281 
1282  if ( !is_ident23_k1 && is_ident34_k1 )
1283  {
1284  MAC_Apply_G_mx2_asc( m_A,
1285  &gamma23_k1,
1286  &sigma23_k1,
1287  a2, 1,
1288  a3, 1 );
1289  }
1290  else if ( is_ident23_k1 && !is_ident34_k1 )
1291  {
1292  MAC_Apply_G_mx2_asc( m_A,
1293  &gamma34_k1,
1294  &sigma34_k1,
1295  a3, 1,
1296  a4, 1 );
1297  }
1298  else
1299  {
1300  MAC_Apply_G_mx3_asc( m_A,
1301  &gamma23_k1,
1302  &sigma23_k1,
1303  &gamma34_k1,
1304  &sigma34_k1,
1305  a2, 1,
1306  a3, 1,
1307  a4, 1 );
1308  }
1309  }
1310  }
1311 
1312  // Shutdown stage
1313 
1314  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1315  {
1316  g = nG - 1;
1317  k = j;
1318 
1319  //n_left = 1;
1320  //if ( n_left == 1 )
1321  {
1322  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1323  a2 = buff_A + (g )*cs_A;
1324  a3 = buff_A + (g + 1)*cs_A;
1325 
1326  gamma23_k1 = g23_k1->real;
1327  sigma23_k1 = g23_k1->imag;
1328 
1329  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1330 
1331  if ( !is_ident23_k1 )
1332  MAC_Apply_G_mx2_asc( m_A,
1333  &gamma23_k1,
1334  &sigma23_k1,
1335  a2, 1,
1336  a3, 1 );
1337  ++k;
1338  --g;
1339  }
1340 
1341  nG_app = k_minus_1 - j;
1342  n_iter = nG_app / k_fuse;
1343  n_left = nG_app % k_fuse;
1344 
1345  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1346  {
1347  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1348  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1349  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1350  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1351  a1 = buff_A + (g - 1)*cs_A;
1352  a2 = buff_A + (g )*cs_A;
1353  a3 = buff_A + (g + 1)*cs_A;
1354  a4 = buff_A + (g + 2)*cs_A;
1355 
1356  gamma23_k1 = g23_k1->real;
1357  sigma23_k1 = g23_k1->imag;
1358  gamma34_k1 = g34_k1->real;
1359  sigma34_k1 = g34_k1->imag;
1360  gamma12_k2 = g12_k2->real;
1361  sigma12_k2 = g12_k2->imag;
1362  gamma23_k2 = g23_k2->real;
1363  sigma23_k2 = g23_k2->imag;
1364 
1365  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1366  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1367  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1368  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1369  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1370  is_ident12_k2 || is_ident23_k2 );
1371 
1372  if ( has_ident )
1373  {
1374  // Apply to pairs of columns as needed.
1375 
1376  if ( !is_ident23_k1 )
1377  MAC_Apply_G_mx2_asc( m_A,
1378  &gamma23_k1,
1379  &sigma23_k1,
1380  a2, 1,
1381  a3, 1 );
1382 
1383  if ( !is_ident34_k1 )
1384  MAC_Apply_G_mx2_asc( m_A,
1385  &gamma34_k1,
1386  &sigma34_k1,
1387  a3, 1,
1388  a4, 1 );
1389 
1390  if ( !is_ident12_k2 )
1391  MAC_Apply_G_mx2_asc( m_A,
1392  &gamma12_k2,
1393  &sigma12_k2,
1394  a1, 1,
1395  a2, 1 );
1396 
1397  if ( !is_ident23_k2 )
1398  MAC_Apply_G_mx2_asc( m_A,
1399  &gamma23_k2,
1400  &sigma23_k2,
1401  a2, 1,
1402  a3, 1 );
1403  }
1404  else
1405  {
1406  // Apply to all four columns.
1407 
1408  MAC_Apply_G_mx4s_asc( m_A,
1409  &gamma23_k1,
1410  &sigma23_k1,
1411  &gamma34_k1,
1412  &sigma34_k1,
1413  &gamma12_k2,
1414  &sigma12_k2,
1415  &gamma23_k2,
1416  &sigma23_k2,
1417  a1, 1,
1418  a2, 1,
1419  a3, 1,
1420  a4, 1 );
1421  }
1422  }
1423 
1424  if ( n_left == 1 )
1425  {
1426  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1427  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1428  a2 = buff_A + (g )*cs_A;
1429  a3 = buff_A + (g + 1)*cs_A;
1430  a4 = buff_A + (g + 2)*cs_A;
1431 
1432  gamma23_k1 = g23_k1->real;
1433  sigma23_k1 = g23_k1->imag;
1434  gamma34_k1 = g34_k1->real;
1435  sigma34_k1 = g34_k1->imag;
1436 
1437  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1438  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1439 
1440  if ( !is_ident23_k1 && is_ident34_k1 )
1441  {
1442  MAC_Apply_G_mx2_asc( m_A,
1443  &gamma23_k1,
1444  &sigma23_k1,
1445  a2, 1,
1446  a3, 1 );
1447  }
1448  else if ( is_ident23_k1 && !is_ident34_k1 )
1449  {
1450  MAC_Apply_G_mx2_asc( m_A,
1451  &gamma34_k1,
1452  &sigma34_k1,
1453  a3, 1,
1454  a4, 1 );
1455  }
1456  else
1457  {
1458  MAC_Apply_G_mx3_asc( m_A,
1459  &gamma23_k1,
1460  &sigma23_k1,
1461  &gamma34_k1,
1462  &sigma34_k1,
1463  a2, 1,
1464  a3, 1,
1465  a4, 1 );
1466  }
1467  }
1468  }
1469 
1470  return FLA_SUCCESS;
1471 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3().
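
When none of the four rotations is an identity, the loop above hands two rotations from sweep k and two from sweep k+1 to the fused MAC_Apply_G_mx4s_asc kernel, which is not shown on this page. Judging from the has_ident fallback branch in the listing, a natural scalar reference is four successive two-column updates in the same order; the helpers below are assumptions (including the sign convention), not library code, and rely only on scomplex being the {float real; float imag;} pair referenced above.

/* Hedged two-column reference for scomplex data with real rotation
   parameters, standing in for MAC_Apply_G_mx2_asc (convention assumed). */
static void apply_mx2_c_ref( int m, float gamma, float sigma,
                             scomplex* a1, scomplex* a2 )
{
    int i;
    for ( i = 0; i < m; ++i )
    {
        scomplex t1 = a1[ i ], t2 = a2[ i ];
        a1[ i ].real =  gamma * t1.real + sigma * t2.real;
        a1[ i ].imag =  gamma * t1.imag + sigma * t2.imag;
        a2[ i ].real = -sigma * t1.real + gamma * t2.real;
        a2[ i ].imag = -sigma * t1.imag + gamma * t2.imag;
    }
}

/* Assumed equivalent of the fused MAC_Apply_G_mx4s_asc call: the same four
   pairwise updates the fallback branch performs, in the same order. */
static void apply_mx4s_c_ref( int m,
                              float g23_k1, float s23_k1, float g34_k1, float s34_k1,
                              float g12_k2, float s12_k2, float g23_k2, float s23_k2,
                              scomplex* a1, scomplex* a2, scomplex* a3, scomplex* a4 )
{
    apply_mx2_c_ref( m, g23_k1, s23_k1, a2, a3 );
    apply_mx2_c_ref( m, g34_k1, s34_k1, a3, a4 );
    apply_mx2_c_ref( m, g12_k2, s12_k2, a1, a2 );
    apply_mx2_c_ref( m, g23_k2, s23_k2, a2, a3 );
}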

◆ FLA_Apply_G_rf_asc_var3b()

FLA_Error FLA_Apply_G_rf_asc_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
614 {
615  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
616 
617  return FLA_SUCCESS;
618 }

Referenced by FLA_Apply_G_rf_asm_var3b().

◆ FLA_Apply_G_rf_asc_var4()

FLA_Error FLA_Apply_G_rf_asc_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var5()

FLA_Error FLA_Apply_G_rf_asc_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var5b()

FLA_Error FLA_Apply_G_rf_asc_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var6()

FLA_Error FLA_Apply_G_rf_asc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
680 {
681  float one = bl1_s1();
682  float zero = bl1_s0();
683  float gamma12;
684  float sigma12;
685  float gamma23;
686  float sigma23;
687  scomplex* a1;
688  scomplex* a2;
689  scomplex* a3;
690  scomplex* g12;
691  scomplex* g23;
692  int i, j, g, k;
693  int nG, nG_app;
694  int n_iter;
695  int n_left;
696  int k_minus_1;
697  int n_fuse;
698  int is_ident12, is_ident23;
699 
700  k_minus_1 = k_G - 1;
701  nG = n_A - 1;
702  n_fuse = 2;
703 
704  // Use the simple variant for nG < (k - 1) or k == 1.
705  if ( nG < k_minus_1 || k_G == 1 )
706  {
707  FLA_Apply_G_rf_asc_var1( k_G,
708  m_A,
709  n_A,
710  buff_G, rs_G, cs_G,
711  buff_A, rs_A, cs_A );
712  return FLA_SUCCESS;
713  }
714 
715 
716  // Start-up phase.
717 
718  for ( j = 0; j < k_minus_1; ++j )
719  {
720  nG_app = j + 1;
721  n_iter = nG_app / n_fuse;
722  n_left = nG_app % n_fuse;
723 
724  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
725  {
726  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
727  g23 = buff_G + (g )*rs_G + (k )*cs_G;
728  a1 = buff_A + (g - 1)*cs_A;
729  a2 = buff_A + (g )*cs_A;
730  a3 = buff_A + (g + 1)*cs_A;
731 
732  gamma12 = g12->real;
733  sigma12 = g12->imag;
734  gamma23 = g23->real;
735  sigma23 = g23->imag;
736 
737  is_ident12 = ( gamma12 == one && sigma12 == zero );
738  is_ident23 = ( gamma23 == one && sigma23 == zero );
739 
740  if ( !is_ident12 && is_ident23 )
741  {
742  // Apply only to columns 1 and 2.
743 
744  MAC_Apply_G_mx2_asc( m_A,
745  &gamma12,
746  &sigma12,
747  a1, 1,
748  a2, 1 );
749  }
750  else if ( is_ident12 && !is_ident23 )
751  {
752  // Apply only to columns 2 and 3.
753 
754  MAC_Apply_G_mx2_asc( m_A,
755  &gamma23,
756  &sigma23,
757  a2, 1,
758  a3, 1 );
759  }
760  else if ( !is_ident12 && !is_ident23 )
761  {
762  // Apply to all three columns.
763 
764  MAC_Apply_G_mx3b_asc( m_A,
765  &gamma12,
766  &sigma12,
767  &gamma23,
768  &sigma23,
769  a1, 1,
770  a2, 1,
771  a3, 1 );
772  }
773  }
774 
775  if ( n_left == 1 )
776  {
777  g23 = buff_G + (g )*rs_G + (k )*cs_G;
778  a2 = buff_A + (g )*cs_A;
779  a3 = buff_A + (g + 1)*cs_A;
780 
781  gamma23 = g23->real;
782  sigma23 = g23->imag;
783 
784  is_ident23 = ( gamma23 == one && sigma23 == zero );
785 
786  if ( !is_ident23 )
787  MAC_Apply_G_mx2_asc( m_A,
788  &gamma23,
789  &sigma23,
790  a2, 1,
791  a3, 1 );
792  }
793  }
794 
795  // Pipeline stage
796 
797  for ( j = k_minus_1; j < nG; ++j )
798  {
799  nG_app = k_G;
800  n_iter = nG_app / n_fuse;
801  n_left = nG_app % n_fuse;
802 
803  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
804  {
805  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
806  g23 = buff_G + (g )*rs_G + (k )*cs_G;
807  a1 = buff_A + (g - 1)*cs_A;
808  a2 = buff_A + (g )*cs_A;
809  a3 = buff_A + (g + 1)*cs_A;
810 
811  gamma12 = g12->real;
812  sigma12 = g12->imag;
813  gamma23 = g23->real;
814  sigma23 = g23->imag;
815 
816  is_ident12 = ( gamma12 == one && sigma12 == zero );
817  is_ident23 = ( gamma23 == one && sigma23 == zero );
818 
819  if ( !is_ident12 && is_ident23 )
820  {
821  // Apply only to columns 1 and 2.
822 
823  MAC_Apply_G_mx2_asc( m_A,
824  &gamma12,
825  &sigma12,
826  a1, 1,
827  a2, 1 );
828  }
829  else if ( is_ident12 && !is_ident23 )
830  {
831  // Apply only to columns 2 and 3.
832 
833  MAC_Apply_G_mx2_asc( m_A,
834  &gamma23,
835  &sigma23,
836  a2, 1,
837  a3, 1 );
838  }
839  else if ( !is_ident12 && !is_ident23 )
840  {
841  // Apply to all three columns.
842 
843  MAC_Apply_G_mx3b_asc( m_A,
844  &gamma12,
845  &sigma12,
846  &gamma23,
847  &sigma23,
848  a1, 1,
849  a2, 1,
850  a3, 1 );
851  }
852  }
853 
854  if ( n_left == 1 )
855  {
856  g23 = buff_G + (g )*rs_G + (k )*cs_G;
857  a2 = buff_A + (g )*cs_A;
858  a3 = buff_A + (g + 1)*cs_A;
859 
860  gamma23 = g23->real;
861  sigma23 = g23->imag;
862 
863  is_ident23 = ( gamma23 == one && sigma23 == zero );
864 
865  if ( !is_ident23 )
866  MAC_Apply_G_mx2_asc( m_A,
867  &gamma23,
868  &sigma23,
869  a2, 1,
870  a3, 1 );
871  }
872  }
873 
874  // Shutdown stage
875 
876  for ( j = 1; j < k_G; ++j )
877  {
878  nG_app = k_G - j;
879  n_iter = nG_app / n_fuse;
880  n_left = nG_app % n_fuse;
881 
882  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
883  {
884  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
885  g23 = buff_G + (g )*rs_G + (k )*cs_G;
886  a1 = buff_A + (g - 1)*cs_A;
887  a2 = buff_A + (g )*cs_A;
888  a3 = buff_A + (g + 1)*cs_A;
889 
890  gamma12 = g12->real;
891  sigma12 = g12->imag;
892  gamma23 = g23->real;
893  sigma23 = g23->imag;
894 
895  is_ident12 = ( gamma12 == one && sigma12 == zero );
896  is_ident23 = ( gamma23 == one && sigma23 == zero );
897 
898  if ( !is_ident12 && is_ident23 )
899  {
900  // Apply only to columns 1 and 2.
901 
902  MAC_Apply_G_mx2_asc( m_A,
903  &gamma12,
904  &sigma12,
905  a1, 1,
906  a2, 1 );
907  }
908  else if ( is_ident12 && !is_ident23 )
909  {
910  // Apply only to columns 2 and 3.
911 
912  MAC_Apply_G_mx2_asc( m_A,
913  &gamma23,
914  &sigma23,
915  a2, 1,
916  a3, 1 );
917  }
918  else if ( !is_ident12 && !is_ident23 )
919  {
920  // Apply to all three columns.
921 
922  MAC_Apply_G_mx3b_asc( m_A,
923  &gamma12,
924  &sigma12,
925  &gamma23,
926  &sigma23,
927  a1, 1,
928  a2, 1,
929  a3, 1 );
930  }
931  }
932 
933  if ( n_left == 1 )
934  {
935  g23 = buff_G + (g )*rs_G + (k )*cs_G;
936  a2 = buff_A + (g )*cs_A;
937  a3 = buff_A + (g + 1)*cs_A;
938 
939  gamma23 = g23->real;
940  sigma23 = g23->imag;
941 
942  is_ident23 = ( gamma23 == one && sigma23 == zero );
943 
944  if ( !is_ident23 )
945  MAC_Apply_G_mx2_asc( m_A,
946  &gamma23,
947  &sigma23,
948  a2, 1,
949  a3, 1 );
950  }
951  }
952 
953  return FLA_SUCCESS;
954 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blc_var6().
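
Variant 6 pairs a rotation G23 from sweep k with a rotation G12 from sweep k+1 that shares one column, and applies both with the fused MAC_Apply_G_mx3b_asc kernel when neither is an identity. That kernel is not shown here; because a sweep-(k+1) rotation operates on the output of sweep k, the sweep-k rotation on (a2, a3) would have to precede the sweep-(k+1) rotation on (a1, a2), so an assumed scalar reference is the following (apply_mx2_c_ref is the hedged two-column helper sketched after variant 3 above).

/* Assumed behavior of the fused MAC_Apply_G_mx3b_asc call: the sweep-k
   rotation on (a2, a3) followed by the sweep-(k+1) rotation on (a1, a2). */
static void apply_mx3b_c_ref( int m,
                              float gamma12, float sigma12,
                              float gamma23, float sigma23,
                              scomplex* a1, scomplex* a2, scomplex* a3 )
{
    apply_mx2_c_ref( m, gamma23, sigma23, a2, a3 );
    apply_mx2_c_ref( m, gamma12, sigma12, a1, a2 );
}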

◆ FLA_Apply_G_rf_asc_var6b()

FLA_Error FLA_Apply_G_rf_asc_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
437 {
438  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
439 
440  return FLA_SUCCESS;
441 }

Referenced by FLA_Apply_G_rf_asm_var6b().

◆ FLA_Apply_G_rf_asc_var7()

FLA_Error FLA_Apply_G_rf_asc_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var8()

FLA_Error FLA_Apply_G_rf_asc_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var8b()

FLA_Error FLA_Apply_G_rf_asc_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asc_var9()

FLA_Error FLA_Apply_G_rf_asc_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
652 {
653  float one = bl1_s1();
654  float zero = bl1_s0();
655  float gamma12;
656  float sigma12;
657  float gamma23;
658  float sigma23;
659  scomplex* a1;
660  scomplex* a2;
661  scomplex* a3;
662  scomplex* g12;
663  scomplex* g23;
664  int i, j, g, k;
665  int nG, nG_app;
666  int n_iter;
667  int n_left;
668  int k_minus_1;
669  int n_fuse;
670  int is_ident12, is_ident23;
671 
672  k_minus_1 = k_G - 1;
673  nG = n_A - 1;
674  n_fuse = 2;
675 
676  // Use the simple variant for nG < 2(k - 1) or k == 1.
677  if ( nG < 2*k_minus_1 || k_G == 1 )
678  {
679  FLA_Apply_G_rf_asc_var1( k_G,
680  m_A,
681  n_A,
682  buff_G, rs_G, cs_G,
683  buff_A, rs_A, cs_A );
684  return FLA_SUCCESS;
685  }
686 
687 
688  // Start-up phase.
689 
690  for ( j = -1; j < k_minus_1; j += n_fuse )
691  {
692  nG_app = j + 1;
693  n_iter = nG_app;
694  n_left = 1;
695 
696  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
697  {
698  g12 = buff_G + (g )*rs_G + (k )*cs_G;
699  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
700  a1 = buff_A + (g )*cs_A;
701  a2 = buff_A + (g + 1)*cs_A;
702  a3 = buff_A + (g + 2)*cs_A;
703 
704  gamma12 = g12->real;
705  sigma12 = g12->imag;
706  gamma23 = g23->real;
707  sigma23 = g23->imag;
708 
709  is_ident12 = ( gamma12 == one && sigma12 == zero );
710  is_ident23 = ( gamma23 == one && sigma23 == zero );
711 
712  if ( !is_ident12 && is_ident23 )
713  {
714  // Apply only to columns 1 and 2.
715 
716  MAC_Apply_G_mx2_asc( m_A,
717  &gamma12,
718  &sigma12,
719  a1, 1,
720  a2, 1 );
721  }
722  else if ( is_ident12 && !is_ident23 )
723  {
724  // Apply only to columns 2 and 3.
725 
726  MAC_Apply_G_mx2_asc( m_A,
727  &gamma23,
728  &sigma23,
729  a2, 1,
730  a3, 1 );
731  }
732  else if ( !is_ident12 && !is_ident23 )
733  {
734  // Apply to all three columns.
735 
736  MAC_Apply_G_mx3_asc( m_A,
737  &gamma12,
738  &sigma12,
739  &gamma23,
740  &sigma23,
741  a1, 1,
742  a2, 1,
743  a3, 1 );
744  }
745  }
746 
747  if ( n_left == 1 )
748  {
749  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
750  a2 = buff_A + (g + 1)*cs_A;
751  a3 = buff_A + (g + 2)*cs_A;
752 
753  gamma23 = g23->real;
754  sigma23 = g23->imag;
755 
756  is_ident23 = ( gamma23 == one && sigma23 == zero );
757 
758  if ( !is_ident23 )
759  MAC_Apply_G_mx2_asc( m_A,
760  &gamma23,
761  &sigma23,
762  a2, 1,
763  a3, 1 );
764  }
765  }
766 
767  // Pipeline stage
768 
769  for ( ; j < nG - 1; j += n_fuse )
770  {
771  nG_app = k_G;
772  n_iter = nG_app;
773  n_left = 0;
774 
775  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
776  {
777  g12 = buff_G + (g )*rs_G + (k )*cs_G;
778  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
779  a1 = buff_A + (g )*cs_A;
780  a2 = buff_A + (g + 1)*cs_A;
781  a3 = buff_A + (g + 2)*cs_A;
782 
783  gamma12 = g12->real;
784  sigma12 = g12->imag;
785  gamma23 = g23->real;
786  sigma23 = g23->imag;
787 
788  is_ident12 = ( gamma12 == one && sigma12 == zero );
789  is_ident23 = ( gamma23 == one && sigma23 == zero );
790 
791  if ( !is_ident12 && is_ident23 )
792  {
793  // Apply only to columns 1 and 2.
794 
795  MAC_Apply_G_mx2_asc( m_A,
796  &gamma12,
797  &sigma12,
798  a1, 1,
799  a2, 1 );
800  }
801  else if ( is_ident12 && !is_ident23 )
802  {
803  // Apply only to columns 2 and 3.
804 
805  MAC_Apply_G_mx2_asc( m_A,
806  &gamma23,
807  &sigma23,
808  a2, 1,
809  a3, 1 );
810  }
811  else if ( !is_ident12 && !is_ident23 )
812  {
813  // Apply to all three columns.
814 
815  MAC_Apply_G_mx3_asc( m_A,
816  &gamma12,
817  &sigma12,
818  &gamma23,
819  &sigma23,
820  a1, 1,
821  a2, 1,
822  a3, 1 );
823  }
824  }
825  }
826 
827  // Shutdown stage
828 
829  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
830  {
831  g = nG - 1;
832  k = j;
833 
834  n_left = 1;
835  if ( n_left == 1 )
836  {
837  g12 = buff_G + (g )*rs_G + (k )*cs_G;
838  a1 = buff_A + (g )*cs_A;
839  a2 = buff_A + (g + 1)*cs_A;
840 
841  gamma12 = g12->real;
842  sigma12 = g12->imag;
843 
844  is_ident12 = ( gamma12 == one && sigma12 == zero );
845 
846  if ( !is_ident12 )
847  MAC_Apply_G_mx2_asc( m_A,
848  &gamma12,
849  &sigma12,
850  a1, 1,
851  a2, 1 );
852  ++k;
853  --g;
854  }
855 
856  nG_app = k_minus_1 - j;
857  n_iter = nG_app;
858 
859  for ( i = 0; i < n_iter; ++i, ++k, --g )
860  {
861  g12 = buff_G + (g )*rs_G + (k )*cs_G;
862  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
863  a1 = buff_A + (g )*cs_A;
864  a2 = buff_A + (g + 1)*cs_A;
865  a3 = buff_A + (g + 2)*cs_A;
866 
867  gamma12 = g12->real;
868  sigma12 = g12->imag;
869  gamma23 = g23->real;
870  sigma23 = g23->imag;
871 
872  is_ident12 = ( gamma12 == one && sigma12 == zero );
873  is_ident23 = ( gamma23 == one && sigma23 == zero );
874 
875  if ( !is_ident12 && is_ident23 )
876  {
877  // Apply only to columns 1 and 2.
878 
879  MAC_Apply_G_mx2_asc( m_A,
880  &gamma12,
881  &sigma12,
882  a1, 1,
883  a2, 1 );
884  }
885  else if ( is_ident12 && !is_ident23 )
886  {
887  // Apply only to columns 2 and 3.
888 
889  MAC_Apply_G_mx2_asc( m_A,
890  &gamma23,
891  &sigma23,
892  a2, 1,
893  a3, 1 );
894  }
895  else if ( !is_ident12 && !is_ident23 )
896  {
897  // Apply to all three columns.
898 
899  MAC_Apply_G_mx3_asc( m_A,
900  &gamma12,
901  &sigma12,
902  &gamma23,
903  &sigma23,
904  a1, 1,
905  a2, 1,
906  a3, 1 );
907  }
908  }
909  }
910 
911  return FLA_SUCCESS;
912 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_asc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_blc_var9().
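
Variant 9 fuses two adjacent rotations of the same sweep, G12 at one row and G23 at the next, and applies them with MAC_Apply_G_mx3_asc when neither is an identity. Following the within-sweep order of variant 1 (lower row index first, as also used by the n_left path of variant 3), an assumed scalar reference is shown below; apply_mx2_c_ref is the hedged two-column helper sketched after variant 3.

/* Assumed behavior of the fused MAC_Apply_G_mx3_asc call: the rotation on
   (a1, a2) followed by the rotation on (a2, a3) from the same sweep. */
static void apply_mx3_c_ref( int m,
                             float gamma12, float sigma12,
                             float gamma23, float sigma23,
                             scomplex* a1, scomplex* a2, scomplex* a3 )
{
    apply_mx2_c_ref( m, gamma12, sigma12, a1, a2 );
    apply_mx2_c_ref( m, gamma23, sigma23, a2, a3 );
}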

◆ FLA_Apply_G_rf_asc_var9b()

FLA_Error FLA_Apply_G_rf_asc_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
422 {
423  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
424 
425  return FLA_SUCCESS;
426 }

Referenced by FLA_Apply_G_rf_asm_var9b().

◆ FLA_Apply_G_rf_asd_var1()

FLA_Error FLA_Apply_G_rf_asd_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
169 {
170  double one = bl1_d1();
171  double zero = bl1_d0();
172  int nG_app = n_A - 1;
173  int l, j;
174  double gamma;
175  double sigma;
176  double* a1;
177  double* a2;
178  dcomplex* g1;
179  dcomplex* g11;
180 
181  g1 = buff_G;
182 
183  for ( l = 0; l < k_G; ++l )
184  {
185  a1 = buff_A;
186  a2 = buff_A + cs_A;
187  g11 = g1;
188 
189  for ( j = 0; j < nG_app; ++j )
190  {
191  gamma = g11->real;
192  sigma = g11->imag;
193 
194  // Skip the current iteration if the rotation is identity.
195  if ( gamma != one || sigma != zero )
196  {
197  MAC_Apply_G_mx2_asd( m_A,
198  &gamma,
199  &sigma,
200  a1, 1,
201  a2, 1 );
202  }
203 
204  a1 += cs_A;
205  a2 += cs_A;
206  g11 += rs_G;
207  }
208 
209  g1 += cs_G;
210  }
211 
212  return FLA_SUCCESS;
213 }

References bl1_d0(), bl1_d1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_asm_var1(), and FLA_Apply_G_rf_bld_var1().

◆ FLA_Apply_G_rf_asd_var2()

FLA_Error FLA_Apply_G_rf_asd_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
231 {
232  double one = bl1_d1();
233  double zero = bl1_d0();
234  double gamma;
235  double sigma;
236  double* a1;
237  double* a2;
238  dcomplex* g11;
239  int j, g, k;
240  int nG, nG_app;
241  int k_minus_1;
242 
243  k_minus_1 = k_G - 1;
244  nG = n_A - 1;
245 
246  // Use the simple variant for nG < (k - 1) or k == 1.
247  if ( nG < k_minus_1 || k_G == 1 )
248  {
249  FLA_Apply_G_rf_asd_var1( k_G,
250  m_A,
251  n_A,
252  buff_G, rs_G, cs_G,
253  buff_A, rs_A, cs_A );
254  return FLA_SUCCESS;
255  }
256 
257 
258  // Start-up phase.
259 
260  for ( j = 0; j < k_minus_1; ++j )
261  {
262  nG_app = j + 1;
263 
264  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
265  {
266  g11 = buff_G + (g )*rs_G + (k )*cs_G;
267  a1 = buff_A + (g )*cs_A;
268  a2 = buff_A + (g + 1)*cs_A;
269 
270  gamma = g11->real;
271  sigma = g11->imag;
272 
273  // Skip the current iteration if the rotation is identity.
274  if ( gamma == one && sigma == zero ) continue;
275 
276  MAC_Apply_G_mx2_asd( m_A,
277  &gamma,
278  &sigma,
279  a1, 1,
280  a2, 1 );
281  }
282  }
283 
284  // Pipeline stage
285 
286  for ( j = k_minus_1; j < nG; ++j )
287  {
288  nG_app = k_G;
289 
290  for ( k = 0, g = j; k < nG_app; ++k, --g )
291  {
292  g11 = buff_G + (g )*rs_G + (k )*cs_G;
293  a1 = buff_A + (g )*cs_A;
294  a2 = buff_A + (g + 1)*cs_A;
295 
296  gamma = g11->real;
297  sigma = g11->imag;
298 
299  // Skip the current iteration if the rotation is identity.
300  if ( gamma == one && sigma == zero ) continue;
301 
302  MAC_Apply_G_mx2_asd( m_A,
303  &gamma,
304  &sigma,
305  a1, 1,
306  a2, 1 );
307  }
308  }
309 
310  // Shutdown stage
311 
312  for ( j = nG - k_minus_1; j < nG; ++j )
313  {
314  nG_app = nG - j;
315 
316  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
317  {
318  g11 = buff_G + (g )*rs_G + (k )*cs_G;
319  a1 = buff_A + (g )*cs_A;
320  a2 = buff_A + (g + 1)*cs_A;
321 
322  gamma = g11->real;
323  sigma = g11->imag;
324 
325  // Skip the current iteration if the rotation is identity.
326  if ( gamma == one && sigma == zero ) continue;
327 
328  MAC_Apply_G_mx2_asd( m_A,
329  &gamma,
330  &sigma,
331  a1, 1,
332  a2, 1 );
333  }
334  }
335 
336  return FLA_SUCCESS;
337 }
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:164

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_bld_var2().
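
For reference, a minimal call sketch for this variant, assuming both buffers are stored column-major and that each column of G holds the n_A - 1 rotations of one set with gamma in the real part and sigma in the imaginary part; the wrapper name apply_rotation_sets and the header name are assumptions, not part of this interface.

#include "FLAME.h"  /* assumed libflame header providing the prototype and dcomplex */

/* Hedged usage sketch: apply k_G sets of (n_A - 1) Givens rotations,
 * packed in the (n_A - 1) x k_G buffer G, to the columns of the
 * m_A x n_A column-major matrix A. */
void apply_rotation_sets( int k_G, int m_A, int n_A,
                          dcomplex* buff_G, double* buff_A )
{
    int rs_G = 1, cs_G = n_A - 1;  /* G column-major: rotation g of set k at G[g + k*cs_G] */
    int rs_A = 1, cs_A = m_A;      /* A column-major with leading dimension m_A            */

    FLA_Apply_G_rf_asd_var2( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
}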

◆ FLA_Apply_G_rf_asd_var3()

FLA_Error FLA_Apply_G_rf_asd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
571 {
572  double one = bl1_d1();
573  double zero = bl1_d0();
574  double gamma23_k1;
575  double sigma23_k1;
576  double gamma34_k1;
577  double sigma34_k1;
578  double gamma12_k2;
579  double sigma12_k2;
580  double gamma23_k2;
581  double sigma23_k2;
582  double* a1;
583  double* a2;
584  double* a3;
585  double* a4;
586  dcomplex* g23_k1;
587  dcomplex* g34_k1;
588  dcomplex* g12_k2;
589  dcomplex* g23_k2;
590  int i, j, g, k;
591  int nG, nG_app;
592  int n_iter;
593  int n_left;
594  int k_minus_1;
595  int n_fuse;
596  int k_fuse;
597  int is_ident23_k1, is_ident34_k1;
598  int is_ident12_k2, is_ident23_k2;
599  int has_ident;
600 
601  k_minus_1 = k_G - 1;
602  nG = n_A - 1;
603  n_fuse = 2;
604  k_fuse = 2;
605 
606  // Use the simple variant for nG < 2(k - 1) or k == 1.
607  if ( nG < 2*k_minus_1 || k_G == 1 )
608  {
609  FLA_Apply_G_rf_asd_var1( k_G,
610  m_A,
611  n_A,
612  buff_G, rs_G, cs_G,
613  buff_A, rs_A, cs_A );
614  return FLA_SUCCESS;
615  }
616 
617 
618  // Start-up phase.
619 
620  for ( j = -1; j < k_minus_1; j += n_fuse )
621  {
622  nG_app = j + 2;
623  n_iter = nG_app / k_fuse;
624  n_left = 1;
625 
626  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
627  {
628  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
629  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
630  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
631  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
632  a1 = buff_A + (g - 1)*cs_A;
633  a2 = buff_A + (g )*cs_A;
634  a3 = buff_A + (g + 1)*cs_A;
635  a4 = buff_A + (g + 2)*cs_A;
636 
637  gamma23_k1 = g23_k1->real;
638  sigma23_k1 = g23_k1->imag;
639  gamma34_k1 = g34_k1->real;
640  sigma34_k1 = g34_k1->imag;
641  gamma12_k2 = g12_k2->real;
642  sigma12_k2 = g12_k2->imag;
643  gamma23_k2 = g23_k2->real;
644  sigma23_k2 = g23_k2->imag;
645 
646  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
647  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
648  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
649  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
650  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
651  is_ident12_k2 || is_ident23_k2 );
652 
653  if ( has_ident )
654  {
655  // Apply to pairs of columns as needed.
656 
657  if ( !is_ident23_k1 )
658  MAC_Apply_G_mx2_asd( m_A,
659  &gamma23_k1,
660  &sigma23_k1,
661  a2, 1,
662  a3, 1 );
663 
664  if ( !is_ident34_k1 )
665  MAC_Apply_G_mx2_asd( m_A,
666  &gamma34_k1,
667  &sigma34_k1,
668  a3, 1,
669  a4, 1 );
670 
671  if ( !is_ident12_k2 )
672  MAC_Apply_G_mx2_asd( m_A,
673  &gamma12_k2,
674  &sigma12_k2,
675  a1, 1,
676  a2, 1 );
677 
678  if ( !is_ident23_k2 )
679  MAC_Apply_G_mx2_asd( m_A,
680  &gamma23_k2,
681  &sigma23_k2,
682  a2, 1,
683  a3, 1 );
684  }
685  else
686  {
687  // Apply to all four columns.
688 
689  MAC_Apply_G_mx4s_asd( m_A,
690  &gamma23_k1,
691  &sigma23_k1,
692  &gamma34_k1,
693  &sigma34_k1,
694  &gamma12_k2,
695  &sigma12_k2,
696  &gamma23_k2,
697  &sigma23_k2,
698  a1, 1,
699  a2, 1,
700  a3, 1,
701  a4, 1 );
702  }
703  }
704 
705  if ( n_left == 1 )
706  {
707  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
708  a3 = buff_A + (g + 1)*cs_A;
709  a4 = buff_A + (g + 2)*cs_A;
710 
711  gamma34_k1 = g34_k1->real;
712  sigma34_k1 = g34_k1->imag;
713 
714  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
715 
716  if ( !is_ident34_k1 )
717  MAC_Apply_G_mx2_asd( m_A,
718  &gamma34_k1,
719  &sigma34_k1,
720  a3, 1,
721  a4, 1 );
722  }
723  }
724 
725  // Pipeline stage
726 
727  for ( ; j < nG - 1; j += n_fuse )
728  {
729  nG_app = k_G;
730  n_iter = nG_app / k_fuse;
731  n_left = nG_app % k_fuse;
732 
733  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
734  {
735  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
736  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
737  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
738  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
739  a1 = buff_A + (g - 1)*cs_A;
740  a2 = buff_A + (g )*cs_A;
741  a3 = buff_A + (g + 1)*cs_A;
742  a4 = buff_A + (g + 2)*cs_A;
743 
744  gamma23_k1 = g23_k1->real;
745  sigma23_k1 = g23_k1->imag;
746  gamma34_k1 = g34_k1->real;
747  sigma34_k1 = g34_k1->imag;
748  gamma12_k2 = g12_k2->real;
749  sigma12_k2 = g12_k2->imag;
750  gamma23_k2 = g23_k2->real;
751  sigma23_k2 = g23_k2->imag;
752 
753  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
754  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
755  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
756  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
757  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
758  is_ident12_k2 || is_ident23_k2 );
759 
760  if ( has_ident )
761  {
762  // Apply to pairs of columns as needed.
763 
764  if ( !is_ident23_k1 )
765  MAC_Apply_G_mx2_asd( m_A,
766  &gamma23_k1,
767  &sigma23_k1,
768  a2, 1,
769  a3, 1 );
770 
771  if ( !is_ident34_k1 )
772  MAC_Apply_G_mx2_asd( m_A,
773  &gamma34_k1,
774  &sigma34_k1,
775  a3, 1,
776  a4, 1 );
777 
778  if ( !is_ident12_k2 )
779  MAC_Apply_G_mx2_asd( m_A,
780  &gamma12_k2,
781  &sigma12_k2,
782  a1, 1,
783  a2, 1 );
784 
785  if ( !is_ident23_k2 )
786  MAC_Apply_G_mx2_asd( m_A,
787  &gamma23_k2,
788  &sigma23_k2,
789  a2, 1,
790  a3, 1 );
791  }
792  else
793  {
794  // Apply to all four columns.
795 
796  MAC_Apply_G_mx4s_asd( m_A,
797  &gamma23_k1,
798  &sigma23_k1,
799  &gamma34_k1,
800  &sigma34_k1,
801  &gamma12_k2,
802  &sigma12_k2,
803  &gamma23_k2,
804  &sigma23_k2,
805  a1, 1,
806  a2, 1,
807  a3, 1,
808  a4, 1 );
809  }
810  }
811 
812  if ( n_left == 1 )
813  {
814  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
815  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
816  a2 = buff_A + (g )*cs_A;
817  a3 = buff_A + (g + 1)*cs_A;
818  a4 = buff_A + (g + 2)*cs_A;
819 
820  gamma23_k1 = g23_k1->real;
821  sigma23_k1 = g23_k1->imag;
822  gamma34_k1 = g34_k1->real;
823  sigma34_k1 = g34_k1->imag;
824 
825  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
826  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
827 
828  if ( !is_ident23_k1 && is_ident34_k1 )
829  {
830  MAC_Apply_G_mx2_asd( m_A,
831  &gamma23_k1,
832  &sigma23_k1,
833  a2, 1,
834  a3, 1 );
835  }
836  else if ( is_ident23_k1 && !is_ident34_k1 )
837  {
838  MAC_Apply_G_mx2_asd( m_A,
839  &gamma34_k1,
840  &sigma34_k1,
841  a3, 1,
842  a4, 1 );
843  }
844  else
845  {
846  MAC_Apply_G_mx3_asd( m_A,
847  &gamma23_k1,
848  &sigma23_k1,
849  &gamma34_k1,
850  &sigma34_k1,
851  a2, 1,
852  a3, 1,
853  a4, 1 );
854  }
855  }
856  }
857 
858  // Shutdown stage
859 
860  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
861  {
862  g = nG - 1;
863  k = j;
864 
865  //n_left = 1;
866  //if ( n_left == 1 )
867  {
868  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
869  a2 = buff_A + (g )*cs_A;
870  a3 = buff_A + (g + 1)*cs_A;
871 
872  gamma23_k1 = g23_k1->real;
873  sigma23_k1 = g23_k1->imag;
874 
875  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
876 
877  if ( !is_ident23_k1 )
878  MAC_Apply_G_mx2_asd( m_A,
879  &gamma23_k1,
880  &sigma23_k1,
881  a2, 1,
882  a3, 1 );
883  ++k;
884  --g;
885  }
886 
887  nG_app = k_minus_1 - j;
888  n_iter = nG_app / k_fuse;
889  n_left = nG_app % k_fuse;
890 
891  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
892  {
893  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
894  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
895  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
896  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
897  a1 = buff_A + (g - 1)*cs_A;
898  a2 = buff_A + (g )*cs_A;
899  a3 = buff_A + (g + 1)*cs_A;
900  a4 = buff_A + (g + 2)*cs_A;
901 
902  gamma23_k1 = g23_k1->real;
903  sigma23_k1 = g23_k1->imag;
904  gamma34_k1 = g34_k1->real;
905  sigma34_k1 = g34_k1->imag;
906  gamma12_k2 = g12_k2->real;
907  sigma12_k2 = g12_k2->imag;
908  gamma23_k2 = g23_k2->real;
909  sigma23_k2 = g23_k2->imag;
910 
911  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
912  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
913  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
914  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
915  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
916  is_ident12_k2 || is_ident23_k2 );
917 
918  if ( has_ident )
919  {
920  // Apply to pairs of columns as needed.
921 
922  if ( !is_ident23_k1 )
923  MAC_Apply_G_mx2_asd( m_A,
924  &gamma23_k1,
925  &sigma23_k1,
926  a2, 1,
927  a3, 1 );
928 
929  if ( !is_ident34_k1 )
930  MAC_Apply_G_mx2_asd( m_A,
931  &gamma34_k1,
932  &sigma34_k1,
933  a3, 1,
934  a4, 1 );
935 
936  if ( !is_ident12_k2 )
937  MAC_Apply_G_mx2_asd( m_A,
938  &gamma12_k2,
939  &sigma12_k2,
940  a1, 1,
941  a2, 1 );
942 
943  if ( !is_ident23_k2 )
944  MAC_Apply_G_mx2_asd( m_A,
945  &gamma23_k2,
946  &sigma23_k2,
947  a2, 1,
948  a3, 1 );
949  }
950  else
951  {
952  // Apply to all four columns.
953 
954  MAC_Apply_G_mx4s_asd( m_A,
955  &gamma23_k1,
956  &sigma23_k1,
957  &gamma34_k1,
958  &sigma34_k1,
959  &gamma12_k2,
960  &sigma12_k2,
961  &gamma23_k2,
962  &sigma23_k2,
963  a1, 1,
964  a2, 1,
965  a3, 1,
966  a4, 1 );
967  }
968  }
969 
970  if ( n_left == 1 )
971  {
972  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
973  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
974  a2 = buff_A + (g )*cs_A;
975  a3 = buff_A + (g + 1)*cs_A;
976  a4 = buff_A + (g + 2)*cs_A;
977 
978  gamma23_k1 = g23_k1->real;
979  sigma23_k1 = g23_k1->imag;
980  gamma34_k1 = g34_k1->real;
981  sigma34_k1 = g34_k1->imag;
982 
983  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
984  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
985 
986  if ( !is_ident23_k1 && is_ident34_k1 )
987  {
988  MAC_Apply_G_mx2_asd( m_A,
989  &gamma23_k1,
990  &sigma23_k1,
991  a2, 1,
992  a3, 1 );
993  }
994  else if ( is_ident23_k1 && !is_ident34_k1 )
995  {
996  MAC_Apply_G_mx2_asd( m_A,
997  &gamma34_k1,
998  &sigma34_k1,
999  a3, 1,
1000  a4, 1 );
1001  }
1002  else
1003  {
1004  MAC_Apply_G_mx3_asd( m_A,
1005  &gamma23_k1,
1006  &sigma23_k1,
1007  &gamma34_k1,
1008  &sigma34_k1,
1009  a2, 1,
1010  a3, 1,
1011  a4, 1 );
1012  }
1013  }
1014  }
1015 
1016  return FLA_SUCCESS;
1017 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3().
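
This variant fuses two rotations from set k1 with two from set k2 and applies them to four adjacent columns in one pass via MAC_Apply_G_mx4s_asd; when any of the four is the identity it falls back to the pairwise MAC_Apply_G_mx2_asd calls shown above. The fused kernel is therefore assumed to compute the same result as those four pairwise applications in the same order, which the following sketch spells out using the hypothetical apply_g_2col() helper from the var1 notes (a sketch of the assumed semantics, not the kernel's actual fused loop).

/* Assumed effect of MAC_Apply_G_mx4s_asd: the same result as the
 * pairwise fallback branch, but with the four rotations applied in a
 * single pass over columns a1..a4. */
static void apply_g_4col_fused( int m,
                                double gamma23_k1, double sigma23_k1,
                                double gamma34_k1, double sigma34_k1,
                                double gamma12_k2, double sigma12_k2,
                                double gamma23_k2, double sigma23_k2,
                                double* a1, double* a2,
                                double* a3, double* a4 )
{
    apply_g_2col( m, gamma23_k1, sigma23_k1, a2, a3 );
    apply_g_2col( m, gamma34_k1, sigma34_k1, a3, a4 );
    apply_g_2col( m, gamma12_k2, sigma12_k2, a1, a2 );
    apply_g_2col( m, gamma23_k2, sigma23_k2, a2, a3 );
}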

◆ FLA_Apply_G_rf_asd_var3b()

FLA_Error FLA_Apply_G_rf_asd_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
138 {
139  double one = bl1_d1();
140  double zero = bl1_d0();
141  double gamma23_k1;
142  double sigma23_k1;
143  double gamma34_k1;
144  double sigma34_k1;
145  double gamma12_k2;
146  double sigma12_k2;
147  double gamma23_k2;
148  double sigma23_k2;
149  double* a1;
150  double* a2;
151  double* a3;
152  double* a4;
153  dcomplex* g23_k1;
154  dcomplex* g34_k1;
155  dcomplex* g12_k2;
156  dcomplex* g23_k2;
157  int i, j, g, k;
158  int nG, nG_app;
159  int n_iter;
160  int n_left;
161  int k_minus_1;
162  int n_fuse;
163  int k_fuse;
164  int is_ident23_k1, is_ident34_k1;
165  int is_ident12_k2, is_ident23_k2;
166  int has_ident;
167  int m_app;
168 
169 
170  k_minus_1 = k_G - 1;
171  nG = n_A - 1;
172  n_fuse = 2;
173  k_fuse = 2;
174 
175  // Use the simple variant for nG < 2(k - 1) or k == 1.
176  if ( nG < 2*k_minus_1 || k_G == 1 )
177  {
178  FLA_Apply_G_rf_asd_var1( k_G,
179  m_A,
180  n_A,
181  buff_G, rs_G, cs_G,
182  buff_A, rs_A, cs_A );
183  return FLA_SUCCESS;
184  }
185 
186 
187  // Start-up phase.
188 
189  for ( j = -1; j < k_minus_1; j += n_fuse )
190  {
191  nG_app = j + 2;
192  n_iter = nG_app / k_fuse;
193  //n_iter = nG_app % k_fuse;
194  n_left = 1;
195 
196  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
197  {
198  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
199  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
200  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
201  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
202  a1 = buff_A + (g - 1)*cs_A;
203  a2 = buff_A + (g )*cs_A;
204  a3 = buff_A + (g + 1)*cs_A;
205  a4 = buff_A + (g + 2)*cs_A;
206 
207  gamma23_k1 = g23_k1->real;
208  sigma23_k1 = g23_k1->imag;
209  gamma34_k1 = g34_k1->real;
210  sigma34_k1 = g34_k1->imag;
211  gamma12_k2 = g12_k2->real;
212  sigma12_k2 = g12_k2->imag;
213  gamma23_k2 = g23_k2->real;
214  sigma23_k2 = g23_k2->imag;
215 
216  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
217  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
218  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
219  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
220  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
221  is_ident12_k2 || is_ident23_k2 );
222 
223  m_app = min( i_k + 3 + j - iTL, m_A );
224  m_app = max( m_app, 0 );
225 
226  if ( has_ident )
227  {
228  // Apply to pairs of columns as needed.
229 
230  if ( !is_ident23_k1 )
231  MAC_Apply_G_mx2_asd( m_app,
232  &gamma23_k1,
233  &sigma23_k1,
234  a2, 1,
235  a3, 1 );
236 
237  if ( !is_ident34_k1 )
238  MAC_Apply_G_mx2_asd( m_app,
239  &gamma34_k1,
240  &sigma34_k1,
241  a3, 1,
242  a4, 1 );
243 
244  if ( !is_ident12_k2 )
245  MAC_Apply_G_mx2_asd( m_app,
246  &gamma12_k2,
247  &sigma12_k2,
248  a1, 1,
249  a2, 1 );
250 
251  if ( !is_ident23_k2 )
252  MAC_Apply_G_mx2_asd( m_app,
253  &gamma23_k2,
254  &sigma23_k2,
255  a2, 1,
256  a3, 1 );
257  }
258  else
259  {
260  // Apply to all four columns.
261 
262  MAC_Apply_G_mx4s_asd( m_app,
263  &gamma23_k1,
264  &sigma23_k1,
265  &gamma34_k1,
266  &sigma34_k1,
267  &gamma12_k2,
268  &sigma12_k2,
269  &gamma23_k2,
270  &sigma23_k2,
271  a1, 1,
272  a2, 1,
273  a3, 1,
274  a4, 1 );
275  }
276  }
277 
278  if ( n_left == 1 )
279  {
280  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
281  a3 = buff_A + (g + 1)*cs_A;
282  a4 = buff_A + (g + 2)*cs_A;
283 
284  gamma34_k1 = g34_k1->real;
285  sigma34_k1 = g34_k1->imag;
286 
287  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
288 
289  m_app = min( i_k + 3 + j - iTL, m_A );
290  m_app = max( m_app, 0 );
291 
292  if ( !is_ident34_k1 )
293  MAC_Apply_G_mx2_asd( m_app,
294  &gamma34_k1,
295  &sigma34_k1,
296  a3, 1,
297  a4, 1 );
298  }
299  }
300 
301  // Pipeline stage
302 
303  for ( ; j < nG - 1; j += n_fuse )
304  {
305  nG_app = k_G;
306  n_iter = nG_app / k_fuse;
307  n_left = nG_app % k_fuse;
308 
309  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
310  {
311  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
312  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
313  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
314  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
315  a1 = buff_A + (g - 1)*cs_A;
316  a2 = buff_A + (g )*cs_A;
317  a3 = buff_A + (g + 1)*cs_A;
318  a4 = buff_A + (g + 2)*cs_A;
319 
320  gamma23_k1 = g23_k1->real;
321  sigma23_k1 = g23_k1->imag;
322  gamma34_k1 = g34_k1->real;
323  sigma34_k1 = g34_k1->imag;
324  gamma12_k2 = g12_k2->real;
325  sigma12_k2 = g12_k2->imag;
326  gamma23_k2 = g23_k2->real;
327  sigma23_k2 = g23_k2->imag;
328 
329  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
330  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
331  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
332  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
333  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
334  is_ident12_k2 || is_ident23_k2 );
335 
336  m_app = min( i_k + 3 + j - iTL, m_A );
337  m_app = max( m_app, 0 );
338 
339  if ( has_ident )
340  {
341  // Apply to pairs of columns as needed.
342 
343  if ( !is_ident23_k1 )
344  MAC_Apply_G_mx2_asd( m_app,
345  &gamma23_k1,
346  &sigma23_k1,
347  a2, 1,
348  a3, 1 );
349 
350  if ( !is_ident34_k1 )
351  MAC_Apply_G_mx2_asd( m_app,
352  &gamma34_k1,
353  &sigma34_k1,
354  a3, 1,
355  a4, 1 );
356 
357  if ( !is_ident12_k2 )
358  MAC_Apply_G_mx2_asd( m_app,
359  &gamma12_k2,
360  &sigma12_k2,
361  a1, 1,
362  a2, 1 );
363 
364  if ( !is_ident23_k2 )
365  MAC_Apply_G_mx2_asd( m_app,
366  &gamma23_k2,
367  &sigma23_k2,
368  a2, 1,
369  a3, 1 );
370  }
371  else
372  {
373  // Apply to all four columns.
374 
375  MAC_Apply_G_mx4s_asd( m_app,
376  &gamma23_k1,
377  &sigma23_k1,
378  &gamma34_k1,
379  &sigma34_k1,
380  &gamma12_k2,
381  &sigma12_k2,
382  &gamma23_k2,
383  &sigma23_k2,
384  a1, 1,
385  a2, 1,
386  a3, 1,
387  a4, 1 );
388  }
389  }
390 
391  if ( n_left == 1 )
392  {
393  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
394  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
395  a2 = buff_A + (g )*cs_A;
396  a3 = buff_A + (g + 1)*cs_A;
397  a4 = buff_A + (g + 2)*cs_A;
398 
399  gamma23_k1 = g23_k1->real;
400  sigma23_k1 = g23_k1->imag;
401  gamma34_k1 = g34_k1->real;
402  sigma34_k1 = g34_k1->imag;
403 
404  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
405  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
406 
407  m_app = min( i_k + 3 + j - iTL, m_A );
408  m_app = max( m_app, 0 );
409 
410  if ( !is_ident23_k1 && is_ident34_k1 )
411  {
412  MAC_Apply_G_mx2_asd( m_app,
413  &gamma23_k1,
414  &sigma23_k1,
415  a2, 1,
416  a3, 1 );
417  }
418  else if ( is_ident23_k1 && !is_ident34_k1 )
419  {
420  MAC_Apply_G_mx2_asd( m_app,
421  &gamma34_k1,
422  &sigma34_k1,
423  a3, 1,
424  a4, 1 );
425  }
426  else
427  {
428  MAC_Apply_G_mx3_asd( m_app,
429  &gamma23_k1,
430  &sigma23_k1,
431  &gamma34_k1,
432  &sigma34_k1,
433  a2, 1,
434  a3, 1,
435  a4, 1 );
436  }
437  }
438  }
439 
440  // Shutdown stage
441 
442  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
443  {
444  g = nG - 1;
445  k = j;
446 
447  //n_left = 1;
448  //if ( n_left == 1 )
449  {
450  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
451  a2 = buff_A + (g )*cs_A;
452  a3 = buff_A + (g + 1)*cs_A;
453 
454  gamma23_k1 = g23_k1->real;
455  sigma23_k1 = g23_k1->imag;
456 
457  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
458 
459  m_app = m_A;
460 
461  if ( !is_ident23_k1 )
462  MAC_Apply_G_mx2_asd( m_app,
463  &gamma23_k1,
464  &sigma23_k1,
465  a2, 1,
466  a3, 1 );
467  ++k;
468  --g;
469  }
470 
471  nG_app = k_minus_1 - j;
472  n_iter = nG_app / k_fuse;
473  n_left = nG_app % k_fuse;
474 
475  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
476  {
477  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
478  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
479  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
480  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
481  a1 = buff_A + (g - 1)*cs_A;
482  a2 = buff_A + (g )*cs_A;
483  a3 = buff_A + (g + 1)*cs_A;
484  a4 = buff_A + (g + 2)*cs_A;
485 
486  gamma23_k1 = g23_k1->real;
487  sigma23_k1 = g23_k1->imag;
488  gamma34_k1 = g34_k1->real;
489  sigma34_k1 = g34_k1->imag;
490  gamma12_k2 = g12_k2->real;
491  sigma12_k2 = g12_k2->imag;
492  gamma23_k2 = g23_k2->real;
493  sigma23_k2 = g23_k2->imag;
494 
495  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
496  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
497  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
498  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
499  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
500  is_ident12_k2 || is_ident23_k2 );
501 
502  m_app = m_A;
503 
504  if ( has_ident )
505  {
506  // Apply to pairs of columns as needed.
507 
508  if ( !is_ident23_k1 )
509  MAC_Apply_G_mx2_asd( m_app,
510  &gamma23_k1,
511  &sigma23_k1,
512  a2, 1,
513  a3, 1 );
514 
515  if ( !is_ident34_k1 )
516  MAC_Apply_G_mx2_asd( m_app,
517  &gamma34_k1,
518  &sigma34_k1,
519  a3, 1,
520  a4, 1 );
521 
522  if ( !is_ident12_k2 )
523  MAC_Apply_G_mx2_asd( m_app,
524  &gamma12_k2,
525  &sigma12_k2,
526  a1, 1,
527  a2, 1 );
528 
529  if ( !is_ident23_k2 )
530  MAC_Apply_G_mx2_asd( m_app,
531  &gamma23_k2,
532  &sigma23_k2,
533  a2, 1,
534  a3, 1 );
535  }
536  else
537  {
538  // Apply to all four columns.
539 
540  MAC_Apply_G_mx4s_asd( m_app,
541  &gamma23_k1,
542  &sigma23_k1,
543  &gamma34_k1,
544  &sigma34_k1,
545  &gamma12_k2,
546  &sigma12_k2,
547  &gamma23_k2,
548  &sigma23_k2,
549  a1, 1,
550  a2, 1,
551  a3, 1,
552  a4, 1 );
553  }
554  }
555 
556  if ( n_left == 1 )
557  {
558  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
559  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
560  a2 = buff_A + (g )*cs_A;
561  a3 = buff_A + (g + 1)*cs_A;
562  a4 = buff_A + (g + 2)*cs_A;
563 
564  gamma23_k1 = g23_k1->real;
565  sigma23_k1 = g23_k1->imag;
566  gamma34_k1 = g34_k1->real;
567  sigma34_k1 = g34_k1->imag;
568 
569  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
570  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
571 
572  m_app = m_A;
573 
574  if ( !is_ident23_k1 && is_ident34_k1 )
575  {
576  MAC_Apply_G_mx2_asd( m_app,
577  &gamma23_k1,
578  &sigma23_k1,
579  a2, 1,
580  a3, 1 );
581  }
582  else if ( is_ident23_k1 && !is_ident34_k1 )
583  {
584  MAC_Apply_G_mx2_asd( m_app,
585  &gamma34_k1,
586  &sigma34_k1,
587  a3, 1,
588  a4, 1 );
589  }
590  else
591  {
592  MAC_Apply_G_mx3_asd( m_app,
593  &gamma23_k1,
594  &sigma23_k1,
595  &gamma34_k1,
596  &sigma34_k1,
597  a2, 1,
598  a3, 1,
599  a4, 1 );
600  }
601  }
602  }
603 
604  return FLA_SUCCESS;
605 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bld_var3b().
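
The only difference from var3 is the i_k/iTL row window: during the start-up and pipeline stages the kernels touch just the first m_app rows of the affected columns, and during shutdown all m_A rows. The clamp used by the listing above can be written as a small helper (a restatement of the code with min/max expanded; row_window is a hypothetical name).

/* Row window used by the var3b listing above for column sweep j:
 * m_app = max( min( i_k + 3 + j - iTL, m_A ), 0 ). */
static int row_window( int i_k, int iTL, int j, int m_A )
{
    int m_app = i_k + 3 + j - iTL;
    if ( m_app > m_A ) m_app = m_A;
    if ( m_app < 0 )   m_app = 0;
    return m_app;
}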

◆ FLA_Apply_G_rf_asd_var4()

FLA_Error FLA_Apply_G_rf_asd_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var5()

FLA_Error FLA_Apply_G_rf_asd_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var5b()

FLA_Error FLA_Apply_G_rf_asd_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var6()

FLA_Error FLA_Apply_G_rf_asd_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
399 {
400  double one = bl1_d1();
401  double zero = bl1_d0();
402  double gamma12;
403  double sigma12;
404  double gamma23;
405  double sigma23;
406  double* a1;
407  double* a2;
408  double* a3;
409  dcomplex* g12;
410  dcomplex* g23;
411  int i, j, g, k;
412  int nG, nG_app;
413  int n_iter;
414  int n_left;
415  int k_minus_1;
416  int n_fuse;
417  int is_ident12, is_ident23;
418 
419  k_minus_1 = k_G - 1;
420  nG = n_A - 1;
421  n_fuse = 2;
422 
423  // Use the simple variant for nG < (k - 1) or k == 1.
424  if ( nG < k_minus_1 || k_G == 1 )
425  {
426  FLA_Apply_G_rf_asd_var1( k_G,
427  m_A,
428  n_A,
429  buff_G, rs_G, cs_G,
430  buff_A, rs_A, cs_A );
431  return FLA_SUCCESS;
432  }
433 
434 
435  // Start-up phase.
436 
437  for ( j = 0; j < k_minus_1; ++j )
438  {
439  nG_app = j + 1;
440  n_iter = nG_app / n_fuse;
441  n_left = nG_app % n_fuse;
442 
443  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
444  {
445  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
446  g23 = buff_G + (g )*rs_G + (k )*cs_G;
447  a1 = buff_A + (g - 1)*cs_A;
448  a2 = buff_A + (g )*cs_A;
449  a3 = buff_A + (g + 1)*cs_A;
450 
451  gamma12 = g12->real;
452  sigma12 = g12->imag;
453  gamma23 = g23->real;
454  sigma23 = g23->imag;
455 
456  is_ident12 = ( gamma12 == one && sigma12 == zero );
457  is_ident23 = ( gamma23 == one && sigma23 == zero );
458 
459  if ( !is_ident12 && is_ident23 )
460  {
461  // Apply only to columns 1 and 2.
462 
463  MAC_Apply_G_mx2_asd( m_A,
464  &gamma12,
465  &sigma12,
466  a1, 1,
467  a2, 1 );
468  }
469  else if ( is_ident12 && !is_ident23 )
470  {
471  // Apply only to columns 2 and 3.
472 
473  MAC_Apply_G_mx2_asd( m_A,
474  &gamma23,
475  &sigma23,
476  a2, 1,
477  a3, 1 );
478  }
479  else if ( !is_ident12 && !is_ident23 )
480  {
481  // Apply to all three columns.
482 
483  MAC_Apply_G_mx3b_asd( m_A,
484  &gamma12,
485  &sigma12,
486  &gamma23,
487  &sigma23,
488  a1, 1,
489  a2, 1,
490  a3, 1 );
491  }
492  }
493 
494  if ( n_left == 1 )
495  {
496  g23 = buff_G + (g )*rs_G + (k )*cs_G;
497  a2 = buff_A + (g )*cs_A;
498  a3 = buff_A + (g + 1)*cs_A;
499 
500  gamma23 = g23->real;
501  sigma23 = g23->imag;
502 
503  is_ident23 = ( gamma23 == one && sigma23 == zero );
504 
505  if ( !is_ident23 )
506  MAC_Apply_G_mx2_asd( m_A,
507  &gamma23,
508  &sigma23,
509  a2, 1,
510  a3, 1 );
511  }
512  }
513 
514  // Pipeline stage
515 
516  for ( j = k_minus_1; j < nG; ++j )
517  {
518  nG_app = k_G;
519  n_iter = nG_app / n_fuse;
520  n_left = nG_app % n_fuse;
521 
522  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
523  {
524  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
525  g23 = buff_G + (g )*rs_G + (k )*cs_G;
526  a1 = buff_A + (g - 1)*cs_A;
527  a2 = buff_A + (g )*cs_A;
528  a3 = buff_A + (g + 1)*cs_A;
529 
530  gamma12 = g12->real;
531  sigma12 = g12->imag;
532  gamma23 = g23->real;
533  sigma23 = g23->imag;
534 
535  is_ident12 = ( gamma12 == one && sigma12 == zero );
536  is_ident23 = ( gamma23 == one && sigma23 == zero );
537 
538  if ( !is_ident12 && is_ident23 )
539  {
540  // Apply only to columns 1 and 2.
541 
542  MAC_Apply_G_mx2_asd( m_A,
543  &gamma12,
544  &sigma12,
545  a1, 1,
546  a2, 1 );
547  }
548  else if ( is_ident12 && !is_ident23 )
549  {
550  // Apply only to columns 2 and 3.
551 
552  MAC_Apply_G_mx2_asd( m_A,
553  &gamma23,
554  &sigma23,
555  a2, 1,
556  a3, 1 );
557  }
558  else if ( !is_ident12 && !is_ident23 )
559  {
560  // Apply to all three columns.
561 
562  MAC_Apply_G_mx3b_asd( m_A,
563  &gamma12,
564  &sigma12,
565  &gamma23,
566  &sigma23,
567  a1, 1,
568  a2, 1,
569  a3, 1 );
570  }
571  }
572 
573  if ( n_left == 1 )
574  {
575  g23 = buff_G + (g )*rs_G + (k )*cs_G;
576  a2 = buff_A + (g )*cs_A;
577  a3 = buff_A + (g + 1)*cs_A;
578 
579  gamma23 = g23->real;
580  sigma23 = g23->imag;
581 
582  is_ident23 = ( gamma23 == one && sigma23 == zero );
583 
584  if ( !is_ident23 )
585  MAC_Apply_G_mx2_asd( m_A,
586  &gamma23,
587  &sigma23,
588  a2, 1,
589  a3, 1 );
590  }
591  }
592 
593  // Shutdown stage
594 
595  for ( j = 1; j < k_G; ++j )
596  {
597  nG_app = k_G - j;
598  n_iter = nG_app / n_fuse;
599  n_left = nG_app % n_fuse;
600 
601  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
602  {
603  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
604  g23 = buff_G + (g )*rs_G + (k )*cs_G;
605  a1 = buff_A + (g - 1)*cs_A;
606  a2 = buff_A + (g )*cs_A;
607  a3 = buff_A + (g + 1)*cs_A;
608 
609  gamma12 = g12->real;
610  sigma12 = g12->imag;
611  gamma23 = g23->real;
612  sigma23 = g23->imag;
613 
614  is_ident12 = ( gamma12 == one && sigma12 == zero );
615  is_ident23 = ( gamma23 == one && sigma23 == zero );
616 
617  if ( !is_ident12 && is_ident23 )
618  {
619  // Apply only to columns 1 and 2.
620 
621  MAC_Apply_G_mx2_asd( m_A,
622  &gamma12,
623  &sigma12,
624  a1, 1,
625  a2, 1 );
626  }
627  else if ( is_ident12 && !is_ident23 )
628  {
629  // Apply only to columns 2 and 3.
630 
631  MAC_Apply_G_mx2_asd( m_A,
632  &gamma23,
633  &sigma23,
634  a2, 1,
635  a3, 1 );
636  }
637  else if ( !is_ident12 && !is_ident23 )
638  {
639  // Apply to all three columns.
640 
641  MAC_Apply_G_mx3b_asd( m_A,
642  &gamma12,
643  &sigma12,
644  &gamma23,
645  &sigma23,
646  a1, 1,
647  a2, 1,
648  a3, 1 );
649  }
650  }
651 
652  if ( n_left == 1 )
653  {
654  g23 = buff_G + (g )*rs_G + (k )*cs_G;
655  a2 = buff_A + (g )*cs_A;
656  a3 = buff_A + (g + 1)*cs_A;
657 
658  gamma23 = g23->real;
659  sigma23 = g23->imag;
660 
661  is_ident23 = ( gamma23 == one && sigma23 == zero );
662 
663  if ( !is_ident23 )
664  MAC_Apply_G_mx2_asd( m_A,
665  &gamma23,
666  &sigma23,
667  a2, 1,
668  a3, 1 );
669  }
670  }
671 
672  return FLA_SUCCESS;
673 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bld_var6().
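
Here the fusion pairs one rotation from set k (g23, acting on columns a2/a3) with one from set k + 1 (g12, acting on a1/a2). To reproduce the sequential order of the unfused var2 sweep, the set-k rotation must be applied before the set-(k + 1) rotation, so MAC_Apply_G_mx3b_asd is assumed to be equivalent to the two pairwise applications below; the ordering is an inference from var2, not stated in this listing, and apply_g_2col is the hypothetical helper from the var1 notes.

/* Assumed effect of MAC_Apply_G_mx3b_asd in the listing above. */
static void apply_g_3col_fused_b( int m,
                                  double gamma12, double sigma12,
                                  double gamma23, double sigma23,
                                  double* a1, double* a2, double* a3 )
{
    apply_g_2col( m, gamma23, sigma23, a2, a3 );  /* rotation from set k     */
    apply_g_2col( m, gamma12, sigma12, a1, a2 );  /* rotation from set k + 1 */
}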

◆ FLA_Apply_G_rf_asd_var6b()

FLA_Error FLA_Apply_G_rf_asd_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
138 {
139  double one = bl1_d1();
140  double zero = bl1_d0();
141  double gamma12;
142  double sigma12;
143  double gamma23;
144  double sigma23;
145  double* a1;
146  double* a2;
147  double* a3;
148  dcomplex* g12;
149  dcomplex* g23;
150  int i, j, g, k;
151  int nG, nG_app;
152  int n_iter;
153  int n_left;
154  int k_minus_1;
155  int n_fuse;
156  int is_ident12, is_ident23;
157  int m_app;
158 
159 
160  k_minus_1 = k_G - 1;
161  nG = n_A - 1;
162  n_fuse = 2;
163 
164  // Use the simple variant for nG < (k - 1) or k == 1.
165  if ( nG < k_minus_1 || k_G == 1 )
166  {
167  FLA_Apply_G_rf_asd_var1( k_G,
168  m_A,
169  n_A,
170  buff_G, rs_G, cs_G,
171  buff_A, rs_A, cs_A );
172  return FLA_SUCCESS;
173  }
174 
175 
176  // Start-up phase.
177 
178  for ( j = 0; j < k_minus_1; ++j )
179  {
180  nG_app = j + 1;
181  n_iter = nG_app / n_fuse;
182  n_left = nG_app % n_fuse;
183 
184  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
185  {
186  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
187  g23 = buff_G + (g )*rs_G + (k )*cs_G;
188  a1 = buff_A + (g - 1)*cs_A;
189  a2 = buff_A + (g )*cs_A;
190  a3 = buff_A + (g + 1)*cs_A;
191 
192  gamma12 = g12->real;
193  sigma12 = g12->imag;
194  gamma23 = g23->real;
195  sigma23 = g23->imag;
196 
197  is_ident12 = ( gamma12 == one && sigma12 == zero );
198  is_ident23 = ( gamma23 == one && sigma23 == zero );
199 
200  m_app = min( i_k + 2 + j - iTL, m_A );
201  m_app = max( m_app, 0 );
202 
203  if ( !is_ident12 && is_ident23 )
204  {
205  // Apply only to columns 1 and 2.
206 
207  MAC_Apply_G_mx2_asd( m_app,
208  &gamma12,
209  &sigma12,
210  a1, 1,
211  a2, 1 );
212  }
213  else if ( is_ident12 && !is_ident23 )
214  {
215  // Apply only to columns 2 and 3.
216 
217  MAC_Apply_G_mx2_asd( m_app,
218  &gamma23,
219  &sigma23,
220  a2, 1,
221  a3, 1 );
222  }
223  else if ( !is_ident12 && !is_ident23 )
224  {
225  // Apply to all three columns.
226 
227  MAC_Apply_G_mx3b_asd( m_app,
228  &gamma12,
229  &sigma12,
230  &gamma23,
231  &sigma23,
232  a1, 1,
233  a2, 1,
234  a3, 1 );
235  }
236  }
237  if ( n_left == 1 )
238  {
239  g23 = buff_G + (g )*rs_G + (k )*cs_G;
240  a2 = buff_A + (g )*cs_A;
241  a3 = buff_A + (g + 1)*cs_A;
242 
243  gamma23 = g23->real;
244  sigma23 = g23->imag;
245 
246  is_ident23 = ( gamma23 == one && sigma23 == zero );
247 
248  m_app = min( i_k + 2 + j - iTL, m_A );
249  m_app = max( m_app, 0 );
250 
251  if ( !is_ident23 )
252  MAC_Apply_G_mx2_asd( m_app,
253  &gamma23,
254  &sigma23,
255  a2, 1,
256  a3, 1 );
257  }
258  }
259 
260  // Pipeline stage
261 
262  for ( j = k_minus_1; j < nG; ++j )
263  {
264  nG_app = k_G;
265  n_iter = nG_app / n_fuse;
266  n_left = nG_app % n_fuse;
267 
268  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
269  {
270  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
271  g23 = buff_G + (g )*rs_G + (k )*cs_G;
272  a1 = buff_A + (g - 1)*cs_A;
273  a2 = buff_A + (g )*cs_A;
274  a3 = buff_A + (g + 1)*cs_A;
275 
276  gamma12 = g12->real;
277  sigma12 = g12->imag;
278  gamma23 = g23->real;
279  sigma23 = g23->imag;
280 
281  is_ident12 = ( gamma12 == one && sigma12 == zero );
282  is_ident23 = ( gamma23 == one && sigma23 == zero );
283 
284  m_app = min( i_k + 2 + j - iTL, m_A );
285  m_app = max( m_app, 0 );
286 
287  if ( !is_ident12 && is_ident23 )
288  {
289  // Apply only to columns 1 and 2.
290 
291  MAC_Apply_G_mx2_asd( m_app,
292  &gamma12,
293  &sigma12,
294  a1, 1,
295  a2, 1 );
296  }
297  else if ( is_ident12 && !is_ident23 )
298  {
299  // Apply only to columns 2 and 3.
300 
301  MAC_Apply_G_mx2_asd( m_app,
302  &gamma23,
303  &sigma23,
304  a2, 1,
305  a3, 1 );
306  }
307  else if ( !is_ident12 && !is_ident23 )
308  {
309  // Apply to all three columns.
310 
311  MAC_Apply_G_mx3b_asd( m_app,
312  &gamma12,
313  &sigma12,
314  &gamma23,
315  &sigma23,
316  a1, 1,
317  a2, 1,
318  a3, 1 );
319  }
320  }
321  if ( n_left == 1 )
322  {
323  g23 = buff_G + (g )*rs_G + (k )*cs_G;
324  a2 = buff_A + (g )*cs_A;
325  a3 = buff_A + (g + 1)*cs_A;
326 
327  gamma23 = g23->real;
328  sigma23 = g23->imag;
329 
330  is_ident23 = ( gamma23 == one && sigma23 == zero );
331 
332  m_app = min( i_k + 2 + j - iTL, m_A );
333  m_app = max( m_app, 0 );
334 
335  if ( !is_ident23 )
336  MAC_Apply_G_mx2_asd( m_app,
337  &gamma23,
338  &sigma23,
339  a2, 1,
340  a3, 1 );
341  }
342  }
343 
344  // Shutdown stage
345 
346  for ( j = 1; j < k_G; ++j )
347  {
348  nG_app = k_G - j;
349  n_iter = nG_app / n_fuse;
350  n_left = nG_app % n_fuse;
351 
352  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
353  {
354  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
355  g23 = buff_G + (g )*rs_G + (k )*cs_G;
356  a1 = buff_A + (g - 1)*cs_A;
357  a2 = buff_A + (g )*cs_A;
358  a3 = buff_A + (g + 1)*cs_A;
359 
360  gamma12 = g12->real;
361  sigma12 = g12->imag;
362  gamma23 = g23->real;
363  sigma23 = g23->imag;
364 
365  is_ident12 = ( gamma12 == one && sigma12 == zero );
366  is_ident23 = ( gamma23 == one && sigma23 == zero );
367 
368  m_app = m_A;
369 
370  if ( !is_ident12 && is_ident23 )
371  {
372  // Apply only to columns 1 and 2.
373 
374  MAC_Apply_G_mx2_asd( m_app,
375  &gamma12,
376  &sigma12,
377  a1, 1,
378  a2, 1 );
379  }
380  else if ( is_ident12 && !is_ident23 )
381  {
382  // Apply only to columns 2 and 3.
383 
384  MAC_Apply_G_mx2_asd( m_app,
385  &gamma23,
386  &sigma23,
387  a2, 1,
388  a3, 1 );
389  }
390  else if ( !is_ident12 && !is_ident23 )
391  {
392  // Apply to all three columns.
393 
394  MAC_Apply_G_mx3b_asd( m_app,
395  &gamma12,
396  &sigma12,
397  &gamma23,
398  &sigma23,
399  a1, 1,
400  a2, 1,
401  a3, 1 );
402  }
403  }
404  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
405  if ( n_left == 1 )
406  {
407  g23 = buff_G + (g )*rs_G + (k )*cs_G;
408  a2 = buff_A + (g )*cs_A;
409  a3 = buff_A + (g + 1)*cs_A;
410 
411  gamma23 = g23->real;
412  sigma23 = g23->imag;
413 
414  is_ident23 = ( gamma23 == one && sigma23 == zero );
415 
416  m_app = m_A;
417 
418  if ( !is_ident23 )
419  MAC_Apply_G_mx2_asd( m_app,
420  &gamma23,
421  &sigma23,
422  a2, 1,
423  a3, 1 );
424  }
425  }
426 
427  return FLA_SUCCESS;
428 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bld_var6b().

◆ FLA_Apply_G_rf_asd_var7()

FLA_Error FLA_Apply_G_rf_asd_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var8()

FLA_Error FLA_Apply_G_rf_asd_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var8b()

FLA_Error FLA_Apply_G_rf_asd_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asd_var9()

FLA_Error FLA_Apply_G_rf_asd_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
385 {
386  double one = bl1_d1();
387  double zero = bl1_d0();
388  double gamma12;
389  double sigma12;
390  double gamma23;
391  double sigma23;
392  double* a1;
393  double* a2;
394  double* a3;
395  dcomplex* g12;
396  dcomplex* g23;
397  int i, j, g, k;
398  int nG, nG_app;
399  int n_iter;
400  int n_left;
401  int k_minus_1;
402  int n_fuse;
403  int is_ident12, is_ident23;
404 
405  k_minus_1 = k_G - 1;
406  nG = n_A - 1;
407  n_fuse = 2;
408 
409  // Use the simple variant for nG < 2(k - 1) or k == 1.
410  if ( nG < 2*k_minus_1 || k_G == 1 )
411  {
412  FLA_Apply_G_rf_asd_var1( k_G,
413  m_A,
414  n_A,
415  buff_G, rs_G, cs_G,
416  buff_A, rs_A, cs_A );
417  return FLA_SUCCESS;
418  }
419 
420 
421  // Start-up phase.
422 
423  for ( j = -1; j < k_minus_1; j += n_fuse )
424  {
425  nG_app = j + 1;
426  n_iter = nG_app;
427  n_left = 1;
428 
429  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
430  {
431  g12 = buff_G + (g )*rs_G + (k )*cs_G;
432  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
433  a1 = buff_A + (g )*cs_A;
434  a2 = buff_A + (g + 1)*cs_A;
435  a3 = buff_A + (g + 2)*cs_A;
436 
437  gamma12 = g12->real;
438  sigma12 = g12->imag;
439  gamma23 = g23->real;
440  sigma23 = g23->imag;
441 
442  is_ident12 = ( gamma12 == one && sigma12 == zero );
443  is_ident23 = ( gamma23 == one && sigma23 == zero );
444 
445  if ( !is_ident12 && is_ident23 )
446  {
447  // Apply only to columns 1 and 2.
448 
449  MAC_Apply_G_mx2_asd( m_A,
450  &gamma12,
451  &sigma12,
452  a1, 1,
453  a2, 1 );
454  }
455  else if ( is_ident12 && !is_ident23 )
456  {
457  // Apply only to columns 2 and 3.
458 
459  MAC_Apply_G_mx2_asd( m_A,
460  &gamma23,
461  &sigma23,
462  a2, 1,
463  a3, 1 );
464  }
465  else if ( !is_ident12 && !is_ident23 )
466  {
467  // Apply to all three columns.
468 
469  MAC_Apply_G_mx3_asd( m_A,
470  &gamma12,
471  &sigma12,
472  &gamma23,
473  &sigma23,
474  a1, 1,
475  a2, 1,
476  a3, 1 );
477  }
478  }
479 
480  if ( n_left == 1 )
481  {
482  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
483  a2 = buff_A + (g + 1)*cs_A;
484  a3 = buff_A + (g + 2)*cs_A;
485 
486  gamma23 = g23->real;
487  sigma23 = g23->imag;
488 
489  is_ident23 = ( gamma23 == one && sigma23 == zero );
490 
491  if ( !is_ident23 )
492  MAC_Apply_G_mx2_asd( m_A,
493  &gamma23,
494  &sigma23,
495  a2, 1,
496  a3, 1 );
497  }
498  }
499 
500  // Pipeline stage
501 
502  for ( ; j < nG - 1; j += n_fuse )
503  {
504  nG_app = k_G;
505  n_iter = nG_app;
506  n_left = 0;
507 
508  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
509  {
510  g12 = buff_G + (g )*rs_G + (k )*cs_G;
511  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
512  a1 = buff_A + (g )*cs_A;
513  a2 = buff_A + (g + 1)*cs_A;
514  a3 = buff_A + (g + 2)*cs_A;
515 
516  gamma12 = g12->real;
517  sigma12 = g12->imag;
518  gamma23 = g23->real;
519  sigma23 = g23->imag;
520 
521  is_ident12 = ( gamma12 == one && sigma12 == zero );
522  is_ident23 = ( gamma23 == one && sigma23 == zero );
523 
524  if ( !is_ident12 && is_ident23 )
525  {
526  // Apply only to columns 1 and 2.
527 
528  MAC_Apply_G_mx2_asd( m_A,
529  &gamma12,
530  &sigma12,
531  a1, 1,
532  a2, 1 );
533  }
534  else if ( is_ident12 && !is_ident23 )
535  {
536  // Apply only to columns 2 and 3.
537 
538  MAC_Apply_G_mx2_asd( m_A,
539  &gamma23,
540  &sigma23,
541  a2, 1,
542  a3, 1 );
543  }
544  else if ( !is_ident12 && !is_ident23 )
545  {
546  // Apply to all three columns.
547 
548  MAC_Apply_G_mx3_asd( m_A,
549  &gamma12,
550  &sigma12,
551  &gamma23,
552  &sigma23,
553  a1, 1,
554  a2, 1,
555  a3, 1 );
556  }
557  }
558  }
559 
560  // Shutdown stage
561 
562  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
563  {
564  g = nG - 1;
565  k = j;
566 
567  n_left = 1;
568  if ( n_left == 1 )
569  {
570  g12 = buff_G + (g )*rs_G + (k )*cs_G;
571  a1 = buff_A + (g )*cs_A;
572  a2 = buff_A + (g + 1)*cs_A;
573 
574  gamma12 = g12->real;
575  sigma12 = g12->imag;
576 
577  is_ident12 = ( gamma12 == one && sigma12 == zero );
578 
579  if ( !is_ident12 )
580  MAC_Apply_G_mx2_asd( m_A,
581  &gamma12,
582  &sigma12,
583  a1, 1,
584  a2, 1 );
585  ++k;
586  --g;
587  }
588 
589  nG_app = k_minus_1 - j;
590  n_iter = nG_app;
591 
592  for ( i = 0; i < n_iter; ++i, ++k, --g )
593  {
594  g12 = buff_G + (g )*rs_G + (k )*cs_G;
595  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
596  a1 = buff_A + (g )*cs_A;
597  a2 = buff_A + (g + 1)*cs_A;
598  a3 = buff_A + (g + 2)*cs_A;
599 
600  gamma12 = g12->real;
601  sigma12 = g12->imag;
602  gamma23 = g23->real;
603  sigma23 = g23->imag;
604 
605  is_ident12 = ( gamma12 == one && sigma12 == zero );
606  is_ident23 = ( gamma23 == one && sigma23 == zero );
607 
608  if ( !is_ident12 && is_ident23 )
609  {
610  // Apply only to columns 1 and 2.
611 
612  MAC_Apply_G_mx2_asd( m_A,
613  &gamma12,
614  &sigma12,
615  a1, 1,
616  a2, 1 );
617  }
618  else if ( is_ident12 && !is_ident23 )
619  {
620  // Apply only to columns 2 and 3.
621 
622  MAC_Apply_G_mx2_asd( m_A,
623  &gamma23,
624  &sigma23,
625  a2, 1,
626  a3, 1 );
627  }
628  else if ( !is_ident12 && !is_ident23 )
629  {
630  // Apply to all three columns.
631 
632  MAC_Apply_G_mx3_asd( m_A,
633  &gamma12,
634  &sigma12,
635  &gamma23,
636  &sigma23,
637  a1, 1,
638  a2, 1,
639  a3, 1 );
640  }
641  }
642  }
643 
644  return FLA_SUCCESS;
645 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_bld_var9().

◆ FLA_Apply_G_rf_asd_var9b()

FLA_Error FLA_Apply_G_rf_asd_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
138 {
139  double one = bl1_d1();
140  double zero = bl1_d0();
141  double gamma12;
142  double sigma12;
143  double gamma23;
144  double sigma23;
145  double* a1;
146  double* a2;
147  double* a3;
148  dcomplex* g12;
149  dcomplex* g23;
150  int i, j, g, k;
151  int nG, nG_app;
152  int n_iter;
153  int n_left;
154  int k_minus_1;
155  int n_fuse;
156  int is_ident12, is_ident23;
157  int m_app;
158 
159 
160  k_minus_1 = k_G - 1;
161  nG = n_A - 1;
162  n_fuse = 2;
163 
164  // Use the simple variant for nG < 2(k - 1) or k == 1.
165  if ( nG < 2*k_minus_1 || k_G == 1 )
166  {
167  FLA_Apply_G_rf_asd_var1( k_G,
168  m_A,
169  n_A,
170  buff_G, rs_G, cs_G,
171  buff_A, rs_A, cs_A );
172  return FLA_SUCCESS;
173  }
174 
175 
176  // Start-up phase.
177 
178  for ( j = -1; j < k_minus_1; j += n_fuse )
179  {
180  nG_app = j + 1;
181  n_iter = nG_app;
182  n_left = 1;
183 
184  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
185  {
186  g12 = buff_G + (g )*rs_G + (k )*cs_G;
187  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
188  a1 = buff_A + (g )*cs_A;
189  a2 = buff_A + (g + 1)*cs_A;
190  a3 = buff_A + (g + 2)*cs_A;
191 
192  gamma12 = g12->real;
193  sigma12 = g12->imag;
194  gamma23 = g23->real;
195  sigma23 = g23->imag;
196 
197  is_ident12 = ( gamma12 == one && sigma12 == zero );
198  is_ident23 = ( gamma23 == one && sigma23 == zero );
199 
200  m_app = min( i_k + 3 + j - iTL, m_A );
201  m_app = max( m_app, 0 );
202 
203  if ( !is_ident12 && is_ident23 )
204  {
205  // Apply only to columns 1 and 2.
206 
207  MAC_Apply_G_mx2_asd( m_app,
208  &gamma12,
209  &sigma12,
210  a1, 1,
211  a2, 1 );
212  }
213  else if ( is_ident12 && !is_ident23 )
214  {
215  // Apply only to columns 2 and 3.
216 
217  MAC_Apply_G_mx2_asd( m_app,
218  &gamma23,
219  &sigma23,
220  a2, 1,
221  a3, 1 );
222  }
223  else if ( !is_ident12 && !is_ident23 )
224  {
225  // Apply to all three columns.
226 
227  MAC_Apply_G_mx3_asd( m_app,
228  &gamma12,
229  &sigma12,
230  &gamma23,
231  &sigma23,
232  a1, 1,
233  a2, 1,
234  a3, 1 );
235  }
236  }
237 
238  if ( n_left == 1 )
239  {
240  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
241  a2 = buff_A + (g + 1)*cs_A;
242  a3 = buff_A + (g + 2)*cs_A;
243 
244  gamma23 = g23->real;
245  sigma23 = g23->imag;
246 
247  is_ident23 = ( gamma23 == one && sigma23 == zero );
248 
249  m_app = min( i_k + 3 + j - iTL, m_A );
250  m_app = max( m_app, 0 );
251 
252  if ( !is_ident23 )
253  MAC_Apply_G_mx2_asd( m_app,
254  &gamma23,
255  &sigma23,
256  a2, 1,
257  a3, 1 );
258  }
259  }
260 
261  // Pipeline stage
262 
263  for ( ; j < nG - 1; j += n_fuse )
264  {
265  nG_app = k_G;
266  n_iter = nG_app;
267  n_left = 0;
268 
269  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
270  {
271  g12 = buff_G + (g )*rs_G + (k )*cs_G;
272  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
273  a1 = buff_A + (g )*cs_A;
274  a2 = buff_A + (g + 1)*cs_A;
275  a3 = buff_A + (g + 2)*cs_A;
276 
277  gamma12 = g12->real;
278  sigma12 = g12->imag;
279  gamma23 = g23->real;
280  sigma23 = g23->imag;
281 
282  is_ident12 = ( gamma12 == one && sigma12 == zero );
283  is_ident23 = ( gamma23 == one && sigma23 == zero );
284 
285  m_app = min( i_k + 3 + j - iTL, m_A );
286  m_app = max( m_app, 0 );
287 
288  if ( !is_ident12 && is_ident23 )
289  {
290  // Apply only to columns 1 and 2.
291 
292  MAC_Apply_G_mx2_asd( m_app,
293  &gamma12,
294  &sigma12,
295  a1, 1,
296  a2, 1 );
297  }
298  else if ( is_ident12 && !is_ident23 )
299  {
300  // Apply only to columns 2 and 3.
301 
302  MAC_Apply_G_mx2_asd( m_app,
303  &gamma23,
304  &sigma23,
305  a2, 1,
306  a3, 1 );
307  }
308  else if ( !is_ident12 && !is_ident23 )
309  {
310  // Apply to all three columns.
311 
312  MAC_Apply_G_mx3_asd( m_app,
313  &gamma12,
314  &sigma12,
315  &gamma23,
316  &sigma23,
317  a1, 1,
318  a2, 1,
319  a3, 1 );
320  }
321  }
322  }
323 
324  // Shutdown stage
325 
326  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
327  {
328  g = nG - 1;
329  k = j;
330 
331  n_left = 1;
332  if ( n_left == 1 )
333  {
334  g12 = buff_G + (g )*rs_G + (k )*cs_G;
335  a1 = buff_A + (g )*cs_A;
336  a2 = buff_A + (g + 1)*cs_A;
337 
338  gamma12 = g12->real;
339  sigma12 = g12->imag;
340 
341  is_ident12 = ( gamma12 == one && sigma12 == zero );
342 
343  m_app = m_A;
344 
345  if ( !is_ident12 )
346  MAC_Apply_G_mx2_asd( m_app,
347  &gamma12,
348  &sigma12,
349  a1, 1,
350  a2, 1 );
351  ++k;
352  --g;
353  }
354 
355  nG_app = k_minus_1 - j;
356  n_iter = nG_app;
357 
358  for ( i = 0; i < n_iter; ++i, ++k, --g )
359  {
360  g12 = buff_G + (g )*rs_G + (k )*cs_G;
361  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
362  a1 = buff_A + (g )*cs_A;
363  a2 = buff_A + (g + 1)*cs_A;
364  a3 = buff_A + (g + 2)*cs_A;
365 
366  gamma12 = g12->real;
367  sigma12 = g12->imag;
368  gamma23 = g23->real;
369  sigma23 = g23->imag;
370 
371  is_ident12 = ( gamma12 == one && sigma12 == zero );
372  is_ident23 = ( gamma23 == one && sigma23 == zero );
373 
374  m_app = m_A;
375 
376  if ( !is_ident12 && is_ident23 )
377  {
378  // Apply only to columns 1 and 2.
379 
380  MAC_Apply_G_mx2_asd( m_app,
381  &gamma12,
382  &sigma12,
383  a1, 1,
384  a2, 1 );
385  }
386  else if ( is_ident12 && !is_ident23 )
387  {
388  // Apply only to columns 2 and 3.
389 
390  MAC_Apply_G_mx2_asd( m_app,
391  &gamma23,
392  &sigma23,
393  a2, 1,
394  a3, 1 );
395  }
396  else if ( !is_ident12 && !is_ident23 )
397  {
398  // Apply to all three columns.
399 
400  MAC_Apply_G_mx3_asd( m_app,
401  &gamma12,
402  &sigma12,
403  &gamma23,
404  &sigma23,
405  a1, 1,
406  a2, 1,
407  a3, 1 );
408  }
409  }
410  }
411 
412  return FLA_SUCCESS;
413 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bld_var9b().

◆ FLA_Apply_G_rf_asm_var1()

FLA_Error FLA_Apply_G_rf_asm_var1 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ass_var1( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_asd_var1( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_asc_var1( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_asz_var1( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_ass_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:113
FLA_Error FLA_Apply_G_rf_asz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:267
FLA_Error FLA_Apply_G_rf_asc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:215
FLA_Error FLA_Apply_G_rf_asd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var1.c:164
dim_t FLA_Obj_width(FLA_Obj obj)
Definition: FLA_Query.c:123
dim_t FLA_Obj_row_stride(FLA_Obj obj)
Definition: FLA_Query.c:167
dim_t FLA_Obj_length(FLA_Obj obj)
Definition: FLA_Query.c:116
dim_t FLA_Obj_col_stride(FLA_Obj obj)
Definition: FLA_Query.c:174
FLA_Datatype FLA_Obj_datatype(FLA_Obj obj)
Definition: FLA_Query.c:13
int FLA_Datatype
Definition: FLA_type_defs.h:49

References FLA_Apply_G_rf_asc_var1(), FLA_Apply_G_rf_asd_var1(), FLA_Apply_G_rf_ass_var1(), FLA_Apply_G_rf_asz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
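
A hedged end-to-end sketch of driving this object-based wrapper for the real double-precision case. It assumes the usual libflame object interface (FLA_Init, FLA_Obj_create with default strides, FLA_Obj_free, FLA_Finalize) and that G is created as an (n - 1) x k matrix of FLA_DOUBLE_COMPLEX whose entries hold (gamma, sigma) pairs, which is how the asd kernel above reads it; the dimensions are illustrative only.

#include "FLAME.h"  /* assumed libflame header */

int main( void )
{
    dim_t m = 100, n = 50, k = 4;
    FLA_Obj A, G;

    FLA_Init();

    /* 0, 0 requests the default (column-major) strides. */
    FLA_Obj_create( FLA_DOUBLE,         m,     n, 0, 0, &A );
    FLA_Obj_create( FLA_DOUBLE_COMPLEX, n - 1, k, 0, 0, &G );

    /* ... fill A with data and G with (gamma, sigma) rotation pairs;
       an entry equal to (1, 0) is treated as the identity ... */

    FLA_Apply_G_rf_asm_var1( G, A );

    FLA_Obj_free( &G );
    FLA_Obj_free( &A );
    FLA_Finalize();
    return 0;
}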

◆ FLA_Apply_G_rf_asm_var2()

FLA_Error FLA_Apply_G_rf_asm_var2 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ass_var2( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_asd_var2( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_asc_var2( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_asz_var2( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_asc_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var2.c:339
FLA_Error FLA_Apply_G_rf_asd_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var2.c:226
FLA_Error FLA_Apply_G_rf_ass_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var2.c:113
FLA_Error FLA_Apply_G_rf_asz_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var2.c:452

References FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_asz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_asm_var3()

FLA_Error FLA_Apply_G_rf_asm_var3 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ass_var3( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_asd_var3( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_asc_var3( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_asz_var3( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_ass_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var3.c:113
FLA_Error FLA_Apply_G_rf_asz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var3.c:1473
FLA_Error FLA_Apply_G_rf_asd_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var3.c:566
FLA_Error FLA_Apply_G_rf_asc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var3.c:1019

References FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_asz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_asm_var3b()

FLA_Error FLA_Apply_G_rf_asm_var3b ( FLA_Obj  G,
FLA_Obj  A 
)
29 {
30  FLA_Datatype datatype;
31  int k_G, m_A, n_A;
32  int rs_G, cs_G;
33  int rs_A, cs_A;
34 
35  datatype = FLA_Obj_datatype( A );
36 
37  k_G = FLA_Obj_width( G );
38  m_A = FLA_Obj_length( A );
39  n_A = FLA_Obj_width( A );
40 
41  rs_G = FLA_Obj_row_stride( G );
42  cs_G = FLA_Obj_col_stride( G );
43 
44  rs_A = FLA_Obj_row_stride( A );
45  cs_A = FLA_Obj_col_stride( A );
46 
47  switch ( datatype )
48  {
49  case FLA_FLOAT:
50  {
51  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
52  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53 
54  FLA_Apply_G_rf_ass_var3b( k_G,
55  m_A,
56  n_A,
57  0,
58  0,
59  buff_G, rs_G, cs_G,
60  buff_A, rs_A, cs_A );
61 
62  break;
63  }
64 
65  case FLA_DOUBLE:
66  {
67  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
68  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69 
70  FLA_Apply_G_rf_asd_var3b( k_G,
71  m_A,
72  n_A,
73  0,
74  0,
75  buff_G, rs_G, cs_G,
76  buff_A, rs_A, cs_A );
77 
78  break;
79  }
80 
81  case FLA_COMPLEX:
82  {
83  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
84  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
85 
86  FLA_Apply_G_rf_asc_var3b( k_G,
87  m_A,
88  n_A,
89  0,
90  0,
91  buff_G, rs_G, cs_G,
92  buff_A, rs_A, cs_A );
93 
94  break;
95  }
96 
97  case FLA_DOUBLE_COMPLEX:
98  {
99  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
100  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
101 
102  FLA_Apply_G_rf_asz_var3b( k_G,
103  m_A,
104  n_A,
105  0,
106  0,
107  buff_G, rs_G, cs_G,
108  buff_A, rs_A, cs_A );
109 
110  break;
111  }
112  }
113 
114  return FLA_SUCCESS;
115 }

References FLA_Apply_G_rf_asc_var3b(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_ass_var3b(), FLA_Apply_G_rf_asz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_asm_var4()

FLA_Error FLA_Apply_G_rf_asm_var4 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var5()

FLA_Error FLA_Apply_G_rf_asm_var5 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var5b()

FLA_Error FLA_Apply_G_rf_asm_var5b ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var6()

FLA_Error FLA_Apply_G_rf_asm_var6 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ass_var6( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_asd_var6( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_asc_var6( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_asz_var6( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }

References FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_asz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_asm_var6b()

FLA_Error FLA_Apply_G_rf_asm_var6b ( FLA_Obj  G,
FLA_Obj  A 
)
29 {
30  FLA_Datatype datatype;
31  int k_G, m_A, n_A;
32  int rs_G, cs_G;
33  int rs_A, cs_A;
34 
35  datatype = FLA_Obj_datatype( A );
36 
37  k_G = FLA_Obj_width( G );
38  m_A = FLA_Obj_length( A );
39  n_A = FLA_Obj_width( A );
40 
41  rs_G = FLA_Obj_row_stride( G );
42  cs_G = FLA_Obj_col_stride( G );
43 
44  rs_A = FLA_Obj_row_stride( A );
45  cs_A = FLA_Obj_col_stride( A );
46 
47  switch ( datatype )
48  {
49  case FLA_FLOAT:
50  {
51  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
52  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53 
54  FLA_Apply_G_rf_ass_var6b( k_G,
55  m_A,
56  n_A,
57  0,
58  0,
59  buff_G, rs_G, cs_G,
60  buff_A, rs_A, cs_A );
61 
62  break;
63  }
64 
65  case FLA_DOUBLE:
66  {
67  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
68  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69 
70  FLA_Apply_G_rf_asd_var6b( k_G,
71  m_A,
72  n_A,
73  0,
74  0,
75  buff_G, rs_G, cs_G,
76  buff_A, rs_A, cs_A );
77 
78  break;
79  }
80 
81  case FLA_COMPLEX:
82  {
83  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
84  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
85 
86  FLA_Apply_G_rf_asc_var6b( k_G,
87  m_A,
88  n_A,
89  0,
90  0,
91  buff_G, rs_G, cs_G,
92  buff_A, rs_A, cs_A );
93 
94  break;
95  }
96 
97  case FLA_DOUBLE_COMPLEX:
98  {
99  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
100  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
101 
102  FLA_Apply_G_rf_asz_var6b( k_G,
103  m_A,
104  n_A,
105  0,
106  0,
107  buff_G, rs_G, cs_G,
108  buff_A, rs_A, cs_A );
109 
110  break;
111  }
112  }
113 
114  return FLA_SUCCESS;
115 }

References FLA_Apply_G_rf_asc_var6b(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_ass_var6b(), FLA_Apply_G_rf_asz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_asm_var7()

FLA_Error FLA_Apply_G_rf_asm_var7 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var8()

FLA_Error FLA_Apply_G_rf_asm_var8 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var8b()

FLA_Error FLA_Apply_G_rf_asm_var8b ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_asm_var9()

FLA_Error FLA_Apply_G_rf_asm_var9 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
57  FLA_Apply_G_rf_ass_var9( k_G,
58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
71  FLA_Apply_G_rf_asd_var9( k_G,
72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
85  FLA_Apply_G_rf_asc_var9( k_G,
86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
99  FLA_Apply_G_rf_asz_var9( k_G,
100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }

References FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_ass_var9(), FLA_Apply_G_rf_asz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_asm_var9b()

FLA_Error FLA_Apply_G_rf_asm_var9b ( FLA_Obj  G,
FLA_Obj  A 
)
29 {
30  FLA_Datatype datatype;
31  int k_G, m_A, n_A;
32  int rs_G, cs_G;
33  int rs_A, cs_A;
34 
35  datatype = FLA_Obj_datatype( A );
36 
37  k_G = FLA_Obj_width( G );
38  m_A = FLA_Obj_length( A );
39  n_A = FLA_Obj_width( A );
40 
41  rs_G = FLA_Obj_row_stride( G );
42  cs_G = FLA_Obj_col_stride( G );
43 
44  rs_A = FLA_Obj_row_stride( A );
45  cs_A = FLA_Obj_col_stride( A );
46 
47  switch ( datatype )
48  {
49  case FLA_FLOAT:
50  {
51  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
52  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
53 
54  FLA_Apply_G_rf_ass_var9b( k_G,
55  m_A,
56  n_A,
57  0,
58  0,
59  buff_G, rs_G, cs_G,
60  buff_A, rs_A, cs_A );
61 
62  break;
63  }
64 
65  case FLA_DOUBLE:
66  {
67  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
68  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
69 
70  FLA_Apply_G_rf_asd_var9b( k_G,
71  m_A,
72  n_A,
73  0,
74  0,
75  buff_G, rs_G, cs_G,
76  buff_A, rs_A, cs_A );
77 
78  break;
79  }
80 
81  case FLA_COMPLEX:
82  {
83  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
84  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
85 
86  FLA_Apply_G_rf_asc_var9b( k_G,
87  m_A,
88  n_A,
89  0,
90  0,
91  buff_G, rs_G, cs_G,
92  buff_A, rs_A, cs_A );
93 
94  break;
95  }
96 
97  case FLA_DOUBLE_COMPLEX:
98  {
99  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
100  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
101 
102  FLA_Apply_G_rf_asz_var9b( k_G,
103  m_A,
104  n_A,
105  0,
106  0,
107  buff_G, rs_G, cs_G,
108  buff_A, rs_A, cs_A );
109 
110  break;
111  }
112  }
113 
114  return FLA_SUCCESS;
115 }

References FLA_Apply_G_rf_asc_var9b(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_ass_var9b(), FLA_Apply_G_rf_asz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_ass_var1()

FLA_Error FLA_Apply_G_rf_ass_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  int nG_app = n_A - 1;
122  int l, j;
123  float gamma;
124  float sigma;
125  float* a1;
126  float* a2;
127  scomplex* g1;
128  scomplex* g11;
129 
130  g1 = buff_G;
131 
132  for ( l = 0; l < k_G; ++l )
133  {
134  a1 = buff_A;
135  a2 = buff_A + cs_A;
136  g11 = g1;
137 
138  for ( j = 0; j < nG_app; ++j )
139  {
140  gamma = g11->real;
141  sigma = g11->imag;
142 
143  // Skip the current iteration if the rotation is identity.
144  if ( gamma != one || sigma != zero )
145  {
146  MAC_Apply_G_mx2_ass( m_A,
147  &gamma,
148  &sigma,
149  a1, 1,
150  a2, 1 );
151  }
152 
153  a1 += cs_A;
154  a2 += cs_A;
155  g11 += rs_G;
156  }
157 
158  g1 += cs_G;
159  }
160 
161  return FLA_SUCCESS;
162 }

References bl1_s0(), bl1_s1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var1(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_ass_var9(), and FLA_Apply_G_rf_bls_var1().
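
The row-level work in this and the following variants is hidden inside the MAC_Apply_G_mx2_ass macro (and its mx3/mx4 relatives), which are defined elsewhere in the library. As a rough, plain-C sketch of the two-column update that macro is assumed to perform, under the usual Givens sign convention and with unit row stride:

    // Sketch only: apply the rotation [ gamma sigma; -sigma gamma ] to two
    // length-m columns a1 and a2 (contiguous rows assumed). The library's
    // macro may vectorize this and may differ in sign convention.
    static void apply_g_mx2_s_sketch( int m, float gamma, float sigma,
                                      float* a1, float* a2 )
    {
        int i;
        for ( i = 0; i < m; ++i )
        {
            float temp1 = a1[ i ];
            float temp2 = a2[ i ];

            a1[ i ] =  gamma * temp1 + sigma * temp2;
            a2[ i ] = -sigma * temp1 + gamma * temp2;
        }
    }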

◆ FLA_Apply_G_rf_ass_var2()

FLA_Error FLA_Apply_G_rf_ass_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  float gamma;
122  float sigma;
123  float* a1;
124  float* a2;
125  scomplex* g11;
126  int j, g, k;
127  int nG, nG_app;
128  int k_minus_1;
129 
130  k_minus_1 = k_G - 1;
131  nG = n_A - 1;
132 
133  // Use the simple variant for nG < (k - 1) or k_G == 1.
134  if ( nG < k_minus_1 || k_G == 1 )
135  {
136  FLA_Apply_G_rf_ass_var1( k_G,
137  m_A,
138  n_A,
139  buff_G, rs_G, cs_G,
140  buff_A, rs_A, cs_A );
141  return FLA_SUCCESS;
142  }
143 
144 
145  // Start-up phase.
146 
147  for ( j = 0; j < k_minus_1; ++j )
148  {
149  nG_app = j + 1;
150 
151  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
152  {
153  g11 = buff_G + (g )*rs_G + (k )*cs_G;
154  a1 = buff_A + (g )*cs_A;
155  a2 = buff_A + (g + 1)*cs_A;
156 
157  gamma = g11->real;
158  sigma = g11->imag;
159 
160  // Skip the current iteration if the rotation is identity.
161  if ( gamma == one && sigma == zero ) continue;
162 
163  MAC_Apply_G_mx2_ass( m_A,
164  &gamma,
165  &sigma,
166  a1, 1,
167  a2, 1 );
168  }
169  }
170 
171  // Pipeline stage
172 
173  for ( j = k_minus_1; j < nG; ++j )
174  {
175  nG_app = k_G;
176 
177  for ( k = 0, g = j; k < nG_app; ++k, --g )
178  {
179  g11 = buff_G + (g )*rs_G + (k )*cs_G;
180  a1 = buff_A + (g )*cs_A;
181  a2 = buff_A + (g + 1)*cs_A;
182 
183  gamma = g11->real;
184  sigma = g11->imag;
185 
186  // Skip the current iteration if the rotation is identity.
187  if ( gamma == one && sigma == zero ) continue;
188 
189  MAC_Apply_G_mx2_ass( m_A,
190  &gamma,
191  &sigma,
192  a1, 1,
193  a2, 1 );
194  }
195  }
196 
197  // Shutdown stage
198 
199  for ( j = nG - k_minus_1; j < nG; ++j )
200  {
201  nG_app = nG - j;
202 
203  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
204  {
205  g11 = buff_G + (g )*rs_G + (k )*cs_G;
206  a1 = buff_A + (g )*cs_A;
207  a2 = buff_A + (g + 1)*cs_A;
208 
209  gamma = g11->real;
210  sigma = g11->imag;
211 
212  // Skip the current iteration if the rotation is identity.
213  if ( gamma == one && sigma == zero ) continue;
214 
215  MAC_Apply_G_mx2_ass( m_A,
216  &gamma,
217  &sigma,
218  a1, 1,
219  a2, 1 );
220  }
221  }
222 
223  return FLA_SUCCESS;
224 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_bls_var2().
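
Compared with variant 1, which applies each of the k_G rotation sets to all columns before moving on to the next set, variant 2 walks anti-diagonals of the (g, k) index space so that each outer step touches at most one rotation per set. The start-up, pipeline, and shutdown loops above together visit every (g, k) pair exactly once. The following stand-alone sketch reproduces only the index traversal (printing pairs instead of applying rotations), using the same variable names as the listing:

    #include <stdio.h>

    // Sketch: print the (k, g) visit order used by the var2 wavefront
    // for k_G rotation sets of nG rotations each.
    static void print_var2_schedule( int k_G, int nG )
    {
        int j, k, g, nG_app;
        int k_minus_1 = k_G - 1;

        // Start-up phase: anti-diagonals of length 1, 2, ..., k_G - 1.
        for ( j = 0; j < k_minus_1; ++j )
        {
            nG_app = j + 1;
            for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
                printf( "startup  j=%d: ( k=%d, g=%d )\n", j, k, g );
        }

        // Pipeline phase: full anti-diagonals of length k_G.
        for ( j = k_minus_1; j < nG; ++j )
            for ( k = 0, g = j; k < k_G; ++k, --g )
                printf( "pipeline j=%d: ( k=%d, g=%d )\n", j, k, g );

        // Shutdown phase: anti-diagonals of length k_G - 1, ..., 1.
        for ( j = nG - k_minus_1; j < nG; ++j )
        {
            nG_app = nG - j;
            for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
                printf( "shutdown j=%d: ( k=%d, g=%d )\n", j, k, g );
        }
    }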

◆ FLA_Apply_G_rf_ass_var3()

FLA_Error FLA_Apply_G_rf_ass_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  float gamma23_k1;
122  float sigma23_k1;
123  float gamma34_k1;
124  float sigma34_k1;
125  float gamma12_k2;
126  float sigma12_k2;
127  float gamma23_k2;
128  float sigma23_k2;
129  float* a1;
130  float* a2;
131  float* a3;
132  float* a4;
133  scomplex* g23_k1;
134  scomplex* g34_k1;
135  scomplex* g12_k2;
136  scomplex* g23_k2;
137  int i, j, g, k;
138  int nG, nG_app;
139  int n_iter;
140  int n_left;
141  int k_minus_1;
142  int n_fuse;
143  int k_fuse;
144  int is_ident23_k1, is_ident34_k1;
145  int is_ident12_k2, is_ident23_k2;
146  int has_ident;
147 
148  k_minus_1 = k_G - 1;
149  nG = n_A - 1;
150  n_fuse = 2;
151  k_fuse = 2;
152 
153  // Use the simple variant for nG < 2(k - 1) or k_G == 1.
154  if ( nG < 2*k_minus_1 || k_G == 1 )
155  {
156  FLA_Apply_G_rf_ass_var1( k_G,
157  m_A,
158  n_A,
159  buff_G, rs_G, cs_G,
160  buff_A, rs_A, cs_A );
161  return FLA_SUCCESS;
162  }
163 
164 
165  // Start-up phase.
166 
167  for ( j = -1; j < k_minus_1; j += n_fuse )
168  {
169  nG_app = j + 2;
170  n_iter = nG_app / k_fuse;
171  n_left = 1;
172 
173  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
174  {
175  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
176  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
177  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
178  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
179  a1 = buff_A + (g - 1)*cs_A;
180  a2 = buff_A + (g )*cs_A;
181  a3 = buff_A + (g + 1)*cs_A;
182  a4 = buff_A + (g + 2)*cs_A;
183 
184  gamma23_k1 = g23_k1->real;
185  sigma23_k1 = g23_k1->imag;
186  gamma34_k1 = g34_k1->real;
187  sigma34_k1 = g34_k1->imag;
188  gamma12_k2 = g12_k2->real;
189  sigma12_k2 = g12_k2->imag;
190  gamma23_k2 = g23_k2->real;
191  sigma23_k2 = g23_k2->imag;
192 
193  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
194  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
195  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
196  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
197  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
198  is_ident12_k2 || is_ident23_k2 );
199 
200  if ( has_ident )
201  {
202  // Apply to pairs of columns as needed.
203 
204  if ( !is_ident23_k1 )
205  MAC_Apply_G_mx2_ass( m_A,
206  &gamma23_k1,
207  &sigma23_k1,
208  a2, 1,
209  a3, 1 );
210 
211  if ( !is_ident34_k1 )
212  MAC_Apply_G_mx2_ass( m_A,
213  &gamma34_k1,
214  &sigma34_k1,
215  a3, 1,
216  a4, 1 );
217 
218  if ( !is_ident12_k2 )
219  MAC_Apply_G_mx2_ass( m_A,
220  &gamma12_k2,
221  &sigma12_k2,
222  a1, 1,
223  a2, 1 );
224 
225  if ( !is_ident23_k2 )
226  MAC_Apply_G_mx2_ass( m_A,
227  &gamma23_k2,
228  &sigma23_k2,
229  a2, 1,
230  a3, 1 );
231  }
232  else
233  {
234  // Apply to all four columns.
235 
236  MAC_Apply_G_mx4s_ass( m_A,
237  &gamma23_k1,
238  &sigma23_k1,
239  &gamma34_k1,
240  &sigma34_k1,
241  &gamma12_k2,
242  &sigma12_k2,
243  &gamma23_k2,
244  &sigma23_k2,
245  a1, 1,
246  a2, 1,
247  a3, 1,
248  a4, 1 );
249  }
250  }
251 
252  if ( n_left == 1 )
253  {
254  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
255  a3 = buff_A + (g + 1)*cs_A;
256  a4 = buff_A + (g + 2)*cs_A;
257 
258  gamma34_k1 = g34_k1->real;
259  sigma34_k1 = g34_k1->imag;
260 
261  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
262 
263  if ( !is_ident34_k1 )
264  MAC_Apply_G_mx2_ass( m_A,
265  &gamma34_k1,
266  &sigma34_k1,
267  a3, 1,
268  a4, 1 );
269  }
270  }
271 
272  // Pipeline stage
273 
274  for ( ; j < nG - 1; j += n_fuse )
275  {
276  nG_app = k_G;
277  n_iter = nG_app / k_fuse;
278  n_left = nG_app % k_fuse;
279 
280  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
281  {
282  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
283  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
284  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
285  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
286  a1 = buff_A + (g - 1)*cs_A;
287  a2 = buff_A + (g )*cs_A;
288  a3 = buff_A + (g + 1)*cs_A;
289  a4 = buff_A + (g + 2)*cs_A;
290 
291  gamma23_k1 = g23_k1->real;
292  sigma23_k1 = g23_k1->imag;
293  gamma34_k1 = g34_k1->real;
294  sigma34_k1 = g34_k1->imag;
295  gamma12_k2 = g12_k2->real;
296  sigma12_k2 = g12_k2->imag;
297  gamma23_k2 = g23_k2->real;
298  sigma23_k2 = g23_k2->imag;
299 
300  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
301  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
302  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
303  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
304  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
305  is_ident12_k2 || is_ident23_k2 );
306 
307  if ( has_ident )
308  {
309  // Apply to pairs of columns as needed.
310 
311  if ( !is_ident23_k1 )
312  MAC_Apply_G_mx2_ass( m_A,
313  &gamma23_k1,
314  &sigma23_k1,
315  a2, 1,
316  a3, 1 );
317 
318  if ( !is_ident34_k1 )
319  MAC_Apply_G_mx2_ass( m_A,
320  &gamma34_k1,
321  &sigma34_k1,
322  a3, 1,
323  a4, 1 );
324 
325  if ( !is_ident12_k2 )
326  MAC_Apply_G_mx2_ass( m_A,
327  &gamma12_k2,
328  &sigma12_k2,
329  a1, 1,
330  a2, 1 );
331 
332  if ( !is_ident23_k2 )
333  MAC_Apply_G_mx2_ass( m_A,
334  &gamma23_k2,
335  &sigma23_k2,
336  a2, 1,
337  a3, 1 );
338  }
339  else
340  {
341  // Apply to all four columns.
342 
343  MAC_Apply_G_mx4s_ass( m_A,
344  &gamma23_k1,
345  &sigma23_k1,
346  &gamma34_k1,
347  &sigma34_k1,
348  &gamma12_k2,
349  &sigma12_k2,
350  &gamma23_k2,
351  &sigma23_k2,
352  a1, 1,
353  a2, 1,
354  a3, 1,
355  a4, 1 );
356  }
357  }
358 
359  if ( n_left == 1 )
360  {
361  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
362  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
363  a2 = buff_A + (g )*cs_A;
364  a3 = buff_A + (g + 1)*cs_A;
365  a4 = buff_A + (g + 2)*cs_A;
366 
367  gamma23_k1 = g23_k1->real;
368  sigma23_k1 = g23_k1->imag;
369  gamma34_k1 = g34_k1->real;
370  sigma34_k1 = g34_k1->imag;
371 
372  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
373  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
374 
375  if ( !is_ident23_k1 && is_ident34_k1 )
376  {
377  MAC_Apply_G_mx2_ass( m_A,
378  &gamma23_k1,
379  &sigma23_k1,
380  a2, 1,
381  a3, 1 );
382  }
383  else if ( is_ident23_k1 && !is_ident34_k1 )
384  {
385  MAC_Apply_G_mx2_ass( m_A,
386  &gamma34_k1,
387  &sigma34_k1,
388  a3, 1,
389  a4, 1 );
390  }
391  else
392  {
393  MAC_Apply_G_mx3_ass( m_A,
394  &gamma23_k1,
395  &sigma23_k1,
396  &gamma34_k1,
397  &sigma34_k1,
398  a2, 1,
399  a3, 1,
400  a4, 1 );
401  }
402  }
403  }
404 
405  // Shutdown stage
406 
407  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
408  {
409  g = nG - 1;
410  k = j;
411 
412  //n_left = 1;
413  //if ( n_left == 1 )
414  {
415  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
416  a2 = buff_A + (g )*cs_A;
417  a3 = buff_A + (g + 1)*cs_A;
418 
419  gamma23_k1 = g23_k1->real;
420  sigma23_k1 = g23_k1->imag;
421 
422  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
423 
424  if ( !is_ident23_k1 )
425  MAC_Apply_G_mx2_ass( m_A,
426  &gamma23_k1,
427  &sigma23_k1,
428  a2, 1,
429  a3, 1 );
430  ++k;
431  --g;
432  }
433 
434  nG_app = k_minus_1 - j;
435  n_iter = nG_app / k_fuse;
436  n_left = nG_app % k_fuse;
437 
438  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
439  {
440  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
441  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
442  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
443  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
444  a1 = buff_A + (g - 1)*cs_A;
445  a2 = buff_A + (g )*cs_A;
446  a3 = buff_A + (g + 1)*cs_A;
447  a4 = buff_A + (g + 2)*cs_A;
448 
449  gamma23_k1 = g23_k1->real;
450  sigma23_k1 = g23_k1->imag;
451  gamma34_k1 = g34_k1->real;
452  sigma34_k1 = g34_k1->imag;
453  gamma12_k2 = g12_k2->real;
454  sigma12_k2 = g12_k2->imag;
455  gamma23_k2 = g23_k2->real;
456  sigma23_k2 = g23_k2->imag;
457 
458  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
459  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
460  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
461  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
462  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
463  is_ident12_k2 || is_ident23_k2 );
464 
465  if ( has_ident )
466  {
467  // Apply to pairs of columns as needed.
468 
469  if ( !is_ident23_k1 )
470  MAC_Apply_G_mx2_ass( m_A,
471  &gamma23_k1,
472  &sigma23_k1,
473  a2, 1,
474  a3, 1 );
475 
476  if ( !is_ident34_k1 )
477  MAC_Apply_G_mx2_ass( m_A,
478  &gamma34_k1,
479  &sigma34_k1,
480  a3, 1,
481  a4, 1 );
482 
483  if ( !is_ident12_k2 )
484  MAC_Apply_G_mx2_ass( m_A,
485  &gamma12_k2,
486  &sigma12_k2,
487  a1, 1,
488  a2, 1 );
489 
490  if ( !is_ident23_k2 )
491  MAC_Apply_G_mx2_ass( m_A,
492  &gamma23_k2,
493  &sigma23_k2,
494  a2, 1,
495  a3, 1 );
496  }
497  else
498  {
499  // Apply to all four columns.
500 
501  MAC_Apply_G_mx4s_ass( m_A,
502  &gamma23_k1,
503  &sigma23_k1,
504  &gamma34_k1,
505  &sigma34_k1,
506  &gamma12_k2,
507  &sigma12_k2,
508  &gamma23_k2,
509  &sigma23_k2,
510  a1, 1,
511  a2, 1,
512  a3, 1,
513  a4, 1 );
514  }
515  }
516 
517  if ( n_left == 1 )
518  {
519  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
520  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
521  a2 = buff_A + (g )*cs_A;
522  a3 = buff_A + (g + 1)*cs_A;
523  a4 = buff_A + (g + 2)*cs_A;
524 
525  gamma23_k1 = g23_k1->real;
526  sigma23_k1 = g23_k1->imag;
527  gamma34_k1 = g34_k1->real;
528  sigma34_k1 = g34_k1->imag;
529 
530  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
531  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
532 
533  if ( !is_ident23_k1 && is_ident34_k1 )
534  {
535  MAC_Apply_G_mx2_ass( m_A,
536  &gamma23_k1,
537  &sigma23_k1,
538  a2, 1,
539  a3, 1 );
540  }
541  else if ( is_ident23_k1 && !is_ident34_k1 )
542  {
543  MAC_Apply_G_mx2_ass( m_A,
544  &gamma34_k1,
545  &sigma34_k1,
546  a3, 1,
547  a4, 1 );
548  }
549  else
550  {
551  MAC_Apply_G_mx3_ass( m_A,
552  &gamma23_k1,
553  &sigma23_k1,
554  &gamma34_k1,
555  &sigma34_k1,
556  a2, 1,
557  a3, 1,
558  a4, 1 );
559  }
560  }
561  }
562 
563  return FLA_SUCCESS;
564 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3().
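
Variant 3 additionally fuses two rotations from each of two adjacent rotation sets, so four columns are read and written in a single pass through the rows (MAC_Apply_G_mx4s_ass); whenever one of the four rotations is the identity, the code falls back to the unfused two-column updates. The fused macro is defined elsewhere; a rough scalar sketch of the per-row work it is assumed to perform, applying the rotations in the same order as the unfused branch above, is:

    // Sketch only: one row of the fused four-column update, applying
    //   (x2,x3) by G23_k1, (x3,x4) by G34_k1, (x1,x2) by G12_k2, (x2,x3) by G23_k2.
    // The library's macro may reorder operations or vectorize across rows.
    static void fused_row_update_sketch( float* x1, float* x2, float* x3, float* x4,
                                         float g23_1, float s23_1,
                                         float g34_1, float s34_1,
                                         float g12_2, float s12_2,
                                         float g23_2, float s23_2 )
    {
        float t1 = *x1, t2 = *x2, t3 = *x3, t4 = *x4;
        float u2, u3;

        // (x2, x3) by G23_k1
        u2 =  g23_1 * t2 + s23_1 * t3;
        u3 = -s23_1 * t2 + g23_1 * t3;

        // (x3, x4) by G34_k1
        t3  =  g34_1 * u3 + s34_1 * t4;
        *x4 = -s34_1 * u3 + g34_1 * t4;

        // (x1, x2) by G12_k2
        *x1 =  g12_2 * t1 + s12_2 * u2;
        u2  = -s12_2 * t1 + g12_2 * u2;

        // (x2, x3) by G23_k2
        *x2 =  g23_2 * u2 + s23_2 * t3;
        *x3 = -s23_2 * u2 + g23_2 * t3;
    }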

◆ FLA_Apply_G_rf_ass_var3b()

FLA_Error FLA_Apply_G_rf_ass_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
125 {
126  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
127 
128  return FLA_SUCCESS;
129 }

Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bls_var3b().

◆ FLA_Apply_G_rf_ass_var4()

FLA_Error FLA_Apply_G_rf_ass_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var5()

FLA_Error FLA_Apply_G_rf_ass_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var5b()

FLA_Error FLA_Apply_G_rf_ass_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var6()

FLA_Error FLA_Apply_G_rf_ass_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  float gamma12;
122  float sigma12;
123  float gamma23;
124  float sigma23;
125  float* a1;
126  float* a2;
127  float* a3;
128  scomplex* g12;
129  scomplex* g23;
130  int i, j, g, k;
131  int nG, nG_app;
132  int n_iter;
133  int n_left;
134  int k_minus_1;
135  int n_fuse;
136  int is_ident12, is_ident23;
137 
138  k_minus_1 = k_G - 1;
139  nG = n_A - 1;
140  n_fuse = 2;
141 
142  // Use the simple variant for nG < (k - 1) or k == 1.
143  if ( nG < k_minus_1 || k_G == 1 )
144  {
145  FLA_Apply_G_rf_ass_var1( k_G,
146  m_A,
147  n_A,
148  buff_G, rs_G, cs_G,
149  buff_A, rs_A, cs_A );
150  return FLA_SUCCESS;
151  }
152 
153 
154  // Start-up phase.
155 
156  for ( j = 0; j < k_minus_1; ++j )
157  {
158  nG_app = j + 1;
159  n_iter = nG_app / n_fuse;
160  n_left = nG_app % n_fuse;
161 
162  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
163  {
164  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
165  g23 = buff_G + (g )*rs_G + (k )*cs_G;
166  a1 = buff_A + (g - 1)*cs_A;
167  a2 = buff_A + (g )*cs_A;
168  a3 = buff_A + (g + 1)*cs_A;
169 
170  gamma12 = g12->real;
171  sigma12 = g12->imag;
172  gamma23 = g23->real;
173  sigma23 = g23->imag;
174 
175  is_ident12 = ( gamma12 == one && sigma12 == zero );
176  is_ident23 = ( gamma23 == one && sigma23 == zero );
177 
178  if ( !is_ident12 && is_ident23 )
179  {
180  // Apply only to columns 1 and 2.
181 
182  MAC_Apply_G_mx2_ass( m_A,
183  &gamma12,
184  &sigma12,
185  a1, 1,
186  a2, 1 );
187  }
188  else if ( is_ident12 && !is_ident23 )
189  {
190  // Apply only to columns 2 and 3.
191 
192  MAC_Apply_G_mx2_ass( m_A,
193  &gamma23,
194  &sigma23,
195  a2, 1,
196  a3, 1 );
197  }
198  else if ( !is_ident12 && !is_ident23 )
199  {
200  // Apply to all three columns.
201 
202  MAC_Apply_G_mx3b_ass( m_A,
203  &gamma12,
204  &sigma12,
205  &gamma23,
206  &sigma23,
207  a1, 1,
208  a2, 1,
209  a3, 1 );
210  }
211  }
212 
213  if ( n_left == 1 )
214  {
215  g23 = buff_G + (g )*rs_G + (k )*cs_G;
216  a2 = buff_A + (g )*cs_A;
217  a3 = buff_A + (g + 1)*cs_A;
218 
219  gamma23 = g23->real;
220  sigma23 = g23->imag;
221 
222  is_ident23 = ( gamma23 == one && sigma23 == zero );
223 
224  if ( !is_ident23 )
225  MAC_Apply_G_mx2_ass( m_A,
226  &gamma23,
227  &sigma23,
228  a2, 1,
229  a3, 1 );
230  }
231  }
232 
233  // Pipeline stage
234 
235  for ( j = k_minus_1; j < nG; ++j )
236  {
237  nG_app = k_G;
238  n_iter = nG_app / n_fuse;
239  n_left = nG_app % n_fuse;
240 
241  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
242  {
243  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
244  g23 = buff_G + (g )*rs_G + (k )*cs_G;
245  a1 = buff_A + (g - 1)*cs_A;
246  a2 = buff_A + (g )*cs_A;
247  a3 = buff_A + (g + 1)*cs_A;
248 
249  gamma12 = g12->real;
250  sigma12 = g12->imag;
251  gamma23 = g23->real;
252  sigma23 = g23->imag;
253 
254  is_ident12 = ( gamma12 == one && sigma12 == zero );
255  is_ident23 = ( gamma23 == one && sigma23 == zero );
256 
257  if ( !is_ident12 && is_ident23 )
258  {
259  // Apply only to columns 1 and 2.
260 
261  MAC_Apply_G_mx2_ass( m_A,
262  &gamma12,
263  &sigma12,
264  a1, 1,
265  a2, 1 );
266  }
267  else if ( is_ident12 && !is_ident23 )
268  {
269  // Apply only to columns 2 and 3.
270 
271  MAC_Apply_G_mx2_ass( m_A,
272  &gamma23,
273  &sigma23,
274  a2, 1,
275  a3, 1 );
276  }
277  else if ( !is_ident12 && !is_ident23 )
278  {
279  // Apply to all three columns.
280 
281  MAC_Apply_G_mx3b_ass( m_A,
282  &gamma12,
283  &sigma12,
284  &gamma23,
285  &sigma23,
286  a1, 1,
287  a2, 1,
288  a3, 1 );
289  }
290  }
291 
292  if ( n_left == 1 )
293  {
294  g23 = buff_G + (g )*rs_G + (k )*cs_G;
295  a2 = buff_A + (g )*cs_A;
296  a3 = buff_A + (g + 1)*cs_A;
297 
298  gamma23 = g23->real;
299  sigma23 = g23->imag;
300 
301  is_ident23 = ( gamma23 == one && sigma23 == zero );
302 
303  if ( !is_ident23 )
304  MAC_Apply_G_mx2_ass( m_A,
305  &gamma23,
306  &sigma23,
307  a2, 1,
308  a3, 1 );
309  }
310  }
311 
312  // Shutdown stage
313 
314  for ( j = 1; j < k_G; ++j )
315  {
316  nG_app = k_G - j;
317  n_iter = nG_app / n_fuse;
318  n_left = nG_app % n_fuse;
319 
320  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
321  {
322  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
323  g23 = buff_G + (g )*rs_G + (k )*cs_G;
324  a1 = buff_A + (g - 1)*cs_A;
325  a2 = buff_A + (g )*cs_A;
326  a3 = buff_A + (g + 1)*cs_A;
327 
328  gamma12 = g12->real;
329  sigma12 = g12->imag;
330  gamma23 = g23->real;
331  sigma23 = g23->imag;
332 
333  is_ident12 = ( gamma12 == one && sigma12 == zero );
334  is_ident23 = ( gamma23 == one && sigma23 == zero );
335 
336  if ( !is_ident12 && is_ident23 )
337  {
338  // Apply only to columns 1 and 2.
339 
340  MAC_Apply_G_mx2_ass( m_A,
341  &gamma12,
342  &sigma12,
343  a1, 1,
344  a2, 1 );
345  }
346  else if ( is_ident12 && !is_ident23 )
347  {
348  // Apply only to columns 2 and 3.
349 
350  MAC_Apply_G_mx2_ass( m_A,
351  &gamma23,
352  &sigma23,
353  a2, 1,
354  a3, 1 );
355  }
356  else if ( !is_ident12 && !is_ident23 )
357  {
358  // Apply to all three columns.
359 
360  MAC_Apply_G_mx3b_ass( m_A,
361  &gamma12,
362  &sigma12,
363  &gamma23,
364  &sigma23,
365  a1, 1,
366  a2, 1,
367  a3, 1 );
368  }
369  }
370 
371  if ( n_left == 1 )
372  {
373  g23 = buff_G + (g )*rs_G + (k )*cs_G;
374  a2 = buff_A + (g )*cs_A;
375  a3 = buff_A + (g + 1)*cs_A;
376 
377  gamma23 = g23->real;
378  sigma23 = g23->imag;
379 
380  is_ident23 = ( gamma23 == one && sigma23 == zero );
381 
382  if ( !is_ident23 )
383  MAC_Apply_G_mx2_ass( m_A,
384  &gamma23,
385  &sigma23,
386  a2, 1,
387  a3, 1 );
388  }
389  }
390 
391  return FLA_SUCCESS;
392 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bls_var6().

◆ FLA_Apply_G_rf_ass_var6b()

FLA_Error FLA_Apply_G_rf_ass_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
125 {
126  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
127 
128  return FLA_SUCCESS;
129 }

Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bls_var6b().

◆ FLA_Apply_G_rf_ass_var7()

FLA_Error FLA_Apply_G_rf_ass_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var8()

FLA_Error FLA_Apply_G_rf_ass_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var8b()

FLA_Error FLA_Apply_G_rf_ass_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ass_var9()

FLA_Error FLA_Apply_G_rf_ass_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  float gamma12;
122  float sigma12;
123  float gamma23;
124  float sigma23;
125  float* a1;
126  float* a2;
127  float* a3;
128  scomplex* g12;
129  scomplex* g23;
130  int i, j, g, k;
131  int nG, nG_app;
132  int n_iter;
133  int n_left;
134  int k_minus_1;
135  int n_fuse;
136  int is_ident12, is_ident23;
137 
138  k_minus_1 = k_G - 1;
139  nG = n_A - 1;
140  n_fuse = 2;
141 
142  // Use the simple variant for nG < 2(k - 1) or k_G == 1.
143  if ( nG < 2*k_minus_1 || k_G == 1 )
144  {
145  FLA_Apply_G_rf_ass_var1( k_G,
146  m_A,
147  n_A,
148  buff_G, rs_G, cs_G,
149  buff_A, rs_A, cs_A );
150  return FLA_SUCCESS;
151  }
152 
153 
154  // Start-up phase.
155 
156  for ( j = -1; j < k_minus_1; j += n_fuse )
157  {
158  nG_app = j + 1;
159  n_iter = nG_app;
160  n_left = 1;
161 
162  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
163  {
164  g12 = buff_G + (g )*rs_G + (k )*cs_G;
165  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
166  a1 = buff_A + (g )*cs_A;
167  a2 = buff_A + (g + 1)*cs_A;
168  a3 = buff_A + (g + 2)*cs_A;
169 
170  gamma12 = g12->real;
171  sigma12 = g12->imag;
172  gamma23 = g23->real;
173  sigma23 = g23->imag;
174 
175  is_ident12 = ( gamma12 == one && sigma12 == zero );
176  is_ident23 = ( gamma23 == one && sigma23 == zero );
177 
178  if ( !is_ident12 && is_ident23 )
179  {
180  // Apply only to columns 1 and 2.
181 
182  MAC_Apply_G_mx2_ass( m_A,
183  &gamma12,
184  &sigma12,
185  a1, 1,
186  a2, 1 );
187  }
188  else if ( is_ident12 && !is_ident23 )
189  {
190  // Apply only to columns 2 and 3.
191 
192  MAC_Apply_G_mx2_ass( m_A,
193  &gamma23,
194  &sigma23,
195  a2, 1,
196  a3, 1 );
197  }
198  else if ( !is_ident12 && !is_ident23 )
199  {
200  // Apply to all three columns.
201 
202  MAC_Apply_G_mx3_ass( m_A,
203  &gamma12,
204  &sigma12,
205  &gamma23,
206  &sigma23,
207  a1, 1,
208  a2, 1,
209  a3, 1 );
210  }
211  }
212 
213  if ( n_left == 1 )
214  {
215  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
216  a2 = buff_A + (g + 1)*cs_A;
217  a3 = buff_A + (g + 2)*cs_A;
218 
219  gamma23 = g23->real;
220  sigma23 = g23->imag;
221 
222  is_ident23 = ( gamma23 == one && sigma23 == zero );
223 
224  if ( !is_ident23 )
225  MAC_Apply_G_mx2_ass( m_A,
226  &gamma23,
227  &sigma23,
228  a2, 1,
229  a3, 1 );
230  }
231  }
232 
233  // Pipeline stage
234 
235  for ( ; j < nG - 1; j += n_fuse )
236  {
237  nG_app = k_G;
238  n_iter = nG_app;
239  n_left = 0;
240 
241  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
242  {
243  g12 = buff_G + (g )*rs_G + (k )*cs_G;
244  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
245  a1 = buff_A + (g )*cs_A;
246  a2 = buff_A + (g + 1)*cs_A;
247  a3 = buff_A + (g + 2)*cs_A;
248 
249  gamma12 = g12->real;
250  sigma12 = g12->imag;
251  gamma23 = g23->real;
252  sigma23 = g23->imag;
253 
254  is_ident12 = ( gamma12 == one && sigma12 == zero );
255  is_ident23 = ( gamma23 == one && sigma23 == zero );
256 
257  if ( !is_ident12 && is_ident23 )
258  {
259  // Apply only to columns 1 and 2.
260 
261  MAC_Apply_G_mx2_ass( m_A,
262  &gamma12,
263  &sigma12,
264  a1, 1,
265  a2, 1 );
266  }
267  else if ( is_ident12 && !is_ident23 )
268  {
269  // Apply only to columns 2 and 3.
270 
271  MAC_Apply_G_mx2_ass( m_A,
272  &gamma23,
273  &sigma23,
274  a2, 1,
275  a3, 1 );
276  }
277  else if ( !is_ident12 && !is_ident23 )
278  {
279  // Apply to all three columns.
280 
281  MAC_Apply_G_mx3_ass( m_A,
282  &gamma12,
283  &sigma12,
284  &gamma23,
285  &sigma23,
286  a1, 1,
287  a2, 1,
288  a3, 1 );
289  }
290  }
291  }
292 
293  // Shutdown stage
294 
295  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
296  {
297  g = nG - 1;
298  k = j;
299 
300  n_left = 1;
301  if ( n_left == 1 )
302  {
303  g12 = buff_G + (g )*rs_G + (k )*cs_G;
304  a1 = buff_A + (g )*cs_A;
305  a2 = buff_A + (g + 1)*cs_A;
306 
307  gamma12 = g12->real;
308  sigma12 = g12->imag;
309 
310  is_ident12 = ( gamma12 == one && sigma12 == zero );
311 
312  if ( !is_ident12 )
313  MAC_Apply_G_mx2_ass( m_A,
314  &gamma12,
315  &sigma12,
316  a1, 1,
317  a2, 1 );
318  ++k;
319  --g;
320  }
321 
322  nG_app = k_minus_1 - j;
323  n_iter = nG_app;
324 
325  for ( i = 0; i < n_iter; ++i, ++k, --g )
326  {
327  g12 = buff_G + (g )*rs_G + (k )*cs_G;
328  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
329  a1 = buff_A + (g )*cs_A;
330  a2 = buff_A + (g + 1)*cs_A;
331  a3 = buff_A + (g + 2)*cs_A;
332 
333  gamma12 = g12->real;
334  sigma12 = g12->imag;
335  gamma23 = g23->real;
336  sigma23 = g23->imag;
337 
338  is_ident12 = ( gamma12 == one && sigma12 == zero );
339  is_ident23 = ( gamma23 == one && sigma23 == zero );
340 
341  if ( !is_ident12 && is_ident23 )
342  {
343  // Apply only to columns 1 and 2.
344 
345  MAC_Apply_G_mx2_ass( m_A,
346  &gamma12,
347  &sigma12,
348  a1, 1,
349  a2, 1 );
350  }
351  else if ( is_ident12 && !is_ident23 )
352  {
353  // Apply only to columns 2 and 3.
354 
355  MAC_Apply_G_mx2_ass( m_A,
356  &gamma23,
357  &sigma23,
358  a2, 1,
359  a3, 1 );
360  }
361  else if ( !is_ident12 && !is_ident23 )
362  {
363  // Apply to all three columns.
364 
365  MAC_Apply_G_mx3_ass( m_A,
366  &gamma12,
367  &sigma12,
368  &gamma23,
369  &sigma23,
370  a1, 1,
371  a2, 1,
372  a3, 1 );
373  }
374  }
375  }
376 
377  return FLA_SUCCESS;
378 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ass_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_bls_var9().

◆ FLA_Apply_G_rf_ass_var9b()

FLA_Error FLA_Apply_G_rf_ass_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
125 {
126  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
127 
128  return FLA_SUCCESS;
129 }

Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bls_var9b().

◆ FLA_Apply_G_rf_asz_var1()

FLA_Error FLA_Apply_G_rf_asz_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
272 {
273  double one = bl1_d1();
274  double zero = bl1_d0();
275  int nG_app = n_A - 1;
276  int l, j;
277  double gamma;
278  double sigma;
279  dcomplex* a1;
280  dcomplex* a2;
281  dcomplex* g1;
282  dcomplex* g11;
283 
284  g1 = buff_G;
285 
286  for ( l = 0; l < k_G; ++l )
287  {
288  a1 = buff_A;
289  a2 = buff_A + cs_A;
290  g11 = g1;
291 
292  for ( j = 0; j < nG_app; ++j )
293  {
294  gamma = g11->real;
295  sigma = g11->imag;
296 
297  // Skip the current iteration if the rotation is identity.
298  if ( gamma != one || sigma != zero )
299  {
300  MAC_Apply_G_mx2_asz( m_A,
301  &gamma,
302  &sigma,
303  a1, 1,
304  a2, 1 );
305  }
306 
307  a1 += cs_A;
308  a2 += cs_A;
309  g11 += rs_G;
310  }
311 
312  g1 += cs_G;
313  }
314 
315  return FLA_SUCCESS;
316 }

References bl1_d0(), bl1_d1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var1(), FLA_Apply_G_rf_asz_var2(), FLA_Apply_G_rf_asz_var3(), FLA_Apply_G_rf_asz_var6(), FLA_Apply_G_rf_asz_var9(), and FLA_Apply_G_rf_blz_var1().

◆ FLA_Apply_G_rf_asz_var2()

FLA_Error FLA_Apply_G_rf_asz_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
457 {
458  double one = bl1_d1();
459  double zero = bl1_d0();
460  double gamma;
461  double sigma;
462  dcomplex* a1;
463  dcomplex* a2;
464  dcomplex* g11;
465  int j, g, k;
466  int nG, nG_app;
467  int k_minus_1;
468 
469  k_minus_1 = k_G - 1;
470  nG = n_A - 1;
471 
472  // Use the simple variant for nG < (k - 1) or k_G == 1.
473  if ( nG < k_minus_1 || k_G == 1 )
474  {
475  FLA_Apply_G_rf_asz_var1( k_G,
476  m_A,
477  n_A,
478  buff_G, rs_G, cs_G,
479  buff_A, rs_A, cs_A );
480  return FLA_SUCCESS;
481  }
482 
483 
484  // Start-up phase.
485 
486  for ( j = 0; j < k_minus_1; ++j )
487  {
488  nG_app = j + 1;
489 
490  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
491  {
492  g11 = buff_G + (g )*rs_G + (k )*cs_G;
493  a1 = buff_A + (g )*cs_A;
494  a2 = buff_A + (g + 1)*cs_A;
495 
496  gamma = g11->real;
497  sigma = g11->imag;
498 
499  // Skip the current iteration if the rotation is identity.
500  if ( gamma == one && sigma == zero ) continue;
501 
502  MAC_Apply_G_mx2_asz( m_A,
503  &gamma,
504  &sigma,
505  a1, 1,
506  a2, 1 );
507  }
508  }
509 
510  // Pipeline stage
511 
512  for ( j = k_minus_1; j < nG; ++j )
513  {
514  nG_app = k_G;
515 
516  for ( k = 0, g = j; k < nG_app; ++k, --g )
517  {
518  g11 = buff_G + (g )*rs_G + (k )*cs_G;
519  a1 = buff_A + (g )*cs_A;
520  a2 = buff_A + (g + 1)*cs_A;
521 
522  gamma = g11->real;
523  sigma = g11->imag;
524 
525  // Skip the current iteration if the rotation is identity.
526  if ( gamma == one && sigma == zero ) continue;
527 
528  MAC_Apply_G_mx2_asz( m_A,
529  &gamma,
530  &sigma,
531  a1, 1,
532  a2, 1 );
533  }
534  }
535 
536  // Shutdown stage
537 
538  for ( j = nG - k_minus_1; j < nG; ++j )
539  {
540  nG_app = nG - j;
541 
542  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
543  {
544  g11 = buff_G + (g )*rs_G + (k )*cs_G;
545  a1 = buff_A + (g )*cs_A;
546  a2 = buff_A + (g + 1)*cs_A;
547 
548  gamma = g11->real;
549  sigma = g11->imag;
550 
551  // Skip the current iteration if the rotation is identity.
552  if ( gamma == one && sigma == zero ) continue;
553 
554  MAC_Apply_G_mx2_asz( m_A,
555  &gamma,
556  &sigma,
557  a1, 1,
558  a2, 1 );
559  }
560  }
561 
562  return FLA_SUCCESS;
563 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_blz_var2().

◆ FLA_Apply_G_rf_asz_var3()

FLA_Error FLA_Apply_G_rf_asz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
1478 {
1479  double one = bl1_d1();
1480  double zero = bl1_d0();
1481  double gamma23_k1;
1482  double sigma23_k1;
1483  double gamma34_k1;
1484  double sigma34_k1;
1485  double gamma12_k2;
1486  double sigma12_k2;
1487  double gamma23_k2;
1488  double sigma23_k2;
1489  dcomplex* a1;
1490  dcomplex* a2;
1491  dcomplex* a3;
1492  dcomplex* a4;
1493  dcomplex* g23_k1;
1494  dcomplex* g34_k1;
1495  dcomplex* g12_k2;
1496  dcomplex* g23_k2;
1497  int i, j, g, k;
1498  int nG, nG_app;
1499  int n_iter;
1500  int n_left;
1501  int k_minus_1;
1502  int n_fuse;
1503  int k_fuse;
1504  int is_ident23_k1, is_ident34_k1;
1505  int is_ident12_k2, is_ident23_k2;
1506  int has_ident;
1507 
1508  k_minus_1 = k_G - 1;
1509  nG = n_A - 1;
1510  n_fuse = 2;
1511  k_fuse = 2;
1512 
1513  // Use the simple variant for nG < 2(k - 1) or k_G == 1.
1514  if ( nG < 2*k_minus_1 || k_G == 1 )
1515  {
1516  FLA_Apply_G_rf_asz_var1( k_G,
1517  m_A,
1518  n_A,
1519  buff_G, rs_G, cs_G,
1520  buff_A, rs_A, cs_A );
1521  return FLA_SUCCESS;
1522  }
1523 
1524 
1525  // Start-up phase.
1526 
1527  for ( j = -1; j < k_minus_1; j += n_fuse )
1528  {
1529  nG_app = j + 2;
1530  n_iter = nG_app / k_fuse;
1531  //n_iter = nG_app % k_fuse;
1532  n_left = 1;
1533 
1534  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1535  {
1536  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1537  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1538  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1539  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1540  a1 = buff_A + (g - 1)*cs_A;
1541  a2 = buff_A + (g )*cs_A;
1542  a3 = buff_A + (g + 1)*cs_A;
1543  a4 = buff_A + (g + 2)*cs_A;
1544 
1545  gamma23_k1 = g23_k1->real;
1546  sigma23_k1 = g23_k1->imag;
1547  gamma34_k1 = g34_k1->real;
1548  sigma34_k1 = g34_k1->imag;
1549  gamma12_k2 = g12_k2->real;
1550  sigma12_k2 = g12_k2->imag;
1551  gamma23_k2 = g23_k2->real;
1552  sigma23_k2 = g23_k2->imag;
1553 
1554  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1555  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1556  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1557  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1558  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1559  is_ident12_k2 || is_ident23_k2 );
1560 
1561  if ( has_ident )
1562  {
1563  // Apply to pairs of columns as needed.
1564 
1565  if ( !is_ident23_k1 )
1566  MAC_Apply_G_mx2_asz( m_A,
1567  &gamma23_k1,
1568  &sigma23_k1,
1569  a2, 1,
1570  a3, 1 );
1571 
1572  if ( !is_ident34_k1 )
1573  MAC_Apply_G_mx2_asz( m_A,
1574  &gamma34_k1,
1575  &sigma34_k1,
1576  a3, 1,
1577  a4, 1 );
1578 
1579  if ( !is_ident12_k2 )
1580  MAC_Apply_G_mx2_asz( m_A,
1581  &gamma12_k2,
1582  &sigma12_k2,
1583  a1, 1,
1584  a2, 1 );
1585 
1586  if ( !is_ident23_k2 )
1587  MAC_Apply_G_mx2_asz( m_A,
1588  &gamma23_k2,
1589  &sigma23_k2,
1590  a2, 1,
1591  a3, 1 );
1592  }
1593  else
1594  {
1595  // Apply to all four columns.
1596 
1597  MAC_Apply_G_mx4s_asz( m_A,
1598  &gamma23_k1,
1599  &sigma23_k1,
1600  &gamma34_k1,
1601  &sigma34_k1,
1602  &gamma12_k2,
1603  &sigma12_k2,
1604  &gamma23_k2,
1605  &sigma23_k2,
1606  a1, 1,
1607  a2, 1,
1608  a3, 1,
1609  a4, 1 );
1610  }
1611  }
1612 
1613  if ( n_left == 1 )
1614  {
1615  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1616  a3 = buff_A + (g + 1)*cs_A;
1617  a4 = buff_A + (g + 2)*cs_A;
1618 
1619  gamma34_k1 = g34_k1->real;
1620  sigma34_k1 = g34_k1->imag;
1621 
1622  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1623 
1624  if ( !is_ident34_k1 )
1625  MAC_Apply_G_mx2_asz( m_A,
1626  &gamma34_k1,
1627  &sigma34_k1,
1628  a3, 1,
1629  a4, 1 );
1630  }
1631  }
1632 
1633  // Pipeline stage
1634 
1635  for ( ; j < nG - 1; j += n_fuse )
1636  {
1637  nG_app = k_G;
1638  n_iter = nG_app / k_fuse;
1639  n_left = nG_app % k_fuse;
1640 
1641  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1642  {
1643  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1644  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1645  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1646  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1647  a1 = buff_A + (g - 1)*cs_A;
1648  a2 = buff_A + (g )*cs_A;
1649  a3 = buff_A + (g + 1)*cs_A;
1650  a4 = buff_A + (g + 2)*cs_A;
1651 
1652  gamma23_k1 = g23_k1->real;
1653  sigma23_k1 = g23_k1->imag;
1654  gamma34_k1 = g34_k1->real;
1655  sigma34_k1 = g34_k1->imag;
1656  gamma12_k2 = g12_k2->real;
1657  sigma12_k2 = g12_k2->imag;
1658  gamma23_k2 = g23_k2->real;
1659  sigma23_k2 = g23_k2->imag;
1660 
1661  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1662  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1663  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1664  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1665  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1666  is_ident12_k2 || is_ident23_k2 );
1667 
1668  if ( has_ident )
1669  {
1670  // Apply to pairs of columns as needed.
1671 
1672  if ( !is_ident23_k1 )
1673  MAC_Apply_G_mx2_asz( m_A,
1674  &gamma23_k1,
1675  &sigma23_k1,
1676  a2, 1,
1677  a3, 1 );
1678 
1679  if ( !is_ident34_k1 )
1680  MAC_Apply_G_mx2_asz( m_A,
1681  &gamma34_k1,
1682  &sigma34_k1,
1683  a3, 1,
1684  a4, 1 );
1685 
1686  if ( !is_ident12_k2 )
1687  MAC_Apply_G_mx2_asz( m_A,
1688  &gamma12_k2,
1689  &sigma12_k2,
1690  a1, 1,
1691  a2, 1 );
1692 
1693  if ( !is_ident23_k2 )
1694  MAC_Apply_G_mx2_asz( m_A,
1695  &gamma23_k2,
1696  &sigma23_k2,
1697  a2, 1,
1698  a3, 1 );
1699  }
1700  else
1701  {
1702  // Apply to all four columns.
1703 
1704  MAC_Apply_G_mx4s_asz( m_A,
1705  &gamma23_k1,
1706  &sigma23_k1,
1707  &gamma34_k1,
1708  &sigma34_k1,
1709  &gamma12_k2,
1710  &sigma12_k2,
1711  &gamma23_k2,
1712  &sigma23_k2,
1713  a1, 1,
1714  a2, 1,
1715  a3, 1,
1716  a4, 1 );
1717  }
1718  }
1719 
1720  if ( n_left == 1 )
1721  {
1722  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1723  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1724  a2 = buff_A + (g )*cs_A;
1725  a3 = buff_A + (g + 1)*cs_A;
1726  a4 = buff_A + (g + 2)*cs_A;
1727 
1728  gamma23_k1 = g23_k1->real;
1729  sigma23_k1 = g23_k1->imag;
1730  gamma34_k1 = g34_k1->real;
1731  sigma34_k1 = g34_k1->imag;
1732 
1733  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1734  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1735 
1736  if ( !is_ident23_k1 && is_ident34_k1 )
1737  {
1738  MAC_Apply_G_mx2_asz( m_A,
1739  &gamma23_k1,
1740  &sigma23_k1,
1741  a2, 1,
1742  a3, 1 );
1743  }
1744  else if ( is_ident23_k1 && !is_ident34_k1 )
1745  {
1746  MAC_Apply_G_mx2_asz( m_A,
1747  &gamma34_k1,
1748  &sigma34_k1,
1749  a3, 1,
1750  a4, 1 );
1751  }
1752  else
1753  {
1754  MAC_Apply_G_mx3_asz( m_A,
1755  &gamma23_k1,
1756  &sigma23_k1,
1757  &gamma34_k1,
1758  &sigma34_k1,
1759  a2, 1,
1760  a3, 1,
1761  a4, 1 );
1762  }
1763  }
1764  }
1765 
1766  // Shutdown stage
1767 
1768  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1769  {
1770  g = nG - 1;
1771  k = j;
1772 
1773  //n_left = 1;
1774  //if ( n_left == 1 )
1775  {
1776  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1777  a2 = buff_A + (g )*cs_A;
1778  a3 = buff_A + (g + 1)*cs_A;
1779 
1780  gamma23_k1 = g23_k1->real;
1781  sigma23_k1 = g23_k1->imag;
1782 
1783  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1784 
1785  if ( !is_ident23_k1 )
1786  MAC_Apply_G_mx2_asz( m_A,
1787  &gamma23_k1,
1788  &sigma23_k1,
1789  a2, 1,
1790  a3, 1 );
1791  ++k;
1792  --g;
1793  }
1794 
1795  nG_app = k_minus_1 - j;
1796  n_iter = nG_app / k_fuse;
1797  n_left = nG_app % k_fuse;
1798 
1799  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1800  {
1801  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1802  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1803  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1804  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1805  a1 = buff_A + (g - 1)*cs_A;
1806  a2 = buff_A + (g )*cs_A;
1807  a3 = buff_A + (g + 1)*cs_A;
1808  a4 = buff_A + (g + 2)*cs_A;
1809 
1810  gamma23_k1 = g23_k1->real;
1811  sigma23_k1 = g23_k1->imag;
1812  gamma34_k1 = g34_k1->real;
1813  sigma34_k1 = g34_k1->imag;
1814  gamma12_k2 = g12_k2->real;
1815  sigma12_k2 = g12_k2->imag;
1816  gamma23_k2 = g23_k2->real;
1817  sigma23_k2 = g23_k2->imag;
1818 
1819  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1820  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1821  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1822  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1823  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1824  is_ident12_k2 || is_ident23_k2 );
1825 
1826  if ( has_ident )
1827  {
1828  // Apply to pairs of columns as needed.
1829 
1830  if ( !is_ident23_k1 )
1831  MAC_Apply_G_mx2_asz( m_A,
1832  &gamma23_k1,
1833  &sigma23_k1,
1834  a2, 1,
1835  a3, 1 );
1836 
1837  if ( !is_ident34_k1 )
1838  MAC_Apply_G_mx2_asz( m_A,
1839  &gamma34_k1,
1840  &sigma34_k1,
1841  a3, 1,
1842  a4, 1 );
1843 
1844  if ( !is_ident12_k2 )
1845  MAC_Apply_G_mx2_asz( m_A,
1846  &gamma12_k2,
1847  &sigma12_k2,
1848  a1, 1,
1849  a2, 1 );
1850 
1851  if ( !is_ident23_k2 )
1852  MAC_Apply_G_mx2_asz( m_A,
1853  &gamma23_k2,
1854  &sigma23_k2,
1855  a2, 1,
1856  a3, 1 );
1857  }
1858  else
1859  {
1860  // Apply to all four columns.
1861 
1862  MAC_Apply_G_mx4s_asz( m_A,
1863  &gamma23_k1,
1864  &sigma23_k1,
1865  &gamma34_k1,
1866  &sigma34_k1,
1867  &gamma12_k2,
1868  &sigma12_k2,
1869  &gamma23_k2,
1870  &sigma23_k2,
1871  a1, 1,
1872  a2, 1,
1873  a3, 1,
1874  a4, 1 );
1875  }
1876  }
1877 
1878  if ( n_left == 1 )
1879  {
1880  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1881  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1882  a2 = buff_A + (g )*cs_A;
1883  a3 = buff_A + (g + 1)*cs_A;
1884  a4 = buff_A + (g + 2)*cs_A;
1885 
1886  gamma23_k1 = g23_k1->real;
1887  sigma23_k1 = g23_k1->imag;
1888  gamma34_k1 = g34_k1->real;
1889  sigma34_k1 = g34_k1->imag;
1890 
1891  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1892  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1893 
1894  if ( !is_ident23_k1 && is_ident34_k1 )
1895  {
1896  MAC_Apply_G_mx2_asz( m_A,
1897  &gamma23_k1,
1898  &sigma23_k1,
1899  a2, 1,
1900  a3, 1 );
1901  }
1902  else if ( is_ident23_k1 && !is_ident34_k1 )
1903  {
1904  MAC_Apply_G_mx2_asz( m_A,
1905  &gamma34_k1,
1906  &sigma34_k1,
1907  a3, 1,
1908  a4, 1 );
1909  }
1910  else
1911  {
1912  MAC_Apply_G_mx3_asz( m_A,
1913  &gamma23_k1,
1914  &sigma23_k1,
1915  &gamma34_k1,
1916  &sigma34_k1,
1917  a2, 1,
1918  a3, 1,
1919  a4, 1 );
1920  }
1921  }
1922  }
1923 
1924  return FLA_SUCCESS;
1925 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var3().

◆ FLA_Apply_G_rf_asz_var3b()

FLA_Error FLA_Apply_G_rf_asz_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
627 {
628  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
629 
630  return FLA_SUCCESS;
631 }

Referenced by FLA_Apply_G_rf_asm_var3b().

◆ FLA_Apply_G_rf_asz_var4()

FLA_Error FLA_Apply_G_rf_asz_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var5()

FLA_Error FLA_Apply_G_rf_asz_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var5b()

FLA_Error FLA_Apply_G_rf_asz_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var6()

FLA_Error FLA_Apply_G_rf_asz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
961 {
962  double one = bl1_d1();
963  double zero = bl1_d0();
964  double gamma12;
965  double sigma12;
966  double gamma23;
967  double sigma23;
968  dcomplex* a1;
969  dcomplex* a2;
970  dcomplex* a3;
971  dcomplex* g12;
972  dcomplex* g23;
973  int i, j, g, k;
974  int nG, nG_app;
975  int n_iter;
976  int n_left;
977  int k_minus_1;
978  int n_fuse;
979  int is_ident12, is_ident23;
980 
981  k_minus_1 = k_G - 1;
982  nG = n_A - 1;
983  n_fuse = 2;
984 
985  // Use the simple variant for nG < (k - 1) or k == 1.
986  if ( nG < k_minus_1 || k_G == 1 )
987  {
988  FLA_Apply_G_rf_asz_var1( k_G,
989  m_A,
990  n_A,
991  buff_G, rs_G, cs_G,
992  buff_A, rs_A, cs_A );
993  return FLA_SUCCESS;
994  }
995 
996 
997  // Start-up phase.
998 
999  for ( j = 0; j < k_minus_1; ++j )
1000  {
1001  nG_app = j + 1;
1002  n_iter = nG_app / n_fuse;
1003  n_left = nG_app % n_fuse;
1004 
1005  //for ( k = 0, g = nG_app - 1; k < nG_app; k += n_fuse, g -= n_fuse )
1006  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1007  {
1008  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1009  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1010  a1 = buff_A + (g - 1)*cs_A;
1011  a2 = buff_A + (g )*cs_A;
1012  a3 = buff_A + (g + 1)*cs_A;
1013 
1014  gamma12 = g12->real;
1015  sigma12 = g12->imag;
1016  gamma23 = g23->real;
1017  sigma23 = g23->imag;
1018 
1019  is_ident12 = ( gamma12 == one && sigma12 == zero );
1020  is_ident23 = ( gamma23 == one && sigma23 == zero );
1021 
1022  if ( !is_ident12 && is_ident23 )
1023  {
1024  // Apply only to columns 1 and 2.
1025 
1026  MAC_Apply_G_mx2_asz( m_A,
1027  &gamma12,
1028  &sigma12,
1029  a1, 1,
1030  a2, 1 );
1031  }
1032  else if ( is_ident12 && !is_ident23 )
1033  {
1034  // Apply only to columns 2 and 3.
1035 
1036  MAC_Apply_G_mx2_asz( m_A,
1037  &gamma23,
1038  &sigma23,
1039  a2, 1,
1040  a3, 1 );
1041  }
1042  else if ( !is_ident12 && !is_ident23 )
1043  {
1044  // Apply to all three columns.
1045 
1046  MAC_Apply_G_mx3b_asz( m_A,
1047  &gamma12,
1048  &sigma12,
1049  &gamma23,
1050  &sigma23,
1051  a1, 1,
1052  a2, 1,
1053  a3, 1 );
1054  }
1055  }
1056  //for ( k = 0; k < n_left; k += 1, g -= 1 )
1057  if ( n_left == 1 )
1058  {
1059  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1060  a2 = buff_A + (g )*cs_A;
1061  a3 = buff_A + (g + 1)*cs_A;
1062 
1063  gamma23 = g23->real;
1064  sigma23 = g23->imag;
1065 
1066  is_ident23 = ( gamma23 == one && sigma23 == zero );
1067 
1068  if ( !is_ident23 )
1069  MAC_Apply_G_mx2_asz( m_A,
1070  &gamma23,
1071  &sigma23,
1072  a2, 1,
1073  a3, 1 );
1074  }
1075  }
1076 
1077  // Pipeline stage
1078 
1079  for ( j = k_minus_1; j < nG; ++j )
1080  {
1081  nG_app = k_G;
1082  n_iter = nG_app / n_fuse;
1083  n_left = nG_app % n_fuse;
1084 
1085  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1086  {
1087  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1088  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1089  a1 = buff_A + (g - 1)*cs_A;
1090  a2 = buff_A + (g )*cs_A;
1091  a3 = buff_A + (g + 1)*cs_A;
1092 
1093  gamma12 = g12->real;
1094  sigma12 = g12->imag;
1095  gamma23 = g23->real;
1096  sigma23 = g23->imag;
1097 
1098  is_ident12 = ( gamma12 == one && sigma12 == zero );
1099  is_ident23 = ( gamma23 == one && sigma23 == zero );
1100 
1101  if ( !is_ident12 && is_ident23 )
1102  {
1103  // Apply only to columns 1 and 2.
1104 
1105  MAC_Apply_G_mx2_asz( m_A,
1106  &gamma12,
1107  &sigma12,
1108  a1, 1,
1109  a2, 1 );
1110  }
1111  else if ( is_ident12 && !is_ident23 )
1112  {
1113  // Apply only to columns 2 and 3.
1114 
1115  MAC_Apply_G_mx2_asz( m_A,
1116  &gamma23,
1117  &sigma23,
1118  a2, 1,
1119  a3, 1 );
1120  }
1121  else if ( !is_ident12 && !is_ident23 )
1122  {
1123  // Apply to all three columns.
1124 
1125  MAC_Apply_G_mx3b_asz( m_A,
1126  &gamma12,
1127  &sigma12,
1128  &gamma23,
1129  &sigma23,
1130  a1, 1,
1131  a2, 1,
1132  a3, 1 );
1133  }
1134  }
1135  //for ( k = 0; k < n_left; k += 1, g -= 1 )
1136  if ( n_left == 1 )
1137  {
1138  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1139  a2 = buff_A + (g )*cs_A;
1140  a3 = buff_A + (g + 1)*cs_A;
1141 
1142  gamma23 = g23->real;
1143  sigma23 = g23->imag;
1144 
1145  is_ident23 = ( gamma23 == one && sigma23 == zero );
1146 
1147  if ( !is_ident23 )
1148  MAC_Apply_G_mx2_asz( m_A,
1149  &gamma23,
1150  &sigma23,
1151  a2, 1,
1152  a3, 1 );
1153  }
1154  }
1155 
1156  // Shutdown stage
1157 
1158  for ( j = 1; j < k_G; ++j )
1159  {
1160  nG_app = k_G - j;
1161  n_iter = nG_app / n_fuse;
1162  n_left = nG_app % n_fuse;
1163 
1164  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1165  {
1166  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1167  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1168  a1 = buff_A + (g - 1)*cs_A;
1169  a2 = buff_A + (g )*cs_A;
1170  a3 = buff_A + (g + 1)*cs_A;
1171 
1172  gamma12 = g12->real;
1173  sigma12 = g12->imag;
1174  gamma23 = g23->real;
1175  sigma23 = g23->imag;
1176 
1177  is_ident12 = ( gamma12 == one && sigma12 == zero );
1178  is_ident23 = ( gamma23 == one && sigma23 == zero );
1179 
1180  if ( !is_ident12 && is_ident23 )
1181  {
1182  // Apply only to columns 1 and 2.
1183 
1184  MAC_Apply_G_mx2_asz( m_A,
1185  &gamma12,
1186  &sigma12,
1187  a1, 1,
1188  a2, 1 );
1189  }
1190  else if ( is_ident12 && !is_ident23 )
1191  {
1192  // Apply only to columns 2 and 3.
1193 
1194  MAC_Apply_G_mx2_asz( m_A,
1195  &gamma23,
1196  &sigma23,
1197  a2, 1,
1198  a3, 1 );
1199  }
1200  else if ( !is_ident12 && !is_ident23 )
1201  {
1202  // Apply to all three columns.
1203 
1204  MAC_Apply_G_mx3b_asz( m_A,
1205  &gamma12,
1206  &sigma12,
1207  &gamma23,
1208  &sigma23,
1209  a1, 1,
1210  a2, 1,
1211  a3, 1 );
1212  }
1213  }
1214  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
1215  if ( n_left == 1 )
1216  {
1217  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1218  a2 = buff_A + (g )*cs_A;
1219  a3 = buff_A + (g + 1)*cs_A;
1220 
1221  gamma23 = g23->real;
1222  sigma23 = g23->imag;
1223 
1224  is_ident23 = ( gamma23 == one && sigma23 == zero );
1225 
1226  if ( !is_ident23 )
1227  MAC_Apply_G_mx2_asz( m_A,
1228  &gamma23,
1229  &sigma23,
1230  a2, 1,
1231  a3, 1 );
1232  }
1233  }
1234 
1235  return FLA_SUCCESS;
1236 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blz_var6().
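The unblocked variants above all reduce to the same column update: a rotation stored as the pair (gamma, sigma) in one dcomplex entry of buff_G is applied to two adjacent columns of A, and the fused mx3/mx4 macros simply chain two or three such updates per pass over the rows. The sketch below illustrates that update for dcomplex data with unit row stride; the helper name, the local dcomplex_sketch type, and the sign convention are assumptions for illustration only and do not reproduce the actual MAC_Apply_G_mx2_asz macro.

    /* Minimal sketch (not the libflame macro): apply one real Givens
       rotation (gamma, sigma) to two dcomplex columns of length m_A,
       both stored with unit row stride. */
    typedef struct { double real, imag; } dcomplex_sketch;

    static void apply_g_mx2_sketch( int m_A,
                                    double gamma, double sigma,
                                    dcomplex_sketch* a1,
                                    dcomplex_sketch* a2 )
    {
        int i;
        for ( i = 0; i < m_A; ++i )
        {
            double t_r = a1[i].real, t_i = a1[i].imag;

            /* a1 := gamma*a1 + sigma*a2;  a2 := gamma*a2 - sigma*a1_old */
            a1[i].real =  gamma * t_r + sigma * a2[i].real;
            a1[i].imag =  gamma * t_i + sigma * a2[i].imag;
            a2[i].real = -sigma * t_r + gamma * a2[i].real;
            a2[i].imag = -sigma * t_i + gamma * a2[i].imag;
        }
    }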

◆ FLA_Apply_G_rf_asz_var6b()

FLA_Error FLA_Apply_G_rf_asz_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
450 {
451  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
452 
453  return FLA_SUCCESS;
454 }

Referenced by FLA_Apply_G_rf_asm_var6b().

◆ FLA_Apply_G_rf_asz_var7()

FLA_Error FLA_Apply_G_rf_asz_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var8()

FLA_Error FLA_Apply_G_rf_asz_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var8b()

FLA_Error FLA_Apply_G_rf_asz_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_asz_var9()

FLA_Error FLA_Apply_G_rf_asz_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
919 {
920  double one = bl1_d1();
921  double zero = bl1_d0();
922  double gamma12;
923  double sigma12;
924  double gamma23;
925  double sigma23;
926  dcomplex* a1;
927  dcomplex* a2;
928  dcomplex* a3;
929  dcomplex* g12;
930  dcomplex* g23;
931  int i, j, g, k;
932  int nG, nG_app;
933  int n_iter;
934  int n_left;
935  int k_minus_1;
936  int n_fuse;
937  int is_ident12, is_ident23;
938 
939  k_minus_1 = k_G - 1;
940  nG = n_A - 1;
941  n_fuse = 2;
942 
943  // Use the simple variant for nG < 2*(k - 1) or k == 1.
944  if ( nG < 2*k_minus_1 || k_G == 1 )
945  {
946  FLA_Apply_G_rf_asz_var1( k_G,
947  m_A,
948  n_A,
949  buff_G, rs_G, cs_G,
950  buff_A, rs_A, cs_A );
951  return FLA_SUCCESS;
952  }
953 
954 
955  // Start-up phase.
956 
957  for ( j = -1; j < k_minus_1; j += n_fuse )
958  {
959  nG_app = j + 1;
960  n_iter = nG_app;
961  n_left = 1;
962 
963  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
964  {
965  g12 = buff_G + (g )*rs_G + (k )*cs_G;
966  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
967  a1 = buff_A + (g )*cs_A;
968  a2 = buff_A + (g + 1)*cs_A;
969  a3 = buff_A + (g + 2)*cs_A;
970 
971  gamma12 = g12->real;
972  sigma12 = g12->imag;
973  gamma23 = g23->real;
974  sigma23 = g23->imag;
975 
976  is_ident12 = ( gamma12 == one && sigma12 == zero );
977  is_ident23 = ( gamma23 == one && sigma23 == zero );
978 
979  if ( !is_ident12 && is_ident23 )
980  {
981  // Apply only to columns 1 and 2.
982 
983  MAC_Apply_G_mx2_asz( m_A,
984  &gamma12,
985  &sigma12,
986  a1, 1,
987  a2, 1 );
988  }
989  else if ( is_ident12 && !is_ident23 )
990  {
991  // Apply only to columns 2 and 3.
992 
993  MAC_Apply_G_mx2_asz( m_A,
994  &gamma23,
995  &sigma23,
996  a2, 1,
997  a3, 1 );
998  }
999  else if ( !is_ident12 && !is_ident23 )
1000  {
1001  // Apply to all three columns.
1002 
1003  MAC_Apply_G_mx3_asz( m_A,
1004  &gamma12,
1005  &sigma12,
1006  &gamma23,
1007  &sigma23,
1008  a1, 1,
1009  a2, 1,
1010  a3, 1 );
1011  }
1012  }
1013 
1014  if ( n_left == 1 )
1015  {
1016  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1017  a2 = buff_A + (g + 1)*cs_A;
1018  a3 = buff_A + (g + 2)*cs_A;
1019 
1020  gamma23 = g23->real;
1021  sigma23 = g23->imag;
1022 
1023  is_ident23 = ( gamma23 == one && sigma23 == zero );
1024 
1025  if ( !is_ident23 )
1026  MAC_Apply_G_mx2_asz( m_A,
1027  &gamma23,
1028  &sigma23,
1029  a2, 1,
1030  a3, 1 );
1031  }
1032  }
1033 
1034  // Pipeline stage
1035 
1036  for ( ; j < nG - 1; j += n_fuse )
1037  {
1038  nG_app = k_G;
1039  n_iter = nG_app;
1040  n_left = 0;
1041 
1042  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
1043  {
1044  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1045  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1046  a1 = buff_A + (g )*cs_A;
1047  a2 = buff_A + (g + 1)*cs_A;
1048  a3 = buff_A + (g + 2)*cs_A;
1049 
1050  gamma12 = g12->real;
1051  sigma12 = g12->imag;
1052  gamma23 = g23->real;
1053  sigma23 = g23->imag;
1054 
1055  is_ident12 = ( gamma12 == one && sigma12 == zero );
1056  is_ident23 = ( gamma23 == one && sigma23 == zero );
1057 
1058  if ( !is_ident12 && is_ident23 )
1059  {
1060  // Apply only to columns 1 and 2.
1061 
1062  MAC_Apply_G_mx2_asz( m_A,
1063  &gamma12,
1064  &sigma12,
1065  a1, 1,
1066  a2, 1 );
1067  }
1068  else if ( is_ident12 && !is_ident23 )
1069  {
1070  // Apply only to columns 2 and 3.
1071 
1072  MAC_Apply_G_mx2_asz( m_A,
1073  &gamma23,
1074  &sigma23,
1075  a2, 1,
1076  a3, 1 );
1077  }
1078  else if ( !is_ident12 && !is_ident23 )
1079  {
1080  // Apply to all three columns.
1081 
1082  MAC_Apply_G_mx3_asz( m_A,
1083  &gamma12,
1084  &sigma12,
1085  &gamma23,
1086  &sigma23,
1087  a1, 1,
1088  a2, 1,
1089  a3, 1 );
1090  }
1091  }
1092  }
1093 
1094  // Shutdown stage
1095 
1096  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1097  {
1098  g = nG - 1;
1099  k = j;
1100 
1101  n_left = 1;
1102  if ( n_left == 1 )
1103  {
1104  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1105  a1 = buff_A + (g )*cs_A;
1106  a2 = buff_A + (g + 1)*cs_A;
1107 
1108  gamma12 = g12->real;
1109  sigma12 = g12->imag;
1110 
1111  is_ident12 = ( gamma12 == one && sigma12 == zero );
1112 
1113  if ( !is_ident12 )
1114  MAC_Apply_G_mx2_asz( m_A,
1115  &gamma12,
1116  &sigma12,
1117  a1, 1,
1118  a2, 1 );
1119  ++k;
1120  --g;
1121  }
1122 
1123  nG_app = k_minus_1 - j;
1124  n_iter = nG_app;
1125 
1126  for ( i = 0; i < n_iter; ++i, ++k, --g )
1127  {
1128  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1129  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1130  a1 = buff_A + (g )*cs_A;
1131  a2 = buff_A + (g + 1)*cs_A;
1132  a3 = buff_A + (g + 2)*cs_A;
1133 
1134  gamma12 = g12->real;
1135  sigma12 = g12->imag;
1136  gamma23 = g23->real;
1137  sigma23 = g23->imag;
1138 
1139  is_ident12 = ( gamma12 == one && sigma12 == zero );
1140  is_ident23 = ( gamma23 == one && sigma23 == zero );
1141 
1142  if ( !is_ident12 && is_ident23 )
1143  {
1144  // Apply only to columns 1 and 2.
1145 
1146  MAC_Apply_G_mx2_asz( m_A,
1147  &gamma12,
1148  &sigma12,
1149  a1, 1,
1150  a2, 1 );
1151  }
1152  else if ( is_ident12 && !is_ident23 )
1153  {
1154  // Apply only to columns 2 and 3.
1155 
1156  MAC_Apply_G_mx2_asz( m_A,
1157  &gamma23,
1158  &sigma23,
1159  a2, 1,
1160  a3, 1 );
1161  }
1162  else if ( !is_ident12 && !is_ident23 )
1163  {
1164  // Apply to all three columns.
1165 
1166  MAC_Apply_G_mx3_asz( m_A,
1167  &gamma12,
1168  &sigma12,
1169  &gamma23,
1170  &sigma23,
1171  a1, 1,
1172  a2, 1,
1173  a3, 1 );
1174  }
1175  }
1176  }
1177 
1178  return FLA_SUCCESS;
1179 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_asz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_blz_var9().
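All of these variants read the rotations through the same addressing pattern: rotation g of sweep k is fetched from buff_G + g*rs_G + k*cs_G, with the cosine (gamma) in the real part and the sine (sigma) in the imaginary part, and a pair equal to (1, 0) is treated as the identity and skipped. A small accessor sketch of that convention follows; the dcomplex_sketch type and helper names are hypothetical.

    /* Sketch of the rotation storage convention assumed by the code above. */
    typedef struct { double real, imag; } dcomplex_sketch;

    static void read_rotation_sketch( const dcomplex_sketch* buff_G,
                                      int rs_G, int cs_G,
                                      int g, int k,
                                      double* gamma, double* sigma )
    {
        const dcomplex_sketch* gij = buff_G + g*rs_G + k*cs_G;
        *gamma = gij->real;   /* cosine of rotation g in sweep k */
        *sigma = gij->imag;   /* sine of rotation g in sweep k   */
    }

    static int is_identity_sketch( double gamma, double sigma )
    {
        return ( gamma == 1.0 && sigma == 0.0 );   /* (1, 0) is skipped */
    }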

◆ FLA_Apply_G_rf_asz_var9b()

FLA_Error FLA_Apply_G_rf_asz_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
int  iTL,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A,
int  rs_A,
int  cs_A 
)
435 {
436  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
437 
438  return FLA_SUCCESS;
439 }

Referenced by FLA_Apply_G_rf_asm_var9b().

◆ FLA_Apply_G_rf_bhc_var3()

FLA_Error FLA_Apply_G_rf_bhc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bhd_var3()

FLA_Error FLA_Apply_G_rf_bhd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bhs_var3()

FLA_Error FLA_Apply_G_rf_bhs_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bhz_var3()

FLA_Error FLA_Apply_G_rf_bhz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
dcomplex buff_A, 
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var1()

FLA_Error FLA_Apply_G_rf_blc_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
161 {
162  int i;
163  int b = 0;
164 
165  for ( i = 0; i < m_A; i += b )
166  {
167  scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
168  int m_ahead = max( 0, m_A - i );
169 
170  b = min( b_alg, m_ahead );
171 
172  //FLA_Apply_G_rf_opc_var1( k_G,
173  FLA_Apply_G_rf_asc_var1( k_G,
174  b,
175  n_A,
176  buff_G, rs_G, cs_G,
177  A1, rs_A, cs_A );
178  }
179 
180  return FLA_SUCCESS;
181 }

References FLA_Apply_G_rf_asc_var1(), and i.

Referenced by FLA_Apply_G_rf_blk_var1().
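The bls/bld/blc/blz wrappers in this group all share the blocking scheme visible above: the m_A rows of A are visited in panels of at most b_alg rows, and the corresponding unblocked kernel is called once per panel with A1 pointing at the panel's first row. A generic sketch of that loop is given below; apply_panel is a placeholder, not a libflame routine, and b_alg is assumed positive.

    /* Generic row-panel blocking sketch (apply_panel is a placeholder). */
    static void apply_by_row_panels( int m_A, int rs_A, int b_alg,
                                     double* buff_A,
                                     void (*apply_panel)( int m, double* A1 ) )
    {
        int i, b;

        for ( i = 0; i < m_A; i += b )
        {
            double* A1      = buff_A + i * rs_A;  /* first row of this panel */
            int     m_ahead = m_A - i;            /* rows not yet processed  */

            b = ( b_alg < m_ahead ? b_alg : m_ahead );
            apply_panel( b, A1 );
        }
    }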

◆ FLA_Apply_G_rf_blc_var2()

FLA_Error FLA_Apply_G_rf_blc_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
161 {
162  int i;
163  int b = 0;
164 
165  for ( i = 0; i < m_A; i += b )
166  {
167  scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
168  int m_ahead = max( 0, m_A - i );
169 
170  b = min( b_alg, m_ahead );
171 
172  //FLA_Apply_G_rf_opc_var2( k_G,
173  FLA_Apply_G_rf_asc_var2( k_G,
174  b,
175  n_A,
176  buff_G, rs_G, cs_G,
177  A1, rs_A, cs_A );
178  }
179 
180  return FLA_SUCCESS;
181 }

References FLA_Apply_G_rf_asc_var2(), and i.

Referenced by FLA_Apply_G_rf_blk_var2().

◆ FLA_Apply_G_rf_blc_var3()

FLA_Error FLA_Apply_G_rf_blc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
163 {
164  int i;
165  int b = 0;
166 
167  for ( i = 0; i < m_A; i += b )
168  {
169  scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
170  int m_ahead = max( 0, m_A - i );
171 
172  b = min( b_alg, m_ahead );
173 
174  // ass_var3 does not support arbitrary strides: TODO
175  FLA_Apply_G_rf_opc_var3( k_G,
176  //FLA_Apply_G_rf_asc_var3( k_G,
177  b,
178  n_A,
179  buff_G, rs_G, cs_G,
180  A1, rs_A, cs_A );
181  }
182 
183  return FLA_SUCCESS;
184 }

References FLA_Apply_G_rf_opc_var3(), and i.

Referenced by FLA_Apply_G_lf_blk_var3(), FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_ext_opc_var1(), and FLA_Bsvd_v_opc_var1().

◆ FLA_Apply_G_rf_blc_var3b()

FLA_Error FLA_Apply_G_rf_blc_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
174 {
175  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
176 
177  return FLA_SUCCESS;
178 }

Referenced by FLA_Apply_G_rf_blk_var3b().

◆ FLA_Apply_G_rf_blc_var4()

FLA_Error FLA_Apply_G_rf_blc_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var5()

FLA_Error FLA_Apply_G_rf_blc_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var5b()

FLA_Error FLA_Apply_G_rf_blc_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var6()

FLA_Error FLA_Apply_G_rf_blc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
161 {
162  int i;
163  int b = 0;
164 
165  for ( i = 0; i < m_A; i += b )
166  {
167  scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
168  int m_ahead = max( 0, m_A - i );
169 
170  b = min( b_alg, m_ahead );
171 
172  //FLA_Apply_G_rf_opc_var6( k_G,
173  FLA_Apply_G_rf_asc_var6( k_G,
174  b,
175  n_A,
176  buff_G, rs_G, cs_G,
177  A1, rs_A, cs_A );
178  }
179 
180  return FLA_SUCCESS;
181 }

References FLA_Apply_G_rf_asc_var6(), and i.

Referenced by FLA_Apply_G_rf_blk_var6().

◆ FLA_Apply_G_rf_blc_var6b()

FLA_Error FLA_Apply_G_rf_blc_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
174 {
175  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
176 
177  return FLA_SUCCESS;
178 }

Referenced by FLA_Apply_G_rf_blk_var6b().

◆ FLA_Apply_G_rf_blc_var7()

FLA_Error FLA_Apply_G_rf_blc_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var8()

FLA_Error FLA_Apply_G_rf_blc_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var8b()

FLA_Error FLA_Apply_G_rf_blc_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blc_var9()

FLA_Error FLA_Apply_G_rf_blc_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
161 {
162  int i;
163  int b = 0;
164 
165  for ( i = 0; i < m_A; i += b )
166  {
167  scomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
168  int m_ahead = max( 0, m_A - i );
169 
170  b = min( b_alg, m_ahead );
171 
172  //FLA_Apply_G_rf_opc_var9( k_G,
173  FLA_Apply_G_rf_asc_var9( k_G,
174  b,
175  n_A,
176  buff_G, rs_G, cs_G,
177  A1, rs_A, cs_A );
178  }
179 
180  return FLA_SUCCESS;
181 }

References FLA_Apply_G_rf_asc_var9(), and i.

Referenced by FLA_Apply_G_rf_blk_var9().

◆ FLA_Apply_G_rf_blc_var9b()

FLA_Error FLA_Apply_G_rf_blc_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
scomplex buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
174 {
175  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
176 
177  return FLA_SUCCESS;
178 }

Referenced by FLA_Apply_G_rf_blk_var9b().

◆ FLA_Apply_G_rf_bld_var1()

FLA_Error FLA_Apply_G_rf_bld_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
133 {
134  int i;
135  int b = 0;
136 
137  for ( i = 0; i < m_A; i += b )
138  {
139  double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
140  int m_ahead = max( 0, m_A - i );
141 
142  b = min( b_alg, m_ahead );
143 
144  //FLA_Apply_G_rf_opd_var1( k_G,
145  FLA_Apply_G_rf_asd_var1( k_G,
146  b,
147  n_A,
148  buff_G, rs_G, cs_G,
149  A1, rs_A, cs_A );
150  }
151 
152  return FLA_SUCCESS;
153 }

References FLA_Apply_G_rf_asd_var1(), and i.

Referenced by FLA_Apply_G_rf_blk_var1().

◆ FLA_Apply_G_rf_bld_var2()

FLA_Error FLA_Apply_G_rf_bld_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
133 {
134  int i;
135  int b = 0;
136 
137  for ( i = 0; i < m_A; i += b )
138  {
139  double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
140  int m_ahead = max( 0, m_A - i );
141 
142  b = min( b_alg, m_ahead );
143 
144  //FLA_Apply_G_rf_opd_var2( k_G,
145  FLA_Apply_G_rf_asd_var2( k_G,
146  b,
147  n_A,
148  buff_G, rs_G, cs_G,
149  A1, rs_A, cs_A );
150  }
151 
152  return FLA_SUCCESS;
153 }

References FLA_Apply_G_rf_asd_var2(), and i.

Referenced by FLA_Apply_G_rf_blk_var2().

◆ FLA_Apply_G_rf_bld_var3()

FLA_Error FLA_Apply_G_rf_bld_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
134 {
135  int i;
136  int b = 0;
137 
138  for ( i = 0; i < m_A; i += b )
139  {
140  double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
141  int m_ahead = max( 0, m_A - i );
142 
143  b = min( b_alg, m_ahead );
144 
145  // ass_var3 does not support arbitrary strides: TODO
146  FLA_Apply_G_rf_opd_var3( k_G,
147  //FLA_Apply_G_rf_asd_var3( k_G,
148  b,
149  n_A,
150  buff_G, rs_G, cs_G,
151  A1, rs_A, cs_A );
152  }
153 
154  return FLA_SUCCESS;
155 }

References FLA_Apply_G_rf_opd_var3(), and i.

Referenced by FLA_Apply_G_lf_blk_var3(), FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_ext_opd_var1(), FLA_Bsvd_v_opd_var1(), and FLA_Tevd_v_opd_var1().

◆ FLA_Apply_G_rf_bld_var3b()

FLA_Error FLA_Apply_G_rf_bld_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
142 {
143  int i;
144  int b = 0;
145 
146  for ( i = 0; i < m_A; i += b )
147  {
148  double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
149  int m_behind = i;
150  int m_ahead = max( 0, m_A - i );
151 
152  b = min( b_alg, m_ahead );
153 
154  //FLA_Apply_G_rf_opd_var3b( k_G,
155  FLA_Apply_G_rf_asd_var3b( k_G,
156  b,
157  n_A,
158  i_k,
159  m_behind,
160  buff_G, rs_G, cs_G,
161  A1, rs_A, cs_A );
162  }
163 
164  return FLA_SUCCESS;
165 }

References FLA_Apply_G_rf_asd_var3b(), and i.

Referenced by FLA_Apply_G_rf_blk_var3b(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Tevd_v_opd_var2(), and FLA_Tevd_v_opz_var2().
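The *b wrappers differ from the plain blocked wrappers only in the two extra indices they forward to the unblocked kernels: i_k is passed through unchanged and iTL is set to m_behind, the number of rows already processed, so each panel call knows its vertical offset within A. A generic sketch follows; apply_panel_b is a placeholder, not a libflame routine.

    /* Row-panel blocking for the *b variants (apply_panel_b is a placeholder). */
    static void apply_by_row_panels_b( int m_A, int rs_A, int b_alg, int i_k,
                                       double* buff_A,
                                       void (*apply_panel_b)( int m, int i_k,
                                                              int iTL,
                                                              double* A1 ) )
    {
        int i, b;

        for ( i = 0; i < m_A; i += b )
        {
            double* A1       = buff_A + i * rs_A;
            int     m_behind = i;          /* rows above this panel -> iTL */
            int     m_ahead  = m_A - i;

            b = ( b_alg < m_ahead ? b_alg : m_ahead );
            apply_panel_b( b, i_k, m_behind, A1 );
        }
    }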

◆ FLA_Apply_G_rf_bld_var4()

FLA_Error FLA_Apply_G_rf_bld_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var5()

FLA_Error FLA_Apply_G_rf_bld_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var5b()

FLA_Error FLA_Apply_G_rf_bld_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var6()

FLA_Error FLA_Apply_G_rf_bld_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
133 {
134  int i;
135  int b = 0;
136 
137  for ( i = 0; i < m_A; i += b )
138  {
139  double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
140  int m_ahead = max( 0, m_A - i );
141 
142  b = min( b_alg, m_ahead );
143 
144  //FLA_Apply_G_rf_opd_var6( k_G,
145  FLA_Apply_G_rf_asd_var6( k_G,
146  b,
147  n_A,
148  buff_G, rs_G, cs_G,
149  A1, rs_A, cs_A );
150  }
151 
152  return FLA_SUCCESS;
153 }

References FLA_Apply_G_rf_asd_var6(), and i.

Referenced by FLA_Apply_G_rf_blk_var6().

◆ FLA_Apply_G_rf_bld_var6b()

FLA_Error FLA_Apply_G_rf_bld_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
142 {
143  int i;
144  int b = 0;
145 
146  for ( i = 0; i < m_A; i += b )
147  {
148  double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
149  int m_behind = i;
150  int m_ahead = max( 0, m_A - i );
151 
152  b = min( b_alg, m_ahead );
153 
154  //FLA_Apply_G_rf_opd_var6b( k_G,
155  FLA_Apply_G_rf_asd_var6b( k_G,
156  b,
157  n_A,
158  i_k,
159  m_behind,
160  buff_G, rs_G, cs_G,
161  A1, rs_A, cs_A );
162  }
163 
164  return FLA_SUCCESS;
165 }

References FLA_Apply_G_rf_asd_var6b(), and i.

Referenced by FLA_Apply_G_rf_blk_var6b().

◆ FLA_Apply_G_rf_bld_var7()

FLA_Error FLA_Apply_G_rf_bld_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var8()

FLA_Error FLA_Apply_G_rf_bld_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var8b()

FLA_Error FLA_Apply_G_rf_bld_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bld_var9()

FLA_Error FLA_Apply_G_rf_bld_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
133 {
134  int i;
135  int b = 0;
136 
137  for ( i = 0; i < m_A; i += b )
138  {
139  double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
140  int m_ahead = max( 0, m_A - i );
141 
142  b = min( b_alg, m_ahead );
143 
144  //FLA_Apply_G_rf_opd_var9( k_G,
145  FLA_Apply_G_rf_asd_var9( k_G,
146  b,
147  n_A,
148  buff_G, rs_G, cs_G,
149  A1, rs_A, cs_A );
150  }
151 
152  return FLA_SUCCESS;
153 }

References FLA_Apply_G_rf_asd_var9(), and i.

Referenced by FLA_Apply_G_rf_blk_var9().

◆ FLA_Apply_G_rf_bld_var9b()

FLA_Error FLA_Apply_G_rf_bld_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
142 {
143  int i;
144  int b = 0;
145 
146  for ( i = 0; i < m_A; i += b )
147  {
148  double* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
149  int m_behind = i;
150  int m_ahead = max( 0, m_A - i );
151 
152  b = min( b_alg, m_ahead );
153 
154  //FLA_Apply_G_rf_opd_var9b( k_G,
155  FLA_Apply_G_rf_asd_var9b( k_G,
156  b,
157  n_A,
158  i_k,
159  m_behind,
160  buff_G, rs_G, cs_G,
161  A1, rs_A, cs_A );
162  }
163 
164  return FLA_SUCCESS;
165 }

References FLA_Apply_G_rf_asd_var9b(), and i.

Referenced by FLA_Apply_G_rf_blk_var9b().

◆ FLA_Apply_G_rf_blk_var1()

FLA_Error FLA_Apply_G_rf_blk_var1 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14 {
15  FLA_Datatype datatype;
16  int k_G, m_A, n_A;
17  int rs_G, cs_G;
18  int rs_A, cs_A;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  k_G = FLA_Obj_width( G );
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25 
26  rs_G = FLA_Obj_row_stride( G );
27  cs_G = FLA_Obj_col_stride( G );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  switch ( datatype )
33  {
34  case FLA_FLOAT:
35  {
36  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
37  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38 
39  FLA_Apply_G_rf_bls_var1( k_G,
40  m_A,
41  n_A,
42  buff_G, rs_G, cs_G,
43  buff_A, rs_A, cs_A,
44  b_alg );
45 
46  break;
47  }
48 
49  case FLA_DOUBLE:
50  {
51  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
52  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53 
54  FLA_Apply_G_rf_bld_var1( k_G,
55  m_A,
56  n_A,
57  buff_G, rs_G, cs_G,
58  buff_A, rs_A, cs_A,
59  b_alg );
60 
61  break;
62  }
63 
64  case FLA_COMPLEX:
65  {
66  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
67  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
68 
69  FLA_Apply_G_rf_blc_var1( k_G,
70  m_A,
71  n_A,
72  buff_G, rs_G, cs_G,
73  buff_A, rs_A, cs_A,
74  b_alg );
75 
76  break;
77  }
78 
79  case FLA_DOUBLE_COMPLEX:
80  {
81  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
82  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
83 
84  FLA_Apply_G_rf_blz_var1( k_G,
85  m_A,
86  n_A,
87  buff_G, rs_G, cs_G,
88  buff_A, rs_A, cs_A,
89  b_alg );
90 
91  break;
92  }
93  }
94 
95  return FLA_SUCCESS;
96 }

References FLA_Apply_G_rf_blc_var1(), FLA_Apply_G_rf_bld_var1(), FLA_Apply_G_rf_bls_var1(), FLA_Apply_G_rf_blz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
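As a hedged usage sketch of the object-based driver above (not taken from the library's test suite): A is created as a double-precision matrix and G as a double-complex matrix with one rotation per (rotation index, sweep) pair, roughly (n_A - 1) x k_G, with gamma/sigma packed into the real/imaginary parts exactly as the flat-buffer variants read them. Filling G with meaningful rotations is omitted, and b_alg = 128 is an arbitrary choice.

    #include "FLAME.h"

    int main( void )
    {
        FLA_Obj A, G;
        dim_t   m_A = 1000, n_A = 100, k_G = 4;

        FLA_Init();

        /* Strides of 0 request the default (column-major) layout. */
        FLA_Obj_create( FLA_DOUBLE,         m_A,     n_A, 0, 0, &A );
        FLA_Obj_create( FLA_DOUBLE_COMPLEX, n_A - 1, k_G, 0, 0, &G );

        /* ... fill A with data and G with (gamma, sigma) rotation pairs ... */

        FLA_Apply_G_rf_blk_var1( G, A, 128 );

        FLA_Obj_free( &A );
        FLA_Obj_free( &G );
        FLA_Finalize();

        return 0;
    }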

◆ FLA_Apply_G_rf_blk_var2()

FLA_Error FLA_Apply_G_rf_blk_var2 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14 {
15  FLA_Datatype datatype;
16  int k_G, m_A, n_A;
17  int rs_G, cs_G;
18  int rs_A, cs_A;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  k_G = FLA_Obj_width( G );
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25 
26  rs_G = FLA_Obj_row_stride( G );
27  cs_G = FLA_Obj_col_stride( G );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  switch ( datatype )
33  {
34  case FLA_FLOAT:
35  {
36  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
37  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38 
39  FLA_Apply_G_rf_bls_var2( k_G,
40  m_A,
41  n_A,
42  buff_G, rs_G, cs_G,
43  buff_A, rs_A, cs_A,
44  b_alg );
45 
46  break;
47  }
48 
49  case FLA_DOUBLE:
50  {
51  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
52  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53 
54  FLA_Apply_G_rf_bld_var2( k_G,
55  m_A,
56  n_A,
57  buff_G, rs_G, cs_G,
58  buff_A, rs_A, cs_A,
59  b_alg );
60 
61  break;
62  }
63 
64  case FLA_COMPLEX:
65  {
66  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
67  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
68 
69  FLA_Apply_G_rf_blc_var2( k_G,
70  m_A,
71  n_A,
72  buff_G, rs_G, cs_G,
73  buff_A, rs_A, cs_A,
74  b_alg );
75 
76  break;
77  }
78 
79  case FLA_DOUBLE_COMPLEX:
80  {
81  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
82  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
83 
84  FLA_Apply_G_rf_blz_var2( k_G,
85  m_A,
86  n_A,
87  buff_G, rs_G, cs_G,
88  buff_A, rs_A, cs_A,
89  b_alg );
90 
91  break;
92  }
93  }
94 
95  return FLA_SUCCESS;
96 }

References FLA_Apply_G_rf_blc_var2(), FLA_Apply_G_rf_bld_var2(), FLA_Apply_G_rf_bls_var2(), FLA_Apply_G_rf_blz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_blk_var3()

FLA_Error FLA_Apply_G_rf_blk_var3 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14 {
15  FLA_Datatype datatype;
16  int k_G, m_A, n_A;
17  int rs_G, cs_G;
18  int rs_A, cs_A;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  k_G = FLA_Obj_width( G );
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25 
26  rs_G = FLA_Obj_row_stride( G );
27  cs_G = FLA_Obj_col_stride( G );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  switch ( datatype )
33  {
34  case FLA_FLOAT:
35  {
36  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
37  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38 
39  FLA_Apply_G_rf_bls_var3( k_G,
40  m_A,
41  n_A,
42  buff_G, rs_G, cs_G,
43  buff_A, rs_A, cs_A,
44  b_alg );
45 
46  break;
47  }
48 
49  case FLA_DOUBLE:
50  {
51  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
52  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53 
54  FLA_Apply_G_rf_bld_var3( k_G,
55  m_A,
56  n_A,
57  buff_G, rs_G, cs_G,
58  buff_A, rs_A, cs_A,
59  b_alg );
60 
61  break;
62  }
63 
64  case FLA_COMPLEX:
65  {
66  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
67  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
68 
69  FLA_Apply_G_rf_blc_var3( k_G,
70  m_A,
71  n_A,
72  buff_G, rs_G, cs_G,
73  buff_A, rs_A, cs_A,
74  b_alg );
75 
76  break;
77  }
78 
79  case FLA_DOUBLE_COMPLEX:
80  {
81  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
82  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
83 
84  FLA_Apply_G_rf_blz_var3( k_G,
85  m_A,
86  n_A,
87  buff_G, rs_G, cs_G,
88  buff_A, rs_A, cs_A,
89  b_alg );
90 
91  break;
92  }
93  }
94 
95  return FLA_SUCCESS;
96 }

References FLA_Apply_G_rf_blc_var3(), FLA_Apply_G_rf_bld_var3(), FLA_Apply_G_rf_bls_var3(), FLA_Apply_G_rf_blz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_blk_var3b()

FLA_Error FLA_Apply_G_rf_blk_var3b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14 {
15  FLA_Datatype datatype;
16  int k_G, m_A, n_A;
17  int rs_G, cs_G;
18  int rs_A, cs_A;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  k_G = FLA_Obj_width( G );
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25 
26  rs_G = FLA_Obj_row_stride( G );
27  cs_G = FLA_Obj_col_stride( G );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  switch ( datatype )
33  {
34  case FLA_FLOAT:
35  {
36  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
37  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38 
39  FLA_Apply_G_rf_bls_var3b( k_G,
40  m_A,
41  n_A,
42  0,
43  buff_G, rs_G, cs_G,
44  buff_A, rs_A, cs_A,
45  b_alg );
46 
47  break;
48  }
49 
50  case FLA_DOUBLE:
51  {
52  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
53  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
54 
55  FLA_Apply_G_rf_bld_var3b( k_G,
56  m_A,
57  n_A,
58  0,
59  buff_G, rs_G, cs_G,
60  buff_A, rs_A, cs_A,
61  b_alg );
62 
63  break;
64  }
65 
66  case FLA_COMPLEX:
67  {
68  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
69  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
70 
71  FLA_Apply_G_rf_blc_var3b( k_G,
72  m_A,
73  n_A,
74  0,
75  buff_G, rs_G, cs_G,
76  buff_A, rs_A, cs_A,
77  b_alg );
78 
79  break;
80  }
81 
82  case FLA_DOUBLE_COMPLEX:
83  {
84  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
85  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
86 
87  FLA_Apply_G_rf_blz_var3b( k_G,
88  m_A,
89  n_A,
90  0,
91  buff_G, rs_G, cs_G,
92  buff_A, rs_A, cs_A,
93  b_alg );
94 
95  break;
96  }
97  }
98 
99  return FLA_SUCCESS;
100 }

References FLA_Apply_G_rf_blc_var3b(), FLA_Apply_G_rf_bld_var3b(), FLA_Apply_G_rf_bls_var3b(), FLA_Apply_G_rf_blz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_blk_var4()

FLA_Error FLA_Apply_G_rf_blk_var4 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var5()

FLA_Error FLA_Apply_G_rf_blk_var5 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var5b()

FLA_Error FLA_Apply_G_rf_blk_var5b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var6()

FLA_Error FLA_Apply_G_rf_blk_var6 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14 {
15  FLA_Datatype datatype;
16  int k_G, m_A, n_A;
17  int rs_G, cs_G;
18  int rs_A, cs_A;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  k_G = FLA_Obj_width( G );
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25 
26  rs_G = FLA_Obj_row_stride( G );
27  cs_G = FLA_Obj_col_stride( G );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  switch ( datatype )
33  {
34  case FLA_FLOAT:
35  {
36  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
37  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38 
39  FLA_Apply_G_rf_bls_var6( k_G,
40  m_A,
41  n_A,
42  buff_G, rs_G, cs_G,
43  buff_A, rs_A, cs_A,
44  b_alg );
45 
46  break;
47  }
48 
49  case FLA_DOUBLE:
50  {
51  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
52  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53 
54  FLA_Apply_G_rf_bld_var6( k_G,
55  m_A,
56  n_A,
57  buff_G, rs_G, cs_G,
58  buff_A, rs_A, cs_A,
59  b_alg );
60 
61  break;
62  }
63 
64  case FLA_COMPLEX:
65  {
66  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
67  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
68 
69  FLA_Apply_G_rf_blc_var6( k_G,
70  m_A,
71  n_A,
72  buff_G, rs_G, cs_G,
73  buff_A, rs_A, cs_A,
74  b_alg );
75 
76  break;
77  }
78 
79  case FLA_DOUBLE_COMPLEX:
80  {
81  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
82  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
83 
84  FLA_Apply_G_rf_blz_var6( k_G,
85  m_A,
86  n_A,
87  buff_G, rs_G, cs_G,
88  buff_A, rs_A, cs_A,
89  b_alg );
90 
91  break;
92  }
93  }
94 
95  return FLA_SUCCESS;
96 }

References FLA_Apply_G_rf_blc_var6(), FLA_Apply_G_rf_bld_var6(), FLA_Apply_G_rf_bls_var6(), FLA_Apply_G_rf_blz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_blk_var6b()

FLA_Error FLA_Apply_G_rf_blk_var6b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14 {
15  FLA_Datatype datatype;
16  int k_G, m_A, n_A;
17  int rs_G, cs_G;
18  int rs_A, cs_A;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  k_G = FLA_Obj_width( G );
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25 
26  rs_G = FLA_Obj_row_stride( G );
27  cs_G = FLA_Obj_col_stride( G );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  switch ( datatype )
33  {
34  case FLA_FLOAT:
35  {
36  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
37  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38 
39  FLA_Apply_G_rf_bls_var6b( k_G,
40  m_A,
41  n_A,
42  0,
43  buff_G, rs_G, cs_G,
44  buff_A, rs_A, cs_A,
45  b_alg );
46 
47  break;
48  }
49 
50  case FLA_DOUBLE:
51  {
52  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
53  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
54 
55  FLA_Apply_G_rf_bld_var6b( k_G,
56  m_A,
57  n_A,
58  0,
59  buff_G, rs_G, cs_G,
60  buff_A, rs_A, cs_A,
61  b_alg );
62 
63  break;
64  }
65 
66  case FLA_COMPLEX:
67  {
68  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
69  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
70 
71  FLA_Apply_G_rf_blc_var6b( k_G,
72  m_A,
73  n_A,
74  0,
75  buff_G, rs_G, cs_G,
76  buff_A, rs_A, cs_A,
77  b_alg );
78 
79  break;
80  }
81 
82  case FLA_DOUBLE_COMPLEX:
83  {
84  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
85  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
86 
87  FLA_Apply_G_rf_blz_var6b( k_G,
88  m_A,
89  n_A,
90  0,
91  buff_G, rs_G, cs_G,
92  buff_A, rs_A, cs_A,
93  b_alg );
94 
95  break;
96  }
97  }
98 
99  return FLA_SUCCESS;
100 }

References FLA_Apply_G_rf_blc_var6b(), FLA_Apply_G_rf_bld_var6b(), FLA_Apply_G_rf_bls_var6b(), FLA_Apply_G_rf_blz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_blk_var7()

FLA_Error FLA_Apply_G_rf_blk_var7 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var8()

FLA_Error FLA_Apply_G_rf_blk_var8 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var8b()

FLA_Error FLA_Apply_G_rf_blk_var8b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)

◆ FLA_Apply_G_rf_blk_var9()

FLA_Error FLA_Apply_G_rf_blk_var9 ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14 {
15  FLA_Datatype datatype;
16  int k_G, m_A, n_A;
17  int rs_G, cs_G;
18  int rs_A, cs_A;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  k_G = FLA_Obj_width( G );
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25 
26  rs_G = FLA_Obj_row_stride( G );
27  cs_G = FLA_Obj_col_stride( G );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  switch ( datatype )
33  {
34  case FLA_FLOAT:
35  {
36  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
37  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38 
39  FLA_Apply_G_rf_bls_var9( k_G,
40  m_A,
41  n_A,
42  buff_G, rs_G, cs_G,
43  buff_A, rs_A, cs_A,
44  b_alg );
45 
46  break;
47  }
48 
49  case FLA_DOUBLE:
50  {
51  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
52  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
53 
54  FLA_Apply_G_rf_bld_var9( k_G,
55  m_A,
56  n_A,
57  buff_G, rs_G, cs_G,
58  buff_A, rs_A, cs_A,
59  b_alg );
60 
61  break;
62  }
63 
64  case FLA_COMPLEX:
65  {
66  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
67  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
68 
69  FLA_Apply_G_rf_blc_var9( k_G,
70  m_A,
71  n_A,
72  buff_G, rs_G, cs_G,
73  buff_A, rs_A, cs_A,
74  b_alg );
75 
76  break;
77  }
78 
79  case FLA_DOUBLE_COMPLEX:
80  {
81  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
82  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
83 
84  FLA_Apply_G_rf_blz_var9( k_G,
85  m_A,
86  n_A,
87  buff_G, rs_G, cs_G,
88  buff_A, rs_A, cs_A,
89  b_alg );
90 
91  break;
92  }
93  }
94 
95  return FLA_SUCCESS;
96 }

References FLA_Apply_G_rf_blc_var9(), FLA_Apply_G_rf_bld_var9(), FLA_Apply_G_rf_bls_var9(), FLA_Apply_G_rf_blz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_blk_var9b()

FLA_Error FLA_Apply_G_rf_blk_var9b ( FLA_Obj  G,
FLA_Obj  A,
dim_t  b_alg 
)
14 {
15  FLA_Datatype datatype;
16  int k_G, m_A, n_A;
17  int rs_G, cs_G;
18  int rs_A, cs_A;
19 
20  datatype = FLA_Obj_datatype( A );
21 
22  k_G = FLA_Obj_width( G );
23  m_A = FLA_Obj_length( A );
24  n_A = FLA_Obj_width( A );
25 
26  rs_G = FLA_Obj_row_stride( G );
27  cs_G = FLA_Obj_col_stride( G );
28 
29  rs_A = FLA_Obj_row_stride( A );
30  cs_A = FLA_Obj_col_stride( A );
31 
32  switch ( datatype )
33  {
34  case FLA_FLOAT:
35  {
36  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
37  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
38 
39  FLA_Apply_G_rf_bls_var9b( k_G,
40  m_A,
41  n_A,
42  0,
43  buff_G, rs_G, cs_G,
44  buff_A, rs_A, cs_A,
45  b_alg );
46 
47  break;
48  }
49 
50  case FLA_DOUBLE:
51  {
52  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
53  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
54 
55  FLA_Apply_G_rf_bld_var9b( k_G,
56  m_A,
57  n_A,
58  0,
59  buff_G, rs_G, cs_G,
60  buff_A, rs_A, cs_A,
61  b_alg );
62 
63  break;
64  }
65 
66  case FLA_COMPLEX:
67  {
68  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
69  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
70 
71  FLA_Apply_G_rf_blc_var9b( k_G,
72  m_A,
73  n_A,
74  0,
75  buff_G, rs_G, cs_G,
76  buff_A, rs_A, cs_A,
77  b_alg );
78 
79  break;
80  }
81 
82  case FLA_DOUBLE_COMPLEX:
83  {
84  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
85  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
86 
87  FLA_Apply_G_rf_blz_var9b( k_G,
88  m_A,
89  n_A,
90  0,
91  buff_G, rs_G, cs_G,
92  buff_A, rs_A, cs_A,
93  b_alg );
94 
95  break;
96  }
97  }
98 
99  return FLA_SUCCESS;
100 }

References FLA_Apply_G_rf_blc_var9b(), FLA_Apply_G_rf_bld_var9b(), FLA_Apply_G_rf_bls_var9b(), FLA_Apply_G_rf_blz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_bls_var1()

FLA_Error FLA_Apply_G_rf_bls_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105 {
106  int i;
107  int b = 0;
108 
109  for ( i = 0; i < m_A; i += b )
110  {
111  float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112  int m_ahead = max( 0, m_A - i );
113 
114  b = min( b_alg, m_ahead );
115 
116  //FLA_Apply_G_rf_ops_var1( k_G,
117  FLA_Apply_G_rf_ass_var1( k_G,
118  b,
119  n_A,
120  buff_G, rs_G, cs_G,
121  A1, rs_A, cs_A );
122  }
123 
124  return FLA_SUCCESS;
125 }

References FLA_Apply_G_rf_ass_var1(), and i.

Referenced by FLA_Apply_G_rf_blk_var1().

◆ FLA_Apply_G_rf_bls_var2()

FLA_Error FLA_Apply_G_rf_bls_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105 {
106  int i;
107  int b = 0;
108 
109  for ( i = 0; i < m_A; i += b )
110  {
111  float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112  int m_ahead = max( 0, m_A - i );
113 
114  b = min( b_alg, m_ahead );
115 
116  //FLA_Apply_G_rf_ops_var2( k_G,
117  FLA_Apply_G_rf_ass_var2( k_G,
118  b,
119  n_A,
120  buff_G, rs_G, cs_G,
121  A1, rs_A, cs_A );
122  }
123 
124  return FLA_SUCCESS;
125 }

References FLA_Apply_G_rf_ass_var2(), and i.

Referenced by FLA_Apply_G_rf_blk_var2().

◆ FLA_Apply_G_rf_bls_var3()

FLA_Error FLA_Apply_G_rf_bls_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105 {
106  int i;
107  int b = 0;
108 
109  for ( i = 0; i < m_A; i += b )
110  {
111  float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112  int m_ahead = max( 0, m_A - i );
113 
114  b = min( b_alg, m_ahead );
115 
116  // ass_var3 does not support arbitrary strides: TODO
117  FLA_Apply_G_rf_ops_var3( k_G,
118  //FLA_Apply_G_rf_ass_var3( k_G,
119  b,
120  n_A,
121  buff_G, rs_G, cs_G,
122  A1, rs_A, cs_A );
123  }
124 
125  return FLA_SUCCESS;
126 }

References FLA_Apply_G_rf_ops_var3(), and i.

Referenced by FLA_Apply_G_lf_blk_var3(), FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_ext_ops_var1(), and FLA_Bsvd_v_ops_var1().

◆ FLA_Apply_G_rf_bls_var3b()

FLA_Error FLA_Apply_G_rf_bls_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
110 {
111  int i;
112  int b = 0;
113 
114  for ( i = 0; i < m_A; i += b )
115  {
116  float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
117  int m_behind = i;
118  int m_ahead = max( 0, m_A - i );
119 
120  b = min( b_alg, m_ahead );
121 
122  //FLA_Apply_G_rf_ops_var3b( k_G,
123  FLA_Apply_G_rf_ass_var3b( k_G,
124  b,
125  n_A,
126  i_k,
127  m_behind,
128  buff_G, rs_G, cs_G,
129  A1, rs_A, cs_A );
130  }
131 
132  return FLA_SUCCESS;
133 }

References FLA_Apply_G_rf_ass_var3b(), and i.

Referenced by FLA_Apply_G_rf_blk_var3b().

◆ FLA_Apply_G_rf_bls_var4()

FLA_Error FLA_Apply_G_rf_bls_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var5()

FLA_Error FLA_Apply_G_rf_bls_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var5b()

FLA_Error FLA_Apply_G_rf_bls_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var6()

FLA_Error FLA_Apply_G_rf_bls_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105 {
106  int i;
107  int b = 0;
108 
109  for ( i = 0; i < m_A; i += b )
110  {
111  float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112  int m_ahead = max( 0, m_A - i );
113 
114  b = min( b_alg, m_ahead );
115 
116  //FLA_Apply_G_rf_ops_var6( k_G,
117  FLA_Apply_G_rf_ass_var6( k_G,
118  b,
119  n_A,
120  buff_G, rs_G, cs_G,
121  A1, rs_A, cs_A );
122  }
123 
124  return FLA_SUCCESS;
125 }

References FLA_Apply_G_rf_ass_var6(), and i.

Referenced by FLA_Apply_G_rf_blk_var6().

◆ FLA_Apply_G_rf_bls_var6b()

FLA_Error FLA_Apply_G_rf_bls_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
110 {
111  int i;
112  int b = 0;
113 
114  for ( i = 0; i < m_A; i += b )
115  {
116  float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
117  int m_behind = i;
118  int m_ahead = max( 0, m_A - i );
119 
120  b = min( b_alg, m_ahead );
121 
122  //FLA_Apply_G_rf_ops_var6b( k_G,
123  FLA_Apply_G_rf_ass_var6b( k_G,
124  b,
125  n_A,
126  i_k,
127  m_behind,
128  buff_G, rs_G, cs_G,
129  A1, rs_A, cs_A );
130  }
131 
132  return FLA_SUCCESS;
133 }

References FLA_Apply_G_rf_ass_var6b(), and i.

Referenced by FLA_Apply_G_rf_blk_var6b().

◆ FLA_Apply_G_rf_bls_var7()

FLA_Error FLA_Apply_G_rf_bls_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var8()

FLA_Error FLA_Apply_G_rf_bls_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var8b()

FLA_Error FLA_Apply_G_rf_bls_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_bls_var9()

FLA_Error FLA_Apply_G_rf_bls_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
105 {
106  int i;
107  int b = 0;
108 
109  for ( i = 0; i < m_A; i += b )
110  {
111  float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
112  int m_ahead = max( 0, m_A - i );
113 
114  b = min( b_alg, m_ahead );
115 
116  //FLA_Apply_G_rf_ops_var9( k_G,
117  FLA_Apply_G_rf_ass_var9( k_G,
118  b,
119  n_A,
120  buff_G, rs_G, cs_G,
121  A1, rs_A, cs_A );
122  }
123 
124  return FLA_SUCCESS;
125 }

References FLA_Apply_G_rf_ass_var9(), and i.

Referenced by FLA_Apply_G_rf_blk_var9().

◆ FLA_Apply_G_rf_bls_var9b()

FLA_Error FLA_Apply_G_rf_bls_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
scomplex buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
110 {
111  int i;
112  int b = 0;
113 
114  for ( i = 0; i < m_A; i += b )
115  {
116  float* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
117  int m_behind = i;
118  int m_ahead = max( 0, m_A - i );
119 
120  b = min( b_alg, m_ahead );
121 
122  //FLA_Apply_G_rf_ops_var9b( k_G,
123  FLA_Apply_G_rf_ass_var9b( k_G,
124  b,
125  n_A,
126  i_k,
127  m_behind,
128  buff_G, rs_G, cs_G,
129  A1, rs_A, cs_A );
130  }
131 
132  return FLA_SUCCESS;
133 }

References FLA_Apply_G_rf_ass_var9b(), and i.

Referenced by FLA_Apply_G_rf_blk_var9b().

◆ FLA_Apply_G_rf_blz_var1()

FLA_Error FLA_Apply_G_rf_blz_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
189 {
190  int i;
191  int b = 0;
192 
193  for ( i = 0; i < m_A; i += b )
194  {
195  dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
196  int m_ahead = max( 0, m_A - i );
197 
198  b = min( b_alg, m_ahead );
199 
200  //FLA_Apply_G_rf_opz_var1( k_G,
201  FLA_Apply_G_rf_asz_var1( k_G,
202  b,
203  n_A,
204  buff_G, rs_G, cs_G,
205  A1, rs_A, cs_A );
206  }
207 
208  return FLA_SUCCESS;
209 }

References FLA_Apply_G_rf_asz_var1(), and i.

Referenced by FLA_Apply_G_rf_blk_var1().
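
All of the blocked variants on this page (bls_*, bld_*, blc_*, blz_*) share the row-partitioning loop shown above: the m_A rows of A are processed in panels of at most b_alg rows, and the panel pointer A1 is handed to the corresponding unblocked kernel. The following stand-alone sketch (not libflame code) illustrates only that indexing; the names m_A, b_alg and rs_A mirror the parameters above, and min() stands in for the macro used internally.

/* Sketch of the row partitioning used by the blocked drivers:
   each panel starts at row i and holds b = min( b_alg, m_A - i ) rows. */
#include <stdio.h>

#define min( a, b ) ( (a) < (b) ? (a) : (b) )

int main( void )
{
    int m_A   = 10;  /* hypothetical row count */
    int b_alg = 4;   /* hypothetical algorithmic blocksize */
    int rs_A  = 1;   /* row stride of a column-major buffer */
    int i, b;

    for ( i = 0; i < m_A; i += b )
    {
        b = min( b_alg, m_A - i );

        /* The drivers compute A1 = buff_A + (0)*cs_A + (i)*rs_A;
           here only the offset and panel height are printed. */
        printf( "panel at row %d: element offset %d, height %d\n",
                i, i * rs_A, b );
    }
    return 0;
}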

◆ FLA_Apply_G_rf_blz_var2()

FLA_Error FLA_Apply_G_rf_blz_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
189 {
190  int i;
191  int b = 0;
192 
193  for ( i = 0; i < m_A; i += b )
194  {
195  dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
196  int m_ahead = max( 0, m_A - i );
197 
198  b = min( b_alg, m_ahead );
199 
200  //FLA_Apply_G_rf_opz_var2( k_G,
201  FLA_Apply_G_rf_asz_var2( k_G,
202  b,
203  n_A,
204  buff_G, rs_G, cs_G,
205  A1, rs_A, cs_A );
206  }
207 
208  return FLA_SUCCESS;
209 }
FLA_Error FLA_Apply_G_rf_asz_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var2.c:452

References FLA_Apply_G_rf_asz_var2(), and i.

Referenced by FLA_Apply_G_rf_blk_var2().

◆ FLA_Apply_G_rf_blz_var3()

FLA_Error FLA_Apply_G_rf_blz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
192 {
193  int i;
194  int b = 0;
195 
196  for ( i = 0; i < m_A; i += b )
197  {
198  dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
199  int m_ahead = max( 0, m_A - i );
200 
201  b = min( b_alg, m_ahead );
202 
203  // ass_var3 does not support arbitrary strides: TODO
204  FLA_Apply_G_rf_opz_var3( k_G,
205  //FLA_Apply_G_rf_asz_var3( k_G,
206  b,
207  n_A,
208  buff_G, rs_G, cs_G,
209  A1, rs_A, cs_A );
210  }
211 
212  return FLA_SUCCESS;
213 }
FLA_Error FLA_Apply_G_rf_opz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:1471

References FLA_Apply_G_rf_opz_var3(), and i.

Referenced by FLA_Apply_G_lf_blk_var3(), FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_ext_opz_var1(), FLA_Bsvd_v_opz_var1(), and FLA_Tevd_v_opz_var1().

◆ FLA_Apply_G_rf_blz_var3b()

FLA_Error FLA_Apply_G_rf_blz_var3b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
187 {
188  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
189 
190  return FLA_SUCCESS;
191 }

Referenced by FLA_Apply_G_rf_blk_var3b().

◆ FLA_Apply_G_rf_blz_var4()

FLA_Error FLA_Apply_G_rf_blz_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var5()

FLA_Error FLA_Apply_G_rf_blz_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var5b()

FLA_Error FLA_Apply_G_rf_blz_var5b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var6()

FLA_Error FLA_Apply_G_rf_blz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
189 {
190  int i;
191  int b = 0;
192 
193  for ( i = 0; i < m_A; i += b )
194  {
195  dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
196  int m_ahead = max( 0, m_A - i );
197 
198  b = min( b_alg, m_ahead );
199 
200  //FLA_Apply_G_rf_opz_var6( k_G,
201  FLA_Apply_G_rf_asz_var6( k_G,
202  b,
203  n_A,
204  buff_G, rs_G, cs_G,
205  A1, rs_A, cs_A );
206  }
207 
208  return FLA_SUCCESS;
209 }
FLA_Error FLA_Apply_G_rf_asz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var6.c:956

References FLA_Apply_G_rf_asz_var6(), and i.

Referenced by FLA_Apply_G_rf_blk_var6().

◆ FLA_Apply_G_rf_blz_var6b()

FLA_Error FLA_Apply_G_rf_blz_var6b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
187 {
188  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
189 
190  return FLA_SUCCESS;
191 }

Referenced by FLA_Apply_G_rf_blk_var6b().

◆ FLA_Apply_G_rf_blz_var7()

FLA_Error FLA_Apply_G_rf_blz_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var8()

FLA_Error FLA_Apply_G_rf_blz_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var8b()

FLA_Error FLA_Apply_G_rf_blz_var8b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)

◆ FLA_Apply_G_rf_blz_var9()

FLA_Error FLA_Apply_G_rf_blz_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
189 {
190  int i;
191  int b = 0;
192 
193  for ( i = 0; i < m_A; i += b )
194  {
195  dcomplex* A1 = buff_A + (0 )*cs_A + (i )*rs_A;
196  int m_ahead = max( 0, m_A - i );
197 
198  b = min( b_alg, m_ahead );
199 
200  //FLA_Apply_G_rf_opz_var9( k_G,
201  FLA_Apply_G_rf_asz_var9( k_G,
202  b,
203  n_A,
204  buff_G, rs_G, cs_G,
205  A1, rs_A, cs_A );
206  }
207 
208  return FLA_SUCCESS;
209 }
FLA_Error FLA_Apply_G_rf_asz_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_asm_var9.c:914

References FLA_Apply_G_rf_asz_var9(), and i.

Referenced by FLA_Apply_G_rf_blk_var9().

◆ FLA_Apply_G_rf_blz_var9b()

FLA_Error FLA_Apply_G_rf_blz_var9b ( int  k_G,
int  m_A,
int  n_A,
int  i_k,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A,
int  b_alg 
)
187 {
188  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
189 
190  return FLA_SUCCESS;
191 }

Referenced by FLA_Apply_G_rf_blk_var9b().

◆ FLA_Apply_G_rf_opc_var1()

FLA_Error FLA_Apply_G_rf_opc_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
220 {
221  float one = bl1_s1();
222  float zero = bl1_s0();
223  int nG_app = n_A - 1;
224  int l, j;
225  float gamma;
226  float sigma;
227  scomplex* a1;
228  scomplex* a2;
229  scomplex* g1;
230  scomplex* g11;
231 
232  g1 = buff_G;
233 
234  for ( l = 0; l < k_G; ++l )
235  {
236  a1 = buff_A;
237  a2 = buff_A + cs_A;
238  g11 = g1;
239 
240  for ( j = 0; j < nG_app; ++j )
241  {
242  gamma = g11->real;
243  sigma = g11->imag;
244 
245  // Skip the current iteration if the rotation is identity.
246  if ( gamma != one || sigma != zero )
247  {
248  MAC_Apply_G_mx2_opc( m_A,
249  &gamma,
250  &sigma,
251  a1, rs_A,
252  a2, rs_A );
253  }
254 
255  a1 += cs_A;
256  a2 += cs_A;
257  g11 += rs_G;
258  }
259 
260  g1 += cs_G;
261  }
262 
263  return FLA_SUCCESS;
264 }

References bl1_s0(), bl1_s1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_lf_opt_var1(), FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opc_var9(), and FLA_Apply_G_rf_opt_var1().
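
As a usage illustration only: the rotations consumed by FLA_Apply_G_rf_opc_var1() are stored with gamma in the real part and sigma in the imaginary part of each scomplex entry of G, and A is addressed through the (rs_A, cs_A) stride pair. The sketch below assumes a column-major layout (rs_A = 1, cs_A = m_A, and similarly for G) and assumes that the FLAME.h header declares scomplex and the prototype above; it shows the calling pattern, not a verified program.

/* Hypothetical calling sketch for FLA_Apply_G_rf_opc_var1(). */
#include "FLAME.h"

int main( void )
{
    int      m_A = 4, n_A = 3;  /* small example sizes */
    int      k_G = 2;           /* two sets of rotations */
    int      nG  = n_A - 1;     /* rotations per set (adjacent column pairs) */
    scomplex G[ 2 * 2 ];        /* nG rotations per set, k_G sets */
    scomplex A[ 4 * 3 ];        /* m_A x n_A, column-major */
    int      rs_G = 1, cs_G = nG;
    int      rs_A = 1, cs_A = m_A;
    int      i, j;

    for ( j = 0; j < n_A; ++j )
        for ( i = 0; i < m_A; ++i )
        {
            A[ i + j*m_A ].real = ( float )( i + j );
            A[ i + j*m_A ].imag = 0.0F;
        }

    /* gamma = 1, sigma = 0 is the identity rotation, which the
       variant detects and skips. */
    for ( j = 0; j < k_G; ++j )
        for ( i = 0; i < nG; ++i )
        {
            G[ i + j*nG ].real = 1.0F;  /* gamma */
            G[ i + j*nG ].imag = 0.0F;  /* sigma */
        }

    FLA_Apply_G_rf_opc_var1( k_G, m_A, n_A,
                             G, rs_G, cs_G,
                             A, rs_A, cs_A );
    return 0;
}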

◆ FLA_Apply_G_rf_opc_var2()

FLA_Error FLA_Apply_G_rf_opc_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
343 {
344  float one = bl1_s1();
345  float zero = bl1_s0();
346  float gamma;
347  float sigma;
348  scomplex* a1;
349  scomplex* a2;
350  scomplex* g11;
351  int j, g, k;
352  int nG, nG_app;
353  int k_minus_1;
354 
355  k_minus_1 = k_G - 1;
356  nG = n_A - 1;
357 
358  // Use the simple variant for nG < (k - 1) or k == 1.
359  if ( nG < k_minus_1 || k_G == 1 )
360  {
361  FLA_Apply_G_rf_opc_var1( k_G,
362  m_A,
363  n_A,
364  buff_G, rs_G, cs_G,
365  buff_A, rs_A, cs_A );
366  return FLA_SUCCESS;
367  }
368 
369 
370  // Start-up phase.
371 
372  for ( j = 0; j < k_minus_1; ++j )
373  {
374  nG_app = j + 1;
375 
376  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
377  {
378  g11 = buff_G + (g )*rs_G + (k )*cs_G;
379  a1 = buff_A + (g )*cs_A;
380  a2 = buff_A + (g + 1)*cs_A;
381 
382  gamma = g11->real;
383  sigma = g11->imag;
384 
385  // Skip the current iteration if the rotation is identity.
386  if ( gamma == one && sigma == zero ) continue;
387 
388  MAC_Apply_G_mx2_opc( m_A,
389  &gamma,
390  &sigma,
391  a1, rs_A,
392  a2, rs_A );
393  }
394  }
395 
396  // Pipeline stage
397 
398  for ( j = k_minus_1; j < nG; ++j )
399  {
400  nG_app = k_G;
401 
402  for ( k = 0, g = j; k < nG_app; ++k, --g )
403  {
404  g11 = buff_G + (g )*rs_G + (k )*cs_G;
405  a1 = buff_A + (g )*cs_A;
406  a2 = buff_A + (g + 1)*cs_A;
407 
408  gamma = g11->real;
409  sigma = g11->imag;
410 
411  // Skip the current iteration if the rotation is identity.
412  if ( gamma == one && sigma == zero ) continue;
413 
414  MAC_Apply_G_mx2_opc( m_A,
415  &gamma,
416  &sigma,
417  a1, rs_A,
418  a2, rs_A );
419  }
420  }
421 
422  // Shutdown stage
423 
424  for ( j = nG - k_minus_1; j < nG; ++j )
425  {
426  nG_app = nG - j;
427 
428  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
429  {
430  g11 = buff_G + (g )*rs_G + (k )*cs_G;
431  a1 = buff_A + (g )*cs_A;
432  a2 = buff_A + (g + 1)*cs_A;
433 
434  gamma = g11->real;
435  sigma = g11->imag;
436 
437  // Skip the current iteration if the rotation is identity.
438  if ( gamma == one && sigma == zero ) continue;
439 
440  MAC_Apply_G_mx2_opc( m_A,
441  &gamma,
442  &sigma,
443  a1, rs_A,
444  a2, rs_A );
445  }
446  }
447 
448  return FLA_SUCCESS;
449 }
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:215

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().
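
The start-up / pipeline / shutdown structure of FLA_Apply_G_rf_opc_var2() (and its s/d/z siblings) is easiest to see from the order in which (set k, rotation g) pairs are visited. The stand-alone sketch below only mirrors the loop bounds from the listing above for a small hypothetical case (k_G = 3, n_A = 7) and prints that order; it does not touch matrix data.

/* Prints the wavefront traversal order used by the var2 loops above. */
#include <stdio.h>

int main( void )
{
    int k_G = 3, n_A = 7;
    int nG  = n_A - 1;
    int k_minus_1 = k_G - 1;
    int j, k, g;

    for ( j = 0; j < k_minus_1; ++j )          /* start-up */
        for ( k = 0, g = j; k < j + 1; ++k, --g )
            printf( "start-up:  set %d, rotation %d\n", k, g );

    for ( j = k_minus_1; j < nG; ++j )         /* pipeline */
        for ( k = 0, g = j; k < k_G; ++k, --g )
            printf( "pipeline:  set %d, rotation %d\n", k, g );

    for ( j = nG - k_minus_1; j < nG; ++j )    /* shutdown */
        for ( k = k_G - ( nG - j ), g = nG - 1; k < k_G; ++k, --g )
            printf( "shutdown:  set %d, rotation %d\n", k, g );

    return 0;
}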

◆ FLA_Apply_G_rf_opc_var3()

FLA_Error FLA_Apply_G_rf_opc_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
1023 {
1024  float one = bl1_s1();
1025  float zero = bl1_s0();
1026  float gamma23_k1;
1027  float sigma23_k1;
1028  float gamma34_k1;
1029  float sigma34_k1;
1030  float gamma12_k2;
1031  float sigma12_k2;
1032  float gamma23_k2;
1033  float sigma23_k2;
1034  scomplex* a1;
1035  scomplex* a2;
1036  scomplex* a3;
1037  scomplex* a4;
1038  scomplex* g23_k1;
1039  scomplex* g34_k1;
1040  scomplex* g12_k2;
1041  scomplex* g23_k2;
1042  int i, j, g, k;
1043  int nG, nG_app;
1044  int n_iter;
1045  int n_left;
1046  int k_minus_1;
1047  int n_fuse;
1048  int k_fuse;
1049  int is_ident23_k1, is_ident34_k1;
1050  int is_ident12_k2, is_ident23_k2;
1051  int has_ident;
1052 
1053  k_minus_1 = k_G - 1;
1054  nG = n_A - 1;
1055  n_fuse = 2;
1056  k_fuse = 2;
1057 
1058  // Use the simple variant for nG < 2(k - 1) or k == 1.
1059  if ( nG < 2*k_minus_1 || k_G == 1 )
1060  {
1061  FLA_Apply_G_rf_opc_var1( k_G,
1062  m_A,
1063  n_A,
1064  buff_G, rs_G, cs_G,
1065  buff_A, rs_A, cs_A );
1066  return FLA_SUCCESS;
1067  }
1068 
1069 
1070  // Start-up phase.
1071 
1072  for ( j = -1; j < k_minus_1; j += n_fuse )
1073  {
1074  nG_app = j + 2;
1075  n_iter = nG_app / k_fuse;
1076  n_left = 1;
1077 
1078  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1079  {
1080  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1081  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1082  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1083  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1084  a1 = buff_A + (g - 1)*cs_A;
1085  a2 = buff_A + (g )*cs_A;
1086  a3 = buff_A + (g + 1)*cs_A;
1087  a4 = buff_A + (g + 2)*cs_A;
1088 
1089  gamma23_k1 = g23_k1->real;
1090  sigma23_k1 = g23_k1->imag;
1091  gamma34_k1 = g34_k1->real;
1092  sigma34_k1 = g34_k1->imag;
1093  gamma12_k2 = g12_k2->real;
1094  sigma12_k2 = g12_k2->imag;
1095  gamma23_k2 = g23_k2->real;
1096  sigma23_k2 = g23_k2->imag;
1097 
1098  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1099  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1100  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1101  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1102  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1103  is_ident12_k2 || is_ident23_k2 );
1104 
1105  if ( has_ident )
1106  {
1107  // Apply to pairs of columns as needed.
1108 
1109  if ( !is_ident23_k1 )
1110  MAC_Apply_G_mx2_opc( m_A,
1111  &gamma23_k1,
1112  &sigma23_k1,
1113  a2, rs_A,
1114  a3, rs_A );
1115 
1116  if ( !is_ident34_k1 )
1117  MAC_Apply_G_mx2_opc( m_A,
1118  &gamma34_k1,
1119  &sigma34_k1,
1120  a3, rs_A,
1121  a4, rs_A );
1122 
1123  if ( !is_ident12_k2 )
1124  MAC_Apply_G_mx2_opc( m_A,
1125  &gamma12_k2,
1126  &sigma12_k2,
1127  a1, rs_A,
1128  a2, rs_A );
1129 
1130  if ( !is_ident23_k2 )
1131  MAC_Apply_G_mx2_opc( m_A,
1132  &gamma23_k2,
1133  &sigma23_k2,
1134  a2, rs_A,
1135  a3, rs_A );
1136  }
1137  else
1138  {
1139  // Apply to all four columns.
1140 
1141  MAC_Apply_G_mx4s_opc( m_A,
1142  &gamma23_k1,
1143  &sigma23_k1,
1144  &gamma34_k1,
1145  &sigma34_k1,
1146  &gamma12_k2,
1147  &sigma12_k2,
1148  &gamma23_k2,
1149  &sigma23_k2,
1150  a1, rs_A,
1151  a2, rs_A,
1152  a3, rs_A,
1153  a4, rs_A );
1154  }
1155  }
1156 
1157  if ( n_left == 1 )
1158  {
1159  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1160  a3 = buff_A + (g + 1)*cs_A;
1161  a4 = buff_A + (g + 2)*cs_A;
1162 
1163  gamma34_k1 = g34_k1->real;
1164  sigma34_k1 = g34_k1->imag;
1165 
1166  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1167 
1168  if ( !is_ident34_k1 )
1169  MAC_Apply_G_mx2_opc( m_A,
1170  &gamma34_k1,
1171  &sigma34_k1,
1172  a3, rs_A,
1173  a4, rs_A );
1174  }
1175  }
1176 
1177  // Pipeline stage
1178 
1179  for ( ; j < nG - 1; j += n_fuse )
1180  {
1181  nG_app = k_G;
1182  n_iter = nG_app / k_fuse;
1183  n_left = nG_app % k_fuse;
1184 
1185  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1186  {
1187  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1188  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1189  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1190  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1191  a1 = buff_A + (g - 1)*cs_A;
1192  a2 = buff_A + (g )*cs_A;
1193  a3 = buff_A + (g + 1)*cs_A;
1194  a4 = buff_A + (g + 2)*cs_A;
1195 
1196  gamma23_k1 = g23_k1->real;
1197  sigma23_k1 = g23_k1->imag;
1198  gamma34_k1 = g34_k1->real;
1199  sigma34_k1 = g34_k1->imag;
1200  gamma12_k2 = g12_k2->real;
1201  sigma12_k2 = g12_k2->imag;
1202  gamma23_k2 = g23_k2->real;
1203  sigma23_k2 = g23_k2->imag;
1204 
1205  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1206  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1207  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1208  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1209  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1210  is_ident12_k2 || is_ident23_k2 );
1211 
1212  if ( has_ident )
1213  {
1214  // Apply to pairs of columns as needed.
1215 
1216  if ( !is_ident23_k1 )
1217  MAC_Apply_G_mx2_opc( m_A,
1218  &gamma23_k1,
1219  &sigma23_k1,
1220  a2, rs_A,
1221  a3, rs_A );
1222 
1223  if ( !is_ident34_k1 )
1224  MAC_Apply_G_mx2_opc( m_A,
1225  &gamma34_k1,
1226  &sigma34_k1,
1227  a3, rs_A,
1228  a4, rs_A );
1229 
1230  if ( !is_ident12_k2 )
1231  MAC_Apply_G_mx2_opc( m_A,
1232  &gamma12_k2,
1233  &sigma12_k2,
1234  a1, rs_A,
1235  a2, rs_A );
1236 
1237  if ( !is_ident23_k2 )
1238  MAC_Apply_G_mx2_opc( m_A,
1239  &gamma23_k2,
1240  &sigma23_k2,
1241  a2, rs_A,
1242  a3, rs_A );
1243  }
1244  else
1245  {
1246  // Apply to all four columns.
1247 
1248  MAC_Apply_G_mx4s_opc( m_A,
1249  &gamma23_k1,
1250  &sigma23_k1,
1251  &gamma34_k1,
1252  &sigma34_k1,
1253  &gamma12_k2,
1254  &sigma12_k2,
1255  &gamma23_k2,
1256  &sigma23_k2,
1257  a1, rs_A,
1258  a2, rs_A,
1259  a3, rs_A,
1260  a4, rs_A );
1261  }
1262  }
1263 
1264  if ( n_left == 1 )
1265  {
1266  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1267  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1268  a2 = buff_A + (g )*cs_A;
1269  a3 = buff_A + (g + 1)*cs_A;
1270  a4 = buff_A + (g + 2)*cs_A;
1271 
1272  gamma23_k1 = g23_k1->real;
1273  sigma23_k1 = g23_k1->imag;
1274  gamma34_k1 = g34_k1->real;
1275  sigma34_k1 = g34_k1->imag;
1276 
1277  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1278  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1279 
1280  if ( !is_ident23_k1 && is_ident34_k1 )
1281  {
1282  MAC_Apply_G_mx2_opc( m_A,
1283  &gamma23_k1,
1284  &sigma23_k1,
1285  a2, rs_A,
1286  a3, rs_A );
1287  }
1288  else if ( is_ident23_k1 && !is_ident34_k1 )
1289  {
1290  MAC_Apply_G_mx2_opc( m_A,
1291  &gamma34_k1,
1292  &sigma34_k1,
1293  a3, rs_A,
1294  a4, rs_A );
1295  }
1296  else
1297  {
1298  MAC_Apply_G_mx3_opc( m_A,
1299  &gamma23_k1,
1300  &sigma23_k1,
1301  &gamma34_k1,
1302  &sigma34_k1,
1303  a2, rs_A,
1304  a3, rs_A,
1305  a4, rs_A );
1306  }
1307  }
1308  }
1309 
1310  // Shutdown stage
1311 
1312  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1313  {
1314  g = nG - 1;
1315  k = j;
1316 
1317  //n_left = 1;
1318  //if ( n_left == 1 )
1319  {
1320  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1321  a2 = buff_A + (g )*cs_A;
1322  a3 = buff_A + (g + 1)*cs_A;
1323 
1324  gamma23_k1 = g23_k1->real;
1325  sigma23_k1 = g23_k1->imag;
1326 
1327  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1328 
1329  if ( !is_ident23_k1 )
1330  MAC_Apply_G_mx2_opc( m_A,
1331  &gamma23_k1,
1332  &sigma23_k1,
1333  a2, rs_A,
1334  a3, rs_A );
1335  ++k;
1336  --g;
1337  }
1338 
1339  nG_app = k_minus_1 - j;
1340  n_iter = nG_app / k_fuse;
1341  n_left = nG_app % k_fuse;
1342 
1343  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1344  {
1345  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1346  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1347  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1348  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1349  a1 = buff_A + (g - 1)*cs_A;
1350  a2 = buff_A + (g )*cs_A;
1351  a3 = buff_A + (g + 1)*cs_A;
1352  a4 = buff_A + (g + 2)*cs_A;
1353 
1354  gamma23_k1 = g23_k1->real;
1355  sigma23_k1 = g23_k1->imag;
1356  gamma34_k1 = g34_k1->real;
1357  sigma34_k1 = g34_k1->imag;
1358  gamma12_k2 = g12_k2->real;
1359  sigma12_k2 = g12_k2->imag;
1360  gamma23_k2 = g23_k2->real;
1361  sigma23_k2 = g23_k2->imag;
1362 
1363  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1364  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1365  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1366  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1367  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1368  is_ident12_k2 || is_ident23_k2 );
1369 
1370  if ( has_ident )
1371  {
1372  // Apply to pairs of columns as needed.
1373 
1374  if ( !is_ident23_k1 )
1375  MAC_Apply_G_mx2_opc( m_A,
1376  &gamma23_k1,
1377  &sigma23_k1,
1378  a2, rs_A,
1379  a3, rs_A );
1380 
1381  if ( !is_ident34_k1 )
1382  MAC_Apply_G_mx2_opc( m_A,
1383  &gamma34_k1,
1384  &sigma34_k1,
1385  a3, rs_A,
1386  a4, rs_A );
1387 
1388  if ( !is_ident12_k2 )
1389  MAC_Apply_G_mx2_opc( m_A,
1390  &gamma12_k2,
1391  &sigma12_k2,
1392  a1, rs_A,
1393  a2, rs_A );
1394 
1395  if ( !is_ident23_k2 )
1396  MAC_Apply_G_mx2_opc( m_A,
1397  &gamma23_k2,
1398  &sigma23_k2,
1399  a2, rs_A,
1400  a3, rs_A );
1401  }
1402  else
1403  {
1404  // Apply to all four columns.
1405 
1406  MAC_Apply_G_mx4s_opc( m_A,
1407  &gamma23_k1,
1408  &sigma23_k1,
1409  &gamma34_k1,
1410  &sigma34_k1,
1411  &gamma12_k2,
1412  &sigma12_k2,
1413  &gamma23_k2,
1414  &sigma23_k2,
1415  a1, rs_A,
1416  a2, rs_A,
1417  a3, rs_A,
1418  a4, rs_A );
1419  }
1420  }
1421 
1422  if ( n_left == 1 )
1423  {
1424  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1425  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1426  a2 = buff_A + (g )*cs_A;
1427  a3 = buff_A + (g + 1)*cs_A;
1428  a4 = buff_A + (g + 2)*cs_A;
1429 
1430  gamma23_k1 = g23_k1->real;
1431  sigma23_k1 = g23_k1->imag;
1432  gamma34_k1 = g34_k1->real;
1433  sigma34_k1 = g34_k1->imag;
1434 
1435  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1436  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1437 
1438  if ( !is_ident23_k1 && is_ident34_k1 )
1439  {
1440  MAC_Apply_G_mx2_opc( m_A,
1441  &gamma23_k1,
1442  &sigma23_k1,
1443  a2, rs_A,
1444  a3, rs_A );
1445  }
1446  else if ( is_ident23_k1 && !is_ident34_k1 )
1447  {
1448  MAC_Apply_G_mx2_opc( m_A,
1449  &gamma34_k1,
1450  &sigma34_k1,
1451  a3, rs_A,
1452  a4, rs_A );
1453  }
1454  else
1455  {
1456  MAC_Apply_G_mx3_opc( m_A,
1457  &gamma23_k1,
1458  &sigma23_k1,
1459  &gamma34_k1,
1460  &sigma34_k1,
1461  a2, rs_A,
1462  a3, rs_A,
1463  a4, rs_A );
1464  }
1465  }
1466  }
1467 
1468  return FLA_SUCCESS;
1469 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_blc_var3(), and FLA_Apply_G_rf_opt_var3().

◆ FLA_Apply_G_rf_opc_var4()

FLA_Error FLA_Apply_G_rf_opc_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opc_var5()

FLA_Error FLA_Apply_G_rf_opc_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opc_var6()

FLA_Error FLA_Apply_G_rf_opc_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
679 {
680  float one = bl1_s1();
681  float zero = bl1_s0();
682  float gamma12;
683  float sigma12;
684  float gamma23;
685  float sigma23;
686  scomplex* a1;
687  scomplex* a2;
688  scomplex* a3;
689  scomplex* g12;
690  scomplex* g23;
691  int i, j, g, k;
692  int nG, nG_app;
693  int n_iter;
694  int n_left;
695  int k_minus_1;
696  int n_fuse;
697  int is_ident12, is_ident23;
698 
699  k_minus_1 = k_G - 1;
700  nG = n_A - 1;
701  n_fuse = 2;
702 
703  // Use the simple variant for nG < (k - 1) or k == 1.
704  if ( nG < k_minus_1 || k_G == 1 )
705  {
706  FLA_Apply_G_rf_opc_var1( k_G,
707  m_A,
708  n_A,
709  buff_G, rs_G, cs_G,
710  buff_A, rs_A, cs_A );
711  return FLA_SUCCESS;
712  }
713 
714 
715  // Start-up phase.
716 
717  for ( j = 0; j < k_minus_1; ++j )
718  {
719  nG_app = j + 1;
720  n_iter = nG_app / n_fuse;
721  n_left = nG_app % n_fuse;
722 
723  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
724  {
725  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
726  g23 = buff_G + (g )*rs_G + (k )*cs_G;
727  a1 = buff_A + (g - 1)*cs_A;
728  a2 = buff_A + (g )*cs_A;
729  a3 = buff_A + (g + 1)*cs_A;
730 
731  gamma12 = g12->real;
732  sigma12 = g12->imag;
733  gamma23 = g23->real;
734  sigma23 = g23->imag;
735 
736  is_ident12 = ( gamma12 == one && sigma12 == zero );
737  is_ident23 = ( gamma23 == one && sigma23 == zero );
738 
739  if ( !is_ident12 && is_ident23 )
740  {
741  // Apply only to columns 1 and 2.
742 
743  MAC_Apply_G_mx2_opc( m_A,
744  &gamma12,
745  &sigma12,
746  a1, rs_A,
747  a2, rs_A );
748  }
749  else if ( is_ident12 && !is_ident23 )
750  {
751  // Apply only to columns 2 and 3.
752 
753  MAC_Apply_G_mx2_opc( m_A,
754  &gamma23,
755  &sigma23,
756  a2, rs_A,
757  a3, rs_A );
758  }
759  else if ( !is_ident12 && !is_ident23 )
760  {
761  // Apply to all three columns.
762 
763  MAC_Apply_G_mx3b_opc( m_A,
764  &gamma12,
765  &sigma12,
766  &gamma23,
767  &sigma23,
768  a1, rs_A,
769  a2, rs_A,
770  a3, rs_A );
771  }
772  }
773  //for ( k = 0; k < n_left; k += 1, g -= 1 )
774  if ( n_left == 1 )
775  {
776  g23 = buff_G + (g )*rs_G + (k )*cs_G;
777  a2 = buff_A + (g )*cs_A;
778  a3 = buff_A + (g + 1)*cs_A;
779 
780  gamma23 = g23->real;
781  sigma23 = g23->imag;
782 
783  is_ident23 = ( gamma23 == one && sigma23 == zero );
784 
785  if ( !is_ident23 )
786  MAC_Apply_G_mx2_opc( m_A,
787  &gamma23,
788  &sigma23,
789  a2, rs_A,
790  a3, rs_A );
791  }
792  }
793 
794  // Pipeline stage
795 
796  for ( j = k_minus_1; j < nG; ++j )
797  {
798  nG_app = k_G;
799  n_iter = nG_app / n_fuse;
800  n_left = nG_app % n_fuse;
801 
802  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
803  {
804  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
805  g23 = buff_G + (g )*rs_G + (k )*cs_G;
806  a1 = buff_A + (g - 1)*cs_A;
807  a2 = buff_A + (g )*cs_A;
808  a3 = buff_A + (g + 1)*cs_A;
809 
810  gamma12 = g12->real;
811  sigma12 = g12->imag;
812  gamma23 = g23->real;
813  sigma23 = g23->imag;
814 
815  is_ident12 = ( gamma12 == one && sigma12 == zero );
816  is_ident23 = ( gamma23 == one && sigma23 == zero );
817 
818  if ( !is_ident12 && is_ident23 )
819  {
820  // Apply only to columns 1 and 2.
821 
822  MAC_Apply_G_mx2_opc( m_A,
823  &gamma12,
824  &sigma12,
825  a1, rs_A,
826  a2, rs_A );
827  }
828  else if ( is_ident12 && !is_ident23 )
829  {
830  // Apply only to columns 2 and 3.
831 
832  MAC_Apply_G_mx2_opc( m_A,
833  &gamma23,
834  &sigma23,
835  a2, rs_A,
836  a3, rs_A );
837  }
838  else if ( !is_ident12 && !is_ident23 )
839  {
840  // Apply to all three columns.
841 
842  MAC_Apply_G_mx3b_opc( m_A,
843  &gamma12,
844  &sigma12,
845  &gamma23,
846  &sigma23,
847  a1, rs_A,
848  a2, rs_A,
849  a3, rs_A );
850  }
851  }
852  //for ( k = 0; k < n_left; k += 1, g -= 1 )
853  if ( n_left == 1 )
854  {
855  g23 = buff_G + (g )*rs_G + (k )*cs_G;
856  a2 = buff_A + (g )*cs_A;
857  a3 = buff_A + (g + 1)*cs_A;
858 
859  gamma23 = g23->real;
860  sigma23 = g23->imag;
861 
862  is_ident23 = ( gamma23 == one && sigma23 == zero );
863 
864  if ( !is_ident23 )
865  MAC_Apply_G_mx2_opc( m_A,
866  &gamma23,
867  &sigma23,
868  a2, rs_A,
869  a3, rs_A );
870  }
871  }
872 
873  // Shutdown stage
874 
875  for ( j = 1; j < k_G; ++j )
876  {
877  nG_app = k_G - j;
878  n_iter = nG_app / n_fuse;
879  n_left = nG_app % n_fuse;
880 
881  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
882  {
883  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
884  g23 = buff_G + (g )*rs_G + (k )*cs_G;
885  a1 = buff_A + (g - 1)*cs_A;
886  a2 = buff_A + (g )*cs_A;
887  a3 = buff_A + (g + 1)*cs_A;
888 
889  gamma12 = g12->real;
890  sigma12 = g12->imag;
891  gamma23 = g23->real;
892  sigma23 = g23->imag;
893 
894  is_ident12 = ( gamma12 == one && sigma12 == zero );
895  is_ident23 = ( gamma23 == one && sigma23 == zero );
896 
897  if ( !is_ident12 && is_ident23 )
898  {
899  // Apply only to columns 1 and 2.
900 
901  MAC_Apply_G_mx2_opc( m_A,
902  &gamma12,
903  &sigma12,
904  a1, rs_A,
905  a2, rs_A );
906  }
907  else if ( is_ident12 && !is_ident23 )
908  {
909  // Apply only to columns 2 and 3.
910 
911  MAC_Apply_G_mx2_opc( m_A,
912  &gamma23,
913  &sigma23,
914  a2, rs_A,
915  a3, rs_A );
916  }
917  else if ( !is_ident12 && !is_ident23 )
918  {
919  // Apply to all three columns.
920 
921  MAC_Apply_G_mx3b_opc( m_A,
922  &gamma12,
923  &sigma12,
924  &gamma23,
925  &sigma23,
926  a1, rs_A,
927  a2, rs_A,
928  a3, rs_A );
929  }
930  }
931  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
932  if ( n_left == 1 )
933  {
934  g23 = buff_G + (g )*rs_G + (k )*cs_G;
935  a2 = buff_A + (g )*cs_A;
936  a3 = buff_A + (g + 1)*cs_A;
937 
938  gamma23 = g23->real;
939  sigma23 = g23->imag;
940 
941  is_ident23 = ( gamma23 == one && sigma23 == zero );
942 
943  if ( !is_ident23 )
944  MAC_Apply_G_mx2_opc( m_A,
945  &gamma23,
946  &sigma23,
947  a2, rs_A,
948  a3, rs_A );
949  }
950  }
951 
952  return FLA_SUCCESS;
953 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opc_var7()

FLA_Error FLA_Apply_G_rf_opc_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opc_var8()

FLA_Error FLA_Apply_G_rf_opc_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opc_var9()

FLA_Error FLA_Apply_G_rf_opc_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
scomplex *  buff_A,
int  rs_A,
int  cs_A 
)
652 {
653  float one = bl1_s1();
654  float zero = bl1_s0();
655  float gamma12;
656  float sigma12;
657  float gamma23;
658  float sigma23;
659  scomplex* a1;
660  scomplex* a2;
661  scomplex* a3;
662  scomplex* g12;
663  scomplex* g23;
664  int i, j, g, k;
665  int nG, nG_app;
666  int n_iter;
667  int n_left;
668  int k_minus_1;
669  int n_fuse;
670  int is_ident12, is_ident23;
671 
672  k_minus_1 = k_G - 1;
673  nG = n_A - 1;
674  n_fuse = 2;
675 
676  // Use the simple variant for nG < 2(k - 1) or k == 1.
677  if ( nG < 2*k_minus_1 || k_G == 1 )
678  {
679  FLA_Apply_G_rf_opc_var1( k_G,
680  m_A,
681  n_A,
682  buff_G, rs_G, cs_G,
683  buff_A, rs_A, cs_A );
684  return FLA_SUCCESS;
685  }
686 
687 
688  // Start-up phase.
689 
690  for ( j = -1; j < k_minus_1; j += n_fuse )
691  {
692  nG_app = j + 1;
693  n_iter = nG_app;
694  n_left = 1;
695 
696  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
697  {
698  g12 = buff_G + (g )*rs_G + (k )*cs_G;
699  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
700  a1 = buff_A + (g )*cs_A;
701  a2 = buff_A + (g + 1)*cs_A;
702  a3 = buff_A + (g + 2)*cs_A;
703 
704  gamma12 = g12->real;
705  sigma12 = g12->imag;
706  gamma23 = g23->real;
707  sigma23 = g23->imag;
708 
709  is_ident12 = ( gamma12 == one && sigma12 == zero );
710  is_ident23 = ( gamma23 == one && sigma23 == zero );
711 
712  if ( !is_ident12 && is_ident23 )
713  {
714  // Apply only to columns 1 and 2.
715 
716  MAC_Apply_G_mx2_opc( m_A,
717  &gamma12,
718  &sigma12,
719  a1, rs_A,
720  a2, rs_A );
721  }
722  else if ( is_ident12 && !is_ident23 )
723  {
724  // Apply only to columns 2 and 3.
725 
726  MAC_Apply_G_mx2_opc( m_A,
727  &gamma23,
728  &sigma23,
729  a2, rs_A,
730  a3, rs_A );
731  }
732  else if ( !is_ident12 && !is_ident23 )
733  {
734  // Apply to all three columns.
735 
736  MAC_Apply_G_mx3_opc( m_A,
737  &gamma12,
738  &sigma12,
739  &gamma23,
740  &sigma23,
741  a1, rs_A,
742  a2, rs_A,
743  a3, rs_A );
744  }
745  }
746 
747  if ( n_left == 1 )
748  {
749  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
750  a2 = buff_A + (g + 1)*cs_A;
751  a3 = buff_A + (g + 2)*cs_A;
752 
753  gamma23 = g23->real;
754  sigma23 = g23->imag;
755 
756  is_ident23 = ( gamma23 == one && sigma23 == zero );
757 
758  if ( !is_ident23 )
759  MAC_Apply_G_mx2_opc( m_A,
760  &gamma23,
761  &sigma23,
762  a2, rs_A,
763  a3, rs_A );
764  }
765  }
766 
767  // Pipeline stage
768 
769  for ( ; j < nG - 1; j += n_fuse )
770  {
771  nG_app = k_G;
772  n_iter = nG_app;
773  n_left = 0;
774 
775  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
776  {
777  g12 = buff_G + (g )*rs_G + (k )*cs_G;
778  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
779  a1 = buff_A + (g )*cs_A;
780  a2 = buff_A + (g + 1)*cs_A;
781  a3 = buff_A + (g + 2)*cs_A;
782 
783  gamma12 = g12->real;
784  sigma12 = g12->imag;
785  gamma23 = g23->real;
786  sigma23 = g23->imag;
787 
788  is_ident12 = ( gamma12 == one && sigma12 == zero );
789  is_ident23 = ( gamma23 == one && sigma23 == zero );
790 
791  if ( !is_ident12 && is_ident23 )
792  {
793  // Apply only to columns 1 and 2.
794 
795  MAC_Apply_G_mx2_opc( m_A,
796  &gamma12,
797  &sigma12,
798  a1, rs_A,
799  a2, rs_A );
800  }
801  else if ( is_ident12 && !is_ident23 )
802  {
803  // Apply only to columns 2 and 3.
804 
805  MAC_Apply_G_mx2_opc( m_A,
806  &gamma23,
807  &sigma23,
808  a2, rs_A,
809  a3, rs_A );
810  }
811  else if ( !is_ident12 && !is_ident23 )
812  {
813  // Apply to all three columns.
814 
815  MAC_Apply_G_mx3_opc( m_A,
816  &gamma12,
817  &sigma12,
818  &gamma23,
819  &sigma23,
820  a1, rs_A,
821  a2, rs_A,
822  a3, rs_A );
823  }
824  }
825  }
826 
827  // Shutdown stage
828 
829  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
830  {
831  g = nG - 1;
832  k = j;
833 
834  n_left = 1;
835  if ( n_left == 1 )
836  {
837  g12 = buff_G + (g )*rs_G + (k )*cs_G;
838  a1 = buff_A + (g )*cs_A;
839  a2 = buff_A + (g + 1)*cs_A;
840 
841  gamma12 = g12->real;
842  sigma12 = g12->imag;
843 
844  is_ident12 = ( gamma12 == one && sigma12 == zero );
845 
846  if ( !is_ident12 )
847  MAC_Apply_G_mx2_opc( m_A,
848  &gamma12,
849  &sigma12,
850  a1, rs_A,
851  a2, rs_A );
852  ++k;
853  --g;
854  }
855 
856  nG_app = k_minus_1 - j;
857  n_iter = nG_app;
858 
859  for ( i = 0; i < n_iter; ++i, ++k, --g )
860  {
861  g12 = buff_G + (g )*rs_G + (k )*cs_G;
862  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
863  a1 = buff_A + (g )*cs_A;
864  a2 = buff_A + (g + 1)*cs_A;
865  a3 = buff_A + (g + 2)*cs_A;
866 
867  gamma12 = g12->real;
868  sigma12 = g12->imag;
869  gamma23 = g23->real;
870  sigma23 = g23->imag;
871 
872  is_ident12 = ( gamma12 == one && sigma12 == zero );
873  is_ident23 = ( gamma23 == one && sigma23 == zero );
874 
875  if ( !is_ident12 && is_ident23 )
876  {
877  // Apply only to columns 1 and 2.
878 
879  MAC_Apply_G_mx2_opc( m_A,
880  &gamma12,
881  &sigma12,
882  a1, rs_A,
883  a2, rs_A );
884  }
885  else if ( is_ident12 && !is_ident23 )
886  {
887  // Apply only to columns 2 and 3.
888 
889  MAC_Apply_G_mx2_opc( m_A,
890  &gamma23,
891  &sigma23,
892  a2, rs_A,
893  a3, rs_A );
894  }
895  else if ( !is_ident12 && !is_ident23 )
896  {
897  // Apply to all three columns.
898 
899  MAC_Apply_G_mx3_opc( m_A,
900  &gamma12,
901  &sigma12,
902  &gamma23,
903  &sigma23,
904  a1, rs_A,
905  a2, rs_A,
906  a3, rs_A );
907  }
908  }
909  }
910 
911  return FLA_SUCCESS;
912 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_opc_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var9().

◆ FLA_Apply_G_rf_opd_var1()

FLA_Error FLA_Apply_G_rf_opd_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
169 {
170  double one = bl1_d1();
171  double zero = bl1_d0();
172  int nG_app = n_A - 1;
173  int l, j;
174  double gamma;
175  double sigma;
176  double* a1;
177  double* a2;
178  dcomplex* g1;
179  dcomplex* g11;
180 
181  g1 = buff_G;
182 
183  for ( l = 0; l < k_G; ++l )
184  {
185  a1 = buff_A;
186  a2 = buff_A + cs_A;
187  g11 = g1;
188 
189  for ( j = 0; j < nG_app; ++j )
190  {
191  gamma = g11->real;
192  sigma = g11->imag;
193 
194  // Skip the current iteration if the rotation is identity.
195  if ( gamma != one || sigma != zero )
196  {
197  MAC_Apply_G_mx2_opd( m_A,
198  &gamma,
199  &sigma,
200  a1, rs_A,
201  a2, rs_A );
202  }
203 
204  a1 += cs_A;
205  a2 += cs_A;
206  g11 += rs_G;
207  }
208 
209  g1 += cs_G;
210  }
211 
212  return FLA_SUCCESS;
213 }

References bl1_d0(), bl1_d1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_lf_opt_var1(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_opd_var9(), and FLA_Apply_G_rf_opt_var1().
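
FLA_Apply_G_rf_opd_var1() delegates the update of each column pair to MAC_Apply_G_mx2_opd, which is not reproduced on this page. The sketch below shows the conventional real Givens update that macro is assumed to perform; the sign convention is an assumption, not taken from the libflame source.

/* Assumed behavior of a 2-column Givens update on double data:
   [ a1  a2 ] <- [ a1  a2 ] * [ gamma  -sigma ]
                              [ sigma   gamma ]
   applied row by row down the m entries of the two columns. */
#include <stdio.h>

static void apply_g_mx2_d( int m, double gamma, double sigma,
                           double* a1, int inc_a1,
                           double* a2, int inc_a2 )
{
    int i;
    for ( i = 0; i < m; ++i )
    {
        double t1 = a1[ i*inc_a1 ];
        double t2 = a2[ i*inc_a2 ];
        a1[ i*inc_a1 ] =  gamma * t1 + sigma * t2;
        a2[ i*inc_a2 ] = -sigma * t1 + gamma * t2;
    }
}

int main( void )
{
    double a1[ 3 ] = { 1.0, 2.0, 3.0 };
    double a2[ 3 ] = { 4.0, 5.0, 6.0 };

    /* gamma = 0, sigma = 1 swaps the two columns up to a sign
       under the convention assumed above. */
    apply_g_mx2_d( 3, 0.0, 1.0, a1, 1, a2, 1 );

    printf( "a1 = %g %g %g\n", a1[ 0 ], a1[ 1 ], a1[ 2 ] );
    printf( "a2 = %g %g %g\n", a2[ 0 ], a2[ 1 ], a2[ 2 ] );
    return 0;
}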

◆ FLA_Apply_G_rf_opd_var2()

FLA_Error FLA_Apply_G_rf_opd_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
230 {
231  double one = bl1_d1();
232  double zero = bl1_d0();
233  double gamma;
234  double sigma;
235  double* a1;
236  double* a2;
237  dcomplex* g11;
238  int j, g, k;
239  int nG, nG_app;
240  int k_minus_1;
241 
242  k_minus_1 = k_G - 1;
243  nG = n_A - 1;
244 
245  // Use the simple variant for nG < (k - 1) or k == 1.
246  if ( nG < k_minus_1 || k_G == 1 )
247  {
248  FLA_Apply_G_rf_opd_var1( k_G,
249  m_A,
250  n_A,
251  buff_G, rs_G, cs_G,
252  buff_A, rs_A, cs_A );
253  return FLA_SUCCESS;
254  }
255 
256 
257  // Start-up phase.
258 
259  for ( j = 0; j < k_minus_1; ++j )
260  {
261  nG_app = j + 1;
262 
263  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
264  {
265  g11 = buff_G + (g )*rs_G + (k )*cs_G;
266  a1 = buff_A + (g )*cs_A;
267  a2 = buff_A + (g + 1)*cs_A;
268 
269  gamma = g11->real;
270  sigma = g11->imag;
271 
272  // Skip the current iteration if the rotation is identity.
273  if ( gamma == one && sigma == zero ) continue;
274 
275  MAC_Apply_G_mx2_opd( m_A,
276  &gamma,
277  &sigma,
278  a1, rs_A,
279  a2, rs_A );
280  }
281  }
282 
283  // Pipeline stage
284 
285  for ( j = k_minus_1; j < nG; ++j )
286  {
287  nG_app = k_G;
288 
289  for ( k = 0, g = j; k < nG_app; ++k, --g )
290  {
291  g11 = buff_G + (g )*rs_G + (k )*cs_G;
292  a1 = buff_A + (g )*cs_A;
293  a2 = buff_A + (g + 1)*cs_A;
294 
295  gamma = g11->real;
296  sigma = g11->imag;
297 
298  // Skip the current iteration if the rotation is identity.
299  if ( gamma == one && sigma == zero ) continue;
300 
301  MAC_Apply_G_mx2_opd( m_A,
302  &gamma,
303  &sigma,
304  a1, rs_A,
305  a2, rs_A );
306  }
307  }
308 
309  // Shutdown stage
310 
311  for ( j = nG - k_minus_1; j < nG; ++j )
312  {
313  nG_app = nG - j;
314 
315  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
316  {
317  g11 = buff_G + (g )*rs_G + (k )*cs_G;
318  a1 = buff_A + (g )*cs_A;
319  a2 = buff_A + (g + 1)*cs_A;
320 
321  gamma = g11->real;
322  sigma = g11->imag;
323 
324  // Skip the current iteration if the rotation is identity.
325  if ( gamma == one && sigma == zero ) continue;
326 
327  MAC_Apply_G_mx2_opd( m_A,
328  &gamma,
329  &sigma,
330  a1, rs_A,
331  a2, rs_A );
332  }
333  }
334 
335  return FLA_SUCCESS;
336 }
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:164

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_opd_var3()

FLA_Error FLA_Apply_G_rf_opd_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
570 {
571  double one = bl1_d1();
572  double zero = bl1_d0();
573  double gamma23_k1;
574  double sigma23_k1;
575  double gamma34_k1;
576  double sigma34_k1;
577  double gamma12_k2;
578  double sigma12_k2;
579  double gamma23_k2;
580  double sigma23_k2;
581  double* a1;
582  double* a2;
583  double* a3;
584  double* a4;
585  dcomplex* g23_k1;
586  dcomplex* g34_k1;
587  dcomplex* g12_k2;
588  dcomplex* g23_k2;
589  int i, j, g, k;
590  int nG, nG_app;
591  int n_iter;
592  int n_left;
593  int k_minus_1;
594  int n_fuse;
595  int k_fuse;
596  int is_ident23_k1, is_ident34_k1;
597  int is_ident12_k2, is_ident23_k2;
598  int has_ident;
599 
600  k_minus_1 = k_G - 1;
601  nG = n_A - 1;
602  n_fuse = 2;
603  k_fuse = 2;
604 
605  // Use the simple variant for nG < 2(k - 1) or k == 1.
606  if ( nG < 2*k_minus_1 || k_G == 1 )
607  {
608  FLA_Apply_G_rf_opd_var1( k_G,
609  m_A,
610  n_A,
611  buff_G, rs_G, cs_G,
612  buff_A, rs_A, cs_A );
613  return FLA_SUCCESS;
614  }
615 
616 
617  // Start-up phase.
618 
619  for ( j = -1; j < k_minus_1; j += n_fuse )
620  {
621  nG_app = j + 2;
622  n_iter = nG_app / k_fuse;
623  n_left = 1;
624 
625  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
626  {
627  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
628  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
629  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
630  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
631  a1 = buff_A + (g - 1)*cs_A;
632  a2 = buff_A + (g )*cs_A;
633  a3 = buff_A + (g + 1)*cs_A;
634  a4 = buff_A + (g + 2)*cs_A;
635 
636  gamma23_k1 = g23_k1->real;
637  sigma23_k1 = g23_k1->imag;
638  gamma34_k1 = g34_k1->real;
639  sigma34_k1 = g34_k1->imag;
640  gamma12_k2 = g12_k2->real;
641  sigma12_k2 = g12_k2->imag;
642  gamma23_k2 = g23_k2->real;
643  sigma23_k2 = g23_k2->imag;
644 
645  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
646  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
647  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
648  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
649  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
650  is_ident12_k2 || is_ident23_k2 );
651 
652  if ( has_ident )
653  {
654  // Apply to pairs of columns as needed.
655 
656  if ( !is_ident23_k1 )
657  MAC_Apply_G_mx2_opd( m_A,
658  &gamma23_k1,
659  &sigma23_k1,
660  a2, rs_A,
661  a3, rs_A );
662 
663  if ( !is_ident34_k1 )
664  MAC_Apply_G_mx2_opd( m_A,
665  &gamma34_k1,
666  &sigma34_k1,
667  a3, rs_A,
668  a4, rs_A );
669 
670  if ( !is_ident12_k2 )
671  MAC_Apply_G_mx2_opd( m_A,
672  &gamma12_k2,
673  &sigma12_k2,
674  a1, rs_A,
675  a2, rs_A );
676 
677  if ( !is_ident23_k2 )
678  MAC_Apply_G_mx2_opd( m_A,
679  &gamma23_k2,
680  &sigma23_k2,
681  a2, rs_A,
682  a3, rs_A );
683  }
684  else
685  {
686  // Apply to all four columns.
687 
688  MAC_Apply_G_mx4s_opd( m_A,
689  &gamma23_k1,
690  &sigma23_k1,
691  &gamma34_k1,
692  &sigma34_k1,
693  &gamma12_k2,
694  &sigma12_k2,
695  &gamma23_k2,
696  &sigma23_k2,
697  a1, rs_A,
698  a2, rs_A,
699  a3, rs_A,
700  a4, rs_A );
701  }
702  }
703 
704  if ( n_left == 1 )
705  {
706  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
707  a3 = buff_A + (g + 1)*cs_A;
708  a4 = buff_A + (g + 2)*cs_A;
709 
710  gamma34_k1 = g34_k1->real;
711  sigma34_k1 = g34_k1->imag;
712 
713  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
714 
715  if ( !is_ident34_k1 )
716  MAC_Apply_G_mx2_opd( m_A,
717  &gamma34_k1,
718  &sigma34_k1,
719  a3, rs_A,
720  a4, rs_A );
721  }
722  }
723 
724  // Pipeline stage
725 
726  for ( ; j < nG - 1; j += n_fuse )
727  {
728  nG_app = k_G;
729  n_iter = nG_app / k_fuse;
730  n_left = nG_app % k_fuse;
731 
732  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
733  {
734  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
735  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
736  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
737  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
738  a1 = buff_A + (g - 1)*cs_A;
739  a2 = buff_A + (g )*cs_A;
740  a3 = buff_A + (g + 1)*cs_A;
741  a4 = buff_A + (g + 2)*cs_A;
742 
743  gamma23_k1 = g23_k1->real;
744  sigma23_k1 = g23_k1->imag;
745  gamma34_k1 = g34_k1->real;
746  sigma34_k1 = g34_k1->imag;
747  gamma12_k2 = g12_k2->real;
748  sigma12_k2 = g12_k2->imag;
749  gamma23_k2 = g23_k2->real;
750  sigma23_k2 = g23_k2->imag;
751 
752  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
753  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
754  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
755  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
756  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
757  is_ident12_k2 || is_ident23_k2 );
758 
759  if ( has_ident )
760  {
761  // Apply to pairs of columns as needed.
762 
763  if ( !is_ident23_k1 )
764  MAC_Apply_G_mx2_opd( m_A,
765  &gamma23_k1,
766  &sigma23_k1,
767  a2, rs_A,
768  a3, rs_A );
769 
770  if ( !is_ident34_k1 )
771  MAC_Apply_G_mx2_opd( m_A,
772  &gamma34_k1,
773  &sigma34_k1,
774  a3, rs_A,
775  a4, rs_A );
776 
777  if ( !is_ident12_k2 )
778  MAC_Apply_G_mx2_opd( m_A,
779  &gamma12_k2,
780  &sigma12_k2,
781  a1, rs_A,
782  a2, rs_A );
783 
784  if ( !is_ident23_k2 )
785  MAC_Apply_G_mx2_opd( m_A,
786  &gamma23_k2,
787  &sigma23_k2,
788  a2, rs_A,
789  a3, rs_A );
790  }
791  else
792  {
793  // Apply to all four columns.
794 
795  MAC_Apply_G_mx4s_opd( m_A,
796  &gamma23_k1,
797  &sigma23_k1,
798  &gamma34_k1,
799  &sigma34_k1,
800  &gamma12_k2,
801  &sigma12_k2,
802  &gamma23_k2,
803  &sigma23_k2,
804  a1, rs_A,
805  a2, rs_A,
806  a3, rs_A,
807  a4, rs_A );
808  }
809  }
810 
811  if ( n_left == 1 )
812  {
813  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
814  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
815  a2 = buff_A + (g )*cs_A;
816  a3 = buff_A + (g + 1)*cs_A;
817  a4 = buff_A + (g + 2)*cs_A;
818 
819  gamma23_k1 = g23_k1->real;
820  sigma23_k1 = g23_k1->imag;
821  gamma34_k1 = g34_k1->real;
822  sigma34_k1 = g34_k1->imag;
823 
824  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
825  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
826 
827  if ( !is_ident23_k1 && is_ident34_k1 )
828  {
829  MAC_Apply_G_mx2_opd( m_A,
830  &gamma23_k1,
831  &sigma23_k1,
832  a2, rs_A,
833  a3, rs_A );
834  }
835  else if ( is_ident23_k1 && !is_ident34_k1 )
836  {
837  MAC_Apply_G_mx2_opd( m_A,
838  &gamma34_k1,
839  &sigma34_k1,
840  a3, rs_A,
841  a4, rs_A );
842  }
843  else
844  {
845  MAC_Apply_G_mx3_opd( m_A,
846  &gamma23_k1,
847  &sigma23_k1,
848  &gamma34_k1,
849  &sigma34_k1,
850  a2, rs_A,
851  a3, rs_A,
852  a4, rs_A );
853  }
854  }
855  }
856 
857  // Shutdown stage
858 
859  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
860  {
861  g = nG - 1;
862  k = j;
863 
864  //n_left = 1;
865  //if ( n_left == 1 )
866  {
867  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
868  a2 = buff_A + (g )*cs_A;
869  a3 = buff_A + (g + 1)*cs_A;
870 
871  gamma23_k1 = g23_k1->real;
872  sigma23_k1 = g23_k1->imag;
873 
874  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
875 
876  if ( !is_ident23_k1 )
877  MAC_Apply_G_mx2_opd( m_A,
878  &gamma23_k1,
879  &sigma23_k1,
880  a2, rs_A,
881  a3, rs_A );
882  ++k;
883  --g;
884  }
885 
886  nG_app = k_minus_1 - j;
887  n_iter = nG_app / k_fuse;
888  n_left = nG_app % k_fuse;
889 
890  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
891  {
892  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
893  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
894  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
895  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
896  a1 = buff_A + (g - 1)*cs_A;
897  a2 = buff_A + (g )*cs_A;
898  a3 = buff_A + (g + 1)*cs_A;
899  a4 = buff_A + (g + 2)*cs_A;
900 
901  gamma23_k1 = g23_k1->real;
902  sigma23_k1 = g23_k1->imag;
903  gamma34_k1 = g34_k1->real;
904  sigma34_k1 = g34_k1->imag;
905  gamma12_k2 = g12_k2->real;
906  sigma12_k2 = g12_k2->imag;
907  gamma23_k2 = g23_k2->real;
908  sigma23_k2 = g23_k2->imag;
909 
910  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
911  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
912  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
913  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
914  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
915  is_ident12_k2 || is_ident23_k2 );
916 
917  if ( has_ident )
918  {
919  // Apply to pairs of columns as needed.
920 
921  if ( !is_ident23_k1 )
922  MAC_Apply_G_mx2_opd( m_A,
923  &gamma23_k1,
924  &sigma23_k1,
925  a2, rs_A,
926  a3, rs_A );
927 
928  if ( !is_ident34_k1 )
929  MAC_Apply_G_mx2_opd( m_A,
930  &gamma34_k1,
931  &sigma34_k1,
932  a3, rs_A,
933  a4, rs_A );
934 
935  if ( !is_ident12_k2 )
936  MAC_Apply_G_mx2_opd( m_A,
937  &gamma12_k2,
938  &sigma12_k2,
939  a1, rs_A,
940  a2, rs_A );
941 
942  if ( !is_ident23_k2 )
943  MAC_Apply_G_mx2_opd( m_A,
944  &gamma23_k2,
945  &sigma23_k2,
946  a2, rs_A,
947  a3, rs_A );
948  }
949  else
950  {
951  // Apply to all four columns.
952 
953  MAC_Apply_G_mx4s_opd( m_A,
954  &gamma23_k1,
955  &sigma23_k1,
956  &gamma34_k1,
957  &sigma34_k1,
958  &gamma12_k2,
959  &sigma12_k2,
960  &gamma23_k2,
961  &sigma23_k2,
962  a1, rs_A,
963  a2, rs_A,
964  a3, rs_A,
965  a4, rs_A );
966  }
967  }
968 
969  if ( n_left == 1 )
970  {
971  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
972  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
973  a2 = buff_A + (g )*cs_A;
974  a3 = buff_A + (g + 1)*cs_A;
975  a4 = buff_A + (g + 2)*cs_A;
976 
977  gamma23_k1 = g23_k1->real;
978  sigma23_k1 = g23_k1->imag;
979  gamma34_k1 = g34_k1->real;
980  sigma34_k1 = g34_k1->imag;
981 
982  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
983  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
984 
985  if ( !is_ident23_k1 && is_ident34_k1 )
986  {
987  MAC_Apply_G_mx2_opd( m_A,
988  &gamma23_k1,
989  &sigma23_k1,
990  a2, rs_A,
991  a3, rs_A );
992  }
993  else if ( is_ident23_k1 && !is_ident34_k1 )
994  {
995  MAC_Apply_G_mx2_opd( m_A,
996  &gamma34_k1,
997  &sigma34_k1,
998  a3, rs_A,
999  a4, rs_A );
1000  }
1001  else
1002  {
1003  MAC_Apply_G_mx3_opd( m_A,
1004  &gamma23_k1,
1005  &sigma23_k1,
1006  &gamma34_k1,
1007  &sigma34_k1,
1008  a2, rs_A,
1009  a3, rs_A,
1010  a4, rs_A );
1011  }
1012  }
1013  }
1014 
1015  return FLA_SUCCESS;
1016 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_bld_var3(), and FLA_Apply_G_rf_opt_var3().
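
Variant 3 fuses two rotations from set k with two rotations from set k+1 and applies them to four adjacent columns in a single pass over the rows (MAC_Apply_G_mx4s_opd in the listing above). The stand-alone sketch below illustrates that fusion under the same assumed sign convention as before, applying the four 2x2 updates in the order used by the listing: columns (2,3) and (3,4) from set k, then (1,2) and (2,3) from set k+1. It is an illustration of the technique, not the libflame macro.

/* Fusing four Givens updates (two per rotation set) across four adjacent
   double-precision columns, touching each row of the four columns once. */
#include <stdio.h>

static void apply_g_mx4s_d( int m,
                            double g23_k1, double s23_k1,
                            double g34_k1, double s34_k1,
                            double g12_k2, double s12_k2,
                            double g23_k2, double s23_k2,
                            double* a1, double* a2,
                            double* a3, double* a4, int inc )
{
    int i;
    for ( i = 0; i < m; ++i )
    {
        double x1 = a1[ i*inc ], x2 = a2[ i*inc ];
        double x3 = a3[ i*inc ], x4 = a4[ i*inc ];
        double t;

        t  =  g23_k1 * x2 + s23_k1 * x3;   /* set k:   columns (2,3) */
        x3 = -s23_k1 * x2 + g23_k1 * x3;
        x2 =  t;

        t  =  g34_k1 * x3 + s34_k1 * x4;   /* set k:   columns (3,4) */
        x4 = -s34_k1 * x3 + g34_k1 * x4;
        x3 =  t;

        t  =  g12_k2 * x1 + s12_k2 * x2;   /* set k+1: columns (1,2) */
        x2 = -s12_k2 * x1 + g12_k2 * x2;
        x1 =  t;

        t  =  g23_k2 * x2 + s23_k2 * x3;   /* set k+1: columns (2,3) */
        x3 = -s23_k2 * x2 + g23_k2 * x3;
        x2 =  t;

        a1[ i*inc ] = x1;  a2[ i*inc ] = x2;
        a3[ i*inc ] = x3;  a4[ i*inc ] = x4;
    }
}

int main( void )
{
    double a1[ 2 ] = { 1.0, 1.0 }, a2[ 2 ] = { 2.0, 2.0 };
    double a3[ 2 ] = { 3.0, 3.0 }, a4[ 2 ] = { 4.0, 4.0 };

    /* Identity rotations from set k, a 90-degree rotation on columns
       (1,2) from set k+1, identity on (2,3): only a1 and a2 change. */
    apply_g_mx4s_d( 2,
                    1.0, 0.0,  1.0, 0.0,
                    0.0, 1.0,  1.0, 0.0,
                    a1, a2, a3, a4, 1 );

    printf( "a1 = %g %g, a2 = %g %g\n", a1[ 0 ], a1[ 1 ], a2[ 0 ], a2[ 1 ] );
    return 0;
}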

◆ FLA_Apply_G_rf_opd_var4()

FLA_Error FLA_Apply_G_rf_opd_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opd_var5()

FLA_Error FLA_Apply_G_rf_opd_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opd_var6()

FLA_Error FLA_Apply_G_rf_opd_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
398 {
399  double one = bl1_d1();
400  double zero = bl1_d0();
401  double gamma12;
402  double sigma12;
403  double gamma23;
404  double sigma23;
405  double* a1;
406  double* a2;
407  double* a3;
408  dcomplex* g12;
409  dcomplex* g23;
410  int i, j, g, k;
411  int nG, nG_app;
412  int n_iter;
413  int n_left;
414  int k_minus_1;
415  int n_fuse;
416  int is_ident12, is_ident23;
417 
418  k_minus_1 = k_G - 1;
419  nG = n_A - 1;
420  n_fuse = 2;
421 
422  // Use the simple variant for nG < (k - 1) or k == 1.
423  if ( nG < k_minus_1 || k_G == 1 )
424  {
425  FLA_Apply_G_rf_opd_var1( k_G,
426  m_A,
427  n_A,
428  buff_G, rs_G, cs_G,
429  buff_A, rs_A, cs_A );
430  return FLA_SUCCESS;
431  }
432 
433 
434  // Start-up phase.
435 
436  for ( j = 0; j < k_minus_1; ++j )
437  {
438  nG_app = j + 1;
439  n_iter = nG_app / n_fuse;
440  n_left = nG_app % n_fuse;
441 
442  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
443  {
444  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
445  g23 = buff_G + (g )*rs_G + (k )*cs_G;
446  a1 = buff_A + (g - 1)*cs_A;
447  a2 = buff_A + (g )*cs_A;
448  a3 = buff_A + (g + 1)*cs_A;
449 
450  gamma12 = g12->real;
451  sigma12 = g12->imag;
452  gamma23 = g23->real;
453  sigma23 = g23->imag;
454 
455  is_ident12 = ( gamma12 == one && sigma12 == zero );
456  is_ident23 = ( gamma23 == one && sigma23 == zero );
457 
458  if ( !is_ident12 && is_ident23 )
459  {
460  // Apply only to columns 1 and 2.
461 
462  MAC_Apply_G_mx2_opd( m_A,
463  &gamma12,
464  &sigma12,
465  a1, rs_A,
466  a2, rs_A );
467  }
468  else if ( is_ident12 && !is_ident23 )
469  {
470  // Apply only to columns 2 and 3.
471 
472  MAC_Apply_G_mx2_opd( m_A,
473  &gamma23,
474  &sigma23,
475  a2, rs_A,
476  a3, rs_A );
477  }
478  else if ( !is_ident12 && !is_ident23 )
479  {
480  // Apply to all three columns.
481 
482  MAC_Apply_G_mx3b_opd( m_A,
483  &gamma12,
484  &sigma12,
485  &gamma23,
486  &sigma23,
487  a1, rs_A,
488  a2, rs_A,
489  a3, rs_A );
490  }
491  }
492  //for ( k = 0; k < n_left; k += 1, g -= 1 )
493  if ( n_left == 1 )
494  {
495  g23 = buff_G + (g )*rs_G + (k )*cs_G;
496  a2 = buff_A + (g )*cs_A;
497  a3 = buff_A + (g + 1)*cs_A;
498 
499  gamma23 = g23->real;
500  sigma23 = g23->imag;
501 
502  is_ident23 = ( gamma23 == one && sigma23 == zero );
503 
504  if ( !is_ident23 )
505  MAC_Apply_G_mx2_opd( m_A,
506  &gamma23,
507  &sigma23,
508  a2, rs_A,
509  a3, rs_A );
510  }
511  }
512 
513  // Pipeline stage
514 
515  for ( j = k_minus_1; j < nG; ++j )
516  {
517  nG_app = k_G;
518  n_iter = nG_app / n_fuse;
519  n_left = nG_app % n_fuse;
520 
521  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
522  {
523  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
524  g23 = buff_G + (g )*rs_G + (k )*cs_G;
525  a1 = buff_A + (g - 1)*cs_A;
526  a2 = buff_A + (g )*cs_A;
527  a3 = buff_A + (g + 1)*cs_A;
528 
529  gamma12 = g12->real;
530  sigma12 = g12->imag;
531  gamma23 = g23->real;
532  sigma23 = g23->imag;
533 
534  is_ident12 = ( gamma12 == one && sigma12 == zero );
535  is_ident23 = ( gamma23 == one && sigma23 == zero );
536 
537  if ( !is_ident12 && is_ident23 )
538  {
539  // Apply only to columns 1 and 2.
540 
541  MAC_Apply_G_mx2_opd( m_A,
542  &gamma12,
543  &sigma12,
544  a1, rs_A,
545  a2, rs_A );
546  }
547  else if ( is_ident12 && !is_ident23 )
548  {
549  // Apply only to columns 2 and 3.
550 
551  MAC_Apply_G_mx2_opd( m_A,
552  &gamma23,
553  &sigma23,
554  a2, rs_A,
555  a3, rs_A );
556  }
557  else if ( !is_ident12 && !is_ident23 )
558  {
559  // Apply to all three columns.
560 
561  MAC_Apply_G_mx3b_opd( m_A,
562  &gamma12,
563  &sigma12,
564  &gamma23,
565  &sigma23,
566  a1, rs_A,
567  a2, rs_A,
568  a3, rs_A );
569  }
570  }
571  //for ( k = 0; k < n_left; k += 1, g -= 1 )
572  if ( n_left == 1 )
573  {
574  g23 = buff_G + (g )*rs_G + (k )*cs_G;
575  a2 = buff_A + (g )*cs_A;
576  a3 = buff_A + (g + 1)*cs_A;
577 
578  gamma23 = g23->real;
579  sigma23 = g23->imag;
580 
581  is_ident23 = ( gamma23 == one && sigma23 == zero );
582 
583  if ( !is_ident23 )
584  MAC_Apply_G_mx2_opd( m_A,
585  &gamma23,
586  &sigma23,
587  a2, rs_A,
588  a3, rs_A );
589  }
590  }
591 
592  // Shutdown stage
593 
594  for ( j = 1; j < k_G; ++j )
595  {
596  nG_app = k_G - j;
597  n_iter = nG_app / n_fuse;
598  n_left = nG_app % n_fuse;
599 
600  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
601  {
602  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
603  g23 = buff_G + (g )*rs_G + (k )*cs_G;
604  a1 = buff_A + (g - 1)*cs_A;
605  a2 = buff_A + (g )*cs_A;
606  a3 = buff_A + (g + 1)*cs_A;
607 
608  gamma12 = g12->real;
609  sigma12 = g12->imag;
610  gamma23 = g23->real;
611  sigma23 = g23->imag;
612 
613  is_ident12 = ( gamma12 == one && sigma12 == zero );
614  is_ident23 = ( gamma23 == one && sigma23 == zero );
615 
616  if ( !is_ident12 && is_ident23 )
617  {
618  // Apply only to columns 1 and 2.
619 
620  MAC_Apply_G_mx2_opd( m_A,
621  &gamma12,
622  &sigma12,
623  a1, rs_A,
624  a2, rs_A );
625  }
626  else if ( is_ident12 && !is_ident23 )
627  {
628  // Apply only to columns 2 and 3.
629 
630  MAC_Apply_G_mx2_opd( m_A,
631  &gamma23,
632  &sigma23,
633  a2, rs_A,
634  a3, rs_A );
635  }
636  else if ( !is_ident12 && !is_ident23 )
637  {
638  // Apply to all three columns.
639 
640  MAC_Apply_G_mx3b_opd( m_A,
641  &gamma12,
642  &sigma12,
643  &gamma23,
644  &sigma23,
645  a1, rs_A,
646  a2, rs_A,
647  a3, rs_A );
648  }
649  }
650  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
651  if ( n_left == 1 )
652  {
653  g23 = buff_G + (g )*rs_G + (k )*cs_G;
654  a2 = buff_A + (g )*cs_A;
655  a3 = buff_A + (g + 1)*cs_A;
656 
657  gamma23 = g23->real;
658  sigma23 = g23->imag;
659 
660  is_ident23 = ( gamma23 == one && sigma23 == zero );
661 
662  if ( !is_ident23 )
663  MAC_Apply_G_mx2_opd( m_A,
664  &gamma23,
665  &sigma23,
666  a2, rs_A,
667  a3, rs_A );
668  }
669  }
670 
671  return FLA_SUCCESS;
672 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var6().

◆ FLA_Apply_G_rf_opd_var7()

FLA_Error FLA_Apply_G_rf_opd_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opd_var8()

FLA_Error FLA_Apply_G_rf_opd_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opd_var9()

FLA_Error FLA_Apply_G_rf_opd_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
double *  buff_A,
int  rs_A,
int  cs_A 
)
385 {
386  double one = bl1_d1();
387  double zero = bl1_d0();
388  double gamma12;
389  double sigma12;
390  double gamma23;
391  double sigma23;
392  double* a1;
393  double* a2;
394  double* a3;
395  dcomplex* g12;
396  dcomplex* g23;
397  int i, j, g, k;
398  int nG, nG_app;
399  int n_iter;
400  int n_left;
401  int k_minus_1;
402  int n_fuse;
403  int is_ident12, is_ident23;
404 
405  k_minus_1 = k_G - 1;
406  nG = n_A - 1;
407  n_fuse = 2;
408 
409  // Use the simple variant for nG < 2(k - 1) or k == 1.
410  if ( nG < 2*k_minus_1 || k_G == 1 )
411  {
412  FLA_Apply_G_rf_opd_var1( k_G,
413  m_A,
414  n_A,
415  buff_G, rs_G, cs_G,
416  buff_A, rs_A, cs_A );
417  return FLA_SUCCESS;
418  }
419 
420 
421  // Start-up phase.
422 
423  for ( j = -1; j < k_minus_1; j += n_fuse )
424  {
425  nG_app = j + 1;
426  n_iter = nG_app;
427  n_left = 1;
428 
429  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
430  {
431  g12 = buff_G + (g )*rs_G + (k )*cs_G;
432  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
433  a1 = buff_A + (g )*cs_A;
434  a2 = buff_A + (g + 1)*cs_A;
435  a3 = buff_A + (g + 2)*cs_A;
436 
437  gamma12 = g12->real;
438  sigma12 = g12->imag;
439  gamma23 = g23->real;
440  sigma23 = g23->imag;
441 
442  is_ident12 = ( gamma12 == one && sigma12 == zero );
443  is_ident23 = ( gamma23 == one && sigma23 == zero );
444 
445  if ( !is_ident12 && is_ident23 )
446  {
447  // Apply only to columns 1 and 2.
448 
449  MAC_Apply_G_mx2_opd( m_A,
450  &gamma12,
451  &sigma12,
452  a1, rs_A,
453  a2, rs_A );
454  }
455  else if ( is_ident12 && !is_ident23 )
456  {
457  // Apply only to columns 2 and 3.
458 
459  MAC_Apply_G_mx2_opd( m_A,
460  &gamma23,
461  &sigma23,
462  a2, rs_A,
463  a3, rs_A );
464  }
465  else if ( !is_ident12 && !is_ident23 )
466  {
467  // Apply to all three columns.
468 
469  MAC_Apply_G_mx3_opd( m_A,
470  &gamma12,
471  &sigma12,
472  &gamma23,
473  &sigma23,
474  a1, rs_A,
475  a2, rs_A,
476  a3, rs_A );
477  }
478  }
479 
480  if ( n_left == 1 )
481  {
482  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
483  a2 = buff_A + (g + 1)*cs_A;
484  a3 = buff_A + (g + 2)*cs_A;
485 
486  gamma23 = g23->real;
487  sigma23 = g23->imag;
488 
489  is_ident23 = ( gamma23 == one && sigma23 == zero );
490 
491  if ( !is_ident23 )
492  MAC_Apply_G_mx2_opd( m_A,
493  &gamma23,
494  &sigma23,
495  a2, rs_A,
496  a3, rs_A );
497  }
498  }
499 
500  // Pipeline stage
501 
502  for ( ; j < nG - 1; j += n_fuse )
503  {
504  nG_app = k_G;
505  n_iter = nG_app;
506  n_left = 0;
507 
508  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
509  {
510  g12 = buff_G + (g )*rs_G + (k )*cs_G;
511  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
512  a1 = buff_A + (g )*cs_A;
513  a2 = buff_A + (g + 1)*cs_A;
514  a3 = buff_A + (g + 2)*cs_A;
515 
516  gamma12 = g12->real;
517  sigma12 = g12->imag;
518  gamma23 = g23->real;
519  sigma23 = g23->imag;
520 
521  is_ident12 = ( gamma12 == one && sigma12 == zero );
522  is_ident23 = ( gamma23 == one && sigma23 == zero );
523 
524  if ( !is_ident12 && is_ident23 )
525  {
526  // Apply only to columns 1 and 2.
527 
528  MAC_Apply_G_mx2_opd( m_A,
529  &gamma12,
530  &sigma12,
531  a1, rs_A,
532  a2, rs_A );
533  }
534  else if ( is_ident12 && !is_ident23 )
535  {
536  // Apply only to columns 2 and 3.
537 
538  MAC_Apply_G_mx2_opd( m_A,
539  &gamma23,
540  &sigma23,
541  a2, rs_A,
542  a3, rs_A );
543  }
544  else if ( !is_ident12 && !is_ident23 )
545  {
546  // Apply to all three columns.
547 
548  MAC_Apply_G_mx3_opd( m_A,
549  &gamma12,
550  &sigma12,
551  &gamma23,
552  &sigma23,
553  a1, rs_A,
554  a2, rs_A,
555  a3, rs_A );
556  }
557  }
558  }
559 
560  // Shutdown stage
561 
562  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
563  {
564  g = nG - 1;
565  k = j;
566 
567  n_left = 1;
568  if ( n_left == 1 )
569  {
570  g12 = buff_G + (g )*rs_G + (k )*cs_G;
571  a1 = buff_A + (g )*cs_A;
572  a2 = buff_A + (g + 1)*cs_A;
573 
574  gamma12 = g12->real;
575  sigma12 = g12->imag;
576 
577  is_ident12 = ( gamma12 == one && sigma12 == zero );
578 
579  if ( !is_ident12 )
580  MAC_Apply_G_mx2_opd( m_A,
581  &gamma12,
582  &sigma12,
583  a1, rs_A,
584  a2, rs_A );
585  ++k;
586  --g;
587  }
588 
589  nG_app = k_minus_1 - j;
590  n_iter = nG_app;
591 
592  for ( i = 0; i < n_iter; ++i, ++k, --g )
593  {
594  g12 = buff_G + (g )*rs_G + (k )*cs_G;
595  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
596  a1 = buff_A + (g )*cs_A;
597  a2 = buff_A + (g + 1)*cs_A;
598  a3 = buff_A + (g + 2)*cs_A;
599 
600  gamma12 = g12->real;
601  sigma12 = g12->imag;
602  gamma23 = g23->real;
603  sigma23 = g23->imag;
604 
605  is_ident12 = ( gamma12 == one && sigma12 == zero );
606  is_ident23 = ( gamma23 == one && sigma23 == zero );
607 
608  if ( !is_ident12 && is_ident23 )
609  {
610  // Apply only to columns 1 and 2.
611 
612  MAC_Apply_G_mx2_opd( m_A,
613  &gamma12,
614  &sigma12,
615  a1, rs_A,
616  a2, rs_A );
617  }
618  else if ( is_ident12 && !is_ident23 )
619  {
620  // Apply only to columns 2 and 3.
621 
622  MAC_Apply_G_mx2_opd( m_A,
623  &gamma23,
624  &sigma23,
625  a2, rs_A,
626  a3, rs_A );
627  }
628  else if ( !is_ident12 && !is_ident23 )
629  {
630  // Apply to all three columns.
631 
632  MAC_Apply_G_mx3_opd( m_A,
633  &gamma12,
634  &sigma12,
635  &gamma23,
636  &sigma23,
637  a1, rs_A,
638  a2, rs_A,
639  a3, rs_A );
640  }
641  }
642  }
643 
644  return FLA_SUCCESS;
645 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opd_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var9().
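Variant 9 above fuses two consecutive rotations of the same rotation set, so the shared middle column is loaded and stored once per row instead of twice. A hedged sketch of such a fused 3-column update for doubles follows; apply_givens_mx3_d is an illustrative stand-in for MAC_Apply_G_mx3_opd, reusing the sign convention assumed in the earlier 2-column sketch.

    /* Hypothetical fused "mx3" update: rotation (gamma12, sigma12) acts on
       columns a1/a2, then rotation (gamma23, sigma23) acts on columns a2/a3,
       both in one pass over the rows so column a2 is touched only once per
       row. Not a transcription of the libflame macro. */
    static void apply_givens_mx3_d( int m,
                                    double gamma12, double sigma12,
                                    double gamma23, double sigma23,
                                    double* a1, double* a2, double* a3, int rs )
    {
        int i;
        for ( i = 0; i < m; ++i )
        {
            double alpha1 = a1[ i*rs ];
            double alpha2 = a2[ i*rs ];
            double alpha3 = a3[ i*rs ];
            double temp2;

            // First rotation: columns 1 and 2.
            a1[ i*rs ] =  gamma12 * alpha1 + sigma12 * alpha2;
            temp2      = -sigma12 * alpha1 + gamma12 * alpha2;

            // Second rotation: columns 2 and 3, using the updated column 2.
            a2[ i*rs ] =  gamma23 * temp2 + sigma23 * alpha3;
            a3[ i*rs ] = -sigma23 * temp2 + gamma23 * alpha3;
        }
    }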

◆ FLA_Apply_G_rf_ops_var1()

FLA_Error FLA_Apply_G_rf_ops_var1 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  int nG_app = n_A - 1;
122  int l, j;
123  float gamma;
124  float sigma;
125  float* a1;
126  float* a2;
127  scomplex* g1;
128  scomplex* g11;
129 
130  g1 = buff_G;
131 
132  for ( l = 0; l < k_G; ++l )
133  {
134  a1 = buff_A;
135  a2 = buff_A + cs_A;
136  g11 = g1;
137 
138  for ( j = 0; j < nG_app; ++j )
139  {
140  gamma = g11->real;
141  sigma = g11->imag;
142 
143  // Skip the current iteration if the rotation is identity.
144  if ( gamma != one || sigma != zero )
145  {
146  MAC_Apply_G_mx2_ops( m_A,
147  &gamma,
148  &sigma,
149  a1, rs_A,
150  a2, rs_A );
151  }
152 
153  a1 += cs_A;
154  a2 += cs_A;
155  g11 += rs_G;
156  }
157 
158  g1 += cs_G;
159  }
160 
161  return FLA_SUCCESS;
162 }

References bl1_s0(), bl1_s1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_lf_opt_var1(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_ops_var9(), and FLA_Apply_G_rf_opt_var1().
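The flat routines can be driven directly with plain buffers. Below is a small hedged driver for FLA_Apply_G_rf_ops_var1, assuming column-major storage for both G (one rotation per adjacent column pair, so n_A - 1 rows and k_G columns, gamma in .real and sigma in .imag) and A, and assuming that FLAME.h supplies the scomplex type and the prototype; the chosen sizes and the identity-initialized rotations are purely illustrative.

    #include <stdlib.h>
    #include "FLAME.h"   // assumed to provide scomplex and the prototype above

    int main( void )
    {
        int       k_G = 2, m_A = 4, n_A = 5;
        int       nG  = n_A - 1;          // one rotation per adjacent column pair
        int       rs_G = 1, cs_G = nG;    // G stored column-major, nG x k_G
        int       rs_A = 1, cs_A = m_A;   // A stored column-major, m_A x n_A
        scomplex* buff_G = malloc( ( size_t )( nG * k_G ) * sizeof( scomplex ) );
        float*    buff_A = malloc( ( size_t )( m_A * n_A ) * sizeof( float ) );
        int       i, j;

        // Arbitrary data in A; identity rotations (gamma = 1, sigma = 0) in G,
        // which the routine skips, so A is returned unchanged.
        for ( j = 0; j < n_A; ++j )
            for ( i = 0; i < m_A; ++i )
                buff_A[ i + j*cs_A ] = ( float )( i + j );

        for ( j = 0; j < k_G; ++j )
            for ( i = 0; i < nG; ++i )
            {
                buff_G[ i + j*cs_G ].real = 1.0F;
                buff_G[ i + j*cs_G ].imag = 0.0F;
            }

        FLA_Apply_G_rf_ops_var1( k_G, m_A, n_A,
                                 buff_G, rs_G, cs_G,
                                 buff_A, rs_A, cs_A );

        free( buff_G );
        free( buff_A );
        return 0;
    }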

◆ FLA_Apply_G_rf_ops_var2()

FLA_Error FLA_Apply_G_rf_ops_var2 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
117 {
118  float one = bl1_s1();
119  float zero = bl1_s0();
120  float gamma;
121  float sigma;
122  float* a1;
123  float* a2;
124  scomplex* g11;
125  int j, g, k;
126  int nG, nG_app;
127  int k_minus_1;
128 
129  k_minus_1 = k_G - 1;
130  nG = n_A - 1;
131 
 132  // Use the simple variant for nG < (k - 1) or k == 1.
 133  if ( nG < k_minus_1 || k_G == 1 )
 134  {
 135  FLA_Apply_G_rf_ops_var1( k_G,
 136  m_A,
137  n_A,
138  buff_G, rs_G, cs_G,
139  buff_A, rs_A, cs_A );
140  return FLA_SUCCESS;
141  }
142 
143 
144  // Start-up phase.
145 
146  for ( j = 0; j < k_minus_1; ++j )
147  {
148  nG_app = j + 1;
149 
150  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
151  {
152  g11 = buff_G + (g )*rs_G + (k )*cs_G;
153  a1 = buff_A + (g )*cs_A;
154  a2 = buff_A + (g + 1)*cs_A;
155 
156  gamma = g11->real;
157  sigma = g11->imag;
158 
159  // Skip the current iteration if the rotation is identity.
160  if ( gamma == one && sigma == zero ) continue;
161 
162  MAC_Apply_G_mx2_ops( m_A,
163  &gamma,
164  &sigma,
165  a1, rs_A,
166  a2, rs_A );
167  }
168  }
169 
170  // Pipeline stage
171 
172  for ( j = k_minus_1; j < nG; ++j )
173  {
174  nG_app = k_G;
175 
176  for ( k = 0, g = j; k < nG_app; ++k, --g )
177  {
178  g11 = buff_G + (g )*rs_G + (k )*cs_G;
179  a1 = buff_A + (g )*cs_A;
180  a2 = buff_A + (g + 1)*cs_A;
181 
182  gamma = g11->real;
183  sigma = g11->imag;
184 
185  // Skip the current iteration if the rotation is identity.
186  if ( gamma == one && sigma == zero ) continue;
187 
188  MAC_Apply_G_mx2_ops( m_A,
189  &gamma,
190  &sigma,
191  a1, rs_A,
192  a2, rs_A );
193  }
194  }
195 
196  // Shutdown stage
197 
198  for ( j = nG - k_minus_1; j < nG; ++j )
199  {
200  nG_app = nG - j;
201 
202  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
203  {
204  g11 = buff_G + (g )*rs_G + (k )*cs_G;
205  a1 = buff_A + (g )*cs_A;
206  a2 = buff_A + (g + 1)*cs_A;
207 
208  gamma = g11->real;
209  sigma = g11->imag;
210 
211  // Skip the current iteration if the rotation is identity.
212  if ( gamma == one && sigma == zero ) continue;
213 
214  MAC_Apply_G_mx2_ops( m_A,
215  &gamma,
216  &sigma,
217  a1, rs_A,
218  a2, rs_A );
219  }
220  }
221 
222  return FLA_SUCCESS;
223 }
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:113

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().
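Variant 2 applies exactly the same rotations as variant 1 but reorders them into a wavefront: the start-up phase brings the rotation sets online one at a time, the pipeline stage applies one rotation from every set per outer step, and the shutdown stage drains the trailing sets. The standalone sketch below simply prints which (set k, rotation g) pair is visited at each outer step j, mirroring the index arithmetic in the listing; it involves no libflame calls.

    #include <stdio.h>

    /* Print the (k, g) schedule used by the var2-style wavefront for k_G
       rotation sets applied to an n_A-column matrix, i.e. nG = n_A - 1
       rotations per set. */
    static void print_var2_schedule( int k_G, int n_A )
    {
        int nG = n_A - 1;
        int k_minus_1 = k_G - 1;
        int j, k, g, nG_app;

        // Start-up phase: step j touches sets 0..j.
        for ( j = 0; j < k_minus_1; ++j )
            for ( k = 0, g = j; k < j + 1; ++k, --g )
                printf( "startup  j=%d: set k=%d applies rotation g=%d\n", j, k, g );

        // Pipeline stage: every set applies exactly one rotation per step.
        for ( j = k_minus_1; j < nG; ++j )
            for ( k = 0, g = j; k < k_G; ++k, --g )
                printf( "pipeline j=%d: set k=%d applies rotation g=%d\n", j, k, g );

        // Shutdown stage: the trailing sets finish their last rotations.
        for ( j = nG - k_minus_1; j < nG; ++j )
        {
            nG_app = nG - j;
            for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
                printf( "shutdown j=%d: set k=%d applies rotation g=%d\n", j, k, g );
        }
    }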

◆ FLA_Apply_G_rf_ops_var3()

FLA_Error FLA_Apply_G_rf_ops_var3 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
117 {
118  float one = bl1_s1();
119  float zero = bl1_s0();
120  float gamma23_k1;
121  float sigma23_k1;
122  float gamma34_k1;
123  float sigma34_k1;
124  float gamma12_k2;
125  float sigma12_k2;
126  float gamma23_k2;
127  float sigma23_k2;
128  float* a1;
129  float* a2;
130  float* a3;
131  float* a4;
132  scomplex* g23_k1;
133  scomplex* g34_k1;
134  scomplex* g12_k2;
135  scomplex* g23_k2;
136  int i, j, g, k;
137  int nG, nG_app;
138  int n_iter;
139  int n_left;
140  int k_minus_1;
141  int n_fuse;
142  int k_fuse;
143  int is_ident23_k1, is_ident34_k1;
144  int is_ident12_k2, is_ident23_k2;
145  int has_ident;
146 
147  k_minus_1 = k_G - 1;
148  nG = n_A - 1;
149  n_fuse = 2;
150  k_fuse = 2;
151 
 152  // Use the simple variant for nG < 2(k - 1) or k == 1.
 153  if ( nG < 2*k_minus_1 || k_G == 1 )
 154  {
 155  FLA_Apply_G_rf_ops_var1( k_G,
 156  m_A,
157  n_A,
158  buff_G, rs_G, cs_G,
159  buff_A, rs_A, cs_A );
160  return FLA_SUCCESS;
161  }
162 
163 
164  // Start-up phase.
165 
166  for ( j = -1; j < k_minus_1; j += n_fuse )
167  {
168  nG_app = j + 2;
169  n_iter = nG_app / k_fuse;
170  n_left = 1;
171 
172  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
173  {
174  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
175  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
176  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
177  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
178  a1 = buff_A + (g - 1)*cs_A;
179  a2 = buff_A + (g )*cs_A;
180  a3 = buff_A + (g + 1)*cs_A;
181  a4 = buff_A + (g + 2)*cs_A;
182 
183  gamma23_k1 = g23_k1->real;
184  sigma23_k1 = g23_k1->imag;
185  gamma34_k1 = g34_k1->real;
186  sigma34_k1 = g34_k1->imag;
187  gamma12_k2 = g12_k2->real;
188  sigma12_k2 = g12_k2->imag;
189  gamma23_k2 = g23_k2->real;
190  sigma23_k2 = g23_k2->imag;
191 
192  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
193  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
194  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
195  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
196  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
197  is_ident12_k2 || is_ident23_k2 );
198 
199  if ( has_ident )
200  {
201  // Apply to pairs of columns as needed.
202 
203  if ( !is_ident23_k1 )
204  MAC_Apply_G_mx2_ops( m_A,
205  &gamma23_k1,
206  &sigma23_k1,
207  a2, rs_A,
208  a3, rs_A );
209 
210  if ( !is_ident34_k1 )
211  MAC_Apply_G_mx2_ops( m_A,
212  &gamma34_k1,
213  &sigma34_k1,
214  a3, rs_A,
215  a4, rs_A );
216 
217  if ( !is_ident12_k2 )
218  MAC_Apply_G_mx2_ops( m_A,
219  &gamma12_k2,
220  &sigma12_k2,
221  a1, rs_A,
222  a2, rs_A );
223 
224  if ( !is_ident23_k2 )
225  MAC_Apply_G_mx2_ops( m_A,
226  &gamma23_k2,
227  &sigma23_k2,
228  a2, rs_A,
229  a3, rs_A );
230  }
231  else
232  {
233  // Apply to all four columns.
234 
235  MAC_Apply_G_mx4s_ops( m_A,
236  &gamma23_k1,
237  &sigma23_k1,
238  &gamma34_k1,
239  &sigma34_k1,
240  &gamma12_k2,
241  &sigma12_k2,
242  &gamma23_k2,
243  &sigma23_k2,
244  a1, rs_A,
245  a2, rs_A,
246  a3, rs_A,
247  a4, rs_A );
248  }
249  }
250 
251  if ( n_left == 1 )
252  {
253  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
254  a3 = buff_A + (g + 1)*cs_A;
255  a4 = buff_A + (g + 2)*cs_A;
256 
257  gamma34_k1 = g34_k1->real;
258  sigma34_k1 = g34_k1->imag;
259 
260  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
261 
262  if ( !is_ident34_k1 )
263  MAC_Apply_G_mx2_ops( m_A,
264  &gamma34_k1,
265  &sigma34_k1,
266  a3, rs_A,
267  a4, rs_A );
268  }
269  }
270 
271  // Pipeline stage
272 
273  for ( ; j < nG - 1; j += n_fuse )
274  {
275  nG_app = k_G;
276  n_iter = nG_app / k_fuse;
277  n_left = nG_app % k_fuse;
278 
279  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
280  {
281  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
282  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
283  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
284  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
285  a1 = buff_A + (g - 1)*cs_A;
286  a2 = buff_A + (g )*cs_A;
287  a3 = buff_A + (g + 1)*cs_A;
288  a4 = buff_A + (g + 2)*cs_A;
289 
290  gamma23_k1 = g23_k1->real;
291  sigma23_k1 = g23_k1->imag;
292  gamma34_k1 = g34_k1->real;
293  sigma34_k1 = g34_k1->imag;
294  gamma12_k2 = g12_k2->real;
295  sigma12_k2 = g12_k2->imag;
296  gamma23_k2 = g23_k2->real;
297  sigma23_k2 = g23_k2->imag;
298 
299  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
300  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
301  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
302  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
303  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
304  is_ident12_k2 || is_ident23_k2 );
305 
306  if ( has_ident )
307  {
308  // Apply to pairs of columns as needed.
309 
310  if ( !is_ident23_k1 )
311  MAC_Apply_G_mx2_ops( m_A,
312  &gamma23_k1,
313  &sigma23_k1,
314  a2, rs_A,
315  a3, rs_A );
316 
317  if ( !is_ident34_k1 )
318  MAC_Apply_G_mx2_ops( m_A,
319  &gamma34_k1,
320  &sigma34_k1,
321  a3, rs_A,
322  a4, rs_A );
323 
324  if ( !is_ident12_k2 )
325  MAC_Apply_G_mx2_ops( m_A,
326  &gamma12_k2,
327  &sigma12_k2,
328  a1, rs_A,
329  a2, rs_A );
330 
331  if ( !is_ident23_k2 )
332  MAC_Apply_G_mx2_ops( m_A,
333  &gamma23_k2,
334  &sigma23_k2,
335  a2, rs_A,
336  a3, rs_A );
337  }
338  else
339  {
340  // Apply to all four columns.
341 
342  MAC_Apply_G_mx4s_ops( m_A,
343  &gamma23_k1,
344  &sigma23_k1,
345  &gamma34_k1,
346  &sigma34_k1,
347  &gamma12_k2,
348  &sigma12_k2,
349  &gamma23_k2,
350  &sigma23_k2,
351  a1, rs_A,
352  a2, rs_A,
353  a3, rs_A,
354  a4, rs_A );
355  }
356  }
357 
358  if ( n_left == 1 )
359  {
360  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
361  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
362  a2 = buff_A + (g )*cs_A;
363  a3 = buff_A + (g + 1)*cs_A;
364  a4 = buff_A + (g + 2)*cs_A;
365 
366  gamma23_k1 = g23_k1->real;
367  sigma23_k1 = g23_k1->imag;
368  gamma34_k1 = g34_k1->real;
369  sigma34_k1 = g34_k1->imag;
370 
371  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
372  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
373 
374  if ( !is_ident23_k1 && is_ident34_k1 )
375  {
376  MAC_Apply_G_mx2_ops( m_A,
377  &gamma23_k1,
378  &sigma23_k1,
379  a2, rs_A,
380  a3, rs_A );
381  }
382  else if ( is_ident23_k1 && !is_ident34_k1 )
383  {
384  MAC_Apply_G_mx2_ops( m_A,
385  &gamma34_k1,
386  &sigma34_k1,
387  a3, rs_A,
388  a4, rs_A );
389  }
390  else
391  {
392  MAC_Apply_G_mx3_ops( m_A,
393  &gamma23_k1,
394  &sigma23_k1,
395  &gamma34_k1,
396  &sigma34_k1,
397  a2, rs_A,
398  a3, rs_A,
399  a4, rs_A );
400  }
401  }
402  }
403 
404  // Shutdown stage
405 
406  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
407  {
408  g = nG - 1;
409  k = j;
410 
411  //n_left = 1;
412  //if ( n_left == 1 )
413  {
414  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
415  a2 = buff_A + (g )*cs_A;
416  a3 = buff_A + (g + 1)*cs_A;
417 
418  gamma23_k1 = g23_k1->real;
419  sigma23_k1 = g23_k1->imag;
420 
421  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
422 
423  if ( !is_ident23_k1 )
424  MAC_Apply_G_mx2_ops( m_A,
425  &gamma23_k1,
426  &sigma23_k1,
427  a2, rs_A,
428  a3, rs_A );
429  ++k;
430  --g;
431  }
432 
433  nG_app = k_minus_1 - j;
434  n_iter = nG_app / k_fuse;
435  n_left = nG_app % k_fuse;
436 
437  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
438  {
439  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
440  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
441  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
442  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
443  a1 = buff_A + (g - 1)*cs_A;
444  a2 = buff_A + (g )*cs_A;
445  a3 = buff_A + (g + 1)*cs_A;
446  a4 = buff_A + (g + 2)*cs_A;
447 
448  gamma23_k1 = g23_k1->real;
449  sigma23_k1 = g23_k1->imag;
450  gamma34_k1 = g34_k1->real;
451  sigma34_k1 = g34_k1->imag;
452  gamma12_k2 = g12_k2->real;
453  sigma12_k2 = g12_k2->imag;
454  gamma23_k2 = g23_k2->real;
455  sigma23_k2 = g23_k2->imag;
456 
457  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
458  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
459  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
460  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
461  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
462  is_ident12_k2 || is_ident23_k2 );
463 
464  if ( has_ident )
465  {
466  // Apply to pairs of columns as needed.
467 
468  if ( !is_ident23_k1 )
469  MAC_Apply_G_mx2_ops( m_A,
470  &gamma23_k1,
471  &sigma23_k1,
472  a2, rs_A,
473  a3, rs_A );
474 
475  if ( !is_ident34_k1 )
476  MAC_Apply_G_mx2_ops( m_A,
477  &gamma34_k1,
478  &sigma34_k1,
479  a3, rs_A,
480  a4, rs_A );
481 
482  if ( !is_ident12_k2 )
483  MAC_Apply_G_mx2_ops( m_A,
484  &gamma12_k2,
485  &sigma12_k2,
486  a1, rs_A,
487  a2, rs_A );
488 
489  if ( !is_ident23_k2 )
490  MAC_Apply_G_mx2_ops( m_A,
491  &gamma23_k2,
492  &sigma23_k2,
493  a2, rs_A,
494  a3, rs_A );
495  }
496  else
497  {
498  // Apply to all four columns.
499 
500  MAC_Apply_G_mx4s_ops( m_A,
501  &gamma23_k1,
502  &sigma23_k1,
503  &gamma34_k1,
504  &sigma34_k1,
505  &gamma12_k2,
506  &sigma12_k2,
507  &gamma23_k2,
508  &sigma23_k2,
509  a1, rs_A,
510  a2, rs_A,
511  a3, rs_A,
512  a4, rs_A );
513  }
514  }
515 
516  if ( n_left == 1 )
517  {
518  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
519  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
520  a2 = buff_A + (g )*cs_A;
521  a3 = buff_A + (g + 1)*cs_A;
522  a4 = buff_A + (g + 2)*cs_A;
523 
524  gamma23_k1 = g23_k1->real;
525  sigma23_k1 = g23_k1->imag;
526  gamma34_k1 = g34_k1->real;
527  sigma34_k1 = g34_k1->imag;
528 
529  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
530  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
531 
532  if ( !is_ident23_k1 && is_ident34_k1 )
533  {
534  MAC_Apply_G_mx2_ops( m_A,
535  &gamma23_k1,
536  &sigma23_k1,
537  a2, rs_A,
538  a3, rs_A );
539  }
540  else if ( is_ident23_k1 && !is_ident34_k1 )
541  {
542  MAC_Apply_G_mx2_ops( m_A,
543  &gamma34_k1,
544  &sigma34_k1,
545  a3, rs_A,
546  a4, rs_A );
547  }
548  else
549  {
550  MAC_Apply_G_mx3_ops( m_A,
551  &gamma23_k1,
552  &sigma23_k1,
553  &gamma34_k1,
554  &sigma34_k1,
555  a2, rs_A,
556  a3, rs_A,
557  a4, rs_A );
558  }
559  }
560  }
561 
562  return FLA_SUCCESS;
563 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_bls_var3(), and FLA_Apply_G_rf_opt_var3().
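Variant 3 fuses a 2 x 2 block of rotations (n_fuse = k_fuse = 2): two adjacent rotations from set k followed by two adjacent rotations from set k + 1, touching four consecutive columns in a single pass over the rows. A hedged sketch of such a fused update follows; apply_givens_mx4s_s is an illustrative stand-in for MAC_Apply_G_mx4s_ops, with the application order taken from the non-fused branch of the listing and the sign convention assumed as in the earlier sketches.

    /* Hypothetical fused "mx4s" update: rotations (g23_1, s23_1) and
       (g34_1, s34_1) from set k act on columns a2/a3 and a3/a4; rotations
       (g12_2, s12_2) and (g23_2, s23_2) from set k+1 then act on a1/a2 and
       a2/a3. All four are applied in one pass over the rows. */
    static void apply_givens_mx4s_s( int m,
                                     float g23_1, float s23_1,
                                     float g34_1, float s34_1,
                                     float g12_2, float s12_2,
                                     float g23_2, float s23_2,
                                     float* a1, float* a2, float* a3, float* a4,
                                     int rs )
    {
        int i;
        for ( i = 0; i < m; ++i )
        {
            float alpha1 = a1[ i*rs ];
            float alpha2 = a2[ i*rs ];
            float alpha3 = a3[ i*rs ];
            float alpha4 = a4[ i*rs ];
            float temp2, temp3;

            // Set k: rotation on columns 2-3, then on columns 3-4.
            temp2      =  g23_1 * alpha2 + s23_1 * alpha3;
            temp3      = -s23_1 * alpha2 + g23_1 * alpha3;
            a4[ i*rs ] = -s34_1 * temp3  + g34_1 * alpha4;
            temp3      =  g34_1 * temp3  + s34_1 * alpha4;

            // Set k+1: rotation on columns 1-2, then on columns 2-3.
            a1[ i*rs ] =  g12_2 * alpha1 + s12_2 * temp2;
            temp2      = -s12_2 * alpha1 + g12_2 * temp2;
            a2[ i*rs ] =  g23_2 * temp2  + s23_2 * temp3;
            a3[ i*rs ] = -s23_2 * temp2  + g23_2 * temp3;
        }
    }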

◆ FLA_Apply_G_rf_ops_var4()

FLA_Error FLA_Apply_G_rf_ops_var4 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ops_var5()

FLA_Error FLA_Apply_G_rf_ops_var5 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ops_var6()

FLA_Error FLA_Apply_G_rf_ops_var6 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
117 {
118  float one = bl1_s1();
119  float zero = bl1_s0();
120  float gamma12;
121  float sigma12;
122  float gamma23;
123  float sigma23;
124  float* a1;
125  float* a2;
126  float* a3;
127  scomplex* g12;
128  scomplex* g23;
129  int i, j, g, k;
130  int nG, nG_app;
131  int n_iter;
132  int n_left;
133  int k_minus_1;
134  int n_fuse;
135  int is_ident12, is_ident23;
136 
137  k_minus_1 = k_G - 1;
138  nG = n_A - 1;
139  n_fuse = 2;
140 
141  // Use the simple variant for nG < (k - 1) or k == 1.
142  if ( nG < k_minus_1 || k_G == 1 )
143  {
 144  FLA_Apply_G_rf_ops_var1( k_G,
 145  m_A,
146  n_A,
147  buff_G, rs_G, cs_G,
148  buff_A, rs_A, cs_A );
149  return FLA_SUCCESS;
150  }
151 
152 
153  // Start-up phase.
154 
155  for ( j = 0; j < k_minus_1; ++j )
156  {
157  nG_app = j + 1;
158  n_iter = nG_app / n_fuse;
159  n_left = nG_app % n_fuse;
160 
161  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
162  {
163  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
164  g23 = buff_G + (g )*rs_G + (k )*cs_G;
165  a1 = buff_A + (g - 1)*cs_A;
166  a2 = buff_A + (g )*cs_A;
167  a3 = buff_A + (g + 1)*cs_A;
168 
169  gamma12 = g12->real;
170  sigma12 = g12->imag;
171  gamma23 = g23->real;
172  sigma23 = g23->imag;
173 
174  is_ident12 = ( gamma12 == one && sigma12 == zero );
175  is_ident23 = ( gamma23 == one && sigma23 == zero );
176 
177  if ( !is_ident12 && is_ident23 )
178  {
179  // Apply only to columns 1 and 2.
180 
181  MAC_Apply_G_mx2_ops( m_A,
182  &gamma12,
183  &sigma12,
184  a1, rs_A,
185  a2, rs_A );
186  }
187  else if ( is_ident12 && !is_ident23 )
188  {
189  // Apply only to columns 2 and 3.
190 
191  MAC_Apply_G_mx2_ops( m_A,
192  &gamma23,
193  &sigma23,
194  a2, rs_A,
195  a3, rs_A );
196  }
197  else if ( !is_ident12 && !is_ident23 )
198  {
199  // Apply to all three columns.
200 
201  MAC_Apply_G_mx3b_ops( m_A,
202  &gamma12,
203  &sigma12,
204  &gamma23,
205  &sigma23,
206  a1, rs_A,
207  a2, rs_A,
208  a3, rs_A );
209  }
210  }
211  //for ( k = 0; k < n_left; k += 1, g -= 1 )
212  if ( n_left == 1 )
213  {
214  g23 = buff_G + (g )*rs_G + (k )*cs_G;
215  a2 = buff_A + (g )*cs_A;
216  a3 = buff_A + (g + 1)*cs_A;
217 
218  gamma23 = g23->real;
219  sigma23 = g23->imag;
220 
221  is_ident23 = ( gamma23 == one && sigma23 == zero );
222 
223  if ( !is_ident23 )
224  MAC_Apply_G_mx2_ops( m_A,
225  &gamma23,
226  &sigma23,
227  a2, rs_A,
228  a3, rs_A );
229  }
230  }
231 
232  // Pipeline stage
233 
234  for ( j = k_minus_1; j < nG; ++j )
235  {
236  nG_app = k_G;
237  n_iter = nG_app / n_fuse;
238  n_left = nG_app % n_fuse;
239 
240  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
241  {
242  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
243  g23 = buff_G + (g )*rs_G + (k )*cs_G;
244  a1 = buff_A + (g - 1)*cs_A;
245  a2 = buff_A + (g )*cs_A;
246  a3 = buff_A + (g + 1)*cs_A;
247 
248  gamma12 = g12->real;
249  sigma12 = g12->imag;
250  gamma23 = g23->real;
251  sigma23 = g23->imag;
252 
253  is_ident12 = ( gamma12 == one && sigma12 == zero );
254  is_ident23 = ( gamma23 == one && sigma23 == zero );
255 
256  if ( !is_ident12 && is_ident23 )
257  {
258  // Apply only to columns 1 and 2.
259 
260  MAC_Apply_G_mx2_ops( m_A,
261  &gamma12,
262  &sigma12,
263  a1, rs_A,
264  a2, rs_A );
265  }
266  else if ( is_ident12 && !is_ident23 )
267  {
268  // Apply only to columns 2 and 3.
269 
270  MAC_Apply_G_mx2_ops( m_A,
271  &gamma23,
272  &sigma23,
273  a2, rs_A,
274  a3, rs_A );
275  }
276  else if ( !is_ident12 && !is_ident23 )
277  {
278  // Apply to all three columns.
279 
280  MAC_Apply_G_mx3b_ops( m_A,
281  &gamma12,
282  &sigma12,
283  &gamma23,
284  &sigma23,
285  a1, rs_A,
286  a2, rs_A,
287  a3, rs_A );
288  }
289  }
290  //for ( k = 0; k < n_left; k += 1, g -= 1 )
291  if ( n_left == 1 )
292  {
293  g23 = buff_G + (g )*rs_G + (k )*cs_G;
294  a2 = buff_A + (g )*cs_A;
295  a3 = buff_A + (g + 1)*cs_A;
296 
297  gamma23 = g23->real;
298  sigma23 = g23->imag;
299 
300  is_ident23 = ( gamma23 == one && sigma23 == zero );
301 
302  if ( !is_ident23 )
303  MAC_Apply_G_mx2_ops( m_A,
304  &gamma23,
305  &sigma23,
306  a2, rs_A,
307  a3, rs_A );
308  }
309  }
310 
311  // Shutdown stage
312 
313  for ( j = 1; j < k_G; ++j )
314  {
315  nG_app = k_G - j;
316  n_iter = nG_app / n_fuse;
317  n_left = nG_app % n_fuse;
318 
319  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
320  {
321  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
322  g23 = buff_G + (g )*rs_G + (k )*cs_G;
323  a1 = buff_A + (g - 1)*cs_A;
324  a2 = buff_A + (g )*cs_A;
325  a3 = buff_A + (g + 1)*cs_A;
326 
327  gamma12 = g12->real;
328  sigma12 = g12->imag;
329  gamma23 = g23->real;
330  sigma23 = g23->imag;
331 
332  is_ident12 = ( gamma12 == one && sigma12 == zero );
333  is_ident23 = ( gamma23 == one && sigma23 == zero );
334 
335  if ( !is_ident12 && is_ident23 )
336  {
337  // Apply only to columns 1 and 2.
338 
339  MAC_Apply_G_mx2_ops( m_A,
340  &gamma12,
341  &sigma12,
342  a1, rs_A,
343  a2, rs_A );
344  }
345  else if ( is_ident12 && !is_ident23 )
346  {
347  // Apply only to columns 2 and 3.
348 
349  MAC_Apply_G_mx2_ops( m_A,
350  &gamma23,
351  &sigma23,
352  a2, rs_A,
353  a3, rs_A );
354  }
355  else if ( !is_ident12 && !is_ident23 )
356  {
357  // Apply to all three columns.
358 
359  MAC_Apply_G_mx3b_ops( m_A,
360  &gamma12,
361  &sigma12,
362  &gamma23,
363  &sigma23,
364  a1, rs_A,
365  a2, rs_A,
366  a3, rs_A );
367  }
368  }
369  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
370  if ( n_left == 1 )
371  {
372  g23 = buff_G + (g )*rs_G + (k )*cs_G;
373  a2 = buff_A + (g )*cs_A;
374  a3 = buff_A + (g + 1)*cs_A;
375 
376  gamma23 = g23->real;
377  sigma23 = g23->imag;
378 
379  is_ident23 = ( gamma23 == one && sigma23 == zero );
380 
381  if ( !is_ident23 )
382  MAC_Apply_G_mx2_ops( m_A,
383  &gamma23,
384  &sigma23,
385  a2, rs_A,
386  a3, rs_A );
387  }
388  }
389 
390  return FLA_SUCCESS;
391 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var6().
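Variant 6 fuses across rotation sets: rotation g of set k (columns g and g + 1) is paired with rotation g - 1 of set k + 1 (columns g - 1 and g), so three columns are updated in one pass. The sketch below shows one plausible fused update with the set-k rotation applied first; both that ordering and the sign convention are assumptions, and apply_givens_mx3b_s is not a transcription of MAC_Apply_G_mx3b_ops.

    /* Hypothetical cross-set fusion: (gamma23, sigma23) from set k rotates
       columns a2/a3, then (gamma12, sigma12) from set k+1 rotates columns
       a1/a2, in a single pass over the rows. */
    static void apply_givens_mx3b_s( int m,
                                     float gamma12, float sigma12,
                                     float gamma23, float sigma23,
                                     float* a1, float* a2, float* a3, int rs )
    {
        int i;
        for ( i = 0; i < m; ++i )
        {
            float alpha1 = a1[ i*rs ];
            float alpha2 = a2[ i*rs ];
            float alpha3 = a3[ i*rs ];
            float temp2;

            // Rotation from set k: columns 2 and 3.
            temp2      =  gamma23 * alpha2 + sigma23 * alpha3;
            a3[ i*rs ] = -sigma23 * alpha2 + gamma23 * alpha3;

            // Rotation from set k+1: columns 1 and 2, using the updated column 2.
            a1[ i*rs ] =  gamma12 * alpha1 + sigma12 * temp2;
            a2[ i*rs ] = -sigma12 * alpha1 + gamma12 * temp2;
        }
    }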

◆ FLA_Apply_G_rf_ops_var7()

FLA_Error FLA_Apply_G_rf_ops_var7 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ops_var8()

FLA_Error FLA_Apply_G_rf_ops_var8 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_ops_var9()

FLA_Error FLA_Apply_G_rf_ops_var9 ( int  k_G,
int  m_A,
int  n_A,
scomplex *  buff_G,
int  rs_G,
int  cs_G,
float *  buff_A,
int  rs_A,
int  cs_A 
)
118 {
119  float one = bl1_s1();
120  float zero = bl1_s0();
121  float gamma12;
122  float sigma12;
123  float gamma23;
124  float sigma23;
125  float* a1;
126  float* a2;
127  float* a3;
128  scomplex* g12;
129  scomplex* g23;
130  int i, j, g, k;
131  int nG, nG_app;
132  int n_iter;
133  int n_left;
134  int k_minus_1;
135  int n_fuse;
136  int is_ident12, is_ident23;
137 
138  k_minus_1 = k_G - 1;
139  nG = n_A - 1;
140  n_fuse = 2;
141 
 142  // Use the simple variant for nG < 2(k - 1) or k == 1.
 143  if ( nG < 2*k_minus_1 || k_G == 1 )
 144  {
 145  FLA_Apply_G_rf_ops_var1( k_G,
 146  m_A,
147  n_A,
148  buff_G, rs_G, cs_G,
149  buff_A, rs_A, cs_A );
150  return FLA_SUCCESS;
151  }
152 
153 
154  // Start-up phase.
155 
156  for ( j = -1; j < k_minus_1; j += n_fuse )
157  {
158  nG_app = j + 1;
159  n_iter = nG_app;
160  n_left = 1;
161 
162  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
163  {
164  g12 = buff_G + (g )*rs_G + (k )*cs_G;
165  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
166  a1 = buff_A + (g )*cs_A;
167  a2 = buff_A + (g + 1)*cs_A;
168  a3 = buff_A + (g + 2)*cs_A;
169 
170  gamma12 = g12->real;
171  sigma12 = g12->imag;
172  gamma23 = g23->real;
173  sigma23 = g23->imag;
174 
175  is_ident12 = ( gamma12 == one && sigma12 == zero );
176  is_ident23 = ( gamma23 == one && sigma23 == zero );
177 
178  if ( !is_ident12 && is_ident23 )
179  {
180  // Apply only to columns 1 and 2.
181 
182  MAC_Apply_G_mx2_ops( m_A,
183  &gamma12,
184  &sigma12,
185  a1, rs_A,
186  a2, rs_A );
187  }
188  else if ( is_ident12 && !is_ident23 )
189  {
190  // Apply only to columns 2 and 3.
191 
192  MAC_Apply_G_mx2_ops( m_A,
193  &gamma23,
194  &sigma23,
195  a2, rs_A,
196  a3, rs_A );
197  }
198  else if ( !is_ident12 && !is_ident23 )
199  {
200  // Apply to all three columns.
201 
202  MAC_Apply_G_mx3_ops( m_A,
203  &gamma12,
204  &sigma12,
205  &gamma23,
206  &sigma23,
207  a1, rs_A,
208  a2, rs_A,
209  a3, rs_A );
210  }
211  }
212 
213  if ( n_left == 1 )
214  {
215  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
216  a2 = buff_A + (g + 1)*cs_A;
217  a3 = buff_A + (g + 2)*cs_A;
218 
219  gamma23 = g23->real;
220  sigma23 = g23->imag;
221 
222  is_ident23 = ( gamma23 == one && sigma23 == zero );
223 
224  if ( !is_ident23 )
225  MAC_Apply_G_mx2_ops( m_A,
226  &gamma23,
227  &sigma23,
228  a2, rs_A,
229  a3, rs_A );
230  }
231  }
232 
233  // Pipeline stage
234 
235  for ( ; j < nG - 1; j += n_fuse )
236  {
237  nG_app = k_G;
238  n_iter = nG_app;
239  n_left = 0;
240 
241  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
242  {
243  g12 = buff_G + (g )*rs_G + (k )*cs_G;
244  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
245  a1 = buff_A + (g )*cs_A;
246  a2 = buff_A + (g + 1)*cs_A;
247  a3 = buff_A + (g + 2)*cs_A;
248 
249  gamma12 = g12->real;
250  sigma12 = g12->imag;
251  gamma23 = g23->real;
252  sigma23 = g23->imag;
253 
254  is_ident12 = ( gamma12 == one && sigma12 == zero );
255  is_ident23 = ( gamma23 == one && sigma23 == zero );
256 
257  if ( !is_ident12 && is_ident23 )
258  {
259  // Apply only to columns 1 and 2.
260 
261  MAC_Apply_G_mx2_ops( m_A,
262  &gamma12,
263  &sigma12,
264  a1, rs_A,
265  a2, rs_A );
266  }
267  else if ( is_ident12 && !is_ident23 )
268  {
269  // Apply only to columns 2 and 3.
270 
271  MAC_Apply_G_mx2_ops( m_A,
272  &gamma23,
273  &sigma23,
274  a2, rs_A,
275  a3, rs_A );
276  }
277  else if ( !is_ident12 && !is_ident23 )
278  {
279  // Apply to all three columns.
280 
281  MAC_Apply_G_mx3_ops( m_A,
282  &gamma12,
283  &sigma12,
284  &gamma23,
285  &sigma23,
286  a1, rs_A,
287  a2, rs_A,
288  a3, rs_A );
289  }
290  }
291  }
292 
293  // Shutdown stage
294 
295  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
296  {
297  g = nG - 1;
298  k = j;
299 
300  n_left = 1;
301  if ( n_left == 1 )
302  {
303  g12 = buff_G + (g )*rs_G + (k )*cs_G;
304  a1 = buff_A + (g )*cs_A;
305  a2 = buff_A + (g + 1)*cs_A;
306 
307  gamma12 = g12->real;
308  sigma12 = g12->imag;
309 
310  is_ident12 = ( gamma12 == one && sigma12 == zero );
311 
312  if ( !is_ident12 )
313  MAC_Apply_G_mx2_ops( m_A,
314  &gamma12,
315  &sigma12,
316  a1, rs_A,
317  a2, rs_A );
318  ++k;
319  --g;
320  }
321 
322  nG_app = k_minus_1 - j;
323  n_iter = nG_app;
324 
325  for ( i = 0; i < n_iter; ++i, ++k, --g )
326  {
327  g12 = buff_G + (g )*rs_G + (k )*cs_G;
328  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
329  a1 = buff_A + (g )*cs_A;
330  a2 = buff_A + (g + 1)*cs_A;
331  a3 = buff_A + (g + 2)*cs_A;
332 
333  gamma12 = g12->real;
334  sigma12 = g12->imag;
335  gamma23 = g23->real;
336  sigma23 = g23->imag;
337 
338  is_ident12 = ( gamma12 == one && sigma12 == zero );
339  is_ident23 = ( gamma23 == one && sigma23 == zero );
340 
341  if ( !is_ident12 && is_ident23 )
342  {
343  // Apply only to columns 1 and 2.
344 
345  MAC_Apply_G_mx2_ops( m_A,
346  &gamma12,
347  &sigma12,
348  a1, rs_A,
349  a2, rs_A );
350  }
351  else if ( is_ident12 && !is_ident23 )
352  {
353  // Apply only to columns 2 and 3.
354 
355  MAC_Apply_G_mx2_ops( m_A,
356  &gamma23,
357  &sigma23,
358  a2, rs_A,
359  a3, rs_A );
360  }
361  else if ( !is_ident12 && !is_ident23 )
362  {
363  // Apply to all three columns.
364 
365  MAC_Apply_G_mx3_ops( m_A,
366  &gamma12,
367  &sigma12,
368  &gamma23,
369  &sigma23,
370  a1, rs_A,
371  a2, rs_A,
372  a3, rs_A );
373  }
374  }
375  }
376 
377  return FLA_SUCCESS;
378 }

References bl1_s0(), bl1_s1(), FLA_Apply_G_rf_ops_var1(), i, scomplex::imag, n_left, and scomplex::real.

Referenced by FLA_Apply_G_rf_opt_var9().

◆ FLA_Apply_G_rf_opt_var1()

FLA_Error FLA_Apply_G_rf_opt_var1 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
 57  FLA_Apply_G_rf_ops_var1( k_G,
 58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
 71  FLA_Apply_G_rf_opd_var1( k_G,
 72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
 85  FLA_Apply_G_rf_opc_var1( k_G,
 86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
 99  FLA_Apply_G_rf_opz_var1( k_G,
 100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_opc_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:215
FLA_Error FLA_Apply_G_rf_ops_var1(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:113
FLA_Error FLA_Apply_G_rf_opd_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:164
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:267

References FLA_Apply_G_rf_opc_var1(), FLA_Apply_G_rf_opd_var1(), FLA_Apply_G_rf_ops_var1(), FLA_Apply_G_rf_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

Referenced by FLA_Apply_G_internal().
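At the object level, the dispatcher reads the datatype of A and forwards the raw buffers to the matching flat kernel; note that for a real A the rotations in G are still read as complex values, with gamma in the real part and sigma in the imaginary part. The example below is a hedged sketch of driving FLA_Apply_G_rf_opt_var1 through FLA_Obj handles, assuming the usual libflame object routines (FLA_Init, FLA_Obj_create, FLA_Obj_buffer_at_view, FLA_Obj_free, FLA_Finalize); the sizes and the identity rotations are illustrative only.

    #include "FLAME.h"

    int main( void )
    {
        FLA_Obj   A, G;
        dim_t     m = 6, n = 5, k = 2;
        dcomplex* buff_G;
        int       cs_G;
        int       i, j;

        FLA_Init();

        // A is a real double matrix; per the dispatcher above, its rotations
        // are read as dcomplex (gamma in .real, sigma in .imag), so G is an
        // (n-1) x k double-complex matrix: one rotation per adjacent column
        // pair, one column per rotation set.
        FLA_Obj_create( FLA_DOUBLE,         m,     n, 0, 0, &A );
        FLA_Obj_create( FLA_DOUBLE_COMPLEX, n - 1, k, 0, 0, &G );

        buff_G = ( dcomplex* ) FLA_Obj_buffer_at_view( G );
        cs_G   = FLA_Obj_col_stride( G );

        // Identity rotations (gamma = 1, sigma = 0), which the kernels skip.
        // Fill A's buffer via FLA_Obj_buffer_at_view( A ) as needed (omitted).
        for ( j = 0; j < ( int ) k; ++j )
            for ( i = 0; i < ( int )( n - 1 ); ++i )
            {
                buff_G[ i + j*cs_G ].real = 1.0;
                buff_G[ i + j*cs_G ].imag = 0.0;
            }

        FLA_Apply_G_rf_opt_var1( G, A );

        FLA_Obj_free( &A );
        FLA_Obj_free( &G );
        FLA_Finalize();

        return 0;
    }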

◆ FLA_Apply_G_rf_opt_var2()

FLA_Error FLA_Apply_G_rf_opt_var2 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
 57  FLA_Apply_G_rf_ops_var2( k_G,
 58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
 71  FLA_Apply_G_rf_opd_var2( k_G,
 72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
 85  FLA_Apply_G_rf_opc_var2( k_G,
 86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
 99  FLA_Apply_G_rf_opz_var2( k_G,
 100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_ops_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var2.c:112
FLA_Error FLA_Apply_G_rf_opd_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var2.c:225
FLA_Error FLA_Apply_G_rf_opz_var2(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var2.c:451
FLA_Error FLA_Apply_G_rf_opc_var2(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var2.c:338

References FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_opt_var3()

FLA_Error FLA_Apply_G_rf_opt_var3 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
 57  FLA_Apply_G_rf_ops_var3( k_G,
 58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
 71  FLA_Apply_G_rf_opd_var3( k_G,
 72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
 85  FLA_Apply_G_rf_opc_var3( k_G,
 86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
 99  FLA_Apply_G_rf_opz_var3( k_G,
 100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_opd_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:565
FLA_Error FLA_Apply_G_rf_ops_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:112
FLA_Error FLA_Apply_G_rf_opz_var3(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:1471
FLA_Error FLA_Apply_G_rf_opc_var3(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var3.c:1018

References FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_opt_var4()

FLA_Error FLA_Apply_G_rf_opt_var4 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_opt_var5()

FLA_Error FLA_Apply_G_rf_opt_var5 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_opt_var6()

FLA_Error FLA_Apply_G_rf_opt_var6 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
 57  FLA_Apply_G_rf_ops_var6( k_G,
 58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
 71  FLA_Apply_G_rf_opd_var6( k_G,
 72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
 85  FLA_Apply_G_rf_opc_var6( k_G,
 86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
 99  FLA_Apply_G_rf_opz_var6( k_G,
 100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_opc_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var6.c:674
FLA_Error FLA_Apply_G_rf_opd_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var6.c:393
FLA_Error FLA_Apply_G_rf_ops_var6(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var6.c:112
FLA_Error FLA_Apply_G_rf_opz_var6(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var6.c:955

References FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_opz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_opt_var7()

FLA_Error FLA_Apply_G_rf_opt_var7 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_opt_var8()

FLA_Error FLA_Apply_G_rf_opt_var8 ( FLA_Obj  G,
FLA_Obj  A 
)

◆ FLA_Apply_G_rf_opt_var9()

FLA_Error FLA_Apply_G_rf_opt_var9 ( FLA_Obj  G,
FLA_Obj  A 
)
32 {
33  FLA_Datatype datatype;
34  int k_G, m_A, n_A;
35  int rs_G, cs_G;
36  int rs_A, cs_A;
37 
38  datatype = FLA_Obj_datatype( A );
39 
40  k_G = FLA_Obj_width( G );
41  m_A = FLA_Obj_length( A );
42  n_A = FLA_Obj_width( A );
43 
44  rs_G = FLA_Obj_row_stride( G );
45  cs_G = FLA_Obj_col_stride( G );
46 
47  rs_A = FLA_Obj_row_stride( A );
48  cs_A = FLA_Obj_col_stride( A );
49 
50  switch ( datatype )
51  {
52  case FLA_FLOAT:
53  {
54  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
55  float* buff_A = ( float* ) FLA_FLOAT_PTR( A );
56 
 57  FLA_Apply_G_rf_ops_var9( k_G,
 58  m_A,
59  n_A,
60  buff_G, rs_G, cs_G,
61  buff_A, rs_A, cs_A );
62 
63  break;
64  }
65 
66  case FLA_DOUBLE:
67  {
68  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
69  double* buff_A = ( double* ) FLA_DOUBLE_PTR( A );
70 
 71  FLA_Apply_G_rf_opd_var9( k_G,
 72  m_A,
73  n_A,
74  buff_G, rs_G, cs_G,
75  buff_A, rs_A, cs_A );
76 
77  break;
78  }
79 
80  case FLA_COMPLEX:
81  {
82  scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
83  scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );
84 
 85  FLA_Apply_G_rf_opc_var9( k_G,
 86  m_A,
87  n_A,
88  buff_G, rs_G, cs_G,
89  buff_A, rs_A, cs_A );
90 
91  break;
92  }
93 
94  case FLA_DOUBLE_COMPLEX:
95  {
96  dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
97  dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );
98 
 99  FLA_Apply_G_rf_opz_var9( k_G,
 100  m_A,
101  n_A,
102  buff_G, rs_G, cs_G,
103  buff_A, rs_A, cs_A );
104 
105  break;
106  }
107  }
108 
109  return FLA_SUCCESS;
110 }
FLA_Error FLA_Apply_G_rf_opz_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var9.c:914
FLA_Error FLA_Apply_G_rf_ops_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var9.c:113
FLA_Error FLA_Apply_G_rf_opd_var9(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var9.c:380
FLA_Error FLA_Apply_G_rf_opc_var9(int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var9.c:647

References FLA_Apply_G_rf_opc_var9(), FLA_Apply_G_rf_opd_var9(), FLA_Apply_G_rf_ops_var9(), FLA_Apply_G_rf_opz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().

◆ FLA_Apply_G_rf_opz_var1()

FLA_Error FLA_Apply_G_rf_opz_var1 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
272 {
273  double one = bl1_d1();
274  double zero = bl1_d0();
275  int nG_app = n_A - 1;
276  int l, j;
277  double gamma;
278  double sigma;
279  dcomplex* a1;
280  dcomplex* a2;
281  dcomplex* g1;
282  dcomplex* g11;
283 
284  g1 = buff_G;
285 
286  for ( l = 0; l < k_G; ++l )
287  {
288  a1 = buff_A;
289  a2 = buff_A + cs_A;
290  g11 = g1;
291 
292  for ( j = 0; j < nG_app; ++j )
293  {
294  gamma = g11->real;
295  sigma = g11->imag;
296 
297  // Skip the current iteration if the rotation is identity.
298  if ( gamma != one || sigma != zero )
299  {
300  MAC_Apply_G_mx2_opz( m_A,
301  &gamma,
302  &sigma,
303  a1, rs_A,
304  a2, rs_A );
305  }
306 
307  a1 += cs_A;
308  a2 += cs_A;
309  g11 += rs_G;
310  }
311 
312  g1 += cs_G;
313  }
314 
315  return FLA_SUCCESS;
316 }

References bl1_d0(), bl1_d1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_lf_opt_var1(), FLA_Apply_G_rf_opt_var1(), FLA_Apply_G_rf_opz_var2(), FLA_Apply_G_rf_opz_var3(), FLA_Apply_G_rf_opz_var6(), and FLA_Apply_G_rf_opz_var9().
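In the complex kernels the rotation coefficients remain real; each complex entry simply has its real and imaginary parts rotated by the same 2 x 2 real matrix. A hedged sketch of that 2-column update for dcomplex data follows; apply_givens_mx2_z is an illustrative stand-in for MAC_Apply_G_mx2_opz, with the same assumed sign convention as the real sketches.

    #include "FLAME.h"  // assumed to provide the dcomplex struct with .real/.imag

    /* Hypothetical complex-domain 2-column Givens update: the real pair
       (gamma, sigma) rotates the real and imaginary parts of each entry of
       columns a1 and a2 identically. */
    static void apply_givens_mx2_z( int m, double gamma, double sigma,
                                    dcomplex* a1, int rs1,
                                    dcomplex* a2, int rs2 )
    {
        int i;
        for ( i = 0; i < m; ++i )
        {
            dcomplex alpha1 = a1[ i*rs1 ];
            dcomplex alpha2 = a2[ i*rs2 ];

            a1[ i*rs1 ].real =  gamma * alpha1.real + sigma * alpha2.real;
            a1[ i*rs1 ].imag =  gamma * alpha1.imag + sigma * alpha2.imag;
            a2[ i*rs2 ].real = -sigma * alpha1.real + gamma * alpha2.real;
            a2[ i*rs2 ].imag = -sigma * alpha1.imag + gamma * alpha2.imag;
        }
    }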

◆ FLA_Apply_G_rf_opz_var2()

FLA_Error FLA_Apply_G_rf_opz_var2 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
456 {
457  double one = bl1_d1();
458  double zero = bl1_d0();
459  double gamma;
460  double sigma;
461  dcomplex* a1;
462  dcomplex* a2;
463  dcomplex* g11;
464  int j, g, k;
465  int nG, nG_app;
466  int k_minus_1;
467 
468  k_minus_1 = k_G - 1;
469  nG = n_A - 1;
470 
 471  // Use the simple variant for nG < (k - 1) or k == 1.
 472  if ( nG < k_minus_1 || k_G == 1 )
 473  {
 474  FLA_Apply_G_rf_opz_var1( k_G,
 475  m_A,
476  n_A,
477  buff_G, rs_G, cs_G,
478  buff_A, rs_A, cs_A );
479  return FLA_SUCCESS;
480  }
481 
482 
483  // Start-up phase.
484 
485  for ( j = 0; j < k_minus_1; ++j )
486  {
487  nG_app = j + 1;
488 
489  for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
490  {
491  g11 = buff_G + (g )*rs_G + (k )*cs_G;
492  a1 = buff_A + (g )*cs_A;
493  a2 = buff_A + (g + 1)*cs_A;
494 
495  gamma = g11->real;
496  sigma = g11->imag;
497 
498  // Skip the current iteration if the rotation is identity.
499  if ( gamma == one && sigma == zero ) continue;
500 
501  MAC_Apply_G_mx2_opz( m_A,
502  &gamma,
503  &sigma,
504  a1, rs_A,
505  a2, rs_A );
506  }
507  }
508 
509  // Pipeline stage
510 
511  for ( j = k_minus_1; j < nG; ++j )
512  {
513  nG_app = k_G;
514 
515  for ( k = 0, g = j; k < nG_app; ++k, --g )
516  {
517  g11 = buff_G + (g )*rs_G + (k )*cs_G;
518  a1 = buff_A + (g )*cs_A;
519  a2 = buff_A + (g + 1)*cs_A;
520 
521  gamma = g11->real;
522  sigma = g11->imag;
523 
524  // Skip the current iteration if the rotation is identity.
525  if ( gamma == one && sigma == zero ) continue;
526 
527  MAC_Apply_G_mx2_opz( m_A,
528  &gamma,
529  &sigma,
530  a1, rs_A,
531  a2, rs_A );
532  }
533  }
534 
535  // Shutdown stage
536 
537  for ( j = nG - k_minus_1; j < nG; ++j )
538  {
539  nG_app = nG - j;
540 
541  for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
542  {
543  g11 = buff_G + (g )*rs_G + (k )*cs_G;
544  a1 = buff_A + (g )*cs_A;
545  a2 = buff_A + (g + 1)*cs_A;
546 
547  gamma = g11->real;
548  sigma = g11->imag;
549 
550  // Skip the current iteration if the rotation is identity.
551  if ( gamma == one && sigma == zero ) continue;
552 
553  MAC_Apply_G_mx2_opz( m_A,
554  &gamma,
555  &sigma,
556  a1, rs_A,
557  a2, rs_A );
558  }
559  }
560 
561  return FLA_SUCCESS;
562 }
FLA_Error FLA_Apply_G_rf_opz_var1(int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A)
Definition: FLA_Apply_G_rf_opt_var1.c:267

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var2().

◆ FLA_Apply_G_rf_opz_var3()

FLA_Error FLA_Apply_G_rf_opz_var3 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
1476 {
1477  double one = bl1_d1();
1478  double zero = bl1_d0();
1479  double gamma23_k1;
1480  double sigma23_k1;
1481  double gamma34_k1;
1482  double sigma34_k1;
1483  double gamma12_k2;
1484  double sigma12_k2;
1485  double gamma23_k2;
1486  double sigma23_k2;
1487  dcomplex* a1;
1488  dcomplex* a2;
1489  dcomplex* a3;
1490  dcomplex* a4;
1491  dcomplex* g23_k1;
1492  dcomplex* g34_k1;
1493  dcomplex* g12_k2;
1494  dcomplex* g23_k2;
1495  int i, j, g, k;
1496  int nG, nG_app;
1497  int n_iter;
1498  int n_left;
1499  int k_minus_1;
1500  int n_fuse;
1501  int k_fuse;
1502  int is_ident23_k1, is_ident34_k1;
1503  int is_ident12_k2, is_ident23_k2;
1504  int has_ident;
1505 
1506  k_minus_1 = k_G - 1;
1507  nG = n_A - 1;
1508  n_fuse = 2;
1509  k_fuse = 2;
1510 
 1511  // Use the simple variant for nG < 2(k - 1) or k == 1.
 1512  if ( nG < 2*k_minus_1 || k_G == 1 )
 1513  {
 1514  FLA_Apply_G_rf_opz_var1( k_G,
 1515  m_A,
1516  n_A,
1517  buff_G, rs_G, cs_G,
1518  buff_A, rs_A, cs_A );
1519  return FLA_SUCCESS;
1520  }
1521 
1522 
1523  // Start-up phase.
1524 
1525  for ( j = -1; j < k_minus_1; j += n_fuse )
1526  {
1527  nG_app = j + 2;
1528  n_iter = nG_app / k_fuse;
1529  n_left = 1;
1530 
1531  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1532  {
1533  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1534  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1535  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1536  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1537  a1 = buff_A + (g - 1)*cs_A;
1538  a2 = buff_A + (g )*cs_A;
1539  a3 = buff_A + (g + 1)*cs_A;
1540  a4 = buff_A + (g + 2)*cs_A;
1541 
1542  gamma23_k1 = g23_k1->real;
1543  sigma23_k1 = g23_k1->imag;
1544  gamma34_k1 = g34_k1->real;
1545  sigma34_k1 = g34_k1->imag;
1546  gamma12_k2 = g12_k2->real;
1547  sigma12_k2 = g12_k2->imag;
1548  gamma23_k2 = g23_k2->real;
1549  sigma23_k2 = g23_k2->imag;
1550 
1551  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1552  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1553  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1554  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1555  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1556  is_ident12_k2 || is_ident23_k2 );
1557 
1558  if ( has_ident )
1559  {
1560  // Apply to pairs of columns as needed.
1561 
1562  if ( !is_ident23_k1 )
1563  MAC_Apply_G_mx2_opz( m_A,
1564  &gamma23_k1,
1565  &sigma23_k1,
1566  a2, rs_A,
1567  a3, rs_A );
1568 
1569  if ( !is_ident34_k1 )
1570  MAC_Apply_G_mx2_opz( m_A,
1571  &gamma34_k1,
1572  &sigma34_k1,
1573  a3, rs_A,
1574  a4, rs_A );
1575 
1576  if ( !is_ident12_k2 )
1577  MAC_Apply_G_mx2_opz( m_A,
1578  &gamma12_k2,
1579  &sigma12_k2,
1580  a1, rs_A,
1581  a2, rs_A );
1582 
1583  if ( !is_ident23_k2 )
1584  MAC_Apply_G_mx2_opz( m_A,
1585  &gamma23_k2,
1586  &sigma23_k2,
1587  a2, rs_A,
1588  a3, rs_A );
1589  }
1590  else
1591  {
1592  // Apply to all four columns.
1593 
1594  MAC_Apply_G_mx4s_opz( m_A,
1595  &gamma23_k1,
1596  &sigma23_k1,
1597  &gamma34_k1,
1598  &sigma34_k1,
1599  &gamma12_k2,
1600  &sigma12_k2,
1601  &gamma23_k2,
1602  &sigma23_k2,
1603  a1, rs_A,
1604  a2, rs_A,
1605  a3, rs_A,
1606  a4, rs_A );
1607  }
1608  }
1609 
1610  if ( n_left == 1 )
1611  {
1612  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1613  a3 = buff_A + (g + 1)*cs_A;
1614  a4 = buff_A + (g + 2)*cs_A;
1615 
1616  gamma34_k1 = g34_k1->real;
1617  sigma34_k1 = g34_k1->imag;
1618 
1619  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1620 
1621  if ( !is_ident34_k1 )
1622  MAC_Apply_G_mx2_opz( m_A,
1623  &gamma34_k1,
1624  &sigma34_k1,
1625  a3, rs_A,
1626  a4, rs_A );
1627  }
1628  }
1629 
1630  // Pipeline stage
1631 
1632  for ( ; j < nG - 1; j += n_fuse )
1633  {
1634  nG_app = k_G;
1635  n_iter = nG_app / k_fuse;
1636  n_left = nG_app % k_fuse;
1637 
1638  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1639  {
1640  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1641  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1642  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1643  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1644  a1 = buff_A + (g - 1)*cs_A;
1645  a2 = buff_A + (g )*cs_A;
1646  a3 = buff_A + (g + 1)*cs_A;
1647  a4 = buff_A + (g + 2)*cs_A;
1648 
1649  gamma23_k1 = g23_k1->real;
1650  sigma23_k1 = g23_k1->imag;
1651  gamma34_k1 = g34_k1->real;
1652  sigma34_k1 = g34_k1->imag;
1653  gamma12_k2 = g12_k2->real;
1654  sigma12_k2 = g12_k2->imag;
1655  gamma23_k2 = g23_k2->real;
1656  sigma23_k2 = g23_k2->imag;
1657 
1658  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1659  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1660  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1661  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1662  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1663  is_ident12_k2 || is_ident23_k2 );
1664 
1665  if ( has_ident )
1666  {
1667  // Apply to pairs of columns as needed.
1668 
1669  if ( !is_ident23_k1 )
1670  MAC_Apply_G_mx2_opz( m_A,
1671  &gamma23_k1,
1672  &sigma23_k1,
1673  a2, rs_A,
1674  a3, rs_A );
1675 
1676  if ( !is_ident34_k1 )
1677  MAC_Apply_G_mx2_opz( m_A,
1678  &gamma34_k1,
1679  &sigma34_k1,
1680  a3, rs_A,
1681  a4, rs_A );
1682 
1683  if ( !is_ident12_k2 )
1684  MAC_Apply_G_mx2_opz( m_A,
1685  &gamma12_k2,
1686  &sigma12_k2,
1687  a1, rs_A,
1688  a2, rs_A );
1689 
1690  if ( !is_ident23_k2 )
1691  MAC_Apply_G_mx2_opz( m_A,
1692  &gamma23_k2,
1693  &sigma23_k2,
1694  a2, rs_A,
1695  a3, rs_A );
1696  }
1697  else
1698  {
1699  // Apply to all four columns.
1700 
1701  MAC_Apply_G_mx4s_opz( m_A,
1702  &gamma23_k1,
1703  &sigma23_k1,
1704  &gamma34_k1,
1705  &sigma34_k1,
1706  &gamma12_k2,
1707  &sigma12_k2,
1708  &gamma23_k2,
1709  &sigma23_k2,
1710  a1, rs_A,
1711  a2, rs_A,
1712  a3, rs_A,
1713  a4, rs_A );
1714  }
1715  }
1716 
1717  if ( n_left == 1 )
1718  {
1719  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1720  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1721  a2 = buff_A + (g )*cs_A;
1722  a3 = buff_A + (g + 1)*cs_A;
1723  a4 = buff_A + (g + 2)*cs_A;
1724 
1725  gamma23_k1 = g23_k1->real;
1726  sigma23_k1 = g23_k1->imag;
1727  gamma34_k1 = g34_k1->real;
1728  sigma34_k1 = g34_k1->imag;
1729 
1730  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1731  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1732 
1733  if ( !is_ident23_k1 && is_ident34_k1 )
1734  {
1735  MAC_Apply_G_mx2_opz( m_A,
1736  &gamma23_k1,
1737  &sigma23_k1,
1738  a2, rs_A,
1739  a3, rs_A );
1740  }
1741  else if ( is_ident23_k1 && !is_ident34_k1 )
1742  {
1743  MAC_Apply_G_mx2_opz( m_A,
1744  &gamma34_k1,
1745  &sigma34_k1,
1746  a3, rs_A,
1747  a4, rs_A );
1748  }
1749  else
1750  {
1751  MAC_Apply_G_mx3_opz( m_A,
1752  &gamma23_k1,
1753  &sigma23_k1,
1754  &gamma34_k1,
1755  &sigma34_k1,
1756  a2, rs_A,
1757  a3, rs_A,
1758  a4, rs_A );
1759  }
1760  }
1761  }
1762 
1763  // Shutdown stage
1764 
1765  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1766  {
1767  g = nG - 1;
1768  k = j;
1769 
1770  //n_left = 1;
1771  //if ( n_left == 1 )
1772  {
1773  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1774  a2 = buff_A + (g )*cs_A;
1775  a3 = buff_A + (g + 1)*cs_A;
1776 
1777  gamma23_k1 = g23_k1->real;
1778  sigma23_k1 = g23_k1->imag;
1779 
1780  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1781 
1782  if ( !is_ident23_k1 )
1783  MAC_Apply_G_mx2_opz( m_A,
1784  &gamma23_k1,
1785  &sigma23_k1,
1786  a2, rs_A,
1787  a3, rs_A );
1788  ++k;
1789  --g;
1790  }
1791 
1792  nG_app = k_minus_1 - j;
1793  n_iter = nG_app / k_fuse;
1794  n_left = nG_app % k_fuse;
1795 
1796  for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
1797  {
1798  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1799  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1800  g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1801  g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G;
1802  a1 = buff_A + (g - 1)*cs_A;
1803  a2 = buff_A + (g )*cs_A;
1804  a3 = buff_A + (g + 1)*cs_A;
1805  a4 = buff_A + (g + 2)*cs_A;
1806 
1807  gamma23_k1 = g23_k1->real;
1808  sigma23_k1 = g23_k1->imag;
1809  gamma34_k1 = g34_k1->real;
1810  sigma34_k1 = g34_k1->imag;
1811  gamma12_k2 = g12_k2->real;
1812  sigma12_k2 = g12_k2->imag;
1813  gamma23_k2 = g23_k2->real;
1814  sigma23_k2 = g23_k2->imag;
1815 
1816  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1817  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1818  is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
1819  is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
1820  has_ident = ( is_ident23_k1 || is_ident34_k1 ||
1821  is_ident12_k2 || is_ident23_k2 );
1822 
1823  if ( has_ident )
1824  {
1825  // Apply to pairs of columns as needed.
1826 
1827  if ( !is_ident23_k1 )
1828  MAC_Apply_G_mx2_opz( m_A,
1829  &gamma23_k1,
1830  &sigma23_k1,
1831  a2, rs_A,
1832  a3, rs_A );
1833 
1834  if ( !is_ident34_k1 )
1835  MAC_Apply_G_mx2_opz( m_A,
1836  &gamma34_k1,
1837  &sigma34_k1,
1838  a3, rs_A,
1839  a4, rs_A );
1840 
1841  if ( !is_ident12_k2 )
1842  MAC_Apply_G_mx2_opz( m_A,
1843  &gamma12_k2,
1844  &sigma12_k2,
1845  a1, rs_A,
1846  a2, rs_A );
1847 
1848  if ( !is_ident23_k2 )
1849  MAC_Apply_G_mx2_opz( m_A,
1850  &gamma23_k2,
1851  &sigma23_k2,
1852  a2, rs_A,
1853  a3, rs_A );
1854  }
1855  else
1856  {
1857  // Apply to all four columns.
1858 
1859  MAC_Apply_G_mx4s_opz( m_A,
1860  &gamma23_k1,
1861  &sigma23_k1,
1862  &gamma34_k1,
1863  &sigma34_k1,
1864  &gamma12_k2,
1865  &sigma12_k2,
1866  &gamma23_k2,
1867  &sigma23_k2,
1868  a1, rs_A,
1869  a2, rs_A,
1870  a3, rs_A,
1871  a4, rs_A );
1872  }
1873  }
1874 
1875  if ( n_left == 1 )
1876  {
1877  g23_k1 = buff_G + (g )*rs_G + (k )*cs_G;
1878  g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1879  a2 = buff_A + (g )*cs_A;
1880  a3 = buff_A + (g + 1)*cs_A;
1881  a4 = buff_A + (g + 2)*cs_A;
1882 
1883  gamma23_k1 = g23_k1->real;
1884  sigma23_k1 = g23_k1->imag;
1885  gamma34_k1 = g34_k1->real;
1886  sigma34_k1 = g34_k1->imag;
1887 
1888  is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
1889  is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
1890 
1891  if ( !is_ident23_k1 && is_ident34_k1 )
1892  {
1893  MAC_Apply_G_mx2_opz( m_A,
1894  &gamma23_k1,
1895  &sigma23_k1,
1896  a2, rs_A,
1897  a3, rs_A );
1898  }
1899  else if ( is_ident23_k1 && !is_ident34_k1 )
1900  {
1901  MAC_Apply_G_mx2_opz( m_A,
1902  &gamma34_k1,
1903  &sigma34_k1,
1904  a3, rs_A,
1905  a4, rs_A );
1906  }
1907  else
1908  {
1909  MAC_Apply_G_mx3_opz( m_A,
1910  &gamma23_k1,
1911  &sigma23_k1,
1912  &gamma34_k1,
1913  &sigma34_k1,
1914  a2, rs_A,
1915  a3, rs_A,
1916  a4, rs_A );
1917  }
1918  }
1919  }
1920 
1921  return FLA_SUCCESS;
1922 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_blz_var3(), and FLA_Apply_G_rf_opt_var3().
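
All of the opz kernels above read each plane rotation from a single dcomplex element of buff_G: the cosine (gamma) is stored in the real part, the sine (sigma) in the imaginary part, and an element equal to (1, 0) is treated as an identity rotation and skipped. The sketch below only illustrates that packing convention; the helper name pack_givens_opz is hypothetical and not part of libflame.

    #include "FLAME.h"

    /* Illustration only (not a libflame routine): store the Givens pair
       (gamma, sigma) for rotation g of wave k in a buffer laid out the
       way the opz kernels index it: buff_G + g*rs_G + k*cs_G.           */
    static void pack_givens_opz( double gamma, double sigma,
                                 dcomplex* buff_G, int rs_G, int cs_G,
                                 int g, int k )
    {
        dcomplex* gij = buff_G + g*rs_G + k*cs_G;

        gij->real = gamma;  /* read back above as gamma12, gamma23_k1, ... */
        gij->imag = sigma;  /* read back above as sigma12, sigma23_k1, ... */
    }

Storing (1.0, 0.0) in unused slots lets the is_ident checks in the listings turn those rotations into no-ops.
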

◆ FLA_Apply_G_rf_opz_var4()

FLA_Error FLA_Apply_G_rf_opz_var4 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opz_var5()

FLA_Error FLA_Apply_G_rf_opz_var5 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opz_var6()

FLA_Error FLA_Apply_G_rf_opz_var6 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
960 {
961  double one = bl1_d1();
962  double zero = bl1_d0();
963  double gamma12;
964  double sigma12;
965  double gamma23;
966  double sigma23;
967  dcomplex* a1;
968  dcomplex* a2;
969  dcomplex* a3;
970  dcomplex* g12;
971  dcomplex* g23;
972  int i, j, g, k;
973  int nG, nG_app;
974  int n_iter;
975  int n_left;
976  int k_minus_1;
977  int n_fuse;
978  int is_ident12, is_ident23;
979 
980  k_minus_1 = k_G - 1;
981  nG = n_A - 1;
982  n_fuse = 2;
983 
984  // Use the simple variant for nG < (k - 1) or k == 1.
985  if ( nG < k_minus_1 || k_G == 1 )
986  {
987  FLA_Apply_G_rf_opz_var1( k_G,
988  m_A,
989  n_A,
990  buff_G, rs_G, cs_G,
991  buff_A, rs_A, cs_A );
992  return FLA_SUCCESS;
993  }
994 
995 
996  // Start-up phase.
997 
998  for ( j = 0; j < k_minus_1; ++j )
999  {
1000  nG_app = j + 1;
1001  n_iter = nG_app / n_fuse;
1002  n_left = nG_app % n_fuse;
1003 
1004  for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1005  {
1006  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1007  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1008  a1 = buff_A + (g - 1)*cs_A;
1009  a2 = buff_A + (g )*cs_A;
1010  a3 = buff_A + (g + 1)*cs_A;
1011 
1012  gamma12 = g12->real;
1013  sigma12 = g12->imag;
1014  gamma23 = g23->real;
1015  sigma23 = g23->imag;
1016 
1017  is_ident12 = ( gamma12 == one && sigma12 == zero );
1018  is_ident23 = ( gamma23 == one && sigma23 == zero );
1019 
1020  if ( !is_ident12 && is_ident23 )
1021  {
1022  // Apply only to columns 1 and 2.
1023 
1024  MAC_Apply_G_mx2_opz( m_A,
1025  &gamma12,
1026  &sigma12,
1027  a1, rs_A,
1028  a2, rs_A );
1029  }
1030  else if ( is_ident12 && !is_ident23 )
1031  {
1032  // Apply only to columns 2 and 3.
1033 
1034  MAC_Apply_G_mx2_opz( m_A,
1035  &gamma23,
1036  &sigma23,
1037  a2, rs_A,
1038  a3, rs_A );
1039  }
1040  else if ( !is_ident12 && !is_ident23 )
1041  {
1042  // Apply to all three columns.
1043 
1044  MAC_Apply_G_mx3b_opz( m_A,
1045  &gamma12,
1046  &sigma12,
1047  &gamma23,
1048  &sigma23,
1049  a1, rs_A,
1050  a2, rs_A,
1051  a3, rs_A );
1052  }
1053  }
1054  //for ( k = 0; k < n_left; k += 1, g -= 1 )
1055  if ( n_left == 1 )
1056  {
1057  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1058  a2 = buff_A + (g )*cs_A;
1059  a3 = buff_A + (g + 1)*cs_A;
1060 
1061  gamma23 = g23->real;
1062  sigma23 = g23->imag;
1063 
1064  is_ident23 = ( gamma23 == one && sigma23 == zero );
1065 
1066  if ( !is_ident23 )
1067  MAC_Apply_G_mx2_opz( m_A,
1068  &gamma23,
1069  &sigma23,
1070  a2, rs_A,
1071  a3, rs_A );
1072  }
1073  }
1074 
1075  // Pipeline stage
1076 
1077  for ( j = k_minus_1; j < nG; ++j )
1078  {
1079  nG_app = k_G;
1080  n_iter = nG_app / n_fuse;
1081  n_left = nG_app % n_fuse;
1082 
1083  for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1084  {
1085  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1086  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1087  a1 = buff_A + (g - 1)*cs_A;
1088  a2 = buff_A + (g )*cs_A;
1089  a3 = buff_A + (g + 1)*cs_A;
1090 
1091  gamma12 = g12->real;
1092  sigma12 = g12->imag;
1093  gamma23 = g23->real;
1094  sigma23 = g23->imag;
1095 
1096  is_ident12 = ( gamma12 == one && sigma12 == zero );
1097  is_ident23 = ( gamma23 == one && sigma23 == zero );
1098 
1099  if ( !is_ident12 && is_ident23 )
1100  {
1101  // Apply only to columns 1 and 2.
1102 
1103  MAC_Apply_G_mx2_opz( m_A,
1104  &gamma12,
1105  &sigma12,
1106  a1, rs_A,
1107  a2, rs_A );
1108  }
1109  else if ( is_ident12 && !is_ident23 )
1110  {
1111  // Apply only to columns 2 and 3.
1112 
1113  MAC_Apply_G_mx2_opz( m_A,
1114  &gamma23,
1115  &sigma23,
1116  a2, rs_A,
1117  a3, rs_A );
1118  }
1119  else if ( !is_ident12 && !is_ident23 )
1120  {
1121  // Apply to all three columns.
1122 
1123  MAC_Apply_G_mx3b_opz( m_A,
1124  &gamma12,
1125  &sigma12,
1126  &gamma23,
1127  &sigma23,
1128  a1, rs_A,
1129  a2, rs_A,
1130  a3, rs_A );
1131  }
1132  }
1133  //for ( k = 0; k < n_left; k += 1, g -= 1 )
1134  if ( n_left == 1 )
1135  {
1136  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1137  a2 = buff_A + (g )*cs_A;
1138  a3 = buff_A + (g + 1)*cs_A;
1139 
1140  gamma23 = g23->real;
1141  sigma23 = g23->imag;
1142 
1143  is_ident23 = ( gamma23 == one && sigma23 == zero );
1144 
1145  if ( !is_ident23 )
1146  MAC_Apply_G_mx2_opz( m_A,
1147  &gamma23,
1148  &sigma23,
1149  a2, rs_A,
1150  a3, rs_A );
1151  }
1152  }
1153 
1154  // Shutdown stage
1155 
1156  for ( j = 1; j < k_G; ++j )
1157  {
1158  nG_app = k_G - j;
1159  n_iter = nG_app / n_fuse;
1160  n_left = nG_app % n_fuse;
1161 
1162  for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
1163  {
1164  g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
1165  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1166  a1 = buff_A + (g - 1)*cs_A;
1167  a2 = buff_A + (g )*cs_A;
1168  a3 = buff_A + (g + 1)*cs_A;
1169 
1170  gamma12 = g12->real;
1171  sigma12 = g12->imag;
1172  gamma23 = g23->real;
1173  sigma23 = g23->imag;
1174 
1175  is_ident12 = ( gamma12 == one && sigma12 == zero );
1176  is_ident23 = ( gamma23 == one && sigma23 == zero );
1177 
1178  if ( !is_ident12 && is_ident23 )
1179  {
1180  // Apply only to columns 1 and 2.
1181 
1182  MAC_Apply_G_mx2_opz( m_A,
1183  &gamma12,
1184  &sigma12,
1185  a1, rs_A,
1186  a2, rs_A );
1187  }
1188  else if ( is_ident12 && !is_ident23 )
1189  {
1190  // Apply only to columns 2 and 3.
1191 
1192  MAC_Apply_G_mx2_opz( m_A,
1193  &gamma23,
1194  &sigma23,
1195  a2, rs_A,
1196  a3, rs_A );
1197  }
1198  else if ( !is_ident12 && !is_ident23 )
1199  {
1200  // Apply to all three columns.
1201 
1202  MAC_Apply_G_mx3b_opz( m_A,
1203  &gamma12,
1204  &sigma12,
1205  &gamma23,
1206  &sigma23,
1207  a1, rs_A,
1208  a2, rs_A,
1209  a3, rs_A );
1210  }
1211  }
1212  //for ( k = 0; k < nG_app_left; k += 1, g -= 1 )
1213  if ( n_left == 1 )
1214  {
1215  g23 = buff_G + (g )*rs_G + (k )*cs_G;
1216  a2 = buff_A + (g )*cs_A;
1217  a3 = buff_A + (g + 1)*cs_A;
1218 
1219  gamma23 = g23->real;
1220  sigma23 = g23->imag;
1221 
1222  is_ident23 = ( gamma23 == one && sigma23 == zero );
1223 
1224  if ( !is_ident23 )
1225  MAC_Apply_G_mx2_opz( m_A,
1226  &gamma23,
1227  &sigma23,
1228  a2, rs_A,
1229  a3, rs_A );
1230  }
1231  }
1232 
1233  return FLA_SUCCESS;
1234 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var6().
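
As a usage sketch only (not taken from the library sources), the call below applies k_G waves of rotations from the right to an m_A-by-n_A dcomplex matrix with FLA_Apply_G_rf_opz_var6(). It assumes both buffers are contiguous and column-major, with one column of G per wave, so that rs_A = 1, cs_A = m_A, rs_G = 1, and cs_G = n_A - 1; the wrapper name apply_G_waves_var6 is hypothetical.

    #include "FLAME.h"

    /* Sketch: apply k_G waves of Givens rotations, packed in G as above,
       from the right to a column-major m_A x n_A matrix A.  G holds
       n_A - 1 rotations per wave (one column of G per wave).            */
    void apply_G_waves_var6( int k_G, int m_A, int n_A,
                             dcomplex* G, dcomplex* A )
    {
        int rs_G = 1, cs_G = n_A - 1;  /* G is (n_A - 1) x k_G */
        int rs_A = 1, cs_A = m_A;      /* A is  m_A      x n_A */

        FLA_Apply_G_rf_opz_var6( k_G, m_A, n_A,
                                 G, rs_G, cs_G,
                                 A, rs_A, cs_A );
    }
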

◆ FLA_Apply_G_rf_opz_var7()

FLA_Error FLA_Apply_G_rf_opz_var7 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opz_var8()

FLA_Error FLA_Apply_G_rf_opz_var8 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)

◆ FLA_Apply_G_rf_opz_var9()

FLA_Error FLA_Apply_G_rf_opz_var9 ( int  k_G,
int  m_A,
int  n_A,
dcomplex *  buff_G,
int  rs_G,
int  cs_G,
dcomplex *  buff_A,
int  rs_A,
int  cs_A 
)
919 {
920  double one = bl1_d1();
921  double zero = bl1_d0();
922  double gamma12;
923  double sigma12;
924  double gamma23;
925  double sigma23;
926  dcomplex* a1;
927  dcomplex* a2;
928  dcomplex* a3;
929  dcomplex* g12;
930  dcomplex* g23;
931  int i, j, g, k;
932  int nG, nG_app;
933  int n_iter;
934  int n_left;
935  int k_minus_1;
936  int n_fuse;
937  int is_ident12, is_ident23;
938 
939  k_minus_1 = k_G - 1;
940  nG = n_A - 1;
941  n_fuse = 2;
942 
943  // Use the simple variant for nG < 2*(k - 1) or k == 1.
944  if ( nG < 2*k_minus_1 || k_G == 1 )
945  {
946  FLA_Apply_G_rf_opz_var1( k_G,
947  m_A,
948  n_A,
949  buff_G, rs_G, cs_G,
950  buff_A, rs_A, cs_A );
951  return FLA_SUCCESS;
952  }
953 
954 
955  // Start-up phase.
956 
957  for ( j = -1; j < k_minus_1; j += n_fuse )
958  {
959  nG_app = j + 1;
960  n_iter = nG_app;
961  n_left = 1;
962 
963  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
964  {
965  g12 = buff_G + (g )*rs_G + (k )*cs_G;
966  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
967  a1 = buff_A + (g )*cs_A;
968  a2 = buff_A + (g + 1)*cs_A;
969  a3 = buff_A + (g + 2)*cs_A;
970 
971  gamma12 = g12->real;
972  sigma12 = g12->imag;
973  gamma23 = g23->real;
974  sigma23 = g23->imag;
975 
976  is_ident12 = ( gamma12 == one && sigma12 == zero );
977  is_ident23 = ( gamma23 == one && sigma23 == zero );
978 
979  if ( !is_ident12 && is_ident23 )
980  {
981  // Apply only to columns 1 and 2.
982 
983  MAC_Apply_G_mx2_opz( m_A,
984  &gamma12,
985  &sigma12,
986  a1, rs_A,
987  a2, rs_A );
988  }
989  else if ( is_ident12 && !is_ident23 )
990  {
991  // Apply only to columns 2 and 3.
992 
993  MAC_Apply_G_mx2_opz( m_A,
994  &gamma23,
995  &sigma23,
996  a2, rs_A,
997  a3, rs_A );
998  }
999  else if ( !is_ident12 && !is_ident23 )
1000  {
1001  // Apply to all three columns.
1002 
1003  MAC_Apply_G_mx3_opz( m_A,
1004  &gamma12,
1005  &sigma12,
1006  &gamma23,
1007  &sigma23,
1008  a1, rs_A,
1009  a2, rs_A,
1010  a3, rs_A );
1011  }
1012  }
1013 
1014  if ( n_left == 1 )
1015  {
1016  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1017  a2 = buff_A + (g + 1)*cs_A;
1018  a3 = buff_A + (g + 2)*cs_A;
1019 
1020  gamma23 = g23->real;
1021  sigma23 = g23->imag;
1022 
1023  is_ident23 = ( gamma23 == one && sigma23 == zero );
1024 
1025  if ( !is_ident23 )
1026  MAC_Apply_G_mx2_opz( m_A,
1027  &gamma23,
1028  &sigma23,
1029  a2, rs_A,
1030  a3, rs_A );
1031  }
1032  }
1033 
1034  // Pipeline stage
1035 
1036  for ( ; j < nG - 1; j += n_fuse )
1037  {
1038  nG_app = k_G;
1039  n_iter = nG_app;
1040  n_left = 0;
1041 
1042  for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
1043  {
1044  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1045  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1046  a1 = buff_A + (g )*cs_A;
1047  a2 = buff_A + (g + 1)*cs_A;
1048  a3 = buff_A + (g + 2)*cs_A;
1049 
1050  gamma12 = g12->real;
1051  sigma12 = g12->imag;
1052  gamma23 = g23->real;
1053  sigma23 = g23->imag;
1054 
1055  is_ident12 = ( gamma12 == one && sigma12 == zero );
1056  is_ident23 = ( gamma23 == one && sigma23 == zero );
1057 
1058  if ( !is_ident12 && is_ident23 )
1059  {
1060  // Apply only to columns 1 and 2.
1061 
1062  MAC_Apply_G_mx2_opz( m_A,
1063  &gamma12,
1064  &sigma12,
1065  a1, rs_A,
1066  a2, rs_A );
1067  }
1068  else if ( is_ident12 && !is_ident23 )
1069  {
1070  // Apply only to columns 2 and 3.
1071 
1072  MAC_Apply_G_mx2_opz( m_A,
1073  &gamma23,
1074  &sigma23,
1075  a2, rs_A,
1076  a3, rs_A );
1077  }
1078  else if ( !is_ident12 && !is_ident23 )
1079  {
1080  // Apply to all three columns.
1081 
1082  MAC_Apply_G_mx3_opz( m_A,
1083  &gamma12,
1084  &sigma12,
1085  &gamma23,
1086  &sigma23,
1087  a1, rs_A,
1088  a2, rs_A,
1089  a3, rs_A );
1090  }
1091  }
1092  }
1093 
1094  // Shutdown stage
1095 
1096  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
1097  {
1098  g = nG - 1;
1099  k = j;
1100 
1101  n_left = 1;
1102  if ( n_left == 1 )
1103  {
1104  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1105  a1 = buff_A + (g )*cs_A;
1106  a2 = buff_A + (g + 1)*cs_A;
1107 
1108  gamma12 = g12->real;
1109  sigma12 = g12->imag;
1110 
1111  is_ident12 = ( gamma12 == one && sigma12 == zero );
1112 
1113  if ( !is_ident12 )
1114  MAC_Apply_G_mx2_opz( m_A,
1115  &gamma12,
1116  &sigma12,
1117  a1, rs_A,
1118  a2, rs_A );
1119  ++k;
1120  --g;
1121  }
1122 
1123  nG_app = k_minus_1 - j;
1124  n_iter = nG_app;
1125 
1126  for ( i = 0; i < n_iter; ++i, ++k, --g )
1127  {
1128  g12 = buff_G + (g )*rs_G + (k )*cs_G;
1129  g23 = buff_G + (g + 1)*rs_G + (k )*cs_G;
1130  a1 = buff_A + (g )*cs_A;
1131  a2 = buff_A + (g + 1)*cs_A;
1132  a3 = buff_A + (g + 2)*cs_A;
1133 
1134  gamma12 = g12->real;
1135  sigma12 = g12->imag;
1136  gamma23 = g23->real;
1137  sigma23 = g23->imag;
1138 
1139  is_ident12 = ( gamma12 == one && sigma12 == zero );
1140  is_ident23 = ( gamma23 == one && sigma23 == zero );
1141 
1142  if ( !is_ident12 && is_ident23 )
1143  {
1144  // Apply only to columns 1 and 2.
1145 
1146  MAC_Apply_G_mx2_opz( m_A,
1147  &gamma12,
1148  &sigma12,
1149  a1, rs_A,
1150  a2, rs_A );
1151  }
1152  else if ( is_ident12 && !is_ident23 )
1153  {
1154  // Apply only to columns 2 and 3.
1155 
1156  MAC_Apply_G_mx2_opz( m_A,
1157  &gamma23,
1158  &sigma23,
1159  a2, rs_A,
1160  a3, rs_A );
1161  }
1162  else if ( !is_ident12 && !is_ident23 )
1163  {
1164  // Apply to all three columns.
1165 
1166  MAC_Apply_G_mx3_opz( m_A,
1167  &gamma12,
1168  &sigma12,
1169  &gamma23,
1170  &sigma23,
1171  a1, rs_A,
1172  a2, rs_A,
1173  a3, rs_A );
1174  }
1175  }
1176  }
1177 
1178  return FLA_SUCCESS;
1179 }

References bl1_d0(), bl1_d1(), FLA_Apply_G_rf_opz_var1(), i, dcomplex::imag, n_left, and dcomplex::real.

Referenced by FLA_Apply_G_rf_opt_var9().
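
Note that FLA_Apply_G_rf_opz_var9() enters its pipelined wavefront only when there are enough rotations per wave to overlap adjacent waves; otherwise it simply defers to FLA_Apply_G_rf_opz_var1(). The snippet below restates that guard from the caller's point of view; the function name is hypothetical.

    /* Mirrors the guard at the top of FLA_Apply_G_rf_opz_var9(): with
       nG = n_A - 1 rotations per wave, the fused/pipelined path is taken
       only when nG >= 2*(k_G - 1) and k_G > 1.                          */
    int var9_uses_pipelined_path( int k_G, int n_A )
    {
        int nG = n_A - 1;

        return ( nG >= 2*(k_G - 1) && k_G > 1 );
    }
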