libflame
revision_anchor
|
Go to the source code of this file.
Functions | |
FLA_Error | FLA_Apply_G_rf_opt_var1 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var1 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var1 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_opt_var2 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var2 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var2 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_opt_var3 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var3 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var3 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_opt_var4 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var4 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var4 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var4 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_opt_var5 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var5 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var5 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var5 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_opt_var6 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var6 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var6 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var6 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_opt_var7 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var7 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var7 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_opt_var8 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var8 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var8 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_opt_var9 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ops_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_opz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asm_var9 (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var9 (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_asm_var3b (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var3b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var3b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var3b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var3b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var3b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var3b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_asm_var5b (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var5b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var5b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var5b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var5b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var5b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var5b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_asm_var6b (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var6b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var6b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var6b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var6b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var6b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var6b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_asm_var8b (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var8b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var8b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var8b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var8b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var8b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var8b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bhs_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bhd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bhc_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bhz_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_asm_var9b (FLA_Obj G, FLA_Obj A) |
FLA_Error | FLA_Apply_G_rf_ass_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asd_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asc_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_asz_var9b (int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A) |
FLA_Error | FLA_Apply_G_rf_blk_var9b (FLA_Obj G, FLA_Obj A, dim_t b_alg) |
FLA_Error | FLA_Apply_G_rf_bls_var9b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_bld_var9b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blc_var9b (int k_G, int m_A, int n_A, int i_k, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error | FLA_Apply_G_rf_blz_var9b (int k_G, int m_A, int n_A, int i_k, dcomplex *buff_G, int rs_G, int cs_G, dcomplex *buff_A, int rs_A, int cs_A, int b_alg) |
FLA_Error FLA_Apply_G_rf_asc_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_asm_var1(), and FLA_Apply_G_rf_blc_var1().
{ float one = bli_s1(); float zero = bli_s0(); int nG_app = n_A - 1; int l, j; float gamma; float sigma; scomplex* a1; scomplex* a2; scomplex* g1; scomplex* g11; g1 = buff_G; for ( l = 0; l < k_G; ++l ) { a1 = buff_A; a2 = buff_A + cs_A; g11 = g1; for ( j = 0; j < nG_app; ++j ) { gamma = g11->real; sigma = g11->imag; // Skip the current iteration if the rotation is identity. if ( gamma != one || sigma != zero ) { MAC_Apply_G_mx2_asc( m_A, &gamma, &sigma, a1, 1, a2, 1 ); } a1 += cs_A; a2 += cs_A; g11 += rs_G; } g1 += cs_G; } return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_rf_asc_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_blc_var2().
{ float one = bli_s1(); float zero = bli_s0(); float gamma; float sigma; scomplex* a1; scomplex* a2; scomplex* g11; int j, g, k; int nG, nG_app; int k_minus_1; k_minus_1 = k_G - 1; nG = n_A - 1; // Use the simple variant for nG < 2(k - 1). if ( nG < k_minus_1 || k_G == 1 ) { FLA_Apply_G_rf_asc_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); return FLA_SUCCESS; } // Start-up phase. for ( j = 0; j < k_minus_1; ++j ) { nG_app = j + 1; for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g ) { g11 = buff_G + (g )*rs_G + (k )*cs_G; a1 = buff_A + (g )*cs_A; a2 = buff_A + (g + 1)*cs_A; gamma = g11->real; sigma = g11->imag; // Skip the current iteration if the rotation is identity. if ( gamma == one && sigma == zero ) continue; MAC_Apply_G_mx2_asc( m_A, &gamma, &sigma, a1, 1, a2, 1 ); } } // Pipeline stage for ( j = k_minus_1; j < nG; ++j ) { nG_app = k_G; for ( k = 0, g = j; k < nG_app; ++k, --g ) { g11 = buff_G + (g )*rs_G + (k )*cs_G; a1 = buff_A + (g )*cs_A; a2 = buff_A + (g + 1)*cs_A; gamma = g11->real; sigma = g11->imag; // Skip the current iteration if the rotation is identity. if ( gamma == one && sigma == zero ) continue; MAC_Apply_G_mx2_asc( m_A, &gamma, &sigma, a1, 1, a2, 1 ); } } // Shutdown stage for ( j = nG - k_minus_1; j < nG; ++j ) { nG_app = nG - j; for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g ) { g11 = buff_G + (g )*rs_G + (k )*cs_G; a1 = buff_A + (g )*cs_A; a2 = buff_A + (g + 1)*cs_A; gamma = g11->real; sigma = g11->imag; // Skip the current iteration if the rotation is identity. if ( gamma == one && sigma == zero ) continue; MAC_Apply_G_mx2_asc( m_A, &gamma, &sigma, a1, 1, a2, 1 ); } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_rf_asc_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_blc_var3().
{ float one = bli_s1(); float zero = bli_s0(); float gamma23_k1; float sigma23_k1; float gamma34_k1; float sigma34_k1; float gamma12_k2; float sigma12_k2; float gamma23_k2; float sigma23_k2; scomplex* a1; scomplex* a2; scomplex* a3; scomplex* a4; scomplex* g23_k1; scomplex* g34_k1; scomplex* g12_k2; scomplex* g23_k2; int i, j, g, k; int nG, nG_app; int n_iter; int n_left; int k_minus_1; int n_fuse; int k_fuse; int is_ident23_k1, is_ident34_k1; int is_ident12_k2, is_ident23_k2; int has_ident; k_minus_1 = k_G - 1; nG = n_A - 1; n_fuse = 2; k_fuse = 2; // Use the simple variant for nG < (k - 1) or k == 1. if ( nG < 2*k_minus_1 || k_G == 1 ) { FLA_Apply_G_rf_asc_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); return FLA_SUCCESS; } // Start-up phase. for ( j = -1; j < k_minus_1; j += n_fuse ) { nG_app = j + 2; n_iter = nG_app / k_fuse; //n_iter = nG_app % k_fuse; n_left = 1; for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of columns as needed. 
if ( !is_ident23_k1 ) MAC_Apply_G_mx2_asc( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_asc( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_asc( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_asc( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 ); } else { // Apply to all four columns. MAC_Apply_G_mx4s_asc( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 ); } } if ( n_left == 1 ) { g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_asc( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 ); } } // Pipeline stage for ( ; j < nG - 1; j += n_fuse ) { nG_app = k_G; n_iter = nG_app / k_fuse; n_left = nG_app % k_fuse; for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of columns as needed. 
if ( !is_ident23_k1 ) MAC_Apply_G_mx2_asc( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_asc( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_asc( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_asc( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 ); } else { // Apply to all four columns. MAC_Apply_G_mx4s_asc( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 ); } } if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident23_k1 && is_ident34_k1 ) { MAC_Apply_G_mx2_asc( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 ); } else if ( is_ident23_k1 && !is_ident34_k1 ) { MAC_Apply_G_mx2_asc( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 ); } else { MAC_Apply_G_mx3_asc( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, 1, a3, 1, a4, 1 ); } } } // Shutdown stage for ( j = nG % n_fuse; j < k_G; j += n_fuse ) { g = nG - 1; k = j; //n_left = 1; //if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); if ( !is_ident23_k1 ) MAC_Apply_G_mx2_asc( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 ); ++k; --g; } nG_app = k_minus_1 - j; n_iter = nG_app / k_fuse; n_left = nG_app % k_fuse; for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 
1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of columns as needed. if ( !is_ident23_k1 ) MAC_Apply_G_mx2_asc( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_asc( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_asc( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_asc( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 ); } else { // Apply to all four columns. 
MAC_Apply_G_mx4s_asc( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 ); } } if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident23_k1 && is_ident34_k1 ) { MAC_Apply_G_mx2_asc( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 ); } else if ( is_ident23_k1 && !is_ident34_k1 ) { MAC_Apply_G_mx2_asc( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 ); } else { MAC_Apply_G_mx3_asc( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, 1, a3, 1, a4, 1 ); } } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_rf_asc_var3b( int k_G, int m_A, int n_A, int i_k, int iTL, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
Referenced by FLA_Apply_G_rf_asm_var3b().
{
  // No single-precision complex implementation exists for this variant;
  // none of the arguments are read. The condition is reported through the
  // library's error-check hook.
  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

  // NOTE(review): whether the check above returns control is not visible
  // here. If it does, hand the error code back rather than FLA_SUCCESS so a
  // caller cannot mistake the untouched A for a processed one.
  return FLA_NOT_YET_IMPLEMENTED;
}
FLA_Error FLA_Apply_G_rf_asc_var4( int k_G, int m_A, int n_A, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_asc_var5( int k_G, int m_A, int n_A, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_asc_var5b( int k_G, int m_A, int n_A, int i_k, int iTL, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_asc_var6( int k_G, int m_A, int n_A, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blc_var6().
{
  // Applies the rotations stored in G (real part = gamma, imaginary part =
  // sigma) to columns of A. Entry (g, k) of G acts on columns g and g+1 of A.
  // Entries are visited one anti-diagonal of G at a time (g + k constant,
  // walking k upward and g downward), and two consecutive entries of an
  // anti-diagonal are fused into a single three-column kernel call.
  // rs_A is not read; every kernel call uses unit row increments.
  const float one    = bli_s1();
  const float zero   = bli_s0();
  const int   n_fuse = 2;
  const int   nG     = n_A - 1;        // rows of G that are applied
  const int   n_wave = nG + k_G - 1;   // anti-diagonals to visit
  int         wave;

  // Use the simple variant for nG < (k - 1) or k == 1.
  if ( k_G == 1 || nG < k_G - 1 )
  {
    FLA_Apply_G_rf_asc_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // wave <  k_G - 1        : start-up, the anti-diagonal is still growing;
  // k_G - 1 <= wave < nG   : pipeline, all k_G columns of G take part;
  // wave >= nG             : shutdown, the anti-diagonal shrinks again.
  for ( wave = 0; wave < n_wave; ++wave )
  {
    const int k_first = ( wave < nG  ? 0    : wave - nG + 1 );
    const int k_last  = ( wave < k_G ? wave : k_G - 1 );
    const int n_rot   = k_last - k_first + 1;
    const int n_pairs = n_rot / n_fuse;
    int       k       = k_first;
    int       g       = wave - k_first;
    int       p;

    for ( p = 0; p < n_pairs; ++p, k += n_fuse, g -= n_fuse )
    {
      scomplex* g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      scomplex* g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      scomplex* a1  = buff_A + (g - 1)*cs_A;
      scomplex* a2  = buff_A + (g    )*cs_A;
      scomplex* a3  = buff_A + (g + 1)*cs_A;
      // Local copies: the kernels take the addresses of gamma and sigma.
      float     gamma12    = g12->real;
      float     sigma12    = g12->imag;
      float     gamma23    = g23->real;
      float     sigma23    = g23->imag;
      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Apply only to columns 2 and 3.
          MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
        // Both rotations are identity: nothing to do.
      }
      else if ( is_ident23 )
      {
        // Apply only to columns 1 and 2.
        MAC_Apply_G_mx2_asc( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Apply to all three columns.
        MAC_Apply_G_mx3b_asc( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }

    // An odd-length anti-diagonal leaves one unpaired rotation at (g, k).
    if ( n_rot % n_fuse == 1 )
    {
      scomplex* g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      scomplex* a2  = buff_A + (g    )*cs_A;
      scomplex* a3  = buff_A + (g + 1)*cs_A;
      float     gamma23 = g23->real;
      float     sigma23 = g23->imag;

      if ( gamma23 != one || sigma23 != zero )
      {
        MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asc_var6b( int k_G, int m_A, int n_A, int i_k, int iTL, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
Referenced by FLA_Apply_G_rf_asm_var6b().
{
  // No single-precision complex implementation exists for this variant;
  // none of the arguments are read. The condition is reported through the
  // library's error-check hook.
  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

  // NOTE(review): whether the check above returns control is not visible
  // here. If it does, hand the error code back rather than FLA_SUCCESS so a
  // caller cannot mistake the untouched A for a processed one.
  return FLA_NOT_YET_IMPLEMENTED;
}
FLA_Error FLA_Apply_G_rf_asc_var7( int k_G, int m_A, int n_A, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_asc_var8( int k_G, int m_A, int n_A, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_asc_var8b( int k_G, int m_A, int n_A, int i_k, int iTL, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_asc_var9( int k_G, int m_A, int n_A, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
References bli_s0(), bli_s1(), FLA_Apply_G_rf_asc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_blc_var9().
{
  // Applies the rotations stored in G (real part = gamma, imaginary part =
  // sigma) to columns of A. Entry (g, k) of G acts on columns g and g+1 of A.
  // Two vertically adjacent entries of the same G column, (g, k) and
  // (g+1, k), are fused into one three-column kernel call; the outer index j
  // therefore advances by two anti-diagonals at a time.
  // rs_A is not read; every kernel call uses unit row increments.
  const float one       = bli_s1();
  const float zero      = bli_s0();
  const int   n_fuse    = 2;
  const int   k_minus_1 = k_G - 1;
  const int   nG        = n_A - 1;
  int         j, k, g;

  // Use the simple variant for nG < 2(k - 1) or k == 1.
  if ( k_G == 1 || nG < 2*k_minus_1 )
  {
    FLA_Apply_G_rf_asc_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // Start-up phase: j + 1 fused pairs, then one unpaired rotation.
  for ( j = -1; j < k_minus_1; j += n_fuse )
  {
    for ( k = 0, g = j; k <= j; ++k, --g )
    {
      scomplex* g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      scomplex* g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      scomplex* a1  = buff_A + (g    )*cs_A;
      scomplex* a2  = buff_A + (g + 1)*cs_A;
      scomplex* a3  = buff_A + (g + 2)*cs_A;
      // Local copies: the kernels take the addresses of gamma and sigma.
      float     gamma12    = g12->real;
      float     sigma12    = g12->imag;
      float     gamma23    = g23->real;
      float     sigma23    = g23->imag;
      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Apply only to columns 2 and 3.
          MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Apply only to columns 1 and 2.
        MAC_Apply_G_mx2_asc( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Apply to all three columns.
        MAC_Apply_G_mx3_asc( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }

    // The loop leaves k == j + 1 and g == -1, so this is the rotation in
    // row 0 of G column k; it has no partner and is applied on its own.
    {
      scomplex* g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      scomplex* a2  = buff_A + (g + 1)*cs_A;
      scomplex* a3  = buff_A + (g + 2)*cs_A;
      float     gamma23 = g23->real;
      float     sigma23 = g23->imag;

      if ( gamma23 != one || sigma23 != zero )
      {
        MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  // Pipeline stage: j carries on from where start-up stopped; every one of
  // the k_G columns of G contributes a fused pair.
  for ( ; j < nG - 1; j += n_fuse )
  {
    for ( k = 0, g = j; k < k_G; ++k, --g )
    {
      scomplex* g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      scomplex* g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      scomplex* a1  = buff_A + (g    )*cs_A;
      scomplex* a2  = buff_A + (g + 1)*cs_A;
      scomplex* a3  = buff_A + (g + 2)*cs_A;
      float     gamma12    = g12->real;
      float     sigma12    = g12->imag;
      float     gamma23    = g23->real;
      float     sigma23    = g23->imag;
      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Apply only to columns 2 and 3.
          MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Apply only to columns 1 and 2.
        MAC_Apply_G_mx2_asc( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Apply to all three columns.
        MAC_Apply_G_mx3_asc( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }
  }

  // Shutdown stage: one unpaired rotation in the last applied row of G
  // (row nG - 1, column j), then fused pairs for the remaining G columns.
  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
  {
    {
      scomplex* g12 = buff_G + (nG - 1)*rs_G + (j     )*cs_G;
      scomplex* a1  = buff_A + (nG - 1)*cs_A;
      scomplex* a2  = buff_A + (nG    )*cs_A;
      float     gamma12 = g12->real;
      float     sigma12 = g12->imag;

      if ( gamma12 != one || sigma12 != zero )
      {
        MAC_Apply_G_mx2_asc( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
    }

    for ( k = j + 1, g = nG - 2; k < k_G; ++k, --g )
    {
      scomplex* g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      scomplex* g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      scomplex* a1  = buff_A + (g    )*cs_A;
      scomplex* a2  = buff_A + (g + 1)*cs_A;
      scomplex* a3  = buff_A + (g + 2)*cs_A;
      float     gamma12    = g12->real;
      float     sigma12    = g12->imag;
      float     gamma23    = g23->real;
      float     sigma23    = g23->imag;
      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Apply only to columns 2 and 3.
          MAC_Apply_G_mx2_asc( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Apply only to columns 1 and 2.
        MAC_Apply_G_mx2_asc( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Apply to all three columns.
        MAC_Apply_G_mx3_asc( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asc_var9b( int k_G, int m_A, int n_A, int i_k, int iTL, scomplex* buff_G, int rs_G, int cs_G, scomplex* buff_A, int rs_A, int cs_A )
Referenced by FLA_Apply_G_rf_asm_var9b().
{
  // No single-precision complex implementation exists for this variant;
  // none of the arguments are read. The condition is reported through the
  // library's error-check hook.
  FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );

  // NOTE(review): whether the check above returns control is not visible
  // here. If it does, hand the error code back rather than FLA_SUCCESS so a
  // caller cannot mistake the untouched A for a processed one.
  return FLA_NOT_YET_IMPLEMENTED;
}
FLA_Error FLA_Apply_G_rf_asd_var1( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, double* buff_A, int rs_A, int cs_A )
References bli_d0(), bli_d1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_asm_var1(), and FLA_Apply_G_rf_bld_var1().
{
  // Applies every rotation stored in G (real part = gamma, imaginary part =
  // sigma) to A, one column of G after another: entry j of the current G
  // column acts on columns j and j+1 of A.
  // rs_A is not read; the kernel is invoked with unit row increments.
  const double one   = bli_d1();
  const double zero  = bli_d0();
  const int    n_rot = n_A - 1;     // rotations taken from each G column
  dcomplex*    g_col = buff_G;      // top of the current G column
  int          sweep, pos;

  for ( sweep = 0; sweep < k_G; ++sweep, g_col += cs_G )
  {
    dcomplex* g11 = g_col;
    double*   a1  = buff_A;
    double*   a2  = buff_A + cs_A;

    for ( pos = 0; pos < n_rot; ++pos, g11 += rs_G, a1 += cs_A, a2 += cs_A )
    {
      // Local copies: the kernel takes the addresses of gamma and sigma.
      double gamma = g11->real;
      double sigma = g11->imag;

      // Skip the current iteration if the rotation is identity.
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_asd( m_A, &gamma, &sigma, a1, 1, a2, 1 );
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asd_var2( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, double* buff_A, int rs_A, int cs_A )
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_bld_var2().
{
  // Applies the rotations stored in G (real part = gamma, imaginary part =
  // sigma) to columns of A. Entry (g, k) of G acts on columns g and g+1 of A.
  // Entries are visited one anti-diagonal of G at a time (g + k constant),
  // walking k upward and g downward along each anti-diagonal.
  // rs_A is not read; the kernel is invoked with unit row increments.
  const double one    = bli_d1();
  const double zero   = bli_d0();
  const int    nG     = n_A - 1;        // rows of G that are applied
  const int    n_wave = nG + k_G - 1;   // anti-diagonals to visit
  int          wave;

  // Use the simple variant for nG < (k - 1) or k == 1.
  if ( k_G == 1 || nG < k_G - 1 )
  {
    FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // wave <  k_G - 1        : start-up, the anti-diagonal is still growing;
  // k_G - 1 <= wave < nG   : pipeline, all k_G columns of G take part;
  // wave >= nG             : shutdown, the anti-diagonal shrinks again.
  for ( wave = 0; wave < n_wave; ++wave )
  {
    const int k_first = ( wave < nG  ? 0    : wave - nG + 1 );
    const int k_last  = ( wave < k_G ? wave : k_G - 1 );
    int       k, g;

    for ( k = k_first, g = wave - k_first; k <= k_last; ++k, --g )
    {
      dcomplex* g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      double*   a1  = buff_A + (g    )*cs_A;
      double*   a2  = buff_A + (g + 1)*cs_A;
      // Local copies: the kernel takes the addresses of gamma and sigma.
      double    gamma = g11->real;
      double    sigma = g11->imag;

      // Identity rotations are skipped.
      if ( gamma != one || sigma != zero )
      {
        MAC_Apply_G_mx2_asd( m_A, &gamma, &sigma, a1, 1, a2, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asd_var3( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, double* buff_A, int rs_A, int cs_A )
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_bld_var3().
{
  // Applies the rotations stored in G (real part = gamma, imaginary part =
  // sigma) to columns of A. Entry (g, k) of G acts on columns g and g+1 of A.
  // Four rotations -- (g, k), (g+1, k), (g-1, k+1), (g, k+1) -- are fused
  // into one four-column kernel call when none of them is the identity;
  // otherwise the non-identity ones are applied pairwise in that order.
  // rs_A is not read; every kernel call uses unit row increments.
  double    one  = bli_d1();
  double    zero = bli_d0();
  double    gamma23_k1;
  double    sigma23_k1;
  double    gamma34_k1;
  double    sigma34_k1;
  double    gamma12_k2;
  double    sigma12_k2;
  double    gamma23_k2;
  double    sigma23_k2;
  double*   a1;
  double*   a2;
  double*   a3;
  double*   a4;
  dcomplex* g23_k1;
  dcomplex* g34_k1;
  dcomplex* g12_k2;
  dcomplex* g23_k2;
  int       i, j, g, k;
  int       nG, nG_app;
  int       n_iter;
  int       n_left;
  int       k_minus_1;
  int       n_fuse;
  int       k_fuse;
  int       is_ident23_k1, is_ident34_k1;
  int       is_ident12_k2, is_ident23_k2;
  int       has_ident;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;
  n_fuse    = 2;
  k_fuse    = 2;

  // Use the simple variant for nG < 2(k - 1) or k == 1.
  if ( nG < 2*k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // Start-up phase.
  for ( j = -1; j < k_minus_1; j += n_fuse )
  {
    nG_app = j + 2;
    n_iter = nG_app / k_fuse;
    n_left = 1;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1 = buff_A + (g - 1)*cs_A;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;  sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;  sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        // Apply to pairs of columns as needed.
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_asd( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_asd( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
      }
      else
      {
        // Apply to all four columns.
        MAC_Apply_G_mx4s_asd( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    // Start-up always finishes with one unpaired rotation at (g + 1, k).
    if ( n_left == 1 )
    {
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident34_k1 )
        MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
    }
  }

  // Pipeline stage (j carries on from where start-up stopped).
  for ( ; j < nG - 1; j += n_fuse )
  {
    nG_app = k_G;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1 = buff_A + (g - 1)*cs_A;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;  sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;  sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        // Apply to pairs of columns as needed.
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_asd( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_asd( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
      }
      else
      {
        // Apply to all four columns.
        MAC_Apply_G_mx4s_asd( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    // Odd k_G: the last G column contributes a pair (g, k), (g + 1, k).
    if ( n_left == 1 )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
      }
      else if ( !is_ident23_k1 && !is_ident34_k1 )
      {
        // Only run the three-column kernel when both rotations are real
        // work; two identity rotations are skipped like everywhere else.
        MAC_Apply_G_mx3_asd( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, 1, a3, 1, a4, 1 );
      }
    }
  }

  // Shutdown stage.
  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
  {
    g = nG - 1;
    k = j;

    // One unpaired rotation in the last applied row of G comes first.
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

      if ( !is_ident23_k1 )
        MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );

      ++k;
      --g;
    }

    nG_app = k_minus_1 - j;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1 = buff_A + (g - 1)*cs_A;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;  sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;  sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        // Apply to pairs of columns as needed.
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_asd( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_asd( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
      }
      else
      {
        // Apply to all four columns.
        MAC_Apply_G_mx4s_asd( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    if ( n_left == 1 )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asd( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asd( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
      }
      else if ( !is_ident23_k1 && !is_ident34_k1 )
      {
        // Two identity rotations are skipped rather than sent through the
        // three-column kernel.
        MAC_Apply_G_mx3_asd( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, 1, a3, 1, a4, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asd_var3b( int k_G, int m_A, int n_A, int i_k, int iTL, dcomplex* buff_G, int rs_G, int cs_G, double* buff_A, int rs_A, int cs_A )
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bld_var3b().
{
  // Applies the rotations stored in G (real part = gamma, imaginary part =
  // sigma) to columns of A. Entry (g, k) of G acts on columns g and g+1 of A.
  // Four rotations -- (g, k), (g+1, k), (g-1, k+1), (g, k+1) -- are fused
  // into one four-column kernel call when none of them is the identity;
  // otherwise the non-identity ones are applied pairwise in that order.
  //
  // In the start-up and pipeline phases only the first m_app rows of the
  // affected columns are updated, where m_app = i_k + 3 + j - iTL clamped to
  // [0, m_A]; the shutdown phase updates all m_A rows. The fallback to
  // variant 1 ignores i_k and iTL and updates all m_A rows.
  // NOTE(review): the meaning of i_k and iTL is defined by the caller and is
  // not visible here.
  // rs_A is not read; every kernel call uses unit row increments.
  double    one  = bli_d1();
  double    zero = bli_d0();
  double    gamma23_k1;
  double    sigma23_k1;
  double    gamma34_k1;
  double    sigma34_k1;
  double    gamma12_k2;
  double    sigma12_k2;
  double    gamma23_k2;
  double    sigma23_k2;
  double*   a1;
  double*   a2;
  double*   a3;
  double*   a4;
  dcomplex* g23_k1;
  dcomplex* g34_k1;
  dcomplex* g12_k2;
  dcomplex* g23_k2;
  int       i, j, g, k;
  int       nG, nG_app;
  int       n_iter;
  int       n_left;
  int       k_minus_1;
  int       n_fuse;
  int       k_fuse;
  int       is_ident23_k1, is_ident34_k1;
  int       is_ident12_k2, is_ident23_k2;
  int       has_ident;
  int       m_app;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;
  n_fuse    = 2;
  k_fuse    = 2;

  // Use the simple variant for nG < 2(k - 1) or k == 1.
  if ( nG < 2*k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // Start-up phase.
  for ( j = -1; j < k_minus_1; j += n_fuse )
  {
    nG_app = j + 2;
    n_iter = nG_app / k_fuse;
    n_left = 1;

    // The row count depends only on j, so compute it once per outer step.
    m_app = min( i_k + 3 + j - iTL, m_A );
    m_app = max( m_app, 0 );

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1 = buff_A + (g - 1)*cs_A;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;  sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;  sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        // Apply to pairs of columns as needed.
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_asd( m_app, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_asd( m_app, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_asd( m_app, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_asd( m_app, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
      }
      else
      {
        // Apply to all four columns.
        MAC_Apply_G_mx4s_asd( m_app, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    // Start-up always finishes with one unpaired rotation at (g + 1, k).
    if ( n_left == 1 )
    {
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident34_k1 )
        MAC_Apply_G_mx2_asd( m_app, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
    }
  }

  // Pipeline stage (j carries on from where start-up stopped).
  for ( ; j < nG - 1; j += n_fuse )
  {
    nG_app = k_G;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    // The row count depends only on j, so compute it once per outer step.
    m_app = min( i_k + 3 + j - iTL, m_A );
    m_app = max( m_app, 0 );

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1 = buff_A + (g - 1)*cs_A;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;  sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;  sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        // Apply to pairs of columns as needed.
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_asd( m_app, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_asd( m_app, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_asd( m_app, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_asd( m_app, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
      }
      else
      {
        // Apply to all four columns.
        MAC_Apply_G_mx4s_asd( m_app, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    // Odd k_G: the last G column contributes a pair (g, k), (g + 1, k).
    if ( n_left == 1 )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asd( m_app, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asd( m_app, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
      }
      else if ( !is_ident23_k1 && !is_ident34_k1 )
      {
        // Only run the three-column kernel when both rotations are real
        // work; two identity rotations are skipped like everywhere else.
        MAC_Apply_G_mx3_asd( m_app, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, 1, a3, 1, a4, 1 );
      }
    }
  }

  // Shutdown stage: every kernel call here covers all m_A rows.
  m_app = m_A;

  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
  {
    g = nG - 1;
    k = j;

    // One unpaired rotation in the last applied row of G comes first.
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

      if ( !is_ident23_k1 )
        MAC_Apply_G_mx2_asd( m_app, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );

      ++k;
      --g;
    }

    nG_app = k_minus_1 - j;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1 = buff_A + (g - 1)*cs_A;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;  sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;  sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        // Apply to pairs of columns as needed.
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_asd( m_app, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_asd( m_app, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_asd( m_app, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_asd( m_app, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
      }
      else
      {
        // Apply to all four columns.
        MAC_Apply_G_mx4s_asd( m_app, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    if ( n_left == 1 )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2 = buff_A + (g    )*cs_A;
      a3 = buff_A + (g + 1)*cs_A;
      a4 = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;  sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;  sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asd( m_app, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asd( m_app, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
      }
      else if ( !is_ident23_k1 && !is_ident34_k1 )
      {
        // Two identity rotations are skipped rather than sent through the
        // three-column kernel.
        MAC_Apply_G_mx3_asd( m_app, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, 1, a3, 1, a4, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asd_var4( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, double* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_asd_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asd_var5b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asd_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bld_var6().
/*
 * Body of FLA_Apply_G_rf_asd_var6.
 *
 * Applies the rotations stored in the k_G columns of G to the columns of the
 * real double-precision matrix A, handling rotations two at a time
 * (n_fuse == 2).
 *
 * - Each entry of G is read as a (gamma, sigma) pair from its real/imag
 *   fields. Entry (g, k) of G acts on columns g and g+1 of A.
 * - An entry equal to (1, 0) is treated as the identity and skipped.
 * - Columns of A are addressed as buff_A + col*cs_A and the macros are passed
 *   a unit increment; rs_A is only forwarded to the var1 fallback.
 * - Rotations are visited along anti-diagonals of G (g + k constant). A fused
 *   step takes G(g, k) together with G(g-1, k+1), which touch the three
 *   adjacent columns g-1, g, g+1 of A.
 *
 * Always returns FLA_SUCCESS.
 */
{
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma12;
    double    sigma12;
    double    gamma23;
    double    sigma23;
    double*   a1;
    double*   a2;
    double*   a3;
    dcomplex* g12;
    dcomplex* g23;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       is_ident12, is_ident23;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;   // Number of rotations per column of G.
    n_fuse    = 2;

    // Use the simple variant for nG < (k - 1) or k == 1.
    if ( nG < k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase: anti-diagonal j (j = 0 .. k_G-2) holds j + 1 rotations,
    // G(j, 0), G(j-1, 1), ..., G(0, j).
    for ( j = 0; j < k_minus_1; ++j )
    {
        nG_app = j + 1;
        n_iter = nG_app / n_fuse;   // Number of fused pairs.
        n_left = nG_app % n_fuse;   // 1 if a single rotation remains.

        for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            // g23 acts on columns (g, g+1); g12 acts on columns (g-1, g).
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // If both rotations are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3b_asd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }

        // Odd count: one rotation is left at the (g, k) reached by the loop.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
    }

    // Pipeline stage: anti-diagonal j (j = k_G-1 .. nG-1) holds k_G rotations,
    // one from every column of G, starting at G(j, 0).
    for ( j = k_minus_1; j < nG; ++j )
    {
        nG_app = k_G;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3b_asd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }

        // Odd k_G: one rotation is left at the (g, k) reached by the loop.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
    }

    // Shutdown stage: the remaining anti-diagonals start in the last row of G,
    // at G(nG-1, j) for j = 1 .. k_G-1, and hold k_G - j rotations each.
    for ( j = 1; j < k_G; ++j )
    {
        nG_app = k_G - j;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3b_asd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }

        // Odd count: one rotation is left at the (g, k) reached by the loop.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asd_var6b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bld_var6b().
/*
 * Body of FLA_Apply_G_rf_asd_var6b.
 *
 * Same traversal as the non-"b" var6 kernel: rotations stored in the k_G
 * columns of G are applied to columns of the real double-precision matrix A,
 * two at a time (n_fuse == 2), along anti-diagonals of G. The difference is
 * that the number of rows passed to the macros, m_app, is limited during the
 * start-up and pipeline phases:
 *
 *     m_app = max( min( i_k + 2 + j - iTL, m_A ), 0 )
 *
 * while the shutdown phase always uses m_app = m_A.
 *
 * NOTE(review): the meaning of i_k and iTL is not visible in this file chunk;
 * presumably they describe an offset that bounds the rows that can be
 * non-zero -- confirm against the callers. The object-based wrapper in this
 * file passes 0 for both.
 *
 * - Each entry of G is read as a (gamma, sigma) pair from its real/imag
 *   fields; entry (g, k) acts on columns g and g+1 of A.
 * - An entry equal to (1, 0) is treated as the identity and skipped.
 * - The var1 fallback is called without i_k/iTL, i.e. on all m_A rows.
 *
 * Always returns FLA_SUCCESS.
 */
{
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma12;
    double    sigma12;
    double    gamma23;
    double    sigma23;
    double*   a1;
    double*   a2;
    double*   a3;
    dcomplex* g12;
    dcomplex* g23;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       is_ident12, is_ident23;
    int       m_app;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;   // Number of rotations per column of G.
    n_fuse    = 2;

    // Use the simple variant for nG < (k - 1) or k == 1.
    if ( nG < k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase: anti-diagonal j (j = 0 .. k_G-2) holds j + 1 rotations,
    // G(j, 0), G(j-1, 1), ..., G(0, j).
    for ( j = 0; j < k_minus_1; ++j )
    {
        nG_app = j + 1;
        n_iter = nG_app / n_fuse;   // Number of fused pairs.
        n_left = nG_app % n_fuse;   // 1 if a single rotation remains.

        for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            // g23 acts on columns (g, g+1); g12 acts on columns (g-1, g).
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // Row count depends only on j; clamp it to [0, m_A].
            m_app = min( i_k + 2 + j - iTL, m_A );
            m_app = max( m_app, 0 );

            // If both rotations are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_app, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3b_asd( m_app, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }

        // Odd count: one rotation is left at the (g, k) reached by the loop.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            m_app = min( i_k + 2 + j - iTL, m_A );
            m_app = max( m_app, 0 );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
    }

    // Pipeline stage: anti-diagonal j (j = k_G-1 .. nG-1) holds k_G rotations,
    // one from every column of G, starting at G(j, 0).
    for ( j = k_minus_1; j < nG; ++j )
    {
        nG_app = k_G;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // Row count depends only on j; clamp it to [0, m_A].
            m_app = min( i_k + 2 + j - iTL, m_A );
            m_app = max( m_app, 0 );

            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_app, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3b_asd( m_app, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }

        // Odd k_G: one rotation is left at the (g, k) reached by the loop.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            m_app = min( i_k + 2 + j - iTL, m_A );
            m_app = max( m_app, 0 );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
    }

    // Shutdown stage: the remaining anti-diagonals start in the last row of G,
    // at G(nG-1, j) for j = 1 .. k_G-1, and hold k_G - j rotations each.
    // All m_A rows are used here.
    for ( j = 1; j < k_G; ++j )
    {
        nG_app = k_G - j;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g - 1)*cs_A;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            m_app = m_A;

            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_app, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3b_asd( m_app, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }

        // Odd count: one rotation is left at the (g, k) reached by the loop.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2  = buff_A + (g    )*cs_A;
            a3  = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            m_app = m_A;

            if ( !is_ident23 )
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asd_var7 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asd_var8 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asd_var8b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asd_var9 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_bld_var9().
/*
 * Body of FLA_Apply_G_rf_asd_var9.
 *
 * Applies the rotations stored in the k_G columns of G to the columns of the
 * real double-precision matrix A. Unlike var6, the two rotations that are
 * handled together come from the SAME column of G: G(g, k) and G(g+1, k),
 * which touch the three adjacent columns g, g+1, g+2 of A. The outer index j
 * therefore advances by n_fuse == 2.
 *
 * - Each entry of G is read as a (gamma, sigma) pair from its real/imag
 *   fields; entry (g, k) acts on columns g and g+1 of A.
 * - An entry equal to (1, 0) is treated as the identity and skipped.
 * - Columns of A are addressed as buff_A + col*cs_A and the macros are passed
 *   a unit increment; rs_A is only forwarded to the var1 fallback.
 *
 * Always returns FLA_SUCCESS.
 */
{
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma12;
    double    sigma12;
    double    gamma23;
    double    sigma23;
    double*   a1;
    double*   a2;
    double*   a3;
    dcomplex* g12;
    dcomplex* g23;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       is_ident12, is_ident23;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;   // Number of rotations per column of G.
    n_fuse    = 2;

    // Use the simple variant for nG < 2(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase: j = -1, 1, 3, ... while j < k_G - 1. Step j performs
    // j + 1 fused pairs (columns 0 .. j of G) followed by one single rotation.
    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 1;
        n_iter = nG_app;
        n_left = 1;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
        {
            // g12 acts on columns (g, g+1); g23 acts on columns (g+1, g+2).
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // If both rotations are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3_asd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }

        // The loop leaves g == -1 and k == j + 1, so this single rotation is
        // G(0, j+1), acting on columns 0 and 1 of A. n_left is always 1 here.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
    }

    // Pipeline stage: j continues from the value left by the start-up loop
    // and advances by 2 while j < nG - 1. Every step performs one fused pair
    // in each of the k_G columns of G. n_left is assigned but not read here.
    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app;
        n_left = 0;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3_asd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }
    }

    // Shutdown stage: j = nG % 2, then steps of 2 while j < k_G. Each step
    // starts with the single rotation G(nG-1, j) and continues with
    // k_G - 1 - j fused pairs in the following columns of G.
    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        n_left = 1;
        // n_left was just set to 1, so this branch is always taken.
        if ( n_left == 1 )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );

            if ( !is_ident12 )
                MAC_Apply_G_mx2_asd( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
            ++k;
            --g;
        }

        nG_app = k_minus_1 - j;
        n_iter = nG_app;

        // k and g carry over from the single rotation above.
        for ( i = 0; i < n_iter; ++i, ++k, --g )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3_asd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asd_var9b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bld_var9b().
/*
 * Body of FLA_Apply_G_rf_asd_var9b.
 *
 * Same traversal as the non-"b" var9 kernel: rotations G(g, k) and G(g+1, k)
 * from one column of G are handled together on columns g, g+1, g+2 of the
 * real double-precision matrix A, and the outer index j advances by
 * n_fuse == 2. The difference is that the number of rows passed to the
 * macros, m_app, is limited during the start-up and pipeline phases:
 *
 *     m_app = max( min( i_k + 3 + j - iTL, m_A ), 0 )
 *
 * while the shutdown phase always uses m_app = m_A.
 *
 * NOTE(review): the meaning of i_k and iTL is not visible in this file chunk;
 * presumably they describe an offset that bounds the rows that can be
 * non-zero -- confirm against the callers. The object-based wrapper in this
 * file passes 0 for both.
 *
 * - Each entry of G is read as a (gamma, sigma) pair from its real/imag
 *   fields; entry (g, k) acts on columns g and g+1 of A.
 * - An entry equal to (1, 0) is treated as the identity and skipped.
 * - The var1 fallback is called without i_k/iTL, i.e. on all m_A rows.
 *
 * Always returns FLA_SUCCESS.
 */
{
    double    one  = bli_d1();
    double    zero = bli_d0();
    double    gamma12;
    double    sigma12;
    double    gamma23;
    double    sigma23;
    double*   a1;
    double*   a2;
    double*   a3;
    dcomplex* g12;
    dcomplex* g23;
    int       i, j, g, k;
    int       nG, nG_app;
    int       n_iter;
    int       n_left;
    int       k_minus_1;
    int       n_fuse;
    int       is_ident12, is_ident23;
    int       m_app;

    k_minus_1 = k_G - 1;
    nG        = n_A - 1;   // Number of rotations per column of G.
    n_fuse    = 2;

    // Use the simple variant for nG < 2(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_asd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase: j = -1, 1, 3, ... while j < k_G - 1. Step j performs
    // j + 1 fused pairs (columns 0 .. j of G) followed by one single rotation.
    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        nG_app = j + 1;
        n_iter = nG_app;
        n_left = 1;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
        {
            // g12 acts on columns (g, g+1); g23 acts on columns (g+1, g+2).
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // Row count depends only on j; clamp it to [0, m_A].
            m_app = min( i_k + 3 + j - iTL, m_A );
            m_app = max( m_app, 0 );

            // If both rotations are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_app, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3_asd( m_app, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }

        // The loop leaves g == -1 and k == j + 1, so this single rotation is
        // G(0, j+1), acting on columns 0 and 1 of A. n_left is always 1 here.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            m_app = min( i_k + 3 + j - iTL, m_A );
            m_app = max( m_app, 0 );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
    }

    // Pipeline stage: j continues from the value left by the start-up loop
    // and advances by 2 while j < nG - 1. Every step performs one fused pair
    // in each of the k_G columns of G. n_left is assigned but not read here.
    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app;
        n_left = 0;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // Row count depends only on j; clamp it to [0, m_A].
            m_app = min( i_k + 3 + j - iTL, m_A );
            m_app = max( m_app, 0 );

            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_app, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3_asd( m_app, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }
    }

    // Shutdown stage: j = nG % 2, then steps of 2 while j < k_G. Each step
    // starts with the single rotation G(nG-1, j) and continues with
    // k_G - 1 - j fused pairs in the following columns of G.
    // All m_A rows are used here.
    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        n_left = 1;
        // n_left was just set to 1, so this branch is always taken.
        if ( n_left == 1 )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );

            m_app = m_A;

            if ( !is_ident12 )
                MAC_Apply_G_mx2_asd( m_app, &gamma12, &sigma12, a1, 1, a2, 1 );
            ++k;
            --g;
        }

        nG_app = k_minus_1 - j;
        n_iter = nG_app;

        // k and g carry over from the single rotation above.
        for ( i = 0; i < n_iter; ++i, ++k, --g )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a1  = buff_A + (g    )*cs_A;
            a2  = buff_A + (g + 1)*cs_A;
            a3  = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            m_app = m_A;

            if ( !is_ident12 && is_ident23 )
            {
                // Only g12 is non-trivial: apply it to a1 and a2.
                MAC_Apply_G_mx2_asd( m_app, &gamma12, &sigma12, a1, 1, a2, 1 );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Only g23 is non-trivial: apply it to a2 and a3.
                MAC_Apply_G_mx2_asd( m_app, &gamma23, &sigma23, a2, 1, a3, 1 );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Both are non-trivial: use the fused three-column macro.
                MAC_Apply_G_mx3_asd( m_app, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
            }
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var1 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_asc_var1(), FLA_Apply_G_rf_asd_var1(), FLA_Apply_G_rf_ass_var1(), FLA_Apply_G_rf_asz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
/*
 * Body of FLA_Apply_G_rf_asm_var1.
 *
 * Object-level front end: queries the datatype of A plus the dimensions and
 * strides of G and A, then forwards raw buffers to the matching typed var1
 * kernel (ass / asd / asc / asz). G is accessed as single-precision complex
 * for FLA_FLOAT and FLA_COMPLEX, and as double-precision complex for
 * FLA_DOUBLE and FLA_DOUBLE_COMPLEX. Any other datatype does no work.
 * FLA_SUCCESS is returned in every case.
 */
{
    const FLA_Datatype dt = FLA_Obj_datatype( A );

    const int g_width = FLA_Obj_width( G );
    const int a_rows  = FLA_Obj_length( A );
    const int a_cols  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );

    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    if ( dt == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var1( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var1( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var1( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var1( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var2 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_asc_var2(), FLA_Apply_G_rf_asd_var2(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_asz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
/*
 * Body of FLA_Apply_G_rf_asm_var2.
 *
 * Object-level front end: queries the datatype of A plus the dimensions and
 * strides of G and A, then forwards raw buffers to the matching typed var2
 * kernel (ass / asd / asc / asz). G is accessed as single-precision complex
 * for FLA_FLOAT and FLA_COMPLEX, and as double-precision complex for
 * FLA_DOUBLE and FLA_DOUBLE_COMPLEX. Any other datatype does no work.
 * FLA_SUCCESS is returned in every case.
 */
{
    const FLA_Datatype dt = FLA_Obj_datatype( A );

    const int g_width = FLA_Obj_width( G );
    const int a_rows  = FLA_Obj_length( A );
    const int a_cols  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );

    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    if ( dt == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var2( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var2( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var2( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var2( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var3 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_asc_var3(), FLA_Apply_G_rf_asd_var3(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_asz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
/*
 * Body of FLA_Apply_G_rf_asm_var3.
 *
 * Object-level front end: queries the datatype of A plus the dimensions and
 * strides of G and A, then forwards raw buffers to the matching typed var3
 * kernel (ass / asd / asc / asz). G is accessed as single-precision complex
 * for FLA_FLOAT and FLA_COMPLEX, and as double-precision complex for
 * FLA_DOUBLE and FLA_DOUBLE_COMPLEX. Any other datatype does no work.
 * FLA_SUCCESS is returned in every case.
 */
{
    const FLA_Datatype dt = FLA_Obj_datatype( A );

    const int g_width = FLA_Obj_width( G );
    const int a_rows  = FLA_Obj_length( A );
    const int a_cols  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );

    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    if ( dt == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var3( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var3( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var3( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var3( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var3b | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_asc_var3b(), FLA_Apply_G_rf_asd_var3b(), FLA_Apply_G_rf_ass_var3b(), FLA_Apply_G_rf_asz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
/*
 * Body of FLA_Apply_G_rf_asm_var3b.
 *
 * Object-level front end: queries the datatype of A plus the dimensions and
 * strides of G and A, then forwards raw buffers to the matching typed var3b
 * kernel (ass / asd / asc / asz). The two extra integer arguments that the
 * "b" kernels take after n_A are both passed as 0. G is accessed as
 * single-precision complex for FLA_FLOAT and FLA_COMPLEX, and as
 * double-precision complex for FLA_DOUBLE and FLA_DOUBLE_COMPLEX. Any other
 * datatype does no work. FLA_SUCCESS is returned in every case.
 */
{
    const FLA_Datatype dt = FLA_Obj_datatype( A );

    const int g_width = FLA_Obj_width( G );
    const int a_rows  = FLA_Obj_length( A );
    const int a_cols  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );

    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    if ( dt == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var3b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var3b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var3b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var3b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var4 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_asm_var5 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_asm_var5b | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_asm_var6 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_asc_var6(), FLA_Apply_G_rf_asd_var6(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_asz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
/*
 * Body of FLA_Apply_G_rf_asm_var6.
 *
 * Object-level front end: queries the datatype of A plus the dimensions and
 * strides of G and A, then forwards raw buffers to the matching typed var6
 * kernel (ass / asd / asc / asz). G is accessed as single-precision complex
 * for FLA_FLOAT and FLA_COMPLEX, and as double-precision complex for
 * FLA_DOUBLE and FLA_DOUBLE_COMPLEX. Any other datatype does no work.
 * FLA_SUCCESS is returned in every case.
 */
{
    const FLA_Datatype dt = FLA_Obj_datatype( A );

    const int g_width = FLA_Obj_width( G );
    const int a_rows  = FLA_Obj_length( A );
    const int a_cols  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );

    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    if ( dt == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var6( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var6( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var6( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var6( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var6b | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_asc_var6b(), FLA_Apply_G_rf_asd_var6b(), FLA_Apply_G_rf_ass_var6b(), FLA_Apply_G_rf_asz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
/*
 * Body of FLA_Apply_G_rf_asm_var6b.
 *
 * Object-level front end: queries the datatype of A plus the dimensions and
 * strides of G and A, then forwards raw buffers to the matching typed var6b
 * kernel (ass / asd / asc / asz). The i_k and iTL arguments of the kernels
 * are both passed as 0. G is accessed as single-precision complex for
 * FLA_FLOAT and FLA_COMPLEX, and as double-precision complex for FLA_DOUBLE
 * and FLA_DOUBLE_COMPLEX. Any other datatype does no work. FLA_SUCCESS is
 * returned in every case.
 */
{
    const FLA_Datatype dt = FLA_Obj_datatype( A );

    const int g_width = FLA_Obj_width( G );
    const int a_rows  = FLA_Obj_length( A );
    const int a_cols  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );

    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    if ( dt == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var6b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var6b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var6b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var6b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var7 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_asm_var8 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_asm_var8b | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_asm_var9 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_asc_var9(), FLA_Apply_G_rf_asd_var9(), FLA_Apply_G_rf_ass_var9(), FLA_Apply_G_rf_asz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
/*
 * Body of FLA_Apply_G_rf_asm_var9.
 *
 * Object-level front end: queries the datatype of A plus the dimensions and
 * strides of G and A, then forwards raw buffers to the matching typed var9
 * kernel (ass / asd / asc / asz). G is accessed as single-precision complex
 * for FLA_FLOAT and FLA_COMPLEX, and as double-precision complex for
 * FLA_DOUBLE and FLA_DOUBLE_COMPLEX. Any other datatype does no work.
 * FLA_SUCCESS is returned in every case.
 */
{
    const FLA_Datatype dt = FLA_Obj_datatype( A );

    const int g_width = FLA_Obj_width( G );
    const int a_rows  = FLA_Obj_length( A );
    const int a_cols  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );

    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    if ( dt == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var9( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var9( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var9( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var9( g_width, a_rows, a_cols,
                                 g_buf, g_rs, g_cs,
                                 a_buf, a_rs, a_cs );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asm_var9b | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_asc_var9b(), FLA_Apply_G_rf_asd_var9b(), FLA_Apply_G_rf_ass_var9b(), FLA_Apply_G_rf_asz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
/*
 * Body of FLA_Apply_G_rf_asm_var9b.
 *
 * Object-level front end: queries the datatype of A plus the dimensions and
 * strides of G and A, then forwards raw buffers to the matching typed var9b
 * kernel (ass / asd / asc / asz). The i_k and iTL arguments of the kernels
 * are both passed as 0. G is accessed as single-precision complex for
 * FLA_FLOAT and FLA_COMPLEX, and as double-precision complex for FLA_DOUBLE
 * and FLA_DOUBLE_COMPLEX. Any other datatype does no work. FLA_SUCCESS is
 * returned in every case.
 */
{
    const FLA_Datatype dt = FLA_Obj_datatype( A );

    const int g_width = FLA_Obj_width( G );
    const int a_rows  = FLA_Obj_length( A );
    const int a_cols  = FLA_Obj_width( A );

    const int g_rs = FLA_Obj_row_stride( G );
    const int g_cs = FLA_Obj_col_stride( G );

    const int a_rs = FLA_Obj_row_stride( A );
    const int a_cs = FLA_Obj_col_stride( A );

    if ( dt == FLA_FLOAT )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        float*    a_buf = ( float*    ) FLA_FLOAT_PTR( A );

        FLA_Apply_G_rf_ass_var9b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        double*   a_buf = ( double*   ) FLA_DOUBLE_PTR( A );

        FLA_Apply_G_rf_asd_var9b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_COMPLEX )
    {
        scomplex* g_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
        scomplex* a_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asc_var9b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }
    else if ( dt == FLA_DOUBLE_COMPLEX )
    {
        dcomplex* g_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
        dcomplex* a_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

        FLA_Apply_G_rf_asz_var9b( g_width, a_rows, a_cols,
                                  0, 0,
                                  g_buf, g_rs, g_cs,
                                  a_buf, a_rs, a_cs );
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var1(), FLA_Apply_G_rf_ass_var2(), FLA_Apply_G_rf_ass_var3(), FLA_Apply_G_rf_ass_var6(), FLA_Apply_G_rf_ass_var9(), and FLA_Apply_G_rf_bls_var1().
{
  /*
   * Simple variant (real single-precision A): walks G one column at a time
   * and applies every rotation of that column to A, left to right.
   *
   * Entry (j, l) of G is read as the pair (gamma, sigma) = (real, imag) and
   * is applied to columns j and j+1 of A through MAC_Apply_G_mx2_ass().
   * Entries equal to (1, 0) are treated as identity rotations and skipped.
   *
   *   k_G            number of columns of G that are traversed
   *   m_A            forwarded to MAC_Apply_G_mx2_ass (presumably the number
   *                  of rows of A -- confirm against the macro)
   *   n_A            number of columns of A; n_A - 1 entries are read from
   *                  each column of G
   *   buff_G         base address of G, with row/column strides rs_G/cs_G
   *   buff_A         base address of A, with column stride cs_A
   *   rs_A           never read. NOTE(review): the literal 1 passed after
   *                  each column pointer is presumably the row increment, so
   *                  a unit row stride appears to be assumed -- confirm.
   *
   * Always returns FLA_SUCCESS.
   */
  float     one    = bli_s1();
  float     zero   = bli_s0();
  int       nG_app = n_A - 1;
  int       l, j;
  float     gamma;
  float     sigma;
  float*    a1;
  float*    a2;
  scomplex* g1;
  scomplex* g11;

  g1 = buff_G;

  for ( l = 0; l < k_G; ++l )
  {
    /* Restart at the first column pair of A for every column of G. */
    a1  = buff_A;
    a2  = buff_A + cs_A;
    g11 = g1;

    for ( j = 0; j < nG_app; ++j )
    {
      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma != one || sigma != zero )
      {
        MAC_Apply_G_mx2_ass( m_A, &gamma, &sigma, a1, 1, a2, 1 );
      }

      /* Slide to the next pair of adjacent columns and the next rotation. */
      a1  += cs_A;
      a2  += cs_A;
      g11 += rs_G;
    }

    g1 += cs_G;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_bls_var2().
{
  /*
   * Wavefront variant (real single-precision A): applies the same rotations
   * as FLA_Apply_G_rf_ass_var1(), but visits G along anti-diagonals
   * (row index g + column index k constant within one pass of the outer
   * loop) instead of column by column.
   *
   * Entry (g, k) of G is read as (gamma, sigma) = (real, imag) and applied
   * to columns g and g+1 of A through MAC_Apply_G_mx2_ass(). Entries equal
   * to (1, 0) are treated as identity and skipped.
   *
   * The schedule has three phases:
   *   start-up  : passes j = 0 .. k_G-2, pass j applies j+1 rotations;
   *   pipeline  : passes j = k_G-1 .. nG-1, each applies k_G rotations;
   *   shutdown  : the last k_G-1 passes, applying k_G-1 down to 1 rotations,
   *               each starting at the last rotation row nG-1.
   *
   * rs_A is only forwarded to the fallback call; this body itself passes the
   * literal 1 after each column pointer (presumably the row increment --
   * confirm against the macro).
   *
   * Always returns FLA_SUCCESS.
   */
  float     one  = bli_s1();
  float     zero = bli_s0();
  float     gamma;
  float     sigma;
  float*    a1;
  float*    a2;
  scomplex* g11;
  int       j, g, k;
  int       nG, nG_app;
  int       k_minus_1;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;

  /* Use the simple variant for nG < (k - 1) or k == 1. The start-up phase
     below reaches rotation row k_G - 2, so it needs nG >= k_G - 1. */
  if ( nG < k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_ass_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase. */
  for ( j = 0; j < k_minus_1; ++j )
  {
    nG_app = j + 1;

    for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
    {
      g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_ass( m_A, &gamma, &sigma, a1, 1, a2, 1 );
    }
  }

  /* Pipeline stage. */
  for ( j = k_minus_1; j < nG; ++j )
  {
    nG_app = k_G;

    for ( k = 0, g = j; k < nG_app; ++k, --g )
    {
      g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_ass( m_A, &gamma, &sigma, a1, 1, a2, 1 );
    }
  }

  /* Shutdown stage. */
  for ( j = nG - k_minus_1; j < nG; ++j )
  {
    nG_app = nG - j;

    for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
    {
      g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_ass( m_A, &gamma, &sigma, a1, 1, a2, 1 );
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_bls_var3().
{
  /*
   * Fused wavefront variant (real single-precision A).
   *
   * Entry (g, k) of G is read as (gamma, sigma) = (real, imag) and acts on
   * columns g and g+1 of A; entries equal to (1, 0) are treated as identity.
   * The outer loops advance two rotation rows at a time (n_fuse) and the
   * inner loops consume two columns of G at a time (k_fuse). One fused step
   * handles four rotations touching four adjacent columns a1..a4 of A:
   *
   *     G(g,   k  ) on (a2, a3)      G(g+1, k  ) on (a3, a4)
   *     G(g-1, k+1) on (a1, a2)      G(g,   k+1) on (a2, a3)
   *
   * If any of the four is identity they are applied one pair at a time in
   * the order listed; otherwise MAC_Apply_G_mx4s_ass() receives all four
   * (presumably applying them in the same order in a single sweep --
   * confirm against the macro).
   *
   * rs_A is only forwarded to the fallback call; this body passes the
   * literal 1 after each column pointer (presumably the row increment).
   *
   * Always returns FLA_SUCCESS.
   */
  float     one  = bli_s1();
  float     zero = bli_s0();
  float     gamma23_k1;
  float     sigma23_k1;
  float     gamma34_k1;
  float     sigma34_k1;
  float     gamma12_k2;
  float     sigma12_k2;
  float     gamma23_k2;
  float     sigma23_k2;
  float*    a1;
  float*    a2;
  float*    a3;
  float*    a4;
  scomplex* g23_k1;
  scomplex* g34_k1;
  scomplex* g12_k2;
  scomplex* g23_k2;
  int       i, j, g, k;
  int       nG, nG_app;
  int       n_iter;
  int       n_left;
  int       k_minus_1;
  int       n_fuse;
  int       k_fuse;
  int       is_ident23_k1, is_ident34_k1;
  int       is_ident12_k2, is_ident23_k2;
  int       has_ident;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;
  n_fuse    = 2;
  k_fuse    = 2;

  /* Nothing to apply. This also keeps the shutdown loop from starting at a
     negative index (nG % n_fuse is -1 when nG is -1), which would read G
     before its first element when k_G is 0. */
  if ( k_G < 1 || nG < 1 ) return FLA_SUCCESS;

  /* Use the simple variant for nG < 2(k - 1) or k == 1. */
  if ( nG < 2*k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_ass_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase: j = -1, 1, 3, ...; each pass runs (j + 2) / 2 fused
     steps and then exactly one trailing single rotation. */
  for ( j = -1; j < k_minus_1; j += n_fuse )
  {
    nG_app = j + 2;
    n_iter = nG_app / k_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        }
        if ( !is_ident34_k1 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        }
        if ( !is_ident12_k2 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        }
        if ( !is_ident23_k2 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
        }
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_ass( m_A,
                              &gamma23_k1, &sigma23_k1,
                              &gamma34_k1, &sigma34_k1,
                              &gamma12_k2, &sigma12_k2,
                              &gamma23_k2, &sigma23_k2,
                              a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    /* Trailing rotation of the pass; uses g and k as left by the loop. */
    g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
    a3     = buff_A + (g + 1)*cs_A;
    a4     = buff_A + (g + 2)*cs_A;

    gamma34_k1 = g34_k1->real;
    sigma34_k1 = g34_k1->imag;

    is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

    if ( !is_ident34_k1 )
    {
      MAC_Apply_G_mx2_ass( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
    }
  }

  /* Pipeline stage: continues from the j reached above; every pass consumes
     all k_G columns of G (k_G / 2 fused steps plus one leftover column when
     k_G is odd). */
  for ( ; j < nG - 1; j += n_fuse )
  {
    nG_app = k_G;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        }
        if ( !is_ident34_k1 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        }
        if ( !is_ident12_k2 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        }
        if ( !is_ident23_k2 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
        }
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_ass( m_A,
                              &gamma23_k1, &sigma23_k1,
                              &gamma34_k1, &sigma34_k1,
                              &gamma12_k2, &sigma12_k2,
                              &gamma23_k2, &sigma23_k2,
                              a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    if ( n_left == 1 )
    {
      /* Leftover column of G: two rotations over three columns of A. */
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_ass( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_ass( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
      }
      else if ( !is_ident23_k1 && !is_ident34_k1 )
      {
        /* Both identity is skipped, like every other identity here. */
        MAC_Apply_G_mx3_ass( m_A,
                             &gamma23_k1, &sigma23_k1,
                             &gamma34_k1, &sigma34_k1,
                             a2, 1, a3, 1, a4, 1 );
      }
    }
  }

  /* Shutdown stage: each pass starts at the last rotation row nG - 1 in
     column j of G with one single rotation, then continues with fused steps
     over the remaining k_G - 1 - j columns. */
  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
  {
    g = nG - 1;
    k = j;

    g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
    a2     = buff_A + (g    )*cs_A;
    a3     = buff_A + (g + 1)*cs_A;

    gamma23_k1 = g23_k1->real;
    sigma23_k1 = g23_k1->imag;

    is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

    if ( !is_ident23_k1 )
    {
      MAC_Apply_G_mx2_ass( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
    }

    ++k;
    --g;

    nG_app = k_minus_1 - j;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        }
        if ( !is_ident34_k1 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        }
        if ( !is_ident12_k2 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        }
        if ( !is_ident23_k2 )
        {
          MAC_Apply_G_mx2_ass( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
        }
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_ass( m_A,
                              &gamma23_k1, &sigma23_k1,
                              &gamma34_k1, &sigma34_k1,
                              &gamma12_k2, &sigma12_k2,
                              &gamma23_k2, &sigma23_k2,
                              a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    if ( n_left == 1 )
    {
      /* Leftover column of G: two rotations over three columns of A. */
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_ass( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_ass( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
      }
      else if ( !is_ident23_k1 && !is_ident34_k1 )
      {
        /* Both identity is skipped, like every other identity here. */
        MAC_Apply_G_mx3_ass( m_A,
                             &gamma23_k1, &sigma23_k1,
                             &gamma34_k1, &sigma34_k1,
                             a2, 1, a3, 1, a4, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var3b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
Referenced by FLA_Apply_G_rf_asm_var3b(), and FLA_Apply_G_rf_bls_var3b().
{
/* Placeholder: this variant has no implementation. None of the arguments
   (including i_k and iTL) are read; the body only reports
   FLA_NOT_YET_IMPLEMENTED through FLA_Check_error_code(). */
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
/* NOTE(review): FLA_SUCCESS is returned whenever the check above returns
   control; whether it ever does is not visible from here -- confirm. */
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var4 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ass_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ass_var5b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ass_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_bls_var6().
{
  /*
   * Wavefront variant with two-column fusing (real single-precision A).
   *
   * Entry (g, k) of G is read as (gamma, sigma) = (real, imag) and acts on
   * columns g and g+1 of A; entries equal to (1, 0) are treated as identity
   * and skipped. The outer loops advance one rotation row per pass, as in
   * the unfused wavefront; the inner loops consume two columns of G per step
   * (n_fuse). One fused step pairs
   *
   *     G(g-1, k+1) on (a1, a2)   with   G(g, k) on (a2, a3)
   *
   * and hands both to MAC_Apply_G_mx3b_ass() when neither is identity
   * (NOTE(review): the order in which the macro applies the two rotations is
   * not visible here -- confirm). When the number of rotations in a pass is
   * odd, the last one is applied on its own.
   *
   * Phases:
   *   start-up  : passes j = 0 .. k_G-2, pass j covers j+1 rotations;
   *   pipeline  : passes j = k_G-1 .. nG-1, each covers k_G rotations;
   *   shutdown  : passes j = 1 .. k_G-1, pass j covers k_G-j rotations,
   *               starting at rotation row nG-1 and column j of G.
   *
   * rs_A is only forwarded to the fallback call; this body passes the
   * literal 1 after each column pointer (presumably the row increment).
   *
   * Always returns FLA_SUCCESS.
   */
  float     one  = bli_s1();
  float     zero = bli_s0();
  float     gamma12;
  float     sigma12;
  float     gamma23;
  float     sigma23;
  float*    a1;
  float*    a2;
  float*    a3;
  scomplex* g12;
  scomplex* g23;
  int       i, j, g, k;
  int       nG, nG_app;
  int       n_iter;
  int       n_left;
  int       k_minus_1;
  int       n_fuse;
  int       is_ident12, is_ident23;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;
  n_fuse    = 2;

  /* Use the simple variant for nG < (k - 1) or k == 1. */
  if ( nG < k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_ass_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase. */
  for ( j = 0; j < k_minus_1; ++j )
  {
    nG_app = j + 1;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g - 1)*cs_A;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_ass( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3b_ass( m_A,
                              &gamma12, &sigma12,
                              &gamma23, &sigma23,
                              a1, 1, a2, 1, a3, 1 );
      }
    }

    if ( n_left == 1 )
    {
      /* Odd count: one unpaired rotation at the (g, k) left by the loop. */
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
      {
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  /* Pipeline stage. */
  for ( j = k_minus_1; j < nG; ++j )
  {
    nG_app = k_G;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g - 1)*cs_A;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_ass( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3b_ass( m_A,
                              &gamma12, &sigma12,
                              &gamma23, &sigma23,
                              a1, 1, a2, 1, a3, 1 );
      }
    }

    if ( n_left == 1 )
    {
      /* Odd count: one unpaired rotation at the (g, k) left by the loop. */
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
      {
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  /* Shutdown stage. */
  for ( j = 1; j < k_G; ++j )
  {
    nG_app = k_G - j;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g - 1)*cs_A;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_ass( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3b_ass( m_A,
                              &gamma12, &sigma12,
                              &gamma23, &sigma23,
                              a1, 1, a2, 1, a3, 1 );
      }
    }

    if ( n_left == 1 )
    {
      /* Odd count: one unpaired rotation at the (g, k) left by the loop. */
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
      {
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var6b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
Referenced by FLA_Apply_G_rf_asm_var6b(), and FLA_Apply_G_rf_bls_var6b().
{
/* Placeholder: this variant has no implementation. None of the arguments
   (including i_k and iTL) are read; the body only reports
   FLA_NOT_YET_IMPLEMENTED through FLA_Check_error_code(). */
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
/* NOTE(review): FLA_SUCCESS is returned whenever the check above returns
   control; whether it ever does is not visible from here -- confirm. */
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var7 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ass_var8 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ass_var8b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ass_var9 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ass_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_bls_var9().
{
  /*
   * Wavefront variant with two-row fusing (real single-precision A).
   *
   * Entry (g, k) of G is read as (gamma, sigma) = (real, imag) and acts on
   * columns g and g+1 of A; entries equal to (1, 0) are treated as identity
   * and skipped. The outer loops advance two rotation rows per pass
   * (n_fuse); the inner loops walk one column of G per step. One fused step
   * pairs two rotations from the same column of G,
   *
   *     G(g, k) on (a1, a2)   with   G(g+1, k) on (a2, a3)
   *
   * and hands both to MAC_Apply_G_mx3_ass() when neither is identity
   * (NOTE(review): the order in which the macro applies the two rotations is
   * not visible here -- confirm).
   *
   * rs_A is only forwarded to the fallback call; this body passes the
   * literal 1 after each column pointer (presumably the row increment).
   *
   * Always returns FLA_SUCCESS.
   */
  float     one  = bli_s1();
  float     zero = bli_s0();
  float     gamma12;
  float     sigma12;
  float     gamma23;
  float     sigma23;
  float*    a1;
  float*    a2;
  float*    a3;
  scomplex* g12;
  scomplex* g23;
  int       i, j, g, k;
  int       nG;
  int       n_iter;
  int       k_minus_1;
  int       n_fuse;
  int       is_ident12, is_ident23;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;
  n_fuse    = 2;

  /* Nothing to apply. This also keeps the shutdown loop from starting at a
     negative index (nG % n_fuse is -1 when nG is -1), which would read G
     before its first element when k_G is 0. */
  if ( k_G < 1 || nG < 1 ) return FLA_SUCCESS;

  /* Use the simple variant for nG < 2(k - 1) or k == 1. */
  if ( nG < 2*k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_ass_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase: j = -1, 1, 3, ...; each pass runs j + 1 fused steps and
     then exactly one trailing single rotation. */
  for ( j = -1; j < k_minus_1; j += n_fuse )
  {
    n_iter = j + 1;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
    {
      g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;
      a3  = buff_A + (g + 2)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_ass( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3_ass( m_A,
                             &gamma12, &sigma12,
                             &gamma23, &sigma23,
                             a1, 1, a2, 1, a3, 1 );
      }
    }

    /* Trailing rotation of the pass; uses g and k as left by the loop. */
    g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
    a2  = buff_A + (g + 1)*cs_A;
    a3  = buff_A + (g + 2)*cs_A;

    gamma23 = g23->real;
    sigma23 = g23->imag;

    is_ident23 = ( gamma23 == one && sigma23 == zero );

    if ( !is_ident23 )
    {
      MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
    }
  }

  /* Pipeline stage: continues from the j reached above; every pass runs
     k_G fused steps, one per column of G. */
  for ( ; j < nG - 1; j += n_fuse )
  {
    n_iter = k_G;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
    {
      g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;
      a3  = buff_A + (g + 2)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_ass( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3_ass( m_A,
                             &gamma12, &sigma12,
                             &gamma23, &sigma23,
                             a1, 1, a2, 1, a3, 1 );
      }
    }
  }

  /* Shutdown stage: each pass starts at the last rotation row nG - 1 in
     column j of G with one single rotation, then runs k_G - 1 - j fused
     steps over the following columns of G. */
  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
  {
    g = nG - 1;
    k = j;

    g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
    a1  = buff_A + (g    )*cs_A;
    a2  = buff_A + (g + 1)*cs_A;

    gamma12 = g12->real;
    sigma12 = g12->imag;

    is_ident12 = ( gamma12 == one && sigma12 == zero );

    if ( !is_ident12 )
    {
      MAC_Apply_G_mx2_ass( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
    }

    ++k;
    --g;

    n_iter = k_minus_1 - j;

    for ( i = 0; i < n_iter; ++i, ++k, --g )
    {
      g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;
      a3  = buff_A + (g + 2)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_ass( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_ass( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3_ass( m_A,
                             &gamma12, &sigma12,
                             &gamma23, &sigma23,
                             a1, 1, a2, 1, a3, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ass_var9b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
Referenced by FLA_Apply_G_rf_asm_var9b(), and FLA_Apply_G_rf_bls_var9b().
{
/* Placeholder: this variant has no implementation. None of the arguments
   (including i_k and iTL) are read; the body only reports
   FLA_NOT_YET_IMPLEMENTED through FLA_Check_error_code(). */
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
/* NOTE(review): FLA_SUCCESS is returned whenever the check above returns
   control; whether it ever does is not visible from here -- confirm. */
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var1(), FLA_Apply_G_rf_asz_var2(), FLA_Apply_G_rf_asz_var3(), FLA_Apply_G_rf_asz_var6(), FLA_Apply_G_rf_asz_var9(), and FLA_Apply_G_rf_blz_var1().
{
  /*
   * Simple variant (double-precision complex A): walks G one column at a
   * time and applies every rotation of that column to A, left to right.
   *
   * Entry (j, l) of G is read as the real pair (gamma, sigma) = (real, imag)
   * and is applied to columns j and j+1 of A through MAC_Apply_G_mx2_asz().
   * Entries equal to (1, 0) are treated as identity rotations and skipped.
   *
   *   k_G            number of columns of G that are traversed
   *   m_A            forwarded to MAC_Apply_G_mx2_asz (presumably the number
   *                  of rows of A -- confirm against the macro)
   *   n_A            number of columns of A; n_A - 1 entries are read from
   *                  each column of G
   *   buff_G         base address of G, with row/column strides rs_G/cs_G
   *   buff_A         base address of A, with column stride cs_A
   *   rs_A           never read. NOTE(review): the literal 1 passed after
   *                  each column pointer is presumably the row increment, so
   *                  a unit row stride appears to be assumed -- confirm.
   *
   * Always returns FLA_SUCCESS.
   */
  double    one    = bli_d1();
  double    zero   = bli_d0();
  int       nG_app = n_A - 1;
  int       l, j;
  double    gamma;
  double    sigma;
  dcomplex* a1;
  dcomplex* a2;
  dcomplex* g1;
  dcomplex* g11;

  g1 = buff_G;

  for ( l = 0; l < k_G; ++l )
  {
    /* Restart at the first column pair of A for every column of G. */
    a1  = buff_A;
    a2  = buff_A + cs_A;
    g11 = g1;

    for ( j = 0; j < nG_app; ++j )
    {
      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma != one || sigma != zero )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma, &sigma, a1, 1, a2, 1 );
      }

      /* Slide to the next pair of adjacent columns and the next rotation. */
      a1  += cs_A;
      a2  += cs_A;
      g11 += rs_G;
    }

    g1 += cs_G;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var2(), and FLA_Apply_G_rf_blz_var2().
{
  /*
   * Wavefront variant (double-precision complex A): applies the same
   * rotations as FLA_Apply_G_rf_asz_var1(), but visits G along
   * anti-diagonals (row index g + column index k constant within one pass of
   * the outer loop) instead of column by column.
   *
   * Entry (g, k) of G is read as the real pair (gamma, sigma) = (real, imag)
   * and applied to columns g and g+1 of A through MAC_Apply_G_mx2_asz().
   * Entries equal to (1, 0) are treated as identity and skipped.
   *
   * The schedule has three phases:
   *   start-up  : passes j = 0 .. k_G-2, pass j applies j+1 rotations;
   *   pipeline  : passes j = k_G-1 .. nG-1, each applies k_G rotations;
   *   shutdown  : the last k_G-1 passes, applying k_G-1 down to 1 rotations,
   *               each starting at the last rotation row nG-1.
   *
   * rs_A is only forwarded to the fallback call; this body itself passes the
   * literal 1 after each column pointer (presumably the row increment --
   * confirm against the macro).
   *
   * Always returns FLA_SUCCESS.
   */
  double    one  = bli_d1();
  double    zero = bli_d0();
  double    gamma;
  double    sigma;
  dcomplex* a1;
  dcomplex* a2;
  dcomplex* g11;
  int       j, g, k;
  int       nG, nG_app;
  int       k_minus_1;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;

  /* Use the simple variant for nG < (k - 1) or k == 1. The start-up phase
     below reaches rotation row k_G - 2, so it needs nG >= k_G - 1. */
  if ( nG < k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_asz_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase. */
  for ( j = 0; j < k_minus_1; ++j )
  {
    nG_app = j + 1;

    for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
    {
      g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_asz( m_A, &gamma, &sigma, a1, 1, a2, 1 );
    }
  }

  /* Pipeline stage. */
  for ( j = k_minus_1; j < nG; ++j )
  {
    nG_app = k_G;

    for ( k = 0, g = j; k < nG_app; ++k, --g )
    {
      g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_asz( m_A, &gamma, &sigma, a1, 1, a2, 1 );
    }
  }

  /* Shutdown stage. */
  for ( j = nG - k_minus_1; j < nG; ++j )
  {
    nG_app = nG - j;

    for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
    {
      g11 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g    )*cs_A;
      a2  = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_asz( m_A, &gamma, &sigma, a1, 1, a2, 1 );
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var3(), and FLA_Apply_G_rf_blz_var3().
{
  /*
   * Fused wavefront variant (double-precision complex A).
   *
   * Entry (g, k) of G is read as the real pair (gamma, sigma) = (real, imag)
   * and acts on columns g and g+1 of A; entries equal to (1, 0) are treated
   * as identity. The outer loops advance two rotation rows at a time
   * (n_fuse) and the inner loops consume two columns of G at a time
   * (k_fuse). One fused step handles four rotations touching four adjacent
   * columns a1..a4 of A:
   *
   *     G(g,   k  ) on (a2, a3)      G(g+1, k  ) on (a3, a4)
   *     G(g-1, k+1) on (a1, a2)      G(g,   k+1) on (a2, a3)
   *
   * If any of the four is identity they are applied one pair at a time in
   * the order listed; otherwise MAC_Apply_G_mx4s_asz() receives all four
   * (presumably applying them in the same order in a single sweep --
   * confirm against the macro).
   *
   * rs_A is only forwarded to the fallback call; this body passes the
   * literal 1 after each column pointer (presumably the row increment).
   *
   * Always returns FLA_SUCCESS.
   */
  double    one  = bli_d1();
  double    zero = bli_d0();
  double    gamma23_k1;
  double    sigma23_k1;
  double    gamma34_k1;
  double    sigma34_k1;
  double    gamma12_k2;
  double    sigma12_k2;
  double    gamma23_k2;
  double    sigma23_k2;
  dcomplex* a1;
  dcomplex* a2;
  dcomplex* a3;
  dcomplex* a4;
  dcomplex* g23_k1;
  dcomplex* g34_k1;
  dcomplex* g12_k2;
  dcomplex* g23_k2;
  int       i, j, g, k;
  int       nG, nG_app;
  int       n_iter;
  int       n_left;
  int       k_minus_1;
  int       n_fuse;
  int       k_fuse;
  int       is_ident23_k1, is_ident34_k1;
  int       is_ident12_k2, is_ident23_k2;
  int       has_ident;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;
  n_fuse    = 2;
  k_fuse    = 2;

  /* Nothing to apply. This also keeps the shutdown loop from starting at a
     negative index (nG % n_fuse is -1 when nG is -1), which would read G
     before its first element when k_G is 0. */
  if ( k_G < 1 || nG < 1 ) return FLA_SUCCESS;

  /* Use the simple variant for nG < 2(k - 1) or k == 1. */
  if ( nG < 2*k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_asz_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase: j = -1, 1, 3, ...; each pass runs (j + 2) / 2 fused
     steps and then exactly one trailing single rotation. */
  for ( j = -1; j < k_minus_1; j += n_fuse )
  {
    nG_app = j + 2;
    n_iter = nG_app / k_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        }
        if ( !is_ident34_k1 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        }
        if ( !is_ident12_k2 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        }
        if ( !is_ident23_k2 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
        }
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_asz( m_A,
                              &gamma23_k1, &sigma23_k1,
                              &gamma34_k1, &sigma34_k1,
                              &gamma12_k2, &sigma12_k2,
                              &gamma23_k2, &sigma23_k2,
                              a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    /* Trailing rotation of the pass; uses g and k as left by the loop. */
    g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
    a3     = buff_A + (g + 1)*cs_A;
    a4     = buff_A + (g + 2)*cs_A;

    gamma34_k1 = g34_k1->real;
    sigma34_k1 = g34_k1->imag;

    is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

    if ( !is_ident34_k1 )
    {
      MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
    }
  }

  /* Pipeline stage: continues from the j reached above; every pass consumes
     all k_G columns of G (k_G / 2 fused steps plus one leftover column when
     k_G is odd). */
  for ( ; j < nG - 1; j += n_fuse )
  {
    nG_app = k_G;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        }
        if ( !is_ident34_k1 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        }
        if ( !is_ident12_k2 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        }
        if ( !is_ident23_k2 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
        }
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_asz( m_A,
                              &gamma23_k1, &sigma23_k1,
                              &gamma34_k1, &sigma34_k1,
                              &gamma12_k2, &sigma12_k2,
                              &gamma23_k2, &sigma23_k2,
                              a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    if ( n_left == 1 )
    {
      /* Leftover column of G: two rotations over three columns of A. */
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
      }
      else if ( !is_ident23_k1 && !is_ident34_k1 )
      {
        /* Both identity is skipped, like every other identity here. */
        MAC_Apply_G_mx3_asz( m_A,
                             &gamma23_k1, &sigma23_k1,
                             &gamma34_k1, &sigma34_k1,
                             a2, 1, a3, 1, a4, 1 );
      }
    }
  }

  /* Shutdown stage: each pass starts at the last rotation row nG - 1 in
     column j of G with one single rotation, then continues with fused steps
     over the remaining k_G - 1 - j columns. */
  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
  {
    g = nG - 1;
    k = j;

    g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
    a2     = buff_A + (g    )*cs_A;
    a3     = buff_A + (g + 1)*cs_A;

    gamma23_k1 = g23_k1->real;
    sigma23_k1 = g23_k1->imag;

    is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

    if ( !is_ident23_k1 )
    {
      MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
    }

    ++k;
    --g;

    nG_app = k_minus_1 - j;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
        }
        if ( !is_ident34_k1 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
        }
        if ( !is_ident12_k2 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma12_k2, &sigma12_k2, a1, 1, a2, 1 );
        }
        if ( !is_ident23_k2 )
        {
          MAC_Apply_G_mx2_asz( m_A, &gamma23_k2, &sigma23_k2, a2, 1, a3, 1 );
        }
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_asz( m_A,
                              &gamma23_k1, &sigma23_k1,
                              &gamma34_k1, &sigma34_k1,
                              &gamma12_k2, &sigma12_k2,
                              &gamma23_k2, &sigma23_k2,
                              a1, 1, a2, 1, a3, 1, a4, 1 );
      }
    }

    if ( n_left == 1 )
    {
      /* Leftover column of G: two rotations over three columns of A. */
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma23_k1, &sigma23_k1, a2, 1, a3, 1 );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma34_k1, &sigma34_k1, a3, 1, a4, 1 );
      }
      else if ( !is_ident23_k1 && !is_ident34_k1 )
      {
        /* Both identity is skipped, like every other identity here. */
        MAC_Apply_G_mx3_asz( m_A,
                             &gamma23_k1, &sigma23_k1,
                             &gamma34_k1, &sigma34_k1,
                             a2, 1, a3, 1, a4, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var3b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
Referenced by FLA_Apply_G_rf_asm_var3b().
{
// Placeholder body: this variant only reports FLA_NOT_YET_IMPLEMENTED through
// FLA_Check_error_code(); none of the arguments are read.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): if FLA_Check_error_code() can return to the caller, this
// reports FLA_SUCCESS although nothing was applied to A -- confirm.
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var4 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asz_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asz_var5b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asz_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var6(), and FLA_Apply_G_rf_blz_var6().
{
  // Applies the rotations stored in G to the columns of A, fusing them two at
  // a time.  Entry (g,k) of G carries one rotation as gamma (real part) and
  // sigma (imaginary part) and acts on columns g and g+1 of A.  An entry with
  // gamma == 1 and sigma == 0 is treated as the identity and skipped.
  // Rows of A are walked with a unit increment; rs_A is not consulted here.
  double one  = bli_d1();
  double zero = bli_d0();

  int    k_minus_1 = k_G - 1;
  int    nG        = n_A - 1;
  int    n_fuse    = 2;

  int    i, j, g, k;
  int    nG_app;
  int    n_iter;
  int    n_left;

  // Fall back to the simple variant when nG < (k_G - 1) or k_G == 1.
  if ( nG < k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_asz_var1( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // Start-up phase: step j handles j + 1 rotations, starting at column 0 of G
  // and row j, moving one row up per column of G.
  for ( j = 0; j < k_minus_1; ++j )
  {
    nG_app = j + 1;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      dcomplex* g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      dcomplex* g23 = buff_G + (g    )*rs_G + (k    )*cs_G;

      dcomplex* a1  = buff_A + (g - 1)*cs_A;
      dcomplex* a2  = buff_A + (g    )*cs_A;
      dcomplex* a3  = buff_A + (g + 1)*cs_A;

      double    gamma12 = g12->real;
      double    sigma12 = g12->imag;
      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Only the (2,3) rotation is non-trivial: columns 2 and 3.
          MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Only the (1,2) rotation is non-trivial: columns 1 and 2.
        MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Both rotations are non-trivial: fused update of all three columns.
        MAC_Apply_G_mx3b_asz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }

    // Odd count: one rotation is left at the (g,k) the loop stopped on.
    if ( n_left == 1 )
    {
      dcomplex* g23 = buff_G + (g    )*rs_G + (k    )*cs_G;

      dcomplex* a2  = buff_A + (g    )*cs_A;
      dcomplex* a3  = buff_A + (g + 1)*cs_A;

      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  // Pipeline stage: every step handles k_G rotations, starting at row j.
  for ( j = k_minus_1; j < nG; ++j )
  {
    nG_app = k_G;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      dcomplex* g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      dcomplex* g23 = buff_G + (g    )*rs_G + (k    )*cs_G;

      dcomplex* a1  = buff_A + (g - 1)*cs_A;
      dcomplex* a2  = buff_A + (g    )*cs_A;
      dcomplex* a3  = buff_A + (g + 1)*cs_A;

      double    gamma12 = g12->real;
      double    sigma12 = g12->imag;
      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Only the (2,3) rotation is non-trivial: columns 2 and 3.
          MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Only the (1,2) rotation is non-trivial: columns 1 and 2.
        MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Both rotations are non-trivial: fused update of all three columns.
        MAC_Apply_G_mx3b_asz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }

    // Odd k_G: one rotation is left at the (g,k) the loop stopped on.
    if ( n_left == 1 )
    {
      dcomplex* g23 = buff_G + (g    )*rs_G + (k    )*cs_G;

      dcomplex* a2  = buff_A + (g    )*cs_A;
      dcomplex* a3  = buff_A + (g + 1)*cs_A;

      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  // Shutdown stage: step j handles the k_G - j rotations that remain,
  // starting at column j of G and row nG - 1.
  for ( j = 1; j < k_G; ++j )
  {
    nG_app = k_G - j;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      dcomplex* g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      dcomplex* g23 = buff_G + (g    )*rs_G + (k    )*cs_G;

      dcomplex* a1  = buff_A + (g - 1)*cs_A;
      dcomplex* a2  = buff_A + (g    )*cs_A;
      dcomplex* a3  = buff_A + (g + 1)*cs_A;

      double    gamma12 = g12->real;
      double    sigma12 = g12->imag;
      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Only the (2,3) rotation is non-trivial: columns 2 and 3.
          MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Only the (1,2) rotation is non-trivial: columns 1 and 2.
        MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Both rotations are non-trivial: fused update of all three columns.
        MAC_Apply_G_mx3b_asz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }

    // Odd count: one rotation is left at the (g,k) the loop stopped on.
    if ( n_left == 1 )
    {
      dcomplex* g23 = buff_G + (g    )*rs_G + (k    )*cs_G;

      dcomplex* a2  = buff_A + (g    )*cs_A;
      dcomplex* a3  = buff_A + (g + 1)*cs_A;

      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var6b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
Referenced by FLA_Apply_G_rf_asm_var6b().
{
// Placeholder body: this variant only reports FLA_NOT_YET_IMPLEMENTED through
// FLA_Check_error_code(); none of the arguments are read.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): if FLA_Check_error_code() can return to the caller, this
// reports FLA_SUCCESS although nothing was applied to A -- confirm.
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var7 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asz_var8 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asz_var8b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_asz_var9 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_asz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_asm_var9(), and FLA_Apply_G_rf_blz_var9().
{
  // Applies the rotations stored in G to the columns of A, fusing two
  // rotations from the same column of G at a time.  Entry (g,k) of G carries
  // one rotation as gamma (real part) and sigma (imaginary part) and acts on
  // columns g and g+1 of A.  An entry with gamma == 1 and sigma == 0 is
  // treated as the identity and skipped.
  // Rows of A are walked with a unit increment; rs_A is not consulted here.
  double one  = bli_d1();
  double zero = bli_d0();

  int    k_minus_1 = k_G - 1;
  int    nG        = n_A - 1;
  int    n_fuse    = 2;

  int    i, j, g, k;
  int    n_iter;

  // Fall back to the simple variant when nG < 2*(k_G - 1) or k_G == 1.
  if ( nG < 2*k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_asz_var1( k_G, m_A, n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  // Start-up phase: j advances by n_fuse from -1.  Each step applies j + 1
  // fused pairs and then always one single rotation.
  for ( j = -1; j < k_minus_1; j += n_fuse )
  {
    n_iter = j + 1;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
    {
      dcomplex* g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      dcomplex* g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;

      dcomplex* a1  = buff_A + (g    )*cs_A;
      dcomplex* a2  = buff_A + (g + 1)*cs_A;
      dcomplex* a3  = buff_A + (g + 2)*cs_A;

      double    gamma12 = g12->real;
      double    sigma12 = g12->imag;
      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Only the (2,3) rotation is non-trivial: columns 2 and 3.
          MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Only the (1,2) rotation is non-trivial: columns 1 and 2.
        MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Both rotations are non-trivial: update all three columns.
        MAC_Apply_G_mx3_asz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }

    // Trailing single rotation at row g + 1 of G, using the (g,k) the loop
    // above stopped on.
    {
      dcomplex* g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;

      dcomplex* a2  = buff_A + (g + 1)*cs_A;
      dcomplex* a3  = buff_A + (g + 2)*cs_A;

      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
      }
    }
  }

  // Pipeline stage: j carries on from where the start-up phase stopped.
  // Each step applies k_G fused pairs.
  for ( ; j < nG - 1; j += n_fuse )
  {
    n_iter = k_G;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
    {
      dcomplex* g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      dcomplex* g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;

      dcomplex* a1  = buff_A + (g    )*cs_A;
      dcomplex* a2  = buff_A + (g + 1)*cs_A;
      dcomplex* a3  = buff_A + (g + 2)*cs_A;

      double    gamma12 = g12->real;
      double    sigma12 = g12->imag;
      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Only the (2,3) rotation is non-trivial: columns 2 and 3.
          MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Only the (1,2) rotation is non-trivial: columns 1 and 2.
        MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Both rotations are non-trivial: update all three columns.
        MAC_Apply_G_mx3_asz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }
  }

  // Shutdown stage: each step starts with one single rotation at row nG - 1,
  // column j of G, then applies k_minus_1 - j fused pairs.
  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
  {
    g = nG - 1;
    k = j;

    // Leading single rotation.
    {
      dcomplex* g12 = buff_G + (g    )*rs_G + (k    )*cs_G;

      dcomplex* a1  = buff_A + (g    )*cs_A;
      dcomplex* a2  = buff_A + (g + 1)*cs_A;

      double    gamma12 = g12->real;
      double    sigma12 = g12->imag;

      int       is_ident12 = ( gamma12 == one && sigma12 == zero );

      if ( !is_ident12 )
      {
        MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }

      ++k;
      --g;
    }

    n_iter = k_minus_1 - j;

    for ( i = 0; i < n_iter; ++i, ++k, --g )
    {
      dcomplex* g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
      dcomplex* g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;

      dcomplex* a1  = buff_A + (g    )*cs_A;
      dcomplex* a2  = buff_A + (g + 1)*cs_A;
      dcomplex* a3  = buff_A + (g + 2)*cs_A;

      double    gamma12 = g12->real;
      double    sigma12 = g12->imag;
      double    gamma23 = g23->real;
      double    sigma23 = g23->imag;

      int       is_ident12 = ( gamma12 == one && sigma12 == zero );
      int       is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( is_ident12 )
      {
        if ( !is_ident23 )
        {
          // Only the (2,3) rotation is non-trivial: columns 2 and 3.
          MAC_Apply_G_mx2_asz( m_A, &gamma23, &sigma23, a2, 1, a3, 1 );
        }
      }
      else if ( is_ident23 )
      {
        // Only the (1,2) rotation is non-trivial: columns 1 and 2.
        MAC_Apply_G_mx2_asz( m_A, &gamma12, &sigma12, a1, 1, a2, 1 );
      }
      else
      {
        // Both rotations are non-trivial: update all three columns.
        MAC_Apply_G_mx3_asz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, 1, a2, 1, a3, 1 );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_asz_var9b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
int | iTL, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
Referenced by FLA_Apply_G_rf_asm_var9b().
{
// Placeholder body: this variant only reports FLA_NOT_YET_IMPLEMENTED through
// FLA_Check_error_code(); none of the arguments are read.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): if FLA_Check_error_code() can return to the caller, this
// reports FLA_SUCCESS although nothing was applied to A -- confirm.
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bhc_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bhd_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bhs_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bhz_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
FLA_Obj * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blc_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asc_var1().
Referenced by FLA_Apply_G_rf_blk_var1().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asc_var1().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    scomplex* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asc_var1( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blc_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asc_var2().
Referenced by FLA_Apply_G_rf_blk_var2().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asc_var2().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    scomplex* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asc_var2( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blc_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asc_var3().
Referenced by FLA_Apply_G_rf_blk_var3().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asc_var3().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    scomplex* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asc_var3( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blc_var3b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
Referenced by FLA_Apply_G_rf_blk_var3b().
{
// Placeholder body: this variant only reports FLA_NOT_YET_IMPLEMENTED through
// FLA_Check_error_code(); none of the arguments are read.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): if FLA_Check_error_code() can return to the caller, this
// reports FLA_SUCCESS although nothing was applied to A -- confirm.
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blc_var4 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blc_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blc_var5b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blc_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asc_var6().
Referenced by FLA_Apply_G_rf_blk_var6().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asc_var6().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    scomplex* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asc_var6( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blc_var6b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
Referenced by FLA_Apply_G_rf_blk_var6b().
{
// Placeholder body: this variant only reports FLA_NOT_YET_IMPLEMENTED through
// FLA_Check_error_code(); none of the arguments are read.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): if FLA_Check_error_code() can return to the caller, this
// reports FLA_SUCCESS although nothing was applied to A -- confirm.
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blc_var7 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blc_var8 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blc_var8b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blc_var9 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asc_var9().
Referenced by FLA_Apply_G_rf_blk_var9().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asc_var9().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    scomplex* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asc_var9( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blc_var9b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
Referenced by FLA_Apply_G_rf_blk_var9b().
{
// Placeholder body: this variant only reports FLA_NOT_YET_IMPLEMENTED through
// FLA_Check_error_code(); none of the arguments are read.
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
// NOTE(review): if FLA_Check_error_code() can return to the caller, this
// reports FLA_SUCCESS although nothing was applied to A -- confirm.
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bld_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asd_var1().
Referenced by FLA_Apply_G_rf_blk_var1().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asd_var1().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    double* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asd_var1( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bld_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asd_var2().
Referenced by FLA_Apply_G_rf_blk_var2().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asd_var2().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    double* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asd_var2( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bld_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asd_var3().
Referenced by FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_v_opd_var1(), FLA_Tevd_v_opd_var1(), and FLA_Tevd_v_opd_var3().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asd_var3().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    double* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asd_var3( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bld_var3b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asd_var3b().
Referenced by FLA_Apply_G_rf_blk_var3b(), FLA_Bsvd_v_opd_var2(), FLA_Bsvd_v_opz_var2(), FLA_Tevd_v_opd_var2(), FLA_Tevd_v_opd_var4(), FLA_Tevd_v_opz_var2(), and FLA_Tevd_v_opz_var4().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, i_k and the number of
  // rows already processed, to FLA_Apply_G_rf_asd_var3b().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    double* A1 = buff_A + (i)*rs_A;

    // Rows above this panel; forwarded as the callee's fifth argument.
    int m_behind = i;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asd_var3b( k_G, b, n_A,
                              i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bld_var4 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bld_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bld_var5b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bld_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asd_var6().
Referenced by FLA_Apply_G_rf_blk_var6().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asd_var6().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    double* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asd_var6( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bld_var6b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asd_var6b().
Referenced by FLA_Apply_G_rf_blk_var6b().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, i_k and the number of
  // rows already processed, to FLA_Apply_G_rf_asd_var6b().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    double* A1 = buff_A + (i)*rs_A;

    // Rows above this panel; forwarded as the callee's fifth argument.
    int m_behind = i;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asd_var6b( k_G, b, n_A,
                              i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bld_var7 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bld_var8 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bld_var8b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bld_var9 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asd_var9().
Referenced by FLA_Apply_G_rf_blk_var9().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, to
  // FLA_Apply_G_rf_asd_var9().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    double* A1 = buff_A + (i)*rs_A;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asd_var9( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bld_var9b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
double * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asd_var9b().
Referenced by FLA_Apply_G_rf_blk_var9b().
{
  // Row-blocked driver: walks down A in panels of at most b_alg rows and
  // hands each panel, together with the unmodified G, i_k and the number of
  // rows already processed, to FLA_Apply_G_rf_asd_var9b().
  int i;
  int b;

  // A block size below 1 would make b non-positive, so the loop would never
  // advance (and a negative b would step backwards out of A).  Treat it as
  // "no blocking": one panel covering all m_A rows, which is what any
  // b_alg >= m_A already produces.
  if ( b_alg < 1 )
  {
    b_alg = m_A;
  }

  for ( i = 0; i < m_A; i += b )
  {
    // First element of the panel that starts at row i.
    double* A1 = buff_A + (i)*rs_A;

    // Rows above this panel; forwarded as the callee's fifth argument.
    int m_behind = i;

    // The last panel may be shorter than b_alg.
    b = min( b_alg, m_A - i );

    FLA_Apply_G_rf_asd_var9b( k_G, b, n_A,
                              i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1,     rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blk_var1 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
References FLA_Apply_G_rf_blc_var1(), FLA_Apply_G_rf_bld_var1(), FLA_Apply_G_rf_bls_var1(), FLA_Apply_G_rf_blz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Type-dispatching front end: reads the dimensions and strides out of the
  // G and A objects, then forwards to the typed blocked routine selected by
  // the datatype of A.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int k_G  = FLA_Obj_width( G );
  int m_A  = FLA_Obj_length( A );
  int n_A  = FLA_Obj_width( A );

  int rs_G = FLA_Obj_row_stride( G );
  int cs_G = FLA_Obj_col_stride( G );
  int rs_A = FLA_Obj_row_stride( A );
  int cs_A = FLA_Obj_col_stride( A );

  // G is read as single-precision complex for the two single-precision
  // cases and as double-precision complex for the two double-precision ones.
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

      FLA_Apply_G_rf_bls_var1( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_DOUBLE:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

      FLA_Apply_G_rf_bld_var1( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blc_var1( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blz_var1( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    default:
      // Any other datatype: nothing is applied.
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blk_var2 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
References FLA_Apply_G_rf_blc_var2(), FLA_Apply_G_rf_bld_var2(), FLA_Apply_G_rf_bls_var2(), FLA_Apply_G_rf_blz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Type-dispatching front end: reads the dimensions and strides out of the
  // G and A objects, then forwards to the typed blocked routine selected by
  // the datatype of A.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int k_G  = FLA_Obj_width( G );
  int m_A  = FLA_Obj_length( A );
  int n_A  = FLA_Obj_width( A );

  int rs_G = FLA_Obj_row_stride( G );
  int cs_G = FLA_Obj_col_stride( G );
  int rs_A = FLA_Obj_row_stride( A );
  int cs_A = FLA_Obj_col_stride( A );

  // G is read as single-precision complex for the two single-precision
  // cases and as double-precision complex for the two double-precision ones.
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

      FLA_Apply_G_rf_bls_var2( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_DOUBLE:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

      FLA_Apply_G_rf_bld_var2( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blc_var2( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blz_var2( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    default:
      // Any other datatype: nothing is applied.
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blk_var3 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
References FLA_Apply_G_rf_blc_var3(), FLA_Apply_G_rf_bld_var3(), FLA_Apply_G_rf_bls_var3(), FLA_Apply_G_rf_blz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Type-dispatching front end: reads the dimensions and strides out of the
  // G and A objects, then forwards to the typed blocked routine selected by
  // the datatype of A.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int k_G  = FLA_Obj_width( G );
  int m_A  = FLA_Obj_length( A );
  int n_A  = FLA_Obj_width( A );

  int rs_G = FLA_Obj_row_stride( G );
  int cs_G = FLA_Obj_col_stride( G );
  int rs_A = FLA_Obj_row_stride( A );
  int cs_A = FLA_Obj_col_stride( A );

  // G is read as single-precision complex for the two single-precision
  // cases and as double-precision complex for the two double-precision ones.
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

      FLA_Apply_G_rf_bls_var3( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_DOUBLE:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

      FLA_Apply_G_rf_bld_var3( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blc_var3( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blz_var3( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    default:
      // Any other datatype: nothing is applied.
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blk_var3b | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
References FLA_Apply_G_rf_blc_var3b(), FLA_Apply_G_rf_bld_var3b(), FLA_Apply_G_rf_bls_var3b(), FLA_Apply_G_rf_blz_var3b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Type-dispatching front end: reads the dimensions and strides out of the
  // G and A objects, then forwards to the typed blocked routine selected by
  // the datatype of A.  The fourth argument of every typed routine is passed
  // as the literal 0.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int k_G  = FLA_Obj_width( G );
  int m_A  = FLA_Obj_length( A );
  int n_A  = FLA_Obj_width( A );

  int rs_G = FLA_Obj_row_stride( G );
  int cs_G = FLA_Obj_col_stride( G );
  int rs_A = FLA_Obj_row_stride( A );
  int cs_A = FLA_Obj_col_stride( A );

  // G is read as single-precision complex for the two single-precision
  // cases and as double-precision complex for the two double-precision ones.
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

      FLA_Apply_G_rf_bls_var3b( k_G, m_A, n_A,
                                0,
                                buff_G, rs_G, cs_G,
                                buff_A, rs_A, cs_A,
                                b_alg );
      break;
    }

    case FLA_DOUBLE:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

      FLA_Apply_G_rf_bld_var3b( k_G, m_A, n_A,
                                0,
                                buff_G, rs_G, cs_G,
                                buff_A, rs_A, cs_A,
                                b_alg );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blc_var3b( k_G, m_A, n_A,
                                0,
                                buff_G, rs_G, cs_G,
                                buff_A, rs_A, cs_A,
                                b_alg );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blz_var3b( k_G, m_A, n_A,
                                0,
                                buff_G, rs_G, cs_G,
                                buff_A, rs_A, cs_A,
                                b_alg );
      break;
    }

    default:
      // Any other datatype: nothing is applied.
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blk_var4 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blk_var5 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blk_var5b | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blk_var6 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
References FLA_Apply_G_rf_blc_var6(), FLA_Apply_G_rf_bld_var6(), FLA_Apply_G_rf_bls_var6(), FLA_Apply_G_rf_blz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  // Type-dispatching front end: reads the dimensions and strides out of the
  // G and A objects, then forwards to the typed blocked routine selected by
  // the datatype of A.
  FLA_Datatype datatype = FLA_Obj_datatype( A );

  int k_G  = FLA_Obj_width( G );
  int m_A  = FLA_Obj_length( A );
  int n_A  = FLA_Obj_width( A );

  int rs_G = FLA_Obj_row_stride( G );
  int cs_G = FLA_Obj_col_stride( G );
  int rs_A = FLA_Obj_row_stride( A );
  int cs_A = FLA_Obj_col_stride( A );

  // G is read as single-precision complex for the two single-precision
  // cases and as double-precision complex for the two double-precision ones.
  switch ( datatype )
  {
    case FLA_FLOAT:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

      FLA_Apply_G_rf_bls_var6( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_DOUBLE:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

      FLA_Apply_G_rf_bld_var6( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_COMPLEX:
    {
      scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
      scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blc_var6( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    case FLA_DOUBLE_COMPLEX:
    {
      dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
      dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

      FLA_Apply_G_rf_blz_var6( k_G, m_A, n_A,
                               buff_G, rs_G, cs_G,
                               buff_A, rs_A, cs_A,
                               b_alg );
      break;
    }

    default:
      // Any other datatype: nothing is applied.
      break;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blk_var6b | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
References FLA_Apply_G_rf_blc_var6b(), FLA_Apply_G_rf_bld_var6b(), FLA_Apply_G_rf_bls_var6b(), FLA_Apply_G_rf_blz_var6b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  /*
   * Object-level front end.  Reads the datatype of A plus the dimensions and
   * strides of G and A, then forwards the raw buffers to the typed blocked
   * kernel FLA_Apply_G_rf_bl?_var6b().  The fourth kernel argument (named i_k
   * in the typed signatures) is always passed as 0 from here.  G is always
   * accessed through a complex pointer: single precision when A is
   * float/scomplex, double precision when A is double/dcomplex.  A datatype
   * matching none of the four branches leaves A untouched.  FLA_SUCCESS is
   * returned in every case.
   */
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int G_width  = FLA_Obj_width( G );
  const int A_length = FLA_Obj_length( A );
  const int A_width  = FLA_Obj_width( A );

  const int G_rs = FLA_Obj_row_stride( G );
  const int G_cs = FLA_Obj_col_stride( G );
  const int A_rs = FLA_Obj_row_stride( A );
  const int A_cs = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* G_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    A_buf = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var6b( G_width, A_length, A_width, 0,
                              G_buf, G_rs, G_cs,
                              A_buf, A_rs, A_cs,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* G_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   A_buf = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var6b( G_width, A_length, A_width, 0,
                              G_buf, G_rs, G_cs,
                              A_buf, A_rs, A_cs,
                              b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* G_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* A_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var6b( G_width, A_length, A_width, 0,
                              G_buf, G_rs, G_cs,
                              A_buf, A_rs, A_cs,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* G_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* A_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var6b( G_width, A_length, A_width, 0,
                              G_buf, G_rs, G_cs,
                              A_buf, A_rs, A_cs,
                              b_alg );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blk_var7 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blk_var8 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blk_var8b | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blk_var9 | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
References FLA_Apply_G_rf_blc_var9(), FLA_Apply_G_rf_bld_var9(), FLA_Apply_G_rf_bls_var9(), FLA_Apply_G_rf_blz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  /*
   * Object-level front end.  Reads the datatype of A plus the dimensions and
   * strides of G and A, then forwards the raw buffers to the typed blocked
   * kernel FLA_Apply_G_rf_bl?_var9().  G is always accessed through a complex
   * pointer: single precision when A is float/scomplex, double precision when
   * A is double/dcomplex.  A datatype matching none of the four branches
   * leaves A untouched.  FLA_SUCCESS is returned in every case.
   */
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int G_width  = FLA_Obj_width( G );
  const int A_length = FLA_Obj_length( A );
  const int A_width  = FLA_Obj_width( A );

  const int G_rs = FLA_Obj_row_stride( G );
  const int G_cs = FLA_Obj_col_stride( G );
  const int A_rs = FLA_Obj_row_stride( A );
  const int A_cs = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* G_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    A_buf = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var9( G_width, A_length, A_width,
                             G_buf, G_rs, G_cs,
                             A_buf, A_rs, A_cs,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* G_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   A_buf = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var9( G_width, A_length, A_width,
                             G_buf, G_rs, G_cs,
                             A_buf, A_rs, A_cs,
                             b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* G_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* A_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var9( G_width, A_length, A_width,
                             G_buf, G_rs, G_cs,
                             A_buf, A_rs, A_cs,
                             b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* G_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* A_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var9( G_width, A_length, A_width,
                             G_buf, G_rs, G_cs,
                             A_buf, A_rs, A_cs,
                             b_alg );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blk_var9b | ( | FLA_Obj | G, |
FLA_Obj | A, | ||
dim_t | b_alg | ||
) |
References FLA_Apply_G_rf_blc_var9b(), FLA_Apply_G_rf_bld_var9b(), FLA_Apply_G_rf_bls_var9b(), FLA_Apply_G_rf_blz_var9b(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
  /*
   * Object-level front end.  Reads the datatype of A plus the dimensions and
   * strides of G and A, then forwards the raw buffers to the typed blocked
   * kernel FLA_Apply_G_rf_bl?_var9b().  The fourth kernel argument (named i_k
   * in the typed signatures) is always passed as 0 from here.  G is always
   * accessed through a complex pointer: single precision when A is
   * float/scomplex, double precision when A is double/dcomplex.  A datatype
   * matching none of the four branches leaves A untouched.  FLA_SUCCESS is
   * returned in every case.
   */
  const FLA_Datatype dt_A = FLA_Obj_datatype( A );

  const int G_width  = FLA_Obj_width( G );
  const int A_length = FLA_Obj_length( A );
  const int A_width  = FLA_Obj_width( A );

  const int G_rs = FLA_Obj_row_stride( G );
  const int G_cs = FLA_Obj_col_stride( G );
  const int A_rs = FLA_Obj_row_stride( A );
  const int A_cs = FLA_Obj_col_stride( A );

  if ( dt_A == FLA_FLOAT )
  {
    scomplex* G_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    float*    A_buf = ( float*    ) FLA_FLOAT_PTR( A );

    FLA_Apply_G_rf_bls_var9b( G_width, A_length, A_width, 0,
                              G_buf, G_rs, G_cs,
                              A_buf, A_rs, A_cs,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE )
  {
    dcomplex* G_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    double*   A_buf = ( double*   ) FLA_DOUBLE_PTR( A );

    FLA_Apply_G_rf_bld_var9b( G_width, A_length, A_width, 0,
                              G_buf, G_rs, G_cs,
                              A_buf, A_rs, A_cs,
                              b_alg );
  }
  else if ( dt_A == FLA_COMPLEX )
  {
    scomplex* G_buf = ( scomplex* ) FLA_COMPLEX_PTR( G );
    scomplex* A_buf = ( scomplex* ) FLA_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blc_var9b( G_width, A_length, A_width, 0,
                              G_buf, G_rs, G_cs,
                              A_buf, A_rs, A_cs,
                              b_alg );
  }
  else if ( dt_A == FLA_DOUBLE_COMPLEX )
  {
    dcomplex* G_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
    dcomplex* A_buf = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

    FLA_Apply_G_rf_blz_var9b( G_width, A_length, A_width, 0,
                              G_buf, G_rs, G_cs,
                              A_buf, A_rs, A_cs,
                              b_alg );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bls_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_ass_var1().
Referenced by FLA_Apply_G_rf_blk_var1().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_ass_var1().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_ops_var1.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    float* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_ass_var1( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bls_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_ass_var2().
Referenced by FLA_Apply_G_rf_blk_var2().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_ass_var2().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_ops_var2.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    float* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_ass_var2( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bls_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_ass_var3().
Referenced by FLA_Apply_G_rf_blk_var3().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_ass_var3().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_ops_var3.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    float* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_ass_var3( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bls_var3b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_ass_var3b().
Referenced by FLA_Apply_G_rf_blk_var3b().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel to FLA_Apply_G_rf_ass_var3b(), together with the unchanged rotation
   * buffer buff_G, the caller's i_k, and the number of rows that precede the
   * panel.  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_ops_var3b.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    float* A1 = buff_A + i*rs_A;

    /* Rows already handled by earlier panels. */
    int m_behind = i;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_ass_var3b( k_G, b, n_A, i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bls_var4 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bls_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bls_var5b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bls_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_ass_var6().
Referenced by FLA_Apply_G_rf_blk_var6().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_ass_var6().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_ops_var6.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    float* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_ass_var6( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bls_var6b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_ass_var6b().
Referenced by FLA_Apply_G_rf_blk_var6b().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel to FLA_Apply_G_rf_ass_var6b(), together with the unchanged rotation
   * buffer buff_G, the caller's i_k, and the number of rows that precede the
   * panel.  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_ops_var6b.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    float* A1 = buff_A + i*rs_A;

    /* Rows already handled by earlier panels. */
    int m_behind = i;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_ass_var6b( k_G, b, n_A, i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bls_var7 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bls_var8 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bls_var8b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_bls_var9 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_ass_var9().
Referenced by FLA_Apply_G_rf_blk_var9().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_ass_var9().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_ops_var9.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    float* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_ass_var9( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_bls_var9b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_ass_var9b().
Referenced by FLA_Apply_G_rf_blk_var9b().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel to FLA_Apply_G_rf_ass_var9b(), together with the unchanged rotation
   * buffer buff_G, the caller's i_k, and the number of rows that precede the
   * panel.  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_ops_var9b.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    float* A1 = buff_A + i*rs_A;

    /* Rows already handled by earlier panels. */
    int m_behind = i;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_ass_var9b( k_G, b, n_A, i_k, m_behind,
                              buff_G, rs_G, cs_G,
                              A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blz_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asz_var1().
Referenced by FLA_Apply_G_rf_blk_var1().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_asz_var1().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_opz_var1.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    dcomplex* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_asz_var1( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blz_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asz_var2().
Referenced by FLA_Apply_G_rf_blk_var2().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_asz_var2().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_opz_var2.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    dcomplex* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_asz_var2( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blz_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asz_var3().
Referenced by FLA_Apply_G_rf_blk_var3(), FLA_Bsvd_v_opz_var1(), FLA_Tevd_v_opz_var1(), and FLA_Tevd_v_opz_var3().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_asz_var3().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_opz_var3.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    dcomplex* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_asz_var3( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blz_var3b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
Referenced by FLA_Apply_G_rf_blk_var3b().
{
/* Placeholder: this variant has no double-complex implementation here, and
   none of the arguments are read.  The only action is to hand
   FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
   NOTE(review): presumably FLA_Check_error_code() reports or aborts on this
   code -- confirm; if it returns, the caller still receives FLA_SUCCESS. */
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blz_var4 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blz_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blz_var5b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blz_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asz_var6().
Referenced by FLA_Apply_G_rf_blk_var6().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_asz_var6().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_opz_var6.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    dcomplex* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_asz_var6( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blz_var6b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
Referenced by FLA_Apply_G_rf_blk_var6b().
{
/* Placeholder: this variant has no double-complex implementation here, and
   none of the arguments are read.  The only action is to hand
   FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
   NOTE(review): presumably FLA_Check_error_code() reports or aborts on this
   code -- confirm; if it returns, the caller still receives FLA_SUCCESS. */
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blz_var7 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blz_var8 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blz_var8b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
FLA_Error FLA_Apply_G_rf_blz_var9 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
References FLA_Apply_G_rf_asz_var9().
Referenced by FLA_Apply_G_rf_blk_var9().
{
  /*
   * Blocked driver: walks A in panels of at most b_alg rows and passes each
   * panel, together with the unchanged rotation buffer buff_G, to
   * FLA_Apply_G_rf_asz_var9().  Always returns FLA_SUCCESS.
   * (The original also carried a disabled call to FLA_Apply_G_rf_opz_var9.)
   */
  int i;
  int b = 0;

  for ( i = 0; i < m_A; i += b )
  {
    /* Start of the current panel: row i, column 0 of A. */
    dcomplex* A1 = buff_A + i*rs_A;

    /* Rows still to process; positive because i < m_A inside the loop. */
    int m_ahead = m_A - i;

    /* Panel height.  b_alg <= 0 would never advance i, so it is treated as
       "no blocking" and the remaining rows go out as one panel. */
    b = ( b_alg > 0 && b_alg < m_ahead ) ? b_alg : m_ahead;

    FLA_Apply_G_rf_asz_var9( k_G, b, n_A,
                             buff_G, rs_G, cs_G,
                             A1, rs_A, cs_A );
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_blz_var9b | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
int | i_k, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A, | ||
int | b_alg | ||
) |
Referenced by FLA_Apply_G_rf_blk_var9b().
{
/* Placeholder: this variant has no double-complex implementation here, and
   none of the arguments are read.  The only action is to hand
   FLA_NOT_YET_IMPLEMENTED to FLA_Check_error_code().
   NOTE(review): presumably FLA_Check_error_code() reports or aborts on this
   code -- confirm; if it returns, the caller still receives FLA_SUCCESS. */
FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opc_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opc_var9(), and FLA_Apply_G_rf_opt_var1().
{
  /*
   * Simple (unfused) application of rotations to the columns of A.
   *
   * buff_G is read as a grid of (gamma, sigma) pairs stored in the real and
   * imaginary parts of each scomplex entry; entry (j, l) lives at
   * buff_G + j*rs_G + l*cs_G.  For each of the k_G columns of G, in order,
   * rotation j (j = 0 .. n_A-2) is applied to columns j and j+1 of A over
   * m_A rows via MAC_Apply_G_mx2_opc.  Entries with gamma == 1 and
   * sigma == 0 are skipped.  Always returns FLA_SUCCESS.
   *
   * Block comments are used throughout so that a line-collapsed rendering of
   * this body cannot swallow live code behind a line comment.
   */
  float     one    = bli_s1();
  float     zero   = bli_s0();
  int       nG_app = n_A - 1;   /* rotations per column of G */
  int       l, j;
  float     gamma;
  float     sigma;
  scomplex* a1;
  scomplex* a2;
  scomplex* g1;
  scomplex* g11;

  g1 = buff_G;

  for ( l = 0; l < k_G; ++l )
  {
    /* Restart at the first column pair of A for every column of G. */
    a1  = buff_A;
    a2  = buff_A + cs_A;
    g11 = g1;

    for ( j = 0; j < nG_app; ++j )
    {
      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma != one || sigma != zero )
      {
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma,
                             &sigma,
                             a1, rs_A,
                             a2, rs_A );
      }

      a1  += cs_A;
      a2  += cs_A;
      g11 += rs_G;
    }

    g1 += cs_G;
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opc_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var2().
{
  /*
   * Wavefront ("pipelined") application of rotations to the columns of A.
   *
   * Entry (g, k) of G lives at buff_G + g*rs_G + k*cs_G and holds gamma in
   * its real part and sigma in its imaginary part; it is applied to columns
   * g and g+1 of A over m_A rows.  Rotations are visited along anti-diagonals
   * of G (k increasing while g decreases) in three phases: start-up, where
   * the diagonals grow; pipeline, where each diagonal touches all k_G
   * columns; and shutdown, where they shrink again.  Identity entries
   * (gamma == 1, sigma == 0) are skipped.
   *
   * When nG = n_A - 1 is smaller than k_G - 1, or k_G == 1, the work is
   * delegated to FLA_Apply_G_rf_opc_var1().  Always returns FLA_SUCCESS.
   */
  float     one  = bli_s1();
  float     zero = bli_s0();
  float     gamma;
  float     sigma;
  scomplex* a1;
  scomplex* a2;
  scomplex* g11;
  int       j, g, k;
  int       nG, nG_app;
  int       k_minus_1;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;

  /* Use the simple variant for nG < (k - 1) or k == 1. */
  if ( nG < k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_opc_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase. */
  for ( j = 0; j < k_minus_1; ++j )
  {
    nG_app = j + 1;

    for ( k = 0, g = nG_app - 1; k < nG_app; ++k, --g )
    {
      g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
      a1    = buff_A + (g    )*cs_A;
      a2    = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_opc( m_A,
                           &gamma,
                           &sigma,
                           a1, rs_A,
                           a2, rs_A );
    }
  }

  /* Pipeline stage. */
  for ( j = k_minus_1; j < nG; ++j )
  {
    nG_app = k_G;

    for ( k = 0, g = j; k < nG_app; ++k, --g )
    {
      g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
      a1    = buff_A + (g    )*cs_A;
      a2    = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_opc( m_A,
                           &gamma,
                           &sigma,
                           a1, rs_A,
                           a2, rs_A );
    }
  }

  /* Shutdown stage. */
  for ( j = nG - k_minus_1; j < nG; ++j )
  {
    nG_app = nG - j;

    for ( k = k_G - nG_app, g = nG - 1; k < k_G; ++k, --g )
    {
      g11   = buff_G + (g    )*rs_G + (k  )*cs_G;
      a1    = buff_A + (g    )*cs_A;
      a2    = buff_A + (g + 1)*cs_A;

      gamma = g11->real;
      sigma = g11->imag;

      /* Skip the current iteration if the rotation is identity. */
      if ( gamma == one && sigma == zero ) continue;

      MAC_Apply_G_mx2_opc( m_A,
                           &gamma,
                           &sigma,
                           a1, rs_A,
                           a2, rs_A );
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opc_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var3().
{
  /*
   * Wavefront application of rotations to the columns of A, fusing a 2x2
   * group of rotations (two adjacent rows of G in column k, two in column
   * k+1) into one pass over four columns of A.
   *
   * Entry (g, k) of G lives at buff_G + g*rs_G + k*cs_G, with gamma in its
   * real part and sigma in its imaginary part.  In the names below the
   * digits give the pair of A columns touched (a1..a4) and _k1/_k2 the first
   * or second G column of the fused group.  If any of the four rotations is
   * an identity (gamma == 1, sigma == 0) the group falls back to individual
   * two-column applications in the order 23_k1, 34_k1, 12_k2, 23_k2;
   * otherwise MAC_Apply_G_mx4s_opc handles all four at once.
   *
   * When nG = n_A - 1 is smaller than 2*(k_G - 1), or k_G == 1, the work is
   * delegated to FLA_Apply_G_rf_opc_var1().  Always returns FLA_SUCCESS.
   */
  float     one  = bli_s1();
  float     zero = bli_s0();
  float     gamma23_k1;
  float     sigma23_k1;
  float     gamma34_k1;
  float     sigma34_k1;
  float     gamma12_k2;
  float     sigma12_k2;
  float     gamma23_k2;
  float     sigma23_k2;
  scomplex* a1;
  scomplex* a2;
  scomplex* a3;
  scomplex* a4;
  scomplex* g23_k1;
  scomplex* g34_k1;
  scomplex* g12_k2;
  scomplex* g23_k2;
  int       i, j, g, k;
  int       nG, nG_app;
  int       n_iter;
  int       n_left;
  int       k_minus_1;
  int       n_fuse;
  int       k_fuse;
  int       is_ident23_k1, is_ident34_k1;
  int       is_ident12_k2, is_ident23_k2;
  int       has_ident;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;
  n_fuse    = 2;
  k_fuse    = 2;

  /* Use the simple variant for nG < 2*(k - 1) or k == 1. */
  if ( nG < 2*k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_opc_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase. */
  for ( j = -1; j < k_minus_1; j += n_fuse )
  {
    nG_app = j + 2;
    n_iter = nG_app / k_fuse;
    n_left = 1;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma23_k1,
                               &sigma23_k1,
                               a2, rs_A,
                               a3, rs_A );

        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma34_k1,
                               &sigma34_k1,
                               a3, rs_A,
                               a4, rs_A );

        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma12_k2,
                               &sigma12_k2,
                               a1, rs_A,
                               a2, rs_A );

        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma23_k2,
                               &sigma23_k2,
                               a2, rs_A,
                               a3, rs_A );
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_opc( m_A,
                              &gamma23_k1,
                              &sigma23_k1,
                              &gamma34_k1,
                              &sigma34_k1,
                              &gamma12_k2,
                              &sigma12_k2,
                              &gamma23_k2,
                              &sigma23_k2,
                              a1, rs_A,
                              a2, rs_A,
                              a3, rs_A,
                              a4, rs_A );
      }
    }

    /* n_left is fixed at 1 in this phase: one trailing 34 rotation from
       column k of G is always applied. */
    if ( n_left == 1 )
    {
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;

      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident34_k1 )
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma34_k1,
                             &sigma34_k1,
                             a3, rs_A,
                             a4, rs_A );
    }
  }

  /* Pipeline stage.  j continues from where the start-up loop stopped. */
  for ( ; j < nG - 1; j += n_fuse )
  {
    nG_app = k_G;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma23_k1,
                               &sigma23_k1,
                               a2, rs_A,
                               a3, rs_A );

        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma34_k1,
                               &sigma34_k1,
                               a3, rs_A,
                               a4, rs_A );

        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma12_k2,
                               &sigma12_k2,
                               a1, rs_A,
                               a2, rs_A );

        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma23_k2,
                               &sigma23_k2,
                               a2, rs_A,
                               a3, rs_A );
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_opc( m_A,
                              &gamma23_k1,
                              &sigma23_k1,
                              &gamma34_k1,
                              &sigma34_k1,
                              &gamma12_k2,
                              &sigma12_k2,
                              &gamma23_k2,
                              &sigma23_k2,
                              a1, rs_A,
                              a2, rs_A,
                              a3, rs_A,
                              a4, rs_A );
      }
    }

    /* Odd k_G: one G column is left over; apply its 23 and 34 rotations.
       NOTE(review): when both are identities the final else still calls the
       three-column kernel with identity rotations -- kept as in the original. */
    if ( n_left == 1 )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23_k1,
                             &sigma23_k1,
                             a2, rs_A,
                             a3, rs_A );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma34_k1,
                             &sigma34_k1,
                             a3, rs_A,
                             a4, rs_A );
      }
      else
      {
        MAC_Apply_G_mx3_opc( m_A,
                             &gamma23_k1,
                             &sigma23_k1,
                             &gamma34_k1,
                             &sigma34_k1,
                             a2, rs_A,
                             a3, rs_A,
                             a4, rs_A );
      }
    }
  }

  /* Shutdown stage. */
  for ( j = nG % n_fuse; j < k_G; j += n_fuse )
  {
    g = nG - 1;
    k = j;

    /* Unconditional leading step: the last rotation row of G (g = nG - 1)
       in column k, applied to the last two columns of A. */
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

      if ( !is_ident23_k1 )
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23_k1,
                             &sigma23_k1,
                             a2, rs_A,
                             a3, rs_A );
      ++k;
      --g;
    }

    nG_app = k_minus_1 - j;
    n_iter = nG_app / k_fuse;
    n_left = nG_app % k_fuse;

    /* k and g carry over from the leading step above. */
    for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
      a1     = buff_A + (g - 1)*cs_A;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;
      gamma12_k2 = g12_k2->real;
      sigma12_k2 = g12_k2->imag;
      gamma23_k2 = g23_k2->real;
      sigma23_k2 = g23_k2->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
      is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
      is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
      has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
                        is_ident12_k2 || is_ident23_k2 );

      if ( has_ident )
      {
        /* Apply to pairs of columns as needed. */
        if ( !is_ident23_k1 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma23_k1,
                               &sigma23_k1,
                               a2, rs_A,
                               a3, rs_A );

        if ( !is_ident34_k1 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma34_k1,
                               &sigma34_k1,
                               a3, rs_A,
                               a4, rs_A );

        if ( !is_ident12_k2 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma12_k2,
                               &sigma12_k2,
                               a1, rs_A,
                               a2, rs_A );

        if ( !is_ident23_k2 )
          MAC_Apply_G_mx2_opc( m_A,
                               &gamma23_k2,
                               &sigma23_k2,
                               a2, rs_A,
                               a3, rs_A );
      }
      else
      {
        /* Apply to all four columns. */
        MAC_Apply_G_mx4s_opc( m_A,
                              &gamma23_k1,
                              &sigma23_k1,
                              &gamma34_k1,
                              &sigma34_k1,
                              &gamma12_k2,
                              &sigma12_k2,
                              &gamma23_k2,
                              &sigma23_k2,
                              a1, rs_A,
                              a2, rs_A,
                              a3, rs_A,
                              a4, rs_A );
      }
    }

    /* Left-over G column: same 23/34 handling as in the pipeline stage. */
    if ( n_left == 1 )
    {
      g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
      g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
      a2     = buff_A + (g    )*cs_A;
      a3     = buff_A + (g + 1)*cs_A;
      a4     = buff_A + (g + 2)*cs_A;

      gamma23_k1 = g23_k1->real;
      sigma23_k1 = g23_k1->imag;
      gamma34_k1 = g34_k1->real;
      sigma34_k1 = g34_k1->imag;

      is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
      is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

      if ( !is_ident23_k1 && is_ident34_k1 )
      {
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23_k1,
                             &sigma23_k1,
                             a2, rs_A,
                             a3, rs_A );
      }
      else if ( is_ident23_k1 && !is_ident34_k1 )
      {
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma34_k1,
                             &sigma34_k1,
                             a3, rs_A,
                             a4, rs_A );
      }
      else
      {
        MAC_Apply_G_mx3_opc( m_A,
                             &gamma23_k1,
                             &sigma23_k1,
                             &gamma34_k1,
                             &sigma34_k1,
                             a2, rs_A,
                             a3, rs_A,
                             a4, rs_A );
      }
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opc_var4 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_opc_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_opc_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
scomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var6().
{
  /*
   * Wavefront application of rotations to the columns of A, fusing two
   * rotations from adjacent G columns into one pass over three columns of A.
   *
   * Entry (g, k) of G lives at buff_G + g*rs_G + k*cs_G, with gamma in its
   * real part and sigma in its imaginary part.  Each fused step pairs
   * rotation (g, k), applied to columns a2/a3 ("23"), with rotation
   * (g-1, k+1), applied to columns a1/a2 ("12").  If exactly one of the two
   * is an identity (gamma == 1, sigma == 0) only the other is applied with
   * the two-column kernel; if both are identities nothing is done; otherwise
   * MAC_Apply_G_mx3b_opc handles both.  An odd count leaves one "23"
   * rotation that is applied on its own.
   *
   * When nG = n_A - 1 is smaller than k_G - 1, or k_G == 1, the work is
   * delegated to FLA_Apply_G_rf_opc_var1().  Always returns FLA_SUCCESS.
   */
  float     one  = bli_s1();
  float     zero = bli_s0();
  float     gamma12;
  float     sigma12;
  float     gamma23;
  float     sigma23;
  scomplex* a1;
  scomplex* a2;
  scomplex* a3;
  scomplex* g12;
  scomplex* g23;
  int       i, j, g, k;
  int       nG, nG_app;
  int       n_iter;
  int       n_left;
  int       k_minus_1;
  int       n_fuse;
  int       is_ident12, is_ident23;

  k_minus_1 = k_G - 1;
  nG        = n_A - 1;
  n_fuse    = 2;

  /* Use the simple variant for nG < (k - 1) or k == 1. */
  if ( nG < k_minus_1 || k_G == 1 )
  {
    FLA_Apply_G_rf_opc_var1( k_G,
                             m_A,
                             n_A,
                             buff_G, rs_G, cs_G,
                             buff_A, rs_A, cs_A );
    return FLA_SUCCESS;
  }

  /* Start-up phase. */
  for ( j = 0; j < k_minus_1; ++j )
  {
    nG_app = j + 1;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g - 1)*cs_A;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma12,
                             &sigma12,
                             a1, rs_A,
                             a2, rs_A );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23,
                             &sigma23,
                             a2, rs_A,
                             a3, rs_A );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3b_opc( m_A,
                              &gamma12,
                              &sigma12,
                              &gamma23,
                              &sigma23,
                              a1, rs_A,
                              a2, rs_A,
                              a3, rs_A );
      }
    }

    /* Odd count: one unpaired rotation remains at the current (g, k). */
    if ( n_left == 1 )
    {
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23,
                             &sigma23,
                             a2, rs_A,
                             a3, rs_A );
    }
  }

  /* Pipeline stage. */
  for ( j = k_minus_1; j < nG; ++j )
  {
    nG_app = k_G;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g - 1)*cs_A;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma12,
                             &sigma12,
                             a1, rs_A,
                             a2, rs_A );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23,
                             &sigma23,
                             a2, rs_A,
                             a3, rs_A );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3b_opc( m_A,
                              &gamma12,
                              &sigma12,
                              &gamma23,
                              &sigma23,
                              a1, rs_A,
                              a2, rs_A,
                              a3, rs_A );
      }
    }

    /* Odd count: one unpaired rotation remains at the current (g, k). */
    if ( n_left == 1 )
    {
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23,
                             &sigma23,
                             a2, rs_A,
                             a3, rs_A );
    }
  }

  /* Shutdown stage. */
  for ( j = 1; j < k_G; ++j )
  {
    nG_app = k_G - j;
    n_iter = nG_app / n_fuse;
    n_left = nG_app % n_fuse;

    for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
    {
      g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a1  = buff_A + (g - 1)*cs_A;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma12 = g12->real;
      sigma12 = g12->imag;
      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident12 = ( gamma12 == one && sigma12 == zero );
      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident12 && is_ident23 )
      {
        /* Apply only to columns 1 and 2. */
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma12,
                             &sigma12,
                             a1, rs_A,
                             a2, rs_A );
      }
      else if ( is_ident12 && !is_ident23 )
      {
        /* Apply only to columns 2 and 3. */
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23,
                             &sigma23,
                             a2, rs_A,
                             a3, rs_A );
      }
      else if ( !is_ident12 && !is_ident23 )
      {
        /* Apply to all three columns. */
        MAC_Apply_G_mx3b_opc( m_A,
                              &gamma12,
                              &sigma12,
                              &gamma23,
                              &sigma23,
                              a1, rs_A,
                              a2, rs_A,
                              a3, rs_A );
      }
    }

    /* Odd count: one unpaired rotation remains at the current (g, k). */
    if ( n_left == 1 )
    {
      g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
      a2  = buff_A + (g    )*cs_A;
      a3  = buff_A + (g + 1)*cs_A;

      gamma23 = g23->real;
      sigma23 = g23->imag;

      is_ident23 = ( gamma23 == one && sigma23 == zero );

      if ( !is_ident23 )
        MAC_Apply_G_mx2_opc( m_A,
                             &gamma23,
                             &sigma23,
                             a2, rs_A,
                             a3, rs_A );
    }
  }

  return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opc_var7 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opc_var8 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opc_var9 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, scomplex *buff_A, int rs_A, int cs_A)
References bli_s0(), bli_s1(), FLA_Apply_G_rf_opc_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var9().
{
	// Applies the Givens rotations stored in G to A from the right, two
	// neighbouring rotations of the same set at a time.
	//
	// The rotation G(g,k) is the entry at buff_G + g*rs_G + k*cs_G; its real
	// part is read as gamma and its imaginary part as sigma. It acts on the
	// two columns of A that start at buff_A + g*cs_A and buff_A + (g+1)*cs_A.
	// A rotation with gamma == one and sigma == zero is treated as identity
	// and is not applied.
	//
	// k_G, m_A and n_A are used as the number of rotation sets, the length
	// passed to the kernels, and the number of columns of A (n_A - 1
	// rotations per set). Always returns FLA_SUCCESS.
	float     one  = bli_s1();
	float     zero = bli_s0();
	float     gamma12;
	float     sigma12;
	float     gamma23;
	float     sigma23;
	scomplex* a1;
	scomplex* a2;
	scomplex* a3;
	scomplex* g12;
	scomplex* g23;
	int       j, g, k;
	int       is_ident12, is_ident23;
	int       k_minus_1 = k_G - 1;
	int       nG        = n_A - 1;
	int       n_fuse    = 2;

	// Use the simple variant for nG < 2(k - 1) or k <= 1. Including k <= 0
	// here matters: with k_G == 0 and n_A == 0, nG % n_fuse is -1 and the
	// shutdown loop below would run once and read G at negative indices.
	// The simple variant performs no work when k_G <= 0.
	if ( nG < 2*k_minus_1 || k_G <= 1 )
	{
		FLA_Apply_G_rf_opc_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// Start-up phase: sweep j fuses G(g,k) with G(g+1,k) for k = 0 .. j,
	// g = j - k, then applies the single rotation G(0,j+1).
	for ( j = -1; j < k_minus_1; j += n_fuse )
	{
		for ( k = 0, g = j; k <= j; ++k, --g )
		{
			g12 = buff_G + g*rs_G + k*cs_G;
			g23 = g12 + rs_G;
			a1  = buff_A + g*cs_A;
			a2  = a1 + cs_A;
			a3  = a2 + cs_A;

			gamma12 = g12->real;
			sigma12 = g12->imag;
			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident12 = ( gamma12 == one && sigma12 == zero );
			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( is_ident12 )
			{
				if ( !is_ident23 )
				{
					// Apply only to columns 2 and 3.
					MAC_Apply_G_mx2_opc( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
				}
			}
			else if ( is_ident23 )
			{
				// Apply only to columns 1 and 2.
				MAC_Apply_G_mx2_opc( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
			}
			else
			{
				// Apply to all three columns.
				MAC_Apply_G_mx3_opc( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
			}
		}

		// The loop exits with g == -1 and k == j + 1, so this is G(0,j+1)
		// acting on the first two columns of A.
		g23 = buff_G + (g + 1)*rs_G + k*cs_G;
		a2  = buff_A + (g + 1)*cs_A;
		a3  = a2 + cs_A;

		gamma23 = g23->real;
		sigma23 = g23->imag;

		is_ident23 = ( gamma23 == one && sigma23 == zero );

		if ( !is_ident23 )
		{
			MAC_Apply_G_mx2_opc( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
		}
	}

	// Pipeline stage: j continues from where the start-up phase stopped;
	// every sweep touches all k_G sets.
	for ( ; j < nG - 1; j += n_fuse )
	{
		for ( k = 0, g = j; k < k_G; ++k, --g )
		{
			g12 = buff_G + g*rs_G + k*cs_G;
			g23 = g12 + rs_G;
			a1  = buff_A + g*cs_A;
			a2  = a1 + cs_A;
			a3  = a2 + cs_A;

			gamma12 = g12->real;
			sigma12 = g12->imag;
			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident12 = ( gamma12 == one && sigma12 == zero );
			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( is_ident12 )
			{
				if ( !is_ident23 )
				{
					// Apply only to columns 2 and 3.
					MAC_Apply_G_mx2_opc( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
				}
			}
			else if ( is_ident23 )
			{
				// Apply only to columns 1 and 2.
				MAC_Apply_G_mx2_opc( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
			}
			else
			{
				// Apply to all three columns.
				MAC_Apply_G_mx3_opc( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
			}
		}
	}

	// Shutdown stage: each sweep starts with the single rotation G(nG-1,j)
	// and then fuses pairs for the remaining sets j+1 .. k_G-1.
	for ( j = nG % n_fuse; j < k_G; j += n_fuse )
	{
		g = nG - 1;
		k = j;

		g12 = buff_G + g*rs_G + k*cs_G;
		a1  = buff_A + g*cs_A;
		a2  = a1 + cs_A;

		gamma12 = g12->real;
		sigma12 = g12->imag;

		is_ident12 = ( gamma12 == one && sigma12 == zero );

		if ( !is_ident12 )
		{
			MAC_Apply_G_mx2_opc( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
		}

		++k;
		--g;

		for ( ; k < k_G; ++k, --g )
		{
			g12 = buff_G + g*rs_G + k*cs_G;
			g23 = g12 + rs_G;
			a1  = buff_A + g*cs_A;
			a2  = a1 + cs_A;
			a3  = a2 + cs_A;

			gamma12 = g12->real;
			sigma12 = g12->imag;
			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident12 = ( gamma12 == one && sigma12 == zero );
			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( is_ident12 )
			{
				if ( !is_ident23 )
				{
					// Apply only to columns 2 and 3.
					MAC_Apply_G_mx2_opc( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
				}
			}
			else if ( is_ident23 )
			{
				// Apply only to columns 1 and 2.
				MAC_Apply_G_mx2_opc( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
			}
			else
			{
				// Apply to all three columns.
				MAC_Apply_G_mx3_opc( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
			}
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opd_var1 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
References bli_d0(), bli_d1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_opd_var9(), and FLA_Apply_G_rf_opt_var1().
{
	// Applies k_G sets of Givens rotations to A from the right, one set
	// after another. Rotation `col` of set `set` is the entry at
	// buff_G + col*rs_G + set*cs_G (real part read as gamma, imaginary part
	// as sigma) and acts on the two columns of A that start at
	// buff_A + col*cs_A and buff_A + (col+1)*cs_A.
	// Always returns FLA_SUCCESS.
	double    one   = bli_d1();
	double    zero  = bli_d0();
	int       n_rot = n_A - 1;  // one rotation per pair of adjacent columns
	int       set, col;
	double    gamma;
	double    sigma;
	double*   a1;
	double*   a2;
	dcomplex* rot;

	for ( set = 0; set < k_G; ++set )
	{
		for ( col = 0; col < n_rot; ++col )
		{
			rot   = buff_G + col*rs_G + set*cs_G;
			gamma = rot->real;
			sigma = rot->imag;

			// Skip the current iteration if the rotation is identity.
			if ( gamma == one && sigma == zero ) continue;

			a1 = buff_A + col*cs_A;
			a2 = a1 + cs_A;

			MAC_Apply_G_mx2_opd( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opd_var2 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var2().
{
	// Applies the rotations in G to A from the right, visiting G along
	// anti-diagonals: within one sweep the set index k increases while the
	// rotation index g decreases. G(g,k) is the entry at
	// buff_G + g*rs_G + k*cs_G (real part read as gamma, imaginary part as
	// sigma) and acts on the columns of A starting at buff_A + g*cs_A and
	// buff_A + (g+1)*cs_A. Always returns FLA_SUCCESS.
	double    one       = bli_d1();
	double    zero      = bli_d0();
	int       k_minus_1 = k_G - 1;
	int       nG        = n_A - 1;
	int       nG_app;
	int       j, g, k;
	int       k_first;
	double    gamma;
	double    sigma;
	double*   a1;
	double*   a2;
	dcomplex* g11;

	// Use the simple variant for nG < (k - 1) or k == 1.
	if ( nG < k_minus_1 || k_G == 1 )
	{
		FLA_Apply_G_rf_opd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// Start-up and pipeline sweeps. Sweep j applies G(j-k,k) for
	// k = 0 .. nG_app-1. During start-up (j < k - 1) only the first j + 1
	// sets take part; afterwards all k_G sets do.
	for ( j = 0; j < nG; ++j )
	{
		nG_app = ( j < k_minus_1 ? j + 1 : k_G );

		for ( k = 0; k < nG_app; ++k )
		{
			g     = j - k;
			g11   = buff_G + g*rs_G + k*cs_G;
			gamma = g11->real;
			sigma = g11->imag;

			// Only non-identity rotations are applied.
			if ( gamma != one || sigma != zero )
			{
				a1 = buff_A + g*cs_A;
				a2 = a1 + cs_A;

				MAC_Apply_G_mx2_opd( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
			}
		}
	}

	// Shutdown sweeps. Each one starts at the last rotation index, nG - 1,
	// and at a later first set (k_first = 1 .. k_G - 1).
	for ( k_first = 1; k_first < k_G; ++k_first )
	{
		for ( k = k_first, g = nG - 1; k < k_G; ++k, --g )
		{
			g11   = buff_G + g*rs_G + k*cs_G;
			gamma = g11->real;
			sigma = g11->imag;

			// Only non-identity rotations are applied.
			if ( gamma != one || sigma != zero )
			{
				a1 = buff_A + g*cs_A;
				a2 = a1 + cs_A;

				MAC_Apply_G_mx2_opd( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
			}
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opd_var3 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var3().
{
	// Applies the Givens rotations stored in G to A from the right, fusing
	// two rotations from each of two consecutive sets (four rotations over
	// four columns of A) per step.
	//
	// The rotation G(g,k) is the entry at buff_G + g*rs_G + k*cs_G; its real
	// part is read as gamma and its imaginary part as sigma. It acts on the
	// two columns of A that start at buff_A + g*cs_A and buff_A + (g+1)*cs_A.
	// A rotation with gamma == one and sigma == zero is treated as identity
	// and is not applied.
	//
	// Naming: a1..a4 are the columns g-1 .. g+2; "23_k1" is the rotation of
	// set k on columns a2/a3, "34_k1" the rotation of set k on a3/a4,
	// "12_k2" the rotation of set k+1 on a1/a2, and "23_k2" the rotation of
	// set k+1 on a2/a3. Always returns FLA_SUCCESS.
	double    one  = bli_d1();
	double    zero = bli_d0();
	double    gamma23_k1;
	double    sigma23_k1;
	double    gamma34_k1;
	double    sigma34_k1;
	double    gamma12_k2;
	double    sigma12_k2;
	double    gamma23_k2;
	double    sigma23_k2;
	double*   a1;
	double*   a2;
	double*   a3;
	double*   a4;
	dcomplex* g23_k1;
	dcomplex* g34_k1;
	dcomplex* g12_k2;
	dcomplex* g23_k2;
	int       i, j, g, k;
	int       nG, nG_app;
	int       n_iter;
	int       n_left;
	int       k_minus_1;
	int       n_fuse;
	int       k_fuse;
	int       is_ident23_k1, is_ident34_k1;
	int       is_ident12_k2, is_ident23_k2;
	int       has_ident;

	k_minus_1 = k_G - 1;
	nG        = n_A - 1;
	n_fuse    = 2;
	k_fuse    = 2;

	// Use the simple variant for nG < 2(k - 1) or k <= 1. Including k <= 0
	// here matters: with k_G == 0 and n_A == 0, nG % n_fuse is -1 and the
	// shutdown loop below would run once and read G at negative indices.
	// The simple variant performs no work when k_G <= 0.
	if ( nG < 2*k_minus_1 || k_G <= 1 )
	{
		FLA_Apply_G_rf_opd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// Start-up phase.
	for ( j = -1; j < k_minus_1; j += n_fuse )
	{
		nG_app = j + 2;
		n_iter = nG_app / k_fuse;

		for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
		{
			g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
			g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
			g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
			a1     = buff_A + (g - 1)*cs_A;
			a2     = buff_A + (g    )*cs_A;
			a3     = buff_A + (g + 1)*cs_A;
			a4     = buff_A + (g + 2)*cs_A;

			gamma23_k1 = g23_k1->real;
			sigma23_k1 = g23_k1->imag;
			gamma34_k1 = g34_k1->real;
			sigma34_k1 = g34_k1->imag;
			gamma12_k2 = g12_k2->real;
			sigma12_k2 = g12_k2->imag;
			gamma23_k2 = g23_k2->real;
			sigma23_k2 = g23_k2->imag;

			is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
			is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
			is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
			is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
			has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
			                  is_ident12_k2 || is_ident23_k2 );

			if ( has_ident )
			{
				// Apply to pairs of columns as needed.
				if ( !is_ident23_k1 )
					MAC_Apply_G_mx2_opd( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );

				if ( !is_ident34_k1 )
					MAC_Apply_G_mx2_opd( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );

				if ( !is_ident12_k2 )
					MAC_Apply_G_mx2_opd( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A );

				if ( !is_ident23_k2 )
					MAC_Apply_G_mx2_opd( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A );
			}
			else
			{
				// Apply to all four columns.
				MAC_Apply_G_mx4s_opd( m_A,
				                      &gamma23_k1, &sigma23_k1,
				                      &gamma34_k1, &sigma34_k1,
				                      &gamma12_k2, &sigma12_k2,
				                      &gamma23_k2, &sigma23_k2,
				                      a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A );
			}
		}

		// Every start-up sweep ends with one extra single rotation of set k
		// on columns g+1 / g+2 (g and k as left by the loop above).
		{
			g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			a3     = buff_A + (g + 1)*cs_A;
			a4     = buff_A + (g + 2)*cs_A;

			gamma34_k1 = g34_k1->real;
			sigma34_k1 = g34_k1->imag;

			is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

			if ( !is_ident34_k1 )
				MAC_Apply_G_mx2_opd( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
		}
	}

	// Pipeline stage; j continues from where the start-up phase stopped.
	for ( ; j < nG - 1; j += n_fuse )
	{
		nG_app = k_G;
		n_iter = nG_app / k_fuse;
		n_left = nG_app % k_fuse;

		for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
		{
			g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
			g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
			g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
			a1     = buff_A + (g - 1)*cs_A;
			a2     = buff_A + (g    )*cs_A;
			a3     = buff_A + (g + 1)*cs_A;
			a4     = buff_A + (g + 2)*cs_A;

			gamma23_k1 = g23_k1->real;
			sigma23_k1 = g23_k1->imag;
			gamma34_k1 = g34_k1->real;
			sigma34_k1 = g34_k1->imag;
			gamma12_k2 = g12_k2->real;
			sigma12_k2 = g12_k2->imag;
			gamma23_k2 = g23_k2->real;
			sigma23_k2 = g23_k2->imag;

			is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
			is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
			is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
			is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
			has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
			                  is_ident12_k2 || is_ident23_k2 );

			if ( has_ident )
			{
				// Apply to pairs of columns as needed.
				if ( !is_ident23_k1 )
					MAC_Apply_G_mx2_opd( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );

				if ( !is_ident34_k1 )
					MAC_Apply_G_mx2_opd( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );

				if ( !is_ident12_k2 )
					MAC_Apply_G_mx2_opd( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A );

				if ( !is_ident23_k2 )
					MAC_Apply_G_mx2_opd( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A );
			}
			else
			{
				// Apply to all four columns.
				MAC_Apply_G_mx4s_opd( m_A,
				                      &gamma23_k1, &sigma23_k1,
				                      &gamma34_k1, &sigma34_k1,
				                      &gamma12_k2, &sigma12_k2,
				                      &gamma23_k2, &sigma23_k2,
				                      a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A );
			}
		}

		// Odd k_G: one set is left over; handle its two rotations.
		if ( n_left == 1 )
		{
			g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
			g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			a2     = buff_A + (g    )*cs_A;
			a3     = buff_A + (g + 1)*cs_A;
			a4     = buff_A + (g + 2)*cs_A;

			gamma23_k1 = g23_k1->real;
			sigma23_k1 = g23_k1->imag;
			gamma34_k1 = g34_k1->real;
			sigma34_k1 = g34_k1->imag;

			is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
			is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

			if ( !is_ident23_k1 && is_ident34_k1 )
			{
				MAC_Apply_G_mx2_opd( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );
			}
			else if ( is_ident23_k1 && !is_ident34_k1 )
			{
				MAC_Apply_G_mx2_opd( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
			}
			else if ( !is_ident23_k1 && !is_ident34_k1 )
			{
				// Only run the three-column kernel when both rotations are
				// non-identity; two identity rotations need no work.
				MAC_Apply_G_mx3_opd( m_A,
				                     &gamma23_k1, &sigma23_k1,
				                     &gamma34_k1, &sigma34_k1,
				                     a2, rs_A, a3, rs_A, a4, rs_A );
			}
		}
	}

	// Shutdown stage.
	for ( j = nG % n_fuse; j < k_G; j += n_fuse )
	{
		g = nG - 1;
		k = j;

		// Each shutdown sweep begins with the single rotation G(nG-1,j).
		{
			g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
			a2     = buff_A + (g    )*cs_A;
			a3     = buff_A + (g + 1)*cs_A;

			gamma23_k1 = g23_k1->real;
			sigma23_k1 = g23_k1->imag;

			is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );

			if ( !is_ident23_k1 )
				MAC_Apply_G_mx2_opd( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );

			++k;
			--g;
		}

		nG_app = k_minus_1 - j;
		n_iter = nG_app / k_fuse;
		n_left = nG_app % k_fuse;

		for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse )
		{
			g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
			g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
			g23_k2 = buff_G + (g    )*rs_G + (k + 1)*cs_G;
			a1     = buff_A + (g - 1)*cs_A;
			a2     = buff_A + (g    )*cs_A;
			a3     = buff_A + (g + 1)*cs_A;
			a4     = buff_A + (g + 2)*cs_A;

			gamma23_k1 = g23_k1->real;
			sigma23_k1 = g23_k1->imag;
			gamma34_k1 = g34_k1->real;
			sigma34_k1 = g34_k1->imag;
			gamma12_k2 = g12_k2->real;
			sigma12_k2 = g12_k2->imag;
			gamma23_k2 = g23_k2->real;
			sigma23_k2 = g23_k2->imag;

			is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
			is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );
			is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero );
			is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero );
			has_ident     = ( is_ident23_k1 || is_ident34_k1 ||
			                  is_ident12_k2 || is_ident23_k2 );

			if ( has_ident )
			{
				// Apply to pairs of columns as needed.
				if ( !is_ident23_k1 )
					MAC_Apply_G_mx2_opd( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );

				if ( !is_ident34_k1 )
					MAC_Apply_G_mx2_opd( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );

				if ( !is_ident12_k2 )
					MAC_Apply_G_mx2_opd( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A );

				if ( !is_ident23_k2 )
					MAC_Apply_G_mx2_opd( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A );
			}
			else
			{
				// Apply to all four columns.
				MAC_Apply_G_mx4s_opd( m_A,
				                      &gamma23_k1, &sigma23_k1,
				                      &gamma34_k1, &sigma34_k1,
				                      &gamma12_k2, &sigma12_k2,
				                      &gamma23_k2, &sigma23_k2,
				                      a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A );
			}
		}

		// One set left over in this sweep; handle its two rotations.
		if ( n_left == 1 )
		{
			g23_k1 = buff_G + (g    )*rs_G + (k    )*cs_G;
			g34_k1 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			a2     = buff_A + (g    )*cs_A;
			a3     = buff_A + (g + 1)*cs_A;
			a4     = buff_A + (g + 2)*cs_A;

			gamma23_k1 = g23_k1->real;
			sigma23_k1 = g23_k1->imag;
			gamma34_k1 = g34_k1->real;
			sigma34_k1 = g34_k1->imag;

			is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero );
			is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero );

			if ( !is_ident23_k1 && is_ident34_k1 )
			{
				MAC_Apply_G_mx2_opd( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A );
			}
			else if ( is_ident23_k1 && !is_ident34_k1 )
			{
				MAC_Apply_G_mx2_opd( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A );
			}
			else if ( !is_ident23_k1 && !is_ident34_k1 )
			{
				// Only run the three-column kernel when both rotations are
				// non-identity; two identity rotations need no work.
				MAC_Apply_G_mx3_opd( m_A,
				                     &gamma23_k1, &sigma23_k1,
				                     &gamma34_k1, &sigma34_k1,
				                     a2, rs_A, a3, rs_A, a4, rs_A );
			}
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opd_var4 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opd_var5 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opd_var6 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var6().
{
	// Applies the rotations in G to A from the right, visiting G along
	// anti-diagonals and fusing, where possible, the rotation of set k on
	// columns g / g+1 with the rotation of set k+1 on columns g-1 / g.
	//
	// G(g,k) is the entry at buff_G + g*rs_G + k*cs_G (real part read as
	// gamma, imaginary part as sigma). A rotation with gamma == one and
	// sigma == zero is treated as identity and is not applied.
	// Always returns FLA_SUCCESS.
	double    one       = bli_d1();
	double    zero      = bli_d0();
	int       k_minus_1 = k_G - 1;
	int       nG        = n_A - 1;
	int       n_fuse    = 2;
	int       n_sweeps;
	int       sweep;
	int       n_pairs;
	int       nG_app;
	int       g, k;
	int       is_ident12, is_ident23;
	double    gamma12;
	double    sigma12;
	double    gamma23;
	double    sigma23;
	double*   a1;
	double*   a2;
	double*   a3;
	dcomplex* g12;
	dcomplex* g23;

	// Use the simple variant for nG < (k - 1) or k == 1.
	if ( nG < k_minus_1 || k_G == 1 )
	{
		FLA_Apply_G_rf_opd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// nG start-up/pipeline sweeps followed by k_G - 1 shutdown sweeps.
	n_sweeps = nG + ( k_minus_1 > 0 ? k_minus_1 : 0 );

	for ( sweep = 0; sweep < n_sweeps; ++sweep )
	{
		if ( sweep < nG )
		{
			// Start-up (sweep < k - 1): only the first sweep + 1 sets take
			// part. Pipeline: all k_G sets take part. Both begin at set 0
			// and rotation index `sweep`.
			k      = 0;
			g      = sweep;
			nG_app = ( sweep < k_minus_1 ? sweep + 1 : k_G );
		}
		else
		{
			// Shutdown: begin at the last rotation index with a later set.
			k      = sweep - nG + 1;
			g      = nG - 1;
			nG_app = k_G - k;
		}

		// Fused part: two sets per step.
		for ( n_pairs = nG_app / n_fuse; n_pairs > 0; --n_pairs, k += n_fuse, g -= n_fuse )
		{
			g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
			g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
			a1  = buff_A + (g - 1)*cs_A;
			a2  = a1 + cs_A;
			a3  = a2 + cs_A;

			gamma12 = g12->real;
			sigma12 = g12->imag;
			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident12 = ( gamma12 == one && sigma12 == zero );
			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( is_ident12 )
			{
				if ( !is_ident23 )
				{
					// Apply only to columns 2 and 3.
					MAC_Apply_G_mx2_opd( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
				}
			}
			else if ( is_ident23 )
			{
				// Apply only to columns 1 and 2.
				MAC_Apply_G_mx2_opd( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
			}
			else
			{
				// Apply to all three columns.
				MAC_Apply_G_mx3b_opd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
			}
		}

		// Odd number of sets in this sweep: one unfused rotation remains.
		if ( nG_app % n_fuse == 1 )
		{
			g23 = buff_G + g*rs_G + k*cs_G;
			a2  = buff_A + g*cs_A;
			a3  = a2 + cs_A;

			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( !is_ident23 )
			{
				MAC_Apply_G_mx2_opd( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
			}
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opd_var7 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opd_var8 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
FLA_Error FLA_Apply_G_rf_opd_var9 (int k_G, int m_A, int n_A, dcomplex *buff_G, int rs_G, int cs_G, double *buff_A, int rs_A, int cs_A)
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opd_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var9().
{
	// Applies the Givens rotations stored in G to A from the right, two
	// neighbouring rotations of the same set at a time.
	//
	// The rotation G(g,k) is the entry at buff_G + g*rs_G + k*cs_G; its real
	// part is read as gamma and its imaginary part as sigma. It acts on the
	// two columns of A that start at buff_A + g*cs_A and buff_A + (g+1)*cs_A.
	// A rotation with gamma == one and sigma == zero is treated as identity
	// and is not applied.
	//
	// k_G, m_A and n_A are used as the number of rotation sets, the length
	// passed to the kernels, and the number of columns of A (n_A - 1
	// rotations per set). Always returns FLA_SUCCESS.
	double    one  = bli_d1();
	double    zero = bli_d0();
	double    gamma12;
	double    sigma12;
	double    gamma23;
	double    sigma23;
	double*   a1;
	double*   a2;
	double*   a3;
	dcomplex* g12;
	dcomplex* g23;
	int       j, g, k;
	int       is_ident12, is_ident23;
	int       k_minus_1 = k_G - 1;
	int       nG        = n_A - 1;
	int       n_fuse    = 2;

	// Use the simple variant for nG < 2(k - 1) or k <= 1. Including k <= 0
	// here matters: with k_G == 0 and n_A == 0, nG % n_fuse is -1 and the
	// shutdown loop below would run once and read G at negative indices.
	// The simple variant performs no work when k_G <= 0.
	if ( nG < 2*k_minus_1 || k_G <= 1 )
	{
		FLA_Apply_G_rf_opd_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// Start-up phase: sweep j fuses G(g,k) with G(g+1,k) for k = 0 .. j,
	// g = j - k, then applies the single rotation G(0,j+1).
	for ( j = -1; j < k_minus_1; j += n_fuse )
	{
		for ( k = 0, g = j; k <= j; ++k, --g )
		{
			g12 = buff_G + g*rs_G + k*cs_G;
			g23 = g12 + rs_G;
			a1  = buff_A + g*cs_A;
			a2  = a1 + cs_A;
			a3  = a2 + cs_A;

			gamma12 = g12->real;
			sigma12 = g12->imag;
			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident12 = ( gamma12 == one && sigma12 == zero );
			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( is_ident12 )
			{
				if ( !is_ident23 )
				{
					// Apply only to columns 2 and 3.
					MAC_Apply_G_mx2_opd( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
				}
			}
			else if ( is_ident23 )
			{
				// Apply only to columns 1 and 2.
				MAC_Apply_G_mx2_opd( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
			}
			else
			{
				// Apply to all three columns.
				MAC_Apply_G_mx3_opd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
			}
		}

		// The loop exits with g == -1 and k == j + 1, so this is G(0,j+1)
		// acting on the first two columns of A.
		g23 = buff_G + (g + 1)*rs_G + k*cs_G;
		a2  = buff_A + (g + 1)*cs_A;
		a3  = a2 + cs_A;

		gamma23 = g23->real;
		sigma23 = g23->imag;

		is_ident23 = ( gamma23 == one && sigma23 == zero );

		if ( !is_ident23 )
		{
			MAC_Apply_G_mx2_opd( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
		}
	}

	// Pipeline stage: j continues from where the start-up phase stopped;
	// every sweep touches all k_G sets.
	for ( ; j < nG - 1; j += n_fuse )
	{
		for ( k = 0, g = j; k < k_G; ++k, --g )
		{
			g12 = buff_G + g*rs_G + k*cs_G;
			g23 = g12 + rs_G;
			a1  = buff_A + g*cs_A;
			a2  = a1 + cs_A;
			a3  = a2 + cs_A;

			gamma12 = g12->real;
			sigma12 = g12->imag;
			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident12 = ( gamma12 == one && sigma12 == zero );
			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( is_ident12 )
			{
				if ( !is_ident23 )
				{
					// Apply only to columns 2 and 3.
					MAC_Apply_G_mx2_opd( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
				}
			}
			else if ( is_ident23 )
			{
				// Apply only to columns 1 and 2.
				MAC_Apply_G_mx2_opd( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
			}
			else
			{
				// Apply to all three columns.
				MAC_Apply_G_mx3_opd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
			}
		}
	}

	// Shutdown stage: each sweep starts with the single rotation G(nG-1,j)
	// and then fuses pairs for the remaining sets j+1 .. k_G-1.
	for ( j = nG % n_fuse; j < k_G; j += n_fuse )
	{
		g = nG - 1;
		k = j;

		g12 = buff_G + g*rs_G + k*cs_G;
		a1  = buff_A + g*cs_A;
		a2  = a1 + cs_A;

		gamma12 = g12->real;
		sigma12 = g12->imag;

		is_ident12 = ( gamma12 == one && sigma12 == zero );

		if ( !is_ident12 )
		{
			MAC_Apply_G_mx2_opd( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
		}

		++k;
		--g;

		for ( ; k < k_G; ++k, --g )
		{
			g12 = buff_G + g*rs_G + k*cs_G;
			g23 = g12 + rs_G;
			a1  = buff_A + g*cs_A;
			a2  = a1 + cs_A;
			a3  = a2 + cs_A;

			gamma12 = g12->real;
			sigma12 = g12->imag;
			gamma23 = g23->real;
			sigma23 = g23->imag;

			is_ident12 = ( gamma12 == one && sigma12 == zero );
			is_ident23 = ( gamma23 == one && sigma23 == zero );

			if ( is_ident12 )
			{
				if ( !is_ident23 )
				{
					// Apply only to columns 2 and 3.
					MAC_Apply_G_mx2_opd( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
				}
			}
			else if ( is_ident23 )
			{
				// Apply only to columns 1 and 2.
				MAC_Apply_G_mx2_opd( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
			}
			else
			{
				// Apply to all three columns.
				MAC_Apply_G_mx3_opd( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
			}
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ops_var1 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
References bli_s0(), bli_s1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_ops_var9(), and FLA_Apply_G_rf_opt_var1().
{
	// Applies k_G sets of Givens rotations to A from the right, one set
	// after another. Rotation `col` of set `set` is the entry at
	// buff_G + col*rs_G + set*cs_G (real part read as gamma, imaginary part
	// as sigma) and acts on the two columns of A that start at
	// buff_A + col*cs_A and buff_A + (col+1)*cs_A.
	// Always returns FLA_SUCCESS.
	float     one   = bli_s1();
	float     zero  = bli_s0();
	int       n_rot = n_A - 1;  // one rotation per pair of adjacent columns
	int       set, col;
	float     gamma;
	float     sigma;
	float*    a1;
	float*    a2;
	scomplex* rot;

	for ( set = 0; set < k_G; ++set )
	{
		for ( col = 0; col < n_rot; ++col )
		{
			rot   = buff_G + col*rs_G + set*cs_G;
			gamma = rot->real;
			sigma = rot->imag;

			// Skip the current iteration if the rotation is identity.
			if ( gamma == one && sigma == zero ) continue;

			a1 = buff_A + col*cs_A;
			a2 = a1 + cs_A;

			MAC_Apply_G_mx2_ops( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ops_var2 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var2().
{
	// Applies the rotations in G to A from the right, visiting G along
	// anti-diagonals: within one sweep the set index k increases while the
	// rotation index g decreases. G(g,k) is the entry at
	// buff_G + g*rs_G + k*cs_G (real part read as gamma, imaginary part as
	// sigma) and acts on the columns of A starting at buff_A + g*cs_A and
	// buff_A + (g+1)*cs_A. Always returns FLA_SUCCESS.
	float     one       = bli_s1();
	float     zero      = bli_s0();
	int       k_minus_1 = k_G - 1;
	int       nG        = n_A - 1;
	int       nG_app;
	int       j, g, k;
	int       k_first;
	float     gamma;
	float     sigma;
	float*    a1;
	float*    a2;
	scomplex* g11;

	// Use the simple variant for nG < (k - 1) or k == 1.
	if ( nG < k_minus_1 || k_G == 1 )
	{
		FLA_Apply_G_rf_ops_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// Start-up and pipeline sweeps. Sweep j applies G(j-k,k) for
	// k = 0 .. nG_app-1. During start-up (j < k - 1) only the first j + 1
	// sets take part; afterwards all k_G sets do.
	for ( j = 0; j < nG; ++j )
	{
		nG_app = ( j < k_minus_1 ? j + 1 : k_G );

		for ( k = 0; k < nG_app; ++k )
		{
			g     = j - k;
			g11   = buff_G + g*rs_G + k*cs_G;
			gamma = g11->real;
			sigma = g11->imag;

			// Only non-identity rotations are applied.
			if ( gamma != one || sigma != zero )
			{
				a1 = buff_A + g*cs_A;
				a2 = a1 + cs_A;

				MAC_Apply_G_mx2_ops( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
			}
		}
	}

	// Shutdown sweeps. Each one starts at the last rotation index, nG - 1,
	// and at a later first set (k_first = 1 .. k_G - 1).
	for ( k_first = 1; k_first < k_G; ++k_first )
	{
		for ( k = k_first, g = nG - 1; k < k_G; ++k, --g )
		{
			g11   = buff_G + g*rs_G + k*cs_G;
			gamma = g11->real;
			sigma = g11->imag;

			// Only non-identity rotations are applied.
			if ( gamma != one || sigma != zero )
			{
				a1 = buff_A + g*cs_A;
				a2 = a1 + cs_A;

				MAC_Apply_G_mx2_ops( m_A, &gamma, &sigma, a1, rs_A, a2, rs_A );
			}
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ops_var3 (int k_G, int m_A, int n_A, scomplex *buff_G, int rs_G, int cs_G, float *buff_A, int rs_A, int cs_A)
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var3().
{ float one = bli_s1(); float zero = bli_s0(); float gamma23_k1; float sigma23_k1; float gamma34_k1; float sigma34_k1; float gamma12_k2; float sigma12_k2; float gamma23_k2; float sigma23_k2; float* a1; float* a2; float* a3; float* a4; scomplex* g23_k1; scomplex* g34_k1; scomplex* g12_k2; scomplex* g23_k2; int i, j, g, k; int nG, nG_app; int n_iter; int n_left; int k_minus_1; int n_fuse; int k_fuse; int is_ident23_k1, is_ident34_k1; int is_ident12_k2, is_ident23_k2; int has_ident; k_minus_1 = k_G - 1; nG = n_A - 1; n_fuse = 2; k_fuse = 2; // Use the simple variant for nG < (k - 1) or k == 1. if ( nG < 2*k_minus_1 || k_G == 1 ) { FLA_Apply_G_rf_ops_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); return FLA_SUCCESS; } // Start-up phase. for ( j = -1; j < k_minus_1; j += n_fuse ) { nG_app = j + 2; n_iter = nG_app / k_fuse; n_left = 1; for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of columns as needed. 
if ( !is_ident23_k1 ) MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_ops( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_ops( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A ); } else { // Apply to all four columns. MAC_Apply_G_mx4s_ops( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A ); } } if ( n_left == 1 ) { g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); } } // Pipeline stage for ( ; j < nG - 1; j += n_fuse ) { nG_app = k_G; n_iter = nG_app / k_fuse; n_left = nG_app % k_fuse; for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of 
columns as needed. if ( !is_ident23_k1 ) MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_ops( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_ops( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A ); } else { // Apply to all four columns. MAC_Apply_G_mx4s_ops( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A ); } } if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident23_k1 && is_ident34_k1 ) { MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); } else if ( is_ident23_k1 && !is_ident34_k1 ) { MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); } else { MAC_Apply_G_mx3_ops( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, rs_A, a3, rs_A, a4, rs_A ); } } } // Shutdown stage for ( j = nG % n_fuse; j < k_G; j += n_fuse ) { g = nG - 1; k = j; //n_left = 1; //if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); if ( !is_ident23_k1 ) MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); ++k; --g; } nG_app = k_minus_1 - j; n_iter = nG_app / k_fuse; n_left = nG_app % k_fuse; for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G 
+ (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of columns as needed. if ( !is_ident23_k1 ) MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_ops( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_ops( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A ); } else { // Apply to all four columns. 
MAC_Apply_G_mx4s_ops( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A ); } } if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident23_k1 && is_ident34_k1 ) { MAC_Apply_G_mx2_ops( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); } else if ( is_ident23_k1 && !is_ident34_k1 ) { MAC_Apply_G_mx2_ops( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); } else { MAC_Apply_G_mx3_ops( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, rs_A, a3, rs_A, a4, rs_A ); } } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_rf_ops_var4 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ops_var5 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ops_var6 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var6().
{
	// Each entry of G stores a rotation as (gamma, sigma) = (real, imag).
	// An entry equal to exactly (1, 0) is treated as the identity and skipped.
	const float one       = bli_s1();
	const float zero      = bli_s0();
	const int   k_minus_1 = k_G - 1;
	const int   nG        = n_A - 1;
	const int   n_fuse    = 2;
	int         n_waves;
	int         w;

	// Hand off to the unfused variant when k_G == 1 or nG < k_G - 1.
	if ( k_G == 1 || nG < k_minus_1 )
	{
		FLA_Apply_G_rf_ops_var1( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// Wave w applies every G(g,k) with g + k == w, starting from the lowest
	// usable k. The range of k grows during start-up (w < k_G - 1), spans all
	// k_G columns in the pipeline, and shrinks from below during shutdown
	// (w >= nG), where the first row used is pinned at nG - 1.
	n_waves = nG + k_minus_1;

	for ( w = 0; w < n_waves; ++w )
	{
		const int past_end = w - ( nG - 1 );
		const int k_first  = ( past_end > 0 ? past_end : 0 );
		const int k_last   = ( w < k_minus_1 ? w : k_minus_1 );
		const int nG_app   = k_last - k_first + 1;
		const int n_iter   = nG_app / n_fuse;
		const int n_left   = nG_app % n_fuse;
		int       k        = k_first;
		int       g        = w - k_first;
		int       i;

		// Fused step: G(g,k) acts on columns (g, g+1) and G(g-1,k+1) acts on
		// columns (g-1, g).
		for ( i = 0; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
		{
			scomplex* g12     = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
			scomplex* g23     = buff_G + (g    )*rs_G + (k    )*cs_G;
			float*    a1      = buff_A + (g - 1)*cs_A;
			float*    a2      = buff_A + (g    )*cs_A;
			float*    a3      = buff_A + (g + 1)*cs_A;
			float     gamma12 = g12->real;
			float     sigma12 = g12->imag;
			float     gamma23 = g23->real;
			float     sigma23 = g23->imag;
			const int use12   = ( gamma12 != one || sigma12 != zero );
			const int use23   = ( gamma23 != one || sigma23 != zero );

			if ( use12 && use23 )
			{
				// Neither rotation is identity: update all three columns.
				MAC_Apply_G_mx3b_ops( m_A,
				                      &gamma12,
				                      &sigma12,
				                      &gamma23,
				                      &sigma23,
				                      a1, rs_A,
				                      a2, rs_A,
				                      a3, rs_A );
			}
			else if ( use12 )
			{
				// Only the (1,2) rotation is non-trivial.
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma12,
				                     &sigma12,
				                     a1, rs_A,
				                     a2, rs_A );
			}
			else if ( use23 )
			{
				// Only the (2,3) rotation is non-trivial.
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma23,
				                     &sigma23,
				                     a2, rs_A,
				                     a3, rs_A );
			}
		}

		// An odd count leaves one rotation, G(g,k), to apply by itself.
		if ( n_left == 1 )
		{
			scomplex* g23     = buff_G + (g    )*rs_G + (k    )*cs_G;
			float*    a2      = buff_A + (g    )*cs_A;
			float*    a3      = buff_A + (g + 1)*cs_A;
			float     gamma23 = g23->real;
			float     sigma23 = g23->imag;

			if ( gamma23 != one || sigma23 != zero )
			{
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma23,
				                     &sigma23,
				                     a2, rs_A,
				                     a3, rs_A );
			}
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_ops_var7 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ops_var8 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
FLA_Error FLA_Apply_G_rf_ops_var9 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
scomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
float * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_s0(), bli_s1(), FLA_Apply_G_rf_ops_var1(), scomplex::imag, and scomplex::real.
Referenced by FLA_Apply_G_rf_opt_var9().
{
	// Each entry of G stores a rotation as (gamma, sigma) = (real, imag).
	// An entry equal to exactly (1, 0) is treated as the identity and skipped.
	const float one       = bli_s1();
	const float zero      = bli_s0();
	const int   k_minus_1 = k_G - 1;
	const int   nG        = n_A - 1;
	const int   n_fuse    = 2;
	int         j;

	// Hand off to the unfused variant when k_G == 1 or nG < 2*(k_G - 1).
	if ( k_G == 1 || nG < 2*k_minus_1 )
	{
		FLA_Apply_G_rf_ops_var1( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// Start-up and pipeline stages: one sweep over j = -1, 1, 3, ...
	// While j < k_G - 1 (start-up) a sweep performs j + 1 fused steps followed
	// by one trailing single rotation; afterwards (pipeline) it performs k_G
	// fused steps and no trailing rotation.
	for ( j = -1; j < nG - 1; j += n_fuse )
	{
		const int starting_up = ( j < k_minus_1 );
		const int n_iter      = ( starting_up ? j + 1 : k_G );
		int       k           = 0;
		int       g           = j;
		int       i;

		// Fused step: G(g,k) on columns (g, g+1), then G(g+1,k) on (g+1, g+2).
		for ( i = 0; i < n_iter; ++i, ++k, --g )
		{
			scomplex* g12     = buff_G + (g    )*rs_G + (k    )*cs_G;
			scomplex* g23     = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			float*    a1      = buff_A + (g    )*cs_A;
			float*    a2      = buff_A + (g + 1)*cs_A;
			float*    a3      = buff_A + (g + 2)*cs_A;
			float     gamma12 = g12->real;
			float     sigma12 = g12->imag;
			float     gamma23 = g23->real;
			float     sigma23 = g23->imag;
			const int use12   = ( gamma12 != one || sigma12 != zero );
			const int use23   = ( gamma23 != one || sigma23 != zero );

			if ( use12 && use23 )
			{
				// Neither rotation is identity: update all three columns.
				MAC_Apply_G_mx3_ops( m_A,
				                     &gamma12,
				                     &sigma12,
				                     &gamma23,
				                     &sigma23,
				                     a1, rs_A,
				                     a2, rs_A,
				                     a3, rs_A );
			}
			else if ( use12 )
			{
				// Only the (1,2) rotation is non-trivial.
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma12,
				                     &sigma12,
				                     a1, rs_A,
				                     a2, rs_A );
			}
			else if ( use23 )
			{
				// Only the (2,3) rotation is non-trivial.
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma23,
				                     &sigma23,
				                     a2, rs_A,
				                     a3, rs_A );
			}
		}

		// Start-up only: one more rotation, G(g+1,k), on columns (g+1, g+2).
		if ( starting_up )
		{
			scomplex* g23     = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			float*    a2      = buff_A + (g + 1)*cs_A;
			float*    a3      = buff_A + (g + 2)*cs_A;
			float     gamma23 = g23->real;
			float     sigma23 = g23->imag;

			if ( gamma23 != one || sigma23 != zero )
			{
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma23,
				                     &sigma23,
				                     a2, rs_A,
				                     a3, rs_A );
			}
		}
	}

	// Shutdown stage: j restarts at nG % 2 and indexes the first column of G
	// still to be applied. Each sweep opens with the single rotation
	// G(nG-1, j) and continues with k_G - 1 - j fused steps.
	for ( j = nG % n_fuse; j < k_G; j += n_fuse )
	{
		const int n_iter = k_minus_1 - j;
		int       k      = j;
		int       g      = nG - 1;
		int       i;

		{
			scomplex* g12     = buff_G + (g    )*rs_G + (k    )*cs_G;
			float*    a1      = buff_A + (g    )*cs_A;
			float*    a2      = buff_A + (g + 1)*cs_A;
			float     gamma12 = g12->real;
			float     sigma12 = g12->imag;

			if ( gamma12 != one || sigma12 != zero )
			{
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma12,
				                     &sigma12,
				                     a1, rs_A,
				                     a2, rs_A );
			}
		}
		++k;
		--g;

		for ( i = 0; i < n_iter; ++i, ++k, --g )
		{
			scomplex* g12     = buff_G + (g    )*rs_G + (k    )*cs_G;
			scomplex* g23     = buff_G + (g + 1)*rs_G + (k    )*cs_G;
			float*    a1      = buff_A + (g    )*cs_A;
			float*    a2      = buff_A + (g + 1)*cs_A;
			float*    a3      = buff_A + (g + 2)*cs_A;
			float     gamma12 = g12->real;
			float     sigma12 = g12->imag;
			float     gamma23 = g23->real;
			float     sigma23 = g23->imag;
			const int use12   = ( gamma12 != one || sigma12 != zero );
			const int use23   = ( gamma23 != one || sigma23 != zero );

			if ( use12 && use23 )
			{
				// Neither rotation is identity: update all three columns.
				MAC_Apply_G_mx3_ops( m_A,
				                     &gamma12,
				                     &sigma12,
				                     &gamma23,
				                     &sigma23,
				                     a1, rs_A,
				                     a2, rs_A,
				                     a3, rs_A );
			}
			else if ( use12 )
			{
				// Only the (1,2) rotation is non-trivial.
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma12,
				                     &sigma12,
				                     a1, rs_A,
				                     a2, rs_A );
			}
			else if ( use23 )
			{
				// Only the (2,3) rotation is non-trivial.
				MAC_Apply_G_mx2_ops( m_A,
				                     &gamma23,
				                     &sigma23,
				                     a2, rs_A,
				                     a3, rs_A );
			}
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opt_var1 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_opc_var1(), FLA_Apply_G_rf_opd_var1(), FLA_Apply_G_rf_ops_var1(), FLA_Apply_G_rf_opz_var1(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
Referenced by FLA_Apply_G_internal().
{
	// Read the problem geometry from the two objects. The datatype of A alone
	// selects which typed variant-1 kernel runs.
	const FLA_Datatype datatype = FLA_Obj_datatype( A );
	const int          k_G      = FLA_Obj_width( G );
	const int          m_A      = FLA_Obj_length( A );
	const int          n_A      = FLA_Obj_width( A );
	const int          rs_G     = FLA_Obj_row_stride( G );
	const int          cs_G     = FLA_Obj_col_stride( G );
	const int          rs_A     = FLA_Obj_row_stride( A );
	const int          cs_A     = FLA_Obj_col_stride( A );

	// G is accessed through a complex pointer in every branch, including the
	// two branches where A is real.
	if ( datatype == FLA_FLOAT )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

		FLA_Apply_G_rf_ops_var1( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

		FLA_Apply_G_rf_opd_var1( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_COMPLEX )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opc_var1( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE_COMPLEX )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opz_var1( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}

	// Any other datatype does no work. The kernels' return values are not
	// inspected, so the result is always FLA_SUCCESS.
	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opt_var2 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_opc_var2(), FLA_Apply_G_rf_opd_var2(), FLA_Apply_G_rf_ops_var2(), FLA_Apply_G_rf_opz_var2(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
	// Read the problem geometry from the two objects. The datatype of A alone
	// selects which typed variant-2 kernel runs.
	const FLA_Datatype datatype = FLA_Obj_datatype( A );
	const int          k_G      = FLA_Obj_width( G );
	const int          m_A      = FLA_Obj_length( A );
	const int          n_A      = FLA_Obj_width( A );
	const int          rs_G     = FLA_Obj_row_stride( G );
	const int          cs_G     = FLA_Obj_col_stride( G );
	const int          rs_A     = FLA_Obj_row_stride( A );
	const int          cs_A     = FLA_Obj_col_stride( A );

	// G is accessed through a complex pointer in every branch, including the
	// two branches where A is real.
	if ( datatype == FLA_FLOAT )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

		FLA_Apply_G_rf_ops_var2( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

		FLA_Apply_G_rf_opd_var2( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_COMPLEX )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opc_var2( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE_COMPLEX )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opz_var2( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}

	// Any other datatype does no work. The kernels' return values are not
	// inspected, so the result is always FLA_SUCCESS.
	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opt_var3 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_opc_var3(), FLA_Apply_G_rf_opd_var3(), FLA_Apply_G_rf_ops_var3(), FLA_Apply_G_rf_opz_var3(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
	// Read the problem geometry from the two objects. The datatype of A alone
	// selects which typed variant-3 kernel runs.
	const FLA_Datatype datatype = FLA_Obj_datatype( A );
	const int          k_G      = FLA_Obj_width( G );
	const int          m_A      = FLA_Obj_length( A );
	const int          n_A      = FLA_Obj_width( A );
	const int          rs_G     = FLA_Obj_row_stride( G );
	const int          cs_G     = FLA_Obj_col_stride( G );
	const int          rs_A     = FLA_Obj_row_stride( A );
	const int          cs_A     = FLA_Obj_col_stride( A );

	// G is accessed through a complex pointer in every branch, including the
	// two branches where A is real.
	if ( datatype == FLA_FLOAT )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

		FLA_Apply_G_rf_ops_var3( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

		FLA_Apply_G_rf_opd_var3( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_COMPLEX )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opc_var3( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE_COMPLEX )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opz_var3( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}

	// Any other datatype does no work. The kernels' return values are not
	// inspected, so the result is always FLA_SUCCESS.
	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opt_var4 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_opt_var5 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_opt_var6 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_opc_var6(), FLA_Apply_G_rf_opd_var6(), FLA_Apply_G_rf_ops_var6(), FLA_Apply_G_rf_opz_var6(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
	// Read the problem geometry from the two objects. The datatype of A alone
	// selects which typed variant-6 kernel runs.
	const FLA_Datatype datatype = FLA_Obj_datatype( A );
	const int          k_G      = FLA_Obj_width( G );
	const int          m_A      = FLA_Obj_length( A );
	const int          n_A      = FLA_Obj_width( A );
	const int          rs_G     = FLA_Obj_row_stride( G );
	const int          cs_G     = FLA_Obj_col_stride( G );
	const int          rs_A     = FLA_Obj_row_stride( A );
	const int          cs_A     = FLA_Obj_col_stride( A );

	// G is accessed through a complex pointer in every branch, including the
	// two branches where A is real.
	if ( datatype == FLA_FLOAT )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

		FLA_Apply_G_rf_ops_var6( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

		FLA_Apply_G_rf_opd_var6( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_COMPLEX )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opc_var6( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE_COMPLEX )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opz_var6( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}

	// Any other datatype does no work. The kernels' return values are not
	// inspected, so the result is always FLA_SUCCESS.
	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opt_var7 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_opt_var8 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
FLA_Error FLA_Apply_G_rf_opt_var9 | ( | FLA_Obj | G, |
FLA_Obj | A | ||
) |
References FLA_Apply_G_rf_opc_var9(), FLA_Apply_G_rf_opd_var9(), FLA_Apply_G_rf_ops_var9(), FLA_Apply_G_rf_opz_var9(), FLA_Obj_col_stride(), FLA_Obj_datatype(), FLA_Obj_length(), FLA_Obj_row_stride(), and FLA_Obj_width().
{
	// Read the problem geometry from the two objects. The datatype of A alone
	// selects which typed variant-9 kernel runs.
	const FLA_Datatype datatype = FLA_Obj_datatype( A );
	const int          k_G      = FLA_Obj_width( G );
	const int          m_A      = FLA_Obj_length( A );
	const int          n_A      = FLA_Obj_width( A );
	const int          rs_G     = FLA_Obj_row_stride( G );
	const int          cs_G     = FLA_Obj_col_stride( G );
	const int          rs_A     = FLA_Obj_row_stride( A );
	const int          cs_A     = FLA_Obj_col_stride( A );

	// G is accessed through a complex pointer in every branch, including the
	// two branches where A is real.
	if ( datatype == FLA_FLOAT )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		float*    buff_A = ( float*    ) FLA_FLOAT_PTR( A );

		FLA_Apply_G_rf_ops_var9( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		double*   buff_A = ( double*   ) FLA_DOUBLE_PTR( A );

		FLA_Apply_G_rf_opd_var9( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_COMPLEX )
	{
		scomplex* buff_G = ( scomplex* ) FLA_COMPLEX_PTR( G );
		scomplex* buff_A = ( scomplex* ) FLA_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opc_var9( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}
	else if ( datatype == FLA_DOUBLE_COMPLEX )
	{
		dcomplex* buff_G = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( G );
		dcomplex* buff_A = ( dcomplex* ) FLA_DOUBLE_COMPLEX_PTR( A );

		FLA_Apply_G_rf_opz_var9( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
	}

	// Any other datatype does no work. The kernels' return values are not
	// inspected, so the result is always FLA_SUCCESS.
	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opz_var1 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var1(), FLA_Apply_G_rf_opz_var2(), FLA_Apply_G_rf_opz_var3(), FLA_Apply_G_rf_opz_var6(), and FLA_Apply_G_rf_opz_var9().
{
	// Each entry of G stores a rotation as (gamma, sigma) = (real, imag).
	// An entry equal to exactly (1, 0) is treated as the identity and skipped.
	const double one    = bli_d1();
	const double zero   = bli_d0();
	const int    nG_app = n_A - 1;
	int          l;
	int          j;

	// Column l of G is applied in full before column l + 1 is touched.
	for ( l = 0; l < k_G; ++l )
	{
		// Rotation j of the current column acts on columns j and j + 1 of A.
		for ( j = 0; j < nG_app; ++j )
		{
			dcomplex* g11   = buff_G + (j    )*rs_G + (l    )*cs_G;
			dcomplex* a1    = buff_A + (j    )*cs_A;
			dcomplex* a2    = buff_A + (j + 1)*cs_A;
			double    gamma = g11->real;
			double    sigma = g11->imag;

			if ( gamma == one && sigma == zero ) continue;

			MAC_Apply_G_mx2_opz( m_A,
			                     &gamma,
			                     &sigma,
			                     a1, rs_A,
			                     a2, rs_A );
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opz_var2 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var2().
{
	// Each entry of G stores a rotation as (gamma, sigma) = (real, imag).
	// An entry equal to exactly (1, 0) is treated as the identity and skipped.
	const double one       = bli_d1();
	const double zero      = bli_d0();
	const int    k_minus_1 = k_G - 1;
	const int    nG        = n_A - 1;
	int          n_waves;
	int          w;

	// Hand off to the simple variant when k_G == 1 or nG < k_G - 1.
	if ( k_G == 1 || nG < k_minus_1 )
	{
		FLA_Apply_G_rf_opz_var1( k_G,
		                         m_A,
		                         n_A,
		                         buff_G, rs_G, cs_G,
		                         buff_A, rs_A, cs_A );
		return FLA_SUCCESS;
	}

	// Wave w applies every G(g,k) with g + k == w, in order of increasing k.
	// The range of k grows during start-up (w < k_G - 1), spans all k_G
	// columns in the pipeline, and shrinks from below during shutdown
	// (w >= nG), where the first row used is pinned at nG - 1.
	n_waves = nG + k_minus_1;

	for ( w = 0; w < n_waves; ++w )
	{
		const int past_end = w - ( nG - 1 );
		const int k_first  = ( past_end > 0 ? past_end : 0 );
		const int k_last   = ( w < k_minus_1 ? w : k_minus_1 );
		int       k;

		for ( k = k_first; k <= k_last; ++k )
		{
			const int g     = w - k;
			dcomplex* g11   = buff_G + (g    )*rs_G + (k    )*cs_G;
			dcomplex* a1    = buff_A + (g    )*cs_A;
			dcomplex* a2    = buff_A + (g + 1)*cs_A;
			double    gamma = g11->real;
			double    sigma = g11->imag;

			// Nothing to do for an identity rotation.
			if ( gamma == one && sigma == zero ) continue;

			MAC_Apply_G_mx2_opz( m_A,
			                     &gamma,
			                     &sigma,
			                     a1, rs_A,
			                     a2, rs_A );
		}
	}

	return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opz_var3 | ( | int | k_G, |
int | m_A, | ||
int | n_A, | ||
dcomplex * | buff_G, | ||
int | rs_G, | ||
int | cs_G, | ||
dcomplex * | buff_A, | ||
int | rs_A, | ||
int | cs_A | ||
) |
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var3().
// Applies the rotations stored in G to the columns of A, fusing up to four
// rotations per step. Each entry of G is read as (gamma, sigma) = (real, imag);
// an entry equal to exactly (1, 0) is treated as the identity.
//
// With nG = n_A - 1, the routine defers to FLA_Apply_G_rf_opz_var1() when
// nG < 2*(k_G - 1) or k_G == 1. Note that the in-line comment says
// "nG < (k - 1)", but the condition actually tested uses 2*k_minus_1.
//
// Otherwise the work runs in three stages (start-up, pipeline, shutdown), with
// j advancing by n_fuse = 2. The value of j left by the start-up loop is the
// starting value of the pipeline loop.
//
// A fused step reads G(g,k), G(g+1,k), G(g-1,k+1) and G(g,k+1) and touches
// columns g-1 .. g+2 of A. If any of the four is an identity, the remaining
// ones are applied pairwise in the order 23_k1, 34_k1, 12_k2, 23_k2; otherwise
// a single MAC_Apply_G_mx4s_opz() call handles all four.
//
// NOTE(review): in the "n_left == 1" leftover step of the pipeline and shutdown
// stages, the final else branch calls MAC_Apply_G_mx3_opz() both when neither
// rotation is an identity and when both are. Confirm whether the both-identity
// case is meant to be skipped.
//
// The return value is always FLA_SUCCESS.
{ double one = bli_d1(); double zero = bli_d0(); double gamma23_k1; double sigma23_k1; double gamma34_k1; double sigma34_k1; double gamma12_k2; double sigma12_k2; double gamma23_k2; double sigma23_k2; dcomplex* a1; dcomplex* a2; dcomplex* a3; dcomplex* a4; dcomplex* g23_k1; dcomplex* g34_k1; dcomplex* g12_k2; dcomplex* g23_k2; int i, j, g, k; int nG, nG_app; int n_iter; int n_left; int k_minus_1; int n_fuse; int k_fuse; int is_ident23_k1, is_ident34_k1; int is_ident12_k2, is_ident23_k2; int has_ident; k_minus_1 = k_G - 1; nG = n_A - 1; n_fuse = 2; k_fuse = 2; // Use the simple variant for nG < (k - 1) or k == 1. if ( nG < 2*k_minus_1 || k_G == 1 ) { FLA_Apply_G_rf_opz_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A ); return FLA_SUCCESS; } // Start-up phase. for ( j = -1; j < k_minus_1; j += n_fuse ) { nG_app = j + 2; n_iter = nG_app / k_fuse; n_left = 1; for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of columns as needed. 
if ( !is_ident23_k1 ) MAC_Apply_G_mx2_opz( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_opz( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_opz( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_opz( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A ); } else { // Apply to all four columns. MAC_Apply_G_mx4s_opz( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A ); } } if ( n_left == 1 ) { g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_opz( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); } } // Pipeline stage for ( ; j < nG - 1; j += n_fuse ) { nG_app = k_G; n_iter = nG_app / k_fuse; n_left = nG_app % k_fuse; for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of 
columns as needed. if ( !is_ident23_k1 ) MAC_Apply_G_mx2_opz( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_opz( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_opz( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_opz( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A ); } else { // Apply to all four columns. MAC_Apply_G_mx4s_opz( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A ); } } if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident23_k1 && is_ident34_k1 ) { MAC_Apply_G_mx2_opz( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); } else if ( is_ident23_k1 && !is_ident34_k1 ) { MAC_Apply_G_mx2_opz( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); } else { MAC_Apply_G_mx3_opz( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, rs_A, a3, rs_A, a4, rs_A ); } } } // Shutdown stage for ( j = nG % n_fuse; j < k_G; j += n_fuse ) { g = nG - 1; k = j; //n_left = 1; //if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); if ( !is_ident23_k1 ) MAC_Apply_G_mx2_opz( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); ++k; --g; } nG_app = k_minus_1 - j; n_iter = nG_app / k_fuse; n_left = nG_app % k_fuse; for ( i = 0; i < n_iter; ++i, k += k_fuse, g -= n_fuse ) { g23_k1 = buff_G + (g )*rs_G 
+ (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; g12_k2 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G; g23_k2 = buff_G + (g )*rs_G + (k + 1)*cs_G; a1 = buff_A + (g - 1)*cs_A; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; gamma12_k2 = g12_k2->real; sigma12_k2 = g12_k2->imag; gamma23_k2 = g23_k2->real; sigma23_k2 = g23_k2->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); is_ident12_k2 = ( gamma12_k2 == one && sigma12_k2 == zero ); is_ident23_k2 = ( gamma23_k2 == one && sigma23_k2 == zero ); has_ident = ( is_ident23_k1 || is_ident34_k1 || is_ident12_k2 || is_ident23_k2 ); if ( has_ident ) { // Apply to pairs of columns as needed. if ( !is_ident23_k1 ) MAC_Apply_G_mx2_opz( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); if ( !is_ident34_k1 ) MAC_Apply_G_mx2_opz( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); if ( !is_ident12_k2 ) MAC_Apply_G_mx2_opz( m_A, &gamma12_k2, &sigma12_k2, a1, rs_A, a2, rs_A ); if ( !is_ident23_k2 ) MAC_Apply_G_mx2_opz( m_A, &gamma23_k2, &sigma23_k2, a2, rs_A, a3, rs_A ); } else { // Apply to all four columns. 
MAC_Apply_G_mx4s_opz( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, &gamma12_k2, &sigma12_k2, &gamma23_k2, &sigma23_k2, a1, rs_A, a2, rs_A, a3, rs_A, a4, rs_A ); } } if ( n_left == 1 ) { g23_k1 = buff_G + (g )*rs_G + (k )*cs_G; g34_k1 = buff_G + (g + 1)*rs_G + (k )*cs_G; a2 = buff_A + (g )*cs_A; a3 = buff_A + (g + 1)*cs_A; a4 = buff_A + (g + 2)*cs_A; gamma23_k1 = g23_k1->real; sigma23_k1 = g23_k1->imag; gamma34_k1 = g34_k1->real; sigma34_k1 = g34_k1->imag; is_ident23_k1 = ( gamma23_k1 == one && sigma23_k1 == zero ); is_ident34_k1 = ( gamma34_k1 == one && sigma34_k1 == zero ); if ( !is_ident23_k1 && is_ident34_k1 ) { MAC_Apply_G_mx2_opz( m_A, &gamma23_k1, &sigma23_k1, a2, rs_A, a3, rs_A ); } else if ( is_ident23_k1 && !is_ident34_k1 ) { MAC_Apply_G_mx2_opz( m_A, &gamma34_k1, &sigma34_k1, a3, rs_A, a4, rs_A ); } else { MAC_Apply_G_mx3_opz( m_A, &gamma23_k1, &sigma23_k1, &gamma34_k1, &sigma34_k1, a2, rs_A, a3, rs_A, a4, rs_A ); } } } return FLA_SUCCESS; }
FLA_Error FLA_Apply_G_rf_opz_var4( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, dcomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_opz_var5( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, dcomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_opz_var6( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, dcomplex* buff_A, int rs_A, int cs_A )
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var6().
// Body of FLA_Apply_G_rf_opz_var6.
//
// Each entry (g, k) of G holds a (gamma, sigma) pair in its real/imag
// parts (presumably the cosine/sine of a Givens rotation -- confirm against
// the MAC_Apply_G_* macro definitions). Entry (g, k) is applied to columns
// g and g + 1 of A. An entry with gamma == 1 and sigma == 0 is treated as
// the identity and skipped.
//
// This variant fuses two entries per step (n_fuse = 2): G(g, k), acting on
// columns g and g + 1, together with G(g - 1, k + 1), acting on columns
// g - 1 and g. After each fused step k advances by two and g retreats by
// two. When the number of entries to apply in a pass is odd, the one
// leftover entry is applied on its own with the two-column macro.
//
// The work is organized in three phases over nG = n_A - 1 rows of G:
//   start-up : passes j = 0 .. k_G - 2, applying j + 1 entries each;
//   pipeline : passes j = k_G - 1 .. nG - 1, applying k_G entries each;
//   shutdown : passes j = 1 .. k_G - 1, applying k_G - j entries each,
//              starting at column j of G and row nG - 1.
//
// Always returns FLA_SUCCESS.
{
    double one = bli_d1();
    double zero = bli_d0();
    double gamma12;
    double sigma12;
    double gamma23;
    double sigma23;
    dcomplex* a1;
    dcomplex* a2;
    dcomplex* a3;
    dcomplex* g12;
    dcomplex* g23;
    int i, j, g, k;
    int nG, nG_app;
    int n_iter;
    int n_left;
    int k_minus_1;
    int n_fuse;
    int is_ident12, is_ident23;

    k_minus_1 = k_G - 1;
    nG = n_A - 1;
    n_fuse = 2;

    // Use the simple variant for nG < (k - 1) or k == 1.
    if ( nG < k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_opz_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase.
    for ( j = 0; j < k_minus_1; ++j )
    {
        // Pass j applies j + 1 entries: n_iter fused pairs plus n_left
        // (0 or 1) single entries.
        nG_app = j + 1;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        for ( i = 0, k = 0, g = nG_app - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            // g12 is G(g - 1, k + 1); g23 is G(g, k).
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            // a1, a2, a3 are columns g - 1, g, g + 1 of A.
            a1 = buff_A + (g - 1)*cs_A;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // If both entries are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Apply only to columns 1 and 2.
                MAC_Apply_G_mx2_opz( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Apply only to columns 2 and 3.
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Apply to all three columns.
                MAC_Apply_G_mx3b_opz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
            }
        }

        // Leftover single entry; g and k keep the values they had when the
        // fused loop above finished.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
        }
    }

    // Pipeline stage
    for ( j = k_minus_1; j < nG; ++j )
    {
        // Every pass applies k_G entries, starting at G(j, 0).
        nG_app = k_G;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            // g12 is G(g - 1, k + 1); g23 is G(g, k).
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            // a1, a2, a3 are columns g - 1, g, g + 1 of A.
            a1 = buff_A + (g - 1)*cs_A;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // If both entries are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Apply only to columns 1 and 2.
                MAC_Apply_G_mx2_opz( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Apply only to columns 2 and 3.
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Apply to all three columns.
                MAC_Apply_G_mx3b_opz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
            }
        }

        // Leftover single entry (only when k_G is odd).
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
        }
    }

    // Shutdown stage
    for ( j = 1; j < k_G; ++j )
    {
        // Pass j applies the k_G - j entries that start at G(nG - 1, j).
        nG_app = k_G - j;
        n_iter = nG_app / n_fuse;
        n_left = nG_app % n_fuse;

        for ( i = 0, k = j, g = nG - 1; i < n_iter; ++i, k += n_fuse, g -= n_fuse )
        {
            // g12 is G(g - 1, k + 1); g23 is G(g, k).
            g12 = buff_G + (g - 1)*rs_G + (k + 1)*cs_G;
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            // a1, a2, a3 are columns g - 1, g, g + 1 of A.
            a1 = buff_A + (g - 1)*cs_A;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // If both entries are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Apply only to columns 1 and 2.
                MAC_Apply_G_mx2_opz( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Apply only to columns 2 and 3.
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Apply to all three columns.
                MAC_Apply_G_mx3b_opz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
            }
        }

        // Leftover single entry when k_G - j is odd.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a2 = buff_A + (g    )*cs_A;
            a3 = buff_A + (g + 1)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
        }
    }

    return FLA_SUCCESS;
}
FLA_Error FLA_Apply_G_rf_opz_var7( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, dcomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_opz_var8( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, dcomplex* buff_A, int rs_A, int cs_A )
FLA_Error FLA_Apply_G_rf_opz_var9( int k_G, int m_A, int n_A, dcomplex* buff_G, int rs_G, int cs_G, dcomplex* buff_A, int rs_A, int cs_A )
References bli_d0(), bli_d1(), FLA_Apply_G_rf_opz_var1(), dcomplex::imag, and dcomplex::real.
Referenced by FLA_Apply_G_rf_opt_var9().
// Body of FLA_Apply_G_rf_opz_var9.
//
// Each entry (g, k) of G holds a (gamma, sigma) pair in its real/imag
// parts (presumably the cosine/sine of a Givens rotation -- confirm against
// the MAC_Apply_G_* macro definitions). Entry (g, k) is applied to columns
// g and g + 1 of A. An entry with gamma == 1 and sigma == 0 is treated as
// the identity and skipped.
//
// This variant fuses two entries taken from the SAME column k of G:
// G(g, k), acting on columns g and g + 1, together with G(g + 1, k), acting
// on columns g + 1 and g + 2. After each fused step k advances by one and
// g retreats by one. The outer index j advances by n_fuse = 2 per pass.
//
// The work is organized in three phases over nG = n_A - 1 rows of G:
//   start-up : j = -1, 1, 3, ... while j < k_G - 1; j + 1 fused steps
//              followed by one single entry;
//   pipeline : j continues from the start-up value while j < nG - 1;
//              k_G fused steps per pass;
//   shutdown : j = nG % 2, then steps of 2 while j < k_G; one single entry
//              G(nG - 1, j) followed by (k_G - 1) - j fused steps.
//
// Always returns FLA_SUCCESS.
{
    double one = bli_d1();
    double zero = bli_d0();
    double gamma12;
    double sigma12;
    double gamma23;
    double sigma23;
    dcomplex* a1;
    dcomplex* a2;
    dcomplex* a3;
    dcomplex* g12;
    dcomplex* g23;
    int i, j, g, k;
    int nG, nG_app;
    int n_iter;
    int n_left;
    int k_minus_1;
    int n_fuse;
    int is_ident12, is_ident23;

    k_minus_1 = k_G - 1;
    nG = n_A - 1;
    n_fuse = 2;

    // Use the simple variant for nG < 2*(k - 1) or k == 1.
    if ( nG < 2*k_minus_1 || k_G == 1 )
    {
        FLA_Apply_G_rf_opz_var1( k_G, m_A, n_A, buff_G, rs_G, cs_G, buff_A, rs_A, cs_A );
        return FLA_SUCCESS;
    }

    // Start-up phase.
    for ( j = -1; j < k_minus_1; j += n_fuse )
    {
        // j + 1 fused steps (none on the first pass, where j == -1).
        nG_app = j + 1;
        n_iter = nG_app;
        // n_left is set to 1 unconditionally, so the single-entry step
        // below runs on every start-up pass.
        n_left = 1;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
        {
            // g12 is G(g, k); g23 is G(g + 1, k).
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            // a1, a2, a3 are columns g, g + 1, g + 2 of A.
            a1 = buff_A + (g    )*cs_A;
            a2 = buff_A + (g + 1)*cs_A;
            a3 = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // If both entries are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Apply only to columns 1 and 2.
                MAC_Apply_G_mx2_opz( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Apply only to columns 2 and 3.
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Apply to all three columns.
                MAC_Apply_G_mx3_opz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
            }
        }

        // Single entry at the head of the pass. After the loop above,
        // g == -1 and k == j + 1, so this is G(0, j + 1) applied to
        // columns 0 and 1 of A.
        if ( n_left == 1 )
        {
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            a2 = buff_A + (g + 1)*cs_A;
            a3 = buff_A + (g + 2)*cs_A;

            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident23 = ( gamma23 == one && sigma23 == zero );

            if ( !is_ident23 )
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
        }
    }

    // Pipeline stage
    // j deliberately carries over from the start-up loop (no initializer).
    for ( ; j < nG - 1; j += n_fuse )
    {
        nG_app = k_G;
        n_iter = nG_app;
        // n_left is assigned but not read in this stage.
        n_left = 0;

        for ( i = 0, k = 0, g = j; i < n_iter; ++i, ++k, --g )
        {
            // g12 is G(g, k); g23 is G(g + 1, k).
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            // a1, a2, a3 are columns g, g + 1, g + 2 of A.
            a1 = buff_A + (g    )*cs_A;
            a2 = buff_A + (g + 1)*cs_A;
            a3 = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // If both entries are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Apply only to columns 1 and 2.
                MAC_Apply_G_mx2_opz( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Apply only to columns 2 and 3.
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Apply to all three columns.
                MAC_Apply_G_mx3_opz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
            }
        }
    }

    // Shutdown stage
    for ( j = nG % n_fuse; j < k_G; j += n_fuse )
    {
        g = nG - 1;
        k = j;

        // n_left is set to 1 unconditionally, so the single-entry step
        // always runs first: G(nG - 1, j) applied to columns nG - 1 and nG.
        n_left = 1;
        if ( n_left == 1 )
        {
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            a1 = buff_A + (g    )*cs_A;
            a2 = buff_A + (g + 1)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );

            if ( !is_ident12 )
                MAC_Apply_G_mx2_opz( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );

            // Move on to the next column of G and the previous row.
            ++k;
            --g;
        }

        // Remaining fused steps cover columns j + 1 .. k_G - 1 of G.
        nG_app = k_minus_1 - j;
        n_iter = nG_app;

        for ( i = 0; i < n_iter; ++i, ++k, --g )
        {
            // g12 is G(g, k); g23 is G(g + 1, k).
            g12 = buff_G + (g    )*rs_G + (k    )*cs_G;
            g23 = buff_G + (g + 1)*rs_G + (k    )*cs_G;
            // a1, a2, a3 are columns g, g + 1, g + 2 of A.
            a1 = buff_A + (g    )*cs_A;
            a2 = buff_A + (g + 1)*cs_A;
            a3 = buff_A + (g + 2)*cs_A;

            gamma12 = g12->real;
            sigma12 = g12->imag;
            gamma23 = g23->real;
            sigma23 = g23->imag;

            is_ident12 = ( gamma12 == one && sigma12 == zero );
            is_ident23 = ( gamma23 == one && sigma23 == zero );

            // If both entries are identity, nothing is applied.
            if ( !is_ident12 && is_ident23 )
            {
                // Apply only to columns 1 and 2.
                MAC_Apply_G_mx2_opz( m_A, &gamma12, &sigma12, a1, rs_A, a2, rs_A );
            }
            else if ( is_ident12 && !is_ident23 )
            {
                // Apply only to columns 2 and 3.
                MAC_Apply_G_mx2_opz( m_A, &gamma23, &sigma23, a2, rs_A, a3, rs_A );
            }
            else if ( !is_ident12 && !is_ident23 )
            {
                // Apply to all three columns.
                MAC_Apply_G_mx3_opz( m_A, &gamma12, &sigma12, &gamma23, &sigma23, a1, rs_A, a2, rs_A, a3, rs_A );
            }
        }
    }

    return FLA_SUCCESS;
}