libflame
revision_anchor
|
00001 /* 00002 libflame 00003 An object-based infrastructure for developing high-performance 00004 dense linear algebra libraries. 00005 00006 Copyright (C) 2011, The University of Texas 00007 00008 libflame is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU Lesser General Public License as 00010 published by the Free Software Foundation; either version 2.1 of 00011 the License, or (at your option) any later version. 00012 00013 libflame is distributed in the hope that it will be useful, but 00014 WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 Lesser General Public License for more details. 00017 00018 You should have received a copy of the GNU Lesser General Public 00019 License along with libflame; if you did not receive a copy, see 00020 http://www.gnu.org/licenses/. 00021 00022 For more information, please contact us at flame@cs.utexas.edu or 00023 send mail to: 00024 00025 Field G. Van Zee and/or 00026 Robert A. van de Geijn 00027 The University of Texas at Austin 00028 Department of Computer Sciences 00029 1 University Station C0500 00030 Austin TX 78712 00031 */ 00032 00033 FLA_Error FLA_Tridiag_UT_l_blk_var1( FLA_Obj A, FLA_Obj T ); 00034 FLA_Error FLA_Tridiag_UT_l_unb_var1( FLA_Obj A, FLA_Obj T ); 00035 FLA_Error FLA_Tridiag_UT_l_step_unb_var1( FLA_Obj A, FLA_Obj T ); 00036 00037 FLA_Error FLA_Tridiag_UT_l_blk_var2( FLA_Obj A, FLA_Obj T ); 00038 FLA_Error FLA_Tridiag_UT_l_blf_var2( FLA_Obj A, FLA_Obj T ); 00039 FLA_Error FLA_Tridiag_UT_l_unb_var2( FLA_Obj A, FLA_Obj T ); 00040 FLA_Error FLA_Tridiag_UT_l_step_unb_var2( FLA_Obj A, FLA_Obj T ); 00041 00042 FLA_Error FLA_Tridiag_UT_l_blk_var3( FLA_Obj A, FLA_Obj T ); 00043 FLA_Error FLA_Tridiag_UT_l_blf_var3( FLA_Obj A, FLA_Obj T ); 00044 FLA_Error FLA_Tridiag_UT_l_unb_var3( FLA_Obj A, FLA_Obj T ); 00045 FLA_Error FLA_Tridiag_UT_l_step_unb_var3( FLA_Obj A, FLA_Obj Z, FLA_Obj T ); 00046 00047 FLA_Error FLA_Tridiag_UT_l_opt_var1( FLA_Obj A, FLA_Obj T ); 00048 FLA_Error FLA_Tridiag_UT_l_step_opt_var1( FLA_Obj A, FLA_Obj T ); 00049 FLA_Error FLA_Tridiag_UT_l_step_ops_var1( int m_A, 00050 int m_T, 00051 float* buff_A, int rs_A, int cs_A, 00052 float* buff_T, int rs_T, int cs_T ); 00053 FLA_Error FLA_Tridiag_UT_l_step_opd_var1( int m_A, 00054 int m_T, 00055 double* buff_A, int rs_A, int cs_A, 00056 double* buff_T, int rs_T, int cs_T ); 00057 FLA_Error FLA_Tridiag_UT_l_step_opc_var1( int m_A, 00058 int m_T, 00059 scomplex* buff_A, int rs_A, int cs_A, 00060 scomplex* buff_T, int rs_T, int cs_T ); 00061 FLA_Error FLA_Tridiag_UT_l_step_opz_var1( int m_A, 00062 int m_T, 00063 dcomplex* buff_A, int rs_A, int cs_A, 00064 dcomplex* buff_T, int rs_T, int cs_T ); 00065 00066 FLA_Error FLA_Tridiag_UT_l_opt_var2( FLA_Obj A, FLA_Obj T ); 00067 FLA_Error FLA_Tridiag_UT_l_step_opt_var2( FLA_Obj A, FLA_Obj T ); 00068 FLA_Error FLA_Tridiag_UT_l_step_ops_var2( int m_A, 00069 int m_T, 00070 float* buff_A, int rs_A, int cs_A, 00071 float* buff_T, int rs_T, int cs_T ); 00072 FLA_Error FLA_Tridiag_UT_l_step_opd_var2( int m_A, 00073 int m_T, 00074 double* buff_A, int rs_A, int cs_A, 00075 double* buff_T, int rs_T, int cs_T ); 00076 FLA_Error FLA_Tridiag_UT_l_step_opc_var2( int m_A, 00077 int m_T, 00078 scomplex* buff_A, int rs_A, int cs_A, 00079 scomplex* buff_T, int rs_T, int cs_T ); 00080 FLA_Error FLA_Tridiag_UT_l_step_opz_var2( int m_A, 00081 int m_T, 00082 dcomplex* buff_A, int rs_A, int cs_A, 00083 dcomplex* buff_T, int rs_T, int cs_T ); 00084 00085 FLA_Error FLA_Tridiag_UT_l_opt_var3( FLA_Obj A, FLA_Obj T ); 00086 FLA_Error FLA_Tridiag_UT_l_step_opt_var3( FLA_Obj A, FLA_Obj Z, FLA_Obj T ); 00087 FLA_Error FLA_Tridiag_UT_l_step_ops_var3( int m_A, 00088 int m_T, 00089 float* buff_A, int rs_A, int cs_A, 00090 float* buff_Z, int rs_Z, int cs_Z, 00091 float* buff_T, int rs_T, int cs_T ); 00092 FLA_Error FLA_Tridiag_UT_l_step_opd_var3( int m_A, 00093 int m_T, 00094 double* buff_A, int rs_A, int cs_A, 00095 double* buff_Z, int rs_Z, int cs_Z, 00096 double* buff_T, int rs_T, int cs_T ); 00097 FLA_Error FLA_Tridiag_UT_l_step_opc_var3( int m_A, 00098 int m_T, 00099 scomplex* buff_A, int rs_A, int cs_A, 00100 scomplex* buff_Z, int rs_Z, int cs_Z, 00101 scomplex* buff_T, int rs_T, int cs_T ); 00102 FLA_Error FLA_Tridiag_UT_l_step_opz_var3( int m_A, 00103 int m_T, 00104 dcomplex* buff_A, int rs_A, int cs_A, 00105 dcomplex* buff_Z, int rs_Z, int cs_Z, 00106 dcomplex* buff_T, int rs_T, int cs_T ); 00107 00108 FLA_Error FLA_Tridiag_UT_l_ofu_var1( FLA_Obj A, FLA_Obj T ); 00109 FLA_Error FLA_Tridiag_UT_l_step_ofu_var1( FLA_Obj A, FLA_Obj T ); 00110 FLA_Error FLA_Tridiag_UT_l_step_ofs_var1( int m_A, 00111 int m_T, 00112 float* buff_A, int rs_A, int cs_A, 00113 float* buff_T, int rs_T, int cs_T ); 00114 FLA_Error FLA_Tridiag_UT_l_step_ofd_var1( int m_A, 00115 int m_T, 00116 double* buff_A, int rs_A, int cs_A, 00117 double* buff_T, int rs_T, int cs_T ); 00118 FLA_Error FLA_Tridiag_UT_l_step_ofc_var1( int m_A, 00119 int m_T, 00120 scomplex* buff_A, int rs_A, int cs_A, 00121 scomplex* buff_T, int rs_T, int cs_T ); 00122 FLA_Error FLA_Tridiag_UT_l_step_ofz_var1( int m_A, 00123 int m_T, 00124 dcomplex* buff_A, int rs_A, int cs_A, 00125 dcomplex* buff_T, int rs_T, int cs_T ); 00126 00127 FLA_Error FLA_Tridiag_UT_l_ofu_var2( FLA_Obj A, FLA_Obj T ); 00128 FLA_Error FLA_Tridiag_UT_l_step_ofu_var2( FLA_Obj A, FLA_Obj T ); 00129 FLA_Error FLA_Tridiag_UT_l_step_ofs_var2( int m_A, 00130 int m_T, 00131 float* buff_A, int rs_A, int cs_A, 00132 float* buff_T, int rs_T, int cs_T ); 00133 FLA_Error FLA_Tridiag_UT_l_step_ofd_var2( int m_A, 00134 int m_T, 00135 double* buff_A, int rs_A, int cs_A, 00136 double* buff_T, int rs_T, int cs_T ); 00137 FLA_Error FLA_Tridiag_UT_l_step_ofc_var2( int m_A, 00138 int m_T, 00139 scomplex* buff_A, int rs_A, int cs_A, 00140 scomplex* buff_T, int rs_T, int cs_T ); 00141 FLA_Error FLA_Tridiag_UT_l_step_ofz_var2( int m_A, 00142 int m_T, 00143 dcomplex* buff_A, int rs_A, int cs_A, 00144 dcomplex* buff_T, int rs_T, int cs_T ); 00145 00146 FLA_Error FLA_Tridiag_UT_l_ofu_var3( FLA_Obj A, FLA_Obj T ); 00147 FLA_Error FLA_Tridiag_UT_l_step_ofu_var3( FLA_Obj A, FLA_Obj Z, FLA_Obj T ); 00148 FLA_Error FLA_Tridiag_UT_l_step_ofs_var3( int m_A, 00149 int m_T, 00150 float* buff_A, int rs_A, int cs_A, 00151 float* buff_Z, int rs_Z, int cs_Z, 00152 float* buff_T, int rs_T, int cs_T ); 00153 FLA_Error FLA_Tridiag_UT_l_step_ofd_var3( int m_A, 00154 int m_T, 00155 double* buff_A, int rs_A, int cs_A, 00156 double* buff_Z, int rs_Z, int cs_Z, 00157 double* buff_T, int rs_T, int cs_T ); 00158 FLA_Error FLA_Tridiag_UT_l_step_ofc_var3( int m_A, 00159 int m_T, 00160 scomplex* buff_A, int rs_A, int cs_A, 00161 scomplex* buff_Z, int rs_Z, int cs_Z, 00162 scomplex* buff_T, int rs_T, int cs_T ); 00163 FLA_Error FLA_Tridiag_UT_l_step_ofz_var3( int m_A, 00164 int m_T, 00165 dcomplex* buff_A, int rs_A, int cs_A, 00166 dcomplex* buff_Z, int rs_Z, int cs_Z, 00167 dcomplex* buff_T, int rs_T, int cs_T ); 00168 00169 // --- Fused operations --- 00170 00171 FLA_Error FLA_Fused_Her2_Ax_l_opt_var1( FLA_Obj alpha, FLA_Obj u, FLA_Obj z, FLA_Obj A, FLA_Obj x, FLA_Obj w ); 00172 FLA_Error FLA_Fused_Her2_Ax_l_ops_var1( int m_A, 00173 float* buff_alpha, 00174 float* buff_u, int inc_u, 00175 float* buff_z, int inc_z, 00176 float* buff_A, int rs_A, int cs_A, 00177 float* buff_x, int inc_x, 00178 float* buff_w, int inc_w ); 00179 FLA_Error FLA_Fused_Her2_Ax_l_opd_var1( int m_A, 00180 double* buff_alpha, 00181 double* buff_u, int inc_u, 00182 double* buff_z, int inc_z, 00183 double* buff_A, int rs_A, int cs_A, 00184 double* buff_x, int inc_x, 00185 double* buff_w, int inc_w ); 00186 FLA_Error FLA_Fused_Her2_Ax_l_opc_var1( int m_A, 00187 scomplex* buff_alpha, 00188 scomplex* buff_u, int inc_u, 00189 scomplex* buff_z, int inc_z, 00190 scomplex* buff_A, int rs_A, int cs_A, 00191 scomplex* buff_x, int inc_x, 00192 scomplex* buff_w, int inc_w ); 00193 FLA_Error FLA_Fused_Her2_Ax_l_opz_var1( int m_A, 00194 dcomplex* buff_alpha, 00195 dcomplex* buff_u, int inc_u, 00196 dcomplex* buff_z, int inc_z, 00197 dcomplex* buff_A, int rs_A, int cs_A, 00198 dcomplex* buff_x, int inc_x, 00199 dcomplex* buff_w, int inc_w ); 00200 00201 FLA_Error FLA_Fused_UZhu_ZUhu_opt_var1( FLA_Obj delta, FLA_Obj U, FLA_Obj Z, FLA_Obj t, FLA_Obj u, FLA_Obj w ); 00202 FLA_Error FLA_Fused_UZhu_ZUhu_ops_var1( int m_U, 00203 int n_U, 00204 float* buff_delta, 00205 float* buff_U, int rs_U, int cs_U, 00206 float* buff_Z, int rs_Z, int cs_Z, 00207 float* buff_t, int inc_t, 00208 float* buff_u, int inc_u, 00209 float* buff_w, int inc_w ); 00210 FLA_Error FLA_Fused_UZhu_ZUhu_opd_var1( int m_U, 00211 int n_U, 00212 double* buff_delta, 00213 double* buff_U, int rs_U, int cs_U, 00214 double* buff_Z, int rs_Z, int cs_Z, 00215 double* buff_t, int inc_t, 00216 double* buff_u, int inc_u, 00217 double* buff_w, int inc_w ); 00218 FLA_Error FLA_Fused_UZhu_ZUhu_opc_var1( int m_U, 00219 int n_U, 00220 scomplex* buff_delta, 00221 scomplex* buff_U, int rs_U, int cs_U, 00222 scomplex* buff_Z, int rs_Z, int cs_Z, 00223 scomplex* buff_t, int inc_t, 00224 scomplex* buff_u, int inc_u, 00225 scomplex* buff_w, int inc_w ); 00226 FLA_Error FLA_Fused_UZhu_ZUhu_opz_var1( int m_U, 00227 int n_U, 00228 dcomplex* buff_delta, 00229 dcomplex* buff_U, int rs_U, int cs_U, 00230 dcomplex* buff_Z, int rs_Z, int cs_Z, 00231 dcomplex* buff_t, int inc_t, 00232 dcomplex* buff_u, int inc_u, 00233 dcomplex* buff_w, int inc_w );