libflame
revision_anchor
|
00001 /* 00002 libflame 00003 An object-based infrastructure for developing high-performance 00004 dense linear algebra libraries. 00005 00006 Copyright (C) 2011, The University of Texas 00007 00008 libflame is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU Lesser General Public License as 00010 published by the Free Software Foundation; either version 2.1 of 00011 the License, or (at your option) any later version. 00012 00013 libflame is distributed in the hope that it will be useful, but 00014 WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 Lesser General Public License for more details. 00017 00018 You should have received a copy of the GNU Lesser General Public 00019 License along with libflame; if you did not receive a copy, see 00020 http://www.gnu.org/licenses/. 00021 00022 For more information, please contact us at flame@cs.utexas.edu or 00023 send mail to: 00024 00025 Field G. Van Zee and/or 00026 Robert A. van de Geijn 00027 The University of Texas at Austin 00028 Department of Computer Sciences 00029 1 University Station C0500 00030 Austin TX 78712 00031 */ 00032 00033 // --- top-level wrapper prototypes -------------------------------------------- 00034 00035 FLA_Error FLA_Asum( FLA_Obj x, FLA_Obj asum_x ); 00036 FLA_Error FLA_Axpy( FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00037 FLA_Error FLA_Axpys( FLA_Obj alpha0, FLA_Obj alpha1, FLA_Obj A, FLA_Obj beta, FLA_Obj B ); 00038 FLA_Error FLA_Axpyt( FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00039 FLA_Error FLA_Axpyrt( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00040 FLA_Error FLA_Copy( FLA_Obj A, FLA_Obj B ); 00041 FLA_Error FLA_Copyr( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B ); 00042 FLA_Error FLA_Copyrt( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00043 FLA_Error FLA_Copyt( FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00044 FLA_Error FLA_Copyr( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B ); 00045 FLA_Error FLA_Dot( FLA_Obj x, FLA_Obj y, FLA_Obj rho ); 00046 FLA_Error FLA_Dot2cs( FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00047 FLA_Error FLA_Dot2s( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00048 FLA_Error FLA_Dotc( FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho ); 00049 FLA_Error FLA_Dotcs( FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00050 FLA_Error FLA_Dots( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00051 FLA_Error FLA_Amax( FLA_Obj x, FLA_Obj index ); 00052 FLA_Error FLA_Inv_scal( FLA_Obj alpha, FLA_Obj A ); 00053 FLA_Error FLA_Inv_scalc( FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A ); 00054 FLA_Error FLA_Nrm2( FLA_Obj x, FLA_Obj norm_x ); 00055 FLA_Error FLA_Scal( FLA_Obj alpha, FLA_Obj A ); 00056 FLA_Error FLA_Scalc( FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A ); 00057 FLA_Error FLA_Scalr( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A ); 00058 FLA_Error FLA_Swap( FLA_Obj A, FLA_Obj B ); 00059 FLA_Error FLA_Swapt( FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00060 00061 00062 // --- task wrapper prototypes ------------------------------------------------- 00063 00064 FLA_Error FLA_Axpy_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t* cntl ); 00065 FLA_Error FLA_Axpyt_task( FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t* cntl ); 00066 FLA_Error FLA_Copy_task( FLA_Obj A, FLA_Obj B, fla_copy_t* cntl ); 00067 FLA_Error FLA_Copyt_task( FLA_Trans trans, FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl ); 00068 FLA_Error FLA_Copyr_task( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, fla_copyr_t* cntl ); 00069 FLA_Error FLA_Scal_task( FLA_Obj alpha, FLA_Obj A, fla_scal_t* cntl ); 00070 FLA_Error FLA_Scalr_task( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, fla_scalr_t* cntl ); 00071 00072 FLA_Error FLA_Axpyt_n_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t* cntl ); 00073 FLA_Error FLA_Axpyt_t_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t* cntl ); 00074 FLA_Error FLA_Axpyt_c_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t* cntl ); 00075 FLA_Error FLA_Axpyt_h_task( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t* cntl ); 00076 00077 FLA_Error FLA_Copyt_n_task( FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl ); 00078 FLA_Error FLA_Copyt_t_task( FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl ); 00079 FLA_Error FLA_Copyt_c_task( FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl ); 00080 FLA_Error FLA_Copyt_h_task( FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl ); 00081 00082 FLA_Error FLA_Copyr_l_task( FLA_Obj A, FLA_Obj B, fla_copyr_t* cntl ); 00083 FLA_Error FLA_Copyr_u_task( FLA_Obj A, FLA_Obj B, fla_copyr_t* cntl ); 00084 00085 FLA_Error FLA_Scalr_l_task( FLA_Obj alpha, FLA_Obj A, fla_scalr_t* cntl ); 00086 FLA_Error FLA_Scalr_u_task( FLA_Obj alpha, FLA_Obj A, fla_scalr_t* cntl ); 00087 00088 00089 // --- external wrapper prototypes --------------------------------------------- 00090 00091 FLA_Error FLA_Asum_external( FLA_Obj x, FLA_Obj asum_x ); 00092 FLA_Error FLA_Axpy_external( FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00093 FLA_Error FLA_Axpys_external( FLA_Obj alpha0, FLA_Obj alpha1, FLA_Obj A, FLA_Obj beta, FLA_Obj B ); 00094 FLA_Error FLA_Axpyt_external( FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00095 FLA_Error FLA_Axpyrt_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00096 FLA_Error FLA_Copy_external( FLA_Obj A, FLA_Obj B ); 00097 FLA_Error FLA_Copyr_external( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B ); 00098 FLA_Error FLA_Copyrt_external( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00099 FLA_Error FLA_Copyt_external( FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00100 FLA_Error FLA_Dot_external( FLA_Obj x, FLA_Obj y, FLA_Obj rho ); 00101 FLA_Error FLA_Dotc_external( FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho ); 00102 FLA_Error FLA_Dots_external( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00103 FLA_Error FLA_Dotcs_external( FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00104 FLA_Error FLA_Dot2s_external( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00105 FLA_Error FLA_Dot2cs_external( FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00106 FLA_Error FLA_Amax_external( FLA_Obj x, FLA_Obj index ); 00107 FLA_Error FLA_Inv_scal_external( FLA_Obj alpha, FLA_Obj A ); 00108 FLA_Error FLA_Inv_scalc_external( FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A ); 00109 FLA_Error FLA_Nrm2_external( FLA_Obj x, FLA_Obj nrm_x ); 00110 FLA_Error FLA_Scal_external( FLA_Obj alpha, FLA_Obj A ); 00111 FLA_Error FLA_Scalc_external( FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A ); 00112 FLA_Error FLA_Scalr_external( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A ); 00113 FLA_Error FLA_Swap_external( FLA_Obj A, FLA_Obj B ); 00114 FLA_Error FLA_Swapt_external( FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00115 00116 00117 // --- gpu wrapper prototypes -------------------------------------------------- 00118 00119 FLA_Error FLA_Axpy_external_gpu( FLA_Obj alpha, FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu ); 00120 FLA_Error FLA_Copy_external_gpu( FLA_Obj A, void* A_gpu, FLA_Obj B, void* B_gpu ); 00121 FLA_Error FLA_Scal_external_gpu( FLA_Obj alpha, FLA_Obj A, void* A_gpu ); 00122 FLA_Error FLA_Scalr_external_gpu( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, void* A_gpu ); 00123 00124 00125 // --- check routine prototypes ------------------------------------------------ 00126 00127 // front-ends 00128 FLA_Error FLA_Asum_check( FLA_Obj x, FLA_Obj asum_x ); 00129 FLA_Error FLA_Axpy_check( FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00130 FLA_Error FLA_Axpys_check( FLA_Obj alpha0, FLA_Obj alpha1, FLA_Obj A, FLA_Obj beta, FLA_Obj B ); 00131 FLA_Error FLA_Axpyt_check( FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00132 FLA_Error FLA_Axpyrt_check( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B ); 00133 FLA_Error FLA_Copy_check( FLA_Obj A, FLA_Obj B ); 00134 FLA_Error FLA_Copyr_check( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B ); 00135 FLA_Error FLA_Copyrt_check( FLA_Uplo uplo, FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00136 FLA_Error FLA_Copyt_check( FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00137 FLA_Error FLA_Dot_check( FLA_Obj x, FLA_Obj y, FLA_Obj rho ); 00138 FLA_Error FLA_Dotc_check( FLA_Conj conj, FLA_Obj x, FLA_Obj y, FLA_Obj rho ); 00139 FLA_Error FLA_Dots_check( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00140 FLA_Error FLA_Dotcs_check( FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00141 FLA_Error FLA_Dot2s_check( FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00142 FLA_Error FLA_Dot2cs_check( FLA_Conj conj, FLA_Obj alpha, FLA_Obj x, FLA_Obj y, FLA_Obj beta, FLA_Obj rho ); 00143 FLA_Error FLA_Amax_check( FLA_Obj x, FLA_Obj index ); 00144 FLA_Error FLA_Inv_scal_check( FLA_Obj alpha, FLA_Obj A ); 00145 FLA_Error FLA_Inv_scalc_check( FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A ); 00146 FLA_Error FLA_Nrm2_check( FLA_Obj x, FLA_Obj nrm_x ); 00147 FLA_Error FLA_Scal_check( FLA_Obj alpha, FLA_Obj A ); 00148 FLA_Error FLA_Scalc_check( FLA_Conj conjalpha, FLA_Obj alpha, FLA_Obj A ); 00149 FLA_Error FLA_Scalr_check( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A ); 00150 FLA_Error FLA_Swap_check( FLA_Obj A, FLA_Obj B ); 00151 FLA_Error FLA_Swapt_check( FLA_Trans trans, FLA_Obj A, FLA_Obj B ); 00152 00153 // internal back-ends 00154 FLA_Error FLA_Axpy_internal_check( FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpy_t* cntl ); 00155 FLA_Error FLA_Axpyt_internal_check( FLA_Trans trans, FLA_Obj alpha, FLA_Obj A, FLA_Obj B, fla_axpyt_t* cntl ); 00156 FLA_Error FLA_Copy_internal_check( FLA_Obj A, FLA_Obj B, fla_copy_t* cntl ); 00157 FLA_Error FLA_Copyt_internal_check( FLA_Trans trans, FLA_Obj A, FLA_Obj B, fla_copyt_t* cntl ); 00158 FLA_Error FLA_Copyr_internal_check( FLA_Uplo uplo, FLA_Obj A, FLA_Obj B, fla_copyr_t* cntl ); 00159 FLA_Error FLA_Scal_internal_check( FLA_Obj alpha, FLA_Obj A, fla_scal_t* cntl ); 00160 FLA_Error FLA_Scalr_internal_check( FLA_Uplo uplo, FLA_Obj alpha, FLA_Obj A, fla_scalr_t* cntl ); 00161