libflame
revision_anchor
|
00001 /* 00002 libflame 00003 An object-based infrastructure for developing high-performance 00004 dense linear algebra libraries. 00005 00006 Copyright (C) 2011, The University of Texas 00007 00008 libflame is free software; you can redistribute it and/or modify 00009 it under the terms of the GNU Lesser General Public License as 00010 published by the Free Software Foundation; either version 2.1 of 00011 the License, or (at your option) any later version. 00012 00013 libflame is distributed in the hope that it will be useful, but 00014 WITHOUT ANY WARRANTY; without even the implied warranty of 00015 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 Lesser General Public License for more details. 00017 00018 You should have received a copy of the GNU Lesser General Public 00019 License along with libflame; if you did not receive a copy, see 00020 http://www.gnu.org/licenses/. 00021 00022 For more information, please contact us at flame@cs.utexas.edu or 00023 send mail to: 00024 00025 Field G. Van Zee and/or 00026 Robert A. van de Geijn 00027 The University of Texas at Austin 00028 Department of Computer Sciences 00029 1 University Station C0500 00030 Austin TX 78712 00031 */ 00032 00033 FLA_Error FLA_Apply_GTG( FLA_Obj gamma, FLA_Obj sigma, FLA_Obj delta1, FLA_Obj epsilon1, FLA_Obj delta2 ); 00034 FLA_Error FLA_Apply_GTG_ops( float* gamma, 00035 float* sigma, 00036 float* delta1, 00037 float* epsilon1, 00038 float* delta2 ); 00039 FLA_Error FLA_Apply_GTG_opd( double* gamma, 00040 double* sigma, 00041 double* delta1, 00042 double* epsilon1, 00043 double* delta2 ); 00044 00045 #define MAC_Apply_GTG_ops( gamma, sigma, delta1, epsilon, delta2 ) \ 00046 { \ 00047 float g, s; \ 00048 float d1, e, d2; \ 00049 float g2, s2, tgse; \ 00050 \ 00051 g = *(gamma); \ 00052 s = *(sigma); \ 00053 \ 00054 d1 = *(delta1); \ 00055 e = *(epsilon); \ 00056 d2 = *(delta2); \ 00057 \ 00058 g2 = g * g; \ 00059 s2 = s * s; \ 00060 tgse = 2.0 * g * s * e; \ 00061 \ 00062 *(delta1) = g2 * d1 + tgse + s2 * d2; \ 00063 *(delta2) = s2 * d1 - tgse + g2 * d2; \ 00064 *(epsilon) = g * s * (d2 - d1) + e * (g2 - s2); \ 00065 } 00066 00067 #define MAC_Apply_GTG_opd( gamma, sigma, delta1, epsilon, delta2 ) \ 00068 { \ 00069 /* 00070 double g, s; \ 00071 double d1, e, d2; \ 00072 double t, st; \ 00073 \ 00074 g = *(gamma); \ 00075 s = *(sigma); \ 00076 \ 00077 d1 = *(delta1); \ 00078 e = *(epsilon); \ 00079 d2 = *(delta2); \ 00080 \ 00081 t = s * ( d2 - d1 ) + 2.0 * g * e; \ 00082 st = s * t; \ 00083 e = g * t - e; \ 00084 d1 = st + d1; \ 00085 d2 = d2 - st; \ 00086 \ 00087 *(delta1) = d1; \ 00088 *(epsilon) = e; \ 00089 *(delta2) = d2; \ 00090 */ \ 00091 double g, s; \ 00092 double d1, e, d2; \ 00093 double g2, s2, tgse; \ 00094 \ 00095 g = *(gamma); \ 00096 s = *(sigma); \ 00097 \ 00098 d1 = *(delta1); \ 00099 e = *(epsilon); \ 00100 d2 = *(delta2); \ 00101 \ 00102 g2 = g * g; \ 00103 s2 = s * s; \ 00104 tgse = 2.0 * g * s * e; \ 00105 \ 00106 *(delta1) = g2 * d1 + tgse + s2 * d2; \ 00107 *(delta2) = s2 * d1 - tgse + g2 * d2; \ 00108 *(epsilon) = g * s * (d2 - d1) + e * (g2 - s2); \ 00109 \ 00110 /* 00111 double g, s; \ 00112 double d1, e, d2; \ 00113 double g2, s2; \ 00114 double st; \ 00115 \ 00116 g = *(gamma); \ 00117 s = *(sigma); \ 00118 \ 00119 d1 = *(delta1); \ 00120 e = *(epsilon); \ 00121 d2 = *(delta2); \ 00122 \ 00123 g2 = g * g; \ 00124 s2 = s * s; \ 00125 st = s2 * (d2 - d1) + 2.0 * g * s * e; \ 00126 \ 00127 *(delta1) = st + d1; \ 00128 *(delta2) = d2 - st; \ 00129 *(epsilon) = g * s * (d2 - d1) + e * (g2 - s2); \ 00130 */ \ 00131 } 00132