libflame  revision_anchor
FLA_Apply_GTG.h
Go to the documentation of this file.
00001 /*
00002    libflame
00003    An object-based infrastructure for developing high-performance
00004    dense linear algebra libraries.
00005 
00006    Copyright (C) 2011, The University of Texas
00007 
00008    libflame is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU Lesser General Public License as
00010    published by the Free Software Foundation; either version 2.1 of
00011    the License, or (at your option) any later version.
00012 
00013    libflame is distributed in the hope that it will be useful, but
00014    WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016    Lesser General Public License for more details.
00017 
00018    You should have received a copy of the GNU Lesser General Public
00019    License along with libflame; if you did not receive a copy, see
00020    http://www.gnu.org/licenses/.
00021 
00022    For more information, please contact us at flame@cs.utexas.edu or
00023    send mail to:
00024 
00025    Field G. Van Zee and/or
00026    Robert A. van de Geijn
00027    The University of Texas at Austin
00028    Department of Computer Sciences
00029    1 University Station C0500
00030    Austin TX 78712
00031 */
00032 
00033 FLA_Error FLA_Apply_GTG( FLA_Obj gamma, FLA_Obj sigma, FLA_Obj delta1, FLA_Obj epsilon1, FLA_Obj delta2 );
00034 FLA_Error FLA_Apply_GTG_ops( float*  gamma,
00035                              float*  sigma,
00036                              float*  delta1,
00037                              float*  epsilon1,
00038                              float*  delta2 );
00039 FLA_Error FLA_Apply_GTG_opd( double* gamma,
00040                              double* sigma,
00041                              double* delta1,
00042                              double* epsilon1,
00043                              double* delta2 );
00044 
/*
   MAC_Apply_GTG_ops

   Single-precision, in-place update of the three scalars addressed by
   delta1, epsilon and delta2, using the scalars addressed by gamma and sigma:

     delta1  := gamma^2 * delta1 + 2*gamma*sigma*epsilon + sigma^2 * delta2
     delta2  := sigma^2 * delta1 - 2*gamma*sigma*epsilon + gamma^2 * delta2
     epsilon := gamma*sigma*(delta2 - delta1) + epsilon*(gamma^2 - sigma^2)

   Every right-hand side uses the values loaded before any store is made.
   Written out, this is G^T * A * G for

     G = [ gamma  -sigma ]      A = [ delta1   epsilon ]
         [ sigma   gamma ]          [ epsilon  delta2  ]

   Arguments:
     gamma, sigma             pointers to float; only read.
     delta1, epsilon, delta2  pointers to float; read, then overwritten
                              (stores happen in the order delta1, delta2,
                              epsilon).

   Macro hygiene:
     - Each argument expression is evaluated exactly once, so arguments with
       side effects behave as they would for a function call.
     - All temporaries carry a fla_gtg_ops_ prefix so that argument
       expressions naming caller variables such as g, s, e, d1 or d2 are not
       captured by the macro's own locals.

   The literal 2.0 is a double, so the cross term is formed in double
   precision and rounded to float when stored in the temporary.

   The expansion is a brace-enclosed block, not an expression.
*/
#define MAC_Apply_GTG_ops( gamma, sigma, delta1, epsilon, delta2 ) \
{ \
    const float* fla_gtg_ops_pg_   = (gamma); \
    const float* fla_gtg_ops_ps_   = (sigma); \
    float*       fla_gtg_ops_pd1_  = (delta1); \
    float*       fla_gtg_ops_pe_   = (epsilon); \
    float*       fla_gtg_ops_pd2_  = (delta2); \
\
    const float  fla_gtg_ops_g_    = *fla_gtg_ops_pg_; \
    const float  fla_gtg_ops_s_    = *fla_gtg_ops_ps_; \
    const float  fla_gtg_ops_d1_   = *fla_gtg_ops_pd1_; \
    const float  fla_gtg_ops_e_    = *fla_gtg_ops_pe_; \
    const float  fla_gtg_ops_d2_   = *fla_gtg_ops_pd2_; \
\
    const float  fla_gtg_ops_g2_   = fla_gtg_ops_g_ * fla_gtg_ops_g_; \
    const float  fla_gtg_ops_s2_   = fla_gtg_ops_s_ * fla_gtg_ops_s_; \
    const float  fla_gtg_ops_tgse_ = 2.0 * fla_gtg_ops_g_ * fla_gtg_ops_s_ * fla_gtg_ops_e_; \
\
    *fla_gtg_ops_pd1_ = fla_gtg_ops_g2_ * fla_gtg_ops_d1_ + fla_gtg_ops_tgse_ + fla_gtg_ops_s2_ * fla_gtg_ops_d2_; \
    *fla_gtg_ops_pd2_ = fla_gtg_ops_s2_ * fla_gtg_ops_d1_ - fla_gtg_ops_tgse_ + fla_gtg_ops_g2_ * fla_gtg_ops_d2_; \
    *fla_gtg_ops_pe_  = fla_gtg_ops_g_ * fla_gtg_ops_s_ * (fla_gtg_ops_d2_ - fla_gtg_ops_d1_) + fla_gtg_ops_e_ * (fla_gtg_ops_g2_ - fla_gtg_ops_s2_); \
}
00066 
/*
   MAC_Apply_GTG_opd

   Double-precision, in-place update of the three scalars addressed by
   delta1, epsilon and delta2, using the scalars addressed by gamma and sigma:

     delta1  := gamma^2 * delta1 + 2*gamma*sigma*epsilon + sigma^2 * delta2
     delta2  := sigma^2 * delta1 - 2*gamma*sigma*epsilon + gamma^2 * delta2
     epsilon := gamma*sigma*(delta2 - delta1) + epsilon*(gamma^2 - sigma^2)

   Every right-hand side uses the values loaded before any store is made.
   Written out, this is G^T * A * G for

     G = [ gamma  -sigma ]      A = [ delta1   epsilon ]
         [ sigma   gamma ]          [ epsilon  delta2  ]

   Arguments:
     gamma, sigma             pointers to double; only read.
     delta1, epsilon, delta2  pointers to double; read, then overwritten
                              (stores happen in the order delta1, delta2,
                              epsilon).

   Macro hygiene:
     - Each argument expression is evaluated exactly once, so arguments with
       side effects behave as they would for a function call.
     - All temporaries carry a fla_gtg_opd_ prefix so that argument
       expressions naming caller variables such as g, s, e, d1 or d2 are not
       captured by the macro's own locals.

   Note: when gamma^2 + sigma^2 = 1 the diagonal update can equivalently be
   written with a shared correction term
     st = sigma^2 * (delta2 - delta1) + 2*gamma*sigma*epsilon,
     delta1 := delta1 + st,  delta2 := delta2 - st.
   This macro uses the expanded form above, which does not rely on that
   identity.

   The expansion is a brace-enclosed block, not an expression.
*/
#define MAC_Apply_GTG_opd( gamma, sigma, delta1, epsilon, delta2 ) \
{ \
    const double* fla_gtg_opd_pg_   = (gamma); \
    const double* fla_gtg_opd_ps_   = (sigma); \
    double*       fla_gtg_opd_pd1_  = (delta1); \
    double*       fla_gtg_opd_pe_   = (epsilon); \
    double*       fla_gtg_opd_pd2_  = (delta2); \
\
    const double  fla_gtg_opd_g_    = *fla_gtg_opd_pg_; \
    const double  fla_gtg_opd_s_    = *fla_gtg_opd_ps_; \
    const double  fla_gtg_opd_d1_   = *fla_gtg_opd_pd1_; \
    const double  fla_gtg_opd_e_    = *fla_gtg_opd_pe_; \
    const double  fla_gtg_opd_d2_   = *fla_gtg_opd_pd2_; \
\
    const double  fla_gtg_opd_g2_   = fla_gtg_opd_g_ * fla_gtg_opd_g_; \
    const double  fla_gtg_opd_s2_   = fla_gtg_opd_s_ * fla_gtg_opd_s_; \
    const double  fla_gtg_opd_tgse_ = 2.0 * fla_gtg_opd_g_ * fla_gtg_opd_s_ * fla_gtg_opd_e_; \
\
    *fla_gtg_opd_pd1_ = fla_gtg_opd_g2_ * fla_gtg_opd_d1_ + fla_gtg_opd_tgse_ + fla_gtg_opd_s2_ * fla_gtg_opd_d2_; \
    *fla_gtg_opd_pd2_ = fla_gtg_opd_s2_ * fla_gtg_opd_d1_ - fla_gtg_opd_tgse_ + fla_gtg_opd_g2_ * fla_gtg_opd_d2_; \
    *fla_gtg_opd_pe_  = fla_gtg_opd_g_ * fla_gtg_opd_s_ * (fla_gtg_opd_d2_ - fla_gtg_opd_d1_) + fla_gtg_opd_e_ * (fla_gtg_opd_g2_ - fla_gtg_opd_s2_); \
}
00132