libflame  revision_anchor
FLA_Apply_G_mx3_opt.h
Go to the documentation of this file.
00001 /*
00002    libflame
00003    An object-based infrastructure for developing high-performance
00004    dense linear algebra libraries.
00005 
00006    Copyright (C) 2011, The University of Texas
00007 
00008    libflame is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU Lesser General Public License as
00010    published by the Free Software Foundation; either version 2.1 of
00011    the License, or (at your option) any later version.
00012 
00013    libflame is distributed in the hope that it will be useful, but
00014    WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016    Lesser General Public License for more details.
00017 
00018    You should have received a copy of the GNU Lesser General Public
00019    License along with libflame; if you did not receive a copy, see
00020    http://www.gnu.org/licenses/.
00021 
00022    For more information, please contact us at flame@cs.utexas.edu or
00023    send mail to:
00024 
00025    Field G. Van Zee and/or
00026    Robert A. van de Geijn
00027    The University of Texas at Austin
00028    Department of Computer Sciences
00029    1 University Station C0500
00030    Austin TX 78712
00031 */
00032 
/*
   MAC_Apply_G_mx3_ops

   Single-precision real variant. Walks three vectors of m_A elements in
   lock step and, for each element index, applies two 2x2 transformations
   in sequence (presumably Givens rotations, judging by the macro name):

     1. using (gamma12, sigma12) on the (a1, a2) pair:
          a1 := a1 * gamma12 + a2 * sigma12
          a2 := a2 * gamma12 - a1 * sigma12     (a1, a2 = values on entry)
     2. using (gamma23, sigma23) on the updated a2 and a3, same formulas.

   Parameters:
     m_A               number of elements processed in each vector
     gamma12, sigma12  pointers to the scalars of the first transformation
     gamma23, sigma23  pointers to the scalars of the second transformation
     a1, a2, a3        pointers to the first element of each vector;
                       the vectors are updated in place
     inc_a1 .. inc_a3  stride, in elements, between consecutive entries

   Notes:
     - Every parameter is parenthesized in the expansion, so expression
       arguments such as "g + 1" or "buf + off" expand correctly.
     - m_A and the increments are re-evaluated on every iteration; do not
       pass expressions with side effects.
     - The vectors are accessed through restrict-qualified pointers and
       therefore must not overlap.
     - The expansion declares ga12, si12, ga23, si23, alpha1..alpha3,
       temp1..temp3 and i; arguments must not refer to caller variables
       with those names.
*/
#define MAC_Apply_G_mx3_ops( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    float              ga23   = *(gamma23); \
    float              si23   = *(sigma23); \
    float*    restrict alpha1 = (a1); \
    float*    restrict alpha2 = (a2); \
    float*    restrict alpha3 = (a3); \
    float              temp1; \
    float              temp2; \
    float              temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* First transformation: columns 1 and 2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Second transformation: updated column 2 and column 3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
00073 
/*
   MAC_Apply_G_mx3_opd

   Double-precision real variant. Walks three vectors of m_A elements in
   lock step and, for each element index, applies two 2x2 transformations
   in sequence (presumably Givens rotations, judging by the macro name):

     1. using (gamma12, sigma12) on the (a1, a2) pair:
          a1 := a1 * gamma12 + a2 * sigma12
          a2 := a2 * gamma12 - a1 * sigma12     (a1, a2 = values on entry)
     2. using (gamma23, sigma23) on the updated a2 and a3, same formulas.

   Parameters:
     m_A               number of elements processed in each vector
     gamma12, sigma12  pointers to the scalars of the first transformation
     gamma23, sigma23  pointers to the scalars of the second transformation
     a1, a2, a3        pointers to the first element of each vector;
                       the vectors are updated in place
     inc_a1 .. inc_a3  stride, in elements, between consecutive entries

   Notes:
     - Every parameter is parenthesized in the expansion, so expression
       arguments such as "g + 1" or "buf + off" expand correctly.
     - m_A and the increments are re-evaluated on every iteration; do not
       pass expressions with side effects.
     - The vectors are accessed through restrict-qualified pointers and
       therefore must not overlap.
     - The expansion declares ga12, si12, ga23, si23, alpha1..alpha3,
       temp1..temp3 and i; arguments must not refer to caller variables
       with those names.
*/
#define MAC_Apply_G_mx3_opd( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    double             ga23   = *(gamma23); \
    double             si23   = *(sigma23); \
    double*   restrict alpha1 = (a1); \
    double*   restrict alpha2 = (a2); \
    double*   restrict alpha3 = (a3); \
    double             temp1; \
    double             temp2; \
    double             temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* First transformation: columns 1 and 2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Second transformation: updated column 2 and column 3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
00114 
/*
   MAC_Apply_G_mx3_opc

   Single-precision complex variant. Walks three scomplex vectors of m_A
   elements in lock step and, for each element index, applies two 2x2
   transformations with real scalars in sequence (presumably Givens
   rotations, judging by the macro name). The real and imaginary parts
   are updated independently with the same formulas:

     1. using (gamma12, sigma12) on the (a1, a2) pair:
          a1 :=  gamma12 * a1 + sigma12 * a2
          a2 := -sigma12 * a1 + gamma12 * a2    (a1, a2 = values on entry)
     2. using (gamma23, sigma23) on the updated a2 and a3, same formulas.

   Parameters:
     m_A               number of elements processed in each vector
     gamma12, sigma12  pointers to the real scalars of the first
                       transformation (read as float)
     gamma23, sigma23  pointers to the real scalars of the second
                       transformation (read as float)
     a1, a2, a3        pointers to the first scalar of each scomplex
                       vector; the vectors are updated in place
     inc_a1 .. inc_a3  stride, in elements, between consecutive entries

   Notes:
     - Every parameter is parenthesized in the expansion, so expression
       arguments such as "g + 1" or "buf + off" expand correctly.
     - m_A and the increments are re-evaluated on every iteration; do not
       pass expressions with side effects.
     - The vectors are accessed through restrict-qualified pointers and
       therefore must not overlap.
     - The expansion declares ga12, si12, ga23, si23, alpha1..alpha3,
       temp1..temp3 and i; arguments must not refer to caller variables
       with those names.
*/
#define MAC_Apply_G_mx3_opc( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    float              ga23   = *(gamma23); \
    float              si23   = *(sigma23); \
    scomplex* restrict alpha1 = (a1); \
    scomplex* restrict alpha2 = (a2); \
    scomplex* restrict alpha3 = (a3); \
    scomplex           temp1; \
    scomplex           temp2; \
    scomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* First transformation: columns 1 and 2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Second transformation: updated column 2 and column 3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
00161 
/*
   MAC_Apply_G_mx3_opz

   Double-precision complex variant. Walks three dcomplex vectors of m_A
   elements in lock step and, for each element index, applies two 2x2
   transformations with real scalars in sequence (presumably Givens
   rotations, judging by the macro name). The real and imaginary parts
   are updated independently with the same formulas:

     1. using (gamma12, sigma12) on the (a1, a2) pair:
          a1 :=  gamma12 * a1 + sigma12 * a2
          a2 := -sigma12 * a1 + gamma12 * a2    (a1, a2 = values on entry)
     2. using (gamma23, sigma23) on the updated a2 and a3, same formulas.

   Parameters:
     m_A               number of elements processed in each vector
     gamma12, sigma12  pointers to the real scalars of the first
                       transformation (read as double)
     gamma23, sigma23  pointers to the real scalars of the second
                       transformation (read as double)
     a1, a2, a3        pointers to the first scalar of each dcomplex
                       vector; the vectors are updated in place
     inc_a1 .. inc_a3  stride, in elements, between consecutive entries

   Notes:
     - Every parameter is parenthesized in the expansion, so expression
       arguments such as "g + 1" or "buf + off" expand correctly.
     - m_A and the increments are re-evaluated on every iteration; do not
       pass expressions with side effects.
     - The vectors are accessed through restrict-qualified pointers and
       therefore must not overlap.
     - The expansion declares ga12, si12, ga23, si23, alpha1..alpha3,
       temp1..temp3 and i; arguments must not refer to caller variables
       with those names.
*/
#define MAC_Apply_G_mx3_opz( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    double             ga23   = *(gamma23); \
    double             si23   = *(sigma23); \
    dcomplex* restrict alpha1 = (a1); \
    dcomplex* restrict alpha2 = (a2); \
    dcomplex* restrict alpha3 = (a3); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    dcomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* First transformation: columns 1 and 2. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Second transformation: updated column 2 and column 3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
00208