libflame  revision_anchor
FLA_Apply_G_mx3b_opt.h
Go to the documentation of this file.
00001 /*
00002    libflame
00003    An object-based infrastructure for developing high-performance
00004    dense linear algebra libraries.
00005 
00006    Copyright (C) 2011, The University of Texas
00007 
00008    libflame is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU Lesser General Public License as
00010    published by the Free Software Foundation; either version 2.1 of
00011    the License, or (at your option) any later version.
00012 
00013    libflame is distributed in the hope that it will be useful, but
00014    WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016    Lesser General Public License for more details.
00017 
00018    You should have received a copy of the GNU Lesser General Public
00019    License along with libflame; if you did not receive a copy, see
00020    http://www.gnu.org/licenses/.
00021 
00022    For more information, please contact us at flame@cs.utexas.edu or
00023    send mail to:
00024 
00025    Field G. Van Zee and/or
00026    Robert A. van de Geijn
00027    The University of Texas at Austin
00028    Department of Computer Sciences
00029    1 University Station C0500
00030    Austin TX 78712
00031 */
00032 
/*
   MAC_Apply_G_mx3b_ops

   Single-precision real kernel. For each of m_A rows it updates one
   element from each of three strided vectors a1, a2, a3 in place:

     1. ( a2, a3 ) <- ( ga23 * a2 + si23 * a3,  ga23 * a3 - si23 * a2 )
     2. ( a1, a2 ) <- ( ga12 * a1 + si12 * a2,  ga12 * a2 - si12 * a1 )

   Step 2 consumes the a2 value produced by step 1. Judging by the macro
   name these are presumably two Givens rotations; nothing here checks
   that ga^2 + si^2 == 1.

   Parameters:
     m_A               number of rows (loop trip count).
     gamma12, sigma12  pointers to the float coefficients of step 2.
     gamma23, sigma23  pointers to the float coefficients of step 1.
     a1, a2, a3        float pointers to the first element of each vector;
                       held in restrict-qualified locals, so the three
                       vectors must not overlap.
     inc_a1..inc_a3    element strides added to each pointer per row.

   Macro caveats:
     - Expands to a brace-enclosed block, not do { } while ( 0 ): an
       invocation followed by ';' used as the un-braced body of an 'if'
       that has an 'else' will not compile.
     - m_A is evaluated on every loop test and inc_a1..inc_a3 once per
       row, so those arguments must be free of side effects. The pointer
       arguments are evaluated exactly once.
     - Arguments must not mention the local names declared in the block
       (ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3, i).
*/
#define MAC_Apply_G_mx3b_ops( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    /* Every parameter is parenthesized so expression arguments such as */ \
    /* 'g + 1' bind as a unit instead of splitting around the operator. */ \
    float              ga12   = *( gamma12 ); \
    float              si12   = *( sigma12 ); \
    float              ga23   = *( gamma23 ); \
    float              si23   = *( sigma23 ); \
    float*    restrict alpha1 = ( a1 ); \
    float*    restrict alpha2 = ( a2 ); \
    float*    restrict alpha3 = ( a3 ); \
    float              temp1; \
    float              temp2; \
    float              temp3; \
    int                i; \
\
    for ( i = 0; i < ( m_A ); ++i ) \
    { \
        /* Step 1: combine the a2/a3 pair using the old values of both. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        /* Step 2: combine the a1/a2 pair; a2 is re-read after step 1. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        alpha1 += ( inc_a1 ); \
        alpha2 += ( inc_a2 ); \
        alpha3 += ( inc_a3 ); \
    } \
}
00073 
/*
   MAC_Apply_G_mx3b_opc

   Single-precision complex kernel. For each of m_A rows it updates one
   scomplex element from each of three strided vectors a1, a2, a3 in
   place, using real (float) coefficients applied separately to the
   .real and .imag members:

     1. ( a2, a3 ) <- ( ga23 * a2 + si23 * a3,  -si23 * a2 + ga23 * a3 )
     2. ( a1, a2 ) <- ( ga12 * a1 + si12 * a2,  -si12 * a1 + ga12 * a2 )

   Step 2 consumes the a2 value produced by step 1. Judging by the macro
   name these are presumably two Givens rotations; nothing here checks
   that ga^2 + si^2 == 1.

   Parameters:
     m_A               number of rows (loop trip count).
     gamma12, sigma12  pointers to the float coefficients of step 2.
     gamma23, sigma23  pointers to the float coefficients of step 1.
     a1, a2, a3        scomplex pointers to the first element of each
                       vector; held in restrict-qualified locals, so the
                       three vectors must not overlap.
     inc_a1..inc_a3    element strides added to each pointer per row.

   Macro caveats:
     - Expands to a brace-enclosed block, not do { } while ( 0 ): an
       invocation followed by ';' used as the un-braced body of an 'if'
       that has an 'else' will not compile.
     - m_A is evaluated on every loop test and inc_a1..inc_a3 once per
       row, so those arguments must be free of side effects. The pointer
       arguments are evaluated exactly once.
     - Arguments must not mention the local names declared in the block
       (ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3, i).
*/
#define MAC_Apply_G_mx3b_opc( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    /* Every parameter is parenthesized so expression arguments such as */ \
    /* 'g + 1' bind as a unit instead of splitting around the operator. */ \
    float              ga12   = *( gamma12 ); \
    float              si12   = *( sigma12 ); \
    float              ga23   = *( gamma23 ); \
    float              si23   = *( sigma23 ); \
    scomplex* restrict alpha1 = ( a1 ); \
    scomplex* restrict alpha2 = ( a2 ); \
    scomplex* restrict alpha3 = ( a3 ); \
    scomplex           temp1; \
    scomplex           temp2; \
    scomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < ( m_A ); ++i ) \
    { \
        /* Step 1: combine the a2/a3 pair using the old values of both. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        /* Step 2: combine the a1/a2 pair; a2 is re-read after step 1. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        alpha1 += ( inc_a1 ); \
        alpha2 += ( inc_a2 ); \
        alpha3 += ( inc_a3 ); \
    } \
}
00120 
/*
   MAC_Apply_G_mx3b_opd

   Double-precision real kernel. For each of m_A rows it updates one
   element from each of three strided vectors a1, a2, a3 in place:

     1. ( a2, a3 ) <- ( ga23 * a2 + si23 * a3,  ga23 * a3 - si23 * a2 )
     2. ( a1, a2 ) <- ( ga12 * a1 + si12 * a2,  ga12 * a2 - si12 * a1 )

   Step 2 consumes the a2 value produced by step 1. Judging by the macro
   name these are presumably two Givens rotations; nothing here checks
   that ga^2 + si^2 == 1.

   Parameters:
     m_A               number of rows (loop trip count).
     gamma12, sigma12  pointers to the double coefficients of step 2.
     gamma23, sigma23  pointers to the double coefficients of step 1.
     a1, a2, a3        double pointers to the first element of each
                       vector; held in restrict-qualified locals, so the
                       three vectors must not overlap.
     inc_a1..inc_a3    element strides added to each pointer per row.

   Macro caveats:
     - Expands to a brace-enclosed block, not do { } while ( 0 ): an
       invocation followed by ';' used as the un-braced body of an 'if'
       that has an 'else' will not compile.
     - m_A is evaluated on every loop test and inc_a1..inc_a3 once per
       row, so those arguments must be free of side effects. The pointer
       arguments are evaluated exactly once.
     - Arguments must not mention the local names declared in the block
       (ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3, i).
*/
#define MAC_Apply_G_mx3b_opd( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    /* Every parameter is parenthesized so expression arguments such as */ \
    /* 'g + 1' bind as a unit instead of splitting around the operator. */ \
    double             ga12   = *( gamma12 ); \
    double             si12   = *( sigma12 ); \
    double             ga23   = *( gamma23 ); \
    double             si23   = *( sigma23 ); \
    double*   restrict alpha1 = ( a1 ); \
    double*   restrict alpha2 = ( a2 ); \
    double*   restrict alpha3 = ( a3 ); \
    double             temp1; \
    double             temp2; \
    double             temp3; \
    int                i; \
\
    for ( i = 0; i < ( m_A ); ++i ) \
    { \
        /* Step 1: combine the a2/a3 pair using the old values of both. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        /* Step 2: combine the a1/a2 pair; a2 is re-read after step 1. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        alpha1 += ( inc_a1 ); \
        alpha2 += ( inc_a2 ); \
        alpha3 += ( inc_a3 ); \
    } \
}
00161 
/*
   MAC_Apply_G_mx3b_opz

   Double-precision complex kernel. For each of m_A rows it updates one
   dcomplex element from each of three strided vectors a1, a2, a3 in
   place, using real (double) coefficients applied separately to the
   .real and .imag members:

     1. ( a2, a3 ) <- ( ga23 * a2 + si23 * a3,  -si23 * a2 + ga23 * a3 )
     2. ( a1, a2 ) <- ( ga12 * a1 + si12 * a2,  -si12 * a1 + ga12 * a2 )

   Step 2 consumes the a2 value produced by step 1. Judging by the macro
   name these are presumably two Givens rotations; nothing here checks
   that ga^2 + si^2 == 1.

   Parameters:
     m_A               number of rows (loop trip count).
     gamma12, sigma12  pointers to the double coefficients of step 2.
     gamma23, sigma23  pointers to the double coefficients of step 1.
     a1, a2, a3        dcomplex pointers to the first element of each
                       vector; held in restrict-qualified locals, so the
                       three vectors must not overlap.
     inc_a1..inc_a3    element strides added to each pointer per row.

   Macro caveats:
     - Expands to a brace-enclosed block, not do { } while ( 0 ): an
       invocation followed by ';' used as the un-braced body of an 'if'
       that has an 'else' will not compile.
     - m_A is evaluated on every loop test and inc_a1..inc_a3 once per
       row, so those arguments must be free of side effects. The pointer
       arguments are evaluated exactly once.
     - Arguments must not mention the local names declared in the block
       (ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3, i).
*/
#define MAC_Apply_G_mx3b_opz( m_A, \
                              gamma12, \
                              sigma12, \
                              gamma23, \
                              sigma23, \
                              a1, inc_a1, \
                              a2, inc_a2, \
                              a3, inc_a3 ) \
{ \
    /* Every parameter is parenthesized so expression arguments such as */ \
    /* 'g + 1' bind as a unit instead of splitting around the operator. */ \
    double             ga12   = *( gamma12 ); \
    double             si12   = *( sigma12 ); \
    double             ga23   = *( gamma23 ); \
    double             si23   = *( sigma23 ); \
    dcomplex* restrict alpha1 = ( a1 ); \
    dcomplex* restrict alpha2 = ( a2 ); \
    dcomplex* restrict alpha3 = ( a3 ); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    dcomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < ( m_A ); ++i ) \
    { \
        /* Step 1: combine the a2/a3 pair using the old values of both. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        /* Step 2: combine the a1/a2 pair; a2 is re-read after step 1. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        alpha1 += ( inc_a1 ); \
        alpha2 += ( inc_a2 ); \
        alpha3 += ( inc_a3 ); \
    } \
}
00208