libflame  revision_anchor
FLA_Apply_G_mx2_opt.h
Go to the documentation of this file.
00001 /*
00002    libflame
00003    An object-based infrastructure for developing high-performance
00004    dense linear algebra libraries.
00005 
00006    Copyright (C) 2011, The University of Texas
00007 
00008    libflame is free software; you can redistribute it and/or modify
00009    it under the terms of the GNU Lesser General Public License as
00010    published by the Free Software Foundation; either version 2.1 of
00011    the License, or (at your option) any later version.
00012 
00013    libflame is distributed in the hope that it will be useful, but
00014    WITHOUT ANY WARRANTY; without even the implied warranty of
00015    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
00016    Lesser General Public License for more details.
00017 
00018    You should have received a copy of the GNU Lesser General Public
00019    License along with libflame; if you did not receive a copy, see
00020    http://www.gnu.org/licenses/.
00021 
00022    For more information, please contact us at flame@cs.utexas.edu or
00023    send mail to:
00024 
00025    Field G. Van Zee and/or
00026    Robert A. van de Geijn
00027    The University of Texas at Austin
00028    Department of Computer Sciences
00029    1 University Station C0500
00030    Austin TX 78712
00031 */
00032 
/*
   MAC_Apply_G_mx2_ops

   Single-precision real kernel that applies one 2x2 plane rotation to a
   pair of strided vectors. With gamma = *gamma12 and sigma = *sigma12,
   for every i in [0, m_A):

     x = a1[ i*inc_a1 ];   y = a2[ i*inc_a2 ];
     a1[ i*inc_a1 ] =  gamma * x + sigma * y;
     a2[ i*inc_a2 ] = -sigma * x + gamma * y;

   (Per the macro name this is a Givens rotation; that holds when gamma
   and sigma form a cosine/sine pair, which is up to the caller.)

   Arguments:
     m_A      number of element pairs to update; nothing is touched when
              it is <= 0.
     gamma12  pointer to the float gamma; read once, before the loop.
     sigma12  pointer to the float sigma; read once, before the loop.
     a1, a2   pointers to the first float of each vector. They are bound
              to restrict-qualified locals, so the two vectors must not
              overlap.
     inc_a1,
     inc_a2   element strides used to advance through a1 and a2.

   Usage notes:
     - Every argument is parenthesized where it is expanded, so pointer
       and arithmetic expressions (e.g. "g + 1", "n - 1") are safe.
     - m_A, inc_a1 and inc_a2 are re-evaluated on every iteration; pass
       expressions without side effects.
     - The expansion is a brace-enclosed block that declares ga, si,
       alpha1, alpha2, temp1, temp2 and i; argument expressions must not
       refer to caller variables with those names.
*/
#define MAC_Apply_G_mx2_ops( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
    float             ga     = *(gamma12); \
    float             si     = *(sigma12); \
    float*  restrict  alpha1 = (a1); \
    float*  restrict  alpha2 = (a2); \
    float             temp1; \
    float             temp2; \
    int               i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Snapshot the pair so both updates use the pre-update values. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 =  ga * temp1 + si * temp2; \
        *alpha2 = -si * temp1 + ga * temp2; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
    } \
}
00059 
/*
   MAC_Apply_G_mx2_opc

   Single-precision complex kernel that applies one real-valued 2x2 plane
   rotation to a pair of strided scomplex vectors. With gamma = *gamma12
   and sigma = *sigma12 (both plain floats), for every i in [0, m_A):

     x = a1[ i*inc_a1 ];   y = a2[ i*inc_a2 ];
     a1[ i*inc_a1 ] =  gamma * x + sigma * y;
     a2[ i*inc_a2 ] = -sigma * x + gamma * y;

   Because gamma and sigma are real, the update is carried out on the
   .real and .imag members independently.

   (Per the macro name this is a Givens rotation; that holds when gamma
   and sigma form a cosine/sine pair, which is up to the caller.)

   Arguments:
     m_A      number of element pairs to update; nothing is touched when
              it is <= 0.
     gamma12  pointer to the float gamma; read once, before the loop.
     sigma12  pointer to the float sigma; read once, before the loop.
     a1, a2   pointers to the first scomplex of each vector. They are
              bound to restrict-qualified locals, so the two vectors must
              not overlap.
     inc_a1,
     inc_a2   element strides used to advance through a1 and a2.

   Usage notes:
     - Every argument is parenthesized where it is expanded, so pointer
       and arithmetic expressions (e.g. "g + 1", "n - 1") are safe.
     - m_A, inc_a1 and inc_a2 are re-evaluated on every iteration; pass
       expressions without side effects.
     - The expansion is a brace-enclosed block that declares ga12, si12,
       alpha1, alpha2, temp1, temp2 and i; argument expressions must not
       refer to caller variables with those names.
*/
#define MAC_Apply_G_mx2_opc( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    scomplex* restrict alpha1 = (a1); \
    scomplex* restrict alpha2 = (a2); \
    scomplex           temp1; \
    scomplex           temp2; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Snapshot the pair so both updates use the pre-update values. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
    } \
}
00089 
/*
   MAC_Apply_G_mx2_opd

   Double-precision real kernel that applies one 2x2 plane rotation to a
   pair of strided vectors. With gamma = *gamma12 and sigma = *sigma12,
   for every i in [0, m_A):

     x = a1[ i*inc_a1 ];   y = a2[ i*inc_a2 ];
     a1[ i*inc_a1 ] =  gamma * x + sigma * y;
     a2[ i*inc_a2 ] = -sigma * x + gamma * y;

   (Per the macro name this is a Givens rotation; that holds when gamma
   and sigma form a cosine/sine pair, which is up to the caller.)

   Arguments:
     m_A      number of element pairs to update; nothing is touched when
              it is <= 0.
     gamma12  pointer to the double gamma; read once, before the loop.
     sigma12  pointer to the double sigma; read once, before the loop.
     a1, a2   pointers to the first double of each vector. They are bound
              to restrict-qualified locals, so the two vectors must not
              overlap.
     inc_a1,
     inc_a2   element strides used to advance through a1 and a2.

   Usage notes:
     - Every argument is parenthesized where it is expanded, so pointer
       and arithmetic expressions (e.g. "g + 1", "n - 1") are safe.
     - m_A, inc_a1 and inc_a2 are re-evaluated on every iteration; pass
       expressions without side effects.
     - The expansion is a brace-enclosed block that declares ga, si,
       alpha1, alpha2, temp1, temp2 and i; argument expressions must not
       refer to caller variables with those names.
*/
#define MAC_Apply_G_mx2_opd( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
    double            ga     = *(gamma12); \
    double            si     = *(sigma12); \
    double* restrict  alpha1 = (a1); \
    double* restrict  alpha2 = (a2); \
    double            temp1; \
    double            temp2; \
    int               i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Snapshot the pair so both updates use the pre-update values. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 =  ga * temp1 + si * temp2; \
        *alpha2 = -si * temp1 + ga * temp2; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
    } \
}
00116 
/*
   MAC_Apply_G_mx2_opz

   Double-precision complex kernel that applies one real-valued 2x2 plane
   rotation to a pair of strided dcomplex vectors. With gamma = *gamma12
   and sigma = *sigma12 (both plain doubles), for every i in [0, m_A):

     x = a1[ i*inc_a1 ];   y = a2[ i*inc_a2 ];
     a1[ i*inc_a1 ] =  gamma * x + sigma * y;
     a2[ i*inc_a2 ] = -sigma * x + gamma * y;

   Because gamma and sigma are real, the update is carried out on the
   .real and .imag members independently.

   (Per the macro name this is a Givens rotation; that holds when gamma
   and sigma form a cosine/sine pair, which is up to the caller.)

   Arguments:
     m_A      number of element pairs to update; nothing is touched when
              it is <= 0.
     gamma12  pointer to the double gamma; read once, before the loop.
     sigma12  pointer to the double sigma; read once, before the loop.
     a1, a2   pointers to the first dcomplex of each vector. They are
              bound to restrict-qualified locals, so the two vectors must
              not overlap.
     inc_a1,
     inc_a2   element strides used to advance through a1 and a2.

   Usage notes:
     - Every argument is parenthesized where it is expanded, so pointer
       and arithmetic expressions (e.g. "g + 1", "n - 1") are safe.
     - m_A, inc_a1 and inc_a2 are re-evaluated on every iteration; pass
       expressions without side effects.
     - The expansion is a brace-enclosed block that declares ga12, si12,
       alpha1, alpha2, temp1, temp2 and i; argument expressions must not
       refer to caller variables with those names.
*/
#define MAC_Apply_G_mx2_opz( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    dcomplex* restrict alpha1 = (a1); \
    dcomplex* restrict alpha2 = (a2); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Snapshot the pair so both updates use the pre-update values. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
    } \
}
00146