libflame
revision_anchor
|
/*
   libflame
   An object-based infrastructure for developing high-performance
   dense linear algebra libraries.

   Copyright (C) 2011, The University of Texas

   libflame is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   libflame is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with libflame; if you did not receive a copy, see
   http://www.gnu.org/licenses/.

   For more information, please contact us at flame@cs.utexas.edu or
   send mail to:

   Field G. Van Zee and/or
   Robert A. van de Geijn
   The University of Texas at Austin
   Department of Computer Sciences
   1 University Station C0500
   Austin TX 78712
*/

/*
   MAC_Apply_G_mx2_ops

   Single-precision real kernel. For each of m_A element pairs
   ( x, y ) read through a1 and a2, overwrites the pair in place with

      x :=  gamma * x + sigma * y
      y := -sigma * x + gamma * y     (using the old value of x)

   where gamma = *gamma12 and sigma = *sigma12.
   NOTE(review): the "G" in the name presumably stands for a Givens
   rotation; the macro itself does not check gamma^2 + sigma^2 == 1.

   Parameters:
     m_A              number of element pairs to process; nothing is
                      touched when it is <= 0.
     gamma12, sigma12 pointers to the two float coefficients; each is
                      dereferenced once, before the loop.
     a1, inc_a1       pointer to the first element of the first vector
                      and its stride, in elements.
     a2, inc_a2       same, for the second vector.

   Usage notes:
     - Expands to a brace-enclosed compound statement, so it may only
       appear where a statement is allowed.
     - a1 and a2 are copied into restrict-qualified pointers, so the
       two vectors must not overlap.
     - Every argument is parenthesized in the expansion, so expressions
       such as `g + 1` or `c ? m : 0` are safe to pass.
     - m_A is re-evaluated at every loop test and inc_a1 / inc_a2 once
       per iteration; do not pass expressions with side effects.
     - The expansion declares locals named ga, si, alpha1, alpha2,
       temp1, temp2 and i; argument expressions must not refer to
       caller variables with those names.
*/
#define MAC_Apply_G_mx2_ops( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
	float           ga     = *( gamma12 ); \
	float           si     = *( sigma12 ); \
	float* restrict alpha1 = ( a1 ); \
	float* restrict alpha2 = ( a2 ); \
	float           temp1; \
	float           temp2; \
	int             i; \
\
	for ( i = 0; i < ( m_A ); ++i ) \
	{ \
		temp1 = *alpha1; \
		temp2 = *alpha2; \
\
		*alpha1 =  ga * temp1 + si * temp2; \
		*alpha2 = -si * temp1 + ga * temp2; \
\
		alpha1 += ( inc_a1 ); \
		alpha2 += ( inc_a2 ); \
	} \
}

/*
   MAC_Apply_G_mx2_opc

   Single-precision complex kernel. Same update as the real kernels,
   with real (float) coefficients gamma = *gamma12 and
   sigma = *sigma12 applied separately to the .real and .imag members
   of each scomplex element:

      x :=  gamma * x + sigma * y
      y := -sigma * x + gamma * y     (using the old value of x)

   Parameters:
     m_A              number of element pairs to process; nothing is
                      touched when it is <= 0.
     gamma12, sigma12 pointers to the two float coefficients; each is
                      dereferenced once, before the loop.
     a1, inc_a1       pointer to the first scomplex of the first
                      vector and its stride, in elements.
     a2, inc_a2       same, for the second vector.

   Usage notes:
     - Expands to a brace-enclosed compound statement.
     - a1 and a2 are copied into restrict-qualified pointers, so the
       two vectors must not overlap.
     - Every argument is parenthesized in the expansion.
     - m_A is re-evaluated at every loop test and inc_a1 / inc_a2 once
       per iteration; do not pass expressions with side effects.
     - The expansion declares locals named ga12, si12, alpha1, alpha2,
       temp1, temp2 and i; argument expressions must not refer to
       caller variables with those names.
*/
#define MAC_Apply_G_mx2_opc( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
	float              ga12   = *( gamma12 ); \
	float              si12   = *( sigma12 ); \
	scomplex* restrict alpha1 = ( a1 ); \
	scomplex* restrict alpha2 = ( a2 ); \
	scomplex           temp1; \
	scomplex           temp2; \
	int                i; \
\
	for ( i = 0; i < ( m_A ); ++i ) \
	{ \
		temp1 = *alpha1; \
		temp2 = *alpha2; \
\
		alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
		alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
		alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
		alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
		alpha1 += ( inc_a1 ); \
		alpha2 += ( inc_a2 ); \
	} \
}

/*
   MAC_Apply_G_mx2_opd

   Double-precision real kernel. For each of m_A element pairs
   ( x, y ) read through a1 and a2, overwrites the pair in place with

      x :=  gamma * x + sigma * y
      y := -sigma * x + gamma * y     (using the old value of x)

   where gamma = *gamma12 and sigma = *sigma12.

   Parameters:
     m_A              number of element pairs to process; nothing is
                      touched when it is <= 0.
     gamma12, sigma12 pointers to the two double coefficients; each is
                      dereferenced once, before the loop.
     a1, inc_a1       pointer to the first element of the first vector
                      and its stride, in elements.
     a2, inc_a2       same, for the second vector.

   Usage notes:
     - Expands to a brace-enclosed compound statement.
     - a1 and a2 are copied into restrict-qualified pointers, so the
       two vectors must not overlap.
     - Every argument is parenthesized in the expansion.
     - m_A is re-evaluated at every loop test and inc_a1 / inc_a2 once
       per iteration; do not pass expressions with side effects.
     - The expansion declares locals named ga, si, alpha1, alpha2,
       temp1, temp2 and i; argument expressions must not refer to
       caller variables with those names.
*/
#define MAC_Apply_G_mx2_opd( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
	double           ga     = *( gamma12 ); \
	double           si     = *( sigma12 ); \
	double* restrict alpha1 = ( a1 ); \
	double* restrict alpha2 = ( a2 ); \
	double           temp1; \
	double           temp2; \
	int              i; \
\
	for ( i = 0; i < ( m_A ); ++i ) \
	{ \
		temp1 = *alpha1; \
		temp2 = *alpha2; \
\
		*alpha1 =  ga * temp1 + si * temp2; \
		*alpha2 = -si * temp1 + ga * temp2; \
\
		alpha1 += ( inc_a1 ); \
		alpha2 += ( inc_a2 ); \
	} \
}

/*
   MAC_Apply_G_mx2_opz

   Double-precision complex kernel. Same update as the real kernels,
   with real (double) coefficients gamma = *gamma12 and
   sigma = *sigma12 applied separately to the .real and .imag members
   of each dcomplex element:

      x :=  gamma * x + sigma * y
      y := -sigma * x + gamma * y     (using the old value of x)

   Parameters:
     m_A              number of element pairs to process; nothing is
                      touched when it is <= 0.
     gamma12, sigma12 pointers to the two double coefficients; each is
                      dereferenced once, before the loop.
     a1, inc_a1       pointer to the first dcomplex of the first
                      vector and its stride, in elements.
     a2, inc_a2       same, for the second vector.

   Usage notes:
     - Expands to a brace-enclosed compound statement.
     - a1 and a2 are copied into restrict-qualified pointers, so the
       two vectors must not overlap.
     - Every argument is parenthesized in the expansion.
     - m_A is re-evaluated at every loop test and inc_a1 / inc_a2 once
       per iteration; do not pass expressions with side effects.
     - The expansion declares locals named ga12, si12, alpha1, alpha2,
       temp1, temp2 and i; argument expressions must not refer to
       caller variables with those names.
*/
#define MAC_Apply_G_mx2_opz( m_A, \
                             gamma12, \
                             sigma12, \
                             a1, inc_a1, \
                             a2, inc_a2 ) \
{ \
	double             ga12   = *( gamma12 ); \
	double             si12   = *( sigma12 ); \
	dcomplex* restrict alpha1 = ( a1 ); \
	dcomplex* restrict alpha2 = ( a2 ); \
	dcomplex           temp1; \
	dcomplex           temp2; \
	int                i; \
\
	for ( i = 0; i < ( m_A ); ++i ) \
	{ \
		temp1 = *alpha1; \
		temp2 = *alpha2; \
\
		alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
		alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
		alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
		alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
		alpha1 += ( inc_a1 ); \
		alpha2 += ( inc_a2 ); \
	} \
}