libflame  revision_anchor
FLA_Apply_G_mx3_opt.h
Go to the documentation of this file.
1 /*
2 
3  Copyright (C) 2014, The University of Texas at Austin
4 
5  This file is part of libflame and is available under the 3-Clause
6  BSD license, which can be found in the LICENSE file at the top-level
7  directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
/*
 * MAC_Apply_G_mx3_ops
 *
 * Single-precision real kernel. For each of m_A rows, the triple
 * ( x1, x2, x3 ) addressed by a1, a2, a3 is updated in place by two
 * consecutive 2x2 plane transformations:
 *
 *   1. ( x1, x2 ) <- ( ga12*x1 + si12*x2,  ga12*x2 - si12*x1 )
 *   2. ( x2, x3 ) <- ( ga23*x2 + si23*x3,  ga23*x3 - si23*x2 )
 *
 * Step 2 consumes the x2 produced by step 1. Each transformation is a
 * Givens rotation when gamma^2 + sigma^2 = 1; the macro does not check this.
 *
 * Parameters:
 *   m_A               number of rows to process (compared against an int
 *                     loop counter).
 *   gamma12, sigma12  pointers to the float scalars of the first step.
 *   gamma23, sigma23  pointers to the float scalars of the second step.
 *   a1, a2, a3        float pointers to the first element of each vector.
 *   inc_a1..inc_a3    element strides added to the pointers after each row.
 *
 * Usage notes:
 *   - a1, a2 and a3 are copied into restrict-qualified pointers, so the
 *     elements reached through them must not overlap.
 *   - Every parameter is parenthesized in the expansion, so expressions such
 *     as `g + 1` or `cond ? m : n` may be passed safely.
 *   - m_A is evaluated at every loop test and inc_a1..inc_a3 once per row;
 *     pass expressions without side effects.
 *   - The expansion is a brace-enclosed block declaring locals named
 *     ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3 and i; arguments
 *     must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_ops( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    /* Read the four scalars once, before any element is written. */ \
    float           ga12   = *(gamma12); \
    float           si12   = *(sigma12); \
    float           ga23   = *(gamma23); \
    float           si23   = *(sigma23); \
    float* restrict alpha1 = (a1); \
    float* restrict alpha2 = (a2); \
    float* restrict alpha3 = (a3); \
    float           temp1; \
    float           temp2; \
    float           temp3; \
    int             i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Step 1: combine the a1 and a2 elements. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Step 2: combine the freshly updated a2 element with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
51 
/*
 * MAC_Apply_G_mx3_opd
 *
 * Double-precision real kernel. For each of m_A rows, the triple
 * ( x1, x2, x3 ) addressed by a1, a2, a3 is updated in place by two
 * consecutive 2x2 plane transformations:
 *
 *   1. ( x1, x2 ) <- ( ga12*x1 + si12*x2,  ga12*x2 - si12*x1 )
 *   2. ( x2, x3 ) <- ( ga23*x2 + si23*x3,  ga23*x3 - si23*x2 )
 *
 * Step 2 consumes the x2 produced by step 1. Each transformation is a
 * Givens rotation when gamma^2 + sigma^2 = 1; the macro does not check this.
 *
 * Parameters:
 *   m_A               number of rows to process (compared against an int
 *                     loop counter).
 *   gamma12, sigma12  pointers to the double scalars of the first step.
 *   gamma23, sigma23  pointers to the double scalars of the second step.
 *   a1, a2, a3        double pointers to the first element of each vector.
 *   inc_a1..inc_a3    element strides added to the pointers after each row.
 *
 * Usage notes:
 *   - a1, a2 and a3 are copied into restrict-qualified pointers, so the
 *     elements reached through them must not overlap.
 *   - Every parameter is parenthesized in the expansion, so expressions such
 *     as `g + 1` or `cond ? m : n` may be passed safely.
 *   - m_A is evaluated at every loop test and inc_a1..inc_a3 once per row;
 *     pass expressions without side effects.
 *   - The expansion is a brace-enclosed block declaring locals named
 *     ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3 and i; arguments
 *     must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opd( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    /* Read the four scalars once, before any element is written. */ \
    double           ga12   = *(gamma12); \
    double           si12   = *(sigma12); \
    double           ga23   = *(gamma23); \
    double           si23   = *(sigma23); \
    double* restrict alpha1 = (a1); \
    double* restrict alpha2 = (a2); \
    double* restrict alpha3 = (a3); \
    double           temp1; \
    double           temp2; \
    double           temp3; \
    int              i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Step 1: combine the a1 and a2 elements. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        *alpha1 = temp1 * ga12 + temp2 * si12; \
        *alpha2 = temp2 * ga12 - temp1 * si12; \
\
        /* Step 2: combine the freshly updated a2 element with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        *alpha2 = temp2 * ga23 + temp3 * si23; \
        *alpha3 = temp3 * ga23 - temp2 * si23; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
92 
/*
 * MAC_Apply_G_mx3_opc
 *
 * Single-precision complex kernel with real (float) scalars. For each of
 * m_A rows, the triple ( x1, x2, x3 ) addressed by a1, a2, a3 is updated in
 * place by two consecutive 2x2 plane transformations, applied independently
 * to the .real and .imag members:
 *
 *   1. ( x1, x2 ) <- ( ga12*x1 + si12*x2,  -si12*x1 + ga12*x2 )
 *   2. ( x2, x3 ) <- ( ga23*x2 + si23*x3,  -si23*x2 + ga23*x3 )
 *
 * Step 2 consumes the x2 produced by step 1. Each transformation is a
 * Givens rotation when gamma^2 + sigma^2 = 1; the macro does not check this.
 *
 * Parameters:
 *   m_A               number of rows to process (compared against an int
 *                     loop counter).
 *   gamma12, sigma12  pointers to the float scalars of the first step.
 *   gamma23, sigma23  pointers to the float scalars of the second step.
 *   a1, a2, a3        scomplex pointers to the first element of each vector.
 *   inc_a1..inc_a3    element strides added to the pointers after each row.
 *
 * Usage notes:
 *   - a1, a2 and a3 are copied into restrict-qualified pointers, so the
 *     elements reached through them must not overlap.
 *   - Every parameter is parenthesized in the expansion, so expressions such
 *     as `g + 1` or `cond ? m : n` may be passed safely.
 *   - m_A is evaluated at every loop test and inc_a1..inc_a3 once per row;
 *     pass expressions without side effects.
 *   - The expansion is a brace-enclosed block declaring locals named
 *     ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3 and i; arguments
 *     must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opc( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    /* Read the four scalars once, before any element is written. */ \
    float              ga12   = *(gamma12); \
    float              si12   = *(sigma12); \
    float              ga23   = *(gamma23); \
    float              si23   = *(sigma23); \
    scomplex* restrict alpha1 = (a1); \
    scomplex* restrict alpha2 = (a2); \
    scomplex* restrict alpha3 = (a3); \
    scomplex           temp1; \
    scomplex           temp2; \
    scomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Step 1: combine the a1 and a2 elements. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Step 2: combine the freshly updated a2 element with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
139 
/*
 * MAC_Apply_G_mx3_opz
 *
 * Double-precision complex kernel with real (double) scalars. For each of
 * m_A rows, the triple ( x1, x2, x3 ) addressed by a1, a2, a3 is updated in
 * place by two consecutive 2x2 plane transformations, applied independently
 * to the .real and .imag members:
 *
 *   1. ( x1, x2 ) <- ( ga12*x1 + si12*x2,  -si12*x1 + ga12*x2 )
 *   2. ( x2, x3 ) <- ( ga23*x2 + si23*x3,  -si23*x2 + ga23*x3 )
 *
 * Step 2 consumes the x2 produced by step 1. Each transformation is a
 * Givens rotation when gamma^2 + sigma^2 = 1; the macro does not check this.
 *
 * Parameters:
 *   m_A               number of rows to process (compared against an int
 *                     loop counter).
 *   gamma12, sigma12  pointers to the double scalars of the first step.
 *   gamma23, sigma23  pointers to the double scalars of the second step.
 *   a1, a2, a3        dcomplex pointers to the first element of each vector.
 *   inc_a1..inc_a3    element strides added to the pointers after each row.
 *
 * Usage notes:
 *   - a1, a2 and a3 are copied into restrict-qualified pointers, so the
 *     elements reached through them must not overlap.
 *   - Every parameter is parenthesized in the expansion, so expressions such
 *     as `g + 1` or `cond ? m : n` may be passed safely.
 *   - m_A is evaluated at every loop test and inc_a1..inc_a3 once per row;
 *     pass expressions without side effects.
 *   - The expansion is a brace-enclosed block declaring locals named
 *     ga12, si12, ga23, si23, alpha1..alpha3, temp1..temp3 and i; arguments
 *     must not refer to caller variables with those names.
 */
#define MAC_Apply_G_mx3_opz( m_A, \
                             gamma12, \
                             sigma12, \
                             gamma23, \
                             sigma23, \
                             a1, inc_a1, \
                             a2, inc_a2, \
                             a3, inc_a3 ) \
{ \
    /* Read the four scalars once, before any element is written. */ \
    double             ga12   = *(gamma12); \
    double             si12   = *(sigma12); \
    double             ga23   = *(gamma23); \
    double             si23   = *(sigma23); \
    dcomplex* restrict alpha1 = (a1); \
    dcomplex* restrict alpha2 = (a2); \
    dcomplex* restrict alpha3 = (a3); \
    dcomplex           temp1; \
    dcomplex           temp2; \
    dcomplex           temp3; \
    int                i; \
\
    for ( i = 0; i < (m_A); ++i ) \
    { \
        /* Step 1: combine the a1 and a2 elements. */ \
        temp1 = *alpha1; \
        temp2 = *alpha2; \
\
        alpha1->real =  ga12 * temp1.real + si12 * temp2.real; \
        alpha1->imag =  ga12 * temp1.imag + si12 * temp2.imag; \
\
        alpha2->real = -si12 * temp1.real + ga12 * temp2.real; \
        alpha2->imag = -si12 * temp1.imag + ga12 * temp2.imag; \
\
        /* Step 2: combine the freshly updated a2 element with a3. */ \
        temp2 = *alpha2; \
        temp3 = *alpha3; \
\
        alpha2->real =  ga23 * temp2.real + si23 * temp3.real; \
        alpha2->imag =  ga23 * temp2.imag + si23 * temp3.imag; \
\
        alpha3->real = -si23 * temp2.real + ga23 * temp3.real; \
        alpha3->imag = -si23 * temp2.imag + ga23 * temp3.imag; \
\
        alpha1 += (inc_a1); \
        alpha2 += (inc_a2); \
        alpha3 += (inc_a3); \
    } \
}
186