Blender  V3.3
util_avxf_test.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #include "testing/testing.h"
5 #include "util/system.h"
6 #include "util/types.h"
7 
9 
/* Report whether the CPU running the tests supports the instruction set this
 * translation unit is built for: AVX2 when __KERNEL_AVX2__ is defined,
 * otherwise AVX when __KERNEL_AVX__ is defined.
 *
 * When neither macro is defined the function returns false, so callers that
 * guard on it (INIT_AVX_TEST) skip the test body. Without that fallback the
 * function would fall off its end without returning a value, which is
 * undefined behavior for a non-void function. */
static bool validate_cpu_capabilities()
{
#if defined(__KERNEL_AVX2__)
  return system_cpu_support_avx2();
#elif defined(__KERNEL_AVX__)
  return system_cpu_support_avx();
#else
  return false;
#endif
}
21 
/* Test prologue (also expanded by basic_test_vv and basic_test_vf).
 *
 * Returns from the enclosing function when validate_cpu_capabilities() reports
 * false; otherwise declares the constant input vectors avxf_a, avxf_b and
 * avxf_c in the caller's scope. The expansion is a sequence of statements and
 * is deliberately not wrapped in a block, since that would end the lifetime of
 * the three vectors before the test body could use them. */
#define INIT_AVX_TEST \
  if (!validate_cpu_capabilities()) { \
    return; \
  } \
  const avxf avxf_a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); \
  const avxf avxf_b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); \
  const avxf avxf_c(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
29 
/* Expect every element of vector `a` to equal `b` (one EXPECT_FLOAT_EQ per
 * element, for indices 0 .. a.size - 1).
 *
 * `b` is re-evaluated on every iteration and may refer to the loop variable
 * `index`; a caller in this file passes static_cast<float>(index), so the
 * variable name is part of the macro's contract and must not be renamed.
 * Arguments are parenthesized and the loop body is braced so the expansion
 * stays a single well-formed statement for any argument expression. */
#define compare_vector_scalar(a, b) \
  for (size_t index = 0; index < (a).size; index++) { \
    EXPECT_FLOAT_EQ((a)[index], (b)); \
  }
33 
/* Expect `a` and `b` to match element by element (one EXPECT_FLOAT_EQ per
 * element, for indices 0 .. a.size - 1). Only `a` needs a `size` member; `b`
 * just has to be indexable. Both argument expressions are re-evaluated on
 * every iteration.
 *
 * Arguments are parenthesized and the loop body is braced so the expansion
 * stays a single well-formed statement for any argument expression. */
#define compare_vector_vector(a, b) \
  for (size_t index = 0; index < (a).size; index++) { \
    EXPECT_FLOAT_EQ((a)[index], (b)[index]); \
  }
37 
/* Expect `a` and `b` to match element by element within the absolute
 * tolerance `abserror` (one EXPECT_NEAR per element, for indices
 * 0 .. a.size - 1). All argument expressions are re-evaluated on every
 * iteration.
 *
 * Arguments are parenthesized and the loop body is braced so the expansion
 * stays a single well-formed statement for any argument expression. */
#define compare_vector_vector_near(a, b, abserror) \
  for (size_t index = 0; index < (a).size; index++) { \
    EXPECT_NEAR((a)[index], (b)[index], (abserror)); \
  }
41 
/* Complete body of a "vector op vector" test.
 *
 * Expands INIT_AVX_TEST (so `a` and `b` may name the vectors it declares),
 * evaluates `a op b` once on whole vectors, then expects each element of the
 * result to equal `op` applied to the matching elements of `a` and `b`.
 * Operands are parenthesized and the loop body is braced so the expansion
 * does not depend on the precedence of the argument expressions. */
#define basic_test_vv(a, b, op) \
  INIT_AVX_TEST \
  avxf c = (a)op(b); \
  for (size_t i = 0; i < (a).size; i++) { \
    EXPECT_FLOAT_EQ(c[i], (a)[i] op(b)[i]); \
  }
47 
/* Vector op float tests.
 *
 * Complete body of a "vector op scalar" test: expands INIT_AVX_TEST (so `a`
 * may name a vector it declares), evaluates `a op b` once with the scalar `b`
 * applied to the whole vector, then expects each element of the result to
 * equal `op` applied to the matching element of `a` and the same scalar.
 * Operands are parenthesized and the loop body is braced so the expansion
 * does not depend on the precedence of the argument expressions. */
#define basic_test_vf(a, b, op) \
  INIT_AVX_TEST \
  avxf c = (a)op(b); \
  for (size_t i = 0; i < (a).size; i++) { \
    EXPECT_FLOAT_EQ(c[i], (a)[i] op(b)); \
  }
54 
/* Scalar right-hand operand for the "vector op float" tests below. */
static const float float_b = 1.5f;

/* Element-wise arithmetic between two vectors. Each test body is produced
 * entirely by basic_test_vv, which also expands INIT_AVX_TEST and therefore
 * declares the avxf_a / avxf_b operands named here. */
TEST(TEST_CATEGORY_NAME, avxf_add_vv)
{
  basic_test_vv(avxf_a, avxf_b, +)
}

TEST(TEST_CATEGORY_NAME, avxf_sub_vv)
{
  basic_test_vv(avxf_a, avxf_b, -)
}

TEST(TEST_CATEGORY_NAME, avxf_mul_vv)
{
  basic_test_vv(avxf_a, avxf_b, *)
}

TEST(TEST_CATEGORY_NAME, avxf_div_vv)
{
  basic_test_vv(avxf_a, avxf_b, /)
}

/* Arithmetic between a vector and the scalar float_b, via basic_test_vf. */
TEST(TEST_CATEGORY_NAME, avxf_add_vf)
{
  basic_test_vf(avxf_a, float_b, +)
}

TEST(TEST_CATEGORY_NAME, avxf_sub_vf)
{
  basic_test_vf(avxf_a, float_b, -)
}

TEST(TEST_CATEGORY_NAME, avxf_mul_vf)
{
  basic_test_vf(avxf_a, float_b, *)
}

TEST(TEST_CATEGORY_NAME, avxf_div_vf)
{
  basic_test_vf(avxf_a, float_b, /)
}
66 
68 {
70  compare_vector_scalar(avxf(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f),
71  static_cast<float>(index));
72  compare_vector_scalar(avxf(1.0f), 1.0f);
73  compare_vector_vector(avxf(1.0f, 2.0f), avxf(1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f));
74  compare_vector_vector(avxf(1.0f, 2.0f, 3.0f, 4.0f),
75  avxf(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f));
76  compare_vector_vector(avxf(make_float3(1.0f, 2.0f, 3.0f)),
77  avxf(0.0f, 3.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f));
78 }
79 
81 {
83  compare_vector_vector(mm256_sqrt(avxf(1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f)),
84  avxf(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
85 }
86 
87 TEST(TEST_CATEGORY_NAME, avxf_min_max)
88 {
90  compare_vector_vector(min(avxf_a, avxf_b), avxf_a);
91  compare_vector_vector(max(avxf_a, avxf_b), avxf_b);
92 }
93 
94 TEST(TEST_CATEGORY_NAME, avxf_set_sign)
95 {
97  avxf res = set_sign_bit<1, 0, 0, 0, 0, 0, 0, 0>(avxf_a);
98  compare_vector_vector(res, avxf(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, -0.8f));
99 }
100 
102 {
104  avxf res = msub(avxf_a, avxf_b, avxf_c);
105  avxf exp = avxf((avxf_a[7] * avxf_b[7]) - avxf_c[7],
106  (avxf_a[6] * avxf_b[6]) - avxf_c[6],
107  (avxf_a[5] * avxf_b[5]) - avxf_c[5],
108  (avxf_a[4] * avxf_b[4]) - avxf_c[4],
109  (avxf_a[3] * avxf_b[3]) - avxf_c[3],
110  (avxf_a[2] * avxf_b[2]) - avxf_c[2],
111  (avxf_a[1] * avxf_b[1]) - avxf_c[1],
112  (avxf_a[0] * avxf_b[0]) - avxf_c[0]);
114 }
115 
117 {
119  avxf res = madd(avxf_a, avxf_b, avxf_c);
120  avxf exp = avxf((avxf_a[7] * avxf_b[7]) + avxf_c[7],
121  (avxf_a[6] * avxf_b[6]) + avxf_c[6],
122  (avxf_a[5] * avxf_b[5]) + avxf_c[5],
123  (avxf_a[4] * avxf_b[4]) + avxf_c[4],
124  (avxf_a[3] * avxf_b[3]) + avxf_c[3],
125  (avxf_a[2] * avxf_b[2]) + avxf_c[2],
126  (avxf_a[1] * avxf_b[1]) + avxf_c[1],
127  (avxf_a[0] * avxf_b[0]) + avxf_c[0]);
129 }
130 
132 {
134  avxf res = nmadd(avxf_a, avxf_b, avxf_c);
135  avxf exp = avxf(avxf_c[7] - (avxf_a[7] * avxf_b[7]),
136  avxf_c[6] - (avxf_a[6] * avxf_b[6]),
137  avxf_c[5] - (avxf_a[5] * avxf_b[5]),
138  avxf_c[4] - (avxf_a[4] * avxf_b[4]),
139  avxf_c[3] - (avxf_a[3] * avxf_b[3]),
140  avxf_c[2] - (avxf_a[2] * avxf_b[2]),
141  avxf_c[1] - (avxf_a[1] * avxf_b[1]),
142  avxf_c[0] - (avxf_a[0] * avxf_b[0]));
144 }
145 
146 TEST(TEST_CATEGORY_NAME, avxf_compare)
147 {
149  avxf a(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
150  avxf b(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
151  avxb res = a <= b;
152  int exp[8] = {
153  a[0] <= b[0] ? -1 : 0,
154  a[1] <= b[1] ? -1 : 0,
155  a[2] <= b[2] ? -1 : 0,
156  a[3] <= b[3] ? -1 : 0,
157  a[4] <= b[4] ? -1 : 0,
158  a[5] <= b[5] ? -1 : 0,
159  a[6] <= b[6] ? -1 : 0,
160  a[7] <= b[7] ? -1 : 0,
161  };
163 }
164 
165 TEST(TEST_CATEGORY_NAME, avxf_permute)
166 {
168  avxf res = permute<3, 0, 1, 7, 6, 5, 2, 4>(avxf_b);
169  compare_vector_vector(res, avxf(4.0f, 6.0f, 3.0f, 2.0f, 1.0f, 7.0f, 8.0f, 5.0f));
170 }
171 
173 {
175  avxf res = blend<0, 0, 1, 0, 1, 0, 1, 0>(avxf_a, avxf_b);
176  compare_vector_vector(res, avxf(0.1f, 0.2f, 3.0f, 0.4f, 5.0f, 0.6f, 7.0f, 0.8f));
177 }
178 
179 TEST(TEST_CATEGORY_NAME, avxf_shuffle)
180 {
182  avxf res = shuffle<0, 1, 2, 3, 1, 3, 2, 0>(avxf_a);
183  compare_vector_vector(res, avxf(0.4f, 0.2f, 0.1f, 0.3f, 0.5f, 0.6f, 0.7f, 0.8f));
184 }
185 
187 {
189  avxf res = cross(avxf_b, avxf_c);
191  avxf(0.0f,
192  -9.5367432e-07f,
193  0.0f,
194  4.7683716e-07f,
195  0.0f,
196  -3.8146973e-06f,
197  3.8146973e-06f,
198  3.8146973e-06f),
199  0.000002000f);
200 }
201 
203 {
205  float den, den2;
206  dot3(avxf_a, avxf_b, den, den2);
207  EXPECT_FLOAT_EQ(den, 14.9f);
208  EXPECT_FLOAT_EQ(den2, 2.9f);
209 }
210 
__forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c)
Ternary Operators.
Definition: avxf.h:321
__forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c)
Definition: avxf.h:338
__forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c)
Definition: avxf.h:330
__forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2)
Definition: avxf.h:119
__forceinline const avxf mm256_sqrt(const avxf &a)
Definition: avxf.h:135
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
ccl_device_inline float3 exp(float3 v)
Definition: math_float3.h:392
#define make_float3(x, y, z)
Definition: metal/compat.h:204
static unsigned a[3]
Definition: RandGen.cpp:78
vec_base< T, 3 > cross(const vec_base< T, 3 > &a, const vec_base< T, 3 > &b)
static const pxr::TfToken b("b", pxr::TfToken::Immortal)
#define min(a, b)
Definition: sort.c:35
Definition: avxb.h:13
Definition: avxf.h:11
bool system_cpu_support_avx2()
Definition: system.cpp:251
bool system_cpu_support_avx()
Definition: system.cpp:247
float max
#define TEST_CATEGORY_NAME
#define basic_test_vv(a, b, op)
static CCL_NAMESPACE_BEGIN bool validate_cpu_capabilities()
#define INIT_AVX_TEST
#define compare_vector_vector(a, b)
static const float float_b
TEST(TEST_CATEGORY_NAME, avxf_add_vv)
#define basic_test_vf(a, b, op)
#define compare_vector_vector_near(a, b, abserror)
#define compare_vector_scalar(a, b)