#include "testing/testing.h"

#ifdef __KERNEL_AVX2__
#  ifdef __KERNEL_AVX__
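/* Sketch (assumption): the two preprocessor fragments above belong to the
 * CPU-capability guard that every test calls before touching AVX code.
 * Assuming the system_cpu_support_avx2()/system_cpu_support_avx() queries from
 * Cycles' util/system header are visible, the guard presumably looks roughly
 * like this: */
static bool validate_cpu_capabilities_sketch()
{
#ifdef __KERNEL_AVX2__
  return system_cpu_support_avx2();
#else
#  ifdef __KERNEL_AVX__
  return system_cpu_support_avx();
#  else
  return false; /* neither AVX kernel variant is being built */
#  endif
#endif
}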
#define INIT_AVX_TEST \
  if (!validate_cpu_capabilities()) \
    return; \
\
  const avxf avxf_a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f); \
  const avxf avxf_b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f); \
  const avxf avxf_c(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
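/* Descriptive note: judging from the expectation code in the msub/madd tests
 * below (the first constructor argument is checked against index [7]), the
 * first constructor argument lands in the highest lane, i.e.
 * avxf_a[7] == 0.1f and avxf_a[0] == 0.8f. */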
#define compare_vector_scalar(a, b) \
  for (size_t index = 0; index < a.size; index++) \
    EXPECT_FLOAT_EQ(a[index], b);

#define compare_vector_vector(a, b) \
  for (size_t index = 0; index < a.size; index++) \
    EXPECT_FLOAT_EQ(a[index], b[index]);

#define compare_vector_vector_near(a, b, abserror) \
  for (size_t index = 0; index < a.size; index++) \
    EXPECT_NEAR(a[index], b[index], abserror);
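/* Usage sketch (assumption): compare_vector_vector_near() is the natural check
 * for results that are only approximately reproducible.  Assuming
 * TEST_CATEGORY_NAME is defined by the including test file and avxf provides
 * the usual + and - operators, a round-trip check could look like this: */
TEST(TEST_CATEGORY_NAME, avxf_near_usage_sketch)
{
  INIT_AVX_TEST
  avxf res = (avxf_a + avxf_b) - avxf_b; /* should come back to roughly avxf_a */
  compare_vector_vector_near(res, avxf_a, 0.000001f);
}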
#define basic_test_vv(a, b, op) \
  INIT_AVX_TEST \
  avxf c = a op b; \
  for (size_t i = 0; i < a.size; i++) \
    EXPECT_FLOAT_EQ(c[i], a[i] op b[i]);

#define basic_test_vf(a, b, op) \
  INIT_AVX_TEST \
  avxf c = a op b; \
  for (size_t i = 0; i < a.size; i++) \
    EXPECT_FLOAT_EQ(c[i], a[i] op b);
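/* Usage sketch (assumption): the two macros above are presumably expanded
 * inside GoogleTest cases, one per operator and operand kind.  The + operator
 * and the scalar literal below are assumptions; the original file most likely
 * uses its own named scalar constant. */
TEST(TEST_CATEGORY_NAME, avxf_add_vv_sketch)
{
  basic_test_vv(avxf_a, avxf_b, +)
}

TEST(TEST_CATEGORY_NAME, avxf_add_vf_sketch)
{
  basic_test_vf(avxf_a, 1.5f, +)
}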
                        static_cast<float>(index));
                        avxf(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f));
                        avxf(0.0f, 3.0f, 2.0f, 1.0f, 0.0f, 3.0f, 2.0f, 1.0f));
                        avxf(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f));
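/* Sketch (assumption): the continuation lines above belong to constructor
 * tests that compare constructed vectors against explicit lane values.  A
 * minimal test of that shape, assuming avxf(float) broadcasts one value to
 * all eight lanes, would be: */
TEST(TEST_CATEGORY_NAME, avxf_ctor_broadcast_sketch)
{
  INIT_AVX_TEST
  compare_vector_scalar(avxf(7.0f), 7.0f);
}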
  avxf res = set_sign_bit<1, 0, 0, 0, 0, 0, 0, 0>(avxf_a);
  avxf res = msub(avxf_a, avxf_b, avxf_c);
  avxf exp = avxf((avxf_a[7] * avxf_b[7]) - avxf_c[7],
                  (avxf_a[6] * avxf_b[6]) - avxf_c[6],
                  (avxf_a[5] * avxf_b[5]) - avxf_c[5],
                  (avxf_a[4] * avxf_b[4]) - avxf_c[4],
                  (avxf_a[3] * avxf_b[3]) - avxf_c[3],
                  (avxf_a[2] * avxf_b[2]) - avxf_c[2],
                  (avxf_a[1] * avxf_b[1]) - avxf_c[1],
                  (avxf_a[0] * avxf_b[0]) - avxf_c[0]);

  avxf res = madd(avxf_a, avxf_b, avxf_c);
  avxf exp = avxf((avxf_a[7] * avxf_b[7]) + avxf_c[7],
                  (avxf_a[6] * avxf_b[6]) + avxf_c[6],
                  (avxf_a[5] * avxf_b[5]) + avxf_c[5],
                  (avxf_a[4] * avxf_b[4]) + avxf_c[4],
                  (avxf_a[3] * avxf_b[3]) + avxf_c[3],
                  (avxf_a[2] * avxf_b[2]) + avxf_c[2],
                  (avxf_a[1] * avxf_b[1]) + avxf_c[1],
                  (avxf_a[0] * avxf_b[0]) + avxf_c[0]);

  avxf res = nmadd(avxf_a, avxf_b, avxf_c);
  avxf exp = avxf(avxf_c[7] - (avxf_a[7] * avxf_b[7]),
                  avxf_c[6] - (avxf_a[6] * avxf_b[6]),
                  avxf_c[5] - (avxf_a[5] * avxf_b[5]),
                  avxf_c[4] - (avxf_a[4] * avxf_b[4]),
                  avxf_c[3] - (avxf_a[3] * avxf_b[3]),
                  avxf_c[2] - (avxf_a[2] * avxf_b[2]),
                  avxf_c[1] - (avxf_a[1] * avxf_b[1]),
                  avxf_c[0] - (avxf_a[0] * avxf_b[0]));
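  /* Each of the three FMA-style fragments above (msub, madd, nmadd) presumably
   * finishes by checking the computed vector against the hand-built reference,
   * e.g. compare_vector_vector(res, exp); (assumption). */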
  avxf a(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f);
  avxf b(7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f);
      a[0] <= b[0] ? -1 : 0,
      a[1] <= b[1] ? -1 : 0,
      a[2] <= b[2] ? -1 : 0,
      a[3] <= b[3] ? -1 : 0,
      a[4] <= b[4] ? -1 : 0,
      a[5] <= b[5] ? -1 : 0,
      a[6] <= b[6] ? -1 : 0,
      a[7] <= b[7] ? -1 : 0,
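/* Sketch (assumption, not the original test body): the ternary expectations
 * above are presumably collected into an int array and checked against the
 * lane mask produced by `a <= b`.  For reference, the same per-lane <=
 * predicate can be reduced to an 8-bit mask with plain AVX intrinsics: */
#include <immintrin.h>

static inline int avx_le_mask_sketch(__m256 x, __m256 y)
{
  /* Bit i of the result is set when lane i of x is <= lane i of y. */
  return _mm256_movemask_ps(_mm256_cmp_ps(x, y, _CMP_LE_OQ));
}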
  avxf res = permute<3, 0, 1, 7, 6, 5, 2, 4>(avxf_b);

  avxf res = blend<0, 0, 1, 0, 1, 0, 1, 0>(avxf_a, avxf_b);

  avxf res = shuffle<0, 1, 2, 3, 1, 3, 2, 0>(avxf_a);
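  /* Descriptive note (assumption): the three `res` lines above come from
   * separate lane-rearrangement tests; permute<>, blend<> and shuffle<> select
   * or reorder lanes according to their template indices, and each test
   * presumably checks `res` against a hand-written avxf via
   * compare_vector_vector(). */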
  float den, den2;
  dot3(avxf_a, avxf_b, den, den2);
  EXPECT_FLOAT_EQ(den, 14.9f);
  EXPECT_FLOAT_EQ(den2, 2.9f);
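  /* Worked check, assuming the lane ordering noted next to INIT_AVX_TEST and
   * that dot3() reduces lanes 0-2 into den and lanes 4-6 into den2:
   *   den  = 0.8*8.0 + 0.7*7.0 + 0.6*6.0 = 6.4 + 4.9 + 3.6 = 14.9
   *   den2 = 0.4*4.0 + 0.3*3.0 + 0.2*2.0 = 1.6 + 0.9 + 0.4 = 2.9 */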