Blender  V3.3
math_int4.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #ifndef __UTIL_MATH_INT4_H__
5 #define __UTIL_MATH_INT4_H__
6 
7 #ifndef __UTIL_MATH_H__
8 # error "Do not include this file directly, include util/types.h instead."
9 #endif
10 
12 
13 /*******************************************************************************
14  * Declaration.
15  */
16 
17 #ifndef __KERNEL_GPU__
18 ccl_device_inline int4 operator+(const int4 &a, const int4 &b);
20 ccl_device_inline int4 operator>>(const int4 &a, int i);
21 ccl_device_inline int4 operator<<(const int4 &a, int i);
22 ccl_device_inline int4 operator<(const int4 &a, const int4 &b);
23 ccl_device_inline int4 operator>=(const int4 &a, const int4 &b);
24 ccl_device_inline int4 operator&(const int4 &a, const int4 &b);
27 ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx);
28 ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b);
29 #endif /* __KERNEL_GPU__ */
30 
31 /*******************************************************************************
32  * Definition.
33  */
34 
35 #ifndef __KERNEL_GPU__
37 {
38 # ifdef __KERNEL_SSE__
39  return int4(_mm_add_epi32(a.m128, b.m128));
40 # else
41  return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
42 # endif
43 }
44 
46 {
47  return a = a + b;
48 }
49 
51 {
52 # ifdef __KERNEL_SSE__
53  return int4(_mm_srai_epi32(a.m128, i));
54 # else
55  return make_int4(a.x >> i, a.y >> i, a.z >> i, a.w >> i);
56 # endif
57 }
58 
60 {
61 # ifdef __KERNEL_SSE__
62  return int4(_mm_slli_epi32(a.m128, i));
63 # else
64  return make_int4(a.x << i, a.y << i, a.z << i, a.w << i);
65 # endif
66 }
67 
69 {
70 # ifdef __KERNEL_SSE__
71  return int4(_mm_cmplt_epi32(a.m128, b.m128));
72 # else
73  return make_int4(a.x < b.x, a.y < b.y, a.z < b.z, a.w < b.w);
74 # endif
75 }
76 
78 {
79 # ifdef __KERNEL_SSE__
80  return int4(_mm_xor_si128(_mm_set1_epi32(0xffffffff), _mm_cmplt_epi32(a.m128, b.m128)));
81 # else
82  return make_int4(a.x >= b.x, a.y >= b.y, a.z >= b.z, a.w >= b.w);
83 # endif
84 }
85 
87 {
88 # ifdef __KERNEL_SSE__
89  return int4(_mm_and_si128(a.m128, b.m128));
90 # else
91  return make_int4(a.x & b.x, a.y & b.y, a.z & b.z, a.w & b.w);
92 # endif
93 }
94 
96 {
97 # if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
98  return int4(_mm_min_epi32(a.m128, b.m128));
99 # else
100  return make_int4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w));
101 # endif
102 }
103 
105 {
106 # if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE41__)
107  return int4(_mm_max_epi32(a.m128, b.m128));
108 # else
109  return make_int4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w));
110 # endif
111 }
112 
113 ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx)
114 {
115  return min(max(a, mn), mx);
116 }
117 
118 ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b)
119 {
120 # ifdef __KERNEL_SSE__
121  return int4(_mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b)));
122 # else
123  return make_int4(
124  (mask.x) ? a.x : b.x, (mask.y) ? a.y : b.y, (mask.z) ? a.z : b.z, (mask.w) ? a.w : b.w);
125 # endif
126 }
127 
129 {
130 # ifdef __KERNEL_SSE__
131  return int4(_mm_loadu_si128((__m128i *)v));
132 # else
133  return make_int4(v[0], v[1], v[2], v[3]);
134 # endif
135 }
136 #endif /* __KERNEL_GPU__ */
137 
139 
140 #endif /* __UTIL_MATH_INT4_H__ */
int int4[4]
ATTR_WARN_UNUSED_RESULT const BMVert * v
#define ccl_device_inline
Definition: cuda/compat.h:34
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
Definition: math_float4.h:513
ccl_device_inline int4 load_int4(const int *v)
Definition: math_int4.h:128
ccl_device_inline int4 operator>>(const int4 &a, int i)
Definition: math_int4.h:50
ccl_device_inline int4 operator<<(const int4 &a, int i)
Definition: math_int4.h:59
ccl_device_inline int4 operator<(const int4 &a, const int4 &b)
Definition: math_int4.h:68
ccl_device_inline int4 operator&(const int4 &a, const int4 &b)
Definition: math_int4.h:86
ccl_device_inline int4 clamp(const int4 &a, const int4 &mn, const int4 &mx)
Definition: math_int4.h:113
ccl_device_inline int4 min(int4 a, int4 b)
Definition: math_int4.h:95
ccl_device_inline int4 max(int4 a, int4 b)
Definition: math_int4.h:104
CCL_NAMESPACE_BEGIN ccl_device_inline int4 operator+(const int4 &a, const int4 &b)
Definition: math_int4.h:36
ccl_device_inline int4 operator>=(const int4 &a, const int4 &b)
Definition: math_int4.h:77
ccl_device_inline int4 select(const int4 &mask, const int4 &a, const int4 &b)
Definition: math_int4.h:118
ccl_device_inline int4 operator+=(int4 &a, const int4 &b)
Definition: math_int4.h:45
#define make_int4(x, y, z, w)
Definition: metal/compat.h:208
static unsigned a[3]
Definition: RandGen.cpp:78
static const pxr::TfToken b("b", pxr::TfToken::Immortal)