5 #ifndef __UTIL_SSEB_H__
6 #define __UTIL_SSEB_H__
10 #ifdef __KERNEL_SSE2__
53 return _mm_castps_si128(m128);
57 return _mm_castps_pd(m128);
61 : m128(_mm_lookupmask_ps[(size_t(
a) << 3) | (size_t(
a) << 2) | (size_t(
a) << 1) | size_t(
a)])
65 : m128(_mm_lookupmask_ps[(size_t(
b) << 3) | (size_t(
a) << 2) | (size_t(
b) << 1) | size_t(
a)])
69 : m128(_mm_lookupmask_ps[(size_t(
d) << 3) | (size_t(
c) << 2) | (size_t(
b) << 1) | size_t(
a)])
75 m128 = _mm_lookupmask_ps[
mask];
86 : m128(_mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())))
97 return (_mm_movemask_ps(m128) >> i) & 1;
112 return _mm_xor_ps(
a, sseb(True));
121 return _mm_and_ps(
a,
b);
125 return _mm_or_ps(
a,
b);
129 return _mm_xor_ps(
a,
b);
155 return _mm_xor_ps(
a,
b);
159 return _mm_castsi128_ps(_mm_cmpeq_epi32(
a,
b));
164 # if defined(__KERNEL_SSE41__)
165 return _mm_blendv_ps(f,
t, m);
167 return _mm_or_ps(_mm_and_ps(m,
t), _mm_andnot_ps(m, f));
177 return _mm_unpacklo_ps(
a,
b);
181 return _mm_unpackhi_ps(
a,
b);
184 template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
187 # ifdef __KERNEL_NEON__
188 return shuffle_neon<int32x4_t, i0, i1, i2, i3>(
a);
190 return _mm_castsi128_ps(_mm_shuffle_epi32(
a, _MM_SHUFFLE(i3, i2,
i1, i0)));
194 # ifndef __KERNEL_NEON__
197 return _mm_movelh_ps(
a,
a);
200 template<>
__forceinline const sseb shuffle<2, 3, 2, 3>(
const sseb &
a)
202 return _mm_movehl_ps(
a,
a);
206 template<
size_t i0,
size_t i1,
size_t i2,
size_t i3>
209 # ifdef __KERNEL_NEON__
210 return shuffle_neon<int32x4_t, i0, i1, i2, i3>(
a,
b);
212 return _mm_shuffle_ps(
a,
b, _MM_SHUFFLE(i3, i2,
i1, i0));
216 # ifndef __KERNEL_NEON__
219 return _mm_movelh_ps(
a,
b);
222 template<>
__forceinline const sseb shuffle<2, 3, 2, 3>(
const sseb &
a,
const sseb &
b)
224 return _mm_movehl_ps(
b,
a);
228 # if defined(__KERNEL_SSE3__) && !defined(__KERNEL_NEON__)
231 return _mm_moveldup_ps(
a);
235 return _mm_movehdup_ps(
a);
239 # if defined(__KERNEL_SSE41__)
240 template<
size_t dst,
size_t src,
size_t clr>
243 # ifdef __KERNEL_NEON__
251 return _mm_insert_ps(
a,
b, (dst << 4) | (
src << 6) | clr);
256 return insert<dst, src, 0>(
a,
b);
260 return insert<dst, 0>(
a, sseb(
b));
268 # if defined(__KERNEL_SSE41__)
271 # if defined(__KERNEL_NEON__)
272 const int32x4_t
mask = {1, 1, 1, 1};
273 int32x4_t
t = vandq_s32(vreinterpretq_s32_m128(
a.m128),
mask);
274 return vaddvq_s32(
t);
276 return _mm_popcnt_u32(_mm_movemask_ps(
a));
288 # if defined(__KERNEL_NEON__)
289 return vaddvq_s32(vreinterpretq_s32_m128(
a.m128)) == -4;
291 return _mm_movemask_ps(
a) == 0xf;
296 # if defined(__KERNEL_NEON__)
297 return vaddvq_s32(vreinterpretq_s32_m128(
a.m128)) != 0x0;
299 return _mm_movemask_ps(
a) != 0x0;
304 # if defined(__KERNEL_NEON__)
305 return vaddvq_s32(vreinterpretq_s32_m128(
b.m128)) == -4;
307 return _mm_movemask_ps(
b) == 0xf;
312 # if defined(__KERNEL_NEON__)
313 return vaddvq_s32(vreinterpretq_s32_m128(
b.m128)) != 0x0;
315 return _mm_movemask_ps(
b) != 0x0;
320 # if defined(__KERNEL_NEON__)
321 return vaddvq_s32(vreinterpretq_s32_m128(
b.m128)) == 0x0;
323 return _mm_movemask_ps(
b) == 0x0;
329 return _mm_movemask_ps(
a);
338 printf(
"%s: %d %d %d %d\n",
label,
a[0],
a[1],
a[2],
a[3]);
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint i1
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
__forceinline const avxb operator|=(avxb &a, const avxb &b)
__forceinline uint32_t movemask(const avxb &a)
__forceinline bool any(const avxb &b)
__forceinline bool all(const avxb &b)
__forceinline bool reduce_and(const avxb &a)
__forceinline const avxb operator&(const avxb &a, const avxb &b)
Binary Operators.
__forceinline const avxb operator^=(avxb &a, const avxb &b)
__forceinline const avxb operator&=(avxb &a, const avxb &b)
Assignment Operators.
__forceinline const avxb unpacklo(const avxb &a, const avxb &b)
Movement/Shifting/Shuffling Functions.
__forceinline const avxb operator|(const avxb &a, const avxb &b)
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
__forceinline const avxb operator!(const avxb &a)
Unary Operators.
__forceinline const avxb unpackhi(const avxb &a, const avxb &b)
__forceinline bool reduce_or(const avxb &a)
__forceinline uint32_t popcnt(const avxb &a)
Reduction Operations.
__forceinline bool none(const avxb &b)
__forceinline const avxi shuffle< 0, 0, 2, 2 >(const avxi &b)
__forceinline const avxi shuffle< 0, 1, 0, 1 >(const avxi &b)
__forceinline const avxi shuffle< 1, 1, 3, 3 >(const avxi &b)
ATTR_WARN_UNUSED_RESULT const BMVert * v
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
btGeneric6DofConstraint & operator=(btGeneric6DofConstraint &other)
SIMD_FORCE_INLINE btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
#define ccl_device_inline
#define CCL_NAMESPACE_END
SyclQueue void void * src
SyclQueue void void size_t num_bytes void
static void shuffle(float2 points[], int size, int rng_seed)
ccl_global KernelShaderEvalInput * input
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
Insertion insert(const float3 &point_prev, const float3 &handle_prev, const float3 &handle_next, const float3 &point_next, float parameter)
bool operator==(const AttributeIDRef &a, const AttributeIDRef &b)
GPUState operator^(const GPUState &a, const GPUState &b)
constexpr bool operator!=(StringRef a, StringRef b)
static const pxr::TfToken b("b", pxr::TfToken::Immortal)