5 #ifndef __UTIL_AVXB_H__
6 #define __UTIL_AVXB_H__
44 :
m256(_mm256_insertf128_ps(_mm256_castps128_ps256(
a),
b, 1))
53 return _mm256_castps_si256(
m256);
57 return _mm256_castps_pd(
m256);
78 return (_mm256_movemask_ps(
m256) >> i) & 1;
93 return _mm256_xor_ps(
a,
avxb(True));
102 return _mm256_and_ps(
a,
b);
106 return _mm256_or_ps(
a,
b);
110 return _mm256_xor_ps(
a,
b);
136 return _mm256_xor_ps(
a,
b);
140 #ifdef __KERNEL_AVX2__
141 return _mm256_castsi256_ps(_mm256_cmpeq_epi32(
a,
b));
143 __m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(
a, 0));
144 __m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(
a, 1));
145 __m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(
b, 0));
146 __m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(
b, 1));
147 __m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
148 __m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
149 __m256i
result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
150 return _mm256_castsi256_ps(
result);
156 #if defined(__KERNEL_SSE41__)
157 return _mm256_blendv_ps(f,
t, m);
159 return _mm256_or_ps(_mm256_and_ps(m,
t), _mm256_andnot_ps(m, f));
169 return _mm256_unpacklo_ps(
a,
b);
173 return _mm256_unpackhi_ps(
a,
b);
180 #if defined(__KERNEL_SSE41__)
183 return _mm_popcnt_u32(_mm256_movemask_ps(
a));
195 return _mm256_movemask_ps(
a) == 0xf;
199 return _mm256_movemask_ps(
a) != 0x0;
203 return _mm256_movemask_ps(
b) == 0xf;
207 return _mm256_movemask_ps(
b) != 0x0;
211 return _mm256_movemask_ps(
b) == 0x0;
216 return _mm256_movemask_ps(
a);
225 printf(
"%s: %d %d %d %d %d %d %d %d\n",
label,
a[0],
a[1],
a[2],
a[3],
a[4],
a[5],
a[6],
a[7]);
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
__forceinline const avxb operator|=(avxb &a, const avxb &b)
__forceinline uint32_t movemask(const avxb &a)
__forceinline bool any(const avxb &b)
__forceinline bool all(const avxb &b)
__forceinline const avxb operator==(const avxb &a, const avxb &b)
__forceinline bool reduce_and(const avxb &a)
__forceinline const avxb operator&(const avxb &a, const avxb &b)
Binary Operators.
__forceinline const avxb operator^=(avxb &a, const avxb &b)
__forceinline const avxb operator&=(avxb &a, const avxb &b)
Assignment Operators.
__forceinline const avxb unpacklo(const avxb &a, const avxb &b)
Movement/Shifting/Shuffling Functions.
__forceinline const avxb operator|(const avxb &a, const avxb &b)
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
ccl_device_inline void print_avxb(const char *label, const avxb &a)
Debug Functions.
__forceinline const avxb operator^(const avxb &a, const avxb &b)
__forceinline const avxb operator!(const avxb &a)
Unary Operators.
__forceinline const avxb unpackhi(const avxb &a, const avxb &b)
__forceinline bool reduce_or(const avxb &a)
__forceinline const avxb operator!=(const avxb &a, const avxb &b)
Comparison Operators + Select.
__forceinline uint32_t popcnt(const avxb &a)
Reduction Operations.
__forceinline bool none(const avxb &b)
#define ccl_device_inline
#define CCL_NAMESPACE_END
SyclQueue void void size_t num_bytes void
ccl_global KernelShaderEvalInput * input
static const pxr::TfToken b("b", pxr::TfToken::Immortal)
__forceinline avxb(const __m256 input)
__forceinline avxb(TrueTy)
__forceinline avxb(const __m128 &a, const __m128 &b)
__forceinline avxb & operator=(const avxb &other)
__forceinline avxb(FalseTy)
Constants.
__forceinline avxb()
Constructors, Assignment & Cast Operators.
__forceinline bool operator[](const size_t i) const
Array Access.
__forceinline int32_t & operator[](const size_t i)
__forceinline avxb(const avxb &other)