simd.h
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2013 Intel Corporation
 * Modifications Copyright 2014-2022 Blender Foundation. */

#ifndef __UTIL_SIMD_TYPES_H__
#define __UTIL_SIMD_TYPES_H__

#include <limits>
#include <stdint.h>

#include "util/defines.h"

/* SSE Intrinsics includes
 *
 * We assume __KERNEL_SSEX__ flags to have been defined at this point.
 *
 * MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
 * Since we can't avoid including <windows.h>, better only include that */
#if defined(FREE_WINDOWS64)
# include "util/windows.h"
#elif defined(_MSC_VER)
# include <intrin.h>
#elif (defined(__x86_64__) || defined(__i386__))
# include <x86intrin.h>
#elif defined(__KERNEL_NEON__)
# define SSE2NEON_PRECISE_MINMAX 1
# include <sse2neon.h>
#endif

/* Floating Point Control, for Embree. */
#if defined(__x86_64__) || defined(_M_X64)
# define SIMD_SET_FLUSH_TO_ZERO \
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); \
    _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#elif defined(__aarch64__) || defined(_M_ARM64)
# define _MM_FLUSH_ZERO_ON 24
# define __get_fpcr(__fpcr) __asm__ __volatile__("mrs %0,fpcr" : "=r"(__fpcr))
# define __set_fpcr(__fpcr) __asm__ __volatile__("msr fpcr,%0" : : "ri"(__fpcr))
# define SIMD_SET_FLUSH_TO_ZERO set_fz(_MM_FLUSH_ZERO_ON);
# define SIMD_GET_FLUSH_TO_ZERO get_fz(_MM_FLUSH_ZERO_ON)
#else
# define SIMD_SET_FLUSH_TO_ZERO
#endif
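
/* Usage sketch (editorial example, not part of the original header): callers
 * are expected to run the macro once per thread before SIMD/Embree work, e.g.
 *
 *   SIMD_SET_FLUSH_TO_ZERO;
 *
 * On x86-64 this sets the FTZ and DAZ bits in MXCSR; on AArch64 it sets the
 * FZ bit in FPCR through set_fz() defined further below; on other platforms
 * it expands to nothing. */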

CCL_NAMESPACE_BEGIN

/* Data structures used by SSE classes. */
#ifdef __KERNEL_SSE2__

/* Masks with all bits set in the lanes selected by a 4-bit index, used by the
 * SSE4.1 emulation further below. */
extern const __m128 _mm_lookupmask_ps[16];

static struct TrueTy {
  __forceinline operator bool() const
  {
    return true;
  }
} True ccl_attr_maybe_unused;

static struct FalseTy {
  __forceinline operator bool() const
  {
    return false;
  }
} False ccl_attr_maybe_unused;

static struct ZeroTy {
  __forceinline operator float() const
  {
    return 0;
  }
  __forceinline operator int() const
  {
    return 0;
  }
} zero ccl_attr_maybe_unused;

static struct OneTy {
  __forceinline operator float() const
  {
    return 1;
  }
  __forceinline operator int() const
  {
    return 1;
  }
} one ccl_attr_maybe_unused;

static struct NegInfTy {
  __forceinline operator float() const
  {
    return -std::numeric_limits<float>::infinity();
  }
  __forceinline operator int() const
  {
    return std::numeric_limits<int>::min();
  }
} neg_inf ccl_attr_maybe_unused;

static struct PosInfTy {
  __forceinline operator float() const
  {
    return std::numeric_limits<float>::infinity();
  }
  __forceinline operator int() const
  {
    return std::numeric_limits<int>::max();
  }
} inf ccl_attr_maybe_unused, pos_inf ccl_attr_maybe_unused;

static struct StepTy {
} step ccl_attr_maybe_unused;

#endif
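
/* Editorial note (not part of the original header): the tag structs above act
 * as typed constants for Embree-style SIMD wrapper classes, which typically
 * provide constructors taking them, e.g. with a hypothetical float4-style
 * wrapper:
 *
 *   float4 a(zero);     // all lanes 0.0f
 *   float4 b(one);      // all lanes 1.0f
 *   float4 c(neg_inf);  // all lanes -infinity
 *
 * True/False convert to bool; step is a tag that such wrappers commonly map
 * to an ascending 0, 1, 2, 3 lane sequence. */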
#if defined(__aarch64__) || defined(_M_ARM64)
__forceinline int set_fz(uint32_t flag)
{
  uint64_t old_fpcr, new_fpcr;
  __get_fpcr(old_fpcr);
  new_fpcr = old_fpcr | (1ULL << flag);
  __set_fpcr(new_fpcr);
  __get_fpcr(old_fpcr);
  return old_fpcr == new_fpcr;
}
__forceinline int get_fz(uint32_t flag)
{
  uint64_t cur_fpcr;
  __get_fpcr(cur_fpcr);
  return (cur_fpcr & (1ULL << flag)) > 0 ? 1 : 0;
}
#endif

/* Utilities used by Neon */
#if defined(__KERNEL_NEON__)
template<class type, int i0, int i1, int i2, int i3> type shuffle_neon(const type &a)
{
  if (i0 == i1 && i0 == i2 && i0 == i3) {
    return type(vdupq_laneq_s32(int32x4_t(a), i0));
  }
  static const uint8_t tbl[16] = {(i0 * 4) + 0,
                                  (i0 * 4) + 1,
                                  (i0 * 4) + 2,
                                  (i0 * 4) + 3,
                                  (i1 * 4) + 0,
                                  (i1 * 4) + 1,
                                  (i1 * 4) + 2,
                                  (i1 * 4) + 3,
                                  (i2 * 4) + 0,
                                  (i2 * 4) + 1,
                                  (i2 * 4) + 2,
                                  (i2 * 4) + 3,
                                  (i3 * 4) + 0,
                                  (i3 * 4) + 1,
                                  (i3 * 4) + 2,
                                  (i3 * 4) + 3};

  return type(vqtbl1q_s8(int8x16_t(a), *(uint8x16_t *)tbl));
}

template<class type, int i0, int i1, int i2, int i3>
type shuffle_neon(const type &a, const type &b)
{
  if (&a == &b) {
    static const uint8_t tbl[16] = {(i0 * 4) + 0,
                                    (i0 * 4) + 1,
                                    (i0 * 4) + 2,
                                    (i0 * 4) + 3,
                                    (i1 * 4) + 0,
                                    (i1 * 4) + 1,
                                    (i1 * 4) + 2,
                                    (i1 * 4) + 3,
                                    (i2 * 4) + 0,
                                    (i2 * 4) + 1,
                                    (i2 * 4) + 2,
                                    (i2 * 4) + 3,
                                    (i3 * 4) + 0,
                                    (i3 * 4) + 1,
                                    (i3 * 4) + 2,
                                    (i3 * 4) + 3};

    return type(vqtbl1q_s8(int8x16_t(b), *(uint8x16_t *)tbl));
  }
  else {
    static const uint8_t tbl[16] = {(i0 * 4) + 0,
                                    (i0 * 4) + 1,
                                    (i0 * 4) + 2,
                                    (i0 * 4) + 3,
                                    (i1 * 4) + 0,
                                    (i1 * 4) + 1,
                                    (i1 * 4) + 2,
                                    (i1 * 4) + 3,
                                    (i2 * 4) + 0 + 16,
                                    (i2 * 4) + 1 + 16,
                                    (i2 * 4) + 2 + 16,
                                    (i2 * 4) + 3 + 16,
                                    (i3 * 4) + 0 + 16,
                                    (i3 * 4) + 1 + 16,
                                    (i3 * 4) + 2 + 16,
                                    (i3 * 4) + 3 + 16};

    return type(vqtbl2q_s8((int8x16x2_t){int8x16_t(a), int8x16_t(b)}, *(uint8x16_t *)tbl));
  }
}
#endif /* __KERNEL_NEON */
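
/* Usage sketch (editorial, not part of the original header): shuffle_neon
 * gives compile-time lane selection analogous to _MM_SHUFFLE-based x86
 * shuffles. For example, reversing the four 32-bit lanes of a vector v:
 *
 *   int32x4_t r = shuffle_neon<int32x4_t, 3, 2, 1, 0>(v);
 *
 * The one-argument overload picks all lanes from `a`; the two-argument
 * overload takes lanes i0/i1 from `a` and lanes i2/i3 from `b`, mirroring
 * the semantics of _mm_shuffle_ps. */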

/* Intrinsics Functions
 *
 * For fast bit operations. */

#if defined(__BMI__) && defined(__GNUC__)
# ifndef _tzcnt_u32
#  define _tzcnt_u32 __tzcnt_u32
# endif
# ifndef _tzcnt_u64
#  define _tzcnt_u64 __tzcnt_u64
# endif
#endif

#if defined(__LZCNT__)
# define _lzcnt_u32 __lzcnt32
# define _lzcnt_u64 __lzcnt64
#endif

#if defined(_WIN32) && !defined(__MINGW32__) && !defined(__clang__)
/* Intrinsic functions on Windows. */
__forceinline uint32_t __bsf(uint32_t v)
{
# if defined(__KERNEL_AVX2__)
  return _tzcnt_u32(v);
# else
  unsigned long r = 0;
  _BitScanForward(&r, v);
  return r;
# endif
}

__forceinline uint32_t __bsr(uint32_t v)
{
  unsigned long r = 0;
  _BitScanReverse(&r, v);
  return r;
}

__forceinline uint32_t __btc(uint32_t v, uint32_t i)
{
  long r = v;
  _bittestandcomplement(&r, i);
  return r;
}

__forceinline uint32_t bitscan(uint32_t v)
{
# if defined(__KERNEL_AVX2__)
  return _tzcnt_u32(v);
# else
  return __bsf(v);
# endif
}

# if defined(__KERNEL_64_BIT__)

__forceinline uint64_t __bsf(uint64_t v)
{
#  if defined(__KERNEL_AVX2__)
  return _tzcnt_u64(v);
#  else
  unsigned long r = 0;
  _BitScanForward64(&r, v);
  return r;
#  endif
}

__forceinline uint64_t __bsr(uint64_t v)
{
  unsigned long r = 0;
  _BitScanReverse64(&r, v);
  return r;
}

__forceinline uint64_t __btc(uint64_t v, uint64_t i)
{
  uint64_t r = v;
  _bittestandcomplement64((__int64 *)&r, i);
  return r;
}

__forceinline size_t bitscan(size_t v)
{
#  if defined(__KERNEL_AVX2__)
#   if defined(__KERNEL_64_BIT__)
  return _tzcnt_u64(v);
#   else
  return _tzcnt_u32(v);
#   endif
#  else
  return __bsf(v);
#  endif
}

# endif /* __KERNEL_64_BIT__ */

#elif (defined(__x86_64__) || defined(__i386__)) && defined(__KERNEL_SSE2__)
/* Intrinsic functions with x86 SSE. */

__forceinline uint32_t __bsf(const uint32_t v)
{
  uint32_t r = 0;
  asm("bsf %1,%0" : "=r"(r) : "r"(v));
  return r;
}

__forceinline uint32_t __bsr(const uint32_t v)
{
  uint32_t r = 0;
  asm("bsr %1,%0" : "=r"(r) : "r"(v));
  return r;
}

__forceinline uint32_t __btc(const uint32_t v, const uint32_t i)
{
  uint32_t r = 0;
  asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags");
  return r;
}

# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \
    !(defined(__ILP32__) && defined(__x86_64__))
__forceinline uint64_t __bsf(const uint64_t v)
{
  uint64_t r = 0;
  asm("bsf %1,%0" : "=r"(r) : "r"(v));
  return r;
}
# endif

__forceinline uint64_t __bsr(const uint64_t v)
{
  uint64_t r = 0;
  asm("bsr %1,%0" : "=r"(r) : "r"(v));
  return r;
}

__forceinline uint64_t __btc(const uint64_t v, const uint32_t i)
{
  uint64_t r = 0;
  asm("btc %1,%0" : "=r"(r) : "r"(i), "0"(v) : "flags");
  return r;
}

__forceinline uint32_t bitscan(uint32_t v)
{
# if defined(__KERNEL_AVX2__)
  return _tzcnt_u32(v);
# else
  return __bsf(v);
# endif
}

# if (defined(__KERNEL_64_BIT__) || defined(__APPLE__)) && \
    !(defined(__ILP32__) && defined(__x86_64__))
__forceinline size_t bitscan(size_t v)
{
#  if defined(__KERNEL_AVX2__)
#   if defined(__KERNEL_64_BIT__)
  return _tzcnt_u64(v);
#   else
  return _tzcnt_u32(v);
#   endif
#  else
  return __bsf(v);
#  endif
}
# endif

#else
/* Intrinsic functions fallback for arbitrary processor. */
__forceinline uint32_t __bsf(const uint32_t x)
{
  for (int i = 0; i < 32; i++) {
    if (x & (1U << i))
      return i;
  }
  return 32;
}

__forceinline uint32_t __bsr(const uint32_t x)
{
  for (int i = 0; i < 32; i++) {
    if (x & (1U << (31 - i)))
      return (31 - i);
  }
  return 32;
}

__forceinline uint32_t __btc(const uint32_t x, const uint32_t bit)
{
  uint32_t mask = 1U << bit;
  return x & (~mask);
}

__forceinline uint64_t __bsf(const uint64_t x)
{
  for (int i = 0; i < 64; i++) {
    if (x & (1ULL << i))
      return i;
  }
  return 64;
}

__forceinline uint64_t __bsr(const uint64_t x)
{
  for (int i = 0; i < 64; i++) {
    if (x & (1ULL << (63 - i)))
      return (63 - i);
  }
  return 64;
}

__forceinline uint64_t __btc(const uint64_t x, const uint32_t bit)
{
  uint64_t mask = 1ULL << bit;
  return x & (~mask);
}

__forceinline uint32_t bitscan(uint32_t value)
{
  assert(value != 0);
  uint32_t bit = 0;
  while ((value & (1U << bit)) == 0) {
    ++bit;
  }
  return bit;
}

__forceinline uint64_t bitscan(uint64_t value)
{
  assert(value != 0);
  uint64_t bit = 0;
  while ((value & (1ULL << bit)) == 0) {
    ++bit;
  }
  return bit;
}

#endif /* Intrinsics */
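
/* Behaviour sketch (editorial, not part of the original header): all three
 * branches above implement the same contract for non-zero input, e.g.
 *
 *   __bsf(0x18u) == 3         // index of lowest set bit
 *   __bsr(0x18u) == 4         // index of highest set bit
 *   __btc(0x18u, 3) == 0x10u  // bit 3 complemented
 *   bitscan(0x18u) == 3       // like __bsf, using TZCNT when available
 *
 * Zero input is undefined for the hardware BSF/BSR paths, so callers check
 * for zero first (the portable bitscan() asserts on it). */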

/* SSE compatibility.
 *
 * Various utilities to smooth over differences between SSE versions and
 * implementations. */
#ifdef __KERNEL_SSE2__

/* Test __KERNEL_SSE41__ for MSVC which does not define __SSE4_1__, and test
 * __SSE4_1__ to avoid OpenImageIO conflicts with our emulation macros on other
 * platforms when compiling code outside the kernel. */
# if !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__))

/* Emulation of SSE4 functions with SSE2 */

# define _MM_FROUND_TO_NEAREST_INT 0x00
# define _MM_FROUND_TO_NEG_INF 0x01
# define _MM_FROUND_TO_POS_INF 0x02
# define _MM_FROUND_TO_ZERO 0x03
# define _MM_FROUND_CUR_DIRECTION 0x04

# undef _mm_blendv_ps
# define _mm_blendv_ps _mm_blendv_ps_emu
__forceinline __m128 _mm_blendv_ps_emu(__m128 value, __m128 input, __m128 mask)
{
  __m128i isignmask = _mm_set1_epi32(0x80000000);
  __m128 signmask = _mm_castsi128_ps(isignmask);
  __m128i iandsign = _mm_castps_si128(_mm_and_ps(mask, signmask));
  __m128i icmpmask = _mm_cmpeq_epi32(iandsign, isignmask);
  __m128 cmpmask = _mm_castsi128_ps(icmpmask);
  return _mm_or_ps(_mm_and_ps(cmpmask, input), _mm_andnot_ps(cmpmask, value));
}

# undef _mm_blend_ps
# define _mm_blend_ps _mm_blend_ps_emu
__forceinline __m128 _mm_blend_ps_emu(__m128 value, __m128 input, const int mask)
{
  assert(mask < 0x10);
  return _mm_blendv_ps(value, input, _mm_lookupmask_ps[mask]);
}

# undef _mm_blendv_epi8
# define _mm_blendv_epi8 _mm_blendv_epi8_emu
__forceinline __m128i _mm_blendv_epi8_emu(__m128i value, __m128i input, __m128i mask)
{
  return _mm_or_si128(_mm_and_si128(mask, input), _mm_andnot_si128(mask, value));
}

# undef _mm_min_epi32
# define _mm_min_epi32 _mm_min_epi32_emu
__forceinline __m128i _mm_min_epi32_emu(__m128i value, __m128i input)
{
  return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
}

# undef _mm_max_epi32
# define _mm_max_epi32 _mm_max_epi32_emu
__forceinline __m128i _mm_max_epi32_emu(__m128i value, __m128i input)
{
  return _mm_blendv_epi8(value, input, _mm_cmplt_epi32(value, input));
}

# ifndef __KERNEL_NEON__
#  undef _mm_extract_epi32
#  define _mm_extract_epi32 _mm_extract_epi32_emu
__forceinline int _mm_extract_epi32_emu(__m128i input, const int index)
{
  switch (index) {
    case 0:
      return _mm_cvtsi128_si32(input);
    case 1:
      return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(1, 1, 1, 1)));
    case 2:
      return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(2, 2, 2, 2)));
    case 3:
      return _mm_cvtsi128_si32(_mm_shuffle_epi32(input, _MM_SHUFFLE(3, 3, 3, 3)));
    default:
      assert(false);
      return 0;
  }
}
# endif

# undef _mm_insert_epi32
# define _mm_insert_epi32 _mm_insert_epi32_emu
__forceinline __m128i _mm_insert_epi32_emu(__m128i value, int input, const int index)
{
  assert(index >= 0 && index < 4);
  ((int *)&value)[index] = input;
  return value;
}

# undef _mm_insert_ps
# define _mm_insert_ps _mm_insert_ps_emu
__forceinline __m128 _mm_insert_ps_emu(__m128 value, __m128 input, const int index)
{
  assert(index < 0x100);
  ((float *)&value)[(index >> 4) & 0x3] = ((float *)&input)[index >> 6];
  return _mm_andnot_ps(_mm_lookupmask_ps[index & 0xf], value);
}
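
/* Editorial note (not part of the original header): the index byte follows
 * the SSE4.1 INSERTPS immediate layout, i.e. bits 7:6 select the source lane
 * of `input`, bits 5:4 select the destination lane in `value`, and bits 3:0
 * zero the corresponding result lanes. For example:
 *
 *   _mm_insert_ps(v, w, (2 << 6) | (1 << 4))  // v.y = w.z, no lanes zeroed
 */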

# undef _mm_round_ps
# define _mm_round_ps _mm_round_ps_emu
__forceinline __m128 _mm_round_ps_emu(__m128 value, const int flags)
{
  switch (flags) {
    case _MM_FROUND_TO_NEAREST_INT:
      return _mm_cvtepi32_ps(_mm_cvtps_epi32(value));
    case _MM_FROUND_TO_NEG_INF:
      return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(-0.5f))));
    case _MM_FROUND_TO_POS_INF:
      return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(value, _mm_set1_ps(0.5f))));
    case _MM_FROUND_TO_ZERO:
      return _mm_cvtepi32_ps(_mm_cvttps_epi32(value));
  }
  return value;
}

# endif /* !(defined(__KERNEL_SSE41__) || defined(__SSE4_1__) || defined(__SSE4_2__)) */
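
/* Editorial summary (not part of the original header): because each emulated
 * intrinsic is installed via #define, SSE2-only builds can keep using the
 * SSE4.1 names unchanged, e.g.
 *
 *   __m128i m = _mm_min_epi32(a, b);                 // -> _mm_min_epi32_emu
 *   __m128 t = _mm_round_ps(v, _MM_FROUND_TO_ZERO);  // -> _mm_round_ps_emu
 *
 * On SSE4.1-capable builds the real intrinsics are used instead. */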

/* Older GCC versions do not have _mm256_cvtss_f32 yet, so define it ourselves.
 * _mm256_castps256_ps128 generates no instructions so this is just as efficient. */
# if defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
#  undef _mm256_cvtss_f32
#  define _mm256_cvtss_f32(a) (_mm_cvtss_f32(_mm256_castps256_ps128(a)))
# endif

#endif /* __KERNEL_SSE2__ */

/* quiet unused define warnings */
#if defined(__KERNEL_SSE2__) || defined(__KERNEL_SSE3__) || defined(__KERNEL_SSSE3__) || \
    defined(__KERNEL_SSE41__) || defined(__KERNEL_AVX__) || defined(__KERNEL_AVX2__)
/* do nothing */
#endif

CCL_NAMESPACE_END

#endif /* __UTIL_SIMD_TYPES_H__ */