Blender  V3.3
math_fast.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: BSD-3-Clause
2  *
3  * Adapted from OpenImageIO
4  * Copyright 2008-2014 Larry Gritz and the other authors and contributors.
5  * All Rights Reserved.
6  *
7  * A few bits here are based upon code from NVIDIA that was also released
8  * under the same modified BSD license, and marked as:
9  * Copyright 2004 NVIDIA Corporation. All Rights Reserved.
10  *
11  * Some parts of this file were first open-sourced in Open Shading Language,
12  * then later moved here. The original copyright notice was:
13  * Copyright (c) 2009-2014 Sony Pictures Imageworks Inc., et al.
14  *
15  * Many of the math functions were copied from or inspired by other
16  * public domain sources or open source packages with compatible licenses.
17  * The individual functions give references where applicable.
18  */
19 
20 #ifndef __UTIL_FAST_MATH__
21 #define __UTIL_FAST_MATH__
22 
24 
25 ccl_device_inline float madd(const float a, const float b, const float c)
26 {
27  /* NOTE: In the future we may want to explicitly ask for a fused
28  * multiply-add in a specialized version for float.
29  *
30  * NOTE: GCC/ICC will turn this (for float) into a FMA unless
31  * explicitly asked not to, clang seems to leave the code alone.
32  */
33  return a * b + c;
34 }
35 
37 {
38  return a * b + c;
39 }
40 
41 /*
42  * FAST & APPROXIMATE MATH
43  *
44  * The functions named "fast_*" provide a set of replacements to libm that
45  * are much faster at the expense of some accuracy and robust handling of
46  * extreme values. One design goal for these approximations was to avoid
47  * branches as much as possible and operate on single precision values only
48  * so that SIMD versions should be straightforward ports. We also try to
49  * implement "safe" semantics (ie: clamp to valid range where possible)
50  * natively since wrapping these inline calls in another layer would be
51  * wasteful.
52  *
53  * Some functions are fast_safe_*, which is both a faster approximation as
54  * well as clamped input domain to ensure no NaN, Inf, or divide by zero.
55  */
56 
57 /* Round to nearest integer, returning as an int. */
59 {
60  /* used by sin/cos/tan range reduction. */
61 #ifdef __KERNEL_SSE4__
62  /* Single `roundps` instruction on SSE4.1+ (for gcc/clang at least). */
63  return float_to_int(rintf(x));
64 #else
65  /* emulate rounding by adding/subtracting 0.5. */
66  return float_to_int(x + copysignf(0.5f, x));
67 #endif
68 }
69 
70 ccl_device float fast_sinf(float x)
71 {
72  /* Very accurate argument reduction from SLEEF,
73  * starts failing around x=262000
74  *
75  * Results on: [-2pi,2pi].
76  *
77  * Examined 2173837240 values of sin: 0.00662760244 avg ulp diff, 2 max ulp,
78  * 1.19209e-07 max error
79  */
80  int q = fast_rint(x * M_1_PI_F);
81  float qf = (float)q;
82  x = madd(qf, -0.78515625f * 4, x);
83  x = madd(qf, -0.00024187564849853515625f * 4, x);
84  x = madd(qf, -3.7747668102383613586e-08f * 4, x);
85  x = madd(qf, -1.2816720341285448015e-12f * 4, x);
86  x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */
87  float s = x * x;
88  if ((q & 1) != 0)
89  x = -x;
90  /* This polynomial approximation has very low error on [-pi/2,+pi/2]
91  * 1.19209e-07 max error in total over [-2pi,+2pi]. */
92  float u = 2.6083159809786593541503e-06f;
93  u = madd(u, s, -0.0001981069071916863322258f);
94  u = madd(u, s, +0.00833307858556509017944336f);
95  u = madd(u, s, -0.166666597127914428710938f);
96  u = madd(s, u * x, x);
97  /* For large x, the argument reduction can fail and the polynomial can be
98  * evaluated with arguments outside the valid internal. Just clamp the bad
99  * values away (setting to 0.0f means no branches need to be generated). */
100  if (fabsf(u) > 1.0f) {
101  u = 0.0f;
102  }
103  return u;
104 }
105 
106 ccl_device float fast_cosf(float x)
107 {
108  /* Same argument reduction as fast_sinf(). */
109  int q = fast_rint(x * M_1_PI_F);
110  float qf = (float)q;
111  x = madd(qf, -0.78515625f * 4, x);
112  x = madd(qf, -0.00024187564849853515625f * 4, x);
113  x = madd(qf, -3.7747668102383613586e-08f * 4, x);
114  x = madd(qf, -1.2816720341285448015e-12f * 4, x);
115  x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals. */
116  float s = x * x;
117  /* Polynomial from SLEEF's sincosf, max error is
118  * 4.33127e-07 over [-2pi,2pi] (98% of values are "exact"). */
119  float u = -2.71811842367242206819355e-07f;
120  u = madd(u, s, +2.47990446951007470488548e-05f);
121  u = madd(u, s, -0.00138888787478208541870117f);
122  u = madd(u, s, +0.0416666641831398010253906f);
123  u = madd(u, s, -0.5f);
124  u = madd(u, s, +1.0f);
125  if ((q & 1) != 0) {
126  u = -u;
127  }
128  if (fabsf(u) > 1.0f) {
129  u = 0.0f;
130  }
131  return u;
132 }
133 
134 ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine)
135 {
136  /* Same argument reduction as fast_sin. */
137  int q = fast_rint(x * M_1_PI_F);
138  float qf = (float)q;
139  x = madd(qf, -0.78515625f * 4, x);
140  x = madd(qf, -0.00024187564849853515625f * 4, x);
141  x = madd(qf, -3.7747668102383613586e-08f * 4, x);
142  x = madd(qf, -1.2816720341285448015e-12f * 4, x);
143  x = M_PI_2_F - (M_PI_2_F - x); // crush denormals
144  float s = x * x;
145  /* NOTE: same exact polynomials as fast_sinf() and fast_cosf() above. */
146  if ((q & 1) != 0) {
147  x = -x;
148  }
149  float su = 2.6083159809786593541503e-06f;
150  su = madd(su, s, -0.0001981069071916863322258f);
151  su = madd(su, s, +0.00833307858556509017944336f);
152  su = madd(su, s, -0.166666597127914428710938f);
153  su = madd(s, su * x, x);
154  float cu = -2.71811842367242206819355e-07f;
155  cu = madd(cu, s, +2.47990446951007470488548e-05f);
156  cu = madd(cu, s, -0.00138888787478208541870117f);
157  cu = madd(cu, s, +0.0416666641831398010253906f);
158  cu = madd(cu, s, -0.5f);
159  cu = madd(cu, s, +1.0f);
160  if ((q & 1) != 0) {
161  cu = -cu;
162  }
163  if (fabsf(su) > 1.0f) {
164  su = 0.0f;
165  }
166  if (fabsf(cu) > 1.0f) {
167  cu = 0.0f;
168  }
169  *sine = su;
170  *cosine = cu;
171 }
172 
173 /* NOTE: this approximation is only valid on [-8192.0,+8192.0], it starts
174  * becoming really poor outside of this range because the reciprocal amplifies
175  * errors.
176  */
177 ccl_device float fast_tanf(float x)
178 {
179  /* Derived from SLEEF implementation.
180  *
181  * Note that we cannot apply the "denormal crush" trick everywhere because
182  * we sometimes need to take the reciprocal of the polynomial
183  */
184  int q = fast_rint(x * 2.0f * M_1_PI_F);
185  float qf = (float)q;
186  x = madd(qf, -0.78515625f * 2, x);
187  x = madd(qf, -0.00024187564849853515625f * 2, x);
188  x = madd(qf, -3.7747668102383613586e-08f * 2, x);
189  x = madd(qf, -1.2816720341285448015e-12f * 2, x);
190  if ((q & 1) == 0) {
191  /* Crush denormals (only if we aren't inverting the result later). */
192  x = M_PI_4_F - (M_PI_4_F - x);
193  }
194  float s = x * x;
195  float u = 0.00927245803177356719970703f;
196  u = madd(u, s, 0.00331984995864331722259521f);
197  u = madd(u, s, 0.0242998078465461730957031f);
198  u = madd(u, s, 0.0534495301544666290283203f);
199  u = madd(u, s, 0.133383005857467651367188f);
200  u = madd(u, s, 0.333331853151321411132812f);
201  u = madd(s, u * x, x);
202  if ((q & 1) != 0) {
203  u = -1.0f / u;
204  }
205  return u;
206 }
207 
208 /* Fast, approximate sin(x*M_PI) with maximum absolute error of 0.000918954611.
209  *
210  * Adapted from http://devmaster.net/posts/9648/fast-and-accurate-sine-cosine#comment-76773
211  */
213 {
214  /* Fast trick to strip the integral part off, so our domain is [-1, 1]. */
215  const float z = x - ((x + 25165824.0f) - 25165824.0f);
216  const float y = z - z * fabsf(z);
217  const float Q = 3.10396624f;
218  const float P = 3.584135056f; /* P = 16-4*Q */
219  return y * (Q + P * fabsf(y));
220 
221  /* The original article used inferior constants for Q and P and
222  * so had max error 1.091e-3.
223  *
224  * The optimal value for Q was determined by exhaustive search, minimizing
225  * the absolute numerical error relative to float(std::sin(double(phi*M_PI)))
226  * over the interval [0,2] (which is where most of the invocations happen).
227  *
228  * The basic idea of this approximation starts with the coarse approximation:
229  * sin(pi*x) ~= f(x) = 4 * (x - x * abs(x))
230  *
231  * This approximation always _over_ estimates the target. On the other hand,
232  * the curve:
233  * sin(pi*x) ~= f(x) * abs(f(x)) / 4
234  *
235  * always lies _under_ the target. Thus we can simply numerically search for
236  * the optimal constant to LERP these curves into a more precise
237  * approximation.
238  *
239  * After folding the constants together and simplifying the resulting math,
240  * we end up with the compact implementation above.
241  *
242  * NOTE: this function actually computes sin(x * pi) which avoids one or two
243  * mults in many cases and guarantees exact values at integer periods.
244  */
245 }
246 
247 /* Fast approximate cos(x*M_PI) with ~0.1% absolute error. */
249 {
250  return fast_sinpif(x + 0.5f);
251 }
252 
253 ccl_device float fast_acosf(float x)
254 {
255  const float f = fabsf(x);
256  /* clamp and crush denormals. */
257  const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
258  /* Based on http://www.pouet.net/topic.php?which=9132&page=2
259  * 85% accurate (ulp 0)
260  * Examined 2130706434 values of acos:
261  * 15.2000597 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // without "denormal crush"
262  * Examined 2130706434 values of acos:
263  * 15.2007108 avg ulp diff, 4492 max ulp, 4.51803e-05 max error // with "denormal crush"
264  */
265  const float a = sqrtf(1.0f - m) *
266  (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
267  return x < 0 ? M_PI_F - a : a;
268 }
269 
270 ccl_device float fast_asinf(float x)
271 {
272  /* Based on acosf approximation above.
273  * Max error is 4.51133e-05 (ulps are higher because we are consistently off
274  * by a little amount).
275  */
276  const float f = fabsf(x);
277  /* Clamp and crush denormals. */
278  const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
279  const float a = M_PI_2_F -
280  sqrtf(1.0f - m) * (1.5707963267f +
281  m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
282  return copysignf(a, x);
283 }
284 
285 ccl_device float fast_atanf(float x)
286 {
287  const float a = fabsf(x);
288  const float k = a > 1.0f ? 1 / a : a;
289  const float s = 1.0f - (1.0f - k); /* Crush denormals. */
290  const float t = s * s;
291  /* http://mathforum.org/library/drmath/view/62672.html
292  * Examined 4278190080 values of atan:
293  * 2.36864877 avg ulp diff, 302 max ulp, 6.55651e-06 max error // (with denormals)
294  * Examined 4278190080 values of atan:
295  * 171160502 avg ulp diff, 855638016 max ulp, 6.55651e-06 max error // (crush denormals)
296  */
297  float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f);
298  if (a > 1.0f) {
299  r = M_PI_2_F - r;
300  }
301  return copysignf(r, x);
302 }
303 
304 ccl_device float fast_atan2f(float y, float x)
305 {
306  /* Based on atan approximation above.
307  *
308  * The special cases around 0 and infinity were tested explicitly.
309  *
310  * The only case not handled correctly is x=NaN,y=0 which returns 0 instead
311  * of nan.
312  */
313  const float a = fabsf(x);
314  const float b = fabsf(y);
315 
316  const float k = (b == 0) ? 0.0f : ((a == b) ? 1.0f : (b > a ? a / b : b / a));
317  const float s = 1.0f - (1.0f - k); /* Crush denormals */
318  const float t = s * s;
319 
320  float r = s * madd(0.43157974f, t, 1.0f) / madd(madd(0.05831938f, t, 0.76443945f), t, 1.0f);
321 
322  if (b > a) {
323  /* Account for arg reduction. */
324  r = M_PI_2_F - r;
325  }
326  /* Test sign bit of x. */
327  if (__float_as_uint(x) & 0x80000000u) {
328  r = M_PI_F - r;
329  }
330  return copysignf(r, y);
331 }
332 
333 /* Based on:
334  *
335  * https://github.com/LiraNuna/glsl-sse2/blob/master/source/vec4.h
336  */
337 ccl_device float fast_log2f(float x)
338 {
339  /* NOTE: clamp to avoid special cases and make result "safe" from large
340  * negative values/NAN's. */
341  x = clamp(x, FLT_MIN, FLT_MAX);
342  unsigned bits = __float_as_uint(x);
343  int exponent = (int)(bits >> 23) - 127;
344  float f = __uint_as_float((bits & 0x007FFFFF) | 0x3f800000) - 1.0f;
345  /* Examined 2130706432 values of log2 on [1.17549435e-38,3.40282347e+38]:
346  * 0.0797524457 avg ulp diff, 3713596 max ulp, 7.62939e-06 max error.
347  * ulp histogram:
348  * 0 = 97.46%
349  * 1 = 2.29%
350  * 2 = 0.11%
351  */
352  float f2 = f * f;
353  float f4 = f2 * f2;
354  float hi = madd(f, -0.00931049621349f, 0.05206469089414f);
355  float lo = madd(f, 0.47868480909345f, -0.72116591947498f);
356  hi = madd(f, hi, -0.13753123777116f);
357  hi = madd(f, hi, 0.24187369696082f);
358  hi = madd(f, hi, -0.34730547155299f);
359  lo = madd(f, lo, 1.442689881667200f);
360  return ((f4 * hi) + (f * lo)) + exponent;
361 }
362 
364 {
365  /* Examined 2130706432 values of logf on [1.17549435e-38,3.40282347e+38]:
366  * 0.313865375 avg ulp diff, 5148137 max ulp, 7.62939e-06 max error.
367  */
368  return fast_log2f(x) * M_LN2_F;
369 }
370 
372 {
373  /* Examined 2130706432 values of log10f on [1.17549435e-38,3.40282347e+38]:
374  * 0.631237033 avg ulp diff, 4471615 max ulp, 3.8147e-06 max error.
375  */
376  return fast_log2f(x) * M_LN2_F / M_LN10_F;
377 }
378 
379 ccl_device float fast_logb(float x)
380 {
381  /* Don't bother with denormals. */
382  x = fabsf(x);
383  x = clamp(x, FLT_MIN, FLT_MAX);
384  unsigned bits = __float_as_uint(x);
385  return (float)((int)(bits >> 23) - 127);
386 }
387 
388 ccl_device float fast_exp2f(float x)
389 {
390  /* Clamp to safe range for final addition. */
391  x = clamp(x, -126.0f, 126.0f);
392  /* Range reduction. */
393  int m = (int)x;
394  x -= m;
395  x = 1.0f - (1.0f - x); /* Crush denormals (does not affect max ulps!). */
396  /* 5th degree polynomial generated with sollya
397  * Examined 2247622658 values of exp2 on [-126,126]: 2.75764912 avg ulp diff,
398  * 232 max ulp.
399  *
400  * ulp histogram:
401  * 0 = 87.81%
402  * 1 = 4.18%
403  */
404  float r = 1.33336498402e-3f;
405  r = madd(x, r, 9.810352697968e-3f);
406  r = madd(x, r, 5.551834031939e-2f);
407  r = madd(x, r, 0.2401793301105f);
408  r = madd(x, r, 0.693144857883f);
409  r = madd(x, r, 1.0f);
410  /* Multiply by 2 ^ m by adding in the exponent. */
411  /* NOTE: left-shift of negative number is undefined behavior. */
412  return __uint_as_float(__float_as_uint(r) + ((unsigned)m << 23));
413 }
414 
416 {
417  /* Examined 2237485550 values of exp on [-87.3300018,87.3300018]:
418  * 2.6666452 avg ulp diff, 230 max ulp.
419  */
420  return fast_exp2f(x / M_LN2_F);
421 }
422 
423 #if !defined(__KERNEL_GPU__) && !defined(_MSC_VER)
424 /* MSVC seems to have a code-gen bug here in at least SSE41/AVX, see
425  * T78047 and T78869 for details. Just disable for now, it only makes
426  * a small difference in denoising performance. */
428 {
429  const float4 one = make_float4(1.0f);
430  const float4 limit = make_float4(126.0f);
431  x = clamp(x, -limit, limit);
432  int4 m = make_int4(x);
433  x = one - (one - (x - make_float4(m)));
434  float4 r = make_float4(1.33336498402e-3f);
435  r = madd4(x, r, make_float4(9.810352697968e-3f));
436  r = madd4(x, r, make_float4(5.551834031939e-2f));
437  r = madd4(x, r, make_float4(0.2401793301105f));
438  r = madd4(x, r, make_float4(0.693144857883f));
439  r = madd4(x, r, make_float4(1.0f));
440  return __int4_as_float4(__float4_as_int4(r) + (m << 23));
441 }
442 
444 {
445  return fast_exp2f4(x / M_LN2_F);
446 }
447 #else
449 {
450  return make_float4(fast_expf(x.x), fast_expf(x.y), fast_expf(x.z), fast_expf(x.w));
451 }
452 #endif
453 
455 {
456  /* Examined 2217701018 values of exp10 on [-37.9290009,37.9290009]:
457  * 2.71732409 avg ulp diff, 232 max ulp.
458  */
459  return fast_exp2f(x * M_LN10_F / M_LN2_F);
460 }
461 
463 {
464  if (fabsf(x) < 1e-5f) {
465  x = 1.0f - (1.0f - x); /* Crush denormals. */
466  return madd(0.5f, x * x, x);
467  }
468  else {
469  return fast_expf(x) - 1.0f;
470  }
471 }
472 
473 ccl_device float fast_sinhf(float x)
474 {
475  float a = fabsf(x);
476  if (a > 1.0f) {
477  /* Examined 53389559 values of sinh on [1,87.3300018]:
478  * 33.6886442 avg ulp diff, 178 max ulp. */
479  float e = fast_expf(a);
480  return copysignf(0.5f * e - 0.5f / e, x);
481  }
482  else {
483  a = 1.0f - (1.0f - a); /* Crush denorms. */
484  float a2 = a * a;
485  /* Degree 7 polynomial generated with sollya. */
486  /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */
487  float r = 2.03945513931e-4f;
488  r = madd(r, a2, 8.32990277558e-3f);
489  r = madd(r, a2, 0.1666673421859f);
490  r = madd(r * a, a2, a);
491  return copysignf(r, x);
492  }
493 }
494 
496 {
497  /* Examined 2237485550 values of cosh on [-87.3300018,87.3300018]:
498  * 1.78256726 avg ulp diff, 178 max ulp.
499  */
500  float e = fast_expf(fabsf(x));
501  return 0.5f * e + 0.5f / e;
502 }
503 
505 {
506  /* Examined 4278190080 values of tanh on [-3.40282347e+38,3.40282347e+38]:
507  * 3.12924e-06 max error.
508  */
509  /* NOTE: ulp error is high because of sub-optimal handling around the origin. */
510  float e = fast_expf(2.0f * fabsf(x));
511  return copysignf(1.0f - 2.0f / (1.0f + e), x);
512 }
513 
514 ccl_device float fast_safe_powf(float x, float y)
515 {
516  if (y == 0)
517  return 1.0f; /* x^1=1 */
518  if (x == 0)
519  return 0.0f; /* 0^y=0 */
520  float sign = 1.0f;
521  if (x < 0.0f) {
522  /* if x is negative, only deal with integer powers
523  * powf returns NaN for non-integers, we will return 0 instead.
524  */
525  int ybits = __float_as_int(y) & 0x7fffffff;
526  if (ybits >= 0x4b800000) {
527  // always even int, keep positive
528  }
529  else if (ybits >= 0x3f800000) {
530  /* Bigger than 1, check. */
531  int k = (ybits >> 23) - 127; /* Get exponent. */
532  int j = ybits >> (23 - k); /* Shift out possible fractional bits. */
533  if ((j << (23 - k)) == ybits) { /* rebuild number and check for a match. */
534  /* +1 for even, -1 for odd. */
535  sign = __int_as_float(0x3f800000 | (j << 31));
536  }
537  else {
538  /* Not an integer. */
539  return 0.0f;
540  }
541  }
542  else {
543  /* Not an integer. */
544  return 0.0f;
545  }
546  }
547  return sign * fast_exp2f(y * fast_log2f(fabsf(x)));
548 }
549 
550 /* TODO(sergey): Check speed with our erf functions implementation from
551  * bsdf_microfacet.h.
552  */
553 
555 {
556  /* Examined 1082130433 values of erff on [0,4]: 1.93715e-06 max error. */
557  /* Abramowitz and Stegun, 7.1.28. */
558  const float a1 = 0.0705230784f;
559  const float a2 = 0.0422820123f;
560  const float a3 = 0.0092705272f;
561  const float a4 = 0.0001520143f;
562  const float a5 = 0.0002765672f;
563  const float a6 = 0.0000430638f;
564  const float a = fabsf(x);
565  if (a >= 12.3f) {
566  return copysignf(1.0f, x);
567  }
568  const float b = 1.0f - (1.0f - a); /* Crush denormals. */
569  const float r = madd(
570  madd(madd(madd(madd(madd(a6, b, a5), b, a4), b, a3), b, a2), b, a1), b, 1.0f);
571  const float s = r * r; /* ^2 */
572  const float t = s * s; /* ^4 */
573  const float u = t * t; /* ^8 */
574  const float v = u * u; /* ^16 */
575  return copysignf(1.0f - 1.0f / v, x);
576 }
577 
579 {
580  /* Examined 2164260866 values of erfcf on [-4,4]: 1.90735e-06 max error.
581  *
582  * ulp histogram:
583  *
584  * 0 = 80.30%
585  */
586  return 1.0f - fast_erff(x);
587 }
588 
590 {
591  /* From: Approximating the `erfinv` function by Mike Giles. */
592  /* To avoid trouble at the limit, clamp input to 1-eps. */
593  float a = fabsf(x);
594  if (a > 0.99999994f) {
595  a = 0.99999994f;
596  }
597  float w = -fast_logf((1.0f - a) * (1.0f + a)), p;
598  if (w < 5.0f) {
599  w = w - 2.5f;
600  p = 2.81022636e-08f;
601  p = madd(p, w, 3.43273939e-07f);
602  p = madd(p, w, -3.5233877e-06f);
603  p = madd(p, w, -4.39150654e-06f);
604  p = madd(p, w, 0.00021858087f);
605  p = madd(p, w, -0.00125372503f);
606  p = madd(p, w, -0.00417768164f);
607  p = madd(p, w, 0.246640727f);
608  p = madd(p, w, 1.50140941f);
609  }
610  else {
611  w = sqrtf(w) - 3.0f;
612  p = -0.000200214257f;
613  p = madd(p, w, 0.000100950558f);
614  p = madd(p, w, 0.00134934322f);
615  p = madd(p, w, -0.00367342844f);
616  p = madd(p, w, 0.00573950773f);
617  p = madd(p, w, -0.0076224613f);
618  p = madd(p, w, 0.00943887047f);
619  p = madd(p, w, 1.00167406f);
620  p = madd(p, w, 2.83297682f);
621  }
622  return p * x;
623 }
624 
626 
627 #endif /* __UTIL_FAST_MATH__ */
typedef float(TangentPoint)[2]
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble z
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble GLdouble r _GL_VOID_RET _GL_VOID GLfloat GLfloat r _GL_VOID_RET _GL_VOID GLint GLint r _GL_VOID_RET _GL_VOID 
GLshort GLshort r _GL_VOID_RET _GL_VOID GLdouble GLdouble r
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble t
float float4[4]
ATTR_WARN_UNUSED_RESULT const BMVert const BMEdge * e
ATTR_WARN_UNUSED_RESULT const BMVert * v
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
#define ccl_device
Definition: cuda/compat.h:32
#define ccl_private
Definition: cuda/compat.h:48
#define ccl_device_inline
Definition: cuda/compat.h:34
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
ccl_device float fast_exp2f(float x)
Definition: math_fast.h:388
ccl_device_inline float fast_cospif(float x)
Definition: math_fast.h:248
ccl_device_inline float fast_log10(float x)
Definition: math_fast.h:371
CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c)
Definition: math_fast.h:25
ccl_device_inline int fast_rint(float x)
Definition: math_fast.h:58
ccl_device_inline float fast_logf(float x)
Definition: math_fast.h:363
ccl_device_inline float4 madd4(const float4 a, const float4 b, const float4 c)
Definition: math_fast.h:36
ccl_device void fast_sincosf(float x, ccl_private float *sine, ccl_private float *cosine)
Definition: math_fast.h:134
ccl_device float4 fast_exp2f4(float4 x)
Definition: math_fast.h:427
ccl_device_inline float fast_erfcf(float x)
Definition: math_fast.h:578
ccl_device float fast_acosf(float x)
Definition: math_fast.h:253
ccl_device float fast_sinpif(float x)
Definition: math_fast.h:212
ccl_device_inline float fast_erff(float x)
Definition: math_fast.h:554
ccl_device float fast_asinf(float x)
Definition: math_fast.h:270
ccl_device float fast_logb(float x)
Definition: math_fast.h:379
ccl_device float fast_tanf(float x)
Definition: math_fast.h:177
ccl_device_inline float fast_coshf(float x)
Definition: math_fast.h:495
ccl_device float fast_atan2f(float y, float x)
Definition: math_fast.h:304
ccl_device float fast_atanf(float x)
Definition: math_fast.h:285
ccl_device_inline float fast_tanhf(float x)
Definition: math_fast.h:504
ccl_device_inline float fast_expm1f(float x)
Definition: math_fast.h:462
ccl_device_inline float fast_ierff(float x)
Definition: math_fast.h:589
ccl_device_inline float fast_exp10(float x)
Definition: math_fast.h:454
ccl_device float fast_sinf(float x)
Definition: math_fast.h:70
ccl_device float fast_cosf(float x)
Definition: math_fast.h:106
ccl_device_inline float fast_expf(float x)
Definition: math_fast.h:415
ccl_device float fast_safe_powf(float x, float y)
Definition: math_fast.h:514
ccl_device float fast_log2f(float x)
Definition: math_fast.h:337
ccl_device_inline float4 fast_expf4(float4 x)
Definition: math_fast.h:443
ccl_device float fast_sinhf(float x)
Definition: math_fast.h:473
static float P(float k)
Definition: math_interp.c:25
#define make_int4(x, y, z, w)
Definition: metal/compat.h:208
#define copysignf(x, y)
Definition: metal/compat.h:220
#define make_float4(x, y, z, w)
Definition: metal/compat.h:205
#define fabsf(x)
Definition: metal/compat.h:219
#define sqrtf(x)
Definition: metal/compat.h:243
static unsigned c
Definition: RandGen.cpp:83
static unsigned a[3]
Definition: RandGen.cpp:78
double sign(double arg)
Definition: utility.h:250
T clamp(const T &a, const T &min, const T &max)
static const pxr::TfToken b("b", pxr::TfToken::Immortal)
ccl_device_inline float __uint_as_float(uint i)
Definition: util/math.h:273
#define M_LN10_F
Definition: util/math.h:77
ccl_device_inline uint __float_as_uint(float f)
Definition: util/math.h:263
ccl_device_inline int4 __float4_as_int4(float4 f)
Definition: util/math.h:284
ccl_device_inline int __float_as_int(float f)
Definition: util/math.h:243
ccl_device_inline int float_to_int(float f)
Definition: util/math.h:410
#define M_PI_2_F
Definition: util/math.h:37
#define M_PI_4_F
Definition: util/math.h:40
#define M_1_PI_F
Definition: util/math.h:43
#define M_LN2_F
Definition: util/math.h:74
#define M_PI_F
Definition: util/math.h:34
ccl_device_inline float4 __int4_as_float4(int4 i)
Definition: util/math.h:294
ccl_device_inline float __int_as_float(int i)
Definition: util/math.h:253