Blender  V3.3
binning.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Adapted from code copyright 2009-2011 Intel Corporation
3  * Modifications Copyright 2012-2022 Blender Foundation. */
4 
5 //#define __KERNEL_SSE__
6 
7 #include "bvh/binning.h"
8 
9 #include <stdlib.h>
10 
11 #include "util/algorithm.h"
12 #include "util/boundbox.h"
13 #include "util/types.h"
14 
16 
17 /* SSE replacements */
18 
19 __forceinline void prefetch_L1(const void * /*ptr*/)
20 {
21 }
22 __forceinline void prefetch_L2(const void * /*ptr*/)
23 {
24 }
25 __forceinline void prefetch_L3(const void * /*ptr*/)
26 {
27 }
28 __forceinline void prefetch_NTA(const void * /*ptr*/)
29 {
30 }
31 
32 template<size_t src> __forceinline float extract(const int4 &b)
33 {
34  return b[src];
35 }
36 template<size_t dst> __forceinline const float4 insert(const float4 &a, const float b)
37 {
38  float4 r = a;
39  r[dst] = b;
40  return r;
41 }
42 
44 {
45  // return (int)__bsf(movemask(reduce_min(bestSAH) == bestSAH));
46 
47  float minSAH = min(bestSAH.x, min(bestSAH.y, bestSAH.z));
48 
49  if (bestSAH.x == minSAH)
50  return 0;
51  else if (bestSAH.y == minSAH)
52  return 1;
53  else
54  return 2;
55 }
56 
57 /* BVH Object Binning */
58 
60  BVHReference *prims,
61  const BVHUnaligned *unaligned_heuristic,
62  const Transform *aligned_space)
63  : BVHRange(job),
64  splitSAH(FLT_MAX),
65  dim(0),
66  pos(0),
67  unaligned_heuristic_(unaligned_heuristic),
68  aligned_space_(aligned_space)
69 {
70  if (aligned_space_ == NULL) {
71  bounds_ = bounds();
73  }
74  else {
75  /* TODO(sergey): With some additional storage we can avoid
76  * need in re-calculating this.
77  */
78  bounds_ = unaligned_heuristic->compute_aligned_boundbox(
79  *this, prims, *aligned_space, &cent_bounds_);
80  }
81 
82  /* compute number of bins to use and precompute scaling factor for binning */
83  num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
85 
86  /* initialize binning counter and bounds */
87  BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
88  int4 bin_count[MAX_BINS]; /* number of primitives mapped to bin */
89 
90  for (size_t i = 0; i < num_bins; i++) {
91  bin_count[i] = make_int4(0);
92  bin_bounds[i][0] = bin_bounds[i][1] = bin_bounds[i][2] = BoundBox::empty;
93  }
94 
95  /* map geometry to bins, unrolled once */
96  {
97  int64_t i;
98 
99  for (i = 0; i < int64_t(size()) - 1; i += 2) {
100  prefetch_L2(&prims[start() + i + 8]);
101 
102  /* map even and odd primitive to bin */
103  const BVHReference &prim0 = prims[start() + i + 0];
104  const BVHReference &prim1 = prims[start() + i + 1];
105 
106  BoundBox bounds0 = get_prim_bounds(prim0);
107  BoundBox bounds1 = get_prim_bounds(prim1);
108 
109  int4 bin0 = get_bin(bounds0);
110  int4 bin1 = get_bin(bounds1);
111 
112  /* increase bounds for bins for even primitive */
113  int b00 = (int)extract<0>(bin0);
114  bin_count[b00][0]++;
115  bin_bounds[b00][0].grow(bounds0);
116  int b01 = (int)extract<1>(bin0);
117  bin_count[b01][1]++;
118  bin_bounds[b01][1].grow(bounds0);
119  int b02 = (int)extract<2>(bin0);
120  bin_count[b02][2]++;
121  bin_bounds[b02][2].grow(bounds0);
122 
123  /* increase bounds of bins for odd primitive */
124  int b10 = (int)extract<0>(bin1);
125  bin_count[b10][0]++;
126  bin_bounds[b10][0].grow(bounds1);
127  int b11 = (int)extract<1>(bin1);
128  bin_count[b11][1]++;
129  bin_bounds[b11][1].grow(bounds1);
130  int b12 = (int)extract<2>(bin1);
131  bin_count[b12][2]++;
132  bin_bounds[b12][2].grow(bounds1);
133  }
134 
135  /* for uneven number of primitives */
136  if (i < int64_t(size())) {
137  /* map primitive to bin */
138  const BVHReference &prim0 = prims[start() + i];
139  BoundBox bounds0 = get_prim_bounds(prim0);
140  int4 bin0 = get_bin(bounds0);
141 
142  /* increase bounds of bins */
143  int b00 = (int)extract<0>(bin0);
144  bin_count[b00][0]++;
145  bin_bounds[b00][0].grow(bounds0);
146  int b01 = (int)extract<1>(bin0);
147  bin_count[b01][1]++;
148  bin_bounds[b01][1].grow(bounds0);
149  int b02 = (int)extract<2>(bin0);
150  bin_count[b02][2]++;
151  bin_bounds[b02][2].grow(bounds0);
152  }
153  }
154 
155  /* sweep from right to left and compute parallel prefix of merged bounds */
156  float4 r_area[MAX_BINS]; /* area of bounds of primitives on the right */
157  float4 r_count[MAX_BINS]; /* number of primitives on the right */
158  int4 count = make_int4(0);
159 
163 
164  for (size_t i = num_bins - 1; i > 0; i--) {
165  count = count + bin_count[i];
166  r_count[i] = blocks(count);
167 
168  bx = merge(bx, bin_bounds[i][0]);
169  r_area[i][0] = bx.half_area();
170  by = merge(by, bin_bounds[i][1]);
171  r_area[i][1] = by.half_area();
172  bz = merge(bz, bin_bounds[i][2]);
173  r_area[i][2] = bz.half_area();
174  r_area[i][3] = r_area[i][2];
175  }
176 
177  /* sweep from left to right and compute SAH */
178  int4 ii = make_int4(1);
179  float4 bestSAH = make_float4(FLT_MAX);
180  int4 bestSplit = make_int4(-1);
181 
182  count = make_int4(0);
183 
184  bx = BoundBox::empty;
185  by = BoundBox::empty;
186  bz = BoundBox::empty;
187 
188  for (size_t i = 1; i < num_bins; i++, ii += make_int4(1)) {
189  count = count + bin_count[i - 1];
190 
191  bx = merge(bx, bin_bounds[i - 1][0]);
192  float Ax = bx.half_area();
193  by = merge(by, bin_bounds[i - 1][1]);
194  float Ay = by.half_area();
195  bz = merge(bz, bin_bounds[i - 1][2]);
196  float Az = bz.half_area();
197 
198  float4 lCount = blocks(count);
199  float4 lArea = make_float4(Ax, Ay, Az, Az);
200  float4 sah = lArea * lCount + r_area[i] * r_count[i];
201 
202  bestSplit = select(sah < bestSAH, ii, bestSplit);
203  bestSAH = min(sah, bestSAH);
204  }
205 
207  bestSAH = insert<3>(select(mask, make_float4(FLT_MAX), bestSAH), FLT_MAX);
208 
209  /* find best dimension */
210  dim = get_best_dimension(bestSAH);
211  splitSAH = bestSAH[dim];
212  pos = bestSplit[dim];
214 }
215 
217  BVHObjectBinning &left_o,
218  BVHObjectBinning &right_o) const
219 {
220  size_t N = size();
221 
222  BoundBox lgeom_bounds = BoundBox::empty;
223  BoundBox rgeom_bounds = BoundBox::empty;
224  BoundBox lcent_bounds = BoundBox::empty;
225  BoundBox rcent_bounds = BoundBox::empty;
226 
227  int64_t l = 0, r = N - 1;
228 
229  while (l <= r) {
230  prefetch_L2(&prims[start() + l + 8]);
231  prefetch_L2(&prims[start() + r - 8]);
232 
233  BVHReference prim = prims[start() + l];
235  float3 unaligned_center = unaligned_bounds.center2();
236  float3 center = prim.bounds().center2();
237 
238  if (get_bin(unaligned_center)[dim] < pos) {
239  lgeom_bounds.grow(prim.bounds());
240  lcent_bounds.grow(center);
241  l++;
242  }
243  else {
244  rgeom_bounds.grow(prim.bounds());
245  rcent_bounds.grow(center);
246  swap(prims[start() + l], prims[start() + r]);
247  r--;
248  }
249  }
250  /* finish */
251  if (l != 0 && N - 1 - r != 0) {
252  right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + l, N - 1 - r),
253  prims);
254  left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), l), prims);
255  return;
256  }
257 
258  /* object medium split if we did not make progress, can happen when all
259  * primitives have same centroid */
260  lgeom_bounds = BoundBox::empty;
261  rgeom_bounds = BoundBox::empty;
262  lcent_bounds = BoundBox::empty;
263  rcent_bounds = BoundBox::empty;
264 
265  for (size_t i = 0; i < N / 2; i++) {
266  lgeom_bounds.grow(prims[start() + i].bounds());
267  lcent_bounds.grow(prims[start() + i].bounds().center2());
268  }
269 
270  for (size_t i = N / 2; i < N; i++) {
271  rgeom_bounds.grow(prims[start() + i].bounds());
272  rcent_bounds.grow(prims[start() + i].bounds().center2());
273  }
274 
275  right_o = BVHObjectBinning(BVHRange(rgeom_bounds, rcent_bounds, start() + N / 2, N / 2 + N % 2),
276  prims);
277  left_o = BVHObjectBinning(BVHRange(lgeom_bounds, lcent_bounds, start(), N / 2), prims);
278 }
279 
void swap(T &a, T &b)
Definition: Common.h:19
NSNotificationCenter * center
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble GLdouble nz _GL_VOID_RET _GL_VOID GLfloat GLfloat nz _GL_VOID_RET _GL_VOID GLint GLint nz _GL_VOID_RET _GL_VOID GLshort GLshort nz _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const GLfloat *values _GL_VOID_RET _GL_VOID GLsizei const GLushort *values _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID const GLuint const GLclampf *priorities _GL_VOID_RET _GL_VOID GLdouble y _GL_VOID_RET _GL_VOID GLfloat y _GL_VOID_RET _GL_VOID GLint y _GL_VOID_RET _GL_VOID GLshort y _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLfloat GLfloat z _GL_VOID_RET _GL_VOID GLint GLint z _GL_VOID_RET _GL_VOID GLshort GLshort z _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble w _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat w _GL_VOID_RET _GL_VOID GLint GLint GLint w _GL_VOID_RET _GL_VOID GLshort GLshort GLshort w _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble y2 _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat y2 _GL_VOID_RET _GL_VOID GLint GLint GLint y2 _GL_VOID_RET _GL_VOID GLshort GLshort GLshort y2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLdouble GLdouble z _GL_VOID_RET _GL_VOID GLuint *buffer _GL_VOID_RET _GL_VOID GLdouble t _GL_VOID_RET _GL_VOID GLfloat t _GL_VOID_RET _GL_VOID GLint t _GL_VOID_RET _GL_VOID GLshort t _GL_VOID_RET _GL_VOID GLdouble GLdouble r _GL_VOID_RET _GL_VOID GLfloat GLfloat r _GL_VOID_RET _GL_VOID GLint GLint r _GL_VOID_RET _GL_VOID GLshort GLshort r _GL_VOID_RET _GL_VOID GLdouble GLdouble r
float float4[4]
__forceinline const avxb select(const avxb &m, const avxb &t, const avxb &f)
Definition: avxb.h:154
__forceinline float extract< 0 >(const avxf &a)
Definition: avxf.h:259
CCL_NAMESPACE_BEGIN __forceinline void prefetch_L1(const void *)
Definition: binning.cpp:19
__forceinline float extract(const int4 &b)
Definition: binning.cpp:32
__forceinline void prefetch_L2(const void *)
Definition: binning.cpp:22
__forceinline void prefetch_NTA(const void *)
Definition: binning.cpp:28
__forceinline void prefetch_L3(const void *)
Definition: binning.cpp:25
__forceinline const float4 insert(const float4 &a, const float b)
Definition: binning.cpp:36
__forceinline int get_best_dimension(const float4 &bestSAH)
Definition: binning.cpp:43
ATTR_WARN_UNUSED_RESULT const BMLoop * l
__forceinline BoundBox merge(const BoundBox &bbox, const float3 &pt)
Definition: boundbox.h:161
float3 scale
Definition: binning.h:49
void split(BVHReference *prims, BVHObjectBinning &left_o, BVHObjectBinning &right_o) const
Definition: binning.cpp:216
__forceinline const BoundBox & unaligned_bounds()
Definition: binning.h:37
__forceinline int4 get_bin(const BoundBox &box) const
Definition: binning.h:62
__forceinline BVHObjectBinning()
Definition: binning.h:26
float leafSAH
Definition: binning.h:43
float splitSAH
Definition: binning.h:42
BoundBox bounds_
Definition: binning.h:52
__forceinline BoundBox get_prim_bounds(const BVHReference &prim) const
Definition: binning.h:89
__forceinline float4 blocks(const int4 &a) const
Definition: binning.h:78
const Transform * aligned_space_
Definition: binning.h:56
size_t num_bins
Definition: binning.h:48
BoundBox cent_bounds_
Definition: binning.h:53
__forceinline int size() const
Definition: params.h:289
__forceinline int start() const
Definition: params.h:285
__forceinline const BoundBox & cent_bounds() const
Definition: params.h:281
__forceinline BVHRange()
Definition: params.h:253
__forceinline const BoundBox & bounds() const
Definition: params.h:277
__forceinline const BoundBox & bounds() const
Definition: params.h:203
BoundBox compute_aligned_boundbox(const BVHObjectBinning &range, const BVHReference *references, const Transform &aligned_space, BoundBox *cent_bounds=NULL) const
Definition: unaligned.cpp:99
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
SyclQueue void void * src
uint pos
int count
ccl_device_inline float3 rcp(const float3 &a)
Definition: math_float3.h:377
ccl_device_inline float4 zero_float4()
Definition: math_float4.h:92
ccl_device_inline float4 mask(const int4 &mask, const float4 &a)
Definition: math_float4.h:513
#define N
#define make_int4(x, y, z, w)
Definition: metal/compat.h:208
#define make_float4(x, y, z, w)
Definition: metal/compat.h:205
#define make_float3(x, y, z)
Definition: metal/compat.h:204
static unsigned a[3]
Definition: RandGen.cpp:78
static const pxr::TfToken b("b", pxr::TfToken::Immortal)
#define __forceinline
#define min(a, b)
Definition: sort.c:35
__int64 int64_t
Definition: stdint.h:89
__forceinline float half_area() const
Definition: boundbox.h:108
@ empty
Definition: boundbox.h:35
__forceinline float3 size() const
Definition: boundbox.h:124
__forceinline void grow(const float3 &pt)
Definition: boundbox.h:42
__forceinline float3 center2() const
Definition: boundbox.h:119
ccl_device_inline float4 float3_to_float4(const float3 a)
Definition: util/math.h:505