Blender V3.3
device/multi/device.cpp
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

#include "device/multi/device.h"

#include <sstream>
#include <stdlib.h>

#include "bvh/multi.h"

#include "device/device.h"
#include "device/queue.h"

#include "scene/geometry.h"

#include "util/foreach.h"
#include "util/list.h"
#include "util/log.h"
#include "util/map.h"
#include "util/time.h"
CCL_NAMESPACE_BEGIN

class MultiDevice : public Device {
 public:
  struct SubDevice {
    Stats stats;
    Device *device = nullptr;
    map<device_ptr, device_ptr> ptr_map;
    int peer_island_index = -1;
  };

  list<SubDevice> devices;
  device_ptr unique_key;
  vector<vector<SubDevice *>> peer_islands;
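
  /* Descriptive note: 'unique_key' hands out virtual device pointers which each
   * sub-device's 'ptr_map' translates back to its real allocation, while
   * 'peer_islands' groups sub-devices that can access each other's memory, so a
   * buffer is allocated once per island rather than once per device. */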

  MultiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
      : Device(info, stats, profiler), unique_key(1)
  {
    foreach (const DeviceInfo &subinfo, info.multi_devices) {
      /* Always add CPU devices at the back since GPU devices can change
       * host memory pointers, which CPU uses as device pointer. */
      SubDevice *sub;
      if (subinfo.type == DEVICE_CPU) {
        devices.emplace_back();
        sub = &devices.back();
      }
      else {
        devices.emplace_front();
        sub = &devices.front();
      }

      /* The pointer to 'sub->stats' will stay valid even after new devices
       * are added, since 'devices' is a linked list. */
      sub->device = Device::create(subinfo, sub->stats, profiler);
    }

    /* Build a list of peer islands for the available render devices. */
    foreach (SubDevice &sub, devices) {
      /* First ensure that every device is in at least one peer island. */
      if (sub.peer_island_index < 0) {
        peer_islands.emplace_back();
        sub.peer_island_index = (int)peer_islands.size() - 1;
        peer_islands[sub.peer_island_index].push_back(&sub);
      }

      if (!info.has_peer_memory) {
        continue;
      }

      /* Second, check peer access between devices and fill up the islands accordingly. */
      foreach (SubDevice &peer_sub, devices) {
        if (peer_sub.peer_island_index < 0 &&
            peer_sub.device->info.type == sub.device->info.type &&
            peer_sub.device->check_peer_access(sub.device)) {
          peer_sub.peer_island_index = sub.peer_island_index;
          peer_islands[sub.peer_island_index].push_back(&peer_sub);
        }
      }
    }
  }

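  /* Illustration (assumed hardware, not from this file): wrapping two OptiX
   * GPUs that report peer access to each other plus one CPU, and with peer
   * memory enabled, the loops above produce
   *   peer_islands[0] = { GPU 0, GPU 1 }  and  peer_islands[1] = { CPU },
   * since grouping requires matching device types and a successful
   * check_peer_access(). */
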
  ~MultiDevice()
  {
    foreach (SubDevice &sub, devices)
      delete sub.device;
  }

  const string &error_message() override
  {
    error_msg.clear();

    foreach (SubDevice &sub, devices)
      error_msg += sub.device->error_message();

    return error_msg;
  }

  virtual BVHLayoutMask get_bvh_layout_mask() const override
  {
    BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
    BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
    foreach (const SubDevice &sub_device, devices) {
      BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
      bvh_layout_mask &= device_bvh_layout_mask;
      bvh_layout_mask_all |= device_bvh_layout_mask;
    }

    /* With multiple OptiX devices, every device needs its own acceleration structure. */
    if (bvh_layout_mask == BVH_LAYOUT_OPTIX) {
      return BVH_LAYOUT_MULTI_OPTIX;
    }

    /* With multiple Metal devices, every device needs its own acceleration structure. */
    if (bvh_layout_mask == BVH_LAYOUT_METAL) {
      return BVH_LAYOUT_MULTI_METAL;
    }

    /* When devices do not share a common BVH layout, fall back to creating one for each. */
    const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
      return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
    }
    const BVHLayoutMask BVH_LAYOUT_METAL_EMBREE = (BVH_LAYOUT_METAL | BVH_LAYOUT_EMBREE);
    if ((bvh_layout_mask_all & BVH_LAYOUT_METAL_EMBREE) == BVH_LAYOUT_METAL_EMBREE) {
      return BVH_LAYOUT_MULTI_METAL_EMBREE;
    }

    return bvh_layout_mask;
  }
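
  /* Worked example (assumed sub-device masks): an OptiX GPU reporting only
   * BVH_LAYOUT_OPTIX combined with a CPU reporting BVH_LAYOUT_BVH2 |
   * BVH_LAYOUT_EMBREE leaves the intersection empty, while the union contains
   * both the OptiX and Embree bits, so the fallback above selects
   * BVH_LAYOUT_MULTI_OPTIX_EMBREE and each device builds its own BVH. */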

  bool load_kernels(const uint kernel_features) override
  {
    foreach (SubDevice &sub, devices)
      if (!sub.device->load_kernels(kernel_features))
        return false;

    return true;
  }

  void build_bvh(BVH *bvh, Progress &progress, bool refit) override
  {
    /* Try to build and share a single acceleration structure, if possible. */
    if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
      devices.back().device->build_bvh(bvh, progress, refit);
      return;
    }

    assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE ||
           bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE);

    BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
    bvh_multi->sub_bvhs.resize(devices.size());

    vector<BVHMulti *> geom_bvhs;
    geom_bvhs.reserve(bvh->geometry.size());
    foreach (Geometry *geom, bvh->geometry) {
      geom_bvhs.push_back(static_cast<BVHMulti *>(geom->bvh));
    }

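    /* Descriptive note: the geometry BVH pointers are redirected in the loop
     * below so each sub-device's build sees that device's own sub-BVHs (the
     * sub-BVH list collected above), and they are restored to the multi BVHs
     * once the loop is done. */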
    /* Broadcast acceleration structure build to all render devices. */
    size_t i = 0;
    foreach (SubDevice &sub, devices) {
      /* Change geometry BVH pointers to the sub BVH. */
      for (size_t k = 0; k < bvh->geometry.size(); ++k) {
        bvh->geometry[k]->bvh = geom_bvhs[k]->sub_bvhs[i];
      }

      if (!bvh_multi->sub_bvhs[i]) {
        BVHParams params = bvh->params;
        if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
          params.bvh_layout = BVH_LAYOUT_OPTIX;
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL)
          params.bvh_layout = BVH_LAYOUT_METAL;
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
          params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
                                                                      BVH_LAYOUT_EMBREE;
        else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_METAL_EMBREE)
          params.bvh_layout = sub.device->info.type == DEVICE_METAL ? BVH_LAYOUT_METAL :
                                                                      BVH_LAYOUT_EMBREE;

        /* Skip building a bottom level acceleration structure for non-instanced geometry on
         * Embree (since they are put into the top level directly, see bvh_embree.cpp). */
        if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
            !bvh->geometry[0]->is_instanced()) {
          i++;
          continue;
        }

        bvh_multi->sub_bvhs[i] = BVH::create(params, bvh->geometry, bvh->objects, sub.device);
      }

      sub.device->build_bvh(bvh_multi->sub_bvhs[i], progress, refit);
      i++;
    }

    /* Change geometry BVH pointers back to the multi BVH. */
    for (size_t k = 0; k < bvh->geometry.size(); ++k) {
      bvh->geometry[k]->bvh = geom_bvhs[k];
    }
  }

  virtual void *get_cpu_osl_memory() override
  {
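    /* Descriptive note: OSL shading data is only usable when a single device
     * is wrapped; with several sub-devices there is no one table to return. */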
    if (devices.size() > 1) {
      return NULL;
    }
    return devices.front().device->get_cpu_osl_memory();
  }

  bool is_resident(device_ptr key, Device *sub_device) override
  {
    foreach (SubDevice &sub, devices) {
      if (sub.device == sub_device) {
        return find_matching_mem_device(key, sub)->device == sub_device;
      }
    }
    return false;
  }

  SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
  {
    assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));

    /* Get the memory owner of this key (first try current device, then peer devices). */
    SubDevice *owner_sub = &sub;
    if (owner_sub->ptr_map.find(key) == owner_sub->ptr_map.end()) {
      foreach (SubDevice *island_sub, peer_islands[sub.peer_island_index]) {
        if (island_sub != owner_sub &&
            island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) {
          owner_sub = island_sub;
        }
      }
    }
    return owner_sub;
  }
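
  /* Descriptive note: the assert above encodes the invariant that a key is
   * either owned by this sub-device directly or resolvable somewhere in its
   * peer island. */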

  SubDevice *find_suitable_mem_device(device_ptr key, const vector<SubDevice *> &island)
  {
    assert(!island.empty());

    /* Get the memory owner of this key, or the device with the lowest memory usage when new. */
    SubDevice *owner_sub = island.front();
    foreach (SubDevice *island_sub, island) {
      if (key ? (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) :
                (island_sub->device->stats.mem_used < owner_sub->device->stats.mem_used)) {
        owner_sub = island_sub;
      }
    }
    return owner_sub;
  }
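
  /* Descriptive note: a zero key (nothing allocated yet) picks the island
   * member with the least memory used so far, spreading fresh buffers across
   * peers; a nonzero key resolves to whichever member already owns it, or
   * falls back to the island's first member. */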

  device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
  {
    return find_matching_mem_device(key, sub)->ptr_map[key];
  }

  void mem_alloc(device_memory &mem) override
  {
    device_ptr key = unique_key++;

    assert(mem.type == MEM_READ_ONLY || mem.type == MEM_READ_WRITE || mem.type == MEM_DEVICE_ONLY);
    /* The remaining memory types can be distributed across devices. */
    foreach (const vector<SubDevice *> &island, peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(key, island);
      mem.device = owner_sub->device;
      mem.device_pointer = 0;
      mem.device_size = 0;

      owner_sub->device->mem_alloc(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size);
  }
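
  /* Descriptive note: from here on the caller treats 'key' as the device
   * pointer; every later operation routes it back through 'ptr_map' to the
   * owning sub-device's real allocation. */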

  void mem_copy_to(device_memory &mem) override
  {
    device_ptr existing_key = mem.device_pointer;
    device_ptr key = (existing_key) ? existing_key : unique_key++;
    size_t existing_size = mem.device_size;

    /* The tile buffers are allocated on each device (see below), so copy to all of them. */
    foreach (const vector<SubDevice *> &island, peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
      mem.device = owner_sub->device;
      mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
      mem.device_size = existing_size;

      owner_sub->device->mem_copy_to(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;

      if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
        /* Need to create texture objects and update pointer in kernel globals on all devices. */
        foreach (SubDevice *island_sub, island) {
          if (island_sub != owner_sub) {
            island_sub->device->mem_copy_to(mem);
          }
        }
      }
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size - existing_size);
  }

  void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override
  {
    device_ptr key = mem.device_pointer;
    size_t i = 0, sub_h = h / devices.size();

    foreach (SubDevice &sub, devices) {
      size_t sy = y + i * sub_h;
      size_t sh = (i == (size_t)devices.size() - 1) ? h - sub_h * i : sub_h;

      SubDevice *owner_sub = find_matching_mem_device(key, sub);
      mem.device = owner_sub->device;
      mem.device_pointer = owner_sub->ptr_map[key];

      owner_sub->device->mem_copy_from(mem, sy, w, sh, elem);
      i++;
    }

    mem.device = this;
    mem.device_pointer = key;
  }
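
  /* Descriptive note: reads are split by scanline, with each sub-device
   * copying back its own 'sub_h'-row slice of the buffer and the last device
   * taking any remainder rows. */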

  void mem_zero(device_memory &mem) override
  {
    device_ptr existing_key = mem.device_pointer;
    device_ptr key = (existing_key) ? existing_key : unique_key++;
    size_t existing_size = mem.device_size;

    foreach (const vector<SubDevice *> &island, peer_islands) {
      SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
      mem.device = owner_sub->device;
      mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
      mem.device_size = existing_size;

      owner_sub->device->mem_zero(mem);
      owner_sub->ptr_map[key] = mem.device_pointer;
    }

    mem.device = this;
    mem.device_pointer = key;
    stats.mem_alloc(mem.device_size - existing_size);
  }

  void mem_free(device_memory &mem) override
  {
    device_ptr key = mem.device_pointer;
    size_t existing_size = mem.device_size;

    /* Free memory that was allocated for all devices (see above) on each device. */
    foreach (const vector<SubDevice *> &island, peer_islands) {
      SubDevice *owner_sub = find_matching_mem_device(key, *island.front());
      mem.device = owner_sub->device;
      mem.device_pointer = owner_sub->ptr_map[key];
      mem.device_size = existing_size;

      owner_sub->device->mem_free(mem);
      owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));

      if (mem.type == MEM_TEXTURE) {
        /* Free texture objects on all devices. */
        foreach (SubDevice *island_sub, island) {
          if (island_sub != owner_sub) {
            island_sub->device->mem_free(mem);
          }
        }
      }
    }

    mem.device = this;
    mem.device_pointer = 0;
    mem.device_size = 0;
    stats.mem_free(existing_size);
  }

  void const_copy_to(const char *name, void *host, size_t size) override
  {
    foreach (SubDevice &sub, devices)
      sub.device->const_copy_to(name, host, size);
  }

  int device_number(Device *sub_device) override
  {
    int i = 0;

    foreach (SubDevice &sub, devices) {
      if (sub.device == sub_device)
        return i;
      i++;
    }

    return -1;
  }

  virtual void foreach_device(const function<void(Device *)> &callback) override
  {
    foreach (SubDevice &sub, devices) {
      sub.device->foreach_device(callback);
    }
  }
};

Device *device_multi_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
  return new MultiDevice(info, stats, profiler);
}
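
/* Usage sketch (hypothetical caller, not part of this file): the wrapper is
 * normally reached through the regular device factory once a DeviceInfo of
 * type DEVICE_MULTI has been filled in, e.g.:
 *
 *   DeviceInfo info;
 *   info.type = DEVICE_MULTI;
 *   info.multi_devices.push_back(gpu_info);
 *   info.multi_devices.push_back(cpu_info);  // CPU ends up at the back anyway
 *   Device *device = device_multi_create(info, stats, profiler);
 *
 * All Device virtuals then fan out to the sub-devices as defined above. */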

CCL_NAMESPACE_END