Blender V3.3: hip/device_impl.cpp
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #ifdef WITH_HIP
5 
6 # include <climits>
7 # include <limits.h>
8 # include <stdio.h>
9 # include <stdlib.h>
10 # include <string.h>
11 
12 # include "device/hip/device_impl.h"
13 
14 # include "util/debug.h"
15 # include "util/foreach.h"
16 # include "util/log.h"
17 # include "util/map.h"
18 # include "util/md5.h"
19 # include "util/opengl.h"
20 # include "util/path.h"
21 # include "util/string.h"
22 # include "util/system.h"
23 # include "util/time.h"
24 # include "util/types.h"
25 # include "util/windows.h"
26 
27 # include "kernel/device/hip/globals.h"
28 
29 CCL_NAMESPACE_BEGIN
30 
31 class HIPDevice;
32 
33 bool HIPDevice::have_precompiled_kernels()
34 {
35  string fatbins_path = path_get("lib");
36  return path_exists(fatbins_path);
37 }
38 
39 BVHLayoutMask HIPDevice::get_bvh_layout_mask() const
40 {
41  return BVH_LAYOUT_BVH2;
42 }
43 
44 void HIPDevice::set_error(const string &error)
45 {
46  Device::set_error(error);
47 
48  if (first_error) {
49  fprintf(stderr, "\nRefer to the Cycles GPU rendering documentation for possible solutions:\n");
50  fprintf(stderr,
51  "https://docs.blender.org/manual/en/latest/render/cycles/gpu_rendering.html\n\n");
52  first_error = false;
53  }
54 }
55 
56 HIPDevice::HIPDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
57  : Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
58 {
59  first_error = true;
60 
61  hipDevId = info.num;
62  hipDevice = 0;
63  hipContext = 0;
64 
65  hipModule = 0;
66 
67  need_texture_info = false;
68 
69  device_texture_headroom = 0;
70  device_working_headroom = 0;
71  move_texture_to_host = false;
72  map_host_limit = 0;
73  map_host_used = 0;
74  can_map_host = 0;
75  pitch_alignment = 0;
76 
77  /* Initialize HIP. */
78  hipError_t result = hipInit(0);
79  if (result != hipSuccess) {
80  set_error(string_printf("Failed to initialize HIP runtime (%s)", hipewErrorString(result)));
81  return;
82  }
83 
84  /* Setup device and context. */
85  result = hipDeviceGet(&hipDevice, hipDevId);
86  if (result != hipSuccess) {
87  set_error(string_printf("Failed to get HIP device handle from ordinal (%s)",
88  hipewErrorString(result)));
89  return;
90  }
91 
92  /* hipDeviceMapHost for mapping host memory when out of device memory.
93  * hipDeviceLmemResizeToMax for reserving local memory ahead of render,
94  * so we can predict which memory to map to host. */
95  hip_assert(hipDeviceGetAttribute(&can_map_host, hipDeviceAttributeCanMapHostMemory, hipDevice));
96 
97  hip_assert(
98  hipDeviceGetAttribute(&pitch_alignment, hipDeviceAttributeTexturePitchAlignment, hipDevice));
99 
100  unsigned int ctx_flags = hipDeviceLmemResizeToMax;
101  if (can_map_host) {
102  ctx_flags |= hipDeviceMapHost;
103  init_host_memory();
104  }
105 
106  /* Create context. */
107  result = hipCtxCreate(&hipContext, ctx_flags, hipDevice);
108 
109  if (result != hipSuccess) {
110  set_error(string_printf("Failed to create HIP context (%s)", hipewErrorString(result)));
111  return;
112  }
113 
114  int major, minor;
115  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
116  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
117  hipDevArchitecture = major * 100 + minor * 10;
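 /* Editorial note, illustrative only: a device reporting major = 10 and minor = 3
  * (e.g. a gfx1030 GPU) yields hipDevArchitecture = 10 * 100 + 3 * 10 = 1030. */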
118 
119  /* Get HIP runtime version, needed for memory types. */
120  hip_assert(hipRuntimeGetVersion(&hipRuntimeVersion));
121 
122  /* Pop context set by hipCtxCreate. */
123  hipCtxPopCurrent(NULL);
124 }
125 
126 HIPDevice::~HIPDevice()
127 {
128  texture_info.free();
129 
130  hip_assert(hipCtxDestroy(hipContext));
131 }
132 
133 bool HIPDevice::support_device(const uint /*kernel_features*/)
134 {
135  if (hipSupportsDevice(hipDevId)) {
136  return true;
137  }
138  else {
139  /* We only support Navi and above. */
140  hipDeviceProp_t props;
141  hipGetDeviceProperties(&props, hipDevId);
142 
143  set_error(string_printf("HIP backend requires AMD RDNA graphics card or up, but found %s.",
144  props.name));
145  return false;
146  }
147 }
148 
149 bool HIPDevice::check_peer_access(Device *peer_device)
150 {
151  if (peer_device == this) {
152  return false;
153  }
154  if (peer_device->info.type != DEVICE_HIP && peer_device->info.type != DEVICE_OPTIX) {
155  return false;
156  }
157 
158  HIPDevice *const peer_device_hip = static_cast<HIPDevice *>(peer_device);
159 
160  int can_access = 0;
161  hip_assert(hipDeviceCanAccessPeer(&can_access, hipDevice, peer_device_hip->hipDevice));
162  if (can_access == 0) {
163  return false;
164  }
165 
166  // Ensure array access over the link is possible as well (for 3D textures)
167  hip_assert(hipDeviceGetP2PAttribute(
168  &can_access, hipDevP2PAttrHipArrayAccessSupported, hipDevice, peer_device_hip->hipDevice));
169  if (can_access == 0) {
170  return false;
171  }
172 
173  // Enable peer access in both directions
174  {
175  const HIPContextScope scope(this);
176  hipError_t result = hipCtxEnablePeerAccess(peer_device_hip->hipContext, 0);
177  if (result != hipSuccess) {
178  set_error(string_printf("Failed to enable peer access on HIP context (%s)",
179  hipewErrorString(result)));
180  return false;
181  }
182  }
183  {
184  const HIPContextScope scope(peer_device_hip);
185  hipError_t result = hipCtxEnablePeerAccess(hipContext, 0);
186  if (result != hipSuccess) {
187  set_error(string_printf("Failed to enable peer access on HIP context (%s)",
188  hipewErrorString(result)));
189  return false;
190  }
191  }
192 
193  return true;
194 }
195 
196 bool HIPDevice::use_adaptive_compilation()
197 {
198  return DebugFlags().hip.adaptive_compile;
199 }
200 
201 /* Common HIPCC flags which stay the same regardless of shading model and
202  * kernel source md5, and only depend on the compiler or compilation settings.
203  */
204 string HIPDevice::compile_kernel_get_common_cflags(const uint kernel_features)
205 {
206  const int machine = system_cpu_bits();
207  const string source_path = path_get("source");
208  const string include_path = source_path;
209  string cflags = string_printf(
210  "-m%d "
211  "--use_fast_math "
212  "-DHIPCC "
213  "-I\"%s\"",
214  machine,
215  include_path.c_str());
216  if (use_adaptive_compilation()) {
217  cflags += " -D__KERNEL_FEATURES__=" + to_string(kernel_features);
218  }
219  return cflags;
220 }
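 /* Editorial note, illustrative only: on a 64-bit build with adaptive compilation
  * disabled, the returned flags look roughly like
  *   -m64 --use_fast_math -DHIPCC -I"<source path>"
  * and with adaptive compilation enabled, " -D__KERNEL_FEATURES__=<N>" is appended,
  * where <N> is the kernel_features bitmask passed in. */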
221 
222 string HIPDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
223 {
224  /* Compute kernel name. */
225  int major, minor;
226  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
227  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
228  hipDeviceProp_t props;
229  hipGetDeviceProperties(&props, hipDevId);
230 
231  /* gcnArchName can contain feature tokens after the arch name, e.g.
232  * `gfx1010:sramecc-:xnack-`, so we tokenize it to get the first part. */
233  char *arch = strtok(props.gcnArchName, ":");
234  if (arch == NULL) {
235  arch = props.gcnArchName;
236  }
237 
238  /* Attempt to use kernel provided with Blender. */
239  if (!use_adaptive_compilation()) {
240  const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch));
241  VLOG_INFO << "Testing for pre-compiled kernel " << fatbin << ".";
242  if (path_exists(fatbin)) {
243  VLOG_INFO << "Using precompiled kernel.";
244  return fatbin;
245  }
246  }
247 
248  /* Try to use locally compiled kernel. */
249  string source_path = path_get("source");
250  const string source_md5 = path_files_md5_hash(source_path);
251 
252  /* We include cflags in the md5 so that changing the HIP toolkit or other
253  * compiler command line arguments forces the fatbin to be re-built.
254  */
255  string common_cflags = compile_kernel_get_common_cflags(kernel_features);
256  const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
257 
258  const char *const kernel_ext = "genco";
259  std::string options;
260 # ifdef _WIN32
261  options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -ffast-math");
262 # else
263  options.append("Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math");
264 # endif
265 # ifdef _DEBUG
266  options.append(" -save-temps");
267 # endif
268  options.append(" --amdgpu-target=").append(arch);
269 
270  const string include_path = source_path;
271  const string fatbin_file = string_printf("cycles_%s_%s_%s", name, arch, kernel_md5.c_str());
272  const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
273  VLOG_INFO << "Testing for locally compiled kernel " << fatbin << ".";
274  if (path_exists(fatbin)) {
275  VLOG_INFO << "Using locally compiled kernel.";
276  return fatbin;
277  }
278 
279 # ifdef _WIN32
280  if (!use_adaptive_compilation() && have_precompiled_kernels()) {
281  if (!hipSupportsDevice(hipDevId)) {
282  set_error(
283  string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
284  "Your GPU is not supported.",
285  major,
286  minor));
287  }
288  else {
289  set_error(
290  string_printf("HIP binary kernel for this graphics card compute "
291  "capability (%d.%d) not found.",
292  major,
293  minor));
294  }
295  return string();
296  }
297 # endif
298 
299  /* Compile. */
300  const char *const hipcc = hipewCompilerPath();
301  if (hipcc == NULL) {
302  set_error(
303  "HIP hipcc compiler not found. "
304  "Install HIP toolkit in default location.");
305  return string();
306  }
307 
308  const int hipcc_hip_version = hipewCompilerVersion();
309  VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << ".";
310  if (hipcc_hip_version < 40) {
311  printf(
312  "Unsupported HIP version %d.%d detected, "
313  "you need HIP 4.0 or newer.\n",
314  hipcc_hip_version / 10,
315  hipcc_hip_version % 10);
316  return string();
317  }
318 
319  double starttime = time_dt();
320 
321  path_create_directories(fatbin);
322 
323  source_path = path_join(path_join(source_path, "kernel"),
324  path_join("device", path_join(base, string_printf("%s.cpp", name))));
325 
326  string command = string_printf("%s -%s -I %s --%s %s -o \"%s\"",
327  hipcc,
328  options.c_str(),
329  include_path.c_str(),
330  kernel_ext,
331  source_path.c_str(),
332  fatbin.c_str());
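 /* Editorial note, illustrative only (paths, arch and hash are placeholders): the
  * assembled command looks roughly like
  *   hipcc -Wno-parentheses-equality -Wno-unused-value --hipcc-func-supp -O3 -ffast-math
  *     --amdgpu-target=gfx1030 -I <source> --genco <source>/kernel/device/hip/kernel.cpp
  *     -o "<cache>/kernels/cycles_kernel_gfx1030_<md5>"
  * On Windows the -O3 flag is omitted and the whole line is prefixed with "call". */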
333 
334  printf("Compiling %sHIP kernel ...\n%s\n",
335  (use_adaptive_compilation()) ? "adaptive " : "",
336  command.c_str());
337 
338 # ifdef _WIN32
339  command = "call " + command;
340 # endif
341  if (system(command.c_str()) != 0) {
342  set_error(
343  "Failed to execute compilation command, "
344  "see console for details.");
345  return string();
346  }
347 
348  /* Verify if compilation succeeded */
349  if (!path_exists(fatbin)) {
350  set_error(
351  "HIP kernel compilation failed, "
352  "see console for details.");
353  return string();
354  }
355 
356  printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
357 
358  return fatbin;
359 }
360 
361 bool HIPDevice::load_kernels(const uint kernel_features)
362 {
363  /* TODO(sergey): Support kernels re-load for HIP devices adaptive compile.
364  *
365  * Currently re-loading kernels will invalidate memory pointers.
366  */
367  if (hipModule) {
368  if (use_adaptive_compilation()) {
369  VLOG_INFO << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
370  }
371  return true;
372  }
373 
374  /* check if hip init succeeded */
375  if (hipContext == 0)
376  return false;
377 
378  /* check if GPU is supported */
379  if (!support_device(kernel_features)) {
380  return false;
381  }
382 
383  /* get kernel */
384  const char *kernel_name = "kernel";
385  string fatbin = compile_kernel(kernel_features, kernel_name);
386  if (fatbin.empty())
387  return false;
388 
389  /* open module */
390  HIPContextScope scope(this);
391 
392  string fatbin_data;
393  hipError_t result;
394 
395  if (path_read_text(fatbin, fatbin_data))
396  result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
397  else
398  result = hipErrorFileNotFound;
399 
400  if (result != hipSuccess)
401  set_error(string_printf(
402  "Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(result)));
403 
404  if (result == hipSuccess) {
405  kernels.load(this);
406  reserve_local_memory(kernel_features);
407  }
408 
409  return (result == hipSuccess);
410 }
411 
412 void HIPDevice::reserve_local_memory(const uint kernel_features)
413 {
414  /* Together with hipDeviceLmemResizeToMax, this reserves local memory
415  * needed for kernel launches, so that we can reliably figure out when
416  * to allocate scene data in mapped host memory. */
417  size_t total = 0, free_before = 0, free_after = 0;
418 
419  {
420  HIPContextScope scope(this);
421  hipMemGetInfo(&free_before, &total);
422  }
423 
424  {
425  /* Use the biggest kernel for estimation. */
426  const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
427  DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
428  (kernel_features & KERNEL_FEATURE_MNEE) ?
429  DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
430  DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
431 
432  /* Launch the kernel; using just 1 block appears sufficient to reserve memory for all
433  * multiprocessors. It would still be good to do this in parallel for the multi-GPU case
434  * to make it faster. */
435  HIPDeviceQueue queue(this);
436 
437  device_ptr d_path_index = 0;
438  device_ptr d_render_buffer = 0;
439  int d_work_size = 0;
440  DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
441 
442  queue.init_execution();
443  queue.enqueue(test_kernel, 1, args);
444  queue.synchronize();
445  }
446 
447  {
448  HIPContextScope scope(this);
449  hipMemGetInfo(&free_after, &total);
450  }
451 
452  VLOG_INFO << "Local memory reserved " << string_human_readable_number(free_before - free_after)
453  << " bytes. (" << string_human_readable_size(free_before - free_after) << ")";
454 
455 # if 0
456  /* For testing mapped host memory, fill up device memory. */
457  const size_t keep_mb = 1024;
458 
459  while (free_after > keep_mb * 1024 * 1024LL) {
460  hipDeviceptr_t tmp;
461  hip_assert(hipMalloc(&tmp, 10 * 1024 * 1024LL));
462  hipMemGetInfo(&free_after, &total);
463  }
464 # endif
465 }
466 
467 void HIPDevice::init_host_memory()
468 {
469  /* Limit amount of host mapped memory, because allocating too much can
470  * cause system instability. Leave at least half or 4 GB of system
471  * memory free, whichever is smaller. */
472  size_t default_limit = 4 * 1024 * 1024 * 1024LL;
473  size_t system_ram = system_physical_ram();
474 
475  if (system_ram > 0) {
476  if (system_ram / 2 > default_limit) {
477  map_host_limit = system_ram - default_limit;
478  }
479  else {
480  map_host_limit = system_ram / 2;
481  }
482  }
483  else {
484  VLOG_WARNING << "Mapped host memory disabled, failed to get system RAM";
485  map_host_limit = 0;
486  }
487 
488  /* Amount of device memory to keep free after texture memory
489  * and working memory allocations, respectively. We set the working
490  * memory limit headroom lower so that some space is left after all
491  * texture memory allocations. */
492  device_working_headroom = 32 * 1024 * 1024LL; // 32MB
493  device_texture_headroom = 128 * 1024 * 1024LL; // 128MB
494 
495  VLOG_INFO << "Mapped host memory limit set to " << string_human_readable_number(map_host_limit)
496  << " bytes. (" << string_human_readable_size(map_host_limit) << ")";
497 }
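 /* Editorial note, illustrative only: with 64 GB of system RAM, half (32 GB) exceeds the
  * 4 GB default limit, so map_host_limit = 64 GB - 4 GB = 60 GB (leaving 4 GB free);
  * with 6 GB of RAM, half (3 GB) does not, so map_host_limit = 3 GB. */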
498 
499 void HIPDevice::load_texture_info()
500 {
501  if (need_texture_info) {
502  /* Unset flag before copying, so this does not loop indefinitely if the copy below calls
503  * into 'move_textures_to_host' (which calls 'load_texture_info' again). */
504  need_texture_info = false;
505  texture_info.copy_to_device();
506  }
507 }
508 
509 void HIPDevice::move_textures_to_host(size_t size, bool for_texture)
510 {
511  /* Break out of recursive call, which can happen when moving memory on a multi device. */
512  static bool any_device_moving_textures_to_host = false;
513  if (any_device_moving_textures_to_host) {
514  return;
515  }
516 
517  /* Signal to reallocate textures in host memory only. */
518  move_texture_to_host = true;
519 
520  while (size > 0) {
521  /* Find suitable memory allocation to move. */
522  device_memory *max_mem = NULL;
523  size_t max_size = 0;
524  bool max_is_image = false;
525 
526  thread_scoped_lock lock(hip_mem_map_mutex);
527  foreach (HIPMemMap::value_type &pair, hip_mem_map) {
528  device_memory &mem = *pair.first;
529  HIPMem *cmem = &pair.second;
530 
531  /* Can only move textures allocated on this device (and not those from peer devices).
532  * We also need to ignore memory that is already on the host. */
533  if (!mem.is_resident(this) || cmem->use_mapped_host) {
534  continue;
535  }
536 
537  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) &&
538  (&mem != &texture_info);
539  bool is_image = is_texture && (mem.data_height > 1);
540 
541  /* Can't move this type of memory. */
542  if (!is_texture || cmem->array) {
543  continue;
544  }
545 
546  /* For other textures, only move image textures. */
547  if (for_texture && !is_image) {
548  continue;
549  }
550 
551  /* Try to move largest allocation, prefer moving images. */
552  if (is_image > max_is_image || (is_image == max_is_image && mem.device_size > max_size)) {
553  max_is_image = is_image;
554  max_size = mem.device_size;
555  max_mem = &mem;
556  }
557  }
558  lock.unlock();
559 
560  /* Move to host memory. This part is mutex protected since
561  * multiple HIP devices could be moving the memory. The
562  * first one will do it, and the rest will adopt the pointer. */
563  if (max_mem) {
564  VLOG_WORK << "Move memory from device to host: " << max_mem->name;
565 
566  static thread_mutex move_mutex;
567  thread_scoped_lock lock(move_mutex);
568 
569  any_device_moving_textures_to_host = true;
570 
571  /* Potentially need to call back into multi device, so pointer mapping
572  * and peer devices are updated. This is also necessary since the device
573  * pointer may just be a key here, so cannot be accessed and freed directly.
574  * Unfortunately it does mean that memory is reallocated on all other
575  * devices as well, which is potentially dangerous when still in use (since
576  * a thread rendering on another device would only be caught in this mutex
577  * if it so happens to do an allocation at the same time as well). */
578  max_mem->device_copy_to();
579  size = (max_size >= size) ? 0 : size - max_size;
580 
581  any_device_moving_textures_to_host = false;
582  }
583  else {
584  break;
585  }
586  }
587 
588  /* Unset flag before texture info is reloaded, since it should stay in device memory. */
589  move_texture_to_host = false;
590 
591  /* Update texture info array with new pointers. */
592  load_texture_info();
593 }
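 /* Editorial note, illustrative only: if 300 MB must be freed and the largest movable
  * image texture resident on this device is 200 MB, that texture is re-allocated in host
  * memory first and the loop continues with the remaining 100 MB, until enough memory has
  * been moved or no movable allocation remains. */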
594 
595 HIPDevice::HIPMem *HIPDevice::generic_alloc(device_memory &mem, size_t pitch_padding)
596 {
597  HIPContextScope scope(this);
598 
599  hipDeviceptr_t device_pointer = 0;
600  size_t size = mem.memory_size() + pitch_padding;
601 
602  hipError_t mem_alloc_result = hipErrorOutOfMemory;
603  const char *status = "";
604 
605  /* First try allocating in device memory, respecting headroom. We make
606  * an exception for texture info. It is small and frequently accessed,
607  * so treat it as working memory.
608  *
609  * If there is not enough room for working memory, we will try to move
610  * textures to host memory, assuming the performance impact would have
611  * been worse for working memory. */
612  bool is_texture = (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) && (&mem != &texture_info);
613  bool is_image = is_texture && (mem.data_height > 1);
614 
615  size_t headroom = (is_texture) ? device_texture_headroom : device_working_headroom;
616 
617  size_t total = 0, free = 0;
618  hipMemGetInfo(&free, &total);
619 
620  /* Move textures to host memory if needed. */
621  if (!move_texture_to_host && !is_image && (size + headroom) >= free && can_map_host) {
622  move_textures_to_host(size + headroom - free, is_texture);
623  hipMemGetInfo(&free, &total);
624  }
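 /* Editorial note, illustrative only: requesting a 2 GB working buffer with 1 GB of free
  * device memory and a 32 MB working headroom fails the (size + headroom) < free check, so,
  * assuming can_map_host, the block above first asks to move 2 GB + 32 MB - 1 GB worth of
  * textures to the host and then re-queries the free amount. */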
625 
626  /* Allocate in device memory. */
627  if (!move_texture_to_host && (size + headroom) < free) {
628  mem_alloc_result = hipMalloc(&device_pointer, size);
629  if (mem_alloc_result == hipSuccess) {
630  status = " in device memory";
631  }
632  }
633 
634  /* Fall back to mapped host memory if needed and possible. */
635 
636  void *shared_pointer = 0;
637 
638  if (mem_alloc_result != hipSuccess && can_map_host) {
639  if (mem.shared_pointer) {
640  /* Another device already allocated host memory. */
641  mem_alloc_result = hipSuccess;
642  shared_pointer = mem.shared_pointer;
643  }
644  else if (map_host_used + size < map_host_limit) {
645  /* Allocate host memory ourselves. */
646  mem_alloc_result = hipHostMalloc(
647  &shared_pointer, size, hipHostMallocMapped | hipHostMallocWriteCombined);
648 
649  assert((mem_alloc_result == hipSuccess && shared_pointer != 0) ||
650  (mem_alloc_result != hipSuccess && shared_pointer == 0));
651  }
652 
653  if (mem_alloc_result == hipSuccess) {
654  hip_assert(hipHostGetDevicePointer(&device_pointer, shared_pointer, 0));
655  map_host_used += size;
656  status = " in host memory";
657  }
658  }
659 
660  if (mem_alloc_result != hipSuccess) {
661  status = " failed, out of device and host memory";
662  set_error("System is out of GPU and shared host memory");
663  }
664 
665  if (mem.name) {
666  VLOG_WORK << "Buffer allocate: " << mem.name << ", "
667  << string_human_readable_number(mem.memory_size()) << " bytes. ("
668  << string_human_readable_size(mem.memory_size()) << ")" << status;
669  }
670 
671  mem.device_pointer = (device_ptr)device_pointer;
672  mem.device_size = size;
673  stats.mem_alloc(size);
674 
675  if (!mem.device_pointer) {
676  return NULL;
677  }
678 
679  /* Insert into map of allocations. */
680  thread_scoped_lock lock(hip_mem_map_mutex);
681  HIPMem *cmem = &hip_mem_map[&mem];
682  if (shared_pointer != 0) {
683  /* Replace host pointer with our host allocation. Only works if
684  * HIP memory layout is the same and has no pitch padding. Also
685  * does not work if we move textures to host during a render,
686  * since other devices might be using the memory. */
687 
688  if (!move_texture_to_host && pitch_padding == 0 && mem.host_pointer &&
689  mem.host_pointer != shared_pointer) {
690  memcpy(shared_pointer, mem.host_pointer, size);
691 
692  /* A call to device_memory::host_free() should be preceded by
693  * a call to device_memory::device_free() for host memory
694  * allocated by a device to be handled properly. Two exceptions
695  * are here and a call in OptiXDevice::generic_alloc(), where
696  * the current host memory can be assumed to be allocated by
697  * device_memory::host_alloc(), not by a device */
698 
699  mem.host_free();
700  mem.host_pointer = shared_pointer;
701  }
702  mem.shared_pointer = shared_pointer;
703  mem.shared_counter++;
704  cmem->use_mapped_host = true;
705  }
706  else {
707  cmem->use_mapped_host = false;
708  }
709 
710  return cmem;
711 }
712 
713 void HIPDevice::generic_copy_to(device_memory &mem)
714 {
715  if (!mem.host_pointer || !mem.device_pointer) {
716  return;
717  }
718 
719  /* If use_mapped_host of mem is false, the current device only uses device memory allocated by
720  * hipMalloc regardless of mem.host_pointer and mem.shared_pointer, and should copy data from
721  * mem.host_pointer. */
722  thread_scoped_lock lock(hip_mem_map_mutex);
723  if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
724  const HIPContextScope scope(this);
725  hip_assert(
726  hipMemcpyHtoD((hipDeviceptr_t)mem.device_pointer, mem.host_pointer, mem.memory_size()));
727  }
728 }
729 
730 void HIPDevice::generic_free(device_memory &mem)
731 {
732  if (mem.device_pointer) {
733  HIPContextScope scope(this);
734  thread_scoped_lock lock(hip_mem_map_mutex);
735  DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
736  const HIPMem &cmem = hip_mem_map[&mem];
737 
738  /* If cmem.use_mapped_host is true, reference counting is used
739  * to safely free the mapped host memory. */
740 
741  if (cmem.use_mapped_host) {
742  assert(mem.shared_pointer);
743  if (mem.shared_pointer) {
744  assert(mem.shared_counter > 0);
745  if (--mem.shared_counter == 0) {
746  if (mem.host_pointer == mem.shared_pointer) {
747  mem.host_pointer = 0;
748  }
749  hipHostFree(mem.shared_pointer);
750  mem.shared_pointer = 0;
751  }
752  }
753  map_host_used -= mem.device_size;
754  }
755  else {
756  /* Free device memory. */
757  hip_assert(hipFree(mem.device_pointer));
758  }
759 
760  stats.mem_free(mem.device_size);
761  mem.device_pointer = 0;
762  mem.device_size = 0;
763 
764  hip_mem_map.erase(hip_mem_map.find(&mem));
765  }
766 }
767 
768 void HIPDevice::mem_alloc(device_memory &mem)
769 {
770  if (mem.type == MEM_TEXTURE) {
771  assert(!"mem_alloc not supported for textures.");
772  }
773  else if (mem.type == MEM_GLOBAL) {
774  assert(!"mem_alloc not supported for global memory.");
775  }
776  else {
777  generic_alloc(mem);
778  }
779 }
780 
781 void HIPDevice::mem_copy_to(device_memory &mem)
782 {
783  if (mem.type == MEM_GLOBAL) {
784  global_free(mem);
785  global_alloc(mem);
786  }
787  else if (mem.type == MEM_TEXTURE) {
788  tex_free((device_texture &)mem);
789  tex_alloc((device_texture &)mem);
790  }
791  else {
792  if (!mem.device_pointer) {
793  generic_alloc(mem);
794  }
795  generic_copy_to(mem);
796  }
797 }
798 
799 void HIPDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)
800 {
801  if (mem.type == MEM_TEXTURE || mem.type == MEM_GLOBAL) {
802  assert(!"mem_copy_from not supported for textures.");
803  }
804  else if (mem.host_pointer) {
805  const size_t size = elem * w * h;
806  const size_t offset = elem * y * w;
807 
808  if (mem.device_pointer) {
809  const HIPContextScope scope(this);
810  hip_assert(hipMemcpyDtoH(
811  (char *)mem.host_pointer + offset, (hipDeviceptr_t)mem.device_pointer + offset, size));
812  }
813  else {
814  memset((char *)mem.host_pointer + offset, 0, size);
815  }
816  }
817 }
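 /* Editorial note, illustrative only: for a buffer with elem = 4 bytes and w = 1920,
  * copying h = 1 row starting at y = 100 reads size = 4 * 1920 * 1 = 7680 bytes from
  * byte offset 4 * 100 * 1920 = 768000 into the same offset of host_pointer. */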
818 
819 void HIPDevice::mem_zero(device_memory &mem)
820 {
821  if (!mem.device_pointer) {
822  mem_alloc(mem);
823  }
824  if (!mem.device_pointer) {
825  return;
826  }
827 
828  /* If use_mapped_host of mem is false, mem.device_pointer currently refers to device memory
829  * regardless of mem.host_pointer and mem.shared_pointer. */
830  thread_scoped_lock lock(hip_mem_map_mutex);
831  if (!hip_mem_map[&mem].use_mapped_host || mem.host_pointer != mem.shared_pointer) {
832  const HIPContextScope scope(this);
833  hip_assert(hipMemsetD8((hipDeviceptr_t)mem.device_pointer, 0, mem.memory_size()));
834  }
835  else if (mem.host_pointer) {
836  memset(mem.host_pointer, 0, mem.memory_size());
837  }
838 }
839 
840 void HIPDevice::mem_free(device_memory &mem)
841 {
842  if (mem.type == MEM_GLOBAL) {
843  global_free(mem);
844  }
845  else if (mem.type == MEM_TEXTURE) {
846  tex_free((device_texture &)mem);
847  }
848  else {
849  generic_free(mem);
850  }
851 }
852 
853 device_ptr HIPDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
854 {
855  return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
856 }
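 /* Editorial note, illustrative only (element layout assumed): memory_elements_size()
  * converts an element offset into bytes, so for a buffer of float4 elements (16 bytes
  * each) an offset of 10 returns device_pointer + 160 bytes. */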
857 
858 void HIPDevice::const_copy_to(const char *name, void *host, size_t size)
859 {
860  HIPContextScope scope(this);
861  hipDeviceptr_t mem;
862  size_t bytes;
863 
864  hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, "kernel_params"));
865  assert(bytes == sizeof(KernelParamsHIP));
866 
867  /* Update data storage pointers in launch parameters. */
868 # define KERNEL_DATA_ARRAY(data_type, data_name) \
869  if (strcmp(name, #data_name) == 0) { \
870  hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, data_name), host, size)); \
871  return; \
872  }
873  KERNEL_DATA_ARRAY(KernelData, data)
874  KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
875 # include "kernel/data_arrays.h"
876 # undef KERNEL_DATA_ARRAY
877 }
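 /* Editorial note, illustrative expansion with a hypothetical array name: for an entry
  * KERNEL_DATA_ARRAY(float4, example_array) the macro above expands to
  *   if (strcmp(name, "example_array") == 0) {
  *     hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIP, example_array), host, size));
  *     return;
  *   }
  * so const_copy_to() only patches the pointer whose name matches inside kernel_params. */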
878 
879 void HIPDevice::global_alloc(device_memory &mem)
880 {
881  if (mem.is_resident(this)) {
882  generic_alloc(mem);
883  generic_copy_to(mem);
884  }
885 
886  const_copy_to(mem.name, &mem.device_pointer, sizeof(mem.device_pointer));
887 }
888 
889 void HIPDevice::global_free(device_memory &mem)
890 {
891  if (mem.is_resident(this) && mem.device_pointer) {
892  generic_free(mem);
893  }
894 }
895 
896 void HIPDevice::tex_alloc(device_texture &mem)
897 {
898  HIPContextScope scope(this);
899 
900  size_t dsize = datatype_size(mem.data_type);
901  size_t size = mem.memory_size();
902 
903  hipTextureAddressMode address_mode = hipAddressModeWrap;
904  switch (mem.info.extension) {
905  case EXTENSION_REPEAT:
906  address_mode = hipAddressModeWrap;
907  break;
908  case EXTENSION_EXTEND:
909  address_mode = hipAddressModeClamp;
910  break;
911  case EXTENSION_CLIP:
912  address_mode = hipAddressModeBorder;
913  break;
914  default:
915  assert(0);
916  break;
917  }
918 
919  hipTextureFilterMode filter_mode;
920  if (mem.info.interpolation == INTERPOLATION_CLOSEST) {
921  filter_mode = hipFilterModePoint;
922  }
923  else {
924  filter_mode = hipFilterModeLinear;
925  }
926 
927  /* Image Texture Storage */
928  hipArray_Format format;
929  switch (mem.data_type) {
930  case TYPE_UCHAR:
931  format = HIP_AD_FORMAT_UNSIGNED_INT8;
932  break;
933  case TYPE_UINT16:
934  format = HIP_AD_FORMAT_UNSIGNED_INT16;
935  break;
936  case TYPE_UINT:
937  format = HIP_AD_FORMAT_UNSIGNED_INT32;
938  break;
939  case TYPE_INT:
940  format = HIP_AD_FORMAT_SIGNED_INT32;
941  break;
942  case TYPE_FLOAT:
943  format = HIP_AD_FORMAT_FLOAT;
944  break;
945  case TYPE_HALF:
946  format = HIP_AD_FORMAT_HALF;
947  break;
948  default:
949  assert(0);
950  return;
951  }
952 
953  HIPMem *cmem = NULL;
954  hArray array_3d = NULL;
955  size_t src_pitch = mem.data_width * dsize * mem.data_elements;
956  size_t dst_pitch = src_pitch;
957 
958  if (!mem.is_resident(this)) {
959  thread_scoped_lock lock(hip_mem_map_mutex);
960  cmem = &hip_mem_map[&mem];
961  cmem->texobject = 0;
962 
963  if (mem.data_depth > 1) {
964  array_3d = (hArray)mem.device_pointer;
965  cmem->array = array_3d;
966  }
967  else if (mem.data_height > 0) {
968  dst_pitch = align_up(src_pitch, pitch_alignment);
969  }
970  }
971  else if (mem.data_depth > 1) {
972  /* 3D texture using array, there is no API for linear memory. */
973  HIP_ARRAY3D_DESCRIPTOR desc;
974 
975  desc.Width = mem.data_width;
976  desc.Height = mem.data_height;
977  desc.Depth = mem.data_depth;
978  desc.Format = format;
979  desc.NumChannels = mem.data_elements;
980  desc.Flags = 0;
981 
982  VLOG_WORK << "Array 3D allocate: " << mem.name << ", "
983  << string_human_readable_number(mem.memory_size()) << " bytes. ("
984  << string_human_readable_size(mem.memory_size()) << ")";
985 
986  hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
987 
988  if (!array_3d) {
989  return;
990  }
991 
992  HIP_MEMCPY3D param;
993  memset(&param, 0, sizeof(HIP_MEMCPY3D));
994  param.dstMemoryType = get_memory_type(hipMemoryTypeArray);
995  param.dstArray = array_3d;
996  param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
997  param.srcHost = mem.host_pointer;
998  param.srcPitch = src_pitch;
999  param.WidthInBytes = param.srcPitch;
1000  param.Height = mem.data_height;
1001  param.Depth = mem.data_depth;
1002 
1003  hip_assert(hipDrvMemcpy3D(&param));
1004 
1005  mem.device_pointer = (device_ptr)array_3d;
1006  mem.device_size = size;
1007  stats.mem_alloc(size);
1008 
1009  thread_scoped_lock lock(hip_mem_map_mutex);
1010  cmem = &hip_mem_map[&mem];
1011  cmem->texobject = 0;
1012  cmem->array = array_3d;
1013  }
1014  else if (mem.data_height > 0) {
1015  /* 2D texture, using pitch aligned linear memory. */
1016  dst_pitch = align_up(src_pitch, pitch_alignment);
1017  size_t dst_size = dst_pitch * mem.data_height;
1018 
1019  cmem = generic_alloc(mem, dst_size - mem.memory_size());
1020  if (!cmem) {
1021  return;
1022  }
1023 
1024  hip_Memcpy2D param;
1025  memset(&param, 0, sizeof(param));
1026  param.dstMemoryType = get_memory_type(hipMemoryTypeDevice);
1027  param.dstDevice = mem.device_pointer;
1028  param.dstPitch = dst_pitch;
1029  param.srcMemoryType = get_memory_type(hipMemoryTypeHost);
1030  param.srcHost = mem.host_pointer;
1031  param.srcPitch = src_pitch;
1032  param.WidthInBytes = param.srcPitch;
1033  param.Height = mem.data_height;
1034 
1035  hip_assert(hipDrvMemcpy2DUnaligned(&param));
1036  }
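 /* Editorial note, illustrative only (alignment value assumed): for a 1000-pixel wide RGBA
  * half-float texture, src_pitch = 1000 * 2 * 4 = 8000 bytes; with a 256-byte pitch
  * alignment, dst_pitch = align_up(8000, 256) = 8192, so each row is padded by 192 bytes
  * in device memory. */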
1037  else {
1038  /* 1D texture, using linear memory. */
1039  cmem = generic_alloc(mem);
1040  if (!cmem) {
1041  return;
1042  }
1043 
1044  hip_assert(hipMemcpyHtoD(mem.device_pointer, mem.host_pointer, size));
1045  }
1046 
1047  /* Resize once */
1048  const uint slot = mem.slot;
1049  if (slot >= texture_info.size()) {
1050  /* Allocate some slots in advance, to reduce amount
1051  * of re-allocations. */
1052  texture_info.resize(slot + 128);
1053  }
1054 
1055  /* Set Mapping and tag that we need to (re-)upload to device */
1056  texture_info[slot] = mem.info;
1057  need_texture_info = true;
1058 
1059  if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT &&
1060  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 &&
1061  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN &&
1062  mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16) {
1063  /* Bindless textures. */
1064  hipResourceDesc resDesc;
1065  memset(&resDesc, 0, sizeof(resDesc));
1066 
1067  if (array_3d) {
1068  resDesc.resType = hipResourceTypeArray;
1069  resDesc.res.array.h_Array = array_3d;
1070  resDesc.flags = 0;
1071  }
1072  else if (mem.data_height > 0) {
1073  resDesc.resType = hipResourceTypePitch2D;
1074  resDesc.res.pitch2D.devPtr = mem.device_pointer;
1075  resDesc.res.pitch2D.format = format;
1076  resDesc.res.pitch2D.numChannels = mem.data_elements;
1077  resDesc.res.pitch2D.height = mem.data_height;
1078  resDesc.res.pitch2D.width = mem.data_width;
1079  resDesc.res.pitch2D.pitchInBytes = dst_pitch;
1080  }
1081  else {
1082  resDesc.resType = hipResourceTypeLinear;
1083  resDesc.res.linear.devPtr = mem.device_pointer;
1084  resDesc.res.linear.format = format;
1085  resDesc.res.linear.numChannels = mem.data_elements;
1086  resDesc.res.linear.sizeInBytes = mem.device_size;
1087  }
1088 
1089  hipTextureDesc texDesc;
1090  memset(&texDesc, 0, sizeof(texDesc));
1091  texDesc.addressMode[0] = address_mode;
1092  texDesc.addressMode[1] = address_mode;
1093  texDesc.addressMode[2] = address_mode;
1094  texDesc.filterMode = filter_mode;
1095  texDesc.flags = HIP_TRSF_NORMALIZED_COORDINATES;
1096 
1097  thread_scoped_lock lock(hip_mem_map_mutex);
1098  cmem = &hip_mem_map[&mem];
1099 
1100  if (hipTexObjectCreate(&cmem->texobject, &resDesc, &texDesc, NULL) != hipSuccess) {
1101  set_error(
1102  "Failed to create texture. Maximum GPU texture size or available GPU memory was likely "
1103  "exceeded.");
1104  }
1105 
1106  texture_info[slot].data = (uint64_t)cmem->texobject;
1107  }
1108  else {
1109  texture_info[slot].data = (uint64_t)mem.device_pointer;
1110  }
1111 }
1112 
1113 void HIPDevice::tex_free(device_texture &mem)
1114 {
1115  if (mem.device_pointer) {
1116  HIPContextScope scope(this);
1117  thread_scoped_lock lock(hip_mem_map_mutex);
1118  DCHECK(hip_mem_map.find(&mem) != hip_mem_map.end());
1119  const HIPMem &cmem = hip_mem_map[&mem];
1120 
1121  if (cmem.texobject) {
1122  /* Free bindless texture. */
1123  hipTexObjectDestroy(cmem.texobject);
1124  }
1125 
1126  if (!mem.is_resident(this)) {
1127  /* Do not free memory here, since it was allocated on a different device. */
1128  hip_mem_map.erase(hip_mem_map.find(&mem));
1129  }
1130  else if (cmem.array) {
1131  /* Free array. */
1132  hipArrayDestroy(cmem.array);
1133  stats.mem_free(mem.device_size);
1134  mem.device_pointer = 0;
1135  mem.device_size = 0;
1136 
1137  hip_mem_map.erase(hip_mem_map.find(&mem));
1138  }
1139  else {
1140  lock.unlock();
1141  generic_free(mem);
1142  }
1143  }
1144 }
1145 
1146 unique_ptr<DeviceQueue> HIPDevice::gpu_queue_create()
1147 {
1148  return make_unique<HIPDeviceQueue>(this);
1149 }
1150 
1151 bool HIPDevice::should_use_graphics_interop()
1152 {
1153  /* Check whether this device is part of OpenGL context.
1154  *
1155  * Using a HIP device that is not part of the OpenGL context for graphics interoperability is
1156  * possible, but empirical measurements show it can be considerably slower than using a naive
1157  * pixel copy. */
1158 
1159  /* Disable graphics interop for now, because of driver bug in 21.40. See T92972 */
1160 # if 0
1161  HIPContextScope scope(this);
1162 
1163  int num_all_devices = 0;
1164  hip_assert(hipGetDeviceCount(&num_all_devices));
1165 
1166  if (num_all_devices == 0) {
1167  return false;
1168  }
1169 
1170  vector<hipDevice_t> gl_devices(num_all_devices);
1171  uint num_gl_devices = 0;
1172  hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);
1173 
1174  for (hipDevice_t gl_device : gl_devices) {
1175  if (gl_device == hipDevice) {
1176  return true;
1177  }
1178  }
1179 # endif
1180 
1181  return false;
1182 }
1183 
1184 int HIPDevice::get_num_multiprocessors()
1185 {
1186  return get_device_default_attribute(hipDeviceAttributeMultiprocessorCount, 0);
1187 }
1188 
1189 int HIPDevice::get_max_num_threads_per_multiprocessor()
1190 {
1191  return get_device_default_attribute(hipDeviceAttributeMaxThreadsPerMultiProcessor, 0);
1192 }
1193 
1194 bool HIPDevice::get_device_attribute(hipDeviceAttribute_t attribute, int *value)
1195 {
1196  HIPContextScope scope(this);
1197 
1198  return hipDeviceGetAttribute(value, attribute, hipDevice) == hipSuccess;
1199 }
1200 
1201 int HIPDevice::get_device_default_attribute(hipDeviceAttribute_t attribute, int default_value)
1202 {
1203  int value = 0;
1204  if (!get_device_attribute(attribute, &value)) {
1205  return default_value;
1206  }
1207  return value;
1208 }
1209 
1210 hipMemoryType HIPDevice::get_memory_type(hipMemoryType mem_type)
1211 {
1212  return get_hip_memory_type(mem_type, hipRuntimeVersion);
1213 }
1214 
1215 CCL_NAMESPACE_END
1216 
1217 #endif