/* Constructor: creates the MTLCommandQueue plus the shared events, listener
 * dispatch queue and semaphore used to synchronize CPU and GPU, and enables
 * per-kernel timing / encoder labeling when the CYCLES_METAL_PROFILING or
 * CYCLES_METAL_DEBUG environment variables are set.
 * NOTE(review): this extract is line-mangled and missing interior lines
 * (braces and some statements); code tokens left byte-identical. */
19 MetalDeviceQueue::MetalDeviceQueue(MetalDevice *device)
20 :
DeviceQueue(device), metal_device_(device), stats_(device->stats)
/* Encoder execution status in command-buffer errors needs macOS 11+. */
22 if (@available(macos 11.0, *)) {
23 command_buffer_desc_ = [[MTLCommandBufferDescriptor alloc]
init];
24 command_buffer_desc_.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
27 mtlDevice_ = device->mtlDevice;
28 mtlCommandQueue_ = [mtlDevice_ newCommandQueue];
/* MTLSharedEvent requires macOS 10.14+. */
30 if (@available(macos 10.14, *)) {
31 shared_event_ = [mtlDevice_ newSharedEvent];
/* Dispatch queue on which shared-event notification blocks are delivered. */
35 event_queue_ = dispatch_queue_create(
"com.cycles.metal.event_queue",
NULL);
36 shared_event_listener_ = [[MTLSharedEventListener alloc] initWithDispatchQueue:event_queue_];
/* Semaphore used by synchronize() to block until the GPU signals. */
39 wait_semaphore_ = dispatch_semaphore_create(0);
41 if (@available(macos 10.14, *)) {
/* CYCLES_METAL_PROFILING: collect per-kernel dispatch timing stats. */
42 if (getenv(
"CYCLES_METAL_PROFILING")) {
44 timing_shared_event_ = [mtlDevice_ newSharedEvent];
45 label_command_encoders_ =
true;
/* CYCLES_METAL_DEBUG: verbose dispatch traces + labeled encoders. */
47 if (getenv(
"CYCLES_METAL_DEBUG")) {
49 verbose_tracing_ =
true;
50 label_command_encoders_ =
true;
/* First timing event ID to signal. */
52 timing_shared_event_id_ = 1;
/* Configure GPU frame-capture from CYCLES_DEBUG_METAL_CAPTURE_* environment
 * variables: which kernel to capture, which dispatch (or how many samples),
 * and optionally a .gputrace output URL for capture-to-disk.
 * NOTE(review): extract is missing interior lines; code left byte-identical. */
58 void MetalDeviceQueue::setup_capture()
/* CYCLES_DEBUG_METAL_CAPTURE_KERNEL selects the kernel index to capture. */
62 if (
auto capture_kernel_str = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_KERNEL")) {
64 capture_kernel_ =
DeviceKernel(atoi(capture_kernel_str));
67 capture_dispatch_counter_ = 0;
/* Optionally capture only the Nth dispatch of that kernel. */
68 if (
auto capture_dispatch_str = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_DISPATCH")) {
69 capture_dispatch_counter_ = atoi(capture_dispatch_str);
71 printf(
"Capture dispatch number %d\n", capture_dispatch_counter_);
/* Alternative mode: capture a whole block of samples. */
74 else if (
auto capture_samples_str = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_SAMPLES")) {
77 capture_samples_ =
true;
78 capture_reset_counter_ = atoi(capture_samples_str);
/* No dispatch limit unless CYCLES_DEBUG_METAL_CAPTURE_LIMIT is given. */
80 capture_dispatch_counter_ = INT_MAX;
81 if (
auto capture_limit_str = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_LIMIT")) {
83 capture_dispatch_counter_ = atoi(capture_limit_str);
86 printf(
"Capturing sample block %d (dispatch limit: %d)\n",
87 capture_reset_counter_,
88 capture_dispatch_counter_);
/* Register a capture scope so Xcode / programmatic capture can target it. */
96 MTLCaptureManager *captureManager = [MTLCaptureManager sharedCaptureManager];
97 mtlCaptureScope_ = [captureManager newCaptureScopeWithDevice:mtlDevice_];
98 mtlCaptureScope_.label = [NSString stringWithFormat:
@"Cycles kernel dispatch"];
99 [captureManager setDefaultCaptureScope:mtlCaptureScope_];
101 label_command_encoders_ =
true;
/* CYCLES_DEBUG_METAL_CAPTURE_URL: write the capture to a .gputrace file.
 * GPU-trace-document destination requires macOS 10.15+. */
103 if (
auto capture_url = getenv(
"CYCLES_DEBUG_METAL_CAPTURE_URL")) {
104 if (@available(macos 10.15, *)) {
105 if ([captureManager supportsDestination:MTLCaptureDestinationGPUTraceDocument]) {
107 MTLCaptureDescriptor *captureDescriptor = [[MTLCaptureDescriptor alloc]
init];
108 captureDescriptor.captureObject = mtlCaptureScope_;
109 captureDescriptor.destination = MTLCaptureDestinationGPUTraceDocument;
110 captureDescriptor.outputURL = [NSURL fileURLWithPath:@(capture_url)];
/* Start capture immediately; on failure log the localized error. */
113 if (![captureManager startCaptureWithDescriptor:captureDescriptor
error:&
error]) {
114 NSString *
err = [
error localizedDescription];
115 printf(
"Start capture failed: %s\n", [
err UTF8String]);
118 printf(
"Capture started (URL: %s)\n", capture_url);
119 is_capturing_to_disk_ =
true;
/* Reached when the capture manager cannot write GPU trace documents. */
123 printf(
"Capture to file is not supported\n");
/* NOTE(review): fragment of MetalDeviceQueue::update_capture(kernel) — the
 * signature and several interior lines are missing from this extract.
 * Presumably decides per-dispatch whether to begin/end a capture based on the
 * counters initialized in setup_capture() — confirm against full source. */
133 capture_dispatch_counter_ -= 1;
/* Counter exhausted: no further captures for this run. */
141 if (capture_dispatch_counter_ < 0) {
/* Only the kernel selected via CYCLES_DEBUG_METAL_CAPTURE_KERNEL counts. */
147 if (
kernel == capture_kernel_) {
149 if (capture_dispatch_counter_ == 0) {
152 capture_dispatch_counter_ -= 1;
/* Sample-block capture mode: count down remaining samples. */
157 if (capture_samples_) {
159 if (capture_reset_counter_ == 0) {
164 capture_reset_counter_ -= 1;
/* Begin a GPU capture scope. Flushes pending work first (lines between the
 * mtlCommandBuffer_ check and beginScope are missing from this extract —
 * presumably a synchronize() call; confirm against full source). */
170 void MetalDeviceQueue::begin_capture()
173 if (mtlCommandBuffer_) {
176 [mtlCaptureScope_ beginScope];
177 printf(
"[mtlCaptureScope_ beginScope]\n");
178 is_capturing_ =
true;
/* End the current GPU capture scope, and if capturing to a .gputrace file,
 * stop the capture manager as well. has_captured_to_disk_ makes subsequent
 * synchronize() calls bail out early (see synchronize()). */
181 void MetalDeviceQueue::end_capture()
183 [mtlCaptureScope_ endScope];
184 is_capturing_ =
false;
185 printf(
"[mtlCaptureScope_ endScope]\n");
187 if (is_capturing_to_disk_) {
/* Capture-to-disk was only ever started on macOS 10.15+. */
188 if (@available(macos 10.15, *)) {
189 [[MTLCaptureManager sharedCaptureManager] stopCapture];
190 has_captured_to_disk_ =
true;
191 is_capturing_to_disk_ =
false;
192 is_capturing_ =
false;
193 printf(
"Capture stopped\n");
/* Destructor: releases Metal objects created in the constructor and, when
 * profiling collected any dispatches, prints a per-kernel timing table.
 * Asserts that no command buffer is still in flight.
 * NOTE(review): extract is missing interior lines (incl. the stats-table
 * header construction and per-row printf arguments); code left as-is. */
198 MetalDeviceQueue::~MetalDeviceQueue()
/* All submitted work must have completed before teardown. */
202 assert(mtlCommandBuffer_ == nil);
203 assert(command_buffers_submitted_ == command_buffers_completed_);
/* Release objects that only exist on the matching OS versions. */
205 if (@available(macos 10.14, *)) {
206 [shared_event_listener_ release];
207 [shared_event_ release];
210 if (@available(macos 11.0, *)) {
211 [command_buffer_desc_ release];
213 if (mtlCommandQueue_) {
214 [mtlCommandQueue_ release];
215 mtlCommandQueue_ = nil;
218 if (mtlCaptureScope_) {
219 [mtlCaptureScope_ release];
/* Aggregate timing stats gathered by the completion/timing handlers. */
222 double total_time = 0.0;
226 for (
auto &stat : timing_stats_) {
227 total_time += stat.total_time;
228 num_dispatches += stat.num_dispatches;
/* Only print the table if profiling actually recorded dispatches. */
231 if (num_dispatches) {
232 printf(
"\nMetal dispatch stats:\n\n");
240 auto divider =
string(header.length(),
'-');
241 printf(
"%s\n%s\n%s\n", divider.c_str(), header.c_str(), divider.c_str());
/* One row per kernel that was dispatched at least once. */
244 auto &stat = timing_stats_[i];
245 if (stat.num_dispatches > 0) {
246 printf(
"%-40s %16s %12s %12s %6.2fs %6.2f%%\n",
252 stat.total_time * 100.0 / total_time);
/* Footer: totals row under a divider. */
255 printf(
"%s\n", divider.c_str());
256 printf(
"%-40s %16s %12s %12s %6.2fs %6.2f%%\n",
263 printf(
"%s\n\n", divider.c_str());
/* Number of integrator states to keep in flight, chosen per GPU vendor.
 * NOTE(review): the actual return values for the AMD and Apple branches are
 * missing from this extract; code left byte-identical. */
267 int MetalDeviceQueue::num_concurrent_states(
const size_t )
const
273 if (metal_device_->device_vendor == METAL_GPU_AMD) {
276 else if (metal_device_->device_vendor == METAL_GPU_APPLE) {
/* Threshold of busy states below which the queue is considered underloaded,
 * chosen per GPU vendor.
 * NOTE(review): branch return values are missing from this extract. */
282 int MetalDeviceQueue::num_concurrent_busy_states()
const
287 if (metal_device_->device_vendor == METAL_GPU_AMD) {
290 else if (metal_device_->device_vendor == METAL_GPU_APPLE) {
/* Preferred number of elements per sort partition for this GPU.
 * Delegates the choice entirely to MetalInfo. */
int MetalDeviceQueue::num_sort_partition_elements() const
{
  auto device = metal_device_->mtlDevice;
  return MetalInfo::optimal_sort_partition_elements(device);
}
/* NOTE(review): fragment — presumably the body of init_execution(), which
 * uploads texture info before kernel execution; the surrounding signature
 * and any following synchronize() call are missing from this extract. */
304 metal_device_->load_texture_info();
/* NOTE(review): interior of MetalDeviceQueue::enqueue(kernel, work_size,
 * args) — the signature and many interior lines are missing from this
 * extract; code tokens left byte-identical. Builds a single argument buffer
 * (kernel args + KernelParamsMetal globals + ancillary argument-encoder
 * region), binds resources, and dispatches the compute kernel. */
315 if (metal_device_->have_error()) {
322 id<MTLComputeCommandEncoder> mtlComputeCommandEncoder = get_compute_encoder(
kernel);
/* Record label for profiling: which kernel/work size maps to which
 * timing event ID. */
324 if (@available(macos 10.14, *)) {
325 if (timing_shared_event_) {
326 command_encoder_labels_.push_back({
kernel,
work_size, timing_shared_event_id_});
/* Pass 1: compute total argument-buffer length, aligning each argument
 * to its own size. */
331 size_t arg_buffer_length = 0;
332 for (
size_t i = 0; i < args.
count; i++) {
333 size_t size_in_bytes = args.
sizes[i];
334 arg_buffer_length =
round_up(arg_buffer_length, size_in_bytes) + size_in_bytes;
/* Globals (KernelParamsMetal) region, 256-byte aligned. */
337 arg_buffer_length =
round_up(arg_buffer_length, 256);
340 size_t globals_offsets = arg_buffer_length;
342 arg_buffer_length =
round_up(arg_buffer_length, 256);
/* Ancillary argument-encoder region (textures, MetalRT tables). */
345 size_t metal_offsets = arg_buffer_length;
346 arg_buffer_length += metal_device_->mtlAncillaryArgEncoder.encodedLength;
347 arg_buffer_length =
round_up(arg_buffer_length, metal_device_->mtlAncillaryArgEncoder.alignment);
/* Stage the argument data in a zeroed host-side scratch buffer. */
351 memset(init_arg_buffer, 0, arg_buffer_length);
354 size_t bytes_written = 0;
355 for (
size_t i = 0; i < args.
count; i++) {
356 size_t size_in_bytes = args.
sizes[i];
357 bytes_written =
round_up(bytes_written, size_in_bytes);
359 memcpy(init_arg_buffer + bytes_written, args.
values[i], size_in_bytes);
361 bytes_written += size_in_bytes;
/* Copy the plain-old-data tail of launch_params (everything after
 * integrator_state) into the globals region. */
366 size_t plain_old_launch_data_offset = offsetof(
KernelParamsMetal, integrator_state) +
368 size_t plain_old_launch_data_size =
sizeof(
KernelParamsMetal) - plain_old_launch_data_offset;
369 memcpy(init_arg_buffer + globals_offsets + plain_old_launch_data_offset,
370 (
uint8_t *)&metal_device_->launch_params + plain_old_launch_data_offset,
371 plain_old_launch_data_size);
/* Unified-memory GPUs can use shared storage; otherwise managed. */
374 MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged;
375 if (@available(macOS 11.0, *)) {
376 if ([mtlDevice_ hasUnifiedMemory]) {
377 arg_buffer_options = MTLResourceStorageModeShared;
381 id<MTLBuffer> arg_buffer = temp_buffer_pool_.get_buffer(mtlDevice_,
/* Pass 2: encode pointer arguments and mark their buffers used. */
390 for (
size_t i = 0; i < args.
count; i++) {
391 size_t size_in_bytes = args.
sizes[i];
392 bytes_written =
round_up(bytes_written, size_in_bytes);
394 [metal_device_->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer
/* Non-null MetalMem argument: bind its buffer and declare usage. */
396 if (MetalDevice::MetalMem *mmem = *(MetalDevice::MetalMem **)args.
values[i]) {
397 [mtlComputeCommandEncoder useResource:mmem->mtlBuffer
398 usage:MTLResourceUsageRead | MTLResourceUsageWrite];
399 [metal_device_->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
offset:0 atIndex:0];
/* Null argument: explicitly bind nil (macOS 12+ API). */
402 if (@available(macos 12.0, *)) {
403 [metal_device_->mtlBufferKernelParamsEncoder setBuffer:nil
offset:0 atIndex:0];
407 bytes_written += size_in_bytes;
/* Encode global KernelParamsMetal buffers into the globals region. */
411 [metal_device_->mtlBufferKernelParamsEncoder setArgumentBuffer:arg_buffer
414 if (label_command_encoders_) {
/* Label encoder for Xcode GPU debugger / capture readability. */
416 mtlComputeCommandEncoder.label = [[NSString alloc]
417 initWithFormat:
@"Metal queue launch %s, work_size %d",
427 MetalDevice::MetalMem *mmem = *(
428 MetalDevice::MetalMem **)((
uint8_t *)&metal_device_->launch_params +
offset);
429 if (mmem && mmem->mem && (mmem->mtlBuffer || mmem->mtlTexture)) {
430 [metal_device_->mtlBufferKernelParamsEncoder setBuffer:mmem->mtlBuffer
432 atIndex:pointer_index];
435 if (@available(macos 12.0, *)) {
436 [metal_device_->mtlBufferKernelParamsEncoder setBuffer:nil
offset:0 atIndex:pointer_index];
/* Look up the compiled pipeline; fail the device if unavailable. */
442 const MetalKernelPipeline *metal_kernel_pso = MetalDeviceKernels::get_best_pipeline(
444 if (!metal_kernel_pso) {
445 metal_device_->set_error(
/* Ancillary region: 2D/3D texture binding tables. */
451 [metal_device_->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer
offset:metal_offsets];
452 [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_2d
455 [metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->texture_bindings_3d
/* MetalRT (macOS 12+): acceleration structure + intersection tables. */
458 if (@available(macos 12.0, *)) {
459 if (metal_device_->use_metalrt) {
460 if (metal_device_->bvhMetalRT) {
461 id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
462 [metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:2];
465 for (
int table = 0; table < METALRT_TABLE_NUM; table++) {
466 if (metal_kernel_pso->intersection_func_table[table]) {
467 [metal_kernel_pso->intersection_func_table[table] setBuffer:arg_buffer
470 [metal_device_->mtlAncillaryArgEncoder
471 setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
473 [mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
474 usage:MTLResourceUsageRead];
477 [metal_device_->mtlAncillaryArgEncoder setIntersectionFunctionTable:nil
482 bytes_written = metal_offsets + metal_device_->mtlAncillaryArgEncoder.encodedLength;
/* Managed storage: notify Metal which byte range the CPU modified. */
485 if (arg_buffer.storageMode == MTLStorageModeManaged) {
486 [arg_buffer didModifyRange:NSMakeRange(0, bytes_written)];
/* Bind the three regions: args (0), globals (1), ancillary (2). */
489 [mtlComputeCommandEncoder setBuffer:arg_buffer
offset:0 atIndex:0];
490 [mtlComputeCommandEncoder setBuffer:arg_buffer
offset:globals_offsets atIndex:1];
491 [mtlComputeCommandEncoder setBuffer:arg_buffer
offset:metal_offsets atIndex:2];
/* Declare MetalRT resources so residency is guaranteed during dispatch. */
493 if (metal_device_->use_metalrt) {
494 if (@available(macos 12.0, *)) {
496 auto bvhMetalRT = metal_device_->bvhMetalRT;
512 [mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct usage:MTLResourceUsageRead];
513 [mtlComputeCommandEncoder useResources:bvhMetalRT->blas_array.data()
514 count:bvhMetalRT->blas_array.size()
515 usage:MTLResourceUsageRead];
520 [mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso->pipeline];
523 const int num_threads_per_block = metal_kernel_pso->num_threads_per_block;
/* Threadgroup memory for kernels that need it (sized from block size). */
525 int shared_mem_bytes = 0;
538 shared_mem_bytes = (int)
round_up((num_threads_per_block + 1) *
sizeof(int), 16);
539 [mtlComputeCommandEncoder setThreadgroupMemoryLength:shared_mem_bytes atIndex:0];
/* Issue the dispatch. */
546 MTLSize size_threadgroups_per_dispatch = MTLSizeMake(
548 MTLSize size_threads_per_threadgroup = MTLSizeMake(num_threads_per_block, 1, 1);
549 [mtlComputeCommandEncoder dispatchThreadgroups:size_threadgroups_per_dispatch
550 threadsPerThreadgroup:size_threads_per_threadgroup];
/* Completion handler: surface command-buffer errors (with per-encoder
 * detail and shader logs on macOS 11+) as device errors. */
552 [mtlCommandBuffer_ addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
553 NSString *kernel_name = metal_kernel_pso->function.label;
556 if (@available(macos 11.0, *)) {
557 if (command_buffer.status == MTLCommandBufferStatusError && command_buffer.error != nil) {
558 metal_device_->set_error(
string(
"CommandBuffer Failed: ") + [kernel_name UTF8String]);
559 NSArray<id<MTLCommandBufferEncoderInfo>> *encoderInfos = [command_buffer.error.userInfo
560 valueForKey:MTLCommandBufferEncoderInfoErrorKey];
561 if (encoderInfos != nil) {
562 for (id<MTLCommandBufferEncoderInfo> encoderInfo : encoderInfos) {
563 NSLog(
@"%@", encoderInfo);
566 id<MTLLogContainer> logs = command_buffer.logs;
567 for (id<MTLFunctionLog>
log in logs) {
/* Pre-macOS-11 fallback: report the error without encoder detail. */
571 else if (command_buffer.error) {
572 metal_device_->set_error(
string(
"CommandBuffer Failed: ") + [kernel_name UTF8String]);
/* Verbose tracing: print dispatch timing and integrator queue buckets. */
577 if (verbose_tracing_ || is_capturing_) {
582 if (verbose_tracing_) {
585 "_____________________________________.____________________.______________.___________"
586 "______________________________________\n");
589 printf(
"%-40s| %7d threads |%5.2fms | buckets [",
592 last_completion_time_ * 1000.0);
/* Guard metal_mem_map while scanning for the queue counter. */
593 std::lock_guard<std::recursive_mutex>
lock(metal_device_->metal_mem_map_mutex);
594 for (
auto &it : metal_device_->metal_mem_map) {
595 const string c_integrator_queue_counter =
"integrator_queue_counter";
596 if (it.first->name == c_integrator_queue_counter) {
/* Pull the IntegratorQueueCounter back to the host for printing. */
599 void device_copy_from__IntegratorQueueCounter()
604 ((MyDeviceMemory *)it.first)->device_copy_from__IntegratorQueueCounter();
607 it.first->host_pointer) {
609 printf(
"%s%d", i == 0 ?
"" :
",",
int(queue_counter->num_queued[i]));
618 return !(metal_device_->have_error());
/* Flush all encoders, commit the command buffer, and block until the GPU
 * signals completion via a shared event. Also drains deferred host-side
 * copy-backs and the delayed-free list. Returns false on device error.
 * NOTE(review): extract is missing interior lines; code left byte-identical. */
621 bool MetalDeviceQueue::synchronize()
/* Nothing to wait for after a disk capture or once the device errored. */
623 if (has_captured_to_disk_ || metal_device_->have_error()) {
627 if (mtlComputeEncoder_) {
628 close_compute_encoder();
630 close_blit_encoder();
632 if (mtlCommandBuffer_) {
/* Profiling: register a listener per timing event signaled by this
 * command buffer, accumulating per-kernel dispatch statistics. */
635 if (@available(macos 10.14, *)) {
636 if (timing_shared_event_) {
638 __block
double completion_time = 0;
639 for (
uint64_t i = command_buffer_start_timing_id_; i < timing_shared_event_id_; i++) {
640 [timing_shared_event_ notifyListener:shared_event_listener_
642 block:^(id<MTLSharedEvent> sharedEvent,
uint64_t value) {
643 completion_time = timer.
get_time() - completion_time;
644 last_completion_time_ = completion_time;
/* Attribute this event's elapsed time to its kernel. */
645 for (
auto label : command_encoder_labels_) {
646 if (
label.timing_id == value) {
647 TimingStats &stat = timing_stats_[
label.kernel];
648 stat.num_dispatches++;
649 stat.total_time += completion_time;
650 stat.total_work_size +=
label.work_size;
/* Take a fresh event value to wait on for this synchronize. */
658 uint64_t shared_event_id_ = this->shared_event_id_++;
660 if (@available(macos 10.14, *)) {
661 __block dispatch_semaphore_t block_sema = wait_semaphore_;
662 [shared_event_ notifyListener:shared_event_listener_
663 atValue:shared_event_id_
664 block:^(id<MTLSharedEvent> sharedEvent,
uint64_t value) {
665 dispatch_semaphore_signal(block_sema);
/* Commit and block until the GPU signals the event. */
668 [mtlCommandBuffer_ encodeSignalEvent:shared_event_ value:shared_event_id_];
669 [mtlCommandBuffer_ commit];
670 dispatch_semaphore_wait(wait_semaphore_, DISPATCH_TIME_FOREVER);
/* NOTE(review): release happens before the pointer is passed to
 * process_command_buffer_completion() below — presumably used only as a
 * map key there, but confirm against the full source. */
673 [mtlCommandBuffer_ release];
/* Apply deferred GPU-to-host copy-backs now that work finished. */
675 for (
const CopyBack &mmem : copy_back_mem_) {
676 memcpy((
uchar *)mmem.host_pointer, (
uchar *)mmem.gpu_mem, mmem.size);
678 copy_back_mem_.clear();
680 temp_buffer_pool_.process_command_buffer_completion(mtlCommandBuffer_);
681 metal_device_->flush_delayed_free_list();
683 mtlCommandBuffer_ = nil;
684 command_encoder_labels_.clear();
687 return !(metal_device_->have_error());
/* NOTE(review): interior of zero_to_device(device_memory &mem) — signature
 * missing from this extract. Zeroes device memory via a blit fill when a
 * Metal buffer exists, otherwise falls back to the device's mem_zero(). */
700 metal_device_->mem_alloc(mem);
/* metal_mem_map is shared with other threads; guard the lookup. */
706 std::lock_guard<std::recursive_mutex>
lock(metal_device_->metal_mem_map_mutex);
707 MetalDevice::MetalMem &mmem = *metal_device_->metal_mem_map.at(&mem);
708 if (mmem.mtlBuffer) {
709 id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
710 [blitEncoder fillBuffer:mmem.mtlBuffer range:NSMakeRange(mmem.offset, mmem.size) value:0];
713 metal_device_->mem_zero(mem);
/* NOTE(review): interior of copy_to_device(device_memory &mem) — signature
 * and several lines missing from this extract. Uploads host data through a
 * shared staging buffer and a blit copy; falls back to mem_copy_to(). */
725 metal_device_->mem_alloc(mem);
731 std::lock_guard<std::recursive_mutex>
lock(metal_device_->metal_mem_map_mutex);
732 auto result = metal_device_->metal_mem_map.find(&mem);
733 if (
result != metal_device_->metal_mem_map.end()) {
738 MetalDevice::MetalMem &mmem = *
result->second;
739 id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
/* Staging buffer in shared storage so the CPU can fill it directly. */
741 id<MTLBuffer>
buffer = temp_buffer_pool_.get_buffer(mtlDevice_,
744 MTLResourceStorageModeShared,
748 [blitEncoder copyFromBuffer:
buffer
750 toBuffer:mmem.mtlBuffer
751 destinationOffset:mmem.offset
/* No Metal buffer mapped: let the device perform the copy. */
755 metal_device_->mem_copy_to(mem);
/* NOTE(review): interior of copy_from_device(...) — signature and several
 * lines missing from this extract. Synchronizes managed buffers via blit,
 * then (lines missing) presumably defers the host memcpy until the command
 * buffer completes; falls back to mem_copy_from(). */
770 std::lock_guard<std::recursive_mutex>
lock(metal_device_->metal_mem_map_mutex);
771 MetalDevice::MetalMem &mmem = *metal_device_->metal_mem_map.at(&mem);
772 if (mmem.mtlBuffer) {
/* Managed storage: GPU writes must be synchronized before CPU reads. */
776 if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) {
777 id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
778 [blitEncoder synchronizeResource:mmem.mtlBuffer];
781 if (mtlCommandBuffer_) {
794 metal_device_->mem_copy_from(mem);
/* NOTE(review): interior of prepare_resources(DeviceKernel) — signature
 * missing from this extract. Declares every tracked Metal buffer/texture as
 * used on the current compute encoder so Metal keeps them resident. */
800 std::lock_guard<std::recursive_mutex>
lock(metal_device_->metal_mem_map_mutex);
803 for (
auto &it : metal_device_->metal_mem_map) {
/* Default to read access; upgraded to read-write (condition on the
 * missing line above; confirm against full source). */
806 MTLResourceUsage usage = MTLResourceUsageRead;
808 usage |= MTLResourceUsageWrite;
811 if (it.second->mtlBuffer) {
813 [mtlComputeEncoder_ useResource:it.second->mtlBuffer usage:usage];
815 else if (it.second->mtlTexture) {
817 [mtlComputeEncoder_ useResource:it.second->mtlTexture usage:usage | MTLResourceUsageSample];
/* Texture binding tables are always read by kernels. */
822 [mtlComputeEncoder_ useResource:metal_device_->texture_bindings_2d usage:MTLResourceUsageRead];
823 [mtlComputeEncoder_ useResource:metal_device_->texture_bindings_3d usage:MTLResourceUsageRead];
/* Return a compute encoder for `kernel`, reusing the current one when its
 * dispatch type matches, otherwise closing it (and any blit encoder) and
 * opening a new one on a (possibly new) command buffer.
 * NOTE(review): extract is missing interior lines, including where
 * `concurrent` is computed; code left byte-identical. */
826 id<MTLComputeCommandEncoder> MetalDeviceQueue::get_compute_encoder(
DeviceKernel kernel)
/* When profiling, each dispatch gets its own encoder so timing events
 * bracket exactly one kernel. */
830 if (@available(macos 10.14, *)) {
831 if (timing_shared_event_) {
833 if (mtlComputeEncoder_) {
834 close_compute_encoder();
838 if (mtlComputeEncoder_) {
/* NOTE(review): as written, `a == b ? c : d` parses as `(a == b) ? c : d`;
 * the intended comparison is presumably against the ternary result —
 * confirm whether parentheses were lost in extraction. */
839 if (mtlComputeEncoder_.dispatchType == concurrent ? MTLDispatchTypeConcurrent :
840 MTLDispatchTypeSerial) {
/* Dispatch type matches: reuse after re-declaring resources. */
842 prepare_resources(
kernel);
844 return mtlComputeEncoder_;
/* Dispatch type differs: must start a fresh encoder. */
846 close_compute_encoder();
849 close_blit_encoder();
851 if (!mtlCommandBuffer_) {
852 mtlCommandBuffer_ = [mtlCommandQueue_ commandBuffer];
853 [mtlCommandBuffer_ retain];
856 mtlComputeEncoder_ = [mtlCommandBuffer_
857 computeCommandEncoderWithDispatchType:concurrent ? MTLDispatchTypeConcurrent :
858 MTLDispatchTypeSerial];
863 prepare_resources(
kernel);
866 return mtlComputeEncoder_;
/* Return the active blit encoder, creating one (and, if necessary, a new
 * command buffer) on demand. A compute encoder cannot coexist with a blit
 * encoder on the same command buffer, so any open one is closed first. */
id<MTLBlitCommandEncoder> MetalDeviceQueue::get_blit_encoder()
{
  /* Reuse the encoder that is already open. */
  if (mtlBlitEncoder_ != nil) {
    return mtlBlitEncoder_;
  }

  /* Blit and compute work cannot share an open encoder. */
  if (mtlComputeEncoder_ != nil) {
    close_compute_encoder();
  }

  if (mtlCommandBuffer_ == nil) {
    mtlCommandBuffer_ = [[mtlCommandQueue_ commandBuffer] retain];
    /* Timing events signaled from here on belong to this command buffer. */
    command_buffer_start_timing_id_ = timing_shared_event_id_;
  }

  mtlBlitEncoder_ = [mtlCommandBuffer_ blitCommandEncoder];
  return mtlBlitEncoder_;
}
/* End the current compute encoder and, when profiling, signal the timing
 * shared event so the listener registered in synchronize() can attribute
 * elapsed time to this encoder's kernel (ID is post-incremented). */
889 void MetalDeviceQueue::close_compute_encoder()
891 [mtlComputeEncoder_ endEncoding];
892 mtlComputeEncoder_ = nil;
894 if (@available(macos 10.14, *)) {
895 if (timing_shared_event_) {
896 [mtlCommandBuffer_ encodeSignalEvent:timing_shared_event_ value:timing_shared_event_id_++];
/* End and clear the current blit encoder, if one is open. */
void MetalDeviceQueue::close_blit_encoder()
{
  if (mtlBlitEncoder_ == nil) {
    return;
  }
  [mtlBlitEncoder_ endEncoding];
  mtlBlitEncoder_ = nil;
}
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
device_ptr device_pointer
void device_copy_from(size_t y, size_t w, size_t h, size_t elem)
#define CCL_NAMESPACE_END
CCL_NAMESPACE_BEGIN const char * device_kernel_as_string(DeviceKernel kernel)
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
static struct ImBuf * init_execution(const SeqRenderData *context, ImBuf *ibuf1, ImBuf *ibuf2, ImBuf *ibuf3)
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
ccl_global float * buffer
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
@ DEVICE_KERNEL_INTEGRATOR_NUM
@ DEVICE_KERNEL_INTEGRATOR_RESET
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE
@ DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK
@ DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE
@ DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE
@ DEVICE_KERNEL_INTEGRATOR_TERMINATED_SHADOW_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW
@ DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST
#define VLOG_DEVICE_STATS
ccl_device_inline float3 log(float3 v)
static void error(const char *str)
unsigned __int64 uint64_t
string string_human_readable_number(size_t num)
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
ccl_device_inline size_t round_up(size_t x, size_t multiple)
ccl_device_inline size_t divide_up(size_t x, size_t y)