Blender V3.3
path_trace_work_gpu.h
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

#pragma once

#include "kernel/integrator/state.h"

#include "device/graphics_interop.h"
#include "device/memory.h"
#include "device/queue.h"

#include "integrator/path_trace_work.h"
#include "integrator/work_tile_scheduler.h"

#include "util/vector.h"

CCL_NAMESPACE_BEGIN

struct KernelWorkTile;

/* Implementation of PathTraceWork which schedules work to the device in tiles which are sized
 * to match the device queue's number of path states.
 * This implementation best suits devices which have a lot of integrator states, such as a GPU. */
class PathTraceWorkGPU : public PathTraceWork {
 public:
  PathTraceWorkGPU(Device *device,
                   Film *film,
                   DeviceScene *device_scene,
                   bool *cancel_requested_flag);

  virtual void alloc_work_memory() override;
  virtual void init_execution() override;

  virtual void render_samples(RenderStatistics &statistics,
                              int start_sample,
                              int samples_num,
                              int sample_offset) override;

  virtual void copy_to_display(PathTraceDisplay *display,
                               PassMode pass_mode,
                               int num_samples) override;
  virtual void destroy_gpu_resources(PathTraceDisplay *display) override;

  virtual bool copy_render_buffers_from_device() override;
  virtual bool copy_render_buffers_to_device() override;
  virtual bool zero_render_buffers() override;

  virtual int adaptive_sampling_converge_filter_count_active(float threshold, bool reset) override;
  virtual void cryptomatte_postproces() override;

 protected:
  void alloc_integrator_soa();
  void alloc_integrator_queue();
  void alloc_integrator_sorting();
  void alloc_integrator_path_split();

  /* Returns DEVICE_KERNEL_NUM if there are no scheduled kernels. */
  DeviceKernel get_most_queued_kernel() const;

  void enqueue_reset();

  bool enqueue_work_tiles(bool &finished);
  void enqueue_work_tiles(DeviceKernel kernel,
                          const KernelWorkTile work_tiles[],
                          const int num_work_tiles,
                          const int num_active_paths,
                          const int num_predicted_splits);

  bool enqueue_path_iteration();
  void enqueue_path_iteration(DeviceKernel kernel, const int num_paths_limit = INT_MAX);

  void compute_queued_paths(DeviceKernel kernel, DeviceKernel queued_kernel);
  void compute_sorted_queued_paths(DeviceKernel kernel,
                                   DeviceKernel queued_kernel,
                                   const int num_paths_limit);

  void compact_main_paths(const int num_active_paths);
  void compact_shadow_paths();
  void compact_paths(const int num_active_paths,
                     const int max_active_path_index,
                     DeviceKernel terminated_paths_kernel,
                     DeviceKernel compact_paths_kernel,
                     DeviceKernel compact_kernel);

  int num_active_main_paths_paths();

  /* Check whether graphics interop can be used for the PathTraceDisplay update. */
  bool should_use_graphics_interop();

  /* Naive implementation of `copy_to_display()` which performs film conversion on the
   * device, then copies pixels to the host and pushes them to the `display`. */
  void copy_to_display_naive(PathTraceDisplay *display, PassMode pass_mode, int num_samples);

  /* Implementation of `copy_to_display()` which uses the driver's OpenGL/GPU interoperability,
   * avoiding a copy of pixels to the host. */
  bool copy_to_display_interop(PathTraceDisplay *display, PassMode pass_mode, int num_samples);

  /* Synchronously run the film conversion kernel and store the display result in the given
   * destination. */
  void get_render_tile_film_pixels(const PassAccessor::Destination &destination,
                                   PassMode pass_mode,
                                   int num_samples);

  int adaptive_sampling_convergence_check_count_active(float threshold, bool reset);
  void enqueue_adaptive_sampling_filter_x();
  void enqueue_adaptive_sampling_filter_y();

  bool has_shadow_catcher() const;

  /* Count how many currently scheduled paths can still split. */
  int shadow_catcher_count_possible_splits();

  /* Kernel properties. */
  bool kernel_uses_sorting(DeviceKernel kernel);
  bool kernel_creates_shadow_paths(DeviceKernel kernel);
  bool kernel_creates_ao_paths(DeviceKernel kernel);
  bool kernel_is_shadow_path(DeviceKernel kernel);
  int kernel_max_active_main_path_index(DeviceKernel kernel);

  /* Integrator queue. */
  unique_ptr<DeviceQueue> queue_;

  /* Scheduler which gives work to path tracing threads. */
  WorkTileScheduler work_tile_scheduler_;

  /* Integrator state for paths. */
  IntegratorStateGPU integrator_state_gpu_;
  /* SoA arrays for integrator state. */
  vector<unique_ptr<device_memory>> integrator_state_soa_;
  uint integrator_state_soa_kernel_features_;
  int integrator_state_soa_volume_stack_size_ = 0;
  /* Keep track of number of queued kernels. */
  device_vector<IntegratorQueueCounter> integrator_queue_counter_;
  /* Shader sorting. */
  device_vector<int> integrator_shader_sort_counter_;
  device_vector<int> integrator_shader_raytrace_sort_counter_;
  device_vector<int> integrator_shader_mnee_sort_counter_;
  device_vector<int> integrator_shader_sort_prefix_sum_;
  /* Path split. */
  device_vector<int> integrator_next_main_path_index_;
  device_vector<int> integrator_next_shadow_path_index_;

  /* Temporary buffer to get an array of queued paths for a particular kernel. */
  device_vector<int> queued_paths_;
  device_vector<int> num_queued_paths_;

  /* Temporary buffer for passing work tiles to the kernel. */
  device_vector<KernelWorkTile> work_tiles_;

  /* Temporary buffer used by copy_to_display() whenever graphics interoperability is not
   * available. Is allocated on demand. */
  device_vector<half4> display_rgba_half_;

  unique_ptr<DeviceGraphicsInterop> device_graphics_interop_;

  /* Cached result of device->should_use_graphics_interop(). */
  bool interop_use_checked_ = false;
  bool interop_use_ = false;

  /* Number of partitions to sort state indices into prior to material sort. */
  int num_sort_partitions_;

  /* Maximum number of concurrent integrator states. */
  int max_num_paths_;

  /* Minimum number of paths which keeps the device busy. If the actual number of paths falls
   * below this value more work will be scheduled. */
  int min_num_active_main_paths_;

  /* Maximum path index; the effective number of paths used may be smaller than the size of the
   * integrator_state_ buffer, so iteration over the full buffer can be avoided. */
  int max_active_main_path_index_;
};

CCL_NAMESPACE_END
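How the pieces above fit together is hinted at by the class comment and the protected members: new work tiles, sized to the path-state pool, are enqueued whenever device occupancy falls below min_num_active_main_paths_, and otherwise the kernel with the most queued paths (get_most_queued_kernel()) is launched, until all paths terminate and no tiles remain. The following is a minimal, self-contained toy of that wavefront loop. It is not Cycles code: the Stage enum, the counters and the bounce handling are hypothetical stand-ins for the real machinery (integrator_queue_counter_, enqueue_work_tiles(), enqueue_path_iteration()).

/* Toy model of the wavefront scheduling loop described by the class comment above.
 * All names are hypothetical; this is not Cycles code. */
#include <algorithm>
#include <array>
#include <cstdio>

enum Stage { STAGE_CAMERA = 0, STAGE_INTERSECT, STAGE_SHADE, STAGE_NUM };

int main()
{
  const int max_paths = 1024;  /* Plays the role of max_num_paths_. */
  const int min_active = 256;  /* Plays the role of min_num_active_main_paths_. */
  const int tile_size = 128;   /* Work tile sized to fit the path-state pool. */

  int remaining_pixels = 4096; /* Work that is still to be scheduled as tiles. */
  int active_paths = 0;        /* Paths currently resident in the state pool. */
  int bounces_left = 3;        /* Crude stand-in for path termination. */
  std::array<int, STAGE_NUM> queued = {}; /* Plays the role of integrator_queue_counter_. */

  for (;;) {
    /* Refill with new tiles when occupancy drops, like enqueue_work_tiles(). */
    while (remaining_pixels > 0 && active_paths < min_active &&
           active_paths + tile_size <= max_paths) {
      const int n = std::min(tile_size, remaining_pixels);
      remaining_pixels -= n;
      active_paths += n;
      queued[STAGE_CAMERA] += n;
    }

    /* Pick the stage with the most queued paths, like get_most_queued_kernel(). */
    int stage = STAGE_NUM, best = 0;
    for (int i = 0; i < STAGE_NUM; i++) {
      if (queued[i] > best) {
        best = queued[i];
        stage = i;
      }
    }
    if (stage == STAGE_NUM) {
      break; /* Nothing queued and nothing left to refill: the frame is done. */
    }

    /* "Launch" that stage for all of its queued paths, like enqueue_path_iteration(). */
    const int n = queued[stage];
    queued[stage] = 0;
    if (stage == STAGE_CAMERA || stage == STAGE_INTERSECT) {
      queued[STAGE_SHADE] += n; /* Paths move on to shading. */
    }
    else if (bounces_left-- > 0) {
      queued[STAGE_INTERSECT] += n; /* Shading decided the paths bounce again. */
    }
    else {
      active_paths -= n; /* States are freed; compaction would happen here. */
      bounces_left = 3;
    }
    printf("ran stage %d on %d paths (%d active)\n", stage, n, active_paths);
  }
  return 0;
}

The real class additionally sorts shading work by material before launching it (the integrator_shader_*_sort_counter_ buffers) and compacts terminated path states (compact_main_paths()) so that the active range of the state buffer stays dense.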
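For the display update, the header documents two routes: copy_to_display_interop() hands the converted film result to the viewport through the driver's graphics interoperability, while copy_to_display_naive() converts on the device, downloads the pixels to the host (display_rgba_half_) and pushes them to the PathTraceDisplay. A plausible shape of that dispatch, using the cached interop_use_checked_ / interop_use_ flags declared above, is sketched below; the stub types and method bodies are hypothetical, only the control flow is implied by the header.

/* Hedged sketch of a copy_to_display() dispatch; Display stands in for
 * PathTraceDisplay and the method bodies are placeholders, not Cycles code. */
#include <cstdio>

struct Display {};

struct WorkGPU {
  bool interop_use_checked_ = false; /* Mirrors the cached-check flags in the header. */
  bool interop_use_ = false;

  bool should_use_graphics_interop() { return true; /* E.g. ask the device/driver. */ }
  bool copy_to_display_interop(Display *) { return true; /* May fail and return false. */ }
  void copy_to_display_naive(Display *) { puts("device -> host -> display"); }

  void copy_to_display(Display *display)
  {
    /* Query the driver capability once and cache the answer. */
    if (!interop_use_checked_) {
      interop_use_ = should_use_graphics_interop();
      interop_use_checked_ = true;
    }
    /* Prefer interop; fall back to the naive host copy if it is unavailable or fails. */
    if (interop_use_ && copy_to_display_interop(display)) {
      return;
    }
    copy_to_display_naive(display);
  }
};

int main()
{
  Display display;
  WorkGPU work;
  work.copy_to_display(&display);
  return 0;
}

Caching the capability check keeps repeated viewport updates cheap: the device is asked only once whether graphics interop should be used, matching the "Cached result of device->should_use_graphics_interop()" comment above.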