Blender  V3.3
work_tile_scheduler.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
5 
6 #include "device/queue.h"
7 #include "integrator/tile.h"
8 #include "session/buffers.h"
9 #include "util/atomic.h"
10 #include "util/log.h"
11 
13 
15 {
16 }
17 
18 void WorkTileScheduler::set_accelerated_rt(bool accelerated_rt)
19 {
20  accelerated_rt_ = accelerated_rt;
21 }
22 
23 void WorkTileScheduler::set_max_num_path_states(int max_num_path_states)
24 {
25  max_num_path_states_ = max_num_path_states;
26 }
27 
/* Re-configure the scheduler for a new buffer and sample range.
 *
 * - `buffer_params`: source of the offset within the full frame (`full_x`, `full_y`), the size
 *   in pixels (`width`, `height`) and the `offset` / `stride` values; all are copied into
 *   members of the scheduler.
 * - `sample_start`, `samples_num`, `sample_offset`: stored as-is in the corresponding members.
 * - `scrambling_distance`: stored as-is in `scrambling_distance_`.
 *
 * None of the arguments is validated here. */
28 void WorkTileScheduler::reset(const BufferParams &buffer_params,
29  int sample_start,
30  int samples_num,
31  int sample_offset,
32  float scrambling_distance)
33 {
34  /* Image buffer parameters. */
35  image_full_offset_px_.x = buffer_params.full_x;
36  image_full_offset_px_.y = buffer_params.full_y;
37 
38  image_size_px_ = make_int2(buffer_params.width, buffer_params.height);
39  scrambling_distance_ = scrambling_distance;
40 
41  offset_ = buffer_params.offset;
42  stride_ = buffer_params.stride;
43 
44  /* Samples parameters. */
45  sample_start_ = sample_start;
46  samples_num_ = samples_num;
47  sample_offset_ = sample_offset;
48 
49  /* Initialize new scheduling. */
    /* NOTE(review): the statement announced by the comment above is missing from this listing
     * (the gutter numbering skips a line). Presumably it is a call which re-initializes the
     * scheduling state from the members assigned above - confirm against the original file. */
51 }
52 
54 {
57 
58  VLOG_WORK << "Will schedule tiles of size " << tile_size_;
59 
60  const int num_path_states_in_tile = tile_size_.width * tile_size_.height *
62 
63  if (num_path_states_in_tile == 0) {
64  num_tiles_x_ = 0;
65  num_tiles_y_ = 0;
67  }
68  else {
69  if (VLOG_IS_ON(3)) {
70  /* The logging is based on multiple tiles scheduled, ignoring overhead of multi-tile
71  * scheduling and purely focusing on the number of used path states. */
72  const int num_tiles = max_num_path_states_ / num_path_states_in_tile;
73  VLOG_WORK << "Number of unused path states: "
74  << max_num_path_states_ - num_tiles * num_path_states_in_tile;
75  }
76 
80  }
81 
83 
84  next_work_index_ = 0;
86 }
87 
/* Hand out the next work tile.
 *
 * - `work_tile_`: output; only written when the function returns true.
 * - `max_work_size`: upper limit for `width * height * num_samples` of the returned tile. A value
 *   of zero disables the check.
 *
 * Returns true when a tile has been written to `work_tile_`, false otherwise.
 *
 * NOTE(review): several lines of this function are missing from this listing (the gutter
 * numbering has gaps); the notes inside the body mark the affected places. */
88 bool WorkTileScheduler::get_work(KernelWorkTile *work_tile_, const int max_work_size)
89 {
90  /* Note that the `max_work_size` can be higher than the `max_num_path_states_`: this is because
91  * the path trace work can decide to use smaller tile sizes and greedily schedule multiple tiles,
92  * improving overall device occupancy.
93  * So the `max_num_path_states_` is a "scheduling unit", and the `max_work_size` is a "scheduling
94  * limit". */
95 
97 
    /* Claim a work index: read the current value of the counter and advance it by one. */
98  const int work_index = next_work_index_++;
    /* NOTE(review): the condition guarding the early-out below is missing from this listing.
     * Presumably it tests whether `work_index` is past the end of the available work - confirm
     * against the original file. */
100  return false;
101  }
102 
    /* Position of this work item within the sample ranges of a tile, and the first sample
     * (relative to the scheduled range) that it covers. */
103  const int sample_range_index = work_index % num_tiles_per_sample_range_;
104  const int start_sample = sample_range_index * tile_size_.num_samples;
    /* NOTE(review): the definition of `tile_index` is missing from this listing. The two lines
     * below split it into a tile row and a tile column, counting tiles along X first. */
106  const int tile_y = tile_index / num_tiles_x_;
107  const int tile_x = tile_index - tile_y * num_tiles_x_;
108 
    /* Fill in an unclipped tile with its origin at the tile grid position. */
109  KernelWorkTile work_tile;
110  work_tile.x = tile_x * tile_size_.width;
111  work_tile.y = tile_y * tile_size_.height;
112  work_tile.w = tile_size_.width;
113  work_tile.h = tile_size_.height;
114  work_tile.start_sample = sample_start_ + start_sample;
    /* The last sample range of a tile may cover fewer samples than the tile size allows. */
115  work_tile.num_samples = min(tile_size_.num_samples, samples_num_ - start_sample);
116  work_tile.sample_offset = sample_offset_;
117  work_tile.offset = offset_;
118  work_tile.stride = stride_;
119 
    /* Clip the tile so that it does not extend past the image size. */
120  work_tile.w = min(work_tile.w, image_size_px_.x - work_tile.x);
121  work_tile.h = min(work_tile.h, image_size_px_.y - work_tile.y);
122 
    /* Shift the tile origin by the stored full-frame offset. Done after clipping, which is
     * performed in coordinates relative to the image origin. */
123  work_tile.x += image_full_offset_px_.x;
124  work_tile.y += image_full_offset_px_.y;
125 
126  const int tile_work_size = work_tile.w * work_tile.h * work_tile.num_samples;
127 
128  DCHECK_GT(tile_work_size, 0);
129 
    /* A zero `max_work_size` means that no limit has been requested. */
130  if (max_work_size && tile_work_size > max_work_size) {
131  /* The work did not fit into the requested limit of the work size. Unschedule the tile,
132  * so it can be picked up again later. */
    /* NOTE(review): the statement which performs the unscheduling is missing from this listing.
     * Presumably it reverts the increment of `next_work_index_` done above - confirm against the
     * original file. */
134  return false;
135  }
136 
137  *work_tile_ = work_tile;
138 
139  return true;
140 }
141 
int offset
Definition: buffers.h:90
int full_x
Definition: buffers.h:84
int stride
Definition: buffers.h:90
int height
Definition: buffers.h:72
NODE_DECLARE int width
Definition: buffers.h:71
int full_y
Definition: buffers.h:85
bool get_work(KernelWorkTile *work_tile, const int max_work_size=0)
void set_accelerated_rt(bool state)
void set_max_num_path_states(int max_num_path_states)
void reset(const BufferParams &buffer_params, int sample_start, int samples_num, int sample_offset, float scrambling_distance)
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
TileSize tile_calculate_best_size(const bool accel_rt, const int2 &image_size, const int num_samples, const int max_num_path_states, const float scrambling_distance)
ccl_gpu_kernel_postfix ccl_global KernelWorkTile const int num_tiles
const int tile_index
const int work_index
#define VLOG_IS_ON(severity)
Definition: log.h:39
#define VLOG_WORK
Definition: log.h:80
#define DCHECK_GT(a, b)
Definition: log.h:65
#define DCHECK_NE(a, b)
Definition: log.h:63
#define make_int2(x, y)
Definition: metal/compat.h:206
#define min(a, b)
Definition: sort.c:35
int x
Definition: types_int2.h:15
int y
Definition: types_int2.h:15
ccl_device_inline size_t divide_up(size_t x, size_t y)
Definition: util/types.h:51