Blender V3.3
path_trace.cpp
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2011-2022 Blender Foundation */

#include "integrator/path_trace.h"

#include "device/cpu/device.h"
#include "device/device.h"
#include "integrator/pass_accessor.h"
#include "integrator/path_trace_display.h"
#include "integrator/path_trace_tile.h"
#include "integrator/render_scheduler.h"
#include "scene/pass.h"
#include "scene/scene.h"
#include "session/tile.h"
#include "util/algorithm.h"
#include "util/log.h"
#include "util/progress.h"
#include "util/tbb.h"
#include "util/time.h"

CCL_NAMESPACE_BEGIN
PathTrace::PathTrace(Device *device,
                     Film *film,
                     DeviceScene *device_scene,
                     RenderScheduler &render_scheduler,
                     TileManager &tile_manager)
    : device_(device),
      film_(film),
      device_scene_(device_scene),
      render_scheduler_(render_scheduler),
      tile_manager_(tile_manager)
{
  DCHECK_NE(device_, nullptr);

  {
    vector<DeviceInfo> cpu_devices;
    device_cpu_info(cpu_devices);

    cpu_device_.reset(device_cpu_create(cpu_devices[0], device->stats, device->profiler));
  }

  /* Create path tracing work in advance, so that it can be reused by incremental sampling as much
   * as possible. */
  device_->foreach_device([&](Device *path_trace_device) {
    unique_ptr<PathTraceWork> work = PathTraceWork::create(
        path_trace_device, film, device_scene, &render_cancel_.is_requested);
    if (work) {
      path_trace_works_.emplace_back(std::move(work));
    }
  });

  work_balance_infos_.resize(path_trace_works_.size());
  work_balance_do_initial(work_balance_infos_);

  render_scheduler.set_need_schedule_rebalance(path_trace_works_.size() > 1);
}

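/* For example, a multi-device render on two GPUs yields two `PathTraceWork` instances
 * above (one per underlying device), which is what the buffer slicing and work
 * re-balancing below operate on. A single-device render ends up with exactly one work. */
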
PathTrace::~PathTrace()
{
  destroy_gpu_resources();
}

void PathTrace::load_kernels()
{
  if (denoiser_) {
    /* Activate graphics interop while denoiser device is created, so that it can choose a device
     * that supports interop for faster display updates. */
    if (display_ && path_trace_works_.size() > 1) {
      display_->graphics_interop_activate();
    }

    denoiser_->load_kernels(progress_);

    if (display_ && path_trace_works_.size() > 1) {
      display_->graphics_interop_deactivate();
    }
  }
}

void PathTrace::alloc_work_memory()
{
  for (auto &&path_trace_work : path_trace_works_) {
    path_trace_work->alloc_work_memory();
  }
}

bool PathTrace::ready_to_reset()
{
  /* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
   * display. If there is no such display, the logic here will break. */
  DCHECK(display_);

  /* The logic here tries to provide behavior which feels the most interactive to artists. The
   * general idea is to be able to reset as quickly as possible, while still providing interactive
   * feedback.
   *
   * If the render result was ever drawn after the previous reset, consider that a reset is now
   * possible. This way camera navigation gives the quickest feedback of rendered pixels,
   * regardless of whether the CPU or GPU drawing pipeline is used.
   *
   * Consider a reset happening after a redraw "slow" enough to not clog anything. This is a bit
   * arbitrary, but seems to work very well with viewport navigation in Blender. */

  if (did_draw_after_reset_) {
    return true;
  }

  return false;
}

void PathTrace::reset(const BufferParams &full_params,
                      const BufferParams &big_tile_params,
                      const bool reset_rendering)
{
  if (big_tile_params_.modified(big_tile_params)) {
    big_tile_params_ = big_tile_params;
    render_state_.need_reset_params = true;
  }

  full_params_ = full_params;

  /* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
   * It is required to inform it about a reset whenever one happens, so that the redraw state
   * tracking is properly updated. */
  if (display_) {
    display_->reset(big_tile_params, reset_rendering);
  }

  render_state_.has_denoised_result = false;
  render_state_.tile_written = false;

  did_draw_after_reset_ = false;
}

void PathTrace::device_free()
{
  /* Free render buffers used by the path trace work to reduce memory peak. */
  BufferParams empty_params;
  empty_params.pass_stride = 0;
  empty_params.update_offset_stride();
  for (auto &&path_trace_work : path_trace_works_) {
    path_trace_work->get_render_buffers()->reset(empty_params);
  }
  render_state_.need_reset_params = true;
}

void PathTrace::set_progress(Progress *progress)
{
  progress_ = progress;
}

void PathTrace::render(const RenderWork &render_work)
{
  /* Indicate that rendering has started and that it can be requested to cancel. */
  {
    thread_scoped_lock lock(render_cancel_.mutex);
    if (render_cancel_.is_requested) {
      return;
    }
    render_cancel_.is_rendering = true;
  }

  render_pipeline(render_work);

  /* Indicate that rendering has finished, making it so the thread which requested `cancel()` can
   * carry on. */
  {
    thread_scoped_lock lock(render_cancel_.mutex);
    render_cancel_.is_rendering = false;
    render_cancel_.condition.notify_one();
  }
}

void PathTrace::render_pipeline(RenderWork render_work)
{
  /* NOTE: Only check for "instant" cancel here. The user-requested cancel via progress is
   * checked in Session and the work in the event of cancel is to be finished here. */

  render_scheduler_.set_need_schedule_cryptomatte(device_scene_->data.film.cryptomatte_passes !=
                                                  0);

  render_init_kernel_execution();

  render_scheduler_.report_work_begin(render_work);

  init_render_buffers(render_work);

  rebalance(render_work);

  path_trace(render_work);
  if (render_cancel_.is_requested) {
    return;
  }

  adaptive_sample(render_work);
  if (render_cancel_.is_requested) {
    return;
  }

  cryptomatte_postprocess(render_work);
  if (render_cancel_.is_requested) {
    return;
  }

  denoise(render_work);
  if (render_cancel_.is_requested) {
    return;
  }

  write_tile_buffer(render_work);
  update_display(render_work);

  progress_update_if_needed(render_work);

  finalize_full_buffer_on_disk(render_work);
}

void PathTrace::render_init_kernel_execution()
{
  for (auto &&path_trace_work : path_trace_works_) {
    path_trace_work->init_execution();
  }
}

/* TODO(sergey): Look into `std::function` rather than using a template. Should not be a
 * measurable performance impact at runtime, but will make compilation faster and the binary
 * somewhat smaller. */
template<typename Callback>
static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works,
                                         const vector<WorkBalanceInfo> &work_balance_infos,
                                         const BufferParams &buffer_params,
                                         const int overscan,
                                         const Callback &callback)
{
  const int num_works = path_trace_works.size();
  const int window_height = buffer_params.window_height;

  int current_y = 0;
  for (int i = 0; i < num_works; ++i) {
    const double weight = work_balance_infos[i].weight;
    const int slice_window_full_y = buffer_params.full_y + buffer_params.window_y + current_y;
    const int slice_window_height = max(lround(window_height * weight), 1);

    /* Disallow negative values to deal with situations when there are more compute devices than
     * scan-lines. */
    const int remaining_window_height = max(0, window_height - current_y);

    BufferParams slice_params = buffer_params;

    slice_params.full_y = max(slice_window_full_y - overscan, buffer_params.full_y);
    slice_params.window_y = slice_window_full_y - slice_params.full_y;

    if (i < num_works - 1) {
      slice_params.window_height = min(slice_window_height, remaining_window_height);
    }
    else {
      slice_params.window_height = remaining_window_height;
    }

    slice_params.height = slice_params.window_y + slice_params.window_height + overscan;
    slice_params.height = min(slice_params.height,
                              buffer_params.height + buffer_params.full_y - slice_params.full_y);

    slice_params.update_offset_stride();

    callback(path_trace_works[i].get(), slice_params);

    current_y += slice_params.window_height;
  }
}

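/* A rough worked example of the slicing above, assuming full_y = window_y = 0, a
 * window_height of 100, an overscan of 2 and two works with weights 0.7 and 0.3:
 *
 *   work 0: full_y = 0,  window_y = 0, window_height = 70, height = 72
 *   work 1: full_y = 68, window_y = 2, window_height = 30, height = 32
 *
 * Each slice carries up to `overscan` extra scan-lines on either side of its window,
 * so neighboring slices overlap in buffer rows but not in the rows they render. */
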
void PathTrace::update_allocated_work_buffer_params()
{
  const int overscan = tile_manager_.get_tile_overscan();
  foreach_sliced_buffer_params(path_trace_works_,
                               work_balance_infos_,
                               big_tile_params_,
                               overscan,
                               [](PathTraceWork *path_trace_work, const BufferParams &params) {
                                 RenderBuffers *buffers = path_trace_work->get_render_buffers();
                                 buffers->reset(params);
                               });
}

static BufferParams scale_buffer_params(const BufferParams &params, int resolution_divider)
{
  BufferParams scaled_params = params;

  scaled_params.width = max(1, params.width / resolution_divider);
  scaled_params.height = max(1, params.height / resolution_divider);

  scaled_params.window_x = params.window_x / resolution_divider;
  scaled_params.window_y = params.window_y / resolution_divider;
  scaled_params.window_width = max(1, params.window_width / resolution_divider);
  scaled_params.window_height = max(1, params.window_height / resolution_divider);

  scaled_params.full_x = params.full_x / resolution_divider;
  scaled_params.full_y = params.full_y / resolution_divider;
  scaled_params.full_width = max(1, params.full_width / resolution_divider);
  scaled_params.full_height = max(1, params.full_height / resolution_divider);

  scaled_params.update_offset_stride();

  return scaled_params;
}

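/* For example, a resolution divider of 4 turns a 1920x1080 buffer into 480x270, with the
 * window and full rectangles divided the same way. The max(1, ...) clamps guard against
 * zero-sized buffers: a width of 3 with divider 4 would otherwise truncate to 0. */
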
void PathTrace::update_effective_work_buffer_params(const RenderWork &render_work)
{
  const int resolution_divider = render_work.resolution_divider;

  const BufferParams scaled_full_params = scale_buffer_params(full_params_, resolution_divider);
  const BufferParams scaled_big_tile_params = scale_buffer_params(big_tile_params_,
                                                                  resolution_divider);

  const int overscan = tile_manager_.get_tile_overscan();

  foreach_sliced_buffer_params(path_trace_works_,
                               work_balance_infos_,
                               scaled_big_tile_params,
                               overscan,
                               [&](PathTraceWork *path_trace_work, const BufferParams params) {
                                 path_trace_work->set_effective_buffer_params(
                                     scaled_full_params, scaled_big_tile_params, params);
                               });

  render_state_.effective_big_tile_params = scaled_big_tile_params;
}

void PathTrace::update_work_buffer_params_if_needed(const RenderWork &render_work)
{
  if (render_state_.need_reset_params) {
    update_allocated_work_buffer_params();
  }

  if (render_state_.need_reset_params ||
      render_state_.resolution_divider != render_work.resolution_divider) {
    update_effective_work_buffer_params(render_work);
  }

  render_state_.resolution_divider = render_work.resolution_divider;
  render_state_.need_reset_params = false;
}

void PathTrace::init_render_buffers(const RenderWork &render_work)
{
  update_work_buffer_params_if_needed(render_work);

  /* Handle initialization scheduled by the render scheduler. */
  if (render_work.init_render_buffers) {
    parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
      path_trace_work->zero_render_buffers();
    });

    tile_buffer_read();
  }
}

void PathTrace::path_trace(RenderWork &render_work)
{
  if (!render_work.path_trace.num_samples) {
    return;
  }

  VLOG_WORK << "Will path trace " << render_work.path_trace.num_samples
            << " samples at the resolution divider " << render_work.resolution_divider;

  const double start_time = time_dt();

  const int num_works = path_trace_works_.size();

  thread_capture_fp_settings();

  parallel_for(0, num_works, [&](int i) {
    const double work_start_time = time_dt();
    const int num_samples = render_work.path_trace.num_samples;

    PathTraceWork *path_trace_work = path_trace_works_[i].get();

    PathTraceWork::RenderStatistics statistics;
    path_trace_work->render_samples(statistics,
                                    render_work.path_trace.start_sample,
                                    num_samples,
                                    render_work.path_trace.sample_offset);

    const double work_time = time_dt() - work_start_time;
    work_balance_infos_[i].time_spent += work_time;
    work_balance_infos_[i].occupancy = statistics.occupancy;

    VLOG_INFO << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
              << work_time / num_samples
              << " seconds per sample), occupancy: " << statistics.occupancy;
  });

  float occupancy_accum = 0.0f;
  for (const WorkBalanceInfo &balance_info : work_balance_infos_) {
    occupancy_accum += balance_info.occupancy;
  }
  const float occupancy = occupancy_accum / num_works;
  render_scheduler_.report_path_trace_occupancy(render_work, occupancy);

  render_scheduler_.report_path_trace_time(
      render_work, time_dt() - start_time, is_cancel_requested());
}

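/* The occupancy reported above is a plain average over the works: two works reporting
 * 1.0 and 0.5, for example, yield 0.75. The scheduler can use this as a hint for how
 * much work to schedule per render cycle. */
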
void PathTrace::adaptive_sample(RenderWork &render_work)
{
  if (!render_work.adaptive_sampling.filter) {
    return;
  }

  bool did_reschedule_on_idle = false;

  while (true) {
    VLOG_WORK << "Will filter adaptive stopping buffer, threshold "
              << render_work.adaptive_sampling.threshold;
    if (render_work.adaptive_sampling.reset) {
      VLOG_WORK << "Will re-calculate convergence flag for currently converged pixels.";
    }

    const double start_time = time_dt();

    uint num_active_pixels = 0;
    parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
      const uint num_active_pixels_in_work =
          path_trace_work->adaptive_sampling_converge_filter_count_active(
              render_work.adaptive_sampling.threshold, render_work.adaptive_sampling.reset);
      if (num_active_pixels_in_work) {
        atomic_add_and_fetch_u(&num_active_pixels, num_active_pixels_in_work);
      }
    });

    render_scheduler_.report_adaptive_filter_time(
        render_work, time_dt() - start_time, is_cancel_requested());

    if (num_active_pixels == 0) {
      VLOG_WORK << "All pixels converged.";
      if (!render_scheduler_.render_work_reschedule_on_converge(render_work)) {
        break;
      }
      VLOG_WORK << "Continuing with lower threshold.";
    }
    else if (did_reschedule_on_idle) {
      break;
    }
    else if (num_active_pixels < 128 * 128) {
      /* NOTE: The hardcoded value of 128^2 is more of an empirical value to keep the GPU busy, so
       * that there is no performance loss from the progressive noise floor feature.
       *
       * A better heuristic is possible here: for example, use the maximum of 128^2 and a
       * percentage of the final resolution. */
      if (!render_scheduler_.render_work_reschedule_on_idle(render_work)) {
        VLOG_WORK << "Rescheduling is not possible: final threshold is reached.";
        break;
      }
      VLOG_WORK << "Rescheduling lower threshold.";
      did_reschedule_on_idle = true;
    }
    else {
      break;
    }
  }
}

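/* The loop above is what drives the progressive noise floor: when (almost) all pixels
 * converge at the current threshold, the scheduler may lower the threshold and the filter
 * runs again. The early reschedule path kicks in once fewer than 128 * 128 = 16384 pixels
 * remain active, so devices do not idle on an almost-converged image. */
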
void PathTrace::set_denoiser_params(const DenoiseParams &params)
{
  render_scheduler_.set_denoiser_params(params);

  if (!params.use) {
    denoiser_.reset();
    return;
  }

  if (denoiser_) {
    const DenoiseParams old_denoiser_params = denoiser_->get_params();
    if (old_denoiser_params.type == params.type) {
      denoiser_->set_params(params);
      return;
    }
  }

  denoiser_ = Denoiser::create(device_, params);

  /* Only take into account the "immediate" cancel to have interactive rendering respond to
   * navigation as quickly as possible, but allow the denoiser to run after the user hit the
   * Escape key during offline rendering. */
  denoiser_->is_cancelled_cb = [this]() { return render_cancel_.is_requested; };
}

void PathTrace::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
{
  render_scheduler_.set_adaptive_sampling(adaptive_sampling);
}

void PathTrace::cryptomatte_postprocess(const RenderWork &render_work)
{
  if (!render_work.cryptomatte.postprocess) {
    return;
  }
  VLOG_WORK << "Perform cryptomatte work.";

  parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
    path_trace_work->cryptomatte_postproces();
  });
}

void PathTrace::denoise(const RenderWork &render_work)
{
  if (!render_work.tile.denoise) {
    return;
  }

  if (!denoiser_) {
    /* Denoiser was not configured, so nothing to do here. */
    return;
  }

  VLOG_WORK << "Perform denoising work.";

  const double start_time = time_dt();

  RenderBuffers *buffer_to_denoise = nullptr;
  bool allow_inplace_modification = false;

  Device *denoiser_device = denoiser_->get_denoiser_device();
  if (path_trace_works_.size() > 1 && denoiser_device && !big_tile_denoise_work_) {
    big_tile_denoise_work_ = PathTraceWork::create(denoiser_device, film_, device_scene_, nullptr);
  }

  if (big_tile_denoise_work_) {
    big_tile_denoise_work_->set_effective_buffer_params(render_state_.effective_big_tile_params,
                                                        render_state_.effective_big_tile_params,
                                                        render_state_.effective_big_tile_params);

    buffer_to_denoise = big_tile_denoise_work_->get_render_buffers();
    buffer_to_denoise->reset(render_state_.effective_big_tile_params);

    copy_to_render_buffers(buffer_to_denoise);

    allow_inplace_modification = true;
  }
  else {
    DCHECK_EQ(path_trace_works_.size(), 1);

    buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
  }

  if (denoiser_->denoise_buffer(render_state_.effective_big_tile_params,
                                buffer_to_denoise,
                                get_num_samples_in_buffer(),
                                allow_inplace_modification)) {
    render_state_.has_denoised_result = true;
  }

  render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
}

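/* Note that in-place modification is only allowed in the multi-device case above, where
 * the denoiser runs on a scratch big-tile copy of the per-work results; a single work's
 * live render buffer is left intact so accumulation can continue after denoising. */
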
void PathTrace::set_output_driver(unique_ptr<OutputDriver> driver)
{
  output_driver_ = move(driver);
}

void PathTrace::set_display_driver(unique_ptr<DisplayDriver> driver)
{
  /* The display driver is the source of the drawing context which might be used by
   * path trace works. Make sure there is no graphics interop using resources from
   * the old display, as it might no longer be available after this call. */
  destroy_gpu_resources();

  if (driver) {
    display_ = make_unique<PathTraceDisplay>(move(driver));
  }
  else {
    display_ = nullptr;
  }
}

void PathTrace::clear_display()
{
  if (display_) {
    display_->clear();
  }
}

void PathTrace::draw()
{
  if (!display_) {
    return;
  }

  did_draw_after_reset_ |= display_->draw();
}

void PathTrace::flush_display()
{
  if (!display_) {
    return;
  }

  display_->flush();
}

void PathTrace::update_display(const RenderWork &render_work)
{
  if (!render_work.display.update) {
    return;
  }

  if (!display_ && !output_driver_) {
    VLOG_WORK << "Ignore display update.";
    return;
  }

  if (full_params_.width == 0 || full_params_.height == 0) {
    VLOG_WORK << "Skipping PathTraceDisplay update due to 0 size of the render buffer.";
    return;
  }

  const double start_time = time_dt();

  if (output_driver_) {
    VLOG_WORK << "Invoke buffer update callback.";

    PathTraceTile tile(*this);
    output_driver_->update_render_tile(tile);
  }

  if (display_) {
    VLOG_WORK << "Perform copy to GPUDisplay work.";

    const int texture_width = render_state_.effective_big_tile_params.window_width;
    const int texture_height = render_state_.effective_big_tile_params.window_height;
    if (!display_->update_begin(texture_width, texture_height)) {
      LOG(ERROR) << "Error beginning GPUDisplay update.";
      return;
    }

    const PassMode pass_mode = render_work.display.use_denoised_result &&
                                       render_state_.has_denoised_result ?
                                   PassMode::DENOISED :
                                   PassMode::NOISY;

    /* TODO(sergey): When using multi-device rendering map the GPUDisplay once and copy data from
     * all works in parallel. */
    const int num_samples = get_num_samples_in_buffer();
    if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
      big_tile_denoise_work_->copy_to_display(display_.get(), pass_mode, num_samples);
    }
    else {
      for (auto &&path_trace_work : path_trace_works_) {
        path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
      }
    }

    display_->update_end();
  }

  render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
}

void PathTrace::rebalance(const RenderWork &render_work)
{
  if (!render_work.rebalance) {
    return;
  }

  const int num_works = path_trace_works_.size();

  if (num_works == 1) {
    VLOG_WORK << "Ignoring rebalance work due to single device render.";
    return;
  }

  const double start_time = time_dt();

  if (VLOG_IS_ON(3)) {
    VLOG_WORK << "Perform rebalance work.";
    VLOG_WORK << "Per-device path tracing time (seconds):";
    for (int i = 0; i < num_works; ++i) {
      VLOG_WORK << path_trace_works_[i]->get_device()->info.description << ": "
                << work_balance_infos_[i].time_spent;
    }
  }

  const bool did_rebalance = work_balance_do_rebalance(work_balance_infos_);

  if (VLOG_IS_ON(3)) {
    VLOG_WORK << "Calculated per-device weights for works:";
    for (int i = 0; i < num_works; ++i) {
      VLOG_WORK << path_trace_works_[i]->get_device()->info.description << ": "
                << work_balance_infos_[i].weight;
    }
  }

  if (!did_rebalance) {
    VLOG_WORK << "Balance in path trace works did not change.";
    render_scheduler_.report_rebalance_time(render_work, time_dt() - start_time, false);
    return;
  }

  RenderBuffers big_tile_cpu_buffers(cpu_device_.get());
  big_tile_cpu_buffers.reset(render_state_.effective_big_tile_params);

  copy_to_render_buffers(&big_tile_cpu_buffers);

  render_state_.need_reset_params = true;
  update_work_buffer_params_if_needed(render_work);

  copy_from_render_buffers(&big_tile_cpu_buffers);

  render_scheduler_.report_rebalance_time(render_work, time_dt() - start_time, true);
}

void PathTrace::write_tile_buffer(const RenderWork &render_work)
{
  if (!render_work.tile.write) {
    return;
  }

  VLOG_WORK << "Write tile result.";

  render_state_.tile_written = true;

  const bool has_multiple_tiles = tile_manager_.has_multiple_tiles();

  /* Write render tile result, but only if not using tiled rendering.
   *
   * Tiles are written to a file during rendering, and written to the software at the end
   * of rendering (either when all tiles are finished, or when rendering was requested to be
   * canceled).
   *
   * The important thing is: the tile should be written to the software via the callback only
   * once. */
  if (!has_multiple_tiles) {
    VLOG_WORK << "Write tile result via buffer write callback.";
    tile_buffer_write();
  }
  /* Write tile to disk, so that the render work's render buffer can be re-used for the next tile.
   */
  else {
    VLOG_WORK << "Write tile result to disk.";
    tile_buffer_write_to_disk();
  }
}

void PathTrace::finalize_full_buffer_on_disk(const RenderWork &render_work)
{
  if (!render_work.full.write) {
    return;
  }

  VLOG_WORK << "Handle full-frame render buffer work.";

  if (!tile_manager_.has_written_tiles()) {
    VLOG_WORK << "No tiles on disk.";
    return;
  }

  /* Make sure writing to the file is fully finished.
   * This will include writing all possible missing tiles, ensuring validity of the file. */
  tile_manager_.finish_write_tiles();

  /* NOTE: The rest of full-frame post-processing (such as full-frame denoising) will be done after
   * all scenes and layers are rendered by the Session (which happens after freeing Session memory,
   * so that we never hold scene and full-frame buffer in memory at the same time). */
}

void PathTrace::cancel()
{
  thread_scoped_lock lock(render_cancel_.mutex);

  render_cancel_.is_requested = true;

  while (render_cancel_.is_rendering) {
    render_cancel_.condition.wait(lock);
  }

  render_cancel_.is_requested = false;
}

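/* The handshake with `render()`: `cancel()` raises `is_requested` under the mutex, then
 * sleeps on the condition variable until the rendering thread clears `is_rendering` and
 * calls `notify_one()`. Work already in flight observes `is_requested` between pipeline
 * stages and winds down early. */
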
int PathTrace::get_num_samples_in_buffer()
{
  return render_scheduler_.get_num_rendered_samples();
}

bool PathTrace::is_cancel_requested()
{
  if (render_cancel_.is_requested) {
    return true;
  }

  if (progress_ != nullptr) {
    if (progress_->get_cancel()) {
      return true;
    }
  }

  return false;
}

void PathTrace::tile_buffer_write()
{
  if (!output_driver_) {
    return;
  }

  PathTraceTile tile(*this);
  output_driver_->write_render_tile(tile);
}

void PathTrace::tile_buffer_read()
{
  if (!device_scene_->data.bake.use) {
    return;
  }

  if (!output_driver_) {
    return;
  }

  /* Read buffers back from device. */
  parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
    path_trace_work->copy_render_buffers_from_device();
  });

  /* Read (subset of) passes from output driver. */
  PathTraceTile tile(*this);
  if (output_driver_->read_render_tile(tile)) {
    /* Copy buffers to device again. */
    parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
      path_trace_work->copy_render_buffers_to_device();
    });
  }
}

void PathTrace::tile_buffer_write_to_disk()
{
  /* Sample count pass is required to support per-tile partial results stored in the file. */
  DCHECK_NE(big_tile_params_.get_pass_offset(PASS_SAMPLE_COUNT), PASS_UNUSED);

  const int num_rendered_samples = render_scheduler_.get_num_rendered_samples();

  if (num_rendered_samples == 0) {
    /* The tile has zero samples, no need to write it. */
    return;
  }

  /* Get access to the CPU-side render buffers of the current big tile. */
  RenderBuffers *buffers;
  RenderBuffers big_tile_cpu_buffers(cpu_device_.get());

  if (path_trace_works_.size() == 1) {
    path_trace_works_[0]->copy_render_buffers_from_device();
    buffers = path_trace_works_[0]->get_render_buffers();
  }
  else {
    big_tile_cpu_buffers.reset(render_state_.effective_big_tile_params);
    copy_to_render_buffers(&big_tile_cpu_buffers);

    buffers = &big_tile_cpu_buffers;
  }

  if (!tile_manager_.write_tile(*buffers)) {
    device_->set_error("Error writing tile to file");
  }
}

void PathTrace::progress_update_if_needed(const RenderWork &render_work)
{
  if (progress_ != nullptr) {
    const int2 tile_size = get_render_tile_size();
    const uint64_t num_samples_added = uint64_t(tile_size.x) * tile_size.y *
                                       render_work.path_trace.num_samples;
    const int current_sample = render_work.path_trace.start_sample +
                               render_work.path_trace.num_samples -
                               render_work.path_trace.sample_offset;
    progress_->add_samples(num_samples_added, current_sample);
  }

  if (progress_update_cb) {
    progress_update_cb();
  }
}

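/* For example, a 1920x1080 tile path traced with 10 new samples adds
 * 1920 * 1080 * 10 = 20736000 pixel-samples to the progress, while the "current sample"
 * shown to the user is simply start_sample + num_samples - sample_offset. */
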
void PathTrace::progress_set_status(const string &status, const string &substatus)
{
  if (progress_ != nullptr) {
    progress_->set_status(status, substatus);
  }
}

void PathTrace::copy_to_render_buffers(RenderBuffers *render_buffers)
{
  parallel_for_each(path_trace_works_,
                    [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
                      path_trace_work->copy_to_render_buffers(render_buffers);
                    });
  render_buffers->copy_to_device();
}

void PathTrace::copy_from_render_buffers(RenderBuffers *render_buffers)
{
  render_buffers->copy_from_device();
  parallel_for_each(path_trace_works_,
                    [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
                      path_trace_work->copy_from_render_buffers(render_buffers);
                    });
}

bool PathTrace::copy_render_tile_from_device()
{
  if (full_frame_state_.render_buffers) {
    /* Full-frame buffer is always allocated on CPU. */
    return true;
  }

  if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
    return big_tile_denoise_work_->copy_render_buffers_from_device();
  }

  bool success = true;

  parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
    if (!success) {
      return;
    }
    if (!path_trace_work->copy_render_buffers_from_device()) {
      success = false;
    }
  });

  return success;
}

static string get_layer_view_name(const RenderBuffers &buffers)
{
  string result;

  if (buffers.params.layer.size()) {
    result += string(buffers.params.layer);
  }

  if (buffers.params.view.size()) {
    if (!result.empty()) {
      result += ", ";
    }
    result += string(buffers.params.view);
  }

  return result;
}

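/* For example, a layer "View Layer" with a view "left" yields "View Layer, left";
 * whichever part is missing is simply omitted. */
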
void PathTrace::process_full_buffer_from_disk(string_view filename)
{
  VLOG_WORK << "Processing full frame buffer file " << filename;

  progress_set_status("Reading full buffer from disk");

  RenderBuffers full_frame_buffers(cpu_device_.get());

  DenoiseParams denoise_params;
  if (!tile_manager_.read_full_buffer_from_disk(filename, &full_frame_buffers, &denoise_params)) {
    const string error_message = "Error reading tiles from file";
    if (progress_) {
      progress_->set_error(error_message);
      progress_->set_cancel(error_message);
    }
    else {
      LOG(ERROR) << error_message;
    }
    return;
  }

  const string layer_view_name = get_layer_view_name(full_frame_buffers);

  render_state_.has_denoised_result = false;

  if (denoise_params.use) {
    progress_set_status(layer_view_name, "Denoising");

    /* Re-use the denoiser as much as possible, avoiding possible device re-initialization.
     *
     * It will not conflict with the regular rendering as:
     * - Rendering is supposed to be finished here.
     * - The next rendering will go via Session's `run_update_for_next_iteration` which will
     *   ensure proper denoiser is used. */
    set_denoiser_params(denoise_params);

    /* Number of samples doesn't matter too much, since the sample count pass will be used. */
    denoiser_->denoise_buffer(full_frame_buffers.params, &full_frame_buffers, 0, false);

    render_state_.has_denoised_result = true;
  }

  full_frame_state_.render_buffers = &full_frame_buffers;

  progress_set_status(layer_view_name, "Finishing");

  /* Write the full result pretending that there is a single tile.
   * Requires some state change, but allows using the same communication API with the software. */
  tile_buffer_write();

  full_frame_state_.render_buffers = nullptr;
}

int PathTrace::get_num_render_tile_samples() const
{
  if (full_frame_state_.render_buffers) {
    return full_frame_state_.render_buffers->params.samples;
  }

  return render_scheduler_.get_num_rendered_samples();
}

bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
                                       const PassAccessor::Destination &destination)
{
  if (full_frame_state_.render_buffers) {
    return pass_accessor.get_render_tile_pixels(full_frame_state_.render_buffers, destination);
  }

  if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
    return big_tile_denoise_work_->get_render_tile_pixels(pass_accessor, destination);
  }

  bool success = true;

  parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
    if (!success) {
      return;
    }
    if (!path_trace_work->get_render_tile_pixels(pass_accessor, destination)) {
      success = false;
    }
  });

  return success;
}

bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor,
                                       const PassAccessor::Source &source)
{
  bool success = true;

  parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
    if (!success) {
      return;
    }
    if (!path_trace_work->set_render_tile_pixels(pass_accessor, source)) {
      success = false;
    }
  });

  return success;
}

int2 PathTrace::get_render_tile_size() const
{
  if (full_frame_state_.render_buffers) {
    return make_int2(full_frame_state_.render_buffers->params.window_width,
                     full_frame_state_.render_buffers->params.window_height);
  }

  const Tile &tile = tile_manager_.get_current_tile();
  return make_int2(tile.window_width, tile.window_height);
}

int2 PathTrace::get_render_tile_offset() const
{
  if (full_frame_state_.render_buffers) {
    return make_int2(0, 0);
  }

  const Tile &tile = tile_manager_.get_current_tile();
  return make_int2(tile.x + tile.window_x, tile.y + tile.window_y);
}

int2 PathTrace::get_render_size() const
{
  return tile_manager_.get_size();
}

const BufferParams &PathTrace::get_render_tile_params() const
{
  if (full_frame_state_.render_buffers) {
    return full_frame_state_.render_buffers->params;
  }

  return big_tile_params_;
}

bool PathTrace::has_denoised_result() const
{
  return render_state_.has_denoised_result;
}

void PathTrace::destroy_gpu_resources()
{
  /* Destroy any GPU resource which was used for graphics interop.
   * Need to have access to the PathTraceDisplay as it is the only source of drawing context which
   * is used for interop. */
  if (display_) {
    for (auto &&path_trace_work : path_trace_works_) {
      path_trace_work->destroy_gpu_resources(display_.get());
    }

    if (big_tile_denoise_work_) {
      big_tile_denoise_work_->destroy_gpu_resources(display_.get());
    }
  }
}

/* --------------------------------------------------------------------
 * Report generation.
 */

static const char *device_type_for_description(const DeviceType type)
{
  switch (type) {
    case DEVICE_NONE:
      return "None";

    case DEVICE_CPU:
      return "CPU";
    case DEVICE_CUDA:
      return "CUDA";
    case DEVICE_OPTIX:
      return "OptiX";
    case DEVICE_HIP:
      return "HIP";
    case DEVICE_ONEAPI:
      return "oneAPI";
    case DEVICE_DUMMY:
      return "Dummy";
    case DEVICE_MULTI:
      return "Multi";
    case DEVICE_METAL:
      return "Metal";
  }

  return "UNKNOWN";
}

/* Construct description of the device which will appear in the full report. */
/* TODO(sergey): Consider making it a more reusable utility. */
static string full_device_info_description(const DeviceInfo &device_info)
{
  string full_description = device_info.description;

  full_description += " (" + string(device_type_for_description(device_info.type)) + ")";

  if (device_info.display_device) {
    full_description += " (display)";
  }

  if (device_info.type == DEVICE_CPU) {
    full_description += " (" + to_string(device_info.cpu_threads) + " threads)";
  }

  full_description += " [" + device_info.id + "]";

  return full_description;
}

/* Construct a string which will contain information about devices, possibly multiple of the
 * devices.
 *
 * In the simple case the result looks like:
 *
 *   Message: Full Device Description
 *
 * If there are multiple devices then the result looks like:
 *
 *   Message: Full First Device Description
 *            Full Second Device Description
 *
 * Note that the newlines are placed in a way so that the result can be easily concatenated to the
 * full report. */
static string device_info_list_report(const string &message, const DeviceInfo &device_info)
{
  string result = "\n" + message + ": ";
  const string pad(message.length() + 2, ' ');

  if (device_info.multi_devices.empty()) {
    result += full_device_info_description(device_info) + "\n";
    return result;
  }

  bool is_first = true;
  for (const DeviceInfo &sub_device_info : device_info.multi_devices) {
    if (!is_first) {
      result += pad;
    }

    result += full_device_info_description(sub_device_info) + "\n";

    is_first = false;
  }

  return result;
}

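/* With hypothetical devices the multi-device result is padded so descriptions align:
 *
 *   Path tracing on: Example GPU (OptiX) [GPU_ID]
 *                    Example CPU (CPU) (16 threads) [CPU_ID]
 */
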
static string path_trace_devices_report(const vector<unique_ptr<PathTraceWork>> &path_trace_works)
{
  DeviceInfo device_info;
  device_info.type = DEVICE_MULTI;

  for (auto &&path_trace_work : path_trace_works) {
    device_info.multi_devices.push_back(path_trace_work->get_device()->info);
  }

  return device_info_list_report("Path tracing on", device_info);
}

static string denoiser_device_report(const Denoiser *denoiser)
{
  if (!denoiser) {
    return "";
  }

  if (!denoiser->get_params().use) {
    return "";
  }

  const Device *denoiser_device = denoiser->get_denoiser_device();
  if (!denoiser_device) {
    return "";
  }

  return device_info_list_report("Denoising on", denoiser_device->info);
}

string PathTrace::full_report() const
{
  string result = "\nFull path tracing report\n";

  result += path_trace_devices_report(path_trace_works_);
  result += denoiser_device_report(denoiser_.get());

  /* Report from the render scheduler, which includes:
   * - Render mode (interactive, offline, headless)
   * - Adaptive sampling and denoiser parameters
   * - Breakdown of timing. */
  result += render_scheduler_.full_report();

  return result;
}

CCL_NAMESPACE_END