Blender  V3.3
oneapi/queue.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2021-2022 Intel Corporation */
3 
4 #ifdef WITH_ONEAPI
5 
6 # include "device/oneapi/queue.h"
8 # include "util/log.h"
9 # include "util/time.h"
10 # include <iomanip>
11 # include <vector>
12 
14 
16 
/* Small bookkeeping record holding one floating-point total and one counter.
 * Both members start at zero. Nothing in the visible part of this file reads
 * or writes it. */
struct KernelExecutionInfo {
  /* Running total; presumably accumulated elapsed time (units not shown here — confirm). */
  double elapsed_summary{0.0};
  /* Counter; presumably the number of times a kernel was enqueued — confirm against users. */
  int enqueue_count{0};
};
21 
22 /* OneapiDeviceQueue */
23 
24 OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
25  : DeviceQueue(device),
26  oneapi_device_(device),
27  oneapi_dll_(device->oneapi_dll_object()),
28  kernel_context_(nullptr)
29 {
30 }
31 
32 OneapiDeviceQueue::~OneapiDeviceQueue()
33 {
34  delete kernel_context_;
35 }
36 
37 int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
38 {
39  const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
40  oneapi_device_->get_max_num_threads_per_multiprocessor();
41  int num_states = max(8 * max_num_threads, 65536) * 16;
42 
43  VLOG_DEVICE_STATS << "GPU queue concurrent states: " << num_states << ", using up to "
44  << string_human_readable_size(num_states * state_size);
45 
46  return num_states;
47 }
48 
49 int OneapiDeviceQueue::num_concurrent_busy_states() const
50 {
51  const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
52  oneapi_device_->get_max_num_threads_per_multiprocessor();
53 
54  return 4 * max(8 * max_num_threads, 65536);
55 }
56 
58 {
59  oneapi_device_->load_texture_info();
60 
61  SyclQueue *device_queue = oneapi_device_->sycl_queue();
62  void *kg_dptr = (void *)oneapi_device_->kernel_globals_device_pointer();
63  assert(device_queue);
64  assert(kg_dptr);
65  kernel_context_ = new KernelContext{device_queue, kg_dptr};
66 
67  debug_init_execution();
68 }
69 
70 bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
71  const int signed_kernel_work_size,
72  DeviceKernelArguments const &_args)
73 {
74  if (oneapi_device_->have_error()) {
75  return false;
76  }
77 
78  void **args = const_cast<void **>(_args.values);
79 
80  debug_enqueue(kernel, signed_kernel_work_size);
81  assert(signed_kernel_work_size >= 0);
82  size_t kernel_work_size = (size_t)signed_kernel_work_size;
83 
84  size_t kernel_local_size = oneapi_dll_.oneapi_kernel_preferred_local_size(
85  kernel_context_->queue, (::DeviceKernel)kernel, kernel_work_size);
86  size_t uniformed_kernel_work_size = round_up(kernel_work_size, kernel_local_size);
87 
88  assert(kernel_context_);
89 
90  /* Call the oneAPI kernel DLL to launch the requested kernel. */
91  bool is_finished_ok = oneapi_dll_.oneapi_enqueue_kernel(
92  kernel_context_, kernel, uniformed_kernel_work_size, args);
93 
94  if (is_finished_ok == false) {
95  oneapi_device_->set_error("oneAPI kernel \"" + std::string(device_kernel_as_string(kernel)) +
96  "\" execution error: got runtime exception \"" +
97  oneapi_device_->oneapi_error_message() + "\"");
98  }
99 
100  return is_finished_ok;
101 }
102 
103 bool OneapiDeviceQueue::synchronize()
104 {
105  if (oneapi_device_->have_error()) {
106  return false;
107  }
108 
109  bool is_finished_ok = oneapi_dll_.oneapi_queue_synchronize(oneapi_device_->sycl_queue());
110  if (is_finished_ok == false)
111  oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
112  oneapi_device_->oneapi_error_message() + "\"");
113 
114  debug_synchronize();
115 
116  return !(oneapi_device_->have_error());
117 }
118 
119 void OneapiDeviceQueue::zero_to_device(device_memory &mem)
120 {
121  oneapi_device_->mem_zero(mem);
122 }
123 
124 void OneapiDeviceQueue::copy_to_device(device_memory &mem)
125 {
126  oneapi_device_->mem_copy_to(mem);
127 }
128 
129 void OneapiDeviceQueue::copy_from_device(device_memory &mem)
130 {
131  oneapi_device_->mem_copy_from(mem);
132 }
133 
135 
136 #endif /* WITH_ONEAPI */
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
CCL_NAMESPACE_BEGIN const char * device_kernel_as_string(DeviceKernel kernel)
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
static struct ImBuf * init_execution(const SeqRenderData *context, ImBuf *ibuf1, ImBuf *ibuf2, ImBuf *ibuf3)
Definition: effects.c:3519
int num_states
DeviceKernel
#define VLOG_DEVICE_STATS
Definition: log.h:83
string string_human_readable_size(size_t size)
Definition: string.cpp:229
void * values[MAX_ARGS]
Definition: device/queue.h:35
float max
ccl_device_inline size_t round_up(size_t x, size_t multiple)
Definition: util/types.h:56