Blender  V3.3
device/cuda/device.cpp
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #include "device/cuda/device.h"
5 
6 #include "util/log.h"
7 
8 #ifdef WITH_CUDA
10 # include "device/device.h"
11 
12 # include "util/string.h"
13 # include "util/windows.h"
14 #endif /* WITH_CUDA */
15 
17 
/* Report whether the CUDA backend is usable.
 *
 * - Built without WITH_CUDA: always returns false.
 * - Built with WITH_CUDA_DYNLOAD: initializes CUEW once and caches the outcome
 *   in function-local statics; later calls return the cached value. The result
 *   is true only when CUEW initializes successfully and either precompiled
 *   kernels or a CUDA compiler is found.
 * - Built with WITH_CUDA but without dynamic loading: always returns true.
 */
bool device_cuda_init()
{
#if !defined(WITH_CUDA)
  return false;
#elif defined(WITH_CUDA_DYNLOAD)
  static bool initialized = false;
  static bool result = false;

  if (initialized) {
    return result;
  }

  /* Mark as initialized up-front so a failed attempt is not retried. */
  initialized = true;

  const int cuew_result = cuewInit(CUEW_INIT_CUDA);
  if (cuew_result != CUEW_SUCCESS) {
    VLOG_WARNING << "CUEW initialization failed: "
                 << ((cuew_result == CUEW_ERROR_ATEXIT_FAILED) ?
                         "Error setting up atexit() handler" :
                         "Error opening the library");
    return result;
  }

  VLOG_INFO << "CUEW initialization succeeded";
  if (CUDADevice::have_precompiled_kernels()) {
    VLOG_INFO << "Found precompiled kernels";
    result = true;
  }
  else {
    /* Only look for a compiler when there are no precompiled kernels, and
     * query the path a single time for both the test and the log message. */
    const char *compiler_path = cuewCompilerPath();
    if (compiler_path != nullptr) {
      VLOG_INFO << "Found CUDA compiler " << compiler_path;
      result = true;
    }
    else {
      VLOG_INFO << "Neither precompiled kernels nor CUDA compiler was found,"
                << " unable to use CUDA";
    }
  }

  return result;
#else  /* WITH_CUDA_DYNLOAD */
  return true;
#endif /* WITH_CUDA_DYNLOAD */
}
58 
59 Device *device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
60 {
61 #ifdef WITH_CUDA
62  return new CUDADevice(info, stats, profiler);
63 #else
64  (void)info;
65  (void)stats;
66  (void)profiler;
67 
68  LOG(FATAL) << "Request to create CUDA device without compiled-in support. Should never happen.";
69 
70  return nullptr;
71 #endif
72 }
73 
74 #ifdef WITH_CUDA
75 static CUresult device_cuda_safe_init()
76 {
77 # ifdef _WIN32
78  __try {
79  return cuInit(0);
80  }
81  __except (EXCEPTION_EXECUTE_HANDLER) {
82  /* Ignore crashes inside the CUDA driver and hope we can
83  * survive even with corrupted CUDA installs. */
84  fprintf(stderr, "Cycles CUDA: driver crashed, continuing without CUDA.\n");
85  }
86 
87  return CUDA_ERROR_NO_DEVICE;
88 # else
89  return cuInit(0);
90 # endif
91 }
92 #endif /* WITH_CUDA */
93 
95 {
96 #ifdef WITH_CUDA
97  CUresult result = device_cuda_safe_init();
98  if (result != CUDA_SUCCESS) {
99  if (result != CUDA_ERROR_NO_DEVICE)
100  fprintf(stderr, "CUDA cuInit: %s\n", cuewErrorString(result));
101  return;
102  }
103 
104  int count = 0;
105  result = cuDeviceGetCount(&count);
106  if (result != CUDA_SUCCESS) {
107  fprintf(stderr, "CUDA cuDeviceGetCount: %s\n", cuewErrorString(result));
108  return;
109  }
110 
111  vector<DeviceInfo> display_devices;
112 
113  for (int num = 0; num < count; num++) {
114  char name[256];
115 
116  result = cuDeviceGetName(name, 256, num);
117  if (result != CUDA_SUCCESS) {
118  fprintf(stderr, "CUDA cuDeviceGetName: %s\n", cuewErrorString(result));
119  continue;
120  }
121 
122  int major;
123  cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, num);
124  if (major < 3) {
125  VLOG_INFO << "Ignoring device \"" << name
126  << "\", this graphics card is no longer supported.";
127  continue;
128  }
129 
130  DeviceInfo info;
131 
132  info.type = DEVICE_CUDA;
133  info.description = string(name);
134  info.num = num;
135 
136  info.has_nanovdb = true;
137  info.denoisers = 0;
138 
139  info.has_gpu_queue = true;
140 
141  /* Check if the device has P2P access to any other device in the system. */
142  for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
143  if (num != peer_num) {
144  int can_access = 0;
145  cuDeviceCanAccessPeer(&can_access, num, peer_num);
146  info.has_peer_memory = (can_access != 0);
147  }
148  }
149 
150  int pci_location[3] = {0, 0, 0};
151  cuDeviceGetAttribute(&pci_location[0], CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, num);
152  cuDeviceGetAttribute(&pci_location[1], CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, num);
153  cuDeviceGetAttribute(&pci_location[2], CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, num);
154  info.id = string_printf("CUDA_%s_%04x:%02x:%02x",
155  name,
156  (unsigned int)pci_location[0],
157  (unsigned int)pci_location[1],
158  (unsigned int)pci_location[2]);
159 
160  /* If device has a kernel timeout and no compute preemption, we assume
161  * it is connected to a display and will freeze the display while doing
162  * computations. */
163  int timeout_attr = 0, preempt_attr = 0;
164  cuDeviceGetAttribute(&timeout_attr, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, num);
165  cuDeviceGetAttribute(&preempt_attr, CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED, num);
166 
167  /* The CUDA driver reports compute preemption as not being available on
168  * Windows 10 even when it is, due to an issue in application profiles.
169  * Detect case where we expect it to be available and override. */
170  if (preempt_attr == 0 && (major >= 6) && system_windows_version_at_least(10, 17134)) {
171  VLOG_INFO << "Assuming device has compute preemption on Windows 10.";
172  preempt_attr = 1;
173  }
174 
175  if (timeout_attr && !preempt_attr) {
176  VLOG_INFO << "Device is recognized as display.";
177  info.description += " (Display)";
178  info.display_device = true;
179  display_devices.push_back(info);
180  }
181  else {
182  VLOG_INFO << "Device has compute preemption or is not used for display.";
183  devices.push_back(info);
184  }
185  VLOG_INFO << "Added device \"" << name << "\" with id \"" << info.id << "\".";
186  }
187 
188  if (!display_devices.empty())
189  devices.insert(devices.end(), display_devices.begin(), display_devices.end());
190 #else /* WITH_CUDA */
191  (void)devices;
192 #endif /* WITH_CUDA */
193 }
194 
196 {
197 #ifdef WITH_CUDA
198  CUresult result = device_cuda_safe_init();
199  if (result != CUDA_SUCCESS) {
200  if (result != CUDA_ERROR_NO_DEVICE) {
201  return string("Error initializing CUDA: ") + cuewErrorString(result);
202  }
203  return "No CUDA device found\n";
204  }
205 
206  int count;
207  result = cuDeviceGetCount(&count);
208  if (result != CUDA_SUCCESS) {
209  return string("Error getting devices: ") + cuewErrorString(result);
210  }
211 
212  string capabilities = "";
213  for (int num = 0; num < count; num++) {
214  char name[256];
215  if (cuDeviceGetName(name, 256, num) != CUDA_SUCCESS) {
216  continue;
217  }
218  capabilities += string("\t") + name + "\n";
219  int value;
220 # define GET_ATTR(attr) \
221  { \
222  if (cuDeviceGetAttribute(&value, CU_DEVICE_ATTRIBUTE_##attr, num) == CUDA_SUCCESS) { \
223  capabilities += string_printf("\t\tCU_DEVICE_ATTRIBUTE_" #attr "\t\t\t%d\n", value); \
224  } \
225  } \
226  (void)0
227  /* TODO(sergey): Strip all attributes which are not useful for us
228  * or does not depend on the driver.
229  */
230  GET_ATTR(MAX_THREADS_PER_BLOCK);
231  GET_ATTR(MAX_BLOCK_DIM_X);
232  GET_ATTR(MAX_BLOCK_DIM_Y);
233  GET_ATTR(MAX_BLOCK_DIM_Z);
234  GET_ATTR(MAX_GRID_DIM_X);
235  GET_ATTR(MAX_GRID_DIM_Y);
236  GET_ATTR(MAX_GRID_DIM_Z);
237  GET_ATTR(MAX_SHARED_MEMORY_PER_BLOCK);
238  GET_ATTR(SHARED_MEMORY_PER_BLOCK);
239  GET_ATTR(TOTAL_CONSTANT_MEMORY);
240  GET_ATTR(WARP_SIZE);
241  GET_ATTR(MAX_PITCH);
242  GET_ATTR(MAX_REGISTERS_PER_BLOCK);
243  GET_ATTR(REGISTERS_PER_BLOCK);
244  GET_ATTR(CLOCK_RATE);
245  GET_ATTR(TEXTURE_ALIGNMENT);
246  GET_ATTR(GPU_OVERLAP);
247  GET_ATTR(MULTIPROCESSOR_COUNT);
248  GET_ATTR(KERNEL_EXEC_TIMEOUT);
249  GET_ATTR(INTEGRATED);
250  GET_ATTR(CAN_MAP_HOST_MEMORY);
251  GET_ATTR(COMPUTE_MODE);
252  GET_ATTR(MAXIMUM_TEXTURE1D_WIDTH);
253  GET_ATTR(MAXIMUM_TEXTURE2D_WIDTH);
254  GET_ATTR(MAXIMUM_TEXTURE2D_HEIGHT);
255  GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH);
256  GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT);
257  GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH);
258  GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_WIDTH);
259  GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_HEIGHT);
260  GET_ATTR(MAXIMUM_TEXTURE2D_LAYERED_LAYERS);
261  GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_WIDTH);
262  GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_HEIGHT);
263  GET_ATTR(MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES);
264  GET_ATTR(SURFACE_ALIGNMENT);
265  GET_ATTR(CONCURRENT_KERNELS);
266  GET_ATTR(ECC_ENABLED);
267  GET_ATTR(TCC_DRIVER);
268  GET_ATTR(MEMORY_CLOCK_RATE);
269  GET_ATTR(GLOBAL_MEMORY_BUS_WIDTH);
270  GET_ATTR(L2_CACHE_SIZE);
271  GET_ATTR(MAX_THREADS_PER_MULTIPROCESSOR);
272  GET_ATTR(ASYNC_ENGINE_COUNT);
273  GET_ATTR(UNIFIED_ADDRESSING);
274  GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_WIDTH);
275  GET_ATTR(MAXIMUM_TEXTURE1D_LAYERED_LAYERS);
276  GET_ATTR(CAN_TEX2D_GATHER);
277  GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_WIDTH);
278  GET_ATTR(MAXIMUM_TEXTURE2D_GATHER_HEIGHT);
279  GET_ATTR(MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE);
280  GET_ATTR(MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE);
281  GET_ATTR(MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE);
282  GET_ATTR(TEXTURE_PITCH_ALIGNMENT);
283  GET_ATTR(MAXIMUM_TEXTURECUBEMAP_WIDTH);
284  GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH);
285  GET_ATTR(MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS);
286  GET_ATTR(MAXIMUM_SURFACE1D_WIDTH);
287  GET_ATTR(MAXIMUM_SURFACE2D_WIDTH);
288  GET_ATTR(MAXIMUM_SURFACE2D_HEIGHT);
289  GET_ATTR(MAXIMUM_SURFACE3D_WIDTH);
290  GET_ATTR(MAXIMUM_SURFACE3D_HEIGHT);
291  GET_ATTR(MAXIMUM_SURFACE3D_DEPTH);
292  GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_WIDTH);
293  GET_ATTR(MAXIMUM_SURFACE1D_LAYERED_LAYERS);
294  GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_WIDTH);
295  GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_HEIGHT);
296  GET_ATTR(MAXIMUM_SURFACE2D_LAYERED_LAYERS);
297  GET_ATTR(MAXIMUM_SURFACECUBEMAP_WIDTH);
298  GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH);
299  GET_ATTR(MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS);
300  GET_ATTR(MAXIMUM_TEXTURE1D_LINEAR_WIDTH);
301  GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_WIDTH);
302  GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_HEIGHT);
303  GET_ATTR(MAXIMUM_TEXTURE2D_LINEAR_PITCH);
304  GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH);
305  GET_ATTR(MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT);
306  GET_ATTR(COMPUTE_CAPABILITY_MAJOR);
307  GET_ATTR(COMPUTE_CAPABILITY_MINOR);
308  GET_ATTR(MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH);
309  GET_ATTR(STREAM_PRIORITIES_SUPPORTED);
310  GET_ATTR(GLOBAL_L1_CACHE_SUPPORTED);
311  GET_ATTR(LOCAL_L1_CACHE_SUPPORTED);
312  GET_ATTR(MAX_SHARED_MEMORY_PER_MULTIPROCESSOR);
313  GET_ATTR(MAX_REGISTERS_PER_MULTIPROCESSOR);
314  GET_ATTR(MANAGED_MEMORY);
315  GET_ATTR(MULTI_GPU_BOARD);
316  GET_ATTR(MULTI_GPU_BOARD_GROUP_ID);
317 # undef GET_ATTR
318  capabilities += "\n";
319  }
320 
321  return capabilities;
322 
323 #else /* WITH_CUDA */
324  return "";
325 #endif /* WITH_CUDA */
326 }
327 
DenoiserTypeMask denoisers
Definition: device/device.h:73
bool display_device
Definition: device/device.h:66
bool has_peer_memory
Definition: device/device.h:70
bool has_nanovdb
Definition: device/device.h:67
bool has_gpu_queue
Definition: device/device.h:71
DeviceType type
Definition: device/device.h:62
string description
Definition: device/device.h:63
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
void device_cuda_info(vector< DeviceInfo > &devices)
string device_cuda_capabilities()
Device * device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
CCL_NAMESPACE_BEGIN bool device_cuda_init()
@ DEVICE_CUDA
Definition: device/device.h:39
SyclQueue void void size_t num_bytes void
static bool initialized
Definition: gpu_init_exit.c:22
int count
#define VLOG_INFO
Definition: log.h:77
#define VLOG_WARNING
Definition: log.h:75
#define LOG(severity)
Definition: log.h:36
Vector< CPUDevice > devices
list of all CPUDevices. for every hardware thread an instance of CPUDevice is created
CCL_NAMESPACE_BEGIN string string_printf(const char *format,...)
Definition: string.cpp:22
CCL_NAMESPACE_BEGIN bool system_windows_version_at_least(int major, int build)
Definition: windows.cpp:12