Blender V3.3
util.mm
/* SPDX-License-Identifier: Apache-2.0
 * Copyright 2021-2022 Blender Foundation */

#ifdef WITH_METAL

#  include "device/metal/util.h"
#  include "device/metal/device_impl.h"
#  include "util/md5.h"
#  include "util/path.h"
#  include "util/string.h"
#  include "util/time.h"

#  include <IOKit/IOKitLib.h>
#  include <pwd.h>
#  include <sys/shm.h>
#  include <time.h>

CCL_NAMESPACE_BEGIN

string MetalInfo::get_device_name(id<MTLDevice> device)
{
  string device_name = [device.name UTF8String];
  if (get_device_vendor(device) == METAL_GPU_APPLE) {
    /* Append the GPU core count so we can distinguish between GPU variants in benchmarks. */
    int gpu_core_count = get_apple_gpu_core_count(device);
    device_name += string_printf(gpu_core_count ? " (GPU - %d cores)" : " (GPU)", gpu_core_count);
  }
  return device_name;
}

int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
  int core_count = 0;
  if (@available(macos 12.0, *)) {
    io_service_t gpu_service = IOServiceGetMatchingService(
        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
    if (CFNumberRef numberRef = (CFNumberRef)IORegistryEntryCreateCFProperty(
            gpu_service, CFSTR("gpu-core-count"), 0, 0)) {
      if (CFGetTypeID(numberRef) == CFNumberGetTypeID()) {
        CFNumberGetValue(numberRef, kCFNumberSInt32Type, &core_count);
      }
      CFRelease(numberRef);
    }
  }
  return core_count;
}

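/* Usage sketch, not part of the original file: ties the two helpers above together.
 * MTLCreateSystemDefaultDevice() is standard Metal API; log_default_gpu() is a hypothetical
 * helper. On Apple silicon, get_device_name() already folds the core count into the name,
 * e.g. "Apple M1 Max (GPU - 32 cores)"; on older macOS or non-Apple GPUs the count is 0. */
#  if 0
static void log_default_gpu()
{
  id<MTLDevice> device = MTLCreateSystemDefaultDevice();
  metal_printf("%s\n", MetalInfo::get_device_name(device).c_str());
  [device release];
}
#  endif
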
AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
  const char *device_name = [device.name UTF8String];
  if (strstr(device_name, "M1")) {
    return APPLE_M1;
  }
  else if (strstr(device_name, "M2")) {
    return APPLE_M2;
  }
  return APPLE_UNKNOWN;
}

MetalGPUVendor MetalInfo::get_device_vendor(id<MTLDevice> device)
{
  const char *device_name = [device.name UTF8String];
  if (strstr(device_name, "Intel")) {
    return METAL_GPU_INTEL;
  }
  else if (strstr(device_name, "AMD")) {
    return METAL_GPU_AMD;
  }
  else if (strstr(device_name, "Apple")) {
    return METAL_GPU_APPLE;
  }
  return METAL_GPU_UNKNOWN;
}

int MetalInfo::optimal_sort_partition_elements(id<MTLDevice> device)
{
  if (auto str = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS")) {
    return atoi(str);
  }

  /* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
   * sorting each partition by material. Partitioning into chunks of 65536 elements results in an
   * overall render time speedup of up to 15%. */
  if (get_device_vendor(device) == METAL_GPU_APPLE) {
    return 65536;
  }
  return 0;
}

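/* Usage sketch, not part of the original file: the environment variable consulted above lets a
 * test harness force a specific partition size when profiling. setenv() is POSIX; the value
 * shown is only an example, overriding the 65536-element default chosen for Apple GPUs. */
#  if 0
static void force_sort_partition_size()
{
  setenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS", "32768", 1);
}
#  endif
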
vector<id<MTLDevice>> const &MetalInfo::get_usable_devices()
{
  static vector<id<MTLDevice>> usable_devices;
  static bool already_enumerated = false;

  if (already_enumerated) {
    return usable_devices;
  }

  metal_printf("Usable Metal devices:\n");
  for (id<MTLDevice> device in MTLCopyAllDevices()) {
    string device_name = get_device_name(device);
    MetalGPUVendor vendor = get_device_vendor(device);
    bool usable = false;

    if (@available(macos 12.2, *)) {
      usable |= (vendor == METAL_GPU_APPLE);
    }

    if (@available(macos 12.3, *)) {
      usable |= (vendor == METAL_GPU_AMD);
    }

#  if defined(MAC_OS_VERSION_13_0)
    if (@available(macos 13.0, *)) {
      usable |= (vendor == METAL_GPU_INTEL);
    }
#  endif

    if (usable) {
      metal_printf("- %s\n", device_name.c_str());
      [device retain];
      usable_devices.push_back(device);
    }
    else {
      metal_printf("  (skipping \"%s\")\n", device_name.c_str());
    }
  }
  if (usable_devices.empty()) {
    metal_printf("   No usable Metal devices found\n");
  }
  already_enumerated = true;

  return usable_devices;
}

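/* Usage sketch, not part of the original file: the returned vector is a lazily built,
 * process-lifetime singleton, and each device in it was retained above, so callers can simply
 * iterate it. list_usable_gpus() is a hypothetical helper. */
#  if 0
static void list_usable_gpus()
{
  for (id<MTLDevice> device : MetalInfo::get_usable_devices()) {
    metal_printf("- %s\n", MetalInfo::get_device_name(device).c_str());
  }
}
#  endif
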
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
                                          id<MTLCommandBuffer> command_buffer,
                                          NSUInteger length,
                                          MTLResourceOptions options,
                                          const void *pointer,
                                          Stats &stats)
{
  id<MTLBuffer> buffer;

  MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >>
                                              MTLResourceStorageModeShift);
  MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >>
                                                 MTLResourceCPUCacheModeShift);

  buffer_mutex.lock();
  for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end(); entry++) {
    MetalBufferListEntry bufferEntry = *entry;

    /* Check if the buffer matches the requested length, storage mode and CPU cache mode. */
    if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode &&
        cpuCacheMode == bufferEntry.buffer.cpuCacheMode) {
      buffer = bufferEntry.buffer;
      buffer_free_list.erase(entry);
      bufferEntry.command_buffer = command_buffer;
      buffer_in_use_list.push_back(bufferEntry);
      buffer_mutex.unlock();

      /* Copy over data */
      if (pointer) {
        memcpy(buffer.contents, pointer, length);
        if (bufferEntry.buffer.storageMode == MTLStorageModeManaged) {
          [buffer didModifyRange:NSMakeRange(0, length)];
        }
      }

      return buffer;
    }
  }
  // NSLog(@"Creating buffer of length %lu (%lu)", length, frameCount);
  if (pointer) {
    buffer = [device newBufferWithBytes:pointer length:length options:options];
  }
  else {
    buffer = [device newBufferWithLength:length options:options];
  }

  MetalBufferListEntry buffer_entry(buffer, command_buffer);

  stats.mem_alloc(buffer.allocatedSize);

  total_temp_mem_size += buffer.allocatedSize;
  buffer_in_use_list.push_back(buffer_entry);
  buffer_mutex.unlock();

  return buffer;
}

void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
  assert(command_buffer);
  thread_scoped_lock lock(buffer_mutex);
  /* Return buffers that were in flight on the completed command buffer to the free pool. */
  for (auto entry = buffer_in_use_list.begin(); entry != buffer_in_use_list.end();) {
    MetalBufferListEntry buffer_entry = *entry;
    if (buffer_entry.command_buffer == command_buffer) {
      entry = buffer_in_use_list.erase(entry);
      buffer_entry.command_buffer = nil;
      buffer_free_list.push_back(buffer_entry);
    }
    else {
      entry++;
    }
  }
}

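/* Usage sketch, not part of the original file: the intended pool lifecycle. A buffer fetched
 * with get_buffer() stays tied to its command buffer; a completion handler (standard Metal
 * API) then hands the completed command buffer back to process_command_buffer_completion(),
 * which moves the buffer to the free list for reuse. `pool`, `queue` and `stats` are assumed
 * to exist; submit_with_pooled_buffer() is a hypothetical helper. */
#  if 0
static void submit_with_pooled_buffer(MetalBufferPool *pool,
                                      id<MTLDevice> device,
                                      id<MTLCommandQueue> queue,
                                      Stats &stats)
{
  id<MTLCommandBuffer> command_buffer = [queue commandBuffer];
  float data[4] = {0.0f, 1.0f, 2.0f, 3.0f};
  id<MTLBuffer> buffer = pool->get_buffer(
      device, command_buffer, sizeof(data), MTLResourceStorageModeShared, data, stats);
  (void)buffer; /* ... encode GPU work that reads `buffer` ... */
  [command_buffer addCompletedHandler:^(id<MTLCommandBuffer> cb) {
    pool->process_command_buffer_completion(cb);
  }];
  [command_buffer commit];
}
#  endif
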
MetalBufferPool::~MetalBufferPool()
{
  thread_scoped_lock lock(buffer_mutex);
  /* Release all buffers held in the free pool. */
  for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end();) {
    MetalBufferListEntry buffer_entry = *entry;

    id<MTLBuffer> buffer = buffer_entry.buffer;
    // NSLog(@"Releasing buffer of length %lu (%lu) (%lu outstanding)", buffer.length, frameCount,
    // bufferFreeList.size());
    total_temp_mem_size -= buffer.allocatedSize;
    [buffer release];
    entry = buffer_free_list.erase(entry);
  }
}

CCL_NAMESPACE_END

#endif /* WITH_METAL */