Blender  V3.3
device/metal/queue.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2021-2022 Blender Foundation */
3 
4 #pragma once
5 
6 #ifdef WITH_METAL
7 
8 # include "device/kernel.h"
9 # include "device/memory.h"
10 # include "device/queue.h"
11 
12 # include "device/metal/util.h"
14 
16 
17 class MetalDevice;
18 
19 /* Base class for Metal queues. */
20 class MetalDeviceQueue : public DeviceQueue {
21  public:
22  MetalDeviceQueue(MetalDevice *device);
23  ~MetalDeviceQueue();
24 
25  virtual int num_concurrent_states(const size_t) const override;
26  virtual int num_concurrent_busy_states() const override;
27  virtual int num_sort_partition_elements() const override;
28 
29  virtual void init_execution() override;
30 
31  virtual bool enqueue(DeviceKernel kernel,
32  const int work_size,
33  DeviceKernelArguments const &args) override;
34 
35  virtual bool synchronize() override;
36 
37  virtual void zero_to_device(device_memory &mem) override;
38  virtual void copy_to_device(device_memory &mem) override;
39  virtual void copy_from_device(device_memory &mem) override;
40 
41  protected:
42  void setup_capture();
43  void update_capture(DeviceKernel kernel);
44  void begin_capture();
45  void end_capture();
46  void prepare_resources(DeviceKernel kernel);
47 
48  id<MTLComputeCommandEncoder> get_compute_encoder(DeviceKernel kernel);
49  id<MTLBlitCommandEncoder> get_blit_encoder();
50 
51  MetalDevice *metal_device_;
52  MetalBufferPool temp_buffer_pool_;
53 
54  API_AVAILABLE(macos(11.0), ios(14.0))
55  MTLCommandBufferDescriptor *command_buffer_desc_ = nullptr;
56  id<MTLDevice> mtlDevice_ = nil;
57  id<MTLCommandQueue> mtlCommandQueue_ = nil;
58  id<MTLCommandBuffer> mtlCommandBuffer_ = nil;
59  id<MTLComputeCommandEncoder> mtlComputeEncoder_ = nil;
60  id<MTLBlitCommandEncoder> mtlBlitEncoder_ = nil;
61  API_AVAILABLE(macos(10.14), ios(14.0))
62  id<MTLSharedEvent> shared_event_ = nil;
63  API_AVAILABLE(macos(10.14), ios(14.0))
64  MTLSharedEventListener *shared_event_listener_ = nil;
65 
66  dispatch_queue_t event_queue_;
67  dispatch_semaphore_t wait_semaphore_;
68 
69  struct CopyBack {
70  void *host_pointer;
71  void *gpu_mem;
72  uint64_t size;
73  };
74  std::vector<CopyBack> copy_back_mem_;
75 
76  uint64_t shared_event_id_;
77  uint64_t command_buffers_submitted_ = 0;
78  uint64_t command_buffers_completed_ = 0;
79  Stats &stats_;
80 
81  void close_compute_encoder();
82  void close_blit_encoder();
83 
84  bool verbose_tracing_ = false;
85  bool label_command_encoders_ = false;
86 
87  /* Per-kernel profiling (see CYCLES_METAL_PROFILING). */
88 
89  struct TimingData {
91  int work_size;
92  uint64_t timing_id;
93  };
94  std::vector<TimingData> command_encoder_labels_;
95  API_AVAILABLE(macos(10.14), ios(14.0))
96  id<MTLSharedEvent> timing_shared_event_ = nil;
97  uint64_t timing_shared_event_id_;
98  uint64_t command_buffer_start_timing_id_;
99 
100  struct TimingStats {
101  double total_time = 0.0;
102  uint64_t total_work_size = 0;
103  uint64_t num_dispatches = 0;
104  };
105  TimingStats timing_stats_[DEVICE_KERNEL_NUM];
106  double last_completion_time_ = 0.0;
107 
108  /* .gputrace capture (see CYCLES_DEBUG_METAL_CAPTURE_...). */
109 
110  id<MTLCaptureScope> mtlCaptureScope_ = nil;
111  DeviceKernel capture_kernel_;
112  int capture_dispatch_counter_ = 0;
113  bool capture_samples_ = false;
114  int capture_reset_counter_ = 0;
115  bool is_capturing_ = false;
116  bool is_capturing_to_disk_ = false;
117  bool has_captured_to_disk_ = false;
118 };
119 
121 
122 #endif /* WITH_METAL */
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
virtual int num_sort_partition_elements() const
Definition: device/queue.h:110
virtual void copy_from_device(device_memory &mem)=0
virtual int num_concurrent_states(const size_t state_size) const =0
virtual void init_execution()=0
virtual void copy_to_device(device_memory &mem)=0
virtual int num_concurrent_busy_states() const =0
virtual bool synchronize()=0
virtual bool enqueue(DeviceKernel kernel, const int work_size, DeviceKernelArguments const &args)=0
virtual void zero_to_device(device_memory &mem)=0
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
SyclQueue void void size_t num_bytes SyclQueue void const char void *memory_device_pointer KernelContext int kernel
ccl_gpu_kernel_postfix ccl_global const int ccl_global float const int work_size
DeviceKernel
@ DEVICE_KERNEL_NUM
unsigned __int64 uint64_t
Definition: stdint.h:90