Blender  V3.3
cuda/device_impl.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 #ifdef WITH_CUDA
5 
6 # include "device/cuda/kernel.h"
7 # include "device/cuda/queue.h"
8 # include "device/cuda/util.h"
9 # include "device/device.h"
10 
11 # include "util/map.h"
12 
13 # ifdef WITH_CUDA_DYNLOAD
14 # include "cuew.h"
15 # else
16 # include <cuda.h>
17 # include <cudaGL.h>
18 # endif
19 
21 
/* Forward declaration: in this header DeviceQueue is only named as the
 * pointee type returned by CUDADevice::gpu_queue_create(). */
22 class DeviceQueue;
23 
/* Device implementation for CUDA. This header only declares the class: the
 * CUDA handles it owns, its per-allocation bookkeeping and the Device
 * interface functions it overrides. All member functions are defined
 * elsewhere. */
24 class CUDADevice : public Device {
25 
  /* CUDAContextScope is a friend and may therefore access non-public members. */
26  friend class CUDAContextScope;
27 
28  public:
  /* CUDA handles; the CU* types come from <cuda.h>, or from cuew.h when
   * WITH_CUDA_DYNLOAD is defined. */
29  CUdevice cuDevice;
30  CUcontext cuContext;
31  CUmodule cuModule;
  /* NOTE(review): presumably amounts of device memory (in bytes) kept free for
   * textures and for working memory -- confirm in the implementation. */
32  size_t device_texture_headroom;
33  size_t device_working_headroom;
  /* NOTE(review): presumably set while textures are being moved to host memory,
   * see move_textures_to_host() -- confirm. */
34  bool move_texture_to_host;
  /* NOTE(review): presumably bytes of mapped host memory currently in use and
   * the maximum allowed -- confirm in init_host_memory(). */
35  size_t map_host_used;
36  size_t map_host_limit;
  /* NOTE(review): stored as int rather than bool; presumably filled from CUDA
   * device attribute queries -- confirm. */
37  int can_map_host;
38  int pitch_alignment;
  /* NOTE(review): presumably the CUDA device ordinal and its compute capability
   * encoded as an integer -- confirm the encoding in the constructor. */
39  int cuDevId;
40  int cuDevArchitecture;
  /* NOTE(review): presumably used by set_error() to treat the first reported
   * error specially -- confirm. */
41  bool first_error;
42 
  /* CUDA-side state kept per device_memory; instances are the mapped values of
   * CUDAMemMap below. */
43  struct CUDAMem {
  /* Starts with a zero texture object, a zero array handle and no mapped host
   * memory in use. */
44  CUDAMem() : texobject(0), array(0), use_mapped_host(false)
45  {
46  }
47 
48  CUtexObject texobject;
49  CUarray array;
50 
51  /* If true, a mapped host memory in shared_pointer is being used. */
52  bool use_mapped_host;
53  };
  /* Lookup from a device_memory pointer to its CUDAMem record. */
54  typedef map<device_memory *, CUDAMem> CUDAMemMap;
55  CUDAMemMap cuda_mem_map;
  /* NOTE(review): presumably must be held while accessing cuda_mem_map --
   * confirm at the use sites. */
56  thread_mutex cuda_mem_map_mutex;
57 
58  /* Bindless Textures */
59  device_vector<TextureInfo> texture_info;
  /* NOTE(review): presumably marks texture_info as needing a refresh through
   * load_texture_info() -- confirm. */
60  bool need_texture_info;
61 
62  CUDADeviceKernels kernels;
63 
  /* Static: can be called without a CUDADevice instance. */
64  static bool have_precompiled_kernels();
65 
66  virtual BVHLayoutMask get_bvh_layout_mask() const override;
67 
68  void set_error(const string &error) override;
69 
70  CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
71 
72  virtual ~CUDADevice();
73 
  /* The kernel_features parameter is intentionally left unnamed in this
   * declaration. */
74  bool support_device(const uint /*kernel_features*/);
75 
76  bool check_peer_access(Device *peer_device) override;
77 
78  bool use_adaptive_compilation();
79 
  /* Declared virtual but not marked override.
   * NOTE(review): presumably a customization point for derived devices --
   * confirm. */
80  virtual string compile_kernel_get_common_cflags(const uint kernel_features);
81 
  /* `base` defaults to "cuda" and `force_ptx` to false when omitted.
   * NOTE(review): the returned string is presumably the path of the compiled
   * kernel -- confirm in the definition. */
82  string compile_kernel(const uint kernel_features,
83  const char *name,
84  const char *base = "cuda",
85  bool force_ptx = false);
86 
87  virtual bool load_kernels(const uint kernel_features) override;
88 
89  void reserve_local_memory(const uint kernel_features);
90 
91  void init_host_memory();
92 
93  void load_texture_info();
94 
95  void move_textures_to_host(size_t size, bool for_texture);
96 
  /* generic_* helpers are not overrides of Device. generic_alloc() returns a
   * pointer to a CUDAMem record; `pitch_padding` defaults to 0.
   * NOTE(review): the returned pointer presumably refers to an entry of
   * cuda_mem_map and may be null on failure -- confirm. */
97  CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
98 
99  void generic_copy_to(device_memory &mem);
100 
101  void generic_free(device_memory &mem);
102 
  /* Memory functions overriding the Device interface. */
103  void mem_alloc(device_memory &mem) override;
104 
105  void mem_copy_to(device_memory &mem) override;
106 
107  void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
108 
109  void mem_zero(device_memory &mem) override;
110 
111  void mem_free(device_memory &mem) override;
112 
  /* The trailing size parameter is left unnamed in this declaration. */
113  device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
114 
115  virtual void const_copy_to(const char *name, void *host, size_t size) override;
116 
  /* global_* and tex_* alloc/free pairs are not overrides of Device. */
117  void global_alloc(device_memory &mem);
118 
119  void global_free(device_memory &mem);
120 
121  void tex_alloc(device_texture &mem);
122 
123  void tex_free(device_texture &mem);
124 
125  virtual bool should_use_graphics_interop() override;
126 
127  virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
128 
129  int get_num_multiprocessors();
130  int get_max_num_threads_per_multiprocessor();
131 
132  protected:
  /* Attribute query helpers. get_device_attribute() writes its result through
   * `value`; get_device_default_attribute() takes a fallback `default_value`.
   * NOTE(review): the bool presumably reports whether the query succeeded, and
   * the fallback is presumably returned when it did not -- confirm. */
133  bool get_device_attribute(CUdevice_attribute attribute, int *value);
134  int get_device_default_attribute(CUdevice_attribute attribute, int default_value);
135 };
136 
138 
139 #endif
unsigned int uint
Definition: BLI_sys_types.h:67
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
in reality light always falls off quadratically Particle Retrieve the data of the particle that spawned the object for example to give variation to multiple instances of an object Point Retrieve information about points in a point cloud Retrieve the edges of an object as it appears to Cycles topology will always appear triangulated Convert a blackbody temperature to an RGB value Normal Generate a perturbed normal from an RGB normal map image Typically used for faking highly detailed surfaces Generate an OSL shader from a file or text data block Image Sample an image file as a texture Sky Generate a procedural sky texture Noise Generate fractal Perlin noise Wave Generate procedural bands or rings with noise Voronoi Generate Worley noise based on the distance to random points Typically used to generate textures such as or biological cells Brick Generate a procedural texture producing bricks Texture Retrieve multiple types of texture coordinates nTypically used as inputs for texture nodes Vector Convert a or normal between and object coordinate space Combine Create a color from its and value channels Color Retrieve a color attribute
static DBVT_INLINE btScalar size(const btDbvtVolume &a)
Definition: btDbvt.cpp:52
SIMD_FORCE_INLINE const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:119
virtual BVHLayoutMask get_bvh_layout_mask() const =0
virtual void const_copy_to(const char *name, void *host, size_t size)=0
virtual void mem_zero(device_memory &mem)=0
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem)=0
virtual unique_ptr< DeviceQueue > gpu_queue_create()
virtual bool load_kernels(uint)
virtual bool check_peer_access(Device *)
virtual void mem_free(device_memory &mem)=0
virtual void set_error(const string &error)
virtual void mem_copy_to(device_memory &mem)=0
virtual device_ptr mem_alloc_sub_ptr(device_memory &, size_t, size_t)
virtual void mem_alloc(device_memory &mem)=0
virtual bool should_use_graphics_interop()
unsigned long long CUtexObject
Definition: cuda/compat.h:78
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
static void error(const char *str)
Definition: meshlaplacian.c:51
int BVHLayoutMask
Definition: params.h:47
CCL_NAMESPACE_BEGIN typedef std::mutex thread_mutex
Definition: thread.h:27
uint64_t device_ptr
Definition: util/types.h:43