Blender  V3.3
kernel/device/hip/config.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2011-2022 Blender Foundation */
3 
4 /* Device data taken from HIP occupancy calculator.
5  *
6  * Terminology
7  * - HIP GPUs have multiple streaming multiprocessors
8  * - Each multiprocessor executes multiple thread blocks
9  * - Each thread block contains a number of threads, also known as the block size
10  * - Multiprocessors have a fixed number of registers, and the amount of registers
11  * used by each threads limits the number of threads per block.
12  */
13 
14 /* Launch Bound Definitions */
15 #define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
16 #define GPU_MULTIPROCESSOR_MAX_BLOCKS 64
17 #define GPU_BLOCK_MAX_THREADS 1024
18 #define GPU_THREAD_MAX_REGISTERS 255
19 
20 #define GPU_KERNEL_BLOCK_NUM_THREADS 1024
21 #define GPU_KERNEL_MAX_REGISTERS 64
22 
23 /* Compute number of threads per block and minimum blocks per multiprocessor
24  * given the maximum number of registers per thread. */
25 #define ccl_gpu_kernel(block_num_threads, thread_num_registers) \
26  extern "C" __global__ void __launch_bounds__(block_num_threads, \
27  GPU_MULTIPRESSOR_MAX_REGISTERS / \
28  (block_num_threads * thread_num_registers))
29 
30 #define ccl_gpu_kernel_threads(block_num_threads) \
31  extern "C" __global__ void __launch_bounds__(block_num_threads)
32 
33 #define ccl_gpu_kernel_signature(name, ...) kernel_gpu_##name(__VA_ARGS__)
34 #define ccl_gpu_kernel_postfix
35 
36 #define ccl_gpu_kernel_call(x) x
37 
38 /* Define a function object where "func" is the lambda body, and additional parameters are used to
39  * specify captured state */
40 #define ccl_gpu_kernel_lambda(func, ...) \
41  struct KernelLambda { \
42  __VA_ARGS__; \
43  __device__ int operator()(const int state) \
44  { \
45  return (func); \
46  } \
47  } ccl_gpu_kernel_lambda_pass
48 
49 /* sanity checks */
50 
51 #if GPU_KERNEL_BLOCK_NUM_THREADS > GPU_BLOCK_MAX_THREADS
52 # error "Maximum number of threads per block exceeded"
53 #endif
54 
55 #if GPU_MULTIPRESSOR_MAX_REGISTERS / (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS) > \
56  GPU_MULTIPROCESSOR_MAX_BLOCKS
57 # error "Maximum number of blocks per multiprocessor exceeded"
58 #endif
59 
60 #if GPU_KERNEL_MAX_REGISTERS > GPU_THREAD_MAX_REGISTERS
61 # error "Maximum number of registers per thread exceeded"
62 #endif