Blender  V3.3
parallel_prefix_sum.h
Go to the documentation of this file.
1 /* SPDX-License-Identifier: Apache-2.0
2  * Copyright 2021-2022 Blender Foundation */
3 
4 #pragma once
5 
7 
8 /* Parallel prefix sum.
9  *
10  * TODO: actually make this work in parallel.
11  *
12  * This is used for an array the size of the number of shaders in the scene,
13  * which is usually not huge, so the serial loop might not be a significant bottleneck. */
14 
15 #include "util/atomic.h"
16 
/* Default block size associated with the prefix sum: 1024 when __HIP__ is
 * defined, 512 otherwise.
 * NOTE(review): presumably consumed by the host-side kernel launch code, which
 * is not visible in this file -- confirm. */
17 #ifdef __HIP__
18 # define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 1024
19 #else
20 # define GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE 512
21 #endif
22 
23 __device__ void gpu_parallel_prefix_sum(const int global_id,
24  ccl_global int *counter,
26  const int num_values)
27 {
28  if (global_id != 0) {
29  return;
30  }
31 
32  int offset = 0;
33  for (int i = 0; i < num_values; i++) {
34  const int new_offset = offset + counter[i];
35  prefix_sum[i] = offset;
36  counter[i] = 0;
37  offset = new_offset;
38  }
39 }
40 
#define ccl_global
Definition: cuda/compat.h:43
#define CCL_NAMESPACE_END
Definition: cuda/compat.h:9
ccl_gpu_kernel_postfix ccl_global int ccl_global int int num_values
ccl_gpu_kernel_postfix ccl_global int ccl_global int * prefix_sum
ccl_gpu_kernel_postfix ccl_global int * counter
ccl_gpu_kernel_postfix ccl_global float int int int int float bool int offset
#define __device__
Definition: metal/compat.h:248
__device__ void gpu_parallel_prefix_sum(const int global_id, ccl_global int *counter, ccl_global int *prefix_sum, const int num_values)