Blender  V3.3
COM_GaussianYBlurOperation.cc
Go to the documentation of this file.
1 /* SPDX-License-Identifier: GPL-2.0-or-later
2  * Copyright 2011 Blender Foundation. */
3 
5 #include "COM_OpenCLDevice.h"
6 
7 namespace blender::compositor {
8 
/* NOTE(review): empty body. The line naming what this body belongs to is missing
 * from this listing (the embedded numbering skips 9); presumably it is the class
 * constructor -- confirm against the real source file. */
10 {
11 }
12 
/* NOTE(review): the signature line (embedded listing number 13) is missing from
 * this listing; presumably this is GaussianYBlurOperation::initialize_tile_data
 * -- confirm against the real source file.
 *
 * Between lock_mutex() and unlock_mutex(): calls update_gauss() when
 * sizeavailable_ is false, then asks input operation 0 for its tile data and
 * returns that pointer to the caller. */
14 {
15  lock_mutex();
16  if (!sizeavailable_) {
/* Size was not available earlier, so the table is (re)checked here. */
17  update_gauss();
18  }
/* The tile data comes from the first input; nullptr is passed as its rect argument. */
19  void *buffer = get_input_operation(0)->initialize_tile_data(nullptr);
20  unlock_mutex();
21  return buffer;
22 }
23 
/* NOTE(review): this listing is incomplete for this function. The signature line
 * (embedded number 24) and the statements numbered 26, 30, 32 and 34 are missing,
 * including the opening of the block closed at embedded line 38. Presumably this
 * is GaussianYBlurOperation::init_execution -- confirm against the real source.
 *
 * What is visible: the mutex is initialised, a non-negative radius is derived
 * from size_ * data_.sizey, and (SSE2 builds only) an SSE copy of the gauss table
 * is created from gausstab_ and filtersize_. */
25 {
27 
28  init_mutex();
29 
/* Radius is clamped below at 0. */
31  float rad = max_ff(size_ * data_.sizey, 0.0f);
33 
35 #ifdef BLI_HAVE_SSE2
36  gausstab_sse_ = BlurBaseOperation::convert_gausstab_sse(gausstab_, filtersize_);
37 #endif
38  }
39 }
40 
/* Prepares the gauss table on demand: does nothing unless gausstab_ is still
 * nullptr. When it runs, it calls update_size(), then derives the blur radius
 * from size_ * data_.sizey clamped to the range [0, MAX_GAUSSTAB_RADIUS].
 *
 * NOTE(review): the embedded listing numbers skip 47 and 49, so two statements
 * are missing from this listing. Nothing visible here assigns filtersize_ or
 * gausstab_ before they are handed to convert_gausstab_sse(); presumably the
 * missing lines do that -- confirm against the real source file. */
41 void GaussianYBlurOperation::update_gauss()
42 {
43  if (gausstab_ == nullptr) {
44  update_size();
45  float rad = max_ff(size_ * data_.sizey, 0.0f);
46  rad = min_ff(rad, MAX_GAUSSTAB_RADIUS);
48 
50 #ifdef BLI_HAVE_SSE2
/* SSE2 builds keep a second copy of the table for the vectorised pixel loop. */
51  gausstab_sse_ = BlurBaseOperation::convert_gausstab_sse(gausstab_, filtersize_);
52 #endif
53  }
54 }
55 
56 void GaussianYBlurOperation::execute_pixel(float output[4], int x, int y, void *data)
57 {
58  float ATTR_ALIGN(16) color_accum[4] = {0.0f, 0.0f, 0.0f, 0.0f};
59  float multiplier_accum = 0.0f;
60  MemoryBuffer *input_buffer = (MemoryBuffer *)data;
61  const rcti &input_rect = input_buffer->get_rect();
62  float *buffer = input_buffer->get_buffer();
63  int bufferwidth = input_buffer->get_width();
64  int bufferstartx = input_rect.xmin;
65  int bufferstarty = input_rect.ymin;
66 
67  int xmin = max_ii(x, input_rect.xmin);
68  int ymin = max_ii(y - filtersize_, input_rect.ymin);
69  int ymax = min_ii(y + filtersize_ + 1, input_rect.ymax);
70 
71  int index;
72  int step = get_step();
73  const int buffer_indexx = ((xmin - bufferstartx) * 4);
74 
75 #ifdef BLI_HAVE_SSE2
76  __m128 accum_r = _mm_load_ps(color_accum);
77  for (int ny = ymin; ny < ymax; ny += step) {
78  index = (ny - y) + filtersize_;
79  int bufferindex = buffer_indexx + ((ny - bufferstarty) * 4 * bufferwidth);
80  const float multiplier = gausstab_[index];
81  __m128 reg_a = _mm_load_ps(&buffer[bufferindex]);
82  reg_a = _mm_mul_ps(reg_a, gausstab_sse_[index]);
83  accum_r = _mm_add_ps(accum_r, reg_a);
84  multiplier_accum += multiplier;
85  }
86  _mm_store_ps(color_accum, accum_r);
87 #else
88  for (int ny = ymin; ny < ymax; ny += step) {
89  index = (ny - y) + filtersize_;
90  int bufferindex = buffer_indexx + ((ny - bufferstarty) * 4 * bufferwidth);
91  const float multiplier = gausstab_[index];
92  madd_v4_v4fl(color_accum, &buffer[bufferindex], multiplier);
93  multiplier_accum += multiplier;
94  }
95 #endif
96  mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
97 }
98 
/* NOTE(review): this listing is incomplete for this function. The first line of
 * the signature (embedded number 99) is missing, as are the lines numbered 121,
 * 122 and 124, so the calls around embedded lines 116-125 appear truncated.
 * The parameter list matches the execute_opencl entry in the page's
 * cross-reference index -- confirm against the real source file.
 *
 * What is visible: creates the "gaussian_yblur_operation_kernel" kernel, wraps
 * gausstab_ in an OpenCL buffer, binds kernel parameters, enqueues the kernel
 * for the output buffer and then releases the gauss-table buffer. The last
 * parameter (cl_kernels_to_clean_up) is unnamed and unused here. */
100  MemoryBuffer *output_memory_buffer,
101  cl_mem cl_output_buffer,
102  MemoryBuffer **input_memory_buffers,
103  std::list<cl_mem> *cl_mem_to_clean_up,
104  std::list<cl_kernel> * /*cl_kernels_to_clean_up*/)
105 {
106  cl_kernel gaussian_yblur_operation_kernel = device->COM_cl_create_kernel(
107  "gaussian_yblur_operation_kernel", nullptr);
108  cl_int filter_size = filtersize_;
109 
/* Read-only buffer over the host table of (2 * filtersize_ + 1) floats. The
 * final argument is nullptr, so no error code is retrieved from this call. */
110  cl_mem gausstab = clCreateBuffer(device->get_context(),
111  CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
112  sizeof(float) * (filtersize_ * 2 + 1),
113  gausstab_,
114  nullptr);
115 
116  device->COM_cl_attach_memory_buffer_to_kernel_parameter(gaussian_yblur_operation_kernel,
117  0,
118  1,
119  cl_mem_to_clean_up,
120  input_memory_buffers,
123  gaussian_yblur_operation_kernel, 2, cl_output_buffer);
125  gaussian_yblur_operation_kernel, 3, output_memory_buffer);
/* Kernel argument 4 is the filter size; argument 6 is the gauss table buffer. */
126  clSetKernelArg(gaussian_yblur_operation_kernel, 4, sizeof(cl_int), &filter_size);
127  device->COM_cl_attach_size_to_kernel_parameter(gaussian_yblur_operation_kernel, 5, this);
128  clSetKernelArg(gaussian_yblur_operation_kernel, 6, sizeof(cl_mem), &gausstab);
129 
130  device->COM_cl_enqueue_range(gaussian_yblur_operation_kernel, output_memory_buffer, 7, this);
131 
/* The table buffer is created locally above, so it is released here rather than
 * being added to cl_mem_to_clean_up. */
132  clReleaseMemObject(gausstab);
133 }
134 
/* NOTE(review): this listing is incomplete for this function. The signature line
 * (embedded number 135) and the statements numbered 137 and 140 are missing.
 * Presumably this is GaussianYBlurOperation::deinit_execution -- confirm against
 * the real source file.
 *
 * What is visible: resets gausstab_ to nullptr, frees and resets the SSE copy of
 * the table (SSE2 builds only), then tears down the mutex. */
136 {
138 
139  if (gausstab_) {
/* NOTE(review): the line before this one is missing from the listing; presumably
 * it frees gausstab_ before the pointer is cleared -- verify. */
141  gausstab_ = nullptr;
142  }
143 #ifdef BLI_HAVE_SSE2
144  if (gausstab_sse_) {
145  MEM_freeN(gausstab_sse_);
146  gausstab_sse_ = nullptr;
147  }
148 #endif
149 
150  deinit_mutex();
151 }
152 
/* NOTE(review): the first line of the signature (embedded number 153) is missing
 * from this listing. The parameter list matches the
 * determine_depending_area_of_interest entry in the page's cross-reference index
 * -- confirm against the real source file.
 *
 * Works out which input area is needed to produce the requested `input` area.
 * Returns true early if input operation 1 reports true for a fixed probe
 * rectangle; otherwise returns whatever the NodeOperation base implementation
 * returns for the computed rectangle. */
154  rcti *input, ReadBufferOperation *read_operation, rcti *output)
155 {
156  rcti new_input;
157 
158  if (!sizeavailable_) {
/* Size not available yet: query input operation 1 with a fixed (0,0)-(5,5) rectangle. */
159  rcti size_input;
160  size_input.xmin = 0;
161  size_input.ymin = 0;
162  size_input.xmax = 5;
163  size_input.ymax = 5;
164  NodeOperation *operation = this->get_input_operation(1);
165  if (operation->determine_depending_area_of_interest(&size_input, read_operation, output)) {
166  return true;
167  }
168  }
169  {
170  if (sizeavailable_ && gausstab_ != nullptr) {
/* Table is ready: keep the x range and grow the y range by filtersize_ + 1 on each side. */
171  new_input.xmax = input->xmax;
172  new_input.xmin = input->xmin;
173  new_input.ymax = input->ymax + filtersize_ + 1;
174  new_input.ymin = input->ymin - filtersize_ - 1;
175  }
176  else {
/* Otherwise request the whole area of this operation: (0,0) to (width, height). */
177  new_input.xmax = this->get_width();
178  new_input.xmin = 0;
179  new_input.ymax = this->get_height();
180  new_input.ymin = 0;
181  }
182  return NodeOperation::determine_depending_area_of_interest(&new_input, read_operation, output);
183  }
184 }
185 
186 } // namespace blender::compositor
#define ATTR_ALIGN(x)
MINLINE float max_ff(float a, float b)
MINLINE int min_ii(int a, int b)
MINLINE float min_ff(float a, float b)
MINLINE int max_ii(int a, int b)
MINLINE void mul_v4_v4fl(float r[4], const float a[4], float f)
MINLINE void madd_v4_v4fl(float r[4], const float a[4], float f)
#define MAX_GAUSSTAB_RADIUS
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint GLsizei GLsizei GLenum type _GL_VOID_RET _GL_VOID GLsizei GLenum GLenum const void *pixels _GL_VOID_RET _GL_VOID const void *pointer _GL_VOID_RET _GL_VOID GLdouble v _GL_VOID_RET _GL_VOID GLfloat v _GL_VOID_RET _GL_VOID GLint GLint i2 _GL_VOID_RET _GL_VOID GLint j _GL_VOID_RET _GL_VOID GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble GLdouble GLdouble zFar _GL_VOID_RET _GL_UINT GLdouble *equation _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLenum GLfloat *v _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLfloat *values _GL_VOID_RET _GL_VOID GLushort *values _GL_VOID_RET _GL_VOID GLenum GLfloat *params _GL_VOID_RET _GL_VOID GLenum GLdouble *params _GL_VOID_RET _GL_VOID GLenum GLint *params _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_VOID GLsizei const void *pointer _GL_VOID_RET _GL_BOOL GLfloat param _GL_VOID_RET _GL_VOID GLint param _GL_VOID_RET 
_GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLushort pattern _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLint GLdouble GLdouble GLint GLint const GLdouble *points _GL_VOID_RET _GL_VOID GLdouble GLdouble u2 _GL_VOID_RET _GL_VOID GLdouble GLdouble GLint GLdouble GLdouble v2 _GL_VOID_RET _GL_VOID GLenum GLfloat param _GL_VOID_RET _GL_VOID GLenum GLint param _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLdouble ny
_GL_VOID GLfloat value _GL_VOID_RET _GL_VOID const GLuint GLboolean *residences _GL_BOOL_RET _GL_VOID GLsizei GLfloat GLfloat GLfloat GLfloat const GLubyte *bitmap _GL_VOID_RET _GL_VOID GLenum const void *lists _GL_VOID_RET _GL_VOID const GLdouble *equation _GL_VOID_RET _GL_VOID GLdouble GLdouble blue _GL_VOID_RET _GL_VOID GLfloat GLfloat blue _GL_VOID_RET _GL_VOID GLint GLint blue _GL_VOID_RET _GL_VOID GLshort GLshort blue _GL_VOID_RET _GL_VOID GLubyte GLubyte blue _GL_VOID_RET _GL_VOID GLuint GLuint blue _GL_VOID_RET _GL_VOID GLushort GLushort blue _GL_VOID_RET _GL_VOID GLbyte GLbyte GLbyte alpha _GL_VOID_RET _GL_VOID GLdouble GLdouble GLdouble alpha _GL_VOID_RET _GL_VOID GLfloat GLfloat GLfloat alpha _GL_VOID_RET _GL_VOID GLint GLint GLint alpha _GL_VOID_RET _GL_VOID GLshort GLshort GLshort alpha _GL_VOID_RET _GL_VOID GLubyte GLubyte GLubyte alpha _GL_VOID_RET _GL_VOID GLuint GLuint GLuint alpha _GL_VOID_RET _GL_VOID GLushort GLushort GLushort alpha _GL_VOID_RET _GL_VOID GLenum mode _GL_VOID_RET _GL_VOID GLint y
#define Y
Definition: GeomUtils.cpp:200
float * make_gausstab(float rad, int size)
bool determine_depending_area_of_interest(rcti *input, ReadBufferOperation *read_operation, rcti *output) override
void execute_opencl(OpenCLDevice *device, MemoryBuffer *output_memory_buffer, cl_mem cl_output_buffer, MemoryBuffer **input_memory_buffers, std::list< cl_mem > *cl_mem_to_clean_up, std::list< cl_kernel > *cl_kernels_to_clean_up) override
custom handle to add new tasks to the OpenCL command queue in order to execute a chunk on an GPUDevic...
void init_execution() override
initialize the execution
void execute_pixel(float output[4], int x, int y, void *data) override
a MemoryBuffer contains access to the data of a chunk
const rcti & get_rect() const
get the rect of this MemoryBuffer
const int get_width() const
get the width of this MemoryBuffer
float * get_buffer()
get the data of this MemoryBuffer
NodeOperation contains calculation logic.
NodeOperation * get_input_operation(int index)
virtual bool determine_depending_area_of_interest(rcti *input, ReadBufferOperation *read_operation, rcti *output)
virtual void * initialize_tile_data(rcti *)
device representing an GPU OpenCL device. an instance of this class represents a single cl_device
void COM_cl_attach_size_to_kernel_parameter(cl_kernel kernel, int offset_index, NodeOperation *operation)
cl_mem COM_cl_attach_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, int offset_index, std::list< cl_mem > *cleanup, MemoryBuffer **input_memory_buffers, SocketReader *reader)
void COM_cl_attach_output_memory_buffer_to_kernel_parameter(cl_kernel kernel, int parameter_index, cl_mem cl_output_memory_buffer)
void COM_cl_enqueue_range(cl_kernel kernel, MemoryBuffer *output_memory_buffer)
cl_kernel COM_cl_create_kernel(const char *kernelname, std::list< cl_kernel > *cl_kernels_to_clean_up)
void COM_cl_attach_memory_buffer_offset_to_kernel_parameter(cl_kernel kernel, int offset_index, MemoryBuffer *memory_buffers)
ccl_global float * buffer
ccl_global KernelShaderEvalInput ccl_global float * output
ccl_global KernelShaderEvalInput * input
void(* MEM_freeN)(void *vmemh)
Definition: mallocn.c:27
ccl_device_inline float3 ceil(const float3 &a)
Definition: math_float3.h:363
int ymin
Definition: DNA_vec_types.h:64
int ymax
Definition: DNA_vec_types.h:64
int xmin
Definition: DNA_vec_types.h:63
int xmax
Definition: DNA_vec_types.h:63