[gegl/opencl-ops: 11/14] Add OpenCL support for gaussian-blur
- From: Ãyvind KolÃs <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl/opencl-ops: 11/14] Add OpenCL support for gaussian-blur
- Date: Tue, 20 Mar 2012 13:50:42 +0000 (UTC)
commit d1f7524f1c24ee155a53459e3607916d905c297d
Author: Victor Oliveira <victormatheus gmail com>
Date: Sat Mar 17 23:09:47 2012 -0300
Add OpenCL support for gaussian-blur
operations/common/gaussian-blur.c | 231 ++++++++++++++++++++++++++++++++++++-
1 files changed, 228 insertions(+), 3 deletions(-)
---
diff --git a/operations/common/gaussian-blur.c b/operations/common/gaussian-blur.c
index 57b986b..15b7499 100644
--- a/operations/common/gaussian-blur.c
+++ b/operations/common/gaussian-blur.c
@@ -407,11 +407,231 @@ static void prepare (GeglOperation *operation)
area->left = area->right = ceil ( max (fir_radius_x, iir_radius_x));
area->top = area->bottom = ceil ( max (fir_radius_y, iir_radius_y));
+ gegl_operation_set_format (operation, "input",
+ babl_format ("RaGaBaA float"));
gegl_operation_set_format (operation, "output",
babl_format ("RaGaBaA float"));
#undef max
}
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+static const char* kernel_source =
+"float4 fir_get_mean_component_1D_CL(const global float4 *buf, \n"
+" int offset, \n"
+" const int delta_offset, \n"
+" constant float *cmatrix, \n"
+" const int matrix_length) \n"
+"{ \n"
+" float4 acc = 0.0f; \n"
+" int i; \n"
+" \n"
+" for(i=0; i<matrix_length; i++) \n"
+" { \n"
+" acc += buf[offset] * cmatrix[i]; \n"
+" offset += delta_offset; \n"
+" } \n"
+" return acc; \n"
+"} \n"
+" \n"
+"__kernel void fir_ver_blur_CL(const global float4 *src_buf, \n"
+" const int src_width, \n"
+" global float4 *dst_buf, \n"
+" constant float *cmatrix, \n"
+" const int matrix_length, \n"
+" const int yoff) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int gid = gidx + gidy * get_global_size(0); \n"
+" \n"
+" int radius = matrix_length / 2; \n"
+" int src_offset = gidx + (gidy - radius + yoff) * src_width; \n"
+" \n"
+" dst_buf[gid] = fir_get_mean_component_1D_CL( \n"
+" src_buf, src_offset, src_width, cmatrix, matrix_length); \n"
+"} \n"
+" \n"
+"__kernel void fir_hor_blur_CL(const global float4 *src_buf, \n"
+" const int src_width, \n"
+" global float4 *dst_buf, \n"
+" constant float *cmatrix, \n"
+" const int matrix_length, \n"
+" const int yoff) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" int gid = gidx + gidy * get_global_size(0); \n"
+" \n"
+" int radius = matrix_length / 2; \n"
+" int src_offset = gidy * src_width + (gidx - radius + yoff); \n"
+" \n"
+" dst_buf[gid] = fir_get_mean_component_1D_CL( \n"
+" src_buf, src_offset, 1, cmatrix, matrix_length); \n"
+"} \n";
+
+static gegl_cl_run_data *cl_data = NULL;
+
+static cl_int
+cl_gaussian_blur (cl_mem in_tex,
+ cl_mem out_tex,
+ cl_mem aux_tex,
+ size_t global_worksize,
+ const GeglRectangle *roi,
+ const GeglRectangle *src_rect,
+ const GeglRectangle *aux_rect,
+ gfloat *dmatrix_x,
+ gint matrix_length_x,
+ gint xoff,
+ gfloat *dmatrix_y,
+ gint matrix_length_y,
+ gint yoff)
+{
+ cl_int cl_err = 0;
+
+ size_t global_ws[2];
+ global_ws[0] = roi->width;
+ global_ws[1] = roi->height;
+
+ if (!cl_data)
+ {
+ const char *kernel_name[] = {"fir_ver_blur_CL", "fir_hor_blur_CL", NULL};
+ cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+ }
+
+ if (!cl_data) return 1;
+
+ cl_mem cl_matrix_x = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_ALLOC_HOST_PTR|CL_MEM_READ_ONLY,
+ matrix_length_x * sizeof(cl_float), NULL, &cl_err);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_matrix_x,
+ CL_TRUE, NULL, matrix_length_x * sizeof(cl_float), dmatrix_x,
+ NULL, NULL, NULL);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_mem cl_matrix_y = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
+ matrix_length_y * sizeof(cl_float), NULL, &cl_err);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueWriteBuffer(gegl_cl_get_command_queue(), cl_matrix_y,
+ CL_TRUE, NULL, matrix_length_y * sizeof(cl_float), dmatrix_y,
+ NULL, NULL, NULL);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ global_ws[0] = aux_rect->width;
+ global_ws[1] = aux_rect->height;
+
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&in_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&src_rect->width);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&aux_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_mem), (void*)&cl_matrix_x);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 4, sizeof(cl_int), (void*)&matrix_length_x);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 5, sizeof(cl_int), (void*)&xoff);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[1], 2,
+ NULL, global_ws, NULL,
+ 0, NULL, NULL);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ global_ws[0] = roi->width;
+ global_ws[1] = roi->height;
+
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&aux_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&aux_rect->width);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&out_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_mem), (void*)&cl_matrix_y);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int), (void*)&matrix_length_y);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int), (void*)&yoff);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[0], 2,
+ NULL, global_ws, NULL,
+ 0, NULL, NULL);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ gegl_clFinish(gegl_cl_get_command_queue ());
+
+ gegl_clReleaseMemObject(cl_matrix_x);
+ gegl_clReleaseMemObject(cl_matrix_y);
+ return cl_err;
+}
+
+static gboolean
+cl_process (GeglOperation *operation,
+ GeglBuffer *input,
+ GeglBuffer *output,
+ const GeglRectangle *result)
+{
+ const Babl *in_format = gegl_operation_get_format (operation, "input");
+ const Babl *out_format = gegl_operation_get_format (operation, "output");
+ gint err;
+ gint j;
+ cl_int cl_err;
+
+ GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+ GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+ gdouble B_x, b_x[4], B_y, b_y[4];
+ gdouble *cmatrix_x, *cmatrix_y;
+ gint cmatrix_len_x, cmatrix_len_y;
+
+ cmatrix_len_x = fir_gen_convolve_matrix (o->std_dev_x, &cmatrix_x);
+ cmatrix_len_y = fir_gen_convolve_matrix (o->std_dev_y, &cmatrix_y);
+
+ gfloat *fmatrix_x = g_new (gfloat, cmatrix_len_x);
+ gfloat *fmatrix_y = g_new (gfloat, cmatrix_len_y);
+
+ for(j=0; j<cmatrix_len_x; j++)
+ fmatrix_x[j] = (gfloat) cmatrix_x[j];
+
+ for(j=0; j<cmatrix_len_y; j++)
+ fmatrix_y[j] = (gfloat) cmatrix_y[j];
+
+ GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output, result, out_format, GEGL_CL_BUFFER_WRITE);
+ gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ,
+ op_area->left, op_area->right, op_area->top, op_area->bottom);
+ gint aux = gegl_buffer_cl_iterator_add_2 (i, NULL, result, in_format, GEGL_CL_BUFFER_AUX,
+ 0, 0, op_area->top, op_area->bottom);
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err) return FALSE;
+ for (j=0; j < i->n; j++)
+ {
+ cl_err = cl_gaussian_blur(i->tex[read][j],
+ i->tex[0][j],
+ i->tex[aux][j],
+ i->size[0][j],
+ &i->roi[0][j],
+ &i->roi[read][j],
+ &i->roi[aux][j],
+ fmatrix_x,
+ cmatrix_len_x,
+ op_area->left,
+ fmatrix_y,
+ cmatrix_len_y,
+ op_area->top);
+ if (cl_err != CL_SUCCESS)
+ {
+ g_warning("[OpenCL] Error in gegl:gaussian-blur");
+ return FALSE;
+ }
+ }
+ }
+
+ g_free (fmatrix_x);
+ g_free (fmatrix_y);
+
+ g_free (cmatrix_x);
+ g_free (cmatrix_y);
+ return TRUE;
+}
static gboolean
process (GeglOperation *operation,
@@ -436,14 +656,18 @@ process (GeglOperation *operation,
rect.y = result->y - op_area->top;
rect.height = result->height + op_area->top + op_area->bottom;
+ force_iir = o->filter && !strcmp (o->filter, "iir");
+ force_fir = o->filter && !strcmp (o->filter, "fir");
+
+ if (cl_state.is_accelerated && !force_iir)
+ if (cl_process(operation, input, output, result))
+ return TRUE;
+
temp_extend = rect;
temp_extend.x = result->x;
temp_extend.width = result->width;
temp = gegl_buffer_new (&temp_extend, babl_format ("RaGaBaA float"));
- force_iir = o->filter && !strcmp (o->filter, "iir");
- force_fir = o->filter && !strcmp (o->filter, "fir");
-
if ((force_iir || o->std_dev_x > 1.0) && !force_fir)
{
iir_young_find_constants (o->std_dev_x, &B, b);
@@ -489,6 +713,7 @@ gegl_chant_class_init (GeglChantClass *klass)
operation_class->prepare = prepare;
operation_class->categories = "blur";
+ operation_class->opencl_support = TRUE;
operation_class->name = "gegl:gaussian-blur";
operation_class->description =
_("Performs an averaging of neighbouring pixels with the "
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]