[gegl] gaussian-blur-selective: Add CL implementation
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] gaussian-blur-selective: Add CL implementation
- Date: Mon, 22 May 2017 21:17:52 +0000 (UTC)
commit b60aa0177857490c9e62e58ea9746004dc34e290
Author: Anton Gorenko <anton streamcomputing eu>
Date: Fri May 12 16:37:25 2017 +0600
gaussian-blur-selective: Add CL implementation
opencl/gaussian-blur-selective.cl | 46 +++++++++++
opencl/gaussian-blur-selective.cl.h | 48 +++++++++++
operations/common/gaussian-blur-selective.c | 118 ++++++++++++++++++++++++++-
3 files changed, 211 insertions(+), 1 deletions(-)
---
diff --git a/opencl/gaussian-blur-selective.cl b/opencl/gaussian-blur-selective.cl
new file mode 100644
index 0000000..34a96a4
--- /dev/null
+++ b/opencl/gaussian-blur-selective.cl
@@ -0,0 +1,46 @@
+kernel void
+cl_gblur_selective(global const float4 *in,
+ global const float4 *delta,
+ global float4 *out,
+ const float radius,
+ const float max_delta)
+{
+ const int gidx = get_global_id(0);
+ const int gidy = get_global_id(1);
+ const int iradius = (int)radius;
+ const int dst_width = get_global_size(0);
+ const int src_width = dst_width + iradius * 2;
+
+ const int center_gid1d = (gidy + iradius) * src_width + gidx + iradius;
+ const float4 center_pix = in[center_gid1d];
+ const float3 center_delta = delta[center_gid1d].xyz;
+
+ float3 accumulated = 0.0f;
+ float3 count = 0.0f;
+
+ for (int v = -iradius; v <= iradius; v++)
+ {
+ for (int u = -iradius; u <= iradius; u++)
+ {
+ const int i = gidx + iradius + u;
+ const int j = gidy + iradius + v;
+ const int gid1d = i + j * src_width;
+
+ const float4 src_pix = in[gid1d];
+ const float3 delta_pix = delta[gid1d].xyz;
+
+ const float gaussian_weight = exp(-0.5f * (u * u + v * v) / radius);
+
+ const float weight = gaussian_weight * src_pix.w;
+ const float3 diff = center_delta - delta_pix;
+ const float3 w = convert_float3 (fabs (diff) <= max_delta);
+ accumulated += w * weight * src_pix.xyz;
+ count += w * weight;
+ }
+ }
+
+ const float3 out_v = select (center_pix.xyz,
+ accumulated / count,
+ count != 0.0f);
+ out[gidx + gidy * dst_width] = (float4)(out_v, center_pix.w);
+}
diff --git a/opencl/gaussian-blur-selective.cl.h b/opencl/gaussian-blur-selective.cl.h
new file mode 100644
index 0000000..ea41142
--- /dev/null
+++ b/opencl/gaussian-blur-selective.cl.h
@@ -0,0 +1,48 @@
+static const char* gaussian_blur_selective_cl_source =
+"kernel void \n"
+"cl_gblur_selective(global const float4 *in, \n"
+" global const float4 *delta, \n"
+" global float4 *out, \n"
+" const float radius, \n"
+" const float max_delta) \n"
+"{ \n"
+" const int gidx = get_global_id(0); \n"
+" const int gidy = get_global_id(1); \n"
+" const int iradius = (int)radius; \n"
+" const int dst_width = get_global_size(0); \n"
+" const int src_width = dst_width + iradius * 2; \n"
+" \n"
+" const int center_gid1d = (gidy + iradius) * src_width + gidx + iradius; \n"
+" const float4 center_pix = in[center_gid1d]; \n"
+" const float3 center_delta = delta[center_gid1d].xyz; \n"
+" \n"
+" float3 accumulated = 0.0f; \n"
+" float3 count = 0.0f; \n"
+" \n"
+" for (int v = -iradius; v <= iradius; v++) \n"
+" { \n"
+" for (int u = -iradius; u <= iradius; u++) \n"
+" { \n"
+" const int i = gidx + iradius + u; \n"
+" const int j = gidy + iradius + v; \n"
+" const int gid1d = i + j * src_width; \n"
+" \n"
+" const float4 src_pix = in[gid1d]; \n"
+" const float3 delta_pix = delta[gid1d].xyz; \n"
+" \n"
+" const float gaussian_weight = exp(-0.5f * (u * u + v * v) / radius);\n"
+" \n"
+" const float weight = gaussian_weight * src_pix.w; \n"
+" const float3 diff = center_delta - delta_pix; \n"
+" const float3 w = convert_float3 (fabs (diff) <= max_delta); \n"
+" accumulated += w * weight * src_pix.xyz; \n"
+" count += w * weight; \n"
+" } \n"
+" } \n"
+" \n"
+" const float3 out_v = select (center_pix.xyz, \n"
+" accumulated / count, \n"
+" count != 0.0f); \n"
+" out[gidx + gidy * dst_width] = (float4)(out_v, center_pix.w); \n"
+"} \n"
+;
diff --git a/operations/common/gaussian-blur-selective.c b/operations/common/gaussian-blur-selective.c
index fd477fe..ad6106b 100644
--- a/operations/common/gaussian-blur-selective.c
+++ b/operations/common/gaussian-blur-selective.c
@@ -218,6 +218,118 @@ gblur_selective (GeglBuffer *input,
return TRUE;
}
+#include "opencl/gegl-cl.h"
+#include "gegl-buffer-cl-iterator.h"
+
+#include "opencl/gaussian-blur-selective.cl.h"
+
+static GeglClRunData *cl_data = NULL;
+
+static gboolean
+cl_gblur_selective (cl_mem in,
+ cl_mem delta,
+ cl_mem out,
+ size_t global_worksize,
+ const GeglRectangle *roi,
+ gfloat radius,
+ gfloat max_delta)
+{
+ cl_int cl_err = 0;
+ size_t global_ws[2];
+
+ if (!cl_data)
+ {
+ const char *kernel_name[] = { "cl_gblur_selective", NULL };
+ cl_data = gegl_cl_compile_and_build (gaussian_blur_selective_cl_source,
+ kernel_name);
+ }
+
+ if (!cl_data)
+ return TRUE;
+
+ global_ws[0] = roi->width;
+ global_ws[1] = roi->height;
+
+ gegl_cl_set_kernel_args (cl_data->kernel[0],
+ sizeof(cl_mem), &in,
+ sizeof(cl_mem), &delta,
+ sizeof(cl_mem), &out,
+ sizeof(cl_float), &radius,
+ sizeof(cl_float), &max_delta,
+ NULL);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
+ cl_data->kernel[0], 2,
+ NULL, global_ws, NULL,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+ return FALSE;
+
+error:
+ return TRUE;
+}
+
+static gboolean
+cl_process (GeglOperation *operation,
+ GeglBuffer *input,
+ GeglBuffer *aux,
+ GeglBuffer *output,
+ const GeglRectangle *result)
+{
+ const Babl *in_format = gegl_operation_get_format (operation, "input");
+ const Babl *aux_format = gegl_operation_get_format (operation, "aux");
+ const Babl *out_format = gegl_operation_get_format (operation, "output");
+ gint err;
+
+ GeglProperties *o = GEGL_PROPERTIES (operation);
+
+ GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,
+ result,
+ out_format,
+ GEGL_CL_BUFFER_WRITE);
+
+ gint radius = o->blur_radius;
+
+ gint read = gegl_buffer_cl_iterator_add_2 (i,
+ input,
+ result,
+ in_format,
+ GEGL_CL_BUFFER_READ,
+ radius, radius, radius, radius,
+ GEGL_ABYSS_CLAMP);
+
+ gint delta = !aux ?
+ read :
+ gegl_buffer_cl_iterator_add_2 (i,
+ aux,
+ result,
+ aux_format,
+ GEGL_CL_BUFFER_READ,
+ radius, radius, radius, radius,
+ GEGL_ABYSS_CLAMP);
+
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err)
+ return FALSE;
+
+ err = cl_gblur_selective(i->tex[read],
+ i->tex[delta],
+ i->tex[0],
+ i->size[0],
+ &i->roi[0],
+ o->blur_radius,
+ o->max_delta);
+
+ if (err)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
static gboolean
process (GeglOperation *operation,
GeglBuffer *input,
@@ -232,6 +344,10 @@ process (GeglOperation *operation,
compute = get_required_for_output (operation, "input", result);
+ if (gegl_operation_use_opencl (operation))
+ if (cl_process (operation, input, aux, output, result))
+ return TRUE;
+
success = gblur_selective (input, &compute,
aux,
output, result,
@@ -251,7 +367,7 @@ gegl_op_class_init (GeglOpClass *klass)
operation_class->prepare = prepare;
operation_class->get_required_for_output = get_required_for_output;
operation_class->get_invalidated_by_change = get_invalidated_by_change;
- operation_class->opencl_support = FALSE;
+ operation_class->opencl_support = TRUE;
composer_class->process = process;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]