[gegl] gaussian-blur-selective: Add CL implementation

From: Øyvind Kolås <ok src gnome org>
To: commits-list gnome org
Cc:
Subject: [gegl] gaussian-blur-selective: Add CL implementation
Date: Mon, 22 May 2017 21:17:52 +0000 (UTC)
commit b60aa0177857490c9e62e58ea9746004dc34e290
Author: Anton Gorenko <anton streamcomputing eu>
Date:   Fri May 12 16:37:25 2017 +0600

    gaussian-blur-selective: Add CL implementation

 opencl/gaussian-blur-selective.cl           |   46 +++++++++++
 opencl/gaussian-blur-selective.cl.h         |   48 +++++++++++
 operations/common/gaussian-blur-selective.c |  118 ++++++++++++++++++++++++++-
 3 files changed, 211 insertions(+), 1 deletions(-)
---
diff --git a/opencl/gaussian-blur-selective.cl b/opencl/gaussian-blur-selective.cl
new file mode 100644
index 0000000..34a96a4
--- /dev/null
+++ b/opencl/gaussian-blur-selective.cl
@@ -0,0 +1,46 @@
+kernel void
+cl_gblur_selective(global const float4 *in,
+                   global const float4 *delta,
+                   global       float4 *out,
+                          const float   radius,
+                          const float   max_delta)
+{
+  const int gidx       = get_global_id(0);
+  const int gidy       = get_global_id(1);
+  const int iradius    = (int)radius;
+  const int dst_width  = get_global_size(0);
+  const int src_width  = dst_width + iradius * 2;
+
+  const int center_gid1d = (gidy + iradius) * src_width + gidx + iradius;
+  const float4 center_pix = in[center_gid1d];
+  const float3 center_delta = delta[center_gid1d].xyz;
+
+  float3 accumulated = 0.0f;
+  float3 count       = 0.0f;
+
+  for (int v = -iradius; v <= iradius; v++)
+    {
+      for (int u = -iradius; u <= iradius; u++)
+        {
+          const int i = gidx + iradius + u;
+          const int j = gidy + iradius + v;
+          const int gid1d = i + j * src_width;
+
+          const float4 src_pix = in[gid1d];
+          const float3 delta_pix = delta[gid1d].xyz;
+
+          const float gaussian_weight = exp(-0.5f * (u * u + v * v) / radius);
+
+          const float weight = gaussian_weight * src_pix.w;
+          const float3 diff = center_delta - delta_pix;
+          const float3 w = convert_float3 (fabs (diff) <= max_delta);
+          accumulated += w * weight * src_pix.xyz;
+          count += w * weight;
+        }
+    }
+
+  const float3 out_v = select (center_pix.xyz,
+                               accumulated / count,
+                               count != 0.0f);
+  out[gidx + gidy * dst_width] = (float4)(out_v, center_pix.w);
+}
diff --git a/opencl/gaussian-blur-selective.cl.h b/opencl/gaussian-blur-selective.cl.h
new file mode 100644
index 0000000..ea41142
--- /dev/null
+++ b/opencl/gaussian-blur-selective.cl.h
@@ -0,0 +1,48 @@
+static const char* gaussian_blur_selective_cl_source =
+"kernel void                                                                   \n"
+"cl_gblur_selective(global const float4 *in,                                   \n"
+"                   global const float4 *delta,                                \n"
+"                   global       float4 *out,                                  \n"
+"                          const float   radius,                               \n"
+"                          const float   max_delta)                            \n"
+"{                                                                             \n"
+"  const int gidx       = get_global_id(0);                                    \n"
+"  const int gidy       = get_global_id(1);                                    \n"
+"  const int iradius    = (int)radius;                                         \n"
+"  const int dst_width  = get_global_size(0);                                  \n"
+"  const int src_width  = dst_width + iradius * 2;                             \n"
+"                                                                              \n"
+"  const int center_gid1d = (gidy + iradius) * src_width + gidx + iradius;     \n"
+"  const float4 center_pix = in[center_gid1d];                                 \n"
+"  const float3 center_delta = delta[center_gid1d].xyz;                        \n"
+"                                                                              \n"
+"  float3 accumulated = 0.0f;                                                  \n"
+"  float3 count       = 0.0f;                                                  \n"
+"                                                                              \n"
+"  for (int v = -iradius; v <= iradius; v++)                                   \n"
+"    {                                                                         \n"
+"      for (int u = -iradius; u <= iradius; u++)                               \n"
+"        {                                                                     \n"
+"          const int i = gidx + iradius + u;                                   \n"
+"          const int j = gidy + iradius + v;                                   \n"
+"          const int gid1d = i + j * src_width;                                \n"
+"                                                                              \n"
+"          const float4 src_pix = in[gid1d];                                   \n"
+"          const float3 delta_pix = delta[gid1d].xyz;                          \n"
+"                                                                              \n"
+"          const float gaussian_weight = exp(-0.5f * (u * u + v * v) / radius);\n"
+"                                                                              \n"
+"          const float weight = gaussian_weight * src_pix.w;                   \n"
+"          const float3 diff = center_delta - delta_pix;                       \n"
+"          const float3 w = convert_float3 (fabs (diff) <= max_delta);         \n"
+"          accumulated += w * weight * src_pix.xyz;                            \n"
+"          count += w * weight;                                                \n"
+"        }                                                                     \n"
+"    }                                                                         \n"
+"                                                                              \n"
+"  const float3 out_v = select (center_pix.xyz,                                \n"
+"                               accumulated / count,                           \n"
+"                               count != 0.0f);                                \n"
+"  out[gidx + gidy * dst_width] = (float4)(out_v, center_pix.w);               \n"
+"}                                                                             \n"
+;
diff --git a/operations/common/gaussian-blur-selective.c b/operations/common/gaussian-blur-selective.c
index fd477fe..ad6106b 100644
--- a/operations/common/gaussian-blur-selective.c
+++ b/operations/common/gaussian-blur-selective.c
@@ -218,6 +218,118 @@ gblur_selective (GeglBuffer          *input,
   return TRUE;
 }
 
+#include "opencl/gegl-cl.h"
+#include "gegl-buffer-cl-iterator.h"
+
+#include "opencl/gaussian-blur-selective.cl.h"
+
+static GeglClRunData *cl_data = NULL;
+
+static gboolean
+cl_gblur_selective (cl_mem                in,
+                    cl_mem                delta,
+                    cl_mem                out,
+                    size_t                global_worksize,
+                    const GeglRectangle  *roi,
+                    gfloat                radius,
+                    gfloat                max_delta)
+{
+  cl_int cl_err = 0;
+  size_t global_ws[2];
+
+  if (!cl_data)
+    {
+      const char *kernel_name[] = { "cl_gblur_selective", NULL };
+      cl_data = gegl_cl_compile_and_build (gaussian_blur_selective_cl_source,
+                                           kernel_name);
+    }
+
+  if (!cl_data)
+    return TRUE;
+
+  global_ws[0] = roi->width;
+  global_ws[1] = roi->height;
+
+  gegl_cl_set_kernel_args (cl_data->kernel[0],
+                           sizeof(cl_mem),     &in,
+                           sizeof(cl_mem),     &delta,
+                           sizeof(cl_mem),     &out,
+                           sizeof(cl_float),   &radius,
+                           sizeof(cl_float),   &max_delta,
+                           NULL);
+  CL_CHECK;
+
+  cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
+                                        cl_data->kernel[0], 2,
+                                        NULL, global_ws, NULL,
+                                        0, NULL, NULL);
+  CL_CHECK;
+
+  return FALSE;
+
+error:
+  return TRUE;
+}
+
+static gboolean
+cl_process (GeglOperation       *operation,
+            GeglBuffer          *input,
+            GeglBuffer          *aux,
+            GeglBuffer          *output,
+            const GeglRectangle *result)
+{
+  const Babl *in_format  = gegl_operation_get_format (operation, "input");
+  const Babl *aux_format = gegl_operation_get_format (operation, "aux");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
+  gint err;
+
+  GeglProperties *o = GEGL_PROPERTIES (operation);
+
+  GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,
+                                                         result,
+                                                         out_format,
+                                                         GEGL_CL_BUFFER_WRITE);
+
+  gint radius  = o->blur_radius;
+
+  gint read = gegl_buffer_cl_iterator_add_2 (i,
+                                             input,
+                                             result,
+                                             in_format,
+                                             GEGL_CL_BUFFER_READ,
+                                             radius, radius, radius, radius,
+                                             GEGL_ABYSS_CLAMP);
+
+  gint delta = !aux ?
+               read :
+               gegl_buffer_cl_iterator_add_2 (i,
+                                              aux,
+                                              result,
+                                              aux_format,
+                                              GEGL_CL_BUFFER_READ,
+                                              radius, radius, radius, radius,
+                                              GEGL_ABYSS_CLAMP);
+
+  while (gegl_buffer_cl_iterator_next (i, &err))
+    {
+      if (err)
+        return FALSE;
+
+      err = cl_gblur_selective(i->tex[read],
+                               i->tex[delta],
+                               i->tex[0],
+                               i->size[0],
+                               &i->roi[0],
+                               o->blur_radius,
+                               o->max_delta);
+
+      if (err)
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
 static gboolean
 process (GeglOperation       *operation,
          GeglBuffer          *input,
@@ -232,6 +344,10 @@ process (GeglOperation       *operation,
 
   compute = get_required_for_output (operation, "input", result);
 
+  if (gegl_operation_use_opencl (operation))
+    if (cl_process (operation, input, aux, output, result))
+      return TRUE;
+
   success = gblur_selective (input, &compute,
                              aux,
                              output, result,
@@ -251,7 +367,7 @@ gegl_op_class_init (GeglOpClass *klass)
   operation_class->prepare                   = prepare;
   operation_class->get_required_for_output   = get_required_for_output;
   operation_class->get_invalidated_by_change = get_invalidated_by_change;
-  operation_class->opencl_support            = FALSE;
+  operation_class->opencl_support            = TRUE;
 
   composer_class->process = process;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]