[gegl] Add OpenCL support for gegl:noise-reduction



commit 7e281f0977a38431e7bb275d6037a9052c94a543
Author: Victor Oliveira <victormatheus gmail com>
Date:   Sun Mar 18 01:32:12 2012 -0300

    Add OpenCL support for gegl:noise-reduction

 operations/common/noise-reduction.c |  223 +++++++++++++++++++++++++++++++++++
 1 files changed, 223 insertions(+), 0 deletions(-)
---
diff --git a/operations/common/noise-reduction.c b/operations/common/noise-reduction.c
index 067388f..21a963f 100644
--- a/operations/common/noise-reduction.c
+++ b/operations/common/noise-reduction.c
@@ -15,6 +15,8 @@
  *
  * Ali Alsam, Hans Jakob Rivertz, Ãyvind KolÃs (c) 2011
  */
+#include "config.h"
+#include <glib/gi18n-lib.h>
 
 #include "config.h"
 #include <glib/gi18n-lib.h>
@@ -147,6 +149,219 @@ static void prepare (GeglOperation *operation)
   gegl_operation_set_format (operation, "output", babl_format ("R'G'B'A float"));
 }
 
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+static const char* kernel_source =
+"#define NEIGHBOURS 8                                                              \n"
+"#define AXES       (NEIGHBOURS/2)                                                 \n"
+"                                                                                  \n"
+"#define POW2(a) ((a)*(a))                                                         \n"
+"                                                                                  \n"
+"#define GEN_METRIC(before, center, after) POW2((center) * 2 - (before) - (after)) \n"
+"                                                                                  \n"
+"#define BAIL_CONDITION(new,original) ((new) < (original))                         \n"
+"                                                                                  \n"
+"#define SYMMETRY(a)  (NEIGHBOURS - (a) - 1)                                       \n"
+"                                                                                  \n"
+"#define O(u,v) (((u)+((v) * (src_stride))))                                       \n"
+"                                                                                  \n"
+"__kernel void noise_reduction_cl (__global       float4 *src_buf,                 \n"
+"                                  int src_stride,                                 \n"
+"                                  __global       float4 *dst_buf,                 \n"
+"                                  int dst_stride)                                 \n"
+"{                                                                                 \n"
+"    int gidx = get_global_id(0);                                                  \n"
+"    int gidy = get_global_id(1);                                                  \n"
+"                                                                                  \n"
+"    __global float4 *center_pix = src_buf + (gidy + 1) * src_stride + gidx + 1;   \n"
+"    int dst_offset = dst_stride * gidy + gidx;                                    \n"
+"                                                                                  \n"
+"    int offsets[NEIGHBOURS] = {                                                   \n"
+"        O(-1, -1), O( 0, -1), O( 1, -1),                                          \n"
+"        O(-1,  0),            O( 1,  0),                                          \n"
+"        O(-1,  1), O( 0,  1), O( 1,  1)                                           \n"
+"    };                                                                            \n"
+"                                                                                  \n"
+"    float4 sum;                                                                   \n"
+"    int4   count;                                                                 \n"
+"    float4 cur;                                                                   \n"
+"    float4 metric_reference[AXES];                                                \n"
+"                                                                                  \n"
+"    for (int axis = 0; axis < AXES; axis++)                                       \n"
+"      {                                                                           \n"
+"        float4 before_pix = *(center_pix + offsets[axis]);                        \n"
+"        float4 after_pix  = *(center_pix + offsets[SYMMETRY(axis)]);              \n"
+"        metric_reference[axis] = GEN_METRIC (before_pix, *center_pix, after_pix); \n"
+"      }                                                                           \n"
+"                                                                                  \n"
+"    cur = sum = *center_pix;                                                      \n"
+"    count = 1;                                                                    \n"
+"                                                                                  \n"
+"    for (int direction = 0; direction < NEIGHBOURS; direction++)                  \n"
+"      {                                                                           \n"
+"        float4 pix   = *(center_pix + offsets[direction]);                        \n"
+"        float4 value = (pix + cur) * (0.5f);                                      \n"
+"        int    axis;                                                              \n"
+"        int4   mask = {1, 1, 1, 0};                                               \n"
+"                                                                                  \n"
+"        for (axis = 0; axis < AXES; axis++)                                       \n"
+"          {                                                                       \n"
+"            float4 before_pix = *(center_pix + offsets[axis]);                    \n"
+"            float4 after_pix  = *(center_pix + offsets[SYMMETRY(axis)]);          \n"
+"                                                                                  \n"
+"            float4 metric_new = GEN_METRIC (before_pix,                           \n"
+"                                            value,                                \n"
+"                                            after_pix);                           \n"
+"            mask = BAIL_CONDITION (metric_new, metric_reference[axis]) & mask;    \n"
+"          }                                                                       \n"
+"        sum   += mask >0 ? value : 0;                                             \n"
+"        count += mask >0 ? 1     : 0;                                             \n"
+"      }                                                                           \n"
+"    dst_buf[dst_offset]   = (sum/convert_float4(count));                          \n"
+"    dst_buf[dst_offset].w = cur.w;                                                \n"
+"}                                                                                 \n"
+"__kernel void transfer(__global float4 * in,                                      \n"
+"              int               in_width,                                         \n"
+"              __global float4 * out)                                              \n"
+"{                                                                                 \n"
+"    int gidx = get_global_id(0);                                                  \n"
+"    int gidy = get_global_id(1);                                                  \n"
+"    int width = get_global_size(0);                                               \n"
+"    out[gidy * width + gidx] = in[gidy * in_width + gidx];                        \n"
+"}                                                                                 \n";
+
+static gegl_cl_run_data *cl_data = NULL;
+
+static cl_int
+cl_noise_reduction (cl_mem                in_tex,
+                    cl_mem                aux_tex,
+                    cl_mem                out_tex,
+                    size_t                global_worksize,
+                    const GeglRectangle  *src_roi,
+                    const GeglRectangle  *roi,
+                    const int             iterations)
+{
+  int i = 0;
+  size_t gbl_size_tmp[2];
+
+  cl_int n_src_stride  = roi->width + iterations * 2;
+  cl_int cl_err = 0;
+
+  cl_mem temp_tex, tmptex;
+
+  gint stride = 16; /*R'G'B'A float*/
+
+  if (!cl_data)
+    {
+      const char *kernel_name[] ={"noise_reduction_cl","transfer", NULL};
+      cl_data = gegl_cl_compile_and_build(kernel_source, kernel_name);
+    }
+  if (!cl_data)  return 0;
+
+  temp_tex = gegl_clCreateBuffer (gegl_cl_get_context(),
+                                  CL_MEM_READ_WRITE,
+                                  src_roi->width * src_roi->height * stride,
+                                  NULL, &cl_err);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+
+  cl_err = gegl_clEnqueueCopyBuffer(gegl_cl_get_command_queue(),
+                                    in_tex , temp_tex , 0 , 0 ,
+                                    src_roi->width * src_roi->height * stride,
+                                    0, NULL, NULL);
+
+  cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
+  if (CL_SUCCESS != cl_err) return cl_err;
+
+  tmptex = temp_tex;
+  for (i = 0;i<iterations;i++)
+    {
+      if (i > 0)
+      {
+        cl_mem temp = aux_tex;
+        aux_tex     = temp_tex;
+        temp_tex    = temp;
+      }
+      gbl_size_tmp[0] = roi->width  + 2 * (iterations - 1 -i);
+      gbl_size_tmp[1] = roi->height + 2 * (iterations - 1 -i);
+
+      cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&temp_tex);
+      cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_int), (void*)&n_src_stride);
+      cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem), (void*)&aux_tex);
+      cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int), (void*)&n_src_stride);
+      if (cl_err != CL_SUCCESS) return cl_err;
+
+      cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[0],
+                                           2, NULL, gbl_size_tmp, NULL,
+                                           0, NULL, NULL);
+      cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
+      if (CL_SUCCESS != cl_err) return cl_err;
+    }
+
+  gbl_size_tmp[0] = roi->width ;
+  gbl_size_tmp[1] = roi->height;
+
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_int), (void*)&n_src_stride);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem), (void*)&out_tex);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(), cl_data->kernel[1],
+                                       2, NULL, gbl_size_tmp, NULL,
+                                       0, NULL, NULL);
+
+  cl_err = gegl_clFinish(gegl_cl_get_command_queue());
+  if (CL_SUCCESS != cl_err) return cl_err;
+
+  if (tmptex) gegl_clReleaseMemObject (tmptex);
+
+  return cl_err;
+}
+
+static gboolean
+cl_process (GeglOperation       *operation,
+      GeglBuffer                *input,
+      GeglBuffer                *output,
+      const GeglRectangle       *result)
+{
+  const Babl *in_format  = gegl_operation_get_format (operation, "input");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
+  gint err;
+  gint j;
+  cl_int cl_err;
+
+  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+  GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+  GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,   result, out_format, GEGL_CL_BUFFER_WRITE, GEGL_ABYSS_NONE);
+  gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format,  GEGL_CL_BUFFER_READ,
+                                             op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
+  gint aux  = gegl_buffer_cl_iterator_add_2 (i, NULL, result, in_format,  GEGL_CL_BUFFER_AUX,
+                                             op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
+
+  while (gegl_buffer_cl_iterator_next (i, &err))
+  {
+    if (err) return FALSE;
+    for (j=0; j < i->n; j++)
+      {
+        cl_err = cl_noise_reduction(i->tex[read][j],
+                                    i->tex[aux][j],
+                                    i->tex[0][j],
+                                    i->size[0][j],
+                                    &i->roi[read][j],
+                                    &i->roi[0][j],
+                                    o->iterations);
+        if (cl_err != CL_SUCCESS)
+        {
+          g_warning("[OpenCL] Error in gegl:noise-reduction: %s", gegl_cl_errstring(cl_err));
+          return FALSE;
+        }
+      }
+  }
+  return TRUE;
+}
+
 #define INPLACE 1
 
 static gboolean
@@ -157,6 +372,7 @@ process (GeglOperation       *operation,
          gint                 level)
 {
   GeglChantO   *o = GEGL_CHANT_PROPERTIES (operation);
+
   int iteration;
   int stride;
   float *src_buf;
@@ -164,6 +380,11 @@ process (GeglOperation       *operation,
   float *dst_buf;
 #endif
   GeglRectangle rect;
+
+  if (gegl_cl_is_accelerated ())
+    if(cl_process(operation, input, output, result))
+      return TRUE;
+
   rect = *result;
 
   stride = result->width + o->iterations * 2;
@@ -243,6 +464,8 @@ gegl_chant_class_init (GeglChantClass *klass)
 
   filter_class->process   = process;
   operation_class->prepare = prepare;
+  operation_class->opencl_support = TRUE;
+
   operation_class->get_bounding_box = get_bounding_box;
 
   gegl_operation_class_set_keys (operation_class,



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]