[gegl/opencl-ops: 3/14] Add OpenCL support for gegl:edge-laplace



commit 38aa2216313551947f1ec7019d1b28c224e7408b
Author: Zhang Peixuan <zhangpeixuan cn gmail com>
Date:   Tue Mar 6 17:39:41 2012 +0800

    Add OpenCL support for gegl:edge-laplace

 operations/common/edge-laplace.c |  316 +++++++++++++++++++++++++++++++++++---
 1 files changed, 296 insertions(+), 20 deletions(-)
---
diff --git a/operations/common/edge-laplace.c b/operations/common/edge-laplace.c
index 3475d0b..cc683ec 100644
--- a/operations/common/edge-laplace.c
+++ b/operations/common/edge-laplace.c
@@ -53,21 +53,6 @@ static void prepare (GeglOperation *operation)
   gegl_operation_set_format (operation, "output", babl_format ("RGBA float"));
 }
 
-static gboolean
-process (GeglOperation       *operation,
-         GeglBuffer          *input,
-         GeglBuffer          *output,
-         const GeglRectangle *result)
-{
-  GeglRectangle compute;
-
-  compute = gegl_operation_get_required_for_output (operation, "input", result);
-
-  edge_laplace (input, &compute, output, result);
-
-  return  TRUE;
-}
-
 static void
 minmax  (gfloat  x1,
          gfloat  x2,
@@ -223,6 +208,296 @@ edge_laplace (GeglBuffer          *src,
   g_free (dst_buf);
 }
 
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+static const char* kernel_source =
+"#define LAPLACE_RADIUS 1                                              \n"
+"void minmax(float x1, float x2, float x3,                             \n"
+"            float x4, float x5,                                       \n"
+"            float *min_result,                                        \n"
+"            float *max_result)                                        \n"
+"{                                                                     \n"
+"    float min1, min2, max1, max2;                                     \n"
+"                                                                      \n"
+"    if (x1 > x2)                                                      \n"
+"    {                                                                 \n"
+"        max1 = x1;                                                    \n"
+"        min1 = x2;                                                    \n"
+"    }                                                                 \n"
+"    else                                                              \n"
+"    {                                                                 \n"
+"        max1 = x2;                                                    \n"
+"        min1 = x1;                                                    \n"
+"    }                                                                 \n"
+"                                                                      \n"
+"    if (x3 > x4)                                                      \n"
+"    {                                                                 \n"
+"        max2 = x3;                                                    \n"
+"        min2 = x4;                                                    \n"
+"    }                                                                 \n"
+"    else                                                              \n"
+"    {                                                                 \n"
+"        max2 = x4;                                                    \n"
+"        min2 = x3;                                                    \n"
+"    }                                                                 \n"
+"                                                                      \n"
+"    if (min1 < min2)                                                  \n"
+"        *min_result = fmin(min1, x5);                                 \n"
+"    else                                                              \n"
+"        *min_result = fmin(min2, x5);                                 \n"
+"    if (max1 > max2)                                                  \n"
+"        *max_result = fmax(max1, x5);                                 \n"
+"    else                                                              \n"
+"        *max_result = fmax(max2, x5);                                 \n"
+"}                                                                     \n"
+"                                                                      \n"
+"kernel void pre_edgelaplace (global float4 *in,                       \n"
+"                             global float4 *out)                      \n"
+"{                                                                     \n"
+"    int gidx = get_global_id(0);                                      \n"
+"    int gidy = get_global_id(1);                                      \n"
+"                                                                      \n"
+"    int src_width  = get_global_size(0) + LAPLACE_RADIUS * 2;         \n"
+"    int src_height = get_global_size(1);                              \n"
+"                                                                      \n"
+"    int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS;         \n"
+"    int gid1d = i + j * src_width;                                    \n"
+"                                                                      \n"
+"    float pix_fl[4] = {                                               \n"
+"        in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y,     \n"
+"        in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w      \n"
+"    };                                                                \n"
+"    float pix_fm[4] = {                                               \n"
+"        in[gid1d     - src_width].x, in[gid1d     - src_width].y,     \n"
+"        in[gid1d     - src_width].z, in[gid1d     - src_width].w      \n"
+"    };                                                                \n"
+"    float pix_fr[4] = {                                               \n"
+"        in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y,     \n"
+"        in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w      \n"
+"    };                                                                \n"
+"    float pix_ml[4] = {                                               \n"
+"        in[gid1d - 1            ].x, in[gid1d - 1            ].y,     \n"
+"        in[gid1d - 1            ].z, in[gid1d - 1            ].w      \n"
+"    };                                                                \n"
+"    float pix_mm[4] = {                                               \n"
+"        in[gid1d                ].x, in[gid1d                ].y,     \n"
+"        in[gid1d                ].z, in[gid1d                ].w      \n"
+"    };                                                                \n"
+"    float pix_mr[4] = {                                               \n"
+"        in[gid1d + 1            ].x, in[gid1d + 1            ].y,     \n"
+"        in[gid1d + 1            ].z, in[gid1d + 1            ].w      \n"
+"    };                                                                \n"
+"    float pix_bl[4] = {                                               \n"
+"        in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y,     \n"
+"        in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w      \n"
+"    };                                                                \n"
+"    float pix_bm[4] = {                                               \n"
+"        in[gid1d     + src_width].x, in[gid1d     + src_width].y,     \n"
+"        in[gid1d     + src_width].z, in[gid1d     + src_width].w      \n"
+"    };                                                                \n"
+"    float pix_br[4] = {                                               \n"
+"        in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y,     \n"
+"        in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w      \n"
+"    };                                                                \n"
+"                                                                      \n"
+"    int c;                                                            \n"
+"    float minval, maxval;                                             \n"
+"    float gradient[4];                                                \n"
+"                                                                      \n"
+"    for (c = 0;c < 3; ++c)                                            \n"
+"    {                                                                 \n"
+"        minmax(pix_fm[c], pix_bm[c], pix_ml[c], pix_mr[c],            \n"
+"            pix_mm[c], &minval, &maxval);                             \n"
+"        gradient[c] = 0.5f *                                          \n"
+"            fmax((maxval - pix_mm[c]),(pix_mm[c] - minval));          \n"
+"        gradient[c] =                                                 \n"
+"            (pix_fl[c] + pix_fm[c] + pix_fr[c] +                      \n"
+"             pix_ml[c] + pix_mr[c] + pix_bl[c] +                      \n"
+"             pix_bm[c] + pix_br[c] - 8.0f * pix_mm[c]) >              \n"
+"             0.0f ? gradient[c] : -1.0f * gradient[c];                \n"
+"    }                                                                 \n"
+"    gradient[3] = pix_mm[3];                                          \n"
+"                                                                      \n"
+"    out[gid1d] = (float4)                                             \n"
+"        (gradient[0], gradient[1], gradient[2], gradient[3]);         \n"
+"}                                                                     \n"
+"                                                                      \n"
+"kernel void knl_edgelaplace (global float4 *in,                       \n"
+"                             global float4 *out)                      \n"
+"{                                                                     \n"
+"    int gidx = get_global_id(0);                                      \n"
+"    int gidy = get_global_id(1);                                      \n"
+"                                                                      \n"
+"    int src_width  = get_global_size(0) + LAPLACE_RADIUS * 2;         \n"
+"    int src_height = get_global_size(1);                              \n"
+"                                                                      \n"
+"    int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS;         \n"
+"    int gid1d = i + j * src_width;                                    \n"
+"                                                                      \n"
+"    float pix_fl[4] = {                                               \n"
+"        in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y,     \n"
+"        in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w      \n"
+"    };                                                                \n"
+"    float pix_fm[4] = {                                               \n"
+"        in[gid1d     - src_width].x, in[gid1d     - src_width].y,     \n"
+"        in[gid1d     - src_width].z, in[gid1d     - src_width].w      \n"
+"    };                                                                \n"
+"    float pix_fr[4] = {                                               \n"
+"        in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y,     \n"
+"        in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w      \n"
+"    };                                                                \n"
+"    float pix_ml[4] = {                                               \n"
+"        in[gid1d - 1            ].x, in[gid1d - 1            ].y,     \n"
+"        in[gid1d - 1            ].z, in[gid1d - 1            ].w      \n"
+"    };                                                                \n"
+"    float pix_mm[4] = {                                               \n"
+"        in[gid1d                ].x, in[gid1d                ].y,     \n"
+"        in[gid1d                ].z, in[gid1d                ].w      \n"
+"    };                                                                \n"
+"    float pix_mr[4] = {                                               \n"
+"        in[gid1d + 1            ].x, in[gid1d + 1            ].y,     \n"
+"        in[gid1d + 1            ].z, in[gid1d + 1            ].w      \n"
+"    };                                                                \n"
+"    float pix_bl[4] = {                                               \n"
+"        in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y,     \n"
+"        in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w      \n"
+"    };                                                                \n"
+"    float pix_bm[4] = {                                               \n"
+"        in[gid1d     + src_width].x, in[gid1d     + src_width].y,     \n"
+"        in[gid1d     + src_width].z, in[gid1d     + src_width].w      \n"
+"    };                                                                \n"
+"    float pix_br[4] = {                                               \n"
+"        in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y,     \n"
+"        in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w      \n"
+"    };                                                                \n"
+"                                                                      \n"
+"    int c;                                                            \n"
+"    float value[4];                                                   \n"
+"                                                                      \n"
+"    for (c = 0;c < 3; ++c)                                            \n"
+"    {                                                                 \n"
+"        float current = pix_mm[c];                                    \n"
+"        current =                                                     \n"
+"            ((current > 0.0f) &&                                      \n"
+"             (pix_fl[c] < 0.0f || pix_fm[c] < 0.0f ||                 \n"
+"              pix_fr[c] < 0.0f || pix_ml[c] < 0.0f ||                 \n"
+"              pix_mr[c] < 0.0f || pix_bl[c] < 0.0f ||                 \n"
+"              pix_bm[c] < 0.0f || pix_br[c] < 0.0f )                  \n"
+"            ) ? current : 0.0f;                                       \n"
+"        value[c] = current;                                           \n"
+"    }                                                                 \n"
+"    value[3] = pix_mm[3];                                             \n"
+"                                                                      \n"
+"    out[gidx + gidy * get_global_size(0)] = (float4)                  \n"
+"        (value[0], value[1], value[2], value[3]);                     \n"
+"}                                                                     \n";
+
+static gegl_cl_run_data *cl_data = NULL;
+
+static cl_int
+cl_edge_laplace (cl_mem                in_tex,
+                 cl_mem                aux_tex,
+                 cl_mem                out_tex,
+                 const GeglRectangle  *src_rect,
+                 const GeglRectangle  *roi,
+                 gint                  radius)
+{
+  cl_int cl_err = 0;
+  size_t global_ws[2];
+  if (!cl_data)
+  {
+    const char *kernel_name[] = {"pre_edgelaplace", "knl_edgelaplace", NULL};
+    cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+  }
+
+  if (!cl_data) return 1;
+
+  global_ws[0] = roi->width;
+  global_ws[1] = roi->height;
+
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&aux_tex);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+                                       cl_data->kernel[0], 2,
+                                       NULL, global_ws, NULL,
+                                       0, NULL, NULL);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
+  if (CL_SUCCESS != cl_err) return cl_err;
+
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),   (void*)&aux_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),   (void*)&out_tex);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+                                       cl_data->kernel[1], 2,
+                                       NULL, global_ws, NULL,
+                                       0, NULL, NULL);
+  if (cl_err != CL_SUCCESS) return cl_err;
+  return cl_err;
+}
+
+static gboolean
+cl_process (GeglOperation       *operation,
+            GeglBuffer          *input,
+            GeglBuffer          *output,
+            const GeglRectangle *result)
+{
+  const Babl *in_format  = gegl_operation_get_format (operation, "input");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
+  gint err;
+  gint j;
+  cl_int cl_err;
+
+  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+  GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+  GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,   result, out_format, GEGL_CL_BUFFER_WRITE);
+  gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format,  GEGL_CL_BUFFER_READ, op_area->left, op_area->right, op_area->top, op_area->bottom);
+  gint aux  = gegl_buffer_cl_iterator_add_2 (i, NULL, result, in_format,  GEGL_CL_BUFFER_AUX, op_area->left, op_area->right, op_area->top, op_area->bottom);
+  while (gegl_buffer_cl_iterator_next (i, &err))
+  {
+    if (err) return FALSE;
+    for (j=0; j < i->n; j++)
+    {
+      cl_err = cl_edge_laplace(i->tex[read][j], i->tex[aux][j], i->tex[0][j], &i->roi[read][j], &i->roi[0][j], LAPLACE_RADIUS);
+      if (cl_err != CL_SUCCESS)
+      {
+        g_warning("[OpenCL] Error in gegl:edge-laplace: %s\n", gegl_cl_errstring(cl_err));
+        return FALSE;
+      }
+    }
+  }
+  return TRUE;
+}
+
+static gboolean
+process (GeglOperation       *operation,
+         GeglBuffer          *input,
+         GeglBuffer          *output,
+         const GeglRectangle *result)
+{
+  GeglRectangle rect;
+  GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+  GeglBuffer *temp;
+  GeglOperationAreaFilter *op_area;
+  op_area = GEGL_OPERATION_AREA_FILTER (operation);
+
+  if (cl_state.is_accelerated)
+    if (cl_process (operation, input, output, result))
+      return TRUE;
+ 
+  GeglRectangle compute;
+  compute = gegl_operation_get_required_for_output (operation, "input", result);
+  edge_laplace (input, &compute, output, result);
+
+  return  TRUE;
+}
+
 
 static void
 gegl_chant_class_init (GeglChantClass *klass)
@@ -230,16 +505,17 @@ gegl_chant_class_init (GeglChantClass *klass)
   GeglOperationClass       *operation_class;
   GeglOperationFilterClass *filter_class;
 
-  operation_class  = GEGL_OPERATION_CLASS (klass);
-  filter_class     = GEGL_OPERATION_FILTER_CLASS (klass);
+  operation_class = GEGL_OPERATION_CLASS (klass);
+  filter_class    = GEGL_OPERATION_FILTER_CLASS (klass);
 
-  filter_class->process   = process;
+  filter_class->process    = process;
   operation_class->prepare = prepare;
 
-  operation_class->name        = "gegl:edge-laplace";
   operation_class->categories  = "edge-detect";
+  operation_class->name        = "gegl:edge-laplace";
+  operation_class->opencl_support = TRUE;
   operation_class->description =
-        _("High-resolution edge detection");
+    _("High-resolution edge detection.");
 }
 
 #endif



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]