[gegl] Support for Area filters in cl-iterator and use example in box-blur



commit 87401edfe429cadd6a2d3313dc69ae3a7e9cc8cd
Author: Victor Oliveira <victormatheus gmail com>
Date:   Fri Mar 2 16:32:38 2012 -0300

    Support for Area filters in cl-iterator and use example in box-blur

 gegl/buffer/gegl-buffer-cl-iterator.c |   49 +++++++++---
 gegl/buffer/gegl-buffer-cl-iterator.h |   10 +++
 gegl/opencl/gegl-cl-init.c            |    1 +
 operations/common/box-blur.c          |  136 ++++++++++++++++++++++++++++++++-
 4 files changed, 183 insertions(+), 13 deletions(-)
---
diff --git a/gegl/buffer/gegl-buffer-cl-iterator.c b/gegl/buffer/gegl-buffer-cl-iterator.c
index 55576e0..1cb8c95 100644
--- a/gegl/buffer/gegl-buffer-cl-iterator.c
+++ b/gegl/buffer/gegl-buffer-cl-iterator.c
@@ -34,6 +34,7 @@ typedef struct GeglBufferClIterators
   gboolean       is_finished;
 
   guint          flags          [GEGL_CL_BUFFER_MAX_ITERATORS];
+  gint           area           [GEGL_CL_BUFFER_MAX_ITERATORS][4];
 
   GeglRectangle  rect           [GEGL_CL_BUFFER_MAX_ITERATORS]; /* the region we iterate on. They can be
                                                                    different from each other, but width
@@ -57,11 +58,15 @@ typedef struct GeglBufferClIterators
 } GeglBufferClIterators;
 
 gint
-gegl_buffer_cl_iterator_add (GeglBufferClIterator  *iterator,
-                             GeglBuffer            *buffer,
-                             const GeglRectangle   *result,
-                             const Babl            *format,
-                             guint                  flags)
+gegl_buffer_cl_iterator_add_2 (GeglBufferClIterator  *iterator,
+                               GeglBuffer            *buffer,
+                               const GeglRectangle   *result,
+                               const Babl            *format,
+                               guint                  flags,
+                               gint                   left,
+                               gint                   right,
+                               gint                   top,
+                               gint                   bottom)
 {
   GeglBufferClIterators *i = (gpointer)iterator;
   gint self = 0;
@@ -97,6 +102,14 @@ gegl_buffer_cl_iterator_add (GeglBufferClIterator  *iterator,
   gegl_cl_color_babl (buffer->format, &i->buf_cl_format_size[self]);
   gegl_cl_color_babl (format,         &i->op_cl_format_size [self]);
 
+  i->area[self][0] = left;
+  i->area[self][1] = right;
+  i->area[self][2] = top;
+  i->area[self][3] = bottom;
+  if (flags == GEGL_CL_BUFFER_WRITE
+      && (left > 0 || right > 0 || top > 0 || bottom > 0))
+	g_assert(FALSE);
+
   if (self!=0)
     {
       /* we make all subsequently added iterators share the width and height of the first one */
@@ -130,6 +143,16 @@ gegl_buffer_cl_iterator_add (GeglBufferClIterator  *iterator,
   return self;
 }
 
+gint
+gegl_buffer_cl_iterator_add (GeglBufferClIterator  *iterator,
+                             GeglBuffer            *buffer,
+                             const GeglRectangle   *result,
+                             const Babl            *format,
+                             guint                  flags)
+{
+  return gegl_buffer_cl_iterator_add_2 (iterator, buffer, result, format, flags, 0,0,0,0);
+}
+
 gboolean
 gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
 {
@@ -155,8 +178,12 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
           if (!found)
             gegl_buffer_lock (i->buffer[no]);
 
-          if (i->flags[no] == GEGL_CL_BUFFER_WRITE)
-            gegl_buffer_cl_cache_invalidate (i->buffer[no], &i->rect[no]);
+          if (i->flags[no] == GEGL_CL_BUFFER_WRITE
+              || (i->flags[no] == GEGL_CL_BUFFER_READ
+                  && (i->area[no][0] > 0 || i->area[no][1] > 0 || i->area[no][2] > 0 || i->area[no][3] > 0)))
+            {
+              gegl_buffer_cl_cache_invalidate (i->buffer[no], &i->rect[no]);
+            }
         }
     }
   else
@@ -240,10 +267,10 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
     {
       for (j = 0; j < i->n; j++)
         {
-          GeglRectangle r = {i->rect[no].x + i->roi_all[i->roi_no+j].x,
-                             i->rect[no].y + i->roi_all[i->roi_no+j].y,
-                             i->roi_all[i->roi_no+j].width,
-                             i->roi_all[i->roi_no+j].height};
+          GeglRectangle r = {i->rect[no].x + i->roi_all[i->roi_no+j].x - i->area[no][0],
+                             i->rect[no].y + i->roi_all[i->roi_no+j].y - i->area[no][2],
+                             i->roi_all[i->roi_no+j].width             + i->area[no][0] + i->area[no][1],
+                             i->roi_all[i->roi_no+j].height            + i->area[no][2] + i->area[no][3]};
           i->roi [no][j] = r;
           i->size[no][j] = r.width * r.height;
         }
diff --git a/gegl/buffer/gegl-buffer-cl-iterator.h b/gegl/buffer/gegl-buffer-cl-iterator.h
index 9ccbf45..936974a 100644
--- a/gegl/buffer/gegl-buffer-cl-iterator.h
+++ b/gegl/buffer/gegl-buffer-cl-iterator.h
@@ -27,6 +27,16 @@ gint gegl_buffer_cl_iterator_add (GeglBufferClIterator  *iterator,
                                   const Babl            *format,
                                   guint                  flags);
 
+gint gegl_buffer_cl_iterator_add_2 (GeglBufferClIterator  *iterator,
+                                    GeglBuffer            *buffer,
+                                    const GeglRectangle   *roi,
+                                    const Babl            *format,
+                                    guint                  flags,
+                                    gint                   left,
+                                    gint                   right,
+                                    gint                   top,
+                                    gint                   bottom);
+
 gboolean gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err);
 
 GeglBufferClIterator *gegl_buffer_cl_iterator_new (GeglBuffer          *buffer,
diff --git a/gegl/opencl/gegl-cl-init.c b/gegl/opencl/gegl-cl-init.c
index 83bce1f..ec80712 100644
--- a/gegl/opencl/gegl-cl-init.c
+++ b/gegl/opencl/gegl-cl-init.c
@@ -238,6 +238,7 @@ gegl_cl_init (GError **error)
           else
             cl_state.max_image_height /= 2;
         }
+      cl_state.max_image_width  /= 2;
 
       g_printf("[OpenCL] Iteration size: (%d, %d)\n", cl_state.max_image_width, cl_state.max_image_height);
 
diff --git a/operations/common/box-blur.c b/operations/common/box-blur.c
index 6cb3b2e..7f26cdf 100644
--- a/operations/common/box-blur.c
+++ b/operations/common/box-blur.c
@@ -218,8 +218,135 @@ static void prepare (GeglOperation *operation)
   op_area->top    =
   op_area->bottom = ceil (o->radius);
 
-  gegl_operation_set_format (operation, "output",
-                             babl_format ("RaGaBaA float"));
+  gegl_operation_set_format (operation, "input",  babl_format ("RaGaBaA float"));
+  gegl_operation_set_format (operation, "output", babl_format ("RaGaBaA float"));
+}
+
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+static const char* kernel_source =
+"__kernel void kernel_blur(__global const float4     *in,                                           \n"
+"                          __global       float4     *out,                                          \n"
+"                          __local        float4     *shared_roi,                                   \n"
+"                          int width, int radius)                                                   \n"
+"{                                                                                                  \n"
+"                                                                                                   \n"
+"  const int out_index    = get_global_id(0) * width + get_global_id(1);                            \n"
+"  const int in_top_index = (get_group_id (0) * get_local_size (0)) * (width + 2 * radius)          \n"
+"                            + (get_group_id (1) * get_local_size (1));                             \n"
+"                                                                                                   \n"
+"  const int local_width = (2 * radius + get_local_size (1));                                       \n"
+"  const int local_index = (radius + get_local_id (0)) * local_width + (radius + get_local_id (1)); \n"
+"  int i, x, y;                                                                                     \n"
+"                                                                                                   \n"
+"  float4 mean;                                                                                     \n"
+"                                                                                                   \n"
+"  for (y = get_local_id (0); y < get_local_size (0) + 2 * radius; y += get_local_size (0))         \n"
+"    for (x = get_local_id (1); x < get_local_size (1) + 2 * radius; x += get_local_size (1))       \n"
+"      shared_roi[y*local_width+x] = in[in_top_index + y * (width + 2 * radius) + x];               \n"
+"                                                                                                   \n"
+"  barrier(CLK_LOCAL_MEM_FENCE);                                                                    \n"
+"                                                                                                   \n"
+"  mean = (float4)(0.0f);                                                                           \n"
+"                                                                                                   \n"
+"  for (i=-radius; i <= radius; i++)                                                                \n"
+"   {                                                                                               \n"
+"     mean += shared_roi[local_index + i];                                                          \n"
+"   }                                                                                               \n"
+"                                                                                                   \n"
+"  shared_roi[local_index] = mean / (2 * radius + 1);                                               \n"
+"                                                                                                   \n"
+"  barrier(CLK_LOCAL_MEM_FENCE);                                                                    \n"
+"                                                                                                   \n"
+"  mean = (float4)(0.0f);                                                                           \n"
+"                                                                                                   \n"
+"  for (i=-radius; i <= radius; i++)                                                                \n"
+"   {                                                                                               \n"
+"     mean += shared_roi[local_index + i * local_width];                                            \n"
+"   }                                                                                               \n"
+"                                                                                                   \n"
+"  shared_roi[local_index] = mean / (2 * radius + 1);                                               \n"
+"                                                                                                   \n"
+"  barrier(CLK_LOCAL_MEM_FENCE);                                                                    \n"
+"                                                                                                   \n"
+"  out[out_index] = shared_roi[local_index];                                                        \n"
+"}                                                                                                  \n";
+
+static gegl_cl_run_data *cl_data = NULL;
+
+static cl_int
+cl_box_blur (cl_mem                in_tex,
+             cl_mem                out_tex,
+             size_t                global_worksize,
+             const GeglRectangle  *roi,
+             gint                  radius)
+{
+  cl_int cl_err = 0;
+  size_t local_ws[2], global_ws[2], local_mem_size;
+
+  if (!cl_data)
+    {
+      const char *kernel_name[] = {"kernel_blur", NULL};
+      cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+    }
+
+  if (!cl_data) return 1;
+
+  local_ws[0] = 16;
+  local_ws[1] = 16;
+  global_ws[0] = roi->height;
+  global_ws[1] = roi->width;
+  local_mem_size = sizeof(cl_float4) * (local_ws[0] + 2 * radius) * (local_ws[1] + 2 * radius);
+
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&out_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 2, local_mem_size,   NULL);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int),   (void*)&roi->width);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 4, sizeof(cl_int),   (void*)&radius);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+                                        cl_data->kernel[0], 2,
+                                        NULL, global_ws, local_ws,
+                                        0, NULL, NULL);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  return cl_err;
+}
+
+static gboolean
+cl_process (GeglOperation       *operation,
+            GeglBuffer          *input,
+            GeglBuffer          *output,
+            const GeglRectangle *result)
+{
+  const Babl *in_format  = gegl_operation_get_format (operation, "input");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
+  gint err;
+  gint j;
+  cl_int cl_err;
+
+  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+  GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+  GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,   result, out_format, GEGL_CL_BUFFER_WRITE);
+                gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format,  GEGL_CL_BUFFER_READ, op_area->left, op_area->right, op_area->top, op_area->bottom);
+  while (gegl_buffer_cl_iterator_next (i, &err))
+    {
+      if (err) return FALSE;
+      for (j=0; j < i->n; j++)
+        {
+          cl_err = cl_box_blur(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], o->radius);
+          if (cl_err != CL_SUCCESS)
+            {
+              g_warning("[OpenCL] Error in %s [GeglOperationPointFilter] Kernel\n",
+                        GEGL_OPERATION_CLASS (operation)->name);
+              return FALSE;
+            }
+        }
+    }
+  return TRUE;
 }
 
 static gboolean
@@ -234,6 +361,10 @@ process (GeglOperation       *operation,
   GeglOperationAreaFilter *op_area;
   op_area = GEGL_OPERATION_AREA_FILTER (operation);
 
+  if (cl_state.is_accelerated)
+    if (cl_process (operation, input, output, result))
+      return TRUE;
+
   rect = *result;
 
   rect.x-=op_area->left;
@@ -266,6 +397,7 @@ gegl_chant_class_init (GeglChantClass *klass)
 
   operation_class->categories  = "blur";
   operation_class->name        = "gegl:box-blur";
+  operation_class->opencl_support = TRUE;
   operation_class->description =
        _("Performs an averaging of a square box of pixels.");
 }



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]