[gegl/soc-2013-opecl-ops: 6/6] Added OpenCL support to box-max



commit 137764bf35d2bec7a92657d37422a204aceddf04
Author: Carlos Zubieta <czubieta dev gmail com>
Date:   Tue Jul 23 18:49:25 2013 -0500

    Added OpenCL support to box-max

 opencl/box-max.cl             |   49 ++++++++++++++
 opencl/box-max.cl.h           |   51 +++++++++++++++
 operations/workshop/box-max.c |  140 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 240 insertions(+), 0 deletions(-)
---
diff --git a/opencl/box-max.cl b/opencl/box-max.cl
new file mode 100644
index 0000000..b780ece
--- /dev/null
+++ b/opencl/box-max.cl
@@ -0,0 +1,49 @@
+__kernel void kernel_max_hor (__global const float4     *in,
+                              __global       float4     *aux,
+                              int width, int radius)
+{
+  const int in_index = get_global_id(0) * (width + 2 * radius)
+                       + (radius + get_global_id (1));
+
+  const int aux_index = get_global_id(0) * width + get_global_id (1);
+  int i;
+  float4 max;
+  float4 in_v;
+
+  max = (float4)(-1000000000.0f);
+
+  if (get_global_id(1) < width)
+    {
+      for (i=-radius; i <= radius; i++)
+        {
+          in_v = in[in_index + i];
+          max = max < in_v ? in_v : max;
+        }
+        aux[aux_index] = max;
+    }
+}
+
+__kernel void kernel_max_ver (__global const float4     *aux,
+                              __global       float4     *out,
+                              int width, int radius)
+{
+
+  const int out_index = get_global_id(0) * width + get_global_id (1);
+  int aux_index = out_index;
+  int i;
+  float4 max;
+  float4 aux_v;
+
+  max = (float4)(-1000000000.0f);
+
+  if(get_global_id(1) < width)
+    {
+      for (i=-radius; i <= radius; i++)
+        {
+          aux_v = aux[aux_index];
+          max = max < aux_v ? aux_v : max;
+          aux_index += width;
+        }
+        out[out_index] = max;
+    }
+}
diff --git a/opencl/box-max.cl.h b/opencl/box-max.cl.h
new file mode 100644
index 0000000..983625d
--- /dev/null
+++ b/opencl/box-max.cl.h
@@ -0,0 +1,51 @@
+static const char* box_max_cl_source = 
+"__kernel void kernel_max_hor (__global const float4     *in,                  \n"
+"                              __global       float4     *aux,                 \n"
+"                              int width, int radius)                          \n"
+"{                                                                             \n"
+"  const int in_index = get_global_id(0) * (width + 2 * radius)                \n"
+"                       + (radius + get_global_id (1));                        \n"
+"                                                                              \n"
+"  const int aux_index = get_global_id(0) * width + get_global_id (1);         \n"
+"  int i;                                                                      \n"
+"  float4 max;                                                                 \n"
+"  float4 in_v;                                                                \n"
+"                                                                              \n"
+"  max = (float4)(-1000000000.0f);                                             \n"
+"                                                                              \n"
+"  if (get_global_id(1) < width)                                               \n"
+"    {                                                                         \n"
+"      for (i=-radius; i <= radius; i++)                                       \n"
+"        {                                                                     \n"
+"          in_v = in[in_index + i];                                            \n"
+"          max = max < in_v ? in_v : max;                                      \n"
+"        }                                                                     \n"
+"        aux[aux_index] = max;                                                 \n"
+"    }                                                                         \n"
+"}                                                                             \n"
+"                                                                              \n"
+"__kernel void kernel_max_ver (__global const float4     *aux,                 \n"
+"                              __global       float4     *out,                 \n"
+"                              int width, int radius)                          \n"
+"{                                                                             \n"
+"                                                                              \n"
+"  const int out_index = get_global_id(0) * width + get_global_id (1);         \n"
+"  int aux_index = out_index;                                                  \n"
+"  int i;                                                                      \n"
+"  float4 max;                                                                 \n"
+"  float4 aux_v;                                                               \n"
+"                                                                              \n"
+"  max = (float4)(-1000000000.0f);                                             \n"
+"                                                                              \n"
+"  if(get_global_id(1) < width)                                                \n"
+"    {                                                                         \n"
+"      for (i=-radius; i <= radius; i++)                                       \n"
+"        {                                                                     \n"
+"          aux_v = aux[aux_index];                                             \n"
+"          max = max < aux_v ? aux_v : max;                                    \n"
+"          aux_index += width;                                                 \n"
+"        }                                                                     \n"
+"        out[out_index] = max;                                                 \n"
+"    }                                                                         \n"
+"}                                                                             \n"
+;
diff --git a/operations/workshop/box-max.c b/operations/workshop/box-max.c
index 37b77d2..ae52837 100644
--- a/operations/workshop/box-max.c
+++ b/operations/workshop/box-max.c
@@ -162,6 +162,136 @@ static void prepare (GeglOperation *operation)
   gegl_operation_set_format (operation, "output", babl_format ("RGBA float"));
 }
 
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+#include "opencl/box-max.cl.h"
+
+static GeglClRunData *cl_data = NULL;
+
+static gboolean
+cl_box_max (cl_mem                in_tex,
+            cl_mem                aux_tex,
+            cl_mem                out_tex,
+            size_t                global_worksize,
+            const GeglRectangle  *roi,
+            gint                  radius)
+{
+  cl_int cl_err = 0;
+  size_t global_ws_hor[2], global_ws_ver[2];
+  size_t local_ws_hor[2], local_ws_ver[2];
+
+  if (!cl_data)
+    {
+      const char *kernel_name[] = {"kernel_max_hor", "kernel_max_ver", NULL};
+      cl_data = gegl_cl_compile_and_build (box_max_cl_source, kernel_name);
+    }
+  if (!cl_data) return TRUE;
+
+  local_ws_hor[0] = 1;
+  local_ws_hor[1] = 256;
+  global_ws_hor[0] = roi->height + 2 * radius;
+  global_ws_hor[1] = ((roi->width + local_ws_hor[1] -1)/local_ws_hor[1]) * local_ws_hor[1];
+
+  local_ws_ver[0] = 1;
+  local_ws_ver[1] = 256;
+  global_ws_ver[0] = roi->height;
+  global_ws_ver[1] = ((roi->width + local_ws_ver[1] -1)/local_ws_ver[1]) * local_ws_ver[1];
+
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_int),   (void*)&roi->width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_int),   (void*)&radius);
+  CL_CHECK;
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+                                        cl_data->kernel[0], 2,
+                                        NULL, global_ws_hor, local_ws_hor,
+                                        0, NULL, NULL);
+  CL_CHECK;
+
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),   (void*)&aux_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),   (void*)&out_tex);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_int),   (void*)&roi->width);
+  CL_CHECK;
+  cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 3, sizeof(cl_int),   (void*)&radius);
+  CL_CHECK;
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+                                        cl_data->kernel[1], 2,
+                                        NULL, global_ws_ver, local_ws_ver,
+                                        0, NULL, NULL);
+  CL_CHECK;
+
+  return FALSE;
+
+error:
+  return TRUE;
+}
+
+static gboolean
+cl_process (GeglOperation       *operation,
+            GeglBuffer          *input,
+            GeglBuffer          *output,
+            const GeglRectangle *result)
+{
+  const Babl *in_format  = gegl_operation_get_format (operation, "input");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
+
+  gint err;
+
+  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+  GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+  GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,
+                                                         result,
+                                                         out_format,
+                                                         GEGL_CL_BUFFER_WRITE);
+
+  gint read = gegl_buffer_cl_iterator_add_2 (i,
+                                             input,
+                                             result,
+                                             in_format,
+                                             GEGL_CL_BUFFER_READ,
+                                             op_area->left,
+                                             op_area->right,
+                                             op_area->top,
+                                             op_area->bottom,
+                                             GEGL_ABYSS_NONE);
+
+  gint aux  = gegl_buffer_cl_iterator_add_2 (i,
+                                             NULL,
+                                             result,
+                                             in_format,
+                                             GEGL_CL_BUFFER_AUX,
+                                             0,
+                                             0,
+                                             op_area->top,
+                                             op_area->bottom,
+                                             GEGL_ABYSS_NONE);
+
+  while (gegl_buffer_cl_iterator_next (i, &err))
+    {
+      if (err) return FALSE;
+
+      err = cl_box_max(i->tex[read],
+                       i->tex[aux],
+                       i->tex[0],
+                       i->size[0],
+                       &i->roi[0],
+                       ceil (o->radius));
+
+      if (err) return FALSE;
+    }
+
+  return TRUE;
+}
+
 static gboolean
 process (GeglOperation       *operation,
          GeglBuffer          *input,
@@ -172,6 +302,14 @@ process (GeglOperation       *operation,
   GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
   GeglRectangle input_rect = gegl_operation_get_required_for_output (operation, "input", result);
 
+  if (gegl_cl_is_accelerated ())
+    {
+      if (cl_process (operation, input, output, result))
+        return TRUE;
+      else
+        gegl_cl_disable();
+    }
+
   hor_max ( input, &input_rect, output, result, o->radius);
   ver_max (output,      result, output, result, o->radius);
 
@@ -191,6 +329,8 @@ gegl_chant_class_init (GeglChantClass *klass)
   filter_class->process = process;
   operation_class->prepare = prepare;
 
+  operation_class->opencl_support = TRUE;
+
   gegl_operation_class_set_keys (operation_class,
     "name"        , "gegl:box-max",
     "categories"  , "misc",


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]