[gegl] Add OpenCL support for gegl:motion-blur



commit 3494ff4c04b03ff24825b8fdfafe3e9972ca31e8
Author: Zhang Peixuan <zhangpeixuan cn gmail com>
Date:   Thu Mar 15 12:41:14 2012 -0300

    Add OpenCL support for gegl:motion-blur

 operations/common/motion-blur.c |  175 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 175 insertions(+), 0 deletions(-)
---
diff --git a/operations/common/motion-blur.c b/operations/common/motion-blur.c
index d3611d7..bb7dc60 100644
--- a/operations/common/motion-blur.c
+++ b/operations/common/motion-blur.c
@@ -51,9 +51,178 @@ prepare (GeglOperation *operation)
   op_area->top    =
   op_area->bottom = (gint)ceil(0.5 * offset_y);
 
+  gegl_operation_set_format (operation, "input", babl_format ("RaGaBaA float"));
   gegl_operation_set_format (operation, "output", babl_format ("RaGaBaA float"));
 }
 
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+static const char* kernel_source =
+"int CLAMP(int val,int lo,int hi)                                      \n"
+"{                                                                     \n"
+"    return (val < lo) ? lo : ((hi < val) ? hi : val);                 \n"
+"}                                                                     \n"
+"                                                                      \n"
+"float4 get_pixel_color_CL(const __global float4 *in_buf,              \n"
+"                          int     rect_width,                         \n"
+"                          int     rect_height,                        \n"
+"                          int     rect_x,                             \n"
+"                          int     rect_y,                             \n"
+"                          int     x,                                  \n"
+"                          int     y)                                  \n"
+"{                                                                     \n"
+"    int ix = x - rect_x;                                              \n"
+"    int iy = y - rect_y;                                              \n"
+"                                                                      \n"
+"    ix = CLAMP(ix, 0, rect_width-1);                                  \n"
+"    iy = CLAMP(iy, 0, rect_height-1);                                 \n"
+"                                                                      \n"
+"    return in_buf[iy * rect_width + ix];                              \n"
+"}                                                                     \n"
+"                                                                      \n"
+"__kernel void motion_blur_CL(const __global float4 *src_buf,          \n"
+"                             int     src_width,                       \n"
+"                             int     src_height,                      \n"
+"                             int     src_x,                           \n"
+"                             int     src_y,                           \n"
+"                             __global float4 *dst_buf,                \n"
+"                             int     dst_x,                           \n"
+"                             int     dst_y,                           \n"
+"                             int     num_steps,                       \n"
+"                             float   offset_x,                        \n"
+"                             float   offset_y)                        \n"
+"{                                                                     \n"
+"    int gidx = get_global_id(0);                                      \n"
+"    int gidy = get_global_id(1);                                      \n"
+"                                                                      \n"
+"    float4 sum = 0.0f;                                                \n"
+"    int px = gidx + dst_x;                                            \n"
+"    int py = gidy + dst_y;                                            \n"
+"                                                                      \n"
+"    for(int step = 0; step < num_steps; ++step)                       \n"
+"    {                                                                 \n"
+"        float t = num_steps == 1 ? 0.0f :                             \n"
+"            step / (float)(num_steps - 1) - 0.5f;                     \n"
+"                                                                      \n"
+"        float xx = px + t * offset_x;                                 \n"
+"        float yy = py + t * offset_y;                                 \n"
+"                                                                      \n"
+"        int   ix = (int)floor(xx);                                    \n"
+"        int   iy = (int)floor(yy);                                    \n"
+"                                                                      \n"
+"        float dx = xx - floor(xx);                                    \n"
+"        float dy = yy - floor(yy);                                    \n"
+"                                                                      \n"
+"        float4 mixy0,mixy1,pix0,pix1,pix2,pix3;                       \n"
+"                                                                      \n"
+"        pix0 = get_pixel_color_CL(src_buf, src_width,                 \n"
+"            src_height, src_x, src_y, ix,   iy);                      \n"
+"        pix1 = get_pixel_color_CL(src_buf, src_width,                 \n"
+"            src_height, src_x, src_y, ix+1, iy);                      \n"
+"        pix2 = get_pixel_color_CL(src_buf, src_width,                 \n"
+"            src_height, src_x, src_y, ix,   iy+1);                    \n"
+"        pix3 = get_pixel_color_CL(src_buf, src_width,                 \n"
+"            src_height, src_x, src_y, ix+1, iy+1);                    \n"
+"                                                                      \n"
+"        mixy0 = dy * (pix2 - pix0) + pix0;                            \n"
+"        mixy1 = dy * (pix3 - pix1) + pix1;                            \n"
+"                                                                      \n"
+"        sum  += dx * (mixy1 - mixy0) + mixy0;                         \n"
+"    }                                                                 \n"
+"                                                                      \n"
+"    dst_buf[gidy * get_global_size(0) + gidx] =                       \n"
+"        sum / num_steps;                                              \n"
+"}                                                                     \n";
+
+static gegl_cl_run_data *cl_data = NULL;
+
+static cl_int
+cl_motion_blur (cl_mem                in_tex,
+                cl_mem                out_tex,
+                size_t                global_worksize,
+                const GeglRectangle  *roi,
+                const GeglRectangle  *src_rect,
+                gint                  num_steps,
+                gfloat                offset_x,
+                gfloat                offset_y)
+{
+  cl_int cl_err = 0;
+  size_t global_ws[2];
+
+  if (!cl_data)
+  {
+    const char *kernel_name[] = {"motion_blur_CL", NULL};
+    cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+  }
+
+  if (!cl_data) return 1;
+
+  global_ws[0] = roi->width;
+  global_ws[1] = roi->height;
+
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  0, sizeof(cl_mem),   (void*)&in_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  1, sizeof(cl_int),   (void*)&src_rect->width);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  2, sizeof(cl_int),   (void*)&src_rect->height);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  3, sizeof(cl_int),   (void*)&src_rect->x);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  4, sizeof(cl_int),   (void*)&src_rect->y);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  5, sizeof(cl_mem),   (void*)&out_tex);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  6, sizeof(cl_int),   (void*)&roi->x);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  7, sizeof(cl_int),   (void*)&roi->y);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  8, sizeof(cl_int),   (void*)&num_steps);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0],  9, sizeof(cl_float), (void*)&offset_x);
+  cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 10, sizeof(cl_float), (void*)&offset_y);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+                                       cl_data->kernel[0], 2,
+                                       NULL, global_ws, NULL,
+                                       0, NULL, NULL);
+  if (cl_err != CL_SUCCESS) return cl_err;
+
+  return cl_err;
+}
+
+static gboolean
+cl_process (GeglOperation       *operation,
+            GeglBuffer          *input,
+            GeglBuffer          *output,
+            const GeglRectangle *result,
+            const GeglRectangle *src_rect)
+{
+  const Babl *in_format  = gegl_operation_get_format (operation, "input");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
+  gint err;
+  gint j;
+  cl_int cl_err;
+
+  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+  GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+  gdouble theta = o->angle * G_PI / 180.0;
+  gfloat  offset_x = (gfloat)(o->length * cos(theta));
+  gfloat  offset_y = (gfloat)(o->length * sin(theta));
+  gint num_steps = (gint)ceil(o->length) + 1;
+
+  GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output,   result, out_format, GEGL_CL_BUFFER_WRITE, GEGL_ABYSS_NONE);
+                gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format,  GEGL_CL_BUFFER_READ,
+                                                           op_area->left, op_area->right, op_area->top, op_area->bottom, GEGL_ABYSS_NONE);
+  while (gegl_buffer_cl_iterator_next (i, &err))
+  {
+    if (err) return FALSE;
+    for (j=0; j < i->n; j++)
+    {
+      cl_err = cl_motion_blur(i->tex[read][j], i->tex[0][j], i->size[0][j], &i->roi[0][j], &i->roi[read][j], num_steps, offset_x, offset_y);
+      if (cl_err != CL_SUCCESS)
+      {
+        g_warning("[OpenCL] Error in gegl:motion-blur: %s", gegl_cl_errstring(cl_err));
+        return FALSE;
+      }
+    }
+  }
+  return TRUE;
+}
+
 static inline gfloat*
 get_pixel_color(gfloat* in_buf,
                 const GeglRectangle* rect,
@@ -97,6 +266,10 @@ process (GeglOperation       *operation,
   src_rect.width += op_area->left + op_area->right;
   src_rect.height += op_area->top + op_area->bottom;
 
+  if (gegl_cl_is_accelerated ())
+    if (cl_process (operation, input, output, roi, &src_rect))
+      return TRUE;
+
   in_buf = g_new (gfloat, src_rect.width * src_rect.height * 4);
   out_buf = g_new0 (gfloat, roi->width * roi->height * 4);
   out_pixel = out_buf;
@@ -168,6 +341,8 @@ gegl_chant_class_init (GeglChantClass *klass)
   filter_class->process = process;
   operation_class->prepare = prepare;
 
+  operation_class->opencl_support = TRUE;
+
   gegl_operation_class_set_keys (operation_class,
     "name"       , "gegl:motion-blur",
     "categories" , "blur",



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]