[gegl] motion-blur-linear: Add CL implementation

From: Øyvind Kolås <ok src gnome org>
To: commits-list gnome org
Cc:
Subject: [gegl] motion-blur-linear: Add CL implementation
Date: Mon, 22 May 2017 21:17:57 +0000 (UTC)
commit ad2fa300e6b103361f416e70f66c9756a460dc37
Author: Anton Gorenko <anton streamcomputing eu>
Date:   Mon May 15 16:13:45 2017 +0600

    motion-blur-linear: Add CL implementation

 opencl/motion-blur-circular.cl           |  113 ++++++++++++++++++++++++
 opencl/motion-blur-circular.cl.h         |  115 +++++++++++++++++++++++++
 operations/common/motion-blur-circular.c |  138 ++++++++++++++++++++++++++++--
 3 files changed, 360 insertions(+), 6 deletions(-)
---
diff --git a/opencl/motion-blur-circular.cl b/opencl/motion-blur-circular.cl
new file mode 100644
index 0000000..933050d
--- /dev/null
+++ b/opencl/motion-blur-circular.cl
@@ -0,0 +1,113 @@
+float4
+get_pixel_color (__global const float4 *in,
+                          const int     rect_width,
+                          const int     rect_height,
+                          const int     rect_x,
+                          const int     rect_y,
+                          const int     x,
+                          const int     y)
+{
+  const int ix = clamp (x - rect_x, 0, rect_width - 1);
+  const int iy = clamp (y - rect_y, 0, rect_height - 1);
+  return in[iy * rect_width + ix];
+}
+
+float4
+bilinear_mix (const float4 p00,
+              const float4 p01,
+              const float4 p10,
+              const float4 p11,
+              const float dx,
+              const float dy)
+{
+  return mix (mix (p00, p10, dy), mix (p01, p11, dy), dx);
+}
+
+float
+compute_phi (float xr,
+             float yr)
+{
+  return atan2 (yr, xr);
+}
+
+#define NOMINAL_NUM_IT 100
+
+kernel void
+cl_motion_blur_circular (__global const float4 *in,
+                         __global       float4 *out,
+                                  const int     src_width,
+                                  const int     src_height,
+                                  const int     src_x,
+                                  const int     src_y,
+                                  const int     dst_x,
+                                  const int     dst_y,
+                                  const int     whole_region_width,
+                                  const int     whole_region_height,
+                                  const int     whole_region_x,
+                                  const int     whole_region_y,
+                                  const float   angle,
+                                  const float   center_x,
+                                  const float   center_y)
+{
+  const int gidx = get_global_id(0);
+  const int gidy = get_global_id(1);
+
+  const int x = gidx + dst_x;
+  const int y = gidy + dst_y;
+
+  const float xr = x - center_x;
+  const float yr = y - center_y;
+  const float radius = hypot (xr, yr);
+
+  /* This is not the "real" length, a bit shorter */
+  const float arc_length = radius * angle * M_SQRT2_F;
+
+  /* ensure quality with small angles */
+  int n = max ((int)ceil (arc_length), 3);
+
+  /* performance concern */
+  if (n > NOMINAL_NUM_IT)
+    n = NOMINAL_NUM_IT + (int) sqrt ((float)(n - NOMINAL_NUM_IT));
+
+  const float phi_base = compute_phi (xr, yr);
+  const float phi_start = phi_base + angle / 2.0f;
+  const float phi_step = angle / (float)n;
+
+  float4 sum = (float4)0.0f;
+  int count = 0;
+
+  /* Iterate over the arc */
+  for (int i = 0; i < n; i++)
+    {
+      float s_val, c_val;
+      s_val = sincos (phi_start - i * phi_step, &c_val);
+
+      const float fx = center_x + radius * c_val;
+      const float fy = center_y + radius * s_val;
+      const int ix = (int)floor (fx);
+      const int iy = (int)floor (fy);
+
+      if (ix >= whole_region_x && ix < whole_region_x + whole_region_width &&
+          iy >= whole_region_y && iy < whole_region_y + whole_region_height)
+        {
+          /* do bilinear interpolation to get a nice smooth result */
+          const float4 p00 = get_pixel_color (
+            in, src_width, src_height, src_x, src_y, ix, iy);
+          const float4 p01 = get_pixel_color (
+            in, src_width, src_height, src_x, src_y, ix + 1, iy);
+          const float4 p10 = get_pixel_color (
+            in, src_width, src_height, src_x, src_y, ix, iy + 1);
+          const float4 p11 = get_pixel_color (
+            in, src_width, src_height, src_x, src_y, ix + 1, iy + 1);
+          sum += bilinear_mix(p00, p01, p10, p11, fx - ix, fy - iy);
+          count++;
+        }
+    }
+
+  float4 out_v;
+  if (count == 0)
+    out_v = get_pixel_color (in, src_width, src_height, src_x, src_y, x, y);
+  else
+    out_v = sum / (float) count;
+  out[gidy * get_global_size(0) + gidx] = out_v;
+}
diff --git a/opencl/motion-blur-circular.cl.h b/opencl/motion-blur-circular.cl.h
new file mode 100644
index 0000000..a109526
--- /dev/null
+++ b/opencl/motion-blur-circular.cl.h
@@ -0,0 +1,115 @@
+static const char* motion_blur_circular_cl_source =
+"float4                                                                        \n"
+"get_pixel_color (__global const float4 *in,                                   \n"
+"                          const int     rect_width,                           \n"
+"                          const int     rect_height,                          \n"
+"                          const int     rect_x,                               \n"
+"                          const int     rect_y,                               \n"
+"                          const int     x,                                    \n"
+"                          const int     y)                                    \n"
+"{                                                                             \n"
+"  const int ix = clamp (x - rect_x, 0, rect_width - 1);                       \n"
+"  const int iy = clamp (y - rect_y, 0, rect_height - 1);                      \n"
+"  return in[iy * rect_width + ix];                                            \n"
+"}                                                                             \n"
+"                                                                              \n"
+"float4                                                                        \n"
+"bilinear_mix (const float4 p00,                                               \n"
+"              const float4 p01,                                               \n"
+"              const float4 p10,                                               \n"
+"              const float4 p11,                                               \n"
+"              const float dx,                                                 \n"
+"              const float dy)                                                 \n"
+"{                                                                             \n"
+"  return mix (mix (p00, p10, dy), mix (p01, p11, dy), dx);                    \n"
+"}                                                                             \n"
+"                                                                              \n"
+"float                                                                         \n"
+"compute_phi (float xr,                                                        \n"
+"             float yr)                                                        \n"
+"{                                                                             \n"
+"  return atan2 (yr, xr);                                                      \n"
+"}                                                                             \n"
+"                                                                              \n"
+"#define NOMINAL_NUM_IT 100                                                    \n"
+"                                                                              \n"
+"kernel void                                                                   \n"
+"cl_motion_blur_circular (__global const float4 *in,                           \n"
+"                         __global       float4 *out,                          \n"
+"                                  const int     src_width,                    \n"
+"                                  const int     src_height,                   \n"
+"                                  const int     src_x,                        \n"
+"                                  const int     src_y,                        \n"
+"                                  const int     dst_x,                        \n"
+"                                  const int     dst_y,                        \n"
+"                                  const int     whole_region_width,           \n"
+"                                  const int     whole_region_height,          \n"
+"                                  const int     whole_region_x,               \n"
+"                                  const int     whole_region_y,               \n"
+"                                  const float   angle,                        \n"
+"                                  const float   center_x,                     \n"
+"                                  const float   center_y)                     \n"
+"{                                                                             \n"
+"  const int gidx = get_global_id(0);                                          \n"
+"  const int gidy = get_global_id(1);                                          \n"
+"                                                                              \n"
+"  const int x = gidx + dst_x;                                                 \n"
+"  const int y = gidy + dst_y;                                                 \n"
+"                                                                              \n"
+"  const float xr = x - center_x;                                              \n"
+"  const float yr = y - center_y;                                              \n"
+"  const float radius = hypot (xr, yr);                                        \n"
+"                                                                              \n"
+"  /* This is not the \042real\042 length, a bit shorter */                    \n"
+"  const float arc_length = radius * angle * M_SQRT2_F;                        \n"
+"                                                                              \n"
+"  /* ensure quality with small angles */                                      \n"
+"  int n = max ((int)ceil (arc_length), 3);                                    \n"
+"                                                                              \n"
+"  /* performance concern */                                                   \n"
+"  if (n > NOMINAL_NUM_IT)                                                     \n"
+"    n = NOMINAL_NUM_IT + (int) sqrt ((float)(n - NOMINAL_NUM_IT));            \n"
+"                                                                              \n"
+"  const float phi_base = compute_phi (xr, yr);                                \n"
+"  const float phi_start = phi_base + angle / 2.0f;                            \n"
+"  const float phi_step = angle / (float)n;                                    \n"
+"                                                                              \n"
+"  float4 sum = (float4)0.0f;                                                  \n"
+"  int count = 0;                                                              \n"
+"                                                                              \n"
+"  /* Iterate over the arc */                                                  \n"
+"  for (int i = 0; i < n; i++)                                                 \n"
+"    {                                                                         \n"
+"      float s_val, c_val;                                                     \n"
+"      s_val = sincos (phi_start - i * phi_step, &c_val);                      \n"
+"                                                                              \n"
+"      const float fx = center_x + radius * c_val;                             \n"
+"      const float fy = center_y + radius * s_val;                             \n"
+"      const int ix = (int)floor (fx);                                         \n"
+"      const int iy = (int)floor (fy);                                         \n"
+"                                                                              \n"
+"      if (ix >= whole_region_x && ix < whole_region_x + whole_region_width && \n"
+"          iy >= whole_region_y && iy < whole_region_y + whole_region_height)  \n"
+"        {                                                                     \n"
+"          /* do bilinear interpolation to get a nice smooth result */         \n"
+"          const float4 p00 = get_pixel_color (                                \n"
+"            in, src_width, src_height, src_x, src_y, ix, iy);                 \n"
+"          const float4 p01 = get_pixel_color (                                \n"
+"            in, src_width, src_height, src_x, src_y, ix + 1, iy);             \n"
+"          const float4 p10 = get_pixel_color (                                \n"
+"            in, src_width, src_height, src_x, src_y, ix, iy + 1);             \n"
+"          const float4 p11 = get_pixel_color (                                \n"
+"            in, src_width, src_height, src_x, src_y, ix + 1, iy + 1);         \n"
+"          sum += bilinear_mix(p00, p01, p10, p11, fx - ix, fy - iy);          \n"
+"          count++;                                                            \n"
+"        }                                                                     \n"
+"    }                                                                         \n"
+"                                                                              \n"
+"  float4 out_v;                                                               \n"
+"  if (count == 0)                                                             \n"
+"    out_v = get_pixel_color (in, src_width, src_height, src_x, src_y, x, y);  \n"
+"  else                                                                        \n"
+"    out_v = sum / (float) count;                                              \n"
+"  out[gidy * get_global_size(0) + gidx] = out_v;                              \n"
+"}                                                                             \n"
+;
diff --git a/operations/common/motion-blur-circular.c b/operations/common/motion-blur-circular.c
index b94a085..e8df6a6 100644
--- a/operations/common/motion-blur-circular.c
+++ b/operations/common/motion-blur-circular.c
@@ -82,7 +82,7 @@ prepare (GeglOperation *operation)
 
   if (whole_region != NULL)
     {
-      gdouble center_x = gegl_coordinate_relative_to_pixel (o->center_x, 
+      gdouble center_x = gegl_coordinate_relative_to_pixel (o->center_x,
                                                             whole_region->width);
       gdouble center_y = gegl_coordinate_relative_to_pixel (o->center_y,
                                                             whole_region->height);
@@ -151,6 +151,128 @@ compute_phi (gdouble xr,
   return phi;
 }
 
+#include "opencl/gegl-cl.h"
+#include "gegl-buffer-cl-iterator.h"
+
+#include "opencl/motion-blur-circular.cl.h"
+
+static GeglClRunData *cl_data = NULL;
+
+static gboolean
+cl_motion_blur_circular (cl_mem               in,
+                         cl_mem               out,
+                         const GeglRectangle *src_rect,
+                         const GeglRectangle *dst_rect,
+                         const GeglRectangle *whole_region,
+                         gdouble              angle,
+                         gdouble              center_x,
+                         gdouble              center_y)
+{
+  cl_int cl_err = 0;
+  size_t global_ws[2];
+  cl_float a, cx, cy;
+
+  if (!cl_data)
+    {
+      const char *kernel_name[] = { "cl_motion_blur_circular", NULL };
+      cl_data = gegl_cl_compile_and_build (motion_blur_circular_cl_source,
+                                           kernel_name);
+    }
+
+  if (!cl_data)
+    return TRUE;
+
+  a  = (cl_float)angle;
+  cx = (cl_float)center_x;
+  cy = (cl_float)center_y;
+  cl_err = gegl_cl_set_kernel_args (cl_data->kernel[0],
+                                    sizeof(cl_mem),   &in,
+                                    sizeof(cl_mem),   &out,
+                                    sizeof(cl_int),   &src_rect->width,
+                                    sizeof(cl_int),   &src_rect->height,
+                                    sizeof(cl_int),   &src_rect->x,
+                                    sizeof(cl_int),   &src_rect->y,
+                                    sizeof(cl_int),   &dst_rect->x,
+                                    sizeof(cl_int),   &dst_rect->y,
+                                    sizeof(cl_int),   &whole_region->width,
+                                    sizeof(cl_int),   &whole_region->height,
+                                    sizeof(cl_int),   &whole_region->x,
+                                    sizeof(cl_int),   &whole_region->y,
+                                    sizeof(cl_float), &a,
+                                    sizeof(cl_float), &cx,
+                                    sizeof(cl_float), &cy,
+                                    NULL);
+  CL_CHECK;
+
+  global_ws[0] = dst_rect->width;
+  global_ws[1] = dst_rect->height;
+  cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
+                                        cl_data->kernel[0], 2,
+                                        NULL, global_ws, NULL,
+                                        0, NULL, NULL);
+  CL_CHECK;
+
+  return FALSE;
+
+error:
+  return TRUE;
+}
+
+static gboolean
+cl_process (GeglOperation       *operation,
+            GeglBuffer          *input,
+            GeglBuffer          *output,
+            const GeglRectangle *result,
+            const GeglRectangle *whole_region,
+            gdouble              angle,
+            gdouble              center_x,
+            gdouble              center_y)
+{
+  GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+
+  GeglBufferClIterator *i;
+  const Babl *in_format  = gegl_operation_get_format (operation, "input");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
+  gint        err;
+  gint        read;
+
+  i = gegl_buffer_cl_iterator_new (output,
+                                   result,
+                                   out_format,
+                                   GEGL_CL_BUFFER_WRITE);
+
+  read = gegl_buffer_cl_iterator_add_2 (i,
+                                        input,
+                                        result,
+                                        in_format,
+                                        GEGL_CL_BUFFER_READ,
+                                        op_area->left,
+                                        op_area->right,
+                                        op_area->top,
+                                        op_area->bottom,
+                                        GEGL_ABYSS_NONE);
+
+  while (gegl_buffer_cl_iterator_next (i, &err))
+    {
+      if (err)
+        return FALSE;
+
+      err = cl_motion_blur_circular (i->tex[read],
+                                     i->tex[0],
+                                     &i->roi[read],
+                                     &i->roi[0],
+                                     whole_region,
+                                     angle,
+                                     center_x,
+                                     center_y);
+
+      if (err)
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
 static gboolean
 process (GeglOperation       *operation,
          GeglBuffer          *input,
@@ -174,6 +296,13 @@ process (GeglOperation       *operation,
   center_y = gegl_coordinate_relative_to_pixel (
                     o->center_y, whole_region->height);
 
+  angle = o->angle * G_PI / 180.0;
+  while (angle < 0.0)
+    angle += 2 * G_PI;
+
+  if (gegl_operation_use_opencl (operation))
+    if (cl_process (operation, input, output, roi, whole_region, angle, center_x, center_y))
+      return TRUE;
 
   src_rect = *roi;
   src_rect.x -= op_area->left;
@@ -188,11 +317,6 @@ process (GeglOperation       *operation,
   gegl_buffer_get (input, &src_rect, 1.0, babl_format ("RaGaBaA float"),
                    in_buf, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);
 
-  angle = o->angle * G_PI / 180.0;
-
-  while (angle < 0.0)
-    angle += 2 * G_PI;
-
   for (y = roi->y; y < roi->height + roi->y; ++y)
     {
       for (x = roi->x; x < roi->width + roi->x; ++x)
@@ -290,6 +414,8 @@ gegl_op_class_init (GeglOpClass *klass)
   filter_class    = GEGL_OPERATION_FILTER_CLASS (klass);
 
   operation_class->prepare = prepare;
+  operation_class->opencl_support = TRUE;
+
   filter_class->process    = process;
 
   gegl_operation_class_set_keys (operation_class,
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]