[gegl] motion-blur-linear: Add CL implementation
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] motion-blur-linear: Add CL implementation
- Date: Mon, 22 May 2017 21:17:57 +0000 (UTC)
commit ad2fa300e6b103361f416e70f66c9756a460dc37
Author: Anton Gorenko <anton streamcomputing eu>
Date: Mon May 15 16:13:45 2017 +0600
motion-blur-linear: Add CL implementation
opencl/motion-blur-circular.cl | 113 ++++++++++++++++++++++++
opencl/motion-blur-circular.cl.h | 115 +++++++++++++++++++++++++
operations/common/motion-blur-circular.c | 138 ++++++++++++++++++++++++++++--
3 files changed, 360 insertions(+), 6 deletions(-)
---
diff --git a/opencl/motion-blur-circular.cl b/opencl/motion-blur-circular.cl
new file mode 100644
index 0000000..933050d
--- /dev/null
+++ b/opencl/motion-blur-circular.cl
@@ -0,0 +1,113 @@
+float4
+get_pixel_color (__global const float4 *in,
+ const int rect_width,
+ const int rect_height,
+ const int rect_x,
+ const int rect_y,
+ const int x,
+ const int y)
+{
+ const int ix = clamp (x - rect_x, 0, rect_width - 1);
+ const int iy = clamp (y - rect_y, 0, rect_height - 1);
+ return in[iy * rect_width + ix];
+}
+
+float4
+bilinear_mix (const float4 p00,
+ const float4 p01,
+ const float4 p10,
+ const float4 p11,
+ const float dx,
+ const float dy)
+{
+ return mix (mix (p00, p10, dy), mix (p01, p11, dy), dx);
+}
+
+float
+compute_phi (float xr,
+ float yr)
+{
+ return atan2 (yr, xr);
+}
+
+#define NOMINAL_NUM_IT 100
+
+kernel void
+cl_motion_blur_circular (__global const float4 *in,
+ __global float4 *out,
+ const int src_width,
+ const int src_height,
+ const int src_x,
+ const int src_y,
+ const int dst_x,
+ const int dst_y,
+ const int whole_region_width,
+ const int whole_region_height,
+ const int whole_region_x,
+ const int whole_region_y,
+ const float angle,
+ const float center_x,
+ const float center_y)
+{
+ const int gidx = get_global_id(0);
+ const int gidy = get_global_id(1);
+
+ const int x = gidx + dst_x;
+ const int y = gidy + dst_y;
+
+ const float xr = x - center_x;
+ const float yr = y - center_y;
+ const float radius = hypot (xr, yr);
+
+ /* This is not the "real" length, a bit shorter */
+ const float arc_length = radius * angle * M_SQRT2_F;
+
+ /* ensure quality with small angles */
+ int n = max ((int)ceil (arc_length), 3);
+
+ /* performance concern */
+ if (n > NOMINAL_NUM_IT)
+ n = NOMINAL_NUM_IT + (int) sqrt ((float)(n - NOMINAL_NUM_IT));
+
+ const float phi_base = compute_phi (xr, yr);
+ const float phi_start = phi_base + angle / 2.0f;
+ const float phi_step = angle / (float)n;
+
+ float4 sum = (float4)0.0f;
+ int count = 0;
+
+ /* Iterate over the arc */
+ for (int i = 0; i < n; i++)
+ {
+ float s_val, c_val;
+ s_val = sincos (phi_start - i * phi_step, &c_val);
+
+ const float fx = center_x + radius * c_val;
+ const float fy = center_y + radius * s_val;
+ const int ix = (int)floor (fx);
+ const int iy = (int)floor (fy);
+
+ if (ix >= whole_region_x && ix < whole_region_x + whole_region_width &&
+ iy >= whole_region_y && iy < whole_region_y + whole_region_height)
+ {
+ /* do bilinear interpolation to get a nice smooth result */
+ const float4 p00 = get_pixel_color (
+ in, src_width, src_height, src_x, src_y, ix, iy);
+ const float4 p01 = get_pixel_color (
+ in, src_width, src_height, src_x, src_y, ix + 1, iy);
+ const float4 p10 = get_pixel_color (
+ in, src_width, src_height, src_x, src_y, ix, iy + 1);
+ const float4 p11 = get_pixel_color (
+ in, src_width, src_height, src_x, src_y, ix + 1, iy + 1);
+ sum += bilinear_mix(p00, p01, p10, p11, fx - ix, fy - iy);
+ count++;
+ }
+ }
+
+ float4 out_v;
+ if (count == 0)
+ out_v = get_pixel_color (in, src_width, src_height, src_x, src_y, x, y);
+ else
+ out_v = sum / (float) count;
+ out[gidy * get_global_size(0) + gidx] = out_v;
+}
diff --git a/opencl/motion-blur-circular.cl.h b/opencl/motion-blur-circular.cl.h
new file mode 100644
index 0000000..a109526
--- /dev/null
+++ b/opencl/motion-blur-circular.cl.h
@@ -0,0 +1,115 @@
+static const char* motion_blur_circular_cl_source =
+"float4 \n"
+"get_pixel_color (__global const float4 *in, \n"
+" const int rect_width, \n"
+" const int rect_height, \n"
+" const int rect_x, \n"
+" const int rect_y, \n"
+" const int x, \n"
+" const int y) \n"
+"{ \n"
+" const int ix = clamp (x - rect_x, 0, rect_width - 1); \n"
+" const int iy = clamp (y - rect_y, 0, rect_height - 1); \n"
+" return in[iy * rect_width + ix]; \n"
+"} \n"
+" \n"
+"float4 \n"
+"bilinear_mix (const float4 p00, \n"
+" const float4 p01, \n"
+" const float4 p10, \n"
+" const float4 p11, \n"
+" const float dx, \n"
+" const float dy) \n"
+"{ \n"
+" return mix (mix (p00, p10, dy), mix (p01, p11, dy), dx); \n"
+"} \n"
+" \n"
+"float \n"
+"compute_phi (float xr, \n"
+" float yr) \n"
+"{ \n"
+" return atan2 (yr, xr); \n"
+"} \n"
+" \n"
+"#define NOMINAL_NUM_IT 100 \n"
+" \n"
+"kernel void \n"
+"cl_motion_blur_circular (__global const float4 *in, \n"
+" __global float4 *out, \n"
+" const int src_width, \n"
+" const int src_height, \n"
+" const int src_x, \n"
+" const int src_y, \n"
+" const int dst_x, \n"
+" const int dst_y, \n"
+" const int whole_region_width, \n"
+" const int whole_region_height, \n"
+" const int whole_region_x, \n"
+" const int whole_region_y, \n"
+" const float angle, \n"
+" const float center_x, \n"
+" const float center_y) \n"
+"{ \n"
+" const int gidx = get_global_id(0); \n"
+" const int gidy = get_global_id(1); \n"
+" \n"
+" const int x = gidx + dst_x; \n"
+" const int y = gidy + dst_y; \n"
+" \n"
+" const float xr = x - center_x; \n"
+" const float yr = y - center_y; \n"
+" const float radius = hypot (xr, yr); \n"
+" \n"
+" /* This is not the \042real\042 length, a bit shorter */ \n"
+" const float arc_length = radius * angle * M_SQRT2_F; \n"
+" \n"
+" /* ensure quality with small angles */ \n"
+" int n = max ((int)ceil (arc_length), 3); \n"
+" \n"
+" /* performance concern */ \n"
+" if (n > NOMINAL_NUM_IT) \n"
+" n = NOMINAL_NUM_IT + (int) sqrt ((float)(n - NOMINAL_NUM_IT)); \n"
+" \n"
+" const float phi_base = compute_phi (xr, yr); \n"
+" const float phi_start = phi_base + angle / 2.0f; \n"
+" const float phi_step = angle / (float)n; \n"
+" \n"
+" float4 sum = (float4)0.0f; \n"
+" int count = 0; \n"
+" \n"
+" /* Iterate over the arc */ \n"
+" for (int i = 0; i < n; i++) \n"
+" { \n"
+" float s_val, c_val; \n"
+" s_val = sincos (phi_start - i * phi_step, &c_val); \n"
+" \n"
+" const float fx = center_x + radius * c_val; \n"
+" const float fy = center_y + radius * s_val; \n"
+" const int ix = (int)floor (fx); \n"
+" const int iy = (int)floor (fy); \n"
+" \n"
+" if (ix >= whole_region_x && ix < whole_region_x + whole_region_width && \n"
+" iy >= whole_region_y && iy < whole_region_y + whole_region_height) \n"
+" { \n"
+" /* do bilinear interpolation to get a nice smooth result */ \n"
+" const float4 p00 = get_pixel_color ( \n"
+" in, src_width, src_height, src_x, src_y, ix, iy); \n"
+" const float4 p01 = get_pixel_color ( \n"
+" in, src_width, src_height, src_x, src_y, ix + 1, iy); \n"
+" const float4 p10 = get_pixel_color ( \n"
+" in, src_width, src_height, src_x, src_y, ix, iy + 1); \n"
+" const float4 p11 = get_pixel_color ( \n"
+" in, src_width, src_height, src_x, src_y, ix + 1, iy + 1); \n"
+" sum += bilinear_mix(p00, p01, p10, p11, fx - ix, fy - iy); \n"
+" count++; \n"
+" } \n"
+" } \n"
+" \n"
+" float4 out_v; \n"
+" if (count == 0) \n"
+" out_v = get_pixel_color (in, src_width, src_height, src_x, src_y, x, y); \n"
+" else \n"
+" out_v = sum / (float) count; \n"
+" out[gidy * get_global_size(0) + gidx] = out_v; \n"
+"} \n"
+;
diff --git a/operations/common/motion-blur-circular.c b/operations/common/motion-blur-circular.c
index b94a085..e8df6a6 100644
--- a/operations/common/motion-blur-circular.c
+++ b/operations/common/motion-blur-circular.c
@@ -82,7 +82,7 @@ prepare (GeglOperation *operation)
if (whole_region != NULL)
{
- gdouble center_x = gegl_coordinate_relative_to_pixel (o->center_x,
+ gdouble center_x = gegl_coordinate_relative_to_pixel (o->center_x,
whole_region->width);
gdouble center_y = gegl_coordinate_relative_to_pixel (o->center_y,
whole_region->height);
@@ -151,6 +151,128 @@ compute_phi (gdouble xr,
return phi;
}
+#include "opencl/gegl-cl.h"
+#include "gegl-buffer-cl-iterator.h"
+
+#include "opencl/motion-blur-circular.cl.h"
+
+static GeglClRunData *cl_data = NULL;
+
+static gboolean
+cl_motion_blur_circular (cl_mem in,
+ cl_mem out,
+ const GeglRectangle *src_rect,
+ const GeglRectangle *dst_rect,
+ const GeglRectangle *whole_region,
+ gdouble angle,
+ gdouble center_x,
+ gdouble center_y)
+{
+ cl_int cl_err = 0;
+ size_t global_ws[2];
+ cl_float a, cx, cy;
+
+ if (!cl_data)
+ {
+ const char *kernel_name[] = { "cl_motion_blur_circular", NULL };
+ cl_data = gegl_cl_compile_and_build (motion_blur_circular_cl_source,
+ kernel_name);
+ }
+
+ if (!cl_data)
+ return TRUE;
+
+ a = (cl_float)angle;
+ cx = (cl_float)center_x;
+ cy = (cl_float)center_y;
+ cl_err = gegl_cl_set_kernel_args (cl_data->kernel[0],
+ sizeof(cl_mem), &in,
+ sizeof(cl_mem), &out,
+ sizeof(cl_int), &src_rect->width,
+ sizeof(cl_int), &src_rect->height,
+ sizeof(cl_int), &src_rect->x,
+ sizeof(cl_int), &src_rect->y,
+ sizeof(cl_int), &dst_rect->x,
+ sizeof(cl_int), &dst_rect->y,
+ sizeof(cl_int), &whole_region->width,
+ sizeof(cl_int), &whole_region->height,
+ sizeof(cl_int), &whole_region->x,
+ sizeof(cl_int), &whole_region->y,
+ sizeof(cl_float), &a,
+ sizeof(cl_float), &cx,
+ sizeof(cl_float), &cy,
+ NULL);
+ CL_CHECK;
+
+ global_ws[0] = dst_rect->width;
+ global_ws[1] = dst_rect->height;
+ cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
+ cl_data->kernel[0], 2,
+ NULL, global_ws, NULL,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+ return FALSE;
+
+error:
+ return TRUE;
+}
+
+static gboolean
+cl_process (GeglOperation *operation,
+ GeglBuffer *input,
+ GeglBuffer *output,
+ const GeglRectangle *result,
+ const GeglRectangle *whole_region,
+ gdouble angle,
+ gdouble center_x,
+ gdouble center_y)
+{
+ GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+
+ GeglBufferClIterator *i;
+ const Babl *in_format = gegl_operation_get_format (operation, "input");
+ const Babl *out_format = gegl_operation_get_format (operation, "output");
+ gint err;
+ gint read;
+
+ i = gegl_buffer_cl_iterator_new (output,
+ result,
+ out_format,
+ GEGL_CL_BUFFER_WRITE);
+
+ read = gegl_buffer_cl_iterator_add_2 (i,
+ input,
+ result,
+ in_format,
+ GEGL_CL_BUFFER_READ,
+ op_area->left,
+ op_area->right,
+ op_area->top,
+ op_area->bottom,
+ GEGL_ABYSS_NONE);
+
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err)
+ return FALSE;
+
+ err = cl_motion_blur_circular (i->tex[read],
+ i->tex[0],
+ &i->roi[read],
+ &i->roi[0],
+ whole_region,
+ angle,
+ center_x,
+ center_y);
+
+ if (err)
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
static gboolean
process (GeglOperation *operation,
GeglBuffer *input,
@@ -174,6 +296,13 @@ process (GeglOperation *operation,
center_y = gegl_coordinate_relative_to_pixel (
o->center_y, whole_region->height);
+ angle = o->angle * G_PI / 180.0;
+ while (angle < 0.0)
+ angle += 2 * G_PI;
+
+ if (gegl_operation_use_opencl (operation))
+ if (cl_process (operation, input, output, roi, whole_region, angle, center_x, center_y))
+ return TRUE;
src_rect = *roi;
src_rect.x -= op_area->left;
@@ -188,11 +317,6 @@ process (GeglOperation *operation,
gegl_buffer_get (input, &src_rect, 1.0, babl_format ("RaGaBaA float"),
in_buf, GEGL_AUTO_ROWSTRIDE, GEGL_ABYSS_NONE);
- angle = o->angle * G_PI / 180.0;
-
- while (angle < 0.0)
- angle += 2 * G_PI;
-
for (y = roi->y; y < roi->height + roi->y; ++y)
{
for (x = roi->x; x < roi->width + roi->x; ++x)
@@ -290,6 +414,8 @@ gegl_op_class_init (GeglOpClass *klass)
filter_class = GEGL_OPERATION_FILTER_CLASS (klass);
operation_class->prepare = prepare;
+ operation_class->opencl_support = TRUE;
+
filter_class->process = process;
gegl_operation_class_set_keys (operation_class,
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]