[Gegl-developer] [PATCH] Add OpenCL implementation of the motion-blur-zoom operation



Signed-off-by: Yongjia Zhang <yongjia zhang intel com>
---
 opencl/motion-blur-zoom.cl           |  78 ++++++++++++++++++++++++++
 opencl/motion-blur-zoom.cl.h         |  80 ++++++++++++++++++++++++++
 operations/common/motion-blur-zoom.c | 105 +++++++++++++++++++++++++++++++++++
 3 files changed, 263 insertions(+)
 create mode 100644 opencl/motion-blur-zoom.cl
 create mode 100644 opencl/motion-blur-zoom.cl.h

diff --git a/opencl/motion-blur-zoom.cl b/opencl/motion-blur-zoom.cl
new file mode 100644
index 0000000..085c70c
--- /dev/null
+++ b/opencl/motion-blur-zoom.cl
@@ -0,0 +1,78 @@
+#define CLAMP(val,lo,hi) ((val)<(lo)?(lo):((hi)<(val)?(hi):(val))) /* limit val to [lo, hi]; arguments may be evaluated more than once */
+#define SQR(x) ((x)*(x)) /* x squared; x is evaluated twice */
+#define MAX_NUM_IT 200 /* hard upper bound on the number of samples taken per output pixel */
+#define NOMINAL_NUM_IT 100 /* above this count, the sample count grows only with the square root of the excess */
+/* Return the pixel at (x, y) from a buffer covering the given rectangle; positions outside it are clamped to the nearest edge pixel. */
+float4 get_pixel_color(const __global float4 *in_buf, /* pixels, indexed as iy * rect_width + ix */
+                       int rect_width,   /* width of the rectangle covered by in_buf */
+                       int rect_height,  /* height of the rectangle covered by in_buf */
+                       int rect_x,       /* x coordinate of the rectangle's first column */
+                       int rect_y,       /* y coordinate of the rectangle's first row */
+                       int x,            /* requested x, in the same coordinate space as rect_x */
+                       int y)            /* requested y, in the same coordinate space as rect_y */
+{
+    int ix = x - rect_x; /* translate to buffer-local column */
+    int iy = y - rect_y; /* translate to buffer-local row */
+    
+    ix = CLAMP(ix, 0, rect_width-1);  /* keep the column inside the buffer */
+    iy = CLAMP(iy, 0, rect_height-1); /* keep the row inside the buffer */
+
+    return in_buf[iy * rect_width + ix];
+}
+/* Zoom motion blur kernel: each work-item averages bilinearly interpolated source samples taken along */
+/* the line from its own pixel towards (center_x, center_y), shortened by factor, and writes one pixel. */
+__kernel void motion_blur_zoom(const __global float4 *src_buf, /* source pixels, read through get_pixel_color */
+                               __global float4 *dst_buf, /* output, one float4 per work-item, rows of get_global_size(0) */
+                               int src_width,   /* width of the rectangle covered by src_buf */
+                               int src_height,  /* height of the rectangle covered by src_buf */
+                               int src_x,       /* x origin of the rectangle covered by src_buf */
+                               int src_y,       /* y origin of the rectangle covered by src_buf */
+                               int x,           /* x offset added to the global id to get the pixel position */
+                               int y,           /* y offset added to the global id to get the pixel position */
+                               float center_x,  /* x of the point the blur line heads towards */
+                               float center_y,  /* y of the point the blur line heads towards */
+                               float factor)    /* fraction of the distance to the center covered by the blur line */
+{
+    const int gidx = get_global_id(0);
+    const int gidy = get_global_id(1);
+    float dxx,dyy,ix,iy,inv_xy_len;
+    float4 sum = 0.0f; /* running total of all samples */
+    float x_start = x + gidx; /* position of this work-item's pixel */
+    float y_start = y + gidy;
+    float x_end = x + (center_x - x - gidx) * factor + gidx; /* equals x_start + (center_x - x_start) * factor */
+    float y_end = y + (center_y - y - gidy) * factor + gidy; /* equals y_start + (center_y - y_start) * factor */
+    int dist = ceil(sqrt(SQR(x_end-x_start)+SQR(y_end-y_start))+1); /* line length plus one, rounded up */
+    int xy_len = max(dist, 3); /* sample count: never fewer than 3 */
+    if(xy_len > NOMINAL_NUM_IT) /* long lines: add only the square root of the excess, capped at MAX_NUM_IT */
+       xy_len = min(NOMINAL_NUM_IT + (int)sqrt((float)(xy_len - NOMINAL_NUM_IT)),
+                    MAX_NUM_IT);
+    inv_xy_len = 1.0f / xy_len; /* used for the step size and as the weight of the final average */
+    dxx = (x_end - x_start) * inv_xy_len; /* per-sample step along x */
+    dyy = (y_end - y_start) * inv_xy_len; /* per-sample step along y */
+    ix = x_start;
+    iy = y_start;
+  
+    for( int i=0; i<xy_len; ++i )
+    {
+        float dx = ix - floor(ix); /* fractional parts serve as the bilinear weights */
+        float dy = iy - floor(iy);
+        float4 mixy0,mixy1,pix0,pix1,pix2,pix3;
+        pix0 = get_pixel_color(src_buf, src_width, src_height, /* the four neighbouring pixels around (ix, iy) */
+                               src_x, src_y, (int)ix, (int)iy); /* NOTE(review): (int) truncates toward zero but the weights use floor(); they differ for negative ix/iy - confirm whether that can occur */
+        pix1 = get_pixel_color(src_buf, src_width, src_height,
+                               src_x, src_y, (int)(ix+1.0f), (int)iy);
+        pix2 = get_pixel_color(src_buf, src_width, src_height,
+                               src_x, src_y, (int)ix, (int)(iy+1.0f));
+        pix3 = get_pixel_color(src_buf, src_width, src_height,
+                               src_x, src_y, (int)(ix+1.0f), (int)(iy+1.0f));
+        
+        mixy0 = dy * (pix2 - pix0) + pix0; /* blend vertically in the left column */
+        mixy1 = dy * (pix3 - pix1) + pix1; /* blend vertically in the right column */
+        sum += dx * (mixy1 - mixy0) + mixy0; /* blend the two columns horizontally and accumulate */
+        ix += dxx;
+        iy += dyy;
+    }
+
+    dst_buf[gidy * get_global_size(0) + gidx] = sum * (float4)(inv_xy_len); /* mean of the xy_len samples */
+    
+}
diff --git a/opencl/motion-blur-zoom.cl.h b/opencl/motion-blur-zoom.cl.h
new file mode 100644
index 0000000..69b0ada
--- /dev/null
+++ b/opencl/motion-blur-zoom.cl.h
@@ -0,0 +1,80 @@
+static const char* motion_blur_zoom_cl_source = /* OpenCL source of the motion_blur_zoom kernel as one C string; the text mirrors opencl/motion-blur-zoom.cl from this patch */
+"#define CLAMP(val,lo,hi) ((val)<(lo)?(lo):((hi)<(val)?(hi):(val)))                \n"
+"#define SQR(x) ((x)*(x))                                                          \n"
+"#define MAX_NUM_IT 200                                                            \n"
+"#define NOMINAL_NUM_IT 100                                                        \n"
+"                                                                                  \n"
+"float4 get_pixel_color(const __global float4 *in_buf,                             \n"
+"                       int rect_width,                                            \n" 
+"                       int rect_height,                                           \n"
+"                       int rect_x,                                                \n"
+"                       int rect_y,                                                \n"
+"                       int x,                                                     \n"
+"                       int y)                                                     \n"
+"{                                                                                 \n"
+"    int ix = x - rect_x;                                                          \n"
+"    int iy = y - rect_y;                                                          \n"
+"                                                                                  \n"
+"    ix = CLAMP(ix, 0, rect_width-1);                                              \n"
+"    iy = CLAMP(iy, 0, rect_height-1);                                             \n"
+"                                                                                  \n"
+"    return in_buf[iy * rect_width + ix];                                          \n"
+"}                                                                                 \n"
+"                                                                                  \n"
+"                                                                                  \n"
+"__kernel void motion_blur_zoom(const __global float4 *src_buf,                    \n"
+"                               __global float4 *dst_buf,                          \n"
+"                               int src_width,                                     \n"
+"                               int src_height,                                    \n"
+"                               int src_x,                                         \n"
+"                               int src_y,                                         \n"
+"                               int x,                                             \n"
+"                               int y,                                             \n"
+"                               float center_x,                                    \n"
+"                               float center_y,                                    \n"
+"                               float factor)                                      \n"
+"{                                                                                 \n"
+"    const int gidx = get_global_id(0);                                            \n"
+"    const int gidy = get_global_id(1);                                            \n"
+"    float dxx,dyy,ix,iy,inv_xy_len;                                               \n"
+"    float4 sum = 0.0f;                                                            \n"
+"    float x_start = x + gidx;                                                     \n"
+"    float y_start = y + gidy;                                                     \n"
+"    float x_end = x + (center_x - x - gidx) * factor + gidx;                      \n"
+"    float y_end = y + (center_y - y - gidy) * factor + gidy;                      \n"
+"    int dist = ceil(sqrt(SQR(x_end-x_start)+SQR(y_end-y_start))+1);               \n"
+"    int xy_len = max(dist, 3);                                                    \n"
+"    if(xy_len > NOMINAL_NUM_IT)                                                   \n"
+"       xy_len = min(NOMINAL_NUM_IT + (int)sqrt((float)(xy_len - NOMINAL_NUM_IT)), \n"
+"                    MAX_NUM_IT);                                                  \n"
+"    inv_xy_len = 1.0f / xy_len;                                                   \n"
+"    dxx = (x_end - x_start) * inv_xy_len;                                         \n"
+"    dyy = (y_end - y_start) * inv_xy_len;                                         \n"
+"    ix = x_start;                                                                 \n"
+"    iy = y_start;                                                                 \n"
+"                                                                                  \n"
+"    for( int i=0; i<xy_len; ++i )                                                 \n"
+"    {                                                                             \n"
+"        float dx = ix - floor(ix);                                                \n"
+"        float dy = iy - floor(iy);                                                \n"
+"        float4 mixy0,mixy1,pix0,pix1,pix2,pix3;                                   \n"
+"        pix0 = get_pixel_color(src_buf, src_width, src_height,                    \n"
+"                               src_x, src_y, (int)ix, (int)iy);                   \n"
+"        pix1 = get_pixel_color(src_buf, src_width, src_height,                    \n"
+"                               src_x, src_y, (int)(ix+1.0f), (int)iy);            \n"
+"        pix2 = get_pixel_color(src_buf, src_width, src_height,                    \n"
+"                               src_x, src_y, (int)ix, (int)(iy+1.0f));            \n"
+"        pix3 = get_pixel_color(src_buf, src_width, src_height,                    \n"
+"                               src_x, src_y, (int)(ix+1.0f), (int)(iy+1.0f));     \n"
+"                                                                                  \n"
+"        mixy0 = dy * (pix2 - pix0) + pix0;                                        \n"
+"        mixy1 = dy * (pix3 - pix1) + pix1;                                        \n"
+"        sum += dx * (mixy1 - mixy0) + mixy0;                                      \n"
+"        ix += dxx;                                                                \n"
+"        iy += dyy;                                                                \n"
+"    }                                                                             \n"
+"                                                                                  \n"
+"    dst_buf[gidy * get_global_size(0) + gidx] = sum * (float4)(inv_xy_len);       \n"
+"                                                                                  \n"
+"}                                                                                 \n"    
+; /* end of motion_blur_zoom_cl_source */
diff --git a/operations/common/motion-blur-zoom.c b/operations/common/motion-blur-zoom.c
index 841c1c2..25ee2f0 100644
--- a/operations/common/motion-blur-zoom.c
+++ b/operations/common/motion-blur-zoom.c
@@ -95,6 +95,107 @@ prepare (GeglOperation *operation)
   gegl_operation_set_format (operation, "output", babl_format ("RaGaBaA float"));
 }
 
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+#include "opencl/motion-blur-zoom.cl.h"
+
+static GeglClRunData *cl_data = NULL; /* compiled kernel data; built on the first call below and reused afterwards */
+/* Set the kernel arguments and enqueue motion_blur_zoom over roi. Returns FALSE on success and TRUE on any failure. */
+static gboolean
+cl_motion_blur_zoom(cl_mem in_tex,                 /* source pixels handed to the kernel as src_buf */
+                    cl_mem out_tex,                /* destination pixels handed to the kernel as dst_buf */
+                    const GeglRectangle *roi,      /* output rectangle: supplies the work size and the kernel's x/y offset */
+                    const GeglRectangle *src_rect, /* rectangle covered by in_tex */
+                    float center_x,                /* forwarded unchanged to the kernel */
+                    float center_y,                /* forwarded unchanged to the kernel */
+                    float factor)                  /* forwarded unchanged to the kernel */
+{
+    cl_int cl_err = 0;
+    size_t global_ws[2] = {roi->width,roi->height}; /* one work-item per output pixel */
+    if(!cl_data) /* compile the kernel only once */
+    {
+          const char *kernel_name[] = {"motion_blur_zoom", NULL};
+       cl_data = gegl_cl_compile_and_build(motion_blur_zoom_cl_source, kernel_name);
+    }
+    if(!cl_data) /* still NULL after the build attempt: report failure */
+       return TRUE;
+
+    cl_err = gegl_cl_set_kernel_args(cl_data->kernel[0], /* size/pointer pairs in the kernel's parameter order, ended by NULL */
+                                     sizeof(cl_mem), (void *)&in_tex,
+                                     sizeof(cl_mem), (void *)&out_tex,
+                                     sizeof(cl_int), (void *)&src_rect->width,
+                                     sizeof(cl_int), (void *)&src_rect->height,
+                                     sizeof(cl_int), (void *)&src_rect->x,
+                                     sizeof(cl_int), (void *)&src_rect->y,
+                                     sizeof(cl_int), (void *)&roi->x,
+                                     sizeof(cl_int), (void *)&roi->y,
+                                     sizeof(cl_float), (void *)&center_x,
+                                     sizeof(cl_float), (void *)&center_y,
+                                     sizeof(cl_float), (void *)&factor, NULL);
+    CL_CHECK; /* macro defined elsewhere; presumably jumps to the error label when cl_err is set - confirm */
+
+    cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue(),
+                                         cl_data->kernel[0], 2, /* two-dimensional range */
+                                         NULL, global_ws, NULL, /* no global offset, no explicit local work size */
+                                         0, NULL, NULL);
+    CL_CHECK;
+
+    return FALSE; /* FALSE means success in this function */
+
+error:
+    return TRUE; /* TRUE means failure */
+}
+/* Run the blur through an OpenCL buffer iterator, calling cl_motion_blur_zoom once per iteration. */
+/* Returns TRUE when every iteration succeeded and FALSE as soon as one reports an error. */
+static gboolean
+cl_process(GeglOperation *operation,
+           GeglBuffer *input,             /* buffer the blur reads from */
+           GeglBuffer *output,            /* buffer the blur writes to */
+           const GeglRectangle *result,   /* region to process, used for both iterator entries */
+           const GeglRectangle *src_rect) /* not referenced in this function */
+{
+    GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER(operation);
+    GeglChantO *o = GEGL_CHANT_PROPERTIES(operation);
+
+    const Babl *in_format = gegl_operation_get_format(operation,"input");
+    const Babl *out_format = gegl_operation_get_format(operation, "output");
+
+    gint err;
+   
+    GeglBufferClIterator *i = gegl_buffer_cl_iterator_new(output, /* the output is the iterator's entry 0 (see tex[0] and roi[0] below) */
+                                                          result,
+                                                          out_format,
+                                                          GEGL_CL_BUFFER_WRITE);
+    gint read = gegl_buffer_cl_iterator_add_2(i, /* add the input; read is the index used for tex[] and roi[] below */
+                                              input,
+                                              result,
+                                              in_format,
+                                              GEGL_CL_BUFFER_READ,
+                                              op_area->left, /* the filter's border sizes are passed along with the input */
+                                              op_area->right,
+                                              op_area->top,
+                                              op_area->bottom,
+                                              GEGL_ABYSS_NONE);
+    while(gegl_buffer_cl_iterator_next(i,&err))
+    {
+       if(err) return FALSE; /* NOTE(review): no iterator cleanup is visible on this path - confirm the iterator releases itself on error */
+       err = cl_motion_blur_zoom(i->tex[read], /* err now holds a gboolean where TRUE means failure */
+                                 i->tex[0],
+                                 &i->roi[0],
+                                 &i->roi[read], /* assumed to be the padded input rectangle - TODO confirm against the iterator */
+                                 o->center_x,
+                                 o->center_y,
+                                 o->factor);
+
+       if(err) return FALSE;
+    }
+
+    return TRUE;
+    
+
+}
+
+
 static inline gfloat *
 get_pixel_color (gfloat              *in_buf,
                  const GeglRectangle *rect,
@@ -130,6 +231,9 @@ process (GeglOperation       *operation,
   src_rect.width += op_area->left + op_area->right;
   src_rect.height += op_area->top + op_area->bottom;
 
+  if(gegl_operation_use_opencl(operation))
+    if(cl_process(operation, input, output, roi, &src_rect))
+      return TRUE;
   in_buf  = g_new  (gfloat, src_rect.width * src_rect.height * 4);
   out_buf = g_new0 (gfloat, roi->width * roi->height * 4);
   out_pixel = out_buf;
@@ -218,6 +322,7 @@ gegl_chant_class_init (GeglChantClass *klass)
   filter_class    = GEGL_OPERATION_FILTER_CLASS (klass);
 
   operation_class->prepare = prepare;
+  operation_class->opencl_support = TRUE;
   filter_class->process    = process;
 
   gegl_operation_class_set_keys (operation_class,
-- 
1.8.3.2




[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]