[gegl/opencl-ops: 3/14] Add OpenCL support for gegl:edge-laplace
- From: Ãyvind KolÃs <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl/opencl-ops: 3/14] Add OpenCL support for gegl:edge-laplace
- Date: Tue, 20 Mar 2012 13:50:01 +0000 (UTC)
commit 38aa2216313551947f1ec7019d1b28c224e7408b
Author: Zhang Peixuan <zhangpeixuan cn gmail com>
Date: Tue Mar 6 17:39:41 2012 +0800
Add OpenCL support for gegl:edge-laplace
operations/common/edge-laplace.c | 316 +++++++++++++++++++++++++++++++++++---
1 files changed, 296 insertions(+), 20 deletions(-)
---
diff --git a/operations/common/edge-laplace.c b/operations/common/edge-laplace.c
index 3475d0b..cc683ec 100644
--- a/operations/common/edge-laplace.c
+++ b/operations/common/edge-laplace.c
@@ -53,21 +53,6 @@ static void prepare (GeglOperation *operation)
gegl_operation_set_format (operation, "output", babl_format ("RGBA float"));
}
-static gboolean
-process (GeglOperation *operation,
- GeglBuffer *input,
- GeglBuffer *output,
- const GeglRectangle *result)
-{
- GeglRectangle compute;
-
- compute = gegl_operation_get_required_for_output (operation, "input", result);
-
- edge_laplace (input, &compute, output, result);
-
- return TRUE;
-}
-
static void
minmax (gfloat x1,
gfloat x2,
@@ -223,6 +208,296 @@ edge_laplace (GeglBuffer *src,
g_free (dst_buf);
}
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+
+static const char* kernel_source =
+"#define LAPLACE_RADIUS 1 \n"
+"void minmax(float x1, float x2, float x3, \n"
+" float x4, float x5, \n"
+" float *min_result, \n"
+" float *max_result) \n"
+"{ \n"
+" float min1, min2, max1, max2; \n"
+" \n"
+" if (x1 > x2) \n"
+" { \n"
+" max1 = x1; \n"
+" min1 = x2; \n"
+" } \n"
+" else \n"
+" { \n"
+" max1 = x2; \n"
+" min1 = x1; \n"
+" } \n"
+" \n"
+" if (x3 > x4) \n"
+" { \n"
+" max2 = x3; \n"
+" min2 = x4; \n"
+" } \n"
+" else \n"
+" { \n"
+" max2 = x4; \n"
+" min2 = x3; \n"
+" } \n"
+" \n"
+" if (min1 < min2) \n"
+" *min_result = fmin(min1, x5); \n"
+" else \n"
+" *min_result = fmin(min2, x5); \n"
+" if (max1 > max2) \n"
+" *max_result = fmax(max1, x5); \n"
+" else \n"
+" *max_result = fmax(max2, x5); \n"
+"} \n"
+" \n"
+"kernel void pre_edgelaplace (global float4 *in, \n"
+" global float4 *out) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" \n"
+" int src_width = get_global_size(0) + LAPLACE_RADIUS * 2; \n"
+" int src_height = get_global_size(1); \n"
+" \n"
+" int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS; \n"
+" int gid1d = i + j * src_width; \n"
+" \n"
+" float pix_fl[4] = { \n"
+" in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y, \n"
+" in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w \n"
+" }; \n"
+" float pix_fm[4] = { \n"
+" in[gid1d - src_width].x, in[gid1d - src_width].y, \n"
+" in[gid1d - src_width].z, in[gid1d - src_width].w \n"
+" }; \n"
+" float pix_fr[4] = { \n"
+" in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y, \n"
+" in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w \n"
+" }; \n"
+" float pix_ml[4] = { \n"
+" in[gid1d - 1 ].x, in[gid1d - 1 ].y, \n"
+" in[gid1d - 1 ].z, in[gid1d - 1 ].w \n"
+" }; \n"
+" float pix_mm[4] = { \n"
+" in[gid1d ].x, in[gid1d ].y, \n"
+" in[gid1d ].z, in[gid1d ].w \n"
+" }; \n"
+" float pix_mr[4] = { \n"
+" in[gid1d + 1 ].x, in[gid1d + 1 ].y, \n"
+" in[gid1d + 1 ].z, in[gid1d + 1 ].w \n"
+" }; \n"
+" float pix_bl[4] = { \n"
+" in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y, \n"
+" in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w \n"
+" }; \n"
+" float pix_bm[4] = { \n"
+" in[gid1d + src_width].x, in[gid1d + src_width].y, \n"
+" in[gid1d + src_width].z, in[gid1d + src_width].w \n"
+" }; \n"
+" float pix_br[4] = { \n"
+" in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y, \n"
+" in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w \n"
+" }; \n"
+" \n"
+" int c; \n"
+" float minval, maxval; \n"
+" float gradient[4]; \n"
+" \n"
+" for (c = 0;c < 3; ++c) \n"
+" { \n"
+" minmax(pix_fm[c], pix_bm[c], pix_ml[c], pix_mr[c], \n"
+" pix_mm[c], &minval, &maxval); \n"
+" gradient[c] = 0.5f * \n"
+" fmax((maxval - pix_mm[c]),(pix_mm[c] - minval)); \n"
+" gradient[c] = \n"
+" (pix_fl[c] + pix_fm[c] + pix_fr[c] + \n"
+" pix_ml[c] + pix_mr[c] + pix_bl[c] + \n"
+" pix_bm[c] + pix_br[c] - 8.0f * pix_mm[c]) > \n"
+" 0.0f ? gradient[c] : -1.0f * gradient[c]; \n"
+" } \n"
+" gradient[3] = pix_mm[3]; \n"
+" \n"
+" out[gid1d] = (float4) \n"
+" (gradient[0], gradient[1], gradient[2], gradient[3]); \n"
+"} \n"
+" \n"
+"kernel void knl_edgelaplace (global float4 *in, \n"
+" global float4 *out) \n"
+"{ \n"
+" int gidx = get_global_id(0); \n"
+" int gidy = get_global_id(1); \n"
+" \n"
+" int src_width = get_global_size(0) + LAPLACE_RADIUS * 2; \n"
+" int src_height = get_global_size(1); \n"
+" \n"
+" int i = gidx + LAPLACE_RADIUS, j = gidy + LAPLACE_RADIUS; \n"
+" int gid1d = i + j * src_width; \n"
+" \n"
+" float pix_fl[4] = { \n"
+" in[gid1d - 1 - src_width].x, in[gid1d - 1 - src_width].y, \n"
+" in[gid1d - 1 - src_width].z, in[gid1d - 1 - src_width].w \n"
+" }; \n"
+" float pix_fm[4] = { \n"
+" in[gid1d - src_width].x, in[gid1d - src_width].y, \n"
+" in[gid1d - src_width].z, in[gid1d - src_width].w \n"
+" }; \n"
+" float pix_fr[4] = { \n"
+" in[gid1d + 1 - src_width].x, in[gid1d + 1 - src_width].y, \n"
+" in[gid1d + 1 - src_width].z, in[gid1d + 1 - src_width].w \n"
+" }; \n"
+" float pix_ml[4] = { \n"
+" in[gid1d - 1 ].x, in[gid1d - 1 ].y, \n"
+" in[gid1d - 1 ].z, in[gid1d - 1 ].w \n"
+" }; \n"
+" float pix_mm[4] = { \n"
+" in[gid1d ].x, in[gid1d ].y, \n"
+" in[gid1d ].z, in[gid1d ].w \n"
+" }; \n"
+" float pix_mr[4] = { \n"
+" in[gid1d + 1 ].x, in[gid1d + 1 ].y, \n"
+" in[gid1d + 1 ].z, in[gid1d + 1 ].w \n"
+" }; \n"
+" float pix_bl[4] = { \n"
+" in[gid1d - 1 + src_width].x, in[gid1d - 1 + src_width].y, \n"
+" in[gid1d - 1 + src_width].z, in[gid1d - 1 + src_width].w \n"
+" }; \n"
+" float pix_bm[4] = { \n"
+" in[gid1d + src_width].x, in[gid1d + src_width].y, \n"
+" in[gid1d + src_width].z, in[gid1d + src_width].w \n"
+" }; \n"
+" float pix_br[4] = { \n"
+" in[gid1d + 1 + src_width].x, in[gid1d + 1 + src_width].y, \n"
+" in[gid1d + 1 + src_width].z, in[gid1d + 1 + src_width].w \n"
+" }; \n"
+" \n"
+" int c; \n"
+" float value[4]; \n"
+" \n"
+" for (c = 0;c < 3; ++c) \n"
+" { \n"
+" float current = pix_mm[c]; \n"
+" current = \n"
+" ((current > 0.0f) && \n"
+" (pix_fl[c] < 0.0f || pix_fm[c] < 0.0f || \n"
+" pix_fr[c] < 0.0f || pix_ml[c] < 0.0f || \n"
+" pix_mr[c] < 0.0f || pix_bl[c] < 0.0f || \n"
+" pix_bm[c] < 0.0f || pix_br[c] < 0.0f ) \n"
+" ) ? current : 0.0f; \n"
+" value[c] = current; \n"
+" } \n"
+" value[3] = pix_mm[3]; \n"
+" \n"
+" out[gidx + gidy * get_global_size(0)] = (float4) \n"
+" (value[0], value[1], value[2], value[3]); \n"
+"} \n";
+
+static gegl_cl_run_data *cl_data = NULL;
+
+static cl_int
+cl_edge_laplace (cl_mem in_tex,
+ cl_mem aux_tex,
+ cl_mem out_tex,
+ const GeglRectangle *src_rect,
+ const GeglRectangle *roi,
+ gint radius)
+{
+ cl_int cl_err = 0;
+ size_t global_ws[2];
+ if (!cl_data)
+ {
+ const char *kernel_name[] = {"pre_edgelaplace", "knl_edgelaplace", NULL};
+ cl_data = gegl_cl_compile_and_build (kernel_source, kernel_name);
+ }
+
+ if (!cl_data) return 1;
+
+ global_ws[0] = roi->width;
+ global_ws[1] = roi->height;
+
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem), (void*)&in_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem), (void*)&aux_tex);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[0], 2,
+ NULL, global_ws, NULL,
+ 0, NULL, NULL);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
+ if (CL_SUCCESS != cl_err) return cl_err;
+
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem), (void*)&aux_tex);
+ cl_err |= gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem), (void*)&out_tex);
+ if (cl_err != CL_SUCCESS) return cl_err;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[1], 2,
+ NULL, global_ws, NULL,
+ 0, NULL, NULL);
+ if (cl_err != CL_SUCCESS) return cl_err;
+ return cl_err;
+}
+
+static gboolean
+cl_process (GeglOperation *operation,
+ GeglBuffer *input,
+ GeglBuffer *output,
+ const GeglRectangle *result)
+{
+ const Babl *in_format = gegl_operation_get_format (operation, "input");
+ const Babl *out_format = gegl_operation_get_format (operation, "output");
+ gint err;
+ gint j;
+ cl_int cl_err;
+
+ GeglOperationAreaFilter *op_area = GEGL_OPERATION_AREA_FILTER (operation);
+ GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+
+ GeglBufferClIterator *i = gegl_buffer_cl_iterator_new (output, result, out_format, GEGL_CL_BUFFER_WRITE);
+ gint read = gegl_buffer_cl_iterator_add_2 (i, input, result, in_format, GEGL_CL_BUFFER_READ, op_area->left, op_area->right, op_area->top, op_area->bottom);
+ gint aux = gegl_buffer_cl_iterator_add_2 (i, NULL, result, in_format, GEGL_CL_BUFFER_AUX, op_area->left, op_area->right, op_area->top, op_area->bottom);
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err) return FALSE;
+ for (j=0; j < i->n; j++)
+ {
+ cl_err = cl_edge_laplace(i->tex[read][j], i->tex[aux][j], i->tex[0][j], &i->roi[read][j], &i->roi[0][j], LAPLACE_RADIUS);
+ if (cl_err != CL_SUCCESS)
+ {
+ g_warning("[OpenCL] Error in gegl:edge-laplace: %s\n", gegl_cl_errstring(cl_err));
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
+static gboolean
+process (GeglOperation *operation,
+ GeglBuffer *input,
+ GeglBuffer *output,
+ const GeglRectangle *result)
+{
+ GeglRectangle rect;
+ GeglChantO *o = GEGL_CHANT_PROPERTIES (operation);
+ GeglBuffer *temp;
+ GeglOperationAreaFilter *op_area;
+ op_area = GEGL_OPERATION_AREA_FILTER (operation);
+
+ if (cl_state.is_accelerated)
+ if (cl_process (operation, input, output, result))
+ return TRUE;
+
+ GeglRectangle compute;
+ compute = gegl_operation_get_required_for_output (operation, "input", result);
+ edge_laplace (input, &compute, output, result);
+
+ return TRUE;
+}
+
static void
gegl_chant_class_init (GeglChantClass *klass)
@@ -230,16 +505,17 @@ gegl_chant_class_init (GeglChantClass *klass)
GeglOperationClass *operation_class;
GeglOperationFilterClass *filter_class;
- operation_class = GEGL_OPERATION_CLASS (klass);
- filter_class = GEGL_OPERATION_FILTER_CLASS (klass);
+ operation_class = GEGL_OPERATION_CLASS (klass);
+ filter_class = GEGL_OPERATION_FILTER_CLASS (klass);
- filter_class->process = process;
+ filter_class->process = process;
operation_class->prepare = prepare;
- operation_class->name = "gegl:edge-laplace";
operation_class->categories = "edge-detect";
+ operation_class->name = "gegl:edge-laplace";
+ operation_class->opencl_support = TRUE;
operation_class->description =
- _("High-resolution edge detection");
+ _("High-resolution edge detection.");
}
#endif
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]