[gegl/soc-2013-opecl-ops] Operations: Add OpenCL support to stretch-contrast
- From: Carlos Zubieta <czubieta src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl/soc-2013-opecl-ops] Operations: Add OpenCL support to stretch-contrast
- Date: Mon, 23 Sep 2013 05:40:55 +0000 (UTC)
commit 261d11e1b8b320105747f7460a9b0aac414ecfce
Author: Carlos Zubieta <czubieta dev gmail com>
Date: Mon Sep 23 00:39:50 2013 -0500
Operations: Add OpenCL support to stretch-contrast
opencl/stretch-contrast.cl | 124 +++++++++++++
opencl/stretch-contrast.cl.h | 126 +++++++++++++
operations/common/stretch-contrast.c | 321 ++++++++++++++++++++++++++++++++++
3 files changed, 571 insertions(+), 0 deletions(-)
---
diff --git a/opencl/stretch-contrast.cl b/opencl/stretch-contrast.cl
new file mode 100644
index 0000000..93d3987
--- /dev/null
+++ b/opencl/stretch-contrast.cl
@@ -0,0 +1,124 @@
+/* This file is an image processing operation for GEGL
+ *
+ * GEGL is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * GEGL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright 2013 Carlos Zubieta <czubieta dev gmail com>
+ */
+
+__kernel void two_stages_local_min_max_reduce (__global const float4 *in,
+ __global float *out_min,
+ __global float *out_max,
+ __local float *aux_min,
+ __local float *aux_max,
+ int n_pixels)
+{
+ int gid = get_global_id(0);
+ int gsize = get_global_size(0);
+ int lid = get_local_id(0);
+ int lsize = get_local_size(0);
+ float4 min_v = (float4)(FLT_MAX);
+ float4 max_v = (float4)(FLT_MIN);
+ float4 in_v;
+ float aux0, aux1;
+ int it;
+ /* Loop sequentially over chunks of input vector */
+ while (gid < n_pixels)
+ {
+ in_v = in[gid];
+ min_v = fmin(min_v,in_v);
+ max_v = fmax(max_v,in_v);
+ gid += gsize;
+ }
+
+ /* Perform parallel reduction */
+ aux_min[lid] = min(min(min_v.x,min_v.y),min_v.z);
+ aux_max[lid] = max(max(max_v.x,max_v.y),max_v.z);
+ barrier(CLK_LOCAL_MEM_FENCE);
+ for(it = lsize / 2; it > 0; it >>= 1)
+ {
+ if (lid < it)
+ {
+ aux0 = aux_min[lid + it];
+ aux1 = aux_min[lid];
+ aux_min[lid] = fmin(aux0, aux1);
+
+ aux0 = aux_max[lid + it];
+ aux1 = aux_max[lid];
+ aux_max[lid] = fmax(aux0, aux1);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ if (lid == 0)
+ {
+ out_min[get_group_id(0)] = aux_min[0];
+ out_max[get_group_id(0)] = aux_max[0];
+ }
+}
+
+__kernel void init_to_float_max (__global float *in)
+{
+ int gid = get_global_id(0);
+ in[gid] = FLT_MAX;
+}
+
+__kernel void init_to_float_min (__global float *in)
+{
+ int gid = get_global_id(0);
+ in[gid] = FLT_MIN;
+}
+
+__kernel void global_min_max_reduce (__global float *in_min,
+ __global float *in_max,
+ __global float *out_min,
+ __global float *out_max)
+{
+ int gid = get_global_id(0);
+ int lid = get_local_id(0);
+ int lsize = get_local_size(0);
+ float aux0, aux1;
+ int it;
+
+ /* Perform parallel reduction */
+ for(it = lsize / 2; it > 0; it >>= 1)
+ {
+ if (lid < it)
+ {
+ aux0 = in_min[gid + it];
+ aux1 = in_min[gid];
+ in_min[gid] = fmin(aux0, aux1);
+
+ aux0 = in_max[gid + it];
+ aux1 = in_max[gid];
+ in_max[gid] = fmax(aux0, aux1);
+ }
+ barrier(CLK_LOCAL_MEM_FENCE);
+ }
+ if (lid == 0)
+ {
+ out_min[get_group_id(0)] = in_min[gid];
+ out_max[get_group_id(0)] = in_max[gid];
+ }
+}
+
+__kernel void cl_stretch_contrast (__global const float4 *in,
+ __global float4 *out,
+ float min,
+ float diff)
+{
+ int gid = get_global_id(0);
+ float4 in_v = in[gid];
+
+ in_v.xyz = (in_v.xyz - min) / diff;
+ out[gid] = in_v;
+}
diff --git a/opencl/stretch-contrast.cl.h b/opencl/stretch-contrast.cl.h
new file mode 100644
index 0000000..9871c40
--- /dev/null
+++ b/opencl/stretch-contrast.cl.h
@@ -0,0 +1,126 @@
+/* This file is an image processing operation for GEGL
+ *
+ * GEGL is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * GEGL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright 2013 Carlos Zubieta <czubieta dev gmail com>
+ */
+
+static const char* stretch_contrast_cl_source =
+"__kernel void two_stages_local_min_max_reduce (__global const float4 *in, \n"
+" __global float *out_min,\n"
+" __global float *out_max,\n"
+" __local float *aux_min,\n"
+" __local float *aux_max,\n"
+" int n_pixels)\n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" int gsize = get_global_size(0); \n"
+" int lid = get_local_id(0); \n"
+" int lsize = get_local_size(0); \n"
+" float4 min_v = (float4)(FLT_MAX); \n"
+" float4 max_v = (float4)(FLT_MIN); \n"
+" float4 in_v; \n"
+" float aux0, aux1; \n"
+" int it; \n"
+" /* Loop sequentially over chunks of input vector */ \n"
+" while (gid < n_pixels) \n"
+" { \n"
+" in_v = in[gid]; \n"
+" min_v = fmin(min_v,in_v); \n"
+" max_v = fmax(max_v,in_v); \n"
+" gid += gsize; \n"
+" } \n"
+" \n"
+" /* Perform parallel reduction */ \n"
+" aux_min[lid] = min(min(min_v.x,min_v.y),min_v.z); \n"
+" aux_max[lid] = max(max(max_v.x,max_v.y),max_v.z); \n"
+" barrier(CLK_LOCAL_MEM_FENCE); \n"
+" for(it = lsize / 2; it > 0; it >>= 1) \n"
+" { \n"
+" if (lid < it) \n"
+" { \n"
+" aux0 = aux_min[lid + it]; \n"
+" aux1 = aux_min[lid]; \n"
+" aux_min[lid] = fmin(aux0, aux1); \n"
+" \n"
+" aux0 = aux_max[lid + it]; \n"
+" aux1 = aux_max[lid]; \n"
+" aux_max[lid] = fmax(aux0, aux1); \n"
+" } \n"
+" barrier(CLK_LOCAL_MEM_FENCE); \n"
+" } \n"
+" if (lid == 0) \n"
+" { \n"
+" out_min[get_group_id(0)] = aux_min[0]; \n"
+" out_max[get_group_id(0)] = aux_max[0]; \n"
+" } \n"
+"} \n"
+" \n"
+"__kernel void init_to_float_max (__global float *in) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" in[gid] = FLT_MAX; \n"
+"} \n"
+" \n"
+"__kernel void init_to_float_min (__global float *in) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" in[gid] = FLT_MIN; \n"
+"} \n"
+" \n"
+"__kernel void global_min_max_reduce (__global float *in_min, \n"
+" __global float *in_max, \n"
+" __global float *out_min, \n"
+" __global float *out_max) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" int lid = get_local_id(0); \n"
+" int lsize = get_local_size(0); \n"
+" float aux0, aux1; \n"
+" int it; \n"
+" \n"
+" /* Perform parallel reduction */ \n"
+" for(it = lsize / 2; it > 0; it >>= 1) \n"
+" { \n"
+" if (lid < it) \n"
+" { \n"
+" aux0 = in_min[gid + it]; \n"
+" aux1 = in_min[gid]; \n"
+" in_min[gid] = fmin(aux0, aux1); \n"
+" \n"
+" aux0 = in_max[gid + it]; \n"
+" aux1 = in_max[gid]; \n"
+" in_max[gid] = fmax(aux0, aux1); \n"
+" } \n"
+" barrier(CLK_LOCAL_MEM_FENCE); \n"
+" } \n"
+" if (lid == 0) \n"
+" { \n"
+" out_min[get_group_id(0)] = in_min[gid]; \n"
+" out_max[get_group_id(0)] = in_max[gid]; \n"
+" } \n"
+"} \n"
+" \n"
+"__kernel void cl_stretch_contrast (__global const float4 *in, \n"
+" __global float4 *out, \n"
+" float min, \n"
+" float diff) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
+" \n"
+" in_v.xyz = (in_v.xyz - min) / diff; \n"
+" out[gid] = in_v; \n"
+"} \n"
+;
diff --git a/operations/common/stretch-contrast.c b/operations/common/stretch-contrast.c
index e442424..ca8165b 100644
--- a/operations/common/stretch-contrast.c
+++ b/operations/common/stretch-contrast.c
@@ -86,6 +86,318 @@ get_cached_region (GeglOperation *operation,
return result;
}
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+#include "opencl/stretch-contrast.cl.h"
+
+GEGL_CL_STATIC
+
+static gboolean
+cl_build_kernels( void )
+{
+ GEGL_CL_BUILD( stretch_contrast,
+ "two_stages_local_min_max_reduce",
+ "init_to_float_max",
+ "init_to_float_min",
+ "global_min_max_reduce",
+ "cl_stretch_contrast")
+ return FALSE;
+}
+
+static gboolean
+cl_buffer_get_min_max (cl_mem in_tex,
+ size_t global_worksize,
+ const GeglRectangle *roi,
+ gfloat *min,
+ gfloat *max)
+{
+ cl_int cl_err = 0;
+ size_t local_ws = 256;
+ size_t work_groups = (global_worksize + local_ws - 1) / local_ws;
+ size_t global_ws = work_groups * local_ws;
+ cl_mem cl_aux_min = NULL;
+ cl_mem cl_aux_min0 = NULL;
+ cl_mem cl_aux_max = NULL;
+ cl_mem cl_aux_max0 = NULL;
+
+ cl_aux_min = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_READ_WRITE,
+ work_groups * sizeof(cl_float),
+ NULL, &cl_err);
+ CL_CHECK;
+ cl_aux_min0 = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_READ_WRITE,
+ work_groups * sizeof(cl_float),
+ NULL, &cl_err);
+ CL_CHECK;
+ cl_aux_max = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_READ_WRITE,
+ work_groups * sizeof(cl_float),
+ NULL, &cl_err);
+ CL_CHECK;
+ cl_aux_max0 = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_READ_WRITE,
+ work_groups * sizeof(cl_float),
+ NULL, &cl_err);
+ CL_CHECK;
+
+ {
+ cl_int n_pixels = (cl_int)global_worksize;
+ cl_float cl_min, cl_max;
+ int it, wg;
+
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),
+ (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),
+ (void*)&cl_aux_min);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem),
+ (void*)&cl_aux_max);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3,
+ sizeof(cl_float) * local_ws, NULL);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4,
+ sizeof(cl_float) * local_ws, NULL);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),
+ (void*)&n_pixels);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[0], 1,
+ NULL, &global_ws, &local_ws,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+ if(work_groups > 1)
+ {
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),
+ (void*)&cl_aux_min0);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[1], 1,
+ NULL, &work_groups, NULL,
+ 0, NULL, NULL);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[2], 0, sizeof(cl_mem),
+ (void*)&cl_aux_max0);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[2], 1,
+ NULL, &work_groups, NULL,
+ 0, NULL, NULL);
+ CL_CHECK;
+ }
+
+ it = 0;
+ wg = work_groups;
+ while(wg > 1)
+ {
+ wg = (wg + local_ws - 1) / local_ws;
+ global_ws = wg * local_ws;
+ if(it % 2 == 0)
+ {
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem),
+ (void*)&cl_aux_min);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem),
+ (void*)&cl_aux_max);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 2, sizeof(cl_mem),
+ (void*)&cl_aux_min0);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 3, sizeof(cl_mem),
+ (void*)&cl_aux_max0);
+ CL_CHECK;
+ }
+ else
+ {
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem),
+ (void*)&cl_aux_min0);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem),
+ (void*)&cl_aux_max0);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 2, sizeof(cl_mem),
+ (void*)&cl_aux_min);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 3, sizeof(cl_mem),
+ (void*)&cl_aux_max);
+ CL_CHECK;
+ }
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[3], 1,
+ NULL, &global_ws, &local_ws,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+ ++it;
+ }
+
+ // Read the memory buffer
+ if(it % 2 == 0)
+ {
+ cl_err = gegl_clEnqueueReadBuffer(gegl_cl_get_command_queue(), cl_aux_min,
+ CL_TRUE, 0, sizeof(cl_float),
+ &cl_min, 0, NULL, NULL);
+ CL_CHECK;
+ cl_err = gegl_clEnqueueReadBuffer(gegl_cl_get_command_queue(), cl_aux_max,
+ CL_TRUE, 0, sizeof(cl_float),
+ &cl_max, 0, NULL, NULL);
+ }
+ else
+ {
+ cl_err = gegl_clEnqueueReadBuffer(gegl_cl_get_command_queue(),
+ cl_aux_min0, CL_TRUE, 0,
+ sizeof(cl_float), &cl_min, 0,
+ NULL, NULL);
+ CL_CHECK;
+ cl_err = gegl_clEnqueueReadBuffer(gegl_cl_get_command_queue(),
+ cl_aux_max0, CL_TRUE, 0,
+ sizeof(cl_float), &cl_max, 0,
+ NULL, NULL);
+ CL_CHECK;
+ }
+
+ *min = cl_min;
+ *max = cl_max;
+
+ GEGL_CL_RELEASE(cl_aux_min)
+ GEGL_CL_RELEASE(cl_aux_min0)
+ GEGL_CL_RELEASE(cl_aux_max)
+ GEGL_CL_RELEASE(cl_aux_max0)
+ }
+ return FALSE;
+
+error:
+ if(cl_aux_min)
+ GEGL_CL_RELEASE(cl_aux_min)
+ if(cl_aux_min0)
+ GEGL_CL_RELEASE(cl_aux_min0)
+ if(cl_aux_max)
+ GEGL_CL_RELEASE(cl_aux_max)
+ if(cl_aux_max0)
+ GEGL_CL_RELEASE(cl_aux_max0)
+ return TRUE;
+}
+
+static gboolean
+cl_stretch_contrast (cl_mem in_tex,
+ cl_mem out_tex,
+ size_t global_worksize,
+ const GeglRectangle *roi,
+ gfloat min,
+ gfloat diff)
+{
+ cl_int cl_err = 0;
+ cl_float cl_min = min;
+ cl_float cl_diff = diff;
+
+ {
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[4], 0, sizeof(cl_mem),
+ (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[4], 1, sizeof(cl_mem),
+ (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[4], 2, sizeof(cl_float),
+ (void*)&cl_min);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[4], 3, sizeof(cl_float),
+ (void*)&cl_diff);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[4], 1,
+ NULL, &global_worksize, NULL,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+ }
+ return FALSE;
+
+error:
+ return TRUE;
+}
+
+static gboolean
+cl_process (GeglOperation *operation,
+ GeglBuffer *input,
+ GeglBuffer *output,
+ const GeglRectangle *result)
+{
+ const Babl *in_format = gegl_operation_get_format (operation, "input");
+ const Babl *out_format = gegl_operation_get_format (operation, "output");
+
+ gfloat min = 1.0f;
+ gfloat max = 0.0f;
+ gfloat i_min, i_max, diff;
+ gint err, read;
+ GeglBufferClIterator *i;
+
+ if(cl_build_kernels())
+ return FALSE;
+
+ i = gegl_buffer_cl_iterator_new (input,
+ result,
+ in_format,
+ GEGL_CL_BUFFER_READ);
+
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err) return FALSE;
+
+ err = cl_buffer_get_min_max(i->tex[0],
+ i->size[0],
+ &i->roi[0],
+ &i_min,
+ &i_max);
+ if (err) return FALSE;
+
+ if(i_min < min)
+ min = i_min;
+ if(i_max > max)
+ max = i_max;
+ }
+
+ diff = max-min;
+
+ i = gegl_buffer_cl_iterator_new (output,
+ result,
+ out_format,
+ GEGL_CL_BUFFER_WRITE);
+
+ read = gegl_buffer_cl_iterator_add_2 (i,
+ input,
+ result,
+ in_format,
+ GEGL_CL_BUFFER_READ,
+ 0,
+ 0,
+ 0,
+ 0,
+ GEGL_ABYSS_NONE);
+
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err) return FALSE;
+
+ err = cl_stretch_contrast(i->tex[read],
+ i->tex[0],
+ i->size[0],
+ &i->roi[0],
+ min,
+ diff);
+ if (err) return FALSE;
+ }
+
+ return TRUE;
+}
+
static gboolean
process (GeglOperation *operation,
GeglBuffer *input,
@@ -96,6 +408,14 @@ process (GeglOperation *operation,
gdouble min, max, diff;
GeglBufferIterator *gi;
+ if (gegl_cl_is_accelerated ())
+ {
+ if (cl_process (operation, input, output, result))
+ return TRUE;
+ else
+ gegl_cl_disable();
+ }
+
buffer_get_min_max (input, &min, &max);
diff = max - min;
@@ -150,6 +470,7 @@ gegl_chant_class_init (GeglChantClass *klass)
operation_class->prepare = prepare;
operation_class->get_required_for_output = get_required_for_output;
operation_class->get_cached_region = get_cached_region;
+ operation_class->opencl_support = TRUE;
gegl_operation_class_set_keys (operation_class,
"name" , "gegl:stretch-contrast",
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]