[gegl] Operations: Add OpenCL support to stretch-contrast
- From: Téo Mazars <teom src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gegl] Operations: Add OpenCL support to stretch-contrast
- Date: Thu, 31 Oct 2013 11:02:00 +0000 (UTC)
commit cab389d42b1067d2c518ce1baa9f3ec8c0d637c3
Author: Carlos Zubieta <czubieta dev gmail com>
Date: Mon Sep 23 00:39:50 2013 -0500
Operations: Add OpenCL support to stretch-contrast
opencl/stretch-contrast.cl | 136 +++++++++++++++++
opencl/stretch-contrast.cl.h | 138 +++++++++++++++++
operations/common/stretch-contrast.c | 274 ++++++++++++++++++++++++++++++++++
3 files changed, 548 insertions(+), 0 deletions(-)
---
diff --git a/opencl/stretch-contrast.cl b/opencl/stretch-contrast.cl
new file mode 100644
index 0000000..83f7ff9
--- /dev/null
+++ b/opencl/stretch-contrast.cl
@@ -0,0 +1,136 @@
+/* This file is an image processing operation for GEGL
+ *
+ * GEGL is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * GEGL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright 2013 Carlos Zubieta <czubieta dev gmail com>
+ */
+
+
+__kernel void init_stretch (__global float *out_min,
+ __global float *out_max)
+{
+ int gid = get_global_id (0);
+
+ out_min[gid] = FLT_MAX;
+ out_max[gid] = -FLT_MAX;
+}
+
+__kernel void two_stages_local_min_max_reduce (__global const float4 *in,
+ __global float *out_min,
+ __global float *out_max,
+ __local float *aux_min,
+ __local float *aux_max,
+ int n_pixels)
+{
+ int gid = get_global_id(0);
+ int gsize = get_global_size(0);
+ int lid = get_local_id(0);
+ int lsize = get_local_size(0);
+ float4 min_v = (float4)( FLT_MAX);
+ float4 max_v = (float4)(-FLT_MAX);
+ float4 in_v;
+ float aux0, aux1;
+ int it;
+
+ /* Loop sequentially over chunks of input vector */
+ for (it = gid; it < n_pixels; it += gsize)
+ {
+ in_v = in[it];
+ min_v = fmin (min_v, in_v);
+ max_v = fmax (max_v, in_v);
+ }
+
+ /* Perform parallel reduction */
+ aux_min[lid] = min (min (min_v.x, min_v.y), min_v.z);
+ aux_max[lid] = max (max (max_v.x, max_v.y), max_v.z);
+
+ barrier (CLK_LOCAL_MEM_FENCE);
+
+ for(it = lsize / 2; it > 0; it >>= 1)
+ {
+ if (lid < it)
+ {
+ aux0 = aux_min[lid + it];
+ aux1 = aux_min[lid];
+ aux_min[lid] = fmin (aux0, aux1);
+
+ aux0 = aux_max[lid + it];
+ aux1 = aux_max[lid];
+ aux_max[lid] = fmax (aux0, aux1);
+ }
+ barrier (CLK_LOCAL_MEM_FENCE);
+ }
+ if (lid == 0)
+ {
+ out_min[get_group_id(0)] = aux_min[0];
+ out_max[get_group_id(0)] = aux_max[0];
+ }
+
+ /* the work-group size is the size of the buffer.
+ * Make sure it's fully initialized */
+ if (gid == 0)
+ {
+ /* No special case handling, gsize is a multiple of lsize */
+ int nb_wg = gsize / lsize;
+ for (it = nb_wg; it < lsize; it++)
+ {
+ out_min[it] = FLT_MAX;
+ out_max[it] = -FLT_MAX;
+ }
+ }
+}
+
+__kernel void global_min_max_reduce (__global float *in_min,
+ __global float *in_max,
+ __global float *out_min_max)
+{
+ int gid = get_global_id(0);
+ int lid = get_local_id(0);
+ int lsize = get_local_size(0);
+ float aux0, aux1;
+ int it;
+
+ /* Perform parallel reduction */
+ for (it = lsize / 2; it > 0; it >>= 1)
+ {
+ if (lid < it)
+ {
+ aux0 = in_min[lid + it];
+ aux1 = in_min[lid];
+ in_min[gid] = fmin (aux0, aux1);
+
+ aux0 = in_max[lid + it];
+ aux1 = in_max[lid];
+ in_max[gid] = fmax (aux0, aux1);
+ }
+ barrier (CLK_GLOBAL_MEM_FENCE);
+ }
+ if (lid == 0)
+ {
+ out_min_max[0] = in_min[gid];
+ out_min_max[1] = in_max[gid];
+ }
+}
+
+__kernel void cl_stretch_contrast (__global const float4 *in,
+ __global float4 *out,
+ float min,
+ float diff)
+{
+ int gid = get_global_id(0);
+ float4 in_v = in[gid];
+
+ in_v.xyz = (in_v.xyz - min) / diff;
+ out[gid] = in_v;
+}
diff --git a/opencl/stretch-contrast.cl.h b/opencl/stretch-contrast.cl.h
new file mode 100644
index 0000000..c4bd39e
--- /dev/null
+++ b/opencl/stretch-contrast.cl.h
@@ -0,0 +1,138 @@
+static const char* stretch_contrast_cl_source =
+"/* This file is an image processing operation for GEGL \n"
+" * \n"
+" * GEGL is free software; you can redistribute it and/or \n"
+" * modify it under the terms of the GNU Lesser General Public \n"
+" * License as published by the Free Software Foundation; either \n"
+" * version 3 of the License, or (at your option) any later version. \n"
+" * \n"
+" * GEGL is distributed in the hope that it will be useful, \n"
+" * but WITHOUT ANY WARRANTY; without even the implied warranty of \n"
+" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU \n"
+" * Lesser General Public License for more details. \n"
+" * \n"
+" * You should have received a copy of the GNU Lesser General Public \n"
+" * License along with GEGL; if not, see <http://www.gnu.org/licenses/>. \n"
+" * \n"
+" * Copyright 2013 Carlos Zubieta <czubieta dev gmail com> \n"
+" */ \n"
+" \n"
+" \n"
+"__kernel void init_stretch (__global float *out_min, \n"
+" __global float *out_max) \n"
+"{ \n"
+" int gid = get_global_id (0); \n"
+" \n"
+" out_min[gid] = FLT_MAX; \n"
+" out_max[gid] = -FLT_MAX; \n"
+"} \n"
+" \n"
+"__kernel void two_stages_local_min_max_reduce (__global const float4 *in, \n"
+" __global float *out_min,\n"
+" __global float *out_max,\n"
+" __local float *aux_min,\n"
+" __local float *aux_max,\n"
+" int n_pixels)\n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" int gsize = get_global_size(0); \n"
+" int lid = get_local_id(0); \n"
+" int lsize = get_local_size(0); \n"
+" float4 min_v = (float4)( FLT_MAX); \n"
+" float4 max_v = (float4)(-FLT_MAX); \n"
+" float4 in_v; \n"
+" float aux0, aux1; \n"
+" int it; \n"
+" \n"
+" /* Loop sequentially over chunks of input vector */ \n"
+" for (it = gid; it < n_pixels; it += gsize) \n"
+" { \n"
+" in_v = in[it]; \n"
+" min_v = fmin (min_v, in_v); \n"
+" max_v = fmax (max_v, in_v); \n"
+" } \n"
+" \n"
+" /* Perform parallel reduction */ \n"
+" aux_min[lid] = min (min (min_v.x, min_v.y), min_v.z); \n"
+" aux_max[lid] = max (max (max_v.x, max_v.y), max_v.z); \n"
+" \n"
+" barrier (CLK_LOCAL_MEM_FENCE); \n"
+" \n"
+" for(it = lsize / 2; it > 0; it >>= 1) \n"
+" { \n"
+" if (lid < it) \n"
+" { \n"
+" aux0 = aux_min[lid + it]; \n"
+" aux1 = aux_min[lid]; \n"
+" aux_min[lid] = fmin (aux0, aux1); \n"
+" \n"
+" aux0 = aux_max[lid + it]; \n"
+" aux1 = aux_max[lid]; \n"
+" aux_max[lid] = fmax (aux0, aux1); \n"
+" } \n"
+" barrier (CLK_LOCAL_MEM_FENCE); \n"
+" } \n"
+" if (lid == 0) \n"
+" { \n"
+" out_min[get_group_id(0)] = aux_min[0]; \n"
+" out_max[get_group_id(0)] = aux_max[0]; \n"
+" } \n"
+" \n"
+" /* the work-group size is the size of the buffer. \n"
+" * Make sure it's fully initialized */ \n"
+" if (gid == 0) \n"
+" { \n"
+" /* No special case handling, gsize is a multiple of lsize */ \n"
+" int nb_wg = gsize / lsize; \n"
+" for (it = nb_wg; it < lsize; it++) \n"
+" { \n"
+" out_min[it] = FLT_MAX; \n"
+" out_max[it] = -FLT_MAX; \n"
+" } \n"
+" } \n"
+"} \n"
+" \n"
+"__kernel void global_min_max_reduce (__global float *in_min, \n"
+" __global float *in_max, \n"
+" __global float *out_min_max) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" int lid = get_local_id(0); \n"
+" int lsize = get_local_size(0); \n"
+" float aux0, aux1; \n"
+" int it; \n"
+" \n"
+" /* Perform parallel reduction */ \n"
+" for (it = lsize / 2; it > 0; it >>= 1) \n"
+" { \n"
+" if (lid < it) \n"
+" { \n"
+" aux0 = in_min[lid + it]; \n"
+" aux1 = in_min[lid]; \n"
+" in_min[gid] = fmin (aux0, aux1); \n"
+" \n"
+" aux0 = in_max[lid + it]; \n"
+" aux1 = in_max[lid]; \n"
+" in_max[gid] = fmax (aux0, aux1); \n"
+" } \n"
+" barrier (CLK_GLOBAL_MEM_FENCE); \n"
+" } \n"
+" if (lid == 0) \n"
+" { \n"
+" out_min_max[0] = in_min[gid]; \n"
+" out_min_max[1] = in_max[gid]; \n"
+" } \n"
+"} \n"
+" \n"
+"__kernel void cl_stretch_contrast (__global const float4 *in, \n"
+" __global float4 *out, \n"
+" float min, \n"
+" float diff) \n"
+"{ \n"
+" int gid = get_global_id(0); \n"
+" float4 in_v = in[gid]; \n"
+" \n"
+" in_v.xyz = (in_v.xyz - min) / diff; \n"
+" out[gid] = in_v; \n"
+"} \n"
+;
diff --git a/operations/common/stretch-contrast.c b/operations/common/stretch-contrast.c
index e442424..3798909 100644
--- a/operations/common/stretch-contrast.c
+++ b/operations/common/stretch-contrast.c
@@ -86,6 +86,275 @@ get_cached_region (GeglOperation *operation,
return result;
}
+#include "opencl/gegl-cl.h"
+#include "buffer/gegl-buffer-cl-iterator.h"
+#include "opencl/stretch-contrast.cl.h"
+
+GEGL_CL_STATIC
+
+static gboolean
+cl_build_kernels( void )
+{
+ GEGL_CL_BUILD( stretch_contrast,
+ "two_stages_local_min_max_reduce",
+ "global_min_max_reduce",
+ "cl_stretch_contrast",
+ "init_stretch")
+ return FALSE;
+}
+
+static gboolean
+cl_buffer_get_min_max (cl_mem in_tex,
+ size_t global_worksize,
+ const GeglRectangle *roi,
+ gfloat *min,
+ gfloat *max)
+{
+ cl_int cl_err = 0;
+ size_t local_ws, max_local_ws;
+ size_t work_groups;
+ size_t global_ws;
+ cl_mem cl_aux_min = NULL;
+ cl_mem cl_aux_max = NULL;
+ cl_mem cl_min_max = NULL;
+ cl_int n_pixels = (cl_int)global_worksize;
+ cl_float min_max_buf[2];
+
+ if (global_worksize < 1)
+ {
+ *min = G_MAXFLOAT;
+ *max = G_MINFLOAT;
+ return FALSE;
+ }
+
+ cl_err = gegl_clGetDeviceInfo (gegl_cl_get_device (),
+ CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof (size_t), &max_local_ws, NULL);
+ CL_CHECK;
+
+ /* Needs to be a power of two */
+ local_ws = 256;
+ while (local_ws > max_local_ws)
+ local_ws /= 2;
+
+ work_groups = MIN ((global_worksize + local_ws - 1) / local_ws, local_ws);
+ global_ws = work_groups * local_ws;
+
+
+ cl_aux_min = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_READ_WRITE,
+ local_ws * sizeof(cl_float),
+ NULL, &cl_err);
+ CL_CHECK;
+ cl_aux_max = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_READ_WRITE,
+ local_ws * sizeof(cl_float),
+ NULL, &cl_err);
+ CL_CHECK;
+ cl_min_max = gegl_clCreateBuffer(gegl_cl_get_context(),
+ CL_MEM_WRITE_ONLY,
+ 2 * sizeof(cl_float),
+ NULL, &cl_err);
+ CL_CHECK;
+
+ /* The full initialization is done in the two_stages_local_min_max_reduce
+ kernel */
+#if 0
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 0, sizeof(cl_mem),
+ (void*)&cl_aux_min);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[3], 1, sizeof(cl_mem),
+ (void*)&cl_aux_max);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[3], 1,
+ NULL, &local_ws, &local_ws,
+ 0, NULL, NULL);
+ CL_CHECK;
+#endif
+
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),
+ (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),
+ (void*)&cl_aux_min);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_mem),
+ (void*)&cl_aux_max);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 3,
+ sizeof(cl_float) * local_ws, NULL);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 4,
+ sizeof(cl_float) * local_ws, NULL);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[0], 5, sizeof(cl_int),
+ (void*)&n_pixels);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[0], 1,
+ NULL, &global_ws, &local_ws,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 0, sizeof(cl_mem),
+ (void*)&cl_aux_min);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 1, sizeof(cl_mem),
+ (void*)&cl_aux_max);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[1], 2, sizeof(cl_mem),
+ (void*)&cl_min_max);
+ CL_CHECK;
+
+ /* Only one work group */
+ cl_err = gegl_clEnqueueNDRangeKernel (gegl_cl_get_command_queue (),
+ cl_data->kernel[1], 1,
+ NULL, &local_ws, &local_ws,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+ /* Read the memory buffer, probably better to keep it in GPU memory */
+ cl_err = gegl_clEnqueueReadBuffer (gegl_cl_get_command_queue(),
+ cl_min_max, CL_TRUE, 0,
+ 2 * sizeof (cl_float), &min_max_buf, 0,
+ NULL, NULL);
+ CL_CHECK;
+
+ *min = min_max_buf[0];
+ *max = min_max_buf[1];
+
+ GEGL_CL_RELEASE(cl_aux_min)
+ GEGL_CL_RELEASE(cl_aux_max)
+ GEGL_CL_RELEASE(cl_min_max)
+
+ return FALSE;
+
+error:
+ if(cl_aux_min)
+ GEGL_CL_RELEASE(cl_aux_min)
+ if(cl_aux_max)
+ GEGL_CL_RELEASE(cl_aux_max)
+ if(cl_min_max)
+ GEGL_CL_RELEASE(cl_min_max)
+ return TRUE;
+}
+
+static gboolean
+cl_stretch_contrast (cl_mem in_tex,
+ cl_mem out_tex,
+ size_t global_worksize,
+ const GeglRectangle *roi,
+ gfloat min,
+ gfloat diff)
+{
+ cl_int cl_err = 0;
+ cl_float cl_min = min;
+ cl_float cl_diff = diff;
+
+ {
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[2], 0, sizeof(cl_mem),
+ (void*)&in_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[2], 1, sizeof(cl_mem),
+ (void*)&out_tex);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[2], 2, sizeof(cl_float),
+ (void*)&cl_min);
+ CL_CHECK;
+ cl_err = gegl_clSetKernelArg(cl_data->kernel[2], 3, sizeof(cl_float),
+ (void*)&cl_diff);
+ CL_CHECK;
+
+ cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+ cl_data->kernel[2], 1,
+ NULL, &global_worksize, NULL,
+ 0, NULL, NULL);
+ CL_CHECK;
+
+ }
+ return FALSE;
+
+error:
+ return TRUE;
+}
+
+static gboolean
+cl_process (GeglOperation *operation,
+ GeglBuffer *input,
+ GeglBuffer *output,
+ const GeglRectangle *result)
+{
+ const Babl *in_format = gegl_operation_get_format (operation, "input");
+ const Babl *out_format = gegl_operation_get_format (operation, "output");
+
+ gfloat min = 1.0f;
+ gfloat max = 0.0f;
+ gfloat i_min, i_max, diff;
+ gint err, read;
+ GeglBufferClIterator *i;
+
+ if(cl_build_kernels())
+ return FALSE;
+
+ i = gegl_buffer_cl_iterator_new (input,
+ result,
+ in_format,
+ GEGL_CL_BUFFER_READ);
+
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err) return FALSE;
+
+ err = cl_buffer_get_min_max(i->tex[0],
+ i->size[0],
+ &i->roi[0],
+ &i_min,
+ &i_max);
+ if (err) return FALSE;
+
+ if(i_min < min)
+ min = i_min;
+ if(i_max > max)
+ max = i_max;
+ }
+
+ diff = max-min;
+
+ i = gegl_buffer_cl_iterator_new (output,
+ result,
+ out_format,
+ GEGL_CL_BUFFER_WRITE);
+
+ read = gegl_buffer_cl_iterator_add_2 (i,
+ input,
+ result,
+ in_format,
+ GEGL_CL_BUFFER_READ,
+ 0,
+ 0,
+ 0,
+ 0,
+ GEGL_ABYSS_NONE);
+
+ while (gegl_buffer_cl_iterator_next (i, &err))
+ {
+ if (err) return FALSE;
+
+ err = cl_stretch_contrast(i->tex[read],
+ i->tex[0],
+ i->size[0],
+ &i->roi[0],
+ min,
+ diff);
+ if (err) return FALSE;
+ }
+
+ return TRUE;
+}
+
static gboolean
process (GeglOperation *operation,
GeglBuffer *input,
@@ -96,6 +365,10 @@ process (GeglOperation *operation,
gdouble min, max, diff;
GeglBufferIterator *gi;
+ if (gegl_cl_is_accelerated ())
+ if (cl_process (operation, input, output, result))
+ return TRUE;
+
buffer_get_min_max (input, &min, &max);
diff = max - min;
@@ -150,6 +423,7 @@ gegl_chant_class_init (GeglChantClass *klass)
operation_class->prepare = prepare;
operation_class->get_required_for_output = get_required_for_output;
operation_class->get_cached_region = get_cached_region;
+ operation_class->opencl_support = TRUE;
gegl_operation_class_set_keys (operation_class,
"name" , "gegl:stretch-contrast",
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]