[gegl] Common color conversions performed in the GPU and fixing some bugs in point-filter



commit 77598e58e3b26ad9abcf389680d8837d044c49cd
Author: Victor Oliveira <victormatheus gmail com>
Date:   Tue Dec 6 15:35:49 2011 -0200

    Common color conversions performed in the GPU and fixing some bugs in point-filter

 gegl/opencl/Makefile.am                      |    5 +-
 gegl/opencl/cl_gl_ext.h                      |    2 +-
 gegl/opencl/gegl-cl-color-kernel.h           |  108 ++++++++++++
 gegl/opencl/gegl-cl-color.c                  |  131 +++++++++++++++
 gegl/opencl/gegl-cl-color.h                  |   14 ++
 gegl/opencl/gegl-cl-init.c                   |    5 +
 gegl/opencl/gegl-cl-init.h                   |    1 -
 gegl/opencl/gegl-cl.h                        |    1 +
 gegl/operation/gegl-operation-context.c      |    2 +
 gegl/operation/gegl-operation-point-filter.c |  224 ++++++++++----------------
 operations/common/brightness-contrast.c      |   11 +-
 11 files changed, 354 insertions(+), 150 deletions(-)
---
diff --git a/gegl/opencl/Makefile.am b/gegl/opencl/Makefile.am
index 39891d0..ce6edbd 100644
--- a/gegl/opencl/Makefile.am
+++ b/gegl/opencl/Makefile.am
@@ -20,6 +20,7 @@ libcl_public_HEADERS = \
 	gegl-cl-init.h \
 	gegl-cl-texture.h \
 	gegl-cl-types.h \
+	gegl-cl-color.h \
 	cl_d3d10.h \
 	cl_ext.h \
 	cl_gl_ext.h \
@@ -33,7 +34,9 @@ libcl_sources = \
 	gegl-cl-init.c \
 	gegl-cl-init.h \
 	gegl-cl-texture.c \
-	gegl-cl-texture.h
+	gegl-cl-texture.h \
+	gegl-cl-color.c \
+	gegl-cl-color.h
 
 noinst_LTLIBRARIES = libcl.la
 
diff --git a/gegl/opencl/cl_gl_ext.h b/gegl/opencl/cl_gl_ext.h
index e62be7b..c8a8293 100644
--- a/gegl/opencl/cl_gl_ext.h
+++ b/gegl/opencl/cl_gl_ext.h
@@ -41,7 +41,7 @@ extern "C" {
 
 /*
  * For each extension, follow this template
- * /* cl_VEN_extname extension  */
+ * cl_VEN_extname extension  */
 /* #define cl_VEN_extname 1
  * ... define new types, if any
  * ... define new tokens, if any
diff --git a/gegl/opencl/gegl-cl-color-kernel.h b/gegl/opencl/gegl-cl-color-kernel.h
new file mode 100644
index 0000000..4d42943
--- /dev/null
+++ b/gegl/opencl/gegl-cl-color-kernel.h
@@ -0,0 +1,108 @@
+static const char* kernel_color_source =
+"/* This is almost a copy-paste from babl/base conversion functions in RGBA space */      \n"
+"                                                                                         \n"
+"/* Alpha threshold used in the reference implementation for                              \n"
+" * un-pre-multiplication of color data:                                                  \n"
+" *                                                                                       \n"
+" * 0.01 / (2^16 - 1)                                                                     \n"
+" */                                                                                      \n"
+"#define BABL_ALPHA_THRESHOLD 0.000000152590219                                           \n"
+"                                                                                         \n"
+"float linear_to_gamma_2_2 (float value)                                                  \n"
+"{                                                                                        \n"
+"  if (value > 0.0030402477f)                                                             \n"
+"    return 1.055f * native_powr (value, (1.0f/2.4f)) - 0.055f;                           \n"
+"  return 12.92f * value;                                                                 \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"float gamma_2_2_to_linear (float value)                                                  \n"
+"{                                                                                        \n"
+"  if (value > 0.03928f)                                                                  \n"
+"    return native_powr ((value + 0.055f) / 1.055f, 2.4f);                                \n"
+"  return value / 12.92f;                                                                 \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |                             \n"
+"                               CLK_ADDRESS_NONE            |                             \n"
+"                               CLK_FILTER_NEAREST;                                       \n"
+"                                                                                         \n"
+"/* RGBA float -> RaGaBaA float */                                                        \n"
+"__kernel void non_premultiplied_to_premultiplied (__read_only  image2d_t in,             \n"
+"                                                  __write_only image2d_t out)            \n"
+"{                                                                                        \n"
+"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
+"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  float4 out_v;                                                                          \n"
+"  out_v   = in_v * in_v.w;                                                               \n"
+"  out_v.w = in_v.w;                                                                      \n"
+"  write_imagef(out, gid, out_v);                                                         \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"/* RaGaBaA float -> RGBA float */                                                        \n"
+"__kernel void premultiplied_to_non_premultiplied (__read_only  image2d_t in,             \n"
+"                                                  __write_only image2d_t out)            \n"
+"{                                                                                        \n"
+"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
+"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  float4 out_v;                                                                          \n"
+"  out_v = (in_v.w > BABL_ALPHA_THRESHOLD)? in_v / in_v.w : (float4)(0.0f);               \n"
+"  out_v.w = in_v.w;                                                                      \n"
+"  write_imagef(out, gid, out_v);                                                         \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"/* RGBA float -> R'G'B'A float */                                                        \n"
+"__kernel void rgba2rgba_gamma_2_2 (__read_only  image2d_t in,                            \n"
+"                                   __write_only image2d_t out)                           \n"
+"{                                                                                        \n"
+"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
+"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  float4 out_v;                                                                          \n"
+"  out_v = (float4)(linear_to_gamma_2_2(in_v.x),                                          \n"
+"                   linear_to_gamma_2_2(in_v.y),                                          \n"
+"                   linear_to_gamma_2_2(in_v.z),                                          \n"
+"                   in_v.w);                                                              \n"
+"  write_imagef(out, gid, out_v);                                                         \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"/* R'G'B'A float -> RGBA float */                                                        \n"
+"__kernel void rgba_gamma_2_22rgba (__read_only  image2d_t in,                            \n"
+"                                   __write_only image2d_t out)                           \n"
+"{                                                                                        \n"
+"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
+"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  float4 out_v;                                                                          \n"
+"  out_v = (float4)(gamma_2_2_to_linear(in_v.x),                                          \n"
+"                   gamma_2_2_to_linear(in_v.y),                                          \n"
+"                   gamma_2_2_to_linear(in_v.z),                                          \n"
+"                   in_v.w);                                                              \n"
+"  write_imagef(out, gid, out_v);                                                         \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"/* RGBA float -> R'aG'aB'aA float */                                                     \n"
+"__kernel void rgba2rgba_gamma_2_2_premultiplied (__read_only  image2d_t in,              \n"
+"                                                 __write_only image2d_t out)             \n"
+"{                                                                                        \n"
+"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
+"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  float4 out_v;                                                                          \n"
+"  out_v = (float4)(linear_to_gamma_2_2(in_v.x) * in_v.w,                                 \n"
+"                   linear_to_gamma_2_2(in_v.y) * in_v.w,                                 \n"
+"                   linear_to_gamma_2_2(in_v.z) * in_v.w,                                 \n"
+"                   in_v.w);                                                              \n"
+"  write_imagef(out, gid, out_v);                                                         \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"/* R'aG'aB'aA float -> RGBA float */                                                     \n"
+"__kernel void rgba_gamma_2_2_premultiplied2rgba (__read_only  image2d_t in,              \n"
+"                                                 __write_only image2d_t out)             \n"
+"{                                                                                        \n"
+"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
+"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  float4 out_v;                                                                          \n"
+"  out_v = (in_v.w > BABL_ALPHA_THRESHOLD)? (float4)(linear_to_gamma_2_2(in_v.x) / in_v.w,\n"
+"                                                    linear_to_gamma_2_2(in_v.y) / in_v.w,\n"
+"                                                    linear_to_gamma_2_2(in_v.z) / in_v.w,\n"
+"                                                    in_v.w) :                            \n"
+"                                           (float4)(0.0f);                               \n"
+"  write_imagef(out, gid, out_v);                                                         \n"
+"}                                                                                        \n";
diff --git a/gegl/opencl/gegl-cl-color.c b/gegl/opencl/gegl-cl-color.c
new file mode 100644
index 0000000..149a608
--- /dev/null
+++ b/gegl/opencl/gegl-cl-color.c
@@ -0,0 +1,131 @@
+#include "gegl.h"
+#include "gegl-cl-color.h"
+#include "gegl-cl-init.h"
+
+#include "gegl-cl-color-kernel.h"
+
+static gegl_cl_run_data *kernels_color = NULL;
+
+static const Babl *format[6];
+
+void
+gegl_cl_color_compile_kernels(void)
+{
+  const char *kernel_name[] = {"non_premultiplied_to_premultiplied", /* 0 */
+                               "premultiplied_to_non_premultiplied", /* 1 */
+                               "rgba2rgba_gamma_2_2",                /* 2 */
+                               "rgba_gamma_2_22rgba",                /* 3 */
+                               "rgba2rgba_gamma_2_2_premultiplied",  /* 4 */
+                               "rgba_gamma_2_2_premultiplied2rgba",  /* 5 */
+                               NULL};
+
+  format[0] = babl_format ("RaGaBaA float"),
+  format[1] = babl_format ("RGBA float"),
+  format[2] = babl_format ("R'G'B'A float"),
+  format[3] = babl_format ("RGBA float"),
+  format[4] = babl_format ("R'aG'aB'aA float"),
+  format[5] = babl_format ("RGBA float"),
+
+  kernels_color = gegl_cl_compile_and_build (kernel_color_source, kernel_name);
+}
+
+gboolean
+gegl_cl_color_supported (const Babl *in_format, const Babl *out_format)
+{
+  int i;
+  gboolean supported_format_in  = FALSE;
+  gboolean supported_format_out = FALSE;
+
+  for (i = 0; i < 6; i++)
+    {
+      if (format[i] == in_format)  supported_format_in  = TRUE;
+      if (format[i] == out_format) supported_format_out = TRUE;
+    }
+
+  return (supported_format_in && supported_format_out);
+}
+
+#define CONV_1(x)   {conv[0] = x; conv[1] = -1;}
+#define CONV_2(x,y) {conv[0] = x; conv[1] =  y;}
+
+//#define CL_ERROR {g_assert(0);}
+#define CL_ERROR {g_printf("[OpenCL] Error in %s:%d %s - %s\n", __FILE__, __LINE__, __func__, gegl_cl_errstring(errcode)); return FALSE;}
+
+gboolean
+gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
+                    const Babl *in_format, const Babl *out_format)
+{
+  int i;
+  int errcode;
+  int conv[2] = {-1, -1};
+
+  if (!gegl_cl_color_supported (in_format, out_format))
+    CL_ERROR
+
+  if (in_format == out_format)
+    {
+      const size_t origin[3] = {0, 0, 0};
+      const size_t region[3] = {size[0], size[1], 1};
+
+      /* just copy in_tex to out_tex */
+      errcode = gegl_clEnqueueCopyImage (gegl_cl_get_command_queue(),
+                                         in_tex, out_tex, origin, origin, region,
+                                         0, NULL, NULL);
+      if (errcode != CL_SUCCESS) CL_ERROR
+
+      errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
+      if (errcode != CL_SUCCESS) CL_ERROR
+    }
+  else
+    {
+      if      (in_format == babl_format ("RGBA float"))
+        {
+          if      (out_format == babl_format ("RaGaBaA float"))    CONV_1(0)
+          else if (out_format == babl_format ("R'G'B'A float"))    CONV_1(2)
+          else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_1(4)
+        }
+      else if (in_format == babl_format ("RaGaBaA float"))
+        {
+          if      (out_format == babl_format ("RGBA float"))       CONV_1(1)
+          else if (out_format == babl_format ("R'G'B'A float"))    CONV_2(1, 2)
+          else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_2(1, 4)
+        }
+      else if (in_format == babl_format ("R'G'B'A float"))
+        {
+          if      (out_format == babl_format ("RGBA float"))       CONV_1(3)
+          else if (out_format == babl_format ("RaGaBaA float"))    CONV_2(3, 0)
+          else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_2(3, 4)
+        }
+      else if (in_format == babl_format ("R'aG'aB'aA float"))
+        {
+          if      (out_format == babl_format ("RGBA float"))       CONV_1(5)
+          else if (out_format == babl_format ("RaGaBaA float"))    CONV_2(5, 0)
+          else if (out_format == babl_format ("R'G'B'A float"))    CONV_2(5, 2)
+        }
+
+      for (i=0; i<2; i++)
+        {
+          if (conv[i] >= 0)
+            {
+              errcode = gegl_clSetKernelArg(kernels_color->kernel[conv[i]], 0, sizeof(cl_mem), (void*)&in_tex);
+              if (errcode != CL_SUCCESS) CL_ERROR
+
+              errcode = gegl_clSetKernelArg(kernels_color->kernel[conv[i]], 1, sizeof(cl_mem), (void*)&out_tex);
+              if (errcode != CL_SUCCESS) CL_ERROR
+
+              errcode = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+                                                    kernels_color->kernel[conv[i]], 2,
+                                                    NULL, size, NULL,
+                                                    0, NULL, NULL);
+              if (errcode != CL_SUCCESS) CL_ERROR
+
+              errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
+              if (errcode != CL_SUCCESS) CL_ERROR
+            }
+        }
+    }
+
+  return TRUE;
+}
+
+#undef CL_ERROR
diff --git a/gegl/opencl/gegl-cl-color.h b/gegl/opencl/gegl-cl-color.h
new file mode 100644
index 0000000..214fd95
--- /dev/null
+++ b/gegl/opencl/gegl-cl-color.h
@@ -0,0 +1,14 @@
+#ifndef __GEGL_CL_COLOR_H__
+#define __GEGL_CL_COLOR_H__
+
+#include <gegl.h>
+#include "gegl-cl-types.h"
+
+void gegl_cl_color_compile_kernels(void);
+
+gboolean gegl_cl_color_supported (const Babl *in_format, const Babl *out_format);
+
+gboolean gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
+                             const Babl *in_format, const Babl *out_format);
+
+#endif
diff --git a/gegl/opencl/gegl-cl-init.c b/gegl/opencl/gegl-cl-init.c
index 06dd39a..7a0ea89 100644
--- a/gegl/opencl/gegl-cl-init.c
+++ b/gegl/opencl/gegl-cl-init.c
@@ -6,6 +6,8 @@
 #include <string.h>
 #include <stdio.h>
 
+#include "gegl-cl-color.h"
+
 const char *gegl_cl_errstring(cl_int err) {
   static const char* strings[] =
   {
@@ -257,6 +259,9 @@ gegl_cl_init (GError **error)
   /* XXX: this dict is being leaked */
   cl_program_hash = g_hash_table_new (g_str_hash, g_str_equal);
 
+  if (cl_state.is_accelerated)
+    gegl_cl_color_compile_kernels();
+
   g_printf("[OpenCL] OK\n");
 
   return TRUE;
diff --git a/gegl/opencl/gegl-cl-init.h b/gegl/opencl/gegl-cl-init.h
index 13ecf87..b69de92 100644
--- a/gegl/opencl/gegl-cl-init.h
+++ b/gegl/opencl/gegl-cl-init.h
@@ -2,7 +2,6 @@
 #define __GEGL_CL_INIT_H__
 
 #include "gegl-cl-types.h"
-#include <gmodule.h>
 
 #define CL_SAFE_CALL(func)                                          \
 func;                                                               \
diff --git a/gegl/opencl/gegl-cl.h b/gegl/opencl/gegl-cl.h
index d91a0e0..b1c4105 100644
--- a/gegl/opencl/gegl-cl.h
+++ b/gegl/opencl/gegl-cl.h
@@ -4,5 +4,6 @@
 #include "gegl-cl-types.h"
 #include "gegl-cl-init.h"
 #include "gegl-cl-texture.h"
+#include "gegl-cl-color.h"
 
 #endif
diff --git a/gegl/operation/gegl-operation-context.c b/gegl/operation/gegl-operation-context.c
index d2dd1be..076cde2 100644
--- a/gegl/operation/gegl-operation-context.c
+++ b/gegl/operation/gegl-operation-context.c
@@ -315,6 +315,7 @@ gegl_operation_context_get_source (GeglOperationContext *context,
   if (!real_input)
     return NULL;
   input = g_object_ref (real_input);
+
   return input;
 }
 
@@ -391,6 +392,7 @@ gegl_operation_context_get_target (GeglOperationContext *context,
     }
 
   gegl_operation_context_take_object (context, padname, G_OBJECT (output));
+
   return output;
 }
 
diff --git a/gegl/operation/gegl-operation-point-filter.c b/gegl/operation/gegl-operation-point-filter.c
index 622bdc1..19d7dba 100644
--- a/gegl/operation/gegl-operation-point-filter.c
+++ b/gegl/operation/gegl-operation-point-filter.c
@@ -72,115 +72,6 @@ gegl_operation_point_filter_init (GeglOperationPointFilter *self)
 {
 }
 
-static gboolean
-gegl_operation_point_filter_cl_process_tiled (GeglOperation       *operation,
-                                              GeglBuffer          *input,
-                                              GeglBuffer          *output,
-                                              const GeglRectangle *result)
-{
-  GeglOperationPointFilterClass *point_filter_class = GEGL_OPERATION_POINT_FILTER_GET_CLASS (operation);
-
-  const gint bpp = babl_format_get_bytes_per_pixel (babl_format ("RGBA float"));
-
-  int y, x;
-  int errcode;
-  cl_mem in_tex = NULL, out_tex = NULL;
-  cl_image_format format;
-
-  gfloat* in_data  = (gfloat*) gegl_malloc(result->width * result->height * bpp);
-  gfloat* out_data = (gfloat*) gegl_malloc(result->width * result->height * bpp);
-
-  if (in_data == NULL || out_data == NULL) goto error;
-
-  /* un-tile */
-  gegl_buffer_get (input, 1.0, result, babl_format ("RGBA float"), in_data, GEGL_AUTO_ROWSTRIDE);
-
-  format.image_channel_order = CL_RGBA;
-  format.image_channel_data_type = CL_FLOAT;
-
-  in_tex  = gegl_clCreateImage2D (gegl_cl_get_context(),
-                                  CL_MEM_READ_ONLY,
-                                  &format,
-                                  cl_state.max_image_width,
-                                  cl_state.max_image_height,
-                                  0,  NULL, &errcode);
-
-  if (errcode != CL_SUCCESS) goto error;
-
-  out_tex = gegl_clCreateImage2D (gegl_cl_get_context(),
-                                  CL_MEM_WRITE_ONLY,
-                                  &format,
-                                  cl_state.max_image_width,
-                                  cl_state.max_image_height,
-                                  0,  NULL, &errcode);
-
-  if (errcode != CL_SUCCESS) goto error;
-
-  for (y=0; y < result->height; y += cl_state.max_image_height)
-    for (x=0; x < result->width;  x += cl_state.max_image_width)
-      {
-        const size_t offset = y * (4 * result->width) + (4 * x);
-        const size_t origin[3] = {0, 0, 0};
-        const size_t region[3] = {MIN(cl_state.max_image_width,  result->width -x),
-                                  MIN(cl_state.max_image_height, result->height-y),
-                                  1};
-        const size_t global_worksize[2] = {region[0], region[1]};
-
-        GeglRectangle roi = {x, y, region[0], region[1]};
-
-        /* CPU -> GPU */
-        errcode = gegl_clEnqueueWriteImage(gegl_cl_get_command_queue(), in_tex, CL_FALSE,
-                                           origin, region, result->width * 4 * sizeof(gfloat), 0, &in_data[offset],
-                                           0, NULL, NULL);
-        if (errcode != CL_SUCCESS) goto error;
-
-        /* Wait */
-        errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-        if (errcode != CL_SUCCESS) goto error;
-
-        /* Process */
-        errcode = point_filter_class->cl_process(operation, in_tex, out_tex, global_worksize, &roi);
-        if (errcode != CL_SUCCESS) goto error;
-
-        /* Wait */
-        errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-        if (errcode != CL_SUCCESS) goto error;
-
-        /* GPU -> CPU */
-        errcode = gegl_clEnqueueReadImage(gegl_cl_get_command_queue(), out_tex, CL_FALSE,
-                                           origin, region, result->width * 4 * sizeof(gfloat), 0, &out_data[offset],
-                                           0, NULL, NULL);
-        if (errcode != CL_SUCCESS) goto error;
-
-        /* Wait */
-        errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-        if (errcode != CL_SUCCESS) goto error;
-      }
-
-  errcode = gegl_clFinish(gegl_cl_get_command_queue());
-  if (errcode != CL_SUCCESS) goto error;
-
-  /* tile-ize */
-  gegl_buffer_set (output, result, babl_format ("RGBA float"), out_data, GEGL_AUTO_ROWSTRIDE);
-
-  gegl_clReleaseMemObject (in_tex);
-  gegl_clReleaseMemObject (out_tex);
-
-  gegl_free(in_data);
-  gegl_free(out_data);
-
-  return TRUE;
-
-error:
-  g_warning("[OpenCL] Error: %s", gegl_cl_errstring(errcode));
-  if (in_tex)   gegl_clReleaseMemObject (in_tex);
-  if (out_tex)  gegl_clReleaseMemObject (out_tex);
-  if (in_data)  free (in_data);
-  if (out_data) free (out_data);
-
-  return FALSE;
-}
-
 struct buf_tex
 {
   GeglBuffer *buf;
@@ -188,15 +79,19 @@ struct buf_tex
   cl_mem *tex;
 };
 
+//#define CL_ERROR {g_assert(0);}
+#define CL_ERROR {g_printf("[OpenCL] Error in %s:%d %s - %s\n", __FILE__, __LINE__, __func__, gegl_cl_errstring(errcode)); goto error;}
+
 static gboolean
 gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
                                              GeglBuffer          *input,
                                              GeglBuffer          *output,
                                              const GeglRectangle *result)
 {
-  GeglOperationPointFilterClass *point_filter_class = GEGL_OPERATION_POINT_FILTER_GET_CLASS (operation);
+  const Babl *in_format  = gegl_operation_get_format (operation, "input");
+  const Babl *out_format = gegl_operation_get_format (operation, "output");
 
-  const gint bpp = babl_format_get_bytes_per_pixel (babl_format ("RGBA float"));
+  GeglOperationPointFilterClass *point_filter_class = GEGL_OPERATION_POINT_FILTER_GET_CLASS (operation);
 
   int y, x, i;
   int errcode;
@@ -207,13 +102,14 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
   int ntex = 0;
   struct buf_tex input_tex;
   struct buf_tex output_tex;
+  size_t *pitch = NULL;
 
   cl_image_format format;
   format.image_channel_order = CL_RGBA;
   format.image_channel_data_type = CL_FLOAT;
 
-  for (y=0; y < result->height; y += cl_state.max_image_height)
-   for (x=0; x < result->width;  x += cl_state.max_image_width)
+  for (y=result->y; y < result->height; y += cl_state.max_image_height)
+   for (x=result->x; x < result->width;  x += cl_state.max_image_width)
      ntex++;
 
   input_tex.region  = (GeglRectangle *) gegl_malloc(ntex * sizeof(GeglRectangle));
@@ -221,35 +117,50 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
   input_tex.tex     = (cl_mem *)        gegl_malloc(ntex * sizeof(cl_mem));
   output_tex.tex    = (cl_mem *)        gegl_malloc(ntex * sizeof(cl_mem));
 
+  g_printf("[OpenCL] BABL formats: (%s,%s:%d) (%s,%s:%d)\n \t Tile Size:(%d, %d)\n", babl_get_name(gegl_buffer_get_format(input)),  babl_get_name(in_format),
+                                                             gegl_cl_color_supported (gegl_buffer_get_format(input), in_format),
+                                                             babl_get_name(out_format), babl_get_name(gegl_buffer_get_format(output)),
+                                                             gegl_cl_color_supported (out_format, gegl_buffer_get_format(output)),
+                                                             input->tile_storage->tile_width,
+                                                             input->tile_storage->tile_height);
+
+  input_tex.tex  = (cl_mem *) gegl_malloc(ntex * sizeof(cl_mem));
+  output_tex.tex = (cl_mem *) gegl_malloc(ntex * sizeof(cl_mem));
+
   if (input_tex.region == NULL || output_tex.region == NULL || input_tex.tex == NULL || output_tex.tex == NULL)
-    goto error;
+    CL_ERROR;
 
-  size_t *pitch = (size_t *) gegl_malloc(ntex * sizeof(size_t *));
+  pitch = (size_t *) gegl_malloc(ntex * sizeof(size_t *));
   in_data  = (gfloat**) gegl_malloc(ntex * sizeof(gfloat *));
   out_data = (gfloat**) gegl_malloc(ntex * sizeof(gfloat *));
 
-  if (pitch == NULL || in_data == NULL || out_data == NULL) goto error;
+  if (pitch == NULL || in_data == NULL || out_data == NULL) CL_ERROR;
 
   i = 0;
-  for (y=0; y < result->height; y += cl_state.max_image_height)
-    for (x=0; x < result->width;  x += cl_state.max_image_width)
+  for (y=result->y; y < result->height; y += cl_state.max_image_height)
+    for (x=result->x; x < result->width;  x += cl_state.max_image_width)
       {
         const size_t region[3] = {MIN(cl_state.max_image_width,  result->width -x),
-                                  MIN(cl_state.max_image_height, result->height-y)};
+                                  MIN(cl_state.max_image_height, result->height-y),
+                                  1};
 
         GeglRectangle r = {x, y, region[0], region[1]};
         input_tex.region[i] = output_tex.region[i] = r;
 
-        input_tex.tex[i]  = gegl_clCreateImage2D (gegl_cl_get_context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, &format, region[0], region[1],
+        input_tex.tex[i]  = gegl_clCreateImage2D (gegl_cl_get_context(),
+                                                  CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, &format,
+                                                  region[0], region[1],
                                                   0,  NULL, &errcode);
-        if (errcode != CL_SUCCESS) goto error;
+        if (errcode != CL_SUCCESS) CL_ERROR;
 
-        output_tex.tex[i] = gegl_clCreateImage2D (gegl_cl_get_context(), CL_MEM_WRITE_ONLY, &format, region[0], region[1],
+        output_tex.tex[i] = gegl_clCreateImage2D (gegl_cl_get_context(),
+                                                  CL_MEM_READ_WRITE, &format,
+                                                  region[0], region[1],
                                                   0,  NULL, &errcode);
-        if (errcode != CL_SUCCESS) goto error;
+        if (errcode != CL_SUCCESS) CL_ERROR;
 
-        out_data[i] = (gfloat *) gegl_malloc(region[0] * region[1] * bpp);
-        if (out_data[i] == NULL) goto error;
+        out_data[i] = (gfloat *) gegl_malloc(region[0] * region[1] * babl_format_get_bytes_per_pixel(out_format));
+        if (out_data[i] == NULL) CL_ERROR;
 
         i++;
       }
@@ -264,10 +175,13 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
                                            CL_MAP_WRITE,
                                            origin, region, &pitch[i], NULL,
                                            0, NULL, NULL, &errcode);
-      if (errcode != CL_SUCCESS) goto error;
+      if (errcode != CL_SUCCESS) CL_ERROR;
 
       /* un-tile */
-      gegl_buffer_get (input, 1.0, &input_tex.region[i], babl_format ("RGBA float"), in_data[i], GEGL_AUTO_ROWSTRIDE);
+      if (gegl_cl_color_supported (gegl_buffer_get_format(input), in_format)) /* color conversion will be performed in the GPU later */
+        gegl_buffer_get (input, 1.0, &input_tex.region[i], gegl_buffer_get_format(input), in_data[i], GEGL_AUTO_ROWSTRIDE);
+      else                                                 /* color conversion using BABL */
+        gegl_buffer_get (input, 1.0, &input_tex.region[i], in_format, in_data[i], GEGL_AUTO_ROWSTRIDE);
     }
 
   /* CPU -> GPU */
@@ -275,26 +189,55 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
     {
       errcode = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), input_tex.tex[i], in_data[i],
                                               0, NULL, NULL);
-      if (errcode != CL_SUCCESS) goto error;
+      if (errcode != CL_SUCCESS) CL_ERROR;
     }
 
   errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-  if (errcode != CL_SUCCESS) goto error;
+  if (errcode != CL_SUCCESS) CL_ERROR;
+
+  /* color conversion in the GPU (input) */
+  if (gegl_cl_color_supported (gegl_buffer_get_format(input), in_format))
+    for (i=0; i < ntex; i++)
+     {
+       cl_mem swap;
+       const size_t size[2] = {input_tex.region[i].width, input_tex.region[i].height};
+       errcode = gegl_cl_color_conv (input_tex.tex[i], output_tex.tex[i], size, gegl_buffer_get_format(input), in_format);
+
+       if (errcode == FALSE) CL_ERROR;
+
+       swap = input_tex.tex[i];
+       input_tex.tex[i]  = output_tex.tex[i];
+       output_tex.tex[i] = swap;
+     }
 
   /* Process */
   for (i=0; i < ntex; i++)
     {
-      const size_t origin[3] = {0, 0, 0};
       const size_t region[3] = {input_tex.region[i].width, input_tex.region[i].height, 1};
       const size_t global_worksize[2] = {region[0], region[1]};
 
       errcode = point_filter_class->cl_process(operation, input_tex.tex[i], output_tex.tex[i], global_worksize, &input_tex.region[i]);
-      if (errcode != CL_SUCCESS) goto error;
+      if (errcode != CL_SUCCESS) CL_ERROR;
     }
 
   /* Wait Processing */
   errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-  if (errcode != CL_SUCCESS) goto error;
+  if (errcode != CL_SUCCESS) CL_ERROR;
+
+  /* color conversion in the GPU (output) */
+  if (gegl_cl_color_supported (out_format, gegl_buffer_get_format(output)))
+    for (i=0; i < ntex; i++)
+     {
+       cl_mem swap;
+       const size_t size[2] = {output_tex.region[i].width, output_tex.region[i].height};
+       errcode = gegl_cl_color_conv (output_tex.tex[i], input_tex.tex[i], size, out_format, gegl_buffer_get_format(output));
+
+       if (errcode == FALSE) CL_ERROR;
+
+       swap = input_tex.tex[i];
+       input_tex.tex[i]  = output_tex.tex[i];
+       output_tex.tex[i] = swap;
+     }
 
   /* GPU -> CPU */
   for (i=0; i < ntex; i++)
@@ -305,21 +248,24 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
       errcode = gegl_clEnqueueReadImage(gegl_cl_get_command_queue(), output_tex.tex[i], CL_FALSE,
                                          origin, region, pitch[i], 0, out_data[i],
                                          0, NULL, NULL);
-      if (errcode != CL_SUCCESS) goto error;
+      if (errcode != CL_SUCCESS) CL_ERROR;
     }
 
   /* Wait */
   errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-  if (errcode != CL_SUCCESS) goto error;
+  if (errcode != CL_SUCCESS) CL_ERROR;
 
   /* Run! */
   errcode = gegl_clFinish(gegl_cl_get_command_queue());
-  if (errcode != CL_SUCCESS) goto error;
+  if (errcode != CL_SUCCESS) CL_ERROR;
 
   for (i=0; i < ntex; i++)
     {
       /* tile-ize */
-      gegl_buffer_set (output, &output_tex.region[i], babl_format ("RGBA float"), out_data[i], GEGL_AUTO_ROWSTRIDE);
+      if (gegl_cl_color_supported (out_format, gegl_buffer_get_format(output))) /* color conversion has already been be performed in the GPU */
+        gegl_buffer_set (output, &output_tex.region[i], gegl_buffer_get_format(output), out_data[i], GEGL_AUTO_ROWSTRIDE);
+      else                                                 /* color conversion using BABL */
+        gegl_buffer_set (output, &output_tex.region[i], out_format, out_data[i], GEGL_AUTO_ROWSTRIDE);
     }
 
   for (i=0; i < ntex; i++)
@@ -338,7 +284,6 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
   return TRUE;
 
 error:
-  g_warning("[OpenCL] Error: %s", gegl_cl_errstring(errcode));
 
     for (i=0; i < ntex; i++)
       {
@@ -356,6 +301,7 @@ error:
   return FALSE;
 }
 
+#undef CL_ERROR
 
 static gboolean
 gegl_operation_point_filter_process (GeglOperation       *operation,
@@ -375,10 +321,6 @@ gegl_operation_point_filter_process (GeglOperation       *operation,
         {
           if (gegl_operation_point_filter_cl_process_full (operation, input, output, result))
             return TRUE;
-
-          /* the function above failed */
-          if (gegl_operation_point_filter_cl_process_tiled (operation, input, output, result))
-            return TRUE;
         }
 
       {
diff --git a/operations/common/brightness-contrast.c b/operations/common/brightness-contrast.c
index 7d0edf2..4ea8159 100644
--- a/operations/common/brightness-contrast.c
+++ b/operations/common/brightness-contrast.c
@@ -152,12 +152,12 @@ cl_process (GeglOperation       *op,
 
   if (!cl_data) return 1;
 
-  CL_SAFE_CALL(errcode |= gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex));
-  CL_SAFE_CALL(errcode |= gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&out_tex));
-  CL_SAFE_CALL(errcode |= gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&brightness));
-  CL_SAFE_CALL(errcode |= gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&contrast));
+  CL_SAFE_CALL(errcode = gegl_clSetKernelArg(cl_data->kernel[0], 0, sizeof(cl_mem),   (void*)&in_tex));
+  CL_SAFE_CALL(errcode = gegl_clSetKernelArg(cl_data->kernel[0], 1, sizeof(cl_mem),   (void*)&out_tex));
+  CL_SAFE_CALL(errcode = gegl_clSetKernelArg(cl_data->kernel[0], 2, sizeof(cl_float), (void*)&brightness));
+  CL_SAFE_CALL(errcode = gegl_clSetKernelArg(cl_data->kernel[0], 3, sizeof(cl_float), (void*)&contrast));
 
-  CL_SAFE_CALL(errcode |= gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+  CL_SAFE_CALL(errcode = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
                                                      cl_data->kernel[0], 2,
                                                      NULL, global_worksize, NULL,
                                                      0, NULL, NULL) );
@@ -168,7 +168,6 @@ cl_process (GeglOperation       *op,
       return errcode;
     }
 
-  g_printf("[OpenCL] Running Brightness-Constrast Kernel in region (%d %d %d %d)\n", roi->x, roi->y, roi->width, roi->height);
   return errcode;
 }
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]