[gegl] Color conversion rgba8 to/from rgba float with OpenCL



commit f366e26f9371112eb7831dd2d6a4707a7b226433
Author: Victor Oliveira <victormatheus gmail com>
Date:   Mon Dec 19 12:16:48 2011 -0200

    Color conversion rgba8 to/from rgba float with OpenCL

 gegl/opencl/gegl-cl-color-kernel.h           |   20 ++++++
 gegl/opencl/gegl-cl-color.c                  |   95 +++++++++++++++++++-------
 gegl/opencl/gegl-cl-color.h                  |   11 +++-
 gegl/operation/gegl-operation-point-filter.c |   64 +++++++++--------
 4 files changed, 132 insertions(+), 58 deletions(-)
---
diff --git a/gegl/opencl/gegl-cl-color-kernel.h b/gegl/opencl/gegl-cl-color-kernel.h
index 4d42943..149c370 100644
--- a/gegl/opencl/gegl-cl-color-kernel.h
+++ b/gegl/opencl/gegl-cl-color-kernel.h
@@ -105,4 +105,24 @@ static const char* kernel_color_source =
 "                                                    in_v.w) :                            \n"
 "                                           (float4)(0.0f);                               \n"
 "  write_imagef(out, gid, out_v);                                                         \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"/* RGBA float -> RGBA u8 */                                                              \n"
+"__kernel void rgbaf_to_rgbau8 (__read_only  image2d_t in,                                \n"
+"                               __write_only image2d_t out)                               \n"
+"{                                                                                        \n"
+"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
+"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  float4 out_v;                                                                          \n"
+"  write_imagef(out, gid, out_v);                                                         \n"
+"}                                                                                        \n"
+"                                                                                         \n"
+"/* RGBAu8 -> RGBA float */                                                               \n"
+"__kernel void rgbau8_to_rgbaf (__read_only  image2d_t in,                                \n"
+"                               __write_only image2d_t out)                               \n"
+"{                                                                                        \n"
+"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
+"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  float4 out_v;                                                                          \n"
+"  write_imagef(out, gid, out_v);                                                         \n"
 "}                                                                                        \n";
diff --git a/gegl/opencl/gegl-cl-color.c b/gegl/opencl/gegl-cl-color.c
index 149a608..7e37e92 100644
--- a/gegl/opencl/gegl-cl-color.c
+++ b/gegl/opencl/gegl-cl-color.c
@@ -6,7 +6,9 @@
 
 static gegl_cl_run_data *kernels_color = NULL;
 
-static const Babl *format[6];
+#define CL_FORMAT_N 8
+
+static const Babl *format[CL_FORMAT_N];
 
 void
 gegl_cl_color_compile_kernels(void)
@@ -17,6 +19,8 @@ gegl_cl_color_compile_kernels(void)
                                "rgba_gamma_2_22rgba",                /* 3 */
                                "rgba2rgba_gamma_2_2_premultiplied",  /* 4 */
                                "rgba_gamma_2_2_premultiplied2rgba",  /* 5 */
+                               "rgbaf_to_rgbau8",                    /* 6 */
+                               "rgbau8_to_rgbaf",                    /* 7 */
                                NULL};
 
   format[0] = babl_format ("RaGaBaA float"),
@@ -25,24 +29,32 @@ gegl_cl_color_compile_kernels(void)
   format[3] = babl_format ("RGBA float"),
   format[4] = babl_format ("R'aG'aB'aA float"),
   format[5] = babl_format ("RGBA float"),
+  format[6] = babl_format ("RGBA u8"),
+  format[7] = babl_format ("RGBA float"),
 
   kernels_color = gegl_cl_compile_and_build (kernel_color_source, kernel_name);
 }
 
-gboolean
+gegl_cl_color_op
 gegl_cl_color_supported (const Babl *in_format, const Babl *out_format)
 {
   int i;
   gboolean supported_format_in  = FALSE;
   gboolean supported_format_out = FALSE;
 
-  for (i = 0; i < 6; i++)
+  if (in_format == out_format)
+    return CL_COLOR_EQUAL;
+
+  for (i = 0; i < CL_FORMAT_N; i++)
     {
       if (format[i] == in_format)  supported_format_in  = TRUE;
       if (format[i] == out_format) supported_format_out = TRUE;
     }
 
-  return (supported_format_in && supported_format_out);
+  if (supported_format_in && supported_format_out)
+    return CL_COLOR_CONVERT;
+  else
+    return CL_COLOR_NOT_SUPPORTED;
 }
 
 #define CONV_1(x)   {conv[0] = x; conv[1] = -1;}
@@ -51,16 +63,20 @@ gegl_cl_color_supported (const Babl *in_format, const Babl *out_format)
 //#define CL_ERROR {g_assert(0);}
 #define CL_ERROR {g_printf("[OpenCL] Error in %s:%d %s - %s\n", __FILE__, __LINE__, __func__, gegl_cl_errstring(errcode)); return FALSE;}
 
+/* in_tex and aux_tex may be destroyed to keep intermediate results,
+   converted result will be stored in in_tex */
 gboolean
-gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
+gegl_cl_color_conv (cl_mem *in_tex, cl_mem *aux_tex, const size_t size[2],
                     const Babl *in_format, const Babl *out_format)
 {
   int i;
   int errcode;
   int conv[2] = {-1, -1};
 
-  if (!gegl_cl_color_supported (in_format, out_format))
-    CL_ERROR
+  cl_mem ping_tex = *in_tex, pong_tex = *aux_tex;
+
+  if (gegl_cl_color_supported (in_format, out_format) == CL_COLOR_NOT_SUPPORTED)
+    return FALSE;
 
   if (in_format == out_format)
     {
@@ -69,7 +85,7 @@ gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
 
       /* just copy in_tex to out_tex */
       errcode = gegl_clEnqueueCopyImage (gegl_cl_get_command_queue(),
-                                         in_tex, out_tex, origin, origin, region,
+                                         *in_tex, *aux_tex, origin, origin, region,
                                          0, NULL, NULL);
       if (errcode != CL_SUCCESS) CL_ERROR
 
@@ -83,46 +99,73 @@ gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
           if      (out_format == babl_format ("RaGaBaA float"))    CONV_1(0)
           else if (out_format == babl_format ("R'G'B'A float"))    CONV_1(2)
           else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_1(4)
+          else if (out_format == babl_format ("RGBA u8"))          CONV_1(6)
         }
       else if (in_format == babl_format ("RaGaBaA float"))
         {
           if      (out_format == babl_format ("RGBA float"))       CONV_1(1)
           else if (out_format == babl_format ("R'G'B'A float"))    CONV_2(1, 2)
           else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_2(1, 4)
+          else if (out_format == babl_format ("RGBA u8"))          CONV_2(1, 6)
         }
       else if (in_format == babl_format ("R'G'B'A float"))
         {
           if      (out_format == babl_format ("RGBA float"))       CONV_1(3)
           else if (out_format == babl_format ("RaGaBaA float"))    CONV_2(3, 0)
           else if (out_format == babl_format ("R'aG'aB'aA float")) CONV_2(3, 4)
+          else if (out_format == babl_format ("RGBA u8"))          CONV_2(3, 6)
         }
       else if (in_format == babl_format ("R'aG'aB'aA float"))
         {
           if      (out_format == babl_format ("RGBA float"))       CONV_1(5)
           else if (out_format == babl_format ("RaGaBaA float"))    CONV_2(5, 0)
           else if (out_format == babl_format ("R'G'B'A float"))    CONV_2(5, 2)
+          else if (out_format == babl_format ("RGBA u8"))          CONV_2(5, 6)
+        }
+      else if (in_format == babl_format ("RGBA u8"))
+        {
+          if      (out_format == babl_format ("RGBA float"))       CONV_1(7)
+          else if (out_format == babl_format ("RaGaBaA float"))    CONV_2(7, 0)
+          else if (out_format == babl_format ("R'G'B'A float"))    CONV_2(7, 2)
+          else if (out_format == babl_format ("RGBA u8"))          CONV_2(7, 6)
         }
 
-      for (i=0; i<2; i++)
+      /* XXX: maybe there are precision problems if a 8-bit texture is used as intermediate */
+      for (i=0; conv[i] >= 0 && i<2; i++)
         {
-          if (conv[i] >= 0)
-            {
-              errcode = gegl_clSetKernelArg(kernels_color->kernel[conv[i]], 0, sizeof(cl_mem), (void*)&in_tex);
-              if (errcode != CL_SUCCESS) CL_ERROR
-
-              errcode = gegl_clSetKernelArg(kernels_color->kernel[conv[i]], 1, sizeof(cl_mem), (void*)&out_tex);
-              if (errcode != CL_SUCCESS) CL_ERROR
-
-              errcode = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
-                                                    kernels_color->kernel[conv[i]], 2,
-                                                    NULL, size, NULL,
-                                                    0, NULL, NULL);
-              if (errcode != CL_SUCCESS) CL_ERROR
-
-              errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
-              if (errcode != CL_SUCCESS) CL_ERROR
-            }
+            cl_mem tmp_tex;
+
+            errcode = gegl_clSetKernelArg(kernels_color->kernel[conv[i]], 0, sizeof(cl_mem), (void*)&ping_tex);
+            if (errcode != CL_SUCCESS) CL_ERROR
+
+            errcode = gegl_clSetKernelArg(kernels_color->kernel[conv[i]], 1, sizeof(cl_mem), (void*)&pong_tex);
+            if (errcode != CL_SUCCESS) CL_ERROR
+
+            errcode = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
+                                                  kernels_color->kernel[conv[i]], 2,
+                                                  NULL, size, NULL,
+                                                  0, NULL, NULL);
+            if (errcode != CL_SUCCESS) CL_ERROR
+
+            errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
+            if (errcode != CL_SUCCESS) CL_ERROR
+
+            tmp_tex = ping_tex;
+            ping_tex = pong_tex;
+            pong_tex = tmp_tex;
         }
+
+      if (i % 2 == 0)
+        {
+          *in_tex  = ping_tex;
+          *aux_tex = pong_tex;
+        }
+      else
+        {
+          *in_tex  = pong_tex;
+          *aux_tex = ping_tex;
+        }
+
     }
 
   return TRUE;
diff --git a/gegl/opencl/gegl-cl-color.h b/gegl/opencl/gegl-cl-color.h
index 214fd95..08fdfcd 100644
--- a/gegl/opencl/gegl-cl-color.h
+++ b/gegl/opencl/gegl-cl-color.h
@@ -4,11 +4,18 @@
 #include <gegl.h>
 #include "gegl-cl-types.h"
 
+typedef enum
+{
+  CL_COLOR_NOT_SUPPORTED = 0,
+  CL_COLOR_EQUAL         = 1,
+  CL_COLOR_CONVERT       = 2
+} gegl_cl_color_op;
+
 void gegl_cl_color_compile_kernels(void);
 
-gboolean gegl_cl_color_supported (const Babl *in_format, const Babl *out_format);
+gegl_cl_color_op gegl_cl_color_supported (const Babl *in_format, const Babl *out_format);
 
-gboolean gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
+gboolean gegl_cl_color_conv (cl_mem *in_tex, cl_mem *aux_tex, const size_t size[2],
                              const Babl *in_format, const Babl *out_format);
 
 #endif
diff --git a/gegl/operation/gegl-operation-point-filter.c b/gegl/operation/gegl-operation-point-filter.c
index 19d7dba..c24574d 100644
--- a/gegl/operation/gegl-operation-point-filter.c
+++ b/gegl/operation/gegl-operation-point-filter.c
@@ -104,9 +104,20 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
   struct buf_tex output_tex;
   size_t *pitch = NULL;
 
-  cl_image_format format;
-  format.image_channel_order = CL_RGBA;
-  format.image_channel_data_type = CL_FLOAT;
+  /* supported babl formats up to now:
+     RGBA u8
+     All formats with four floating-point channels
+     (I suppose others formats would be hard to put on GPU)
+  */
+
+  cl_image_format rgbaf_format;
+  cl_image_format rgbau8_format;
+
+  rgbaf_format.image_channel_order      = CL_RGBA;
+  rgbaf_format.image_channel_data_type  = CL_FLOAT;
+
+  rgbau8_format.image_channel_order     = CL_RGBA;
+  rgbau8_format.image_channel_data_type = CL_UNORM_INT8;
 
   for (y=result->y; y < result->height; y += cl_state.max_image_height)
    for (x=result->x; x < result->width;  x += cl_state.max_image_width)
@@ -148,13 +159,15 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
         input_tex.region[i] = output_tex.region[i] = r;
 
         input_tex.tex[i]  = gegl_clCreateImage2D (gegl_cl_get_context(),
-                                                  CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, &format,
+                                                  CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
+                                                  (gegl_buffer_get_format(input) == babl_format ("RGBA u8"))? &rgbau8_format : &rgbaf_format,
                                                   region[0], region[1],
                                                   0,  NULL, &errcode);
         if (errcode != CL_SUCCESS) CL_ERROR;
 
         output_tex.tex[i] = gegl_clCreateImage2D (gegl_cl_get_context(),
-                                                  CL_MEM_READ_WRITE, &format,
+                                                  CL_MEM_READ_WRITE,
+                                                  (gegl_buffer_get_format(output) == babl_format ("RGBA u8"))? &rgbau8_format : &rgbaf_format,
                                                   region[0], region[1],
                                                   0,  NULL, &errcode);
         if (errcode != CL_SUCCESS) CL_ERROR;
@@ -178,10 +191,12 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
       if (errcode != CL_SUCCESS) CL_ERROR;
 
       /* un-tile */
-      if (gegl_cl_color_supported (gegl_buffer_get_format(input), in_format)) /* color conversion will be performed in the GPU later */
-        gegl_buffer_get (input, 1.0, &input_tex.region[i], gegl_buffer_get_format(input), in_data[i], GEGL_AUTO_ROWSTRIDE);
-      else                                                 /* color conversion using BABL */
-        gegl_buffer_get (input, 1.0, &input_tex.region[i], in_format, in_data[i], GEGL_AUTO_ROWSTRIDE);
+      if (gegl_cl_color_supported (gegl_buffer_get_format(input), in_format) == CL_COLOR_NOT_SUPPORTED)
+        /* color conversion using BABL */
+        gegl_buffer_get (input, 1.0, &input_tex.region[i], in_format, in_data[i], pitch[i]);
+      else
+        /* color conversion will be performed in the GPU later */
+        gegl_buffer_get (input, 1.0, &input_tex.region[i], input->format, in_data[i], pitch[i]);
     }
 
   /* CPU -> GPU */
@@ -196,18 +211,12 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
   if (errcode != CL_SUCCESS) CL_ERROR;
 
   /* color conversion in the GPU (input) */
-  if (gegl_cl_color_supported (gegl_buffer_get_format(input), in_format))
+  if (gegl_cl_color_supported (input->format, in_format) == CL_COLOR_CONVERT)
     for (i=0; i < ntex; i++)
      {
-       cl_mem swap;
        const size_t size[2] = {input_tex.region[i].width, input_tex.region[i].height};
-       errcode = gegl_cl_color_conv (input_tex.tex[i], output_tex.tex[i], size, gegl_buffer_get_format(input), in_format);
-
+       errcode = gegl_cl_color_conv (&input_tex.tex[i], &output_tex.tex[i], size, input->format, in_format);
        if (errcode == FALSE) CL_ERROR;
-
-       swap = input_tex.tex[i];
-       input_tex.tex[i]  = output_tex.tex[i];
-       output_tex.tex[i] = swap;
      }
 
   /* Process */
@@ -225,18 +234,11 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
   if (errcode != CL_SUCCESS) CL_ERROR;
 
   /* color conversion in the GPU (output) */
-  if (gegl_cl_color_supported (out_format, gegl_buffer_get_format(output)))
+  if (gegl_cl_color_supported (out_format, output->format) == CL_COLOR_CONVERT)
     for (i=0; i < ntex; i++)
      {
-       cl_mem swap;
        const size_t size[2] = {output_tex.region[i].width, output_tex.region[i].height};
-       errcode = gegl_cl_color_conv (output_tex.tex[i], input_tex.tex[i], size, out_format, gegl_buffer_get_format(output));
-
-       if (errcode == FALSE) CL_ERROR;
-
-       swap = input_tex.tex[i];
-       input_tex.tex[i]  = output_tex.tex[i];
-       output_tex.tex[i] = swap;
+       errcode = gegl_cl_color_conv (&output_tex.tex[i], &input_tex.tex[i], size, out_format, output->format);
      }
 
   /* GPU -> CPU */
@@ -246,7 +248,7 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
       const size_t region[3] = {input_tex.region[i].width, input_tex.region[i].height, 1};
 
       errcode = gegl_clEnqueueReadImage(gegl_cl_get_command_queue(), output_tex.tex[i], CL_FALSE,
-                                         origin, region, pitch[i], 0, out_data[i],
+                                         origin, region, 0, 0, out_data[i],
                                          0, NULL, NULL);
       if (errcode != CL_SUCCESS) CL_ERROR;
     }
@@ -262,10 +264,12 @@ gegl_operation_point_filter_cl_process_full (GeglOperation       *operation,
   for (i=0; i < ntex; i++)
     {
       /* tile-ize */
-      if (gegl_cl_color_supported (out_format, gegl_buffer_get_format(output))) /* color conversion has already been be performed in the GPU */
-        gegl_buffer_set (output, &output_tex.region[i], gegl_buffer_get_format(output), out_data[i], GEGL_AUTO_ROWSTRIDE);
-      else                                                 /* color conversion using BABL */
+      if (gegl_cl_color_supported (out_format, output->format) == CL_COLOR_NOT_SUPPORTED)
+        /* color conversion using BABL */
         gegl_buffer_set (output, &output_tex.region[i], out_format, out_data[i], GEGL_AUTO_ROWSTRIDE);
+      else
+        /* color conversion has already been be performed in the GPU */
+        gegl_buffer_set (output, &output_tex.region[i], output->format, out_data[i], GEGL_AUTO_ROWSTRIDE);
     }
 
   for (i=0; i < ntex; i++)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]