[gegl] Using opencl buffers instead of Image2D



commit 6914269cf5cd5c89083774a1b8bfd3f05b3e41c7
Author: Victor Oliveira <victormatheus gmail com>
Date:   Tue Jan 31 11:13:58 2012 -0200

    Using opencl buffers instead of Image2D
    
    changing memory flags also.

 gegl/buffer/gegl-buffer-cl-iterator.c        |  156 +++++++++++---------------
 gegl/buffer/gegl-buffer-cl-iterator.h        |    2 +-
 gegl/opencl/gegl-cl-color-kernel.h           |  109 +++++++++---------
 gegl/opencl/gegl-cl-color.c                  |   16 ++--
 gegl/opencl/gegl-cl-color.h                  |    2 +-
 gegl/operation/gegl-operation-point-filter.h |    2 +-
 operations/common/brightness-contrast.c      |   23 ++--
 7 files changed, 140 insertions(+), 170 deletions(-)
---
diff --git a/gegl/buffer/gegl-buffer-cl-iterator.c b/gegl/buffer/gegl-buffer-cl-iterator.c
index 55a8159..bce2bbe 100644
--- a/gegl/buffer/gegl-buffer-cl-iterator.c
+++ b/gegl/buffer/gegl-buffer-cl-iterator.c
@@ -20,7 +20,7 @@ typedef struct GeglBufferClIterators
 {
   /* current region of interest */
   gint          n;
-  size_t        size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX][2];  /* length of current data in pixels */
+  size_t        size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];  /* length of current data in pixels */
   cl_mem        tex  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];
   GeglRectangle roi  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];
 
@@ -42,9 +42,9 @@ typedef struct GeglBufferClIterators
   GeglBuffer    *buffer         [GEGL_CL_BUFFER_MAX_ITERATORS];
 
   /* buffer->format */
-  cl_image_format buf_cl_format [GEGL_CL_BUFFER_MAX_ITERATORS];
+  size_t buf_cl_format_size     [GEGL_CL_BUFFER_MAX_ITERATORS];
   /* format */
-  cl_image_format op_cl_format  [GEGL_CL_BUFFER_MAX_ITERATORS];
+  size_t op_cl_format_size      [GEGL_CL_BUFFER_MAX_ITERATORS];
 
   gegl_cl_color_op conv         [GEGL_CL_BUFFER_MAX_ITERATORS];
 
@@ -93,8 +93,8 @@ gegl_buffer_cl_iterator_add (GeglBufferClIterator  *iterator,
   else
     i->conv[self] = gegl_cl_color_supported (buffer->format, format);
 
-  gegl_cl_color_babl (buffer->format, &i->buf_cl_format[self], NULL);
-  gegl_cl_color_babl (format,         &i->op_cl_format [self], NULL);
+  gegl_cl_color_babl (buffer->format, NULL, &i->buf_cl_format_size[self]);
+  gegl_cl_color_babl (format,         NULL, &i->op_cl_format_size [self]);
 
   if (self!=0)
     {
@@ -137,8 +137,6 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
   gint no, j;
   cl_int cl_err = 0;
 
-  const size_t origin_zero[3] = {0, 0, 0};
-
   if (i->is_finished)
     g_error ("%s called on finished buffer iterator", G_STRFUNC);
   if (i->iteration_no == 0)
@@ -185,38 +183,36 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
               for (j=0; j < i->n; j++)
                 {
                   gpointer data;
-                  size_t pitch;
-                  const size_t region[3] = {i->roi[no][j].width, i->roi[no][j].height, 1};
 
                   /* tile-ize */
                   if (i->conv[no] == GEGL_CL_COLOR_NOT_SUPPORTED)
                     {
-                      data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
-                                                    CL_MAP_READ,
-                                                    origin_zero, region, &pitch, NULL,
-                                                    0, NULL, NULL, &cl_err);
+                      data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
+                                                     CL_MAP_READ,
+                                                     0, i->size[no][j] * i->op_cl_format_size [no],
+                                                     0, NULL, NULL, &cl_err);
                       if (cl_err != CL_SUCCESS) CL_ERROR;
 
                       /* color conversion using BABL */
-                      gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->format[no], data, pitch);
+                      gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->format[no], data, GEGL_AUTO_ROWSTRIDE);
 
                       cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_op[no][j], data,
-                                                            0, NULL, NULL);
+                                                             0, NULL, NULL);
                       if (cl_err != CL_SUCCESS) CL_ERROR;
                     }
                   else
                     {
-                      data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
-                                                    CL_MAP_READ,
-                                                    origin_zero, region, &pitch, NULL,
-                                                    0, NULL, NULL, &cl_err);
+                      data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
+                                                     CL_MAP_READ,
+                                                     0, i->size[no][j] * i->buf_cl_format_size [no],
+                                                     0, NULL, NULL, &cl_err);
                       if (cl_err != CL_SUCCESS) CL_ERROR;
 
                       /* color conversion has already been performed in the GPU */
-                      gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->buffer[no]->format, data, pitch);
+                      gegl_buffer_set (i->buffer[no], &i->roi[no][j], i->buffer[no]->format, data, GEGL_AUTO_ROWSTRIDE);
 
                       cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
-                                                            0, NULL, NULL);
+                                                             0, NULL, NULL);
                       if (cl_err != CL_SUCCESS) CL_ERROR;
                     }
                 }
@@ -254,9 +250,7 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
                              i->roi_all[i->roi_no+j].width,
                              i->roi_all[i->roi_no+j].height};
           i->roi [no][j] = r;
-
-          i->size[no][j][0] = r.width;
-          i->size[no][j][1] = r.height;
+          i->size[no][j] = r.width * r.height;
         }
 
       if (i->flags[no] == GEGL_CL_BUFFER_READ)
@@ -264,8 +258,6 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
           for (j=0; j < i->n; j++)
             {
               gpointer data;
-              size_t pitch;
-              const size_t region[3] = {i->roi[no][j].width, i->roi[no][j].height, 1};
 
               /* un-tile */
               switch (i->conv[no])
@@ -274,23 +266,21 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
 
                     {
                     g_assert (i->tex_op[no][j] == NULL);
-                    i->tex_op[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
-                                                          CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                                                          &i->op_cl_format [no],
-                                                          i->roi[no][j].width,
-                                                          i->roi[no][j].height,
-                                                          0, NULL, &cl_err);
+                    i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+                                                            CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
+                                                            i->size[no][j] * i->op_cl_format_size [no],
+                                                            NULL, &cl_err);
                     if (cl_err != CL_SUCCESS) CL_ERROR;
 
                     /* pre-pinned memory */
-                    data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
-                                                  CL_MAP_WRITE,
-                                                  origin_zero, region, &pitch, NULL,
-                                                  0, NULL, NULL, &cl_err);
+                    data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_op[no][j], CL_TRUE,
+                                                   CL_MAP_WRITE,
+                                                   0, i->size[no][j] * i->op_cl_format_size [no],
+                                                   0, NULL, NULL, &cl_err);
                     if (cl_err != CL_SUCCESS) CL_ERROR;
 
                     /* color conversion using BABL */
-                    gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->format[no], data, pitch);
+                    gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->format[no], data, GEGL_AUTO_ROWSTRIDE);
 
                     i->tex[no][j] = i->tex_op[no][j];
 
@@ -301,23 +291,21 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
 
                     {
                     g_assert (i->tex_buf[no][j] == NULL);
-                    i->tex_buf[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
-                                                              CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                                                              &i->buf_cl_format [no],
-                                                              i->roi[no][j].width,
-                                                              i->roi[no][j].height,
-                                                              0, NULL, &cl_err);
+                    i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+                                                             CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
+                                                             i->size[no][j] * i->buf_cl_format_size [no],
+                                                             NULL, &cl_err);
                     if (cl_err != CL_SUCCESS) CL_ERROR;
 
                     /* pre-pinned memory */
-                    data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
-                                                  CL_MAP_WRITE,
-                                                  origin_zero, region, &pitch, NULL,
-                                                  0, NULL, NULL, &cl_err);
+                    data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
+                                                   CL_MAP_WRITE,
+                                                   0, i->size[no][j] * i->buf_cl_format_size [no],
+                                                   0, NULL, NULL, &cl_err);
                     if (cl_err != CL_SUCCESS) CL_ERROR;
 
                     /* color conversion will be performed in the GPU later */
-                    gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->buffer[no]->format, data, pitch);
+                    gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->buffer[no]->format, data, GEGL_AUTO_ROWSTRIDE);
 
                     cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
                                                            0, NULL, NULL);
@@ -332,32 +320,28 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
 
                     {
                     g_assert (i->tex_buf[no][j] == NULL);
-                    i->tex_buf[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
-                                                              CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                                                              &i->buf_cl_format [no],
-                                                              i->roi[no][j].width,
-                                                              i->roi[no][j].height,
-                                                              0, NULL, &cl_err);
+                    i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+                                                             CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,
+                                                             i->size[no][j] * i->buf_cl_format_size [no],
+                                                             NULL, &cl_err);
                     if (cl_err != CL_SUCCESS) CL_ERROR;
 
                     g_assert (i->tex_op[no][j] == NULL);
-                    i->tex_op[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
-                                                             CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                                                             &i->op_cl_format [no],
-                                                             i->roi[no][j].width,
-                                                             i->roi[no][j].height,
-                                                             0, NULL, &cl_err);
+                    i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+                                                            CL_MEM_READ_WRITE,
+                                                            i->size[no][j] * i->op_cl_format_size [no],
+                                                            NULL, &cl_err);
                     if (cl_err != CL_SUCCESS) CL_ERROR;
 
                     /* pre-pinned memory */
-                    data = gegl_clEnqueueMapImage(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
-                                                  CL_MAP_WRITE,
-                                                  origin_zero, region, &pitch, NULL,
-                                                  0, NULL, NULL, &cl_err);
+                    data = gegl_clEnqueueMapBuffer(gegl_cl_get_command_queue(), i->tex_buf[no][j], CL_TRUE,
+                                                   CL_MAP_WRITE,
+                                                   0, i->size[no][j] * i->buf_cl_format_size [no],
+                                                   0, NULL, NULL, &cl_err);
                     if (cl_err != CL_SUCCESS) CL_ERROR;
 
                     /* color conversion will be performed in the GPU later */
-                    gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->buffer[no]->format, data, pitch);
+                    gegl_buffer_get (i->buffer[no], 1.0, &i->roi[no][j], i->buffer[no]->format, data, GEGL_AUTO_ROWSTRIDE);
 
                     cl_err = gegl_clEnqueueUnmapMemObject (gegl_cl_get_command_queue(), i->tex_buf[no][j], data,
                                                            0, NULL, NULL);
@@ -369,7 +353,7 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
                                                  i->buffer[no]->format, i->format[no]);
                     if (cl_err == FALSE) CL_ERROR;
 
-                    i->tex[no][j] = i->tex_buf[no][j];
+                    i->tex[no][j] = i->tex_op[no][j];
 
                     break;
                     }
@@ -390,12 +374,10 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
 
                   {
                   g_assert (i->tex_op[no][j] == NULL);
-                  i->tex_op[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
-                                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                                                           &i->op_cl_format [no],
-                                                           i->roi[no][j].width,
-                                                           i->roi[no][j].height,
-                                                           0, NULL, &cl_err);
+                  i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+                                                          CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
+                                                          i->size[no][j] * i->op_cl_format_size [no],
+                                                          NULL, &cl_err);
                   if (cl_err != CL_SUCCESS) CL_ERROR;
 
                   i->tex[no][j] = i->tex_op[no][j];
@@ -407,12 +389,10 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
 
                   {
                   g_assert (i->tex_buf[no][j] == NULL);
-                  i->tex_buf[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
-                                                            CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                                                            &i->buf_cl_format [no],
-                                                            i->roi[no][j].width,
-                                                            i->roi[no][j].height,
-                                                            0, NULL, &cl_err);
+                  i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+                                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
+                                                           i->size[no][j] * i->buf_cl_format_size [no],
+                                                           NULL, &cl_err);
                   if (cl_err != CL_SUCCESS) CL_ERROR;
 
                   i->tex[no][j] = i->tex_buf[no][j];
@@ -424,21 +404,17 @@ gegl_buffer_cl_iterator_next (GeglBufferClIterator *iterator, gboolean *err)
 
                   {
                   g_assert (i->tex_buf[no][j] == NULL);
-                  i->tex_buf[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
-                                                            CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                                                            &i->buf_cl_format [no],
-                                                            i->roi[no][j].width,
-                                                            i->roi[no][j].height,
-                                                            0, NULL, &cl_err);
+                  i->tex_buf[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+                                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY,
+                                                           i->size[no][j] * i->buf_cl_format_size [no],
+                                                           NULL, &cl_err);
                   if (cl_err != CL_SUCCESS) CL_ERROR;
 
                   g_assert (i->tex_op[no][j] == NULL);
-                  i->tex_op[no][j] = gegl_clCreateImage2D (gegl_cl_get_context (),
-                                                           CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE,
-                                                           &i->op_cl_format [no],
-                                                           i->roi[no][j].width,
-                                                           i->roi[no][j].height,
-                                                           0, NULL, &cl_err);
+                  i->tex_op[no][j] = gegl_clCreateBuffer (gegl_cl_get_context (),
+                                                          CL_MEM_READ_WRITE,
+                                                          i->size[no][j] * i->op_cl_format_size [no],
+                                                          NULL, &cl_err);
                   if (cl_err != CL_SUCCESS) CL_ERROR;
 
                   i->tex[no][j] = i->tex_op[no][j];
diff --git a/gegl/buffer/gegl-buffer-cl-iterator.h b/gegl/buffer/gegl-buffer-cl-iterator.h
index 5558f9f..9ccbf45 100644
--- a/gegl/buffer/gegl-buffer-cl-iterator.h
+++ b/gegl/buffer/gegl-buffer-cl-iterator.h
@@ -16,7 +16,7 @@ enum
 typedef struct GeglBufferClIterator
 {
   gint          n;
-  size_t        size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX][2];  /* length of current data in pixels */
+  size_t        size [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];  /* length of current data in pixels */
   cl_mem        tex  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];
   GeglRectangle roi  [GEGL_CL_BUFFER_MAX_ITERATORS][GEGL_CL_NTEX];
 } GeglBufferClIterator;
diff --git a/gegl/opencl/gegl-cl-color-kernel.h b/gegl/opencl/gegl-cl-color-kernel.h
index b31f6f0..898be52 100644
--- a/gegl/opencl/gegl-cl-color-kernel.h
+++ b/gegl/opencl/gegl-cl-color-kernel.h
@@ -22,118 +22,115 @@ static const char* kernel_color_source =
 "  return value / 12.92f;                                                                 \n"
 "}                                                                                        \n"
 "                                                                                         \n"
-"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |                             \n"
-"                               CLK_ADDRESS_NONE            |                             \n"
-"                               CLK_FILTER_NEAREST;                                       \n"
 "                                                                                         \n"
 "/* RGBA float -> RaGaBaA float */                                                        \n"
-"__kernel void non_premultiplied_to_premultiplied (__read_only  image2d_t in,             \n"
-"                                                  __write_only image2d_t out)            \n"
+"__kernel void non_premultiplied_to_premultiplied (__global const float4 * in,            \n"
+"                                                  __global       float4 * out)           \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v = in[gid];                                                                 \n"
 "  float4 out_v;                                                                          \n"
 "  out_v   = in_v * in_v.w;                                                               \n"
 "  out_v.w = in_v.w;                                                                      \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  out[gid] = out_v;                                                                      \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* RaGaBaA float -> RGBA float */                                                        \n"
-"__kernel void premultiplied_to_non_premultiplied (__read_only  image2d_t in,             \n"
-"                                                  __write_only image2d_t out)            \n"
+"__kernel void premultiplied_to_non_premultiplied (__global const float4 * in,            \n"
+"                                                  __global       float4 * out)           \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = in[gid];                                                                \n"
 "  float4 out_v;                                                                          \n"
 "  out_v = (in_v.w > BABL_ALPHA_THRESHOLD)? in_v / in_v.w : (float4)(0.0f);               \n"
 "  out_v.w = in_v.w;                                                                      \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  out[gid] = out_v;                                                                      \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* RGBA float -> R'G'B'A float */                                                        \n"
-"__kernel void rgba2rgba_gamma_2_2 (__read_only  image2d_t in,                            \n"
-"                                   __write_only image2d_t out)                           \n"
+"__kernel void rgba2rgba_gamma_2_2 (__global const float4 * in,                           \n"
+"                                   __global       float4 * out)                          \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = in[gid];                                                                \n"
 "  float4 out_v;                                                                          \n"
 "  out_v = (float4)(linear_to_gamma_2_2(in_v.x),                                          \n"
 "                   linear_to_gamma_2_2(in_v.y),                                          \n"
 "                   linear_to_gamma_2_2(in_v.z),                                          \n"
 "                   in_v.w);                                                              \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  out[gid] = out_v;                                                                      \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* R'G'B'A float -> RGBA float */                                                        \n"
-"__kernel void rgba_gamma_2_22rgba (__read_only  image2d_t in,                            \n"
-"                                   __write_only image2d_t out)                           \n"
+"__kernel void rgba_gamma_2_22rgba (__global const float4 * in,                           \n"
+"                                   __global       float4 * out)                          \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = in[gid];                                                                \n"
 "  float4 out_v;                                                                          \n"
 "  out_v = (float4)(gamma_2_2_to_linear(in_v.x),                                          \n"
 "                   gamma_2_2_to_linear(in_v.y),                                          \n"
 "                   gamma_2_2_to_linear(in_v.z),                                          \n"
 "                   in_v.w);                                                              \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  out[gid] = out_v;                                                                      \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* RGBA float -> R'aG'aB'aA float */                                                     \n"
-"__kernel void rgba2rgba_gamma_2_2_premultiplied (__read_only  image2d_t in,              \n"
-"                                                 __write_only image2d_t out)             \n"
+"__kernel void rgba2rgba_gamma_2_2_premultiplied (__global const float4 * in,             \n"
+"                                                 __global       float4 * out)            \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = in[gid];                                                                \n"
 "  float4 out_v;                                                                          \n"
 "  out_v = (float4)(linear_to_gamma_2_2(in_v.x) * in_v.w,                                 \n"
 "                   linear_to_gamma_2_2(in_v.y) * in_v.w,                                 \n"
 "                   linear_to_gamma_2_2(in_v.z) * in_v.w,                                 \n"
 "                   in_v.w);                                                              \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  out[gid] = out_v;                                                                      \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* R'aG'aB'aA float -> RGBA float */                                                     \n"
-"__kernel void rgba_gamma_2_2_premultiplied2rgba (__read_only  image2d_t in,              \n"
-"                                                 __write_only image2d_t out)             \n"
+"__kernel void rgba_gamma_2_2_premultiplied2rgba (__global const float4 * in,             \n"
+"                                                 __global       float4 * out)            \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = in[gid];                                                                \n"
 "  float4 out_v;                                                                          \n"
 "  out_v = (in_v.w > BABL_ALPHA_THRESHOLD)? (float4)(linear_to_gamma_2_2(in_v.x) / in_v.w,\n"
 "                                                    linear_to_gamma_2_2(in_v.y) / in_v.w,\n"
 "                                                    linear_to_gamma_2_2(in_v.z) / in_v.w,\n"
 "                                                    in_v.w) :                            \n"
 "                                           (float4)(0.0f);                               \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  out[gid] = out_v;                                                                      \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* RGBA float -> RGBA u8 */                                                              \n"
-"__kernel void rgbaf_to_rgbau8 (__read_only  image2d_t in,                                \n"
-"                               __write_only image2d_t out)                               \n"
+"__kernel void rgbaf_to_rgbau8 (__global const float4 * in,                               \n"
+"                               __global       uchar4 * out)                              \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
-"  float4 out_v = in_v;                                                                   \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = in[gid];                                                                \n"
+"  float4 out_v = in_v * 255.0f;                                                          \n"
+"  out[gid] = convert_uchar4_sat_rte(out_v);                                              \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* RGBAu8 -> RGBA float */                                                               \n"
-"__kernel void rgbau8_to_rgbaf (__read_only  image2d_t in,                                \n"
-"                               __write_only image2d_t out)                               \n"
+"__kernel void rgbau8_to_rgbaf (__global const uchar4 * in,                               \n"
+"                               __global       float4 * out)                              \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
-"  float4 out_v = in_v;                                                                   \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = convert_float4(in[gid]);                                                \n"
+"  float4 out_v = in_v / 255.0f;                                                          \n"
+"  out[gid] = out_v;                                                                      \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* RGBA float -> Y'CbCrA float */                                                        \n"
 "                                                                                         \n"
-"__kernel void rgba_to_ycbcra (__read_only  image2d_t in,                                 \n"
-"                              __write_only image2d_t out)                                \n"
+"__kernel void rgba_to_ycbcra (__global const float4 * in,                                \n"
+"                              __global       float4 * out)                               \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = in[gid];                                                                \n"
 "  float4 out_v;                                                                          \n"
 "                                                                                         \n"
 "  float4 rgb = (float4)(linear_to_gamma_2_2(in_v.x),                                     \n"
@@ -145,16 +142,16 @@ static const char* kernel_color_source =
 "                   -0.168736f * rgb.x - 0.331264f * rgb.y + 0.5f      * rgb.z,           \n"
 "                    0.5f      * rgb.x - 0.418688f * rgb.y - 0.081312f * rgb.z,           \n"
 "                   in_v.w);                                                              \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
+"  out[gid] = out_v;                                                                      \n"
 "}                                                                                        \n"
 "                                                                                         \n"
 "/* Y'CbCrA float -> RGBA float */                                                        \n"
 "                                                                                         \n"
-"__kernel void ycbcra_to_rgba (__read_only  image2d_t in,                                 \n"
-"                              __write_only image2d_t out)                                \n"
+"__kernel void ycbcra_to_rgba (__global const float4 * in,                                \n"
+"                              __global       float4 * out)                               \n"
 "{                                                                                        \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));                                 \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                                          \n"
+"  int gid = get_global_id(0);                                                            \n"
+"  float4 in_v  = in[gid];                                                                \n"
 "  float4 out_v;                                                                          \n"
 "                                                                                         \n"
 "  float4 rgb = (float4)(1.0f * in_v.x + 0.0f      * in_v.y + 1.40200f    * in_v.z,       \n"
@@ -166,5 +163,5 @@ static const char* kernel_color_source =
 "                   linear_to_gamma_2_2(rgb.y),                                           \n"
 "                   linear_to_gamma_2_2(rgb.z),                                           \n"
 "                   in_v.w);                                                              \n"
-"  write_imagef(out, gid, out_v);                                                         \n"
-"}                                                                                        \n";
+"  out[gid] = out_v;                                                                      \n"
+"}                                                                                        \n";
\ No newline at end of file
diff --git a/gegl/opencl/gegl-cl-color.c b/gegl/opencl/gegl-cl-color.c
index bb9de3c..40fc574 100644
--- a/gegl/opencl/gegl-cl-color.c
+++ b/gegl/opencl/gegl-cl-color.c
@@ -130,7 +130,7 @@ gegl_cl_color_supported (const Babl *in_format, const Babl *out_format)
 #define CL_ERROR {g_printf("[OpenCL] Error in %s:%d %s - %s\n", __FILE__, __LINE__, __func__, gegl_cl_errstring(errcode)); return FALSE;}
 
 gboolean
-gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
+gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size,
                     const Babl *in_format, const Babl *out_format)
 {
   int errcode;
@@ -140,13 +140,13 @@ gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
 
   if (in_format == out_format)
     {
-      const size_t origin[3] = {0, 0, 0};
-      const size_t region[3] = {size[0], size[1], 1};
+      size_t s;
+      gegl_cl_color_babl (in_format, NULL, &s);
 
       /* just copy in_tex to out_tex */
-      errcode = gegl_clEnqueueCopyImage (gegl_cl_get_command_queue(),
-                                         in_tex, out_tex, origin, origin, region,
-                                         0, NULL, NULL);
+      errcode = gegl_clEnqueueCopyBuffer (gegl_cl_get_command_queue(),
+                                          in_tex, out_tex, 0, 0, size * s,
+                                          0, NULL, NULL);
       if (errcode != CL_SUCCESS) CL_ERROR
 
       errcode = gegl_clEnqueueBarrier(gegl_cl_get_command_queue());
@@ -163,8 +163,8 @@ gegl_cl_color_conv (cl_mem in_tex, cl_mem out_tex, const size_t size[2],
       if (errcode != CL_SUCCESS) CL_ERROR
 
       errcode = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
-                                            kernels_color->kernel[k], 2,
-                                            NULL, size, NULL,
+                                            kernels_color->kernel[k], 1,
+                                            NULL, &size, NULL,
                                             0, NULL, NULL);
       if (errcode != CL_SUCCESS) CL_ERROR
 
diff --git a/gegl/opencl/gegl-cl-color.h b/gegl/opencl/gegl-cl-color.h
index 59b04b6..797ed6c 100644
--- a/gegl/opencl/gegl-cl-color.h
+++ b/gegl/opencl/gegl-cl-color.h
@@ -17,7 +17,7 @@ gboolean gegl_cl_color_babl (const Babl *buffer_format, cl_image_format *cl_form
 
 gegl_cl_color_op gegl_cl_color_supported (const Babl *in_format, const Babl *out_format);
 
-gboolean gegl_cl_color_conv (cl_mem in_tex, cl_mem aux_tex, const size_t size[2],
+gboolean gegl_cl_color_conv (cl_mem in_tex, cl_mem aux_tex, const size_t size,
                              const Babl *in_format, const Babl *out_format);
 
 #endif
diff --git a/gegl/operation/gegl-operation-point-filter.h b/gegl/operation/gegl-operation-point-filter.h
index 3e80856..f42677f 100644
--- a/gegl/operation/gegl-operation-point-filter.h
+++ b/gegl/operation/gegl-operation-point-filter.h
@@ -62,7 +62,7 @@ struct _GeglOperationPointFilterClass
   cl_int   (* cl_process) (GeglOperation      *self,
                            cl_mem             in_tex,
                            cl_mem             out_tex,
-                           const size_t global_worksize[2],
+                           size_t             global_worksize,
                            const GeglRectangle *roi);
 };
 
diff --git a/operations/common/brightness-contrast.c b/operations/common/brightness-contrast.c
index 3501af5..ce8b207 100644
--- a/operations/common/brightness-contrast.c
+++ b/operations/common/brightness-contrast.c
@@ -107,20 +107,17 @@ process (GeglOperation       *op,
 #include "opencl/gegl-cl.h"
 
 static const char* kernel_source =
-"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |   \n"
-"                    CLK_ADDRESS_NONE                       |   \n"
-"                    CLK_FILTER_NEAREST;                        \n"
-"__kernel void kernel_bc(__read_only  image2d_t in,             \n"
-"                        __write_only image2d_t out,            \n"
-"                         float brightness,                     \n"
-"                         float contrast)                       \n"
+"__kernel void kernel_bc(__global const float4     *in,         \n"
+"                        __global       float4     *out,        \n"
+"                        float brightness,                      \n"
+"                        float contrast)                        \n"
 "{                                                              \n"
-"  int2 gid = (int2)(get_global_id(0), get_global_id(1));       \n"
-"  float4 in_v  = read_imagef(in, sampler, gid);                \n"
+"  int gid = get_global_id(0);                                  \n"
+"  float4 in_v  = in[gid];                                      \n"
 "  float4 out_v;                                                \n"
 "  out_v.xyz = (in_v.xyz - 0.5f) * contrast + brightness + 0.5f;\n"
 "  out_v.w   =  in_v.w;                                         \n"
-"  write_imagef(out, gid, out_v);                               \n"
+"  out[gid]  =  out_v;                                          \n"
 "}                                                              \n";
 
 static gegl_cl_run_data *cl_data = NULL;
@@ -130,7 +127,7 @@ static cl_int
 cl_process (GeglOperation       *op,
             cl_mem              in_tex,
             cl_mem              out_tex,
-            const size_t global_worksize[2],
+            size_t              global_worksize,
             const GeglRectangle *roi)
 {
   /* Retrieve a pointer to GeglChantO structure which contains all the
@@ -159,8 +156,8 @@ cl_process (GeglOperation       *op,
   if (cl_err != CL_SUCCESS) return cl_err;
 
   cl_err = gegl_clEnqueueNDRangeKernel(gegl_cl_get_command_queue (),
-                                        cl_data->kernel[0], 2,
-                                        NULL, global_worksize, NULL,
+                                        cl_data->kernel[0], 1,
+                                        NULL, &global_worksize, NULL,
                                         0, NULL, NULL);
   if (cl_err != CL_SUCCESS) return cl_err;
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]