[gegl/gsoc2011-opencl: 11/19] GeglBuffer supports OpenCL



commit 9182f87ded39c22ecc443e46e1de03d6a10a6a93
Author: Victor Oliveira <victormatheus gmail com>
Date:   Fri Jul 22 13:25:15 2011 -0300

    GeglBuffer supports OpenCL

 gegl/buffer/gegl-buffer-access.c   |  294 ++++++++++++++++++++++++++++++-
 gegl/buffer/gegl-buffer-iterator.c |  348 +++++++++++++++++++++++++++++++-----
 gegl/buffer/gegl-buffer-iterator.h |   16 ++-
 gegl/buffer/gegl-buffer.h          |   11 ++
 4 files changed, 620 insertions(+), 49 deletions(-)
---
diff --git a/gegl/buffer/gegl-buffer-access.c b/gegl/buffer/gegl-buffer-access.c
index c115e62..4cb7d6f 100644
--- a/gegl/buffer/gegl-buffer-access.c
+++ b/gegl/buffer/gegl-buffer-access.c
@@ -40,6 +40,10 @@
 #include "gegl-tile-backend.h"
 #include "gegl-buffer-iterator.h"
 
+#include "gegl-region.h"
+#include "gegl-cl-init.h"
+#include "gegl-cl-texture.h"
+
 #if 0
 static inline void
 gegl_buffer_pixel_set (GeglBuffer *buffer,
@@ -263,13 +267,19 @@ gegl_buffer_get_pixel (GeglBuffer *buffer,
           {
             gint    offsetx = gegl_tile_offset (tiledx, tile_width);
             gint    offsety = gegl_tile_offset (tiledy, tile_height);
-            guchar *tp      = gegl_tile_get_data (tile) +
-                              (offsety * tile_width + offsetx) * px_size;
+            guchar  *tp;
+
+            gegl_tile_lock (tile, GEGL_TILE_LOCK_READ);
+
+            tp = gegl_tile_get_data (tile) +
+                   (offsety * tile_width + offsetx) * px_size;
             if (fish)
               babl_process (fish, tp, buf, 1);
             else
               memcpy (buf, tp, px_size);
 
+            gegl_tile_unlock (tile);
+
             /*gegl_tile_unref (tile);*/
             buffer->hot_tile = tile;
           }
@@ -391,7 +401,7 @@ gegl_buffer_iterate (GeglBuffer          *buffer,
       if (!(buffer_y + bufy + (tile_height) >= buffer_abyss_y &&
             buffer_y + bufy < abyss_y_total))
         { /* entire row of tiles is in abyss */
-          if (!write)
+          if (!write) /* we have to setup abyss for buf */
             {
               gint    row;
               gint    y  = bufy;
@@ -471,6 +481,8 @@ gegl_buffer_iterate (GeglBuffer          *buffer,
 
                 if (write)
                   gegl_tile_lock (tile, GEGL_TILE_LOCK_WRITE);
+                else
+                  gegl_tile_lock (tile, GEGL_TILE_LOCK_READ);
 
                 tile_base = gegl_tile_get_data (tile);
                 tp        = ((guchar *) tile_base) + (offsety * tile_width + offsetx) * px_size;
@@ -521,7 +533,6 @@ gegl_buffer_iterate (GeglBuffer          *buffer,
                           }
                       }
 
-                    gegl_tile_unlock (tile);
                   }
                 else /* read */
                   {
@@ -561,6 +572,7 @@ gegl_buffer_iterate (GeglBuffer          *buffer,
                         bp += buf_stride;
                       }
                   }
+                gegl_tile_unlock (tile);
                 gegl_tile_unref (tile);
               }
             bufx += (tile_width - offsetx);
@@ -569,6 +581,221 @@ gegl_buffer_iterate (GeglBuffer          *buffer,
     }
 }
 
+/* Copy pixels between @texture and the OpenCL textures of the tiles of
+ * @buffer covering @roi; the code supposes an RGBA float buffer.  With
+ * @write the texture is copied into the tiles, otherwise the tiles are
+ * copied into the texture and the parts of the request that no tile copy
+ * covered are passed to gegl_cl_texture_clear.  @level is handed to
+ * gegl_tile_source_get_tile and divides all coordinates by 1 << level. */
+static inline void
+gegl_buffer_cl_iterate (GeglBuffer          *buffer,
+                        const GeglRectangle *roi, /* or NULL for extent */
+                        GeglClTexture       *texture,
+                        gboolean             write,
+                        gint                 level)
+{
+  GeglRectangle original_rect;
+  GeglRectangle abyss;
+  GeglRectangle final_rect;
+
+  gint tile_width  = buffer->tile_storage->tile_width;
+  gint tile_height = buffer->tile_storage->tile_height;
+  /* last pixel (inclusive) of the area that is copied tile by tile */
+  gint last_x;
+  gint last_y;
+  /* range of tile indices touched by that area */
+  gint first_tile_index_x;
+  gint first_tile_index_y;
+  gint last_tile_index_x;
+  gint last_tile_index_y;
+  /* pixel offsets of the area inside its first and last tile */
+  gint first_tile_offset_x;
+  gint first_tile_offset_y;
+  gint last_tile_offset_x;
+  gint last_tile_offset_y;
+  /* where the area starts inside the texture */
+  gint first_texture_x;
+  gint first_texture_y;
+  /* cursor over rows of tiles */
+  gint tile_index_y;
+  gint tile_offset_y;
+  gint texture_y;
+  gint copy_area_height;
+  /* cursor over columns of tiles */
+  gint tile_index_x;
+  gint tile_offset_x;
+  gint texture_x;
+  gint copy_area_width;
+
+  gint factor = 1<<level;
+
+  if (roi != NULL)
+    {
+      original_rect.x      = (roi->x + buffer->shift_x) / factor;
+      original_rect.y      = (roi->y + buffer->shift_y) / factor;
+      original_rect.width  = roi->width / factor;
+      original_rect.height = roi->height / factor;
+    }
+  else
+    {
+      original_rect.x      = (buffer->extent.x + buffer->shift_x) / factor;
+      original_rect.y      = (buffer->extent.y + buffer->shift_y) / factor;
+      original_rect.width  = buffer->extent.width / factor;
+      original_rect.height = buffer->extent.height / factor;
+    }
+  /* the abyss, shifted and scaled the same way as the request */
+  abyss.x      = (buffer->abyss.x + buffer->shift_x) / factor;
+  abyss.y      = (buffer->abyss.y + buffer->shift_y) / factor;
+  abyss.width  = buffer->abyss.width / factor;
+  abyss.height = buffer->abyss.height / factor;
+  /* only the part of the request inside the abyss is copied from/to tiles */
+  gegl_rectangle_intersect (&final_rect, &abyss, &original_rect);
+
+  last_x = final_rect.x + (final_rect.width - 1);
+  last_y = final_rect.y + (final_rect.height - 1);
+
+  first_tile_index_x = gegl_tile_indice (final_rect.x, tile_width);
+  first_tile_index_y = gegl_tile_indice (final_rect.y, tile_height);
+  last_tile_index_x  = gegl_tile_indice (last_x, tile_width);
+  last_tile_index_y  = gegl_tile_indice (last_y, tile_height);
+
+  first_tile_offset_x = gegl_tile_offset (final_rect.x, tile_width);
+  first_tile_offset_y = gegl_tile_offset (final_rect.y, tile_height);
+  last_tile_offset_x  = gegl_tile_offset (last_x, tile_width);
+  last_tile_offset_y  = gegl_tile_offset (last_y, tile_height);
+
+  first_texture_x = final_rect.x - original_rect.x;
+  first_texture_y = final_rect.y - original_rect.y;
+  /* the first and the last tile of a row/column may be copied only partially */
+  for (tile_index_y       = first_tile_index_y,
+         tile_offset_y    = first_tile_offset_y,
+         texture_y        = first_texture_y,
+         copy_area_height = tile_index_y < last_tile_index_y
+                              ? tile_height - tile_offset_y
+                              : last_tile_offset_y - tile_offset_y + 1;
+
+       tile_index_y <= last_tile_index_y;
+
+       tile_index_y++,
+         tile_offset_y    = 0,
+         texture_y       += copy_area_height,
+         copy_area_height = tile_index_y < last_tile_index_y
+                              ? tile_height
+                              : last_tile_offset_y + 1)
+    {
+      for (tile_index_x      = first_tile_index_x,
+             tile_offset_x   = first_tile_offset_x,
+             texture_x       = first_texture_x,
+             copy_area_width = tile_index_x < last_tile_index_x
+                                 ? tile_width - tile_offset_x
+                                 : last_tile_offset_x - tile_offset_x + 1;
+
+           tile_index_x <= last_tile_index_x;
+
+           tile_index_x++,
+             tile_offset_x   = 0,
+             texture_x      += copy_area_width,
+             copy_area_width = tile_index_x < last_tile_index_x
+                                 ? tile_width
+                                 : last_tile_offset_x + 1)
+        {
+          GeglTile *tile = gegl_tile_source_get_tile ((GeglTileSource *) buffer,
+                                                      tile_index_x,
+                                                      tile_index_y,
+                                                      level);
+          if (tile == NULL)
+            {
+              g_warning ("didn't get tile, trying to continue");
+              continue;
+            }
+
+          if (write)
+            {
+              GeglClTexture *dest;
+              GeglRectangle   temp_rect = {
+                                            texture_x,
+                                            texture_y,
+                                            copy_area_width,
+                                            copy_area_height
+                                          };
+
+              gegl_tile_lock (tile, GEGL_TILE_LOCK_CL_WRITE);
+              dest = gegl_tile_get_cl_data (tile);
+              gegl_cl_texture_copy (texture,
+                                    &temp_rect,
+                                    dest,
+                                    tile_offset_x,
+                                    tile_offset_y);
+              gegl_tile_unlock (tile);
+            }
+          else
+            {
+              GeglClTexture *src;
+              GeglRectangle   temp_rect = {
+                                            tile_offset_x,
+                                            tile_offset_y,
+                                            copy_area_width,
+                                            copy_area_height
+                                          };
+
+              gegl_tile_lock (tile, GEGL_TILE_LOCK_CL_READ);
+              src = gegl_tile_get_cl_data (tile);
+              gegl_cl_texture_copy (src,
+                                    &temp_rect,
+                                    texture,
+                                    texture_x,
+                                    texture_y);
+              gegl_tile_unlock (tile);
+            }
+
+          gegl_tile_unref (tile);
+        }
+    }
+
+  /* "read" from abyss */
+    {
+      /* original requested rectangle on the texture */
+      GeglRectangle abyss_rect   = {
+                                     0,
+                                     0,
+                                     original_rect.width,
+                                     original_rect.height
+                                   };
+
+      /* rectangle on the texture written with actual pixel
+       * data from the buffer
+       */
+      GeglRectangle texture_rect = {
+                                     first_texture_x,
+                                     first_texture_y,
+                                     final_rect.width,
+                                     final_rect.height
+                                   };
+
+      if (!write && !gegl_rectangle_equal (&abyss_rect, &texture_rect))
+        {
+          gint cnt;
+
+          GeglRegion *abyss_region   = gegl_region_rectangle (&abyss_rect);
+          GeglRegion *texture_region = gegl_region_rectangle (&texture_rect);
+
+          GeglRectangle *abyss_rects;
+          gint rect_count;
+          /* what was requested minus what the tile copies provided */
+          gegl_region_subtract (abyss_region, texture_region);
+          gegl_region_destroy  (texture_region);
+
+          gegl_region_get_rectangles (abyss_region, &abyss_rects, &rect_count);
+
+          for (cnt = 0; cnt < rect_count; cnt++)
+            gegl_cl_texture_clear (texture, &abyss_rects[cnt]);
+
+          g_free (abyss_rects);
+          gegl_region_destroy (abyss_region);
+        }
+    }
+}
+
 void
 gegl_buffer_set_unlocked (GeglBuffer          *buffer,
                           const GeglRectangle *rect,
@@ -608,6 +835,24 @@ gegl_buffer_set (GeglBuffer          *buffer,
   gegl_buffer_unlock (buffer);
 }
 
+/* Copy the OpenCL texture @src into the tiles of @buffer covering @rect
+ * (the whole extent when @rect is NULL), holding the buffer lock for the
+ * duration of the copy.  Shared buffers are flushed before unlocking.
+ */
+void
+gegl_buffer_cl_set (GeglBuffer          *buffer,
+                    const GeglRectangle *rect,
+                    GeglClTexture       *src)
+{
+  g_return_if_fail (GEGL_IS_BUFFER (buffer));
+
+  gegl_buffer_lock (buffer);
+  gegl_buffer_cl_iterate (buffer, rect, src, TRUE, 0);
+
+  if (gegl_buffer_is_shared (buffer))
+    gegl_buffer_flush (buffer);
+  gegl_buffer_unlock (buffer);
+}
 
 #if 0
 
@@ -1058,6 +1303,47 @@ gegl_buffer_get (GeglBuffer          *buffer,
   gegl_buffer_get_unlocked (buffer, scale, rect, format, dest_buf, rowstride);
 }
 
+/* Fill the OpenCL texture @dest with the pixels of @rect in @buffer,
+ * resampled by @scale; a NULL @rect means the whole buffer extent.
+ * Modelled on gegl_buffer_get_unlocked: unscaled reads copy the tile
+ * textures directly, scaled reads are rendered into a temporary RGBA
+ * float buffer on the CPU and uploaded afterwards. */
+void
+gegl_buffer_cl_get (GeglBuffer          *buffer,
+                    gdouble              scale,
+                    const GeglRectangle *rect,
+                    GeglClTexture       *dest)
+{
+  g_return_if_fail (GEGL_IS_BUFFER (buffer));
+
+  /* resolve NULL up front: it used to be dereferenced when scale != 1.0 */
+  if (rect == NULL)
+    rect = &buffer->extent;
+
+  if (rect->width == 0 || rect->height == 0)
+    return;
+
+  if (GEGL_FLOAT_EQUAL (scale, 1.0))
+    {
+      gegl_buffer_cl_iterate (buffer, rect, dest, FALSE, 0);
+    }
+  else
+    {
+      /* TODO: Resampler in OpenCL
+         It would be better if we avoided this
+         ping-pong to the GPU */
+      const Babl *format   = babl_format ("RGBA float");
+      gint        bpp      = babl_format_get_bytes_per_pixel (format);
+      gfloat     *dest_buf = g_malloc (dest->width * dest->height * bpp);
+      /* NOTE(review): sized from dest, not from the scaled rect - confirm they agree */
+      gegl_buffer_get_unlocked (buffer, scale, rect, format,
+                                dest_buf, GEGL_AUTO_ROWSTRIDE);
+      gegl_cl_texture_set (dest, dest_buf);
+      g_free (dest_buf);
+    }
+}
+
+
 const GeglRectangle *
 gegl_buffer_get_abyss (GeglBuffer *buffer)
 {
diff --git a/gegl/buffer/gegl-buffer-iterator.c b/gegl/buffer/gegl-buffer-iterator.c
index 360a2f2..a9beb09 100644
--- a/gegl/buffer/gegl-buffer-iterator.c
+++ b/gegl/buffer/gegl-buffer-iterator.c
@@ -32,16 +32,21 @@
 #include "gegl-tile-storage.h"
 #include "gegl-utils.h"
 
+#include "gegl-cl-init.h"
+
 typedef struct GeglBufferTileIterator
 {
   GeglBuffer    *buffer;
   GeglRectangle  roi;     /* the rectangular region we're iterating over */
   GeglTile      *tile;    /* current tile */
   gpointer       data;    /* current tile's data */
+  GeglClTexture *cl_data; /* current tile's opencl data */
 
   gint           col;     /* the column currently provided for */
   gint           row;     /* the row currently provided for */
-  gboolean       write;
+
+  GeglTileLockMode mode;
+
   GeglRectangle  subrect;    /* the subrect that intersected roi */
   gpointer       sub_data;   /* pointer to the subdata as indicated by subrect */
   gint           rowstride;  /* rowstride for tile, in bytes */
@@ -63,9 +68,10 @@ typedef struct GeglBufferTileIterator
 typedef struct GeglBufferIterators
 {
   /* current region of interest */
-  gint          length;             /* length of current data in pixels */
-  gpointer      data[GEGL_BUFFER_MAX_ITERATORS];
-  GeglRectangle roi[GEGL_BUFFER_MAX_ITERATORS]; /* roi of the current data */
+  gint           length;             /* length of current data in pixels */
+  gpointer       data[GEGL_BUFFER_MAX_ITERATORS];
+  GeglClTexture *cl_data[GEGL_BUFFER_MAX_ITERATORS];
+  GeglRectangle  roi[GEGL_BUFFER_MAX_ITERATORS]; /* roi of the current data */
 
   /* the following is private: */
   gint           iterators;
@@ -76,7 +82,11 @@ typedef struct GeglBufferIterators
   const Babl    *format     [GEGL_BUFFER_MAX_ITERATORS]; /* The format required for the data */
   GeglBuffer    *buffer     [GEGL_BUFFER_MAX_ITERATORS]; /* currently a subbuffer of the original, need to go away */
   guint          flags      [GEGL_BUFFER_MAX_ITERATORS];
-  gpointer       buf        [GEGL_BUFFER_MAX_ITERATORS]; /* no idea */
+
+
+  gpointer       buf        [GEGL_BUFFER_MAX_ITERATORS]; /* when roi != tile size, buf and cl_buf will be changed by */
+  GeglClTexture *cl_buf     [GEGL_BUFFER_MAX_ITERATORS]; /* operators, so they don't have to care about image */
+                                                         /* borders and tile size */
   GeglBufferTileIterator   i[GEGL_BUFFER_MAX_ITERATORS];
 } GeglBufferIterators;
 
@@ -84,7 +94,7 @@ typedef struct GeglBufferIterators
 static void      gegl_buffer_tile_iterator_init (GeglBufferTileIterator *i,
                                                  GeglBuffer             *buffer,
                                                  GeglRectangle           roi,
-                                                 gboolean                write);
+                                                 GeglTileLockMode        mode);
 static gboolean  gegl_buffer_tile_iterator_next (GeglBufferTileIterator *i);
 
 /*
@@ -116,7 +126,7 @@ static gboolean gegl_buffer_scan_compatible (GeglBuffer *bufferA,
 static void gegl_buffer_tile_iterator_init (GeglBufferTileIterator *i,
                                             GeglBuffer             *buffer,
                                             GeglRectangle           roi,
-                                            gboolean                write)
+                                            GeglTileLockMode        mode)
 {
   g_assert (i);
   memset (i, 0, sizeof (GeglBufferTileIterator));
@@ -130,7 +140,7 @@ static void gegl_buffer_tile_iterator_init (GeglBufferTileIterator *i,
   i->tile = NULL;
   i->col = 0;
   i->row = 0;
-  i->write = write;
+  i->mode = mode;
   i->max_size = i->buffer->tile_storage->tile_width *
                 i->buffer->tile_storage->tile_height;
 }
@@ -146,6 +156,8 @@ gegl_buffer_tile_iterator_next (GeglBufferTileIterator *i)
   gint  buffer_x       = i->roi.x + buffer_shift_x;
   gint  buffer_y       = i->roi.y + buffer_shift_y;
 
+  gboolean direct_access, cl_direct_access;
+
   if (i->roi.width == 0 || i->roi.height == 0)
     return FALSE;
 
@@ -154,7 +166,11 @@ gulp:
   /* unref previously held tile */
   if (i->tile)
     {
-      if (i->write && i->subrect.width == tile_width)
+      direct_access    = (i->mode & GEGL_TILE_LOCK_READWRITE    && tile_width == i->subrect.width);
+      cl_direct_access = (i->mode & GEGL_TILE_LOCK_CL_READWRITE && tile_width == i->subrect.width
+                          && tile_height == i->subrect.height);
+
+      if (direct_access || cl_direct_access)
         {
           gegl_tile_unlock (i->tile);
         }
@@ -182,15 +198,32 @@ gulp:
          else
            i->subrect.height = tile_height - offsety;
 
+         direct_access    = (i->mode & GEGL_TILE_LOCK_READWRITE    && tile_width == i->subrect.width);
+         cl_direct_access = (i->mode & GEGL_TILE_LOCK_CL_READWRITE && tile_width == i->subrect.width
+                             && tile_height == i->subrect.height);
+
          i->tile = gegl_tile_source_get_tile ((GeglTileSource *) (buffer),
                                                gegl_tile_indice (tiledx, tile_width),
                                                gegl_tile_indice (tiledy, tile_height),
                                                0);
-         if (i->write && tile_width==i->subrect.width)
+
+         i->data    = gegl_tile_get_data (i->tile);
+
+         /* XXX: Maybe this should be better integrated into the core */
+         /* OpenCL Texture Alloc */
+         if (i->mode & GEGL_TILE_LOCK_CL_READWRITE &&
+             !gegl_tile_get_cl_data (i->tile))
            {
-             gegl_tile_lock (i->tile, GEGL_TILE_LOCK_WRITE);
+             gegl_tile_cl_enable(i->tile, tile_width, tile_height);
+           }
+         i->cl_data = gegl_tile_get_cl_data (i->tile);
+
+         /* lock for direct access in gegl_buffer_iterate_next
+            check for cpu/opencl alignment */
+         if (direct_access || cl_direct_access)
+           {
+             gegl_tile_lock (i->tile, i->mode);
            }
-         i->data = gegl_tile_get_data (i->tile);
 
          {
          gint bpp = babl_format_get_bytes_per_pixel (i->buffer->format);
@@ -250,6 +283,9 @@ gegl_buffer_iterator_add (GeglBufferIterator  *iterator,
 {
   GeglBufferIterators *i = (gpointer)iterator;
   gint self = 0;
+
+  GeglTileLockMode lock_mode = GEGL_TILE_LOCK_NONE;
+
   if (i->iterators+1 > GEGL_BUFFER_MAX_ITERATORS)
     {
       g_error ("too many iterators (%i)", i->iterators+1);
@@ -274,10 +310,34 @@ gegl_buffer_iterator_add (GeglBufferIterator  *iterator,
     i->format[self]=buffer->format;
   i->flags[self]=flags;
 
+  if (flags & GEGL_BUFFER_READ)
+    lock_mode |= GEGL_TILE_LOCK_READ;
+  if (flags & GEGL_BUFFER_WRITE)
+    lock_mode |= GEGL_TILE_LOCK_WRITE;
+
+  /* GeglClTexture only supports RGBA float at the moment */
+  if (gegl_cl_is_accelerated () && i->buffer[self]->format == babl_format ("RGBA float"))
+    {
+      if (flags & GEGL_BUFFER_CL_READ)
+        lock_mode |= GEGL_TILE_LOCK_CL_READ;
+      if (flags & GEGL_BUFFER_CL_WRITE)
+        lock_mode |= GEGL_TILE_LOCK_CL_WRITE;
+    }
+  else
+    {
+      /* do not allow iteration on OpenCL data when OpenCL acceleration
+       * is disabled
+       */
+      if (flags & GEGL_BUFFER_CL_READWRITE)
+        g_warning ("OpenCL Buffer Lock specified, but impossible to use it.\n");
+
+      i->flags[self] &= ~GEGL_BUFFER_CL_READ & ~GEGL_BUFFER_CL_WRITE;
+    }
+
   if (self==0) /* The first buffer which is always scan aligned */
     {
       i->flags[self] |= GEGL_BUFFER_SCAN_COMPATIBLE;
-      gegl_buffer_tile_iterator_init (&i->i[self], i->buffer[self], i->rect[self], ((i->flags[self] & GEGL_BUFFER_WRITE) != 0) );
+      gegl_buffer_tile_iterator_init (&i->i[self], i->buffer[self], i->rect[self], lock_mode);
     }
   else
     {
@@ -289,11 +349,12 @@ gegl_buffer_iterator_add (GeglBufferIterator  *iterator,
                                        i->buffer[self], i->rect[self].x, i->rect[self].y))
         {
           i->flags[self] |= GEGL_BUFFER_SCAN_COMPATIBLE;
-          gegl_buffer_tile_iterator_init (&i->i[self], i->buffer[self], i->rect[self], ((i->flags[self] & GEGL_BUFFER_WRITE) != 0));
+          gegl_buffer_tile_iterator_init (&i->i[self], i->buffer[self], i->rect[self], lock_mode);
         }
     }
 
-  i->buf[self] = NULL;
+  i->buf[self]    = NULL;
+  i->cl_buf[self] = NULL;
 
   if (i->format[self] == i->buffer[self]->format)
     {
@@ -363,6 +424,94 @@ static void iterator_buf_pool_release (gpointer buf)
   g_static_mutex_unlock (&pool_mutex);
 }
 
+/* <OpenCL Texture Pool> */
+
+typedef struct ClTextureInfo {
+  gint           width;   /* width passed to gegl_cl_texture_new */
+  gint           height;  /* height passed to gegl_cl_texture_new */
+  gint           used; /* if this texture is currently allocated */
+  GeglClTexture *texture; /* the pooled texture itself */
+} ClTextureInfo;
+
+static GArray *cl_texture_pool = NULL;
+
+static GStaticMutex cl_pool_mutex = G_STATIC_MUTEX_INIT;
+
+/* Hand out a pooled OpenCL texture of at least @width x @height: the first
+ * unused entry that is large enough is reused, otherwise a new texture of
+ * exactly that size is created, appended to the pool and marked as used. */
+static gpointer iterator_cl_texture_pool_get (gint width, gint height)
+{
+  ClTextureInfo fresh = {0, 0, 1, NULL};
+  guint         idx;
+
+  g_static_mutex_lock (&cl_pool_mutex);
+  if (G_UNLIKELY (cl_texture_pool == NULL))
+    cl_texture_pool = g_array_new (TRUE, TRUE, sizeof (ClTextureInfo));
+
+  for (idx = 0; idx < cl_texture_pool->len; idx++)
+    {
+      ClTextureInfo *entry = &g_array_index (cl_texture_pool, ClTextureInfo, idx);
+      if (entry->used != 0 || entry->width < width || entry->height < height)
+        continue;
+      entry->used ++;
+      g_static_mutex_unlock (&cl_pool_mutex);
+      return entry->texture;
+    }
+
+  fresh.width   = width;
+  fresh.height  = height;
+  fresh.texture = gegl_cl_texture_new (width, height);
+  g_array_append_val (cl_texture_pool, fresh);
+  g_static_mutex_unlock (&cl_pool_mutex);
+  return fresh.texture;
+}
+
+/* Return @texture to the pool by decrementing the use count of the first
+ * entry that holds it; a texture the pool does not know trips g_assert. */
+static void iterator_cl_texture_pool_release (GeglClTexture* texture)
+{
+  ClTextureInfo *owner = NULL;
+  guint          idx;
+
+  g_static_mutex_lock (&cl_pool_mutex);
+  for (idx = 0; owner == NULL && idx < cl_texture_pool->len; idx++)
+    if (g_array_index (cl_texture_pool, ClTextureInfo, idx).texture == texture)
+      owner = &g_array_index (cl_texture_pool, ClTextureInfo, idx);
+  if (owner != NULL)
+    owner->used --;
+  else
+    g_assert (0);
+  g_static_mutex_unlock (&cl_pool_mutex);
+}
+
+
+/* Free every pooled texture, used or not, and drop the pool itself.
+ * NOTE(review): a texture still held by another iterator dies here too. */
+static void iterator_cl_texture_pool_cleanup (void)
+{
+  g_static_mutex_lock (&cl_pool_mutex);
+  if (cl_texture_pool != NULL)
+    {
+      guint n_textures = cl_texture_pool->len;
+      guint idx;
+
+      for (idx = 0; idx < n_textures; idx++)
+        {
+          ClTextureInfo *entry = &g_array_index (cl_texture_pool, ClTextureInfo, idx);
+
+          gegl_cl_texture_free (entry->texture);
+          entry->texture = NULL;
+        }
+      g_array_free (cl_texture_pool, TRUE);
+      cl_texture_pool = NULL;
+    }
+  g_static_mutex_unlock (&cl_pool_mutex);
+}
+
+/* </OpenCL Texture Pool> */
+
+
 static void ensure_buf (GeglBufferIterators *i, gint no)
 {
   if (i->buf[no]==NULL)
@@ -399,12 +548,18 @@ gboolean gegl_buffer_iterator_next     (GeglBufferIterator *iterator)
       /* complete pending write work */
       for (no=0; no<i->iterators;no++)
         {
+
+          gboolean direct_access = (i->flags[no] & GEGL_BUFFER_SCAN_COMPATIBLE &&
+                                    i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE &&
+                                    i->roi[no].width == i->i[no].buffer->tile_storage->tile_width);
+
+          gboolean cl_direct_access = (direct_access &&
+                                       i->roi[no].height == i->i[no].buffer->tile_storage->tile_height);
+
           if (i->flags[no] & GEGL_BUFFER_WRITE)
             {
 
-              if (i->flags[no] & GEGL_BUFFER_SCAN_COMPATIBLE &&
-                  i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE &&
-                  i->roi[no].width == i->i[no].buffer->tile_storage->tile_width && (i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE))
+              if (direct_access)
                 {
                    /* direct access */
 #if DEBUG_DIRECT
@@ -418,13 +573,37 @@ gboolean gegl_buffer_iterator_next     (GeglBufferIterator *iterator)
 #endif
 
                   ensure_buf (i, no);
-
-  /* XXX: should perhaps use _set_unlocked, and keep the lock in the
-   * iterator.
-   */
+                  /* XXX: should perhaps use _set_unlocked, and keep the lock in the
+                   * iterator.
+                   */
                   gegl_buffer_set (i->buffer[no], &(i->roi[no]), i->format[no], i->buf[no], GEGL_AUTO_ROWSTRIDE);
                 }
             }
+
+          if (i->flags[no] & GEGL_BUFFER_CL_WRITE)
+            {
+
+              if (cl_direct_access)
+                {
+                   /* direct access */
+#if DEBUG_DIRECT
+                   direct_write += i->roi[no].width * i->roi[no].height;
+#endif
+                }
+              else
+                {
+#if DEBUG_DIRECT
+                  in_direct_write += i->roi[no].width * i->roi[no].height;
+#endif
+
+                  /* update tiles' cl_data from this buffer to cl_buf */
+                  gegl_buffer_cl_set (i->buffer[no], &(i->roi[no]), i->cl_buf[no]);
+                  iterator_cl_texture_pool_release (i->cl_buf[no]);
+                }
+
+              i->cl_buf[no] = NULL;
+
+            }
         }
     }
 
@@ -435,6 +614,11 @@ gboolean gegl_buffer_iterator_next     (GeglBufferIterator *iterator)
     {
       if (i->flags[no] & GEGL_BUFFER_SCAN_COMPATIBLE)
         {
+          gint tile_width  = i->i[no].buffer->tile_storage->tile_width;
+          gint tile_height = i->i[no].buffer->tile_storage->tile_height;
+          gboolean direct_access;
+          gboolean cl_direct_access;
+
           gboolean res;
           res = gegl_buffer_tile_iterator_next (&i->i[no]);
           if (no == 0)
@@ -450,30 +634,89 @@ gboolean gegl_buffer_iterator_next     (GeglBufferIterator *iterator)
             }
           g_assert (res == result);
 
-          if ((i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE) &&
-              i->roi[no].width == i->i[no].buffer->tile_storage->tile_width
-           )
+          direct_access = (i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE &&
+                           i->roi[no].width == tile_width);
+
+          cl_direct_access = (direct_access &&
+                              i->roi[no].height == tile_height);
+
+          if (i->flags[no] & GEGL_BUFFER_READWRITE)
             {
-              /* direct access */
-              i->data[no]=i->i[no].sub_data;
+              if (direct_access)
+                {
+                  /* direct access */
+                  i->data[no]=i->i[no].sub_data;
 #if DEBUG_DIRECT
-              direct_read += i->roi[no].width * i->roi[no].height;
+                  direct_read += i->roi[no].width * i->roi[no].height;
 #endif
+                }
+              else
+                {
+                  /* unref held tile to prevent lock contention
+                     notice that conditions for tile direct access
+                     are different from buffer lock access (different formats, etc) */
+                  if (i->i[no].tile != NULL &&
+                      (i->i[no].tile->read_locks > 0 || i->i[no].tile->write_locks > 0))
+                    {
+                      gegl_tile_unlock (i->i[no].tile);
+                      gegl_tile_unref (i->i[no].tile);
+                      i->i[no].tile = NULL;
+                      i->i[no].sub_data = NULL;
+                    }
+
+                  ensure_buf (i, no);
+
+                  if (i->flags[no] & GEGL_BUFFER_READ)
+                    {
+                      gegl_buffer_get_unlocked (i->buffer[no], 1.0, &(i->roi[no]), i->format[no], i->buf[no], GEGL_AUTO_ROWSTRIDE);
+                    }
+
+                  i->data[no]=i->buf[no];
+#if DEBUG_DIRECT
+                  in_direct_read += i->roi[no].width * i->roi[no].height;
+#endif
+                }
             }
-          else
-            {
-              ensure_buf (i, no);
 
-              if (i->flags[no] & GEGL_BUFFER_READ)
+          if (i->flags[no] & GEGL_BUFFER_CL_READWRITE)
+            {
+              if (cl_direct_access)
                 {
-                  gegl_buffer_get_unlocked (i->buffer[no], 1.0, &(i->roi[no]), i->format[no], i->buf[no], GEGL_AUTO_ROWSTRIDE);
+                  /* cl direct access - no need to allocate a different
+                     memory region in the gpu because default tile size
+                     is equal to our ROI */
+                  i->cl_data[no]=i->i[no].cl_data;
+#if DEBUG_DIRECT
+                  direct_read += i->roi[no].width * i->roi[no].height;
+#endif
                 }
-
-              i->data[no]=i->buf[no];
+              else
+                {
+                  /* unref held tile to prevent lock contention
+                     same as above */
+                  if (i->i[no].tile != NULL &&
+                      (i->i[no].tile->read_locks > 0 || i->i[no].tile->write_locks > 0))
+                    {
+                      gegl_tile_unlock (i->i[no].tile);
+                      gegl_tile_unref (i->i[no].tile);
+                      i->i[no].tile = NULL;
+                      i->i[no].cl_data = NULL;
+                    }
+
+                  i->cl_buf[no] = iterator_cl_texture_pool_get (i->roi[no].width, i->roi[no].height);
+
+                  if (i->flags[no] & GEGL_BUFFER_CL_READ)
+                    {
+                      gegl_buffer_cl_get(i->buffer[no], 1.0, &(i->roi[no]), i->cl_buf[no]);
+                    }
+
+                  i->cl_data[no]=i->cl_buf[no];
 #if DEBUG_DIRECT
-              in_direct_read += i->roi[no].width * i->roi[no].height;
+                  in_direct_read += i->roi[no].width * i->roi[no].height;
 #endif
+                }
             }
+
         }
       else
         {
@@ -482,17 +725,34 @@ gboolean gegl_buffer_iterator_next     (GeglBufferIterator *iterator)
           i->roi[no].x += (i->rect[no].x-i->rect[0].x);
           i->roi[no].y += (i->rect[no].y-i->rect[0].y);
 
-          ensure_buf (i, no);
-
-          if (i->flags[no] & GEGL_BUFFER_READ)
+          if (i->flags[no] & GEGL_BUFFER_READWRITE)
             {
-              gegl_buffer_get_unlocked (i->buffer[no], 1.0, &(i->roi[no]), i->format[no], i->buf[no], GEGL_AUTO_ROWSTRIDE);
+              ensure_buf (i, no);
+
+              if (i->flags[no] & GEGL_BUFFER_READ)
+                {
+                  gegl_buffer_get_unlocked (i->buffer[no], 1.0, &(i->roi[no]), i->format[no], i->buf[no], GEGL_AUTO_ROWSTRIDE);
+                }
+              i->data[no]=i->buf[no];
+#if DEBUG_DIRECT
+              in_direct_read += i->roi[no].width * i->roi[no].height;
+#endif
             }
-          i->data[no]=i->buf[no];
 
+          if (i->flags[no] & GEGL_BUFFER_CL_READWRITE)
+            {
+              i->cl_buf[no] = iterator_cl_texture_pool_get (i->roi[no].width, i->roi[no].height);
+
+              if (i->flags[no] & GEGL_BUFFER_CL_READ)
+                {
+                  gegl_buffer_cl_get(i->buffer[no], 1.0, &(i->roi[no]), i->cl_buf[no]);
+                }
+              i->cl_data[no]=i->cl_buf[no];
 #if DEBUG_DIRECT
-          in_direct_read += i->roi[no].width * i->roi[no].height;
+              in_direct_read += i->roi[no].width * i->roi[no].height;
 #endif
+            }
+
         }
       i->length = i->roi[no].width * i->roi[no].height;
     }
@@ -520,8 +780,12 @@ gboolean gegl_buffer_iterator_next     (GeglBufferIterator *iterator)
           if (i->buf[no])
             iterator_buf_pool_release (i->buf[no]);
           i->buf[no]=NULL;
+
           g_object_unref (i->buffer[no]);
         }
+
+      iterator_cl_texture_pool_cleanup ();
+
 #if DEBUG_DIRECT
       g_print ("%f %f\n", (100.0*direct_read/(in_direct_read+direct_read)),
                            100.0*direct_write/(in_direct_write+direct_write));
diff --git a/gegl/buffer/gegl-buffer-iterator.h b/gegl/buffer/gegl-buffer-iterator.h
index 68b2619..4d084a4 100644
--- a/gegl/buffer/gegl-buffer-iterator.h
+++ b/gegl/buffer/gegl-buffer-iterator.h
@@ -22,16 +22,26 @@
 
 #include "gegl-buffer.h"
 
+#include "gegl-cl-init.h"
+#include "gegl-cl-texture.h"
+
 #define GEGL_BUFFER_MAX_ITERATORS 6
 
-#define GEGL_BUFFER_READ      1
-#define GEGL_BUFFER_WRITE     2
+#define GEGL_BUFFER_READ      (1 << 0)
+#define GEGL_BUFFER_WRITE     (1 << 1)
 #define GEGL_BUFFER_READWRITE (GEGL_BUFFER_READ|GEGL_BUFFER_WRITE)
+#define GEGL_BUFFER_CL_READ      (1 << 2)
+#define GEGL_BUFFER_CL_WRITE     (1 << 3)
+#define GEGL_BUFFER_CL_READWRITE (GEGL_BUFFER_CL_READ | GEGL_BUFFER_CL_WRITE)
+#define GEGL_BUFFER_ALL_READ      (GEGL_BUFFER_READ | GEGL_BUFFER_CL_READ)
+#define GEGL_BUFFER_ALL_WRITE     (GEGL_BUFFER_WRITE | GEGL_BUFFER_CL_WRITE)
+#define GEGL_BUFFER_ALL           (GEGL_BUFFER_ALL_READ | GEGL_BUFFER_ALL_WRITE) /* every host and OpenCL access flag; was spelled with the undefined GEGL_BUFFER_READ_ALL / GEGL_BUFFER_WRITE_ALL */
 
 typedef struct GeglBufferIterator
 {
   gint          length;
   gpointer      data[GEGL_BUFFER_MAX_ITERATORS];
+  GeglClTexture *cl_data[GEGL_BUFFER_MAX_ITERATORS]; /* OpenCL texture per buffer, alongside data[]; presumably only set for buffers added with a GEGL_BUFFER_CL_* flag - TODO confirm */
   GeglRectangle roi[GEGL_BUFFER_MAX_ITERATORS];
 } GeglBufferIterator;
 
@@ -41,7 +51,7 @@ typedef struct GeglBufferIterator
  * @buffer: a #GeglBuffer
  * @roi: the rectangle to iterate over
  * @format: the format we want to process this buffers data in, pass 0 to use the buffers format.
- * @flags: whether we need reading or writing to this buffer one of GEGL_BUFFER_READ, GEGL_BUFFER_WRITE and GEGL_BUFFER_READWRITE.
+ * @flags: whether we need reading or writing to this buffer: one of GEGL_BUFFER_READ, GEGL_BUFFER_WRITE or GEGL_BUFFER_READWRITE, or the equivalent OpenCL flags GEGL_BUFFER_CL_READ, GEGL_BUFFER_CL_WRITE or GEGL_BUFFER_CL_READWRITE.
  *
  * Create a new buffer iterator, this buffer will be iterated through
  * in linear chunks, some chunks might be full tiles the coordinates, see
diff --git a/gegl/buffer/gegl-buffer.h b/gegl/buffer/gegl-buffer.h
index 9038074..466c625 100644
--- a/gegl/buffer/gegl-buffer.h
+++ b/gegl/buffer/gegl-buffer.h
@@ -23,6 +23,9 @@
 #include <babl/babl.h>
 #include <gegl-matrix.h>
 
+#include "gegl-cl-init.h"
+#include "gegl-cl-texture.h"
+
 G_BEGIN_DECLS
 
 #define GEGL_TYPE_BUFFER (gegl_buffer_get_type ())
@@ -238,6 +241,11 @@ void            gegl_buffer_get               (GeglBuffer          *buffer,
                                                gpointer             dest,
                                                gint                 rowstride);
 
+void gegl_buffer_cl_get (GeglBuffer          *buffer,
+                         gdouble              scale,
+                         const GeglRectangle *rect,
+                         GeglClTexture       *dest);
+
 /**
  * gegl_buffer_set:
  * @buffer: the buffer to modify.
@@ -255,6 +263,9 @@ void            gegl_buffer_set               (GeglBuffer          *buffer,
                                                void                *src,
                                                gint                 rowstride);
 
+void            gegl_buffer_cl_set            (GeglBuffer          *buffer,
+                                               const GeglRectangle *rect,
+                                               GeglClTexture *src);
 
 /**
  * gegl_buffer_get_format:



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]