[gegl/gsoc2009-gpu: 20/20] Merge branch 'master' into gsoc2009-gpu



commit 7a8398bbee55c0808e5e06c80d3466475e6ba1ef
Merge: fc419f6... a1902d2...
Author: Øyvind Kolås <pippin gimp org>
Date:   Sun Nov 22 23:59:42 2009 +0000

    Merge branch 'master' into gsoc2009-gpu
    
    Checked that it compiles with all permutations of gpu|multithreads.
    The multithreaded build locks up during testing, probably because
    GeglTile uses mutexes differently from GEGL master.
    
    I have not been able to verify that GPU support works.
    
    Conflicts:
    	configure.ac
    	gegl/buffer/gegl-buffer-access.c
    	gegl/buffer/gegl-buffer-iterator.c
    	gegl/buffer/gegl-buffer-linear.c
    	gegl/buffer/gegl-tile.c
    	gegl/buffer/gegl-tile.h
    	gegl/gegl-config.c
    	gegl/gegl-config.h
    	gegl/gegl-init.c

 bin/gegl.c                              |    1 +
 configure.ac                            |   65 ++++---
 examples/csugar.c                       |   58 ++++++
 examples/gegl-paint.c                   |    3 +-
 examples/hello-world.c                  |    1 +
 examples/util/gegl-view.c               |    3 +
 gegl/Makefile.am                        |    1 +
 gegl/buffer/gegl-buffer-access.c        |  127 +++++--------
 gegl/buffer/gegl-buffer-iterator.c      |  146 ++++++++++-----
 gegl/buffer/gegl-buffer-linear.c        |   20 ++-
 gegl/buffer/gegl-buffer-private.h       |   43 +++--
 gegl/buffer/gegl-buffer.c               |   69 +++++--
 gegl/buffer/gegl-cache.c                |   16 ++
 gegl/buffer/gegl-sampler.c              |    4 -
 gegl/buffer/gegl-tile-backend.c         |    2 +-
 gegl/buffer/gegl-tile-handler-cache.c   |   79 +++++++-
 gegl/buffer/gegl-tile-handler-empty.c   |    2 +-
 gegl/buffer/gegl-tile-storage.c         |    6 +
 gegl/buffer/gegl-tile-storage.h         |    4 +-
 gegl/buffer/gegl-tile.c                 |  173 +++++-------------
 gegl/buffer/gegl-tile.h                 |   48 +++---
 gegl/gegl-c.c                           |  158 ++++++++++++++++
 gegl/gegl-config.c                      |   27 +++-
 gegl/gegl-config.h                      |    3 +
 gegl/gegl-init.c                        |   21 ++-
 gegl/gegl-utils.h                       |    5 +-
 gegl/gegl.h                             |   41 ++++-
 gegl/graph/gegl-node.c                  |  297 +++++++++++++++++++++++++------
 gegl/graph/gegl-node.h                  |    4 +
 gegl/graph/gegl-visitor.c               |   10 +
 gegl/operation/gegl-operation.c         |   12 ++-
 gegl/process/gegl-eval-mgr.c            |   10 +-
 gegl/process/gegl-have-visitor.c        |    6 +
 gegl/process/gegl-prepare-visitor.c     |   12 ++
 gegl/property-types/gegl-path.c         |   18 +-
 gegl/property-types/gegl-path.h         |    4 +
 operations/affine/affine.c              |  101 ++++++-----
 operations/affine/affine.h              |    1 -
 operations/common/brightness-contrast.c |   12 +-
 operations/external/path.c              |    4 +-
 tests/test-change-processor-rect.c      |    1 +
 tests/test-color-op.c                   |    1 +
 tests/test-proxynop-processing.c        |    1 +
 tools/introspect.c                      |    1 +
 44 files changed, 1127 insertions(+), 494 deletions(-)
---
diff --cc configure.ac
index 046de9c,38146ba..07546fd
--- a/configure.ac
+++ b/configure.ac
@@@ -1052,31 -1027,31 +1066,32 @@@ AC_MSG_RESULT(
  Building GEGL with prefix=$prefix
  
  Optional features:
-   GEGL docs:      $enable_docs
-   Build workshop: $enable_workshop
-   Build website:  $have_asciidoc
-   SIMD:           sse:$enable_sse mmx:$enable_mmx
-   GPU:            $have_gpu
+   GEGL docs:       $enable_docs
+   Build workshop:  $enable_workshop
+   Build website:   $have_asciidoc
+   SIMD:            sse:$enable_sse mmx:$enable_mmx
++  GPU:             $have_gpu
+   Multi threading: $enable_mt
  
  Optional dependencies:
-   asciidoc:       $have_asciidoc
-   enscript:       $have_enscript
-   GIO:            $have_gio
-   GTK+:           $have_gtk
-   Ruby:           $have_ruby
-   Lua:            $have_lua
-   Cairo:          $have_cairo
-   Pango:          $have_pango
-   pangocairo:     $have_pangocairo
-   GDKPixbuf:      $have_gdk_pixbuf
-   JPEG:           $jpeg_ok
-   PNG:            $have_libpng
-   OpenEXR:        $have_openexr
-   rsvg:           $have_librsvg
-   SDL:            $have_sdl
-   openraw:        $have_libopenraw
-   graphviz:       $have_graphviz
-   avformat:       $have_libavformat
-   V4L:            $have_v4l
-   spiro:          $spiro_ok
+   asciidoc:        $have_asciidoc
+   enscript:        $have_enscript
+   GIO:             $have_gio
+   GTK+:            $have_gtk
+   Ruby:            $have_ruby
+   Lua:             $have_lua
+   Cairo:           $have_cairo
+   Pango:           $have_pango
+   pangocairo:      $have_pangocairo
+   GDKPixbuf:       $have_gdk_pixbuf
+   JPEG:            $jpeg_ok
+   PNG:             $have_libpng
+   OpenEXR:         $have_openexr
+   rsvg:            $have_librsvg
+   SDL:             $have_sdl
+   openraw:         $have_libopenraw
+   graphviz:        $have_graphviz
+   avformat:        $have_libavformat
+   V4L:             $have_v4l
+   spiro:           $spiro_ok
  ]);
diff --cc gegl/buffer/gegl-buffer-access.c
index bcb4a92,7589a62..b36e286
--- a/gegl/buffer/gegl-buffer-access.c
+++ b/gegl/buffer/gegl-buffer-access.c
@@@ -43,17 -43,6 +43,12 @@@
  #include "gegl-tile-backend.h"
  #include "gegl-buffer-iterator.h"
  
 +#include "gegl-region.h"
 +#if HAVE_GPU
 +#include "gegl-gpu-types.h"
 +#include "gegl-gpu-texture.h"
 +#endif
 +
- #if ENABLE_MP
- GStaticRecMutex mutex = G_STATIC_REC_MUTEX_INIT;
- #endif
- 
- 
  #if 0
  static inline void
  gegl_buffer_pixel_set (GeglBuffer *buffer,
@@@ -720,8 -570,7 +715,8 @@@ gegl_buffer_iterate (GeglBuffe
                          bp += buf_stride;
                        }
                    }
 +                gegl_tile_unlock (tile);
-                 g_object_unref (tile);
+                 gegl_tile_unref (tile);
                }
              bufx += (tile_width - offsetx);
            }
@@@ -729,237 -578,13 +724,230 @@@
      }
  }
  
 +#if HAVE_GPU
 +static void inline
 +gegl_buffer_gpu_iterate (GeglBuffer          *buffer,
 +                         const GeglRectangle *roi, /* or NULL for extent */
 +                         GeglGpuTexture      *texture,
 +                         gboolean             write,
 +                         gint                 level)
 +{
 +  GeglRectangle original_rect;
 +  GeglRectangle abyss;
 +  GeglRectangle final_rect;
 +
 +  gint factor = 1;
 +
 +  gint tile_width  = buffer->tile_storage->tile_width;
 +  gint tile_height = buffer->tile_storage->tile_height;
 +
 +  gint last_x;
 +  gint last_y;
 +
 +  gint first_tile_index_x;
 +  gint first_tile_index_y;
 +  gint last_tile_index_x;
 +  gint last_tile_index_y;
 +
 +  gint first_tile_offset_x;
 +  gint first_tile_offset_y;
 +  gint last_tile_offset_x;
 +  gint last_tile_offset_y;
 +
 +  gint first_texture_x;
 +  gint first_texture_y;
 +
 +  gint tile_index_y;
 +  gint tile_offset_y;
 +  gint texture_y;
 +  gint copy_area_height;
 +
 +  gint tile_index_x;
 +  gint tile_offset_x;
 +  gint texture_x;
 +  gint copy_area_width;
 +
 +  gint cnt;
 +
 +  for (cnt = 0; cnt < level; cnt++)
 +    factor *= 2;
 +
 +  if (roi != NULL)
 +    {
 +      original_rect.x      = (roi->x + buffer->shift_x) / factor;
 +      original_rect.y      = (roi->y + buffer->shift_y) / factor;
 +      original_rect.width  = roi->width / factor;
 +      original_rect.height = roi->height / factor;
 +    }
 +  else
 +    {
 +      original_rect.x      = (buffer->extent.x + buffer->shift_x) / factor;
 +      original_rect.y      = (buffer->extent.y + buffer->shift_y) / factor;
 +      original_rect.width  = buffer->extent.width / factor;
 +      original_rect.height = buffer->extent.height / factor;
 +    }
 +
 +  abyss.x      = (buffer->abyss.x + buffer->shift_x) / factor;
 +  abyss.y      = (buffer->abyss.y + buffer->shift_y) / factor;
 +  abyss.width  = buffer->abyss.width / factor;
 +  abyss.height = buffer->abyss.height / factor;
 +
 +  gegl_rectangle_intersect (&final_rect, &abyss, &original_rect);
 +
 +  last_x = final_rect.x + (final_rect.width - 1);
 +  last_y = final_rect.y + (final_rect.height - 1);
 +
 +  first_tile_index_x = gegl_tile_index (final_rect.x, tile_width);
 +  first_tile_index_y = gegl_tile_index (final_rect.y, tile_height);
 +  last_tile_index_x  = gegl_tile_index (last_x, tile_width);
 +  last_tile_index_y  = gegl_tile_index (last_y, tile_height);
 +
 +  first_tile_offset_x = gegl_tile_offset (final_rect.x, tile_width);
 +  first_tile_offset_y = gegl_tile_offset (final_rect.y, tile_height);
 +  last_tile_offset_x  = gegl_tile_offset (last_x, tile_width);
 +  last_tile_offset_y  = gegl_tile_offset (last_y, tile_height);
 +
 +  first_texture_x = final_rect.x - original_rect.x;
 +  first_texture_y = final_rect.y - original_rect.y;
 +
 +  for (tile_index_y       = first_tile_index_y,
 +         tile_offset_y    = first_tile_offset_y,
 +         texture_y        = first_texture_y,
 +         copy_area_height = tile_index_y < last_tile_index_y
 +                              ? tile_height - tile_offset_y
 +                              : last_tile_offset_y - tile_offset_y + 1;
 +
 +       tile_index_y <= last_tile_index_y;
 +
 +       tile_index_y++,
 +         tile_offset_y    = 0,
 +         texture_y       += copy_area_height,
 +         copy_area_height = tile_index_y < last_tile_index_y
 +                              ? tile_height
 +                              : last_tile_offset_y + 1)
 +    {
 +      for (tile_index_x      = first_tile_index_x,
 +             tile_offset_x   = first_tile_offset_x,
 +             texture_x       = first_texture_x,
 +             copy_area_width = tile_index_x < last_tile_index_x
 +                                 ? tile_width - tile_offset_x
 +                                 : last_tile_offset_x - tile_offset_x + 1;
 +
 +           tile_index_x <= last_tile_index_x;
 +
 +           tile_index_x++,
 +             tile_offset_x   = 0,
 +             texture_x      += copy_area_width,
 +             copy_area_width = tile_index_x < last_tile_index_x
 +                                 ? tile_width
 +                                 : last_tile_offset_x + 1)
 +        {
 +          GeglTile *tile = gegl_tile_source_get_tile ((GeglTileSource *) buffer,
 +                                                      tile_index_x,
 +                                                      tile_index_y,
 +                                                      level);
 +
 +          if (tile == NULL)
 +            {
 +              g_warning ("didn't get tile, trying to continue");
 +              continue;
 +            }
 +
 +          if (write)
 +            {
 +              GeglGpuTexture *dest;
 +              GeglRectangle   temp_rect = {
 +                                            texture_x,
 +                                            texture_y,
 +                                            copy_area_width,
 +                                            copy_area_height
 +                                          };
 +
 +              gegl_tile_lock (tile, GEGL_TILE_LOCK_GPU_WRITE);
 +
 +              dest = gegl_tile_get_gpu_data (tile);
 +              gegl_gpu_texture_copy (texture,
 +                                     &temp_rect,
 +                                     dest,
 +                                     tile_offset_x,
 +                                     tile_offset_y);
 +
 +              gegl_tile_unlock (tile);
 +            }
 +          else
 +            {
 +              GeglGpuTexture *src;
 +              GeglRectangle   temp_rect = {
 +                                            tile_offset_x,
 +                                            tile_offset_y,
 +                                            copy_area_width,
 +                                            copy_area_height
 +                                          };
 +
 +              gegl_tile_lock (tile, GEGL_TILE_LOCK_GPU_READ);
 +
 +              src = gegl_tile_get_gpu_data (tile);
 +              gegl_gpu_texture_copy (src,
 +                                     &temp_rect,
 +                                     texture,
 +                                     texture_x,
 +                                     texture_y);
 +
 +              gegl_tile_unlock (tile);
 +            }
 +        }
 +    }
 +
 +  /* "read" from abyss */
 +    {
 +      /* original requested rectangle on the texture */
 +      GeglRectangle abyss_rect   = {
 +                                     0,
 +                                     0,
 +                                     original_rect.width,
 +                                     original_rect.height
 +                                   };
 +
 +      /* rectangle on the texture written with actual pixel
 +       * data from the buffer
 +       */
 +      GeglRectangle texture_rect = {
 +                                     first_texture_x,
 +                                     first_texture_y,
 +                                     final_rect.width,
 +                                     final_rect.height
 +                                   };
 +
 +      if (!write && !gegl_rectangle_equal (&abyss_rect, &texture_rect))
 +        {
 +          GeglRegion *abyss_region   = gegl_region_rectangle (&abyss_rect);
 +          GeglRegion *texture_region = gegl_region_rectangle (&texture_rect);
 +
 +          GeglRectangle *abyss_rects;
 +          gint rect_count;
 +
 +          gegl_region_subtract (abyss_region, texture_region);
 +          gegl_region_destroy  (texture_region);
 +
 +          gegl_region_get_rectangles (abyss_region, &abyss_rects, &rect_count);
 +
 +          for (cnt = 0; cnt < rect_count; cnt++)
 +            gegl_gpu_texture_clear (texture, &abyss_rects[cnt]);
 +
 +          g_free (abyss_rects);
 +          gegl_region_destroy (abyss_region);
 +        }
 +    }
 +}
 +#endif
 +
  void
- gegl_buffer_set (GeglBuffer          *buffer,
-                  const GeglRectangle *rect,
-                  const Babl          *format,
-                  void                *src,
-                  gint                 rowstride)
+ gegl_buffer_set_unlocked (GeglBuffer          *buffer,
+                           const GeglRectangle *rect,
+                           const Babl          *format,
+                           void                *src,
+                           gint                 rowstride)
  {
-   g_return_if_fail (GEGL_IS_BUFFER (buffer));
- 
- #if ENABLE_MP
-   g_static_rec_mutex_lock (&mutex);
- #endif
-   gegl_buffer_lock (buffer);
- 
    if (format == NULL)
      format = buffer->format;
  
@@@ -976,42 -601,25 +964,46 @@@
      {
        gegl_buffer_flush (buffer);
      }
-   gegl_buffer_unlock (buffer); /* XXX: should this happen before flush? */
- #if ENABLE_MP
-   g_static_rec_mutex_unlock (&mutex);
- #endif
  }
  
 +#if HAVE_GPU
 +void
 +gegl_buffer_gpu_set (GeglBuffer           *buffer,
 +                     const GeglRectangle  *rect,
 +                     const GeglGpuTexture *src)
 +{
 +  g_return_if_fail (GEGL_IS_BUFFER (buffer));
 +
- #if ENABLE_MP
-   g_static_rec_mutex_lock (&mutex);
- #endif
 +  gegl_buffer_lock (buffer);
 +
 +  if (rect != NULL && rect->width == 1 && rect->height == 1) /* fast path */
 +    gegl_buffer_gpu_set_pixel (buffer, rect->x, rect->y, src);
 +  else
 +    gegl_buffer_gpu_iterate (buffer, rect, (GeglGpuTexture *) src, TRUE, 0);
 +
 +  if (gegl_buffer_is_shared (buffer))
 +    gegl_buffer_flush (buffer);
 +
 +  gegl_buffer_unlock (buffer); /* XXX: should this happen before flush? */
- #if ENABLE_MP
-   g_static_rec_mutex_unlock (&mutex);
- #endif
 +}
 +#endif
 +
+ void
+ gegl_buffer_set (GeglBuffer          *buffer,
+                  const GeglRectangle *rect,
+                  const Babl          *format,
+                  void                *src,
+                  gint                 rowstride)
+ {
+   g_return_if_fail (GEGL_IS_BUFFER (buffer));
+ 
+   gegl_buffer_lock (buffer);
+   gegl_buffer_set_unlocked (buffer, rect, format, src, rowstride);
+   gegl_buffer_unlock (buffer);
+ }
+ 
  
  
 -
  #if 0
  
  /*
@@@ -1457,167 -1050,22 +1434,178 @@@ gegl_buffer_get_unlocked (GeglBuffe
          }
        g_free (sample_buf);
      }
- #if ENABLE_MP
-   g_static_rec_mutex_unlock (&mutex);
- #endif
+ }
+ 
+ void
+ gegl_buffer_get (GeglBuffer          *buffer,
+                  gdouble              scale,
+                  const GeglRectangle *rect,
+                  const Babl          *format,
+                  gpointer             dest_buf,
+                  gint                 rowstride)
+ {
+   g_return_if_fail (GEGL_IS_BUFFER (buffer));
+   gegl_buffer_lock (buffer);
+   gegl_buffer_get_unlocked (buffer, scale, rect, format, dest_buf, rowstride);
+   gegl_buffer_unlock (buffer);
  }
  
 +#if HAVE_GPU
 +void
 +gegl_buffer_gpu_get (GeglBuffer          *buffer,
 +                     gdouble              scale,
 +                     const GeglRectangle *rect,
 +                     GeglGpuTexture      *dest)
 +{
 +  g_return_if_fail (GEGL_IS_BUFFER (buffer));
- #if ENABLE_MP
++#if ENABLE_MT
 +  g_static_rec_mutex_lock (&mutex);
 +#endif
 +
 +  if (GEGL_FLOAT_EQUAL (scale, 1.0)
 +      && rect != NULL
 +      && rect->width == 1
 +      && rect->height == 1)  /* fast path */
 +    {
 +      gegl_buffer_gpu_get_pixel (buffer, rect->x, rect->y, dest);
- #if ENABLE_MP
++#if ENABLE_MT
 +      g_static_rec_mutex_unlock (&mutex);
 +#endif
 +      return;
 +    }
 +
 +  if (rect == NULL && GEGL_FLOAT_EQUAL (scale, 1.0))
 +    {
 +      gegl_buffer_gpu_iterate (buffer, NULL, dest, FALSE, 0);
- #if ENABLE_MP
++#if ENABLE_MT
 +      g_static_rec_mutex_unlock (&mutex);
 +#endif
 +      return;
 +    }
 +
 +  if (rect->width == 0 || rect->height == 0)
 +    {
- #if ENABLE_MP
++#if ENABLE_MT
 +      g_static_rec_mutex_unlock (&mutex);
 +#endif
 +      return;
 +    }
 +
 +  if (GEGL_FLOAT_EQUAL (scale, 1.0))
 +    {
 +      gegl_buffer_gpu_iterate (buffer, rect, dest, FALSE, 0);
- #if ENABLE_MP
++#if ENABLE_MT
 +      g_static_rec_mutex_unlock (&mutex);
 +#endif
 +      return;
 +    }
 +  else
 +    {
 +      /* mostly, a direct copy from gegl_buffer_get()
 +       *
 +       * TODO: implement GPU-based resamplers to prevent reading back
 +       *       to the CPU like what we are doing here
 +       */
 +      gint level      = 0;
 +      gint buf_width  = rect->width / scale;
 +      gint buf_height = rect->height / scale;
 +      gint bpp        = babl_format_get_bytes_per_pixel (dest->format);
 +
 +      void         *dest_buf;
 +      void         *sample_buf;
 +      GeglRectangle sample_rect;
 +
 +      gint factor = 1;
 +
 +      gdouble offset_x;
 +      gdouble offset_y;
 +
 +      sample_rect.x      = floor (rect->x/scale);
 +      sample_rect.y      = floor (rect->y/scale);
 +      sample_rect.width  = buf_width;
 +      sample_rect.height = buf_height;
 +
 +      while (scale <= 0.5)
 +        {
 +          scale  *= 2;
 +          factor *= 2;
 +          level++;
 +        }
 +
 +      buf_width  /= factor;
 +      buf_height /= factor;
 +
 +      /* ensure we always have some data to sample from */
 +      sample_rect.width  += factor * 2;
 +      sample_rect.height += factor * 2;
 +      buf_width          += 2;
 +      buf_height         += 2;
 +
 +      offset_x = rect->x - floor (rect->x / scale) * scale;
 +      offset_y = rect->y - floor (rect->y / scale) * scale;
 +
 +      dest_buf = g_malloc (dest->width * dest->height * bpp);
 +      sample_buf = g_malloc (buf_width * buf_height * bpp);
 +
 +      gegl_buffer_iterate (buffer,
 +                           &sample_rect,
 +                           sample_buf,
 +                           GEGL_AUTO_ROWSTRIDE,
 +                           FALSE,
 +                           dest->format,
 +                           level);
 +#if 1
 +      /* slows testing of rendering code speed to much for now and
 +       * no time to make a fast implementation
 +       */
 +      if (babl_format_get_type (dest->format, 0) == babl_type ("u8")
 +          && !(level == 0 && scale > 1.99))
 +        {
 +          /* do box-filter resampling if we're 8bit (which projections are) */
 +
 +          /* XXX: use box-filter also for > 1.99 when testing and probably
 +           * later, there are some bugs when doing so
 +           */
 +          resample_boxfilter_u8 (dest_buf,
 +                                 sample_buf,
 +                                 rect->width,
 +                                 rect->height,
 +                                 buf_width,
 +                                 buf_height,
 +                                 offset_x,
 +                                 offset_y,
 +                                 scale,
 +                                 bpp,
 +                                 GEGL_AUTO_ROWSTRIDE);
 +        }
 +      else
 +#endif
 +        {
 +          resample_nearest (dest_buf,
 +                            sample_buf,
 +                            rect->width,
 +                            rect->height,
 +                            buf_width,
 +                            buf_height,
 +                            offset_x,
 +                            offset_y,
 +                            scale,
 +                            bpp,
 +                            GEGL_AUTO_ROWSTRIDE);
 +        }
 +
 +      gegl_gpu_texture_set (dest, NULL, dest_buf, NULL);
 +
 +      g_free (sample_buf);
 +      g_free (dest_buf);
 +    }
 +
- #if ENABLE_MP
++#if ENABLE_MT
 +  g_static_rec_mutex_unlock (&mutex);
 +#endif
 +}
 +#endif
 +
  const GeglRectangle *
  gegl_buffer_get_abyss (GeglBuffer *buffer)
  {
diff --cc gegl/buffer/gegl-buffer-iterator.c
index e4d6968,0ebcbea..f56ce15
--- a/gegl/buffer/gegl-buffer-iterator.c
+++ b/gegl/buffer/gegl-buffer-iterator.c
@@@ -32,36 -32,31 +32,36 @@@
  #include "gegl-tile-storage.h"
  #include "gegl-utils.h"
  
 +#if HAVE_GPU
 +#include "gegl-gpu-types.h"
 +#include "gegl-gpu-texture.h"
 +#include "gegl-gpu-init.h"
 +#endif
 +
- typedef struct _GeglBufferTileIterator
+ typedef struct GeglBufferTileIterator
  {
 -  GeglBuffer    *buffer;
 -  GeglRectangle  roi;     /* the rectangular region we're iterating over */
 -  GeglTile      *tile;    /* current tile */
 -  gpointer       data;    /* current tile's data */
 -
 -  gint           col;     /* the column currently provided for */
 -  gint           row;     /* the row currently provided for */
 -  gboolean       write;
 -  GeglRectangle  subrect;    /* the subrect that intersected roi */
 -  gpointer       sub_data;   /* pointer to the subdata as indicated by subrect */
 -  gint           rowstride;  /* rowstride for tile, in bytes */
 -
 -  gint           next_col; /* used internally */
 -  gint           next_row; /* used internally */
 -  gint           max_size; /* maximum data buffer needed, in bytes */
 -  GeglRectangle  roi2;     /* the rectangular subregion of data
 -                            * in the buffer represented by this scan.
 -                            */
 -
 -} GeglBufferTileIterator;
 -
 -#define GEGL_BUFFER_SCAN_COMPATIBLE   128   /* should be integrated into enum */
 -#define GEGL_BUFFER_FORMAT_COMPATIBLE 256   /* should be integrated into enum */
 +  GeglBuffer      *buffer;
 +  GeglRectangle    roi;      /* the rectangular region we're iterating over */
 +  GeglTile        *tile;     /* current tile */
 +
 +  GeglTileLockMode lock_mode;
 +
 +  GeglRectangle    subrect;  /* the rectangular subregion of data in the
 +                              * buffer represented by this scan
 +                              */
 +  gpointer         sub_data; /* pointer to the data as indicated by subrect */
 +#if HAVE_GPU
 +  GeglGpuTexture  *gpu_data; /* pointer to the tile's full GPU data */
 +#endif
 +
 +  /* used internally */
 +  gint             next_col;
 +  gint             next_row;
 +
 +} _GeglBufferTileIterator;
 +
 +#define GEGL_BUFFER_SCAN_COMPATIBLE   128 /* should be integrated into enum */
 +#define GEGL_BUFFER_FORMAT_COMPATIBLE 256 /* should be integrated into enum */
  
  #define DEBUG_DIRECT 0
  
@@@ -174,108 -150,85 +174,111 @@@ gegl_buffer_tile_iterator_next (_GeglBu
  gulp:
  
    /* unref previously held tile */
 -  if (i->tile)
 +  if (i->tile != NULL)
      {
-       gegl_tile_unlock (i->tile);
- 
-       g_object_unref (i->tile);
 -      if (i->write && i->subrect.width == tile_width)
++      if ((i->lock_mode & GEGL_TILE_LOCK_WRITE)
++       && i->subrect.width == tile_width)
+         {
+           gegl_tile_unlock (i->tile);
+         }
+       gegl_tile_unref (i->tile);
        i->tile = NULL;
 +
 +      i->sub_data = NULL;
 +#if HAVE_GPU
 +      i->gpu_data = NULL;
 +#endif
      }
  
 +  memset (&i->subrect, 0, sizeof (GeglRectangle));
 +
    if (i->next_col < i->roi.width)
 -    { /* return tile on this row */
 -      gint tiledx = buffer_x + i->next_col;
 -      gint tiledy = buffer_y + i->next_row;
 -      gint offsetx = gegl_tile_offset (tiledx, tile_width);
 -      gint offsety = gegl_tile_offset (tiledy, tile_height);
 +    {
 +      /* return tile on this row */
 +      gint x = buffer_x + i->next_col;
 +      gint y = buffer_y + i->next_row;
 +
 +      gint offset_x = gegl_tile_offset (x, tile_width);
 +      gint offset_y = gegl_tile_offset (y, tile_height);
 +
 +      GeglRectangle rect = {offset_x, offset_y, 0, 0};
 +
 +      gboolean direct_access;
 +#if HAVE_GPU
 +      gboolean gpu_direct_access;
 +#endif
 +
 +      if (i->roi.width + offset_x - i->next_col < tile_width)
 +        rect.width = (i->roi.width + offset_x - i->next_col) - offset_x;
 +      else
 +        rect.width = tile_width - offset_x;
  
 +      if (i->roi.height + offset_y - i->next_row < tile_height)
 +        rect.height = (i->roi.height + offset_y - i->next_row) - offset_y;
 +      else
 +        rect.height = tile_height - offset_y;
 +
 +      direct_access = ((i->lock_mode & GEGL_TILE_LOCK_READ
 +                                 || i->lock_mode & GEGL_TILE_LOCK_WRITE)
 +                                && tile_width == rect.width);
 +
 +#if HAVE_GPU
 +      gpu_direct_access = ((i->lock_mode & GEGL_TILE_LOCK_GPU_READ
 +                            || i->lock_mode & GEGL_TILE_LOCK_GPU_WRITE)
 +                           && tile_width == rect.width
 +                           && tile_height == rect.height);
 +#endif
 +
 +      if (direct_access 
 +#if HAVE_GPU
 +       || gpu_direct_access
 +#endif
 +       )
          {
 -         i->subrect.x = offsetx;
 -         i->subrect.y = offsety;
 -         if (i->roi.width + offsetx - i->next_col < tile_width)
 -           i->subrect.width = (i->roi.width + offsetx - i->next_col) - offsetx;
 -         else
 -           i->subrect.width = tile_width - offsetx;
 -
 -         if (i->roi.height + offsety - i->next_row < tile_height)
 -           i->subrect.height = (i->roi.height + offsety - i->next_row) - offsety;
 -         else
 -           i->subrect.height = tile_height - offsety;
 -
 -         i->tile = gegl_tile_source_get_tile ((GeglTileSource *) (buffer),
 -                                               gegl_tile_indice (tiledx, tile_width),
 -                                               gegl_tile_indice (tiledy, tile_height),
 +          i->tile = gegl_tile_source_get_tile ((GeglTileSource *) buffer,
 +                                               gegl_tile_index (x, tile_width),
 +                                               gegl_tile_index (y, tile_height),
                                                 0);
 -         if (i->write && tile_width==i->subrect.width)
 -           {
 -             gegl_tile_lock (i->tile);
 -           }
 -         i->data = gegl_tile_get_data (i->tile);
 -
 -         {
 -         gint bpp = babl_format_get_bytes_per_pixel (i->buffer->format);
 -         i->rowstride = bpp * tile_width;
 -         i->sub_data = (guchar*)(i->data) + bpp * (i->subrect.y * tile_width + i->subrect.x);
 -         }
 -
 -         i->col = i->next_col;
 -         i->row = i->next_row;
 -         i->next_col += tile_width - offsetx;
 -
 -
 -         i->roi2.x      = i->roi.x + i->col;
 -         i->roi2.y      = i->roi.y + i->row;
 -         i->roi2.width  = i->subrect.width;
 -         i->roi2.height = i->subrect.height;
 -
 -         return TRUE;
 -       }
 -    }
 -  else /* move down to next row */
 -    {
 -      gint tiledy;
 -      gint offsety;
  
 -      i->row = i->next_row;
 -      i->col = i->next_col;
 +          gegl_tile_lock (i->tile, i->lock_mode);
  
 -      tiledy = buffer_y + i->next_row;
 -      offsety = gegl_tile_offset (tiledy, tile_height);
 +          if (direct_access)
 +            {
 +              gpointer data = gegl_tile_get_data (i->tile);
 +              gint bpp = babl_format_get_bytes_per_pixel (buffer->format);
 +              i->sub_data = (guchar *) data + (bpp * rect.y * tile_width);
 +            }
 +#if HAVE_GPU
 +          if (gpu_direct_access)
 +            i->gpu_data = gegl_tile_get_gpu_data (i->tile);
 +#endif
 +        }
 +
 +      i->subrect.x      = i->roi.x + i->next_col;
 +      i->subrect.y      = i->roi.y + i->next_row;
 +      i->subrect.width  = rect.width;
 +      i->subrect.height = rect.height;
 +
 +      i->next_col += tile_width - offset_x;
  
 -      i->next_row += tile_height - offsety;
 -      i->next_col=0;
 +      return TRUE;
 +    }
 +  else
 +    {
 +      /* move down to the next row */
 +      gint y        = buffer_y + i->next_row;
 +      gint offset_y = gegl_tile_offset (y, tile_height);
  
 +      i->next_row += tile_height - offset_y;
 +      i->next_col  = 0;
 +
 +      /* return the first tile in the next row */
        if (i->next_row < i->roi.height)
 -        {
 -          goto gulp; /* return the first tile in the next row */
 -        }
 +        goto gulp;
 +
        return FALSE;
      }
 +
    return FALSE;
  }
  
@@@ -379,255 -312,130 +382,284 @@@ typedef struct BufInfo 
  
  static GArray *buf_pool = NULL;
  
- static gpointer
- iterator_buf_pool_get (gint width,
-                        gint height,
-                        const Babl *format)
+ #if ENABLE_MT
+ static GStaticMutex pool_mutex = G_STATIC_MUTEX_INIT;
+ #endif
+ 
+ static gpointer iterator_buf_pool_get (gint size)
  {
 -  gint i;
 +  gint cnt;
- 
-   gint bpp  = babl_format_get_bytes_per_pixel (format);
-   gint size = width * height * bpp;
+ #if ENABLE_MT
+   g_static_mutex_lock (&pool_mutex);
+ #endif
  
    if (G_UNLIKELY (!buf_pool))
 +    buf_pool = g_array_new (TRUE, TRUE, sizeof (BufInfo));
 +
 +  for (cnt = 0; cnt < buf_pool->len; cnt++)
      {
 -      buf_pool = g_array_new (TRUE, TRUE, sizeof (BufInfo));
 -    }
 -  for (i=0; i<buf_pool->len; i++)
 -    {
 -      BufInfo *info = &g_array_index (buf_pool, BufInfo, i);
 +      BufInfo *info = &g_array_index (buf_pool, BufInfo, cnt);
 +
        if (info->size >= size && info->used == 0)
          {
-           info->used++;
+           info->used ++;
+ #if ENABLE_MT
+           g_static_mutex_unlock (&pool_mutex);
 -          return info->buf;
+ #endif
 +          return info->buf;
          }
      }
-     {
-       BufInfo info = {0, 1, NULL};
- 
-       info.size = size;
-       info.buf  = gegl_malloc (size);
- 
-       g_array_append_val (buf_pool, info);
-       return info.buf;
-     }
+   {
+     BufInfo info = {0, 1, NULL};
+     info.size = size;
+     info.buf = gegl_malloc (size);
+     g_array_append_val (buf_pool, info);
+ #if ENABLE_MT
+     g_static_mutex_unlock (&pool_mutex);
+ #endif
+     return info.buf;
+   }
  }
  
 -static void iterator_buf_pool_release (gpointer buf)
 +static void
 +iterator_buf_pool_release (gpointer buf)
  {
-   gint cnt;
- 
-   for (cnt = 0; cnt < buf_pool->len; cnt++)
+   gint i;
+ #if ENABLE_MT
+   g_static_mutex_lock (&pool_mutex);
+ #endif
+   for (i=0; i<buf_pool->len; i++)
      {
-       BufInfo *info = &g_array_index (buf_pool, BufInfo, cnt);
+       BufInfo *info = &g_array_index (buf_pool, BufInfo, i);
 +
        if (info->buf == buf)
 -        {
 -          info->used --;
 -#if ENABLE_MT
 -          g_static_mutex_unlock (&pool_mutex);
 -#endif
 -          return;
 -        }
 +        info->used--;
      }
 -  g_assert (0);
+ #if ENABLE_MT
+   g_static_mutex_unlock (&pool_mutex);
+ #endif
  }
  
 -static void ensure_buf (GeglBufferIterators *i, gint no)
 +#if HAVE_GPU
 +
 +typedef struct GpuTextureInfo {
 +  gint            used; /* if this texture is currently allocated */
 +  GeglGpuTexture *texture;
 +} GpuTextureInfo;
 +
 +static GArray *gpu_texture_pool = NULL;
 +
 +static GeglGpuTexture *
 +iterator_gpu_texture_pool_get (gint width,
 +                               gint height,
 +                               const Babl *format)
  {
 -  if (i->buf[no]==NULL)
 -    i->buf[no] = iterator_buf_pool_get (babl_format_get_bytes_per_pixel (i->format[no]) *
 -                                        i->i[0].max_size);
 +  gint cnt;
 +
 +  if (G_UNLIKELY (!gpu_texture_pool))
 +    gpu_texture_pool = g_array_new (TRUE, TRUE, sizeof (GpuTextureInfo));
 +
 +  for (cnt = 0; cnt < gpu_texture_pool->len; cnt++)
 +    {
 +      GpuTextureInfo *info = &g_array_index (gpu_texture_pool,
 +                                             GpuTextureInfo,
 +                                             cnt);
 +
 +      if (info->texture->width == width
 +          && info->texture->height == height
 +          && info->texture->format == format
 +          && info->used == 0)
 +        {
 +          info->used++;
 +          return info->texture;
 +        }
 +    }
 +    {
 +      GpuTextureInfo info = {1, NULL};
 +      info.texture  = gegl_gpu_texture_new (width, height, format);
 +
 +      g_array_append_val (gpu_texture_pool, info);
 +      return info.texture;
 +    }
  }
  
 -gboolean gegl_buffer_iterator_next     (GeglBufferIterator *iterator)
 +static void
 +iterator_gpu_texture_pool_release (GeglGpuTexture *texture)
  {
 -  GeglBufferIterators *i = (gpointer)iterator;
 +  gint cnt;
 +
 +  for (cnt = 0; cnt < gpu_texture_pool->len; cnt++)
 +    {
 +      GpuTextureInfo *info = &g_array_index (gpu_texture_pool,
 +                                             GpuTextureInfo,
 +                                             cnt);
 +
 +      if (info->texture == texture)
 +        info->used--;
 +    }
 +}
 +#endif
 +
 +void
 +gegl_buffer_iterator_cleanup (void)
 +{
 +  gint cnt;
 +
 +  if (buf_pool != NULL)
 +    {
 +      for (cnt = 0; cnt < buf_pool->len; cnt++)
 +        {
 +          BufInfo *info = &g_array_index (buf_pool, BufInfo, cnt);
 +          gegl_free (info->buf);
 +          info->buf = NULL;
 +        }
 +
 +      g_array_free (buf_pool, TRUE);
 +      buf_pool = NULL;
 +    }
 +
 +#if HAVE_GPU
 +  if (gpu_texture_pool != NULL)
 +    {
 +      for (cnt = 0; cnt < gpu_texture_pool->len; cnt++)
 +        {
 +          GpuTextureInfo *info = &g_array_index (gpu_texture_pool,
 +                                                 GpuTextureInfo,
 +                                                 cnt);
 +
 +          gegl_gpu_texture_free (info->texture);
 +          info->texture = NULL;
 +        }
 +
 +      g_array_free (gpu_texture_pool, TRUE);
 +      gpu_texture_pool = NULL;
 +    }
 +#endif
 +}
 +
++
++
 +#if DEBUG_DIRECT
 +static glong direct_read = 0;
 +static glong direct_write = 0;
 +static glong in_direct_read = 0;
 +static glong in_direct_write = 0;
 +#endif
 +
 +gboolean
 +gegl_buffer_iterator_next (GeglBufferIterator *iterator)
 +{
 +  gint     no;
    gboolean result = FALSE;
 -  gint no;
  
 -  if (i->buf[0] == (void*)0xdeadbeef)
 +  _GeglBufferIterator *i = (gpointer) iterator;
 +
 +  if (i->is_done)
      g_error ("%s called on finished buffer iterator", G_STRFUNC);
- 
-   /* first we need to finish off any pending write work */
-   if (i->iteration_no > 0)
+   if (i->iteration_no == 0)
+     {
+ #if ENABLE_MT
 -      for (no=0; no<i->iterators;no++)
++      for (no=0; no<i->iterable_count; no++)
+         {
+           gint j;
+           gboolean found = FALSE;
+           for (j=0; j<no; j++)
+             if (i->buffer[no]==i->buffer[j])
+               {
+                 found = TRUE;
+                 break;
+               }
+           if (!found)
+             gegl_buffer_lock (i->buffer[no]);
+         }
+ #endif
+     }
+   else
      {
-       for (no = 0; no < i->iterable_count; no++)
+       /* complete pending write work */
 -      for (no=0; no<i->iterators;no++)
++      for (no=0; no<i->iterable_count;no++)
          {
 -          if (i->flags[no] & GEGL_BUFFER_WRITE)
 -            {
 +          gboolean direct_access
 +            = (i->flags[no] & GEGL_BUFFER_SCAN_COMPATIBLE
 +               && i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE
 +               && i->roi[no].width
 +                    == i->i[no].buffer->tile_storage->tile_width);
 +
 +#if HAVE_GPU
 +          gboolean gpu_direct_access
 +            = (direct_access && i->roi[no].height
 +                 == i->i[no].buffer->tile_storage->tile_height);
 +#endif
  
 -              if (i->flags[no] & GEGL_BUFFER_SCAN_COMPATIBLE &&
 -                  i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE &&
 -                  i->roi[no].width == i->i[no].buffer->tile_storage->tile_width && (i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE))
 +          if (i->flags[no] & GEGL_BUFFER_READ
 +              || i->flags[no] & GEGL_BUFFER_WRITE)
 +            {
 +              if (direct_access)
                  {
 -                   /* direct access */
  #if DEBUG_DIRECT
 -                   direct_write += i->roi[no].width * i->roi[no].height;
 +                  if (i->flags[no] & GEGL_BUFFER_WRITE)
 +                    direct_write += i->roi[no].width * i->roi[no].height;
  #endif
                  }
                else
                  {
 +                  if (i->flags[no] & GEGL_BUFFER_WRITE)
 +                    {
  #if DEBUG_DIRECT
 -                  in_direct_write += i->roi[no].width * i->roi[no].height;
 +                      in_direct_write += i->roi[no].width * i->roi[no].height;
  #endif
-                       gegl_buffer_set (i->buffer[no],
-                                        &i->roi[no],
-                                        i->format[no],
-                                        i->data[no],
-                                        GEGL_AUTO_ROWSTRIDE);
++                      gegl_buffer_set_unlocked (i->buffer[no],
++                                                &(i->roi[no]),
++                                                i->format[no],
++                                                i->data[no],
++                                                GEGL_AUTO_ROWSTRIDE);
 +                    }
 +
 +                  /* XXX: might be inefficient given the current
 +                   * implementation, it should be easy to reimplement the
 +                   * pool as a hash table though
 +                   */
 +                  iterator_buf_pool_release (i->data[no]);
 +                }
  
 -                  ensure_buf (i, no);
 +              i->data[no] = NULL;
 +            }
  
 -                  gegl_buffer_set_unlocked (i->buffer[no], &(i->roi[no]), i->format[no], i->buf[no], GEGL_AUTO_ROWSTRIDE);
 +#if HAVE_GPU
 +          if (i->flags[no] & GEGL_BUFFER_GPU_READ
 +              || i->flags[no] & GEGL_BUFFER_GPU_WRITE)
 +            {
 +              if (gpu_direct_access)
 +                {
 +#if DEBUG_DIRECT
 +                  if (i->flags[no] & GEGL_BUFFER_GPU_WRITE)
 +                    direct_write += i->roi[no].width * i->roi[no].height;
 +#endif
                  }
 +              else
 +                {
 +                  if (i->flags[no] & GEGL_BUFFER_GPU_WRITE)
 +                    {
 +#if DEBUG_DIRECT
 +                      in_direct_write += i->roi[no].width * i->roi[no].height;
 +#endif
 +                      gegl_buffer_gpu_set (i->buffer[no],
 +                                           &i->roi[no],
 +                                           i->gpu_data[no]);
 +                    }
 +
 +                  /* XXX: might be inefficient given the current
 +                   * implementation, it should be easy to reimplement the
 +                   * pool as a hash table though
 +                   */
 +                  iterator_gpu_texture_pool_release (i->gpu_data[no]);
 +                }
 +
 +              i->gpu_data[no] = NULL;
              }
 +#endif
 +
 +          memset (&i->roi[no], 0, sizeof (GeglRectangle));
          }
      }
  
@@@ -638,155 -446,65 +670,154 @@@
      {
        if (i->flags[no] & GEGL_BUFFER_SCAN_COMPATIBLE)
          {
 -          gboolean res;
 -          res = gegl_buffer_tile_iterator_next (&i->i[no]);
 -          if (no == 0)
 -            {
 -              result = res;
 -            }
 -          i->roi[no] = i->i[no].roi2;
 +          gboolean res = gegl_buffer_tile_iterator_next (&i->i[no]);
 +
 +          gint tile_width  = i->i[no].buffer->tile_storage->tile_width;
 +          gint tile_height = i->i[no].buffer->tile_storage->tile_height;
 +
 +          gboolean direct_access;
 +          gboolean gpu_direct_access;
 +
 +          result     = (no == 0) ? res : result;
 +          i->roi[no] = i->i[no].subrect;
  
 -          /* since they were scan compatible this should be true */
 +          /* since they were scan compatible, this shouldn't happen */
            if (res != result)
 -            {
 -              g_print ("%i==%i != 0==%i\n", no, res, result);
 -             } 
 -          g_assert (res == result);
 +            g_error ("%i==%i != 0==%i\n", no, res, result);
 +
 +          direct_access = (i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE
 +                           && i->roi[no].width == tile_width);
 +
 +          gpu_direct_access = (i->roi[no].height == tile_height
 +                               && direct_access);
  
 -          if ((i->flags[no] & GEGL_BUFFER_FORMAT_COMPATIBLE) && 
 -              i->roi[no].width == i->i[no].buffer->tile_storage->tile_width 
 -           )
 +          if (i->flags[no] & GEGL_BUFFER_READ
 +              || i->flags[no] & GEGL_BUFFER_WRITE)
              {
 -              /* direct access */
 -              i->data[no]=i->i[no].sub_data;
 +              if (direct_access)
 +                {
 +                  i->data[no] = i->i[no].sub_data;
  #if DEBUG_DIRECT
 -              direct_read += i->roi[no].width * i->roi[no].height;
 +                  direct_read += i->roi[no].width * i->roi[no].height;
  #endif
 +                }
 +              else
 +                {
 +                  /* unref held tile to prevent lock contention */
 +                  if (i->i[no].tile != NULL)
 +                    {
 +                      gegl_tile_unlock (i->i[no].tile);
 +
-                       g_object_unref (i->i[no].tile);
++                      gegl_tile_unref (i->i[no].tile);
 +                      i->i[no].tile = NULL;
 +
 +                      i->i[no].sub_data = NULL;
 +                    }
 +
-                   i->data[no] = iterator_buf_pool_get (i->roi[no].width,
-                                                        i->roi[no].height,
-                                                        i->format[no]);
++                  i->data[no] = iterator_buf_pool_get (
++                                  i->roi[no].width * i->roi[no].height *
++                              babl_format_get_bytes_per_pixel (i->format[no]));
 +
 +                  if (i->flags[no] & GEGL_BUFFER_READ)
-                     gegl_buffer_get (i->buffer[no],
-                                      1.0,
-                                      &i->roi[no],
-                                      i->format[no],
-                                      i->data[no],
-                                      GEGL_AUTO_ROWSTRIDE);
++                    gegl_buffer_get_unlocked (i->buffer[no],
++                                              1.0, &(i->roi[no]),
++                                              i->format[no], 
++                                              i->data[no],
++                                              GEGL_AUTO_ROWSTRIDE);
 +#if DEBUG_DIRECT
 +                  in_direct_read += i->roi[no].width * i->roi[no].height;
 +#endif
 +                }
              }
 -          else
 -            {
 -              ensure_buf (i, no);
  
 -              if (i->flags[no] & GEGL_BUFFER_READ)
 +#if HAVE_GPU
 +          if (i->flags[no] & GEGL_BUFFER_GPU_READ
 +              || i->flags[no] & GEGL_BUFFER_GPU_WRITE)
 +            {
 +              if (gpu_direct_access)
                  {
 -                  gegl_buffer_get_unlocked (i->buffer[no], 1.0, &(i->roi[no]), i->format[no], i->buf[no], GEGL_AUTO_ROWSTRIDE);
 +                  i->gpu_data[no] = i->i[no].gpu_data;
 +#if DEBUG_DIRECT
 +                  direct_read += i->roi[no].width * i->roi[no].height;
 +#endif
                  }
 -
 -              i->data[no]=i->buf[no];
 +              else
 +                {
 +                  /* unref held tile to prevent lock contention */
 +                  if (i->i[no].tile != NULL)
 +                    {
 +                      gegl_tile_unlock (i->i[no].tile);
 +
-                       g_object_unref (i->i[no].tile);
++                      gegl_tile_unref (i->i[no].tile);
 +                      i->i[no].tile = NULL;
 +
 +                      i->i[no].gpu_data = NULL;
 +                    }
 +
 +                  i->gpu_data[no] = iterator_gpu_texture_pool_get (
 +                                      i->roi[no].width,
 +                                      i->roi[no].height,
 +                                      i->format[no]);
 +
 +                  if (i->flags[no] & GEGL_BUFFER_GPU_READ)
 +                    gegl_buffer_gpu_get (i->buffer[no],
 +                                         1.0,
 +                                         &i->roi[no],
 +                                         i->gpu_data[no]);
  #if DEBUG_DIRECT
 -              in_direct_read += i->roi[no].width * i->roi[no].height;
 +                  in_direct_read += i->roi[no].width * i->roi[no].height;
  #endif
 +                }
              }
 +#endif
          }
        else
 -        { 
 +        {
            /* we copy the roi from iterator 0  */
 -          i->roi[no] = i->roi[0];
 -          i->roi[no].x += (i->rect[no].x-i->rect[0].x);
 -          i->roi[no].y += (i->rect[no].y-i->rect[0].y);
 -
 -          ensure_buf (i, no);
 +          i->roi[no]    = i->roi[0];
 +          i->roi[no].x += i->rect[no].x - i->rect[0].x;
 +          i->roi[no].y += i->rect[no].y - i->rect[0].y;
  
 -          if (i->flags[no] & GEGL_BUFFER_READ)
 +          if (i->flags[no] & GEGL_BUFFER_READ
 +              || i->flags[no] & GEGL_BUFFER_WRITE)
              {
-               i->data[no] = iterator_buf_pool_get (i->roi[no].width,
-                                                    i->roi[no].height,
-                                                    i->format[no]);
 -              gegl_buffer_get_unlocked (i->buffer[no], 1.0, &(i->roi[no]), i->format[no], i->buf[no], GEGL_AUTO_ROWSTRIDE);
++              i->data[no] = iterator_buf_pool_get (i->roi[no].width *
++                                                   i->roi[no].height *
++                               babl_format_get_bytes_per_pixel (i->format[no]));
 +
 +              if (i->flags[no] & GEGL_BUFFER_READ)
 +                gegl_buffer_get (i->buffer[no],
 +                                 1.0,
 +                                 &i->roi[no],
 +                                 i->format[no],
 +                                 i->data[no],
 +                                 GEGL_AUTO_ROWSTRIDE);
 +#if DEBUG_DIRECT
 +              in_direct_read += i->roi[no].width * i->roi[no].height;
 +#endif
              }
 -          i->data[no]=i->buf[no];
  
 +#if HAVE_GPU
 +          if (i->flags[no] & GEGL_BUFFER_GPU_READ
 +              || i->flags[no] & GEGL_BUFFER_GPU_WRITE)
 +            {
 +              i->gpu_data[no] = iterator_gpu_texture_pool_get (
 +                                  i->roi[no].width,
 +                                  i->roi[no].height,
 +                                  i->format[no]);
 +
 +              if (i->flags[no] & GEGL_BUFFER_GPU_READ)
 +                gegl_buffer_gpu_get (i->buffer[no],
 +                                     1.0,
 +                                     &i->roi[no],
 +                                     i->gpu_data[no]);
  #if DEBUG_DIRECT
 -          in_direct_read += i->roi[no].width * i->roi[no].height;
 +              in_direct_read += i->roi[no].width * i->roi[no].height;
 +#endif
 +            }
  #endif
          }
 +
        i->length = i->roi[no].width * i->roi[no].height;
      }
  
@@@ -794,20 -512,39 +825,37 @@@
  
    if (result == FALSE)
      {
-       for (no = 0; no < i->iterable_count; no++)
+ 
+ #if ENABLE_MT
 -      for (no=0; no<i->iterators;no++)
++      for (no=0; no<i->iterable_count;no++)
+         {
+           gint j;
+           gboolean found = FALSE;
+           for (j=0; j<no; j++)
+             if (i->buffer[no]==i->buffer[j])
+               {
+                 found = TRUE;
+                 break;
+               }
+           if (!found)
+             gegl_buffer_unlock (i->buffer[no]);
+         }
+ #endif
+ 
 -      for (no=0; no<i->iterators;no++)
++      for (no=0; no<i->iterable_count;no++)
          {
 -          if (i->buf[no])
 -            iterator_buf_pool_release (i->buf[no]);
 -          i->buf[no]=NULL;
            g_object_unref (i->buffer[no]);
 +          i->buffer[no] = NULL;
          }
 +
  #if DEBUG_DIRECT
 -      g_print ("%f %f\n", (100.0*direct_read/(in_direct_read+direct_read)),
 -                           100.0*direct_write/(in_direct_write+direct_write));
 +      g_print ("%f %f\n",
 +               100.0 * direct_read / (in_direct_read + direct_read),
 +               100.0 * direct_write / (in_direct_write + direct_write));
  #endif
 -      i->buf[0]=(void*)0xdeadbeef;
 -      g_free (i);
 +      i->is_done = TRUE;
      }
  
 -
    return result;
  }
  
diff --cc gegl/buffer/gegl-buffer-linear.c
index 9c26ecb,7a7a32d..7d8a04b
--- a/gegl/buffer/gegl-buffer-linear.c
+++ b/gegl/buffer/gegl-buffer-linear.c
@@@ -151,8 -155,7 +155,7 @@@ gegl_buffer_linear_open (GeglBuffe
        tile = gegl_tile_source_get_tile ((GeglTileSource*) (buffer),
                                          0,0,0);
        g_assert (tile);
-       gegl_buffer_lock (buffer);
 -      gegl_tile_lock (tile);
 +      gegl_tile_lock (tile, GEGL_TILE_LOCK_READWRITE);
  
        g_object_set_data (G_OBJECT (buffer), "linear-tile", tile);
  
diff --cc gegl/buffer/gegl-buffer.c
index 5d3f98a,ff8d91c..012dab7
--- a/gegl/buffer/gegl-buffer.c
+++ b/gegl/buffer/gegl-buffer.c
@@@ -721,10 -725,15 +725,19 @@@ gegl_buffer_get_tile (GeglTileSource *s
         * coordinates.
         */
        {
-         tile->tile_storage = buffer->tile_storage;
++#if ENABLE_MT
++        g_mutex_lock (tile->mutex);
++#endif
+         if (!tile->tile_storage)
+           {
 -            gegl_tile_lock (tile);
+             tile->tile_storage = buffer->tile_storage;
 -            gegl_tile_unlock (tile);
+           }
          tile->x = x;
          tile->y = y;
          tile->z = z;
++#if ENABLE_MT
++        g_mutex_unlock (tile->mutex);
++#endif
        }
      }
  
diff --cc gegl/buffer/gegl-tile.c
index 1b3f430,3388ad2..5095114
--- a/gegl/buffer/gegl-tile.c
+++ b/gegl/buffer/gegl-tile.c
@@@ -35,107 -35,24 +35,39 @@@
  #include "gegl-tile-source.h"
  #include "gegl-tile-storage.h"
  
 +#if HAVE_GPU
 +#include "gegl-gpu-types.h"
 +#include "gegl-gpu-texture.h"
 +#include "gegl-gpu-init.h"
 +#endif
- 
- 
- G_DEFINE_TYPE (GeglTile, gegl_tile, G_TYPE_OBJECT)
- enum
- {
-   PROP_0,
-   PROP_X,
-   PROP_Y,
-   PROP_Z,
-   PROP_SIZE
- };
- static GObjectClass *parent_class = NULL;
- 
- 
- static void
- get_property (GObject    *gobject,
-               guint       property_id,
-               GValue     *value,
-               GParamSpec *pspec)
- {
-   GeglTile *tile = GEGL_TILE (gobject);
- 
-   switch (property_id)
-     {
-       case PROP_X:
-         g_value_set_int (value, tile->x);
-         break;
- 
-       case PROP_Y:
-         g_value_set_int (value, tile->y);
-         break;
- 
-       case PROP_Z:
-         g_value_set_int (value, tile->z);
-         break;
- 
-       case PROP_SIZE:
-         g_value_set_int (value, tile->size);
-         break;
- 
-       default:
-         G_OBJECT_WARN_INVALID_PROPERTY_ID (gobject, property_id, pspec);
-         break;
-     }
- }
- 
- static void
- set_property (GObject      *gobject,
-               guint         property_id,
-               const GValue *value,
-               GParamSpec   *pspec)
- {
-   GeglTile *tile = GEGL_TILE (gobject);
- 
-   switch (property_id)
-     {
-       case PROP_X:
-         tile->x = g_value_get_int (value);
-         return;
- 
-       case PROP_Y:
-         tile->y = g_value_get_int (value);
-         return;
- 
-       case PROP_Z:
-         tile->z = g_value_get_int (value);
-         return;
- 
-       default:
-         G_OBJECT_WARN_INVALID_PROPERTY_ID (gobject, property_id, pspec);
-         break;
-     }
- }
- 
  #include "gegl-utils.h"
  
 -static void default_free (gpointer data,
 -                          gpointer userdata)
 +static void
 +default_free (gpointer        data,
 +#if HAVE_GPU
 +              GeglGpuTexture *gpu_data,
 +#endif
 +              gpointer        userdata)
  {
 -  gegl_free (data);
 +  if (data != NULL)
 +    gegl_free (data);
 +
 +#if HAVE_GPU
 +  if (gpu_data != NULL)
 +    gegl_gpu_texture_free (gpu_data);
 +#endif
  }
  
- static void
- dispose (GObject *object)
+ GeglTile *gegl_tile_ref (GeglTile *tile)
  {
-   GeglTile *tile = (GeglTile *) object;
+   g_atomic_int_inc (&tile->ref_count);
+   return tile;
+ }
+ 
+ void gegl_tile_unref (GeglTile *tile)
+ {
+   if (!g_atomic_int_dec_and_test (&tile->ref_count))
+     return;
  
    if (!gegl_tile_is_stored (tile))
      gegl_tile_store (tile);
@@@ -175,107 -79,41 +107,77 @@@
        tile->mutex = NULL;
      }
  #endif
- 
-   (*G_OBJECT_CLASS (parent_class)->dispose) (object);
+   g_slice_free (GeglTile, tile);
  }
  
- static void
- gegl_tile_class_init (GeglTileClass *class)
+ GeglTile *
+ gegl_tile_new_bare (void)
  {
-   GObjectClass *gobject_class = G_OBJECT_CLASS (class);
- 
-   gobject_class->set_property = set_property;
-   gobject_class->get_property = get_property;
-   gobject_class->dispose      = dispose;
-   parent_class                = g_type_class_peek_parent (class);
- 
-   g_object_class_install_property (
-     gobject_class, PROP_X,
-     g_param_spec_int ("x", "x", "Horizontal index",
-                       G_MININT / 2, G_MAXINT / 2, 0,
-                       G_PARAM_READWRITE));
- 
-   g_object_class_install_property (
-     gobject_class, PROP_Y,
-     g_param_spec_int ("y", "y", "Vertical index",
-                       G_MININT / 2, G_MAXINT / 2, 0,
-                       G_PARAM_READWRITE));
- 
-   g_object_class_install_property (
-     gobject_class, PROP_Z,
-     g_param_spec_int ("z", "z", "Pyramid level 0=100% 1=50% 2=25%",
-                       0, 256, 0,
-                       G_PARAM_READWRITE));
- 
-   g_object_class_install_property (
-     gobject_class, PROP_SIZE,
-     g_param_spec_int ("size", "size", "size of linear memory buffer in bytes.",
-                       0, 0, 0,
-                       G_PARAM_READABLE));
- }
+   GeglTile *tile = g_slice_new0 (GeglTile);
 +
- static void
- gegl_tile_init (GeglTile *tile)
- {
-   tile->data     = NULL;
+   tile->ref_count = 1;
+   tile->tile_storage = NULL;
+   tile->stored_rev = 0;
 +
++  tile->data     = NULL;
 +#if HAVE_GPU 
 +  tile->gpu_data = NULL;
 +  tile->gpu_rev    = 0;
 +#endif
-   tile->tile_storage = NULL;
 +
    tile->rev        = 0;
 -  tile->lock       = 0;
 -  tile->data       = NULL;
 +  tile->stored_rev = 0;
 +
 +  tile->read_locks  = 0;
 +  tile->write_locks = 0;
 +  tile->lock_mode   = GEGL_TILE_LOCK_NONE;
  
    tile->next_shared = tile;
    tile->prev_shared = tile;
  
- #if ENABLE_MP
-   tile->lock = g_mutex_new ();
+ #if ENABLE_MT
+   tile->mutex = g_mutex_new ();
  #endif
 +
    tile->destroy_notify = default_free;
+ 
+   return tile;
  }
  
  GeglTile *
  gegl_tile_dup (GeglTile *src)
  {
-   GeglTile *tile = g_object_new (GEGL_TYPE_TILE, NULL);
+   GeglTile *tile = gegl_tile_new_bare ();
  
 +#if HAVE_GPU
 +  if (gegl_gpu_is_accelerated ())
 +    {
 +      if (src->rev > src->gpu_rev)
 +        {
 +          gegl_gpu_texture_set (src->gpu_data, NULL, src->data,
 +                                gegl_tile_get_format (src));
 +
 +          src->gpu_rev = src->rev;
 +        }
 +      else if (src->gpu_rev > src->rev)
 +        {
 +          gegl_gpu_texture_get (src->gpu_data, NULL, src->data,
 +                                gegl_tile_get_format (src));
 +
 +          src->rev = src->gpu_rev;
 +        }
 +    }
 +  tile->gpu_data = src->gpu_data;
 +  tile->gpu_rev    = 1;
 +#endif
 +
 +  tile->data     = src->data;
 +  tile->size     = src->size;
 +
 +  tile->tile_storage = src->tile_storage;
 +
    tile->rev        = 1;
    tile->stored_rev = 1;
 -  tile->tile_storage    = src->tile_storage;
 -  tile->data       = src->data;
 -  tile->size       = src->size;
  
    tile->next_shared              = src->next_shared;
    src->next_shared               = tile;
@@@ -286,20 -136,12 +200,20 @@@
  }
  
  GeglTile *
 -gegl_tile_new (gint size)
 +gegl_tile_new (gint        width,
 +               gint        height,
 +               const Babl *format)
  {
-   GeglTile *tile = g_object_new (GEGL_TYPE_TILE, NULL);
+   GeglTile *tile = gegl_tile_new_bare ();
  
 -  tile->data       = gegl_malloc (size);
 -  tile->size       = size;
 +  tile->size = width * height * babl_format_get_bytes_per_pixel (format);
 +  tile->data = gegl_malloc (tile->size);
 +
 +#if HAVE_GPU
 +  if (gegl_gpu_is_accelerated ())
 +    tile->gpu_data = gegl_gpu_texture_new (width, height, format);
 +#endif
 +
    tile->stored_rev = 1;
  
    return tile;
@@@ -366,108 -169,36 +280,110 @@@ gegl_tile_unclone (GeglTile *tile
        tile->next_shared->prev_shared = tile->prev_shared;
        tile->prev_shared              = tile;
        tile->next_shared              = tile;
 +
 +#if HAVE_GPU
 +      if (gegl_gpu_is_accelerated ())
 +        tile->gpu_data = gegl_gpu_texture_dup (tile->gpu_data);
 +#endif
      }
  }
 -#if 0
 -static gint total_locks   = 0;
 -static gint total_unlocks = 0;
 -#endif
 +
 +static gint total_write_locks   = 0;
 +static gint total_write_unlocks = 0;
 +
 +static gint total_read_locks    = 0;
 +static gint total_read_unlocks  = 0;
  
+ void gegl_bt (void);
+ 
  void
 -gegl_tile_lock (GeglTile *tile)
 +gegl_tile_lock (GeglTile        *tile,
 +                GeglTileLockMode lock_mode)
  {
 -#if ENABLE_MT
 -  g_mutex_lock (tile->mutex);
 +#if HAVE_GPU
 +  if (!gegl_gpu_is_accelerated ())
 +    lock_mode &= ~GEGL_TILE_LOCK_GPU_READ & ~GEGL_TILE_LOCK_GPU_WRITE;
  #endif
  
 -  if (tile->lock != 0)
 +  if (tile->write_locks > 0)
 +    {
 +      if (lock_mode & GEGL_TILE_LOCK_WRITE
 +          || lock_mode & GEGL_TILE_LOCK_GPU_WRITE)
 +        {
 +          g_print ("hm\n");
 +          g_warning ("strange tile write-lock count: %i", tile->write_locks);
 +        }
 +
 +      if (lock_mode & GEGL_TILE_LOCK_READ
 +          || lock_mode & GEGL_TILE_LOCK_GPU_READ)
 +        g_warning ("shouldn't lock for reading while write-lock (%i) is active",
 +                   tile->write_locks);
 +    }
 +
 +  if (tile->read_locks > 0)
      {
 -      g_warning ("strange tile lock count: %i", tile->lock);
 -      gegl_bt ();
 +      if (lock_mode & GEGL_TILE_LOCK_READ
 +          || lock_mode & GEGL_TILE_LOCK_GPU_READ)
 +        {
 +          g_print ("hm\n");
 +          g_warning ("strange tile read-lock count: %i", tile->read_locks);
 +        }
 +
 +      if (lock_mode & GEGL_TILE_LOCK_WRITE
 +          || lock_mode & GEGL_TILE_LOCK_GPU_WRITE)
 +        g_warning ("shouldn't lock for writing while read-lock (%i) is active",
 +                   tile->read_locks);
      }
 -#if 0
 -  total_locks++;
 +
 +  if (lock_mode != GEGL_TILE_LOCK_NONE)
 +    {
- #if ENABLE_MP
-       g_static_mutex_lock (tile->mutex);
++#if ENABLE_MT
++      g_mutex_lock (tile->mutex);
  #endif
 +    }
 +  else
 +    g_warning ("%s called with lock_mode GEGL_TILE_LOCK_NONE", G_STRFUNC);
  
 -  tile->lock++;
 -  /*fprintf (stderr, "global tile locking: %i %i\n", locks, unlocks);*/
 +  if (lock_mode & GEGL_TILE_LOCK_READ
 +      || lock_mode & GEGL_TILE_LOCK_GPU_READ)
 +    {
 +      tile->read_locks++;
 +      total_read_locks++;
 +    }
  
 -  gegl_tile_unclone (tile);
 -  /*gegl_buffer_add_dirty (tile->buffer, tile->x, tile->y);*/
 +  if (lock_mode & GEGL_TILE_LOCK_WRITE
 +      || lock_mode & GEGL_TILE_LOCK_GPU_WRITE)
 +    {
 +      tile->write_locks++;
 +      total_write_locks++;
 +
 +      /*fprintf (stderr, "global tile locking: %i %i\n", locks, unlocks);*/
 +      gegl_tile_unclone (tile);
 +      /*gegl_buffer_add_dirty (tile->buffer, tile->x, tile->y);*/
 +    }
 +
 +#if HAVE_GPU
 +  if (gegl_gpu_is_accelerated ())
 +    {
 +      if (lock_mode & GEGL_TILE_LOCK_GPU_READ && tile->rev > tile->gpu_rev)
 +        {
 +          gegl_gpu_texture_set (tile->gpu_data, NULL, tile->data,
 +                                gegl_tile_get_format (tile));
 +
 +          tile->gpu_rev = tile->rev;
 +        }
 +
 +      if (lock_mode & GEGL_TILE_LOCK_READ && tile->gpu_rev > tile->rev)
 +        {
 +          gegl_gpu_texture_get (tile->gpu_data, NULL, tile->data,
 +                                gegl_tile_get_format (tile));
 +
 +          tile->rev = tile->gpu_rev;
 +        }
 +    }
 +#endif
 +
 +  tile->lock_mode = lock_mode;
  }
  
  static void
@@@ -501,57 -231,27 +417,56 @@@ gegl_tile_void_pyramid (GeglTile *tile
  void
  gegl_tile_unlock (GeglTile *tile)
  {
 -#if 0
 -  total_unlocks++;
 -#endif
 -  if (tile->lock == 0)
 +  if (tile->lock_mode & GEGL_TILE_LOCK_WRITE
 +      || tile->lock_mode & GEGL_TILE_LOCK_GPU_WRITE)
      {
 -      g_warning ("unlocked a tile with lock count == 0");
 +      total_write_unlocks++;
 +
 +      if (tile->write_locks == 0)
 +        g_warning ("unlocked a tile with write-lock count == 0");
 +
 +      tile->write_locks--;
 +
 +      if (tile->write_locks == 0)
 +        {
 +          guint rev     = tile->rev;
 +#if HAVE_GPU
 +          guint gpu_rev = tile->gpu_rev;
 +
 +          if (tile->lock_mode & GEGL_TILE_LOCK_GPU_WRITE)
 +            tile->gpu_rev = MAX (gpu_rev, rev) + 1;
 +#endif
 +
 +          if (tile->lock_mode & GEGL_TILE_LOCK_WRITE)
 +            tile->rev = 
 +#if HAVE_GPU 
 +              MAX (rev, gpu_rev) + 1;
 +#else
 +          rev + 1;
 +#endif
 +
 +          /* TODO: examine how this can be improved with h/w mipmaps */
 +          if (tile->z == 0)
 +            gegl_tile_void_pyramid (tile);
 +        }
      }
 -  tile->lock--;
 -  if (tile->lock == 0 &&
 -      tile->z == 0)
 +
 +  if (tile->lock_mode & GEGL_TILE_LOCK_READ
 +      || tile->lock_mode & GEGL_TILE_LOCK_GPU_READ)
      {
 -      gegl_tile_void_pyramid (tile);
 +      total_read_unlocks++;
 +
 +      if (tile->read_locks == 0)
 +        g_warning ("unlocked a tile with read-lock count == 0");
 +
 +      tile->read_locks--;
      }
- 
- #if ENABLE_MP
 -  if (tile->lock==0)
 -    tile->rev++;
+ #if ENABLE_MT
    g_mutex_unlock (tile->mutex);
  #endif
 +  tile->lock_mode = GEGL_TILE_LOCK_NONE;
  }
  
 -
  gboolean
  gegl_tile_is_stored (GeglTile *tile)
  {
diff --cc gegl/buffer/gegl-tile.h
index 5c4b0bf,403652b..57f55ab
--- a/gegl/buffer/gegl-tile.h
+++ b/gegl/buffer/gegl-tile.h
@@@ -22,79 -22,34 +22,77 @@@
  #include <glib-object.h>
  
  #include "gegl-buffer-types.h"
 +#include "gegl-gpu-types.h"
 +
- #define GEGL_TYPE_TILE            (gegl_tile_get_type ())
- #define GEGL_TILE(obj)            (G_TYPE_CHECK_INSTANCE_CAST ((obj), GEGL_TYPE_TILE, GeglTile))
- #define GEGL_TILE_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass),  GEGL_TYPE_TILE, GeglTileClass))
- #define GEGL_IS_TILE(obj)         (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GEGL_TYPE_TILE))
- #define GEGL_IS_TILE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass),  GEGL_TYPE_TILE))
- #define GEGL_TILE_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj),  GEGL_TYPE_TILE, GeglTileClass))
- 
 +typedef enum
 +{
 +  GEGL_TILE_LOCK_NONE,
 +  GEGL_TILE_LOCK_READ          =  1,
 +  GEGL_TILE_LOCK_WRITE         = (1 << 1),
 +  GEGL_TILE_LOCK_READWRITE     = GEGL_TILE_LOCK_READ
 +                                 | GEGL_TILE_LOCK_WRITE,
 +
 +  GEGL_TILE_LOCK_GPU_READ      = (1 << 2),
 +  GEGL_TILE_LOCK_GPU_WRITE     = (1 << 3),
 +  GEGL_TILE_LOCK_GPU_READWRITE = GEGL_TILE_LOCK_GPU_READ
 +                                 | GEGL_TILE_LOCK_GPU_WRITE,
 +
 +  GEGL_TILE_LOCK_ALL_READ      = GEGL_TILE_LOCK_READ
 +                                 | GEGL_TILE_LOCK_GPU_READ,
 +  GEGL_TILE_LOCK_ALL_WRITE     = GEGL_TILE_LOCK_WRITE
 +                                 | GEGL_TILE_LOCK_GPU_WRITE,
 +
 +  GEGL_TILE_LOCK_ALL           = GEGL_TILE_LOCK_ALL_READ
 +                                 | GEGL_TILE_LOCK_ALL_WRITE
 +} GeglTileLockMode;
 +
+ 
  /* the instance size of a GeglTile is a bit large, and should if possible be
   * trimmed down
   */
  struct _GeglTile
  {
-   GObject          parent_instance;
+  /* GObject          parent_instance;*/
+   gint             ref_count;
  
 -  guchar          *data;        /* actual pixel data for tile, a linear buffer*/
 -  gint             size;        /* The size of the linear buffer */
 +  guchar          *data;         /* actual pixel data for tile,
 +                                  * a linear buffer
 +                                  */
 +  gint             size;         /* The size of the linear buffer */
 +#if HAVE_GPU
 +  GeglGpuTexture  *gpu_data;     /* pixel data for tile, stored in the GPU */
 +#endif
  
    GeglTileStorage *tile_storage; /* the buffer from which this tile was
 -                                  * retrieved needed for the tile to be able to
 -                                  * store itself back (for instance when it is
 -                                  * unreffed for the last time)
 +                                  * retrieved, needed for the tile to be able
 +                                  * to store itself back (for instance when it
 +                                  * is unreffed for the last time)
                                    */
    gint             x, y, z;
  
 +  guint            rev;          /* this tile's revision */
 +#if HAVE_GPU
 +  guint            gpu_rev;      /* this tile's GPU data revision */ 
 +#endif
 +  guint            stored_rev;   /* what revision was the tile when it was
 +                                  * committed to the tile_storage? (currently
 +                                  * set to 1 when loaded from disk)
 +                                  */
  
 -  guint            rev;         /* this tile revision */
 -  guint            stored_rev;  /* what revision was we when we from tile_storage?
 -                                   (currently set to 1 when loaded from disk */
 +  guint            read_locks;   /* number of times the tile is read-locked,
 +                                  * should in theory just have the values 0/1,
 +                                  * note that we might want to have shared
 +                                  * reads though (not yet implemented)
 +                                  */
 +  gchar            write_locks;  /* number of times the tile is write-locked,
 +                                  * should in theory just have the values 0/1
 +                                  */
 +  GeglTileLockMode lock_mode;
  
- #if ENABLE_MP
+   gchar            lock;        /* number of times the tile is write locked
+                                  * should in theory just have the values 0/1
+                                  */
+ #if ENABLE_MT
    GMutex          *mutex;
  #endif
  
@@@ -111,66 -62,57 +109,66 @@@
    gpointer         destroy_notify_data;
  };
  
- struct _GeglTileClass
- {
-   GObjectClass parent_class;
- };
- 
- GType           gegl_tile_get_type     (void) G_GNUC_CONST;
 -GeglTile   * gegl_tile_new        (gint     size);
 -GeglTile   * gegl_tile_new_bare   (void); /* special hack for linear bufs */
 -GeglTile   * gegl_tile_ref        (GeglTile *tile);
 -void         gegl_tile_unref      (GeglTile *tile);
 -void       * gegl_tile_get_format (GeglTile *tile);
  
- GeglTile       *gegl_tile_new          (gint width,
-                                         gint height,
-                                         const Babl *format);
++GeglTile      * gegl_tile_new_bare   (void);
++GeglTile      * gegl_tile_new        (gint width,
++                                      gint height,
++                                      const Babl *format);
++GeglTile      * gegl_tile_ref        (GeglTile *tile);
++void            gegl_tile_unref      (GeglTile *tile);
  
- Babl           *gegl_tile_get_format   (GeglTile *tile);
 -/* lock a tile for writing, this would allow writing to buffers
 - * later gotten with get_data()
 - */
 -void         gegl_tile_lock       (GeglTile *tile);
 -/* get a pointer to the linear buffer of the tile.
 - */
 -#define gegl_tile_get_data(tile)  ((guchar*)((tile)->data))
++Babl          * gegl_tile_get_format (GeglTile *tile);
 +gint            gegl_tile_get_width    (GeglTile *tile);
 +gint            gegl_tile_get_height   (GeglTile *tile);
  
 -/* unlock the tile notifying the tile that we're done manipulating
 - * the data.
 +/* lock a tile for access, this would allow access to buffers
 + * later gotten with get_data() or get_gpu_data()
   */
 -void         gegl_tile_unlock     (GeglTile *tile);
 +void            gegl_tile_lock         (GeglTile *tile,
 +                                        GeglTileLockMode lock_mode);
  
 +/* get a pointer to the linear buffer of the tile */
 +guchar         *gegl_tile_get_data     (GeglTile *tile);
  
 +#if HAVE_GPU
 +/* get a pointer to the GPU data of the tile */
 +GeglGpuTexture *gegl_tile_get_gpu_data (GeglTile *tile);
 +#endif
  
 -gboolean     gegl_tile_is_stored  (GeglTile *tile);
 -gboolean     gegl_tile_store      (GeglTile *tile);
 -void         gegl_tile_void       (GeglTile *tile);
 -GeglTile    *gegl_tile_dup        (GeglTile *tile);
 -
 -/* computes the positive integer remainder (also for negative dividends)
 +/* unlock the tile notifying the tile that we're done accessing
 + * the data
   */
 +void            gegl_tile_unlock       (GeglTile *tile);
 +
 +gboolean        gegl_tile_is_stored    (GeglTile *tile);
 +gboolean        gegl_tile_store        (GeglTile *tile);
 +void            gegl_tile_void         (GeglTile *tile);
 +GeglTile       *gegl_tile_dup          (GeglTile *tile);
 +
 +/* computes the positive integer remainder (also for negative dividends) */
  #define GEGL_REMAINDER(dividend, divisor) \
                     (((dividend) < 0) ? \
 -                    (divisor) - 1 - ((-((dividend) + 1)) % (divisor)) : \
 -                    (dividend) % (divisor))
 +                     (divisor) - 1 - ((-((dividend) + 1)) % (divisor)) : \
 +                     (dividend) % (divisor))
  
 -#define gegl_tile_offset(coordinate, stride) GEGL_REMAINDER((coordinate), (stride))
 +#define gegl_tile_offset(coordinate, stride) \
 +                   GEGL_REMAINDER ((coordinate), (stride))
  
  /* helper function to compute tile indices and offsets for coordinates
   * based on a tile stride (tile_width or tile_height)
   */
 -#define gegl_tile_indice(coordinate,stride) \
 -  (((coordinate) >= 0)?\
 -      (coordinate) / (stride):\
 -      ((((coordinate) + 1) /(stride)) - 1))
 +#define gegl_tile_index(coordinate, stride) \
 +                   (((coordinate) >= 0) ? \
 +                     (coordinate) / (stride) : \
 +                     ((((coordinate) + 1) / (stride)) - 1))
  
- /* utility low-level functions used by undo system */
- void            gegl_tile_swp          (GeglTile *a,
-                                         GeglTile *b);
- void            gegl_tile_cpy          (GeglTile *src,
-                                         GeglTile *dst);
+ /* utility low-level functions used by an undo system in horizon
+  * where the GeglBuffer originated, kept around in case they
+  * become useful again
+  */
+ void         gegl_tile_swp        (GeglTile *a,
+                                    GeglTile *b);
+ void         gegl_tile_cpy        (GeglTile *src,
+                                    GeglTile *dst);
  
 -#endif
 +#endif /* __GEGL_TILE_H__ */
diff --cc gegl/gegl-config.c
index b0f45a3,74a5365..87c16ad
--- a/gegl/gegl-config.c
+++ b/gegl/gegl-config.c
@@@ -37,9 -37,10 +37,11 @@@ enu
    PROP_SWAP,
    PROP_BABL_TOLERANCE,
    PROP_TILE_WIDTH,
 -  PROP_TILE_HEIGHT
 +  PROP_TILE_HEIGHT,
- 
 +  PROP_GPU_ENABLED
+ #if ENABLE_MT
+   ,PROP_THREADS
+ #endif
  };
  
  static void
@@@ -80,9 -81,10 +82,16 @@@ get_property (GObject    *gobject
          g_value_set_string (value, config->swap);
          break;
  
 +#if HAVE_GPU
 +      case PROP_GPU_ENABLED:
 +        g_value_set_boolean (value, config->gpu_enabled);
++        break;
++#endif
++
+ #if ENABLE_MT
+       case PROP_THREADS:
+         g_value_set_int (value, config->threads);
+         break;
  #endif
  
        default:
@@@ -133,11 -135,11 +142,16 @@@ set_property (GObject      *gobject
           g_free (config->swap);
          config->swap = g_value_dup_string (value);
          break;
 +#if HAVE_GPU
 +      case PROP_GPU_ENABLED:
 +        config->gpu_enabled = g_value_get_boolean (value);
 +        break;
 +#endif
+ #if ENABLE_MT
+       case PROP_THREADS:
+         config->threads = g_value_get_int (value);
 -        return;
++        break;
+ #endif
        default:
          G_OBJECT_WARN_INVALID_PROPERTY_ID (gobject, property_id, pspec);
          break;
@@@ -203,10 -205,12 +217,16 @@@ gegl_config_class_init (GeglConfigClas
                                     g_param_spec_string ("swap", "Swap", "where gegl stores it's swap files", NULL,
                                                       G_PARAM_READWRITE));
  
 +
 +  g_object_class_install_property (gobject_class, PROP_GPU_ENABLED,
 +                                   g_param_spec_string ("gpu-enabled", "GPU-support enabled", "whether or not GPU support is enabled", FALSE,
 +                                                     G_PARAM_READWRITE));
+ #if ENABLE_MT
+   g_object_class_install_property (gobject_class, PROP_TILE_HEIGHT,
+                                    g_param_spec_int ("threads", "Number of concurrent evaluation threads", "default tile height for created buffers.",
+                                                      0, 16, 2,
+                                                      G_PARAM_READWRITE));
+ #endif
  }
  
  static void
@@@ -218,7 -222,7 +238,10 @@@ gegl_config_init (GeglConfig *self
    self->chunk_size  = 512 * 512;
    self->tile_width  = 64;
    self->tile_height = 128;
 +#if HAVE_GPU
 +  self->gpu_enabled = FALSE;
 +#endif
+ #if ENABLE_MT
+   self->threads = 2;
+ #endif
  }
diff --cc gegl/gegl-config.h
index d2d39cc,f7bbd46..94506ab
--- a/gegl/gegl-config.h
+++ b/gegl/gegl-config.h
@@@ -44,10 -44,9 +44,13 @@@ struct _GeglConfi
    gdouble  babl_tolerance;
    gint     tile_width;
    gint     tile_height;
 +
 +#if HAVE_GPU
 +  gboolean gpu_enabled;
 +#endif
+ #if ENABLE_MT
+   gint     threads;
+ #endif
  };
  
  struct _GeglConfigClass
diff --cc gegl/gegl-init.c
index bf65a41,950f27a..c7f82fb
--- a/gegl/gegl-init.c
+++ b/gegl/gegl-init.c
@@@ -203,20 -197,17 +203,23 @@@ gegl_init (gint    *argc
  
    g_option_context_free (context);
  #endif
 +
 +#if HAVE_GPU
 +  if (config->gpu_enabled)
 +    gegl_gpu_init (argc, argv);
 +#endif
  }
  
 -static gchar   *cmd_gegl_swap=NULL;
 -static gchar   *cmd_gegl_cache_size=NULL;
 -static gchar   *cmd_gegl_chunk_size=NULL;
 -static gchar   *cmd_gegl_quality=NULL;
 -static gchar   *cmd_gegl_tile_size=NULL;
 -static gchar   *cmd_babl_tolerance =NULL;
 +static gchar *cmd_gegl_swap        = NULL;
 +static gchar *cmd_gegl_cache_size  = NULL;
 +static gchar *cmd_gegl_chunk_size  = NULL;
 +static gchar *cmd_gegl_quality     = NULL;
 +static gchar *cmd_gegl_tile_size   = NULL;
 +static gchar *cmd_babl_tolerance   = NULL;
 +static gchar *cmd_gegl_enable_gpu  = NULL;
+ #if ENABLE_MT
 -static gchar   *cmd_gegl_threads=NULL;
++static gchar *cmd_gegl_threads=NULL;
+ #endif
  
  static const GOptionEntry cmd_entries[]=
  {
@@@ -250,11 -241,13 +253,20 @@@
       G_OPTION_ARG_STRING, &cmd_gegl_quality,
       N_("The quality of rendering a value between 0.0(fast) and 1.0(reference)"), "<quality>"
      },
++#if HAVE_GPU
 +    {
 +     "gegl-enable-gpu", 0, 0,
 +     G_OPTION_ARG_STRING, &cmd_gegl_enable_gpu,
 +     N_("Whether or not GPU support is enabled"), "<true|false>"
 +    },
++#endif
+ #if ENABLE_MT
+     {
+      "gegl-threads", 0, 0,
+      G_OPTION_ARG_STRING, &cmd_gegl_threads,
+      N_("The number of concurrent processing threads to use."), "<threads>"
+     },
+ #endif
      { NULL }
  };
  
@@@ -301,12 -294,12 +313,16 @@@ GeglConfig *gegl_config (void
            if (str)
              config->tile_height = atoi(str+1);
          }
+ #if ENABLE_MT
+       if (g_getenv ("GEGL_THREADS"))
+         config->threads = atoi(g_getenv("GEGL_THREADS"));
+ #endif
        if (gegl_swap_dir())
          config->swap = g_strdup(gegl_swap_dir ());
 +#if HAVE_GPU
 +      if (g_getenv ("GEGL_ENABLE_GPU") != NULL)
 +        config->gpu_enabled = TRUE;
 +#endif
      }
    return GEGL_CONFIG (config);
  }
diff --cc operations/common/brightness-contrast.c
index f4ba671,ade7864..8f0f4f6
--- a/operations/common/brightness-contrast.c
+++ b/operations/common/brightness-contrast.c
@@@ -13,14 -13,9 +13,11 @@@
   * You should have received a copy of the GNU Lesser General Public
   * License along with GEGL; if not, see <http://www.gnu.org/licenses/>.
   *
 - * Copyright 2006 Øyvind Kolås <pippin gimp org>
 + * Copyright 2006-2009 Øyvind Kolås <pippin gimp org>
 + *           2009      daerd
   */
  
- #ifdef HAVE_GPU
- #include <GL/glew.h>
- #endif
 +
  #include "config.h"
  #include <glib/gi18n-lib.h>
  
@@@ -59,52 -54,8 +56,55 @@@ gegl_chant_double (brightness, _("Brigh
   * structure for our operation, as well as the needed code to register
   * our new gobject with GEGL.
   */
++#if HAVE_GPU
++#include <GL/glew.h>
++#endif
  #include "gegl-chant.h"
  
 +/* XXX: the amount of boiler plate to be able to write a shader
 + * for the op needs to be reduced.
 + */
- #if 0
++#if HAVE_GPU
 +
 +#include "gegl-gpu-types.h"
 +#include "gegl-gpu-init.h"
 +
 +static gchar* shader_program_str = "                              \
 +uniform sampler2DRect pixels;                                     \
 +uniform float brightness, contrast;                               \
 +                                                                  \
 +void main()                                                       \
 +{                                                                 \
 +  vec4 pixel   = texture2DRect (pixels, gl_TexCoord[0].st);       \
 +  vec3 color   = (pixel.rgb - 0.5) * contrast + brightness + 0.5; \
 +  gl_FragColor = vec4 (color, pixel.a);                           \
 +}                                                                 ";
 +
 +static GLuint shader_program = 0;
 +static GLuint pixels_param;
 +static GLuint brightness_param;
 +static GLuint contrast_param;
 +
 +static GLuint
 +create_shader_program (void)
 +{
 +  GLuint shader = glCreateShader (GL_FRAGMENT_SHADER);
 +  GLuint program;
 +
 +  glShaderSource (shader, 1, &shader_program_str, NULL);
 +  glCompileShader (shader);
 +
 +  program = glCreateProgram ();
 +  glAttachShader (program, shader);
 +  glLinkProgram (program);
 +
 +  pixels_param = glGetUniformLocation (program, "pixels");
 +  brightness_param = glGetUniformLocation (program, "brightness");
 +  contrast_param = glGetUniformLocation (program, "contrast");
 +
 +  return program;
 +}
 +#endif
  
  /* prepare() is called on each operation providing data to a node that
   * is requested to provide a rendered result. When prepare is called
@@@ -157,79 -108,6 +157,79 @@@ process (GeglOperation       *op
    return TRUE;
  }
  
 +/* XXX: could this perhaps be done more generically in the baseclass,
 + * allowing the implementation to be much more minimal?
 + */
- #if 0
++#if HAVE_GPU
 +
 +static gboolean
 +process_gpu (GeglOperation       *op,
 +             GeglGpuTexture      *in,
 +             GeglGpuTexture      *out,
 +             glong                samples,
 +             const GeglRectangle *roi)
 +{
 +  /* Retrieve a pointer to GeglChantO structure which contains all the
 +   * chanted properties
 +   */
 +  GeglChantO *o          = GEGL_CHANT_PROPERTIES (op);
 +  gfloat      brightness = o->brightness;
 +  gfloat      contrast   = o->contrast;
 +
 +  /* attach *out* texture to offscreen framebuffer */
 +  glFramebufferTexture2DEXT (GL_FRAMEBUFFER_EXT, 
 +                             GL_COLOR_ATTACHMENT0_EXT,
 +                             GL_TEXTURE_RECTANGLE_ARB,
 +                             out->handle,
 +                             0);
 +
 +  /* set *out* texture as render target */
 +  glDrawBuffer (GL_COLOR_ATTACHMENT0_EXT);
 +
 +  /* create and register shader program, all shader programs will be deleted
 +   * after GEGL terminates
 +   */
 +  if (shader_program == 0)
 +    {
 +      shader_program = create_shader_program ();
 +      gegl_gpu_register_shader_program (shader_program);
 +    }
 +  glUseProgram (shader_program);
 +
 +  /* setup shader parameters */
 +  glActiveTexture (GL_TEXTURE0);
 +  glBindTexture (GL_TEXTURE_RECTANGLE_ARB, in->handle);
 +  glUniform1i (pixels_param, 0);
 +  glUniform1f (brightness_param, brightness);
 +  glUniform1f (contrast_param, contrast);
 +
 +  /* viewport transform for 1:1 pixel=texel=data mapping */
 +  glMatrixMode (GL_PROJECTION);
 +  glLoadIdentity ();
 +  gluOrtho2D (0.0, roi->width, 0.0, roi->height);
 +  glMatrixMode (GL_MODELVIEW);
 +  glLoadIdentity ();
 +  glViewport (0, 0, roi->width, roi->height);
 +
 +  /* make quad filled to hit every pixel/texel */
 +  glPolygonMode (GL_FRONT, GL_FILL);
 +
 +  /* and render quad */
 +  glBegin (GL_QUADS);
 +    glTexCoord2f (0.0, 0.0);
 +    glVertex2f (0.0, 0.0);
 +    glTexCoord2f (roi->width, 0.0);
 +    glVertex2f (roi->width, 0.0);
 +    glTexCoord2f (roi->width, roi->height);
 +    glVertex2f (roi->width, roi->height);
 +    glTexCoord2f (0.0, roi->height);
 +    glVertex2f (0.0, roi->height);
 +  glEnd ();
 +
 +  return TRUE;
 +}
 +
 +#endif
  
  #ifdef HAS_G4FLOAT
  /* The compiler supports vector extensions allowing an version of
@@@ -305,11 -183,6 +305,11 @@@ gegl_chant_class_init (GeglChantClass *
    gegl_operation_class_add_processor (operation_class,
                                        G_CALLBACK (process_simd), "simd");
  #endif
- #if 0
++#if HAVE_GPU
 +  gegl_operation_class_add_processor (operation_class,
 +                                      G_CALLBACK (process_gpu),
 +                                      "gpu:reference");
 +#endif
  }
  
  #endif /* closing #ifdef GEGL_CHANT_PROPERTIES ... else ... */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]