[gegl/wip/pippin/per-thread-buffer-iterator] gegl: use gegl_buffer_iterator per thread for point-ops



commit 6f68850595518ef872b21ac88ae44cd3de11b6ef
Author: Øyvind Kolås <pippin gimp org>
Date:   Thu Sep 28 00:48:34 2017 +0200

    gegl: use gegl_buffer_iterator per thread for point-ops
    
    When originally implemented, threading over the buffers provided by a
    single buffer iterator, keeping the fishes local to each thread, was
    faster than having iterators step on each other's locks. Now, however,
    lock contention in GeglBuffer has improved and we can use an iterator
    per thread.
    
    This duplicates code across each of the parent classes, which is needed
    to keep the per-base-class OpenCL dispatch.

 gegl/operation/gegl-operation-point-composer.c  |  199 +++++++-----------
 gegl/operation/gegl-operation-point-composer3.c |  255 ++++++++---------------
 gegl/operation/gegl-operation-point-filter.c    |  161 +++++++--------
 3 files changed, 233 insertions(+), 382 deletions(-)
---
diff --git a/gegl/operation/gegl-operation-point-composer.c b/gegl/operation/gegl-operation-point-composer.c
index 37268bf..2d745f0 100644
--- a/gegl/operation/gegl-operation-point-composer.c
+++ b/gegl/operation/gegl-operation-point-composer.c
@@ -40,54 +40,44 @@ typedef struct ThreadData
 {
   GeglOperationPointComposerClass *klass;
   GeglOperation                   *operation;
-  guchar                          *input;
-  guchar                          *aux;
-  guchar                          *output;
+  GeglBuffer                      *input;
+  GeglBuffer                      *aux;
+  GeglBuffer                      *output;
   gint                            *pending;
   gint                            *started;
   gint                             level;
   gboolean                         success;
-  GeglRectangle                    roi;
-
-  guchar                          *in_tmp;
-  guchar                          *aux_tmp;
-  guchar                          *output_tmp;
-  const Babl *input_fish;
-  const Babl *aux_fish;
-  const Babl *output_fish;
+  GeglRectangle                    result;
+  const Babl *input_format;
+  const Babl *aux_format;
+  const Babl *output_format;
 } ThreadData;
 
 static void thread_process (gpointer thread_data, gpointer unused)
 {
   ThreadData *data = thread_data;
 
-  guchar *input = data->input;
-  guchar *aux = data->aux;
-  guchar *output = data->output;
-  glong samples = data->roi.width * data->roi.height;
-
-  if (data->input_fish && input)
-    {
-      babl_process (data->input_fish, data->input, data->in_tmp, samples);
-      input = data->in_tmp;
-    }
-  if (data->aux_fish && aux)
-    {
-      babl_process (data->aux_fish, data->aux, data->aux_tmp, samples);
-      aux = data->aux_tmp;
-    }
-  if (data->output_fish)
-    output = data->output_tmp;
-
-  if (!data->klass->process (data->operation,
-                       input, aux,
-                       output, samples,
-                       &data->roi, data->level))
-    data->success = FALSE;
-  
-  if (data->output_fish)
-    babl_process (data->output_fish, data->output_tmp, data->output, samples);
-
+  GeglBufferIterator *i = gegl_buffer_iterator_new (data->output,
+                                                    &data->result,
+                                                    data->level,
+                                                    data->output_format,
+                                                    GEGL_ACCESS_WRITE,
+                                                    GEGL_ABYSS_NONE);
+   gint foo = 0, read = 0;
+
+   if (data->input)
+     read = gegl_buffer_iterator_add (i, data->input, &data->result, data->level, data->input_format, GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
+   if (data->aux)
+     foo = gegl_buffer_iterator_add (i, data->aux, &data->result, data->level, data->aux_format, GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
+
+   while (gegl_buffer_iterator_next (i))
+     {
+       data->klass->process (data->operation, data->input?i->data[read]:NULL,
+                             data->aux?i->data[foo]:NULL,
+                             i->data[0], i->length, &(i->roi[0]), data->level);
+     }
+
+  data->success = TRUE;
   g_atomic_int_add (data->pending, -1);
 }
 
@@ -313,9 +303,6 @@ gegl_operation_point_composer_process (GeglOperation       *operation,
 
   if ((result->width > 0) && (result->height > 0))
     {
-      const Babl *in_buf_format  = input?gegl_buffer_get_format(input):NULL;
-      const Babl *aux_buf_format = aux?gegl_buffer_get_format(aux):NULL;
-      const Babl *output_buf_format = output?gegl_buffer_get_format(output):NULL;
 
       if (gegl_operation_use_opencl (operation) && (operation_class->cl_data || point_composer_class->cl_process))
         {
@@ -328,105 +315,73 @@ gegl_operation_point_composer_process (GeglOperation       *operation,
         gint threads = gegl_config_threads ();
         GThreadPool *pool = thread_pool ();
         ThreadData thread_data[GEGL_MAX_THREADS];
-        GeglBufferIterator *i = gegl_buffer_iterator_new (output, result, level, output_buf_format,
-                                                          GEGL_ACCESS_WRITE, GEGL_ABYSS_NONE);
-        gint foo = 0, read = 0;
-
-        gint in_bpp = input?babl_format_get_bytes_per_pixel (in_format):0;
-        gint aux_bpp = aux?babl_format_get_bytes_per_pixel (aux_format):0;
-        gint out_bpp = babl_format_get_bytes_per_pixel (out_format);
-
-        gint in_buf_bpp = input?babl_format_get_bytes_per_pixel (in_buf_format):0;
-        gint aux_buf_bpp = aux?babl_format_get_bytes_per_pixel (aux_buf_format):0;
-        gint out_buf_bpp = babl_format_get_bytes_per_pixel (output_buf_format);
-        gint temp_id = 0;
-
-        if (input)
-        {
-          read = gegl_buffer_iterator_add (i, input, result, level, in_buf_format,
-                                           GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
-          for (gint j = 0; j < threads; j ++)
-          {
-            if (in_buf_format != in_format)
-            {
-              thread_data[j].input_fish = babl_fish (in_buf_format, in_format);
-              thread_data[j].in_tmp = gegl_temp_buffer (temp_id++, in_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
-            }
-            else
-            {
-              thread_data[j].input_fish = NULL;
-            }
-          }
-        }
-        else
-          for (gint j = 0; j < threads; j ++)
-            thread_data[j].input_fish = NULL;
-        if (aux)
-        {
-          foo = gegl_buffer_iterator_add (i, aux, result, level, aux_buf_format,
-                                          GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
-          for (gint j = 0; j < threads; j ++)
-          {
-            if (aux_buf_format != aux_format)
-            {
-              thread_data[j].aux_fish = babl_fish (aux_buf_format, aux_format);
-              thread_data[j].aux_tmp = gegl_temp_buffer (temp_id++, aux_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
-            }
-            else
-            {
-              thread_data[j].aux_fish = NULL;
-            }
-          }
-        }
-        else
-        {
-          for (gint j = 0; j < threads; j ++)
-            thread_data[j].aux_fish = NULL;
-        }
 
         for (gint j = 0; j < threads; j ++)
-        {
-          if (output_buf_format != gegl_buffer_get_format (output))
-          {
-            thread_data[j].output_fish = babl_fish (out_format, output_buf_format);
-            thread_data[j].output_tmp = gegl_temp_buffer (temp_id++, out_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
-          }
-          else
           {
-            thread_data[j].output_fish = NULL;
+            thread_data[j].input = input;
+            thread_data[j].aux = aux;
+            thread_data[j].output = output;
+            thread_data[j].input_format = in_format;
+            thread_data[j].aux_format = aux_format;
+            thread_data[j].output_format = out_format;
           }
-        }
 
-        while (gegl_buffer_iterator_next (i))
           {
-            gint threads = gegl_config_threads ();
             gint pending;
             gint bit;
 
-            if (i->roi[0].height < threads)
+            if (result->height < threads)
             {
-              threads = i->roi[0].height;
+              threads = result->height;
+            }
+            if (result->width < threads)
+            {
+              threads = result->width;
             }
 
-            bit = i->roi[0].height / threads;
-            pending = threads;
-
-            for (gint j = 0; j < threads; j++)
+            if (result->height > result->width)
             {
-              thread_data[j].roi.x = (i->roi[0]).x;
-              thread_data[j].roi.width = (i->roi[0]).width;
-              thread_data[j].roi.y = (i->roi[0]).y + bit * j;
-              thread_data[j].roi.height = bit;
+              bit = result->height / threads;
+              {
+                gint tbit = bit / output->tile_storage->tile_height;
+                tbit = tbit * output->tile_storage->tile_height;
+                if (tbit > bit * 0.66) /* to avoid making the last much bigger */
+                  bit = tbit;
+              }
+
+              pending = threads;
+              for (gint j = 0; j < threads; j++)
+              {
+                thread_data[j].result = *result;
+                thread_data[j].result.y = result->y + bit * j;
+                thread_data[j].result.height = bit;
+              }
+              thread_data[threads-1].result.height = result->height - (bit * (threads-1));
             }
-            thread_data[threads-1].roi.height = i->roi[0].height - (bit * (threads-1));
-            
+            else
+            {
+              bit = result->width / threads;
+              {
+                gint tbit = bit / output->tile_storage->tile_width;
+                tbit = tbit * output->tile_storage->tile_width;
+                if (tbit > bit * 0.66) /* to avoid making the last much bigger */
+                  bit = tbit;
+              }
+
+              pending = threads;
+              for (gint j = 0; j < threads; j++)
+              {
+                thread_data[j].result = *result;
+                thread_data[j].result.y = result->x + bit * j;
+                thread_data[j].result.width = bit;
+              }
+              thread_data[threads-1].result.width = result->width - (bit * (threads-1));
+            }
+
             for (gint j = 0; j < threads; j++)
             {
               thread_data[j].klass = point_composer_class;
               thread_data[j].operation = operation;
-              thread_data[j].input = input?((guchar*)i->data[read]) + (bit * j * i->roi[0].width * in_buf_bpp):NULL;
-              thread_data[j].aux = aux?((guchar*)i->data[foo]) + (bit * j * i->roi[0].width * aux_buf_bpp):NULL;
-              thread_data[j].output = ((guchar*)i->data[0]) + (bit * j * i->roi[0].width * out_buf_bpp);
               thread_data[j].pending = &pending;
               thread_data[j].level = level;
               thread_data[j].success = TRUE;
diff --git a/gegl/operation/gegl-operation-point-composer3.c b/gegl/operation/gegl-operation-point-composer3.c
index 3f352de..7becdef 100644
--- a/gegl/operation/gegl-operation-point-composer3.c
+++ b/gegl/operation/gegl-operation-point-composer3.c
@@ -36,63 +36,49 @@ typedef struct ThreadData
 {
   GeglOperationPointComposer3Class *klass;
   GeglOperation                    *operation;
-  guchar                           *input;
-  guchar                           *aux;
-  guchar                           *aux2;
-  guchar                           *output;
+  GeglBuffer                       *input;
+  GeglBuffer                       *aux;
+  GeglBuffer                       *aux2;
+  GeglBuffer                       *output;
   gint                             *pending;
   gint                             *started;
   gint                              level;
   gboolean                          success;
-  GeglRectangle                     roi;
-
-  guchar                           *in_tmp;
-  guchar                           *aux_tmp;
-  guchar                           *aux2_tmp;
-  guchar                           *output_tmp;
-  const Babl *input_fish;
-  const Babl *aux_fish;
-  const Babl *aux2_fish;
-  const Babl *output_fish;
+  GeglRectangle                     result;
+  const Babl *input_format;
+  const Babl *aux_format;
+  const Babl *aux2_format;
+  const Babl *output_format;
 } ThreadData;
 
 static void thread_process (gpointer thread_data, gpointer unused)
 {
   ThreadData *data = thread_data;
 
-  guchar *input = data->input;
-  guchar *aux = data->aux;
-  guchar *aux2 = data->aux2;
-  guchar *output = data->output;
-  glong samples = data->roi.width * data->roi.height;
-
-  if (data->input_fish && input)
-    {
-      babl_process (data->input_fish, data->input, data->in_tmp, samples);
-      input = data->in_tmp;
-    }
-  if (data->aux_fish && aux)
-    {
-      babl_process (data->aux_fish, data->aux, data->aux_tmp, samples);
-      aux = data->aux_tmp;
-    }
-  if (data->aux2_fish && aux2)
-    {
-      babl_process (data->aux2_fish, data->aux2, data->aux2_tmp, samples);
-      aux2 = data->aux2_tmp;
-    }
-  if (data->output_fish)
-    output = data->output_tmp;
-
-  if (!data->klass->process (data->operation,
-                       input, aux, aux2, 
-                       output, samples,
-                       &data->roi, data->level))
-    data->success = FALSE;
-  
-  if (data->output_fish)
-    babl_process (data->output_fish, data->output_tmp, data->output, samples);
-
+  GeglBufferIterator *i = gegl_buffer_iterator_new (data->output,
+                                                    &data->result,
+                                                    data->level,
+                                                    data->output_format,
+                                                    GEGL_ACCESS_WRITE,
+                                                    GEGL_ABYSS_NONE);
+   gint foo = 0, bar = 0, read = 0;
+
+   if (data->input)
+     read = gegl_buffer_iterator_add (i, data->input, &data->result, data->level, data->input_format, GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
+   if (data->aux)
+     foo = gegl_buffer_iterator_add (i, data->aux, &data->result, data->level, data->aux_format, GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
+   if (data->aux2)
+     bar= gegl_buffer_iterator_add (i, data->aux2, &data->result, data->level, data->aux2_format, GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
+
+   while (gegl_buffer_iterator_next (i))
+     {
+       data->klass->process (data->operation, data->input?i->data[read]:NULL,
+                             data->aux?i->data[foo]:NULL,
+                             data->aux2?i->data[bar]:NULL,
+                             i->data[0], i->length, &(i->roi[0]), data->level);
+     }
+
+  data->success = TRUE;
   g_atomic_int_add (data->pending, -1);
 }
 
@@ -234,157 +220,80 @@ gegl_operation_point_composer3_process (GeglOperation       *operation,
 
   if ((result->width > 0) && (result->height > 0))
     {
-      const Babl *in_buf_format  = input?gegl_buffer_get_format(input):NULL;
-      const Babl *aux_buf_format = aux?gegl_buffer_get_format(aux):NULL;
-      const Babl *aux2_buf_format = aux2?gegl_buffer_get_format(aux2):NULL;
-      const Babl *output_buf_format = output?gegl_buffer_get_format(output):NULL;
-
       if (gegl_operation_use_threading (operation, result) && result->height > 1)
       {
         gint threads = gegl_config_threads ();
         GThreadPool *pool = thread_pool ();
         ThreadData thread_data[GEGL_MAX_THREADS];
-        GeglBufferIterator *i = gegl_buffer_iterator_new (output, result, level, output_buf_format, GEGL_ACCESS_WRITE, GEGL_ABYSS_NONE);
-        gint foo = 0, bar = 0, read = 0;
-
-        gint in_bpp = input?babl_format_get_bytes_per_pixel (in_format):0;
-        gint aux_bpp = aux?babl_format_get_bytes_per_pixel (aux_format):0;
-        gint aux2_bpp = aux2?babl_format_get_bytes_per_pixel (aux2_format):0;
-        gint out_bpp = babl_format_get_bytes_per_pixel (out_format);
-
-        gint in_buf_bpp = input?babl_format_get_bytes_per_pixel (in_buf_format):0;
-        gint aux_buf_bpp = aux?babl_format_get_bytes_per_pixel (aux_buf_format):0;
-        gint aux2_buf_bpp = aux2?babl_format_get_bytes_per_pixel (aux2_buf_format):0;
-        gint out_buf_bpp = babl_format_get_bytes_per_pixel (output_buf_format);
 
-        gint temp_id = 0;
-
-        if (input)
-        {
-          if (! babl_format_has_alpha (in_buf_format))
-            {
-              in_buf_format = in_format;
-              in_buf_bpp = in_bpp;
-            }
-
-          read = gegl_buffer_iterator_add (i, input, result, level, in_buf_format, GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
-          for (gint j = 0; j < threads; j ++)
+        for (gint j = 0; j < threads; j ++)
           {
-            if (in_buf_format != in_format)
-            {
-              thread_data[j].input_fish = babl_fish (in_buf_format, in_format);
-              thread_data[j].in_tmp = gegl_temp_buffer (temp_id++, in_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
-            }
-            else
-            {
-              thread_data[j].input_fish = NULL;
-            }
+            thread_data[j].input = input;
+            thread_data[j].aux = aux;
+            thread_data[j].aux2 = aux2;
+            thread_data[j].output = output;
+            thread_data[j].input_format = in_format;
+            thread_data[j].aux_format = aux_format;
+            thread_data[j].aux2_format = aux2_format;
+            thread_data[j].output_format = out_format;
           }
-        }
-        else
-          for (gint j = 0; j < threads; j ++)
-            thread_data[j].input_fish = NULL;
-        if (aux)
-        {
-          if (! babl_format_has_alpha (aux_buf_format))
-            {
-              aux_buf_format = aux_format;
-              aux_buf_bpp = aux_bpp;
-            }
 
-          foo = gegl_buffer_iterator_add (i, aux, result, level, aux_buf_format,
-                                          GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
-          for (gint j = 0; j < threads; j ++)
           {
-            if (aux_buf_format != aux_format)
-            {
-              thread_data[j].aux_fish = babl_fish (aux_buf_format, aux_format);
-              thread_data[j].aux_tmp = gegl_temp_buffer (temp_id++, aux_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
-            }
-            else
+            gint pending;
+            gint bit;
+
+            if (result->height < threads)
             {
-              thread_data[j].aux_fish = NULL;
+              threads = result->height;
             }
-          }
-        }
-        else
-        {
-          for (gint j = 0; j < threads; j ++)
-            thread_data[j].aux_fish = NULL;
-        }
-        if (aux2)
-        {
-          if (! babl_format_has_alpha (aux2_buf_format))
+            if (result->width < threads)
             {
-              aux2_buf_format = aux2_format;
-              aux2_buf_bpp = aux2_bpp;
+              threads = result->width;
             }
 
-          bar = gegl_buffer_iterator_add (i, aux2, result, level, aux2_buf_format,
-                                          GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
-          for (gint j = 0; j < threads; j ++)
-          {
-            if (aux2_buf_format != aux2_format)
+            if (result->height > result->width)
             {
-              thread_data[j].aux2_fish = babl_fish (aux2_buf_format, aux2_format);
-              thread_data[j].aux2_tmp = gegl_temp_buffer (temp_id++, aux2_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
+              bit = result->height / threads;
+              {
+                gint tbit = bit / output->tile_storage->tile_height;
+                tbit = tbit * output->tile_storage->tile_height;
+                if (tbit > bit * 0.66) /* to avoid making the last much bigger */
+                  bit = tbit;
+              }
+
+              pending = threads;
+              for (gint j = 0; j < threads; j++)
+              {
+                thread_data[j].result = *result;
+                thread_data[j].result.y = result->y + bit * j;
+                thread_data[j].result.height = bit;
+              }
+              thread_data[threads-1].result.height = result->height - (bit * (threads-1));
             }
             else
             {
-              thread_data[j].aux2_fish = NULL;
-            }
-          }
-        }
-        else
-        {
-          for (gint j = 0; j < threads; j ++)
-            thread_data[j].aux2_fish = NULL;
-        }
-
-        for (gint j = 0; j < threads; j ++)
-        {
-          if (output_buf_format != gegl_buffer_get_format (output))
-          {
-            thread_data[j].output_fish = babl_fish (out_format, output_buf_format);
-            thread_data[j].output_tmp = gegl_temp_buffer (temp_id++, out_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
-          }
-          else
-          {
-            thread_data[j].output_fish = NULL;
-          }
-        }
-
-        while (gegl_buffer_iterator_next (i))
-          {
-            gint threads = gegl_config_threads ();
-            gint pending;
-            gint bit;
-
-            if (i->roi[0].height < threads)
-            {
-              threads = i->roi[0].height;
+              bit = result->width / threads;
+              {
+                gint tbit = bit / output->tile_storage->tile_width;
+                tbit = tbit * output->tile_storage->tile_width;
+                if (tbit > bit * 0.66) /* to avoid making the last much bigger */
+                  bit = tbit;
+              }
+
+              pending = threads;
+              for (gint j = 0; j < threads; j++)
+              {
+                thread_data[j].result = *result;
+                thread_data[j].result.y = result->x + bit * j;
+                thread_data[j].result.width = bit;
+              }
+              thread_data[threads-1].result.width = result->width - (bit * (threads-1));
             }
 
-            bit = i->roi[0].height / threads;
-            pending = threads;
-
-            for (gint j = 0; j < threads; j++)
-            {
-              thread_data[j].roi.x = (i->roi[0]).x;
-              thread_data[j].roi.width = (i->roi[0]).width;
-              thread_data[j].roi.y = (i->roi[0]).y + bit * j;
-              thread_data[j].roi.height = bit;
-            }
-            thread_data[threads-1].roi.height = i->roi[0].height - (bit * (threads-1));
-            
             for (gint j = 0; j < threads; j++)
             {
               thread_data[j].klass = point_composer3_class;
               thread_data[j].operation = operation;
-              thread_data[j].input = input?((guchar*)i->data[read]) + (bit * j * i->roi[0].width * in_buf_bpp):NULL;
-              thread_data[j].aux = aux?((guchar*)i->data[foo]) + (bit * j * i->roi[0].width * aux_buf_bpp):NULL;
-              thread_data[j].aux2 = aux2?((guchar*)i->data[bar]) + (bit * j * i->roi[0].width * aux2_buf_bpp):NULL;
-              thread_data[j].output = ((guchar*)i->data[0]) + (bit * j * i->roi[0].width * out_buf_bpp);
               thread_data[j].pending = &pending;
               thread_data[j].level = level;
               thread_data[j].success = TRUE;
diff --git a/gegl/operation/gegl-operation-point-filter.c b/gegl/operation/gegl-operation-point-filter.c
index 49679d0..213992d 100644
--- a/gegl/operation/gegl-operation-point-filter.c
+++ b/gegl/operation/gegl-operation-point-filter.c
@@ -39,45 +39,41 @@
 typedef struct ThreadData
 {
   GeglOperationPointFilterClass *klass;
-  GeglOperation                   *operation;
-  guchar                          *input;
-  guchar                          *output;
-  gint                            *pending;
-  gint                            *started;
-  gint                             level;
-  gboolean                         success;
-  GeglRectangle                    roi;
-
-  guchar                          *in_tmp;
-  guchar                          *output_tmp;
-  const Babl *input_fish;
-  const Babl *output_fish;
+  GeglOperation                 *operation;
+  GeglBuffer                    *input;
+  GeglBuffer                    *output;
+  GeglRectangle                  result;
+  gint                           level;
+  gint                         *pending;
+  gboolean                       success;
+  const Babl                    *in_format;
+  const Babl                    *out_format;
 } ThreadData;
 
 static void thread_process (gpointer thread_data, gpointer unused)
 {
   ThreadData *data = thread_data;
 
-  guchar *input = data->input;
-  guchar *output = data->output;
-  glong samples = data->roi.width * data->roi.height;
+  GeglBufferIterator *i = gegl_buffer_iterator_new (data->output,
+                                                    &data->result,
+                                                    data->level,
+                                                    data->out_format,
+                                                    GEGL_ACCESS_WRITE,
+                                                    GEGL_ABYSS_NONE);
+  gint read = 0;
 
-  if (data->input_fish && input)
+  if (data->input)
+    read = gegl_buffer_iterator_add (i, data->input, &data->result, data->level, 
+                                     data->in_format,
+                                     GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
+
+  while (gegl_buffer_iterator_next (i))
     {
-      babl_process (data->input_fish, data->input, data->in_tmp, samples);
-      input = data->in_tmp;
+       data->klass->process (data->operation, data->input?i->data[read]:NULL,
+                             i->data[0], i->length, &(i->roi[0]), data->level);
     }
-  if (data->output_fish)
-    output = data->output_tmp;
-
-  if (!data->klass->process (data->operation,
-                       input, 
-                       output, samples,
-                       &data->roi, data->level))
-    data->success = FALSE;
-  
-  if (data->output_fish)
-    babl_process (data->output_fish, data->output_tmp, data->output, samples);
+
+  data->success = TRUE;
 
   g_atomic_int_add (data->pending, -1);
 }
@@ -251,7 +247,7 @@ error:
 static void
 gegl_operation_point_filter_class_init (GeglOperationPointFilterClass *klass)
 {
-  GeglOperationClass          *operation_class = GEGL_OPERATION_CLASS (klass);
+  GeglOperationClass       *operation_class = GEGL_OPERATION_CLASS (klass);
   GeglOperationFilterClass *filter_class  = GEGL_OPERATION_FILTER_CLASS (klass);
 
   filter_class->process = gegl_operation_point_filter_process;
@@ -283,9 +279,6 @@ gegl_operation_point_filter_process (GeglOperation       *operation,
 
   if ((result->width > 0) && (result->height > 0))
     {
-      const Babl *in_buf_format  = input?gegl_buffer_get_format(input):NULL;
-      const Babl *output_buf_format = output?gegl_buffer_get_format(output):NULL;
-
       if (gegl_operation_use_opencl (operation) && (operation_class->cl_data || point_filter_class->cl_process))
       {
         if (gegl_operation_point_filter_cl_process (operation, input, output, result, level))
@@ -297,77 +290,71 @@ gegl_operation_point_filter_process (GeglOperation       *operation,
         gint threads = gegl_config_threads ();
         GThreadPool *pool = thread_pool ();
         ThreadData thread_data[GEGL_MAX_THREADS];
-        GeglBufferIterator *i = gegl_buffer_iterator_new (output, result, level, output_buf_format, GEGL_ACCESS_WRITE, GEGL_ABYSS_NONE);
-        gint read = 0;
-
-        gint in_bpp = input?babl_format_get_bytes_per_pixel (in_format):0;
-        gint out_bpp = babl_format_get_bytes_per_pixel (out_format);
-        gint in_buf_bpp = input?babl_format_get_bytes_per_pixel (in_buf_format):0;
-        gint out_buf_bpp = babl_format_get_bytes_per_pixel (output_buf_format);
-        gint temp_id = 0;
-
-        if (input)
-        {
-          read = gegl_buffer_iterator_add (i, input, result, level, in_buf_format, GEGL_ACCESS_READ, GEGL_ABYSS_NONE);
-          for (gint j = 0; j < threads; j ++)
-          {
-            if (in_buf_format != in_format)
-            {
-              thread_data[j].input_fish = babl_fish (in_buf_format, in_format);
-              thread_data[j].in_tmp = gegl_temp_buffer (temp_id++, in_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
-            }
-            else
-            {
-              thread_data[j].input_fish = NULL;
-            }
-          }
-        }
-        else
-          for (gint j = 0; j < threads; j ++)
-            thread_data[j].input_fish = NULL;
 
         for (gint j = 0; j < threads; j ++)
-        {
-          if (output_buf_format != gegl_buffer_get_format (output))
-          {
-            thread_data[j].output_fish = babl_fish (out_format, output_buf_format);
-            thread_data[j].output_tmp = gegl_temp_buffer (temp_id++, out_bpp * output->tile_storage->tile_width * output->tile_storage->tile_height);
-          }
-          else
           {
-            thread_data[j].output_fish = NULL;
+            thread_data[j].input = input;
+            thread_data[j].output = output;
+            thread_data[j].in_format = in_format;
+            thread_data[j].out_format = out_format;
           }
-        }
 
-        while (gegl_buffer_iterator_next (i))
           {
-            gint threads = gegl_config_threads ();
             gint pending;
             gint bit;
 
-            if (i->roi[0].height < threads)
+            if (result->height < threads)
             {
-              threads = i->roi[0].height;
+              threads = result->height;
+            }
+            if (result->width < threads)
+            {
+              threads = result->width;
             }
 
-            bit = i->roi[0].height / threads;
-            pending = threads;
-
-            for (gint j = 0; j < threads; j++)
+            if (result->height > result->width)
             {
-              thread_data[j].roi.x = (i->roi[0]).x;
-              thread_data[j].roi.width = (i->roi[0]).width;
-              thread_data[j].roi.y = (i->roi[0]).y + bit * j;
-              thread_data[j].roi.height = bit;
+              bit = result->height / threads;
+              {
+                gint tbit = bit / output->tile_storage->tile_height;
+                tbit = tbit * output->tile_storage->tile_height;
+                if (tbit > bit * 0.66) /* to avoid making the last much bigger */
+                  bit = tbit;
+              }
+
+              pending = threads;
+              for (gint j = 0; j < threads; j++)
+              {
+                thread_data[j].result = *result;
+                thread_data[j].result.y = result->y + bit * j;
+                thread_data[j].result.height = bit;
+              }
+              thread_data[threads-1].result.height = result->height - (bit * (threads-1));
             }
-            thread_data[threads-1].roi.height = i->roi[0].height - (bit * (threads-1));
-            
+            else
+            {
+              bit = result->width / threads;
+              {
+                gint tbit = bit / output->tile_storage->tile_width;
+                tbit = tbit * output->tile_storage->tile_width;
+                if (tbit > bit * 0.66) /* to avoid making the last much bigger */
+                  bit = tbit;
+              }
+
+              pending = threads;
+              for (gint j = 0; j < threads; j++)
+              {
+                thread_data[j].result = *result;
+                thread_data[j].result.y = result->x + bit * j;
+                thread_data[j].result.width = bit;
+              }
+              thread_data[threads-1].result.width = result->width - (bit * (threads-1));
+            }
+
             for (gint j = 0; j < threads; j++)
             {
               thread_data[j].klass = point_filter_class;
               thread_data[j].operation = operation;
-              thread_data[j].input = input?((guchar*)i->data[read]) + (bit * j * i->roi[0].width * in_buf_bpp):NULL;
-              thread_data[j].output = ((guchar*)i->data[0]) + (bit * j * i->roi[0].width * out_buf_bpp);
               thread_data[j].pending = &pending;
               thread_data[j].level = level;
               thread_data[j].success = TRUE;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]