[babl] Move chunking of stack-allocated buffer to innermost loop



commit 0230c19d5258bbd1886373de6c621d04f275c603
Author: Øyvind Kolås <pippin gimp org>
Date:   Wed Nov 14 01:36:50 2012 +0100

    Move chunking of stack-allocated buffer to innermost loop
    
    Moves the chunking logic to the only place where it is needed. This speeds
    up the other (more likely, and hopefully the common) code path, which no
    longer needs to evaluate the conditionals at all.

 babl/babl-fish-path.c |  149 ++++++++++++++++++++++++++++++++++++++----------
 babl/babl-list.h      |    5 +-
 2 files changed, 121 insertions(+), 33 deletions(-)
---
diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index f7ab32a..0166608 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -25,7 +25,9 @@
 #define BABL_HARD_MAX_PATH_LENGTH  8
 #define BABL_MAX_NAME_LEN          1024
 
+#ifndef MIN
 #define MIN(a, b) (((a) > (b)) ? (b) : (a))
+#endif
 
 #define NUM_TEST_PIXELS            3072
 
@@ -68,10 +70,13 @@ get_path_instrumentation (FishPathInstrumentation *fpi,
                           double                  *ref_cost,
                           double                  *path_error);
 
+
 static long
 process_conversion_path (BablList   *path,
                          const void *source_buffer,
+                         int         source_bpp,
                          void       *destination_buffer,
+                         int         dest_bpp,
                          long        n);
 
 static void
@@ -177,6 +182,10 @@ get_conversion_path (PathContext *pc,
         {
           FishPathInstrumentation fpi;
           memset (&fpi, 0, sizeof (fpi));
+
+          fpi.source = current_format;
+          fpi.destination = pc->to_format;
+
           get_path_instrumentation (&fpi, pc->current_path, &path_cost, &ref_cost, &path_error);
 
           if ((path_cost < ref_cost) && /* do not use paths that took longer to compute than reference */
@@ -327,9 +336,40 @@ babl_fish_path_process (Babl       *babl,
                         void       *destination,
                         long        n)
 {
+   const Babl *babl_source = babl->fish.source;
+   const Babl *babl_dest = babl->fish.destination;
+   int source_bpp;
+   int dest_bpp;
+
+   switch (babl_source->instance.class_type)
+     {
+       case BABL_FORMAT:
+         source_bpp = babl_source->format.bytes_per_pixel;
+         break;
+       case BABL_TYPE:
+         source_bpp = babl_source->type.bits / 8;
+         break;
+       default:
+         babl_log ("=eeek{%i}\n", babl_source->instance.class_type - BABL_MAGIC);
+     }
+
+   switch (babl_dest->instance.class_type)
+     {
+       case BABL_FORMAT:
+         dest_bpp = babl_dest->format.bytes_per_pixel;
+         break;
+       case BABL_TYPE:
+         dest_bpp = babl_dest->type.bits / 8;
+         break;
+       default:
+         babl_log ("-eeek{%i}\n", babl_dest->instance.class_type - BABL_MAGIC);
+     }
+
   return process_conversion_path (babl->fish_path.conversion_list,
                                   source,
+                                  source_bpp,
                                   destination,
+                                  dest_bpp,
                                   n);
 
 }
@@ -382,7 +422,7 @@ babl_fish_process (Babl       *babl,
 }
 
 /* This size buffers needs to be possible to allocate on the stack..*/
-#define MAX_BUFFER_SIZE   14000
+#define MAX_BUFFER_SIZE   1024
 
 static long
 babl_process_chunks (const Babl *cbabl,
@@ -494,7 +534,9 @@ static void inline *align_16 (unsigned char *ret)
 static long
 process_conversion_path (BablList   *path,
                          const void *source_buffer,
+                         int         source_bpp,
                          void       *destination_buffer,
+                         int         dest_bpp,
                          long        n)
 {
   int conversions = babl_list_size (path);
@@ -508,42 +550,59 @@ process_conversion_path (BablList   *path,
     }
   else
     {
-      void *aux1_buffer = align_16 (alloca (n * sizeof (double) * 5 + 16));
-      void *aux2_buffer = NULL;
-      void *swap_buffer = NULL;
-      int   i;
+      long j;
+      int source_bpp = 0;
+      int dest_bpp = 0;
+
+      void *temp_buffer = align_16 (alloca (MIN(n, MAX_BUFFER_SIZE) * sizeof (double) * 5 + 16));
+      void *temp_buffer2 = NULL;
 
       if (conversions > 2)
         {
           /* We'll need one more auxiliary buffer */
-          aux2_buffer = align_16 (alloca ((n * sizeof (double) * 5 + 16)));
+          temp_buffer2 = align_16 (alloca (MIN(n, MAX_BUFFER_SIZE) * sizeof (double) * 5 + 16));
         }
 
-      /* The first conversion goes from source_buffer to aux1_buffer */
-      babl_conversion_process (babl_list_get_first (path),
-                               source_buffer,
-                               aux1_buffer,
-                               n);
 
-      /* Process, if any, conversions between the first and the last
-       * conversion in the path, in a loop */
-      for (i = 1; i < conversions - 1; i++)
+
+
+      for (j = 0; j < n; j+= MAX_BUFFER_SIZE)
         {
-          babl_conversion_process (path->items[i],
+          long c = MIN (n - 1, MAX_BUFFER_SIZE);
+          int i;
+
+          /* this is where the loop unrolling should happen */
+          void *aux1_buffer = temp_buffer;
+          void *aux2_buffer = NULL;
+          void *swap_buffer = NULL;
+          aux2_buffer = temp_buffer2;
+
+          /* The first conversion goes from source_buffer to aux1_buffer */
+          babl_conversion_process (babl_list_get_first (path),
+                                   (void*)(((unsigned char*)source_buffer) + (j * source_bpp)),
                                    aux1_buffer,
-                                   aux2_buffer,
-                                   n);
-          /* Swap the auxiliary buffers */
-          swap_buffer = aux1_buffer;
-          aux1_buffer = aux2_buffer;
-          aux2_buffer = swap_buffer;
-        }
+                                   c);
 
-      /* The last conversion goes from aux1_buffer to destination_buffer */
-      babl_conversion_process (babl_list_get_last (path),
-                               aux1_buffer,
-                               destination_buffer,
-                               n);
+          /* Process, if any, conversions between the first and the last
+           * conversion in the path, in a loop */
+          for (i = 1; i < conversions - 1; i++)
+            {
+              babl_conversion_process (path->items[i],
+                                       aux1_buffer,
+                                       aux2_buffer,
+                                       c);
+              /* Swap the auxiliary buffers */
+              swap_buffer = aux1_buffer;
+              aux1_buffer = aux2_buffer;
+              aux2_buffer = swap_buffer;
+            }
+
+          /* The last conversion goes from aux1_buffer to destination_buffer */
+          babl_conversion_process (babl_list_get_last (path),
+                                   aux1_buffer,
+                                   (void*)((unsigned char*)destination_buffer + (j * dest_bpp)),
+                                   c);
+        }
   }
 
   return n;
@@ -664,20 +723,48 @@ get_path_instrumentation (FishPathInstrumentation *fpi,
   long   ticks_start = 0;
   long   ticks_end   = 0;
 
+  Babl *babl_source = fpi->source;
+  Babl *babl_destination = fpi->destination;
+
+  int source_bpp;
+  int dest_bpp;
+
+  switch (babl_source->instance.class_type)
+    {
+      case BABL_FORMAT:
+        source_bpp = babl_source->format.bytes_per_pixel;
+        break;
+      case BABL_TYPE:
+        source_bpp = babl_source->type.bits / 8;
+        break;
+      default:
+        babl_log ("=eeek{%i}\n", babl_source->instance.class_type - BABL_MAGIC);
+    }
+
+  switch (babl_destination->instance.class_type)
+    {
+      case BABL_FORMAT:
+        dest_bpp = babl_destination->format.bytes_per_pixel;
+        break;
+      case BABL_TYPE:
+        dest_bpp = babl_destination->type.bits / 8;
+        break;
+      default:
+        babl_log ("-eeek{%i}\n", babl_destination->instance.class_type - BABL_MAGIC);
+     }
+
   if (!fpi->init_instrumentation_done)
     {
       /* this initialization can be done only once since the
        * source and destination formats do not change during
        * the search */
-      Babl *fmt_source = (Babl *) BABL (babl_list_get_first (path))->conversion.source;
-      Babl *fmt_destination = (Babl *) BABL (babl_list_get_last (path))->conversion.destination;
-      init_path_instrumentation (fpi, fmt_source, fmt_destination);
+      init_path_instrumentation (fpi, babl_source, babl_destination);
       fpi->init_instrumentation_done = 1;
     }
 
   /* calculate this path's view of what the result should be */
   ticks_start = babl_ticks ();
-  process_conversion_path (path, fpi->source, fpi->destination, NUM_TEST_PIXELS);
+  process_conversion_path (path, fpi->source, source_bpp, fpi->destination, dest_bpp, NUM_TEST_PIXELS);
   ticks_end = babl_ticks ();
   *path_cost = babl_process_cost (ticks_start, ticks_end);
 
diff --git a/babl/babl-list.h b/babl/babl-list.h
index 60cdf6e..69af9fc 100644
--- a/babl/babl-list.h
+++ b/babl/babl-list.h
@@ -46,9 +46,10 @@ babl_list_insert_last (BablList *list,
 void
 babl_list_remove_last (BablList *list);
 
-#define babl_list_get_first(list) (list->items[0])
-#define babl_list_get_last(list)  (list->items[list->count-1])
+#define babl_list_get_n(list,n)   (list->items[(n)])
+#define babl_list_get_first(list) (babl_list_get_n(list,0))
 #define babl_list_size(list)      (list->count)
+#define babl_list_get_last(list)  (babl_list_get_n(list, babl_list_size(list)-1))
 
 void
 babl_list_copy (BablList *from,



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]