[babl] babl: measure performance of LUTs and only replace if faster



commit 1773494c339e02b13ce800bcc2dcd7214181e000
Author: Øyvind Kolås <pippin gimp org>
Date:   Wed Jan 26 03:15:14 2022 +0100

    babl: measure performance of LUTs and only replace if faster
    
    With this also enabling replacement of conversions where spaces do not
    differ.

 babl/babl-fish-path.c | 1640 ++++++++++++++++++++++++++-----------------------
 1 file changed, 859 insertions(+), 781 deletions(-)
---
diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index e49dcfd29..f254452cd 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -32,576 +32,909 @@
 #define MIN(a, b) (((a) > (b)) ? (b) : (a))
 #endif
 
-#define MAX_BUFFER_SIZE            512
-#define ITERATIONS                 4
 
-int   babl_in_fish_path = 0;
+typedef struct GcContext {
+   long time;
+} GcContext;
 
-typedef struct _FishPathInstrumentation
+static int gc_fishes (Babl *babl, void *userdata)
 {
-  const Babl   *fmt_rgba_double;
-  int     num_test_pixels;
-  void   *source;
-  void   *destination;
-  void   *ref_destination;
-  double *destination_rgba_double;
-  double *ref_destination_rgba_double;
-  const Babl   *fish_rgba_to_source;
-  const Babl   *fish_reference;
-  const Babl   *fish_destination_to_rgba;
-  double  reference_cost;
-  int     init_instrumentation_done;
-} FishPathInstrumentation;
-
-typedef struct PathContext {
-  Babl     *fish_path;
-  Babl     *to_format;
-  BablList *current_path;
-} PathContext;
-
-static void
-init_path_instrumentation (FishPathInstrumentation *fpi,
-                           Babl                    *fmt_source,
-                           Babl                    *fmt_destination);
-
-static void
-destroy_path_instrumentation (FishPathInstrumentation *fpi);
-
-static void
-get_path_instrumentation (FishPathInstrumentation *fpi,
-                          BablList                *path,
-                          double                  *path_cost,
-                          double                  *ref_cost,
-                          double                  *path_error);
-
-
-static inline void
-process_conversion_path (BablList   *path,
-                         const void *source_buffer,
-                         int         source_bpp,
-                         void       *destination_buffer,
-                         int         dest_bpp,
-                         long        n);
-
+  GcContext *context = userdata;
+  if (babl->class_type == BABL_FISH_PATH)
+  {
+    if (babl->fish_path.u8_lut)
+    {
+      if (context->time - babl->fish_path.last_lut_use >
+          1000 * 1000 * 60 * 5)
+      {
+        void *lut =babl->fish_path.u8_lut;
+        BABL(babl)->fish_path.u8_lut = NULL;
+        free (lut);
+#if 0
+        fprintf (stderr, "freeing LUT %s to %s unused for >5 minutes\n",
+                        babl_get_name (babl->conversion.source),
+                        babl_get_name (babl->conversion.destination));
+#endif
+      }
+    }
+  }
+  return 0;
+}
+                           
 static void
-get_conversion_path (PathContext *pc,
-                     Babl        *current_format,
-                     int          current_length,
-                     int          max_length,
-                     double       legal_error);
-
-char *
-_babl_fish_create_name (char       *buf,
-                        const Babl *source,
-                        const Babl *destination,
-                        int         is_reference);
-
-
-static int max_path_length (void);
-
-static int debug_conversions = 0;
-int _babl_instrument = 0;
-
-double
-_babl_legal_error (void)
+babl_gc_fishes (void)
 {
-  static double error = 0.0;
-  const char   *env;
-
-  if (error != 0.0)
-    return error;
+  GcContext context;
+  context.time = babl_ticks ();
+  babl_fish_class_for_each (gc_fishes, &context);
+  //malloc_trim (0); 
+  //  is responsibility of higher layers
+}
 
-  env = getenv ("BABL_TOLERANCE");
-  if (env && env[0] != '\0')
-    error = babl_parse_double (env);
-  else
-    error = BABL_TOLERANCE;
+#define BABL_LIKELY(x)      __builtin_expect(!!(x), 1)
+#define BABL_UNLIKELY(x)    __builtin_expect(!!(x), 0)
 
-  env = getenv ("BABL_DEBUG_CONVERSIONS");
-  if (env && env[0] != '\0')
-    debug_conversions = 1;
-  else
-    debug_conversions = 0;
+static long timings[256] = {0,};
 
-  env = getenv ("BABL_INSTRUMENT");
-  if (env && env[0] != '\0')
-    _babl_instrument = 1;
-  else
-    _babl_instrument = 0;
+static inline void _do_lut (uint32_t *lut,
+                           int   source_bpp,
+                           int   dest_bpp,
+                           void *__restrict__ source,
+                           void *__restrict__ destination,
+                           long n)
+{
+        if (source_bpp == 4 && dest_bpp == 16)
+        {
+          uint32_t *src = (uint32_t*)source;
+          uint32_t *dst = (uint32_t*)destination;
+          while (n--)
+          {
+             uint32_t col = *src++;
+             uint32_t lut_offset = col & 0xffffff;
+             float alpha = (col>>24)/255.0;
 
-  return error;
+             *dst++ = lut[lut_offset*4+0];
+             *dst++ = lut[lut_offset*4+1];
+             *dst++ = lut[lut_offset*4+2];
+             ((float*)(dst))[0] = alpha;
+             dst++;
+          }
+          return 1;
+        }
+        else if (source_bpp == 4 && dest_bpp == 4)
+        {
+          uint32_t *src = (uint32_t*)source;
+          uint32_t *dst = (uint32_t*)destination;
+          while (n--)
+          {
+             uint32_t col = *src++;
+             *dst = col & 0xff000000;
+             *dst |= lut[col & 0xffffff];
+             dst++;
+          }
+          return 1;
+        }
+        else if (source_bpp == 2 && dest_bpp == 4)
+        {
+          uint16_t *src = (uint16_t*)source;
+          uint32_t *dst = (uint32_t*)destination;
+          while (n--)
+          {
+             *dst = lut[*src++];
+             dst++;
+          }
+          return 1;
+        }
+        else if (source_bpp == 1 && dest_bpp == 4)
+        {
+          uint8_t *src = (uint8_t*)source;
+          uint32_t *dst = (uint32_t*)destination;
+          while (n--)
+          {
+             *dst = lut[*src++];
+             dst++;
+          }
+          return 1;
+        }
+        else if (source_bpp == 3 && dest_bpp == 3)
+        {
+          uint8_t *src = (uint8_t*)source;
+          uint8_t *dst = (uint8_t*)destination;
+          while (n--)
+          {
+             uint32_t col = src[0]*256*256+src[1]*256+src[2];
+             uint32_t val = lut[col];
+             dst[2]=(val >> 16) & 0xff;
+             dst[1]=(val >> 8) & 0xff;
+             dst[0]=val & 0xff;
+             dst+=3;
+             src+=3;
+          }
+          return 1;
+        }
+        else if (source_bpp == 3 && dest_bpp == 4)
+        {
+          uint8_t *src = (uint8_t*)source;
+          uint32_t *dst = (uint32_t*)destination;
+          while (n--)
+          {
+             uint32_t col = src[0]*256*256+src[1]*256+src[2];
+             *dst = lut[col];
+             dst++;
+             src+=3;
+          }
+          return 1;
+        }
 }
 
-static int
-max_path_length (void)
+void do_lut (uint32_t *lut,
+                           int   source_bpp,
+                           int   dest_bpp,
+                           void *__restrict__ source,
+                           void *__restrict__ dest,
+                           long count)
 {
-  static int  max_length = 0;
-  const char *env;
-
-  if (max_length != 0)
-    return max_length;
-
-  env = getenv ("BABL_PATH_LENGTH");
-  if (env)
-    max_length = atoi (env);
-  else
-    max_length = 3; /* reducing this number makes finding short fishes much
-                       faster - even if we lose out on some of the fast
-                       bigger fish, the fishes we can get with a max_length of 3
-                       is actually 5, since we deepen the search to that
-                       depth if none are found within two steps in the
-                       initial search.
-                     */
-  if (max_length > BABL_HARD_MAX_PATH_LENGTH)
-    max_length = BABL_HARD_MAX_PATH_LENGTH;
-  else if (max_length <= 0)
-    max_length = 1;
-  return max_length;
+   _do_lut (lut, source_bpp, dest_bpp, source, dest, count);
 }
 
-int
-_babl_max_path_len (void)
+static inline long lut_timing_for (int source_bpp, int dest_bpp)
 {
-  return max_path_length ();
+  return timings[source_bpp * 16 + dest_bpp];
 }
 
-static int
-bad_idea (const Babl *from, const Babl *to, const Babl *format)
+static void measure_timings(void)
 {
-  if (babl_format_has_alpha (from) &&
-      babl_format_has_alpha (to) &&
-      !babl_format_has_alpha (format))
-  {
-    return 1;
-  }
-  if (from->format.components > format->format.components &&
-      to->format.components > format->format.components)
-  {
-    return 1;
-  }
-  if (from->format.type[0]->bits > format->format.type[0]->bits &&
-      to->format.type[0]->bits > format->format.type[0]->bits)
-  {
-    /* XXX: perhaps we especially avoid going to half-float, when
-     * going between u16 formats as well? */
-    return 1;
-  }
-
-  return 0;
+   int num_pixels = babl_get_num_path_test_pixels () * 1000;
+   int pairs[][2]={{4,4},{3,4},{3,3},{2,4},{1,4},{4,16}};
+   uint32_t *lut = malloc (256 * 256 * 256 * 16);
+   uint32_t *src = malloc (num_pixels * 16);
+   uint32_t *dst = malloc (num_pixels * 16);
+   fprintf (stderr, "measuring lut timings          \n");
+   for (int p = 0; p < sizeof (pairs)/sizeof(pairs[0]);p++)
+   {
+     int source_bpp = pairs[p][0];
+     int dest_bpp = pairs[p][1];
+     long start,end;
+     start = babl_ticks ();
+     _do_lut (lut, source_bpp, dest_bpp, src, dst, num_pixels);
+
+     end = babl_ticks ();
+
+     timings[source_bpp * 16 + dest_bpp] = (end-start)/1000;
+     fprintf (stderr, "%i %i: %i\n", source_bpp, dest_bpp,
+         timings[source_bpp * 16 + dest_bpp]
+                     );
+   }
+   free (lut);
+   free (src);
+   free (dst);
 }
 
+static inline void
+process_conversion_path (BablList   *path,
+                         const void *source_buffer,
+                         int         source_bpp,
+                         void       *destination_buffer,
+                         int         dest_bpp,
+                         long        n);
 
-/* The task of BablFishPath construction is to compute
- * the shortest path in a graph where formats are the vertices
- * and conversions are the edges. However, there is an additional
- * constraint to the shortest path, that limits conversion error
- * introduced by such a path to be less than BABL_TOLERANCE. This
- * prohibits usage of any reasonable shortest path construction
- * algorithm such as Dijkstra's algorithm. The shortest path is
- * constructed by enumerating all available paths that are less
- * than BABL_PATH_LENGTH long, computing their costs and
- * conversion errors and backtracking. The backtracking is
- * implemented by recursive function get_conversion_path ().
- */
-
-static void
-get_conversion_path (PathContext *pc,
-                     Babl        *current_format,
-                     int          current_length,
-                     int          max_length,
-                     double       legal_error)
+static int babl_fish_lut_process_maybe (const Babl *babl,
+                                        const char *source,
+                                        const char *destination,
+                                        long        n,
+                                        void       *data)
 {
-  if (current_length > max_length)
-    {
-      /* We have reached the maximum recursion
-       * depth, let's bail out */
-      return;
-    }
-  else if ((current_length > 0) && (current_format == pc->to_format))
-    {
-       /* We have found a candidate path, let's
-        * see about it's properties */
-      double path_cost  = 0.0;
-      double ref_cost   = 0.0;
-      double path_error = 1.0;
-#if 1
-      int    i;
-      for (i = 0; i < babl_list_size (pc->current_path); i++)
-        {
-          path_error *= (1.0 + babl_conversion_error ((BablConversion *) pc->current_path->items[i]));
-        }
-
-      if (path_error - 1.0 <= legal_error )
-                /* check this before the more accurate measurement of error -
-                   to bail earlier, this also leads to a stricter
-                   discarding of bad fast paths  */
-#endif
-        {
-          FishPathInstrumentation fpi;
-          memset (&fpi, 0, sizeof (fpi));
+     int source_bpp = babl->fish_path.source_bpp;
+     int dest_bpp = babl->fish_path.dest_bpp;
+     uint32_t *lut = (uint32_t*)babl->fish_path.u8_lut;
+     BABL(babl)->fish.pixels += n;
 
-          fpi.source = (Babl*) babl_list_get_first (pc->current_path)->conversion.source;
-          fpi.destination = pc->to_format;
 
-          get_path_instrumentation (&fpi, pc->current_path, &path_cost, &ref_cost, &path_error);
-          if(debug_conversions && current_length == 1)
-            fprintf (stderr, "%s  error:%f cost:%f  \n",
-                 babl_get_name (pc->current_path->items[0]), path_error, path_cost);
+     if (BABL_UNLIKELY(!lut && babl->fish.pixels >= 128 * 256))
+     {
+#if 0
+       fprintf (stderr, "building LUT for %s to %s\n",
+                        babl_get_name (babl->conversion.source),
+                        babl_get_name (babl->conversion.destination));
+#endif
+       if (source_bpp ==4 && dest_bpp == 4)
+       {
+         lut = malloc (256 * 256 * 256 * 4);
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           lut[o] = o;
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  lut, 4,
+                                  lut, 4,
+                                  256*256*256);
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           lut[o] = lut[o] & 0x00ffffff;
+       }
+       else if (source_bpp == 4 && dest_bpp == 16)
+       {
+         uint32_t *temp_lut = malloc (256 * 256 * 256 * 4);
+         lut = malloc (256 * 256 * 256 * 16);
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           temp_lut[o] = o;
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  temp_lut, 4,
+                                  lut, 16,
+                                  256*256*256);
+         free (temp_lut);
+       }
+       else if (source_bpp == 3 && dest_bpp == 3)
+       {
+         lut = malloc (256 * 256 * 256 * 4);
+         uint8_t *temp_lut = malloc (256 * 256 * 256 * 3);
+         uint8_t *temp_lut2 = malloc (256 * 256 * 256 * 3);
+         int o = 0;
+         for (int r = 0; r < 256; r++)
+         for (int g = 0; g < 256; g++)
+         for (int b = 0; b < 256; b++, o++)
+         {
+           temp_lut[o*3+0]=r;
+           temp_lut[o*3+1]=g;
+           temp_lut[o*3+2]=b;
+         }
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  temp_lut, 3,
+                                  temp_lut2, 3,
+                                  256*256*256);
+         babl_process (babl_fish (babl_format ("R'G'B' u8"), babl_format ("R'G'B'A u8")),
+                       temp_lut2, lut, 256*256*256);
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           lut[o] = lut[o] & 0x00ffffff;
+         free (temp_lut);
+         free (temp_lut2);
+       }
+       else if (source_bpp == 3 && dest_bpp == 4)
+       {
+         lut = malloc (256 * 256 * 256 * 4);
+         uint8_t *temp_lut = malloc (256 * 256 * 256 * 3);
+         int o = 0;
+         for (int r = 0; r < 256; r++)
+         for (int g = 0; g < 256; g++)
+         for (int b = 0; b < 256; b++, o++)
+         {
+           temp_lut[o*3+0]=r;
+           temp_lut[o*3+1]=g;
+           temp_lut[o*3+2]=b;
+         }
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  temp_lut, 3,
+                                  lut, 4,
+                                  256*256*256);
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           lut[o] = lut[o] & 0x00ffffff;
+         free (temp_lut);
+       }
+       else if (source_bpp == 2 && dest_bpp == 4)
+       {
+         lut = malloc (256 * 256 * 4);
+         uint16_t *temp_lut = malloc (256 * 256 * 2);
+         for (int o = 0; o < 256*256; o++)
+         {
+           temp_lut[o]=o;
+         }
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  temp_lut, 2,
+                                  lut, 4,
+                                  256*256);
+         for (int o = 0; o < 256 * 256; o++)
+           lut[o] = lut[o] & 0x00ffffff;
+         free (temp_lut);
+       }
+       else if (source_bpp == 1 && dest_bpp == 4)
+       {
+         lut = malloc (256 * 4);
+         uint8_t *temp_lut = malloc (256);
+         for (int o = 0; o < 256; o++)
+         {
+           temp_lut[o]=o;
+         }
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  temp_lut, 1,
+                                  lut, 4,
+                                  256);
+         for (int o = 0; o < 256; o++)
+           lut[o] = lut[o] & 0x00ffffff;
+         free (temp_lut);
+       }
 
-          if ((path_cost < ref_cost) && /* do not use paths that took longer to compute than reference */
-              (path_cost < pc->fish_path->fish_path.cost) && // best thus far
-              (path_error <= legal_error )               // within tolerance
-              )
-            {
-              /* We have found the best path so far,
-               * let's copy it into our new fish */
-              pc->fish_path->fish_path.cost = path_cost;
-              pc->fish_path->fish.error  = path_error;
-              babl_list_copy (pc->current_path,
-                              pc->fish_path->fish_path.conversion_list);
-            }
+       if (babl->fish_path.u8_lut == NULL)
+       {
+         (BABL(babl)->fish_path.u8_lut) = lut;
+         // XXX need memory barrier?
+         if ((BABL(babl)->fish_path.u8_lut) != lut)
+         {
+           free (lut);
+           lut = babl->fish_path.u8_lut;
+         }
+       }
+       else
+       {
+         free (lut);
+         lut = babl->fish_path.u8_lut;
+       }
+     }
+     if (lut)
+     {
+        do_lut (lut, source_bpp, dest_bpp, source, destination, n);
+        BABL(babl)->fish_path.last_lut_use = babl_ticks ();
+     }
+     return 0;
+}
 
-          destroy_path_instrumentation (&fpi);
-        }
-    }
-  else
-    {
-      /*
-       * we have to search deeper...
-       */
-      BablList *list;
-      int i;
 
-      list = current_format->format.from_list;
-      if (list)
-        {
-          /* Mark the current format in conversion path as visited */
-          current_format->format.visited = 1;
 
-          /* Iterate through unvisited formats from the current format ...*/
-          for (i = 0; i < babl_list_size (list); i++)
-            {
-              Babl *next_conversion = BABL (list->items[i]);
-              Babl *next_format = BABL (next_conversion->conversion.destination);
-              if (!next_format->format.visited && !bad_idea (current_format, pc->to_format, next_format))
-                {
-                  /* next_format is not in the current path, we can pay a visit */
-                  babl_list_insert_last (pc->current_path, next_conversion);
-                  get_conversion_path (pc, next_format, current_length + 1, max_length, legal_error);
-                  babl_list_remove_last (pc->current_path);
-                }
-            }
+#define MAX_BUFFER_SIZE            512
 
-          /* Remove the current format from current path */
-          current_format->format.visited = 0;
-        }
-   }
-}
+int   babl_in_fish_path = 0;
 
-char *
-_babl_fish_create_name (char       *buf,
-                        const Babl *source,
-                        const Babl *destination,
-                        int         is_reference)
+typedef struct _FishPathInstrumentation
 {
-  /* fish names are intentionally kept short */
-  snprintf (buf, BABL_MAX_NAME_LEN, "%s %p %p %i", "",
-            source, destination, is_reference);
-  return buf;
-}
-
-int
-_babl_fish_path_destroy (void *data);
+  const Babl   *fmt_rgba_double;
+  int     num_test_pixels;
+  void   *source;
+  void   *destination;
+  void   *ref_destination;
+  double *destination_rgba_double;
+  double *ref_destination_rgba_double;
+  const Babl   *fish_rgba_to_source;
+  const Babl   *fish_reference;
+  const Babl   *fish_destination_to_rgba;
+  double  reference_cost;
+  int     init_instrumentation_done;
+} FishPathInstrumentation;
 
-int
-_babl_fish_path_destroy (void *data)
+typedef struct PathContext {
+  Babl     *fish_path;
+  Babl     *to_format;
+  BablList *current_path;
+} PathContext;
+
+static void
+init_path_instrumentation (FishPathInstrumentation *fpi,
+                           Babl                    *fmt_source,
+                           Babl                    *fmt_destination);
+
+static void
+destroy_path_instrumentation (FishPathInstrumentation *fpi);
+
+static void
+get_path_instrumentation (FishPathInstrumentation *fpi,
+                          BablList                *path,
+                          double                  *path_cost,
+                          double                  *ref_cost,
+                          double                  *path_error);
+
+
+static inline void
+process_conversion_path (BablList   *path,
+                         const void *source_buffer,
+                         int         source_bpp,
+                         void       *destination_buffer,
+                         int         dest_bpp,
+                         long        n);
+
+static void
+get_conversion_path (PathContext *pc,
+                     Babl        *current_format,
+                     int          current_length,
+                     int          max_length,
+                     double       legal_error);
+
+char *
+_babl_fish_create_name (char       *buf,
+                        const Babl *source,
+                        const Babl *destination,
+                        int         is_reference);
+
+
+static int max_path_length (void);
+
+static int debug_conversions = 0;
+int _babl_instrument = 0;
+
+double
+_babl_legal_error (void)
 {
-  Babl *babl=data;
-  if (babl->fish_path.u8_lut)
-    free (babl->fish_path.u8_lut);
-  babl->fish_path.u8_lut = NULL;
-  if (babl->fish_path.conversion_list)
-    babl_free (babl->fish_path.conversion_list);
-  babl->fish_path.conversion_list = NULL;
-  return 0;
+  static double error = 0.0;
+  const char   *env;
+
+  if (error != 0.0)
+    return error;
+
+  env = getenv ("BABL_TOLERANCE");
+  if (env && env[0] != '\0')
+    error = babl_parse_double (env);
+  else
+    error = BABL_TOLERANCE;
+
+  env = getenv ("BABL_DEBUG_CONVERSIONS");
+  if (env && env[0] != '\0')
+    debug_conversions = 1;
+  else
+    debug_conversions = 0;
+
+  env = getenv ("BABL_INSTRUMENT");
+  if (env && env[0] != '\0')
+    _babl_instrument = 1;
+  else
+    _babl_instrument = 0;
+
+  return error;
 }
 
 static int
-show_item (Babl *babl,
-           void *user_data)
+max_path_length (void)
 {
-  BablConversion *conv = (void *)babl;
+  static int  max_length = 0;
+  const char *env;
 
-  if (conv->destination->class_type == BABL_FORMAT)
-  {
-    fprintf (stderr, "%s : %.12f\n", babl_get_name (babl), babl_conversion_error(conv));
-  }
+  if (max_length != 0)
+    return max_length;
 
-  return 0;
+  env = getenv ("BABL_PATH_LENGTH");
+  if (env)
+    max_length = atoi (env);
+  else
+    max_length = 3; /* reducing this number makes finding short fishes much
+                       faster - even if we lose out on some of the fast
+                       bigger fish, the fishes we can get with a max_length of 3
+                       is actually 5, since we deepen the search to that
+                       depth if none are found within two steps in the
+                       initial search.
+                     */
+  if (max_length > BABL_HARD_MAX_PATH_LENGTH)
+    max_length = BABL_HARD_MAX_PATH_LENGTH;
+  else if (max_length <= 0)
+    max_length = 1;
+  return max_length;
 }
 
-static int
-alias_conversion (Babl *babl,
-                  void *user_data)
+int
+_babl_max_path_len (void)
 {
-  const Babl *sRGB = babl_space ("sRGB");
-  BablConversion *conv = (void *)babl;
-  BablSpace *space = user_data;
+  return max_path_length ();
+}
 
-  if ((conv->source->class_type == BABL_FORMAT) &&
-      (conv->destination->class_type == BABL_FORMAT) &&
-      (!babl_format_is_palette (conv->source)) &&
-      (!babl_format_is_palette (conv->destination)))
+static int
+bad_idea (const Babl *from, const Babl *to, const Babl *format)
+{
+  if (babl_format_has_alpha (from) &&
+      babl_format_has_alpha (to) &&
+      !babl_format_has_alpha (format))
   {
-    if ((conv->source->format.space == sRGB) &&
-        (conv->destination->format.space == sRGB))
-    {
-      switch (conv->instance.class_type)
-      {
-        case BABL_CONVERSION_LINEAR:
-         babl_conversion_new (
-                babl_format_with_space (
-                      (void*)conv->source->instance.name, (void*)space),
-                babl_format_with_space (
-                      (void*)conv->destination->instance.name, (void*)space),
-                "linear", conv->function.linear,
-                "data", conv->data,
-                NULL);
-          break;
-        case BABL_CONVERSION_PLANAR:
-         babl_conversion_new (
-                babl_format_with_space (
-                      (void*)conv->source->instance.name, (void*)space),
-                babl_format_with_space (
-                      (void*)conv->destination->instance.name, (void*)space),
-                "planar", conv->function.planar,
-                "data", conv->data,
-                NULL);
-          break;
-        case BABL_CONVERSION_PLANE:
-          babl_conversion_new (
-                babl_format_with_space (
-                      (void*)conv->source->instance.name, (void*)space),
-                babl_format_with_space (
-                      (void*)conv->destination->instance.name, (void*)space),
-                "plane", conv->function.plane,
-                "data", conv->data,
-                NULL);
-          break;
-        default:
-          break;
-      }
-    }
+    return 1;
   }
-  else
-  if ((conv->source->class_type == BABL_MODEL) &&
-      (conv->destination->class_type == BABL_MODEL))
+  if (from->format.components > format->format.components &&
+      to->format.components > format->format.components)
   {
-    if ((conv->source->model.space == sRGB) &&
-        (conv->destination->model.space == sRGB))
-    {
-      switch (conv->instance.class_type)
-      {
-        case BABL_CONVERSION_LINEAR:
-          babl_conversion_new (
-                babl_remodel_with_space (
-                      (void*)conv->source, (void*)space),
-                babl_remodel_with_space (
-                      (void*)conv->destination, (void*)space),
-                "linear", conv->function.linear,
-                "data",   conv->data,
-                NULL);
-          break;
-        case BABL_CONVERSION_PLANAR:
-          babl_conversion_new (
-                babl_remodel_with_space (
-                      (void*)conv->source, (void*)space),
-                babl_remodel_with_space (
-                      (void*)conv->destination, (void*)space),
-                "planar", conv->function.planar,
-                "data",   conv->data,
-                NULL);
-          break;
-        case BABL_CONVERSION_PLANE:
-          babl_conversion_new (
-                babl_remodel_with_space (
-                      (void*)conv->source, (void*)space),
-                babl_remodel_with_space (
-                      (void*)conv->destination, (void*)space),
-                "plane", conv->function.plane,
-                "data",  conv->data,
-                NULL);
-          break;
-        default:
-          break;
-      }
-    }
+    return 1;
   }
-  else
-  if ((conv->source->class_type == BABL_TYPE) &&
-      (conv->destination->class_type == BABL_TYPE))
+  if (from->format.type[0]->bits > format->format.type[0]->bits &&
+      to->format.type[0]->bits > format->format.type[0]->bits)
   {
+    /* XXX: perhaps we especially avoid going to half-float, when
+     * going between u16 formats as well? */
+    return 1;
   }
+
   return 0;
 }
 
-void
-_babl_fish_prepare_bpp (Babl *babl)
-{
-   const Babl *babl_source = babl->fish.source;
-   const Babl *babl_dest = babl->fish.destination;
 
-   switch (babl_source->instance.class_type)
-     {
-       case BABL_FORMAT:
-         babl->fish_path.source_bpp = babl_source->format.bytes_per_pixel;
-         break;
-       case BABL_TYPE:
-         babl->fish_path.source_bpp = babl_source->type.bits / 8;
-         break;
-       default:
-         babl_log ("=eeek{%i}\n",
-                         babl_source->instance.class_type - BABL_MAGIC);
-     }
-
-   switch (babl_dest->instance.class_type)
-     {
-       case BABL_FORMAT:
-         babl->fish_path.dest_bpp = babl_dest->format.bytes_per_pixel;
-         break;
-       case BABL_TYPE:
-         babl->fish_path.dest_bpp = babl_dest->type.bits / 8;
-         break;
-       default:
-         babl_log ("-eeek{%i}\n", babl_dest->instance.class_type - BABL_MAGIC);
-     }
-}
+/* The task of BablFishPath construction is to compute
+ * the shortest path in a graph where formats are the vertices
+ * and conversions are the edges. However, there is an additional
+ * constraint to the shortest path, that limits conversion error
+ * introduced by such a path to be less than BABL_TOLERANCE. This
+ * prohibits usage of any reasonable shortest path construction
+ * algorithm such as Dijkstra's algorithm. The shortest path is
+ * constructed by enumerating all available paths that are less
+ * than BABL_PATH_LENGTH long, computing their costs and
+ * conversion errors and backtracking. The backtracking is
+ * implemented by recursive function get_conversion_path ().
+ */
 
-void
-_babl_fish_missing_fast_path_warning (const Babl *source,
-                                      const Babl *destination)
+static void
+get_conversion_path (PathContext *pc,
+                     Babl        *current_format,
+                     int          current_length,
+                     int          max_length,
+                     double       legal_error)
 {
-#ifndef BABL_UNSTABLE
-  if (debug_conversions)
-#endif
-  {
-    static int warnings = 0;
-
-    if (_babl_legal_error() <= 0.0000000001)
+  if (current_length > max_length)
+    {
+      /* We have reached the maximum recursion
+       * depth, let's bail out */
       return;
+    }
+  else if ((current_length > 0) && (current_format == pc->to_format))
+    {
+       /* We have found a candidate path, let's
+        * see about it's properties */
+      double path_cost  = 0.0;
+      double ref_cost   = 0.0;
+      double path_error = 1.0;
+#if 1
+      int    i;
+      for (i = 0; i < babl_list_size (pc->current_path); i++)
+        {
+          path_error *= (1.0 + babl_conversion_error ((BablConversion *) pc->current_path->items[i]));
+        }
 
-    if (warnings++ == 0)
-      fprintf (stderr,
-"Missing fast-path babl conversion detected, Implementing missing babl fast paths\n"
-"accelerates GEGL, GIMP and other software using babl, warnings are printed on\n"
-"first occurance of formats used where a conversion has to be synthesized\n"
-"programmatically by babl based on format description\n"
-"\n");
-
-    fprintf (stderr, "*WARNING* missing babl fast path(s): \"%s\" to \"%s\"\n",
-       babl_get_name (source),
-       babl_get_name (destination));
-
-  }
-}
+      if (path_error - 1.0 <= legal_error )
+                /* check this before the more accurate measurement of error -
+                   to bail earlier, this also leads to a stricter
+                   discarding of bad fast paths  */
+#endif
+        {
+          FishPathInstrumentation fpi;
+          memset (&fpi, 0, sizeof (fpi));
 
+          fpi.source = (Babl*) babl_list_get_first (pc->current_path)->conversion.source;
+          fpi.destination = pc->to_format;
 
-static Babl *
-babl_fish_path2 (const Babl *source,
-                 const Babl *destination,
-                 double      tolerance)
-{
-  Babl *babl = NULL;
-  const Babl *sRGB = babl_space ("sRGB");
-  char name[BABL_MAX_NAME_LEN];
-  int is_fast = 0;
-  static int debug_missing = -1;
-  if (debug_missing < 0)
-  {
-     const char *val = getenv ("BABL_DEBUG_MISSING");
-     if (val && strcmp (val, "0"))
-       debug_missing = 1;
-     else
-       debug_missing = 0;
-  }
+          get_path_instrumentation (&fpi, pc->current_path, &path_cost, &ref_cost, &path_error);
+          if(debug_conversions && current_length == 1)
+            fprintf (stderr, "%s  error:%f cost:%f  \n",
+                 babl_get_name (pc->current_path->items[0]), path_error, path_cost);
 
-  _babl_fish_create_name (name, source, destination, 1);
-  babl_mutex_lock (babl_format_mutex);
-  babl = babl_db_exist_by_name (babl_fish_db (), name);
+          if ((path_cost < ref_cost) && /* do not use paths that took longer to compute than reference */
+              (path_cost < pc->fish_path->fish_path.cost) && // best thus far
+              (path_error <= legal_error )               // within tolerance
+              )
+            {
+              /* We have found the best path so far,
+               * let's copy it into our new fish */
+              pc->fish_path->fish_path.cost = path_cost;
+              pc->fish_path->fish.error  = path_error;
+              babl_list_copy (pc->current_path,
+                              pc->fish_path->fish_path.conversion_list);
+            }
 
-  if (tolerance <= 0.0)
-  {
-    is_fast = 0;
-    tolerance = _babl_legal_error ();
-  }
+          destroy_path_instrumentation (&fpi);
+        }
+    }
   else
-    is_fast = 1;
-
-  if (!is_fast)
-  {
-  if (babl)
     {
-      /* There is an instance already registered by the required name,
-       * returning the preexistent one instead.
+      /*
+       * we have to search deeper...
        */
-      babl_mutex_unlock (babl_format_mutex);
-      return babl;
-    }
-  }
+      BablList *list;
+      int i;
 
-  if ((source->format.space != sRGB) ||
-      (destination->format.space != sRGB))
-  {
-    static const Babl *run_once[512]={NULL};
-    int i;
-    int done = 0;
+      list = current_format->format.from_list;
+      if (list)
+        {
+          /* Mark the current format in conversion path as visited */
+          current_format->format.visited = 1;
 
-    for (i = 0; run_once[i]; i++)
-    {
-      if (run_once[i] == source->format.space)
-        done |= 1;
-      else if (run_once[i] == destination->format.space)
-        done |= 2;
-    }
+          /* Iterate through unvisited formats from the current format ...*/
+          for (i = 0; i < babl_list_size (list); i++)
+            {
+              Babl *next_conversion = BABL (list->items[i]);
+              Babl *next_format = BABL (next_conversion->conversion.destination);
+              if (!next_format->format.visited && !bad_idea (current_format, pc->to_format, next_format))
+                {
+                  /* next_format is not in the current path, we can pay a visit */
+                  babl_list_insert_last (pc->current_path, next_conversion);
+                  get_conversion_path (pc, next_format, current_length + 1, max_length, legal_error);
+                  babl_list_remove_last (pc->current_path);
+                }
+            }
 
-    /* source space not in initialization array */
-    if ((done & 1) == 0 && (source->format.space != sRGB))
-    {
-      run_once[i++] = source->format.space;
-      babl_conversion_class_for_each (alias_conversion, (void*)source->format.space);
+          /* Remove the current format from current path */
+          current_format->format.visited = 0;
+        }
+   }
+}
 
-      _babl_space_add_universal_rgb (source->format.space);
-    }
+char *
+_babl_fish_create_name (char       *buf,
+                        const Babl *source,
+                        const Babl *destination,
+                        int         is_reference)
+{
+  /* fish names are intentionally kept short */
+  snprintf (buf, BABL_MAX_NAME_LEN, "%s %p %p %i", "",
+            source, destination, is_reference);
+  return buf;
+}
 
-    /* destination space not in initialization array */
-    if ((done & 2) == 0 && (destination->format.space != source->format.space) && (destination->format.space 
!= sRGB))
-    {
-      run_once[i++] = destination->format.space;
-      babl_conversion_class_for_each (alias_conversion, (void*)destination->format.space);
+int
+_babl_fish_path_destroy (void *data);
 
-      _babl_space_add_universal_rgb (destination->format.space);
-    }
+int
+_babl_fish_path_destroy (void *data)
+{
+  Babl *babl=data;
+  if (babl->fish_path.u8_lut)
+    free (babl->fish_path.u8_lut);
+  babl->fish_path.u8_lut = NULL;
+  if (babl->fish_path.conversion_list)
+    babl_free (babl->fish_path.conversion_list);
+  babl->fish_path.conversion_list = NULL;
+  return 0;
+}
 
-    if (!done && 0)
-    {
-      babl_conversion_class_for_each (show_item, (void*)source->format.space);
-    }
+static int
+show_item (Babl *babl,
+           void *user_data)
+{
+  BablConversion *conv = (void *)babl;
+
+  if (conv->destination->class_type == BABL_FORMAT)
+  {
+    fprintf (stderr, "%s : %.12f\n", babl_get_name (babl), babl_conversion_error(conv));
   }
 
-  babl = babl_calloc (1, sizeof (BablFishPath) +
-                      strlen (name) + 1);
-  babl_set_destructor (babl, _babl_fish_path_destroy);
+  return 0;
+}
 
-  babl->class_type                = BABL_FISH_PATH;
-  babl->instance.id               = babl_fish_get_id (source, destination);
-  babl->instance.name             = ((char *) babl) + sizeof (BablFishPath);
-  strcpy (babl->instance.name, name);
-  babl->fish.source               = source;
-  babl->fish.destination          = destination;
-  babl->fish.pixels               = 0;
-  babl->fish.error                = BABL_MAX_COST_VALUE;
-  babl->fish_path.cost            = BABL_MAX_COST_VALUE;
-  babl->fish_path.conversion_list = babl_list_init_with_size (BABL_HARD_MAX_PATH_LENGTH);
+static int
+alias_conversion (Babl *babl,
+                  void *user_data)
+{
+  const Babl *sRGB = babl_space ("sRGB");
+  BablConversion *conv = (void *)babl;
+  BablSpace *space = user_data;
+
+  if ((conv->source->class_type == BABL_FORMAT) &&
+      (conv->destination->class_type == BABL_FORMAT) &&
+      (!babl_format_is_palette (conv->source)) &&
+      (!babl_format_is_palette (conv->destination)))
+  {
+    if ((conv->source->format.space == sRGB) &&
+        (conv->destination->format.space == sRGB))
+    {
+      switch (conv->instance.class_type)
+      {
+        case BABL_CONVERSION_LINEAR:
+         babl_conversion_new (
+                babl_format_with_space (
+                      (void*)conv->source->instance.name, (void*)space),
+                babl_format_with_space (
+                      (void*)conv->destination->instance.name, (void*)space),
+                "linear", conv->function.linear,
+                "data", conv->data,
+                NULL);
+          break;
+        case BABL_CONVERSION_PLANAR:
+         babl_conversion_new (
+                babl_format_with_space (
+                      (void*)conv->source->instance.name, (void*)space),
+                babl_format_with_space (
+                      (void*)conv->destination->instance.name, (void*)space),
+                "planar", conv->function.planar,
+                "data", conv->data,
+                NULL);
+          break;
+        case BABL_CONVERSION_PLANE:
+          babl_conversion_new (
+                babl_format_with_space (
+                      (void*)conv->source->instance.name, (void*)space),
+                babl_format_with_space (
+                      (void*)conv->destination->instance.name, (void*)space),
+                "plane", conv->function.plane,
+                "data", conv->data,
+                NULL);
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  else
+  if ((conv->source->class_type == BABL_MODEL) &&
+      (conv->destination->class_type == BABL_MODEL))
+  {
+    if ((conv->source->model.space == sRGB) &&
+        (conv->destination->model.space == sRGB))
+    {
+      switch (conv->instance.class_type)
+      {
+        case BABL_CONVERSION_LINEAR:
+          babl_conversion_new (
+                babl_remodel_with_space (
+                      (void*)conv->source, (void*)space),
+                babl_remodel_with_space (
+                      (void*)conv->destination, (void*)space),
+                "linear", conv->function.linear,
+                "data",   conv->data,
+                NULL);
+          break;
+        case BABL_CONVERSION_PLANAR:
+          babl_conversion_new (
+                babl_remodel_with_space (
+                      (void*)conv->source, (void*)space),
+                babl_remodel_with_space (
+                      (void*)conv->destination, (void*)space),
+                "planar", conv->function.planar,
+                "data",   conv->data,
+                NULL);
+          break;
+        case BABL_CONVERSION_PLANE:
+          babl_conversion_new (
+                babl_remodel_with_space (
+                      (void*)conv->source, (void*)space),
+                babl_remodel_with_space (
+                      (void*)conv->destination, (void*)space),
+                "plane", conv->function.plane,
+                "data",  conv->data,
+                NULL);
+          break;
+        default:
+          break;
+      }
+    }
+  }
+  else
+  if ((conv->source->class_type == BABL_TYPE) &&
+      (conv->destination->class_type == BABL_TYPE))
+  {
+  }
+  return 0;
+}
+
+void
+_babl_fish_prepare_bpp (Babl *babl)
+{
+   const Babl *babl_source = babl->fish.source;
+   const Babl *babl_dest = babl->fish.destination;
+
+   switch (babl_source->instance.class_type)
+     {
+       case BABL_FORMAT:
+         babl->fish_path.source_bpp = babl_source->format.bytes_per_pixel;
+         break;
+       case BABL_TYPE:
+         babl->fish_path.source_bpp = babl_source->type.bits / 8;
+         break;
+       default:
+         babl_log ("=eeek{%i}\n",
+                         babl_source->instance.class_type - BABL_MAGIC);
+     }
+
+   switch (babl_dest->instance.class_type)
+     {
+       case BABL_FORMAT:
+         babl->fish_path.dest_bpp = babl_dest->format.bytes_per_pixel;
+         break;
+       case BABL_TYPE:
+         babl->fish_path.dest_bpp = babl_dest->type.bits / 8;
+         break;
+       default:
+         babl_log ("-eeek{%i}\n", babl_dest->instance.class_type - BABL_MAGIC);
+     }
+}
+
+void
+_babl_fish_missing_fast_path_warning (const Babl *source,
+                                      const Babl *destination)
+{
+#ifndef BABL_UNSTABLE
+  if (debug_conversions)
+#endif
+  {
+    static int warnings = 0;
+
+    if (_babl_legal_error() <= 0.0000000001)
+      return;
+
+    if (warnings++ == 0)
+      fprintf (stderr,
+"Missing fast-path babl conversion detected, Implementing missing babl fast paths\n"
+"accelerates GEGL, GIMP and other software using babl, warnings are printed on\n"
+"first occurance of formats used where a conversion has to be synthesized\n"
+"programmatically by babl based on format description\n"
+"\n");
+
+    fprintf (stderr, "*WARNING* missing babl fast path(s): \"%s\" to \"%s\"\n",
+       babl_get_name (source),
+       babl_get_name (destination));
+
+  }
+}
+
+
+static Babl *
+babl_fish_path2 (const Babl *source,
+                 const Babl *destination,
+                 double      tolerance)
+{
+  Babl *babl = NULL;
+  const Babl *sRGB = babl_space ("sRGB");
+  char name[BABL_MAX_NAME_LEN];
+  int is_fast = 0;
+  static int debug_missing = -1;
+  if (debug_missing < 0)
+  {
+     const char *val = getenv ("BABL_DEBUG_MISSING");
+     if (val && strcmp (val, "0"))
+       debug_missing = 1;
+     else
+       debug_missing = 0;
+  }
+
+  _babl_fish_create_name (name, source, destination, 1);
+  babl_mutex_lock (babl_format_mutex);
+  babl = babl_db_exist_by_name (babl_fish_db (), name);
+
+  if (tolerance <= 0.0)
+  {
+    is_fast = 0;
+    tolerance = _babl_legal_error ();
+  }
+  else
+    is_fast = 1;
+
+  if (!is_fast)
+  {
+  if (babl)
+    {
+      /* There is an instance already registered by the required name,
+       * returning the preexistent one instead.
+       */
+      babl_mutex_unlock (babl_format_mutex);
+      return babl;
+    }
+  }
+
+  if ((source->format.space != sRGB) ||
+      (destination->format.space != sRGB))
+  {
+    static const Babl *run_once[512]={NULL};
+    int i;
+    int done = 0;
+
+    for (i = 0; run_once[i]; i++)
+    {
+      if (run_once[i] == source->format.space)
+        done |= 1;
+      else if (run_once[i] == destination->format.space)
+        done |= 2;
+    }
+
+    /* source space not in initialization array */
+    if ((done & 1) == 0 && (source->format.space != sRGB))
+    {
+      run_once[i++] = source->format.space;
+      babl_conversion_class_for_each (alias_conversion, (void*)source->format.space);
+
+      _babl_space_add_universal_rgb (source->format.space);
+    }
+
+    /* destination space not in initialization array */
+    if ((done & 2) == 0 && (destination->format.space != source->format.space) && (destination->format.space 
!= sRGB))
+    {
+      run_once[i++] = destination->format.space;
+      babl_conversion_class_for_each (alias_conversion, (void*)destination->format.space);
+
+      _babl_space_add_universal_rgb (destination->format.space);
+    }
+
+    if (!done && 0)
+    {
+      babl_conversion_class_for_each (show_item, (void*)source->format.space);
+    }
+  }
+
+  babl = babl_calloc (1, sizeof (BablFishPath) +
+                      strlen (name) + 1);
+  babl_set_destructor (babl, _babl_fish_path_destroy);
+
+  babl->class_type                = BABL_FISH_PATH;
+  babl->instance.id               = babl_fish_get_id (source, destination);
+  babl->instance.name             = ((char *) babl) + sizeof (BablFishPath);
+  strcpy (babl->instance.name, name);
+  babl->fish.source               = source;
+  babl->fish.destination          = destination;
+  babl->fish.pixels               = 0;
+  babl->fish.error                = BABL_MAX_COST_VALUE;
+  babl->fish_path.cost            = BABL_MAX_COST_VALUE;
+  babl->fish_path.conversion_list = babl_list_init_with_size (BABL_HARD_MAX_PATH_LENGTH);
 
 
 
@@ -656,14 +989,17 @@ babl_fish_path2 (const Babl *source,
     }
 
   _babl_fish_prepare_bpp (babl);
+  {
+  int source_bpp = babl->fish_path.source_bpp;
+  int dest_bpp = babl->fish_path.dest_bpp;
   if (//source->format.space != destination->format.space &&
      (
-        (babl->fish_path.source_bpp == 4 && babl->fish_path.dest_bpp == 16)
-      ||(babl->fish_path.source_bpp == 4 && babl->fish_path.dest_bpp == 4)
-      ||(babl->fish_path.source_bpp == 3 && babl->fish_path.dest_bpp == 4)
-      ||(babl->fish_path.source_bpp == 2 && babl->fish_path.dest_bpp == 4)
-      ||(babl->fish_path.source_bpp == 1 && babl->fish_path.dest_bpp == 4)
-      ||(babl->fish_path.source_bpp == 3 && babl->fish_path.dest_bpp == 3)
+        (source_bpp == 4 && dest_bpp == 16)
+      ||(source_bpp == 4 && dest_bpp == 4)
+      ||(source_bpp == 3 && dest_bpp == 4)
+      ||(source_bpp == 2 && dest_bpp == 4)
+      ||(source_bpp == 1 && dest_bpp == 4)
+      ||(source_bpp == 3 && dest_bpp == 3)
       )
      )
   {
@@ -672,9 +1008,28 @@ babl_fish_path2 (const Babl *source,
      // exact data. Thus this is valid for instance for "YA half"
 
      if ((babl->conversion.source->format.type[0]->bits < 32 &&
-          (babl->fish_path.source_bpp < 4 
+          (source_bpp < 4 
          || (source->format.model->flags & BABL_MODEL_FLAG_ASSOCIATED)==0)))
-       babl->fish_path.is_u8_color_conv = 1;
+     {
+       static int measured_timings = 0;
+       if (!measured_timings) measure_timings ();
+       measured_timings = 1;
+       fprintf (stderr, "%sLUT for %s to %s   %i %f\n",
+
+       (lut_timing_for (source_bpp, dest_bpp) * 12 <
+                           babl->fish_path.cost)?"":"no ",
+
+                        babl_get_name (babl->conversion.source),
+                        babl_get_name (babl->conversion.destination),
+                        lut_timing_for (source_bpp, dest_bpp) * 12,
+                        babl->fish_path.cost);
+       if (lut_timing_for (source_bpp, dest_bpp) * 12 <
+                           babl->fish_path.cost)
+       {
+         babl->fish_path.is_u8_color_conv = 1;
+       }
+     }
+  }
   }
 
   _babl_fish_rig_dispatch (babl);
@@ -721,283 +1076,6 @@ babl_fish_path (const Babl *source,
   return babl_fish_path2 (source, destination, 0.0);
 }
 
-typedef struct GcContext {
-   long time;
-} GcContext;
-
-static int gc_fishes (Babl *babl, void *userdata)
-{
-  GcContext *context = userdata;
-  if (babl->class_type == BABL_FISH_PATH)
-  {
-    if (babl->fish_path.u8_lut)
-    {
-      if (context->time - babl->fish_path.last_lut_use >
-          1000 * 1000 * 60 * 5)
-      {
-        void *lut =babl->fish_path.u8_lut;
-        BABL(babl)->fish_path.u8_lut = NULL;
-        free (lut);
-#if 0
-        fprintf (stderr, "freeing LUT %s to %s unused for >5 minutes\n",
-                        babl_get_name (babl->conversion.source),
-                        babl_get_name (babl->conversion.destination));
-#endif
-      }
-    }
-  }
-  return 0;
-}
-                           
-static void
-babl_gc_fishes (void)
-{
-  GcContext context;
-  context.time = babl_ticks ();
-  babl_fish_class_for_each (gc_fishes, &context);
-  //malloc_trim (0); 
-  //  is responsibility of higher layers
-}
-
-#define BABL_LIKELY(x)      __builtin_expect(!!(x), 1)
-#define BABL_UNLIKELY(x)    __builtin_expect(!!(x), 0)
-
-static int babl_fish_lut_process_maybe (const Babl *babl,
-                                        const char *source,
-                                        const char *destination,
-                                        long        n,
-                                        void       *data)
-{
-     int source_bpp = babl->fish_path.source_bpp;
-     int dest_bpp = babl->fish_path.dest_bpp;
-     uint32_t *lut = (uint32_t*)babl->fish_path.u8_lut;
-     BABL(babl)->fish.pixels += n;
-
-     if (BABL_UNLIKELY(!lut && babl->fish.pixels >= 128 * 256))
-     {
-#if 0
-       fprintf (stderr, "building LUT for %s to %s\n",
-                        babl_get_name (babl->conversion.source),
-                        babl_get_name (babl->conversion.destination));
-#endif
-       if (source_bpp ==4 && dest_bpp == 4)
-       {
-         lut = malloc (256 * 256 * 256 * 4);
-         for (int o = 0; o < 256 * 256 * 256; o++)
-           lut[o] = o;
-         process_conversion_path (babl->fish_path.conversion_list,
-                                  lut, 4,
-                                  lut, 4,
-                                  256*256*256);
-         for (int o = 0; o < 256 * 256 * 256; o++)
-           lut[o] = lut[o] & 0x00ffffff;
-       }
-       else if (source_bpp == 4 && dest_bpp == 16)
-       {
-         uint32_t *temp_lut = malloc (256 * 256 * 256 * 4);
-         lut = malloc (256 * 256 * 256 * 16);
-         for (int o = 0; o < 256 * 256 * 256; o++)
-           temp_lut[o] = o;
-         process_conversion_path (babl->fish_path.conversion_list,
-                                  temp_lut, 4,
-                                  lut, 16,
-                                  256*256*256);
-         free (temp_lut);
-       }
-       else if (source_bpp == 3 && dest_bpp == 3)
-       {
-         lut = malloc (256 * 256 * 256 * 4);
-         uint8_t *temp_lut = malloc (256 * 256 * 256 * 3);
-         uint8_t *temp_lut2 = malloc (256 * 256 * 256 * 3);
-         int o = 0;
-         for (int r = 0; r < 256; r++)
-         for (int g = 0; g < 256; g++)
-         for (int b = 0; b < 256; b++, o++)
-         {
-           temp_lut[o*3+0]=r;
-           temp_lut[o*3+1]=g;
-           temp_lut[o*3+2]=b;
-         }
-         process_conversion_path (babl->fish_path.conversion_list,
-                                  temp_lut, 3,
-                                  temp_lut2, 3,
-                                  256*256*256);
-         babl_process (babl_fish (babl_format ("R'G'B' u8"), babl_format ("R'G'B'A u8")),
-                       temp_lut2, lut, 256*256*256);
-         for (int o = 0; o < 256 * 256 * 256; o++)
-           lut[o] = lut[o] & 0x00ffffff;
-         free (temp_lut);
-         free (temp_lut2);
-       }
-       else if (source_bpp == 3 && dest_bpp == 4)
-       {
-         lut = malloc (256 * 256 * 256 * 4);
-         uint8_t *temp_lut = malloc (256 * 256 * 256 * 3);
-         int o = 0;
-         for (int r = 0; r < 256; r++)
-         for (int g = 0; g < 256; g++)
-         for (int b = 0; b < 256; b++, o++)
-         {
-           temp_lut[o*3+0]=r;
-           temp_lut[o*3+1]=g;
-           temp_lut[o*3+2]=b;
-         }
-         process_conversion_path (babl->fish_path.conversion_list,
-                                  temp_lut, 3,
-                                  lut, 4,
-                                  256*256*256);
-         for (int o = 0; o < 256 * 256 * 256; o++)
-           lut[o] = lut[o] & 0x00ffffff;
-         free (temp_lut);
-       }
-       else if (source_bpp == 2 && dest_bpp == 4)
-       {
-         lut = malloc (256 * 256 * 4);
-         uint16_t *temp_lut = malloc (256 * 256 * 2);
-         for (int o = 0; o < 256*256; o++)
-         {
-           temp_lut[o]=o;
-         }
-         process_conversion_path (babl->fish_path.conversion_list,
-                                  temp_lut, 2,
-                                  lut, 4,
-                                  256*256);
-         for (int o = 0; o < 256 * 256; o++)
-           lut[o] = lut[o] & 0x00ffffff;
-         free (temp_lut);
-       }
-       else if (source_bpp == 1 && dest_bpp == 4)
-       {
-         lut = malloc (256 * 4);
-         uint8_t *temp_lut = malloc (256);
-         for (int o = 0; o < 256; o++)
-         {
-           temp_lut[o]=o;
-         }
-         process_conversion_path (babl->fish_path.conversion_list,
-                                  temp_lut, 1,
-                                  lut, 4,
-                                  256);
-         for (int o = 0; o < 256; o++)
-           lut[o] = lut[o] & 0x00ffffff;
-         free (temp_lut);
-       }
-
-       if (babl->fish_path.u8_lut == NULL)
-       {
-         (BABL(babl)->fish_path.u8_lut) = lut;
-         // XXX need memory barrier?
-         if ((BABL(babl)->fish_path.u8_lut) != lut)
-         {
-           free (lut);
-           lut = babl->fish_path.u8_lut;
-         }
-       }
-       else
-       {
-         free (lut);
-         lut = babl->fish_path.u8_lut;
-       }
-     }
-     if (lut)
-     {
-        if (source_bpp == 4 && dest_bpp == 16)
-        {
-          uint32_t *src = (uint32_t*)source;
-          uint32_t *dst = (uint32_t*)destination;
-          lut = (uint32_t*)babl->fish_path.u8_lut;
-          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
-          while (n--)
-          {
-             uint32_t col = *src++;
-             uint32_t lut_offset = col & 0xffffff;
-             float alpha = (col>>24)/255.0;
-
-             *dst++ = lut[lut_offset*4+0];
-             *dst++ = lut[lut_offset*4+1];
-             *dst++ = lut[lut_offset*4+2];
-             ((float*)(dst))[0] = alpha;
-             dst++;
-          }
-          return 1;
-        }
-        if (source_bpp == 4 && dest_bpp == 4)
-        {
-          uint32_t *src = (uint32_t*)source;
-          uint32_t *dst = (uint32_t*)destination;
-          lut = (uint32_t*)babl->fish_path.u8_lut;
-          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
-          while (n--)
-          {
-             uint32_t col = *src++;
-             *dst = col & 0xff000000;
-             *dst |= lut[col & 0xffffff];
-             dst++;
-          }
-          return 1;
-        }
-        if (source_bpp == 2 && dest_bpp == 4)
-        {
-          uint16_t *src = (uint16_t*)source;
-          uint32_t *dst = (uint32_t*)destination;
-          lut = (uint32_t*)babl->fish_path.u8_lut;
-          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
-          while (n--)
-          {
-             *dst = lut[*src++];
-             dst++;
-          }
-          return 1;
-        }
-        if (source_bpp == 1 && dest_bpp == 4)
-        {
-          uint8_t *src = (uint8_t*)source;
-          uint32_t *dst = (uint32_t*)destination;
-          lut = (uint32_t*)babl->fish_path.u8_lut;
-          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
-          while (n--)
-          {
-             *dst = lut[*src++];
-             dst++;
-          }
-          return 1;
-        }
-        else if (source_bpp == 3 && dest_bpp == 3)
-        {
-          uint8_t *src = (uint8_t*)source;
-          uint8_t *dst = (uint8_t*)destination;
-          lut = (uint32_t*)babl->fish_path.u8_lut;
-          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
-          while (n--)
-          {
-             uint32_t col = src[0]*256*256+src[1]*256+src[2];
-             uint32_t val = lut[col];
-             dst[2]=(val >> 16) & 0xff;
-             dst[1]=(val >> 8) & 0xff;
-             dst[0]=val & 0xff;
-             dst+=3;
-             src+=3;
-          }
-          return 1;
-        }
-        else if (source_bpp == 3 && dest_bpp == 4)
-        {
-          uint8_t *src = (uint8_t*)source;
-          uint32_t *dst = (uint32_t*)destination;
-          lut = (uint32_t*)babl->fish_path.u8_lut;
-          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
-          while (n--)
-          {
-             uint32_t col = src[0]*256*256+src[1]*256+src[2];
-             *dst = lut[col];
-             dst++;
-             src+=3;
-          }
-          return 1;
-        }
-     }
-     return 0;
-}
 
 static void
 babl_fish_path_process (const Babl *babl,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]