[babl] babl: also do auto LUTs for 3-3 bpp and 3-4bpp paths



commit 3895c7d240edf918bed0558b0ae2f0058fce2a71
Author: Øyvind Kolås <pippin gimp org>
Date:   Mon Jan 24 11:08:28 2022 +0100

    babl: also do auto LUTs for 3-3 bpp and 3-4bpp paths

 babl/babl-fish-path.c   | 144 ++++++++++++++++++++++++++++++++++++++----------
 babl/babl-fish.h        |   2 +-
 tools/babl-benchmark.c  |   4 +-
 tools/babl-lut-verify.c |  59 ++++++++++++++++++++
 4 files changed, 178 insertions(+), 31 deletions(-)
---
diff --git a/babl/babl-fish-path.c b/babl/babl-fish-path.c
index 4d5679e1a..f93e010ae 100644
--- a/babl/babl-fish-path.c
+++ b/babl/babl-fish-path.c
@@ -660,6 +660,8 @@ babl_fish_path2 (const Babl *source,
      (source->format.model->flags & BABL_MODEL_FLAG_ASSOCIATED)==0  &&
      (
       (babl->fish_path.source_bpp == 4 && babl->fish_path.dest_bpp == 4)
+      ||(babl->fish_path.source_bpp == 3 && babl->fish_path.dest_bpp == 4)
+      ||(babl->fish_path.source_bpp == 3 && babl->fish_path.dest_bpp == 3)
       // XXX 16bit code paths not enabled yet.
       //
       //|| (babl->fish_path.source_bpp == 8 && babl->fish_path.dest_bpp == 4)
@@ -756,17 +758,21 @@ babl_gc_fishes (void)
   //  is responsibility of higher layers
 }
 
+#define BABL_LIKELY(x)      __builtin_expect(!!(x), 1)
+#define BABL_UNLIKELY(x)    __builtin_expect(!!(x), 0)
+
 static int babl_fish_lut_process_maybe (const Babl *babl,
                                         const char *source,
                                         const char *destination,
                                         long        n,
                                         void       *data)
 {
+     int source_bpp = babl->fish_path.source_bpp;
+     int dest_bpp = babl->fish_path.dest_bpp;
      uint32_t *lut = (uint32_t*)babl->fish_path.u8_lut;
-     ((Babl*)babl)->fish.pixels += n;
-
+     BABL(babl)->fish.pixels += n;
 
-     if (!lut && babl->fish.pixels > 256 * 128)
+     if (BABL_UNLIKELY(!lut && babl->fish.pixels >= 128 * 256))
      {
 #if 0
        fprintf (stderr, "building LUT for %s to %s\n",
@@ -774,44 +780,128 @@ static int babl_fish_lut_process_maybe (const Babl *babl,
                         babl_get_name (babl->conversion.destination));
 #endif
        lut = malloc (256 * 256 * 256 * 4);
+       if (source_bpp ==4)
+       {
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           lut[o] = o;
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  lut, 4,
+                                  lut, 4,
+                                  256*256*256);
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           lut[o] = lut[o] & 0x00ffffff;
+       }
+       else if (source_bpp == 3 && dest_bpp == 3) // compare: '=' here would overwrite source_bpp with 0 or 1
        {
-       for (int o = 0; o < 256 * 256 * 256; o++)
-         lut[o] = o;
-       process_conversion_path (babl->fish_path.conversion_list,
-                                lut,
-                                babl->fish_path.source_bpp,
-                                lut,
-                                babl->fish_path.dest_bpp,
-                                256*256*256);
+         uint8_t *temp_lut = malloc (256 * 256 * 256 * 3);
+         uint8_t *temp_lut2 = malloc (256 * 256 * 256 * 3);
+         int o = 0;
+         for (int r = 0; r < 256; r++) // enumerate every 8-bit triplet; entry o = r*65536 + g*256 + b
+         for (int g = 0; g < 256; g++)
+         for (int b = 0; b < 256; b++, o++)
+         {
+           temp_lut[o*3+0]=r;
+           temp_lut[o*3+1]=g;
+           temp_lut[o*3+2]=b;
+         }
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  temp_lut, 3,
+                                  temp_lut2, 3,
+                                  256*256*256);
+         babl_process (babl_fish (babl_format ("R'G'B' u8"), babl_format ("R'G'B'A u8")),
+                       temp_lut2, lut, 256*256*256); // widen each 3-byte result to one 4-byte LUT entry
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           lut[o] = lut[o] & 0x00ffffff; // keep only the low 24 bits of every entry
+         free (temp_lut);
+         free (temp_lut2);
        }
+       else if (source_bpp == 3 && dest_bpp == 4) // compare: '=' here would overwrite source_bpp with 0 or 1
+       {
+         uint8_t *temp_lut = malloc (256 * 256 * 256 * 3);
+         int o = 0;
+         for (int r = 0; r < 256; r++) // same triplet enumeration as the 3 -> 3 case
+         for (int g = 0; g < 256; g++)
+         for (int b = 0; b < 256; b++, o++)
+         {
+           temp_lut[o*3+0]=r;
+           temp_lut[o*3+1]=g;
+           temp_lut[o*3+2]=b;
+         }
+         process_conversion_path (babl->fish_path.conversion_list,
+                                  temp_lut, 3,
+                                  lut, 4,
+                                  256*256*256);
+         for (int o = 0; o < 256 * 256 * 256; o++)
+           lut[o] = lut[o] & 0x00ffffff;
+         free (temp_lut);
+       }
+
        if (babl->fish_path.u8_lut == NULL)
        {
-         (((Babl*)babl)->fish_path.u8_lut) = (uint8_t*)lut;
+         (BABL(babl)->fish_path.u8_lut) = lut;
          // XXX need memory barrier?
-         if ((((Babl*)babl)->fish_path.u8_lut) != (uint8_t*)lut)
+         if ((BABL(babl)->fish_path.u8_lut) != lut)
          {
            free (lut);
-           lut = (uint32_t*)babl->fish_path.u8_lut;
+           lut = babl->fish_path.u8_lut;
          }
        }
        else
        {
          free (lut);
-         lut = (uint32_t*)babl->fish_path.u8_lut;
+         lut = babl->fish_path.u8_lut;
        }
      }
      if (lut)
      {
-        uint32_t *src = (uint32_t*)source;
-        uint32_t *dst = (uint32_t*)destination;
-        lut = (uint32_t*)babl->fish_path.u8_lut;
-        while (n--)
+        if (source_bpp == 4 && dest_bpp == 4)
         {
-           uint32_t col = *src++;
-           *dst++ = lut[col & 0xffffff] | (col & 0xff000000);
+          uint32_t *src = (uint32_t*)source;
+          uint32_t *dst = (uint32_t*)destination;
+          lut = (uint32_t*)babl->fish_path.u8_lut;
+          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
+          while (n--)
+          {
+             uint32_t col = *src++;
+             *dst = col & 0xff000000;
+             *dst |= lut[col & 0xffffff];
+             dst++;
+          }
+          return 1;
+        }
+        else if (source_bpp == 3 && dest_bpp == 3)
+        {
+          uint8_t *src = (uint8_t*)source;
+          uint8_t *dst = (uint8_t*)destination;
+          lut = (uint32_t*)babl->fish_path.u8_lut;
+          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
+          while (n--)
+          {
+             uint32_t col = src[0]*256*256+src[1]*256+src[2];
+             uint32_t val = lut[col];
+             dst[2]=(val >> 16) & 0xff;
+             dst[1]=(val >> 8) & 0xff;
+             dst[0]=val & 0xff;
+             dst+=3;
+             src+=3;
+          }
+          return 1;
+        }
+        else if (source_bpp == 3 && dest_bpp == 4)
+        {
+          uint8_t *src = (uint8_t*)source;
+          uint32_t *dst = (uint32_t*)destination;
+          lut = (uint32_t*)babl->fish_path.u8_lut;
+          BABL(babl)->fish_path.last_lut_use = babl_ticks ();
+          while (n--)
+          {
+             uint32_t col = src[0]*256*256+src[1]*256+src[2];
+             *dst = lut[col];
+             dst++;
+             src+=3;
+          }
+          return 1;
         }
-        BABL(babl)->fish_path.last_lut_use = babl_ticks ();
-        return 1;
      }
      return 0;
 }
@@ -826,17 +916,15 @@ babl_fish_path_process (const Babl *babl,
   if (babl->fish_path.is_u8_color_conv)
   {
      if (babl_fish_lut_process_maybe (babl,
-                                      source,
-                                      destination,
-                                      n,
+                                      source, destination, n,
                                       data))
-         return;
+       return;
   }
   else
   {
     static long conv_counter = 0;
     conv_counter+=n;
-    if (conv_counter > 1000 * 1000 * 10) // possibly run gc every 10 megapixels
+    if (conv_counter > 1000 * 1000 * 10) // run gc every 10 megapixels
     {
       babl_gc_fishes ();
       conv_counter = 0;
diff --git a/babl/babl-fish.h b/babl/babl-fish.h
index dfe07a3d7..0ad9101b1 100644
--- a/babl/babl-fish.h
+++ b/babl/babl-fish.h
@@ -70,7 +70,7 @@ typedef struct
   int        source_bpp;
   int        dest_bpp;
   unsigned int is_u8_color_conv:1; // keep track of count, and make 
-  uint8_t   *u8_lut;
+  uint32_t  *u8_lut;
   long       last_lut_use;
   BablList  *conversion_list;
 } BablFishPath;
diff --git a/tools/babl-benchmark.c b/tools/babl-benchmark.c
index f2a90f32c..895cfc2d6 100644
--- a/tools/babl-benchmark.c
+++ b/tools/babl-benchmark.c
@@ -25,8 +25,8 @@
 #define random  rand
 #endif
 
-int ITERATIONS = 20;
-#define  N_PIXELS (512*256)  // a too small batch makes the test set live
+int ITERATIONS = 5;
+#define  N_PIXELS (1024*1024)  // a too small batch makes the test set live
                                // in l2 cache skewing results
 
                                // we could also add a cache purger..
diff --git a/tools/babl-lut-verify.c b/tools/babl-lut-verify.c
index 5f7968252..65a6d00f6 100644
--- a/tools/babl-lut-verify.c
+++ b/tools/babl-lut-verify.c
@@ -61,6 +61,57 @@ test_u8_premul (void)
 }
 
 
+static double
+test_rgb (void) /* compares one Apple -> ProPhoto u8 RGB conversion against later repeats of the same conversion */
+{
+  uint8_t *src = malloc (PIXELS*4); /* every buffer is PIXELS*4 bytes; the malloc results are not checked */
+  uint8_t *dst = malloc (PIXELS*4);
+  uint8_t *dst2 = malloc (PIXELS*4);
+  double error = 0.0;
+
+  for (int i = 0; i < PIXELS; i++) /* fill the whole source buffer with random bytes */
+    for (int c = 0; c < 4; c++)
+      src[i*4+c] = random();
+
+  babl_process (
+      babl_fish (
+          babl_format_with_space ("R'G'B' u8", babl_space("Apple")),
+          babl_format_with_space ("R'G'B' u8", babl_space("ProPhoto"))),
+      src, dst, PIXELS); /* first pass: result kept in dst as the reference */
+  babl_process (
+      babl_fish (
+          babl_format_with_space ("R'G'B' u8", babl_space("Apple")),
+          babl_format_with_space ("R'G'B' u8", babl_space("ProPhoto"))),
+      src, dst2, PIXELS); /* same conversion again, written to dst2 */
+  babl_process (
+      babl_fish (
+          babl_format_with_space ("R'G'B' u8", babl_space("Apple")),
+          babl_format_with_space ("R'G'B' u8", babl_space("ProPhoto"))),
+      src, dst2, PIXELS); /* repeated; overwrites dst2 */
+  babl_process (
+      babl_fish (
+          babl_format_with_space ("R'G'B' u8", babl_space("Apple")),
+          babl_format_with_space ("R'G'B' u8", babl_space("ProPhoto"))),
+      src, dst2, PIXELS); /* NOTE(review): presumably repeated so a later pass uses the auto-LUT path; depends on PIXELS, confirm */
+
+  for (int i = 0; i < PIXELS; i++) /* accumulate the Euclidean distance over the 3 bytes of each pixel */
+  {
+    error += sqrt ((dst[i*3+0] - dst2[i*3+0])*
+                   (dst[i*3+0] - dst2[i*3+0])+
+                   (dst[i*3+1] - dst2[i*3+1])*
+                   (dst[i*3+1] - dst2[i*3+1])+
+                   (dst[i*3+2] - dst2[i*3+2])*
+                   (dst[i*3+2] - dst2[i*3+2]));
+  }
+
+  free (src);
+  free (dst);
+  free (dst2);
+
+  return error; /* 0.0 when all compared bytes of dst and dst2 match */
+}
+
+
 static double
 test_u8 (void)
 {
@@ -391,6 +442,14 @@ int main (int argc, char **argv)
   else
     fprintf (stdout, "OK\n");
 
+  fprintf (stdout, "R'G'B u8 ");
+  error = test_rgb ();
+  if (error != 0.0)
+    fprintf (stdout, "%.20f\n", error/(PIXELS*4));
+  else
+    fprintf (stdout, "OK\n");
+
+
   fprintf (stdout, "u8 premul ");
   error = test_u8_premul ();
   if (error != 0.0)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]