[babl] cairo: accelerate R'G'B'A u8 -> cairo-ARGB32 conversion



commit 6d890333f6337fe044cce3eed3e474e43cd7afc9
Author: Ell <ell_se yahoo com>
Date:   Sun Oct 21 05:18:36 2018 -0400

    cairo: accelerate R'G'B'A u8 -> cairo-ARGB32 conversion
    
    Accelerate the R'G'B'A u8 -> cairo-ARGB32 conversion by using wide
    integer arithmetic/logic to process the entire RGBA tuple in
    parallel (or, on 32-bit machines, in two parts), instead of
    component by component.  This speeds the conversion up by ~25%.

 extensions/cairo.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)
---
diff --git a/extensions/cairo.c b/extensions/cairo.c
index 059b07d..9a96fc3 100644
--- a/extensions/cairo.c
+++ b/extensions/cairo.c
@@ -187,12 +187,34 @@ conv_rgba8_cairo32_le (const Babl *conversion,unsigned char *src, unsigned char
   uint32_t *dsti = (void*) dst;
   while (n--)
     {
-      unsigned char alpha  = src[3];
-#define div_255(a) ((((a)+128)+(((a)+128)>>8))>>8)
-      *dsti++ = (alpha << 24) +
-                (div_255 (src[0] * alpha) << 16) +
-                (div_255 (src[1] * alpha) << 8) +
-                (div_255 (src[2] * alpha));
+      unsigned char alpha = src[3];
+#if SIZE_MAX >= UINT64_MAX /* 64-bit */
+      uint64_t rbag = ((uint64_t) src[0] << 48) |
+                      ((uint64_t) src[2] << 32) |
+                      ((uint64_t) 255    << 16) |
+                      ((uint64_t) src[1] <<  0);
+      rbag *= alpha;
+      rbag += 0x0080008000800080;
+      rbag += (rbag >> 8) & 0x00ff00ff00ff00ff;
+      rbag &= 0xff00ff00ff00ff00;
+      *dsti++ = (uint32_t) (rbag >>  0) |
+                (uint32_t) (rbag >> 40);
+#else /* 32-bit */
+      uint32_t rb = ((uint32_t) src[0] << 16) |
+                    ((uint32_t) src[2] <<  0);
+      uint64_t ag = ((uint32_t) 255    << 16) |
+                    ((uint32_t) src[1] <<  0);
+      rb *= alpha;
+      ag *= alpha;
+      rb += 0x00800080;
+      ag += 0x00800080;
+      rb += (rb >> 8) & 0x00ff00ff;
+      ag += (ag >> 8) & 0x00ff00ff;
+      rb &= 0xff00ff00;
+      ag &= 0xff00ff00;
+      *dsti++ = (uint32_t) (ag >> 0) |
+                (uint32_t) (rb >> 8);
+#endif
       src+=4;
     }
 }
@@ -223,6 +245,8 @@ conv_yA8_cairo32_le (const Babl *conversion,unsigned char *src, unsigned char *d
   long n = samples;
   while (n--)
     {
+#define div_255(a) ((((a)+128)+(((a)+128)>>8))>>8)
+
       unsigned char gray   = *src++;
       unsigned char alpha  = *src++;
       unsigned char val = div_255 (gray * alpha);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]