[babl] Add SSE2 versions of "Y float" and "YA float" to "CIE L float"

From: Øyvind "pippin" Kolås <ok src gnome org>
To: commits-list gnome org
Cc:
Subject: [babl] Add SSE2 versions of "Y float" and "YA float" to "CIE L float"
Date: Tue, 17 Jul 2018 12:11:08 +0000 (UTC)

commit d2d45a19302e746de4363da7038e3614860bdd73
Author: Debarshi Ray <rishi is lostca se>
Date:   Tue Jul 17 12:09:31 2018 +0000

    Add SSE2 versions of "Y float" and "YA float" to "CIE L float"

 extensions/CIE.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 91 insertions(+), 1 deletion(-)
---
diff --git a/extensions/CIE.c b/extensions/CIE.c
index 6d701ae..b96e124 100644
--- a/extensions/CIE.c
+++ b/extensions/CIE.c
@@ -1123,6 +1123,85 @@ lab_r_to_f_sse2 (__m128 r)
   return f;
 }
 
+static void  /* SSE2 variant of "Y float" -> "CIE L float": one input float per output float. */
+Yf_to_Lf_sse2 (const Babl *conversion, const float *src, float *dst, long samples)
+{  /* `conversion` is not referenced in this body. */
+  long i = 0;  /* number of samples consumed by the vector loop; stays 0 when that loop is skipped */
+  long remainder;
+  /* Take the vector path only when src and dst are both 16-byte aligned (sum of both remainders is 0). */
+  if (((uintptr_t) src % 16) + ((uintptr_t) dst % 16) == 0)
+    {
+      const long n = (samples / 4) * 4;  /* samples rounded down to a multiple of 4 */
+
+      for ( ; i < n; i += 4)
+        {
+          __m128 Y = _mm_load_ps (src);  /* aligned load of 4 consecutive Y values */
+
+          __m128 fy = lab_r_to_f_sse2 (Y);  /* helper defined earlier in CIE.c; NOTE(review): presumably the Lab f() per lane - confirm */
+
+          __m128 L = _mm_sub_ps (_mm_mul_ps (_mm_set1_ps (116.0f), fy), _mm_set1_ps (16.0f));  /* L = 116 * fy - 16 */
+
+          _mm_store_ps (dst, L);  /* aligned store of 4 L values */
+
+          src += 4;
+          dst += 4;
+        }
+    }
+  /* Scalar tail: the samples left over after the vector loop, or all of them if it was skipped. */
+  remainder = samples - i;
+  while (remainder--)  /* NOTE(review): only terminates as intended for samples >= 0 - confirm callers */
+    {
+      float yr = src[0];
+      float L  = yr > LAB_EPSILON ? 116.0f * _cbrtf (yr) - 16 : LAB_KAPPA * yr;  /* cube-root form above LAB_EPSILON, linear LAB_KAPPA * yr otherwise */
+
+      dst[0] = L;
+
+      src++;
+      dst++;
+    }
+}
+
+static void  /* SSE2 variant of "YA float" -> "CIE L float": reads every other input float, writes one float per sample. */
+Yaf_to_Lf_sse2 (const Babl *conversion, const float *src, float *dst, long samples)
+{  /* `conversion` is not referenced in this body. */
+  long i = 0;  /* number of samples consumed by the vector loop; stays 0 when that loop is skipped */
+  long remainder;
+  /* Take the vector path only when src and dst are both 16-byte aligned (sum of both remainders is 0). */
+  if (((uintptr_t) src % 16) + ((uintptr_t) dst % 16) == 0)
+    {
+      const long n = (samples / 4) * 4;  /* samples rounded down to a multiple of 4 */
+
+      for ( ; i < n; i += 4)
+        {
+          __m128 YaYa0 = _mm_load_ps (src);  /* input floats 0..3 of this group of 8 */
+          __m128 YaYa1 = _mm_load_ps (src + 4);  /* input floats 4..7 of this group of 8 */
+          /* Keep lanes 0 and 2 of each register, i.e. the even-indexed floats; the odd-indexed ones are dropped. */
+          __m128 Y = _mm_shuffle_ps (YaYa0, YaYa1, _MM_SHUFFLE (2, 0, 2, 0));
+
+          __m128 fy = lab_r_to_f_sse2 (Y);  /* helper defined earlier in CIE.c; NOTE(review): presumably the Lab f() per lane - confirm */
+
+          __m128 L = _mm_sub_ps (_mm_mul_ps (_mm_set1_ps (116.0f), fy), _mm_set1_ps (16.0f));  /* L = 116 * fy - 16 */
+
+          _mm_store_ps (dst, L);  /* aligned store of 4 L values */
+
+          src += 8;  /* 4 samples consumed, 2 floats each */
+          dst += 4;
+        }
+    }
+  /* Scalar tail: the samples left over after the vector loop, or all of them if it was skipped. */
+  remainder = samples - i;
+  while (remainder--)  /* NOTE(review): only terminates as intended for samples >= 0 - confirm callers */
+    {
+      float yr = src[0];  /* first float of the pair; src[1] is never read */
+      float L  = yr > LAB_EPSILON ? 116.0f * _cbrtf (yr) - 16 : LAB_KAPPA * yr;  /* cube-root form above LAB_EPSILON, linear LAB_KAPPA * yr otherwise */
+
+      dst[0] = L;
+
+      src += 2;
+      dst += 1;
+    }
+}
+
 static void
 rgbaf_to_Lf_sse2 (const Babl *conversion, const float *src, float *dst, long samples)
 {
@@ -1474,7 +1553,18 @@ conversions (void)
         "linear", rgbaf_to_Labaf_sse2,
         NULL
       );
-
+      babl_conversion_new (
+        babl_format ("Y float"),
+        babl_format ("CIE L float"),
+        "linear", Yf_to_Lf_sse2,
+        NULL
+      );
+      babl_conversion_new (
+        babl_format ("YA float"),
+        babl_format ("CIE L float"),
+        "linear", Yaf_to_Lf_sse2,
+        NULL
+      );
       babl_conversion_new (
         babl_format ("RGBA float"),
         babl_format ("CIE L float"),

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]