[babl/wip/msvc: 4/6] build: Enable SSE4.1 on Visual Studio builds

commit ce7cd35b99a675434d2ade9a310ba93ae8159422
Author: Chun-wei Fan <fanchunwei src gnome org>
Date:   Tue Jan 21 18:01:05 2020 +0800

    build: Enable SSE4.1 on Visual Studio builds
    The supported Visual Studio versions all support building SSE4.1 code when
    building for x86 or x64, so enable it here as well.  However, we need to
    change the GCC-specific vector syntax to intrinsics that are more portable.

 extensions/sse4-int8.c | 12 ++++++------
 meson.build            | 11 +++++++++++
 2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/extensions/sse4-int8.c b/extensions/sse4-int8.c
index d505fe511..d4386f69d 100644
--- a/extensions/sse4-int8.c
+++ b/extensions/sse4-int8.c
@@ -37,22 +37,22 @@ conv_y8_yF (const Babl    *conversion,
             long           samples)
   const float     factor = 1.0f / 255.0f;
-  const __v4sf    factor_vec = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
+  const __m128    factor_vec = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
   const uint32_t *s_vec;
-  __v4sf         *d_vec;
+  __m128         *d_vec;
   long n = samples;
   s_vec = (const uint32_t *)src;
-  d_vec = (__v4sf *)dst;
+  d_vec = (__m128 *)dst;
   while (n >= 4)
       __m128i in_val;
-      __v4sf out_val;
-      in_val = _mm_insert_epi32 ((__m128i)_mm_setzero_ps(), *s_vec++, 0);
+      __m128 out_val;
+      in_val = _mm_insert_epi32 (_mm_castps_si128(_mm_setzero_ps()), *s_vec++, 0);
       in_val = _mm_cvtepu8_epi32 (in_val);
-      out_val = _mm_cvtepi32_ps (in_val) * factor_vec;
+      out_val = _mm_mul_ps(_mm_cvtepi32_ps (in_val), factor_vec);
       _mm_storeu_ps ((float *)d_vec++, out_val);
       n -= 4;
diff --git a/meson.build b/meson.build
index 84028aee9..0fb394cfc 100644
--- a/meson.build
+++ b/meson.build
@@ -278,15 +278,26 @@ if cc.get_id() != 'msvc' and cc.has_argument('-mmmx') and get_option('enable-mmx
 if cc.get_id() == 'msvc' and have_x86
+  # mmx assembly
   if get_option('enable-mmx')
     conf.set('USE_MMX', 1, description:
       'Define to 1 if MMX assembly are available.')
+    # sse assembly
     if get_option('enable-sse')
       conf.set('USE_SSE', 1, description:
         'Define to 1 if SSE assembly are available.')
+      # sse2 assembly
       if get_option('enable-sse2')
         conf.set('USE_SSE2', 1, description:
           'Define to 1 if sse2 assembly are available.')
+        # sse4.1 assembly
+        if get_option('enable-sse4_1')
+          conf.set('USE_SSE4_1', 1, description:
+                   'Define to 1 if sse4.1 assembly is available.')
+        endif

[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]