[babl/wip/msvc: 18/20] build: Enable SSE4.1 on Visual Studio builds



commit e9a3e18b157bca833db49cfce1b3b0f09cac80d0
Author: Chun-wei Fan <fanchunwei src gnome org>
Date:   Tue Jan 21 18:01:05 2020 +0800

    build: Enable SSE4.1 on Visual Studio builds
    
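    The supported Visual Studio versions all support building SSE4.1 code when
    building for x86 or x64, so enable it here as well.  However, we need to
    replace the GCC-specific vector syntax (the __v4sf type, C-style casts
    between vector types and the `*` operator on vectors) with the more
    portable intrinsics forms (__m128, _mm_castps_si128() and _mm_mul_ps()).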

 extensions/sse4-int8.c | 12 ++++++------
 meson.build            | 11 +++++++++++
 2 files changed, 17 insertions(+), 6 deletions(-)
---
diff --git a/extensions/sse4-int8.c b/extensions/sse4-int8.c
index d505fe511..d4386f69d 100644
--- a/extensions/sse4-int8.c
+++ b/extensions/sse4-int8.c
@@ -37,22 +37,22 @@ conv_y8_yF (const Babl    *conversion,
             long           samples)
 {
   const float     factor = 1.0f / 255.0f;
-  const __v4sf    factor_vec = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
+  const __m128    factor_vec = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
   const uint32_t *s_vec;
-  __v4sf         *d_vec;
+  __m128         *d_vec;
 
   long n = samples;
 
   s_vec = (const uint32_t *)src;
-  d_vec = (__v4sf *)dst;
+  d_vec = (__m128 *)dst;
 
   while (n >= 4)
     {
       __m128i in_val;
-      __v4sf out_val;
-      in_val = _mm_insert_epi32 ((__m128i)_mm_setzero_ps(), *s_vec++, 0);
+      __m128 out_val;
+      in_val = _mm_insert_epi32 (_mm_castps_si128(_mm_setzero_ps()), *s_vec++, 0);
       in_val = _mm_cvtepu8_epi32 (in_val);
-      out_val = _mm_cvtepi32_ps (in_val) * factor_vec;
+      out_val = _mm_mul_ps(_mm_cvtepi32_ps (in_val), factor_vec);
       _mm_storeu_ps ((float *)d_vec++, out_val);
       n -= 4;
     }
diff --git a/meson.build b/meson.build
index e99cffba6..395ebefe7 100644
--- a/meson.build
+++ b/meson.build
@@ -279,15 +279,26 @@ if cc.get_id() != 'msvc' and cc.has_argument('-mmmx') and get_option('enable-mmx
 endif
 
 if cc.get_id() == 'msvc' and have_x86
+  # mmx assembly
   if get_option('enable-mmx')
     conf.set('USE_MMX', 1, description:
       'Define to 1 if MMX assembly are available.')
+
+    # sse assembly
     if get_option('enable-sse')
       conf.set('USE_SSE', 1, description:
         'Define to 1 if SSE assembly are available.')
+
+      # sse2 assembly
       if get_option('enable-sse2')
         conf.set('USE_SSE2', 1, description:
           'Define to 1 if sse2 assembly are available.')
+
+        # sse4.1 assembly
+        if get_option('enable-sse4_1')
+          conf.set('USE_SSE4_1', 1, description:
+                   'Define to 1 if sse4.1 assembly is available.')
+        endif
       endif
     endif
   endif


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]