[babl/wip/msvc: 4/6] build: Enable SSE4.1 on Visual Studio builds
- From: Chun-wei Fan <fanchunwei src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl/wip/msvc: 4/6] build: Enable SSE4.1 on Visual Studio builds
- Date: Tue, 21 Jan 2020 10:20:00 +0000 (UTC)
commit ce7cd35b99a675434d2ade9a310ba93ae8159422
Author: Chun-wei Fan <fanchunwei src gnome org>
Date: Tue Jan 21 18:01:05 2020 +0800
build: Enable SSE4.1 on Visual Studio builds
The supported Visual Studio versions all support building SSE4.1 code when
building for x86 or x64, so enable it here as well. However, we need to
change the GCC-specific intrinsics syntax to something that is more portable.
extensions/sse4-int8.c | 12 ++++++------
meson.build | 11 +++++++++++
2 files changed, 17 insertions(+), 6 deletions(-)
---
diff --git a/extensions/sse4-int8.c b/extensions/sse4-int8.c
index d505fe511..d4386f69d 100644
--- a/extensions/sse4-int8.c
+++ b/extensions/sse4-int8.c
@@ -37,22 +37,22 @@ conv_y8_yF (const Babl *conversion,
long samples)
{
const float factor = 1.0f / 255.0f;
- const __v4sf factor_vec = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
+ const __m128 factor_vec = {1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f};
const uint32_t *s_vec;
- __v4sf *d_vec;
+ __m128 *d_vec;
long n = samples;
s_vec = (const uint32_t *)src;
- d_vec = (__v4sf *)dst;
+ d_vec = (__m128 *)dst;
while (n >= 4)
{
__m128i in_val;
- __v4sf out_val;
- in_val = _mm_insert_epi32 ((__m128i)_mm_setzero_ps(), *s_vec++, 0);
+ __m128 out_val;
+ in_val = _mm_insert_epi32 (_mm_castps_si128(_mm_setzero_ps()), *s_vec++, 0);
in_val = _mm_cvtepu8_epi32 (in_val);
- out_val = _mm_cvtepi32_ps (in_val) * factor_vec;
+ out_val = _mm_mul_ps(_mm_cvtepi32_ps (in_val), factor_vec);
_mm_storeu_ps ((float *)d_vec++, out_val);
n -= 4;
}
diff --git a/meson.build b/meson.build
index 84028aee9..0fb394cfc 100644
--- a/meson.build
+++ b/meson.build
@@ -278,15 +278,26 @@ if cc.get_id() != 'msvc' and cc.has_argument('-mmmx') and get_option('enable-mmx
endif
if cc.get_id() == 'msvc' and have_x86
+ # mmx assembly
if get_option('enable-mmx')
conf.set('USE_MMX', 1, description:
'Define to 1 if MMX assembly are available.')
+
+ # sse assembly
if get_option('enable-sse')
conf.set('USE_SSE', 1, description:
'Define to 1 if SSE assembly are available.')
+
+ # sse2 assembly
if get_option('enable-sse2')
conf.set('USE_SSE2', 1, description:
'Define to 1 if sse2 assembly are available.')
+
+ # sse4.1 assembly
+ if get_option('enable-sse4_1')
+ conf.set('USE_SSE4_1', 1, description:
+ 'Define to 1 if sse4.1 assembly is available.')
+ endif
endif
endif
endif
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]