[babl] Meson build: Improve SIMD assembly checking/use
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] Meson build: Improve SIMD assembly checking/use
- Date: Sat, 19 May 2018 11:35:39 +0000 (UTC)
commit cbe30f4f8d9db8b9b68f4a8800e2d23bb2ff9f7b
Author: John Marshall <jtm home gmail com>
Date: Sat May 19 11:01:46 2018 +0100
Meson build: Improve SIMD assembly checking/use
extensions/meson.build | 93 +++++++++++++++++++++++++++----------------
meson.build | 104 ++++++++++++++++++++++++++++-------------------
2 files changed, 121 insertions(+), 76 deletions(-)
---
diff --git a/extensions/meson.build b/extensions/meson.build
index afc960d..ceee490 100644
--- a/extensions/meson.build
+++ b/extensions/meson.build
@@ -1,39 +1,64 @@
-extension_names = [
- 'u16',
- 'u32',
- 'cairo',
- 'CIE',
- 'double',
- 'fast-float',
- 'half',
- 'float',
- 'gegl-fixups',
- 'gggl-lies',
- 'gggl-table-lies',
- 'gggl-table',
- 'gggl',
- 'gimp-8bit',
- 'grey',
- 'HCY',
- 'HSL',
- 'HSV',
- 'naive-CMYK',
- 'simple',
- 'sse-half',
- 'sse2-float',
- 'sse2-int16',
- 'sse2-int8',
- 'sse4-int8',
- 'two-table',
- 'ycbcr',
+no_cflags = []
+
+# Dependencies
+babl_ext_dep = [
+ math,
+ thread,
+]
+
+# Include directories
+babl_ext_inc = [
+ rootInclude,
+ bablInclude,
+]
+
+# Linker arguments
+babl_ext_link_args = [
+]
+if platform_win32
+ babl_ext_link_args += '-Wl,--no-undefined'
+endif
+
+
+extensions = [
+ ['u16', no_cflags],
+ ['u32', no_cflags],
+ ['cairo', no_cflags],
+ ['CIE', no_cflags],
+ ['double', no_cflags],
+ ['fast-float', no_cflags],
+ ['half', no_cflags],
+ ['float', no_cflags],
+ ['gegl-fixups', no_cflags],
+ ['gggl-lies', no_cflags],
+ ['gggl-table-lies', no_cflags],
+ ['gggl-table', no_cflags],
+ ['gggl', no_cflags],
+ ['gimp-8bit', no_cflags],
+ ['grey', no_cflags],
+ ['HCY', no_cflags],
+ ['HSL', no_cflags],
+ ['HSV', no_cflags],
+ ['naive-CMYK', no_cflags],
+ ['simple', no_cflags],
+ ['sse-half', [sse4_1_cflags, f16c_cflags]],
+ ['sse2-float', sse2_cflags],
+ ['sse2-int16', sse2_cflags],
+ ['sse2-int8', sse2_cflags],
+ ['sse4-int8', sse4_1_cflags],
+ ['two-table', sse2_cflags],
+ ['ycbcr', sse2_cflags],
]
-foreach extension_name : extension_names
- extension = library(extension_name,
- extension_name + '.c',
- include_directories: [ rootInclude, bablInclude, ],
- link_with: [ babl, ],
- dependencies: [ math, thread, ],
+foreach ext : extensions
+ library(
+ ext[0],
+ ext[0] + '.c',
+ c_args: ext[1],
+ include_directories: babl_ext_inc,
+ link_with: babl,
+ link_args: babl_ext_link_args,
+ dependencies: babl_ext_dep,
name_prefix: '',
install: true,
install_dir: join_paths(get_option('libdir'), lib_name),
diff --git a/meson.build b/meson.build
index 53bf9a1..8909092 100644
--- a/meson.build
+++ b/meson.build
@@ -136,50 +136,70 @@ endforeach
################################################################################
# Check for compiler CPU extensions
-have_tls_run = cc.run('int main() { static __thread char buf[1024]; return 0; }')
-conf.set('HAVE_TLS', ( have_tls_run.compiled() and have_tls_run.returncode() == 0 ))
-
-has_ssem = cc.has_argument('-mfpmath=sse')
-if has_ssem
- add_project_arguments('-mfpmath=sse',
- language: 'c')
-endif
-
-has_mmx = cc.has_argument('-mmmx') and get_option('enable-mmx')
-if has_mmx
- add_project_arguments( '-mmmx',
- language: 'c')
-endif
-
-has_sse = cc.has_argument('-msse') and get_option('enable-sse')
-if has_sse
- add_project_arguments( '-msse',
- language: 'c')
-endif
-
-has_sse2 = cc.has_argument('-msse2') and get_option('enable-sse2')
-if has_sse2
- add_project_arguments( '-msse2',
- language: 'c')
-endif
-
-has_sse3 = cc.has_argument('-msse3') and get_option('enable-sse3')
-if has_sse3
- add_project_arguments( '-msse3',
- language: 'c')
-endif
-
-has_sse41= cc.has_argument('-msse4.1') and get_option('enable-sse4_1')
-if has_sse41
- add_project_arguments( '-msse4.1',
- language: 'c')
+# Per-ISA compiler flags that extensions/meson.build passes to individual
+# extensions. They start out empty so that they are always defined, even
+# when a check below is disabled or fails; otherwise referencing them
+# would abort configuration with an unknown-variable error.
+sse2_cflags   = []
+sse4_1_cflags = []
+f16c_cflags   = []
+
+# The checks are nested: each instruction set is only probed once the
+# previous one has been accepted (mmx -> sse -> sse2 -> sse4.1).
+
+# mmx assembly
+if cc.has_argument('-mmmx') and get_option('enable-mmx')
+  if cc.compiles('asm ("movq 0, %mm0");')
+    message('mmx assembly available')
+    add_project_arguments('-mmmx', language: 'c')
+    conf.set('USE_MMX', 1, description:
+      'Define to 1 if MMX assembly is available.')
+
+    # sse assembly
+    if cc.has_argument('-msse') and get_option('enable-sse')
+      if cc.compiles('asm ("movntps %xmm0, 0");')
+        add_project_arguments('-msse', language: 'c')
+        message('sse assembly available')
+        conf.set('USE_SSE', 1, description:
+          'Define to 1 if SSE assembly is available.')
+
+        # Extra project-wide flags that are only added together with SSE;
+        # each one is applied only if the compiler accepts it.
+        sse_args = ['-mfpmath=sse']
+        if platform_win32
+          sse_args += '-mstackrealign'
+        endif
+
+        foreach sse_arg : sse_args
+          if cc.has_argument(sse_arg)
+            add_project_arguments(sse_arg, language: 'c')
+          endif
+        endforeach
+
+        # sse2 assembly
+        if cc.has_argument('-msse2') and get_option('enable-sse2')
+          if cc.compiles('asm ("punpckhwd %xmm0,%xmm1");')
+            message('sse2 assembly available')
+            sse2_cflags = '-msse2'
+            conf.set('USE_SSE2', 1, description:
+              'Define to 1 if sse2 assembly is available.')
+
+            # sse4.1 assembly
+            if cc.has_argument('-msse4.1') and get_option('enable-sse4_1')
+              if cc.compiles('asm ("pmovzxbd %xmm0,%xmm1");')
+                message('sse4.1 assembly available')
+                sse4_1_cflags = '-msse4.1'
+                conf.set('USE_SSE4_1', 1, description:
+                  'Define to 1 if sse4.1 assembly is available.')
+              endif
+            endif
+          endif
+        endif
+      endif
+
+      # f16c intrinsics
+      # The probe has to be real C compiled with -mf16c. Wrapping the source
+      # in a file-scope asm ("#include ...") only hands the assembler a line
+      # starting with '#', which is a comment for the x86 assembler, so that
+      # form succeeds no matter what the compiler supports.
+      if cc.has_argument('-mf16c') and get_option('enable-f16c')
+        if cc.compiles('''
+          #include <immintrin.h>
+          int main (void)
+          {
+            __m128 val = _mm_cvtph_ps ((__m128i)_mm_setzero_ps());
+            __m128i val2 = _mm_insert_epi64((__m128i)_mm_setzero_ps(),0,0);
+            (void) val;
+            (void) val2;
+            return 0;
+          }''',
+          args: '-mf16c',
+          name: 'f16c intrinsics'
+        )
+          message('Can compile half-floating point code (f16c)')
+          f16c_cflags = '-mf16c'
+          conf.set('USE_F16C', 1, description:
+            'Define to 1 if f16c intrinsics are available.')
+        endif
+      endif
+    endif
+  endif
 endif
-had_f16c= cc.has_argument('-mf16c') and get_option('enable-f16c')
-if had_f16c
- add_project_arguments( '-mf16c',
- language: 'c')
-endif
+have_tls_run = cc.run('int main() { static __thread char buf[1024]; return 0; }')
+conf.set('HAVE_TLS', ( have_tls_run.compiled() and have_tls_run.returncode() == 0 ))
have_dlfcn_h = cc.has_header('dlfcn.h')
have_dl_h = cc.has_header('dl.h')
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]