[babl] Meson build: Improve SIMD assembly checking/use



commit cbe30f4f8d9db8b9b68f4a8800e2d23bb2ff9f7b
Author: John Marshall <jtm home gmail com>
Date:   Sat May 19 11:01:46 2018 +0100

    Meson build: Improve SIMD assembly checking/use

 extensions/meson.build |   93 +++++++++++++++++++++++++++----------------
 meson.build            |  104 ++++++++++++++++++++++++++++-------------------
 2 files changed, 121 insertions(+), 76 deletions(-)
---
diff --git a/extensions/meson.build b/extensions/meson.build
index afc960d..ceee490 100644
--- a/extensions/meson.build
+++ b/extensions/meson.build
@@ -1,39 +1,64 @@
-extension_names = [
-  'u16',
-  'u32',
-  'cairo',
-  'CIE',
-  'double',
-  'fast-float',
-  'half',
-  'float',
-  'gegl-fixups',
-  'gggl-lies',
-  'gggl-table-lies',
-  'gggl-table',
-  'gggl',
-  'gimp-8bit',
-  'grey',
-  'HCY',
-  'HSL',
-  'HSV',
-  'naive-CMYK',
-  'simple',
-  'sse-half',
-  'sse2-float',
-  'sse2-int16',
-  'sse2-int8',
-  'sse4-int8',
-  'two-table',
-  'ycbcr',
+no_cflags = []  # used for extensions that need no extra compiler flags
+
+# Dependencies passed to every extension library
+babl_ext_dep = [
+  math,
+  thread,
+]
+
+# Include directories passed to every extension library
+babl_ext_inc = [
+  rootInclude,
+  bablInclude,
+]
+
+# Linker arguments passed to every extension (non-empty only when platform_win32)
+babl_ext_link_args = [
+]
+if platform_win32
+  babl_ext_link_args += '-Wl,--no-undefined'
+endif
+
+
+extensions = [  # each entry: [name (also the '<name>.c' source), extra c_args]
+  ['u16', no_cflags],
+  ['u32', no_cflags],
+  ['cairo', no_cflags],
+  ['CIE', no_cflags],
+  ['double', no_cflags],
+  ['fast-float', no_cflags],
+  ['half', no_cflags],
+  ['float', no_cflags],
+  ['gegl-fixups', no_cflags],
+  ['gggl-lies', no_cflags],
+  ['gggl-table-lies', no_cflags],
+  ['gggl-table', no_cflags],
+  ['gggl', no_cflags],
+  ['gimp-8bit', no_cflags],
+  ['grey', no_cflags],
+  ['HCY', no_cflags],
+  ['HSL', no_cflags],
+  ['HSV', no_cflags],
+  ['naive-CMYK', no_cflags],
+  ['simple', no_cflags],
+  ['sse-half', [sse4_1_cflags, f16c_cflags]], # receives both flag variables
+  ['sse2-float', sse2_cflags],
+  ['sse2-int16', sse2_cflags],
+  ['sse2-int8', sse2_cflags],
+  ['sse4-int8', sse4_1_cflags],
+  ['two-table', sse2_cflags],
+  ['ycbcr', sse2_cflags],
 ]
 
-foreach extension_name : extension_names
-  extension = library(extension_name,
-    extension_name + '.c',
-    include_directories: [ rootInclude, bablInclude, ],
-    link_with: [ babl, ],
-    dependencies: [ math, thread, ],
+foreach ext : extensions
+  library(
+    ext[0],
+    ext[0] + '.c',
+    c_args: ext[1],
+    include_directories: babl_ext_inc,
+    link_with: babl,
+    link_args: babl_ext_link_args,
+    dependencies: babl_ext_dep,
     name_prefix: '',
     install: true,
     install_dir: join_paths(get_option('libdir'), lib_name),
diff --git a/meson.build b/meson.build
index 53bf9a1..8909092 100644
--- a/meson.build
+++ b/meson.build
@@ -136,50 +136,70 @@ endforeach
 ################################################################################
 # Check for compiler CPU extensions
 
-have_tls_run = cc.run('int main() { static __thread char buf[1024]; return 0; }')
-conf.set('HAVE_TLS', ( have_tls_run.compiled() and have_tls_run.returncode() == 0 ))
-
-has_ssem = cc.has_argument('-mfpmath=sse')
-if has_ssem
-  add_project_arguments('-mfpmath=sse',
-    language: 'c')
-endif
-
-has_mmx  = cc.has_argument('-mmmx') and get_option('enable-mmx')
-if has_mmx
-  add_project_arguments(   '-mmmx',
-    language: 'c')
-endif
-
-has_sse  = cc.has_argument('-msse') and get_option('enable-sse')
-if has_sse
-  add_project_arguments(   '-msse',
-    language: 'c')
-endif
-
-has_sse2 = cc.has_argument('-msse2') and get_option('enable-sse2')
-if has_sse2
-  add_project_arguments(   '-msse2',
-    language: 'c')
-endif
-
-has_sse3 = cc.has_argument('-msse3') and get_option('enable-sse3')
-if has_sse3
-  add_project_arguments(   '-msse3',
-    language: 'c')
-endif
-
-has_sse41= cc.has_argument('-msse4.1') and get_option('enable-sse4_1')
-if has_sse41
-  add_project_arguments(   '-msse4.1',
-    language: 'c')
+# SIMD cflags default to empty so extensions/meson.build can always use them.
+sse2_cflags   = []
+sse4_1_cflags = []
+f16c_cflags   = []
+
+# mmx assembly
+if cc.has_argument('-mmmx') and get_option('enable-mmx')
+  if cc.compiles('asm ("movq 0, %mm0");')
+    message('mmx assembly available')
+    add_project_arguments('-mmmx', language: 'c')
+    conf.set('USE_MMX', 1, description: 'Define to 1 if MMX assembly is available.')
+
+    # sse assembly
+    if cc.has_argument('-msse') and get_option('enable-sse')
+      if cc.compiles('asm ("movntps %xmm0, 0");')
+        add_project_arguments('-msse', language: 'c')
+        message('sse assembly available')
+        conf.set('USE_SSE', 1, description: 'Define to 1 if SSE assembly is available.')
+        sse_args = ['-mfpmath=sse']
+        if platform_win32
+          sse_args += '-mstackrealign'
+        endif
+
+        foreach sse_arg : sse_args
+          if cc.has_argument(sse_arg)
+            add_project_arguments(sse_arg, language: 'c')
+          endif
+        endforeach
+
+        # sse2 assembly
+        if cc.has_argument('-msse2') and get_option('enable-sse2')
+          if cc.compiles('asm ("punpckhwd %xmm0,%xmm1");')
+            message('sse2 assembly available')
+            sse2_cflags = '-msse2'
+            conf.set('USE_SSE2', 1, description: 'Define to 1 if sse2 assembly is available.')
+
+            # sse4.1 assembly
+            if cc.has_argument('-msse4.1') and get_option('enable-sse4_1')
+              if cc.compiles('asm ("pmovzxbd %xmm0,%xmm1");')
+                message('sse4.1 assembly available')
+                sse4_1_cflags = '-msse4.1'
+                conf.set('USE_SSE4_1', 1, description: 'Define to 1 if sse4.1 assembly is available.')
+              endif
+            endif
+          endif
+        endif
+      endif
+      # f16c intrinsics: compile real C with -mf16c (not an asm string) so the check can fail
+      if cc.has_argument('-mf16c') and get_option('enable-f16c')
+        if cc.compiles('#include <immintrin.h>\n' +
+          'int main (void) { __m128 val = _mm_cvtph_ps ((__m128i)_mm_setzero_ps());' +
+          ' __m128i val2 = _mm_insert_epi64((__m128i)_mm_setzero_ps(),0,0); return 0; }',
+          args: ['-mf16c'])
+          message('Can compile half-floating point code (f16c)')
+          f16c_cflags = '-mf16c'
+          conf.set('USE_F16C', 1, description: 'Define to 1 if f16c intrinsics are available.')
+        endif
+      endif
+    endif
+  endif
 endif
 
-had_f16c= cc.has_argument('-mf16c') and get_option('enable-f16c')
-if had_f16c
-  add_project_arguments(   '-mf16c',
-    language: 'c')
-endif
+have_tls_run = cc.run('int main() { static __thread char buf[1024]; return 0; }')
+conf.set('HAVE_TLS', ( have_tls_run.compiled() and have_tls_run.returncode() == 0 ))  # true only if the __thread test program compiled and exited 0
 
 have_dlfcn_h = cc.has_header('dlfcn.h')
 have_dl_h    = cc.has_header('dl.h')


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]