[babl] configure.ac, meson, cpu-accel: add AVX2 detection



commit 385f0b545727262f58d3cfcf5523f69ace0e0166
Author: Ell <ell_se yahoo com>
Date:   Wed Jul 24 23:21:01 2019 +0300

    configure.ac, meson, cpu-accel: add AVX2 detection
    
    Detect AVX2 support during configuration and runtime, in
    preparation for the next commit.

 babl/babl-cpuaccel.c | 17 ++++++++++++++++-
 babl/babl-cpuaccel.h |  1 +
 configure.ac         | 24 ++++++++++++++++++++++++
 meson.build          | 10 ++++++++++
 meson_options.txt    |  1 +
 5 files changed, 52 insertions(+), 1 deletion(-)
---
diff --git a/babl/babl-cpuaccel.c b/babl/babl-cpuaccel.c
index 534fa89..ef26fa5 100644
--- a/babl/babl-cpuaccel.c
+++ b/babl/babl-cpuaccel.c
@@ -121,11 +121,15 @@ enum
   ARCH_X86_INTEL_FEATURE_SSE4_2   = 1 << 20,
   ARCH_X86_INTEL_FEATURE_AVX      = 1 << 28,
   ARCH_X86_INTEL_FEATURE_F16C     = 1 << 29,
+
+  /* extended features */
+  ARCH_X86_INTEL_FEATURE_AVX2     = 1 << 5
 };
 
 #if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
 #define cpuid(op,eax,ebx,ecx,edx)  \
   __asm__ ("movl %%ebx, %%esi\n\t" \
+           "xor %%ecx, %%ecx\n\t"  \
            "cpuid\n\t"             \
            "xchgl %%ebx,%%esi"     \
            : "=a" (eax),           \
@@ -135,7 +139,8 @@ enum
            : "0" (op))
 #else
 #define cpuid(op,eax,ebx,ecx,edx)  \
-  __asm__ ("cpuid"                 \
+  __asm__ ("xor %%ecx, %%ecx\n\t"  \
+           "cpuid"                 \
            : "=a" (eax),           \
              "=b" (ebx),           \
              "=c" (ecx),           \
@@ -253,6 +258,16 @@ arch_accel_intel (void)
 
     if (ecx & ARCH_X86_INTEL_FEATURE_F16C)
       caps |= BABL_CPU_ACCEL_X86_F16C;
+
+    cpuid (0, eax, ebx, ecx, edx);
+
+    if (eax >= 7)
+      {
+        cpuid (7, eax, ebx, ecx, edx);
+
+        if (ebx & ARCH_X86_INTEL_FEATURE_AVX2)
+          caps |= BABL_CPU_ACCEL_X86_AVX2;
+      }
 #endif /* USE_SSE */
   }
 #endif /* USE_MMX */
diff --git a/babl/babl-cpuaccel.h b/babl/babl-cpuaccel.h
index 738bc59..b8a6855 100644
--- a/babl/babl-cpuaccel.h
+++ b/babl/babl-cpuaccel.h
@@ -35,6 +35,7 @@ typedef enum
   /* BABL_CPU_ACCEL_X86_SSE4_2  = 0x00200000, */
   /* BABL_CPU_ACCEL_X86_AVX     = 0x00080000, */
   BABL_CPU_ACCEL_X86_F16C    = 0x00040000,
+  BABL_CPU_ACCEL_X86_AVX2    = 0x00020000,
 
   /* powerpc accelerations */
   BABL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000,
diff --git a/configure.ac b/configure.ac
index 7f53331..bb29428 100644
--- a/configure.ac
+++ b/configure.ac
@@ -353,6 +353,10 @@ AC_ARG_ENABLE(sse4_1,
   [  --enable-sse4_1         enable SSE4_1 support (default=auto)],,
   enable_sse4_1=$enable_sse)
 
+AC_ARG_ENABLE(avx2,
+  [  --enable-avx2           enable AVX2 support (default=auto)],,
+  enable_avx2=$enable_sse)
+
 AC_ARG_ENABLE(f16c,
   [  --enable-f16c           enable hardware half-float support (default=auto)],,
   enable_f16c=$enable_sse)
@@ -363,6 +367,7 @@ if test "x$enable_mmx" = xyes; then
   SSE2_EXTRA_CFLAGS=
   SSE3_EXTRA_CFLAGS=
   SSE4_1_EXTRA_CFLAGS=
+  AVX2_EXTRA_CFLAGS=
   F16C_EXTRA_CFLAGS=
 
   AC_MSG_CHECKING(whether we can compile MMX code)
@@ -448,6 +453,24 @@ if test "x$enable_mmx" = xyes; then
               AC_MSG_RESULT(no)
               AC_MSG_WARN([The assembler does not support the SSE4_1 command set.])
             )
+
+            if test "x$enable_avx2" = xyes; then
+              BABL_DETECT_CFLAGS(avx2_flag, '-mavx2')
+              AVX2_EXTRA_CFLAGS="$SSE4_1_EXTRA_CFLAGS $avx2_flag"
+
+              AC_MSG_CHECKING(whether we can compile AVX2 code)
+
+              CFLAGS="$CFLAGS $avx2_flag"
+
+              AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpgatherdd %ymm0,(%rax,%ymm1,4),%ymm2");])],
+                AC_DEFINE(USE_AVX2, 1, [Define to 1 if AVX2 assembly is available.])
+                AC_MSG_RESULT(yes)
+              ,
+                enable_avx2=no
+                AC_MSG_RESULT(no)
+                AC_MSG_WARN([The assembler does not support the AVX2 command set.])
+              )
+            fi
           fi
         fi
       fi
@@ -486,6 +509,7 @@ if test "x$enable_mmx" = xyes; then
   AC_SUBST(SSE2_EXTRA_CFLAGS)
   AC_SUBST(SSE3_EXTRA_CFLAGS)
   AC_SUBST(SSE4_1_EXTRA_CFLAGS)
+  AC_SUBST(AVX2_EXTRA_CFLAGS)
   AC_SUBST(F16C_EXTRA_CFLAGS)
 fi
 
diff --git a/meson.build b/meson.build
index b17db52..c72688e 100644
--- a/meson.build
+++ b/meson.build
@@ -216,6 +216,16 @@ if cc.has_argument('-mmmx') and get_option('enable-mmx')
                 conf.set('USE_SSE4_1', 1, description:
                   'Define to 1 if sse4.1 assembly is available.')
               endif
+
+              # avx2 assembly
+              if cc.has_argument('-mavx2') and get_option('enable-avx2')
+                if cc.compiles('asm ("vpgatherdd %ymm0,(%rax,%ymm1,4),%ymm2");')
+                  message('avx2 assembly available')
+                  avx2_cflags = '-mavx2'
+                  conf.set('USE_AVX2', 1, description:
+                    'Define to 1 if avx2 assembly is available.')
+                endif
+              endif
             endif
           endif
         endif
diff --git a/meson_options.txt b/meson_options.txt
index ab08ce9..f4a7ced 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -3,6 +3,7 @@ option('enable-sse',    type: 'boolean', value: true, description: 'enable SSE s
 option('enable-sse2',   type: 'boolean', value: true, description: 'enable SSE2 support')
 option('enable-sse3',   type: 'boolean', value: true, description: 'enable SSE3 support')
 option('enable-sse4_1', type: 'boolean', value: true, description: 'enable SSE4.1 support')
+option('enable-avx2',   type: 'boolean', value: true, description: 'enable AVX2 support')
 option('enable-f16c',   type: 'boolean', value: true, description: 'enable hardware half-float support')
 option('with-docs',     type: 'boolean', value: true, description: 'build website')
 option('with-lcms',     type: 'boolean', value: true, description: 'build with lcms')


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]