[babl] babl: sync cpuaccel with GEGL



commit 1c151bd73fb3cd28719addfcafa3f92569fd0baa
Author: Øyvind Kolås <pippin gimp org>
Date:   Sat Jan 22 00:02:43 2022 +0100

    babl: sync cpuaccel with GEGL

 babl/babl-cpuaccel.c | 94 ++++++++++++++++++++++++++++++++++++++++++++--------
 babl/babl-cpuaccel.h | 48 ++++++++++++++++++++++-----
 2 files changed, 120 insertions(+), 22 deletions(-)
---
diff --git a/babl/babl-cpuaccel.c b/babl/babl-cpuaccel.c
index 6c1f1cc11..7d613d94f 100644
--- a/babl/babl-cpuaccel.c
+++ b/babl/babl-cpuaccel.c
@@ -14,11 +14,10 @@
  * You should have received a copy of the GNU Lesser General
  * Public License along with this library; if not, see
  * <https://www.gnu.org/licenses/>.
+ *
+ * (c) Manish Singh, Aaron Holtzman, Jan Heller, Ell, Øyvind Kolås
  */
 
-/*
- * x86 bits Copyright (C) Manish Singh <yosh gimp org>
- */
 
 /*
  * PPC CPU acceleration detection was taken from DirectFB but seems to be
@@ -78,7 +77,6 @@ babl_cpu_accel_set_use (gboolean use)
 
 #define HAVE_ACCEL 1
 
-
 typedef enum
 {
   ARCH_X86_VENDOR_NONE,
@@ -117,15 +115,26 @@ enum
 {
   ARCH_X86_INTEL_FEATURE_PNI      = 1 << 0,
   ARCH_X86_INTEL_FEATURE_SSSE3    = 1 << 9,
+  ARCH_X86_INTEL_FEATURE_FMA      = 1 << 12,
   ARCH_X86_INTEL_FEATURE_SSE4_1   = 1 << 19,
   ARCH_X86_INTEL_FEATURE_SSE4_2   = 1 << 20,
+  ARCH_X86_INTEL_FEATURE_MOVBE    = 1 << 22,
+  ARCH_X86_INTEL_FEATURE_POPCNT   = 1 << 23,
+  ARCH_X86_INTEL_FEATURE_XSAVE    = 1 << 26,
+  ARCH_X86_INTEL_FEATURE_OSXSAVE  = 1 << 27,
   ARCH_X86_INTEL_FEATURE_AVX      = 1 << 28,
   ARCH_X86_INTEL_FEATURE_F16C     = 1 << 29,
 
-  /* extended features */
-  ARCH_X86_INTEL_FEATURE_AVX2     = 1 << 5
+ // extended features
+
+  ARCH_X86_INTEL_FEATURE_BMI1     = 1 << 3,
+  ARCH_X86_INTEL_FEATURE_BMI2     = 1 << 8,
+  ARCH_X86_INTEL_FEATURE_AVX2     = 1 << 5,
 };
 
+
+/* x86 asm bit Copyright (C) Manish Singh <yosh gimp org>
+ */
 #if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
 #define cpuid(op,eax,ebx,ecx,edx)  \
   __asm__ ("movl %%ebx, %%esi\n\t" \
@@ -256,19 +265,43 @@ arch_accel_intel (void)
     if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_1)
       caps |= BABL_CPU_ACCEL_X86_SSE4_1;
 
+    if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_2)
+      caps |= BABL_CPU_ACCEL_X86_SSE4_2;
+
+    if (ecx & ARCH_X86_INTEL_FEATURE_AVX)
+      caps |= BABL_CPU_ACCEL_X86_AVX;
+
+    if (ecx & ARCH_X86_INTEL_FEATURE_POPCNT)
+      caps |= BABL_CPU_ACCEL_X86_POPCNT;
+
+    if (ecx & ARCH_X86_INTEL_FEATURE_XSAVE)
+      caps |= BABL_CPU_ACCEL_X86_XSAVE;
+
+    if (ecx & ARCH_X86_INTEL_FEATURE_OSXSAVE)
+      caps |= BABL_CPU_ACCEL_X86_OSXSAVE;
+
+    if (ecx & ARCH_X86_INTEL_FEATURE_FMA)
+      caps |= BABL_CPU_ACCEL_X86_FMA;
+
     if (ecx & ARCH_X86_INTEL_FEATURE_F16C)
       caps |= BABL_CPU_ACCEL_X86_F16C;
 
-    cpuid (0, eax, ebx, ecx, edx);
+    if (ecx & ARCH_X86_INTEL_FEATURE_MOVBE)
+      caps |= BABL_CPU_ACCEL_X86_MOVBE;
 
+    cpuid (0, eax, ebx, ecx, edx);
     if (eax >= 7)
-      {
-        cpuid (7, eax, ebx, ecx, edx);
-
-        if (ebx & ARCH_X86_INTEL_FEATURE_AVX2)
-          caps |= BABL_CPU_ACCEL_X86_AVX2;
-      }
+    {
+      cpuid (7, eax, ebx, ecx, edx);
+      if (ebx & ARCH_X86_INTEL_FEATURE_AVX2)
+        caps |= BABL_CPU_ACCEL_X86_AVX2;
+      if (ebx & ARCH_X86_INTEL_FEATURE_BMI1)
+        caps |= BABL_CPU_ACCEL_X86_BMI1;
+      if (ebx & ARCH_X86_INTEL_FEATURE_BMI2)
+        caps |= BABL_CPU_ACCEL_X86_BMI2;
+    }
 #endif /* USE_SSE */
+
   }
 #endif /* USE_MMX */
 
@@ -517,6 +550,41 @@ arch_accel (void)
 
 #endif /* ARCH_PPC && USE_ALTIVEC */
 
+#if defined(ARCH_ARM)
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <elf.h>
+
+#define HAVE_ACCEL 1
+
+static guint32
+arch_accel (void)
+{
+  /* Scan /proc/self/auxv for the AT_HWCAP entry and report NEON when
+   * its 4096 bit is set; if the file cannot be opened, report nothing.
+   * NOTE(review): Elf32_auxv_t assumes a 32-bit auxv layout - confirm.
+   * TODO : add or hardcode the other ways it can be on arm, where
+   *        this info comes from the system and not from running cpu
+   *        instructions
+   */
+  int has_neon = 0;
+  int fd = open ("/proc/self/auxv", O_RDONLY);
+  Elf32_auxv_t auxv;
+  if (fd >= 0)
+  {
+    while (read (fd, &auxv, sizeof (Elf32_auxv_t)) == sizeof (Elf32_auxv_t))
+    {
+      if (auxv.a_type == AT_HWCAP && (auxv.a_un.a_val & 4096))
+        has_neon = 1;
+    }
+    close (fd);
+  }
+  return has_neon ? BABL_CPU_ACCEL_ARM_NEON : 0;
+}
+
+#endif /* ARCH_ARM  */
 
 static BablCpuAccelFlags
 cpu_accel (void)
diff --git a/babl/babl-cpuaccel.h b/babl/babl-cpuaccel.h
index b8a685510..133d13844 100644
--- a/babl/babl-cpuaccel.h
+++ b/babl/babl-cpuaccel.h
@@ -24,25 +24,55 @@ typedef enum
   BABL_CPU_ACCEL_NONE        = 0x0,
 
   /* x86 accelerations */
-  BABL_CPU_ACCEL_X86_MMX     = 0x01000000,
+  BABL_CPU_ACCEL_X86_MMX     = 0x80000000,
   BABL_CPU_ACCEL_X86_3DNOW   = 0x40000000,
   BABL_CPU_ACCEL_X86_MMXEXT  = 0x20000000,
   BABL_CPU_ACCEL_X86_SSE     = 0x10000000,
   BABL_CPU_ACCEL_X86_SSE2    = 0x08000000,
-  BABL_CPU_ACCEL_X86_SSE3    = 0x02000000,
-  BABL_CPU_ACCEL_X86_SSSE3   = 0x00800000,
-  BABL_CPU_ACCEL_X86_SSE4_1  = 0x00400000,
-  /* BABL_CPU_ACCEL_X86_SSE4_2  = 0x00200000, */
-  /* BABL_CPU_ACCEL_X86_AVX     = 0x00080000, */
+  BABL_CPU_ACCEL_X86_SSE3    = 0x04000000,
+  BABL_CPU_ACCEL_X86_SSSE3   = 0x02000000,
+  BABL_CPU_ACCEL_X86_SSE4_1  = 0x01000000,
+  BABL_CPU_ACCEL_X86_SSE4_2  = 0x00800000,
+  BABL_CPU_ACCEL_X86_AVX     = 0x00400000,
+  BABL_CPU_ACCEL_X86_POPCNT  = 0x00200000,
+  BABL_CPU_ACCEL_X86_FMA     = 0x00100000,
+  BABL_CPU_ACCEL_X86_MOVBE   = 0x00080000,
   BABL_CPU_ACCEL_X86_F16C    = 0x00040000,
-  BABL_CPU_ACCEL_X86_AVX2    = 0x00020000,
+  BABL_CPU_ACCEL_X86_XSAVE   = 0x00020000,
+  BABL_CPU_ACCEL_X86_OSXSAVE = 0x00010000,
+  BABL_CPU_ACCEL_X86_BMI1    = 0x00008000,
+  BABL_CPU_ACCEL_X86_BMI2    = 0x00004000,
+  BABL_CPU_ACCEL_X86_AVX2    = 0x00002000,
+
+  BABL_CPU_ACCEL_X86_64_V2 =
+    (BABL_CPU_ACCEL_X86_POPCNT|
+     BABL_CPU_ACCEL_X86_SSE4_1|
+     BABL_CPU_ACCEL_X86_SSE4_2|
+     BABL_CPU_ACCEL_X86_SSSE3),
+
+  BABL_CPU_ACCEL_X86_64_V3 =
+    (BABL_CPU_ACCEL_X86_64_V2|
+     BABL_CPU_ACCEL_X86_BMI1|
+     BABL_CPU_ACCEL_X86_BMI2|
+     BABL_CPU_ACCEL_X86_AVX|
+     BABL_CPU_ACCEL_X86_FMA|
+     BABL_CPU_ACCEL_X86_F16C|
+     BABL_CPU_ACCEL_X86_AVX2|
+     BABL_CPU_ACCEL_X86_OSXSAVE|
+     BABL_CPU_ACCEL_X86_MOVBE),
 
   /* powerpc accelerations */
-  BABL_CPU_ACCEL_PPC_ALTIVEC = 0x04000000,
-  BABL_CPU_ACCEL_X86_64      = 0x00100000
+  BABL_CPU_ACCEL_PPC_ALTIVEC = 0x00000010,
+
+  /* arm accelerations */
+  BABL_CPU_ACCEL_ARM_NEON    = 0x00000020,
+
+  /* x86_64 arch */
+  BABL_CPU_ACCEL_X86_64      = 0x00000040
 } BablCpuAccelFlags;
 
 
+
 BablCpuAccelFlags  babl_cpu_accel_get_support (void);
 void               babl_cpu_accel_set_use     (unsigned int use);
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]