[gegl] build, gegl, operations: extend SIMD dispatch to handle ARM NEON



commit feb357214af81ff2d500cf08a4ee6273e5e21675
Author: Øyvind Kolås <pippin gimp org>
Date:   Fri Jan 21 01:24:48 2022 +0100

    build,gegl,operations: extend SIMD dispatch to handle ARM NEON

 gegl/buffer/gegl-algorithms-arm-neon.c |  4 +++
 gegl/buffer/gegl-buffer.c              | 56 ++++++++++++++++++++++++++++++++--
 gegl/buffer/meson.build                |  7 +++++
 gegl/gegl-cpuaccel.c                   | 36 ++++++++++++++++++++++
 gegl/gegl-cpuaccel.h                   |  3 ++
 gegl/gegl-init.c                       |  5 +++
 gegl/meson.build                       |  8 +++--
 gegl/module/geglmoduledb.c             | 29 ++++++++++++++++--
 meson.build                            |  9 ++++--
 operations/common-cxx/meson.build      | 22 +++++++++++++
 operations/common-gpl3+/meson.build    | 22 +++++++++++++
 operations/common/meson.build          | 13 ++++++++
 operations/generated/meson.build       | 14 +++++++++
 operations/transform/meson.build       | 14 +++++++++
 14 files changed, 232 insertions(+), 10 deletions(-)
---
diff --git a/gegl/buffer/gegl-algorithms-arm-neon.c b/gegl/buffer/gegl-algorithms-arm-neon.c
new file mode 100644
index 000000000..015987bc8
--- /dev/null
+++ b/gegl/buffer/gegl-algorithms-arm-neon.c
@@ -0,0 +1,4 @@
+
+#define GEGL_SIMD_SUFFIX(symbol)  symbol##_arm_neon
+
+#include "gegl-algorithms.c"
diff --git a/gegl/buffer/gegl-buffer.c b/gegl/buffer/gegl-buffer.c
index 6b5980ee1..3bd9586be 100644
--- a/gegl/buffer/gegl-buffer.c
+++ b/gegl/buffer/gegl-buffer.c
@@ -1426,13 +1426,54 @@ void gegl_downscale_2x2_x86_64_v3 (const Babl *format,
 
 #endif
 
+#ifdef ARCH_ARM
+
+void gegl_resample_bilinear_arm_neon (guchar *dest_buf,
+                                      const guchar *source_buf,
+                                      const GeglRectangle *dst_rect,
+                                      const GeglRectangle *src_rect,
+                                      gint                 s_rowstride,
+                                      gdouble              scale,
+                                      const Babl          *format,
+                                      gint                 d_rowstride);
+
+
+void gegl_resample_boxfilter_arm_neon (guchar *dest_buf,
+                                       const guchar *source_buf,
+                                       const GeglRectangle *dst_rect,
+                                       const GeglRectangle *src_rect,
+                                       gint                 s_rowstride,
+                                       gdouble              scale,
+                                       const Babl          *format,
+                                       gint                 d_rowstride);
+
+
+void gegl_resample_nearest_arm_neon (guchar *dest_buf,
+                                     const guchar *source_buf,
+                                     const GeglRectangle *dst_rect,
+                                     const GeglRectangle *src_rect,
+                                     gint                 s_rowstride,
+                                     gdouble              scale,
+                                     const gint           bpp,
+                                     gint                 d_rowstride);
+
+void gegl_downscale_2x2_arm_neon (const Babl *format,
+                                  gint        src_width,
+                                  gint        src_height,
+                                  guchar     *src_data,
+                                  gint        src_rowstride,
+                                  guchar     *dst_data,
+                                  gint        dst_rowstride);
+
+#endif
+
 guint16 gegl_lut_u8_to_u16[256];
 gfloat  gegl_lut_u8_to_u16f[256];
 guint8  gegl_lut_u16_to_u8[65536/GEGL_ALGORITHMS_LUT_DIVISOR];
 
 
-void _gegl_init_buffer (int x86_64_version);
-void _gegl_init_buffer (int x86_64_version)
+void _gegl_init_buffer (int variant);
+void _gegl_init_buffer (int variant)
 {
   static int inited = 0;
   guint8 u8_ramp[256];
@@ -1457,8 +1498,17 @@ void _gegl_init_buffer (int x86_64_version)
   babl_process (babl_fish (babl_format ("Y u16"), babl_format("Y' u8")),
                 &u16_ramp[0], &gegl_lut_u16_to_u8[0],
                 65536/GEGL_ALGORITHMS_LUT_DIVISOR);
+#ifdef ARCH_ARM
+  if (variant)
+  {
+    gegl_resample_bilinear  = gegl_resample_bilinear_arm_neon;
+    gegl_resample_boxfilter = gegl_resample_boxfilter_arm_neon;
+    gegl_resample_nearest   = gegl_resample_nearest_arm_neon;
+    gegl_downscale_2x2      = gegl_downscale_2x2_arm_neon;
+  }
+#endif
 #ifdef ARCH_X86_64
-  switch (x86_64_version)
+  switch (variant)
   {
     case 0:
     case 1: break;
diff --git a/gegl/buffer/meson.build b/gegl/buffer/meson.build
index 7097743fc..1e125bac8 100644
--- a/gegl/buffer/meson.build
+++ b/gegl/buffer/meson.build
@@ -11,6 +11,13 @@ if host_cpu_family == 'x86_64'
     dependencies:[glib, babl],
     c_args: [gegl_cflags ] + x86_64_v3_flags
   )
+elif host_cpu_family == 'arm'
+  lib_gegl_arm_neon = static_library('gegl-arm-neon', 'gegl-algorithms-arm-neon.c',
+    include_directories:[geglInclude, rootInclude],
+    dependencies:[glib, babl],
+    c_args: [gegl_cflags ] + arm_neon_flags
+  )
+
 endif
 
 gegl_sources += files(
diff --git a/gegl/gegl-cpuaccel.c b/gegl/gegl-cpuaccel.c
index cf0395270..21353b912 100644
--- a/gegl/gegl-cpuaccel.c
+++ b/gegl/gegl-cpuaccel.c
@@ -546,6 +546,42 @@ arch_accel (void)
 #endif /* ARCH_PPC && USE_ALTIVEC */
 
 
+#if defined(ARCH_ARM)
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <elf.h>
+
+#define HAVE_ACCEL 1
+
+static guint32
+arch_accel (void)
+{
+  /* Report NEON when an AT_HWCAP entry in /proc/self/auxv has bit 4096
+   * set; yields 0 otherwise, also when the file cannot be opened.
+   * TODO: add or hardcode the other ways this can be known on ARM, where
+   *       the info comes from the system, not from running instructions */
+  int has_neon = 0;
+  int fd = open ("/proc/self/auxv", O_RDONLY);
+  Elf32_auxv_t auxv;
+  if (fd >= 0)
+  {
+    while (read (fd, &auxv, sizeof (Elf32_auxv_t)) == sizeof (Elf32_auxv_t))
+    {
+      if (auxv.a_type == AT_HWCAP)
+      {
+        if (auxv.a_un.a_val & 4096)
+          has_neon = 1;
+      }
+    }
+    close (fd);
+  }
+  return has_neon?GEGL_CPU_ACCEL_ARM_NEON:0;
+}
+
+#endif /* ARCH_ARM  */
+
 static GeglCpuAccelFlags
 cpu_accel (void)
 {
diff --git a/gegl/gegl-cpuaccel.h b/gegl/gegl-cpuaccel.h
index 9e3dad36a..cf615719a 100644
--- a/gegl/gegl-cpuaccel.h
+++ b/gegl/gegl-cpuaccel.h
@@ -67,6 +67,9 @@ typedef enum
 
   /* powerpc accelerations */
   GEGL_CPU_ACCEL_PPC_ALTIVEC = 0x00000010,
+
+  /* arm accelerations */
+  GEGL_CPU_ACCEL_ARM_NEON    = 0x00000020,
 } GeglCpuAccelFlags;
 
 
diff --git a/gegl/gegl-init.c b/gegl/gegl-init.c
index 0744209a4..9381d9959 100644
--- a/gegl/gegl-init.c
+++ b/gegl/gegl-init.c
@@ -542,12 +542,17 @@ gegl_post_parse_hook (GOptionContext *context,
 
   babl_init ();
 
+#if ARCH_ARM
+  GeglCpuAccelFlags cpu_accel = gegl_cpu_accel_get_support ();
+  _gegl_init_buffer ((cpu_accel & GEGL_CPU_ACCEL_ARM_NEON) != 0);
+#else
   GeglCpuAccelFlags cpu_accel = gegl_cpu_accel_get_support ();
   int x86_64_version = 0;
   if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V2) x86_64_version = 2;
   if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V3) x86_64_version = 3;
 
   _gegl_init_buffer (x86_64_version);
+#endif
 
 #ifdef GEGL_ENABLE_DEBUG
   {
diff --git a/gegl/meson.build b/gegl/meson.build
index 48456757b..54977f5ed 100644
--- a/gegl/meson.build
+++ b/gegl/meson.build
@@ -96,9 +96,11 @@ opencl_dep = declare_dependency(
 
 
 if host_cpu_family == 'x86_64'
-  x86_64_extra = [lib_gegl_x86_64_v2, lib_gegl_x86_64_v3]
+  simd_extra = [lib_gegl_x86_64_v2, lib_gegl_x86_64_v3]
+elif host_cpu_family == 'arm'
+  simd_extra = [lib_gegl_arm_neon]
 else
-  x86_64_extra = []
+  simd_extra = []
 endif
 
 gegl_lib = library(api_name,
@@ -114,7 +116,7 @@ gegl_lib = library(api_name,
   ],
   c_args: gegl_cflags,
 
-  link_with: x86_64_extra,
+  link_with: simd_extra,
   link_args: gegl_ldflags,
   install: true,
   version: so_version,
diff --git a/gegl/module/geglmoduledb.c b/gegl/module/geglmoduledb.c
index c3b628035..f8848c13c 100644
--- a/gegl/module/geglmoduledb.c
+++ b/gegl/module/geglmoduledb.c
@@ -25,6 +25,15 @@
 #include "gegl-cpuaccel.h"
 #include "gegl-config.h"
 
+
+#ifdef ARCH_X86_64
+#define ARCH_SIMD
+#endif
+#ifdef ARCH_ARM
+#define ARCH_SIMD
+#endif
+
+
 enum
 {
   ADD,
@@ -228,7 +237,7 @@ gegl_module_db_get_load_inhibit (GeglModuleDB *db)
   return db->load_inhibit;
 }
 
-#ifdef ARCH_X86_64
+#ifdef ARCH_SIMD
 
 static gboolean
 gegl_str_has_one_of_suffixes (const char *str,
@@ -245,6 +254,8 @@ gegl_str_has_one_of_suffixes (const char *str,
 static void
 gegl_module_db_remove_duplicates (GeglModuleDB *db)
 {
+#ifdef ARCH_X86_64
+
 #ifdef __APPLE__ /* G_MODULE_SUFFIX is defined to .so instead of .dylib */
   char *suffix_list[] = {"-x86_64-v2.dylib","-x86_64-v3.dylib", NULL};
 #else
@@ -257,6 +268,20 @@ gegl_module_db_remove_duplicates (GeglModuleDB *db)
   if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V3) preferred = 1;
   else if (cpu_accel & GEGL_CPU_ACCEL_X86_64_V2) preferred = 0;
 
+#endif
+#ifdef ARCH_ARM
+#ifdef __APPLE__ /* G_MODULE_SUFFIX is defined to .so instead of .dylib */
+  char *suffix_list[] = {"-arm-neon.dylib", NULL};
+#else
+  char *suffix_list[] = {"-arm-neon.so", NULL};
+#endif
+
+  GList *suffix_entries = NULL;
+  int preferred = -1;
+
+  GeglCpuAccelFlags cpu_accel = gegl_cpu_accel_get_support ();
+  if (cpu_accel & GEGL_CPU_ACCEL_ARM_NEON) preferred = 0;
+#endif
 
   for (GList *l = db->to_load; l; l = l->next)
   {
@@ -337,7 +362,7 @@ gegl_module_db_load (GeglModuleDB *db,
                                      G_FILE_TEST_EXISTS,
                                      gegl_module_db_module_search,
                                      db);
-#if ARCH_X86_64
+#ifdef ARCH_SIMD
     gegl_module_db_remove_duplicates (db);
 #endif
     while (db->to_load)
diff --git a/meson.build b/meson.build
index 9044dd429..a9366bc03 100644
--- a/meson.build
+++ b/meson.build
@@ -158,6 +158,9 @@ elif host_cpu_family == 'ppc64'
   have_ppc = true
   config.set10('ARCH_PPC',    true)
   config.set10('ARCH_PPC64',  true)
+elif host_cpu_family == 'arm'
+  have_arm = true
+  config.set10('ARCH_ARM',    true)
 endif
 
 # Only try to run compiled programs if native compile or cross-compile
@@ -207,8 +210,10 @@ add_project_arguments(cpp.get_supported_arguments(cflags_cpp), language: 'cpp')
 
 
 if host_cpu_family == 'x86_64'
-  x86_64_v2_flags = cc.get_supported_arguments(['-march=x86-64','-msse2', '-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3'])
-  x86_64_v3_flags = x86_64_v2_flags + cc.get_supported_arguments(['-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2'])
+  x86_64_v2_flags = cc.get_supported_arguments(['-ftree-vectorize','-march=x86-64','-msse2', '-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3'])
+  x86_64_v3_flags = x86_64_v2_flags + cc.get_supported_arguments(['-ftree-vectorize','-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2'])
+elif host_cpu_family == 'arm'
+  arm_neon_flags = cc.get_supported_arguments(['-ftree-vectorize','-mfpu=neon'])
 endif
 
 ################################################################################
diff --git a/operations/common-cxx/meson.build b/operations/common-cxx/meson.build
index ebced92c2..2d6c18b58 100644
--- a/operations/common-cxx/meson.build
+++ b/operations/common-cxx/meson.build
@@ -81,4 +81,26 @@ if host_cpu_family == 'x86_64'
   )
   gegl_operations += gegl_common_cxx_x86_64_v3
 
+elif host_cpu_family == 'arm'
+
+  gegl_common_cxx_arm_neon = shared_library('gegl-common-cxx-arm-neon',
+    gegl_common_cxx_sources, opencl_headers,
+    include_directories: [ rootInclude, geglInclude, ],
+    dependencies: [
+      babl,
+      glib,
+      json_glib,
+      math,
+    ],
+    link_with: [
+      gegl_lib,
+    ],
+    c_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+    cpp_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+    name_prefix: '',
+    install: true,
+    install_dir: get_option('libdir') / api_name,
+  )
+  gegl_operations += gegl_common_cxx_arm_neon
+
 endif
diff --git a/operations/common-gpl3+/meson.build b/operations/common-gpl3+/meson.build
index 3607b6e51..0513fb44d 100644
--- a/operations/common-gpl3+/meson.build
+++ b/operations/common-gpl3+/meson.build
@@ -125,4 +125,26 @@ if host_cpu_family == 'x86_64'
   )
   gegl_operations += gegl_common_gpl3_x86_64_v3
 
+elif host_cpu_family == 'arm'
+
+  gegl_common_gpl3_arm_neon = shared_library('gegl-common-gpl3-arm-neon',
+    gegl_common_gpl3_sources,
+    opencl_headers,
+    include_directories: [ rootInclude, geglInclude, ],
+    dependencies: [
+      babl,
+      glib,
+      json_glib,
+      math,
+    ],
+    link_with: [
+      gegl_lib,
+    ],
+    c_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+    name_prefix: '',
+    install: true,
+    install_dir: get_option('libdir') / api_name,
+  )
+  gegl_operations += gegl_common_gpl3_arm_neon
+
 endif
diff --git a/operations/common/meson.build b/operations/common/meson.build
index bc594a96d..120af48de 100644
--- a/operations/common/meson.build
+++ b/operations/common/meson.build
@@ -180,4 +180,17 @@ if host_cpu_family == 'x86_64'
   )
   
   gegl_operations += gegl_common_x86_64_v3
+elif host_cpu_family == 'arm'
+
+  gegl_common_arm_neon = shared_library('gegl-common-arm-neon',
+    gegl_common_sources, opencl_headers,
+    include_directories: [ rootInclude, geglInclude, ],
+    dependencies: [ babl, glib, json_glib, math, ],
+    link_with: [ gegl_lib, ],
+    c_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+    name_prefix: '',
+    install: true,
+    install_dir: get_option('libdir') / api_name,
+  )
+  gegl_operations += gegl_common_arm_neon
 endif
diff --git a/operations/generated/meson.build b/operations/generated/meson.build
index a7a400339..b2d0e82d2 100644
--- a/operations/generated/meson.build
+++ b/operations/generated/meson.build
@@ -83,4 +83,18 @@ if host_cpu_family == 'x86_64'
   )
   gegl_operations += gegl_generated_x86_64_v3
 
+elif host_cpu_family == 'arm'
+
+  gegl_generated_arm_neon = shared_library('gegl-generated-arm-neon',
+    gegl_generated_sources, opencl_headers,
+    include_directories: [ rootInclude, geglInclude, ],
+    dependencies: [ babl, glib, json_glib, math, ],
+    link_with: [ gegl_lib, ],
+    c_args: [ '-DGEGL_OP_BUNDLE' ] + arm_neon_flags,
+    name_prefix: '',
+    install: true,
+    install_dir: get_option('libdir') / api_name,
+  )
+  gegl_operations += gegl_generated_arm_neon
+
 endif
diff --git a/operations/transform/meson.build b/operations/transform/meson.build
index 352ce6564..110b28962 100644
--- a/operations/transform/meson.build
+++ b/operations/transform/meson.build
@@ -60,4 +60,18 @@ if host_cpu_family == 'x86_64'
   )
   gegl_operations += gegl_transformops_x86_64_v3
 
+elif host_cpu_family == 'arm'
+
+  gegl_transformops_arm_neon = shared_library('gegl-transformops-arm-neon',
+    gegl_transformops_sources, opencl_headers,
+    include_directories: [ rootInclude, geglInclude, ],
+    dependencies: [ babl, glib, json_glib, math, ],
+    link_with: [ gegl_lib, ],
+    c_args: arm_neon_flags,
+    name_prefix: '',
+    install: true,
+    install_dir: get_option('libdir') / api_name,
+  )
+  gegl_operations += gegl_transformops_arm_neon
+
 endif


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]