[babl] base: x86-64-v2 and x86-64-v3 variants



commit b4f0b28d0bb14d718660e1663fa48e7acfaf6849
Author: Øyvind Kolås <pippin gimp org>
Date:   Sat Jan 22 01:26:17 2022 +0100

    base: x86-64-v2 and x86-64-v3 variants

 babl/babl-extension.c   | 26 ++++++++++++++++++++++-
 babl/base/babl-base.c   | 26 +++++++++++------------
 babl/base/babl-base.h   | 56 +++++++++++++++++++++++++++++++++++--------------
 babl/base/formats.c     |  2 +-
 babl/base/meson.build   | 19 +++++++++++++++++
 babl/base/model-cmyk.c  |  2 +-
 babl/base/model-gray.c  |  2 +-
 babl/base/model-rgb.c   |  2 +-
 babl/base/model-ycbcr.c |  2 +-
 babl/base/type-float.c  |  2 +-
 babl/base/type-half.c   |  2 +-
 babl/base/type-u15.c    |  2 +-
 babl/base/type-u16.c    |  2 +-
 babl/base/type-u32.c    |  2 +-
 babl/base/type-u8.c     |  2 +-
 babl/meson.build        |  9 ++++++++
 meson.build             |  9 +++++---
 17 files changed, 123 insertions(+), 44 deletions(-)
---
diff --git a/babl/babl-extension.c b/babl/babl-extension.c
index 41edb8e9b..ed0a64b01 100644
--- a/babl/babl-extension.c
+++ b/babl/babl-extension.c
@@ -31,9 +31,31 @@
 #include "babl-internal.h"
 #include "babl-db.h"
 #include "babl-base.h"
+
+#include "babl-cpuaccel.h"
 #include <string.h>
 #include <stdarg.h>
 
+void (*babl_base_init)  (void) = babl_base_init_generic;
+#ifdef ARCH_X86_64
+void babl_base_init_x86_64_v2 (void);
+void babl_base_init_x86_64_v3 (void);
+#endif
+
+static void base_init (void)
+{
+#ifdef ARCH_X86_64
+  BablCpuAccelFlags accel = babl_cpu_accel_get_support ();
+  if (accel & BABL_CPU_ACCEL_X86_64_V3)
+    babl_base_init_x86_64_v3 ();
+  else if (accel & BABL_CPU_ACCEL_X86_64_V2)
+    babl_base_init_x86_64_v2 ();
+  else
+#endif
+    babl_base_init_generic ();
+}
+
+
 static Babl *babl_extension_current_extender = NULL;
 
 Babl *
@@ -107,7 +129,9 @@ babl_extension_base (void)
     if (ret != babl)
       babl_free (babl);
     else
-      babl_base_init ();
+    {
+        base_init ();
+    }
     babl = ret;
   }
   babl_set_extender (NULL);
diff --git a/babl/base/babl-base.c b/babl/base/babl-base.c
index 1d933419a..8b9cddef6 100644
--- a/babl/base/babl-base.c
+++ b/babl/base/babl-base.c
@@ -25,19 +25,19 @@ static void types (void);
 static void models (void);
 
 void
-babl_base_init (void)
+BABL_SIMD_SUFFIX(babl_base_init) (void)
 {
   babl_hmpf_on_name_lookups++;
 
   types ();
   models ();
-  babl_formats_init ();
+  BABL_SIMD_SUFFIX (babl_formats_init) ();
 
   babl_hmpf_on_name_lookups--;
 }
 
 void
-babl_base_destroy (void)
+BABL_SIMD_SUFFIX(babl_base_destroy) (void)
 {
   /* done by the destruction of the elemental babl clases */
 }
@@ -50,12 +50,12 @@ babl_base_destroy (void)
 static void
 types (void)
 {
-  babl_base_type_float ();
-  babl_base_type_u15 ();
-  babl_base_type_half ();
-  babl_base_type_u8 ();
-  babl_base_type_u16 ();
-  babl_base_type_u32 ();
+  BABL_SIMD_SUFFIX (babl_base_type_float) ();
+  BABL_SIMD_SUFFIX (babl_base_type_u15) ();
+  BABL_SIMD_SUFFIX (babl_base_type_half) ();
+  BABL_SIMD_SUFFIX (babl_base_type_u8) ();
+  BABL_SIMD_SUFFIX (babl_base_type_u16) ();
+  BABL_SIMD_SUFFIX (babl_base_type_u32) ();
 }
 
 /*
@@ -67,9 +67,9 @@ static void
 models (void)
 {
   babl_hmpf_on_name_lookups--;
-  babl_base_model_rgb ();
-  babl_base_model_gray ();
-  babl_base_model_cmyk ();
+  BABL_SIMD_SUFFIX (babl_base_model_rgb) ();
+  BABL_SIMD_SUFFIX (babl_base_model_gray) ();
+  BABL_SIMD_SUFFIX (babl_base_model_cmyk) ();
   babl_hmpf_on_name_lookups++;
-  babl_base_model_ycbcr ();
+  BABL_SIMD_SUFFIX (babl_base_model_ycbcr) ();
 }
diff --git a/babl/base/babl-base.h b/babl/base/babl-base.h
index 64f166757..67c4a539e 100644
--- a/babl/base/babl-base.h
+++ b/babl/base/babl-base.h
@@ -19,22 +19,46 @@
 #ifndef _BABL_BASE_H
 #define _BABL_BASE_H
 
+#ifdef X86_64_V2
+#define BABL_SIMD_SUFFIX(symbol) symbol##_x86_64_v2
+#else 
+#ifdef X86_64_V3
+#define BABL_SIMD_SUFFIX(symbol) symbol##_x86_64_v3
+#else
+#define BABL_SIMD_SUFFIX(symbol) symbol##_generic
+#endif
+#endif
+
+extern void (*babl_base_init)    (void);
+extern void (*babl_base_destroy) (void);
+extern void (*babl_formats_init) (void);
+
+extern void (*babl_base_type_half) (void);
+extern void (*babl_base_type_float)  (void);
+extern void (*babl_base_type_u8)     (void);
+extern void (*babl_base_type_u16)    (void);
+extern void (*babl_base_type_u15)    (void);
+extern void (*babl_base_type_u32)    (void);
+
+extern void (*babl_base_model_rgb)   (void);
+extern void (*babl_base_model_cmyk)  (void);
+extern void (*babl_base_model_gray)  (void);
+extern void (*babl_base_model_ycbcr) (void);
+
+void BABL_SIMD_SUFFIX(babl_base_init)    (void);
+void BABL_SIMD_SUFFIX(babl_base_destroy) (void);
+void BABL_SIMD_SUFFIX(babl_formats_init) (void);
+
+void BABL_SIMD_SUFFIX(babl_base_type_half) (void);
+void BABL_SIMD_SUFFIX(babl_base_type_float)  (void);
+void BABL_SIMD_SUFFIX(babl_base_type_u8)     (void);
+void BABL_SIMD_SUFFIX(babl_base_type_u16)    (void);
+void BABL_SIMD_SUFFIX(babl_base_type_u15)    (void);
+void BABL_SIMD_SUFFIX(babl_base_type_u32)    (void);
 
-void babl_base_init (void);
-void babl_base_destroy (void);
-void babl_formats_init (void);
-
-void babl_base_type_half   (void);
-void babl_base_type_float  (void);
-void babl_base_type_u8     (void);
-void babl_base_type_u16    (void);
-void babl_base_type_u15    (void);
-void babl_base_type_u32    (void);
-
-void babl_base_model_pal   (void);
-void babl_base_model_rgb   (void);
-void babl_base_model_cmyk  (void);
-void babl_base_model_gray  (void);
-void babl_base_model_ycbcr (void);
+void BABL_SIMD_SUFFIX(babl_base_model_rgb)   (void);
+void BABL_SIMD_SUFFIX(babl_base_model_cmyk)  (void);
+void BABL_SIMD_SUFFIX(babl_base_model_gray)  (void);
+void BABL_SIMD_SUFFIX(babl_base_model_ycbcr) (void);
 
 #endif
diff --git a/babl/base/formats.c b/babl/base/formats.c
index bad9d142a..bbdedf347 100644
--- a/babl/base/formats.c
+++ b/babl/base/formats.c
@@ -25,7 +25,7 @@
 #include "babl-base.h"
 
 void
-babl_formats_init (void)
+BABL_SIMD_SUFFIX (babl_formats_init) (void)
 {
   const Babl *types[]={
     babl_type_from_id (BABL_DOUBLE),
diff --git a/babl/base/meson.build b/babl/base/meson.build
index a78fd847b..41287c0a3 100644
--- a/babl/base/meson.build
+++ b/babl/base/meson.build
@@ -20,4 +20,23 @@ babl_base = static_library('babl_base',
   babl_base_sources,
   include_directories: [rootInclude, bablInclude],
   dependencies: [math, lcms],
+   c_args: common_c_flags
 )
+
+if host_cpu_family == 'x86_64'
+
+  babl_base_x86_64_v2 = static_library('babl_base-x86-64-v2',
+    babl_base_sources,
+    include_directories: [rootInclude, bablInclude],
+    dependencies: [math, lcms],
+    c_args: common_c_flags + x86_64_v2_flags + '-DX86_64_V2'
+  )
+
+  babl_base_x86_64_v3 = static_library('babl_base-x86-64-v3',
+    babl_base_sources,
+    include_directories: [rootInclude, bablInclude],
+    dependencies: [math, lcms],
+    c_args: common_c_flags + x86_64_v3_flags + '-DX86_64_V3'
+  )
+
+endif
diff --git a/babl/base/model-cmyk.c b/babl/base/model-cmyk.c
index 13fdedf87..1fa02beb6 100644
--- a/babl/base/model-cmyk.c
+++ b/babl/base/model-cmyk.c
@@ -613,7 +613,7 @@ cmy_to_rgba (const Babl *conversion,
 #endif
 
 void
-babl_base_model_cmyk (void)
+BABL_SIMD_SUFFIX (babl_base_model_cmyk) (void)
 {
   babl_component_new ("cyan", NULL);
   babl_component_new ("yellow", NULL);
diff --git a/babl/base/model-gray.c b/babl/base/model-gray.c
index 3862400d3..9a74096fb 100644
--- a/babl/base/model-gray.c
+++ b/babl/base/model-gray.c
@@ -31,7 +31,7 @@ static void formats (void);
 static void init_single_precision (void);
 
 void 
-babl_base_model_gray (void)
+BABL_SIMD_SUFFIX (babl_base_model_gray) (void)
 {
   components ();
   models ();
diff --git a/babl/base/model-rgb.c b/babl/base/model-rgb.c
index a3064efe0..824665aa6 100644
--- a/babl/base/model-rgb.c
+++ b/babl/base/model-rgb.c
@@ -32,7 +32,7 @@ static void formats (void);
 static void init_single_precision (void);
 
 void
-babl_base_model_rgb (void)
+BABL_SIMD_SUFFIX (babl_base_model_rgb) (void)
 {
   components ();
   models ();
diff --git a/babl/base/model-ycbcr.c b/babl/base/model-ycbcr.c
index 64db6a24b..e0612985e 100644
--- a/babl/base/model-ycbcr.c
+++ b/babl/base/model-ycbcr.c
@@ -34,7 +34,7 @@ static void conversions (void);
 static void formats (void);
 
 void
-babl_base_model_ycbcr (void)
+BABL_SIMD_SUFFIX (babl_base_model_ycbcr) (void)
 {
   components ();
   models ();
diff --git a/babl/base/type-float.c b/babl/base/type-float.c
index 5b03b3f5d..9517831d3 100644
--- a/babl/base/type-float.c
+++ b/babl/base/type-float.c
@@ -83,7 +83,7 @@ convert_float_float (const Babl *babl,
 
 
 void
-babl_base_type_float (void)
+BABL_SIMD_SUFFIX (babl_base_type_float) (void)
 {
   babl_type_new (
     "float",
diff --git a/babl/base/type-half.c b/babl/base/type-half.c
index 862d6629d..a14618596 100644
--- a/babl/base/type-half.c
+++ b/babl/base/type-half.c
@@ -395,7 +395,7 @@ convert_half_float (BablConversion *conversion,
 
 
 void
-babl_base_type_half (void)
+BABL_SIMD_SUFFIX (babl_base_type_half) (void)
 {
   babl_type_new (
     "half",
diff --git a/babl/base/type-u15.c b/babl/base/type-u15.c
index ea3545381..7224c6321 100644
--- a/babl/base/type-u15.c
+++ b/babl/base/type-u15.c
@@ -198,7 +198,7 @@ convert_u15_float_scaled (BablConversion *conversion,
 MAKE_CONVERSIONS_float (u15, 0.0, 1.0, 0, (1<<15))
 
 void
-babl_base_type_u15 (void)
+BABL_SIMD_SUFFIX (babl_base_type_u15) (void)
 {
   babl_hmpf_on_name_lookups--;
   babl_type_new (
diff --git a/babl/base/type-u16.c b/babl/base/type-u16.c
index c5a41dcfe..e7ab93653 100644
--- a/babl/base/type-u16.c
+++ b/babl/base/type-u16.c
@@ -196,7 +196,7 @@ MAKE_CONVERSIONS_float (u16, 0.0, 1.0, 0, UINT16_MAX)
 
 
 void
-babl_base_type_u16 (void)
+BABL_SIMD_SUFFIX (babl_base_type_u16) (void)
 {
   babl_type_new (
     "u16",
diff --git a/babl/base/type-u32.c b/babl/base/type-u32.c
index 48b1506ff..288ff2ec6 100644
--- a/babl/base/type-u32.c
+++ b/babl/base/type-u32.c
@@ -196,7 +196,7 @@ MAKE_CONVERSIONS_float(u32, 0.0, 1.0, 0, UINT32_MAX)
 
 
 void
-babl_base_type_u32 (void)
+BABL_SIMD_SUFFIX (babl_base_type_u32) (void)
 {
   babl_type_new (
     "u32",
diff --git a/babl/base/type-u8.c b/babl/base/type-u8.c
index d41d5e0ae..9abbf6771 100644
--- a/babl/base/type-u8.c
+++ b/babl/base/type-u8.c
@@ -202,7 +202,7 @@ MAKE_CONVERSIONS_float (u8_chroma, -0.5, 0.5, 16, 240)
 
 
 void
-babl_base_type_u8 (void)
+BABL_SIMD_SUFFIX (babl_base_type_u8) (void)
 {
   babl_type_new (
     "u8",
diff --git a/babl/meson.build b/babl/meson.build
index d19210a28..b9b2a0529 100644
--- a/babl/meson.build
+++ b/babl/meson.build
@@ -121,6 +121,14 @@ endif
 babl_deps = [math, thread, dl, lcms]
 babl_includes = [rootInclude, bablBaseInclude]
 
+if host_cpu_family == 'x86_64'
+  simd_extra = [babl_base_x86_64_v2, babl_base_x86_64_v3]
+#elif host_cpu_family == 'arm'
+#  simd_extra = [babl_base_arm_neon]
+else
+  simd_extra = []
+endif
+
 # build library
 babl = library(
   lib_name,
@@ -129,6 +137,7 @@ babl = library(
   c_args: babl_c_args,
   link_whole: babl_base,
   link_args: babl_link_args,
+  link_with: simd_extra,
   dependencies: babl_deps,
   link_depends: version_script,
   version: so_version,
diff --git a/meson.build b/meson.build
index 8c6eebf56..bfa5dcfe9 100644
--- a/meson.build
+++ b/meson.build
@@ -166,6 +166,7 @@ common_c_flags += cc.get_supported_arguments(
   ['-fno-unsafe-math-optimizations','-ftree-vectorize']
 )
 
+
 extra_warnings_list = [
   '-Wdeclaration-after-statement',
   '-Winit-self',
@@ -185,13 +186,15 @@ else
   no_undefined = []
 endif
 
-if host_cpu_family == 'arm'
+if host_cpu_family == 'x86_64'
+  x86_64_v2_flags = cc.get_supported_arguments(['-march=x86-64','-msse2', '-msse2','-msse4.1','-msse4.2','-mpopcnt','-mssse3'])
+  x86_64_v3_flags = x86_64_v2_flags + cc.get_supported_arguments(['-mavx','-mavx2','-mf16c','-mfma','-mmovbe', '-mbmi', '-mbmi2'])
+elif host_cpu_family == 'arm'
   arm_neon_flags = cc.get_supported_arguments(['-mfpu=neon'])
 elif host_cpu_family == 'aarch64'
-  common_cflags += cc.get_supported_arguments(['-mfpu=neon'])
+  common_c_flags += cc.get_supported_arguments(['-mfpu=neon'])
 endif
 
-
 ################################################################################
 # Check for compiler CPU extensions
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]