[gtk/half-float: 2/2] gsk: Add runtime checks for F16C

From: Matthias Clasen <matthiasc src gnome org>
To: commits-list gnome org
Cc:
Subject: [gtk/half-float: 2/2] gsk: Add runtime checks for F16C
Date: Thu, 8 Apr 2021 02:21:35 +0000 (UTC)

commit 885a6b8ebc2acc7915c7fc7d8fe814c1d20d8aaf
Author: Matthias Clasen <mclasen redhat com>
Date:   Wed Apr 7 21:29:21 2021 -0400

    gsk: Add runtime checks for F16C
    
    Use an IFUNC resolver to determine whether we can use
    intrinsics for FP16 conversion. This requires the functions
    to be no longer inline.
    
    Sadly, it turns out that __builtin_cpu_supports ("f16c")
    doesn't compile on the systems where we want it to prevent
    us from getting a SIGILL at runtime.

 gsk/meson.build       |   1 +
 gsk/ngl/fp16.c        | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++
 gsk/ngl/fp16private.h |  84 ++------------------------------
 meson.build           |   4 ++
 4 files changed, 138 insertions(+), 80 deletions(-)
---
diff --git a/gsk/meson.build b/gsk/meson.build
index 5c381b51c9..f351941c22 100644
--- a/gsk/meson.build
+++ b/gsk/meson.build
@@ -85,6 +85,7 @@ gsk_private_sources = files([
   'ngl/gskngltexturelibrary.c',
   'ngl/gskngluniformstate.c',
   'ngl/gskngltexturepool.c',
+  'ngl/fp16.c',
 ])
 
 gsk_public_headers = files([
diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c
new file mode 100644
index 0000000000..22453ae210
--- /dev/null
+++ b/gsk/ngl/fp16.c
@@ -0,0 +1,129 @@
+/* fp16.c
+ *
+ * Copyright 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include <config.h>
+
+#include "fp16private.h"
+
+#ifdef HAVE_F16C
+#include <immintrin.h>
+#endif
+
+static inline guint
+as_uint (const float x)
+{
+  return *(guint*)&x;
+}
+
+static inline float
+as_float (const guint x)
+{
+  return *(float*)&x;
+}
+
+// IEEE-754 16-bit floating-point format (without infinity): 1-5-10
+
+static inline float
+half_to_float (const guint16 x)
+{
+  const guint e = (x&0x7C00)>>10; // exponent
+  const guint m = (x&0x03FF)<<13; // mantissa
+  const guint v = as_uint((float)m)>>23;
+  return as_float((x&0x8000)<<16 | (e!=0)*((e+112)<<23|m) | ((e==0)&(m!=0))*((v-37)<<23|((m<<(150-v))&0x007FE000)));
+}
+
+static inline guint16
+float_to_half (const float x)
+{
+  const guint b = as_uint(x)+0x00001000; // round-to-nearest-even
+  const guint e = (b&0x7F800000)>>23; // exponent
+  const guint m = b&0x007FFFFF; // mantissa
+  return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate
+}
+
+static void
+float_to_half4_c (const float f[4],
+                  guint16     h[4])
+{
+  h[0] = float_to_half (f[0]);
+  h[1] = float_to_half (f[1]);
+  h[2] = float_to_half (f[2]);
+  h[3] = float_to_half (f[3]);
+}
+
+static void
+half_to_float4_c (const guint16 h[4],
+                  float         f[4])
+{
+  f[0] = half_to_float (h[0]);
+  f[1] = half_to_float (h[1]);
+  f[2] = half_to_float (h[2]);
+  f[3] = half_to_float (h[3]);
+}
+
+#ifdef HAVE_F16C
+
+static void
+float_to_half4_f16c (const float f[4],
+                     guint16     h[4])
+{
+  __m128 s = _mm_loadu_ps (f);
+  __m128i i = _mm_cvtps_ph (s, 0);
+  _mm_storel_epi64 ((__m128i*)h, i);
+}
+
+static void
+half_to_float4_f16c (const guint16 h[4],
+                     float         f[4])
+{
+  __m128i i = _mm_loadl_epi64 ((__m128i_u const *)h);
+  __m128 s = _mm_cvtph_ps (i);
+  _mm_store_ps (f, s);
+}
+
+void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
+void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
+
+static void *
+resolve_float_to_half4 (void)
+{
+  __builtin_cpu_init ();
+  if (__builtin_cpu_supports ("f16c"))
+    return float_to_half4_f16c;
+  else
+    return float_to_half4_c;
+}
+
+static void *
+resolve_half_to_float4 (void)
+{
+  __builtin_cpu_init ();
+  if (__builtin_cpu_supports ("f16c"))
+    return half_to_float4_f16c;
+  else
+    return half_to_float4_c;
+}
+
+#else
+
+void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((alias ("float_to_half4_c")));
+void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half_to_float4_c")));
+
+#endif  /* HAVE_F16C */
diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h
index 2bc9f84eea..d76f18a04f 100644
--- a/gsk/ngl/fp16private.h
+++ b/gsk/ngl/fp16private.h
@@ -21,13 +21,7 @@
 #ifndef __FP16_PRIVATE_H__
 #define __FP16_PRIVATE_H__
 
-#include <config.h>
 #include <glib.h>
-#include <graphene.h>
-
-#ifdef HAVE_F16C
-#include <immintrin.h>
-#endif
 
 G_BEGIN_DECLS
 
@@ -35,81 +29,11 @@ G_BEGIN_DECLS
 #define FP16_ONE ((guint16)15360)
 #define FP16_MINUS_ONE ((guint16)48128)
 
-#ifdef HAVE_F16C
-
-static inline void
-float_to_half4 (const float f[4],
-                guint16     h[4])
-{
-  __m128 s = _mm_loadu_ps (f);
-  __m128i i = _mm_cvtps_ph (s, 0);
-  _mm_storel_epi64 ((__m128i*)h, i);
-}
-
-static inline void
-half_to_float4 (const guint16 h[4],
-                float         f[4])
-{
-  __m128i i = _mm_loadl_epi64 ((__m128i_u const *)h);
-  __m128 s = _mm_cvtph_ps (i);
-  _mm_store_ps (f, s);
-}
-
-#else  /* GTK_HAS_F16C */
-
-static inline guint
-as_uint (const float x)
-{
-  return *(guint*)&x;
-}
-
-static inline float
-as_float (const guint x)
-{
-  return *(float*)&x;
-}
-
-// IEEE-754 16-bit floating-point format (without infinity): 1-5-10
-
-static inline float
-half_to_float (const guint16 x)
-{
-  const guint e = (x&0x7C00)>>10; // exponent
-  const guint m = (x&0x03FF)<<13; // mantissa
-  const guint v = as_uint((float)m)>>23;
-  return as_float((x&0x8000)<<16 | (e!=0)*((e+112)<<23|m) | ((e==0)&(m!=0))*((v-37)<<23|((m<<(150-v))&0x007FE000)));
-}
-
-static inline guint16
-float_to_half (const float x)
-{
-  const guint b = as_uint(x)+0x00001000; // round-to-nearest-even
-  const guint e = (b&0x7F800000)>>23; // exponent
-  const guint m = b&0x007FFFFF; // mantissa
-  return (b&0x80000000)>>16 | (e>112)*((((e-112)<<10)&0x7C00)|m>>13) | ((e<113)&(e>101))*((((0x007FF000+m)>>(125-e))+1)>>1) | (e>143)*0x7FFF; // sign : normalized : denormalized : saturate
-}
-
-static inline void
-float_to_half4 (const float f[4],
-                guint16     h[4])
-{
-  h[0] = float_to_half (f[0]);
-  h[1] = float_to_half (f[1]);
-  h[2] = float_to_half (f[2]);
-  h[3] = float_to_half (f[3]);
-}
-
-static inline void
-half_to_float4 (const guint16 h[4],
-                float         f[4])
-{
-  f[0] = half_to_float (h[0]);
-  f[1] = half_to_float (h[1]);
-  f[2] = half_to_float (h[2]);
-  f[3] = half_to_float (h[3]);
-}
+void float_to_half4 (const float f[4],
+                     guint16     h[4]);
 
-#endif  /* HAVE_F16C */
+void half_to_float4 (const guint16 h[4],
+                     float         f[4]);
 
 G_END_DECLS
 
diff --git a/meson.build b/meson.build
index c9586acc29..641208158c 100644
--- a/meson.build
+++ b/meson.build
@@ -706,6 +706,10 @@ int main () {
   __m128 s = _mm_loadu_ps (f);
   __m128i i = _mm_cvtps_ph (s, 0);
   _mm_storel_epi64 ((__m128i*)h, i);
+
+  __builtin_cpu_init ();
+  __builtin_cpu_supports ("f16c");
+
     return 0;
 }'''
   if cc.get_id() != 'msvc'
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]