[babl] extensions/CIE: use cbrtf from musl instead of libm
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] extensions/CIE: use cbrtf from musl instead of libm
- Date: Fri, 13 Jan 2017 03:01:45 +0000 (UTC)
commit 7d3b349525533842c1b996840247e56f6e37e5bd
Author: Øyvind Kolås <pippin gimp org>
Date: Fri Jan 13 03:45:06 2017 +0100
extensions/CIE: use cbrtf from musl instead of libm
The inlining alone gives a ~6% performance boost.
extensions/CIE.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 75 insertions(+), 7 deletions(-)
---
diff --git a/extensions/CIE.c b/extensions/CIE.c
index 23af1c9..fa9df11 100644
--- a/extensions/CIE.c
+++ b/extensions/CIE.c
@@ -444,6 +444,73 @@ cubef (float f)
return f * f * f;
}
+/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */
+/*
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian cygnus com.
+ * Debugged and optimized by Bruce D. Evans.
+ */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+/* _cbrtf(x)
+ * Return cube root of x
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+static const unsigned
+B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
+B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
+
+static inline float _cbrtf(float x)
+{
+ double_t r,T;
+ union {float f; uint32_t i;} u = {x};
+ uint32_t hx = u.i & 0x7fffffff;
+
+ if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */
+ return x + x;
+
+ /* rough cbrt to 5 bits */
+ if (hx < 0x00800000) { /* zero or subnormal? */
+ if (hx == 0)
+ return x; /* cbrt(+-0) is itself */
+ u.f = x*0x1p24f;
+ hx = u.i & 0x7fffffff;
+ hx = hx/3 + B2;
+ } else
+ hx = hx/3 + B1;
+ u.i &= 0x80000000;
+ u.i |= hx;
+
+ /*
+ * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In
+ * double precision so that its terms can be arranged for efficiency
+ * without causing overflow or underflow.
+ */
+ T = u.f;
+ r = T*T*T;
+ T = T*((double_t)x+x+r)/(x+r+r);
+
+ /*
+ * Second step Newton iteration to 47 bits. In double precision for
+ * efficiency and accuracy.
+ */
+ r = T*T*T;
+ T = T*((double_t)x+x+r)/(x+r+r);
+
+ /* rounding to 24 bits is perfect in round-to-nearest mode */
+ return T;
+}
+
static long
Yaf_to_Laf (float *src,
float *dst,
@@ -455,7 +522,7 @@ Yaf_to_Laf (float *src,
{
float yr = src[0];
float a = src[1];
- float L = yr > LAB_EPSILON ? 116.0 * cbrtf (yr) - 16 : LAB_KAPPA * yr;
+ float L = yr > LAB_EPSILON ? 116.0f * _cbrtf (yr) - 16 : LAB_KAPPA * yr;
dst[0] = L;
dst[1] = a;
@@ -467,6 +534,7 @@ Yaf_to_Laf (float *src,
return samples;
}
+
static long
rgbf_to_Labf (float *src,
float *dst,
@@ -484,9 +552,9 @@ rgbf_to_Labf (float *src,
float yr = 0.22248840f / D50_WHITE_REF_Y * r + 0.71690369f / D50_WHITE_REF_Y * g + 0.06060791f /
D50_WHITE_REF_Y * b;
float zr = 0.01391602f / D50_WHITE_REF_Z * r + 0.09706116f / D50_WHITE_REF_Z * g + 0.71392822f /
D50_WHITE_REF_Z * b;
- float fx = xr > LAB_EPSILON ? cbrtf (xr) : (LAB_KAPPA * xr + 16.0f) / 116.0f;
- float fy = yr > LAB_EPSILON ? cbrtf (yr) : (LAB_KAPPA * yr + 16.0f) / 116.0f;
- float fz = zr > LAB_EPSILON ? cbrtf (zr) : (LAB_KAPPA * zr + 16.0f) / 116.0f;
+ float fx = xr > LAB_EPSILON ? _cbrtf (xr) : (LAB_KAPPA * xr + 16.0f) / 116.0f;
+ float fy = yr > LAB_EPSILON ? _cbrtf (yr) : (LAB_KAPPA * yr + 16.0f) / 116.0f;
+ float fz = zr > LAB_EPSILON ? _cbrtf (zr) : (LAB_KAPPA * zr + 16.0f) / 116.0f;
float L = 116.0f * fy - 16.0f;
float A = 500.0f * (fx - fy);
@@ -521,9 +589,9 @@ rgbaf_to_Labaf (float *src,
float yr = 0.22248840f / D50_WHITE_REF_Y * r + 0.71690369f / D50_WHITE_REF_Y * g + 0.06060791f /
D50_WHITE_REF_Y * b;
float zr = 0.01391602f / D50_WHITE_REF_Z * r + 0.09706116f / D50_WHITE_REF_Z * g + 0.71392822f /
D50_WHITE_REF_Z * b;
- float fx = xr > LAB_EPSILON ? cbrtf (xr) : (LAB_KAPPA * xr + 16.0f) / 116.0f;
- float fy = yr > LAB_EPSILON ? cbrtf (yr) : (LAB_KAPPA * yr + 16.0f) / 116.0f;
- float fz = zr > LAB_EPSILON ? cbrtf (zr) : (LAB_KAPPA * zr + 16.0f) / 116.0f;
+ float fx = xr > LAB_EPSILON ? _cbrtf (xr) : (LAB_KAPPA * xr + 16.0f) / 116.0f;
+ float fy = yr > LAB_EPSILON ? _cbrtf (yr) : (LAB_KAPPA * yr + 16.0f) / 116.0f;
+ float fz = zr > LAB_EPSILON ? _cbrtf (zr) : (LAB_KAPPA * zr + 16.0f) / 116.0f;
float L = 116.0f * fy - 16.0f;
float A = 500.0f * (fx - fy);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]