[babl] extensions: add an extension with some float optimizations
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] extensions: add an extension with some float optimizations
- Date: Sat, 31 Mar 2012 13:59:44 +0000 (UTC)
commit 1f5b281382b8511481bdd1ac0baef417a32779fb
Author: Øyvind Kolås <pippin gimp org>
Date: Sat Mar 31 14:58:49 2012 +0100
extensions: add an extension with some float optimizations
Also improve some alpha unpremultiplications in other extensions.
extensions/Makefile.am | 4 +
extensions/float.c | 324 ++++++++++++++++++++++++++++++++++++++++++++++
extensions/gegl-fixups.c | 6 +-
extensions/gggl.c | 3 +-
4 files changed, 333 insertions(+), 4 deletions(-)
---
diff --git a/extensions/Makefile.am b/extensions/Makefile.am
index fac0123..c017046 100644
--- a/extensions/Makefile.am
+++ b/extensions/Makefile.am
@@ -6,6 +6,7 @@ endif
noinst_HEADERS = util.h
+
AM_CPPFLAGS = \
-I$(top_builddir) \
-I$(top_srcdir) \
@@ -20,6 +21,7 @@ ext_LTLIBRARIES = \
gggl-lies.la \
gggl.la \
gimp-8bit.la \
+ float.la \
naive-CMYK.la \
sse-fixups.la
@@ -31,7 +33,9 @@ gggl_la_SOURCES = gggl.c
gimp_8bit_la_SOURCES = gimp-8bit.c
naive_CMYK_la_SOURCES = naive-CMYK.c
sse_fixups_la_SOURCES = sse-fixups.c
+float_la_SOURCES = float.c
LIBS = $(top_builddir)/babl/libbabl-@BABL_API_VERSION@.la $(MATH_LIB)
sse_fixups_la_CFLAGS = $(MMX_EXTRA_CFLAGS) $(SSE_EXTRA_CFLAGS)
+float_la_CFLAGS = -std=c99
diff --git a/extensions/float.c b/extensions/float.c
new file mode 100644
index 0000000..4fc7f98
--- /dev/null
+++ b/extensions/float.c
@@ -0,0 +1,324 @@
+/* babl - dynamically extendable universal pixel conversion library.
+ * Copyright (C) 2012, Øyvind Kolås
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General
+ * Public License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "babl.h"
+#include "babl-cpuaccel.h"
+#include "extensions/util.h"
+#include "base/util.h"
+
+#define INLINE inline
+
+
+/*
+ optimized powf from:
+
+http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent
+
+ by David Hammen
+*/
+
+// Returns x^(5/12) for x in [1,2), to within 3e-8 (relative error).
+// Want more precision? Add more Chebychev polynomial coefs.
+static INLINE double pow512norm (
+ double x)
+{
+ static const int N = 8;
+
+ // Chebychev polynomial terms.
+ // Non-zero terms calculated via
+ // integrate (2/pi)*ChebyshevT[n,u]/sqrt(1-u^2)*((u+3)/2)^(5/12)
+ // from -1 to 1
+ // Zeroth term is similar except it uses 1/pi rather than 2/pi.
+ static const double Cn[8] = {
+ 1.1758200232996901923,
+ 0.16665763094889061230,
+ -0.0083154894939042125035,
+ 0.00075187976780420279038,
+ // Wolfram alpha doesn't want to compute the remaining terms
+ // to more precision (it times out).
+ -0.0000832402,
+ 0.0000102292,
+ -1.3401e-6,
+ 1.83334e-7};
+
+ double Tn[N];
+
+ double u = 2.0*x - 3.0;
+ int i;
+ double y = 0.0;
+
+ Tn[0] = 1.0;
+ Tn[1] = u;
+ for (i = 2; i < N; ++i) {
+ Tn[i] = 2*u*Tn[i-1] - Tn[i-2];
+ }
+
+ for (i = N-1; i >= 0; --i) {
+ y += Cn[i]*Tn[i];
+ }
+
+ return y;
+}
+
+// Returns x^(5/12) to within 3e-8 (relative error).
+static INLINE double pow512 (
+ double x)
+{
+ static const double pow2_512[12] = {
+ 1.0,
+ pow(2.0, 5.0/12.0),
+ pow(4.0, 5.0/12.0),
+ pow(8.0, 5.0/12.0),
+ pow(16.0, 5.0/12.0),
+ pow(32.0, 5.0/12.0),
+ pow(64.0, 5.0/12.0),
+ pow(128.0, 5.0/12.0),
+ pow(256.0, 5.0/12.0),
+ pow(512.0, 5.0/12.0),
+ pow(1024.0, 5.0/12.0),
+ pow(2048.0, 5.0/12.0)
+ };
+
+ double s;
+ int iexp;
+
+ s = frexp (x, &iexp);
+ s *= 2.0;
+ iexp -= 1;
+
+ div_t qr = div (iexp, 12);
+ if (qr.rem < 0) {
+ qr.quot -= 1;
+ qr.rem += 12;
+ }
+
+ return ldexp (pow512norm(s)*pow2_512[qr.rem], 5*qr.quot);
+}
+
+static inline double
+flinear_to_gamma_2_2 (double value)
+{
+ if (value > 0.0030402477F)
+ return 1.055F * pow512 (value) - 0.055F;
+ return 12.92F * value;
+}
+
+static INLINE long
+conv_rgbaF_linear_rgbAF_gamma (unsigned char *src,
+ unsigned char *dst,
+ long samples)
+{
+ float *fsrc = (float *) src;
+ float *fdst = (float *) dst;
+ int n = samples;
+
+ while (n--)
+ {
+ float alpha = fsrc[3];
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha;
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha;
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++) * alpha;
+ *fdst++ = *fsrc++;
+ }
+ return samples;
+}
+
+static INLINE long
+conv_rgbAF_linear_rgbAF_gamma (unsigned char *src,
+ unsigned char *dst,
+ long samples)
+{
+ float *fsrc = (float *) src;
+ float *fdst = (float *) dst;
+ int n = samples;
+
+ while (n--)
+ {
+ float alpha = fsrc[3];
+ if (alpha < BABL_ALPHA_THRESHOLD)
+ {
+ *fdst++ = 0.0;
+ *fdst++ = 0.0;
+ *fdst++ = 0.0;
+ *fdst++ = 0.0;
+ fsrc+=4;
+ }
+ else if (alpha >= 1.0)
+ {
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ *fdst++ = *fsrc++;
+ }
+ else
+ {
+ float alpha_recip = 1.0 / alpha;
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++ * alpha_recip) * alpha;
+ *fdst++ = *fsrc++;
+ }
+ }
+ return samples;
+}
+
+static INLINE long
+conv_rgbaF_linear_rgbaF_gamma (unsigned char *src,
+ unsigned char *dst,
+ long samples)
+{
+ float *fsrc = (float *) src;
+ float *fdst = (float *) dst;
+ int n = samples;
+
+ while (n--)
+ {
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ *fdst++ = *fsrc++;
+ }
+ return samples;
+}
+
+static INLINE long
+conv_rgbF_linear_rgbF_gamma (unsigned char *src,
+ unsigned char *dst,
+ long samples)
+{
+ float *fsrc = (float *) src;
+ float *fdst = (float *) dst;
+ int n = samples;
+
+ while (n--)
+ {
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ *fdst++ = flinear_to_gamma_2_2 (*fsrc++);
+ }
+ return samples;
+}
+
+
+static INLINE long
+conv_rgbaF_gamma_rgbaF_linear (unsigned char *src,
+ unsigned char *dst,
+ long samples)
+{
+ float *fsrc = (float *) src;
+ float *fdst = (float *) dst;
+ int n = samples;
+
+ while (n--)
+ {
+ *fdst++ = gamma_2_2_to_linear (*fsrc++);
+ *fdst++ = gamma_2_2_to_linear (*fsrc++);
+ *fdst++ = gamma_2_2_to_linear (*fsrc++);
+ *fdst++ = *fsrc++;
+ }
+ return samples;
+}
+
+static INLINE long
+conv_rgbF_gamma_rgbF_linear (unsigned char *src,
+ unsigned char *dst,
+ long samples)
+{
+ float *fsrc = (float *) src;
+ float *fdst = (float *) dst;
+ int n = samples;
+
+ while (n--)
+ {
+ *fdst++ = gamma_2_2_to_linear (*fsrc++);
+ *fdst++ = gamma_2_2_to_linear (*fsrc++);
+ *fdst++ = gamma_2_2_to_linear (*fsrc++);
+ }
+ return samples;
+}
+
+#define o(src, dst) \
+ babl_conversion_new (src, dst, "linear", conv_ ## src ## _ ## dst, NULL)
+
+int init (void);
+
+int
+init (void)
+{
+ const Babl *rgbaF_linear = babl_format_new (
+ babl_model ("RGBA"),
+ babl_type ("float"),
+ babl_component ("R"),
+ babl_component ("G"),
+ babl_component ("B"),
+ babl_component ("A"),
+ NULL);
+ const Babl *rgbAF_linear = babl_format_new (
+ babl_model ("RaGaBaA"),
+ babl_type ("float"),
+ babl_component ("Ra"),
+ babl_component ("Ga"),
+ babl_component ("Ba"),
+ babl_component ("A"),
+ NULL);
+ const Babl *rgbaF_gamma = babl_format_new (
+ babl_model ("R'G'B'A"),
+ babl_type ("float"),
+ babl_component ("R'"),
+ babl_component ("G'"),
+ babl_component ("B'"),
+ babl_component ("A"),
+ NULL);
+ const Babl *rgbAF_gamma = babl_format_new (
+ babl_model ("R'aG'aB'aA"),
+ babl_type ("float"),
+ babl_component ("R'a"),
+ babl_component ("G'a"),
+ babl_component ("B'a"),
+ babl_component ("A"),
+ NULL);
+ const Babl *rgbF_linear = babl_format_new (
+ babl_model ("RGB"),
+ babl_type ("float"),
+ babl_component ("R"),
+ babl_component ("G"),
+ babl_component ("B"),
+ NULL);
+ const Babl *rgbF_gamma = babl_format_new (
+ babl_model ("R'G'B'"),
+ babl_type ("float"),
+ babl_component ("R'"),
+ babl_component ("G'"),
+ babl_component ("B'"),
+ NULL);
+
+ o (rgbAF_linear, rgbAF_gamma);
+ o (rgbaF_linear, rgbAF_gamma);
+ o (rgbaF_linear, rgbaF_gamma);
+ o (rgbaF_gamma, rgbaF_linear);
+ o (rgbF_linear, rgbF_gamma);
+ o (rgbF_gamma, rgbF_linear);
+
+ return 0;
+}
+
diff --git a/extensions/gegl-fixups.c b/extensions/gegl-fixups.c
index 3fb6977..7f3b8ff 100644
--- a/extensions/gegl-fixups.c
+++ b/extensions/gegl-fixups.c
@@ -473,9 +473,11 @@ conv_rgbAF_rgbaF (unsigned char *srcc,
while (n--)
{
float alpha = src[3];
- float recip = 1.0/alpha;
+ float recip;
if (alpha < BABL_ALPHA_THRESHOLD)
recip = 0.0;
+ else
+ recip = 1.0/alpha;
dst[0] = src[0] * recip;
dst[1] = src[1] * recip;
dst[2] = src[2] * recip;
@@ -518,8 +520,6 @@ conv_rgbAF_lrgba8 (unsigned char *srcc,
return samples;
}
-
-
#define conv_rgb8_rgbAF conv_rgb8_rgbaF
int init (void);
diff --git a/extensions/gggl.c b/extensions/gggl.c
index 16d3276..6a007c6 100644
--- a/extensions/gggl.c
+++ b/extensions/gggl.c
@@ -610,9 +610,10 @@ conv_rgbAF_rgbaF (unsigned char *src, unsigned char *dst, long samples)
}
else
{
+ float alpha_recip = 1.0 / alpha;
for (c = 0; c < 3; c++)
{
- *(float *) dst = ((*(float *) src) / alpha);
+ *(float *) dst = ((*(float *) src) * alpha_recip);
dst += 4;
src += 4;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]