[babl] Optimize gamma correction
- From: Øyvind Kolås <ok src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [babl] Optimize gamma correction
- Date: Fri, 3 May 2013 13:11:19 +0000 (UTC)
commit f683335fb0d06aef7170fac523ae2ab83173a7de
Author: Loren Merritt <pengvado akuvian org>
Date: Thu Apr 25 17:50:53 2013 +0000
Optimize gamma correction
Switch from Chebyshev polynomial to Newton's method, which is both simpler and
1.5x faster for the same precision.
babl/base/pow-24.c | 205 +++++++++++++---------------------------------------
1 files changed, 51 insertions(+), 154 deletions(-)
---
diff --git a/babl/base/pow-24.c b/babl/base/pow-24.c
index 50d8d87..401a102 100644
--- a/babl/base/pow-24.c
+++ b/babl/base/pow-24.c
@@ -1,5 +1,5 @@
/* babl - dynamically extendable universal pixel conversion library.
- * Copyright (C) 2012, Red Hat, Inc.
+ * Copyright (C) 2013 Loren Merritt
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -16,170 +16,67 @@
* <http://www.gnu.org/licenses/>.
*/
-#include <stdlib.h>
#include <math.h>
#include "util.h"
-/**
- * This implements pow(x, 2.4) and pow(x, 1/2.4) using a chebyshev based polynominal approximation.
- * This is based on the approach in:
- *
http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent/6478839#6478839
+/* a^b = exp(b*log(a))
*
- * The code includes checbychev constants for the 9th order, which seems to give a max error
- * of about 4e-09, but unless you define USE_MAX_POW24_ACCURACY the order has been limited to
- * give an error of about 1e-7 (i.e. single precission floats).
- */
-
-/* Chebychev polynomial terms for x^(5/12) expanded around x=1.5
- * Non-zero terms calculated via
- * NIntegrate[(2/Pi)*ChebyshevT[N,u]/Sqrt [1-u^2]*((u+3)/2)^(5/12),{u,-1,1}, PrecisionGoal->20,
WorkingPrecision -> 100]
- * Zeroth term is similar except it uses 1/pi rather than 2/pi.
+ * Extracting the exponent from a float gives us an approximate log.
+ * Or better yet, reinterpret the bitpattern of the whole float as an int.
+ *
+ * However, the output values of 12throot vary by less than a factor of 2
+ * over the domain we care about, so we only get log() that way, not exp().
+ *
+ * Approximate exp() with a low-degree polynomial; not exactly equal to the
+ * Taylor series since we're minimizing maximum error over a certain finite
+ * domain. It's not worthwhile to use lots of terms, since Newton's method
+ * has a better convergence rate once you get reasonably close to the answer.
*/
-static const double Cn[] = {
- 1.1758200232996901923,
- 0.16665763094889061230,
- -0.0083154894939042125035,
- 0.00075187976780420279038,
- -0.000083240178519391795367,
- 0.000010229209410070008679,
- -1.3401001466409860246e-6,
- 1.8333422241635376682e-7,
- -2.5878596761348859722e-8
-};
-
-
-/* Returns x^(/12) for x in [1,2) */
-static double pow512norm (double x)
+static inline double
+init_newton (double x, double exponent, double c0, double c1, double c2)
{
- double Tn[9];
- double u;
-
- u = 2.0*x - 3.0;
- Tn[0] = 1.0;
- Tn[1] = u;
- Tn[2] = 2*u*Tn[2-1] - Tn[2-2];
- Tn[3] = 2*u*Tn[3-1] - Tn[3-2];
- Tn[4] = 2*u*Tn[4-1] - Tn[4-2];
- Tn[5] = 2*u*Tn[5-1] - Tn[5-2];
- Tn[6] = 2*u*Tn[6-1] - Tn[6-2];
-#ifdef USE_MAX_POW24_ACCURACY
- Tn[7] = 2*u*Tn[7-1] - Tn[7-2];
- Tn[8] = 2*u*Tn[8-1] - Tn[8-2];
-#endif
-
- return Cn[0]*Tn[0] + Cn[1]*Tn[1] + Cn[2]*Tn[2] + Cn[3]*Tn[3] + Cn[4]*Tn[4] + Cn[5]*Tn[5] + Cn[6]*Tn[6]
-#ifdef USE_MAX_POW24_ACCURACY
- + Cn[7]*Tn[7] + Cn[8]*Tn[8]
-#endif
- ;
+ int iexp;
+ double y = frexp(x, &iexp);
+ y = 2*y+(iexp-2);
+ c1 *= M_LN2*exponent;
+ c2 *= M_LN2*M_LN2*exponent*exponent;
+ return y = c0 + c1*y + c2*y*y;
}
-/* Precalculated (2^N) ^ (5 / 12) */
-static const double pow2_512[12] = {
- 1.0,
- 1.3348398541700343678,
- 1.7817974362806785482,
- 2.3784142300054420538,
- 3.1748021039363991669,
- 4.2378523774371812394,
- 5.6568542494923805819,
- 7.5509945014535482244,
- 1.0079368399158985525e1,
- 1.3454342644059433809e1,
- 1.7959392772949968275e1,
- 2.3972913230026907883e1
-};
-
-
-/* Returns x^(1/2.4) == x^(5/12) */
-double babl_pow_1_24 (double x)
-{
- double s;
- int iexp;
- div_t qr;
-
- s = frexp (x, &iexp);
- s *= 2.0;
- iexp -= 1;
-
- qr = div (iexp, 12);
- if (qr.rem < 0) {
- qr.quot -= 1;
- qr.rem += 12;
- }
-
- return ldexp (pow512norm (s) * pow2_512[qr.rem], 5 * qr.quot);
-}
-
-/* Chebychev polynomial terms for x^(7/5) expanded around x=1.5
- * Non-zero terms calculated via
- * NIntegrate[(2/Pi)*ChebyshevT[N,u]/Sqrt [1-u^2]*((u+3)/2)^(7/5),{u,-1,1}, PrecisionGoal->20,
WorkingPrecision -> 100]
- * Zeroth term is similar except it uses 1/pi rather than 2/pi.
+/* Returns x^2.4 == (x*(x^(-1/5)))^3, using Newton's method for x^(-1/5).
+ * Max absolute error is 4e-8.
+ * One more iteration of Newton would bring error down to 4e-15.
+ *
+ * (The above measurements apply to the input range can that be involved in
+ * gamma correction. Accuracy for inputs very close to 0 is somewhat worse,
+ * but that will hit the linear part of the gamma curve rather than call these.
+ * Out-of-range pixels >1.3 are also somewhat worse.)
*/
-static const double iCn[] = {
- 1.7917488588043277509,
- 0.82045614371976854984,
- 0.027694100686325412819,
- -0.00094244335181762134018,
- 0.000064355540911469709545,
- -5.7224404636060757485e-6,
- 5.8767669437311184313e-7,
- -6.6139920053589721168e-8,
- 7.9323242696227458163e-9
-};
-
-/* Returns x^(7/5) for x in [1,2) */
-static double pow75norm (double x)
+double
+babl_pow_24 (double x)
{
- double Tn[9];
- double u;
-
- u = 2.0*x - 3.0;
- Tn[0] = 1.0;
- Tn[1] = u;
- Tn[2] = 2*u*Tn[2-1] - Tn[2-2];
- Tn[3] = 2*u*Tn[3-1] - Tn[3-2];
- Tn[4] = 2*u*Tn[4-1] - Tn[4-2];
- Tn[5] = 2*u*Tn[5-1] - Tn[5-2];
-#ifdef USE_MAX_POW24_ACCURACY
- Tn[6] = 2*u*Tn[6-1] - Tn[6-2];
- Tn[7] = 2*u*Tn[7-1] - Tn[7-2];
- Tn[8] = 2*u*Tn[8-1] - Tn[8-2];
-#endif
-
- return iCn[0]*Tn[0] + iCn[1]*Tn[1] + iCn[2]*Tn[2] + iCn[3]*Tn[3] + iCn[4]*Tn[4] + iCn[5]*Tn[5]
-#ifdef USE_MAX_POW24_ACCURACY
- + iCn[6]*Tn[6] + iCn[7]*Tn[7] + iCn[8]*Tn[8]
-#endif
- ;
+ double y = init_newton (x, -1./5, 0.9953189663, 0.9594345146, 0.6742970332);
+ int i;
+ for (i = 0; i < 2; i++)
+ y = (1.+1./5)*y - ((1./5)*x*(y*y))*((y*y)*(y*y));
+ x *= y;
+ return x*x*x;
}
-/* Precalculated (2^N) ^ (7 / 5) */
-static const double pow2_75[5] = {
- 1.0,
- 2.6390158215457883983,
- 6.9644045063689921093,
- 1.8379173679952558018e+1,
- 4.8502930128332728543e+1
-};
-
-/* Returns x^2.4 == x * x ^1.4 == x * x^(7/5) */
-double babl_pow_24 (double x)
+/* Returns x^(1/2.4) == x*(x^(-1/12))^7, using Newton's method for x^(-1/12).
+ * Max absolute error is 7e-8
+ * One more iteration of Newton would bring error down to 4e-14.
+ */
+double
+babl_pow_1_24 (double x)
{
- double s;
- int iexp;
- div_t qr;
-
- s = frexp (x, &iexp);
- s *= 2.0;
- iexp -= 1;
-
- qr = div (iexp, 5);
- if (qr.rem < 0) {
- qr.quot -= 1;
- qr.rem += 5;
- }
-
- return x * ldexp (pow75norm (s) * pow2_75[qr.rem], 7 * qr.quot);
+ double y = init_newton (x, -1./12, 0.9976740658, 0.9885035151, 0.5907708377);
+ int i;
+ for (i = 0; i < 2; i++)
+ {
+ double z = (y*y)*(y*y);
+ z = (y*z)*(z*z);
+ y = (1.+1./12)*y - (1./12)*x*z;
+ }
+ return ((x*y)*(y*y))*((y*y)*(y*y));
}
-
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]