[gnumeric] win32: range-reduce arguments to sin/cos/tan.



commit 918f203aa8c3c33973a6675d7733d209ac3a38f9
Author: Morten Welinder <terra gnome org>
Date:   Fri Nov 15 23:37:28 2013 -0500

    win32: range-reduce arguments to sin/cos/tan.
    
    Microsoft's library isn't accurate.  This helps by reducing the argument
    to [-Pi/4,+Pi/4].

 ChangeLog              |    4 +
 configure.ac           |    6 +
 gnumeric-features.h.in |    3 +
 src/mathfunc.c         |  277 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/numbers.h          |   23 +++-
 5 files changed, 307 insertions(+), 6 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 0641339..33ca9ee 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2013-11-15  Morten Welinder  <terra gnome org>
+
+       * src/mathfunc.c (reduce_pi_half): New function.
+
 2013-11-14  Morten Welinder  <terra gnome org>
 
        * src/mathfunc.c (gnm_sinpi, gnm_cospi): New functions.
diff --git a/configure.ac b/configure.ac
index f6c624e..3c070bc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -627,6 +627,12 @@ if test $ac_cv_func_lgamma = no; then
        )
 fi
 
+if test "x$with_win32" = "xyes"; then
+   AC_DEFINE(GNM_REDUCES_TRIG_RANGE, 1,
+       [Define if Gnumeric reduces the argument range of sin/cos/tan]
+       )
+fi
+
 float_msg=double
 AC_ARG_WITH(long_double,
        AS_HELP_STRING([--with-long-double], [Use long double for floating point]),
diff --git a/gnumeric-features.h.in b/gnumeric-features.h.in
index a20a5e2..9660d4f 100644
--- a/gnumeric-features.h.in
+++ b/gnumeric-features.h.in
@@ -32,4 +32,7 @@
 /* Define to 1 if Gnumeric supplies the `erfcl' function. */
 #undef GNM_SUPPLIES_ERFCL
 
+/* Define to 1 if Gnumeric reduces the argument range of sin/cos/tan */
+#undef GNM_REDUCES_TRIG_RANGE
+
 #endif  /* _LIBSPREADSHEET_CONFIG_H_ */
diff --git a/src/mathfunc.c b/src/mathfunc.c
index c36ac20..39c8cfb 100644
--- a/src/mathfunc.c
+++ b/src/mathfunc.c
@@ -9372,3 +9372,280 @@ gnm_bessel_y (gnm_float x, gnm_float alpha)
 }
 
 /* ------------------------------------------------------------------------- */
+
+#ifdef GNM_REDUCES_TRIG_RANGE
+
+static gnm_float
+reduce_pi_half_simple (gnm_float x, int *km4)
+{
+       static const gnm_float two_over_pi = 0.63661977236758134307553505349;
+       static const gnm_float pi_over_two[] = {
+               +0x1.921fb5p+0,
+               +0x1.110b46p-26,
+               +0x1.1a6263p-54,
+               +0x1.8a2e03p-81,
+               +0x1.c1cd12p-107
+       };
+       int i;
+       gnm_float k = gnm_floor (x * two_over_pi + 0.5);
+       gnm_float xx = 0;
+
+       g_assert (k < (1 << 26));
+       *km4 = (int)k & 3;
+
+       if (k == 0)
+               return x;
+
+       x -= k * pi_over_two[0];
+       for (i = 1; i < 5; i++) {
+               gnm_float dx = k * pi_over_two[i];
+               gnm_float s = x - dx;
+               xx += x - s - dx;
+               x = s;
+       }
+
+       return x + xx;
+}
+
+/*
+ * Add the 64-bit number p at *dst and backwards.  This would be very
+ * simple and fast in assembler.  In C it's a bit of a mess.
+ */
+static inline void
+add_at (guint32 *dst, guint64 p)
+{
+       unsigned h = p >> 63;
+
+       p += *dst;
+       *dst-- = p & 0xffffffffu;
+       p >>= 32;
+       if (p) {
+               p += *dst;
+               *dst-- = p & 0xffffffffu;
+               if (p >> 32)
+                       while (++*dst == 0)
+                               dst--;
+       } else if (h) {
+               /* p overflowed, pass carry on. */
+               dst--;
+               while (++*dst == 0)
+                       dst--;
+       }
+}
+
+static gnm_float
+reduce_pi_half_full (gnm_float x, int *km4)
+{
+       /* FIXME?  This table isn't big enough for long double's range */
+       static guint32 one_over_two_pi[] = {
+               0x28be60dbu,
+               0x9391054au,
+               0x7f09d5f4u,
+               0x7d4d3770u,
+               0x36d8a566u,
+               0x4f10e410u,
+               0x7f9458eau,
+               0xf7aef158u,
+               0x6dc91b8eu,
+               0x909374b8u,
+               0x01924bbau,
+               0x82746487u,
+               0x3f877ac7u,
+               0x2c4a69cfu,
+               0xba208d7du,
+               0x4baed121u,
+               0x3a671c09u,
+               0xad17df90u,
+               0x4e64758eu,
+               0x60d4ce7du,
+               0x272117e2u,
+               0xef7e4a0eu,
+               0xc7fe25ffu,
+               0xf7816603u,
+               0xfbcbc462u,
+               0xd6829b47u,
+               0xdb4d9fb3u,
+               0xc9f2c26du,
+               0xd3d18fd9u,
+               0xa797fa8bu,
+               0x5d49eeb1u,
+               0xfaf97c5eu,
+               0xcf41ce7du,
+               0xe294a4bau,
+               0x9afed7ecu,
+               0x47e35742u
+       };
+       static guint32 pi_over_four[] = {
+               0xc90fdaa2u,
+               0x2168c234u,
+               0xc4c6628bu,
+               0x80dc1cd1u
+       };
+
+       gnm_float m;
+       guint32 w2, w1, w0, wm1, wm2;
+       int e, neg;
+       unsigned di, i, j;
+       guint32 r[6], r4[4];
+       gnm_float rh, rl, l48, h48;
+
+       m = gnm_frexp (x, &e);
+       if (e >= GNM_MANT_DIG) {
+               di = ((unsigned)e - GNM_MANT_DIG) / 32u;
+               e -= di * 32;
+       } else
+               di = 0;
+       m = gnm_ldexp (m, e - 64);
+       w2  = (guint32)gnm_floor (m); m = gnm_ldexp (m - w2, 32);
+       w1  = (guint32)gnm_floor (m); m = gnm_ldexp (m - w1, 32);
+       w0  = (guint32)gnm_floor (m); m = gnm_ldexp (m - w0, 32);
+       wm1 = (guint32)gnm_floor (m); m = gnm_ldexp (m - wm1, 32);
+       wm2 = (guint32)gnm_floor (m);
+
+       /*
+        * r[0] is an integer overflow area to be ignored.  We will not create
+        * any carry into r[-1] because 5/(2pi) < 1.
+        */
+       r[0] = 0;
+
+       for (i = 0; i < 5; i++) {
+               g_assert (i + 2 + di < G_N_ELEMENTS (one_over_two_pi));
+               r[i + 1] = 0;
+               if (wm2 && i > 1)
+                       add_at (&r[i + 1], (guint64)wm2 * one_over_two_pi[i - 2]);
+               if (wm1 && i > 0)
+                       add_at (&r[i + 1], (guint64)wm1 * one_over_two_pi[i - 1]);
+               if (w0)
+                       add_at (&r[i + 1], (guint64)w0  * one_over_two_pi[i     + di]);
+               if (w1)
+                       add_at (&r[i + 1], (guint64)w1  * one_over_two_pi[i + 1 + di]);
+               if (w2)
+                       add_at (&r[i + 1], (guint64)w2  * one_over_two_pi[i + 2 + di]);
+
+               /*
+                * We're done at i==3 unless the first 29 bits, not counting
+                * those ending up in sign and *km4, are all zeros or ones.
+                */
+               if (i == 3 && ((r[1] + 1u) & 0x1fffffffu) > 1)
+                       break;
+       }
+
+       *km4 = r[1] >> 30;
+       if ((neg = ((r[1] >> 29) & 1))) {
+               *km4 = (*km4 + 1) & 3;
+               /* Two-complement negation */
+               for (j = 1; j <= i; j++) r[j] ^= 0xffffffffu;
+               add_at (&r[i], 1);
+       }
+       r[1] &= 0x3fffffffu;
+
+       j = 1;
+       if (r[j] == 0) j++;
+       r4[0] = r4[1] = r4[2] = r4[3] = 0;
+       add_at (&r4[1], (guint64)r[j    ] * pi_over_four[0]);
+       add_at (&r4[2], (guint64)r[j    ] * pi_over_four[1]);
+       add_at (&r4[2], (guint64)r[j + 1] * pi_over_four[0]);
+       add_at (&r4[3], (guint64)r[j    ] * pi_over_four[2]);
+       add_at (&r4[3], (guint64)r[j + 1] * pi_over_four[1]);
+       add_at (&r4[3], (guint64)r[j + 2] * pi_over_four[0]);
+
+       h48 = gnm_ldexp (((guint64)r4[0] << 16) | (r4[1] >> 16),
+                        -32 * j + 3 - 16);
+       l48 = gnm_ldexp (((guint64)(r4[1] & 0xffff) << 32) | r4[2],
+                        -32 * j + 3 - 64);
+
+       rh = h48 + l48;
+       rl = h48 - rh + l48;
+
+       if (neg) {
+               rh = -rh;
+               rl = -rl;
+       }
+
+       return rh + rl;
+}
+
+/*
+ * Subtract k times Pi from @x where k is picked such that the absolute
+ * value of x-k*Pi is no larger than Pi/4.  k%4 is returned in @km4.
+ * @x must not be negative.
+ *
+ * This function is used to reduce arguments to trigonometric functions
+ * to [-Pi/4;+Pi/4], a range in which hardware and libm generally are
+ * accurate.  This avoids problems for example with Intel chips for
+ * values near multiples of Pi/2 or values above 10^15.
+ *
+ * Note, that the Pi used has sufficient accuracy to not suffer cancellation
+ * errors throughout the entire range of "double".  That means 1000+ bits.
+ *
+ * The "no larger than Pi/4" condition may be violated by a hair.  That's
+ * not important since none of the trigonometric functions are critical
+ * there.
+ */
+static gnm_float
+reduce_pi_half (gnm_float x, int *km4)
+{
+       if (gnm_isnan (x))
+               return x;
+
+       g_assert (x >= 0);
+
+       if (x < (1u << 26))
+               return reduce_pi_half_simple (x, km4);
+       else
+               return reduce_pi_half_full (x, km4);
+}
+
+gnm_float
+gnm_sin (gnm_float x)
+{
+       int km4;
+       gnm_float ax = gnm_abs (x);
+       gnm_float xr = reduce_pi_half (ax, &km4);
+
+       if (x < 0) km4 ^= 2;
+
+       switch (km4) {
+       default:
+       case 0: return +sin (xr);
+       case 1: return +cos (xr);
+       case 2: return -sin (xr);
+       case 3: return -cos (xr);
+       }
+}
+
+gnm_float
+gnm_cos (gnm_float x)
+{
+       int km4;
+       gnm_float ax = gnm_abs (x);
+       gnm_float xr = reduce_pi_half (ax, &km4);
+
+       switch (km4) {
+       default:
+       case 0: return +cos (xr);
+       case 1: return -sin (xr);
+       case 2: return -cos (xr);
+       case 3: return +sin (xr);
+       }
+}
+
+gnm_float
+gnm_tan (gnm_float x)
+{
+       int km4;
+       gnm_float ax = gnm_abs (x);
+       gnm_float xr = reduce_pi_half (ax, &km4);
+
+       if (x < 0) xr = -xr;
+
+       switch (km4) {
+       default:
+       case 0: case 2: return +tan (xr);
+       case 1: case 3: return -cos (xr) / sin (xr);
+       }
+}
+
+#endif
+
+/* ------------------------------------------------------------------------- */
diff --git a/src/numbers.h b/src/numbers.h
index 6ad65e0..3a74cb4 100644
--- a/src/numbers.h
+++ b/src/numbers.h
@@ -43,7 +43,6 @@ typedef long double gnm_float;
 #define gnm_atan2 atan2l
 #define gnm_atanh atanhl
 #define gnm_ceil ceill
-#define gnm_cos cosl
 #define gnm_cosh coshl
 #define gnm_erf erfl
 #define gnm_erfc erfcl
@@ -75,13 +74,16 @@ typedef long double gnm_float;
 #define gnm_pow10 go_pow10l
 #define gnm_pow2 go_pow2l
 #define gnm_render_general go_render_generall
-#define gnm_sin sinl
 #define gnm_sinh sinhl
 #define gnm_sqrt sqrtl
 #define gnm_strto go_strtold
 #define gnm_sub_epsilon go_sub_epsilonl
-#define gnm_tan tanl
 #define gnm_tanh tanhl
+#ifndef GNM_REDUCES_TRIG_RANGE
+#define gnm_sin sinl
+#define gnm_cos cosl
+#define gnm_tan tanl
+#endif
 
 #define GNM_FORMAT_e   "Le"
 #define GNM_FORMAT_E   "LE"
@@ -140,7 +142,6 @@ typedef double gnm_float;
 #define gnm_atan2 atan2
 #define gnm_atanh atanh
 #define gnm_ceil ceil
-#define gnm_cos cos
 #define gnm_cosh cosh
 #define gnm_erf erf
 #define gnm_erfc erfc
@@ -172,13 +173,16 @@ typedef double gnm_float;
 #define gnm_pow10 go_pow10
 #define gnm_pow2 go_pow2
 #define gnm_render_general go_render_general
-#define gnm_sin sin
 #define gnm_sinh sinh
 #define gnm_sqrt sqrt
 #define gnm_strto go_strtod
 #define gnm_sub_epsilon go_sub_epsilon
-#define gnm_tan tan
 #define gnm_tanh tanh
+#ifndef GNM_REDUCES_TRIG_RANGE
+#define gnm_sin sin
+#define gnm_cos cos
+#define gnm_tan tan
+#endif
 
 #define GNM_FORMAT_e   "e"
 #define GNM_FORMAT_E   "E"
@@ -225,6 +229,13 @@ typedef double gnm_float;
 
 #endif
 
+#ifdef GNM_REDUCES_TRIG_RANGE
+gnm_float gnm_sin (gnm_float x);
+gnm_float gnm_cos (gnm_float x);
+gnm_float gnm_tan (gnm_float x);
+#endif
+
+
 G_END_DECLS
 
 #endif /* _GNM_NUMBERS_H_ */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]