[gnumeric] R.PNORM2: New function for the normal distribution's cdf over a range.

From: Morten Welinder <mortenw src gnome org>
To: commits-list gnome org
Cc:
Subject: [gnumeric] R.PNORM2: New function for the normal distribution's cdf over a range.
Date: Sun, 19 May 2013 20:02:05 +0000 (UTC)
commit cee7963f4c66b21b99a34850a61657f8cbc7ad43
Author: Morten Welinder <terra gnome org>
Date:   Sun May 19 16:01:01 2013 -0400

    R.PNORM2: New function for the normal distribution's cdf over a range.
    
    R doesn't actually have this function, but it should.

 ChangeLog                  |    1 +
 NEWS                       |    1 +
 plugins/fn-r/functions.c   |   29 +++++++++++++++++
 plugins/fn-r/generate      |    9 ++++-
 plugins/fn-r/plugin.xml.in |    1 +
 samples/pnorm2.gnumeric    |  Bin 0 -> 2707 bytes
 src/mathfunc.c             |   73 +++++++++++++++++++++++++++----------------
 src/mathfunc.h             |    2 +-
 8 files changed, 86 insertions(+), 30 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index d6554f4..2f4d379 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,7 @@
 
        * src/mathfunc.c (ptukey_wprob): Sanitize handling of integration
        boundaries.
+       (pnorm2): Get rid of mu and sigma arguments.  Improve accuracy.
 
 2013-05-18  Morten Welinder  <terra gnome org>
 
diff --git a/NEWS b/NEWS
index de24be7..d477f22 100644
--- a/NEWS
+++ b/NEWS
@@ -20,6 +20,7 @@ Morten:
        * Improve mathfuns tests.  [#700295]
        * Add new R.PTUKEY function.  [#700132]
        * Fix missing translation of certain function examples.
+       * Add new R.PNORM2 function.
 
 --------------------------------------------------------------------------
 Gnumeric 1.12.2
diff --git a/plugins/fn-r/functions.c b/plugins/fn-r/functions.c
index b25b8c7..3eaf171 100644
--- a/plugins/fn-r/functions.c
+++ b/plugins/fn-r/functions.c
@@ -67,6 +67,28 @@ gnumeric_r_pnorm (GnmFuncEvalInfo *ei, GnmValue const * const *args)
 /* ------------------------------------------------------------------------- */
 
 
+static GnmFuncHelp const help_r_pnorm2[] = {
+       { GNM_FUNC_HELP_NAME, F_("R.PNORM2:cumulative distribution function of the normal distribution") },
+       { GNM_FUNC_HELP_ARG, F_("x1:first observation") },
+       { GNM_FUNC_HELP_ARG, F_("x2:first observation") },
+       { GNM_FUNC_HELP_ARG, F_("log_p:if true, log of the probability is used") },
+       { GNM_FUNC_HELP_DESCRIPTION, F_("This function returns the cumulative distribution function of the 
normal distribution.") },
+       { GNM_FUNC_HELP_END }
+};
+
+static GnmValue *
+gnumeric_r_pnorm2 (GnmFuncEvalInfo *ei, GnmValue const * const *args)
+{
+       gnm_float x1 = value_get_as_float (args[0]);
+       gnm_float x2 = value_get_as_float (args[1]);
+       gboolean log_p = args[2] ? value_get_as_checked_bool (args[2]) : FALSE;
+
+       return value_new_float (pnorm2 (x1, x2, log_p));
+}
+
+/* ------------------------------------------------------------------------- */
+
+
 static GnmFuncHelp const help_r_qnorm[] = {
        { GNM_FUNC_HELP_NAME, F_("R.QNORM:probability quantile function of the normal distribution") },
        { GNM_FUNC_HELP_ARG, F_("p:probability") },
@@ -1398,6 +1420,13 @@ GnmFuncDescriptor const stat_functions[] = {
                GNM_FUNC_SIMPLE, GNM_FUNC_IMPL_STATUS_UNIQUE_TO_GNUMERIC, GNM_FUNC_TEST_STATUS_NO_TESTSUITE,
        },
        {
+               "r.pnorm2",
+               "ff|b",
+               help_r_pnorm2,
+               gnumeric_r_pnorm2, NULL, NULL, NULL,
+               GNM_FUNC_SIMPLE, GNM_FUNC_IMPL_STATUS_UNIQUE_TO_GNUMERIC, GNM_FUNC_TEST_STATUS_NO_TESTSUITE,
+       },
+       {
                "r.qnorm",
                "fff|bb",
                help_r_qnorm,
diff --git a/plugins/fn-r/generate b/plugins/fn-r/generate
index e01a1c8..2ac0e63 100644
--- a/plugins/fn-r/generate
+++ b/plugins/fn-r/generate
@@ -27,11 +27,13 @@ my %defaults;
                  'scale' => "the scale parameter $of",
                  );
 
-    $funcs{'dnorm'} = $funcs{'pnorm'} = $funcs{'qnorm'} =
+    $funcs{'dnorm'} = $funcs{'pnorm'} = $funcs{'pnorm2'} = $funcs{'qnorm'} =
        [\&distribution,
         'normal',
         ({ 'mu' => "mean $of",
            'sigma' => "standard deviation $of",
+           'x1' => 'first observation',
+           'x2' => 'first observation',
            @common })];
 
     $funcs{'dlnorm'} = $funcs{'plnorm'} = $funcs{'qlnorm'} =
@@ -319,7 +321,10 @@ sub distribution {
               (defined ($def) ? " : $def" : "") .
               ";\n");
 
-       if (defined ($def) && $typespec !~ /\|/) {
+       if ($typespec =~ /\|/) {
+           die "$0: argument $name for $func needs a default"
+               unless defined $def;
+       } elsif (defined ($def)) {
            $typespec .= "|" ;
        }
        $typespec .= $type_spec{$type};
diff --git a/plugins/fn-r/plugin.xml.in b/plugins/fn-r/plugin.xml.in
index f3eb79f..acd1b03 100644
--- a/plugins/fn-r/plugin.xml.in
+++ b/plugins/fn-r/plugin.xml.in
@@ -40,6 +40,7 @@
                                <function name="r.plnorm"/>
                                <function name="r.pnbinom"/>
                                <function name="r.pnorm"/>
+                               <function name="r.pnorm2"/>
                                <function name="r.ppois"/>
                                <function name="r.psnorm"/>
                                <function name="r.pst"/>
diff --git a/samples/pnorm2.gnumeric b/samples/pnorm2.gnumeric
new file mode 100644
index 0000000..a13ca07
Binary files /dev/null and b/samples/pnorm2.gnumeric differ
diff --git a/src/mathfunc.c b/src/mathfunc.c
index 77355c5..a93fd85 100644
--- a/src/mathfunc.c
+++ b/src/mathfunc.c
@@ -5287,7 +5287,7 @@ static gnm_float ptukey_wprob(gnm_float w, gnm_float rr, gnm_float cc)
            /* then doesn't contribute to integral */
 
            // rinsum = (pplus * 0.5) - (pminus * 0.5);
-           rinsum = pnorm2 (ac - w, ac, 0.0, 1.0, FALSE);
+           rinsum = pnorm2 (ac - w, ac, FALSE);
            if (rinsum >= gnm_exp(C1 / cc1)) {
                rinsum = (aleg[j-1] * gnm_exp(-(0.5 * qexpo))) * gnm_pow(rinsum, cc1);
                elsum += rinsum;
@@ -5552,47 +5552,66 @@ swap_log_tail (gnm_float lp)
 
 
 gnm_float
-pnorm2 (gnm_float x1, gnm_float x2, gnm_float mu, gnm_float sigma, gboolean log_p)
+pnorm2 (gnm_float x1, gnm_float x2, gboolean log_p)
 {
-       gnm_float raw;
-       const gnm_float RAWOK = 0.1;
-       const gnm_float LOG_RAWOK = GNM_const(-2.3025850929940455);
-
-       if (gnm_isnan(x1) || gnm_isnan(x2) ||
-           gnm_isnan(mu) || gnm_isnan(sigma) ||
-           sigma <= 0)
+       if (gnm_isnan(x1) || gnm_isnan(x2))
                return gnm_nan;
 
        if (x1 > x2)
-               return log_p ? gnm_nan : 0 - pnorm2 (x2, x1, mu, sigma, log_p);
+               return log_p ? gnm_nan : 0 - pnorm2 (x2, x1, log_p);
 
        if (x1 == x2)
                return R_D__0;
 
-       x1 = (x1 - mu) / sigma;
-       x2 = (x2 - mu) / sigma;
+       if (x1 == gnm_ninf)
+               return pnorm (x2, 0.0, 1.0, TRUE, log_p);
+
+       if (x2 == gnm_pinf)
+               return pnorm (x1, 0.0, 1.0, FALSE, log_p);
 
-       /* At this point x1<x2 and we can assume N(0,1).  */
+       if (x1 == 0 && !log_p) {
+               return gnm_erf (x2 / M_SQRT2gnum) / 2;
+
+               /*
+                * for the log case we can use this log(erf(x)) expansion:
+                * (log(2 x)-(log(pi))/2)-x^2/3+(2 x^4)/45-(8 x^6)/2835-(4 x^8)/14175+(32 x^10)/467775+(736 
x^12)/1915538625-(2944 x^14)/1915538625+(5024 x^16)/44405668125+(49690112 x^18)/1754068296605625+O(x^20)
+                */
+       }
+       if (x2 == 0 && !log_p) {
+               return gnm_erf (x1 / -M_SQRT2gnum) / 2;
+       }
 
-       if (x1 < 0) {
-               gnm_float p1 = pnorm (x1, 0.0, 1.0, TRUE, log_p);
-               gnm_float p2 = pnorm (x2, 0.0, 1.0, TRUE, log_p);
-               raw = log_p ? logspace_sub (p2, p1) : p2 - p1;
+       if (x1 <= 0 && x2 >= 0) {
+               /* The interval spans 0.  */
+               gnm_float p1 = pnorm2 (0, MIN (-x1, x2), log_p);
+               gnm_float p2 = pnorm2 (MIN (-x1, x2), MAX (-x1, x2), log_p);
+               return log_p
+                       ? logspace_add (p1 + M_LN2gnum, p2)
+                       : 2 * p1 + p2;
+       } else if (x1 < 0) {
+               /* Both < 0 -- use symmetry */
+               return pnorm2 (-x2, -x1, log_p);
        } else {
+               /* Both >= 0 */
                gnm_float p1C = pnorm (x1, 0.0, 1.0, FALSE, log_p);
                gnm_float p2C = pnorm (x2, 0.0, 1.0, FALSE, log_p);
-               raw = log_p ? logspace_sub (p1C, p2C) : p1C - p2C;
-       }
+               gnm_float raw = log_p ? logspace_sub (p1C, p2C) : p1C - p2C;
+               gnm_float dx, d1, d2, ub, lb;
 
-       if (raw > (log_p ? LOG_RAWOK : RAWOK))
-               return raw;
+               if (gnm_abs (p1C - p2C) * 32 > gnm_abs (p1C + p2C))
+                       return raw;
 
-       if (0)
-               g_printerr ("x1=%.10g x2=%.10g  p1=%.10g p2=%.10g\n",
-                           x1, x2,
-                           pnorm (x1, 0.0, 1.0, TRUE, FALSE),
-                           pnorm (x2, 0.0, 1.0, TRUE, FALSE));
-       return raw;
+               /* dnorm is strictly decreasing in this area.  */
+               dx = log_p ? gnm_log (x2 - x1) : x2 - x1;
+               d1 = dnorm (x1, 0.0, 1.0, log_p);
+               d2 = dnorm (x2, 0.0, 1.0, log_p);
+               ub = log_p ? dx + d1 : dx * d1;  /* upper bound */
+               lb = log_p ? dx + d2 : dx * d2;  /* lower bound */
+
+               raw = MAX (raw, lb);
+               raw = MIN (raw, ub);
+               return raw;
+       }
 }
 
 
diff --git a/src/mathfunc.h b/src/mathfunc.h
index ccd7d72..8de5b47 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -56,7 +56,7 @@ gnm_float bessel_k (gnm_float x, gnm_float alpha, gnm_float expo);
 /* The normal distribution.  */
 gnm_float dnorm (gnm_float x, gnm_float mu, gnm_float sigma, gboolean give_log);
 gnm_float pnorm (gnm_float x, gnm_float mu, gnm_float sigma, gboolean lower_tail, gboolean log_p);
-gnm_float pnorm2 (gnm_float x1, gnm_float x2, gnm_float mu, gnm_float sigma, gboolean log_p);
+gnm_float pnorm2 (gnm_float x1, gnm_float x2, gboolean log_p);
 gnm_float qnorm (gnm_float p, gnm_float mu, gnm_float sigma, gboolean lower_tail, gboolean log_p);
 
 /* The log-normal distribution.  */
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]