[gnumeric] implement test_random_randnorm



commit 25d43a4bc1048fe7d606f00951ba7dfaf9c941b0
Author: Andreas J Guelzow <aguelzow pyrshep ca>
Date:   Sat Feb 11 10:07:24 2012 -0700

    implement test_random_randnorm
    
    2012-02-11  Andreas J. Guelzow <aguelzow pyrshep ca>
    
    	* src/sstest.c (test_random_randnorm): new
    	(test_random): enable test_random_randnorm
    	(test_random_normality): new
    	(test_random): use sample size of 20000
    	* src/rangefunc.h (gnm_range_adtest): new
    	* src/rangefunc.c (gnm_range_adtest): new, extracted from
    	plugins/fn-stat/functions.c
    
    2012-02-11  Andreas J. Guelzow <aguelzow pyrshep ca>
    
    	* functions.c (gnumeric_adtest): use gnm_range_adtest

 ChangeLog                   |   10 ++++
 plugins/fn-stat/ChangeLog   |    4 ++
 plugins/fn-stat/functions.c |   39 ++-----------
 src/rangefunc.c             |   50 ++++++++++++++++-
 src/rangefunc.h             |    3 +
 src/sstest.c                |  126 +++++++++++++++++++++++++++++++++++++++++-
 6 files changed, 194 insertions(+), 38 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index 6b4b79d..2535a91 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2012-02-11  Andreas J. Guelzow <aguelzow pyrshep ca>
+
+	* src/sstest.c (test_random_randnorm): new
+	(test_random): enable test_random_randnorm
+	(test_random_normality): new
+	(test_random): use sample size of 20000
+	* src/rangefunc.h (gnm_range_adtest): new
+	* src/rangefunc.c (gnm_range_adtest): new, extracted from
+	plugins/fn-stat/functions.c
+
 2012-02-08  Jean Brefort  <jean brefort normalesup org>
 
 	* src/wbc-gtk-edit.c (wbcg_insert_object): don't destroy the object
diff --git a/plugins/fn-stat/ChangeLog b/plugins/fn-stat/ChangeLog
index 8051a76..bdded9a 100644
--- a/plugins/fn-stat/ChangeLog
+++ b/plugins/fn-stat/ChangeLog
@@ -1,3 +1,7 @@
+2012-02-11  Andreas J. Guelzow <aguelzow pyrshep ca>
+
+	* functions.c (gnumeric_adtest): use gnm_range_adtest
+
 2011-11-27  Morten Welinder <terra gnome org>
 
 	* Release 1.11.1
diff --git a/plugins/fn-stat/functions.c b/plugins/fn-stat/functions.c
index af522c4..7622ac2 100644
--- a/plugins/fn-stat/functions.c
+++ b/plugins/fn-stat/functions.c
@@ -4851,8 +4851,8 @@ gnumeric_adtest (GnmFuncEvalInfo *ei, GnmValue const * const *argv)
 	gnm_float *xs;
 	int n;
 	GnmValue *result = NULL;
-	gnm_float mu = 0.;
-	gnm_float sigma = 1.;
+	gnm_float statistics = 0.;
+	gnm_float p = 0.;
 
 	xs = collect_floats_value (argv[0], ei->pos,
 				   COLLECT_IGNORE_STRINGS |
@@ -4868,43 +4868,16 @@ gnumeric_adtest (GnmFuncEvalInfo *ei, GnmValue const * const *argv)
 	value_array_set (result, 0, 2,
 			 value_new_int (n));
 
-	if ((n < 8) || gnm_range_average (xs, n, &mu)
-	    || gnm_range_stddev_est (xs, n, &sigma)) {
+	if ((n < 8) || gnm_range_adtest (xs, n, &p, &statistics)) {
 		value_array_set (result, 0, 0,
 				 value_new_error_VALUE (ei->pos));
 		value_array_set (result, 0, 1,
-				 value_new_error_VALUE (ei->pos));
+				 value_new_error_VALUE (ei->pos));		
 	} else {
-		int i;
-		gnm_float total = 0.;
-		gnm_float p;
-		gnm_float *ys;
-
-		ys = range_sort (xs, n);
-
-		for (i = 0; i < n; i++) {
-			gnm_float val = (pnorm (ys[i], mu, sigma, TRUE, TRUE) + pnorm (ys[n - i - 1], mu, sigma, FALSE, TRUE));
-			total += ((2*i+1)* val);
-		}
-
-		total = - n - total/n;
-		value_array_set (result, 0, 1,
-				 value_new_float (total));
-
-		g_free (ys);
-
-		total *= (1 + 0.75 / n + 2.25 / (n * n));
-		if (total < 0.2)
-			p = 1. - gnm_exp (-13.436 + 101.14 * total - 223.73 * total * total);
-		else if (total < 0.34)
-			p = 1. - gnm_exp (-8.318 + 42.796 * total - 59.938 * total * total);
-		else if (total < 0.6)
-			p = gnm_exp (0.9177 - 4.279  * total - 1.38 * total * total);
-		else
-			p = gnm_exp (1.2937 - 5.709 * total + 0.0186 * total * total);
-
 		value_array_set (result, 0, 0,
 				 value_new_float (p));
+		value_array_set (result, 0, 1,
+				 value_new_float (statistics));
 	}
 
  out:
diff --git a/src/rangefunc.c b/src/rangefunc.c
index d976265..06e10fc 100644
--- a/src/rangefunc.c
+++ b/src/rangefunc.c
@@ -1,3 +1,4 @@
+/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
 /*
  * rangefunc.c: Functions on ranges (data sets).
  *
@@ -14,9 +15,9 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
-
+#include "tools/analysis-tools.h"
 int
-gnm_range_count (gnm_float const *xs, int n, gnm_float *res)
+gnm_range_count (G_GNUC_UNUSED gnm_float const *xs, int n, gnm_float *res)
 {
 	*res = n;
 	return 0;
@@ -431,3 +432,48 @@ gnm_range_mode (gnm_float const *xs, int n, gnm_float *res)
 	*res = mode;
 	return 0;
 }
+
+int 
+gnm_range_adtest    (gnm_float const *xs, int n, gnm_float *pvalue, 
+		     gnm_float *statistics)
+{
+	gnm_float mu = 0.;
+	gnm_float sigma = 1.;
+
+	if ((n < 8) || gnm_range_average (xs, n, &mu)
+	    || gnm_range_stddev_est (xs, n, &sigma))
+		return 1;
+	else {
+		int i;
+		gnm_float total = 0.;
+		gnm_float p;
+		gnm_float *ys;
+
+		ys = range_sort (xs, n);
+
+		for (i = 0; i < n; i++) {
+			gnm_float val = (pnorm (ys[i], mu, sigma, TRUE, TRUE) + 
+					 pnorm (ys[n - i - 1], 
+						mu, sigma, FALSE, TRUE));
+			total += ((2*i+1)* val);
+		}
+
+		total = - n - total/n;
+		g_free (ys);
+
+		total *= (1 + 0.75 / n + 2.25 / (n * n));
+		if (total < 0.2)
+			p = 1. - gnm_exp (-13.436 + 101.14 * total - 223.73 * total * total);
+		else if (total < 0.34)
+			p = 1. - gnm_exp (-8.318 + 42.796 * total - 59.938 * total * total);
+		else if (total < 0.6)
+			p = gnm_exp (0.9177 - 4.279  * total - 1.38 * total * total);
+		else
+			p = gnm_exp (1.2937 - 5.709 * total + 0.0186 * total * total);
+		if (statistics)
+			*statistics = total;
+		if (pvalue)
+			*pvalue = p;
+		return 0;
+	}
+}
diff --git a/src/rangefunc.h b/src/rangefunc.h
index dba7f32..bfbfb01 100644
--- a/src/rangefunc.h
+++ b/src/rangefunc.h
@@ -59,6 +59,9 @@ int gnm_range_rsq_pop	(gnm_float const *xs, const gnm_float *ys, int n, gnm_floa
 
 int gnm_range_mode	(gnm_float const *xs, int n, gnm_float *res);
 
+int gnm_range_adtest    (gnm_float const *xs, int n, gnm_float *p, 
+			 gnm_float *statistics);
+
 G_END_DECLS
 
 #endif /* _GNM_RANGEFUNC_H_ */
diff --git a/src/sstest.c b/src/sstest.c
index 4e18ab3..008f682 100644
--- a/src/sstest.c
+++ b/src/sstest.c
@@ -56,7 +56,7 @@ mark_test_end (const char *name)
 }
 
 static void
-cb_collect_names (const char *name, GnmNamedExpr *nexpr, GSList **names)
+cb_collect_names (G_GNUC_UNUSED const char *name, GnmNamedExpr *nexpr, GSList **names)
 {
 	*names = g_slist_prepend (*names, nexpr);
 }
@@ -471,6 +471,73 @@ test_random_1 (int N, const char *expr,
 	return res;
 }
 
+static gnm_float *
+test_random_normality (int N, const char *expr,
+		       gnm_float *mean, gnm_float *var,
+		       gnm_float *adtest, gnm_float *cvmtest,
+		       gnm_float *lkstest, gnm_float *sftest)
+{
+	Workbook *wb = workbook_new ();
+	Sheet *sheet = workbook_sheet_add
+		(wb, -1, GNM_DEFAULT_COLS, GNM_DEFAULT_ROWS);
+	gnm_float *res = g_new (gnm_float, N);
+	int i;
+	char *s;
+
+	g_printerr ("Testing %s\n", expr);
+
+	for (i = 0; i < N; i++)
+		define_cell (sheet, 0, i, expr);
+
+	s = g_strdup_printf ("=average(a1:a%d)", N);
+	define_cell (sheet, 1, 0, s);
+	g_free (s);
+
+	s = g_strdup_printf ("=var(a1:a%d)", N);
+	define_cell (sheet, 1, 1, s);
+	g_free (s);
+
+	s = g_strdup_printf ("=adtest(a1:a%d)", N);
+	define_cell (sheet, 1, 2, s);
+	g_free (s);
+
+	s = g_strdup_printf ("=cvmtest(a1:a%d)", N);
+	define_cell (sheet, 1, 3, s);
+	g_free (s);
+
+	s = g_strdup_printf ("=lkstest(a1:a%d)", N);
+	define_cell (sheet, 1, 4, s);
+	g_free (s);
+
+	s = g_strdup_printf ("=sftest(a1:a%d)", N > 5000 ? 5000 : N);
+	define_cell (sheet, 1, 5, s);
+	g_free (s);
+
+	workbook_recalc (sheet->workbook);
+	for (i = 0; i < N; i++)
+		res[i] = value_get_as_float (sheet_cell_get (sheet, 0, i)->value);
+	*mean = value_get_as_float (sheet_cell_get (sheet, 1, 0)->value);
+	g_printerr ("Mean: %.10" GNM_FORMAT_g "\n", *mean);
+
+	*var = value_get_as_float (sheet_cell_get (sheet, 1, 1)->value);
+	g_printerr ("Var: %.10" GNM_FORMAT_g "\n", *var);
+
+	*adtest = value_get_as_float (sheet_cell_get (sheet, 1, 2)->value);
+	g_printerr ("ADTest: %.10" GNM_FORMAT_g "\n", *adtest);
+
+	*cvmtest = value_get_as_float (sheet_cell_get (sheet, 1, 3)->value);
+	g_printerr ("CVMTest: %.10" GNM_FORMAT_g "\n", *cvmtest);
+
+	*lkstest = value_get_as_float (sheet_cell_get (sheet, 1, 4)->value);
+	g_printerr ("LKSTest: %.10" GNM_FORMAT_g "\n", *lkstest);
+
+	*sftest = value_get_as_float (sheet_cell_get (sheet, 1, 5)->value);
+	g_printerr ("CVMTest: %.10" GNM_FORMAT_g "\n", *sftest);
+
+	g_object_unref (wb);
+	return res;
+}
+
 static void
 test_random_rand (int N)
 {
@@ -570,6 +637,59 @@ test_random_randbernoulli (int N)
 }
 
 static void
+test_random_randnorm (int N)
+{
+  gnm_float mean, var, adtest, cvmtest, lkstest, sftest;
+	gnm_float mean_target = 0, var_target = 1;
+	gnm_float *vals;
+	gboolean ok;
+	char *expr;
+	gnm_float T;
+
+	expr = g_strdup_printf ("=RANDNORM(%.10" GNM_FORMAT_g ",%.10" GNM_FORMAT_g ")", 
+				mean_target, var_target);
+	vals = test_random_normality (N, expr, &mean, &var, &adtest, &cvmtest, &lkstest, &sftest);
+	g_free (expr);
+	g_free (vals);
+
+	ok = TRUE;
+
+	T = mean_target;
+	if (gnm_abs (mean - T) > 0.02) {
+		g_printerr ("Mean failure [%.10" GNM_FORMAT_g "]\n", T);
+		ok = FALSE;
+	}
+	T = var_target;
+	if (gnm_abs (var - T) > 0.02) {
+		g_printerr ("Var failure [%.10" GNM_FORMAT_g "]\n", T);
+		ok = FALSE;
+	}
+
+	if (adtest < 0.05) {
+		g_printerr ("Anderson Darling Test rejected [%.10" GNM_FORMAT_g "]\n", adtest);
+		ok = FALSE;
+	}
+	if (cvmtest < 0.05) {
+		g_printerr ("CramÃr-von Mises Test rejected [%.10" GNM_FORMAT_g "]\n", cvmtest);
+		ok = FALSE;
+	}
+	if (lkstest < 0.05) {
+		g_printerr ("Lilliefors (Kolmogorov-Smirnov) Test rejected [%.10" GNM_FORMAT_g "]\n",
+			    lkstest);
+		ok = FALSE;
+	}
+	if (sftest < 0.05) {
+		g_printerr ("Shapiro-Francia Test rejected [%.10" GNM_FORMAT_g "]\n", sftest);
+		ok = FALSE;
+	}
+
+
+	if (ok)
+		g_printerr ("OK\n");
+	g_printerr ("\n");
+}
+
+static void
 test_random_randsnorm (int N)
 {
 	gnm_float mean, var, skew, kurt;
@@ -619,11 +739,12 @@ static void
 test_random (void)
 {
 	const char *test_name = "test_random";
-	const int N = 10000;
+	const int N = 20000;
 
 	mark_test_start (test_name);
 	test_random_rand (N);
         test_random_randbernoulli (N);
+        test_random_randnorm (N);
         test_random_randsnorm (N);
 #if 0
         test_random_randbeta (N);
@@ -647,7 +768,6 @@ test_random (void)
         test_random_randlogistic (N);
         test_random_randlognorm (N);
         test_random_randnegbinom (N);
-        test_random_randnorm (N);
         test_random_randpareto (N);
         test_random_randpoisson (N);
         test_random_randrayleigh (N);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]