[gnome-software: 4/7] Add gs_utils_get_wilson_rating()



commit e60e92b1bfb5dfd83d595ceb4f6d47ff453771de
Author: Richard Hughes <richard hughsie com>
Date:   Wed Jul 27 12:19:13 2016 +0100

    Add gs_utils_get_wilson_rating()
    
    This can be used by plugins to get the lower bound of the Wilson score
    confidence interval for a Bernoulli parameter. This ensures small numbers of
    ratings don't give overly high scores.
    
    This is based on code in gs-plugin-ubuntu-reviews.c

 src/Makefile.am    |    6 ++-
 src/gs-self-test.c |   11 ++++++
 src/gs-utils.c     |   87 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/gs-utils.h     |    6 +++
 4 files changed, 108 insertions(+), 2 deletions(-)
---
diff --git a/src/Makefile.am b/src/Makefile.am
index 9a2b4a5..949e38e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -128,7 +128,8 @@ gnome_software_cmd_LDADD =                          \
        $(SOUP_LIBS)                                    \
        $(LIBSECRET_LIBS)                               \
        $(GLIB_LIBS)                                    \
-       $(GTK_LIBS)
+       $(GTK_LIBS)                                     \
+       -lm
 
 gnome_software_cmd_CFLAGS =                            \
        $(WARN_CFLAGS)
@@ -356,7 +357,8 @@ gs_self_test_LDADD =                                                \
        $(SOUP_LIBS)                                            \
        $(LIBSECRET_LIBS)                                       \
        $(GLIB_LIBS)                                            \
-       $(GTK_LIBS)
+       $(GTK_LIBS)                                             \
+       -lm
 
 gs_self_test_CFLAGS = $(WARN_CFLAGS)
 
diff --git a/src/gs-self-test.c b/src/gs-self-test.c
index 8b09a13..889a395 100644
--- a/src/gs-self-test.c
+++ b/src/gs-self-test.c
@@ -58,6 +58,16 @@ gs_app_list_filter_cb (GsApp *app, gpointer user_data)
 }
 
 static void
+gs_utils_wilson_func (void)
+{
+       g_assert_cmpint ((gint64) gs_utils_get_wilson_rating (0, 0, 0, 0, 0), ==, -1); /* no reviews at all: error value */
+       g_assert_cmpint ((gint64) gs_utils_get_wilson_rating (0, 0, 0, 0, 400), ==, 100); /* many reviews, all 5 star */
+       g_assert_cmpint ((gint64) gs_utils_get_wilson_rating (10, 0, 0, 0, 400), ==, 98); /* a few 1 star reviews lower the result */
+       g_assert_cmpint ((gint64) gs_utils_get_wilson_rating (0, 0, 0, 0, 1), ==, 76); /* a single 5 star review is not enough for a top score */
+       g_assert_cmpint ((gint64) gs_utils_get_wilson_rating (5, 4, 20, 100, 400), ==, 93); /* mixed distribution across all buckets */
+}
+
+static void
 gs_os_release_func (void)
 {
        g_autofree gchar *fn = NULL;
@@ -1115,6 +1125,7 @@ main (int argc, char **argv)
        g_log_set_fatal_mask (NULL, G_LOG_LEVEL_ERROR | G_LOG_LEVEL_CRITICAL);
 
        /* generic tests go here */
+       g_test_add_func ("/gnome-software/utils{wilson}", gs_utils_wilson_func);
        g_test_add_func ("/gnome-software/os-release", gs_os_release_func);
        g_test_add_func ("/gnome-software/app", gs_app_func);
        g_test_add_func ("/gnome-software/plugin", gs_plugin_func);
diff --git a/src/gs-utils.c b/src/gs-utils.c
index 74de1d0..3904302 100644
--- a/src/gs-utils.c
+++ b/src/gs-utils.c
@@ -34,6 +34,7 @@
 
 #include <errno.h>
 #include <fnmatch.h>
+#include <math.h>
 #include <glib/gstdio.h>
 
 #ifdef HAVE_POLKIT
@@ -416,4 +417,90 @@ gs_utils_rmtree (const gchar *directory, GError **error)
        return gs_utils_rmtree_real (directory, error);
 }
 
+static gdouble
+pnormaldist (gdouble qn) /* NOTE(review): looks like a normal-distribution quantile approximation for probability qn -- confirm against gs-plugin-ubuntu-reviews.c */
+{
+       static gdouble b[11] = { 1.570796288,      0.03706987906,   -0.8364353589e-3,
+                               -0.2250947176e-3,  0.6841218299e-5,  0.5824238515e-5,
+                               -0.104527497e-5,   0.8360937017e-7, -0.3231081277e-8,
+                                0.3657763036e-10, 0.6936233982e-12 }; /* polynomial coefficients: b[i] multiplies w3^i below */
+       gdouble w1, w3;
+       guint i;
+
+       if (qn < 0 || qn > 1)
+               return 0; /* qn outside 0..1 is an error; 0 is returned, indistinguishable from the qn == 0.5 result */
+       if (qn == 0.5)
+               return 0; /* midpoint maps to exactly 0, skip the series */
+
+       w1 = qn;
+       if (qn > 0.5)
+               w1 = 1.0 - w1; /* fold onto the lower half; the sign is restored on return */
+       w3 = -log (4.0 * w1 * (1.0 - w1));
+       w1 = b[0]; /* w1 is reused as the accumulator for the polynomial in w3 */
+       for (i = 1; i < 11; i++)
+               w1 = w1 + (b[i] * pow (w3, i));
+
+       if (qn > 0.5)
+               return sqrt (w1 * w3); /* upper half: positive result */
+       else
+               return -sqrt (w1 * w3); /* lower half: negative result */
+}
+
+static gdouble
+wilson_score (gdouble value, gdouble n, gdouble power) /* score for `value` hits out of `n` trials; n must be non-zero (it is used as a divisor) */
+{
+       gdouble z, phat;
+       if (value == 0)
+               return 0; /* no hits: score is 0, the formula is skipped */
+       z = pnormaldist (1 - power / 2); /* z value looked up at 1 - power/2 */
+       phat = value / n; /* observed proportion of hits */
+       return (phat + z * z / (2 * n) -
+               z * sqrt ((phat * (1 - phat) + z * z / (4 * n)) / n)) /
+               (1 + z * z / n); /* the "minus" branch, i.e. the lower of the two interval bounds */
+}
+
+/**
+ * gs_utils_get_wilson_rating:
+ * @star1: The number of 1 star reviews (weighted -2)
+ * @star2: The number of 2 star reviews (weighted -1)
+ * @star3: The number of 3 star reviews (neutral; only adds to the total)
+ * @star4: The number of 4 star reviews (weighted +1)
+ * @star5: The number of 5 star reviews (weighted +2)
+ *
+ * Combines the lower bounds of the Wilson score confidence intervals of the
+ * individual star buckets into one rating, so that small numbers of ratings
+ * don't give overly high scores.
+ * See https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
+ * for details.
+ *
+ * Returns: Wilson rating percentage (rounded up), or -1 if there are no reviews
+ **/
+gint
+gs_utils_get_wilson_rating (guint64 star1,
+                           guint64 star2,
+                           guint64 star3,
+                           guint64 star4,
+                           guint64 star5)
+{
+       gdouble val;
+       guint64 star_sum = star1 + star2 + star3 + star4 + star5;
+       if (star_sum == 0)
+               return -1;
+
+       /* weighted sum of the per-bucket scores; every bucket is scored against the total with power 0.2 */
+       val =  (wilson_score ((gdouble) star1, (gdouble) star_sum, 0.2) * -2);
+       val += (wilson_score ((gdouble) star2, (gdouble) star_sum, 0.2) * -1);
+       val += (wilson_score ((gdouble) star4, (gdouble) star_sum, 0.2) * 1);
+       val += (wilson_score ((gdouble) star5, (gdouble) star_sum, 0.2) * 2);
+
+       /* shift from -2..+2 to the 1..5 star scale */
+       val += 3;
+
+       /* scale to a percentage, so that 5 stars becomes 100 */
+       val *= 20;
+
+       /* return rounded up integer */
+       return (gint) ceil (val);
+}
+
 /* vim: set noexpandtab: */
diff --git a/src/gs-utils.h b/src/gs-utils.h
index 884ab91..0a9db60 100644
--- a/src/gs-utils.h
+++ b/src/gs-utils.h
@@ -65,6 +65,12 @@ gboolean      gs_utils_strv_fnmatch          (gchar          **strv,
 GDesktopAppInfo *gs_utils_get_desktop_app_info (const gchar    *id);
 gboolean        gs_utils_rmtree                (const gchar    *directory,
                                                 GError         **error);
+gint            gs_utils_get_wilson_rating     (guint64         star1,
+                                                guint64         star2,
+                                                guint64         star3,
+                                                guint64         star4,
+                                                guint64         star5);
+
 G_END_DECLS
 
 #endif /* __GS_UTILS_H */


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]