[gnome-software] gs-content-rating: Correctly parse locale names



commit ed84b7a3f56dc2df2a6ff7125e4bf2eb92194d52
Author: Philip Withnall <withnall endlessm com>
Date:   Mon Jan 7 21:12:04 2019 +0000

    gs-content-rating: Correctly parse locale names
    
    Fix the matching system for locale names so that it correctly identifies
    (for example) LANG=en_GB.UTF-8 as being en_GB, rather than being
    confused by the codeset suffix.
    
    Add some unit tests.
    
    Signed-off-by: Philip Withnall <withnall endlessm com>

 src/gs-content-rating.c | 224 +++++++++++++++++++++++++++++++++---------------
 src/gs-self-test.c      |  55 ++++++++++++
 src/meson.build         |   1 +
 3 files changed, 211 insertions(+), 69 deletions(-)
---
diff --git a/src/gs-content-rating.c b/src/gs-content-rating.c
index c9fe90bc..61e4ab15 100644
--- a/src/gs-content-rating.c
+++ b/src/gs-content-rating.c
@@ -22,6 +22,7 @@
 #include "config.h"
 
 #include <glib/gi18n.h>
+#include <string.h>
 
 #include "gs-content-rating.h"
 
@@ -540,121 +541,206 @@ gs_utils_content_rating_age_to_str (GsContentRatingSystem system, guint age)
        return NULL;
 }
 
+/*
+ * parse_locale:
+ * @locale: (transfer full): a locale to parse
+ * @language_out: (out) (optional) (nullable): return location for the parsed
+ *    language, or %NULL to ignore
+ * @territory_out: (out) (optional) (nullable): return location for the parsed
+ *    territory, or %NULL to ignore
+ * @codeset_out: (out) (optional) (nullable): return location for the parsed
+ *    codeset, or %NULL to ignore
+ * @modifier_out: (out) (optional) (nullable): return location for the parsed
+ *    modifier, or %NULL to ignore
+ *
+ * Parse @locale as a locale string of the form
+ * `language[_territory][.codeset][@modifier]` — see `man 3 setlocale` for
+ * details.
+ *
+ * On success, %TRUE will be returned, and the components of the locale will be
+ * returned in the given addresses, with each component not including any
+ * separators. Otherwise, %FALSE will be returned and the components will be set
+ * to %NULL.
+ *
+ * @locale is modified, and any returned non-%NULL pointers will point inside
+ * it.
+ *
+ * Returns: %TRUE on success, %FALSE otherwise
+ */
+static gboolean
+parse_locale (gchar *locale  /* (transfer full) */,
+             const gchar **language_out,
+             const gchar **territory_out,
+             const gchar **codeset_out,
+             const gchar **modifier_out)
+{
+       gchar *separator;
+       const gchar *language = NULL, *territory = NULL, *codeset = NULL, *modifier = NULL;
+
+       separator = strrchr (locale, '@');
+       if (separator != NULL) {
+               modifier = separator + 1;
+               *separator = '\0';
+       }
+
+       separator = strrchr (locale, '.');
+       if (separator != NULL) {
+               codeset = separator + 1;
+               *separator = '\0';
+       }
+
+       separator = strrchr (locale, '_');
+       if (separator != NULL) {
+               territory = separator + 1;
+               *separator = '\0';
+       }
+
+       language = locale;
+
+       /* Parse failure? */
+       if (*language == '\0') {
+               language = NULL;
+               territory = NULL;
+               codeset = NULL;
+               modifier = NULL;
+       }
+
+       if (language_out != NULL)
+               *language_out = language;
+       if (territory_out != NULL)
+               *territory_out = territory;
+       if (codeset_out != NULL)
+               *codeset_out = codeset;
+       if (modifier_out != NULL)
+               *modifier_out = modifier;
+
+       return (language != NULL);
+}
+
 /* data obtained from https://en.wikipedia.org/wiki/Video_game_rating_system */
 GsContentRatingSystem
 gs_utils_content_rating_system_from_locale (const gchar *locale)
 {
-       g_auto(GStrv) split = g_strsplit (locale, "_", -1);
+       g_autofree gchar *locale_copy = g_strdup (locale);
+       const gchar *language, *territory;
+
+       /* Default to IARC for locales which can’t be parsed. */
+       if (!parse_locale (locale_copy, &language, &territory, NULL, NULL))
+               return GS_CONTENT_RATING_SYSTEM_IARC;
 
        /* Argentina */
-       if (g_strcmp0 (split[0], "ar") == 0)
+       if (g_strcmp0 (language, "ar") == 0)
                return GS_CONTENT_RATING_SYSTEM_INCAA;
 
        /* Australia */
-       if (g_strcmp0 (split[0], "au") == 0)
+       if (g_strcmp0 (language, "au") == 0)
                return GS_CONTENT_RATING_SYSTEM_ACB;
 
        /* Brazil */
-       if (g_strcmp0 (split[0], "br") == 0)
+       if (g_strcmp0 (language, "pt") == 0 &&
+           g_strcmp0 (territory, "BR") == 0)
                return GS_CONTENT_RATING_SYSTEM_DJCTQ;
 
        /* Taiwan */
-       if (g_strcmp0 (locale, "zh_TW") == 0)
+       if (g_strcmp0 (language, "zh") == 0 &&
+           g_strcmp0 (territory, "TW") == 0)
                return GS_CONTENT_RATING_SYSTEM_GSRR;
 
        /* Europe (but not Finland or Germany), India, Israel,
         * Pakistan, Quebec, South Africa */
-       if (g_strcmp0 (locale, "en_GB") == 0 ||
-           g_strcmp0 (split[0], "gb") == 0 ||
-           g_strcmp0 (split[0], "al") == 0 ||
-           g_strcmp0 (split[0], "ad") == 0 ||
-           g_strcmp0 (split[0], "am") == 0 ||
-           g_strcmp0 (split[0], "at") == 0 ||
-           g_strcmp0 (split[0], "az") == 0 ||
-           g_strcmp0 (split[0], "by") == 0 ||
-           g_strcmp0 (split[0], "be") == 0 ||
-           g_strcmp0 (split[0], "ba") == 0 ||
-           g_strcmp0 (split[0], "bg") == 0 ||
-           g_strcmp0 (split[0], "hr") == 0 ||
-           g_strcmp0 (split[0], "cy") == 0 ||
-           g_strcmp0 (split[0], "cz") == 0 ||
-           g_strcmp0 (split[0], "dk") == 0 ||
-           g_strcmp0 (split[0], "ee") == 0 ||
-           g_strcmp0 (split[0], "fr") == 0 ||
-           g_strcmp0 (split[0], "ge") == 0 ||
-           g_strcmp0 (split[0], "gr") == 0 ||
-           g_strcmp0 (split[0], "hu") == 0 ||
-           g_strcmp0 (split[0], "is") == 0 ||
-           g_strcmp0 (split[0], "it") == 0 ||
-           g_strcmp0 (split[0], "kz") == 0 ||
-           g_strcmp0 (split[0], "xk") == 0 ||
-           g_strcmp0 (split[0], "lv") == 0 ||
-           g_strcmp0 (split[0], "fl") == 0 ||
-           g_strcmp0 (split[0], "lu") == 0 ||
-           g_strcmp0 (split[0], "lt") == 0 ||
-           g_strcmp0 (split[0], "mk") == 0 ||
-           g_strcmp0 (split[0], "mt") == 0 ||
-           g_strcmp0 (split[0], "md") == 0 ||
-           g_strcmp0 (split[0], "mc") == 0 ||
-           g_strcmp0 (split[0], "me") == 0 ||
-           g_strcmp0 (split[0], "nl") == 0 ||
-           g_strcmp0 (split[0], "no") == 0 ||
-           g_strcmp0 (split[0], "pl") == 0 ||
-           g_strcmp0 (split[0], "pt") == 0 ||
-           g_strcmp0 (split[0], "ro") == 0 ||
-           g_strcmp0 (split[0], "sm") == 0 ||
-           g_strcmp0 (split[0], "rs") == 0 ||
-           g_strcmp0 (split[0], "sk") == 0 ||
-           g_strcmp0 (split[0], "si") == 0 ||
-           g_strcmp0 (split[0], "es") == 0 ||
-           g_strcmp0 (split[0], "se") == 0 ||
-           g_strcmp0 (split[0], "ch") == 0 ||
-           g_strcmp0 (split[0], "tr") == 0 ||
-           g_strcmp0 (split[0], "ua") == 0 ||
-           g_strcmp0 (split[0], "va") == 0 ||
-           g_strcmp0 (split[0], "in") == 0 ||
-           g_strcmp0 (split[0], "il") == 0 ||
-           g_strcmp0 (split[0], "pk") == 0 ||
-           g_strcmp0 (split[0], "za") == 0)
+       if ((g_strcmp0 (language, "en") == 0 &&
+            g_strcmp0 (territory, "GB") == 0) ||
+           g_strcmp0 (language, "gb") == 0 ||
+           g_strcmp0 (language, "al") == 0 ||
+           g_strcmp0 (language, "ad") == 0 ||
+           g_strcmp0 (language, "am") == 0 ||
+           g_strcmp0 (language, "at") == 0 ||
+           g_strcmp0 (language, "az") == 0 ||
+           g_strcmp0 (language, "by") == 0 ||
+           g_strcmp0 (language, "be") == 0 ||
+           g_strcmp0 (language, "ba") == 0 ||
+           g_strcmp0 (language, "bg") == 0 ||
+           g_strcmp0 (language, "hr") == 0 ||
+           g_strcmp0 (language, "cy") == 0 ||
+           g_strcmp0 (language, "cz") == 0 ||
+           g_strcmp0 (language, "dk") == 0 ||
+           g_strcmp0 (language, "ee") == 0 ||
+           g_strcmp0 (language, "fr") == 0 ||
+           g_strcmp0 (language, "ge") == 0 ||
+           g_strcmp0 (language, "gr") == 0 ||
+           g_strcmp0 (language, "hu") == 0 ||
+           g_strcmp0 (language, "is") == 0 ||
+           g_strcmp0 (language, "it") == 0 ||
+           g_strcmp0 (language, "kz") == 0 ||
+           g_strcmp0 (language, "xk") == 0 ||
+           g_strcmp0 (language, "lv") == 0 ||
+           g_strcmp0 (language, "fl") == 0 ||
+           g_strcmp0 (language, "lu") == 0 ||
+           g_strcmp0 (language, "lt") == 0 ||
+           g_strcmp0 (language, "mk") == 0 ||
+           g_strcmp0 (language, "mt") == 0 ||
+           g_strcmp0 (language, "md") == 0 ||
+           g_strcmp0 (language, "mc") == 0 ||
+           g_strcmp0 (language, "me") == 0 ||
+           g_strcmp0 (language, "nl") == 0 ||
+           g_strcmp0 (language, "no") == 0 ||
+           g_strcmp0 (language, "pl") == 0 ||
+           g_strcmp0 (language, "pt") == 0 ||
+           g_strcmp0 (language, "ro") == 0 ||
+           g_strcmp0 (language, "sm") == 0 ||
+           g_strcmp0 (language, "rs") == 0 ||
+           g_strcmp0 (language, "sk") == 0 ||
+           g_strcmp0 (language, "si") == 0 ||
+           g_strcmp0 (language, "es") == 0 ||
+           g_strcmp0 (language, "se") == 0 ||
+           g_strcmp0 (language, "ch") == 0 ||
+           g_strcmp0 (language, "tr") == 0 ||
+           g_strcmp0 (language, "ua") == 0 ||
+           g_strcmp0 (language, "va") == 0 ||
+           g_strcmp0 (language, "in") == 0 ||
+           g_strcmp0 (language, "il") == 0 ||
+           g_strcmp0 (language, "pk") == 0 ||
+           g_strcmp0 (language, "za") == 0)
                return GS_CONTENT_RATING_SYSTEM_PEGI;
 
        /* Finland */
-       if (g_strcmp0 (split[0], "fi") == 0)
+       if (g_strcmp0 (language, "fi") == 0)
                return GS_CONTENT_RATING_SYSTEM_KAVI;
 
        /* Germany */
-       if (g_strcmp0 (split[0], "de") == 0)
+       if (g_strcmp0 (language, "de") == 0)
                return GS_CONTENT_RATING_SYSTEM_USK;
 
        /* Iran */
-       if (g_strcmp0 (split[0], "ir") == 0)
+       if (g_strcmp0 (language, "ir") == 0)
                return GS_CONTENT_RATING_SYSTEM_ESRA;
 
        /* Japan */
-       if (g_strcmp0 (split[0], "jp") == 0)
+       if (g_strcmp0 (language, "jp") == 0)
                return GS_CONTENT_RATING_SYSTEM_CERO;
 
        /* New Zealand */
-       if (g_strcmp0 (split[0], "nz") == 0)
+       if (g_strcmp0 (language, "nz") == 0)
                return GS_CONTENT_RATING_SYSTEM_OFLCNZ;
 
        /* Russia: Content rating law */
-       if (g_strcmp0 (split[0], "ru") == 0)
+       if (g_strcmp0 (language, "ru") == 0)
                return GS_CONTENT_RATING_SYSTEM_RUSSIA;
 
        /* Singapore */
-       if (g_strcmp0 (split[0], "sg") == 0)
+       if (g_strcmp0 (language, "sg") == 0)
                return GS_CONTENT_RATING_SYSTEM_MDA;
 
        /* South Korea */
-       if (g_strcmp0 (split[0], "kr") == 0)
+       if (g_strcmp0 (language, "kr") == 0)
                return GS_CONTENT_RATING_SYSTEM_GRAC;
 
        /* USA, Canada, Mexico */
-       if (g_strcmp0 (locale, "en_US") == 0 ||
-           g_strcmp0 (split[0], "us") == 0 ||
-           g_strcmp0 (split[0], "ca") == 0 ||
-           g_strcmp0 (split[0], "mx") == 0)
+       if ((g_strcmp0 (language, "en") == 0 &&
+            g_strcmp0 (territory, "US") == 0) ||
+           g_strcmp0 (language, "us") == 0 ||
+           g_strcmp0 (language, "ca") == 0 ||
+           g_strcmp0 (language, "mx") == 0)
                return GS_CONTENT_RATING_SYSTEM_ESRB;
 
        /* everything else is IARC */
diff --git a/src/gs-self-test.c b/src/gs-self-test.c
index 9218f160..214c3f4f 100644
--- a/src/gs-self-test.c
+++ b/src/gs-self-test.c
@@ -23,6 +23,7 @@
 
 #include "gnome-software-private.h"
 
+#include "gs-content-rating.h"
 #include "gs-css.h"
 #include "gs-test.h"
 
@@ -53,6 +54,58 @@ gs_css_func (void)
        g_assert_cmpstr (tmp, ==, "color: white;");
 }
 
+/* Test that gs_utils_content_rating_system_from_locale() returns the correct
+ * rating system for various standard locales and various forms of locale name.
+ * See `locale -a` for the list of all available locales which some of these
+ * test vectors were derived from. */
+static void
+gs_content_rating_from_locale (void)
+{
+       const struct {
+               const gchar *locale;
+               GsContentRatingSystem expected_system;
+       } vectors[] = {
+               /* Simple tests to get coverage of each rating system: */
+               { "ar", GS_CONTENT_RATING_SYSTEM_INCAA },
+               { "au", GS_CONTENT_RATING_SYSTEM_ACB },
+               { "pt_BR", GS_CONTENT_RATING_SYSTEM_DJCTQ },
+               { "zh_TW", GS_CONTENT_RATING_SYSTEM_GSRR },
+               { "en_GB", GS_CONTENT_RATING_SYSTEM_PEGI },
+               { "am", GS_CONTENT_RATING_SYSTEM_PEGI },
+               { "bg", GS_CONTENT_RATING_SYSTEM_PEGI },
+               { "fi", GS_CONTENT_RATING_SYSTEM_KAVI },
+               { "de", GS_CONTENT_RATING_SYSTEM_USK },
+               { "ir", GS_CONTENT_RATING_SYSTEM_ESRA },
+               { "jp", GS_CONTENT_RATING_SYSTEM_CERO },
+               { "nz", GS_CONTENT_RATING_SYSTEM_OFLCNZ },
+               { "ru", GS_CONTENT_RATING_SYSTEM_RUSSIA },
+               { "sg", GS_CONTENT_RATING_SYSTEM_MDA },
+               { "kr", GS_CONTENT_RATING_SYSTEM_GRAC },
+               { "en_US", GS_CONTENT_RATING_SYSTEM_ESRB },
+               { "us", GS_CONTENT_RATING_SYSTEM_ESRB },
+               { "ca", GS_CONTENT_RATING_SYSTEM_ESRB },
+               { "mx", GS_CONTENT_RATING_SYSTEM_ESRB },
+               /* Fallback (arbitrarily chosen Venezuela since it seems to use IARC): */
+               { "ve", GS_CONTENT_RATING_SYSTEM_IARC },
+               /* Locale with a codeset: */
+               { "nl_NL.iso88591", GS_CONTENT_RATING_SYSTEM_PEGI },
+               /* Locale with a codeset and modifier: */
+               { "nl_NL.iso885915@euro", GS_CONTENT_RATING_SYSTEM_PEGI },
+               /* Locale with a less esoteric codeset: */
+               { "en_GB.UTF-8", GS_CONTENT_RATING_SYSTEM_PEGI },
+               /* Locale with a modifier but no codeset: */
+               { "fi_FI@euro", GS_CONTENT_RATING_SYSTEM_KAVI },
+               /* Invalid locale: */
+               { "_invalid", GS_CONTENT_RATING_SYSTEM_IARC },
+       };
+
+       for (gsize i = 0; i < G_N_ELEMENTS (vectors); i++) {
+               g_test_message ("Test %" G_GSIZE_FORMAT ": %s", i, vectors[i].locale);
+
+               g_assert_cmpint (gs_utils_content_rating_system_from_locale (vectors[i].locale), ==, 
vectors[i].expected_system);
+       }
+}
+
 int
 main (int argc, char **argv)
 {
@@ -64,6 +117,8 @@ main (int argc, char **argv)
 
        /* tests go here */
        g_test_add_func ("/gnome-software/src/css", gs_css_func);
+       g_test_add_func ("/gnome-software/src/content-rating/from-locale",
+                        gs_content_rating_from_locale);
 
        return g_test_run ();
 }
diff --git a/src/meson.build b/src/meson.build
index aafc9b7a..26954e0d 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -286,6 +286,7 @@ if get_option('tests')
     sources : [
       'gs-css.c',
       'gs-common.c',
+      'gs-content-rating.c',
       'gs-self-test.c',
     ],
     include_directories : [


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]