[glib: 1/2] gdate: Use longest matching month name in g_date_set_parse



commit ba18822f358c49f15435197dba7c11f6753396f1
Author: Tomasz Miąsko <tomasz miasko gmail com>
Date:   Tue Oct 30 00:00:00 2018 +0000

    gdate: Use longest matching month name in g_date_set_parse
    
    There are languages where the name of one month is a substring of another.
    Instead of stopping the search at the first match, use the month whose
    name constitutes the longest match.
    
    Fixes #1343.

 glib/gdate.c      | 75 +++++++++++++++++++++++--------------------------------
 glib/tests/date.c | 34 +++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 44 deletions(-)
---
diff --git a/glib/gdate.c b/glib/gdate.c
index 4925818b3..5457a3b8c 100644
--- a/glib/gdate.c
+++ b/glib/gdate.c
@@ -931,6 +931,27 @@ struct _GDateParseTokens {
 
 typedef struct _GDateParseTokens GDateParseTokens;
 
+static inline gboolean
+update_month_match (gsize *longest,
+                    const gchar *haystack,
+                    const gchar *needle)
+{
+  gsize length;
+  /* Accept needle only if it is found and is strictly longer than *longest. */
+  if (needle == NULL)
+    return FALSE;
+  /* Cheap length test first: a name no longer than *longest cannot win. */
+  length = strlen (needle);
+  if (*longest >= length)
+    return FALSE;
+  /* Long enough, but it must also occur somewhere in haystack. */
+  if (strstr (haystack, needle) == NULL)
+    return FALSE;
+
+  *longest = length;
+  return TRUE;
+}
+
 #define NUM_LEN 10
 
 /* HOLDS: g_date_global_lock */
@@ -978,6 +999,7 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
   
   if (pt->num_ints < 3)
     {
+      gsize longest = 0;
       gchar *casefold;
       gchar *normalized;
       
@@ -985,8 +1007,7 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
       normalized = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
       g_free (casefold);
 
-      i = 1;
-      while (i < 13)
+      for (i = 1; i < 13; ++i)
         {
           /* Here month names may be in a genitive case if the language
            * grammatical rules require it.
@@ -997,60 +1018,26 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
            * genitive case here so they use nominative everywhere.
            * For example, English always uses "January".
            */
-          if (long_month_names[i] != NULL) 
-            {
-              const gchar *found = strstr (normalized, long_month_names[i]);
-             
-              if (found != NULL)
-                {
-                  pt->month = i;
-                 break;
-                }
-            }
+          if (update_month_match (&longest, normalized, long_month_names[i]))
+            pt->month = i;
 
           /* Here month names will be in a nominative case.
            * Examples of how January may look in some languages:
            * Catalan: "gener", Croatian: "Siječanj", Polish: "styczeń",
            * Upper Sorbian: "Januar".
            */
-          if (long_month_names_alternative[i] != NULL)
-            {
-              const gchar *found = strstr (normalized, long_month_names_alternative[i]);
-
-              if (found != NULL)
-                {
-                  pt->month = i;
-                  break;
-                }
-            }
+          if (update_month_match (&longest, normalized, long_month_names_alternative[i]))
+            pt->month = i;
 
           /* Differences between abbreviated nominative and abbreviated
            * genitive month names are visible in very few languages but
            * let's handle them.
            */
-          if (short_month_names[i] != NULL) 
-            {
-              const gchar *found = strstr (normalized, short_month_names[i]);
-             
-              if (found != NULL)
-                {
-                  pt->month = i;
-                 break;
-                }
-            }
+          if (update_month_match (&longest, normalized, short_month_names[i]))
+            pt->month = i;
 
-          if (short_month_names_alternative[i] != NULL)
-            {
-              const gchar *found = strstr (normalized, short_month_names_alternative[i]);
-
-              if (found != NULL)
-                {
-                  pt->month = i;
-                  break;
-                }
-            }
-
-          ++i;
+          if (update_month_match (&longest, normalized, short_month_names_alternative[i]))
+            pt->month = i;
         }
 
       g_free (normalized);
diff --git a/glib/tests/date.c b/glib/tests/date.c
index 6cd91ab6c..8eb28712b 100644
--- a/glib/tests/date.c
+++ b/glib/tests/date.c
@@ -208,6 +208,39 @@ test_parse_locale_change (void)
   setlocale (LC_ALL, "");
 }
 
+static void
+test_month_substring (void)
+{
+  GDate date;
+  /* Checks parsing when one month's name contains another's abbreviation. */
+  g_test_bug ("793550");
+
+  if (setlocale (LC_ALL, "pl_PL") == NULL)
+    {
+      g_test_skip ("pl_PL locale not available");
+      return;
+    }
+
+  /* In Polish, September is "wrzesień" and August is "sierpień",
+   * abbreviated as "sie". The former used to be mistaken for the latter
+   * because "sie" is a substring of "wrzesień" and was matched first. */
+
+  g_date_set_parse (&date, "wrzesień 2018");
+  g_assert_true (g_date_valid (&date));
+  g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_SEPTEMBER);
+
+  g_date_set_parse (&date, "sie 2018");
+  g_assert_true (g_date_valid (&date));
+  g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_AUGUST);
+
+  g_date_set_parse (&date, "sierpień 2018");
+  g_assert_true (g_date_valid (&date));
+  g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_AUGUST);
+  /* Switch back to the locale specified by the environment. */
+  setlocale (LC_ALL, "");
+}
+
+
 static void
 test_month_names (void)
 {
@@ -736,6 +769,7 @@ main (int argc, char** argv)
   g_test_add_func ("/date/dates", test_dates);
   g_test_add_func ("/date/parse", test_parse);
   g_test_add_func ("/date/parse_locale_change", test_parse_locale_change);
+  g_test_add_func ("/date/month_substring", test_month_substring);
   g_test_add_func ("/date/month_names", test_month_names);
   g_test_add_func ("/date/clamp", test_clamp);
   g_test_add_func ("/date/order", test_order);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]