[glib: 1/2] gdate: Use longest matching month name in g_date_set_parse
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib: 1/2] gdate: Use longest matching month name in g_date_set_parse
- Date: Tue, 13 Nov 2018 15:00:55 +0000 (UTC)
commit ba18822f358c49f15435197dba7c11f6753396f1
Author: Tomasz Miąsko <tomasz miasko gmail com>
Date: Tue Oct 30 00:00:00 2018 +0000
gdate: Use longest matching month name in g_date_set_parse
There are languages where the name of one month is a substring of another.
Instead of stopping the search on the first match, use the month that
constitutes the longest match.
Fixes #1343.
glib/gdate.c | 75 +++++++++++++++++++++++--------------------------------
glib/tests/date.c | 34 +++++++++++++++++++++++++
2 files changed, 65 insertions(+), 44 deletions(-)
---
diff --git a/glib/gdate.c b/glib/gdate.c
index 4925818b3..5457a3b8c 100644
--- a/glib/gdate.c
+++ b/glib/gdate.c
@@ -931,6 +931,27 @@ struct _GDateParseTokens {
typedef struct _GDateParseTokens GDateParseTokens;
+static inline gboolean
+update_month_match (gsize *longest,
+ const gchar *haystack,
+ const gchar *needle)
+{
+ gsize length;
+
+ if (needle == NULL)
+ return FALSE;
+
+ length = strlen (needle);
+ if (*longest >= length)
+ return FALSE;
+
+ if (strstr (haystack, needle) == NULL)
+ return FALSE;
+
+ *longest = length;
+ return TRUE;
+}
+
#define NUM_LEN 10
/* HOLDS: g_date_global_lock */
@@ -978,6 +999,7 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
if (pt->num_ints < 3)
{
+ gsize longest = 0;
gchar *casefold;
gchar *normalized;
@@ -985,8 +1007,7 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
normalized = g_utf8_normalize (casefold, -1, G_NORMALIZE_ALL);
g_free (casefold);
- i = 1;
- while (i < 13)
+ for (i = 1; i < 13; ++i)
{
/* Here month names may be in a genitive case if the language
* grammatical rules require it.
@@ -997,60 +1018,26 @@ g_date_fill_parse_tokens (const gchar *str, GDateParseTokens *pt)
* genitive case here so they use nominative everywhere.
* For example, English always uses "January".
*/
- if (long_month_names[i] != NULL)
- {
- const gchar *found = strstr (normalized, long_month_names[i]);
-
- if (found != NULL)
- {
- pt->month = i;
- break;
- }
- }
+ if (update_month_match (&longest, normalized, long_month_names[i]))
+ pt->month = i;
/* Here month names will be in a nominative case.
* Examples of how January may look in some languages:
* Catalan: "gener", Croatian: "Siječanj", Polish: "styczeń",
* Upper Sorbian: "Januar".
*/
- if (long_month_names_alternative[i] != NULL)
- {
- const gchar *found = strstr (normalized, long_month_names_alternative[i]);
-
- if (found != NULL)
- {
- pt->month = i;
- break;
- }
- }
+ if (update_month_match (&longest, normalized, long_month_names_alternative[i]))
+ pt->month = i;
/* Differences between abbreviated nominative and abbreviated
* genitive month names are visible in very few languages but
* let's handle them.
*/
- if (short_month_names[i] != NULL)
- {
- const gchar *found = strstr (normalized, short_month_names[i]);
-
- if (found != NULL)
- {
- pt->month = i;
- break;
- }
- }
+ if (update_month_match (&longest, normalized, short_month_names[i]))
+ pt->month = i;
- if (short_month_names_alternative[i] != NULL)
- {
- const gchar *found = strstr (normalized, short_month_names_alternative[i]);
-
- if (found != NULL)
- {
- pt->month = i;
- break;
- }
- }
-
- ++i;
+ if (update_month_match (&longest, normalized, short_month_names_alternative[i]))
+ pt->month = i;
}
g_free (normalized);
diff --git a/glib/tests/date.c b/glib/tests/date.c
index 6cd91ab6c..8eb28712b 100644
--- a/glib/tests/date.c
+++ b/glib/tests/date.c
@@ -208,6 +208,39 @@ test_parse_locale_change (void)
setlocale (LC_ALL, "");
}
+static void
+test_month_substring (void)
+{
+ GDate date;
+
+ g_test_bug ("793550");
+
+ if (setlocale (LC_ALL, "pl_PL") == NULL)
+ {
+ g_test_skip ("pl_PL locale not available");
+ return;
+ }
+
+ /* In Polish language September is "wrzesień" and August is "sierpień"
+ * abbreviated as "sie". The former used to be confused with the latter
+ * because "sie" is a substring of "wrzesień" and was matched first. */
+
+ g_date_set_parse (&date, "wrzesień 2018");
+ g_assert_true (g_date_valid (&date));
+ g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_SEPTEMBER);
+
+ g_date_set_parse (&date, "sie 2018");
+ g_assert_true (g_date_valid (&date));
+ g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_AUGUST);
+
+ g_date_set_parse (&date, "sierpień 2018");
+ g_assert_true (g_date_valid (&date));
+ g_assert_cmpint (g_date_get_month (&date), ==, G_DATE_AUGUST);
+
+ setlocale (LC_ALL, "");
+}
+
+
static void
test_month_names (void)
{
@@ -736,6 +769,7 @@ main (int argc, char** argv)
g_test_add_func ("/date/dates", test_dates);
g_test_add_func ("/date/parse", test_parse);
g_test_add_func ("/date/parse_locale_change", test_parse_locale_change);
+ g_test_add_func ("/date/month_substring", test_month_substring);
g_test_add_func ("/date/month_names", test_month_names);
g_test_add_func ("/date/clamp", test_clamp);
g_test_add_func ("/date/order", test_order);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]