[glib/glib-2-58: 1/3] gdatetime: Store intermediate result of g_date_time_format in UTF-8
- From: Emmanuele Bassi <ebassi src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/glib-2-58: 1/3] gdatetime: Store intermediate result of g_date_time_format in UTF-8
- Date: Mon, 17 Dec 2018 18:49:10 +0000 (UTC)
commit 74c1468c98ce23f8dc233b668b3a42e79db6d666
Author: Tomasz Miąsko <tomasz miasko gmail com>
Date: Fri Nov 30 00:00:00 2018 +0000
gdatetime: Store intermediate result of g_date_time_format in UTF-8
In the date-time formatting routine, instead of converting from UTF-8 to
the locale charset and then from the locale charset back to UTF-8, store
all intermediate results in UTF-8.
This solves the issue where a user-provided UTF-8 format string might be
unrepresentable in the current locale charset.
Fixes issue #1605.
glib/gdatetime.c | 320 +++++++++++++++++++++++++------------------------
glib/tests/gdatetime.c | 18 +++
2 files changed, 179 insertions(+), 159 deletions(-)
---
diff --git a/glib/gdatetime.c b/glib/gdatetime.c
index 32fd501aa..6afe14438 100644
--- a/glib/gdatetime.c
+++ b/glib/gdatetime.c
@@ -4,6 +4,7 @@
* Copyright (C) 2010 Thiago Santos <thiago sousa santos collabora co uk>
* Copyright (C) 2010 Emmanuele Bassi <ebassi linux intel com>
* Copyright © 2010 Codethink Limited
+ * Copyright © 2018 Tomasz Miąsko
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
@@ -2771,6 +2772,51 @@ format_z (GString *outstr,
return TRUE;
}
+#ifdef HAVE_LANGINFO_OUTDIGIT
+/** Initializes the array with UTF-8 encoded alternate digits suitable for use
+ * in current locale. Returns NULL when current locale does not use alternate
+ * digits or there was an error converting them to UTF-8.
+ */
+static const gchar * const *
+initialize_alt_digits (void)
+{
+ guint i;
+ gsize digit_len;
+ gchar *digit;
+ const gchar *locale_digit;
+#define N_DIGITS 10
+#define MAX_UTF8_ENCODING_LEN 4
+ static gchar buffer[N_DIGITS * (MAX_UTF8_ENCODING_LEN + 1 /* null separator */)];
+#undef N_DIGITS
+#undef MAX_UTF8_ENCODING_LEN
+ gchar *buffer_end = buffer;
+ static const gchar *alt_digits[10];
+
+ for (i = 0; i != 10; ++i)
+ {
+ locale_digit = nl_langinfo (_NL_CTYPE_OUTDIGIT0_MB + i);
+
+ if (g_strcmp0 (locale_digit, "") == 0)
+ return NULL;
+
+ digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
+ if (digit == NULL)
+ return NULL;
+
+ g_assert (digit_len < buffer + sizeof (buffer) - buffer_end);
+
+ alt_digits[i] = buffer_end;
+ buffer_end = g_stpcpy (buffer_end, digit);
+ /* skip trailing null byte */
+ buffer_end += 1;
+
+ g_free (digit);
+ }
+
+ return alt_digits;
+}
+#endif /* HAVE_LANGINFO_OUTDIGIT */
+
static void
format_number (GString *str,
gboolean use_alt_digits,
@@ -2781,7 +2827,7 @@ format_number (GString *str,
const gchar *ascii_digits[10] = {
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
};
- const gchar **digits = ascii_digits;
+ const gchar * const *digits = ascii_digits;
const gchar *tmp[10];
gint i = 0;
@@ -2790,17 +2836,16 @@ format_number (GString *str,
#ifdef HAVE_LANGINFO_OUTDIGIT
if (use_alt_digits)
{
- static const gchar *alt_digits[10];
+ static const gchar * const *alt_digits = NULL;
static gsize initialised;
- /* 2^32 has 10 digits */
if G_UNLIKELY (g_once_init_enter (&initialised))
{
-#define DO_DIGIT(n) \
- alt_digits[n] = nl_langinfo (_NL_CTYPE_OUTDIGIT## n ##_MB)
- DO_DIGIT(0); DO_DIGIT(1); DO_DIGIT(2); DO_DIGIT(3); DO_DIGIT(4);
- DO_DIGIT(5); DO_DIGIT(6); DO_DIGIT(7); DO_DIGIT(8); DO_DIGIT(9);
-#undef DO_DIGIT
+ alt_digits = initialize_alt_digits ();
+
+ if (alt_digits == NULL)
+ alt_digits = ascii_digits;
+
g_once_init_leave (&initialised, TRUE);
}
@@ -2833,7 +2878,6 @@ format_ampm (GDateTime *datetime,
{
const gchar *ampm;
gchar *tmp = NULL, *ampm_dup;
- gsize len;
ampm = GET_AMPM (datetime);
@@ -2844,104 +2888,107 @@ format_ampm (GDateTime *datetime,
{
/* This assumes that locale encoding can't have embedded NULs */
ampm = tmp = g_locale_to_utf8 (ampm, -1, NULL, NULL, NULL);
- if (!tmp)
+ if (tmp == NULL)
return FALSE;
}
if (uppercase)
ampm_dup = g_utf8_strup (ampm, -1);
else
ampm_dup = g_utf8_strdown (ampm, -1);
- len = strlen (ampm_dup);
- if (!locale_is_utf8 && GET_AMPM_IS_LOCALE)
- {
- g_free (tmp);
- tmp = g_locale_from_utf8 (ampm_dup, -1, NULL, &len, NULL);
- g_free (ampm_dup);
- if (!tmp)
- return FALSE;
- ampm_dup = tmp;
- }
- g_string_append_len (outstr, ampm_dup, len);
+ g_free (tmp);
+
+ g_string_append (outstr, ampm_dup);
g_free (ampm_dup);
return TRUE;
}
-static gboolean g_date_time_format_locale (GDateTime *datetime,
- const gchar *format,
- GString *outstr,
- gboolean locale_is_utf8);
+static gboolean g_date_time_format_utf8 (GDateTime *datetime,
+ const gchar *format,
+ GString *outstr,
+ gboolean locale_is_utf8);
/* g_date_time_format() subroutine that takes a locale-encoded format
- * string and produces a locale-encoded date/time string.
+ * string and produces a UTF-8 encoded date/time string.
*/
static gboolean
-g_date_time_locale_format_locale (GDateTime *datetime,
- const gchar *format,
- GString *outstr,
- gboolean locale_is_utf8)
+g_date_time_format_locale (GDateTime *datetime,
+ const gchar *locale_format,
+ GString *outstr,
+ gboolean locale_is_utf8)
{
gchar *utf8_format;
gboolean success;
if (locale_is_utf8)
- return g_date_time_format_locale (datetime, format, outstr,
- locale_is_utf8);
+ return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8);
- utf8_format = g_locale_to_utf8 (format, -1, NULL, NULL, NULL);
- if (!utf8_format)
+ utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
+ if (utf8_format == NULL)
return FALSE;
- success = g_date_time_format_locale (datetime, utf8_format, outstr,
- locale_is_utf8);
+ success = g_date_time_format_utf8 (datetime, utf8_format, outstr,
+ locale_is_utf8);
g_free (utf8_format);
return success;
}
-/* g_date_time_format() subroutine that takes a UTF-8 format
- * string and produces a locale-encoded date/time string.
+static inline gboolean
+string_append (GString *string,
+ const gchar *s,
+ gboolean s_is_utf8)
+{
+ gchar *utf8;
+ gsize utf8_len;
+
+ if (s_is_utf8)
+ {
+ g_string_append (string, s);
+ }
+ else
+ {
+ utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
+ if (utf8 == NULL)
+ return FALSE;
+ g_string_append_len (string, utf8, utf8_len);
+ g_free (utf8);
+ }
+
+ return TRUE;
+}
+
+/* g_date_time_format() subroutine that takes a UTF-8 encoded format
+ * string and produces a UTF-8 encoded date/time string.
*/
static gboolean
-g_date_time_format_locale (GDateTime *datetime,
- const gchar *format,
- GString *outstr,
- gboolean locale_is_utf8)
+g_date_time_format_utf8 (GDateTime *datetime,
+ const gchar *utf8_format,
+ GString *outstr,
+ gboolean locale_is_utf8)
{
guint len;
guint colons;
- gchar *tmp;
- gsize tmp_len;
gunichar c;
gboolean alt_digits = FALSE;
gboolean pad_set = FALSE;
+ gboolean name_is_utf8;
const gchar *pad = "";
const gchar *name;
const gchar *tz;
- while (*format)
+ while (*utf8_format)
{
- len = strcspn (format, "%");
+ len = strcspn (utf8_format, "%");
if (len)
- {
- if (locale_is_utf8)
- g_string_append_len (outstr, format, len);
- else
- {
- tmp = g_locale_from_utf8 (format, len, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- }
+ g_string_append_len (outstr, utf8_format, len);
- format += len;
- if (!*format)
+ utf8_format += len;
+ if (!*utf8_format)
break;
- g_assert (*format == '%');
- format++;
- if (!*format)
+ g_assert (*utf8_format == '%');
+ utf8_format++;
+ if (!*utf8_format)
break;
colons = 0;
@@ -2949,91 +2996,67 @@ g_date_time_format_locale (GDateTime *datetime,
pad_set = FALSE;
next_mod:
- c = g_utf8_get_char (format);
- format = g_utf8_next_char (format);
+ c = g_utf8_get_char (utf8_format);
+ utf8_format = g_utf8_next_char (utf8_format);
switch (c)
{
case 'a':
name = WEEKDAY_ABBR (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 && !WEEKDAY_ABBR_IS_LOCALE)
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 || !WEEKDAY_ABBR_IS_LOCALE;
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'A':
name = WEEKDAY_FULL (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 && !WEEKDAY_FULL_IS_LOCALE)
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 || !WEEKDAY_FULL_IS_LOCALE;
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'b':
name = alt_digits ? MONTH_ABBR_STANDALONE (datetime)
: MONTH_ABBR_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 &&
- ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
- (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE)))
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 ||
+ ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
+ (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE));
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'B':
name = alt_digits ? MONTH_FULL_STANDALONE (datetime)
: MONTH_FULL_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 &&
- ((alt_digits && !MONTH_FULL_STANDALONE_IS_LOCALE) ||
- (!alt_digits && !MONTH_FULL_WITH_DAY_IS_LOCALE)))
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 ||
+ ((alt_digits && !MONTH_FULL_STANDALONE_IS_LOCALE) ||
+ (!alt_digits && !MONTH_FULL_WITH_DAY_IS_LOCALE));
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'c':
{
if (g_strcmp0 (PREFERRED_DATE_TIME_FMT, "") == 0)
return FALSE;
- if (!g_date_time_locale_format_locale (datetime, PREFERRED_DATE_TIME_FMT,
- outstr, locale_is_utf8))
- return FALSE;
+ if (!g_date_time_format_locale (datetime, PREFERRED_DATE_TIME_FMT,
+ outstr, locale_is_utf8))
+ return FALSE;
}
break;
case 'C':
@@ -3067,20 +3090,14 @@ g_date_time_format_locale (GDateTime *datetime,
: MONTH_ABBR_WITH_DAY (datetime);
if (g_strcmp0 (name, "") == 0)
return FALSE;
- if (!locale_is_utf8 &&
- ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
- (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE)))
- {
- tmp = g_locale_from_utf8 (name, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- g_string_append_len (outstr, tmp, tmp_len);
- g_free (tmp);
- }
- else
- {
- g_string_append (outstr, name);
- }
+
+ name_is_utf8 = locale_is_utf8 ||
+ ((alt_digits && !MONTH_ABBR_STANDALONE_IS_LOCALE) ||
+ (!alt_digits && !MONTH_ABBR_WITH_DAY_IS_LOCALE));
+
+ if (!string_append (outstr, name, name_is_utf8))
+ return FALSE;
+
break;
case 'H':
format_number (outstr, alt_digits, pad_set ? pad : "0", 2,
@@ -3128,8 +3145,8 @@ g_date_time_format_locale (GDateTime *datetime,
{
if (g_strcmp0 (PREFERRED_12HR_TIME_FMT, "") == 0)
return FALSE;
- if (!g_date_time_locale_format_locale (datetime, PREFERRED_12HR_TIME_FMT,
- outstr, locale_is_utf8))
+ if (!g_date_time_format_locale (datetime, PREFERRED_12HR_TIME_FMT,
+ outstr, locale_is_utf8))
return FALSE;
}
break;
@@ -3170,8 +3187,8 @@ g_date_time_format_locale (GDateTime *datetime,
{
if (g_strcmp0 (PREFERRED_DATE_FMT, "") == 0)
return FALSE;
- if (!g_date_time_locale_format_locale (datetime, PREFERRED_DATE_FMT,
- outstr, locale_is_utf8))
+ if (!g_date_time_format_locale (datetime, PREFERRED_DATE_FMT,
+ outstr, locale_is_utf8))
return FALSE;
}
break;
@@ -3179,8 +3196,8 @@ g_date_time_format_locale (GDateTime *datetime,
{
if (g_strcmp0 (PREFERRED_TIME_FMT, "") == 0)
return FALSE;
- if (!g_date_time_locale_format_locale (datetime, PREFERRED_TIME_FMT,
- outstr, locale_is_utf8))
+ if (!g_date_time_format_locale (datetime, PREFERRED_TIME_FMT,
+ outstr, locale_is_utf8))
return FALSE;
}
break;
@@ -3202,16 +3219,7 @@ g_date_time_format_locale (GDateTime *datetime,
break;
case 'Z':
tz = g_date_time_get_timezone_abbreviation (datetime);
- tmp = NULL;
- tmp_len = strlen (tz);
- if (!locale_is_utf8)
- {
- tz = tmp = g_locale_from_utf8 (tz, -1, NULL, &tmp_len, NULL);
- if (!tmp)
- return FALSE;
- }
- g_string_append_len (outstr, tz, tmp_len);
- g_free (tmp);
+ g_string_append (outstr, tz);
break;
case '%':
g_string_append_c (outstr, '%');
@@ -3230,7 +3238,7 @@ g_date_time_format_locale (GDateTime *datetime,
goto next_mod;
case ':':
/* Colons are only allowed before 'z' */
- if (*format && *format != 'z' && *format != ':')
+ if (*utf8_format && *utf8_format != 'z' && *utf8_format != ':')
return FALSE;
colons++;
goto next_mod;
@@ -3355,7 +3363,6 @@ g_date_time_format (GDateTime *datetime,
const gchar *format)
{
GString *outstr;
- gchar *utf8;
gboolean locale_is_utf8 = g_get_charset (NULL);
g_return_val_if_fail (datetime != NULL, NULL);
@@ -3364,18 +3371,13 @@ g_date_time_format (GDateTime *datetime,
outstr = g_string_sized_new (strlen (format) * 2);
- if (!g_date_time_format_locale (datetime, format, outstr, locale_is_utf8))
+ if (!g_date_time_format_utf8 (datetime, format, outstr, locale_is_utf8))
{
g_string_free (outstr, TRUE);
return NULL;
}
- if (locale_is_utf8)
- return g_string_free (outstr, FALSE);
-
- utf8 = g_locale_to_utf8 (outstr->str, outstr->len, NULL, NULL, NULL);
- g_string_free (outstr, TRUE);
- return utf8;
+ return g_string_free (outstr, FALSE);
}
diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c
index f4000d96c..6b2fa2f35 100644
--- a/glib/tests/gdatetime.c
+++ b/glib/tests/gdatetime.c
@@ -1525,6 +1525,23 @@ test_non_utf8_printf (void)
g_free (oldlocale);
}
+/* Checks that it is possible to use a format string that
+ * is unrepresentable in the current locale charset. */
+static void
+test_format_unrepresentable (void)
+{
+ gchar *oldlocale = g_strdup (setlocale (LC_ALL, NULL));
+ setlocale (LC_ALL, "POSIX");
+
+ TEST_PRINTF ("ąśćł", "ąśćł");
+
+ /* We are using Unicode ratio symbol here, which is outside ASCII. */
+ TEST_PRINTF_TIME (23, 15, 0, "%H∶%M", "23∶15");
+
+ setlocale (LC_ALL, oldlocale);
+ g_free (oldlocale);
+}
+
static void
test_modifiers (void)
{
@@ -2489,6 +2506,7 @@ main (gint argc,
g_test_add_func ("/GDateTime/now", test_GDateTime_now);
g_test_add_func ("/GDateTime/printf", test_GDateTime_printf);
g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf);
+ g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable);
g_test_add_func ("/GDateTime/strftime", test_strftime);
g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling);
g_test_add_func ("/GDateTime/modifiers", test_modifiers);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]