[glib: 3/4] Add private functions to correctly convert datetime when LC_TIME is not UTF8




commit 782eb1f7af8a1e934b786102b49abf731c0ed606
Author: Frederic Martinsons <frederic martinsons sigfox com>
Date:   Tue Dec 1 12:47:27 2020 +0100

    Add private functions to correctly convert datetime when LC_TIME is not UTF8
    
    Add functions (_g_get_time_charset and _g_get_ctype_charset) to get the LC_TIME and
    LC_CTYPE charsets (by using nl_langinfo with _NL_TIME_CODESET and CODESET).
    Add further functions (_g_time_locale_to_utf8 and _g_ctype_locale_to_utf8) which use
    them and convert the input string accordingly.
    Add new test cases mixing UTF8 and non UTF8 LC_TIME with UTF8
    and non UTF8 LC_MESSAGES.
    
    Closed #2055
    
    Signed-off-by: Frederic Martinsons <frederic martinsons sigfox com>

 glib/gcharset.c        |  87 +++++++++++++++++++++++++++++++++++
 glib/gcharsetprivate.h |   4 ++
 glib/gconvert.c        |  47 +++++++++++++++++++
 glib/gconvertprivate.h |  40 ++++++++++++++++
 glib/gdatetime.c       |  15 +++---
 glib/tests/gdatetime.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++
 meson.build            |   9 ++++
 7 files changed, 318 insertions(+), 6 deletions(-)
---
diff --git a/glib/gcharset.c b/glib/gcharset.c
index bb775bda4..9f91a9b48 100644
--- a/glib/gcharset.c
+++ b/glib/gcharset.c
@@ -36,6 +36,12 @@
 
 #include <string.h>
 #include <stdio.h>
+
+#if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET)
+#include <langinfo.h>
+#endif
+
+#include <locale.h>
 #ifdef G_OS_WIN32
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
@@ -215,6 +221,87 @@ g_get_charset (const char **charset)
   return cache->is_utf8;
 }
 
+/*
+ * Like g_get_charset(), but for the LC_TIME category: reads the charset from
+ * nl_langinfo (_NL_TIME_CODESET) when available; the locale is never changed.
+ *
+ * Returns: %TRUE if the returned charset is UTF-8
+ */
+gboolean
+_g_get_time_charset (const char **charset)
+{
+  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
+  GCharsetCache *cache = g_private_get (&cache_private);
+  const gchar *raw;
+
+  if (!cache)
+    cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
+
+#ifdef HAVE_LANGINFO_TIME_CODESET
+  raw = nl_langinfo (_NL_TIME_CODESET);
+#else
+  G_LOCK (aliases);
+  raw = _g_locale_charset_raw ();
+  G_UNLOCK (aliases);
+#endif
+
+  if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) /* cache empty or stale */
+    {
+      const gchar *new_charset;
+
+      g_free (cache->raw);
+      g_free (cache->charset);
+      cache->raw = g_strdup (raw);
+      cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
+      cache->charset = g_strdup (new_charset);
+    }
+
+  if (charset) /* @charset is optional */
+    *charset = cache->charset;
+
+  return cache->is_utf8;
+}
+/*
+ * Like g_get_charset(), but for the LC_CTYPE category: reads the charset from
+ * nl_langinfo (CODESET) when available; the locale is never changed.
+ *
+ * Returns: %TRUE if the returned charset is UTF-8
+ */
+gboolean
+_g_get_ctype_charset (const char **charset)
+{
+  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
+  GCharsetCache *cache = g_private_get (&cache_private);
+  const gchar *raw;
+
+  if (!cache)
+    cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
+
+#ifdef HAVE_LANGINFO_CODESET
+  raw = nl_langinfo (CODESET);
+#else
+  G_LOCK (aliases);
+  raw = _g_locale_charset_raw ();
+  G_UNLOCK (aliases);
+#endif
+
+  if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) /* cache empty or stale */
+    {
+      const gchar *new_charset;
+
+      g_free (cache->raw);
+      g_free (cache->charset);
+      cache->raw = g_strdup (raw);
+      cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
+      cache->charset = g_strdup (new_charset);
+    }
+
+  if (charset) /* @charset is optional */
+    *charset = cache->charset;
+
+  return cache->is_utf8;
+}
+
 /**
  * g_get_codeset:
  *
diff --git a/glib/gcharsetprivate.h b/glib/gcharsetprivate.h
index f6b68dcd7..9b1def278 100644
--- a/glib/gcharsetprivate.h
+++ b/glib/gcharsetprivate.h
@@ -25,6 +25,10 @@ G_BEGIN_DECLS
 
 const char ** _g_charset_get_aliases (const char *canonical_name);
 
+gboolean      _g_get_time_charset    (const char **charset);
+
+gboolean      _g_get_ctype_charset   (const char **charset);
+
 G_END_DECLS
 
 #endif
diff --git a/glib/gconvert.c b/glib/gconvert.c
index f78cff01d..7697ff65d 100644
--- a/glib/gconvert.c
+++ b/glib/gconvert.c
@@ -40,6 +40,7 @@
 #endif
 
 #include "gconvert.h"
+#include "gconvertprivate.h"
 
 #include "gcharsetprivate.h"
 #include "gslist.h"
@@ -1015,6 +1016,52 @@ g_locale_to_utf8 (const gchar  *opsysstring,
                             bytes_read, bytes_written, error);
 }
 
+/*
+ * Same as g_locale_to_utf8(), except that the source charset is the one
+ * reported by _g_get_time_charset() (i.e. the LC_TIME charset).
+ *
+ * Returns: The converted string, or %NULL on an error.
+ */
+gchar *
+_g_time_locale_to_utf8 (const gchar *opsysstring,
+                        gssize       len,
+                        gsize       *bytes_read,
+                        gsize       *bytes_written,
+                        GError     **error)
+{
+  const char *charset;
+
+  if (_g_get_time_charset (&charset))
+    return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
+  else
+    return convert_checked (opsysstring, len, "UTF-8", charset,
+                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
+                            bytes_read, bytes_written, error);
+}
+
+/*
+ * Same as g_locale_to_utf8(), except that the source charset is the one
+ * reported by _g_get_ctype_charset() (i.e. the LC_CTYPE charset).
+ *
+ * Returns: The converted string, or %NULL on an error.
+ */
+gchar *
+_g_ctype_locale_to_utf8 (const gchar *opsysstring,
+                         gssize       len,
+                         gsize       *bytes_read,
+                         gsize       *bytes_written,
+                         GError     **error)
+{
+  const char *charset;
+
+  if (_g_get_ctype_charset (&charset))
+    return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
+  else
+    return convert_checked (opsysstring, len, "UTF-8", charset,
+                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
+                            bytes_read, bytes_written, error);
+}
+
 /**
  * g_locale_from_utf8:
  * @utf8string:    a UTF-8 encoded string 
diff --git a/glib/gconvertprivate.h b/glib/gconvertprivate.h
new file mode 100644
index 000000000..5bdc87ff6
--- /dev/null
+++ b/glib/gconvertprivate.h
@@ -0,0 +1,40 @@
+/* gconvertprivate.h - Private GLib gconvert functions
+ *
+ * Copyright 2020 Frederic Martinsons
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __G_CONVERTPRIVATE_H__
+#define __G_CONVERTPRIVATE_H__
+
+#include "glib.h"
+
+G_BEGIN_DECLS
+/* g_locale_to_utf8() variant using the charset from _g_get_time_charset() */
+gchar *_g_time_locale_to_utf8 (const gchar *opsysstring,
+                               gssize len,
+                               gsize *bytes_read,
+                               gsize *bytes_written,
+                               GError **error) G_GNUC_MALLOC;
+/* g_locale_to_utf8() variant using the charset from _g_get_ctype_charset() */
+gchar *_g_ctype_locale_to_utf8 (const gchar *opsysstring,
+                                gssize len,
+                                gsize *bytes_read,
+                                gsize *bytes_written,
+                                GError **error) G_GNUC_MALLOC;
+
+G_END_DECLS
+
+#endif /* __G_CONVERTPRIVATE_H__ */
diff --git a/glib/gdatetime.c b/glib/gdatetime.c
index 219dfb7de..a31afe713 100644
--- a/glib/gdatetime.c
+++ b/glib/gdatetime.c
@@ -62,7 +62,9 @@
 
 #include "gatomic.h"
 #include "gcharset.h"
+#include "gcharsetprivate.h"
 #include "gconvert.h"
+#include "gconvertprivate.h"
 #include "gdatetime.h"
 #include "gfileutils.h"
 #include "ghash.h"
@@ -2869,7 +2871,7 @@ initialize_alt_digits (void)
       if (g_strcmp0 (locale_digit, "") == 0)
         return NULL;
 
-      digit = g_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
+      digit = _g_ctype_locale_to_utf8 (locale_digit, -1, NULL, &digit_len, NULL);
       if (digit == NULL)
         return NULL;
 
@@ -2993,7 +2995,7 @@ g_date_time_format_locale (GDateTime   *datetime,
   if (locale_is_utf8)
     return g_date_time_format_utf8 (datetime, locale_format, outstr, locale_is_utf8);
 
-  utf8_format = g_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
+  utf8_format = _g_time_locale_to_utf8 (locale_format, -1, NULL, NULL, NULL);
   if (utf8_format == NULL)
     return FALSE;
 
@@ -3017,7 +3019,7 @@ string_append (GString     *string,
     }
   else
     {
-      utf8 = g_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
+      utf8 = _g_time_locale_to_utf8 (s, -1, NULL, &utf8_len, NULL);
       if (utf8 == NULL)
         return FALSE;
       g_string_append_len (string, utf8, utf8_len);
@@ -3443,10 +3445,11 @@ g_date_time_format (GDateTime   *datetime,
 {
   GString  *outstr;
   const gchar *charset;
-  /* Avoid conversions from locale charset to UTF-8 if charset is compatible
+  /* Avoid conversions from locale (for LC_TIME and not for LC_MESSAGES unless
+   * specified otherwise) charset to UTF-8 if charset is compatible
    * with UTF-8 already. Check for UTF-8 and synonymous canonical names of
    * ASCII. */
-  gboolean locale_is_utf8_compatible = g_get_charset (&charset) ||
+  gboolean time_is_utf8_compatible = _g_get_time_charset (&charset) ||
     g_strcmp0 ("ASCII", charset) == 0 ||
     g_strcmp0 ("ANSI_X3.4-1968", charset) == 0;
 
@@ -3457,7 +3460,7 @@ g_date_time_format (GDateTime   *datetime,
   outstr = g_string_sized_new (strlen (format) * 2);
 
   if (!g_date_time_format_utf8 (datetime, format, outstr,
-                                locale_is_utf8_compatible))
+                                time_is_utf8_compatible))
     {
       g_string_free (outstr, TRUE);
       return NULL;
diff --git a/glib/tests/gdatetime.c b/glib/tests/gdatetime.c
index bc4eba93a..12f332b44 100644
--- a/glib/tests/gdatetime.c
+++ b/glib/tests/gdatetime.c
@@ -2318,6 +2318,116 @@ test_format_iso8601 (void)
   g_time_zone_unref (tz);
 }
 
+typedef struct
+{
+  gboolean utf8_messages; /* TRUE: LC_MESSAGES gets "C.UTF-8", else "de_DE.iso88591" */
+  gboolean utf8_time; /* TRUE: LC_TIME gets "C.UTF-8", else "de_DE.iso88591" */
+} MixedUtf8TestData;
+
+static const MixedUtf8TestData utf8_time_non_utf8_messages = {
+  .utf8_messages = FALSE,
+  .utf8_time = TRUE
+};
+
+static const MixedUtf8TestData non_utf8_time_utf8_messages = {
+  .utf8_messages = TRUE,
+  .utf8_time = FALSE
+};
+
+static const MixedUtf8TestData utf8_time_utf8_messages = {
+  .utf8_messages = TRUE,
+  .utf8_time = TRUE
+};
+
+static const MixedUtf8TestData non_utf8_time_non_utf8_messages = {
+  .utf8_messages = FALSE,
+  .utf8_time = FALSE
+};
+
+/* Set @category to @name; skip the test and return FALSE if it is unavailable. */
+static gboolean
+check_and_set_locale (int category, const gchar *name)
+{
+  const gchar *actual = setlocale (category, name); /* NULL when the request fails */
+  if (actual == NULL || strstr (actual, name) == NULL)
+    {
+      g_print ("Unavailable '%s' locale\n", name);
+      g_test_skip ("required locale not available, skipping tests");
+      return FALSE;
+    }
+  return TRUE;
+}
+
+/* Format March under one LC_TIME / LC_MESSAGES charset mix given by @data. */
+static void
+test_format_time_mixed_utf8 (gconstpointer data)
+{
+  const MixedUtf8TestData *test_data = data;
+  gchar *old_time_locale;
+  gchar *old_messages_locale;
+  g_test_bug ("https://gitlab.gnome.org/GNOME/glib/-/issues/2055");
+
+  old_time_locale = g_strdup (setlocale (LC_TIME, NULL));
+  old_messages_locale = g_strdup (setlocale (LC_MESSAGES, NULL));
+  if (test_data->utf8_time)
+    {
+      if (!check_and_set_locale (LC_TIME, "C.UTF-8"))
+        {
+          g_free (old_time_locale);
+          setlocale (LC_MESSAGES, old_messages_locale);
+          g_free (old_messages_locale);
+          return;
+        }
+    }
+  else
+    {
+      if (!check_and_set_locale (LC_TIME, "de_DE.iso88591"))
+        {
+          g_free (old_time_locale);
+          setlocale (LC_MESSAGES, old_messages_locale);
+          g_free (old_messages_locale);
+          return;
+        }
+    }
+  if (test_data->utf8_messages)
+    {
+      if (!check_and_set_locale (LC_MESSAGES, "C.UTF-8"))
+        {
+          g_free (old_messages_locale);
+          setlocale (LC_TIME, old_time_locale);
+          g_free (old_time_locale);
+          return;
+        }
+    }
+  else
+    {
+      if (!check_and_set_locale (LC_MESSAGES, "de_DE.iso88591"))
+        {
+          g_free (old_messages_locale);
+          setlocale (LC_TIME, old_time_locale);
+          g_free (old_time_locale);
+          return;
+        }
+    }
+
+  if (!test_data->utf8_time)
+    {
+      /* March to have März in german */
+      TEST_PRINTF_DATE (2020, 3, 1, "%b", "Mär");
+      TEST_PRINTF_DATE (2020, 3, 1, "%B", "März");
+    }
+  else
+    {
+      TEST_PRINTF_DATE (2020, 3, 1, "%b", "mar");
+      TEST_PRINTF_DATE (2020, 3, 1, "%B", "march");
+    }
+
+  setlocale (LC_TIME, old_time_locale);
+  setlocale (LC_MESSAGES, old_messages_locale);
+  g_free (old_time_locale);
+  g_free (old_messages_locale);
+}
+
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wformat-y2k"
 static void
@@ -2980,6 +3090,18 @@ main (gint   argc,
   g_test_add_func ("/GDateTime/non_utf8_printf", test_non_utf8_printf);
   g_test_add_func ("/GDateTime/format_unrepresentable", test_format_unrepresentable);
   g_test_add_func ("/GDateTime/format_iso8601", test_format_iso8601);
+  g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_non_utf8_messages",
+                        &utf8_time_non_utf8_messages,
+                        test_format_time_mixed_utf8);
+  g_test_add_data_func ("/GDateTime/format_mixed/utf8_time_utf8_messages",
+                        &utf8_time_utf8_messages,
+                        test_format_time_mixed_utf8);
+  g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_non_utf8_messages",
+                        &non_utf8_time_non_utf8_messages,
+                        test_format_time_mixed_utf8);
+  g_test_add_data_func ("/GDateTime/format_mixed/non_utf8_time_utf8_messages",
+                        &non_utf8_time_utf8_messages,
+                        test_format_time_mixed_utf8);
   g_test_add_func ("/GDateTime/strftime", test_strftime);
   g_test_add_func ("/GDateTime/strftime/error_handling", test_GDateTime_strftime_error_handling);
   g_test_add_func ("/GDateTime/modifiers", test_modifiers);
diff --git a/meson.build b/meson.build
index 2cf1c7b4f..eb401123c 100644
--- a/meson.build
+++ b/meson.build
@@ -1185,6 +1185,15 @@ if cc.links('''#ifndef _GNU_SOURCE
   glib_conf.set('HAVE_LANGINFO_ABALTMON', 1)
 endif
 
+# Check that nl_langinfo (_NL_TIME_CODESET) links; sets HAVE_LANGINFO_TIME_CODESET
+if cc.links('''#include <langinfo.h>
+               int main (int argc, char ** argv) {
+                 char *codeset = nl_langinfo (_NL_TIME_CODESET);
+                 return 0;
+               }''', name : 'nl_langinfo and _NL_TIME_CODESET')
+  glib_conf.set('HAVE_LANGINFO_TIME_CODESET', 1)
+endif
+
 # Check if C compiler supports the 'signed' keyword
 if not cc.compiles('''signed char x;''', name : 'signed')
   glib_conf.set('signed', '/* NOOP */')


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]