[glib] gconvert: Consistently validate inputs and outputs for embedded NULs

From: Philip Withnall <pwithnall src gnome org>
To: commits-list gnome org
Cc:
Subject: [glib] gconvert: Consistently validate inputs and outputs for embedded NULs
Date: Fri, 19 Jan 2018 12:11:06 +0000 (UTC)
commit f35a6a70313bae374141f4297a2d8995c7935652
Author: Mikhail Zabaluev <mikhail zabaluev gmail com>
Date:   Wed Jan 17 09:25:23 2018 +0200

    gconvert: Consistently validate inputs and outputs for embedded NULs
    
    String inputs to convenience conversion functions g_locale_from_utf8(),
    g_filename_from_utf8(), and g_filename_to_utf8(), are annotated for the
    bindings as NUL-terminated strings of (type utf8) or (type filename).
    There is also a len parameter that allows converting part of the string,
    but it is exposed to the bindings as a value independent from the string
    buffer. Absent any more sophisticated ways to annotate, the way to
    provide a safeguard against len argument values longer than the
    string length is to check that no nul is encountered within the first
    len bytes of the string. strdup_len() includes this check as part of
    UTF-8 validation, but g_convert() permits embedded nuls.
    
    For g_filename_from_utf8(), also check the output to prevent embedded NUL
    bytes. It's not safe to allow embedded NULs in a string that is going
    to be used as (type filename), and no known bytestring encoding for
    file names allows them.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=792516

 glib/gconvert.c      |   99 ++++++++++++++++++++++++---------
 glib/tests/convert.c |  152 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 204 insertions(+), 47 deletions(-)
---
diff --git a/glib/gconvert.c b/glib/gconvert.c
index 586b53a..cef72ca 100644
--- a/glib/gconvert.c
+++ b/glib/gconvert.c
@@ -866,28 +866,67 @@ strdup_len (const gchar *string,
   return g_strndup (string, real_len);
 }
 
+typedef enum
+{
+  CONVERT_CHECK_NO_NULS_IN_INPUT  = 1 << 0,
+  CONVERT_CHECK_NO_NULS_IN_OUTPUT = 1 << 1
+} ConvertCheckFlags;
+
+/*
+ * Convert from @string in the encoding identified by @from_codeset,
+ * returning a string in the encoding identifed by @to_codeset.
+ * @len can be negative if @string is nul-terminated, or a non-negative
+ * value in bytes. Flags defined in #ConvertCheckFlags can be set in @flags
+ * to check the input, the output, or both, for embedded nul bytes.
+ * On success, @bytes_read, if provided, will be set to the number of bytes
+ * in @string up to @len or the terminating nul byte, and @bytes_written, if
+ * provided, will be set to the number of output bytes written into the
+ * returned buffer, excluding the terminating nul sequence.
+ * On error, @bytes_read will be set to the byte offset after the last valid
+ * sequence in @string, and @bytes_written will be set to 0.
+ */
 static gchar *
-convert_to_utf8 (const gchar *opsysstring,
-                 gssize       len,
-                 const gchar *charset,
-                 gsize       *bytes_read,
-                 gsize       *bytes_written,
-                 GError     **error)
+convert_checked (const gchar      *string,
+                 gssize            len,
+                 const gchar      *to_codeset,
+                 const gchar      *from_codeset,
+                 ConvertCheckFlags flags,
+                 gsize            *bytes_read,
+                 gsize            *bytes_written,
+                 GError          **error)
 {
-  gchar *utf8;
+  gchar *out;
   gsize outbytes;
 
-  utf8 = g_convert (opsysstring, len, "UTF-8", charset,
-                    bytes_read, &outbytes, error);
-  if (utf8 == NULL)
+  if ((flags & CONVERT_CHECK_NO_NULS_IN_INPUT) && len > 0)
+    {
+      const gchar *early_nul = memchr (string, '\0', len);
+      if (early_nul != NULL)
+        {
+          if (bytes_read)
+            *bytes_read = early_nul - string;
+          if (bytes_written)
+            *bytes_written = 0;
+
+          g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+                               _("Embedded NUL byte in conversion input"));
+          return NULL;
+        }
+    }
+
+  out = g_convert (string, len, to_codeset, from_codeset,
+                   bytes_read, &outbytes, error);
+  if (out == NULL)
     {
       if (bytes_written)
         *bytes_written = 0;
       return NULL;
     }
-  if (memchr (utf8, '\0', outbytes) != NULL)
+
+  if ((flags & CONVERT_CHECK_NO_NULS_IN_OUTPUT)
+      && memchr (out, '\0', outbytes) != NULL)
     {
-      g_free (utf8);
+      g_free (out);
       if (bytes_written)
         *bytes_written = 0;
       g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL,
@@ -897,7 +936,7 @@ convert_to_utf8 (const gchar *opsysstring,
 
   if (bytes_written)
     *bytes_written = outbytes;
-  return utf8;
+  return out;
 }
 
 /**
@@ -948,7 +987,8 @@ g_locale_to_utf8 (const gchar  *opsysstring,
   if (g_get_charset (&charset))
     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
   else
-    return convert_to_utf8 (opsysstring, len, charset,
+    return convert_checked (opsysstring, len, "UTF-8", charset,
+                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
                             bytes_read, bytes_written, error);
 }
 
@@ -975,8 +1015,8 @@ g_locale_to_utf8 (const gchar  *opsysstring,
  * system) in the [current locale][setlocale]. On Windows this means
  * the system codepage.
  *
- * The input string should not contain nul characters even if the @len
- * argument is positive. A nul character found inside the string may result
+ * The input string shall not contain nul characters even if the @len
+ * argument is positive. A nul character found inside the string will result
  * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
  * input that may contain embedded nul characters.
  *
@@ -995,8 +1035,9 @@ g_locale_from_utf8 (const gchar *utf8string,
   if (g_get_charset (&charset))
     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
   else
-    return g_convert (utf8string, len,
-                     charset, "UTF-8", bytes_read, bytes_written, error);
+    return convert_checked (utf8string, len, charset, "UTF-8",
+                            CONVERT_CHECK_NO_NULS_IN_INPUT,
+                            bytes_read, bytes_written, error);
 }
 
 #ifndef G_PLATFORM_WIN32
@@ -1184,12 +1225,12 @@ get_filename_charset (const gchar **filename_charset)
  * for filenames; on other platforms, this function indirectly depends on 
  * the [current locale][setlocale].
  *
+ * The input string shall not contain nul characters even if the @len
+ * argument is positive. A nul character found inside the string will result
+ * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
  * If the source encoding is not UTF-8 and the conversion output contains a
  * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
- * function returns %NULL.
- * If the source encoding is UTF-8, an embedded nul character is treated with
- * the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
- * earlier versions of this library. Use g_convert() to produce output that
+ * function returns %NULL. Use g_convert() to produce output that
  * may contain embedded nul characters.
  * 
  * Returns: The converted string, or %NULL on an error.
@@ -1208,7 +1249,9 @@ g_filename_to_utf8 (const gchar *opsysstring,
   if (get_filename_charset (&charset))
     return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
   else
-    return convert_to_utf8 (opsysstring, len, charset,
+    return convert_checked (opsysstring, len, "UTF-8", charset,
+                            CONVERT_CHECK_NO_NULS_IN_INPUT |
+                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
                             bytes_read, bytes_written, error);
 }
 
@@ -1235,8 +1278,8 @@ g_filename_to_utf8 (const gchar *opsysstring,
  * on other platforms, this function indirectly depends on the 
  * [current locale][setlocale].
  *
- * The input string should not contain nul characters even if the @len
- * argument is positive. A nul character found inside the string may result
+ * The input string shall not contain nul characters even if the @len
+ * argument is positive. A nul character found inside the string will result
  * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Note that nul bytes are
  * prohibited in all filename encodings that GLib is known to work with.
  *
@@ -1255,8 +1298,10 @@ g_filename_from_utf8 (const gchar *utf8string,
   if (get_filename_charset (&charset))
     return strdup_len (utf8string, len, bytes_read, bytes_written, error);
   else
-    return g_convert (utf8string, len,
-                     charset, "UTF-8", bytes_read, bytes_written, error);
+    return convert_checked (utf8string, len, charset, "UTF-8",
+                            CONVERT_CHECK_NO_NULS_IN_INPUT |
+                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
+                            bytes_read, bytes_written, error);
 }
 
 /* Test of haystack has the needle prefix, comparing case
diff --git a/glib/tests/convert.c b/glib/tests/convert.c
index fada356..695d7ce 100644
--- a/glib/tests/convert.c
+++ b/glib/tests/convert.c
@@ -685,11 +685,11 @@ test_filename_display (void)
 }
 
 static void
-test_locale_embedded_nul (void)
+test_locale_to_utf8_embedded_nul (void)
 {
-  g_test_trap_subprocess ("/conversion/locale-embedded-nul/subprocess/utf8", 0, 0);
+  g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
   g_test_trap_assert_passed ();
-  g_test_trap_subprocess ("/conversion/locale-embedded-nul/subprocess/iconv", 0, 0);
+  g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
   g_test_trap_assert_passed ();
 }
 
@@ -697,7 +697,7 @@ test_locale_embedded_nul (void)
  * result in an error.
  */
 static void
-test_locale_embedded_nul_utf8 (void)
+test_locale_to_utf8_embedded_nul_utf8 (void)
 {
   gchar *res;
   gsize bytes_read;
@@ -719,7 +719,7 @@ test_locale_embedded_nul_utf8 (void)
  * when converted from non-UTF8 input, result in an error.
  */
 static void
-test_locale_embedded_nul_iconv (void)
+test_locale_to_utf8_embedded_nul_iconv (void)
 {
   gchar *res;
   GError *error = NULL;
@@ -736,11 +736,64 @@ test_locale_embedded_nul_iconv (void)
 }
 
 static void
-test_filename_embedded_nul (void)
+test_locale_from_utf8_embedded_nul (void)
 {
-  g_test_trap_subprocess ("/conversion/filename-embedded-nul/subprocess/utf8", 0, 0);
+  g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
   g_test_trap_assert_passed ();
-  g_test_trap_subprocess ("/conversion/filename-embedded-nul/subprocess/iconv", 0, 0);
+  g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
+  g_test_trap_assert_passed ();
+}
+
+/* Test that embedded nul characters in input to g_locale_from_utf8(),
+ * when converting (copying) to UTF-8 output, result in an error.
+ */
+static void
+test_locale_from_utf8_embedded_nul_utf8 (void)
+{
+  gchar *res;
+  gsize bytes_read;
+  GError *error = NULL;
+
+  setlocale (LC_ALL, "");
+  g_setenv ("CHARSET", "UTF-8", TRUE);
+  g_assert_true (g_get_charset (NULL));
+
+  res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
+
+  g_assert_null (res);
+  g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+  g_assert_cmpuint (bytes_read, ==, 2);
+  g_error_free (error);
+}
+
+/* Test that embedded nul characters in input to g_locale_from_utf8(),
+ * when converting to non-UTF-8 output, result in an error.
+ */
+static void
+test_locale_from_utf8_embedded_nul_iconv (void)
+{
+  gchar *res;
+  gsize bytes_read;
+  GError *error = NULL;
+
+  setlocale (LC_ALL, "C");
+  g_setenv ("CHARSET", "US-ASCII", TRUE);
+  g_assert_false (g_get_charset (NULL));
+
+  res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
+
+  g_assert_null (res);
+  g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+  g_assert_cmpuint (bytes_read, ==, 2);
+  g_error_free (error);
+}
+
+static void
+test_filename_to_utf8_embedded_nul (void)
+{
+  g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
+  g_test_trap_assert_passed ();
+  g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
   g_test_trap_assert_passed ();
 }
 
@@ -748,7 +801,7 @@ test_filename_embedded_nul (void)
  * result in an error.
  */
 static void
-test_filename_embedded_nul_utf8 (void)
+test_filename_to_utf8_embedded_nul_utf8 (void)
 {
   gchar *res;
   gsize bytes_read;
@@ -765,22 +818,75 @@ test_filename_embedded_nul_utf8 (void)
   g_error_free (error);
 }
 
-/* Test that embedded nul characters in output of g_filename_to_utf8(),
- * when converted from non-UTF8 input, result in an error.
+/* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
+ * result in an error.
  */
 static void
-test_filename_embedded_nul_iconv (void)
+test_filename_to_utf8_embedded_nul_iconv (void)
 {
   gchar *res;
+  gsize bytes_read;
   GError *error = NULL;
 
   g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
   g_assert_false (g_get_filename_charsets (NULL));
 
-  res = g_filename_to_utf8 ("ab\0c", 4, NULL, NULL, &error);
+  res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
 
   g_assert_null (res);
-  g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
+  g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+  g_assert_cmpuint (bytes_read, ==, 2);
+  g_error_free (error);
+}
+
+static void
+test_filename_from_utf8_embedded_nul (void)
+{
+  g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
+  g_test_trap_assert_passed ();
+  g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
+  g_test_trap_assert_passed ();
+}
+
+/* Test that embedded nul characters in input to g_filename_from_utf8(),
+ * when converting (copying) to UTF-8 output, result in an error.
+ */
+static void
+test_filename_from_utf8_embedded_nul_utf8 (void)
+{
+  gchar *res;
+  gsize bytes_read;
+  GError *error = NULL;
+
+  g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
+  g_assert_true (g_get_filename_charsets (NULL));
+
+  res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
+
+  g_assert_null (res);
+  g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+  g_assert_cmpuint (bytes_read, ==, 2);
+  g_error_free (error);
+}
+
+/* Test that embedded nul characters in input to g_filename_from_utf8(),
+ * when converting to non-UTF-8 output, result in an error.
+ */
+static void
+test_filename_from_utf8_embedded_nul_iconv (void)
+{
+  gchar *res;
+  gsize bytes_read;
+  GError *error = NULL;
+
+  g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
+  g_assert_false (g_get_filename_charsets (NULL));
+
+  res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
+
+  g_assert_null (res);
+  g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+  g_assert_cmpuint (bytes_read, ==, 2);
   g_error_free (error);
 }
 
@@ -813,12 +919,18 @@ main (int argc, char *argv[])
   g_test_add_func ("/conversion/unicode", test_unicode_conversions);
   g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
   g_test_add_func ("/conversion/filename-display", test_filename_display);
-  g_test_add_func ("/conversion/locale-embedded-nul", test_locale_embedded_nul);
-  g_test_add_func ("/conversion/locale-embedded-nul/subprocess/utf8", test_locale_embedded_nul_utf8);
-  g_test_add_func ("/conversion/locale-embedded-nul/subprocess/iconv", test_locale_embedded_nul_iconv);
-  g_test_add_func ("/conversion/filename-embedded-nul", test_filename_embedded_nul);
-  g_test_add_func ("/conversion/filename-embedded-nul/subprocess/utf8", test_filename_embedded_nul_utf8);
-  g_test_add_func ("/conversion/filename-embedded-nul/subprocess/iconv", test_filename_embedded_nul_iconv);
+  g_test_add_func ("/conversion/locale-to-utf8/embedded-nul", test_locale_to_utf8_embedded_nul);
+  g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", 
test_locale_to_utf8_embedded_nul_utf8);
+  g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", 
test_locale_to_utf8_embedded_nul_iconv);
+  g_test_add_func ("/conversion/locale-from-utf8/embedded-nul", test_locale_from_utf8_embedded_nul);
+  g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", 
test_locale_from_utf8_embedded_nul_utf8);
+  g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", 
test_locale_from_utf8_embedded_nul_iconv);
+  g_test_add_func ("/conversion/filename-to-utf8/embedded-nul", test_filename_to_utf8_embedded_nul);
+  g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", 
test_filename_to_utf8_embedded_nul_utf8);
+  g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", 
test_filename_to_utf8_embedded_nul_iconv);
+  g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul);
+  g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", 
test_filename_from_utf8_embedded_nul_utf8);
+  g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", 
test_filename_from_utf8_embedded_nul_iconv);
 
   return g_test_run ();
 }
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]