[glib] gconvert: Consistently validate inputs and outputs for embedded NULs
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib] gconvert: Consistently validate inputs and outputs for embedded NULs
- Date: Fri, 19 Jan 2018 12:11:06 +0000 (UTC)
commit f35a6a70313bae374141f4297a2d8995c7935652
Author: Mikhail Zabaluev <mikhail zabaluev gmail com>
Date: Wed Jan 17 09:25:23 2018 +0200
gconvert: Consistently validate inputs and outputs for embedded NULs
String inputs to convenience conversion functions g_locale_from_utf8(),
g_filename_from_utf8(), and g_filename_to_utf8(), are annotated for the
bindings as NUL-terminated strings of (type utf8) or (type filename).
There is also a len parameter that allows converting part of the string,
but it is exposed to the bindings as a value independent from the string
buffer. Absent any more sophisticated ways to annotate, the way to
provide a safeguard against len argument values longer than the
string length is to check that no nul is encountered within the first
len bytes of the string. strdup_len() includes this check as part of
UTF-8 validation, but g_convert() permits embedded nuls.
For g_filename_from_utf8(), also check the output to prevent embedded NUL
bytes. It's not safe to allow embedded NULs in a string that is going
to be used as (type filename), and no known bytestring encoding for
file names allows them.
https://bugzilla.gnome.org/show_bug.cgi?id=792516
glib/gconvert.c | 99 ++++++++++++++++++++++++---------
glib/tests/convert.c | 152 +++++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 204 insertions(+), 47 deletions(-)
---
diff --git a/glib/gconvert.c b/glib/gconvert.c
index 586b53a..cef72ca 100644
--- a/glib/gconvert.c
+++ b/glib/gconvert.c
@@ -866,28 +866,67 @@ strdup_len (const gchar *string,
return g_strndup (string, real_len);
}
+typedef enum
+{
+ CONVERT_CHECK_NO_NULS_IN_INPUT = 1 << 0,
+ CONVERT_CHECK_NO_NULS_IN_OUTPUT = 1 << 1
+} ConvertCheckFlags;
+
+/*
+ * Convert from @string in the encoding identified by @from_codeset,
+ * returning a string in the encoding identifed by @to_codeset.
+ * @len can be negative if @string is nul-terminated, or a non-negative
+ * value in bytes. Flags defined in #ConvertCheckFlags can be set in @flags
+ * to check the input, the output, or both, for embedded nul bytes.
+ * On success, @bytes_read, if provided, will be set to the number of bytes
+ * in @string up to @len or the terminating nul byte, and @bytes_written, if
+ * provided, will be set to the number of output bytes written into the
+ * returned buffer, excluding the terminating nul sequence.
+ * On error, @bytes_read will be set to the byte offset after the last valid
+ * sequence in @string, and @bytes_written will be set to 0.
+ */
static gchar *
-convert_to_utf8 (const gchar *opsysstring,
- gssize len,
- const gchar *charset,
- gsize *bytes_read,
- gsize *bytes_written,
- GError **error)
+convert_checked (const gchar *string,
+ gssize len,
+ const gchar *to_codeset,
+ const gchar *from_codeset,
+ ConvertCheckFlags flags,
+ gsize *bytes_read,
+ gsize *bytes_written,
+ GError **error)
{
- gchar *utf8;
+ gchar *out;
gsize outbytes;
- utf8 = g_convert (opsysstring, len, "UTF-8", charset,
- bytes_read, &outbytes, error);
- if (utf8 == NULL)
+ if ((flags & CONVERT_CHECK_NO_NULS_IN_INPUT) && len > 0)
+ {
+ const gchar *early_nul = memchr (string, '\0', len);
+ if (early_nul != NULL)
+ {
+ if (bytes_read)
+ *bytes_read = early_nul - string;
+ if (bytes_written)
+ *bytes_written = 0;
+
+ g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
+ _("Embedded NUL byte in conversion input"));
+ return NULL;
+ }
+ }
+
+ out = g_convert (string, len, to_codeset, from_codeset,
+ bytes_read, &outbytes, error);
+ if (out == NULL)
{
if (bytes_written)
*bytes_written = 0;
return NULL;
}
- if (memchr (utf8, '\0', outbytes) != NULL)
+
+ if ((flags & CONVERT_CHECK_NO_NULS_IN_OUTPUT)
+ && memchr (out, '\0', outbytes) != NULL)
{
- g_free (utf8);
+ g_free (out);
if (bytes_written)
*bytes_written = 0;
g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL,
@@ -897,7 +936,7 @@ convert_to_utf8 (const gchar *opsysstring,
if (bytes_written)
*bytes_written = outbytes;
- return utf8;
+ return out;
}
/**
@@ -948,7 +987,8 @@ g_locale_to_utf8 (const gchar *opsysstring,
if (g_get_charset (&charset))
return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
else
- return convert_to_utf8 (opsysstring, len, charset,
+ return convert_checked (opsysstring, len, "UTF-8", charset,
+ CONVERT_CHECK_NO_NULS_IN_OUTPUT,
bytes_read, bytes_written, error);
}
@@ -975,8 +1015,8 @@ g_locale_to_utf8 (const gchar *opsysstring,
* system) in the [current locale][setlocale]. On Windows this means
* the system codepage.
*
- * The input string should not contain nul characters even if the @len
- * argument is positive. A nul character found inside the string may result
+ * The input string shall not contain nul characters even if the @len
+ * argument is positive. A nul character found inside the string will result
* in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
* input that may contain embedded nul characters.
*
@@ -995,8 +1035,9 @@ g_locale_from_utf8 (const gchar *utf8string,
if (g_get_charset (&charset))
return strdup_len (utf8string, len, bytes_read, bytes_written, error);
else
- return g_convert (utf8string, len,
- charset, "UTF-8", bytes_read, bytes_written, error);
+ return convert_checked (utf8string, len, charset, "UTF-8",
+ CONVERT_CHECK_NO_NULS_IN_INPUT,
+ bytes_read, bytes_written, error);
}
#ifndef G_PLATFORM_WIN32
@@ -1184,12 +1225,12 @@ get_filename_charset (const gchar **filename_charset)
* for filenames; on other platforms, this function indirectly depends on
* the [current locale][setlocale].
*
+ * The input string shall not contain nul characters even if the @len
+ * argument is positive. A nul character found inside the string will result
+ * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
* If the source encoding is not UTF-8 and the conversion output contains a
* nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
- * function returns %NULL.
- * If the source encoding is UTF-8, an embedded nul character is treated with
- * the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
- * earlier versions of this library. Use g_convert() to produce output that
+ * function returns %NULL. Use g_convert() to produce output that
* may contain embedded nul characters.
*
* Returns: The converted string, or %NULL on an error.
@@ -1208,7 +1249,9 @@ g_filename_to_utf8 (const gchar *opsysstring,
if (get_filename_charset (&charset))
return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
else
- return convert_to_utf8 (opsysstring, len, charset,
+ return convert_checked (opsysstring, len, "UTF-8", charset,
+ CONVERT_CHECK_NO_NULS_IN_INPUT |
+ CONVERT_CHECK_NO_NULS_IN_OUTPUT,
bytes_read, bytes_written, error);
}
@@ -1235,8 +1278,8 @@ g_filename_to_utf8 (const gchar *opsysstring,
* on other platforms, this function indirectly depends on the
* [current locale][setlocale].
*
- * The input string should not contain nul characters even if the @len
- * argument is positive. A nul character found inside the string may result
+ * The input string shall not contain nul characters even if the @len
+ * argument is positive. A nul character found inside the string will result
* in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Note that nul bytes are
* prohibited in all filename encodings that GLib is known to work with.
*
@@ -1255,8 +1298,10 @@ g_filename_from_utf8 (const gchar *utf8string,
if (get_filename_charset (&charset))
return strdup_len (utf8string, len, bytes_read, bytes_written, error);
else
- return g_convert (utf8string, len,
- charset, "UTF-8", bytes_read, bytes_written, error);
+ return convert_checked (utf8string, len, charset, "UTF-8",
+ CONVERT_CHECK_NO_NULS_IN_INPUT |
+ CONVERT_CHECK_NO_NULS_IN_OUTPUT,
+ bytes_read, bytes_written, error);
}
/* Test of haystack has the needle prefix, comparing case
diff --git a/glib/tests/convert.c b/glib/tests/convert.c
index fada356..695d7ce 100644
--- a/glib/tests/convert.c
+++ b/glib/tests/convert.c
@@ -685,11 +685,11 @@ test_filename_display (void)
}
static void
-test_locale_embedded_nul (void)
+test_locale_to_utf8_embedded_nul (void)
{
- g_test_trap_subprocess ("/conversion/locale-embedded-nul/subprocess/utf8", 0, 0);
+ g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
g_test_trap_assert_passed ();
- g_test_trap_subprocess ("/conversion/locale-embedded-nul/subprocess/iconv", 0, 0);
+ g_test_trap_subprocess ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
g_test_trap_assert_passed ();
}
@@ -697,7 +697,7 @@ test_locale_embedded_nul (void)
* result in an error.
*/
static void
-test_locale_embedded_nul_utf8 (void)
+test_locale_to_utf8_embedded_nul_utf8 (void)
{
gchar *res;
gsize bytes_read;
@@ -719,7 +719,7 @@ test_locale_embedded_nul_utf8 (void)
* when converted from non-UTF8 input, result in an error.
*/
static void
-test_locale_embedded_nul_iconv (void)
+test_locale_to_utf8_embedded_nul_iconv (void)
{
gchar *res;
GError *error = NULL;
@@ -736,11 +736,64 @@ test_locale_embedded_nul_iconv (void)
}
static void
-test_filename_embedded_nul (void)
+test_locale_from_utf8_embedded_nul (void)
{
- g_test_trap_subprocess ("/conversion/filename-embedded-nul/subprocess/utf8", 0, 0);
+ g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
g_test_trap_assert_passed ();
- g_test_trap_subprocess ("/conversion/filename-embedded-nul/subprocess/iconv", 0, 0);
+ g_test_trap_subprocess ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
+ g_test_trap_assert_passed ();
+}
+
+/* Test that embedded nul characters in input to g_locale_from_utf8(),
+ * when converting (copying) to UTF-8 output, result in an error.
+ */
+static void
+test_locale_from_utf8_embedded_nul_utf8 (void)
+{
+ gchar *res;
+ gsize bytes_read;
+ GError *error = NULL;
+
+ setlocale (LC_ALL, "");
+ g_setenv ("CHARSET", "UTF-8", TRUE);
+ g_assert_true (g_get_charset (NULL));
+
+ res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
+
+ g_assert_null (res);
+ g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+ g_assert_cmpuint (bytes_read, ==, 2);
+ g_error_free (error);
+}
+
+/* Test that embedded nul characters in input to g_locale_from_utf8(),
+ * when converting to non-UTF-8 output, result in an error.
+ */
+static void
+test_locale_from_utf8_embedded_nul_iconv (void)
+{
+ gchar *res;
+ gsize bytes_read;
+ GError *error = NULL;
+
+ setlocale (LC_ALL, "C");
+ g_setenv ("CHARSET", "US-ASCII", TRUE);
+ g_assert_false (g_get_charset (NULL));
+
+ res = g_locale_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
+
+ g_assert_null (res);
+ g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+ g_assert_cmpuint (bytes_read, ==, 2);
+ g_error_free (error);
+}
+
+static void
+test_filename_to_utf8_embedded_nul (void)
+{
+ g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8", 0, 0);
+ g_test_trap_assert_passed ();
+ g_test_trap_subprocess ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv", 0, 0);
g_test_trap_assert_passed ();
}
@@ -748,7 +801,7 @@ test_filename_embedded_nul (void)
* result in an error.
*/
static void
-test_filename_embedded_nul_utf8 (void)
+test_filename_to_utf8_embedded_nul_utf8 (void)
{
gchar *res;
gsize bytes_read;
@@ -765,22 +818,75 @@ test_filename_embedded_nul_utf8 (void)
g_error_free (error);
}
-/* Test that embedded nul characters in output of g_filename_to_utf8(),
- * when converted from non-UTF8 input, result in an error.
+/* Test that embedded nul characters in non-UTF-8 input of g_filename_to_utf8()
+ * result in an error.
*/
static void
-test_filename_embedded_nul_iconv (void)
+test_filename_to_utf8_embedded_nul_iconv (void)
{
gchar *res;
+ gsize bytes_read;
GError *error = NULL;
g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
g_assert_false (g_get_filename_charsets (NULL));
- res = g_filename_to_utf8 ("ab\0c", 4, NULL, NULL, &error);
+ res = g_filename_to_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
g_assert_null (res);
- g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL);
+ g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+ g_assert_cmpuint (bytes_read, ==, 2);
+ g_error_free (error);
+}
+
+static void
+test_filename_from_utf8_embedded_nul (void)
+{
+ g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8", 0, 0);
+ g_test_trap_assert_passed ();
+ g_test_trap_subprocess ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv", 0, 0);
+ g_test_trap_assert_passed ();
+}
+
+/* Test that embedded nul characters in input to g_filename_from_utf8(),
+ * when converting (copying) to UTF-8 output, result in an error.
+ */
+static void
+test_filename_from_utf8_embedded_nul_utf8 (void)
+{
+ gchar *res;
+ gsize bytes_read;
+ GError *error = NULL;
+
+ g_setenv ("G_FILENAME_ENCODING", "UTF-8", TRUE);
+ g_assert_true (g_get_filename_charsets (NULL));
+
+ res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
+
+ g_assert_null (res);
+ g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+ g_assert_cmpuint (bytes_read, ==, 2);
+ g_error_free (error);
+}
+
+/* Test that embedded nul characters in input to g_filename_from_utf8(),
+ * when converting to non-UTF-8 output, result in an error.
+ */
+static void
+test_filename_from_utf8_embedded_nul_iconv (void)
+{
+ gchar *res;
+ gsize bytes_read;
+ GError *error = NULL;
+
+ g_setenv ("G_FILENAME_ENCODING", "US-ASCII", TRUE);
+ g_assert_false (g_get_filename_charsets (NULL));
+
+ res = g_filename_from_utf8 ("ab\0c", 4, &bytes_read, NULL, &error);
+
+ g_assert_null (res);
+ g_assert_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE);
+ g_assert_cmpuint (bytes_read, ==, 2);
g_error_free (error);
}
@@ -813,12 +919,18 @@ main (int argc, char *argv[])
g_test_add_func ("/conversion/unicode", test_unicode_conversions);
g_test_add_func ("/conversion/filename-utf8", test_filename_utf8);
g_test_add_func ("/conversion/filename-display", test_filename_display);
- g_test_add_func ("/conversion/locale-embedded-nul", test_locale_embedded_nul);
- g_test_add_func ("/conversion/locale-embedded-nul/subprocess/utf8", test_locale_embedded_nul_utf8);
- g_test_add_func ("/conversion/locale-embedded-nul/subprocess/iconv", test_locale_embedded_nul_iconv);
- g_test_add_func ("/conversion/filename-embedded-nul", test_filename_embedded_nul);
- g_test_add_func ("/conversion/filename-embedded-nul/subprocess/utf8", test_filename_embedded_nul_utf8);
- g_test_add_func ("/conversion/filename-embedded-nul/subprocess/iconv", test_filename_embedded_nul_iconv);
+ g_test_add_func ("/conversion/locale-to-utf8/embedded-nul", test_locale_to_utf8_embedded_nul);
+ g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/utf8",
test_locale_to_utf8_embedded_nul_utf8);
+ g_test_add_func ("/conversion/locale-to-utf8/embedded-nul/subprocess/iconv",
test_locale_to_utf8_embedded_nul_iconv);
+ g_test_add_func ("/conversion/locale-from-utf8/embedded-nul", test_locale_from_utf8_embedded_nul);
+ g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/utf8",
test_locale_from_utf8_embedded_nul_utf8);
+ g_test_add_func ("/conversion/locale-from-utf8/embedded-nul/subprocess/iconv",
test_locale_from_utf8_embedded_nul_iconv);
+ g_test_add_func ("/conversion/filename-to-utf8/embedded-nul", test_filename_to_utf8_embedded_nul);
+ g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/utf8",
test_filename_to_utf8_embedded_nul_utf8);
+ g_test_add_func ("/conversion/filename-to-utf8/embedded-nul/subprocess/iconv",
test_filename_to_utf8_embedded_nul_iconv);
+ g_test_add_func ("/conversion/filename-from-utf8/embedded-nul", test_filename_from_utf8_embedded_nul);
+ g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/utf8",
test_filename_from_utf8_embedded_nul_utf8);
+ g_test_add_func ("/conversion/filename-from-utf8/embedded-nul/subprocess/iconv",
test_filename_from_utf8_embedded_nul_iconv);
return g_test_run ();
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]