[glib] GVariant: strings are now utf8



commit 9eeab5868f0d8633f63e40f61263cd731ad99d0e
Author: Ryan Lortie <desrt desrt ca>
Date:   Sun Apr 25 13:36:36 2010 -0500

    GVariant: strings are now utf8
    
     - modify serialiser validation function to enforce utf8 encoding
     - add documentation to g_variant_new_string(), g_variant_get_string(),
       g_variant_dup_string()
     - add 2 new test cases to check that it works

 glib/gvariant-serialiser.c |   10 +++-------
 glib/gvariant.c            |   12 +++++++++---
 glib/tests/gvariant.c      |   33 +++++++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 10 deletions(-)
---
diff --git a/glib/gvariant-serialiser.c b/glib/gvariant-serialiser.c
index b365a49..cf23812 100644
--- a/glib/gvariant-serialiser.c
+++ b/glib/gvariant-serialiser.c
@@ -1561,15 +1561,11 @@ gboolean
 g_variant_serialiser_is_string (gconstpointer data,
                                 gsize         size)
 {
-  const gchar *string = data;
-
-  if (size == 0)
-    return FALSE;
+  const gchar *end;
 
-  if (string[size - 1] != '\0')
-    return FALSE;
+  g_utf8_validate (data, size, &end);
 
-  return strlen (string) == size - 1;
+  return data == end - (size - 1);
 }
 
 /* < private >
diff --git a/glib/gvariant.c b/glib/gvariant.c
index c4ea71a..f1f118c 100644
--- a/glib/gvariant.c
+++ b/glib/gvariant.c
@@ -957,11 +957,13 @@ g_variant_get_fixed_array (GVariant *value,
 /* String type constructor/getters/validation {{{1 */
 /**
  * g_variant_new_string:
- * @string: a normal C nul-terminated string
+ * @string: a normal utf8 nul-terminated string
  * @returns: a new string #GVariant instance
  *
  * Creates a string #GVariant with the contents of @string.
  *
+ * @string must be valid utf8.
+ *
  * Since: 2.24
  **/
 GVariant *
@@ -1063,12 +1065,14 @@ g_variant_is_signature (const gchar *string)
  * g_variant_get_string:
  * @value: a string #GVariant instance
  * @length: a pointer to a #gsize, to store the length
- * @returns: the constant string
+ * @returns: the constant string, utf8 encoded
  *
  * Returns the string value of a #GVariant instance with a string
  * type.  This includes the types %G_VARIANT_TYPE_STRING,
  * %G_VARIANT_TYPE_OBJECT_PATH and %G_VARIANT_TYPE_SIGNATURE.
  *
+ * The string will always be utf8 encoded.
+ *
  * If @length is non-%NULL then the length of the string (in bytes) is
  * returned there.  For trusted values, this information is already
  * known.  For untrusted values, a strlen() will be performed.
@@ -1139,11 +1143,13 @@ g_variant_get_string (GVariant *value,
  * g_variant_dup_string:
  * @value: a string #GVariant instance
  * @length: a pointer to a #gsize, to store the length
- * @returns: a newly allocated string
+ * @returns: a newly allocated string, utf8 encoded
  *
  * Similar to g_variant_get_string() except that instead of returning
  * a constant string, the string is duplicated.
  *
+ * The string will always be utf8 encoded.
+ *
  * The return value must be freed using g_free().
  *
  * Since: 2.24
diff --git a/glib/tests/gvariant.c b/glib/tests/gvariant.c
index b37f5a7..3dc2ac0 100644
--- a/glib/tests/gvariant.c
+++ b/glib/tests/gvariant.c
@@ -1766,6 +1766,7 @@ test_strings (void)
 #define is_sig            is_string | 4
     { is_sig,       1, "" },
     { is_nval,      0, NULL },
+    { is_nval,     13, "hello\xffworld!" },
     { is_string,   13, "hello world!" },
     { is_nval,     13, "hello world\0" },
     { is_nval,     13, "hello\0world!" },
@@ -2665,6 +2666,37 @@ test_container (void)
 }
 
 static void
+test_utf8 (void)
+{
+  const gchar invalid[] = "hello\xffworld";
+  GVariant *value;
+
+  /* ensure that the test data is not valid utf8... */
+  g_assert (!g_utf8_validate (invalid, -1, NULL));
+
+  /* load the data untrusted */
+  value = g_variant_new_from_data (G_VARIANT_TYPE_STRING,
+                                   invalid, sizeof invalid,
+                                   FALSE, NULL, NULL);
+
+  /* ensure that the problem is caught and we get valid UTF-8 */
+  g_assert (g_utf8_validate (g_variant_get_string (value, NULL), -1, NULL));
+  g_variant_unref (value);
+
+
+  /* now load it trusted */
+  value = g_variant_new_from_data (G_VARIANT_TYPE_STRING,
+                                   invalid, sizeof invalid,
+                                   TRUE, NULL, NULL);
+
+  /* ensure we get the invalid data (ie: make sure that time wasn't
+   * wasted on validating data that was marked as trusted)
+   */
+  g_assert (g_variant_get_string (value, NULL) == invalid);
+  g_variant_unref (value);
+}
+
+static void
 test_containers (void)
 {
   gint i;
@@ -3725,6 +3757,7 @@ main (int argc, char **argv)
       g_free (testname);
     }
 
+  g_test_add_func ("/gvariant/utf8", test_utf8);
   g_test_add_func ("/gvariant/containers", test_containers);
   g_test_add_func ("/gvariant/format-strings", test_format_strings);
   g_test_add_func ("/gvariant/invalid-varargs", test_invalid_varargs);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]