[gjs] Validate UTF-8 strings in gjs_string_to_utf8()
- From: Owen Taylor <otaylor src gnome org>
- To: svn-commits-list gnome org
- Subject: [gjs] Validate UTF-8 strings in gjs_string_to_utf8()
- Date: Tue, 5 May 2009 16:25:03 -0400 (EDT)
commit 62f57cb01cd2fdc8a8c1df1e90098ce7251e0aa9
Author: Owen W. Taylor <otaylor fishsoup net>
Date: Thu Apr 30 17:45:58 2009 -0400
Validate UTF-8 strings in gjs_string_to_utf8()
Make sure that when converting a Javascript string to UTF-8 it is valid
in GLib terms and doesn't contain any embedded NULs.
http://bugzilla.gnome.org/show_bug.cgi?id=580947
---
gjs/jsapi-util-string.c | 27 ++++++++++++++++++++++++++-
test/js/testLocale.js | 13 +++++++++++++
2 files changed, 39 insertions(+), 1 deletions(-)
diff --git a/gjs/jsapi-util-string.c b/gjs/jsapi-util-string.c
index cea82b7..bad5f32 100644
--- a/gjs/jsapi-util-string.c
+++ b/gjs/jsapi-util-string.c
@@ -23,6 +23,8 @@
#include <config.h>
+#include <string.h>
+
#include "jsapi-util.h"
JSBool
@@ -33,6 +35,8 @@ gjs_string_to_utf8(JSContext *context,
jschar *s;
size_t s_length;
char *utf8_string;
+ long read_items;
+ long utf8_length;
GError *error;
if (!JSVAL_IS_STRING(string_val)) {
@@ -47,7 +51,7 @@ gjs_string_to_utf8(JSContext *context,
error = NULL;
utf8_string = g_utf16_to_utf8(s,
(glong)s_length,
- NULL, NULL,
+ &read_items, &utf8_length,
&error);
if (!utf8_string) {
@@ -59,6 +63,27 @@ gjs_string_to_utf8(JSContext *context,
return JS_FALSE;
}
+ if ((size_t)read_items != s_length) {
+ gjs_throw(context, "JS string contains embedded NULs");
+ g_free(utf8_string);
+ return JS_FALSE;
+ }
+
+ /* Our assumption is that the string is being converted to UTF-8
+ * in order to use with GLib-style APIs; Javascript has a looser
+ * sense of valid Unicode than GLib, so validate here to
+ * prevent problems later on. Given the success of the above,
+ * the only thing that could really be wrong here is including
+ * non-characters like a byte-reversed BOM. If the validation
+ * ever becomes a bottleneck, we could do an inline-special
+ * case of all-ASCII.
+ */
+ if (!g_utf8_validate (utf8_string, utf8_length, NULL)) {
+ gjs_throw(context, "JS string contains invalid Unicode characters");
+ g_free(utf8_string);
+ return JS_FALSE;
+ }
+
*utf8_string_p = utf8_string;
return JS_TRUE;
}
diff --git a/test/js/testLocale.js b/test/js/testLocale.js
index 1984efe..214b5e6 100644
--- a/test/js/testLocale.js
+++ b/test/js/testLocale.js
@@ -41,4 +41,17 @@ function testToLocaleCompare() {
assertRaises(function() { "a".localeCompare("\ud800"); });
}
+function testInvalidStrings() {
+ // Not really related to locale handling - here we are testing
+ // that gjs_string_to_utf8() properly catches things we would
+ // choke on later.
+
+ // Unpaired surrogate
+ assertRaises(function() { "\ud800".toLocaleLowerCase(); });
+ // Embedded NUL
+ assertRaises(function() { "\u0000".toLocaleLowerCase(); });
+ // Byte-reversed BOM (an example of a non-character)
+ assertRaises(function() { "\ufffe".toLocaleLowerCase(); });
+}
+
gjstestRun();
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]