[gjs] Parse input as UTF-8, not ISO-8859-1



commit 4030b1e4a5ddb351b52f34690505d962372d447c
Author: Colin Walters <walters verbum org>
Date:   Sat Jun 4 11:25:10 2011 -0400

    Parse input as UTF-8, not ISO-8859-1
    
    Implement this by assuming input is UTF-8, convert it to UTF-16, and
    pass it to the "UC" variants of the JSAPI.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=581028

 Makefile-test.am       |    1 +
 gjs/context.c          |   21 ++++++++++++++-------
 modules/console.c      |   17 +++++++++++++++--
 test/js/testUnicode.js |   12 ++++++++++++
 4 files changed, 42 insertions(+), 9 deletions(-)
---
diff --git a/Makefile-test.am b/Makefile-test.am
index dcb814a..60ff749 100644
--- a/Makefile-test.am
+++ b/Makefile-test.am
@@ -187,6 +187,7 @@ EXTRA_DIST +=					\
 	test/js/testMainloop.js			\
 	test/js/testSignals.js			\
 	test/js/testTweener.js			\
+	test/js/testUnicode.js			\
 	test/run-with-dbus			\
 	test/test-bus.conf
 
diff --git a/gjs/context.c b/gjs/context.c
index 758a25a..75b00e8 100644
--- a/gjs/context.c
+++ b/gjs/context.c
@@ -901,6 +901,8 @@ gjs_context_eval(GjsContext *js_context,
     int line_number;
     jsval retval;
     gboolean success;
+    gunichar2 *u16_script;
+    glong u16_script_len;
 
     g_object_ref(G_OBJECT(js_context));
 
@@ -926,6 +928,10 @@ gjs_context_eval(GjsContext *js_context,
         }
     }
 
+    if ((u16_script = g_utf8_to_utf16 (script, script_len, NULL, &u16_script_len, error)) == NULL)
+        return FALSE;
+    g_assert (u16_script_len < G_MAXUINT);
+
     /* log and clear exception if it's set (should not be, normally...) */
     if (gjs_log_exception(js_context->context,
                              NULL)) {
@@ -940,13 +946,13 @@ gjs_context_eval(GjsContext *js_context,
     JS_BeginRequest(js_context->context);
 
     retval = JSVAL_VOID;
-    if (!JS_EvaluateScript(js_context->context,
-                           js_context->global,
-                           script,
-                           script_len >= 0 ? script_len : (gssize) strlen(script),
-                           filename,
-                           line_number,
-                           &retval)) {
+    if (!JS_EvaluateUCScript(js_context->context,
+                             js_context->global,
+                             (const jschar*)u16_script,
+                             (guint) u16_script_len,
+                             filename,
+                             line_number,
+                             &retval)) {
         char *message;
 
         gjs_debug(GJS_DEBUG_CONTEXT,
@@ -973,6 +979,7 @@ gjs_context_eval(GjsContext *js_context,
 
         success = FALSE;
     }
+    g_free (u16_script);
 
     gjs_debug(GJS_DEBUG_CONTEXT,
               "Script evaluation succeeded");
diff --git a/modules/console.c b/modules/console.c
index d87b4a7..403c1dd 100644
--- a/modules/console.c
+++ b/modules/console.c
@@ -170,8 +170,11 @@ gjs_console_interact(JSContext *context,
     JSString *str;
     GString *buffer = NULL;
     char *temp_buf = NULL;
+    gunichar2 *u16_buffer;
+    glong u16_buffer_len;
     int lineno;
     int startline;
+    GError *error = NULL;
     FILE *file = stdin;
 
     JS_SetErrorReporter(context, gjs_console_error_reporter);
@@ -199,11 +202,21 @@ gjs_console_interact(JSContext *context,
 #ifdef HAVE_JS_DECODEUTF8
         } while (!JS_BufferIsCompilableUnit(context, JS_TRUE, object, buffer->str, buffer->len));
 #else
+        /* Note in this case, we are trying to parse the buffer as
+         * ISO-8859-1 which is broken for non-ASCII.
+         */
         } while (!JS_BufferIsCompilableUnit(context, object, buffer->str, buffer->len));
 #endif
 
-        script = JS_CompileScript(context, object, buffer->str, buffer->len, "typein",
-                                  startline);
+        if ((u16_buffer = g_utf8_to_utf16 (buffer->str, buffer->len, NULL, &u16_buffer_len, &error)) == NULL) {
+            g_printerr ("%s\n", error->message);
+            g_clear_error (&error);
+            continue;
+        }
+
+        script = JS_CompileUCScript(context, object, u16_buffer, u16_buffer_len, "typein",
+                                    startline);
+        g_free (u16_buffer);
 
         if (script)
             JS_ExecuteScript(context, object, script, &result);
diff --git a/test/js/testUnicode.js b/test/js/testUnicode.js
new file mode 100644
index 0000000..a9132ed
--- /dev/null
+++ b/test/js/testUnicode.js
@@ -0,0 +1,12 @@
+// application/javascript;version=1.8
+
+function testUnicode() {
+    assertEquals(6, 'Погода'.length);
+    assertEquals(1055, 'Погода'.charCodeAt(0));
+    assertEquals(1086, 'Погода'.charCodeAt(3));
+    assertEquals("\u65e5", "日本語".charAt(0));
+    assertEquals("\u672c", "日本語".charAt(1));
+    assertEquals("\u8a9e", "日本語".charAt(2));
+}
+
+gjstestRun();



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]