[gjs: 2/3] context: Fix gjs_context_eval() for non-zero-terminated strings



commit 270dc48c25caf65d41b2b7ce5c9f03cac0809169
Author: Philip Chimento <philip chimento gmail com>
Date:   Mon Apr 1 23:37:30 2019 -0700

    context: Fix gjs_context_eval() for non-zero-terminated strings
    
    Calling gjs_context_eval() with a non-zero-terminated string has
    apparently been broken for quite a long time. I guess nobody ever does
    that.
    
    This is a surprisingly complicated fix for a simple-sounding problem.
    The complication is due to the passed-in strlen being ignored in more
    than one place: both in gjs_strip_unix_shebang() and in the code that
    converts UTF-8 to UTF-16.
    
    In addition, gjs_strip_unix_shebang() would access invalid memory if
    given a 1-length string or a non-zero-terminated string.
    
    We fix the UTF-16 conversion code, and replace gjs_strip_unix_shebang()
    with a safer version using C++ strings (which we have anyway after
    converting to UTF-16.) This new function, gjs_unix_shebang_len(),
    returns the offset that must be added to the string's starting position,
    in order to skip the shebang line.
    
    It would be better in the future to return a std::u16string_view from
    gjs_unix_shebang_len(), but that is not yet available in C++14.
    
    This bug was found by compiling with -Wunused-parameter!

 gi/foreign.cpp     |  3 +-
 gjs/context.cpp    | 27 ++++++-----------
 gjs/jsapi-util.cpp | 89 ++++++++++++++++++++++++------------------------------
 gjs/jsapi-util.h   | 11 ++++---
 gjs/module.cpp     | 42 +++++++++-----------------
 test/gjs-tests.cpp | 83 ++++++++++++++++++++++++++------------------------
 6 files changed, 114 insertions(+), 141 deletions(-)
---
diff --git a/gi/foreign.cpp b/gi/foreign.cpp
index bb65da49..3082c6c0 100644
--- a/gi/foreign.cpp
+++ b/gi/foreign.cpp
@@ -77,8 +77,7 @@ gjs_foreign_load_foreign_module(JSContext *context,
         script = g_strdup_printf("imports.%s;", gi_namespace);
         JS::RootedValue retval(context);
         GjsContextPrivate* gjs = GjsContextPrivate::from_cx(context);
-        if (!gjs->eval_with_scope(nullptr, script, strlen(script), "<internal>",
-                                  &retval)) {
+        if (!gjs->eval_with_scope(nullptr, script, -1, "<internal>", &retval)) {
             g_critical("ERROR importing foreign module %s\n", gi_namespace);
             g_free(script);
             return false;
diff --git a/gjs/context.cpp b/gjs/context.cpp
index c0020328..e99d1af3 100644
--- a/gjs/context.cpp
+++ b/gjs/context.cpp
@@ -931,7 +931,7 @@ gjs_context_eval_file(GjsContext    *js_context,
  * GjsContextPrivate::eval_with_scope:
  * @scope_object: an object to use as the global scope, or nullptr
  * @script: JavaScript program encoded in UTF-8
- * @script_len: length of @script
+ * @script_len: length of @script, or -1 if @script is 0-terminated
  * @filename: filename to use as the origin of @script
  * @retval: location for the return value of @script
  *
@@ -945,14 +945,7 @@ bool GjsContextPrivate::eval_with_scope(JS::HandleObject scope_object,
                                         const char* script, ssize_t script_len,
                                         const char* filename,
                                         JS::MutableHandleValue retval) {
-    int start_line_number = 1;
     JSAutoRequest ar(m_cx);
-    size_t real_len = script_len;
-
-    if (script_len < 0)
-        real_len = strlen(script);
-
-    script = gjs_strip_unix_shebang(script, &real_len, &start_line_number);
 
     /* log and clear exception if it's set (should not be, normally...) */
     if (JS_IsExceptionPending(m_cx)) {
@@ -964,18 +957,13 @@ bool GjsContextPrivate::eval_with_scope(JS::HandleObject scope_object,
     if (!eval_obj)
         eval_obj = JS_NewPlainObject(m_cx);
 
-    JS::CompileOptions options(m_cx);
-    options.setFileAndLine(filename, start_line_number);
+    std::u16string utf16_string = gjs_utf8_script_to_utf16(script, script_len);
 
-#if defined(G_OS_WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1900))
-    std::wstring wscript = gjs_win32_vc140_utf8_to_utf16(script);
-    std::u16string utf16_string(reinterpret_cast<const char16_t*>(wscript.c_str()));
-#else
-    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
-    std::u16string utf16_string = convert.from_bytes(script);
-#endif
+    unsigned start_line_number = 1;
+    size_t offset = gjs_unix_shebang_len(utf16_string, &start_line_number);
 
-    JS::SourceBufferHolder buf(utf16_string.c_str(), utf16_string.size(),
+    JS::SourceBufferHolder buf(utf16_string.c_str() + offset,
+                               utf16_string.size() - offset,
                                JS::SourceBufferHolder::NoOwnership);
 
     JS::AutoObjectVector scope_chain(m_cx);
@@ -984,6 +972,9 @@ bool GjsContextPrivate::eval_with_scope(JS::HandleObject scope_object,
         return false;
     }
 
+    JS::CompileOptions options(m_cx);
+    options.setFileAndLine(filename, start_line_number);
+
     if (!JS::Evaluate(m_cx, scope_chain, options, buf, retval))
         return false;
 
diff --git a/gjs/jsapi-util.cpp b/gjs/jsapi-util.cpp
index 72718178..8bb2c66e 100644
--- a/gjs/jsapi-util.cpp
+++ b/gjs/jsapi-util.cpp
@@ -613,57 +613,35 @@ gjs_maybe_gc (JSContext *context)
 }
 
 /**
- * gjs_strip_unix_shebang:
+ * gjs_unix_shebang_len:
  *
- * @script: (in): A pointer to a JS script
- * @script_len: (inout): A pointer to the script length. The
- * pointer will be modified if a shebang is stripped.
- * @new_start_line_number: (out) (allow-none): A pointer to
- * write the start-line number to account for the offset
- * as a result of stripping the shebang.
+ * @script: A JS script
+ * @start_line_number: (out): the new start-line number to account for the
+ * offset as a result of stripping the shebang; can be either 1 or 2.
  *
- * Returns a pointer to the beginning of a script with unix
- * shebangs removed. The outparams are useful to know the
- * new length of the script and on what line of the
+ * Returns the offset in @script where the actual script begins with Unix
+ * shebangs removed. The outparam is useful to know what line of the
  * original script we're executing from, so that any relevant
  * offsets can be applied to the results of an execution pass.
  */
-const char *
-gjs_strip_unix_shebang(const char  *script,
-                       size_t      *script_len,
-                       int         *start_line_number_out)
-{
-    g_assert(script_len);
-
-    /* handle scripts with UNIX shebangs */
-    if (strncmp(script, "#!", 2) == 0) {
-        /* If we found a newline, advance the script by one line */
-        const char *s = (const char *) strstr (script, "\n");
-        if (s != NULL) {
-            if (*script_len > 0)
-                *script_len -= (s + 1 - script);
-            script = s + 1;
-
-            if (start_line_number_out)
-                *start_line_number_out = 2;
-
-            return script;
-        } else {
-            /* Just a shebang */
-            if (start_line_number_out)
-                *start_line_number_out = -1;
-
-            *script_len = 0;
-
-            return NULL;
-        }
+size_t gjs_unix_shebang_len(const std::u16string& script,
+                            unsigned* start_line_number) {
+    g_assert(start_line_number);
+
+    if (script.compare(0, 2, u"#!") != 0) {
+        // No shebang, leave the script unchanged
+        *start_line_number = 1;
+        return 0;
     }
 
-    /* No shebang, return the original script */
-    if (start_line_number_out)
-        *start_line_number_out = 1;
+    *start_line_number = 2;
+
+    size_t newline_pos = script.find('\n', 2);
+    if (newline_pos == std::u16string::npos)
+        return script.size();  // Script consists only of a shebang line
 
-    return script;
+    // Point the offset after the newline
+    return newline_pos + 1;
 }
 
 #if defined(G_OS_WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1900))
@@ -674,17 +652,30 @@ gjs_strip_unix_shebang(const char  *script,
  * obtain from MultiByteToWideChar().  See:
  * 
https://social.msdn.microsoft.com/Forums/en-US/8f40dcd8-c67f-4eba-9134-a19b9178e481/vs-2015-rc-linker-stdcodecvt-error?forum=vcgeneral
  */
-std::wstring gjs_win32_vc140_utf8_to_utf16(const char* str) {
-    int len = MultiByteToWideChar(CP_UTF8, 0, str, -1, nullptr, 0);
-    if (len == 0)
+static std::wstring gjs_win32_vc140_utf8_to_utf16(const char* str,
+                                                  ssize_t len) {
+    int bufsize = MultiByteToWideChar(CP_UTF8, 0, str, len, nullptr, 0);
+    if (bufsize == 0)
         return nullptr;
 
-    std::wstring wstr(len, 0);
-    int result = MultiByteToWideChar(CP_UTF8, 0, str, -1, &wstr[0], len);
+    std::wstring wstr(bufsize, 0);
+    int result = MultiByteToWideChar(CP_UTF8, 0, str, len, &wstr[0], bufsize);
     if (result == 0)
         return nullptr;
 
-    wstr.resize(strlen(str));
+    wstr.resize(len < 0 ? strlen(str) : len);
     return wstr;
 }
 #endif
+
+std::u16string gjs_utf8_script_to_utf16(const char* script, ssize_t len) {
+#if defined(G_OS_WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1900))
+    std::wstring wscript = gjs_win32_vc140_utf8_to_utf16(script, len);
+    return std::u16string(reinterpret_cast<const char16_t*>(wscript.c_str()));
+#else
+    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
+    if (len < 0)
+        return convert.from_bytes(script);
+    return convert.from_bytes(script, script + len);
+#endif
+}
diff --git a/gjs/jsapi-util.h b/gjs/jsapi-util.h
index b0f83b37..287e7483 100644
--- a/gjs/jsapi-util.h
+++ b/gjs/jsapi-util.h
@@ -299,13 +299,14 @@ bool        gjs_unichar_from_string          (JSContext       *context,
 void gjs_maybe_gc (JSContext *context);
 void gjs_gc_if_needed(JSContext *cx);
 
-GJS_USE
-const char * gjs_strip_unix_shebang(const char *script,
-                                    size_t     *script_len,
-                                    int        *new_start_line_number);
-
 G_END_DECLS
 
+GJS_USE
+size_t gjs_unix_shebang_len(const std::u16string& script,
+                            unsigned* start_line_number);
+GJS_USE
+std::u16string gjs_utf8_script_to_utf16(const char* script, ssize_t len);
+
 GJS_JSAPI_RETURN_CONVENTION
 GjsAutoChar gjs_format_stack_trace(JSContext       *cx,
                                    JS::HandleObject saved_frame);
diff --git a/gjs/module.cpp b/gjs/module.cpp
index 696b0a86..f6f6656a 100644
--- a/gjs/module.cpp
+++ b/gjs/module.cpp
@@ -88,27 +88,17 @@ class GjsModule {
 
     /* Carries out the actual execution of the module code */
     GJS_JSAPI_RETURN_CONVENTION
-    bool
-    evaluate_import(JSContext       *cx,
-                    JS::HandleObject module,
-                    const char      *script,
-                    size_t           script_len,
-                    const char      *filename,
-                    int              line_number)
-    {
-        JS::CompileOptions options(cx);
-        options.setFileAndLine(filename, line_number);
-
-#if defined(G_OS_WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1900))
-        std::wstring wscript = gjs_win32_vc140_utf8_to_utf16(script);
-        std::u16string utf16_string(
-            reinterpret_cast<const char16_t*>(wscript.c_str()));
-#else
-        std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
-        std::u16string utf16_string = convert.from_bytes(script);
-#endif
-
-        JS::SourceBufferHolder buf(utf16_string.c_str(), utf16_string.size(),
+    bool evaluate_import(JSContext* cx, JS::HandleObject module,
+                         const char* script, ssize_t script_len,
+                         const char* filename) {
+        std::u16string utf16_string =
+            gjs_utf8_script_to_utf16(script, script_len);
+
+        unsigned start_line_number = 1;
+        size_t offset = gjs_unix_shebang_len(utf16_string, &start_line_number);
+
+        JS::SourceBufferHolder buf(utf16_string.c_str() + offset,
+                                   utf16_string.size() - offset,
                                    JS::SourceBufferHolder::NoOwnership);
 
         JS::AutoObjectVector scope_chain(cx);
@@ -117,6 +107,9 @@ class GjsModule {
             return false;
         }
 
+        JS::CompileOptions options(cx);
+        options.setFileAndLine(filename, start_line_number);
+
         JS::RootedValue ignored_retval(cx);
         if (!JS::Evaluate(cx, scope_chain, options, buf, &ignored_retval))
             return false;
@@ -139,7 +132,6 @@ class GjsModule {
         GError *error = nullptr;
         char *unowned_script;
         size_t script_len = 0;
-        int start_line_number = 1;
 
         if (!(g_file_load_contents(file, nullptr, &unowned_script, &script_len,
                                    nullptr, &error)))
@@ -148,12 +140,8 @@ class GjsModule {
         GjsAutoChar script = unowned_script;  /* steals ownership */
         g_assert(script != nullptr);
 
-        const char *stripped_script =
-            gjs_strip_unix_shebang(script, &script_len, &start_line_number);
-
         GjsAutoChar full_path = g_file_get_parse_name(file);
-        return evaluate_import(cx, module, stripped_script, script_len,
-                               full_path, start_line_number);
+        return evaluate_import(cx, module, script, script_len, full_path);
     }
 
     /* JSClass operations */
diff --git a/test/gjs-tests.cpp b/test/gjs-tests.cpp
index 309270c6..a6533fd9 100644
--- a/test/gjs-tests.cpp
+++ b/test/gjs-tests.cpp
@@ -65,6 +65,19 @@ gjstest_test_func_gjs_context_construct_eval(void)
     g_object_unref (context);
 }
 
+static void gjstest_test_func_gjs_context_eval_non_zero_terminated(void) {
+    GjsAutoUnref<GjsContext> gjs = gjs_context_new();
+    GError* error = NULL;
+    int status;
+
+    // This string is invalid JS if it is treated as zero-terminated
+    bool ok = gjs_context_eval(gjs, "77!", 2, "<input>", &status, &error);
+
+    g_assert_true(ok);
+    g_assert_no_error(error);
+    g_assert_cmpint(status, ==, 77);
+}
+
 static void
 gjstest_test_func_gjs_context_exit(void)
 {
@@ -329,52 +342,37 @@ gjstest_test_func_util_glib_strv_concat_pointers(void)
 static void
 gjstest_test_strip_shebang_no_advance_for_no_shebang(void)
 {
-    const char *script = "foo\nbar";
-    size_t script_len_original = strlen(script);
-    size_t script_len = script_len_original;
-    int        line_number = 1;
-
-    const char *stripped = gjs_strip_unix_shebang(script,
-                                                  &script_len,
-                                                  &line_number);
-
-    g_assert_cmpstr(script, ==, stripped);
-    g_assert(script_len == script_len_original);
-    g_assert(line_number == 1);
+    unsigned line_number = 1;
+    size_t offset = gjs_unix_shebang_len(u"foo\nbar", &line_number);
+
+    g_assert_cmpuint(offset, ==, 0);
+    g_assert_cmpuint(line_number, ==, 1);
+}
+
+static void gjstest_test_strip_shebang_no_advance_for_too_short_string(void) {
+    unsigned line_number = 1;
+    size_t offset = gjs_unix_shebang_len(u"Z", &line_number);
+
+    g_assert_cmpuint(offset, ==, 0);
+    g_assert_cmpuint(line_number, ==, 1);
 }
 
 static void
 gjstest_test_strip_shebang_advance_for_shebang(void)
 {
-    const char *script = "#!foo\nbar";
-    size_t script_len_original = strlen(script);
-    size_t script_len = script_len_original;
-    int        line_number = 1;
-
-    const char *stripped = gjs_strip_unix_shebang(script,
-                                                  &script_len,
-                                                  &line_number);
-
-    g_assert_cmpstr(stripped, ==, "bar");
-    g_assert(script_len == 3);
-    g_assert(line_number == 2);
+    unsigned line_number = 1;
+    size_t offset = gjs_unix_shebang_len(u"#!foo\nbar", &line_number);
+
+    g_assert_cmpuint(offset, ==, 6);
+    g_assert_cmpuint(line_number, ==, 2);
 }
 
-static void
-gjstest_test_strip_shebang_return_null_for_just_shebang(void)
-{
-    const char *script = "#!foo";
-    size_t script_len_original = strlen(script);
-    size_t script_len = script_len_original;
-    int        line_number = 1;
-
-    const char *stripped = gjs_strip_unix_shebang(script,
-                                                  &script_len,
-                                                  &line_number);
-
-    g_assert(stripped == NULL);
-    g_assert(script_len == 0);
-    g_assert(line_number == -1);
+static void gjstest_test_strip_shebang_advance_to_end_for_just_shebang(void) {
+    unsigned line_number = 1;
+    size_t offset = gjs_unix_shebang_len(u"#!foo", &line_number);
+
+    g_assert_cmpuint(offset, ==, 5);
+    g_assert_cmpuint(line_number, ==, 2);
 }
 
 static void
@@ -414,11 +412,16 @@ main(int    argc,
 
     g_test_add_func("/gjs/context/construct/destroy", gjstest_test_func_gjs_context_construct_destroy);
     g_test_add_func("/gjs/context/construct/eval", gjstest_test_func_gjs_context_construct_eval);
+    g_test_add_func("/gjs/context/eval/non-zero-terminated",
+                    gjstest_test_func_gjs_context_eval_non_zero_terminated);
     g_test_add_func("/gjs/context/exit", gjstest_test_func_gjs_context_exit);
     g_test_add_func("/gjs/gobject/js_defined_type", gjstest_test_func_gjs_gobject_js_defined_type);
     g_test_add_func("/gjs/jsutil/strip_shebang/no_shebang", 
gjstest_test_strip_shebang_no_advance_for_no_shebang);
+    g_test_add_func("/gjs/jsutil/strip_shebang/short_string",
+                    gjstest_test_strip_shebang_no_advance_for_too_short_string);
     g_test_add_func("/gjs/jsutil/strip_shebang/have_shebang", 
gjstest_test_strip_shebang_advance_for_shebang);
-    g_test_add_func("/gjs/jsutil/strip_shebang/only_shebang", 
gjstest_test_strip_shebang_return_null_for_just_shebang);
+    g_test_add_func("/gjs/jsutil/strip_shebang/only_shebang",
+                    gjstest_test_strip_shebang_advance_to_end_for_just_shebang);
     g_test_add_func("/gjs/profiler/start_stop", gjstest_test_profiler_start_stop);
     g_test_add_func("/util/glib/strv/concat/null", gjstest_test_func_util_glib_strv_concat_null);
     g_test_add_func("/util/glib/strv/concat/pointers", gjstest_test_func_util_glib_strv_concat_pointers);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]