[gjs: 1/3] byte array: toString() should stop at 0 bytes



commit 2a0c56fde86daacb5ffee84a01a6dc6e3117cfdd
Author: Philip Chimento <philip endlessm com>
Date:   Thu Sep 13 16:28:28 2018 -0700

    byte array: toString() should stop at 0 bytes
    
    This was a regression from 1.50 to 1.52. JS strings with embedded 0
    bytes will behave in unexpected ways, and is almost never what client
    code wants, so we should avoid this.
    
    Additionally, avoids an extra copy in the non-fast-path of toString().
    
    The BOM in the output of toString('LATIN1') was a surprise to me, but I
    verified that it happened even before this patch, so that behaviour has
    not changed.
    
    Closes: #195

 gjs/byteArray.cpp                   | 36 +++++++++++++++++++-----------------
 installed-tests/js/testByteArray.js | 25 +++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 19 deletions(-)
---
diff --git a/gjs/byteArray.cpp b/gjs/byteArray.cpp
index 5dacfe48..2dc6faf9 100644
--- a/gjs/byteArray.cpp
+++ b/gjs/byteArray.cpp
@@ -69,20 +69,24 @@ static bool to_string_impl(JSContext* context, JS::HandleObject byte_array,
         /* optimization, avoids iconv overhead and runs
          * libmozjs hardwired utf8-to-utf16
          */
+
+        // If there are any 0 bytes, including the terminating byte, stop at
+        // the first one
+        if (data[len - 1] == 0 || memchr(data, 0, len))
+            return gjs_string_from_utf8(context, reinterpret_cast<char*>(data),
+                                        rval);
+
         return gjs_string_from_utf8_n(context, reinterpret_cast<char*>(data),
                                       len, rval);
     } else {
-        bool ok = false;
         gsize bytes_written;
         GError *error;
-        JSString *s;
-        char *u16_str;
-        char16_t *u16_out;
 
         error = NULL;
-        u16_str = g_convert(reinterpret_cast<char*>(data), len, "UTF-16",
-                            encoding, nullptr, /* bytes read */
-                            &bytes_written, &error);
+        GjsAutoChar u16_str =
+            g_convert(reinterpret_cast<char*>(data), len, "UTF-16", encoding,
+                      nullptr, /* bytes read */
+                      &bytes_written, &error);
         if (u16_str == NULL) {
             /* frees the GError */
             gjs_throw_g_error(context, error);
@@ -94,17 +98,15 @@ static bool to_string_impl(JSContext* context, JS::HandleObject byte_array,
          */
         g_assert((bytes_written % 2) == 0);
 
-        u16_out = g_new(char16_t, bytes_written / 2);
-        memcpy(u16_out, u16_str, bytes_written);
-        s = JS_NewUCStringCopyN(context, u16_out, bytes_written / 2);
-        if (s != NULL) {
-            ok = true;
-            rval.setString(s);
-        }
+        // g_convert 0-terminates the string, although the 0 isn't included in
+        // bytes_written
+        JSString* s = JS_NewUCStringCopyZ(
+            context, reinterpret_cast<char16_t*>(u16_str.get()));
+        if (!s)
+            return false;
 
-        g_free(u16_str);
-        g_free(u16_out);
-        return ok;
+        rval.setString(s);
+        return true;
     }
 }
 
diff --git a/installed-tests/js/testByteArray.js b/installed-tests/js/testByteArray.js
index ec502ed6..fd3fd610 100644
--- a/installed-tests/js/testByteArray.js
+++ b/installed-tests/js/testByteArray.js
@@ -37,6 +37,28 @@ describe('Byte array', function () {
         expect(s).toEqual('abcd');
     });
 
+    it('can be converted to a string of UTF-8 characters even if it ends with a 0', function () {
+        const a = Uint8Array.of(97, 98, 99, 100, 0);
+        const s = ByteArray.toString(a);
+        expect(s.length).toEqual(4);
+        expect(s).toEqual('abcd');
+    });
+
+    it('can be converted to a string of encoded characters even with a 0 byte', function () {
+        const a = Uint8Array.of(97, 98, 99, 100, 0);
+        const s = ByteArray.toString(a, 'LATIN1');
+        expect(s.length).toEqual(5);
+        expect(s).toEqual('\uFEFFabcd');
+        // GLib puts a BOM in the string, who knows why
+    });
+
+    it('stops converting to a string at an embedded 0 byte', function () {
+        const a = Uint8Array.of(97, 98, 0, 99, 100);
+        const s = ByteArray.toString(a);
+        expect(s.length).toEqual(2);
+        expect(s).toEqual('ab');
+    });
+
     describe('legacy toString() behavior', function () {
         beforeEach(function () {
             GLib.test_expect_message('Gjs', GLib.LogLevelFlags.LEVEL_WARNING,
@@ -50,8 +72,7 @@ describe('Byte array', function () {
 
         it('is preserved when marshalled from GI', function () {
             let a = GIMarshallingTests.bytearray_full_return();
-            expect(() => a.toString()).toThrowError(TypeError,
-                /malformed UTF-8 character sequence/);
+            expect(a.toString()).toEqual('');
         });
 
         afterEach(function () {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]