[gjs: 3/12] maint: Use GLib UTF8-to-UTF16 functions




commit 0a45c4bd61c15e78b62bb03ddd83ff29e139e344
Author: Philip Chimento <philip chimento gmail com>
Date:   Sun Jul 4 16:38:56 2021 -0700

    maint: Use GLib UTF8-to-UTF16 functions
    
    Add a style guide rule about avoiding the C++ standard library functions
    for this. Remove the current code that does this (and the portability hack
    to get it to work on Visual Studio) and instead use the GLib facilities.

 doc/CPP_Style_Guide.md | 12 ++++++++++--
 gjs/context.cpp        | 13 ++++++++++---
 gjs/gjs_pch.hh         |  2 --
 gjs/jsapi-util.cpp     | 38 --------------------------------------
 gjs/jsapi-util.h       |  5 ++---
 gjs/module.cpp         | 14 ++++++++++----
 6 files changed, 32 insertions(+), 52 deletions(-)
---
diff --git a/doc/CPP_Style_Guide.md b/doc/CPP_Style_Guide.md
index abcdda83..c40f04ed 100644
--- a/doc/CPP_Style_Guide.md
+++ b/doc/CPP_Style_Guide.md
@@ -441,8 +441,6 @@ Here is an example of all of the above rules together:
 #    include <windows.h>
 #endif
 
-#include <codecvt>  // for codecvt_utf8_utf16
-#include <locale>   // for wstring_convert
 #include <vector>
 
 #include <girepository.h>
@@ -1011,3 +1009,13 @@ class Foo {
     }
 };
 ```
+
+#### Don't use C++ standard library UTF-8/UTF-16 encoding facilities
+
+There are
+[bugs](https://social.msdn.microsoft.com/Forums/en-US/8f40dcd8-c67f-4eba-9134-a19b9178e481/vs-2015-rc-linker-stdcodecvt-error?forum=vcgeneral)
+in Visual Studio that make `wstring_convert` non-portable.
+Instead, use `g_utf8_to_utf16()` and friends (unfortunately not
+type-safe) or `mozilla::ConvertUtf8toUtf16()` and friends (when that
+becomes possible; it is currently not possible due to a linker bug).
+
diff --git a/gjs/context.cpp b/gjs/context.cpp
index 10a80a0c..d8992a58 100644
--- a/gjs/context.cpp
+++ b/gjs/context.cpp
@@ -1431,13 +1431,20 @@ bool GjsContextPrivate::eval_with_scope(JS::HandleObject scope_object,
     if (!eval_obj)
         eval_obj = JS_NewPlainObject(m_cx);
 
-    std::u16string utf16_string = gjs_utf8_script_to_utf16(script, script_len);
+    long items_written;  // NOLINT(runtime/int) - this type required by GLib API
+    GError* error;
+    GjsAutoChar16 utf16_string =
+        g_utf8_to_utf16(script, script_len,
+                        /* items_read = */ nullptr, &items_written, &error);
+    if (!utf16_string)
+        return gjs_throw_gerror_message(m_cx, error);
+
     // COMPAT: This could use JS::SourceText<mozilla::Utf8Unit> directly,
     // but that messes up code coverage. See bug
     // https://bugzilla.mozilla.org/show_bug.cgi?id=1404784
     JS::SourceText<char16_t> buf;
-    if (!buf.init(m_cx, utf16_string.c_str(), utf16_string.size(),
-                  JS::SourceOwnership::Borrowed))
+    if (!buf.init(m_cx, reinterpret_cast<char16_t*>(utf16_string.get()),
+                  items_written, JS::SourceOwnership::Borrowed))
         return false;
 
     JS::RootedObjectVector scope_chain(m_cx);
diff --git a/gjs/gjs_pch.hh b/gjs/gjs_pch.hh
index 8de386a4..312f4d89 100644
--- a/gjs/gjs_pch.hh
+++ b/gjs/gjs_pch.hh
@@ -7,13 +7,11 @@
 
 #include <algorithm>
 #include <atomic>
-#include <codecvt>
 #include <cstddef>
 #include <deque>
 #include <forward_list>
 #include <functional>
 #include <limits>
-#include <locale>
 #include <memory>
 #include <new>
 #include <sstream>
diff --git a/gjs/jsapi-util.cpp b/gjs/jsapi-util.cpp
index cb4f24f2..f987a530 100644
--- a/gjs/jsapi-util.cpp
+++ b/gjs/jsapi-util.cpp
@@ -12,8 +12,6 @@
 #    include <windows.h>
 #endif
 
-#include <codecvt>  // for codecvt_utf8_utf16
-#include <locale>   // for wstring_convert
 #include <string>
 #include <utility>  // for move
 #include <vector>
@@ -639,42 +637,6 @@ JSObject* gjs_get_internal_global(JSContext* cx) {
     return GjsContextPrivate::from_cx(cx)->internal_global();
 }
 
-#if defined(G_OS_WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1900))
-/* Unfortunately Visual Studio's C++ .lib somehow did not contain the right
- * codecvt stuff that we need to convert from utf8 to utf16 (char16_t), so we
- * need to work around this Visual Studio bug.  Use Windows API
- * MultiByteToWideChar() and obtain the std::u16string on the std::wstring we
- * obtain from MultiByteToWideChar().  See:
- * https://social.msdn.microsoft.com/Forums/en-US/8f40dcd8-c67f-4eba-9134-a19b9178e481/vs-2015-rc-linker-stdcodecvt-error?forum=vcgeneral
- */
-static std::wstring gjs_win32_vc140_utf8_to_utf16(const char* str,
-                                                  ssize_t len) {
-    int bufsize = MultiByteToWideChar(CP_UTF8, 0, str, len, nullptr, 0);
-    if (bufsize == 0)
-        return nullptr;
-
-    std::wstring wstr(bufsize, 0);
-    int result = MultiByteToWideChar(CP_UTF8, 0, str, len, &wstr[0], bufsize);
-    if (result == 0)
-        return nullptr;
-
-    wstr.resize(len < 0 ? strlen(str) : len);
-    return wstr;
-}
-#endif
-
-std::u16string gjs_utf8_script_to_utf16(const char* script, ssize_t len) {
-#if defined(G_OS_WIN32) && (defined(_MSC_VER) && (_MSC_VER >= 1900))
-    std::wstring wscript = gjs_win32_vc140_utf8_to_utf16(script, len);
-    return std::u16string(reinterpret_cast<const char16_t*>(wscript.c_str()));
-#else
-    std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
-    if (len < 0)
-        return convert.from_bytes(script);
-    return convert.from_bytes(script, script + len);
-#endif
-}
-
 const char* gjs_explain_gc_reason(JS::GCReason reason) {
     if (JS::InternalGCReason(reason))
         return JS::ExplainGCReason(reason);
diff --git a/gjs/jsapi-util.h b/gjs/jsapi-util.h
index e1f41e5a..b30aa7a3 100644
--- a/gjs/jsapi-util.h
+++ b/gjs/jsapi-util.h
@@ -209,6 +209,8 @@ struct GjsAutoCharFuncs {
 using GjsAutoChar =
     GjsAutoPointer<char, char, GjsAutoCharFuncs::free, GjsAutoCharFuncs::dup>;
 
+using GjsAutoChar16 = GjsAutoPointer<uint16_t, void, &g_free>;
+
 struct GjsAutoErrorFuncs {
     static GError* error_copy(GError* error) { return g_error_copy(error); }
 };
@@ -504,9 +506,6 @@ bool        gjs_unichar_from_string          (JSContext       *context,
 void gjs_maybe_gc (JSContext *context);
 void gjs_gc_if_needed(JSContext *cx);
 
-[[nodiscard]] std::u16string gjs_utf8_script_to_utf16(const char* script,
-                                                      ssize_t len);
-
 GJS_JSAPI_RETURN_CONVENTION
 GjsAutoChar gjs_format_stack_trace(JSContext       *cx,
                                    JS::HandleObject saved_frame);
diff --git a/gjs/module.cpp b/gjs/module.cpp
index 2c0840f7..3c617135 100644
--- a/gjs/module.cpp
+++ b/gjs/module.cpp
@@ -96,14 +96,20 @@ class GjsScriptModule {
     bool evaluate_import(JSContext* cx, JS::HandleObject module,
                          const char* script, ssize_t script_len,
                          const char* filename, const char* uri) {
-        std::u16string utf16_string =
-            gjs_utf8_script_to_utf16(script, script_len);
+        long items_written;  // NOLINT(runtime/int) - required by GLib API
+        GError* error;
+        GjsAutoChar16 utf16_string =
+            g_utf8_to_utf16(script, script_len,
+                            /* items_read = */ nullptr, &items_written, &error);
+        if (!utf16_string)
+            return gjs_throw_gerror_message(cx, error);
+
         // COMPAT: This could use JS::SourceText<mozilla::Utf8Unit> directly,
         // but that messes up code coverage. See bug
         // https://bugzilla.mozilla.org/show_bug.cgi?id=1404784
         JS::SourceText<char16_t> buf;
-        if (!buf.init(cx, utf16_string.c_str(), utf16_string.size(),
-                      JS::SourceOwnership::Borrowed))
+        if (!buf.init(cx, reinterpret_cast<char16_t*>(utf16_string.get()),
+                      items_written, JS::SourceOwnership::Borrowed))
             return false;
 
         JS::RootedObjectVector scope_chain(cx);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]