[gjs/ewlsh/text-encoding] Implement WHATWG Encoding specification.
- From: Philip Chimento <pchimento src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gjs/ewlsh/text-encoding] Implement WHATWG Encoding specification.
- Date: Fri, 12 Feb 2021 06:34:42 +0000 (UTC)
commit 6f2b8a858a4e0ba776d62acaa3d10afb18d6445b
Author: Evan Welsh <noreply evanwelsh com>
Date: Mon Nov 23 14:06:20 2020 -0600
Implement WHATWG Encoding specification.
.eslintignore | 2 +
.eslintrc.yml | 3 +-
gjs/byteArray.cpp | 273 +-------
gjs/context.cpp | 3 +
gjs/jsapi-util-string.cpp | 42 ++
gjs/jsapi-util.h | 6 +
gjs/textEncoding.cpp | 472 ++++++++++++++
gjs/textEncoding.h | 32 +
installed-tests/js/meson.build | 1 +
installed-tests/js/testEncoding.js | 1147 ++++++++++++++++++++++++++++++++++
js.gresource.xml | 2 +
meson.build | 1 +
modules/core/_encodings.js | 280 +++++++++
modules/core/_text.js | 127 ++++
modules/core/overrides/GLib.js | 5 +-
modules/script/_bootstrap/default.js | 13 +
modules/script/byteArray.js | 48 +-
17 files changed, 2195 insertions(+), 262 deletions(-)
---
diff --git a/.eslintignore b/.eslintignore
index 9ee950d3..8f8f93ff 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -3,4 +3,6 @@
installed-tests/js/jasmine.js
installed-tests/js/modules/badOverrides/WarnLib.js
+# Until ESLint merges class fields.
+modules/core/_text.js
modules/script/jsUnit.js
diff --git a/.eslintrc.yml b/.eslintrc.yml
index 0aa6acf2..c88b2e42 100644
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@@ -71,7 +71,6 @@ rules:
jsdoc/newline-after-description: error
jsdoc/require-jsdoc: error
jsdoc/require-param: error
- jsdoc/require-param-description: error
jsdoc/require-param-name: error
jsdoc/require-param-type: error
key-spacing:
@@ -252,5 +251,7 @@ globals:
print: readonly
printerr: readonly
window: readonly
+ TextEncoder: readonly
+ TextDecoder: readonly
parserOptions:
ecmaVersion: 2020
diff --git a/gjs/byteArray.cpp b/gjs/byteArray.cpp
index b1d27241..29b07765 100644
--- a/gjs/byteArray.cpp
+++ b/gjs/byteArray.cpp
@@ -5,7 +5,6 @@
#include <config.h>
#include <stdint.h>
-#include <string.h> // for strcmp, memchr, strlen
#include <girepository.h>
#include <glib-object.h>
@@ -13,7 +12,6 @@
#include <js/ArrayBuffer.h>
#include <js/CallArgs.h>
-#include <js/GCAPI.h> // for AutoCheckCannotGC
#include <js/PropertySpec.h>
#include <js/RootingAPI.h>
#include <js/TypeDecls.h>
@@ -28,137 +26,16 @@
#include "gjs/deprecation.h"
#include "gjs/jsapi-util-args.h"
#include "gjs/jsapi-util.h"
+#include "gjs/textEncoding.h"
/* Callbacks to use with JS::NewExternalArrayBuffer() */
-static void gfree_arraybuffer_contents(void* contents, void*) {
- g_free(contents);
-}
-
static void bytes_unref_arraybuffer(void* contents [[maybe_unused]],
void* user_data) {
auto* gbytes = static_cast<GBytes*>(user_data);
g_bytes_unref(gbytes);
}
-GJS_JSAPI_RETURN_CONVENTION
-bool to_string_impl_slow(JSContext* cx, uint8_t* data, uint32_t len,
- const char* encoding, JS::MutableHandleValue rval) {
- size_t bytes_written;
- GError* error = nullptr;
- GjsAutoChar u16_str = g_convert(reinterpret_cast<char*>(data), len,
- // Make sure the bytes of the UTF-16 string are laid out in memory
- // such that we can simply reinterpret_cast<char16_t> them.
-#if G_BYTE_ORDER == G_LITTLE_ENDIAN
- "UTF-16LE",
-#else
- "UTF-16BE",
-#endif
- encoding, nullptr, /* bytes read */
- &bytes_written, &error);
- if (!u16_str)
- return gjs_throw_gerror_message(cx, error); // frees GError
-
- // bytes_written should be bytes in a UTF-16 string so should be a multiple
- // of 2
- g_assert((bytes_written % 2) == 0);
-
- // g_convert 0-terminates the string, although the 0 isn't included in
- // bytes_written
- JSString* s =
- JS_NewUCStringCopyZ(cx, reinterpret_cast<char16_t*>(u16_str.get()));
- if (!s)
- return false;
-
- rval.setString(s);
- return true;
-}
-
-/* implement toString() with an optional encoding arg */
-GJS_JSAPI_RETURN_CONVENTION
-static bool to_string_impl(JSContext* context, JS::HandleObject byte_array,
- const char* encoding, JS::MutableHandleValue rval) {
- if (!JS_IsUint8Array(byte_array)) {
- gjs_throw(context,
- "Argument to ByteArray.toString() must be a Uint8Array");
- return false;
- }
-
- bool encoding_is_utf8;
- uint8_t* data;
-
- if (encoding) {
- /* maybe we should be smarter about utf8 synonyms here.
- * doesn't matter much though. encoding_is_utf8 is
- * just an optimization anyway.
- */
- encoding_is_utf8 = (strcmp(encoding, "UTF-8") == 0);
- } else {
- encoding_is_utf8 = true;
- }
-
- uint32_t len;
- bool is_shared_memory;
- js::GetUint8ArrayLengthAndData(byte_array, &len, &is_shared_memory, &data);
-
- if (len == 0) {
- rval.setString(JS_GetEmptyString(context));
- return true;
- }
-
- if (!encoding_is_utf8)
- return to_string_impl_slow(context, data, len, encoding, rval);
-
- // optimization, avoids iconv overhead and runs libmozjs hardwired
- // utf8-to-utf16
-
- // If there are any 0 bytes, including the terminating byte, stop at the
- // first one
- if (data[len - 1] == 0 || memchr(data, 0, len)) {
- if (!gjs_string_from_utf8(context, reinterpret_cast<char*>(data), rval))
- return false;
- } else {
- if (!gjs_string_from_utf8_n(context, reinterpret_cast<char*>(data), len,
- rval))
- return false;
- }
-
- uint8_t* current_data;
- uint32_t current_len;
- bool ignore_val;
-
- // If a garbage collection occurs between when we call
- // js::GetUint8ArrayLengthAndData and return from gjs_string_from_utf8, a
- // use-after-free corruption can occur if the garbage collector shifts the
- // location of the Uint8Array's private data. To mitigate this we call
- // js::GetUint8ArrayLengthAndData again and then compare if the length and
- // pointer are still the same. If the pointers differ, we use the slow path
- // to ensure no data corruption occurred. The shared-ness of an array cannot
- // change between calls, so we ignore it.
- js::GetUint8ArrayLengthAndData(byte_array, &current_len, &ignore_val,
- &current_data);
-
- // Ensure the private data hasn't changed
- if (current_len == len && current_data == data)
- return true;
-
- // This was the UTF-8 optimized path, so we explicitly pass the encoding
- return to_string_impl_slow(context, current_data, current_len, "UTF-8",
- rval);
-}
-
-GJS_JSAPI_RETURN_CONVENTION
-static bool to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
- JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
- JS::UniqueChars encoding;
- JS::RootedObject byte_array(cx);
-
- if (!gjs_parse_call_args(cx, "toString", args, "o|s", "byteArray",
- &byte_array, "encoding", &encoding))
- return false;
-
- return to_string_impl(cx, byte_array, encoding.get(), args.rval());
-}
/* Workaround to keep existing code compatible. This function is tacked onto
* any Uint8Array instances created in situations where previously a ByteArray
@@ -175,144 +52,33 @@ static bool instance_to_string_func(JSContext* cx, unsigned argc,
if (!gjs_parse_call_args(cx, "toString", args, "|s", "encoding", &encoding))
return false;
- return to_string_impl(cx, this_obj, encoding.get(), args.rval());
-}
-
-GJS_JSAPI_RETURN_CONVENTION
-static bool
-to_gbytes_func(JSContext *context,
- unsigned argc,
- JS::Value *vp)
-{
- JS::CallArgs rec = JS::CallArgsFromVp(argc, vp);
- GIBaseInfo *gbytes_info;
- JS::RootedObject byte_array(context);
-
- if (!gjs_parse_call_args(context, "toGBytes", rec, "o",
- "byteArray", &byte_array))
- return false;
-
- if (!JS_IsUint8Array(byte_array)) {
- gjs_throw(context,
- "Argument to ByteArray.toGBytes() must be a Uint8Array");
+ if (!JS_IsUint8Array(this_obj)) {
+ gjs_throw(cx, "Argument to ByteArray.toString() must be a Uint8Array");
return false;
}
- GBytes* bytes = gjs_byte_array_get_bytes(byte_array);
-
- g_irepository_require(nullptr, "GLib", "2.0", GIRepositoryLoadFlags(0),
- nullptr);
- gbytes_info = g_irepository_find_by_gtype(NULL, G_TYPE_BYTES);
- JSObject* ret_bytes_obj =
- BoxedInstance::new_for_c_struct(context, gbytes_info, bytes);
- g_bytes_unref(bytes);
- if (!ret_bytes_obj)
- return false;
-
- rec.rval().setObject(*ret_bytes_obj);
- return true;
+ return gjs_decode_from_uint8array(cx, this_obj, encoding.get(), true,
+ args.rval());
}
-/* fromString() function implementation */
-GJS_JSAPI_RETURN_CONVENTION
-static bool
-from_string_func(JSContext *context,
- unsigned argc,
- JS::Value *vp)
-{
- JS::CallArgs argv = JS::CallArgsFromVp (argc, vp);
- JS::UniqueChars encoding;
- JS::UniqueChars utf8;
- bool encoding_is_utf8;
- JS::RootedObject obj(context), array_buffer(context);
-
- if (!gjs_parse_call_args(context, "fromString", argv, "s|s",
- "string", &utf8,
- "encoding", &encoding))
- return false;
-
- if (argc > 1) {
- /* maybe we should be smarter about utf8 synonyms here.
- * doesn't matter much though. encoding_is_utf8 is
- * just an optimization anyway.
- */
- encoding_is_utf8 = (strcmp(encoding.get(), "UTF-8") == 0);
- } else {
- encoding_is_utf8 = true;
- }
-
- if (encoding_is_utf8) {
- /* optimization? avoids iconv overhead and runs
- * libmozjs hardwired utf16-to-utf8.
- */
- size_t len = strlen(utf8.get());
- array_buffer =
- JS::NewArrayBufferWithContents(context, len, utf8.release());
- } else {
- JSString *str = argv[0].toString(); /* Rooted by argv */
- GError *error = NULL;
- char *encoded = NULL;
- gsize bytes_written;
-
- /* Scope for AutoCheckCannotGC, will crash if a GC is triggered
- * while we are using the string's chars */
- {
- JS::AutoCheckCannotGC nogc;
- size_t len;
-
- if (JS_StringHasLatin1Chars(str)) {
- const JS::Latin1Char *chars =
- JS_GetLatin1StringCharsAndLength(context, nogc, str, &len);
- if (chars == NULL)
- return false;
-
- encoded = g_convert((char *) chars, len,
- encoding.get(), // to_encoding
- "LATIN1", /* from_encoding */
- NULL, /* bytes read */
- &bytes_written, &error);
- } else {
- const char16_t *chars =
- JS_GetTwoByteStringCharsAndLength(context, nogc, str, &len);
- if (chars == NULL)
- return false;
-
- encoded = g_convert((char *) chars, len * 2,
- encoding.get(), // to_encoding
- "UTF-16", /* from_encoding */
- NULL, /* bytes read */
- &bytes_written, &error);
- }
- }
-
- if (!encoded)
- return gjs_throw_gerror_message(context, error); // frees GError
-
- array_buffer =
- JS::NewExternalArrayBuffer(context, bytes_written, encoded,
- gfree_arraybuffer_contents, nullptr);
- }
+static bool define_to_string_func(JSContext* cx, unsigned argc, JS::Value* vp) {
+ const GjsAtoms& atoms = GjsContextPrivate::atoms(cx);
+ JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+ JS::RootedObject obj(cx);
- if (!array_buffer)
+ if (!gjs_parse_call_args(cx, "defineToString", args, "o", "obj", &obj))
return false;
- obj = JS_NewUint8ArrayWithBuffer(context, array_buffer, 0, -1);
- const GjsAtoms& atoms = GjsContextPrivate::atoms(context);
- if (!JS_DefineFunctionById(context, obj, atoms.to_string(),
+ if (!JS_DefineFunctionById(cx, obj, atoms.to_string(),
instance_to_string_func, 1, 0))
return false;
- argv.rval().setObject(*obj);
return true;
}
GJS_JSAPI_RETURN_CONVENTION
-static bool
-from_gbytes_func(JSContext *context,
- unsigned argc,
- JS::Value *vp)
-{
- JS::CallArgs argv = JS::CallArgsFromVp (argc, vp);
+static bool from_gbytes_func(JSContext* context, unsigned argc, JS::Value* vp) {
+ JS::CallArgs argv = JS::CallArgsFromVp(argc, vp);
JS::RootedObject bytes_obj(context);
GBytes *gbytes;
@@ -392,16 +158,11 @@ GByteArray* gjs_byte_array_get_byte_array(JSObject* obj) {
}
static JSFunctionSpec gjs_byte_array_module_funcs[] = {
- JS_FN("fromString", from_string_func, 2, 0),
JS_FN("fromGBytes", from_gbytes_func, 1, 0),
- JS_FN("toGBytes", to_gbytes_func, 1, 0),
- JS_FN("toString", to_string_func, 2, 0),
- JS_FS_END};
-
-bool
-gjs_define_byte_array_stuff(JSContext *cx,
- JS::MutableHandleObject module)
-{
+ JS_FN("defineToString", define_to_string_func, 1, 0), JS_FS_END};
+
+bool gjs_define_byte_array_stuff(JSContext* cx,
+ JS::MutableHandleObject module) {
module.set(JS_NewPlainObject(cx));
return JS_DefineFunctions(cx, module, gjs_byte_array_module_funcs);
}
diff --git a/gjs/context.cpp b/gjs/context.cpp
index 59310a69..e8963844 100644
--- a/gjs/context.cpp
+++ b/gjs/context.cpp
@@ -75,6 +75,7 @@
#include "gjs/objectbox.h"
#include "gjs/profiler-private.h"
#include "gjs/profiler.h"
+#include "gjs/textEncoding.h"
#include "modules/modules.h"
#include "util/log.h"
@@ -313,6 +314,8 @@ gjs_context_class_init(GjsContextClass *klass)
}
gjs_register_native_module("_byteArrayNative", gjs_define_byte_array_stuff);
+ gjs_register_native_module("_encodingNative",
+ gjs_define_text_encoding_stuff);
gjs_register_native_module("_gi", gjs_define_private_gi_stuff);
gjs_register_native_module("gi", gjs_define_repo);
diff --git a/gjs/jsapi-util-string.cpp b/gjs/jsapi-util-string.cpp
index 28182cec..09ad5914 100644
--- a/gjs/jsapi-util-string.cpp
+++ b/gjs/jsapi-util-string.cpp
@@ -74,6 +74,48 @@ JS::UniqueChars gjs_string_to_utf8(JSContext* cx, const JS::Value value) {
return JS_EncodeStringToUTF8(cx, str);
}
+bool gjs_lossy_string_from_utf8(JSContext* cx, const char* utf8_string,
+ JS::MutableHandleValue value_p) {
+ JS::ConstUTF8CharsZ chars(utf8_string, strlen(utf8_string));
+ size_t len;
+ JS::UniqueTwoByteChars twobyte_chars(
+ JS::LossyUTF8CharsToNewTwoByteCharsZ(cx, chars, &len, js::MallocArena)
+ .get());
+
+ if (!twobyte_chars) {
+ return false;
+ }
+
+ JS::RootedString str(cx, JS_NewUCStringCopyN(cx, twobyte_chars.get(), len));
+
+ if (str)
+ value_p.setString(str);
+
+ return str != nullptr;
+}
+bool gjs_lossy_string_from_utf8_n(JSContext* cx, const char* utf8_string,
+ size_t len, JS::MutableHandleValue value_p) {
+ JS::UTF8Chars chars(utf8_string, len);
+ size_t outlen;
+
+ JS::UniqueTwoByteChars twobyte_chars(
+ JS::LossyUTF8CharsToNewTwoByteCharsZ(cx, chars, &outlen,
+ js::MallocArena)
+ .get());
+
+ if (!twobyte_chars) {
+ return false;
+ }
+
+ JS::RootedString str(cx,
+ JS_NewUCStringCopyN(cx, twobyte_chars.get(), outlen));
+
+ if (str)
+ value_p.setString(str);
+
+ return str != nullptr;
+}
+
bool
gjs_string_from_utf8(JSContext *context,
const char *utf8_string,
diff --git a/gjs/jsapi-util.h b/gjs/jsapi-util.h
index 11c23776..43b8f4df 100644
--- a/gjs/jsapi-util.h
+++ b/gjs/jsapi-util.h
@@ -431,6 +431,12 @@ void gjs_warning_reporter(JSContext*, JSErrorReport* report);
GJS_JSAPI_RETURN_CONVENTION
JS::UniqueChars gjs_string_to_utf8(JSContext* cx, const JS::Value string_val);
GJS_JSAPI_RETURN_CONVENTION
+bool gjs_lossy_string_from_utf8(JSContext* context, const char* utf8_string,
+ JS::MutableHandleValue value_p);
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_lossy_string_from_utf8_n(JSContext* context, const char* utf8_string,
+ size_t len, JS::MutableHandleValue value_p);
+GJS_JSAPI_RETURN_CONVENTION
bool gjs_string_from_utf8(JSContext *context,
const char *utf8_string,
JS::MutableHandleValue value_p);
diff --git a/gjs/textEncoding.cpp b/gjs/textEncoding.cpp
new file mode 100644
index 00000000..743a79d6
--- /dev/null
+++ b/gjs/textEncoding.cpp
@@ -0,0 +1,472 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; -*- */
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: 2010 litl, LLC
+// SPDX-FileCopyrightText: 2020 Evan Welsh
+
+#include <config.h>
+
+#include <stdint.h>
+#include <string.h> // for strcmp, memchr, strlen
+
+#include <algorithm>
+#include <vector>
+
+#include <gio/gio.h>
+#include <girepository.h>
+#include <glib-object.h>
+#include <glib.h>
+
+#include <js/ArrayBuffer.h>
+#include <js/CallArgs.h>
+#include <js/CharacterEncoding.h>
+#include <js/GCAPI.h> // for AutoCheckCannotGC
+#include <js/PropertySpec.h>
+#include <js/RootingAPI.h>
+#include <js/TypeDecls.h>
+#include <js/Utility.h> // for UniqueChars
+#include <jsapi.h> // for JS_DefineFunctionById, JS_DefineFun...
+#include <jsfriendapi.h> // for JS_NewUint8ArrayWithBuffer, GetUint...
+
+#include "gi/boxed.h"
+#include "gjs/atoms.h"
+#include "gjs/context-private.h"
+#include "gjs/deprecation.h"
+#include "gjs/jsapi-util-args.h"
+#include "gjs/jsapi-util.h"
+#include "gjs/textEncoding.h"
+
+static void gfree_arraybuffer_contents(void* contents, void*) {
+ g_free(contents);
+}
+
+static const char* FALLBACK = "\ufffd";
+static size_t FALLBACK_LEN = strlen(FALLBACK);
+
+GJS_JSAPI_RETURN_CONVENTION
+static bool gjs_convert_invalid_input(JSContext* cx, uint8_t* data, size_t len,
+ const char* to_codeset,
+ const char* from_codeset,
+ char** converted) {
+ GError* error = nullptr;
+ GjsAutoUnref<GCharsetConverter> converter(
+ g_charset_converter_new(to_codeset, from_codeset, &error));
+
+ // This should only throw if an encoding is not available.
+ if (error)
+ return gjs_throw_gerror_message(cx, error);
+
+ size_t bytes_written, bytes_read;
+ char buffer[1024];
+
+ // Cast data to convert input type, calculate length.
+ const char* input = reinterpret_cast<const char*>(data);
+ size_t input_len = len * sizeof(char);
+
+ // Use a vector for the output for easy resizing.
+ std::vector<char> output;
+ size_t size = 0;
+
+ do {
+ g_converter_convert(G_CONVERTER(converter.get()), input, input_len,
+ buffer, sizeof(buffer), G_CONVERTER_INPUT_AT_END,
+ &bytes_read, &bytes_written, &error);
+
+ input += bytes_read;
+ input_len -= bytes_read;
+
+ if (bytes_written > 0) {
+ output.resize(size + bytes_written);
+ std::copy(buffer, buffer + bytes_written, output.data() + size);
+ size += bytes_written;
+ }
+
+ if (error) {
+ if (g_error_matches(error, G_IO_ERROR, G_IO_ERROR_INVALID_DATA)) {
+ // Skip the invalid character
+ input += sizeof(char);
+ input_len -= sizeof(char);
+
+ // Append fallback character to the output
+ output.resize(size + FALLBACK_LEN);
+ std::copy(FALLBACK, FALLBACK + FALLBACK_LEN,
+ output.data() + size);
+ size += FALLBACK_LEN;
+
+ g_clear_error(&error);
+ } else if (bytes_written > 0 &&
+ g_error_matches(error, G_IO_ERROR,
+ G_IO_ERROR_PARTIAL_INPUT)) {
+ // Only clear a partial input error if there are no bytes
+ // written. This occurs on the second loop, otherwise we could
+ // error mid-input.
+ g_clear_error(&error);
+ } else if (g_error_matches(error, G_IO_ERROR,
+ G_IO_ERROR_NO_SPACE)) {
+ // If the buffer was full, clear the error and continue
+ // converting.
+ g_clear_error(&error);
+ }
+ }
+ } while (input_len && !error);
+
+ if (!error) {
+ char* arr = reinterpret_cast<char*>(g_malloc0(output.size()));
+
+ std::copy(output.begin(), output.end(), arr);
+
+ *converted = arr;
+
+ // bytes_written should be bytes in a UTF-16 string so should be a
+ // multiple of 2
+ g_assert((bytes_written % 2) == 0);
+
+ return true;
+ }
+
+ return gjs_throw_gerror_message(cx, error);
+}
+
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_decode_from_uint8array_slow(JSContext* cx, uint8_t* data, uint32_t len,
+ const char* encoding, bool fatal,
+ JS::MutableHandleValue rval) {
+ size_t bytes_written, bytes_read;
+ GError* error = nullptr;
+ GjsAutoChar u16_str;
+
+// Make sure the bytes of the UTF-16 string are laid out in memory
+// such that we can simply reinterpret_cast<char16_t> them.
+#if G_BYTE_ORDER == G_LITTLE_ENDIAN
+ const char* to_codeset = "UTF-16LE";
+#else
+ const char* to_codeset = "UTF-16BE";
+#endif
+
+ if (fatal) {
+ u16_str = g_convert(reinterpret_cast<char*>(data), len, to_codeset,
+ encoding, nullptr, /* bytes read */
+ &bytes_written, &error);
+
+ // bytes_written should be bytes in a UTF-16 string so should be a
+ // multiple of 2
+ g_assert((bytes_written % 2) == 0);
+ } else {
+ // This will fail if the input contains invalid codepoints in the
+ // from_codeset. It inserts a replacement character if the input is
+ // valid but can't be represented in the output.
+ u16_str = g_convert_with_fallback(reinterpret_cast<char*>(data), len,
+ to_codeset, encoding, FALLBACK,
+ &bytes_read, &bytes_written, &error);
+
+ if (u16_str) {
+ g_assert((bytes_written % 2) == 0);
+ }
+
+ // If the input is invalid we need to do the conversion ourselves.
+ if (error && g_error_matches(error, G_CONVERT_ERROR,
+ G_CONVERT_ERROR_ILLEGAL_SEQUENCE)) {
+ // Clear the illegal sequence error.
+ g_clear_error(&error);
+
+ char* str;
+
+ if (!gjs_convert_invalid_input(cx, data, len, to_codeset, encoding,
+ &str)) {
+ return false;
+ }
+
+ u16_str = str;
+ }
+ }
+
+ if (error) {
+ return gjs_throw_gerror_message(cx, error);
+ }
+
+ // g_convert 0-terminates the string, although the 0 isn't included in
+ // bytes_written
+ JSString* s =
+ JS_NewUCStringCopyZ(cx, reinterpret_cast<char16_t*>(u16_str.get()));
+ if (!s)
+ return false;
+
+ rval.setString(s);
+ return true;
+}
+
+inline bool is_utf8_label(const char* encoding) {
+ if (encoding) {
+ /* maybe we should be smarter about utf8 synonyms here.
+ * doesn't matter much though. encoding_is_utf8 is
+ * just an optimization anyway.
+ */
+ if (strcasecmp(encoding, "utf-8") == 0) {
+ return true;
+ } else {
+ GjsAutoChar stripped(g_strdup(encoding));
+ return (strcasecmp(g_strstrip(stripped), "utf-8") == 0);
+ }
+ } else {
+ return true;
+ }
+}
+
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_decode_from_uint8array(JSContext* cx, JS::HandleObject uint8array,
+ const char* encoding, bool fatal,
+ JS::MutableHandleValue rval) {
+ if (!JS_IsUint8Array(uint8array)) {
+ gjs_throw(
+ cx, "Argument to gjs_decode_from_uint8array must be a Uint8Array");
+ return false;
+ }
+
+ bool encoding_is_utf8 = is_utf8_label(encoding);
+ uint8_t* data;
+
+ uint32_t len;
+ bool is_shared_memory;
+ js::GetUint8ArrayLengthAndData(uint8array, &len, &is_shared_memory, &data);
+
+ if (len == 0) {
+ rval.setString(JS_GetEmptyString(cx));
+ return true;
+ }
+
+ if (!encoding_is_utf8)
+ return gjs_decode_from_uint8array_slow(cx, data, len, encoding, fatal,
+ rval);
+
+ // optimization, avoids iconv overhead and runs libmozjs hardwired
+ // utf8-to-utf16
+
+ // If there are any 0 bytes, including the terminating byte, stop at the
+ // first one
+ if (data[len - 1] == 0 || memchr(data, 0, len)) {
+ if (fatal) {
+ if (!gjs_string_from_utf8(cx, reinterpret_cast<char*>(data), rval))
+ return false;
+ } else {
+ if (!gjs_lossy_string_from_utf8(cx, reinterpret_cast<char*>(data),
+ rval))
+ return false;
+ }
+ } else {
+ if (fatal) {
+ if (!gjs_string_from_utf8_n(cx, reinterpret_cast<char*>(data), len,
+ rval))
+ return false;
+ } else {
+ if (!gjs_lossy_string_from_utf8_n(cx, reinterpret_cast<char*>(data),
+ len, rval))
+ return false;
+ }
+ }
+
+ uint8_t* current_data;
+ uint32_t current_len;
+ bool ignore_val;
+
+ // If a garbage collection occurs between when we call
+ // js::GetUint8ArrayLengthAndData and return from gjs_string_from_utf8, a
+ // use-after-free corruption can occur if the garbage collector shifts the
+ // location of the Uint8Array's private data. To mitigate this we call
+ // js::GetUint8ArrayLengthAndData again and then compare if the length and
+ // pointer are still the same. If the pointers differ, we use the slow path
+ // to ensure no data corruption occurred. The shared-ness of an array cannot
+ // change between calls, so we ignore it.
+ js::GetUint8ArrayLengthAndData(uint8array, &current_len, &ignore_val,
+ &current_data);
+
+ // Ensure the private data hasn't changed
+ if (current_len == len && current_data == data)
+ return true;
+
+ // This was the UTF-8 optimized path, so we explicitly pass the encoding
+ return gjs_decode_from_uint8array_slow(cx, current_data, current_len,
+ "UTF-8", fatal, rval);
+}
+
+GJS_JSAPI_RETURN_CONVENTION
+static bool Decode(JSContext* cx, unsigned argc, JS::Value* vp) {
+ JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+ JS::UniqueChars encoding;
+ bool fatal = false;
+ JS::RootedObject uint8array(cx);
+
+ if (!gjs_parse_call_args(cx, "toString", args, "o|bs", "uint8array",
+ &uint8array, "fatal", &fatal, "encoding",
+ &encoding))
+ return false;
+
+ return gjs_decode_from_uint8array(cx, uint8array, encoding.get(), fatal,
+ args.rval());
+}
+
+/* fromString() function implementation */
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_encode_to_uint8array(JSContext* cx, JS::HandleString str,
+ const char* encoding,
+ JS::MutableHandleValue rval) {
+ bool encoding_is_utf8 = is_utf8_label(encoding);
+
+ JS::UniqueChars utf8 = JS_EncodeStringToUTF8(cx, str);
+ JS::RootedObject obj(cx), array_buffer(cx);
+
+ if (encoding_is_utf8) {
+ /* optimization? avoids iconv overhead and runs
+ * libmozjs hardwired utf16-to-utf8.
+ */
+ size_t len = strlen(utf8.get());
+ array_buffer = JS::NewArrayBufferWithContents(cx, len, utf8.release());
+ } else {
+ GError* error = nullptr;
+ char* encoded = nullptr;
+ gsize bytes_written;
+
+ /* Scope for AutoCheckCannotGC, will crash if a GC is triggered
+ * while we are using the string's chars */
+ {
+ JS::AutoCheckCannotGC nogc;
+ size_t len;
+
+ if (JS_StringHasLatin1Chars(str)) {
+ const JS::Latin1Char* chars =
+ JS_GetLatin1StringCharsAndLength(cx, nogc, str, &len);
+ if (chars == NULL)
+ return false;
+
+ encoded = g_convert(reinterpret_cast<const char*>(chars), len,
+ encoding, // to_encoding
+ "LATIN1", /* from_encoding */
+ NULL, /* bytes read */
+ &bytes_written, &error);
+ } else {
+ const char16_t* chars =
+ JS_GetTwoByteStringCharsAndLength(cx, nogc, str, &len);
+ if (chars == NULL)
+ return false;
+
+ encoded =
+ g_convert(reinterpret_cast<const char*>(chars), len * 2,
+ encoding, // to_encoding
+ "UTF-16", /* from_encoding */
+ NULL, /* bytes read */
+ &bytes_written, &error);
+ }
+ }
+
+ if (!encoded)
+ return gjs_throw_gerror_message(cx, error); // frees GError
+
+ array_buffer = JS::NewExternalArrayBuffer(
+ cx, bytes_written, encoded, gfree_arraybuffer_contents, nullptr);
+ }
+
+ if (!array_buffer)
+ return false;
+ obj = JS_NewUint8ArrayWithBuffer(cx, array_buffer, 0, -1);
+
+ rval.setObject(*obj);
+ return true;
+}
+
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_encode_into_uint8array(JSContext* cx, JS::HandleString str,
+ JS::HandleObject uint8array,
+ JS::MutableHandleValue rval) {
+ if (!JS_IsUint8Array(uint8array)) {
+ gjs_throw(
+ cx, "Argument to gjs_encode_into_uint8array must be a Uint8Array");
+ return false;
+ }
+
+ auto len = JS_GetTypedArrayByteLength(uint8array);
+ bool shared;
+
+ // TODO(ewlsh): Garbage collection cannot occur from here...
+ auto data =
+ JS_GetUint8ArrayData(uint8array, &shared, JS::AutoCheckCannotGC(cx));
+
+ if (shared) {
+ gjs_throw(cx, "Cannot encode data into shared memory.");
+ return false;
+ }
+
+ auto maybe = JS_EncodeStringToUTF8BufferPartial(
+ cx, str, mozilla::AsWritableChars(mozilla::Span(data, len)));
+ // ... to here
+
+ if (!maybe) {
+ JS_ReportOutOfMemory(cx);
+ return false;
+ }
+
+ size_t read, written;
+
+ mozilla::Tie(read, written) = *maybe;
+
+ g_assert(written <= len);
+
+ JS::RootedObject result(cx, JS_NewPlainObject(cx));
+ JS::RootedValue readv(cx, JS::NumberValue(read)),
+ writtenv(cx, JS::NumberValue(written));
+
+ if (!JS_SetProperty(cx, result, "read", readv) ||
+ !JS_SetProperty(cx, result, "written", writtenv)) {
+ return false;
+ }
+
+ rval.setObject(*result);
+ return true;
+}
+
+GJS_JSAPI_RETURN_CONVENTION
+static bool Encode(JSContext* cx, unsigned argc, JS::Value* vp) {
+ JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+ JS::UniqueChars encoding;
+ JS::UniqueChars utf8;
+
+ if (!gjs_parse_call_args(cx, "Encode", args, "s|s", "string", &utf8,
+ "encoding", &encoding))
+ return false;
+
+ if (!args[0].isString()) {
+ gjs_throw(cx, "First argument to encode() must be a string.");
+ return false;
+ }
+
+ JS::RootedString str(cx, args[0].toString());
+
+ return gjs_encode_to_uint8array(cx, str, encoding.get(), args.rval());
+}
+
+GJS_JSAPI_RETURN_CONVENTION
+static bool EncodeInto(JSContext* cx, unsigned argc, JS::Value* vp) {
+ JS::CallArgs args = JS::CallArgsFromVp(argc, vp);
+ JS::UniqueChars utf8;
+ JS::RootedObject uint8array(cx);
+
+ if (!gjs_parse_call_args(cx, "EncodeInto", args, "so", "string", &utf8,
+ "uint8array", &uint8array))
+ return false;
+
+ if (!args[0].isString()) {
+ gjs_throw(cx, "First argument to encode() must be a string.");
+ return false;
+ }
+
+ JS::RootedString str(cx, args[0].toString());
+
+ return gjs_encode_into_uint8array(cx, str, uint8array, args.rval());
+}
+
+static JSFunctionSpec gjs_text_encoding_module_funcs[] = {
+ JS_FN("encodeInto", EncodeInto, 2, 0), JS_FN("encode", Encode, 2, 0),
+ JS_FN("decode", Decode, 3, 0), JS_FS_END};
+
+bool gjs_define_text_encoding_stuff(JSContext* cx,
+ JS::MutableHandleObject module) {
+ module.set(JS_NewPlainObject(cx));
+ return JS_DefineFunctions(cx, module, gjs_text_encoding_module_funcs);
+}
diff --git a/gjs/textEncoding.h b/gjs/textEncoding.h
new file mode 100644
index 00000000..64e93205
--- /dev/null
+++ b/gjs/textEncoding.h
@@ -0,0 +1,32 @@
+/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; -*- */
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: 2020 Evan Welsh
+
+#ifndef GJS_TEXTENCODING_H_
+#define GJS_TEXTENCODING_H_
+
+#include <config.h>
+
+#include <stddef.h> // for size_t
+
+#include <glib.h>
+
+#include <js/TypeDecls.h>
+
+#include "gjs/macros.h"
+
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_decode_from_uint8array(JSContext* cx, JS::HandleObject uint8array,
+ const char* encoding, bool fatal,
+ JS::MutableHandleValue rval);
+
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_encode_to_uint8array(JSContext* cx, JS::HandleString str,
+ const char* encoding,
+ JS::MutableHandleValue rval);
+
+GJS_JSAPI_RETURN_CONVENTION
+bool gjs_define_text_encoding_stuff(JSContext* cx,
+ JS::MutableHandleObject module);
+
+#endif // GJS_TEXTENCODING_H_
diff --git a/installed-tests/js/meson.build b/installed-tests/js/meson.build
index 15b17d63..e3d0a88e 100644
--- a/installed-tests/js/meson.build
+++ b/installed-tests/js/meson.build
@@ -92,6 +92,7 @@ gimarshallingtests_typelib = gimarshallingtests_gir[1]
jasmine_tests = [
'self',
'ByteArray',
+ 'Encoding',
'Exceptions',
'Format',
'Fundamental',
diff --git a/installed-tests/js/testEncoding.js b/installed-tests/js/testEncoding.js
new file mode 100644
index 00000000..516ac591
--- /dev/null
+++ b/installed-tests/js/testEncoding.js
@@ -0,0 +1,1147 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Copyright 2018-2020 the Deno authors. All rights reserved.
+
+// Modified from https://github.com/denoland/deno/blob/master/op_crates/web/text_encoding_test.js
+// Data originally from https://encoding.spec.whatwg.org/encodings.json
+
+describe('Text Encoding', function () {
+ it('textDecoder', function () {
+     // Four characters, each encoded as a 4-byte UTF-8 sequence.
+     const utf8Bytes = new Uint8Array([
+         0xf0, 0x9d, 0x93, 0xbd,
+         0xf0, 0x9d, 0x93, 0xae,
+         0xf0, 0x9d, 0x94, 0x81,
+         0xf0, 0x9d, 0x93, 0xbd,
+     ]);
+     const decoded = new TextDecoder().decode(utf8Bytes);
+     expect(decoded).toBe('𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderIgnoreBOM', function () {
+     // The UTF-8 byte order mark (0xef 0xbb 0xbf) followed by four
+     // characters, each encoded as a 4-byte UTF-8 sequence.
+     const bomAndText = new Uint8Array([
+         0xef, 0xbb, 0xbf,
+         0xf0, 0x9d, 0x93, 0xbd,
+         0xf0, 0x9d, 0x93, 0xae,
+         0xf0, 0x9d, 0x94, 0x81,
+         0xf0, 0x9d, 0x93, 0xbd,
+     ]);
+     const bomIgnoringDecoder = new TextDecoder('utf-8', {ignoreBOM: true});
+     // NOTE(review): the WHATWG Encoding Standard defines `ignoreBOM: true`
+     // as keeping the BOM in the decoded output, yet the expected string
+     // below appears to have no leading U+FEFF -- confirm which behavior is
+     // intended.
+     expect(bomIgnoringDecoder.decode(bomAndText)).toBe('𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderNotBOM', function () {
+     // The first three bytes differ from the UTF-8 BOM (0xef 0xbb 0xbf) only
+     // in the last byte, so they are expected to decode as an ordinary
+     // character in front of the text.
+     const almostBomAndText = new Uint8Array([
+         0xef, 0xbb, 0x89,
+         0xf0, 0x9d, 0x93, 0xbd,
+         0xf0, 0x9d, 0x93, 0xae,
+         0xf0, 0x9d, 0x94, 0x81,
+         0xf0, 0x9d, 0x93, 0xbd,
+     ]);
+     const bomIgnoringDecoder = new TextDecoder('utf-8', {ignoreBOM: true});
+     expect(bomIgnoringDecoder.decode(almostBomAndText)).toBe('ﻉ𝓽𝓮𝔁𝓽');
+ });
+
+ it('textDecoderASCII', function () {
+     // Bytes above 0x7f, decoded through the 'ascii' label.
+     const highBytes = Uint8Array.of(0x89, 0x95, 0x9f, 0xbf);
+     const decoded = new TextDecoder('ascii').decode(highBytes);
+     expect(decoded).toBe('‰•Ÿ¿');
+ });
+
+ it('textDecoderErrorEncoding', function () {
+     // An unknown label must be rejected by the constructor.
+     const constructWithBadLabel = () => new TextDecoder('Foo');
+     expect(constructWithBadLabel).toThrowError("Invalid encoding label: 'Foo'");
+ });
+
+ // GJS can handle 'gbk', Deno does not.
+ // it('textDecoderHandlesNotFoundInternalDecoder', function () {
+ // let didThrow = false;
+ // try {
+ // new TextDecoder("gbk");
+ // } catch (e) {
+ // didThrow = true;
+ // expect(e instanceof RangeError).toBe(true);
+ // }
+ // expect(didThrow).toBe(true);
+ // });
+
+ it('textDecoderHandlesUndefined', function () {
+     // An explicit `undefined` input decodes to the empty string.
+     const decoded = new TextDecoder().decode(undefined);
+     expect(decoded).toBe('');
+ });
+
+ it('textDecoderThrowsOnEmpty', function () {
+     // An empty string is not a buffer source and must be rejected.
+     const decoder = new TextDecoder();
+     const decodeEmptyString = () => decoder.decode('');
+
+     expect(decodeEmptyString)
+         .toThrowError('Provided input cannot be converted to ArrayBufferView or ArrayBuffer');
+ });
+
+ it('textDecoderThrowsOnNull', function () {
+     // Unlike `undefined`, `null` is not accepted as input.
+     const decoder = new TextDecoder();
+     const decodeNull = () => decoder.decode(null);
+
+     expect(decodeNull)
+         .toThrowError('Provided input cannot be converted to ArrayBufferView or ArrayBuffer');
+ });
+
+ it('textEncoder', function () {
+     // Each of the four characters encodes to a 4-byte UTF-8 sequence.
+     const encoded = new TextEncoder().encode('𝓽𝓮𝔁𝓽');
+
+     expect(Array.from(encoded)).toEqual([
+         0xf0, 0x9d, 0x93, 0xbd,
+         0xf0, 0x9d, 0x93, 0xae,
+         0xf0, 0x9d, 0x94, 0x81,
+         0xf0, 0x9d, 0x93, 0xbd,
+     ]);
+ });
+
+ it('textEncodeInto', function () {
+     // The destination is one byte larger than the encoded text, so the
+     // final byte must stay zero.
+     const destination = new Uint8Array(5);
+     const {read, written} = new TextEncoder().encodeInto('text', destination);
+
+     expect(read).toBe(4);
+     expect(written).toBe(4);
+     expect(Array.from(destination)).toEqual([0x74, 0x65, 0x78, 0x74, 0x00]);
+ });
+
+ it('textEncodeInto2', function () {
+     // Four characters outside the BMP: 8 UTF-16 code units are read and
+     // 16 UTF-8 bytes are written, leaving the 17th byte untouched.
+     const destination = new Uint8Array(17);
+     const {read, written} = new TextEncoder().encodeInto('𝓽𝓮𝔁𝓽', destination);
+
+     expect(read).toBe(8);
+     expect(written).toBe(16);
+     expect(Array.from(destination)).toEqual([
+         0xf0, 0x9d, 0x93, 0xbd,
+         0xf0, 0x9d, 0x93, 0xae,
+         0xf0, 0x9d, 0x94, 0x81,
+         0xf0, 0x9d, 0x93, 0xbd,
+         0x00,
+     ]);
+ });
+
+ it('textEncodeInto3', function () {
+     // Only the first character (2 UTF-16 code units, 4 UTF-8 bytes) fits
+     // into the 5-byte destination; the last byte must stay zero.
+     const destination = new Uint8Array(5);
+     const {read, written} = new TextEncoder().encodeInto('𝓽𝓮𝔁𝓽', destination);
+
+     expect(read).toBe(2);
+     expect(written).toBe(4);
+     expect(Array.from(destination)).toEqual([0xf0, 0x9d, 0x93, 0xbd, 0x00]);
+ });
+
+ // TODO(ewlsh): GJS doesn't support SharedArrayBuffer yet.
+
+ // it('textDecoderSharedUint8Array', function () {
+ // const ab = new SharedArrayBuffer(6);
+ // const dataView = new DataView(ab);
+ // const charCodeA = "A".charCodeAt(0);
+ // for (let i = 0; i < ab.byteLength; i++) {
+ // dataView.setUint8(i, charCodeA + i);
+ // }
+ // const ui8 = new Uint8Array(ab);
+ // const decoder = new TextDecoder();
+ // const actual = decoder.decode(ui8);
+ // expect(actual).toBe("ABCDEF");
+ // });
+ // it('textDecoderSharedInt32Array', function () {
+ // const ab = new SharedArrayBuffer(8);
+ // const dataView = new DataView(ab);
+ // const charCodeA = "A".charCodeAt(0);
+ // for (let i = 0; i < ab.byteLength; i++) {
+ // dataView.setUint8(i, charCodeA + i);
+ // }
+ // const i32 = new Int32Array(ab);
+ // const decoder = new TextDecoder();
+ // const actual = decoder.decode(i32);
+ // expect(actual).toBe("ABCDEFGH");
+ // });
+
+ it('toStringShouldBeWebCompatibility', function () {
+     // Both classes must report their own name through toString().
+     const encoderTag = new TextEncoder().toString();
+     expect(encoderTag).toBe('[object TextEncoder]');
+
+     const decoderTag = new TextDecoder().toString();
+     expect(decoderTag).toBe('[object TextDecoder]');
+ });
+
+ it('singleByteEncodings', function () {
+ // Straight from https://encoding.spec.whatwg.org/encodings.json
+ const encodingsTable = [
+ {
+ encodings: [
+ {
+ labels: [
+ 'unicode-1-1-utf-8',
+ 'unicode11utf8',
+ 'unicode20utf8',
+ 'utf-8',
+ 'utf8',
+ 'x-unicode20utf8',
+ ],
+ name: 'UTF-8',
+ },
+ ],
+ heading: 'The Encoding',
+ },
+ {
+ encodings: [
+ {
+ labels: ['866', 'cp866', 'csibm866', 'ibm866'],
+ name: 'IBM866',
+ },
+ {
+ labels: [
+ 'csisolatin2',
+ 'iso-8859-2',
+ 'iso-ir-101',
+ 'iso8859-2',
+ 'iso88592',
+ 'iso_8859-2',
+ 'iso_8859-2:1987',
+ 'l2',
+ 'latin2',
+ ],
+ name: 'ISO-8859-2',
+ },
+ {
+ labels: [
+ 'csisolatin3',
+ 'iso-8859-3',
+ 'iso-ir-109',
+ 'iso8859-3',
+ 'iso88593',
+ 'iso_8859-3',
+ 'iso_8859-3:1988',
+ 'l3',
+ 'latin3',
+ ],
+ name: 'ISO-8859-3',
+ },
+ {
+ labels: [
+ 'csisolatin4',
+ 'iso-8859-4',
+ 'iso-ir-110',
+ 'iso8859-4',
+ 'iso88594',
+ 'iso_8859-4',
+ 'iso_8859-4:1988',
+ 'l4',
+ 'latin4',
+ ],
+ name: 'ISO-8859-4',
+ },
+ {
+ labels: [
+ 'csisolatincyrillic',
+ 'cyrillic',
+ 'iso-8859-5',
+ 'iso-ir-144',
+ 'iso8859-5',
+ 'iso88595',
+ 'iso_8859-5',
+ 'iso_8859-5:1988',
+ ],
+ name: 'ISO-8859-5',
+ },
+ {
+ labels: [
+ 'arabic',
+ 'asmo-708',
+ 'csiso88596e',
+ 'csiso88596i',
+ 'csisolatinarabic',
+ 'ecma-114',
+ 'iso-8859-6',
+ 'iso-8859-6-e',
+ 'iso-8859-6-i',
+ 'iso-ir-127',
+ 'iso8859-6',
+ 'iso88596',
+ 'iso_8859-6',
+ 'iso_8859-6:1987',
+ ],
+ name: 'ISO-8859-6',
+ },
+ {
+ labels: [
+ 'csisolatingreek',
+ 'ecma-118',
+ 'elot_928',
+ 'greek',
+ 'greek8',
+ 'iso-8859-7',
+ 'iso-ir-126',
+ 'iso8859-7',
+ 'iso88597',
+ 'iso_8859-7',
+ 'iso_8859-7:1987',
+ 'sun_eu_greek',
+ ],
+ name: 'ISO-8859-7',
+ },
+ {
+ labels: [
+ 'csiso88598e',
+ 'csisolatinhebrew',
+ 'hebrew',
+ 'iso-8859-8',
+ 'iso-8859-8-e',
+ 'iso-ir-138',
+ 'iso8859-8',
+ 'iso88598',
+ 'iso_8859-8',
+ 'iso_8859-8:1988',
+ 'visual',
+ ],
+ name: 'ISO-8859-8',
+ },
+ {
+ labels: ['csiso88598i', 'iso-8859-8-i', 'logical'],
+ name: 'ISO-8859-8-I',
+ },
+ {
+ labels: [
+ 'csisolatin6',
+ 'iso-8859-10',
+ 'iso-ir-157',
+ 'iso8859-10',
+ 'iso885910',
+ 'l6',
+ 'latin6',
+ ],
+ name: 'ISO-8859-10',
+ },
+ {
+ labels: ['iso-8859-13', 'iso8859-13', 'iso885913'],
+ name: 'ISO-8859-13',
+ },
+ {
+ labels: ['iso-8859-14', 'iso8859-14', 'iso885914'],
+ name: 'ISO-8859-14',
+ },
+ {
+ labels: [
+ 'csisolatin9',
+ 'iso-8859-15',
+ 'iso8859-15',
+ 'iso885915',
+ 'iso_8859-15',
+ 'l9',
+ ],
+ name: 'ISO-8859-15',
+ },
+ {
+ labels: ['iso-8859-16'],
+ name: 'ISO-8859-16',
+ },
+ {
+ labels: ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r'],
+ name: 'KOI8-R',
+ },
+ {
+ labels: ['koi8-ru', 'koi8-u'],
+ name: 'KOI8-U',
+ },
+ {
+ labels: ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman'],
+ name: 'macintosh',
+ },
+ {
+ labels: [
+ 'dos-874',
+ 'iso-8859-11',
+ 'iso8859-11',
+ 'iso885911',
+ 'tis-620',
+ 'windows-874',
+ ],
+ name: 'windows-874',
+ },
+ {
+ labels: ['cp1250', 'windows-1250', 'x-cp1250'],
+ name: 'windows-1250',
+ },
+ {
+ labels: ['cp1251', 'windows-1251', 'x-cp1251'],
+ name: 'windows-1251',
+ },
+ {
+ labels: [
+ 'ansi_x3.4-1968',
+ 'ascii',
+ 'cp1252',
+ 'cp819',
+ 'csisolatin1',
+ 'ibm819',
+ 'iso-8859-1',
+ 'iso-ir-100',
+ 'iso8859-1',
+ 'iso88591',
+ 'iso_8859-1',
+ 'iso_8859-1:1987',
+ 'l1',
+ 'latin1',
+ 'us-ascii',
+ 'windows-1252',
+ 'x-cp1252',
+ ],
+ name: 'windows-1252',
+ },
+ {
+ labels: ['cp1253', 'windows-1253', 'x-cp1253'],
+ name: 'windows-1253',
+ },
+ {
+ labels: [
+ 'cp1254',
+ 'csisolatin5',
+ 'iso-8859-9',
+ 'iso-ir-148',
+ 'iso8859-9',
+ 'iso88599',
+ 'iso_8859-9',
+ 'iso_8859-9:1989',
+ 'l5',
+ 'latin5',
+ 'windows-1254',
+ 'x-cp1254',
+ ],
+ name: 'windows-1254',
+ },
+ {
+ labels: ['cp1255', 'windows-1255', 'x-cp1255'],
+ name: 'windows-1255',
+ },
+ {
+ labels: ['cp1256', 'windows-1256', 'x-cp1256'],
+ name: 'windows-1256',
+ },
+ {
+ labels: ['cp1257', 'windows-1257', 'x-cp1257'],
+ name: 'windows-1257',
+ },
+ {
+ labels: ['cp1258', 'windows-1258', 'x-cp1258'],
+ name: 'windows-1258',
+ },
+ {
+ labels: ['x-mac-cyrillic', 'x-mac-ukrainian'],
+ name: 'x-mac-cyrillic',
+ },
+ ],
+ heading: 'Legacy single-byte encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'chinese',
+ 'csgb2312',
+ 'csiso58gb231280',
+ 'gb2312',
+ 'gb_2312',
+ 'gb_2312-80',
+ 'gbk',
+ 'iso-ir-58',
+ 'x-gbk',
+ ],
+ name: 'GBK',
+ },
+ {
+ labels: ['gb18030'],
+ name: 'gb18030',
+ },
+ ],
+ heading: 'Legacy multi-byte Chinese (simplified) encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
+ name: 'Big5',
+ },
+ ],
+ heading: 'Legacy multi-byte Chinese (traditional) encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp'],
+ name: 'EUC-JP',
+ },
+ {
+ labels: ['csiso2022jp', 'iso-2022-jp'],
+ name: 'ISO-2022-JP',
+ },
+ {
+ labels: [
+ 'csshiftjis',
+ 'ms932',
+ 'ms_kanji',
+ 'shift-jis',
+ 'shift_jis',
+ 'sjis',
+ 'windows-31j',
+ 'x-sjis',
+ ],
+ name: 'Shift_JIS',
+ },
+ ],
+ heading: 'Legacy multi-byte Japanese encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'cseuckr',
+ 'csksc56011987',
+ 'euc-kr',
+ 'iso-ir-149',
+ 'korean',
+ 'ks_c_5601-1987',
+ 'ks_c_5601-1989',
+ 'ksc5601',
+ 'ksc_5601',
+ 'windows-949',
+ ],
+ name: 'EUC-KR',
+ },
+ ],
+ heading: 'Legacy multi-byte Korean encodings',
+ },
+ {
+ encodings: [
+ {
+ labels: [
+ 'csiso2022kr',
+ 'hz-gb-2312',
+ 'iso-2022-cn',
+ 'iso-2022-cn-ext',
+ 'iso-2022-kr',
+ 'replacement',
+ ],
+ name: 'replacement',
+ },
+ {
+ labels: ['unicodefffe', 'utf-16be'],
+ name: 'UTF-16BE',
+ },
+ {
+ labels: [
+ 'csunicode',
+ 'iso-10646-ucs-2',
+ 'ucs-2',
+ 'unicode',
+ 'unicodefeff',
+ 'utf-16',
+ 'utf-16le',
+ ],
+ name: 'UTF-16LE',
+ },
+ {
+ labels: ['x-user-defined'],
+ name: 'x-user-defined',
+ },
+ ],
+ heading: 'Legacy miscellaneous encodings',
+ },
+ ];
+
+ // Only the single-byte group of encodingsTable is exercised by this test.
+ const singleByteEncodings = encodingsTable
+     .find(({heading}) => heading === 'Legacy single-byte encodings')
+     .encodings;
+
+ // https://encoding.spec.whatwg.org/indexes.json
+ const singleByteIndexes = {
+ 'IBM866': [
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+ 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
+ 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+ 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
+ 9617, 9618, 9619, 9474, 9508, 9569, 9570, 9558,
+ 9557, 9571, 9553, 9559, 9565, 9564, 9563, 9488,
+ 9492, 9524, 9516, 9500, 9472, 9532, 9566, 9567,
+ 9562, 9556, 9577, 9574, 9568, 9552, 9580, 9575,
+ 9576, 9572, 9573, 9561, 9560, 9554, 9555, 9579,
+ 9578, 9496, 9484, 9608, 9604, 9612, 9616, 9600,
+ 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
+ 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103,
+ 1025, 1105, 1028, 1108, 1031, 1111, 1038, 1118,
+ 176, 8729, 183, 8730, 8470, 164, 9632, 160,
+ ],
+ 'ISO-8859-2': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 260, 728, 321, 164, 317, 346, 167,
+ 168, 352, 350, 356, 377, 173, 381, 379,
+ 176, 261, 731, 322, 180, 318, 347, 711,
+ 184, 353, 351, 357, 378, 733, 382, 380,
+ 340, 193, 194, 258, 196, 313, 262, 199,
+ 268, 201, 280, 203, 282, 205, 206, 270,
+ 272, 323, 327, 211, 212, 336, 214, 215,
+ 344, 366, 218, 368, 220, 221, 354, 223,
+ 341, 225, 226, 259, 228, 314, 263, 231,
+ 269, 233, 281, 235, 283, 237, 238, 271,
+ 273, 324, 328, 243, 244, 337, 246, 247,
+ 345, 367, 250, 369, 252, 253, 355, 729,
+ ],
+ 'ISO-8859-3': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 294, 728, 163, 164, null, 292, 167,
+ 168, 304, 350, 286, 308, 173, null, 379,
+ 176, 295, 178, 179, 180, 181, 293, 183,
+ 184, 305, 351, 287, 309, 189, null, 380,
+ 192, 193, 194, null, 196, 266, 264, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ null, 209, 210, 211, 212, 288, 214, 215,
+ 284, 217, 218, 219, 220, 364, 348, 223,
+ 224, 225, 226, null, 228, 267, 265, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ null, 241, 242, 243, 244, 289, 246, 247,
+ 285, 249, 250, 251, 252, 365, 349, 729,
+ ],
+ 'ISO-8859-4': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 260, 312, 342, 164, 296, 315, 167,
+ 168, 352, 274, 290, 358, 173, 381, 175,
+ 176, 261, 731, 343, 180, 297, 316, 711,
+ 184, 353, 275, 291, 359, 330, 382, 331,
+ 256, 193, 194, 195, 196, 197, 198, 302,
+ 268, 201, 280, 203, 278, 205, 206, 298,
+ 272, 325, 332, 310, 212, 213, 214, 215,
+ 216, 370, 218, 219, 220, 360, 362, 223,
+ 257, 225, 226, 227, 228, 229, 230, 303,
+ 269, 233, 281, 235, 279, 237, 238, 299,
+ 273, 326, 333, 311, 244, 245, 246, 247,
+ 248, 371, 250, 251, 252, 361, 363, 729,
+ ],
+ 'ISO-8859-5': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
+ 1032, 1033, 1034, 1035, 1036, 173, 1038, 1039,
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+ 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
+ 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+ 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
+ 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
+ 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103,
+ 8470, 1105, 1106, 1107, 1108, 1109, 1110, 1111,
+ 1112, 1113, 1114, 1115, 1116, 167, 1118, 1119,
+ ],
+ 'ISO-8859-6': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, null, null, null, 164, null, null, null,
+ null, null, null, null, 1548, 173, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, 1563, null, null, null, 1567,
+ null, 1569, 1570, 1571, 1572, 1573, 1574, 1575,
+ 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583,
+ 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591,
+ 1592, 1593, 1594, null, null, null, null, null,
+ 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607,
+ 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615,
+ 1616, 1617, 1618, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ ],
+ 'ISO-8859-7': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 8216, 8217, 163, 8364, 8367, 166, 167,
+ 168, 169, 890, 171, 172, 173, null, 8213,
+ 176, 177, 178, 179, 900, 901, 902, 183,
+ 904, 905, 906, 187, 908, 189, 910, 911,
+ 912, 913, 914, 915, 916, 917, 918, 919,
+ 920, 921, 922, 923, 924, 925, 926, 927,
+ 928, 929, null, 931, 932, 933, 934, 935,
+ 936, 937, 938, 939, 940, 941, 942, 943,
+ 944, 945, 946, 947, 948, 949, 950, 951,
+ 952, 953, 954, 955, 956, 957, 958, 959,
+ 960, 961, 962, 963, 964, 965, 966, 967,
+ 968, 969, 970, 971, 972, 973, 974, null,
+ ],
+ 'ISO-8859-8': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, null, 162, 163, 164, 165, 166, 167,
+ 168, 169, 215, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 247, 187, 188, 189, 190, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, null,
+ null, null, null, null, null, null, null, 8215,
+ 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495,
+ 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
+ 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511,
+ 1512, 1513, 1514, null, null, 8206, 8207, null,
+ ],
+ 'ISO-8859-10': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 260, 274, 290, 298, 296, 310, 167,
+ 315, 272, 352, 358, 381, 173, 362, 330,
+ 176, 261, 275, 291, 299, 297, 311, 183,
+ 316, 273, 353, 359, 382, 8213, 363, 331,
+ 256, 193, 194, 195, 196, 197, 198, 302,
+ 268, 201, 280, 203, 278, 205, 206, 207,
+ 208, 325, 332, 211, 212, 213, 214, 360,
+ 216, 370, 218, 219, 220, 221, 222, 223,
+ 257, 225, 226, 227, 228, 229, 230, 303,
+ 269, 233, 281, 235, 279, 237, 238, 239,
+ 240, 326, 333, 243, 244, 245, 246, 361,
+ 248, 371, 250, 251, 252, 253, 254, 312,
+ ],
+ 'ISO-8859-13': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 8221, 162, 163, 164, 8222, 166, 167,
+ 216, 169, 342, 171, 172, 173, 174, 198,
+ 176, 177, 178, 179, 8220, 181, 182, 183,
+ 248, 185, 343, 187, 188, 189, 190, 230,
+ 260, 302, 256, 262, 196, 197, 280, 274,
+ 268, 201, 377, 278, 290, 310, 298, 315,
+ 352, 323, 325, 211, 332, 213, 214, 215,
+ 370, 321, 346, 362, 220, 379, 381, 223,
+ 261, 303, 257, 263, 228, 229, 281, 275,
+ 269, 233, 378, 279, 291, 311, 299, 316,
+ 353, 324, 326, 243, 333, 245, 246, 247,
+ 371, 322, 347, 363, 252, 380, 382, 8217,
+ ],
+ 'ISO-8859-14': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 7682, 7683, 163, 266, 267, 7690, 167,
+ 7808, 169, 7810, 7691, 7922, 173, 174, 376,
+ 7710, 7711, 288, 289, 7744, 7745, 182, 7766,
+ 7809, 7767, 7811, 7776, 7923, 7812, 7813, 7777,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 372, 209, 210, 211, 212, 213, 214, 7786,
+ 216, 217, 218, 219, 220, 221, 374, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 373, 241, 242, 243, 244, 245, 246, 7787,
+ 248, 249, 250, 251, 252, 253, 375, 255,
+ ],
+ 'ISO-8859-15': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 8364, 165, 352, 167,
+ 353, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 381, 181, 182, 183,
+ 382, 185, 186, 187, 338, 339, 376, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ ],
+ 'ISO-8859-16': [
+ 128, 129, 130, 131, 132, 133, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 145, 146, 147, 148, 149, 150, 151,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 260, 261, 321, 8364, 8222, 352, 167,
+ 353, 169, 536, 171, 377, 173, 378, 379,
+ 176, 177, 268, 322, 381, 8221, 182, 183,
+ 382, 269, 537, 187, 338, 339, 376, 380,
+ 192, 193, 194, 258, 196, 262, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 272, 323, 210, 211, 212, 336, 214, 346,
+ 368, 217, 218, 219, 220, 280, 538, 223,
+ 224, 225, 226, 259, 228, 263, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 273, 324, 242, 243, 244, 337, 246, 347,
+ 369, 249, 250, 251, 252, 281, 539, 255,
+ ],
+ 'KOI8-R': [
+ 9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508,
+ 9516, 9524, 9532, 9600, 9604, 9608, 9612, 9616,
+ 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776,
+ 8804, 8805, 160, 8993, 176, 178, 183, 247,
+ 9552, 9553, 9554, 1105, 9555, 9556, 9557, 9558,
+ 9559, 9560, 9561, 9562, 9563, 9564, 9565, 9566,
+ 9567, 9568, 9569, 1025, 9570, 9571, 9572, 9573,
+ 9574, 9575, 9576, 9577, 9578, 9579, 9580, 169,
+ 1102, 1072, 1073, 1094, 1076, 1077, 1092, 1075,
+ 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086,
+ 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
+ 1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098,
+ 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043,
+ 1061, 1048, 1049, 1050, 1051, 1052, 1053, 1054,
+ 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042,
+ 1068, 1067, 1047, 1064, 1069, 1065, 1063, 1066,
+ ],
+ 'KOI8-U': [
+ 9472, 9474, 9484, 9488, 9492, 9496, 9500, 9508,
+ 9516, 9524, 9532, 9600, 9604, 9608, 9612, 9616,
+ 9617, 9618, 9619, 8992, 9632, 8729, 8730, 8776,
+ 8804, 8805, 160, 8993, 176, 178, 183, 247,
+ 9552, 9553, 9554, 1105, 1108, 9556, 1110, 1111,
+ 9559, 9560, 9561, 9562, 9563, 1169, 1118, 9566,
+ 9567, 9568, 9569, 1025, 1028, 9571, 1030, 1031,
+ 9574, 9575, 9576, 9577, 9578, 1168, 1038, 169,
+ 1102, 1072, 1073, 1094, 1076, 1077, 1092, 1075,
+ 1093, 1080, 1081, 1082, 1083, 1084, 1085, 1086,
+ 1087, 1103, 1088, 1089, 1090, 1091, 1078, 1074,
+ 1100, 1099, 1079, 1096, 1101, 1097, 1095, 1098,
+ 1070, 1040, 1041, 1062, 1044, 1045, 1060, 1043,
+ 1061, 1048, 1049, 1050, 1051, 1052, 1053, 1054,
+ 1055, 1071, 1056, 1057, 1058, 1059, 1046, 1042,
+ 1068, 1067, 1047, 1064, 1069, 1065, 1063, 1066,
+ ],
+ 'macintosh': [
+ 196, 197, 199, 201, 209, 214, 220, 225,
+ 224, 226, 228, 227, 229, 231, 233, 232,
+ 234, 235, 237, 236, 238, 239, 241, 243,
+ 242, 244, 246, 245, 250, 249, 251, 252,
+ 8224, 176, 162, 163, 167, 8226, 182, 223,
+ 174, 169, 8482, 180, 168, 8800, 198, 216,
+ 8734, 177, 8804, 8805, 165, 181, 8706, 8721,
+ 8719, 960, 8747, 170, 186, 937, 230, 248,
+ 191, 161, 172, 8730, 402, 8776, 8710, 171,
+ 187, 8230, 160, 192, 195, 213, 338, 339,
+ 8211, 8212, 8220, 8221, 8216, 8217, 247, 9674,
+ 255, 376, 8260, 8364, 8249, 8250, 64257, 64258,
+ 8225, 183, 8218, 8222, 8240, 194, 202, 193,
+ 203, 200, 205, 206, 207, 204, 211, 212,
+ 63743, 210, 218, 219, 217, 305, 710, 732,
+ 175, 728, 729, 730, 184, 733, 731, 711,
+ ],
+ 'windows-874': [
+ 8364, 129, 130, 131, 132, 8230, 134, 135,
+ 136, 137, 138, 139, 140, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 3585, 3586, 3587, 3588, 3589, 3590, 3591,
+ 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599,
+ 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607,
+ 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615,
+ 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623,
+ 3624, 3625, 3626, 3627, 3628, 3629, 3630, 3631,
+ 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639,
+ 3640, 3641, 3642, null, null, null, null, 3647,
+ 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655,
+ 3656, 3657, 3658, 3659, 3660, 3661, 3662, 3663,
+ 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671,
+ 3672, 3673, 3674, 3675, null, null, null, null,
+ ],
+ 'windows-1250': [
+ 8364, 129, 8218, 131, 8222, 8230, 8224, 8225,
+ 136, 8240, 352, 8249, 346, 356, 381, 377,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 152, 8482, 353, 8250, 347, 357, 382, 378,
+ 160, 711, 728, 321, 164, 260, 166, 167,
+ 168, 169, 350, 171, 172, 173, 174, 379,
+ 176, 177, 731, 322, 180, 181, 182, 183,
+ 184, 261, 351, 187, 317, 733, 318, 380,
+ 340, 193, 194, 258, 196, 313, 262, 199,
+ 268, 201, 280, 203, 282, 205, 206, 270,
+ 272, 323, 327, 211, 212, 336, 214, 215,
+ 344, 366, 218, 368, 220, 221, 354, 223,
+ 341, 225, 226, 259, 228, 314, 263, 231,
+ 269, 233, 281, 235, 283, 237, 238, 271,
+ 273, 324, 328, 243, 244, 337, 246, 247,
+ 345, 367, 250, 369, 252, 253, 355, 729,
+ ],
+ 'windows-1251': [
+ 1026, 1027, 8218, 1107, 8222, 8230, 8224, 8225,
+ 8364, 8240, 1033, 8249, 1034, 1036, 1035, 1039,
+ 1106, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 152, 8482, 1113, 8250, 1114, 1116, 1115, 1119,
+ 160, 1038, 1118, 1032, 164, 1168, 166, 167,
+ 1025, 169, 1028, 171, 172, 173, 174, 1031,
+ 176, 177, 1030, 1110, 1169, 181, 182, 183,
+ 1105, 8470, 1108, 187, 1112, 1029, 1109, 1111,
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+ 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
+ 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+ 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
+ 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
+ 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103,
+ ],
+ 'windows-1252': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 352, 8249, 338, 141, 381, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 353, 8250, 339, 157, 382, 376,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 253, 254, 255,
+ ],
+ 'windows-1253': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 136, 8240, 138, 8249, 140, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 152, 8482, 154, 8250, 156, 157, 158, 159,
+ 160, 901, 902, 163, 164, 165, 166, 167,
+ 168, 169, null, 171, 172, 173, 174, 8213,
+ 176, 177, 178, 179, 900, 181, 182, 183,
+ 904, 905, 906, 187, 908, 189, 910, 911,
+ 912, 913, 914, 915, 916, 917, 918, 919,
+ 920, 921, 922, 923, 924, 925, 926, 927,
+ 928, 929, null, 931, 932, 933, 934, 935,
+ 936, 937, 938, 939, 940, 941, 942, 943,
+ 944, 945, 946, 947, 948, 949, 950, 951,
+ 952, 953, 954, 955, 956, 957, 958, 959,
+ 960, 961, 962, 963, 964, 965, 966, 967,
+ 968, 969, 970, 971, 972, 973, 974, null,
+ ],
+ 'windows-1254': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 352, 8249, 338, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 353, 8250, 339, 157, 158, 376,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199,
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 286, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 218, 219, 220, 304, 350, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231,
+ 232, 233, 234, 235, 236, 237, 238, 239,
+ 287, 241, 242, 243, 244, 245, 246, 247,
+ 248, 249, 250, 251, 252, 305, 351, 255,
+ ],
+ 'windows-1255': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 138, 8249, 140, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 154, 8250, 156, 157, 158, 159,
+ 160, 161, 162, 163, 8362, 165, 166, 167,
+ 168, 169, 215, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 247, 187, 188, 189, 190, 191,
+ 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463,
+ 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471,
+ 1472, 1473, 1474, 1475, 1520, 1521, 1522, 1523,
+ 1524, null, null, null, null, null, null, null,
+ 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495,
+ 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503,
+ 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511,
+ 1512, 1513, 1514, null, null, 8206, 8207, null,
+ ],
+ 'windows-1256': [
+ 8364, 1662, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 1657, 8249, 338, 1670, 1688, 1672,
+ 1711, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 1705, 8482, 1681, 8250, 339, 8204, 8205, 1722,
+ 160, 1548, 162, 163, 164, 165, 166, 167,
+ 168, 169, 1726, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 1563, 187, 188, 189, 190, 1567,
+ 1729, 1569, 1570, 1571, 1572, 1573, 1574, 1575,
+ 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583,
+ 1584, 1585, 1586, 1587, 1588, 1589, 1590, 215,
+ 1591, 1592, 1593, 1594, 1600, 1601, 1602, 1603,
+ 224, 1604, 226, 1605, 1606, 1607, 1608, 231,
+ 232, 233, 234, 235, 1609, 1610, 238, 239,
+ 1611, 1612, 1613, 1614, 244, 1615, 1616, 247,
+ 1617, 249, 1618, 251, 252, 8206, 8207, 1746,
+ ],
+ 'windows-1257': [
+     8364, 129, 8218, 131, 8222, 8230, 8224, 8225,
+     136, 8240, 138, 8249, 140, 168, 711, 184,
+     144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+     152, 8482, 154, 8250, 156, 175, 731, 159,
+     160, null, 162, 163, 164, null, 166, 167,
+     216, 169, 342, 171, 172, 173, 174, 198,
+     176, 177, 178, 179, 180, 181, 182, 183,
+     248, 185, 343, 187, 188, 189, 190, 230,
+     260, 302, 256, 262, 196, 197, 280, 274,
+     268, 201, 377, 278, 290, 310, 298, 315,
+     352, 323, 325, 211, 332, 213, 214, 215,
+     370, 321, 346, 362, 220, 379, 381, 223,
+     261, 303, 257, 263, 228, 229, 281, 275,
+     269, 233, 378, 279, 291, 311, 299, 316,
+     353, 324, 326, 243, 333, 245, 246, 247,
+     371, 322, 347, 363, 252, 380, 382, 729,
+ ],
+ ],
+ 'windows-1258': [
+ 8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
+ 710, 8240, 138, 8249, 338, 141, 142, 143,
+ 144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
+ 732, 8482, 154, 8250, 339, 157, 158, 376,
+ 160, 161, 162, 163, 164, 165, 166, 167,
+ 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183,
+ 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 258, 196, 197, 198, 199,
+ 200, 201, 202, 203, 768, 205, 206, 207,
+ 272, 209, 777, 211, 212, 416, 214, 215,
+ 216, 217, 218, 219, 220, 431, 771, 223,
+ 224, 225, 226, 259, 228, 229, 230, 231,
+ 232, 233, 234, 235, 769, 237, 238, 239,
+ 273, 241, 803, 243, 244, 417, 246, 247,
+ 248, 249, 250, 251, 252, 432, 8363, 255,
+ ],
+ 'x-mac-cyrillic': [
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+ 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071,
+ 8224, 176, 1168, 163, 167, 8226, 182, 1030,
+ 174, 169, 8482, 1026, 1106, 8800, 1027, 1107,
+ 8734, 177, 8804, 8805, 1110, 181, 1169, 1032,
+ 1028, 1108, 1031, 1111, 1033, 1113, 1034, 1114,
+ 1112, 1029, 172, 8730, 402, 8776, 8710, 171,
+ 187, 8230, 160, 1035, 1115, 1036, 1116, 1109,
+ 8211, 8212, 8220, 8221, 8216, 8217, 247, 8222,
+ 1038, 1118, 1039, 1119, 8470, 1025, 1105, 1103,
+ 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079,
+ 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087,
+ 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095,
+ 1096, 1097, 1098, 1099, 1100, 1101, 1102, 8364,
+ ],
+ };
+
+ /**
+  * Checks every UTF-16 code unit of a decoded string against the index of
+  * a single-byte encoding, treating the position in the string as the
+  * value of the byte that was decoded.
+  *
+  * @param {string} data - decoded output to verify
+  * @param {string} encoding - key into singleByteIndexes
+  */
+ function assertDecode(data, encoding) {
+     const upperHalf = singleByteIndexes[encoding];
+     for (let pos = 0; pos < data.length; pos++) {
+         // Bytes below 0x80 map to themselves; the rest go through the
+         // index, where null stands for U+FFFD (replacement character).
+         const mapped = pos < 0x80 ? pos : upperHalf[pos - 0x80];
+         const expectedCp = mapped === null ? 0xfffd : mapped;
+
+         expect(data.charCodeAt(pos)).toBe(expectedCp);
+     }
+ }
+ // Input holding the byte values 0x00 up to 0xfe, in ascending order.
+ // NOTE(review): with a length of 255 the byte 0xff is never fed to the
+ // decoder -- confirm whether that omission is intended.
+ const view = Uint8Array.from({length: 255}, (_unused, byte) => byte);
+
+ // Every label must resolve to its encoding's lower-cased name and decode
+ // the input according to that encoding's index.
+ for (const {labels, name} of singleByteEncodings) {
+     for (const label of labels) {
+         const decoder = new TextDecoder(label);
+         const decoded = decoder.decode(view);
+
+         expect(decoder.encoding).toBe(name.toLowerCase());
+         assertDecode(decoded, name);
+     }
+ }
+ });
+});
diff --git a/js.gresource.xml b/js.gresource.xml
index fc55e597..a0b37730 100644
--- a/js.gresource.xml
+++ b/js.gresource.xml
@@ -42,8 +42,10 @@
<file>modules/core/_cairo.js</file>
<file>modules/core/_common.js</file>
+ <file>modules/core/_encodings.js</file>
<file>modules/core/_format.js</file>
<file>modules/core/_gettext.js</file>
<file>modules/core/_signals.js</file>
+ <file>modules/core/_text.js</file>
</gresource>
</gresources>
diff --git a/meson.build b/meson.build
index a828a24f..a75bca8a 100644
--- a/meson.build
+++ b/meson.build
@@ -409,6 +409,7 @@ libgjs_sources = [
'gjs/native.cpp', 'gjs/native.h',
'gjs/objectbox.cpp', 'gjs/objectbox.h',
'gjs/profiler.cpp', 'gjs/profiler-private.h',
+ 'gjs/textEncoding.cpp', 'gjs/textEncoding.h',
'gjs/stack.cpp',
'modules/console.cpp', 'modules/console.h',
'modules/modules.cpp', 'modules/modules.h',
diff --git a/modules/core/_encodings.js b/modules/core/_encodings.js
new file mode 100644
index 00000000..93f2b99d
--- /dev/null
+++ b/modules/core/_encodings.js
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Node.js contributors. All rights reserved.
+
+// Modified from https://github.com/nodejs/node/blob/master/lib/internal/encoding.js
+// Data originally from https://encoding.spec.whatwg.org/encodings.json
+
+/* exported getEncodingFromLabel */
+
+// Lookup table from encoding label to canonical encoding name.
+// getEncodingFromLabel() tries a label verbatim first and then lowercased and
+// trimmed, so keys are expected to be lowercase.
+const encodings = new Map([
+ ['unicode-1-1-utf-8', 'utf-8'],
+ ['utf8', 'utf-8'],
+ ['utf-8', 'utf-8'],
+ ['866', 'ibm866'],
+ ['cp866', 'ibm866'],
+ ['csibm866', 'ibm866'],
+ ['ibm866', 'ibm866'],
+ ['csisolatin2', 'iso-8859-2'],
+ ['iso-8859-2', 'iso-8859-2'],
+ ['iso-ir-101', 'iso-8859-2'],
+ ['iso8859-2', 'iso-8859-2'],
+ ['iso88592', 'iso-8859-2'],
+ ['iso_8859-2', 'iso-8859-2'],
+ ['iso_8859-2:1987', 'iso-8859-2'],
+ ['l2', 'iso-8859-2'],
+ ['latin2', 'iso-8859-2'],
+ ['csisolatin3', 'iso-8859-3'],
+ ['iso-8859-3', 'iso-8859-3'],
+ ['iso-ir-109', 'iso-8859-3'],
+ ['iso8859-3', 'iso-8859-3'],
+ ['iso88593', 'iso-8859-3'],
+ ['iso_8859-3', 'iso-8859-3'],
+ ['iso_8859-3:1988', 'iso-8859-3'],
+ ['l3', 'iso-8859-3'],
+ ['latin3', 'iso-8859-3'],
+ ['csisolatin4', 'iso-8859-4'],
+ ['iso-8859-4', 'iso-8859-4'],
+ ['iso-ir-110', 'iso-8859-4'],
+ ['iso8859-4', 'iso-8859-4'],
+ ['iso88594', 'iso-8859-4'],
+ ['iso_8859-4', 'iso-8859-4'],
+ ['iso_8859-4:1988', 'iso-8859-4'],
+ ['l4', 'iso-8859-4'],
+ ['latin4', 'iso-8859-4'],
+ ['csisolatincyrillic', 'iso-8859-5'],
+ ['cyrillic', 'iso-8859-5'],
+ ['iso-8859-5', 'iso-8859-5'],
+ ['iso-ir-144', 'iso-8859-5'],
+ ['iso8859-5', 'iso-8859-5'],
+ ['iso88595', 'iso-8859-5'],
+ ['iso_8859-5', 'iso-8859-5'],
+ ['iso_8859-5:1988', 'iso-8859-5'],
+ ['arabic', 'iso-8859-6'],
+ ['asmo-708', 'iso-8859-6'],
+ ['csiso88596e', 'iso-8859-6'],
+ ['csiso88596i', 'iso-8859-6'],
+ ['csisolatinarabic', 'iso-8859-6'],
+ ['ecma-114', 'iso-8859-6'],
+ ['iso-8859-6', 'iso-8859-6'],
+ ['iso-8859-6-e', 'iso-8859-6'],
+ ['iso-8859-6-i', 'iso-8859-6'],
+ ['iso-ir-127', 'iso-8859-6'],
+ ['iso8859-6', 'iso-8859-6'],
+ ['iso88596', 'iso-8859-6'],
+ ['iso_8859-6', 'iso-8859-6'],
+ ['iso_8859-6:1987', 'iso-8859-6'],
+ ['csisolatingreek', 'iso-8859-7'],
+ ['ecma-118', 'iso-8859-7'],
+ ['elot_928', 'iso-8859-7'],
+ ['greek', 'iso-8859-7'],
+ ['greek8', 'iso-8859-7'],
+ ['iso-8859-7', 'iso-8859-7'],
+ ['iso-ir-126', 'iso-8859-7'],
+ ['iso8859-7', 'iso-8859-7'],
+ ['iso88597', 'iso-8859-7'],
+ ['iso_8859-7', 'iso-8859-7'],
+ ['iso_8859-7:1987', 'iso-8859-7'],
+ ['sun_eu_greek', 'iso-8859-7'],
+ ['csiso88598e', 'iso-8859-8'],
+ ['csisolatinhebrew', 'iso-8859-8'],
+ ['hebrew', 'iso-8859-8'],
+ ['iso-8859-8', 'iso-8859-8'],
+ ['iso-8859-8-e', 'iso-8859-8'],
+ ['iso-ir-138', 'iso-8859-8'],
+ ['iso8859-8', 'iso-8859-8'],
+ ['iso88598', 'iso-8859-8'],
+ ['iso_8859-8', 'iso-8859-8'],
+ ['iso_8859-8:1988', 'iso-8859-8'],
+ ['visual', 'iso-8859-8'],
+ ['csiso88598i', 'iso-8859-8-i'],
+ ['iso-8859-8-i', 'iso-8859-8-i'],
+ ['logical', 'iso-8859-8-i'],
+ ['csisolatin6', 'iso-8859-10'],
+ ['iso-8859-10', 'iso-8859-10'],
+ ['iso-ir-157', 'iso-8859-10'],
+ ['iso8859-10', 'iso-8859-10'],
+ ['iso885910', 'iso-8859-10'],
+ ['l6', 'iso-8859-10'],
+ ['latin6', 'iso-8859-10'],
+ ['iso-8859-13', 'iso-8859-13'],
+ ['iso8859-13', 'iso-8859-13'],
+ ['iso885913', 'iso-8859-13'],
+ ['iso-8859-14', 'iso-8859-14'],
+ ['iso8859-14', 'iso-8859-14'],
+ ['iso885914', 'iso-8859-14'],
+ ['csisolatin9', 'iso-8859-15'],
+ ['iso-8859-15', 'iso-8859-15'],
+ ['iso8859-15', 'iso-8859-15'],
+ ['iso885915', 'iso-8859-15'],
+ ['iso_8859-15', 'iso-8859-15'],
+ ['iso-8859-16', 'iso-8859-16'],
+ // NOTE(review): the only non-lowercase key. It can only match the verbatim
+ // lookup, and its lowercased form is already covered by the entry above —
+ // confirm whether it is intentional.
+ ['ISO-8859-16', 'iso-8859-16'],
+ ['l9', 'iso-8859-15'],
+ ['cskoi8r', 'koi8-r'],
+ ['koi', 'koi8-r'],
+ ['koi8', 'koi8-r'],
+ ['koi8-r', 'koi8-r'],
+ ['koi8_r', 'koi8-r'],
+ ['koi8-ru', 'koi8-u'],
+ ['koi8-u', 'koi8-u'],
+ ['csmacintosh', 'macintosh'],
+ ['mac', 'macintosh'],
+ ['macintosh', 'macintosh'],
+ ['x-mac-roman', 'macintosh'],
+ ['dos-874', 'windows-874'],
+ ['iso-8859-11', 'windows-874'],
+ ['iso8859-11', 'windows-874'],
+ ['iso885911', 'windows-874'],
+ ['tis-620', 'windows-874'],
+ ['windows-874', 'windows-874'],
+ ['cp1250', 'windows-1250'],
+ ['windows-1250', 'windows-1250'],
+ ['x-cp1250', 'windows-1250'],
+ ['cp1251', 'windows-1251'],
+ ['windows-1251', 'windows-1251'],
+ ['x-cp1251', 'windows-1251'],
+ ['ansi_x3.4-1968', 'windows-1252'],
+ ['ascii', 'windows-1252'],
+ ['cp1252', 'windows-1252'],
+ ['cp819', 'windows-1252'],
+ ['csisolatin1', 'windows-1252'],
+ ['ibm819', 'windows-1252'],
+ ['iso-8859-1', 'windows-1252'],
+ ['iso-ir-100', 'windows-1252'],
+ ['iso8859-1', 'windows-1252'],
+ ['iso88591', 'windows-1252'],
+ ['iso_8859-1', 'windows-1252'],
+ ['iso_8859-1:1987', 'windows-1252'],
+ ['l1', 'windows-1252'],
+ ['latin1', 'windows-1252'],
+ ['us-ascii', 'windows-1252'],
+ ['windows-1252', 'windows-1252'],
+ ['x-cp1252', 'windows-1252'],
+ ['cp1253', 'windows-1253'],
+ ['windows-1253', 'windows-1253'],
+ ['x-cp1253', 'windows-1253'],
+ ['cp1254', 'windows-1254'],
+ ['csisolatin5', 'windows-1254'],
+ ['iso-8859-9', 'windows-1254'],
+ ['iso-ir-148', 'windows-1254'],
+ ['iso8859-9', 'windows-1254'],
+ ['iso88599', 'windows-1254'],
+ ['iso_8859-9', 'windows-1254'],
+ ['iso_8859-9:1989', 'windows-1254'],
+ ['l5', 'windows-1254'],
+ ['latin5', 'windows-1254'],
+ ['windows-1254', 'windows-1254'],
+ ['x-cp1254', 'windows-1254'],
+ ['cp1255', 'windows-1255'],
+ ['windows-1255', 'windows-1255'],
+ ['x-cp1255', 'windows-1255'],
+ ['cp1256', 'windows-1256'],
+ ['windows-1256', 'windows-1256'],
+ ['x-cp1256', 'windows-1256'],
+ ['cp1257', 'windows-1257'],
+ ['windows-1257', 'windows-1257'],
+ ['x-cp1257', 'windows-1257'],
+ ['cp1258', 'windows-1258'],
+ ['windows-1258', 'windows-1258'],
+ ['x-cp1258', 'windows-1258'],
+ ['x-mac-cyrillic', 'x-mac-cyrillic'],
+ ['x-mac-ukrainian', 'x-mac-cyrillic'],
+ ['chinese', 'gbk'],
+ ['csgb2312', 'gbk'],
+ ['csiso58gb231280', 'gbk'],
+ ['gb2312', 'gbk'],
+ ['gb_2312', 'gbk'],
+ ['gb_2312-80', 'gbk'],
+ ['gbk', 'gbk'],
+ ['iso-ir-58', 'gbk'],
+ ['x-gbk', 'gbk'],
+ ['gb18030', 'gb18030'],
+ ['big5', 'big5'],
+ ['big5-hkscs', 'big5'],
+ ['cn-big5', 'big5'],
+ ['csbig5', 'big5'],
+ ['x-x-big5', 'big5'],
+ ['cseucpkdfmtjapanese', 'euc-jp'],
+ ['euc-jp', 'euc-jp'],
+ ['x-euc-jp', 'euc-jp'],
+ ['csiso2022jp', 'iso-2022-jp'],
+ ['iso-2022-jp', 'iso-2022-jp'],
+ ['csshiftjis', 'shift_jis'],
+ ['ms932', 'shift_jis'],
+ ['ms_kanji', 'shift_jis'],
+ ['shift-jis', 'shift_jis'],
+ ['shift_jis', 'shift_jis'],
+ ['sjis', 'shift_jis'],
+ ['windows-31j', 'shift_jis'],
+ ['x-sjis', 'shift_jis'],
+ ['cseuckr', 'euc-kr'],
+ ['csksc56011987', 'euc-kr'],
+ ['euc-kr', 'euc-kr'],
+ ['iso-ir-149', 'euc-kr'],
+ ['korean', 'euc-kr'],
+ ['ks_c_5601-1987', 'euc-kr'],
+ ['ks_c_5601-1989', 'euc-kr'],
+ ['ksc5601', 'euc-kr'],
+ ['ksc_5601', 'euc-kr'],
+ ['windows-949', 'euc-kr'],
+ ['utf-16be', 'utf-16be'],
+ ['utf-16le', 'utf-16le'],
+ ['utf-16', 'utf-16le'],
+]);
+
+// Some of the web-specified encoding names are not supported by iconv.
+// This maps such a canonical name to the replacement name that
+// getEncodingFromLabel() reports as `internal`.
+const internalEncodings = new Map([
+ ['x-mac-cyrillic', 'MacCyrillic'],
+ // For our purposes iso-8859-8-i can be handled as iso-8859-8
+ ['iso-8859-8-i', 'iso-8859-8'],
+]);
+
+/**
+ * Trims ASCII whitespace from a string.
+ * `String.prototype.trim` removes non-ASCII whitespace.
+ *
+ * @param {string} label the label to trim
+ * @returns {string}
+ */
+const trimAsciiWhitespace = label => {
+ let s = 0;
+ let e = label.length;
+ while (s < e && (
+ label[s] === '\u0009' ||
+ label[s] === '\u000a' ||
+ label[s] === '\u000c' ||
+ label[s] === '\u000d' ||
+ label[s] === '\u0020'))
+ s++;
+
+ while (e > s && (
+ label[e - 1] === '\u0009' ||
+ label[e - 1] === '\u000a' ||
+ label[e - 1] === '\u000c' ||
+ label[e - 1] === '\u000d' ||
+ label[e - 1] === '\u0020'))
+ e--;
+
+ return label.slice(s, e);
+};
+
+/**
+ * @param {string} label the encoding label
+ * @returns {string | undefined}
+ */
+function getEncodingFromLabel(label) {
+ const enc = encodings.get(label);
+
+ if (enc !== undefined) {
+ return {
+ internal: internalEncodings.get(enc),
+ external: enc,
+ };
+ }
+
+
+ const trimmed = encodings.get(trimAsciiWhitespace(label.toLowerCase()));
+
+ return {internal: internalEncodings.get(trimmed), external: trimmed};
+}
diff --git a/modules/core/_text.js b/modules/core/_text.js
new file mode 100644
index 00000000..9bdc7ef2
--- /dev/null
+++ b/modules/core/_text.js
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Evan Welsh
+
+const Encoding = imports._encodingNative;
+
+const { getEncodingFromLabel } = imports._encodings;
+
+var TextDecoder = class TextDecoder {
+ /**
+ * @type {string}
+ */
+ encoding;
+
+ /**
+ * @type {boolean}
+ */
+ ignoreBOM;
+
+ /**
+ * @type {boolean}
+ */
+ fatal;
+
+ get [Symbol.toStringTag]() {
+ return 'TextDecoder';
+ }
+
+ /**
+ * @param {string} encoding
+ * @param {object} [options]
+ * @param {boolean=} options.fatal
+ * @param {boolean=} options.ignoreBOM
+ */
+ constructor(encoding = 'utf-8', options = {}) {
+ const { fatal = false, ignoreBOM = false } = options;
+
+ const encodings = getEncodingFromLabel(encoding);
+ const enc = encodings.internal ?? encodings.external;
+
+ if (enc === undefined) {
+ throw new Error(`Invalid encoding label: '${encoding}'`);
+ }
+
+ Object.defineProperty(this, '_internalEncoding', {
+ value: enc,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'encoding', {
+ value: encodings.external,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'ignoreBOM', {
+ value: ignoreBOM,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'fatal', {
+ value: fatal,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+ }
+
+ decode(bytes, options = {}) {
+ const { stream = false } = options;
+
+ if (stream) {
+ throw new Error(`TextDecoder does not implement the 'stream' option.`);
+ }
+
+ /** @type {Uint8Array} */
+ let input;
+
+ if (bytes instanceof ArrayBuffer) {
+ input = new Uint8Array(bytes);
+ } else if (bytes instanceof Uint8Array) {
+ input = bytes;
+ } else if (bytes instanceof Object.getPrototypeOf(Uint8Array)) {
+ let { buffer, byteLength, byteOffset } = /** @type {Uint32Array} */ (bytes);
+ input = new Uint8Array(buffer, byteOffset, byteLength);
+ } else if (bytes === undefined) {
+ input = new Uint8Array(0);
+ } else {
+ throw new Error(`Provided input cannot be converted to ArrayBufferView or ArrayBuffer`);
+ }
+
+ if (this.ignoreBOM && input.length > 2 && input[0] === 0xEF && input[1] === 0xBB && input[2] ===
0xBF) {
+ if (this.encoding !== 'utf-8') {
+ throw new Error(`Cannot ignore BOM for non-UTF8 encoding.`);
+ }
+
+ let { buffer, byteLength, byteOffset } = input;
+ input = new Uint8Array(buffer, byteOffset + 3, byteLength - 3);
+ }
+
+ return Encoding.decode(input, this.fatal, this._internalEncoding);
+ }
+}
+
+var TextEncoder = class TextEncoder {
+ get [Symbol.toStringTag]() {
+ return 'TextEncoder';
+ }
+
+ get encoding() {
+ return 'utf-8';
+ }
+
+ encode(input = '') {
+ // The TextEncoder specification only allows for UTF-8 encoding.
+ return Encoding.encode(`${input}`, 'UTF-8');
+ }
+
+ encodeInto(input = '', output = new Uint8Array()) {
+ // The TextEncoder specification only allows for UTF-8 encoding.
+ return Encoding.encodeInto(`${input}`, output);
+ }
+}
\ No newline at end of file
diff --git a/modules/core/overrides/GLib.js b/modules/core/overrides/GLib.js
index 5e3800a9..16061abd 100644
--- a/modules/core/overrides/GLib.js
+++ b/modules/core/overrides/GLib.js
@@ -51,10 +51,7 @@ function _readSingleType(signature, forceSimple) {
}
function _makeBytes(byteArray) {
- if (byteArray instanceof Uint8Array || byteArray instanceof ByteArray.ByteArray)
- return ByteArray.toGBytes(byteArray);
- else
- return new GLib.Bytes(byteArray);
+ return new GLib.Bytes(byteArray);
}
function _packVariant(signature, value) {
diff --git a/modules/script/_bootstrap/default.js b/modules/script/_bootstrap/default.js
index e31d80cb..1ac83ac5 100644
--- a/modules/script/_bootstrap/default.js
+++ b/modules/script/_bootstrap/default.js
@@ -6,8 +6,21 @@
'use strict';
const {print, printerr, log, logError} = imports._print;
+ const {TextEncoder, TextDecoder} = imports._text;
Object.defineProperties(exports, {
+ TextEncoder: {
+ configurable: false,
+ enumerable: true,
+ writable: false,
+ value: TextEncoder,
+ },
+ TextDecoder: {
+ configurable: false,
+ enumerable: true,
+ writable: false,
+ value: TextDecoder,
+ },
print: {
configurable: false,
enumerable: true,
diff --git a/modules/script/byteArray.js b/modules/script/byteArray.js
index fad022d2..9fe9a0e3 100644
--- a/modules/script/byteArray.js
+++ b/modules/script/byteArray.js
@@ -2,15 +2,61 @@
// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
// SPDX-FileCopyrightText: 2017 Philip Chimento <philip chimento gmail com>
+const Encoding = imports._encodingNative;
+
/* eslint no-redeclare: ["error", { "builtinGlobals": false }] */ // for toString
-var {fromGBytes, fromString, toGBytes, toString} = imports._byteArrayNative;
+var {fromGBytes, defineToString} = imports._byteArrayNative;
+
+const {GLib} = imports.gi;
// For backwards compatibility
+/**
+ * @param {Iterable<number>} a
+ * @returns {ByteArray}
+ */
function fromArray(a) {
return new ByteArray(Uint8Array.from(a));
}
+/**
+ * @param {Uint8Array} array
+ * @returns {GLib.Bytes}
+ */
+function toGBytes(array) {
+ if (!(array instanceof Uint8Array))
+ throw new Error('Argument to ByteArray.toGBytes() must be a Uint8Array');
+
+
+ return new GLib.Bytes(array);
+}
+
+/**
+ * @param {Uint8Array} array
+ * @param {string} [encoding]
+ * @returns {string}
+ */
+function toString(array, encoding = 'utf-8') {
+ if (!(array instanceof Uint8Array))
+ throw new Error('Argument to ByteArray.toString() must be a Uint8Array');
+
+
+ return Encoding.decode(array, true, encoding);
+}
+
+/**
+ * @param {string} str
+ * @param {string} [encoding]
+ * @returns {Uint8Array}
+ */
+function fromString(str, encoding = 'utf-8') {
+ const array = Encoding.encode(str, encoding);
+
+ defineToString(array);
+
+ return array;
+}
+
var ByteArray = class ByteArray {
constructor(arg = 0) {
if (arg instanceof Uint8Array)
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]