[gjs/ewlsh/text-encoding: 4/4] modules: Add tests for Encoding

From: Evan Welsh <ewlsh src gnome org>
To: commits-list gnome org
Cc:
Subject: [gjs/ewlsh/text-encoding: 4/4] modules: Add tests for Encoding
Date: Thu, 12 Aug 2021 01:35:48 +0000 (UTC)

commit 51cddb2e0a0ca7f52e2740b85ce68582e8198f3f
Author: Evan Welsh <contact evanwelsh com>
Date:   Sun Jul 4 22:18:17 2021 -0700

    modules: Add tests for Encoding

 .reuse/dep5                               |   8 +
 installed-tests/js/.eslintrc.yml          |   2 +
 installed-tests/js/jsunit.gresources.xml  |   1 +
 installed-tests/js/matchers.js            |  32 ++
 installed-tests/js/meson.build            |   5 +-
 installed-tests/js/modules/encodings.json | 466 ++++++++++++++++++++++++++++++
 installed-tests/js/testEncoding.js        | 408 ++++++++++++++++++++++++++
 jsconfig.json                             |   5 +
 8 files changed, 926 insertions(+), 1 deletion(-)
---
diff --git a/.reuse/dep5 b/.reuse/dep5
index e8cc3ec1..f8ae43ab 100644
--- a/.reuse/dep5
+++ b/.reuse/dep5
@@ -9,3 +9,11 @@ License: CC0-1.0
 Files: tools/package.json
 Copyright: 2020 Evan Welsh <contact evanwelsh com>
 License: MIT OR LGPL-2.0-or-later
+
+Files: installed-tests/js/modules/encodings.json
+Copyright: WHATWG (Apple, Google, Mozilla, Microsoft)
+License: BSD-3-Clause
+
+Files: jsconfig.json
+Copyright: 2021 Evan Welsh <contact evanwelsh com>
+License: MIT OR LGPL-2.0-or-later
diff --git a/installed-tests/js/.eslintrc.yml b/installed-tests/js/.eslintrc.yml
index c1a4c9bd..bcdadbea 100644
--- a/installed-tests/js/.eslintrc.yml
+++ b/installed-tests/js/.eslintrc.yml
@@ -31,8 +31,10 @@ globals:
   setTimeout: writable
 overrides:
   - files:
+      - matchers.js
       - testCairoModule.js
       - testESModules.js
+      - testEncoding.js
       - modules/importmeta.js
       - modules/exports.js
       - modules/say.js
diff --git a/installed-tests/js/jsunit.gresources.xml b/installed-tests/js/jsunit.gresources.xml
index b635c50c..fea06b50 100644
--- a/installed-tests/js/jsunit.gresources.xml
+++ b/installed-tests/js/jsunit.gresources.xml
@@ -13,6 +13,7 @@
     <file>modules/badOverrides/Regress.js</file>
     <file>modules/badOverrides/WarnLib.js</file>
     <file>modules/data.txt</file>
+    <file>modules/encodings.json</file>
     <file>modules/dynamic.js</file>
     <file>modules/importmeta.js</file>
     <file>modules/exports.js</file>
diff --git a/installed-tests/js/matchers.js b/installed-tests/js/matchers.js
new file mode 100644
index 00000000..6a2848f6
--- /dev/null
+++ b/installed-tests/js/matchers.js
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 Evan Welsh <contact evanwelsh com>
+
+/**
+ * Creates a jasmine asymmetric matcher which accepts an array-like object
+ * only when it has the same length as the given element array and holds
+ * strictly equal (===) values at every index. Useful for typed arrays.
+ *
+ * @template T
+ * @param {T[]} elements an array of elements to compare with
+ * @returns {object} a matcher with asymmetricMatch() and jasmineToString()
+ */
+export function arrayLikeWithExactContents(elements) {
+    return {
+        /**
+         * @param {ArrayLike<T>} compareTo an array-like object to compare to
+         * @returns {boolean} whether the length and every element (===) match
+         */
+        asymmetricMatch(compareTo) {
+            return (
+                compareTo.length === elements.length &&
+                elements.every((e, i) => e === compareTo[i])
+            );
+        },
+        /**
+         * @returns {string} the expected elements serialized as JSON
+         */
+        jasmineToString() {
+            return `${JSON.stringify(elements)}`;
+        },
+    };
+}
diff --git a/installed-tests/js/meson.build b/installed-tests/js/meson.build
index e11f1418..f5fa8e08 100644
--- a/installed-tests/js/meson.build
+++ b/installed-tests/js/meson.build
@@ -216,7 +216,10 @@ endif
 # tests using ES modules are also separate because they need an extra
 # minijasmine flag
 
-modules_tests = ['ESModules']
+modules_tests = [
+    'ESModules',
+    'Encoding',
+]
 if build_cairo
     modules_tests += 'CairoModule'
 endif
diff --git a/installed-tests/js/modules/encodings.json b/installed-tests/js/modules/encodings.json
new file mode 100644
index 00000000..c746f680
--- /dev/null
+++ b/installed-tests/js/modules/encodings.json
@@ -0,0 +1,466 @@
+[
+    {
+      "encodings": [
+        {
+          "labels": [
+            "unicode-1-1-utf-8",
+            "unicode11utf8",
+            "unicode20utf8",
+            "utf-8",
+            "utf8",
+            "x-unicode20utf8"
+          ],
+          "name": "UTF-8"
+        }
+      ],
+      "heading": "The Encoding"
+    },
+    {
+      "encodings": [
+        {
+          "labels": [
+            "866",
+            "cp866",
+            "csibm866",
+            "ibm866"
+          ],
+          "name": "IBM866"
+        },
+        {
+          "labels": [
+            "csisolatin2",
+            "iso-8859-2",
+            "iso-ir-101",
+            "iso8859-2",
+            "iso88592",
+            "iso_8859-2",
+            "iso_8859-2:1987",
+            "l2",
+            "latin2"
+          ],
+          "name": "ISO-8859-2"
+        },
+        {
+          "labels": [
+            "csisolatin3",
+            "iso-8859-3",
+            "iso-ir-109",
+            "iso8859-3",
+            "iso88593",
+            "iso_8859-3",
+            "iso_8859-3:1988",
+            "l3",
+            "latin3"
+          ],
+          "name": "ISO-8859-3"
+        },
+        {
+          "labels": [
+            "csisolatin4",
+            "iso-8859-4",
+            "iso-ir-110",
+            "iso8859-4",
+            "iso88594",
+            "iso_8859-4",
+            "iso_8859-4:1988",
+            "l4",
+            "latin4"
+          ],
+          "name": "ISO-8859-4"
+        },
+        {
+          "labels": [
+            "csisolatincyrillic",
+            "cyrillic",
+            "iso-8859-5",
+            "iso-ir-144",
+            "iso8859-5",
+            "iso88595",
+            "iso_8859-5",
+            "iso_8859-5:1988"
+          ],
+          "name": "ISO-8859-5"
+        },
+        {
+          "labels": [
+            "arabic",
+            "asmo-708",
+            "csiso88596e",
+            "csiso88596i",
+            "csisolatinarabic",
+            "ecma-114",
+            "iso-8859-6",
+            "iso-8859-6-e",
+            "iso-8859-6-i",
+            "iso-ir-127",
+            "iso8859-6",
+            "iso88596",
+            "iso_8859-6",
+            "iso_8859-6:1987"
+          ],
+          "name": "ISO-8859-6"
+        },
+        {
+          "labels": [
+            "csisolatingreek",
+            "ecma-118",
+            "elot_928",
+            "greek",
+            "greek8",
+            "iso-8859-7",
+            "iso-ir-126",
+            "iso8859-7",
+            "iso88597",
+            "iso_8859-7",
+            "iso_8859-7:1987",
+            "sun_eu_greek"
+          ],
+          "name": "ISO-8859-7"
+        },
+        {
+          "labels": [
+            "csiso88598e",
+            "csisolatinhebrew",
+            "hebrew",
+            "iso-8859-8",
+            "iso-8859-8-e",
+            "iso-ir-138",
+            "iso8859-8",
+            "iso88598",
+            "iso_8859-8",
+            "iso_8859-8:1988",
+            "visual"
+          ],
+          "name": "ISO-8859-8"
+        },
+        {
+          "labels": [
+            "csiso88598i",
+            "iso-8859-8-i",
+            "logical"
+          ],
+          "name": "ISO-8859-8-I"
+        },
+        {
+          "labels": [
+            "csisolatin6",
+            "iso-8859-10",
+            "iso-ir-157",
+            "iso8859-10",
+            "iso885910",
+            "l6",
+            "latin6"
+          ],
+          "name": "ISO-8859-10"
+        },
+        {
+          "labels": [
+            "iso-8859-13",
+            "iso8859-13",
+            "iso885913"
+          ],
+          "name": "ISO-8859-13"
+        },
+        {
+          "labels": [
+            "iso-8859-14",
+            "iso8859-14",
+            "iso885914"
+          ],
+          "name": "ISO-8859-14"
+        },
+        {
+          "labels": [
+            "csisolatin9",
+            "iso-8859-15",
+            "iso8859-15",
+            "iso885915",
+            "iso_8859-15",
+            "l9"
+          ],
+          "name": "ISO-8859-15"
+        },
+        {
+          "labels": [
+            "iso-8859-16"
+          ],
+          "name": "ISO-8859-16"
+        },
+        {
+          "labels": [
+            "cskoi8r",
+            "koi",
+            "koi8",
+            "koi8-r",
+            "koi8_r"
+          ],
+          "name": "KOI8-R"
+        },
+        {
+          "labels": [
+            "koi8-ru",
+            "koi8-u"
+          ],
+          "name": "KOI8-U"
+        },
+        {
+          "labels": [
+            "csmacintosh",
+            "mac",
+            "macintosh",
+            "x-mac-roman"
+          ],
+          "name": "macintosh"
+        },
+        {
+          "labels": [
+            "dos-874",
+            "iso-8859-11",
+            "iso8859-11",
+            "iso885911",
+            "tis-620",
+            "windows-874"
+          ],
+          "name": "windows-874"
+        },
+        {
+          "labels": [
+            "cp1250",
+            "windows-1250",
+            "x-cp1250"
+          ],
+          "name": "windows-1250"
+        },
+        {
+          "labels": [
+            "cp1251",
+            "windows-1251",
+            "x-cp1251"
+          ],
+          "name": "windows-1251"
+        },
+        {
+          "labels": [
+            "ansi_x3.4-1968",
+            "ascii",
+            "cp1252",
+            "cp819",
+            "csisolatin1",
+            "ibm819",
+            "iso-8859-1",
+            "iso-ir-100",
+            "iso8859-1",
+            "iso88591",
+            "iso_8859-1",
+            "iso_8859-1:1987",
+            "l1",
+            "latin1",
+            "us-ascii",
+            "windows-1252",
+            "x-cp1252"
+          ],
+          "name": "windows-1252"
+        },
+        {
+          "labels": [
+            "cp1253",
+            "windows-1253",
+            "x-cp1253"
+          ],
+          "name": "windows-1253"
+        },
+        {
+          "labels": [
+            "cp1254",
+            "csisolatin5",
+            "iso-8859-9",
+            "iso-ir-148",
+            "iso8859-9",
+            "iso88599",
+            "iso_8859-9",
+            "iso_8859-9:1989",
+            "l5",
+            "latin5",
+            "windows-1254",
+            "x-cp1254"
+          ],
+          "name": "windows-1254"
+        },
+        {
+          "labels": [
+            "cp1255",
+            "windows-1255",
+            "x-cp1255"
+          ],
+          "name": "windows-1255"
+        },
+        {
+          "labels": [
+            "cp1256",
+            "windows-1256",
+            "x-cp1256"
+          ],
+          "name": "windows-1256"
+        },
+        {
+          "labels": [
+            "cp1257",
+            "windows-1257",
+            "x-cp1257"
+          ],
+          "name": "windows-1257"
+        },
+        {
+          "labels": [
+            "cp1258",
+            "windows-1258",
+            "x-cp1258"
+          ],
+          "name": "windows-1258"
+        },
+        {
+          "labels": [
+            "x-mac-cyrillic",
+            "x-mac-ukrainian"
+          ],
+          "name": "x-mac-cyrillic"
+        }
+      ],
+      "heading": "Legacy single-byte encodings"
+    },
+    {
+      "encodings": [
+        {
+          "labels": [
+            "chinese",
+            "csgb2312",
+            "csiso58gb231280",
+            "gb2312",
+            "gb_2312",
+            "gb_2312-80",
+            "gbk",
+            "iso-ir-58",
+            "x-gbk"
+          ],
+          "name": "GBK"
+        },
+        {
+          "labels": [
+            "gb18030"
+          ],
+          "name": "gb18030"
+        }
+      ],
+      "heading": "Legacy multi-byte Chinese (simplified) encodings"
+    },
+    {
+      "encodings": [
+        {
+          "labels": [
+            "big5",
+            "big5-hkscs",
+            "cn-big5",
+            "csbig5",
+            "x-x-big5"
+          ],
+          "name": "Big5"
+        }
+      ],
+      "heading": "Legacy multi-byte Chinese (traditional) encodings"
+    },
+    {
+      "encodings": [
+        {
+          "labels": [
+            "cseucpkdfmtjapanese",
+            "euc-jp",
+            "x-euc-jp"
+          ],
+          "name": "EUC-JP"
+        },
+        {
+          "labels": [
+            "csiso2022jp",
+            "iso-2022-jp"
+          ],
+          "name": "ISO-2022-JP"
+        },
+        {
+          "labels": [
+            "csshiftjis",
+            "ms932",
+            "ms_kanji",
+            "shift-jis",
+            "shift_jis",
+            "sjis",
+            "windows-31j",
+            "x-sjis"
+          ],
+          "name": "Shift_JIS"
+        }
+      ],
+      "heading": "Legacy multi-byte Japanese encodings"
+    },
+    {
+      "encodings": [
+        {
+          "labels": [
+            "cseuckr",
+            "csksc56011987",
+            "euc-kr",
+            "iso-ir-149",
+            "korean",
+            "ks_c_5601-1987",
+            "ks_c_5601-1989",
+            "ksc5601",
+            "ksc_5601",
+            "windows-949"
+          ],
+          "name": "EUC-KR"
+        }
+      ],
+      "heading": "Legacy multi-byte Korean encodings"
+    },
+    {
+      "encodings": [
+        {
+          "labels": [
+            "csiso2022kr",
+            "hz-gb-2312",
+            "iso-2022-cn",
+            "iso-2022-cn-ext",
+            "iso-2022-kr",
+            "replacement"
+          ],
+          "name": "replacement"
+        },
+        {
+          "labels": [
+            "unicodefffe",
+            "utf-16be"
+          ],
+          "name": "UTF-16BE"
+        },
+        {
+          "labels": [
+            "csunicode",
+            "iso-10646-ucs-2",
+            "ucs-2",
+            "unicode",
+            "unicodefeff",
+            "utf-16",
+            "utf-16le"
+          ],
+          "name": "UTF-16LE"
+        },
+        {
+          "labels": [
+            "x-user-defined"
+          ],
+          "name": "x-user-defined"
+        }
+      ],
+      "heading": "Legacy miscellaneous encodings"
+    }
+  ]
+  
\ No newline at end of file
diff --git a/installed-tests/js/testEncoding.js b/installed-tests/js/testEncoding.js
new file mode 100644
index 00000000..55d65581
--- /dev/null
+++ b/installed-tests/js/testEncoding.js
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 Evan Welsh <contact evanwelsh com>
+
+// Some test inputs are derived from https://github.com/denoland/deno/blob/923214c53725651792f6d55c5401bf6b475622ea/op_crates/web/08_text_encoding.js
+// Data originally from https://encoding.spec.whatwg.org/encodings.json
+
+import Gio from 'gi://Gio';
+
+import {arrayLikeWithExactContents} from './matchers.js';
+
+/**
+ * Loads a file from a URI with Gio, decodes its bytes and parses it as JSON.
+ *
+ * @param {string} src the URI to load from (the tests pass a resource:/// URI)
+ * @returns {any} the value returned by JSON.parse()
+ */
+function loadJSONFromResource(src) {
+    const file = Gio.File.new_for_uri(src);
+    const [, bytes] = file.load_contents(null);
+
+    const decoder = new TextDecoder();
+    const jsonRaw = decoder.decode(bytes);
+    const json = JSON.parse(jsonRaw);
+
+    return json;
+}
+
+/**
+ * UTF-8 encoded form of '𝓽𝓮𝔁𝓽' (four bytes per character)
+ *
+ * @returns {number[]} a new 16-element array of byte values on each call
+ */
+function encodedMultibyteCharArray() {
+    return [
+        0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81,
+        0xf0, 0x9d, 0x93, 0xbd,
+    ];
+}
+
+describe('Text Encoding', function () {
+    it('toString() uses spec-compliant tags', function () {
+        const encoder = new TextEncoder();
+
+        expect(encoder.toString()).toBe('[object TextEncoder]');
+
+        const decoder = new TextDecoder();
+        expect(decoder.toString()).toBe('[object TextDecoder]');
+    });
+
+    describe('TextEncoder', function () {
+        describe('encode()', function () {
+            it('can encode UTF8 (multi-byte chars)', function () {
+                const input = '𝓽𝓮𝔁𝓽';
+                const encoder = new TextEncoder();
+                const encoded = encoder.encode(input);
+
+                expect(encoded).toEqual(
+                    arrayLikeWithExactContents([...encodedMultibyteCharArray()])
+                );
+            });
+        });
+
+        describe('encodeInto()', function () {
+            it('can encode UTF8 (Latin chars) into a Uint8Array', function () {
+                const input = 'text';
+                const encoder = new TextEncoder();
+                const bytes = new Uint8Array(5);
+                const result = encoder.encodeInto(input, bytes);
+                expect(result.read).toBe(4);
+                expect(result.written).toBe(4);
+
+                expect(bytes).toEqual(
+                    arrayLikeWithExactContents([0x74, 0x65, 0x78, 0x74, 0x00])
+                );
+            });
+
+            it('can fully encode UTF8 (multi-byte chars) into a Uint8Array', function () {
+                const input = '𝓽𝓮𝔁𝓽';
+                const encoder = new TextEncoder();
+                const bytes = new Uint8Array(17);
+                const result = encoder.encodeInto(input, bytes);
+                expect(result.read).toBe(8);
+                expect(result.written).toBe(16);
+
+                expect(bytes).toEqual(
+                    arrayLikeWithExactContents([
+                        ...encodedMultibyteCharArray(),
+                        0x00,
+                    ])
+                );
+            });
+
+            it('can partially encode UTF8 into an under-allocated Uint8Array', function () {
+                const input = '𝓽𝓮𝔁𝓽';
+                const encoder = new TextEncoder();
+                const bytes = new Uint8Array(5);
+                const result = encoder.encodeInto(input, bytes);
+                expect(result.read).toBe(2);
+                expect(result.written).toBe(4);
+
+                expect(bytes).toEqual(
+                    arrayLikeWithExactContents([
+                        ...encodedMultibyteCharArray().slice(0, 4),
+                        0x00,
+                    ])
+                );
+            });
+        });
+    });
+
+    describe('TextDecoder', function () {
+        describe('decode()', function () {
+            it('fatal is false by default', function () {
+                const decoder = new TextDecoder();
+
+                expect(decoder.fatal).toBeFalse();
+            });
+
+            it('ignoreBOM is false by default', function () {
+                const decoder = new TextDecoder();
+
+                expect(decoder.ignoreBOM).toBeFalse();
+            });
+
+            it('fatal is true when passed', function () {
+                const decoder = new TextDecoder(undefined, {fatal: true});
+
+                expect(decoder.fatal).toBeTrue();
+            });
+
+            it('ignoreBOM is true when passed', function () {
+                const decoder = new TextDecoder(undefined, {ignoreBOM: true});
+
+                expect(decoder.ignoreBOM).toBeTrue();
+            });
+
+            it('fatal is coerced to a boolean value', function () {
+                const decoder = new TextDecoder(undefined, {fatal: 1});
+
+                expect(decoder.fatal).toBeTrue();
+            });
+
+            it('ignoreBOM is coerced to a boolean value', function () {
+                const decoder = new TextDecoder(undefined, {ignoreBOM: ''});
+
+                expect(decoder.ignoreBOM).toBeFalse();
+            });
+
+            it('throws on empty input', function () {
+                const decoder = new TextDecoder();
+                const input = '';
+
+                expect(() => decoder.decode(input)).toThrowError(
+                    'Provided input cannot be converted to ArrayBufferView or ArrayBuffer'
+                );
+            });
+
+            it('throws on null input', function () {
+                const decoder = new TextDecoder();
+                const input = null;
+
+                expect(() => decoder.decode(input)).toThrowError(
+                    'Provided input cannot be converted to ArrayBufferView or ArrayBuffer'
+                );
+            });
+
+            it('throws on invalid encoding label', function () {
+                expect(() => new TextDecoder('bad')).toThrowError(
+                    "Invalid encoding label: 'bad'"
+                );
+            });
+
+            it('decodes undefined as an empty string', function () {
+                const decoder = new TextDecoder();
+                const input = undefined;
+
+                expect(decoder.decode(input)).toBe('');
+            });
+
+            it('decodes UTF-8 byte array (Uint8Array)', function () {
+                const decoder = new TextDecoder();
+                const input = new Uint8Array([...encodedMultibyteCharArray()]);
+
+                expect(decoder.decode(input)).toBe('𝓽𝓮𝔁𝓽');
+            });
+
+            it('ignores byte order marker (BOM)', function () {
+                const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+                const input = new Uint8Array([
+                    0xef,
+                    0xbb,
+                    0xbf,
+                    ...encodedMultibyteCharArray(),
+                ]);
+
+                expect(decoder.decode(input)).toBe('𝓽𝓮𝔁𝓽');
+            });
+
+            it('handles invalid byte order marker (BOM)', function () {
+                const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+                const input = new Uint8Array([
+                    0xef,
+                    0xbb,
+                    0x89,
+                    ...encodedMultibyteCharArray(),
+                ]);
+
+                expect(decoder.decode(input)).toBe('ﻉ𝓽𝓮𝔁𝓽');
+            });
+        });
+
+        describe('UTF-8 Encoding Converter', function () {
+            it('can decode (not fatal)', function () {
+                const decoder = new TextDecoder();
+
+                const decoded = decoder.decode(new Uint8Array([120, 193, 120]));
+                expect(decoded).toEqual('x�x');
+            });
+
+            it('can decode (fatal)', function () {
+                const decoder = new TextDecoder(undefined, {
+                    fatal: true,
+                });
+
+                expect(() => {
+                    decoder.decode(new Uint8Array([120, 193, 120]));
+                }).toThrowError(
+                    TypeError,
+                    /malformed UTF-8 character sequence/
+                );
+            });
+        });
+
+        describe('Multi-byte Encoding Converter (iconv)', function () {
+            it('can decode Big-5', function () {
+                const decoder = new TextDecoder('big5');
+                const bytes = [
+                    164, 164, 177, 192, 183, 124, 177, 181, 168, 252, 184, 103,
+                    192, 217, 179, 161, 188, 208, 183, 199, 192, 203, 197, 231,
+                    167, 189, 169, 101, 176, 85,
+                ];
+
+                const decoded = decoder.decode(new Uint8Array(bytes));
+                expect(decoded).toEqual('中推會接受經濟部標準檢驗局委託');
+            });
+
+            it('can decode Big-5 with incorrect input bytes', function () {
+                const decoder = new TextDecoder('big5');
+                const bytes = [
+                    164, 164, 177, 192, 183, 124,
+                    // Invalid byte...
+                    0xa1,
+                ];
+
+                const decoded = decoder.decode(new Uint8Array(bytes));
+                expect(decoded).toEqual('中推會�');
+            });
+
+            it('can decode Big-5 with long incorrect input bytes', function () {
+                const decoder = new TextDecoder('big5');
+                const bytes = [164, 164, 177, 192, 183, 124];
+                const baseLength = 1000;
+                const longBytes = new Array(baseLength)
+                    .fill(bytes, 0, baseLength)
+                    .flat();
+
+                // Append invalid byte sequence...
+                longBytes.push(0xa3);
+
+                const decoded = decoder.decode(new Uint8Array(longBytes));
+
+                const baseResult = '中推會';
+                const longResult = [
+                    ...new Array(baseLength).fill(baseResult, 0, baseLength),
+                    '�',
+                ].join('');
+
+                expect(decoded).toEqual(longResult);
+            });
+
+            it('can decode Big-5 HKSCS with supplemental characters', function () {
+                // The characters below roughly mean 'hard' or 'solid' and
+                // 'rooster' respectively. They were chosen for their Unicode
+                // and HKSCS positioning, not meaning.
+
+                // Big5-HKSCS bytes for the supplemental character 𠕇
+                const supplementalBytes = [250, 64];
+                // Big5-HKSCS bytes for the non-supplemental characters 公雞
+                const nonSupplementalBytes = [164, 189, 194, 251];
+
+                const decoder = new TextDecoder('big5-hkscs');
+
+                // We currently allocate 12 additional bytes of padding
+                // and a minimum of 256...
+
+                // This should produce 400 non-supplemental characters (50 * 2 * 4,
+                // i.e. 800 UTF-16 bytes) and 16 supplemental bytes (4 * 4)
+                const repeatedNonSupplementalBytes = new Array(50).fill(nonSupplementalBytes).flat();
+                const bytes = [
+                    ...repeatedNonSupplementalBytes,
+                    ...supplementalBytes,
+                    ...repeatedNonSupplementalBytes,
+                    ...supplementalBytes,
+                    ...repeatedNonSupplementalBytes,
+                    ...supplementalBytes,
+                    ...repeatedNonSupplementalBytes,
+                    ...supplementalBytes,
+                ];
+
+                const expectedNonSupplemental  = new Array(50).fill('公雞');
+                const expected = [
+                    ...expectedNonSupplemental,
+                    '𠕇',
+                    ...expectedNonSupplemental,
+                    '𠕇',
+                    ...expectedNonSupplemental,
+                    '𠕇',
+                    ...expectedNonSupplemental,
+                    '𠕇',
+                ].join('');
+
+                // Calculate the number of bytes the UTF-16 characters should
+                // occupy.
+                const expectedU16Bytes = [...expected].reduce((prev, next) => {
+                    const utf16code = next.codePointAt(0);
+
+                    // Test whether this unit is supplemental
+                    const additionalBytes = utf16code > 0xFFFF ? 2 : 0;
+
+                    return prev + 2 + additionalBytes;
+                }, 0);
+
+
+                // We set a minimum buffer allocation of 256 bytes,
+                // this ensures that this test exceeds that.
+                expect(expectedU16Bytes / 2).toBeGreaterThan(256);
+
+                // The length of the input bytes should always be less
+                // than the expected output because UTF-16 uses 4 bytes
+                // to represent some characters HKSCS needs only 2 for.
+                expect(bytes.length).toBeLessThan(expectedU16Bytes);
+                // 4 supplemental characters, each with two additional bytes.
+                expect(bytes.length + 4 * 2).toBe(expectedU16Bytes);
+
+                const decoded = decoder.decode(new Uint8Array(bytes));
+
+                expect(decoded).toBe(expected);
+            });
+        });
+
+        describe('Single Byte Encoding Converter', function () {
+            it('can decode legacy single byte encoding (not fatal)', function () {
+                const decoder = new TextDecoder('iso-8859-6');
+
+                const decoded = decoder.decode(new Uint8Array([161, 200, 200]));
+                expect(decoded).toEqual('�بب');
+            });
+
+            it('can decode legacy single byte encoding (fatal)', function () {
+                const decoder = new TextDecoder('iso-8859-6', {
+                    fatal: true,
+                });
+
+                expect(() => {
+                    decoder.decode(new Uint8Array([161, 200, 200]));
+                }).toThrowError(
+                    TypeError,
+                    'Invalid byte sequence in conversion input'
+                );
+            });
+
+            it('can decode ASCII', function () {
+                const input = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
+                const decoder = new TextDecoder('ascii');
+                expect(decoder.decode(input)).toBe('‰•Ÿ¿');
+            });
+
+            // Straight from https://encoding.spec.whatwg.org/encodings.json
+            const encodingsTable = loadJSONFromResource(
+                'resource:///org/gjs/jsunit/modules/encodings.json'
+            );
+
+            const singleByteEncodings = encodingsTable.filter(group => {
+                return group.heading === 'Legacy single-byte encodings';
+            })[0].encodings;
+
+            const buffer = new ArrayBuffer(255);
+            const view = new Uint8Array(buffer);
+
+            for (let i = 0, l = view.byteLength; i < l; i++)
+                view[i] = i;
+
+            for (let i = 0, l = singleByteEncodings.length; i < l; i++) {
+                const encoding = singleByteEncodings[i];
+
+                it(`${encoding.name} can be decoded.`, function () {
+                    for (const label of encoding.labels) {
+                        const decoder = new TextDecoder(label);
+                        expect(() => decoder.decode(view)).not.toThrow();
+                        expect(decoder.encoding).toBe(
+                            encoding.name.toLowerCase()
+                        );
+                    }
+                });
+            }
+        });
+    });
+});
diff --git a/jsconfig.json b/jsconfig.json
new file mode 100644
index 00000000..43feb2ed
--- /dev/null
+++ b/jsconfig.json
@@ -0,0 +1,5 @@
+{
+    "compilerOptions": {
+        "lib": ["es2020"],
+    }
+}
\ No newline at end of file
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]