[gjs/ewlsh/text-encoding: 4/4] modules: Add tests for Encoding
- From: Evan Welsh <ewlsh src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gjs/ewlsh/text-encoding: 4/4] modules: Add tests for Encoding
- Date: Thu, 12 Aug 2021 01:41:31 +0000 (UTC)
commit d6c979a7b8d51b8018d6c9cb6b1af7d2002acc95
Author: Evan Welsh <contact evanwelsh com>
Date: Sun Jul 4 22:18:17 2021 -0700
modules: Add tests for Encoding
.reuse/dep5 | 8 +
installed-tests/js/.eslintrc.yml | 2 +
installed-tests/js/jsunit.gresources.xml | 1 +
installed-tests/js/matchers.js | 32 ++
installed-tests/js/meson.build | 5 +-
installed-tests/js/modules/encodings.json | 466 ++++++++++++++++++++++++++++++
installed-tests/js/testEncoding.js | 408 ++++++++++++++++++++++++++
jsconfig.json | 5 +
8 files changed, 926 insertions(+), 1 deletion(-)
---
diff --git a/.reuse/dep5 b/.reuse/dep5
index e8cc3ec1..f8ae43ab 100644
--- a/.reuse/dep5
+++ b/.reuse/dep5
@@ -9,3 +9,11 @@ License: CC0-1.0
Files: tools/package.json
Copyright: 2020 Evan Welsh <contact evanwelsh com>
License: MIT OR LGPL-2.0-or-later
+
+Files: installed-tests/js/modules/encodings.json
+Copyright: WHATWG (Apple, Google, Mozilla, Microsoft)
+License: BSD-3-Clause
+
+Files: jsconfig.json
+Copyright: 2021 Evan Welsh <contact evanwelsh com>
+License: MIT OR LGPL-2.0-or-later
diff --git a/installed-tests/js/.eslintrc.yml b/installed-tests/js/.eslintrc.yml
index c1a4c9bd..bcdadbea 100644
--- a/installed-tests/js/.eslintrc.yml
+++ b/installed-tests/js/.eslintrc.yml
@@ -31,8 +31,10 @@ globals:
setTimeout: writable
overrides:
- files:
+ - matchers.js
- testCairoModule.js
- testESModules.js
+ - testEncoding.js
- modules/importmeta.js
- modules/exports.js
- modules/say.js
diff --git a/installed-tests/js/jsunit.gresources.xml b/installed-tests/js/jsunit.gresources.xml
index b635c50c..fea06b50 100644
--- a/installed-tests/js/jsunit.gresources.xml
+++ b/installed-tests/js/jsunit.gresources.xml
@@ -13,6 +13,7 @@
<file>modules/badOverrides/Regress.js</file>
<file>modules/badOverrides/WarnLib.js</file>
<file>modules/data.txt</file>
+ <file>modules/encodings.json</file>
<file>modules/dynamic.js</file>
<file>modules/importmeta.js</file>
<file>modules/exports.js</file>
diff --git a/installed-tests/js/matchers.js b/installed-tests/js/matchers.js
new file mode 100644
index 00000000..6a2848f6
--- /dev/null
+++ b/installed-tests/js/matchers.js
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 Evan Welsh <contact evanwelsh com>
+
+/**
+ * Builds a jasmine asymmetric matcher that accepts any array-like object
+ * with the same length as `elements` and a strictly equal (===) value
+ * at every index. Useful for testing typed arrays.
+ *
+ * @template T
+ * @param {T[]} elements the expected elements, in order
+ * @returns {{asymmetricMatch: Function, jasmineToString: Function}} the matcher object
+ */
+export function arrayLikeWithExactContents(elements) {
+ return {
+ /**
+ * @param {ArrayLike<T>} compareTo an array-like object to compare to
+ * @returns {boolean} true when the length and every element match
+ */
+ asymmetricMatch(compareTo) {
+ return (
+ compareTo.length === elements.length &&
+ elements.every((e, i) => e === compareTo[i])
+ );
+ },
+ /**
+ * @returns {string} the expected elements serialized as JSON
+ */
+ jasmineToString() {
+ return `${JSON.stringify(elements)}`;
+ },
+ };
+}
diff --git a/installed-tests/js/meson.build b/installed-tests/js/meson.build
index e11f1418..f5fa8e08 100644
--- a/installed-tests/js/meson.build
+++ b/installed-tests/js/meson.build
@@ -216,7 +216,10 @@ endif
# tests using ES modules are also separate because they need an extra
# minijasmine flag
-modules_tests = ['ESModules']
+modules_tests = [
+ 'ESModules',
+ 'Encoding',
+]
if build_cairo
modules_tests += 'CairoModule'
endif
diff --git a/installed-tests/js/modules/encodings.json b/installed-tests/js/modules/encodings.json
new file mode 100644
index 00000000..c746f680
--- /dev/null
+++ b/installed-tests/js/modules/encodings.json
@@ -0,0 +1,466 @@
+[
+ {
+ "encodings": [
+ {
+ "labels": [
+ "unicode-1-1-utf-8",
+ "unicode11utf8",
+ "unicode20utf8",
+ "utf-8",
+ "utf8",
+ "x-unicode20utf8"
+ ],
+ "name": "UTF-8"
+ }
+ ],
+ "heading": "The Encoding"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "866",
+ "cp866",
+ "csibm866",
+ "ibm866"
+ ],
+ "name": "IBM866"
+ },
+ {
+ "labels": [
+ "csisolatin2",
+ "iso-8859-2",
+ "iso-ir-101",
+ "iso8859-2",
+ "iso88592",
+ "iso_8859-2",
+ "iso_8859-2:1987",
+ "l2",
+ "latin2"
+ ],
+ "name": "ISO-8859-2"
+ },
+ {
+ "labels": [
+ "csisolatin3",
+ "iso-8859-3",
+ "iso-ir-109",
+ "iso8859-3",
+ "iso88593",
+ "iso_8859-3",
+ "iso_8859-3:1988",
+ "l3",
+ "latin3"
+ ],
+ "name": "ISO-8859-3"
+ },
+ {
+ "labels": [
+ "csisolatin4",
+ "iso-8859-4",
+ "iso-ir-110",
+ "iso8859-4",
+ "iso88594",
+ "iso_8859-4",
+ "iso_8859-4:1988",
+ "l4",
+ "latin4"
+ ],
+ "name": "ISO-8859-4"
+ },
+ {
+ "labels": [
+ "csisolatincyrillic",
+ "cyrillic",
+ "iso-8859-5",
+ "iso-ir-144",
+ "iso8859-5",
+ "iso88595",
+ "iso_8859-5",
+ "iso_8859-5:1988"
+ ],
+ "name": "ISO-8859-5"
+ },
+ {
+ "labels": [
+ "arabic",
+ "asmo-708",
+ "csiso88596e",
+ "csiso88596i",
+ "csisolatinarabic",
+ "ecma-114",
+ "iso-8859-6",
+ "iso-8859-6-e",
+ "iso-8859-6-i",
+ "iso-ir-127",
+ "iso8859-6",
+ "iso88596",
+ "iso_8859-6",
+ "iso_8859-6:1987"
+ ],
+ "name": "ISO-8859-6"
+ },
+ {
+ "labels": [
+ "csisolatingreek",
+ "ecma-118",
+ "elot_928",
+ "greek",
+ "greek8",
+ "iso-8859-7",
+ "iso-ir-126",
+ "iso8859-7",
+ "iso88597",
+ "iso_8859-7",
+ "iso_8859-7:1987",
+ "sun_eu_greek"
+ ],
+ "name": "ISO-8859-7"
+ },
+ {
+ "labels": [
+ "csiso88598e",
+ "csisolatinhebrew",
+ "hebrew",
+ "iso-8859-8",
+ "iso-8859-8-e",
+ "iso-ir-138",
+ "iso8859-8",
+ "iso88598",
+ "iso_8859-8",
+ "iso_8859-8:1988",
+ "visual"
+ ],
+ "name": "ISO-8859-8"
+ },
+ {
+ "labels": [
+ "csiso88598i",
+ "iso-8859-8-i",
+ "logical"
+ ],
+ "name": "ISO-8859-8-I"
+ },
+ {
+ "labels": [
+ "csisolatin6",
+ "iso-8859-10",
+ "iso-ir-157",
+ "iso8859-10",
+ "iso885910",
+ "l6",
+ "latin6"
+ ],
+ "name": "ISO-8859-10"
+ },
+ {
+ "labels": [
+ "iso-8859-13",
+ "iso8859-13",
+ "iso885913"
+ ],
+ "name": "ISO-8859-13"
+ },
+ {
+ "labels": [
+ "iso-8859-14",
+ "iso8859-14",
+ "iso885914"
+ ],
+ "name": "ISO-8859-14"
+ },
+ {
+ "labels": [
+ "csisolatin9",
+ "iso-8859-15",
+ "iso8859-15",
+ "iso885915",
+ "iso_8859-15",
+ "l9"
+ ],
+ "name": "ISO-8859-15"
+ },
+ {
+ "labels": [
+ "iso-8859-16"
+ ],
+ "name": "ISO-8859-16"
+ },
+ {
+ "labels": [
+ "cskoi8r",
+ "koi",
+ "koi8",
+ "koi8-r",
+ "koi8_r"
+ ],
+ "name": "KOI8-R"
+ },
+ {
+ "labels": [
+ "koi8-ru",
+ "koi8-u"
+ ],
+ "name": "KOI8-U"
+ },
+ {
+ "labels": [
+ "csmacintosh",
+ "mac",
+ "macintosh",
+ "x-mac-roman"
+ ],
+ "name": "macintosh"
+ },
+ {
+ "labels": [
+ "dos-874",
+ "iso-8859-11",
+ "iso8859-11",
+ "iso885911",
+ "tis-620",
+ "windows-874"
+ ],
+ "name": "windows-874"
+ },
+ {
+ "labels": [
+ "cp1250",
+ "windows-1250",
+ "x-cp1250"
+ ],
+ "name": "windows-1250"
+ },
+ {
+ "labels": [
+ "cp1251",
+ "windows-1251",
+ "x-cp1251"
+ ],
+ "name": "windows-1251"
+ },
+ {
+ "labels": [
+ "ansi_x3.4-1968",
+ "ascii",
+ "cp1252",
+ "cp819",
+ "csisolatin1",
+ "ibm819",
+ "iso-8859-1",
+ "iso-ir-100",
+ "iso8859-1",
+ "iso88591",
+ "iso_8859-1",
+ "iso_8859-1:1987",
+ "l1",
+ "latin1",
+ "us-ascii",
+ "windows-1252",
+ "x-cp1252"
+ ],
+ "name": "windows-1252"
+ },
+ {
+ "labels": [
+ "cp1253",
+ "windows-1253",
+ "x-cp1253"
+ ],
+ "name": "windows-1253"
+ },
+ {
+ "labels": [
+ "cp1254",
+ "csisolatin5",
+ "iso-8859-9",
+ "iso-ir-148",
+ "iso8859-9",
+ "iso88599",
+ "iso_8859-9",
+ "iso_8859-9:1989",
+ "l5",
+ "latin5",
+ "windows-1254",
+ "x-cp1254"
+ ],
+ "name": "windows-1254"
+ },
+ {
+ "labels": [
+ "cp1255",
+ "windows-1255",
+ "x-cp1255"
+ ],
+ "name": "windows-1255"
+ },
+ {
+ "labels": [
+ "cp1256",
+ "windows-1256",
+ "x-cp1256"
+ ],
+ "name": "windows-1256"
+ },
+ {
+ "labels": [
+ "cp1257",
+ "windows-1257",
+ "x-cp1257"
+ ],
+ "name": "windows-1257"
+ },
+ {
+ "labels": [
+ "cp1258",
+ "windows-1258",
+ "x-cp1258"
+ ],
+ "name": "windows-1258"
+ },
+ {
+ "labels": [
+ "x-mac-cyrillic",
+ "x-mac-ukrainian"
+ ],
+ "name": "x-mac-cyrillic"
+ }
+ ],
+ "heading": "Legacy single-byte encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "chinese",
+ "csgb2312",
+ "csiso58gb231280",
+ "gb2312",
+ "gb_2312",
+ "gb_2312-80",
+ "gbk",
+ "iso-ir-58",
+ "x-gbk"
+ ],
+ "name": "GBK"
+ },
+ {
+ "labels": [
+ "gb18030"
+ ],
+ "name": "gb18030"
+ }
+ ],
+ "heading": "Legacy multi-byte Chinese (simplified) encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "big5",
+ "big5-hkscs",
+ "cn-big5",
+ "csbig5",
+ "x-x-big5"
+ ],
+ "name": "Big5"
+ }
+ ],
+ "heading": "Legacy multi-byte Chinese (traditional) encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "cseucpkdfmtjapanese",
+ "euc-jp",
+ "x-euc-jp"
+ ],
+ "name": "EUC-JP"
+ },
+ {
+ "labels": [
+ "csiso2022jp",
+ "iso-2022-jp"
+ ],
+ "name": "ISO-2022-JP"
+ },
+ {
+ "labels": [
+ "csshiftjis",
+ "ms932",
+ "ms_kanji",
+ "shift-jis",
+ "shift_jis",
+ "sjis",
+ "windows-31j",
+ "x-sjis"
+ ],
+ "name": "Shift_JIS"
+ }
+ ],
+ "heading": "Legacy multi-byte Japanese encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "cseuckr",
+ "csksc56011987",
+ "euc-kr",
+ "iso-ir-149",
+ "korean",
+ "ks_c_5601-1987",
+ "ks_c_5601-1989",
+ "ksc5601",
+ "ksc_5601",
+ "windows-949"
+ ],
+ "name": "EUC-KR"
+ }
+ ],
+ "heading": "Legacy multi-byte Korean encodings"
+ },
+ {
+ "encodings": [
+ {
+ "labels": [
+ "csiso2022kr",
+ "hz-gb-2312",
+ "iso-2022-cn",
+ "iso-2022-cn-ext",
+ "iso-2022-kr",
+ "replacement"
+ ],
+ "name": "replacement"
+ },
+ {
+ "labels": [
+ "unicodefffe",
+ "utf-16be"
+ ],
+ "name": "UTF-16BE"
+ },
+ {
+ "labels": [
+ "csunicode",
+ "iso-10646-ucs-2",
+ "ucs-2",
+ "unicode",
+ "unicodefeff",
+ "utf-16",
+ "utf-16le"
+ ],
+ "name": "UTF-16LE"
+ },
+ {
+ "labels": [
+ "x-user-defined"
+ ],
+ "name": "x-user-defined"
+ }
+ ],
+ "heading": "Legacy miscellaneous encodings"
+ }
+ ]
+
\ No newline at end of file
diff --git a/installed-tests/js/testEncoding.js b/installed-tests/js/testEncoding.js
new file mode 100644
index 00000000..55d65581
--- /dev/null
+++ b/installed-tests/js/testEncoding.js
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: 2021 Evan Welsh <contact evanwelsh com>
+
+// Some test inputs are derived from https://github.com/denoland/deno/blob/923214c53725651792f6d55c5401bf6b475622ea/op_crates/web/08_text_encoding.js
+// Data originally from https://encoding.spec.whatwg.org/encodings.json
+
+import Gio from 'gi://Gio';
+
+import {arrayLikeWithExactContents} from './matchers.js';
+
+/**
+ * Reads the file at a URI, decodes it with a default TextDecoder and parses it as JSON.
+ *
+ * @param {string} src the URI to load from (e.g. a resource:/// URI)
+ * @returns {any} the value produced by JSON.parse
+ */
+function loadJSONFromResource(src) {
+ const file = Gio.File.new_for_uri(src);
+ const [, bytes] = file.load_contents(null); // first tuple element is unused
+
+ const decoder = new TextDecoder(); // no label given, so the default encoding applies
+ const jsonRaw = decoder.decode(bytes);
+ const json = JSON.parse(jsonRaw);
+
+ return json;
+}
+
+/**
+ * UTF-8 encoded form of '𝓽𝓮𝔁𝓽' (four code points, four bytes each)
+ *
+ * @returns {number[]} a new 16-element array of byte values on every call
+ */
+function encodedMultibyteCharArray() {
+ return [
+ 0xf0, 0x9d, 0x93, 0xbd, 0xf0, 0x9d, 0x93, 0xae, 0xf0, 0x9d, 0x94, 0x81,
+ 0xf0, 0x9d, 0x93, 0xbd,
+ ];
+}
+
+describe('Text Encoding', function () {
+ it('toString() uses spec-compliant tags', function () {
+ const encoder = new TextEncoder();
+
+ expect(encoder.toString()).toBe('[object TextEncoder]');
+
+ const decoder = new TextDecoder();
+ expect(decoder.toString()).toBe('[object TextDecoder]');
+ });
+
+ describe('TextEncoder', function () {
+ describe('encode()', function () {
+ it('can encode UTF8 (multi-byte chars)', function () {
+ const input = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+ const encoded = encoder.encode(input);
+
+ expect(encoded).toEqual(
+ arrayLikeWithExactContents([...encodedMultibyteCharArray()])
+ );
+ });
+ });
+
+ describe('encodeInto()', function () {
+ it('can encode UTF8 (Latin chars) into a Uint8Array', function () {
+ const input = 'text';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(5);
+ const result = encoder.encodeInto(input, bytes);
+ expect(result.read).toBe(4);
+ expect(result.written).toBe(4);
+
+ expect(bytes).toEqual(
+ arrayLikeWithExactContents([0x74, 0x65, 0x78, 0x74, 0x00])
+ );
+ });
+
+ it('can fully encode UTF8 (multi-byte chars) into a Uint8Array', function () {
+ const input = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(17);
+ const result = encoder.encodeInto(input, bytes);
+ expect(result.read).toBe(8);
+ expect(result.written).toBe(16);
+
+ expect(bytes).toEqual(
+ arrayLikeWithExactContents([
+ ...encodedMultibyteCharArray(),
+ 0x00,
+ ])
+ );
+ });
+
+ it('can partially encode UTF8 into an under-allocated Uint8Array', function () {
+ const input = '𝓽𝓮𝔁𝓽';
+ const encoder = new TextEncoder();
+ const bytes = new Uint8Array(5);
+ const result = encoder.encodeInto(input, bytes);
+ expect(result.read).toBe(2);
+ expect(result.written).toBe(4);
+
+ expect(bytes).toEqual(
+ arrayLikeWithExactContents([
+ ...encodedMultibyteCharArray().slice(0, 4),
+ 0x00,
+ ])
+ );
+ });
+ });
+ });
+
+ describe('TextDecoder', function () {
+ describe('decode()', function () {
+ it('fatal is false by default', function () {
+ const decoder = new TextDecoder();
+
+ expect(decoder.fatal).toBeFalse();
+ });
+
+ it('ignoreBOM is false by default', function () {
+ const decoder = new TextDecoder();
+
+ expect(decoder.ignoreBOM).toBeFalse();
+ });
+
+ it('fatal is true when passed', function () {
+ const decoder = new TextDecoder(undefined, {fatal: true});
+
+ expect(decoder.fatal).toBeTrue();
+ });
+
+ it('ignoreBOM is true when passed', function () {
+ const decoder = new TextDecoder(undefined, {ignoreBOM: true});
+
+ expect(decoder.ignoreBOM).toBeTrue();
+ });
+
+ it('fatal is coerced to a boolean value', function () {
+ const decoder = new TextDecoder(undefined, {fatal: 1});
+
+ expect(decoder.fatal).toBeTrue();
+ });
+
+ it('ignoreBOM is coerced to a boolean value', function () {
+ const decoder = new TextDecoder(undefined, {ignoreBOM: ''});
+
+ expect(decoder.ignoreBOM).toBeFalse();
+ });
+
+ it('throws on empty input', function () {
+ const decoder = new TextDecoder();
+ const input = '';
+
+ expect(() => decoder.decode(input)).toThrowError(
+ 'Provided input cannot be converted to ArrayBufferView or ArrayBuffer'
+ );
+ });
+
+ it('throws on null input', function () {
+ const decoder = new TextDecoder();
+ const input = null;
+
+ expect(() => decoder.decode(input)).toThrowError(
+ 'Provided input cannot be converted to ArrayBufferView or ArrayBuffer'
+ );
+ });
+
+ it('throws on invalid encoding label', function () {
+ expect(() => new TextDecoder('bad')).toThrowError(
+ "Invalid encoding label: 'bad'"
+ );
+ });
+
+ it('decodes undefined as an empty string', function () {
+ const decoder = new TextDecoder();
+ const input = undefined;
+
+ expect(decoder.decode(input)).toBe('');
+ });
+
+ it('decodes UTF-8 byte array (Uint8Array)', function () {
+ const decoder = new TextDecoder();
+ const input = new Uint8Array([...encodedMultibyteCharArray()]);
+
+ expect(decoder.decode(input)).toBe('𝓽𝓮𝔁𝓽');
+ });
+
+ it('ignores byte order marker (BOM)', function () {
+ const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+ const input = new Uint8Array([
+ 0xef,
+ 0xbb,
+ 0xbf,
+ ...encodedMultibyteCharArray(),
+ ]);
+
+ expect(decoder.decode(input)).toBe('𝓽𝓮𝔁𝓽');
+ });
+
+ it('handles invalid byte order marker (BOM)', function () {
+ const decoder = new TextDecoder('utf-8', {ignoreBOM: true});
+ const input = new Uint8Array([
+ 0xef,
+ 0xbb,
+ 0x89,
+ ...encodedMultibyteCharArray(),
+ ]);
+
+ expect(decoder.decode(input)).toBe('ﻉ𝓽𝓮𝔁𝓽');
+ });
+ });
+
+ describe('UTF-8 Encoding Converter', function () {
+ it('can decode (not fatal)', function () {
+ const decoder = new TextDecoder();
+
+ const decoded = decoder.decode(new Uint8Array([120, 193, 120]));
+ expect(decoded).toEqual('x�x');
+ });
+
+ it('can decode (fatal)', function () {
+ const decoder = new TextDecoder(undefined, {
+ fatal: true,
+ });
+
+ expect(() => {
+ decoder.decode(new Uint8Array([120, 193, 120]));
+ }).toThrowError(
+ TypeError,
+ /malformed UTF-8 character sequence/
+ );
+ });
+ });
+
+ describe('Multi-byte Encoding Converter (iconv)', function () {
+ it('can decode Big-5', function () {
+ const decoder = new TextDecoder('big5');
+ const bytes = [
+ 164, 164, 177, 192, 183, 124, 177, 181, 168, 252, 184, 103,
+ 192, 217, 179, 161, 188, 208, 183, 199, 192, 203, 197, 231,
+ 167, 189, 169, 101, 176, 85,
+ ];
+
+ const decoded = decoder.decode(new Uint8Array(bytes));
+ expect(decoded).toEqual('中推會接受經濟部標準檢驗局委託');
+ });
+
+ it('can decode Big-5 with incorrect input bytes', function () {
+ const decoder = new TextDecoder('big5');
+ const bytes = [
+ 164, 164, 177, 192, 183, 124,
+ // Invalid byte...
+ 0xa1,
+ ];
+
+ const decoded = decoder.decode(new Uint8Array(bytes));
+ expect(decoded).toEqual('中推會�');
+ });
+
+ it('can decode Big-5 with long incorrect input bytes', function () {
+ const decoder = new TextDecoder('big5');
+ const bytes = [164, 164, 177, 192, 183, 124];
+ const baseLength = 1000;
+ const longBytes = new Array(baseLength)
+ .fill(bytes, 0, baseLength)
+ .flat();
+
+ // Append invalid byte sequence...
+ longBytes.push(0xa3);
+
+ const decoded = decoder.decode(new Uint8Array(longBytes));
+
+ const baseResult = '中推會';
+ const longResult = [
+ ...new Array(baseLength).fill(baseResult, 0, baseLength),
+ '�',
+ ].join('');
+
+ expect(decoded).toEqual(longResult);
+ });
+
+ it('can decode Big-5 HKSCS with supplemental characters', function () {
+ // The characters below roughly mean 'hard' or 'solid' and
+ // 'rooster' respectively. They were chosen for their Unicode
+ // and HKSCS positioning, not meaning.
+
+ // Big5-HKSCS bytes for the supplemental character 𠕇
+ const supplementalBytes = [250, 64];
+ // Big5-HKSCS bytes for the non-supplemental characters 公雞
+ const nonSupplementalBytes = [164, 189, 194, 251];
+
+ const decoder = new TextDecoder('big5-hkscs');
+
+ // We currently allocate 12 additional bytes of padding
+ // and a minimum of 256...
+
+ // This should produce 400 non-supplemental characters (50 * 2 * 4),
+ // i.e. 800 UTF-16 bytes, plus 16 supplemental UTF-16 bytes (4 * 4)
+ const repeatedNonSupplementalBytes = new Array(50).fill(nonSupplementalBytes).flat();
+ const bytes = [
+ ...repeatedNonSupplementalBytes,
+ ...supplementalBytes,
+ ...repeatedNonSupplementalBytes,
+ ...supplementalBytes,
+ ...repeatedNonSupplementalBytes,
+ ...supplementalBytes,
+ ...repeatedNonSupplementalBytes,
+ ...supplementalBytes,
+ ];
+
+ const expectedNonSupplemental = new Array(50).fill('公雞');
+ const expected = [
+ ...expectedNonSupplemental,
+ '𠕇',
+ ...expectedNonSupplemental,
+ '𠕇',
+ ...expectedNonSupplemental,
+ '𠕇',
+ ...expectedNonSupplemental,
+ '𠕇',
+ ].join('');
+
+ // Calculate the number of bytes the UTF-16 characters should
+ // occupy. Spreading the string iterates it by code point.
+ const expectedU16Bytes = [...expected].reduce((prev, next) => {
+ const utf16code = next.codePointAt(0);
+
+ // Code points above 0xFFFF are counted as supplemental (2 extra bytes)
+ const additionalBytes = utf16code > 0xFFFF ? 2 : 0;
+
+ return prev + 2 + additionalBytes;
+ }, 0);
+
+
+ // We set a minimum buffer allocation of 256 bytes,
+ // this ensures that this test exceeds that.
+ expect(expectedU16Bytes / 2).toBeGreaterThan(256);
+
+ // The length of the input bytes should always be less
+ // than the expected output because UTF-16 uses 4 bytes
+ // to represent some characters HKSCS needs only 2 for.
+ expect(bytes.length).toBeLessThan(expectedU16Bytes);
+ // 4 supplemental characters, each with two additional bytes.
+ expect(bytes.length + 4 * 2).toBe(expectedU16Bytes);
+
+ const decoded = decoder.decode(new Uint8Array(bytes));
+
+ expect(decoded).toBe(expected);
+ });
+ });
+
+ describe('Single Byte Encoding Converter', function () {
+ it('can decode legacy single byte encoding (not fatal)', function () {
+ const decoder = new TextDecoder('iso-8859-6');
+
+ const decoded = decoder.decode(new Uint8Array([161, 200, 200]));
+ expect(decoded).toEqual('�بب');
+ });
+
+ it('can decode legacy single byte encoding (fatal)', function () {
+ const decoder = new TextDecoder('iso-8859-6', {
+ fatal: true,
+ });
+
+ expect(() => {
+ decoder.decode(new Uint8Array([161, 200, 200]));
+ }).toThrowError(
+ TypeError,
+ 'Invalid byte sequence in conversion input'
+ );
+ });
+
+ it('can decode ASCII', function () {
+ const input = new Uint8Array([0x89, 0x95, 0x9f, 0xbf]);
+ const decoder = new TextDecoder('ascii');
+ expect(decoder.decode(input)).toBe('‰•Ÿ¿');
+ });
+
+ // Table taken straight from https://encoding.spec.whatwg.org/encodings.json
+ const encodingsTable = loadJSONFromResource(
+ 'resource:///org/gjs/jsunit/modules/encodings.json'
+ );
+
+ const singleByteEncodings = encodingsTable.filter(group => {
+ return group.heading === 'Legacy single-byte encodings';
+ })[0].encodings; // only the first group with that heading is used
+
+ const buffer = new ArrayBuffer(255); // NOTE(review): covers byte values 0..254 only; 0xff is not exercised - confirm this is intended
+ const view = new Uint8Array(buffer);
+
+ for (let i = 0, l = view.byteLength; i < l; i++)
+ view[i] = i; // each byte holds its own index
+
+ for (let i = 0, l = singleByteEncodings.length; i < l; i++) {
+ const encoding = singleByteEncodings[i];
+
+ // One spec per encoding; every label of that encoding is checked inside it.
+ it(`${encoding.name} can be decoded.`, function () {
+ for (const label of encoding.labels) {
+ const decoder = new TextDecoder(label);
+ expect(() => decoder.decode(view)).not.toThrow();
+ expect(decoder.encoding).toBe(
+ encoding.name.toLowerCase()
+ );
+ }
+ });
+ }
+ });
+ });
+});
diff --git a/jsconfig.json b/jsconfig.json
new file mode 100644
index 00000000..43feb2ed
--- /dev/null
+++ b/jsconfig.json
@@ -0,0 +1,5 @@
+{
+ "compilerOptions": {
+ "lib": ["es2020"],
+ }
+}
\ No newline at end of file
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]