[gjs/ewlsh/text-encoding: 2/5] modules: Implement fatal TextEncoder and TextDecoder APIs
- From: Evan Welsh <ewlsh src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gjs/ewlsh/text-encoding: 2/5] modules: Implement fatal TextEncoder and TextDecoder APIs
- Date: Mon, 5 Jul 2021 05:20:13 +0000 (UTC)
commit 989a65387924179d83e459f0341616fe315436c5
Author: Evan Welsh <contact evanwelsh com>
Date: Sun Jul 4 22:13:25 2021 -0700
modules: Implement fatal TextEncoder and TextDecoder APIs
.eslintignore | 2 +
.eslintrc.yml | 5 +
js.gresource.xml | 2 +
modules/core/_encodings.js | 304 +++++++++++++++++++++++++++++++++++
modules/core/_text.js | 150 +++++++++++++++++
modules/script/_bootstrap/default.js | 13 ++
6 files changed, 476 insertions(+)
---
diff --git a/.eslintignore b/.eslintignore
index 9ee950d3..8f8f93ff 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -3,4 +3,6 @@
installed-tests/js/jasmine.js
installed-tests/js/modules/badOverrides/WarnLib.js
+# Ignored until ESLint supports class fields.
+modules/core/_text.js
modules/script/jsUnit.js
diff --git a/.eslintrc.yml b/.eslintrc.yml
index 7ddf0e38..dadf40bd 100644
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@@ -242,6 +242,9 @@ rules:
- inside
yield-star-spacing: error
yoda: error
+settings:
+ jsdoc:
+ mode: typescript
globals:
ARGV: readonly
Debugger: readonly
@@ -254,5 +257,7 @@ globals:
print: readonly
printerr: readonly
window: readonly
+ TextEncoder: readonly
+ TextDecoder: readonly
parserOptions:
ecmaVersion: 2020
diff --git a/js.gresource.xml b/js.gresource.xml
index fc55e597..a0b37730 100644
--- a/js.gresource.xml
+++ b/js.gresource.xml
@@ -42,8 +42,10 @@
<file>modules/core/_cairo.js</file>
<file>modules/core/_common.js</file>
+ <file>modules/core/_encodings.js</file>
<file>modules/core/_format.js</file>
<file>modules/core/_gettext.js</file>
<file>modules/core/_signals.js</file>
+ <file>modules/core/_text.js</file>
</gresource>
</gresources>
diff --git a/modules/core/_encodings.js b/modules/core/_encodings.js
new file mode 100644
index 00000000..d729be55
--- /dev/null
+++ b/modules/core/_encodings.js
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Node.js contributors. All rights reserved.
+
+// Modified from https://github.com/nodejs/node/blob/78680c1cbc8b0c435963bc512e826b2a6227c315/lib/internal/encoding.js
+// Data derived from https://encoding.spec.whatwg.org/encodings.json
+
+/* exported getEncodingFromLabel */
+
+const encodings = new Map([ // lowercase label -> canonical encoding name
+ ['unicode-1-1-utf-8', 'utf-8'],
+ ['unicode11utf8', 'utf-8'],
+ ['unicode20utf8', 'utf-8'],
+ ['utf-8', 'utf-8'],
+ ['utf8', 'utf-8'],
+ ['x-unicode20utf8', 'utf-8'],
+ ['866', 'ibm866'],
+ ['cp866', 'ibm866'],
+ ['csibm866', 'ibm866'],
+ ['ibm866', 'ibm866'],
+ ['csisolatin2', 'iso-8859-2'],
+ ['iso-8859-2', 'iso-8859-2'],
+ ['iso-ir-101', 'iso-8859-2'],
+ ['iso8859-2', 'iso-8859-2'],
+ ['iso88592', 'iso-8859-2'],
+ ['iso_8859-2', 'iso-8859-2'],
+ ['iso_8859-2:1987', 'iso-8859-2'],
+ ['l2', 'iso-8859-2'],
+ ['latin2', 'iso-8859-2'],
+ ['csisolatin3', 'iso-8859-3'],
+ ['iso-8859-3', 'iso-8859-3'],
+ ['iso-ir-109', 'iso-8859-3'],
+ ['iso8859-3', 'iso-8859-3'],
+ ['iso88593', 'iso-8859-3'],
+ ['iso_8859-3', 'iso-8859-3'],
+ ['iso_8859-3:1988', 'iso-8859-3'],
+ ['l3', 'iso-8859-3'],
+ ['latin3', 'iso-8859-3'],
+ ['csisolatin4', 'iso-8859-4'],
+ ['iso-8859-4', 'iso-8859-4'],
+ ['iso-ir-110', 'iso-8859-4'],
+ ['iso8859-4', 'iso-8859-4'],
+ ['iso88594', 'iso-8859-4'],
+ ['iso_8859-4', 'iso-8859-4'],
+ ['iso_8859-4:1988', 'iso-8859-4'],
+ ['l4', 'iso-8859-4'],
+ ['latin4', 'iso-8859-4'],
+ ['csisolatincyrillic', 'iso-8859-5'],
+ ['cyrillic', 'iso-8859-5'],
+ ['iso-8859-5', 'iso-8859-5'],
+ ['iso-ir-144', 'iso-8859-5'],
+ ['iso8859-5', 'iso-8859-5'],
+ ['iso88595', 'iso-8859-5'],
+ ['iso_8859-5', 'iso-8859-5'],
+ ['iso_8859-5:1988', 'iso-8859-5'],
+ ['arabic', 'iso-8859-6'],
+ ['asmo-708', 'iso-8859-6'],
+ ['csiso88596e', 'iso-8859-6'],
+ ['csiso88596i', 'iso-8859-6'],
+ ['csisolatinarabic', 'iso-8859-6'],
+ ['ecma-114', 'iso-8859-6'],
+ ['iso-8859-6', 'iso-8859-6'],
+ ['iso-8859-6-e', 'iso-8859-6'],
+ ['iso-8859-6-i', 'iso-8859-6'],
+ ['iso-ir-127', 'iso-8859-6'],
+ ['iso8859-6', 'iso-8859-6'],
+ ['iso88596', 'iso-8859-6'],
+ ['iso_8859-6', 'iso-8859-6'],
+ ['iso_8859-6:1987', 'iso-8859-6'],
+ ['csisolatingreek', 'iso-8859-7'],
+ ['ecma-118', 'iso-8859-7'],
+ ['elot_928', 'iso-8859-7'],
+ ['greek', 'iso-8859-7'],
+ ['greek8', 'iso-8859-7'],
+ ['iso-8859-7', 'iso-8859-7'],
+ ['iso-ir-126', 'iso-8859-7'],
+ ['iso8859-7', 'iso-8859-7'],
+ ['iso88597', 'iso-8859-7'],
+ ['iso_8859-7', 'iso-8859-7'],
+ ['iso_8859-7:1987', 'iso-8859-7'],
+ ['sun_eu_greek', 'iso-8859-7'],
+ ['csiso88598e', 'iso-8859-8'],
+ ['csisolatinhebrew', 'iso-8859-8'],
+ ['hebrew', 'iso-8859-8'],
+ ['iso-8859-8', 'iso-8859-8'],
+ ['iso-8859-8-e', 'iso-8859-8'],
+ ['iso-ir-138', 'iso-8859-8'],
+ ['iso8859-8', 'iso-8859-8'],
+ ['iso88598', 'iso-8859-8'],
+ ['iso_8859-8', 'iso-8859-8'],
+ ['iso_8859-8:1988', 'iso-8859-8'],
+ ['visual', 'iso-8859-8'],
+ ['csiso88598i', 'iso-8859-8-i'],
+ ['iso-8859-8-i', 'iso-8859-8-i'],
+ ['logical', 'iso-8859-8-i'],
+ ['csisolatin6', 'iso-8859-10'],
+ ['iso-8859-10', 'iso-8859-10'],
+ ['iso-ir-157', 'iso-8859-10'],
+ ['iso8859-10', 'iso-8859-10'],
+ ['iso885910', 'iso-8859-10'],
+ ['l6', 'iso-8859-10'],
+ ['latin6', 'iso-8859-10'],
+ ['iso-8859-13', 'iso-8859-13'],
+ ['iso8859-13', 'iso-8859-13'],
+ ['iso885913', 'iso-8859-13'],
+ ['iso-8859-14', 'iso-8859-14'],
+ ['iso8859-14', 'iso-8859-14'],
+ ['iso885914', 'iso-8859-14'],
+ ['csisolatin9', 'iso-8859-15'],
+ ['iso-8859-15', 'iso-8859-15'],
+ ['iso8859-15', 'iso-8859-15'],
+ ['iso885915', 'iso-8859-15'],
+ ['iso_8859-15', 'iso-8859-15'],
+ ['l9', 'iso-8859-15'],
+ ['iso-8859-16', 'iso-8859-16'],
+ ['cskoi8r', 'koi8-r'],
+ ['koi', 'koi8-r'],
+ ['koi8', 'koi8-r'],
+ ['koi8-r', 'koi8-r'],
+ ['koi8_r', 'koi8-r'],
+ ['koi8-ru', 'koi8-u'],
+ ['koi8-u', 'koi8-u'],
+ ['csmacintosh', 'macintosh'],
+ ['mac', 'macintosh'],
+ ['macintosh', 'macintosh'],
+ ['x-mac-roman', 'macintosh'],
+ ['dos-874', 'windows-874'],
+ ['iso-8859-11', 'windows-874'],
+ ['iso8859-11', 'windows-874'],
+ ['iso885911', 'windows-874'],
+ ['tis-620', 'windows-874'],
+ ['windows-874', 'windows-874'],
+ ['cp1250', 'windows-1250'],
+ ['windows-1250', 'windows-1250'],
+ ['x-cp1250', 'windows-1250'],
+ ['cp1251', 'windows-1251'],
+ ['windows-1251', 'windows-1251'],
+ ['x-cp1251', 'windows-1251'],
+ ['ansi_x3.4-1968', 'windows-1252'],
+ ['ascii', 'windows-1252'],
+ ['cp1252', 'windows-1252'],
+ ['cp819', 'windows-1252'],
+ ['csisolatin1', 'windows-1252'],
+ ['ibm819', 'windows-1252'],
+ ['iso-8859-1', 'windows-1252'],
+ ['iso-ir-100', 'windows-1252'],
+ ['iso8859-1', 'windows-1252'],
+ ['iso88591', 'windows-1252'],
+ ['iso_8859-1', 'windows-1252'],
+ ['iso_8859-1:1987', 'windows-1252'],
+ ['l1', 'windows-1252'],
+ ['latin1', 'windows-1252'],
+ ['us-ascii', 'windows-1252'],
+ ['windows-1252', 'windows-1252'],
+ ['x-cp1252', 'windows-1252'],
+ ['cp1253', 'windows-1253'],
+ ['windows-1253', 'windows-1253'],
+ ['x-cp1253', 'windows-1253'],
+ ['cp1254', 'windows-1254'],
+ ['csisolatin5', 'windows-1254'],
+ ['iso-8859-9', 'windows-1254'],
+ ['iso-ir-148', 'windows-1254'],
+ ['iso8859-9', 'windows-1254'],
+ ['iso88599', 'windows-1254'],
+ ['iso_8859-9', 'windows-1254'],
+ ['iso_8859-9:1989', 'windows-1254'],
+ ['l5', 'windows-1254'],
+ ['latin5', 'windows-1254'],
+ ['windows-1254', 'windows-1254'],
+ ['x-cp1254', 'windows-1254'],
+ ['cp1255', 'windows-1255'],
+ ['windows-1255', 'windows-1255'],
+ ['x-cp1255', 'windows-1255'],
+ ['cp1256', 'windows-1256'],
+ ['windows-1256', 'windows-1256'],
+ ['x-cp1256', 'windows-1256'],
+ ['cp1257', 'windows-1257'],
+ ['windows-1257', 'windows-1257'],
+ ['x-cp1257', 'windows-1257'],
+ ['cp1258', 'windows-1258'],
+ ['windows-1258', 'windows-1258'],
+ ['x-cp1258', 'windows-1258'],
+ ['x-mac-cyrillic', 'x-mac-cyrillic'],
+ ['x-mac-ukrainian', 'x-mac-cyrillic'],
+ ['chinese', 'gbk'],
+ ['csgb2312', 'gbk'],
+ ['csiso58gb231280', 'gbk'],
+ ['gb2312', 'gbk'],
+ ['gb_2312', 'gbk'],
+ ['gb_2312-80', 'gbk'],
+ ['gbk', 'gbk'],
+ ['iso-ir-58', 'gbk'],
+ ['x-gbk', 'gbk'],
+ ['gb18030', 'gb18030'],
+ ['big5', 'big5'],
+ ['big5-hkscs', 'big5'],
+ ['cn-big5', 'big5'],
+ ['csbig5', 'big5'],
+ ['x-x-big5', 'big5'],
+ ['cseucpkdfmtjapanese', 'euc-jp'],
+ ['euc-jp', 'euc-jp'],
+ ['x-euc-jp', 'euc-jp'],
+ ['csiso2022jp', 'iso-2022-jp'],
+ ['iso-2022-jp', 'iso-2022-jp'],
+ ['csshiftjis', 'shift_jis'],
+ ['ms932', 'shift_jis'],
+ ['ms_kanji', 'shift_jis'],
+ ['shift-jis', 'shift_jis'],
+ ['shift_jis', 'shift_jis'],
+ ['sjis', 'shift_jis'],
+ ['windows-31j', 'shift_jis'],
+ ['x-sjis', 'shift_jis'],
+ ['cseuckr', 'euc-kr'],
+ ['csksc56011987', 'euc-kr'],
+ ['euc-kr', 'euc-kr'],
+ ['iso-ir-149', 'euc-kr'],
+ ['korean', 'euc-kr'],
+ ['ks_c_5601-1987', 'euc-kr'],
+ ['ks_c_5601-1989', 'euc-kr'],
+ ['ksc5601', 'euc-kr'],
+ ['ksc_5601', 'euc-kr'],
+ ['windows-949', 'euc-kr'],
+ ['csiso2022kr', 'replacement'], // labels resolving to 'replacement' are rejected by the TextDecoder constructor
+ ['hz-gb-2312', 'replacement'],
+ ['iso-2022-cn', 'replacement'],
+ ['iso-2022-cn-ext', 'replacement'],
+ ['iso-2022-kr', 'replacement'],
+ ['replacement', 'replacement'],
+ ['unicodefffe', 'utf-16be'],
+ ['utf-16be', 'utf-16be'],
+ ['csunicode', 'utf-16le'],
+ ['iso-10646-ucs-2', 'utf-16le'],
+ ['ucs-2', 'utf-16le'],
+ ['unicode', 'utf-16le'],
+ ['unicodefeff', 'utf-16le'],
+ ['utf-16', 'utf-16le'], // a bare 'utf-16' label resolves to little-endian
+ ['utf-16le', 'utf-16le'],
+ ['x-user-defined', 'x-user-defined'],
+]);
+
+
+// Some of the web-specified encoding names are aliases which iconv does not
+// support; this maps each such name to the name used internally instead.
+const internalEncodings = new Map([
+ // For our purposes iso-8859-8-i can be handled as iso-8859-8
+ ['iso-8859-8-i', 'iso-8859-8'],
+]);
+
+/**
+ * Trims leading and trailing ASCII whitespace (tab, LF, FF, CR, space) from a string.
+ * `String.prototype.trim` is not used because it also removes non-ASCII whitespace.
+ *
+ * @param {string} label the label to trim
+ * @returns {string} the part of `label` left after dropping whitespace from both ends
+ */
+const trimAsciiWhitespace = label => {
+ let s = 0; // index of the first character to keep
+ let e = label.length; // index one past the last character to keep
+ while (s < e && ( // advance past leading whitespace
+ label[s] === '\u0009' ||
+ label[s] === '\u000a' ||
+ label[s] === '\u000c' ||
+ label[s] === '\u000d' ||
+ label[s] === '\u0020'))
+ s++;
+
+ while (e > s && ( // back up over trailing whitespace
+ label[e - 1] === '\u0009' ||
+ label[e - 1] === '\u000a' ||
+ label[e - 1] === '\u000c' ||
+ label[e - 1] === '\u000d' ||
+ label[e - 1] === '\u0020'))
+ e--;
+
+ return label.slice(s, e); // empty string when the label is all whitespace
+};
+
+/**
+ * @typedef Encoding
+ * @property {string} internalLabel the `internalEncodings` override for `label`, or `label` itself when there is none
+ * @property {string} label the canonical encoding name found in `encodings`
+ */
+
+/** Resolves an encoding label to its canonical name and the name used internally.
+ * @param {string} label the encoding label
+ * @returns {Encoding | null} the resolved names, or null when the label is not in `encodings`
+ */
+function getEncodingFromLabel(label) {
+ let encoding = encodings.get(label); // first try the label exactly as given
+
+ if (encoding === undefined) {
+ const trimmedLabel = trimAsciiWhitespace(label.toLowerCase()); // retry lowercased with surrounding ASCII whitespace removed
+ encoding = encodings.get(trimmedLabel);
+ }
+
+ if (!encoding)
+ return null;
+
+ let internalEncoding = internalEncodings.get(encoding); // undefined unless an override exists
+
+ return {
+ label: encoding,
+ internalLabel: internalEncoding ?? encoding,
+ };
+}
diff --git a/modules/core/_text.js b/modules/core/_text.js
new file mode 100644
index 00000000..a54d4342
--- /dev/null
+++ b/modules/core/_text.js
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Evan Welsh
+
+const Encoding = imports._encodingNative;
+
+const { getEncodingFromLabel } = imports._encodings;
+
+var TextDecoder = class TextDecoder { // decodes byte buffers by delegating to Encoding.decode from imports._encodingNative
+ /** Canonical encoding label; defined read-only by the constructor.
+ * @type {string}
+ */
+ encoding;
+
+ /** Controls the UTF-8 BOM handling in decode(); defined read-only by the constructor.
+ * @type {boolean}
+ */
+ ignoreBOM;
+
+ /** Boolean-coerced options.fatal; defined read-only by the constructor.
+ * @type {boolean}
+ */
+ fatal;
+
+ get [Symbol.toStringTag]() {
+ return 'TextDecoder';
+ }
+
+ /** Resolves the encoding label and defines the read-only instance properties; throws RangeError for unknown or 'replacement' labels.
+ * @param {string} encoding label of the encoding to decode from, 'utf-8' when omitted
+ * @param {object} [options]
+ * @param {boolean=} options.fatal stored on the instance; decode() in this class does not pass it on to Encoding.decode
+ * @param {boolean=} options.ignoreBOM stored on the instance and consulted by decode()
+ */
+ constructor(encoding = 'utf-8', options = {}) {
+ const { fatal = false, ignoreBOM = false } = options;
+
+ const encodingDefinition = getEncodingFromLabel(`${encoding}`); // template literal coerces non-string labels to a string
+
+ if (!encodingDefinition) { // label not present in the encodings table
+ throw new RangeError(`Invalid encoding label: '${encoding}'`);
+ }
+
+ if (encodingDefinition.label === 'replacement') { // known label, but this encoding is refused
+ throw new RangeError(`Unsupported replacement encoding: '${encoding}'`);
+ }
+
+ Object.defineProperty(this, '_internalEncoding', { // the name handed to Encoding.decode
+ value: encodingDefinition.internalLabel,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'encoding', {
+ value: encodingDefinition.label,
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'ignoreBOM', {
+ value: Boolean(ignoreBOM),
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+
+ Object.defineProperty(this, 'fatal', {
+ value: Boolean(fatal),
+ enumerable: true,
+ writable: false,
+ configurable: false,
+ });
+ }
+
+
+
+ /** Normalises the input to a Uint8Array and decodes it with Encoding.decode.
+ * @param {unknown} bytes an ArrayBuffer, a typed array, an object whose `buffer` is an ArrayBuffer, or undefined (treated as empty)
+ * @param {object} [options]
+ * @param {boolean=} options.stream not implemented; a truthy value makes decode() throw
+ * @returns the value returned by Encoding.decode (presumably the decoded string; confirm against the native module)
+ */
+ decode(bytes, options = {}) {
+ const { stream = false } = options;
+
+ if (stream) {
+ throw new Error(`TextDecoder does not implement the 'stream' option.`);
+ }
+
+ /** @type {Uint8Array} */
+ let input;
+
+ if (bytes instanceof ArrayBuffer) {
+ input = new Uint8Array(bytes);
+ } else if (bytes instanceof Uint8Array) {
+ input = bytes; // used as-is, no copy
+ } else if (bytes instanceof Object.getPrototypeOf(Uint8Array)) { // the shared TypedArray constructor: any other typed array
+ let { buffer, byteLength, byteOffset } = /** @type {Uint32Array} */ (bytes);
+ input = new Uint8Array(buffer, byteOffset, byteLength); // byte view over the same memory
+ } else if (
+ typeof bytes === "object" &&
+ bytes !== null &&
+ "buffer" in bytes &&
+ bytes.buffer instanceof ArrayBuffer
+ ) { // any other object exposing an ArrayBuffer as `buffer` (e.g. a DataView)
+ let { buffer, byteLength, byteOffset } = bytes;
+ input = new Uint8Array(
+ buffer,
+ byteOffset,
+ byteLength
+ );
+ } else if (bytes === undefined) {
+ input = new Uint8Array(0);
+ } else {
+ throw new Error(`Provided input cannot be converted to ArrayBufferView or ArrayBuffer`);
+ }
+
+ if (this.ignoreBOM && input.length > 2 && input[0] === 0xEF && input[1] === 0xBB && input[2] ===
0xBF) { // EF BB BF is the UTF-8 byte order mark
+ if (this.encoding !== 'utf-8') { // the byte check above runs for every encoding, so this can throw for non-UTF-8 input
+ throw new Error(`Cannot ignore BOM for non-UTF8 encoding.`);
+ }
+
+ let { buffer, byteLength, byteOffset } = input;
+ input = new Uint8Array(buffer, byteOffset + 3, byteLength - 3); // NOTE(review): BOM is dropped only when ignoreBOM is true; the WHATWG spec describes ignoreBOM=true as keeping the BOM - confirm intended semantics
+ }
+
+ return Encoding.decode(input, this._internalEncoding); // NOTE(review): this.fatal is not forwarded here
+ }
+}
+
+var TextEncoder = class TextEncoder { // UTF-8-only encoder that delegates to the native Encoding module
+ get [Symbol.toStringTag]() {
+ return 'TextEncoder';
+ }
+
+ get encoding() { // always 'utf-8'; there is no constructor option to change it
+ return 'utf-8';
+ }
+
+ encode(input = '') { // input is coerced to a string; returns what Encoding.encode returns (presumably a Uint8Array - TODO confirm)
+ // The TextEncoder specification only allows for UTF-8 encoding.
+ return Encoding.encode(`${input}`, 'UTF-8');
+ }
+
+ encodeInto(input = '', output = new Uint8Array()) { // output defaults to an empty array; return shape comes from Encoding.encodeInto (presumably {read, written} - TODO confirm)
+ // The TextEncoder specification only allows for UTF-8 encoding.
+ return Encoding.encodeInto(`${input}`, output);
+ }
+}
\ No newline at end of file
diff --git a/modules/script/_bootstrap/default.js b/modules/script/_bootstrap/default.js
index 952d7fe3..fe354a02 100644
--- a/modules/script/_bootstrap/default.js
+++ b/modules/script/_bootstrap/default.js
@@ -6,6 +6,7 @@
'use strict';
const {print, printerr, log, logError} = imports._print;
+ const {TextEncoder, TextDecoder} = imports._text;
Object.defineProperties(exports, {
ARGV: {
@@ -16,6 +17,18 @@
return imports.system.programArgs;
},
},
+ TextEncoder: {
+ configurable: false,
+ enumerable: true,
+ writable: false,
+ value: TextEncoder,
+ },
+ TextDecoder: {
+ configurable: false,
+ enumerable: true,
+ writable: false,
+ value: TextDecoder,
+ },
print: {
configurable: false,
enumerable: true,
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]