[gjs/ewlsh/text-encoding: 2/5] modules: Implement fatal TextEncoder and TextDecoder APIs




commit 480f6d6aa493cd0bf1285246365092429d74b891
Author: Evan Welsh <contact evanwelsh com>
Date:   Sun Jul 4 22:13:25 2021 -0700

    modules: Implement fatal TextEncoder and TextDecoder APIs

 .eslintignore                        |   2 +
 .eslintrc.yml                        |   5 +
 js.gresource.xml                     |   2 +
 modules/core/_encodings.js           | 304 +++++++++++++++++++++++++++++++++++
 modules/core/_text.js                | 150 +++++++++++++++++
 modules/script/_bootstrap/default.js |  13 ++
 6 files changed, 476 insertions(+)
---
diff --git a/.eslintignore b/.eslintignore
index 9ee950d3..8f8f93ff 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -3,4 +3,6 @@
 
 installed-tests/js/jasmine.js
 installed-tests/js/modules/badOverrides/WarnLib.js
+# Until ESLint merges class fields.
+modules/core/_text.js
 modules/script/jsUnit.js
diff --git a/.eslintrc.yml b/.eslintrc.yml
index 7ddf0e38..dadf40bd 100644
--- a/.eslintrc.yml
+++ b/.eslintrc.yml
@@ -242,6 +242,9 @@ rules:
     - inside
   yield-star-spacing: error
   yoda: error
+settings:
+  jsdoc:
+    mode: typescript
 globals:
   ARGV: readonly
   Debugger: readonly
@@ -254,5 +257,7 @@ globals:
   print: readonly
   printerr: readonly
   window: readonly
+  TextEncoder: readonly
+  TextDecoder: readonly
 parserOptions:
   ecmaVersion: 2020
diff --git a/js.gresource.xml b/js.gresource.xml
index fc55e597..a0b37730 100644
--- a/js.gresource.xml
+++ b/js.gresource.xml
@@ -42,8 +42,10 @@
 
     <file>modules/core/_cairo.js</file>
     <file>modules/core/_common.js</file>
+    <file>modules/core/_encodings.js</file>
     <file>modules/core/_format.js</file>
     <file>modules/core/_gettext.js</file>
     <file>modules/core/_signals.js</file>
+    <file>modules/core/_text.js</file>
   </gresource>
 </gresources>
diff --git a/modules/core/_encodings.js b/modules/core/_encodings.js
new file mode 100644
index 00000000..d729be55
--- /dev/null
+++ b/modules/core/_encodings.js
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: Node.js contributors. All rights reserved.
+
+// Modified from https://github.com/nodejs/node/blob/78680c1cbc8b0c435963bc512e826b2a6227c315/lib/internal/encoding.js
+// Data derived from https://encoding.spec.whatwg.org/encodings.json
+
+/* exported getEncodingFromLabel */
+
+const encodings = new Map([
+    ['unicode-1-1-utf-8', 'utf-8'],
+    ['unicode11utf8', 'utf-8'],
+    ['unicode20utf8', 'utf-8'],
+    ['utf-8', 'utf-8'],
+    ['utf8', 'utf-8'],
+    ['x-unicode20utf8', 'utf-8'],
+    ['866', 'ibm866'],
+    ['cp866', 'ibm866'],
+    ['csibm866', 'ibm866'],
+    ['ibm866', 'ibm866'],
+    ['csisolatin2', 'iso-8859-2'],
+    ['iso-8859-2', 'iso-8859-2'],
+    ['iso-ir-101', 'iso-8859-2'],
+    ['iso8859-2', 'iso-8859-2'],
+    ['iso88592', 'iso-8859-2'],
+    ['iso_8859-2', 'iso-8859-2'],
+    ['iso_8859-2:1987', 'iso-8859-2'],
+    ['l2', 'iso-8859-2'],
+    ['latin2', 'iso-8859-2'],
+    ['csisolatin3', 'iso-8859-3'],
+    ['iso-8859-3', 'iso-8859-3'],
+    ['iso-ir-109', 'iso-8859-3'],
+    ['iso8859-3', 'iso-8859-3'],
+    ['iso88593', 'iso-8859-3'],
+    ['iso_8859-3', 'iso-8859-3'],
+    ['iso_8859-3:1988', 'iso-8859-3'],
+    ['l3', 'iso-8859-3'],
+    ['latin3', 'iso-8859-3'],
+    ['csisolatin4', 'iso-8859-4'],
+    ['iso-8859-4', 'iso-8859-4'],
+    ['iso-ir-110', 'iso-8859-4'],
+    ['iso8859-4', 'iso-8859-4'],
+    ['iso88594', 'iso-8859-4'],
+    ['iso_8859-4', 'iso-8859-4'],
+    ['iso_8859-4:1988', 'iso-8859-4'],
+    ['l4', 'iso-8859-4'],
+    ['latin4', 'iso-8859-4'],
+    ['csisolatincyrillic', 'iso-8859-5'],
+    ['cyrillic', 'iso-8859-5'],
+    ['iso-8859-5', 'iso-8859-5'],
+    ['iso-ir-144', 'iso-8859-5'],
+    ['iso8859-5', 'iso-8859-5'],
+    ['iso88595', 'iso-8859-5'],
+    ['iso_8859-5', 'iso-8859-5'],
+    ['iso_8859-5:1988', 'iso-8859-5'],
+    ['arabic', 'iso-8859-6'],
+    ['asmo-708', 'iso-8859-6'],
+    ['csiso88596e', 'iso-8859-6'],
+    ['csiso88596i', 'iso-8859-6'],
+    ['csisolatinarabic', 'iso-8859-6'],
+    ['ecma-114', 'iso-8859-6'],
+    ['iso-8859-6', 'iso-8859-6'],
+    ['iso-8859-6-e', 'iso-8859-6'],
+    ['iso-8859-6-i', 'iso-8859-6'],
+    ['iso-ir-127', 'iso-8859-6'],
+    ['iso8859-6', 'iso-8859-6'],
+    ['iso88596', 'iso-8859-6'],
+    ['iso_8859-6', 'iso-8859-6'],
+    ['iso_8859-6:1987', 'iso-8859-6'],
+    ['csisolatingreek', 'iso-8859-7'],
+    ['ecma-118', 'iso-8859-7'],
+    ['elot_928', 'iso-8859-7'],
+    ['greek', 'iso-8859-7'],
+    ['greek8', 'iso-8859-7'],
+    ['iso-8859-7', 'iso-8859-7'],
+    ['iso-ir-126', 'iso-8859-7'],
+    ['iso8859-7', 'iso-8859-7'],
+    ['iso88597', 'iso-8859-7'],
+    ['iso_8859-7', 'iso-8859-7'],
+    ['iso_8859-7:1987', 'iso-8859-7'],
+    ['sun_eu_greek', 'iso-8859-7'],
+    ['csiso88598e', 'iso-8859-8'],
+    ['csisolatinhebrew', 'iso-8859-8'],
+    ['hebrew', 'iso-8859-8'],
+    ['iso-8859-8', 'iso-8859-8'],
+    ['iso-8859-8-e', 'iso-8859-8'],
+    ['iso-ir-138', 'iso-8859-8'],
+    ['iso8859-8', 'iso-8859-8'],
+    ['iso88598', 'iso-8859-8'],
+    ['iso_8859-8', 'iso-8859-8'],
+    ['iso_8859-8:1988', 'iso-8859-8'],
+    ['visual', 'iso-8859-8'],
+    ['csiso88598i', 'iso-8859-8-i'],
+    ['iso-8859-8-i', 'iso-8859-8-i'],
+    ['logical', 'iso-8859-8-i'],
+    ['csisolatin6', 'iso-8859-10'],
+    ['iso-8859-10', 'iso-8859-10'],
+    ['iso-ir-157', 'iso-8859-10'],
+    ['iso8859-10', 'iso-8859-10'],
+    ['iso885910', 'iso-8859-10'],
+    ['l6', 'iso-8859-10'],
+    ['latin6', 'iso-8859-10'],
+    ['iso-8859-13', 'iso-8859-13'],
+    ['iso8859-13', 'iso-8859-13'],
+    ['iso885913', 'iso-8859-13'],
+    ['iso-8859-14', 'iso-8859-14'],
+    ['iso8859-14', 'iso-8859-14'],
+    ['iso885914', 'iso-8859-14'],
+    ['csisolatin9', 'iso-8859-15'],
+    ['iso-8859-15', 'iso-8859-15'],
+    ['iso8859-15', 'iso-8859-15'],
+    ['iso885915', 'iso-8859-15'],
+    ['iso_8859-15', 'iso-8859-15'],
+    ['l9', 'iso-8859-15'],
+    ['iso-8859-16', 'iso-8859-16'],
+    ['cskoi8r', 'koi8-r'],
+    ['koi', 'koi8-r'],
+    ['koi8', 'koi8-r'],
+    ['koi8-r', 'koi8-r'],
+    ['koi8_r', 'koi8-r'],
+    ['koi8-ru', 'koi8-u'],
+    ['koi8-u', 'koi8-u'],
+    ['csmacintosh', 'macintosh'],
+    ['mac', 'macintosh'],
+    ['macintosh', 'macintosh'],
+    ['x-mac-roman', 'macintosh'],
+    ['dos-874', 'windows-874'],
+    ['iso-8859-11', 'windows-874'],
+    ['iso8859-11', 'windows-874'],
+    ['iso885911', 'windows-874'],
+    ['tis-620', 'windows-874'],
+    ['windows-874', 'windows-874'],
+    ['cp1250', 'windows-1250'],
+    ['windows-1250', 'windows-1250'],
+    ['x-cp1250', 'windows-1250'],
+    ['cp1251', 'windows-1251'],
+    ['windows-1251', 'windows-1251'],
+    ['x-cp1251', 'windows-1251'],
+    ['ansi_x3.4-1968', 'windows-1252'],
+    ['ascii', 'windows-1252'],
+    ['cp1252', 'windows-1252'],
+    ['cp819', 'windows-1252'],
+    ['csisolatin1', 'windows-1252'],
+    ['ibm819', 'windows-1252'],
+    ['iso-8859-1', 'windows-1252'],
+    ['iso-ir-100', 'windows-1252'],
+    ['iso8859-1', 'windows-1252'],
+    ['iso88591', 'windows-1252'],
+    ['iso_8859-1', 'windows-1252'],
+    ['iso_8859-1:1987', 'windows-1252'],
+    ['l1', 'windows-1252'],
+    ['latin1', 'windows-1252'],
+    ['us-ascii', 'windows-1252'],
+    ['windows-1252', 'windows-1252'],
+    ['x-cp1252', 'windows-1252'],
+    ['cp1253', 'windows-1253'],
+    ['windows-1253', 'windows-1253'],
+    ['x-cp1253', 'windows-1253'],
+    ['cp1254', 'windows-1254'],
+    ['csisolatin5', 'windows-1254'],
+    ['iso-8859-9', 'windows-1254'],
+    ['iso-ir-148', 'windows-1254'],
+    ['iso8859-9', 'windows-1254'],
+    ['iso88599', 'windows-1254'],
+    ['iso_8859-9', 'windows-1254'],
+    ['iso_8859-9:1989', 'windows-1254'],
+    ['l5', 'windows-1254'],
+    ['latin5', 'windows-1254'],
+    ['windows-1254', 'windows-1254'],
+    ['x-cp1254', 'windows-1254'],
+    ['cp1255', 'windows-1255'],
+    ['windows-1255', 'windows-1255'],
+    ['x-cp1255', 'windows-1255'],
+    ['cp1256', 'windows-1256'],
+    ['windows-1256', 'windows-1256'],
+    ['x-cp1256', 'windows-1256'],
+    ['cp1257', 'windows-1257'],
+    ['windows-1257', 'windows-1257'],
+    ['x-cp1257', 'windows-1257'],
+    ['cp1258', 'windows-1258'],
+    ['windows-1258', 'windows-1258'],
+    ['x-cp1258', 'windows-1258'],
+    ['x-mac-cyrillic', 'x-mac-cyrillic'],
+    ['x-mac-ukrainian', 'x-mac-cyrillic'],
+    ['chinese', 'gbk'],
+    ['csgb2312', 'gbk'],
+    ['csiso58gb231280', 'gbk'],
+    ['gb2312', 'gbk'],
+    ['gb_2312', 'gbk'],
+    ['gb_2312-80', 'gbk'],
+    ['gbk', 'gbk'],
+    ['iso-ir-58', 'gbk'],
+    ['x-gbk', 'gbk'],
+    ['gb18030', 'gb18030'],
+    ['big5', 'big5'],
+    ['big5-hkscs', 'big5'],
+    ['cn-big5', 'big5'],
+    ['csbig5', 'big5'],
+    ['x-x-big5', 'big5'],
+    ['cseucpkdfmtjapanese', 'euc-jp'],
+    ['euc-jp', 'euc-jp'],
+    ['x-euc-jp', 'euc-jp'],
+    ['csiso2022jp', 'iso-2022-jp'],
+    ['iso-2022-jp', 'iso-2022-jp'],
+    ['csshiftjis', 'shift_jis'],
+    ['ms932', 'shift_jis'],
+    ['ms_kanji', 'shift_jis'],
+    ['shift-jis', 'shift_jis'],
+    ['shift_jis', 'shift_jis'],
+    ['sjis', 'shift_jis'],
+    ['windows-31j', 'shift_jis'],
+    ['x-sjis', 'shift_jis'],
+    ['cseuckr', 'euc-kr'],
+    ['csksc56011987', 'euc-kr'],
+    ['euc-kr', 'euc-kr'],
+    ['iso-ir-149', 'euc-kr'],
+    ['korean', 'euc-kr'],
+    ['ks_c_5601-1987', 'euc-kr'],
+    ['ks_c_5601-1989', 'euc-kr'],
+    ['ksc5601', 'euc-kr'],
+    ['ksc_5601', 'euc-kr'],
+    ['windows-949', 'euc-kr'],
+    ['csiso2022kr', 'replacement'],
+    ['hz-gb-2312', 'replacement'],
+    ['iso-2022-cn', 'replacement'],
+    ['iso-2022-cn-ext', 'replacement'],
+    ['iso-2022-kr', 'replacement'],
+    ['replacement', 'replacement'],
+    ['unicodefffe', 'utf-16be'],
+    ['utf-16be', 'utf-16be'],
+    ['csunicode', 'utf-16le'],
+    ['iso-10646-ucs-2', 'utf-16le'],
+    ['ucs-2', 'utf-16le'],
+    ['unicode', 'utf-16le'],
+    ['unicodefeff', 'utf-16le'],
+    ['utf-16', 'utf-16le'],
+    ['utf-16le', 'utf-16le'],
+    ['x-user-defined', 'x-user-defined'],
+]);
+
+
+// Some of the web-specified encodings use
+// aliases which aren't supported in iconv
+const internalEncodings = new Map([
+    // For our purposes we can encode 8-i as 8
+    ['iso-8859-8-i', 'iso-8859-8'],
+]);
+
+/**
+ * Trims ASCII whitespace from a string.
+ * `String.prototype.trim` removes non-ASCII whitespace.
+ *
+ * @param {string} label the label to trim
+ * @returns {string}
+ */
+const trimAsciiWhitespace = label => {
+    let s = 0;
+    let e = label.length;
+    while (s < e && (
+        label[s] === '\u0009' ||
+        label[s] === '\u000a' ||
+        label[s] === '\u000c' ||
+        label[s] === '\u000d' ||
+        label[s] === '\u0020'))
+        s++;
+
+    while (e > s && (
+        label[e - 1] === '\u0009' ||
+        label[e - 1] === '\u000a' ||
+        label[e - 1] === '\u000c' ||
+        label[e - 1] === '\u000d' ||
+        label[e - 1] === '\u0020'))
+        e--;
+
+    return label.slice(s, e);
+};
+
+/**
+ * @typedef Encoding
+ * @property {string} internalLabel
+ * @property {string} label
+ */
+
+/**
+ * @param {string} label the encoding label
+ * @returns {Encoding | null}
+ */
+function getEncodingFromLabel(label) {
+    let encoding = encodings.get(label);
+
+    if (encoding === undefined) {
+        const trimmedLabel = trimAsciiWhitespace(label.toLowerCase());
+        encoding = encodings.get(trimmedLabel);
+    }
+
+    if (!encoding)
+        return null;
+
+    let internalEncoding = internalEncodings.get(encoding);
+
+    return {
+        label: encoding,
+        internalLabel: internalEncoding ?? encoding,
+    };
+}
diff --git a/modules/core/_text.js b/modules/core/_text.js
new file mode 100644
index 00000000..a54d4342
--- /dev/null
+++ b/modules/core/_text.js
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: MIT OR LGPL-2.0-or-later
+// SPDX-FileCopyrightText: Evan Welsh
+
+const Encoding = imports._encodingNative;
+
+const { getEncodingFromLabel } = imports._encodings;
+
+var TextDecoder = class TextDecoder {
+    /**  
+     * @type {string}
+     */
+    encoding;
+
+    /**  
+     * @type {boolean}
+     */
+    ignoreBOM;
+
+    /**  
+     * @type {boolean}
+     */
+    fatal;
+
+    get [Symbol.toStringTag]() {
+        return 'TextDecoder';
+    }
+
+    /**
+     * @param {string} encoding 
+     * @param {object} [options]
+     * @param {boolean=} options.fatal
+     * @param {boolean=} options.ignoreBOM 
+     */
+    constructor(encoding = 'utf-8', options = {}) {
+        const { fatal = false, ignoreBOM = false } = options;
+
+        const encodingDefinition = getEncodingFromLabel(`${encoding}`);
+
+        if (!encodingDefinition) {
+            throw new RangeError(`Invalid encoding label: '${encoding}'`);
+        }
+
+        if (encodingDefinition.label === 'replacement') {
+            throw new RangeError(`Unsupported replacement encoding: '${encoding}'`);
+        }
+
+        Object.defineProperty(this, '_internalEncoding', {
+            value: encodingDefinition.internalLabel,
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        Object.defineProperty(this, 'encoding', {
+            value: encodingDefinition.label,
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        Object.defineProperty(this, 'ignoreBOM', {
+            value: Boolean(ignoreBOM),
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+
+        Object.defineProperty(this, 'fatal', {
+            value: Boolean(fatal),
+            enumerable: true,
+            writable: false,
+            configurable: false,
+        });
+    }
+
+
+
+    /**
+     * @param {unknown} bytes 
+     * @param {object} [options]
+     * @param {boolean=} options.stream
+     * @returns 
+     */
+    decode(bytes, options = {}) {
+        const { stream = false } = options;
+
+        if (stream) {
+            throw new Error(`TextDecoder does not implement the 'stream' option.`);
+        }
+
+        /** @type {Uint8Array} */
+        let input;
+
+        if (bytes instanceof ArrayBuffer) {
+            input = new Uint8Array(bytes);
+        } else if (bytes instanceof Uint8Array) {
+            input = bytes;
+        } else if (bytes instanceof Object.getPrototypeOf(Uint8Array)) {
+            let { buffer, byteLength, byteOffset } = /** @type {Uint32Array} */ (bytes);
+            input = new Uint8Array(buffer, byteOffset, byteLength);
+        } else if (
+            typeof bytes === "object" &&
+            bytes !== null &&
+            "buffer" in bytes &&
+            bytes.buffer instanceof ArrayBuffer
+        ) {
+            let { buffer, byteLength, byteOffset } = bytes;
+            input = new Uint8Array(
+                buffer,
+                byteOffset,
+                byteLength
+            );
+        } else if (bytes === undefined) {
+            input = new Uint8Array(0);
+        } else {
+            throw new Error(`Provided input cannot be converted to ArrayBufferView or ArrayBuffer`);
+        }
+
+        if (this.ignoreBOM && input.length > 2 && input[0] === 0xEF && input[1] === 0xBB && input[2] === 0xBF) {
+            if (this.encoding !== 'utf-8') {
+                throw new Error(`Cannot ignore BOM for non-UTF8 encoding.`);
+            }
+
+            let { buffer, byteLength, byteOffset } = input;
+            input = new Uint8Array(buffer, byteOffset + 3, byteLength - 3);
+        }
+
+        return Encoding.decode(input, this._internalEncoding);
+    }
+}
+
+var TextEncoder = class TextEncoder {
+    get [Symbol.toStringTag]() {
+        return 'TextEncoder';
+    }
+
+    get encoding() {
+        return 'utf-8';
+    }
+
+    encode(input = '') {
+        // The TextEncoder specification only allows for UTF-8 encoding.
+        return Encoding.encode(`${input}`, 'UTF-8');
+    }
+
+    encodeInto(input = '', output = new Uint8Array()) {
+        // The TextEncoder specification only allows for UTF-8 encoding.
+        return Encoding.encodeInto(`${input}`, output);
+    }
+}
\ No newline at end of file
diff --git a/modules/script/_bootstrap/default.js b/modules/script/_bootstrap/default.js
index 952d7fe3..fe354a02 100644
--- a/modules/script/_bootstrap/default.js
+++ b/modules/script/_bootstrap/default.js
@@ -6,6 +6,7 @@
     'use strict';
 
     const {print, printerr, log, logError} = imports._print;
+    const {TextEncoder, TextDecoder} = imports._text;
 
     Object.defineProperties(exports, {
         ARGV: {
@@ -16,6 +17,18 @@
                 return imports.system.programArgs;
             },
         },
+        TextEncoder: {
+            configurable: false,
+            enumerable: true,
+            writable: false,
+            value: TextEncoder,
+        },
+        TextDecoder: {
+            configurable: false,
+            enumerable: true,
+            writable: false,
+            value: TextDecoder,
+        },
         print: {
             configurable: false,
             enumerable: true,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]