Re: GJS : How can I use Regex ( parse HTML ) ?



Hi, you can use the global RegExp from the JavaScript standard library, as
in a browser or Node, e.g. /(foobar)/g.exec(html). Alternatively, import
a dedicated HTML parsing module from the npm registry as a dependency
of your project using the https://github.com/cgjs/cgjs CommonJS shim.

If you're parsing a guaranteed-valid XML subset, such as programmatically
generated XHTML with all tags properly closed, then it might be useful
for you to copy and paste this ad hoc tokenizer into your program. Since
the GLib XML tools aren't introspection friendly, I used it to read the
recent files list in a JS app, back when Mozilla hadn't yet
broken musl+clang builds and quietly banned my email provider from the
bug tracker, which led me to stop using SpiderMonkey and GJS in favor
of V8 and Node, though I'm also considering Duktape and something like
Lowjs. The 0BSD license is equivalent to public domain.

```js
#!/usr/bin/env gjs
/** @license 0BSD 2019 makepost <makepost firemail cc> */
/**
 * Ad hoc tokenizer for well-formed, XML-like markup (every element is either
 * explicitly closed or self-closing). The parsed result is exposed as
 * `this.tags`: an array mixing plain strings (text and tag literals that are
 * never closed, such as a doctype) with element objects built by `Xbel.Tag`.
 *
 * NOTE(review): `var` is kept from the original declaration; presumably it is
 * what lets the legacy GJS `imports` mechanism see this binding - confirm
 * before changing it.
 */
var Xbel = class {
  /**
   * Builds the object for one element from its opening or self-closing tag
   * literal.
   *
   * The result has `name` (tag name), `values` (the children passed in) and
   * one string property per `key="value"` attribute. Attribute values are the
   * raw text between the double quotes; entities are not decoded. Any other
   * leftover inside the tag (for example a bare attribute that follows a
   * quoted one) is recorded as `""` -> `null`, as the original code did.
   *
   * @param {string} $x Tag literal, e.g. `<a href="x">` or `<br/>`.
   * @param {any[]} values Children of the element.
   * @returns {any} The element object.
   */
  static Tag($x, values) {
    const x = /** @type {any} */ ({
      // `\/?>$` also strips the slash of `<br/>`, which previously leaked
      // into the name as "br/".
      name: $x.replace(/^<|\s[\s\S]*>$|\/?>$/g, ""),
      values,
    });
    for (const y of $x.split(/\s*(\S*="[^"]*")/)) {
      // Skip empty pieces, the leading `<name ...` piece and the final
      // `>` / `/>` piece.
      if (!y || /^<|^\s*\/?>$/.test(y)) {
        continue;
      }
      // Take the quoted text verbatim instead of JSON.parse-ing it: XML
      // values may contain backslashes or line breaks, which are not valid
      // inside a JSON string and used to throw.
      const pair = /^(\S+?)="([^"]*)"$/.exec(y);
      if (pair) {
        x[pair[1]] = pair[2];
      } else {
        x[""] = null;
      }
    }
    return x;
  }

  /**
   * Tokenizes `$` and folds the tokens into a tree stored in `this.tags`.
   *
   * @param {string} $ Markup to parse.
   * @throws {Error} When a closing tag has no pending opening tag.
   */
  constructor($) {
    // Split into tag literals and text; whitespace right after a tag is dropped.
    const literals = $.split(/(<[^\>]+>)\s*/).filter(Boolean);
    this.tags = literals.reduce((xs, x) => {
      if (!/^<\//.test(x)) {
        // Opening tags and text stay pending as strings; self-closing tags
        // become elements immediately.
        return xs.concat(/\/>$/.test(x) ? Xbel.Tag(x, []) : x);
      }
      // Closing tag: find the nearest pending opening literal for this name.
      // Only closing tags need the search, so it is no longer run for every
      // token. `trim()` accepts `</name >`.
      const opening = `<${x.slice(2, -1).trim()}`;
      let i = xs.length - 1;
      while (
        i >= 0 &&
        !(typeof xs[i] === "string" && xs[i].startsWith(opening))
      ) {
        i--;
      }
      if (i < 0) {
        throw new Error(`Closing tag ${x} has no matching opening tag`);
      }
      // Everything after the opening literal becomes the element's children.
      return xs.slice(0, i).concat(Xbel.Tag(xs[i], xs.slice(i + 1)));
    }, /** @type {any[]} */ ([]));
  }
};
// Publish the class CommonJS-style: first put an empty `exports` object on
// `window`, then attach `Xbel` to `exports`.
// NOTE(review): presumably `window` is the global object in GJS, so the bare
// `exports` on the next line resolves to the object assigned here - confirm
// for the runtime or module shim you use.
/** @type {any} */ (window).exports = {};
exports.Xbel = Xbel;

/**
 * Identity helper: hands back its argument untouched. Annotated with `any`
 * so that wrapping an expression in it relaxes the checked type.
 *
 * @param {any} [x]
 */
const ANY = (x) => {
  return x;
};

// Demo entry point: runs only when the invoked program's basename is
// "xbel.js". Prints the parsed tree of a small inline document, then the
// parsed tree of the user's recently-used.xbel file.
if (
  imports.gi.GLib.basename(imports.system.programInvocationName) === "xbel.js"
) {
  const { GLib } = imports.gi;

  print(
    JSON.stringify(
      new Xbel(
        `<!doctype html><html><head><title>Recent</title></head><body><h1>It works!</h1></body></html>`
      ).tags
    )
  );

  // Resolve the file against the user data directory instead of a path
  // relative to the current working directory, which only worked when the
  // script was started from the home directory.
  const recentPath = GLib.build_filenamev([
    GLib.get_user_data_dir(),
    "recently-used.xbel",
  ]);
  // file_get_contents returns [ok, contents]; only the contents are used.
  const [, recentBytes] = GLib.file_get_contents(recentPath);
  print(
    JSON.stringify(new Xbel(imports.byteArray.toString(recentBytes)).tags)
  );
}
```


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]