[vte] parser: Improve test programme
- From: Christian Persch <chpe src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [vte] parser: Improve test programme
- Date: Tue, 27 Mar 2018 17:41:57 +0000 (UTC)
commit cc76ded64133f942d5074fbc22120ab3f2c73f9a
Author: Christian Persch <chpe src gnome org>
Date: Tue Mar 27 19:40:12 2018 +0200
parser: Improve test programme
src/Makefile.am | 25 ++--
src/interpret.cc | 231 -----------------------------
src/iso2022.cc | 2 -
src/iso2022.h | 1 -
src/parser-cat.cc | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 424 insertions(+), 249 deletions(-)
---
diff --git a/src/Makefile.am b/src/Makefile.am
index aedd030..c82f02f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -175,7 +175,7 @@ vteresources.cc: vte.gresource.xml Makefile $(shell $(GLIB_COMPILE_RESOURCES) --
# Misc unit tests and utilities
-noinst_PROGRAMS += interpret slowcat test-parser
+noinst_PROGRAMS += parser-cat slowcat test-parser
noinst_SCRIPTS = decset osc window
EXTRA_DIST += $(noinst_SCRIPTS)
@@ -230,7 +230,7 @@ reflect_vte_CFLAGS = $(VTE_CFLAGS) $(AM_CFLAGS)
reflect_vte_SOURCES = reflect.c
reflect_vte_LDADD = libvte-$(VTE_API_VERSION).la $(VTE_LIBS)
-interpret_SOURCES = \
+parser_cat_SOURCES = \
buffer.h \
caps.hh \
debug.cc \
@@ -239,32 +239,27 @@ interpret_SOURCES = \
iso2022.h \
parser.cc \
parser.hh \
+ parser-arg.hh \
parser-charset.hh \
parser-charset-tables.hh \
parser-cmd.hh \
parser-glue.hh \
+ parser-cat.cc \
vteconv.cc \
vteconv.h \
- interpret.cc
-interpret_CPPFLAGS = \
- -DINTERPRET_MAIN \
- -DVTE_API_VERSION=\"$(VTE_API_VERSION)\" \
+ $(NULL)
+parser_cat_CPPFLAGS = \
-I$(builddir) \
-I$(srcdir) \
$(AM_CPPFLAGS)
-interpret_CFLAGS = \
+parser_cat_CFLAGS = \
$(GLIB_CFLAGS) \
- $(GOBJECT_CFLAGS) \
- $(GTK_CFLAGS) \
$(AM_CFLAGS)
-interpret_CXXFLAGS = \
+parser_cat_CXXFLAGS = \
$(GLIB_CFLAGS) \
- $(GOBJECT_CFLAGS) \
- $(GTK_CFLAGS) \
$(AM_CXXFLAGS)
-interpret_LDADD = \
- $(GLIB_LIBS) \
- $(GOBJECT_LIBS)
+parser_cat_LDADD = \
+ $(GLIB_LIBS)
slowcat_SOURCES = \
slowcat.c \
diff --git a/src/iso2022.cc b/src/iso2022.cc
index 44c492f..bad0ee6 100644
--- a/src/iso2022.cc
+++ b/src/iso2022.cc
@@ -39,8 +39,6 @@
#endif
#include <glib/gi18n-lib.h>
-#include <gdk/gdkkeysyms.h>
-
/* An invalid codepoint. */
#define INVALID_CODEPOINT 0xFFFD
diff --git a/src/iso2022.h b/src/iso2022.h
index 4b28edc..0b0f8bd 100644
--- a/src/iso2022.h
+++ b/src/iso2022.h
@@ -23,7 +23,6 @@
#include <glib.h>
-#include <glib-object.h>
#include "buffer.h"
G_BEGIN_DECLS
diff --git a/src/parser-cat.cc b/src/parser-cat.cc
new file mode 100644
index 0000000..ef72900
--- /dev/null
+++ b/src/parser-cat.cc
@@ -0,0 +1,414 @@
+/*
+ * Copyright (C) 2001,2002,2003 Red Hat, Inc.
+ * Copyright © 2017, 2018 Christian Persch
+ *
+ * This programme is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This programme is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <glib.h>
+#include <locale.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include <cassert>
+#include <cstring>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+
+#include "debug.h"
+#include "iso2022.h"
+#include "parser.hh"
+
+/*
+ * seq_to_str:
+ * @type: a VTE_SEQ_* sequence type
+ *
+ * Maps a sequence type to a static, human-readable name.
+ *
+ * Passing a value that has no case below is a programming error and
+ * trips the assertion. When assertions are compiled out (NDEBUG), a
+ * placeholder string is returned instead, so that control never flows
+ * off the end of this non-void function (which would be undefined
+ * behaviour) and callers can always pass the result to a "%s" format.
+ *
+ * Returns: a pointer to a string literal; never nullptr
+ */
+static char const*
+seq_to_str(unsigned int type)
+{
+        switch (type) {
+        case VTE_SEQ_NONE:    return "NONE";
+        case VTE_SEQ_IGNORE:  return "IGNORE";
+        case VTE_SEQ_GRAPHIC: return "GRAPHIC";
+        case VTE_SEQ_CONTROL: return "CONTROL";
+        case VTE_SEQ_ESCAPE:  return "ESCAPE";
+        case VTE_SEQ_CSI:     return "CSI";
+        case VTE_SEQ_DCS:     return "DCS";
+        case VTE_SEQ_OSC:     return "OSC";
+        default:
+                break;
+        }
+
+        assert(false);
+        return "UNKNOWN";
+}
+
+static char const*
+cmd_to_str(unsigned int command) /* Maps a VTE_CMD_* value to its name, or nullptr when no case matches. */
+{
+ switch (command) {
+#define _VTE_CMD(cmd) case VTE_CMD_##cmd: return #cmd; /* each entry becomes one case returning the stringified name */
+#include "parser-cmd.hh" /* expanded with the _VTE_CMD definition above */
+#undef _VTE_CMD
+ default:
+ return nullptr; /* NOTE(review): callers in this file pass the result straight to "%s" */
+ }
+}
+
+#if 0 /* Everything up to the matching #endif is compiled out: charset name helpers, currently unused. */
+static char const*
+charset_alias_to_str(unsigned int cs) /* Name of an alias charset, or nullptr if cs is not an alias. */
+{
+ switch (cs) {
+#define _VTE_CHARSET_PASTE(name)
+#define _VTE_CHARSET(name) _VTE_CHARSET_PASTE(name)
+#define _VTE_CHARSET_ALIAS_PASTE(name1,name2) case VTE_CHARSET_##name1: return #name1 "(" ## #name2 ## ")"; /* NOTE(review): adjacent string literals concatenate without ##; pasting them with ## looks invalid - confirm before enabling */
+#define _VTE_CHARSET_ALIAS(name1,name2) /* NOTE(review): expands to nothing here; presumably meant to forward to _VTE_CHARSET_ALIAS_PASTE - verify against parser-charset.hh */
+#include "parser-charset.hh"
+#undef _VTE_CHARSET_PASTE
+#undef _VTE_CHARSET
+#undef _VTE_CHARSET_ALIAS_PASTE
+#undef _VTE_CHARSET_ALIAS
+ default:
+ return nullptr; /* not an alias */
+ }
+}
+
+static char const*
+charset_to_str(unsigned int cs) /* Name of a charset: alias name first, then plain name, else a numeric placeholder. */
+{
+ auto alias = charset_alias_to_str(cs);
+ if (alias)
+ return alias;
+
+ switch (cs) {
+#define _VTE_CHARSET_PASTE(name) case VTE_CHARSET_##name: return #name; /* one case per non-alias charset */
+#define _VTE_CHARSET(name) _VTE_CHARSET_PASTE(name)
+#define _VTE_CHARSET_ALIAS_PASTE(name1,name2)
+#define _VTE_CHARSET_ALIAS(name1,name2)
+#include "parser-charset.hh"
+#undef _VTE_CHARSET_PASTE
+#undef _VTE_CHARSET
+#undef _VTE_CHARSET_ALIAS_PASTE
+#undef _VTE_CHARSET_ALIAS
+ default:
+ static char buf[32]; /* shared static buffer: the returned pointer is overwritten by the next unknown-charset call */
+ snprintf(buf, sizeof(buf), "UNKOWN(%u)", cs); /* NOTE(review): "UNKOWN" is presumably a misspelling of "UNKNOWN" */
+ return buf;
+ }
+}
+#endif
+
+#define SEQ_START "\e[7m" /* intro written before a highlighted sequence; SGR 7 (negative image) per ECMA-48. "\e" is a compiler extension for ESC. */
+#define SEQ_END "\e[27m" /* matching outro; SGR 27 (positive image) */
+
+#define SEQ_START_RED "\e[7;31m" /* intro used for NONE/IGNORE output; SGR 7 plus 31 (red foreground) */
+#define SEQ_END_RED "\e[27;39m" /* matching outro; SGR 27 plus 39 (default foreground) */
+
+/*
+ * printer:
+ *
+ * Scope guard that brackets text appended to a GString: the
+ * constructor appends @intro and the destructor appends @outro, so
+ * whatever is appended to the string while the object is alive ends
+ * up between the two. In plain mode neither string is appended.
+ *
+ * The object only borrows @str and @outro; both must outlive it.
+ */
+class printer {
+        GString* m_target;
+        bool m_plain;
+        char const* m_suffix;
+
+public:
+        printer(GString* str,
+                bool plain,
+                char const* intro,
+                char const* outro)
+                : m_target{str},
+                  m_plain{plain},
+                  m_suffix{outro}
+        {
+                if (m_plain)
+                        return;
+
+                g_string_append(m_target, intro);
+        }
+
+        ~printer()
+        {
+                if (m_plain)
+                        return;
+
+                g_string_append(m_target, m_suffix);
+        }
+};
+
+/*
+ * print_params:
+ * @str: string to append to
+ * @seq: sequence whose arguments are printed
+ *
+ * Appends the arguments of @seq in order. An argument with a default
+ * value contributes no digits. Every argument except the last is
+ * followed by a separator: ':' when the argument is non-final, ';'
+ * otherwise.
+ *
+ * Returns: true if @seq has at least one argument
+ */
+static bool
+print_params(GString* str,
+             struct vte_seq const* seq)
+{
+        unsigned int const count = seq->n_args;
+
+        for (unsigned int idx = 0; idx < count; ++idx) {
+                auto const param = seq->args[idx];
+
+                if (!vte_seq_arg_default(param))
+                        g_string_append_printf(str, "%d", vte_seq_arg_value(param));
+
+                /* No separator after the last argument */
+                if (idx + 1 == count)
+                        continue;
+
+                if (vte_seq_arg_nonfinal(param))
+                        g_string_append_c(str, ':');
+                else
+                        g_string_append_c(str, ';');
+        }
+
+        return count != 0;
+}
+
+/*
+ * print_intermediates:
+ * @str: string to append to
+ * @intermediates: bit mask in which bit (c - 0x20) stands for character c
+ * @start: first character to test
+ * @end: last character to test (inclusive)
+ *
+ * Appends, in ascending order, every character in [@start, @end]
+ * whose bit is set in @intermediates.
+ *
+ * Returns: true if at least one character was appended
+ */
+static bool
+print_intermediates(GString* str,
+                    unsigned int intermediates,
+                    unsigned int start,
+                    unsigned int end)
+{
+        bool emitted = false;
+
+        for (unsigned int ch = start; ch <= end; ++ch) {
+                unsigned int const bit = 1U << (ch - 0x20);
+
+                if ((intermediates & bit) == 0)
+                        continue;
+
+                g_string_append_c(str, ch);
+                emitted = true;
+        }
+
+        return emitted;
+}
+
+/*
+ * print_seq_and_params:
+ * @str: string to append to
+ * @seq: the sequence to describe
+ * @plain: when true, no highlighting escapes are emitted
+ *
+ * Appends a braced description of @seq, wrapped in
+ * SEQ_START/SEQ_END unless @plain is set.
+ *
+ * If the sequence resolved to a command, the output is the command
+ * name followed by the parameters. Otherwise it is the sequence type
+ * name, the intermediates in the 0x30..0x3f range, the parameters,
+ * the intermediates in the 0x20..0x2f range and the terminator
+ * character.
+ */
+static void
+print_seq_and_params(GString* str,
+                     const struct vte_seq *seq,
+                     bool plain)
+{
+        /* Appends SEQ_END when this function returns */
+        printer highlight(str, plain, SEQ_START, SEQ_END);
+
+        if (seq->command != VTE_CMD_NONE) {
+                g_string_append_printf(str, "{%s ", cmd_to_str(seq->command));
+                print_params(str, seq);
+                g_string_append_c(str, '}');
+                return;
+        }
+
+        g_string_append_printf(str, "{%s ", seq_to_str(seq->type));
+
+        if (seq->intermediates & 0xffff0000U) {
+                if (print_intermediates(str, seq->intermediates, 0x30, 0x3f))
+                        g_string_append_c(str, ' ');
+        }
+
+        if (print_params(str, seq))
+                g_string_append_c(str, ' ');
+
+        if (seq->intermediates & 0x0000ffffU) {
+                if (print_intermediates(str, seq->intermediates, 0x20, 0x2f))
+                        g_string_append_c(str, ' ');
+        }
+
+        g_string_append_printf(str, " %c}", seq->terminator);
+}
+
+/*
+ * print_seq:
+ * @str: string to append to
+ * @seq: the sequence to describe
+ * @codepoints: when true, graphic characters are shown with their
+ *   code point number
+ * @plain: when true, no highlighting escapes are emitted
+ *
+ * Appends a textual representation of @seq according to its type:
+ *  - NONE and IGNORE produce a fixed marker, highlighted in red;
+ *  - GRAPHIC produces the character itself, or a bracketed hex code
+ *    point when @codepoints is set or the character is not printable;
+ *  - CONTROL and ESCAPE produce the command name in braces;
+ *  - CSI, DCS and OSC are handed to print_seq_and_params().
+ */
+static void
+print_seq(GString* str,
+          struct vte_seq const* seq,
+          bool codepoints,
+          bool plain)
+{
+        switch (seq->type) {
+        case VTE_SEQ_NONE: {
+                printer red(str, plain, SEQ_START_RED, SEQ_END_RED);
+                g_string_append(str, "{NONE}");
+                break;
+        }
+
+        case VTE_SEQ_IGNORE: {
+                printer red(str, plain, SEQ_START_RED, SEQ_END_RED);
+                g_string_append(str, "{IGN}");
+                break;
+        }
+
+        case VTE_SEQ_GRAPHIC: {
+                auto const ch = seq->terminator;
+                bool const printable = g_unichar_isprint(ch);
+
+                /* Common case: emit the character as is */
+                if (printable && !codepoints) {
+                        g_string_append_unichar(str, ch);
+                        break;
+                }
+
+                if (!printable) {
+                        g_string_append_printf(str, "[%04X]", ch);
+                        break;
+                }
+
+                /* Printable, but the code point number was requested too */
+                char utf8[7];
+                auto const n_bytes = g_unichar_to_utf8(ch, utf8);
+                utf8[n_bytes] = 0;
+                g_string_append_printf(str, "[%04X %s]", ch, utf8);
+                break;
+        }
+
+        case VTE_SEQ_CONTROL:
+        case VTE_SEQ_ESCAPE: {
+                printer highlight(str, plain, SEQ_START, SEQ_END);
+                g_string_append_printf(str, "{%s}", cmd_to_str(seq->command));
+                break;
+        }
+
+        case VTE_SEQ_CSI:
+        case VTE_SEQ_DCS:
+        case VTE_SEQ_OSC:
+                print_seq_and_params(str, seq, plain);
+                break;
+
+        default:
+                assert(false);
+        }
+}
+
+/*
+ * printout:
+ * @str: the accumulated output line
+ *
+ * Prints the contents of @str followed by a newline via g_print(),
+ * then empties @str so it can be reused for the next line.
+ */
+static void
+printout(GString* str)
+{
+        char const* const line = str->str;
+
+        g_print("%s\n", line);
+        g_string_truncate(str, 0);
+}
+
+static gsize seq_stats[VTE_SEQ_N]; /* per sequence type counters, indexed by the value vte_parser_feed() returns */
+static gsize cmd_stats[VTE_CMD_N]; /* per command counters, indexed by seq->command */
+
+/*
+ * process_file:
+ * @fd: file descriptor to read from; not closed by this function
+ * @charset: charset name handed to _vte_iso2022_state_new()
+ * @codepoints: passed through to print_seq()
+ * @plain: passed through to print_seq()
+ *
+ * Reads @fd until end of file or a read error, converts the bytes to
+ * unicode characters with _vte_iso2022_process(), feeds each
+ * character to the parser and prints every resulting sequence. Output
+ * is flushed after each LF command and once more at the end.
+ * seq_stats and cmd_stats are updated along the way.
+ *
+ * Bytes that the conversion step did not consume are moved to the
+ * start of the buffer and the next read() appends after them. The
+ * amount handed to the conversion is therefore the carried-over bytes
+ * plus the bytes just read, not only the latter.
+ *
+ * A parser error is reported on stderr and stops processing of @fd.
+ * If the parser cannot be created the function returns silently.
+ */
+static void
+process_file(int fd,
+             char const* charset,
+             bool codepoints,
+             bool plain)
+{
+        struct vte_parser *parser;
+        if (vte_parser_new(&parser) != 0)
+                return;
+
+        auto subst = _vte_iso2022_state_new(charset);
+
+        gsize const buf_size = 16384;
+        guchar* buf = g_new0(guchar, buf_size);
+        auto unichars = g_array_new(FALSE, FALSE, sizeof(gunichar));
+        auto outbuf = g_string_sized_new(buf_size);
+
+        /* Number of unconsumed bytes carried over at the start of buf */
+        gsize buf_start = 0;
+        bool parse_error = false;
+
+        while (!parse_error) {
+                /* If the buffer is completely filled with unconsumed
+                 * bytes, this requests 0 bytes and the loop ends as
+                 * for end of file.
+                 */
+                auto len = read(fd, buf + buf_start, buf_size - buf_start);
+                if (len == 0)
+                        break;
+                if (len == -1) {
+                        /* Retry when interrupted or when no data is ready yet */
+                        if (errno == EAGAIN || errno == EINTR)
+                                continue;
+                        break;
+                }
+
+                /* Valid data is the carried-over prefix plus the new bytes */
+                gsize const avail = buf_start + (gsize)len;
+
+                g_array_set_size(unichars, 0);
+                auto plen = _vte_iso2022_process(subst, buf, avail, unichars);
+                if ((gsize)plen != avail) {
+                        /* Save the unconsumed tail for the next round */
+                        memmove(buf, buf + plen, avail - plen);
+                        buf_start = avail - plen;
+                } else {
+                        buf_start = 0;
+                }
+
+                auto wbuf = &g_array_index(unichars, gunichar, 0);
+                gsize wcount = unichars->len;
+
+                struct vte_seq *seq;
+                for (gsize i = 0; i < wcount; i++) {
+                        auto ret = vte_parser_feed(parser,
+                                                   &seq,
+                                                   wbuf[i]);
+                        if (ret < 0) {
+                                g_printerr("Parser error!\n");
+                                parse_error = true;
+                                break;
+                        }
+
+                        seq_stats[ret]++;
+                        if (ret != VTE_SEQ_NONE) {
+                                cmd_stats[seq->command]++;
+                                print_seq(outbuf, seq, codepoints, plain);
+                                if (seq->command == VTE_CMD_LF)
+                                        printout(outbuf);
+                        }
+                }
+        }
+
+        /* Flush whatever is left, also after a parser error */
+        printout(outbuf);
+        g_string_free(outbuf, TRUE);
+        g_array_free(unichars, TRUE);
+        g_free(buf);
+        vte_parser_free(parser);
+        _vte_iso2022_state_free(subst);
+}
+
+/*
+ * main:
+ *
+ * Parses the command line, then runs process_file() on every file
+ * named on it, or on standard input when no file is given. The file
+ * name "-" also stands for standard input. With --statistics, the
+ * per-type and per-command counters are printed to stderr afterwards.
+ *
+ * A file that cannot be opened is reported on stderr and skipped. The
+ * message is built with g_strerror() rather than the "%m" conversion,
+ * which is a C library extension that is not available everywhere.
+ * Standard input is not closed after use, since this programme did
+ * not open it.
+ *
+ * Returns: EXIT_FAILURE if the command line could not be parsed,
+ *   0 otherwise
+ */
+int
+main(int argc,
+     char *argv[])
+{
+        char* charset = nullptr;
+        gboolean codepoints = false;
+        gboolean plain = false;
+        gboolean statistics = false;
+        char** filenames = nullptr;
+        GOptionEntry const entries[] = {
+                { "charset", 'c', 0, G_OPTION_ARG_STRING, &charset,
+                  "Charset to use (default: UTF-8)", "CHARSET" },
+                { "codepoints", 'u', 0, G_OPTION_ARG_NONE, &codepoints,
+                  "Output unicode code points by number", nullptr },
+                { "plain", 'p', 0, G_OPTION_ARG_NONE, &plain,
+                  "Output plain text without attributes", nullptr },
+                { "statistics", 's', 0, G_OPTION_ARG_NONE, &statistics,
+                  "Output statistics", nullptr },
+                { G_OPTION_REMAINING, 0, 0, G_OPTION_ARG_FILENAME_ARRAY, &filenames,
+                  nullptr, nullptr },
+                { nullptr }
+        };
+
+        setlocale(LC_ALL, "");
+        _vte_debug_init();
+
+        auto context = g_option_context_new("[FILE…] — parser cat");
+        g_option_context_set_help_enabled(context, true);
+        g_option_context_add_main_entries(context, entries, nullptr);
+
+        GError* err = nullptr;
+        bool rv = g_option_context_parse(context, &argc, &argv, &err);
+        g_option_context_free(context);
+
+        if (!rv) {
+                g_printerr("Failed to parse arguments: %s\n", err->message);
+                g_error_free(err);
+                return EXIT_FAILURE;
+        }
+
+        memset(&seq_stats, 0, sizeof(seq_stats));
+        memset(&cmd_stats, 0, sizeof(cmd_stats));
+
+        if (filenames != nullptr) {
+                for (auto i = 0; filenames[i] != nullptr; i++) {
+                        char const* filename = filenames[i];
+
+                        if (g_str_equal(filename, "-")) {
+                                process_file(STDIN_FILENO, charset, codepoints, plain);
+                                continue;
+                        }
+
+                        int fd = open(filename, O_RDONLY);
+                        if (fd == -1) {
+                                /* Save errno before any other call can change it */
+                                auto const errsv = errno;
+                                g_printerr("Error opening file %s: %s\n",
+                                           filename, g_strerror(errsv));
+                                continue;
+                        }
+
+                        process_file(fd, charset, codepoints, plain);
+                        close(fd);
+                }
+
+                g_strfreev(filenames);
+        } else {
+                process_file(STDIN_FILENO, charset, codepoints, plain);
+        }
+
+        g_free(charset);
+
+        if (statistics) {
+                for (unsigned int s = VTE_SEQ_NONE + 1; s < VTE_SEQ_N; s++) {
+                        g_printerr("%-7s: %" G_GSIZE_FORMAT "\n", seq_to_str(s), seq_stats[s]);
+                }
+
+                g_printerr("\n");
+                for (unsigned int s = 0; s < VTE_CMD_N; s++) {
+                        if (cmd_stats[s] > 0) {
+                                g_printerr("%-12s: %" G_GSIZE_FORMAT "\n", cmd_to_str(s), cmd_stats[s]);
+                        }
+                }
+        }
+
+        return 0;
+}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]