[vte] lib: Add infrastructure for data syntax switching

From: Christian Persch <chpe src gnome org>
To: commits-list gnome org
Cc:
Subject: [vte] lib: Add infrastructure for data syntax switching
Date: Sun, 18 Oct 2020 22:17:08 +0000 (UTC)
commit 299e2d1d8138b3de264e34d44b44135918fd017a
Author: Christian Persch <chpe src gnome org>
Date:   Mon Oct 19 00:16:36 2020 +0200

    lib: Add infrastructure for data syntax switching
    
    Add a way for a sequence handler to signal to the processing loop
    that it has switched data syntax, and thus the processing loop must return
    so the handler for the new data syntax can take over.

 src/meson.build    |   1 +
 src/parser-glue.hh |  43 +++++++++++++++++-
 src/parser-seq.py  |  71 +++++++++++++++++++++++++-----
 src/parser-test.cc |  10 ++---
 src/parser.cc      |  86 +++++++++++++++++++++++++++++++-----
 src/parser.hh      |   8 ++++
 src/vte.cc         | 125 ++++++++++++++++++++++++++++++++++++++---------------
 src/vtedefines.hh  |   2 +
 src/vteinternal.hh |  42 ++++++++++++++----
 src/vteseq.cc      |   2 +-
 10 files changed, 319 insertions(+), 71 deletions(-)
---
diff --git a/src/meson.build b/src/meson.build
index 68bfa213..f91582df 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -316,6 +316,7 @@ decoder_cat = executable(
 
 parser_cat_sources = glib_glue_sources + libc_glue_sources + parser_sources + utf8_sources + debug_sources + files(
   'parser-cat.cc',
+  'vtedefines.hh',
 )
 
 parser_cat = executable(
diff --git a/src/parser-glue.hh b/src/parser-glue.hh
index 8813b710..e81d3378 100644
--- a/src/parser-glue.hh
+++ b/src/parser-glue.hh
@@ -58,6 +58,16 @@ public:
                 vte_parser_reset(&m_parser);
         }
 
+        inline void set_dispatch_unripe(bool enable) noexcept
+        {
+                vte_parser_set_dispatch_unripe(&m_parser, enable);
+        }
+
+        inline void ignore_until_st() noexcept
+        {
+                vte_parser_ignore_until_st(&m_parser);
+        }
+
 protected:
         vte_parser_t m_parser;
 }; // class Parser
@@ -165,13 +175,44 @@ public:
          *
          * Whether the sequence was introduced with a C0 or C1 control.
          *
-         * Returns: the introducing character
+         * Returns: true iff the introducer was a C1 control
          */
         inline constexpr bool is_c1() const noexcept
         {
                 return (introducer() & 0x80) != 0;
         }
 
+        /* is_st_c1:
+         *
+         * Whether the control string was terminated with a C0 or C1 control.
+         *
+         * Returns: true iff the terminator was the C1 ST
+         */
+        inline constexpr bool is_st_c1() const noexcept
+        {
+                return (st() & 0x80) != 0;
+        }
+
+        /* is_ripe:
+         *
+         * Whether the control string is complete.
+         * This returns true when the string terminator has been received,
+         * and false when only the final character has been received.
+         * This is only meaningful for DCS sequences, which are dispatched
+         * twice.
+         *
+         * Returns: true iff the DCS sequence is complete
+         */
+        inline constexpr bool is_ripe() const noexcept
+        {
+                return st() != 0;
+        }
+
+        inline constexpr bool is_unripe() const noexcept
+        {
+                return !is_ripe();
+        }
+
         /* intermediates:
          *
          * This is the pintro and intermediate characters in the sequence, if any.
diff --git a/src/parser-seq.py b/src/parser-seq.py
index ce355814..7b2c6ebe 100755
--- a/src/parser-seq.py
+++ b/src/parser-seq.py
@@ -73,15 +73,18 @@ class Direction(enum.Flag):
     BIDI = HTT | TTH
 
 class Flags(enum.Flag):
-    NOP_TTH = enum.auto()
-    NOP_HTT = enum.auto()
-    NOP = NOP_TTH | NOP_HTT
+    NOP_TTH = enum.auto()    # NOP terminal to host
+    NOP_HTT = enum.auto()    # NOP host to terminal
+    NOP = NOP_TTH | NOP_HTT  # NOP both directions
+    UNRIPE = enum.auto()     # dispatch when unripe
+    HANDLER_RV = enum.auto() # handler has return value
 
 class Source(enum.Enum):
     DEC    = enum.auto(),
     ECMA16 = enum.auto() # eq ISO 1745
     ECMA35 = enum.auto() # eq ISO 2022
     ECMA48 = enum.auto() # eq ISO 6429
+    ITERM2 = enum.auto()
     RLOGIN = enum.auto()
     SCO    = enum.auto()
     VTE    = enum.auto()
@@ -95,6 +98,8 @@ class Source(enum.Enum):
             return cls.DEC
         elif name.endswith('_ECMA'):
             return cls.ECMA48
+        elif name.startswith('ITERM'):
+            return cls.ITERM2
         elif name.startswith('RLOGIN'):
             return cls.RLOGIN
         elif name.startswith('SCO'):
@@ -240,13 +245,13 @@ sequences = [
     # Commands that are handled specially by the parser than the other sequences
     seq_COMMAND('ACS', source=Source.ECMA35,
                 comment='announce code structure'),
-    seq_COMMAND('CnD', source=Source.ECMA35,
+    seq_COMMAND('CnD', flags=Flags.NOP, source=Source.ECMA35,
                 comment='Cn designate'),
-    seq_COMMAND('DOCS', source=Source.ECMA35,
+    seq_COMMAND('DOCS', flags=Flags.NOP, source=Source.ECMA35,
                 comment='designate other coding system'),
     seq_COMMAND('GnDm', source=Source.ECMA35,
                 comment='Gn designate 9m charset'),
-    seq_COMMAND('GnDMm', source=Source.ECMA35,
+    seq_COMMAND('GnDMm', flags=Flags.NOP, source=Source.ECMA35,
                 comment='Gn designate multibyte 9m charset'),
     seq_COMMAND('IRR', flags=Flags.NOP, source=Source.ECMA35,
                 comment='identify revised registration'),
@@ -860,7 +865,7 @@ sequences = [
             comment='restore terminal state'),
     seq_DCS('XTERM_STCAP', 'p', intermediates=(Intermediate.PLUS,), flags=Flags.NOP,
             comment='xterm set termcap/terminfo'),
-    seq_DCS('DECSIXEL', 'q', flags=Flags.NOP,
+    seq_DCS('DECSIXEL', 'q',
             comment='SIXEL graphics'),
     seq_DCS('DECRQSS', 'q', intermediates=(Intermediate.CASH,),
             comment='request selection or setting'),
@@ -999,7 +1004,6 @@ def get_seqs(predicate):
 
     return seqs
 
-
 ''' Write copyright header '''
 def write_header(outfile):
     outfile.write('''
@@ -1050,6 +1054,11 @@ def write_seqs(output, stype):
         else:
             return f'0x{c:02x}'
 
+    def flags_to_dispatch_flags(flags):
+        if flags is not None and flags & Flags.UNRIPE:
+            return "VTE_DISPATCH_UNRIPE"
+        return "0"
+
     seqs = get_seqs(lambda seq: seq.stype == stype)
     for seq in seqs:
         stype, name, final, pintro, intermediates, flags, comment = seqs[seq]
@@ -1071,9 +1080,15 @@ def write_seqs(output, stype):
         else:
             macro = '_VTE_SEQ'
 
-        outfile.write(f'{macro}({name}, {stype.name}, {final_char(final)}, '
+        outfile.write(f'{macro}('
+                      f'{name}, '
+                      f'{stype.name}, '
+                      f'{final_char(final)}, '
                       f'{name_or_none(pintro0)}, '
-                      f'{len(intermediates)}, {name_or_none(intermediate0)}) '
+                      f'{len(intermediates)}, '
+                      f'{name_or_none(intermediate0)}, '
+                      f'{flags_to_dispatch_flags(flags)} '
+                      f') '
                       f'/* {comment} */\n')
 
 
@@ -1112,6 +1127,41 @@ _VTE_CMD(GRAPHIC) /* graphics character */
                 outfile.write(f'_VTE_NOP({name})\n')
 
 
+''' Write command handlers '''
+def write_hdlr(output):
+
+    def cmd_handler_macro(flags):
+        if flags is None:
+            return '_VTE_CMD_HANDLER'
+        elif flags & Flags.NOP:
+            return '_VTE_CMD_HANDLER_NOP'
+        elif flags & Flags.HANDLER_RV:
+            return '_VTE_CMD_HANDLER_R'
+        else:
+            return '_VTE_CMD_HANDLER'
+
+    outfile = open(output.as_posix(), 'w')
+    write_header(outfile)
+    outfile.write('''
+#if !defined(_VTE_CMD_HANDLER) || !defined(_VTE_CMD_HANDLER_R) || !defined(_VTE_CMD_HANDLER_NOP)
+#error "Must define _VTE_CMD_HANDLER, _VTE_CMD_HANDLER_R and _VTE_CMD_HANDLER_NOP before including this file"
+#endif
+''')
+
+    outfile.write('''
+_VTE_CMD_HANDLER_NOP(NONE) /* placeholder */
+_VTE_CMD_HANDLER(GRAPHIC) /* graphics character */
+''')
+
+    cmds = get_commands(lambda seq: True)
+    for name in sorted(cmds):
+        flags, comment = cmds[name]
+        if comment is not None:
+            outfile.write(f'{cmd_handler_macro(flags)}({name}) /* {comment} */\n')
+        else:
+            outfile.write(f'{cmd_handler_macro(flags)}({name})\n')
+
+
 # main
 
 ''' main '''
@@ -1136,3 +1186,4 @@ if __name__ == '__main__':
     write_seqs(args.destdir / "parser-sci.hh", Type.SCI)
 
     write_cmds(args.destdir / "parser-cmd.hh")
+    write_hdlr(args.destdir / "parser-cmd-handlers.hh")
diff --git a/src/parser-test.cc b/src/parser-test.cc
index f7b32993..2863da0c 100644
--- a/src/parser-test.cc
+++ b/src/parser-test.cc
@@ -291,7 +291,7 @@ test_seq_control(void)
                 uint32_t c;
                 unsigned int cmd;
         } const controls [] = {
-#define _VTE_SEQ(cmd,type,f,pi,ni,i0) { f, VTE_CMD_##cmd },
+#define _VTE_SEQ(cmd,type,f,pi,ni,i0,flags) { f, VTE_CMD_##cmd },
 #include "parser-c01.hh"
 #undef _VTE_SEQ
         };
@@ -644,7 +644,7 @@ test_seq_esc_known(void)
 {
         parser.reset();
 
-#define _VTE_SEQ(cmd,type,f,p,ni,i) \
+#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
         test_seq_esc_known(f, VTE_SEQ_INTERMEDIATE_CHAR_##i, VTE_CMD_##cmd);
 #include "parser-esc.hh"
 #undef _VTE_SEQ
@@ -782,7 +782,7 @@ test_seq_sci_known(void)
 {
         parser.reset();
 
-#define _VTE_SEQ(cmd,type,f,p,ni,i) \
+#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
         test_seq_sci_known(f, VTE_CMD_##cmd);
 #include "parser-sci.hh"
 #undef _VTE_SEQ
@@ -810,7 +810,7 @@ test_seq_csi_known(void)
 {
         parser.reset();
 
-#define _VTE_SEQ(cmd,type,f,p,ni,i) \
+#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
         test_seq_csi_known(f, VTE_SEQ_PARAMETER_CHAR_##p, VTE_SEQ_INTERMEDIATE_CHAR_##i, VTE_CMD_##cmd);
 #include "parser-csi.hh"
 #undef _VTE_SEQ
@@ -951,7 +951,7 @@ test_seq_dcs_known(void)
 {
         parser.reset();
 
-#define _VTE_SEQ(cmd,type,f,p,ni,i) \
+#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
         test_seq_dcs_known(f, VTE_SEQ_PARAMETER_CHAR_##p, VTE_SEQ_INTERMEDIATE_CHAR_##i, VTE_CMD_##cmd);
 #include "parser-dcs.hh"
 #undef _VTE_SEQ
diff --git a/src/parser.cc b/src/parser.cc
index b9e1d20a..c81144b6 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -12,7 +12,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
+ * You should have received a copy of the GNU Lesser General Public License
  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
  */
 
@@ -163,7 +163,7 @@ static unsigned int
 vte_parse_host_control(vte_seq_t const* seq)
 {
         switch (seq->terminator) {
-#define _VTE_SEQ(cmd,type,f,pi,ni,i0) case f: return VTE_CMD_##cmd;
+#define _VTE_SEQ(cmd,type,f,pi,ni,i0,flags) case f: return VTE_CMD_##cmd;
 #include "parser-c01.hh"
 #undef _VTE_SEQ
         default: return VTE_CMD_NONE;
@@ -346,7 +346,7 @@ vte_parse_host_escape(vte_seq_t const* seq,
         case VTE_SEQ_INTERMEDIATE_NONE:
         case VTE_SEQ_INTERMEDIATE_HASH: {  /* Single control functions */
                 switch (_VTE_SEQ_CODE_ESC(seq->terminator, intermediates)) {
-#define _VTE_SEQ(cmd,type,f,p,ni,i) \
+#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
                         case _VTE_SEQ_CODE_ESC(f, VTE_SEQ_INTERMEDIATE_##i): return VTE_CMD_##cmd;
 #include "parser-esc.hh"
 #undef _VTE_SEQ
@@ -453,7 +453,7 @@ static unsigned int
 vte_parse_host_csi(vte_seq_t const* seq)
 {
         switch (_VTE_SEQ_CODE(seq->terminator, seq->intermediates)) {
-#define _VTE_SEQ(cmd,type,f,p,ni,i) \
+#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
                 case _VTE_SEQ_CODE(f, _VTE_SEQ_CODE_COMBINE(VTE_SEQ_PARAMETER_##p, VTE_SEQ_INTERMEDIATE_##i)): return VTE_CMD_##cmd;
 #include "parser-csi.hh"
 #undef _VTE_SEQ
@@ -462,11 +462,12 @@ vte_parse_host_csi(vte_seq_t const* seq)
 }
 
 static unsigned int
-vte_parse_host_dcs(vte_seq_t const* seq)
+vte_parse_host_dcs(vte_seq_t const* seq,
+                   unsigned int* flagsptr)
 {
         switch (_VTE_SEQ_CODE(seq->terminator, seq->intermediates)) {
-#define _VTE_SEQ(cmd,type,f,p,ni,i) \
-                case _VTE_SEQ_CODE(f, _VTE_SEQ_CODE_COMBINE(VTE_SEQ_PARAMETER_##p, VTE_SEQ_INTERMEDIATE_##i)): return VTE_CMD_##cmd;
+#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
+                case _VTE_SEQ_CODE(f, _VTE_SEQ_CODE_COMBINE(VTE_SEQ_PARAMETER_##p, VTE_SEQ_INTERMEDIATE_##i)): *flagsptr = flags; return VTE_CMD_##cmd;
 #include "parser-dcs.hh"
 #undef _VTE_SEQ
         default: return VTE_CMD_NONE;
@@ -477,7 +478,7 @@ static unsigned int
 vte_parse_host_sci(vte_seq_t const* seq)
 {
         switch (_VTE_SEQ_CODE(seq->terminator, 0)) {
-#define _VTE_SEQ(cmd,type,f,p,ni,i) \
+#define _VTE_SEQ(cmd,type,f,p,ni,i,flags) \
                 case _VTE_SEQ_CODE(f, 0): return VTE_CMD_##cmd;
 #include "parser-sci.hh"
 #undef _VTE_SEQ
@@ -882,9 +883,11 @@ parser_dcs_consume(vte_parser_t* parser,
         parser->seq.type = VTE_SEQ_DCS;
         parser->seq.terminator = raw;
         parser->seq.st = 0;
-        parser->seq.command = vte_parse_host_dcs(&parser->seq);
 
-        return VTE_SEQ_NONE;
+        auto flags = unsigned{};
+        parser->seq.command = vte_parse_host_dcs(&parser->seq, &flags);
+
+        return (flags & VTE_DISPATCH_UNRIPE) && parser->dispatch_unripe ? VTE_SEQ_DCS : VTE_SEQ_NONE;
 }
 
 static int
@@ -1446,3 +1449,66 @@ vte_parser_reset(vte_parser_t* parser)
 {
         parser_transition(parser, 0, STATE_GROUND, ACTION_IGNORE);
 }
+
+/*
+ * vte_parser_set_dispatch_unripe:
+ * @parser: a #vte_parser_t
+ * @enable:
+ *
+ * Enables or disables dispatch of unripe DCS sequences.
+ * If enabled, known DCS sequences with the %VTE_DISPATCH_UNRIPE
+ * flag will be dispatched when the Final character is received,
+ * instead of when the control string terminator (ST) is received.
+ * The application handling the unripe DCS sequence may then
+ * either
+ * * do nothing; in this case the DCS sequence will be dispatched
+ *   again when the control string was fully received. Ripe and
+ *   unripe sequences can be distinguished by the value of
+ *   parser.seq.st which will be 0 for an unripe sequence and
+ *   either 0x5c (C0 ST) or 0x9c (C1 ST) for a ripe sequence. Or
+ * * call vte_parser_ignore_until_st(); in this case the DCS
+ *   sequence will be ignored until after the ST (or another
+ *   character that aborts the control string) has been
+ *   received; or
+ * * switch to a different parser (e.g. DECSIXEL) to parse the
+ *   control string directly on-the-fly. Note that in this case,
+ *   the subparser should take care to handle C0 and C1 controls
+ *   the same way as this parser would.
+ */
+void
+vte_parser_set_dispatch_unripe(vte_parser_t* parser,
+                               bool enable)
+{
+        parser->dispatch_unripe = enable;
+}
+
+/*
+ * vte_parser_ignore_until_st:
+ * @parser: a #vte_parser_t
+ *
+ * When used on an unripe %VTE_SEQ_DCS sequence, makes the
+ * parser ignore everything until the ST is received (or
+ * the DCS is aborted by the usual other means).
+ *
+ * Note that there are some inconsistencies here:
+ *
+ * * SUB aborts the DCS in our parser, but e.g. a DECSIXEL
+ *   parser will handle it as if 3/15 was received.
+ *
+ * * the ST terminating the DCS will be dispatched as an ST
+ *   sequence, instead of producing an IGNORE sequence
+ *   (this is easily fixable but would slightly complicate
+ *   the parser for no actual gain).
+ */
+void
+vte_parser_ignore_until_st(vte_parser_t* parser)
+{
+        switch (parser->state) {
+        case STATE_DCS_PASS:
+                parser_transition_no_action(parser, 0, STATE_DCS_IGNORE);
+                break;
+        default:
+                g_assert_not_reached();
+                break;
+        }
+}
diff --git a/src/parser.hh b/src/parser.hh
index 7fe775cc..1d4a30c7 100644
--- a/src/parser.hh
+++ b/src/parser.hh
@@ -186,6 +186,10 @@ enum {
 #define VTE_CHARSET_GET_CHARSET(c) ((c) & VTE_CHARSET_CHARSET_MASK)
 #define VTE_CHARSET_GET_SLOT(c)    ((c) >> VTE_CHARSET_SLOT_OFFSET)
 
+enum {
+      VTE_DISPATCH_UNRIPE = 1u << 0,
+};
+
 struct vte_seq_t {
         unsigned int type;
         unsigned int command;
@@ -204,6 +208,7 @@ struct vte_seq_t {
 struct vte_parser_t {
         vte_seq_t seq;
         unsigned int state;
+        bool dispatch_unripe;
 };
 
 void vte_parser_init(vte_parser_t* parser);
@@ -211,3 +216,6 @@ void vte_parser_deinit(vte_parser_t* parser);
 int vte_parser_feed(vte_parser_t* parser,
                     uint32_t raw);
 void vte_parser_reset(vte_parser_t* parser);
+void vte_parser_set_dispatch_unripe(vte_parser_t* parser,
+                                    bool enable);
+void vte_parser_ignore_until_st(vte_parser_t* parser);
diff --git a/src/vte.cc b/src/vte.cc
index 981b1bfd..adc28c98 100644
--- a/src/vte.cc
+++ b/src/vte.cc
@@ -2012,26 +2012,35 @@ bool
 Terminal::set_encoding(char const* charset,
                        GError** error)
 {
-#ifdef WITH_ICU
         auto const to_utf8 = bool{charset == nullptr || g_ascii_strcasecmp(charset, "UTF-8") == 0};
 
+#ifdef WITH_ICU
+        /* Note that if the current data syntax is not a primary one, the change
+         * will only be applied when returning to the primary data syntax.
+         */
+
         if (to_utf8) {
-                if (data_syntax() == DataSyntax::eECMA48_UTF8)
+                if (primary_data_syntax() == DataSyntax::eECMA48_UTF8)
                         return true;
 
                 m_converter.reset();
-                m_data_syntax = DataSyntax::eECMA48_UTF8;
+                m_primary_data_syntax = DataSyntax::eECMA48_UTF8;
         } else {
-                if (data_syntax() == DataSyntax::eECMA48_PCTERM &&
+                if (primary_data_syntax() == DataSyntax::eECMA48_PCTERM &&
                     m_converter->charset() == charset)
                         return true;
 
-                auto converter = vte::base::ICUConverter::make(charset, error);
-                if (!converter)
-                        return false;
+                try {
+                        auto converter = vte::base::ICUConverter::make(charset, error);
+                        if (!converter)
+                               return false;
 
-                m_converter = std::move(converter);
-                m_data_syntax = DataSyntax::eECMA48_PCTERM;
+                        m_converter = std::move(converter);
+                        m_primary_data_syntax = DataSyntax::eECMA48_PCTERM;
+
+                } catch (...) {
+                        return vte::glib::set_error_from_exception(error);
+                }
         }
 
         /* Note: we DON'T convert any pending output from the previous charset to
@@ -2046,14 +2055,19 @@ Terminal::set_encoding(char const* charset,
         reset_decoder();
 
         if (pty())
-                pty()->set_utf8(data_syntax() == DataSyntax::eECMA48_UTF8);
+                pty()->set_utf8(primary_data_syntax() == DataSyntax::eECMA48_UTF8);
 
        _vte_debug_print(VTE_DEBUG_IO,
                          "Set terminal encoding to `%s'.\n",
                          encoding());
 
         return true;
+
 #else
+
+        if (to_utf8)
+                return true;
+
         g_set_error_literal(error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
                             "ICU support not available");
         return false;
@@ -3441,7 +3455,7 @@ Terminal::process_incoming()
         // FIXMEchpe move to context
         m_line_wrapped = false;
 
-        auto bytes_processed = size_t{0};
+        auto bytes_processed = ssize_t{0};
 
         auto context = ProcessingContext{*this};
 
@@ -3454,7 +3468,8 @@ Terminal::process_incoming()
 
                 _VTE_DEBUG_IF(VTE_DEBUG_IO) {
                         _vte_debug_print(VTE_DEBUG_IO,
-                                         "Processing chunk %p starting at offset %u\n",
+                                         "Processing data syntax %d chunk %p starting at offset %u\n",
+                                         (int)current_data_syntax(),
                                          (void*)chunk.get(),
                                          unsigned(chunk->begin_reading() - chunk->data()));
 
@@ -3463,15 +3478,17 @@ Terminal::process_incoming()
                                            chunk->size_reading());
                 }
 
-                switch (data_syntax()) {
+                switch (current_data_syntax()) {
                 case DataSyntax::eECMA48_UTF8:
                         process_incoming_utf8(context, *chunk);
                         break;
+
 #ifdef WITH_ICU
                 case DataSyntax::eECMA48_PCTERM:
                         process_incoming_pcterm(context, *chunk);
                         break;
 #endif
+
                 default:
                         g_assert_not_reached();
                         break;
@@ -3479,6 +3496,10 @@ Terminal::process_incoming()
 
                 bytes_processed += size_t(chunk->begin_reading() - start);
 
+                _vte_debug_print(VTE_DEBUG_IO, "read %d bytes, chunk %s, data syntax now %d\n",
+                                 int(chunk->begin_reading() - start),
+                                 chunk->has_reading()?"has more":"finished",
+                                 (int)current_data_syntax());
                 // If all data from this chunk has been processed, go to the next one
                 if (!chunk->has_reading())
                         m_incoming_queue.pop();
@@ -3558,7 +3579,6 @@ Terminal::process_incoming()
 
 /* Note that this code is mostly copied to process_incoming_pcterm() below; any non-charset-decoding
  * related changes made here need to be made there, too.
- * FIXMEchpe: refactor this to share more code with process_incoming_pcterm().
  */
 void
 Terminal::process_incoming_utf8(ProcessingContext& context,
@@ -3569,9 +3589,9 @@ Terminal::process_incoming_utf8(ProcessingContext& context,
         auto const iend = chunk.end_reading();
         auto ip = chunk.begin_reading();
 
-        for ( ; ip < iend; ++ip) {
+        while (ip < iend) {
 
-                switch (m_utf8_decoder.decode(*ip)) {
+                switch (m_utf8_decoder.decode(*(ip++))) {
                 case vte::base::UTF8Decoder::REJECT_REWIND:
                         /* Rewind the stream.
                          * Note that this will never lead to a loop, since in the
@@ -3634,11 +3654,19 @@ Terminal::process_incoming_utf8(ProcessingContext& context,
 
                         default: {
                                 switch (seq.command()) {
-#define _VTE_CMD(cmd)           case VTE_CMD_##cmd: cmd(seq); break;
-#define _VTE_NOP(cmd)
-#include "parser-cmd.hh"
-#undef _VTE_CMD
-#undef _VTE_NOP
+#define _VTE_CMD_HANDLER(cmd)   \
+                                case VTE_CMD_##cmd: cmd(seq); break;
+#define _VTE_CMD_HANDLER_R(cmd) \
+                                case VTE_CMD_##cmd: if (cmd(seq)) { \
+                                        context.post_CMD(*this); \
+                                        goto switched_data_syntax; \
+                                        } \
+                                        break;
+#define _VTE_CMD_HANDLER_NOP(cmd)
+#include "parser-cmd-handlers.hh"
+#undef _VTE_CMD_HANDLER
+#undef _VTE_CMD_HANDLER_NOP
+#undef _VTE_CMD_HANDLER_R
                                 default:
                                         _vte_debug_print(VTE_DEBUG_PARSER,
                                                          "Unknown parser command %d\n", seq.command());
@@ -3656,23 +3684,24 @@ Terminal::process_incoming_utf8(ProcessingContext& context,
                 }
         }
 
-        // Update start for data consumed
-        chunk.set_begin_reading(ip);
-
-        if (chunk.eos()) {
+        if (chunk.eos() && ip == iend) {
                 m_eos_pending = true;
                 /* If there's an unfinished character in the queue, insert a replacement character */
                 if (m_utf8_decoder.flush()) {
                         insert_char(m_utf8_decoder.codepoint(), false, true);
                 }
         }
+
+ switched_data_syntax:
+
+        // Update start for data consumed
+        chunk.set_begin_reading(ip);
 }
 
 #ifdef WITH_ICU
 
 /* Note that this is mostly a copy of process_incoming_utf8() above; any non-charset-decoding
  * related changes made here need to be made there, too.
- * FIXMEchpe: refactor this to share more code with process_incoming_utf8().
  */
 void
 Terminal::process_incoming_pcterm(ProcessingContext& context,
@@ -3742,11 +3771,20 @@ Terminal::process_incoming_pcterm(ProcessingContext& context,
 
                         default: {
                                 switch (seq.command()) {
-#define _VTE_CMD(cmd)           case VTE_CMD_##cmd: cmd(seq); break;
-#define _VTE_NOP(cmd)
-#include "parser-cmd.hh"
-#undef _VTE_CMD
-#undef _VTE_NOP
+#define _VTE_CMD_HANDLER(cmd)   \
+                                case VTE_CMD_##cmd: cmd(seq); break;
+#define _VTE_CMD_HANDLER_R(cmd) \
+                                case VTE_CMD_##cmd: \
+                                        if (cmd(seq)) { \
+                                                context.post_CMD(*this); \
+                                                goto switched_data_syntax; \
+                                        } \
+                                        break;
+#define _VTE_CMD_HANDLER_NOP(cmd)
+#include "parser-cmd-handlers.hh"
+#undef _VTE_CMD_HANDLER
+#undef _VTE_CMD_HANDLER_NOP
+#undef _VTE_CMD_HANDLER_R
                                 default:
                                         _vte_debug_print(VTE_DEBUG_PARSER,
                                                          "Unknown parser command %d\n", seq.command());
@@ -3779,10 +3817,12 @@ Terminal::process_incoming_pcterm(ProcessingContext& context,
                 return;
         }
 
+ switched_data_syntax:
+
         // Update start for data consumed
         chunk.set_begin_reading(ip);
 
-        if (chunk.eos()) {
+        if (chunk.eos() && ip == chunk.end_reading()) {
                 /* On EOS, we still need to flush the decoder before we can finish */
                 eos = flush = true;
                 goto start;
@@ -4058,7 +4098,7 @@ Terminal::send_child(std::string_view const& data)
          * ::commit signal even if there is no PTY. See issue vte#222.
          */
 
-        switch (data_syntax()) {
+        switch (primary_data_syntax()) {
         case DataSyntax::eECMA48_UTF8:
                 emit_commit(data);
                 if (pty())
@@ -9745,7 +9785,7 @@ Terminal::set_mouse_autohide(bool autohide)
 void
 Terminal::reset_decoder()
 {
-        switch (data_syntax()) {
+        switch (primary_data_syntax()) {
         case DataSyntax::eECMA48_UTF8:
                 m_utf8_decoder.reset();
                 break;
@@ -9761,6 +9801,20 @@ Terminal::reset_decoder()
         }
 }
 
+void
+Terminal::reset_data_syntax()
+{
+        if (current_data_syntax() == primary_data_syntax())
+                return;
+
+        switch (current_data_syntax()) {
+        default:
+                break;
+        }
+
+        pop_data_syntax();
+}
+
 /*
  * Terminal::reset:
  * @clear_tabstops: whether to reset tabstops
@@ -9793,6 +9847,7 @@ Terminal::reset(bool clear_tabstops,
         reset_decoder();
 
         /* Reset parser */
+        reset_data_syntax();
         m_parser.reset();
         m_last_graphic_character = 0;
 
@@ -9924,7 +9979,7 @@ Terminal::set_pty(vte::base::Pty *new_pty)
 
         set_size(m_column_count, m_row_count);
 
-        if (!pty()->set_utf8(data_syntax() == DataSyntax::eECMA48_UTF8)) {
+        if (!pty()->set_utf8(primary_data_syntax() == DataSyntax::eECMA48_UTF8)) {
                 // nothing we can do here
         }
 
diff --git a/src/vtedefines.hh b/src/vtedefines.hh
index 44371d6d..d3c07501 100644
--- a/src/vtedefines.hh
+++ b/src/vtedefines.hh
@@ -145,3 +145,5 @@
 #define VTE_TERMINFO_NAME "xterm-256color"
 
 #define VTE_SIXEL_ENABLED_DEFAULT false
+
+#define VTE_SIXEL_NUM_COLOR_REGISTERS (1024)
diff --git a/src/vteinternal.hh b/src/vteinternal.hh
index fe8de1a4..307c1f9e 100644
--- a/src/vteinternal.hh
+++ b/src/vteinternal.hh
@@ -33,6 +33,7 @@
 #include <glib.h>
 #include "glib-glue.hh"
 
+#include "debug.h"
 #include "drawing-cairo.hh"
 #include "vtedefines.hh"
 #include "vtetypes.hh"
@@ -381,16 +382,35 @@ public:
         vte::base::UTF8Decoder m_utf8_decoder;
 
         enum class DataSyntax {
+                /* The primary data syntax is always one of the following: */
                 eECMA48_UTF8,
                 #ifdef WITH_ICU
                 eECMA48_PCTERM,
                 #endif
-                /* eECMA48_ECMA35, not supported */
+                /* ECMA48_ECMA35, not supported */
         };
 
-        DataSyntax m_data_syntax{DataSyntax::eECMA48_UTF8};
+        DataSyntax m_primary_data_syntax{DataSyntax::eECMA48_UTF8};
+        DataSyntax m_current_data_syntax{DataSyntax::eECMA48_UTF8};
 
-        auto data_syntax() const noexcept { return m_data_syntax; }
+        auto primary_data_syntax() const noexcept { return m_primary_data_syntax; }
+        auto current_data_syntax() const noexcept { return m_current_data_syntax; }
+
+        void push_data_syntax(DataSyntax syntax) noexcept
+        {
+                _vte_debug_print(VTE_DEBUG_IO, "Pushing data syntax %d -> %d\n",
+                                 int(m_current_data_syntax), int(syntax));
+                m_current_data_syntax = syntax;
+        }
+
+        void pop_data_syntax() noexcept
+        {
+                _vte_debug_print(VTE_DEBUG_IO, "Popping data syntax %d -> %d\n",
+                                 int(m_current_data_syntax), int(m_primary_data_syntax));
+                m_current_data_syntax = m_primary_data_syntax;
+        }
+
+        void reset_data_syntax();
 
         int m_utf8_ambiguous_width{VTE_DEFAULT_UTF8_AMBIGUOUS_WIDTH};
         gunichar m_last_graphic_character{0}; /* for REP */
@@ -417,7 +437,7 @@ public:
 
         char const* encoding() const noexcept
         {
-                switch (m_data_syntax) {
+                switch (primary_data_syntax()) {
                 case DataSyntax::eECMA48_UTF8:   return "UTF-8";
                 #ifdef WITH_ICU
                 case DataSyntax::eECMA48_PCTERM: return m_converter->charset().c_str();
@@ -1469,12 +1489,16 @@ public:
         /* Sequence handlers */
         bool m_line_wrapped; // signals line wrapped from character insertion
         // Note: inlining the handlers seems to worsen the performance, so we don't do that
-#define _VTE_CMD(cmd) \
+#define _VTE_CMD_HANDLER(cmd) \
+       /* inline */ void cmd (vte::parser::Sequence const& seq);
+#define _VTE_CMD_HANDLER_NOP(cmd) \
        /* inline */ void cmd (vte::parser::Sequence const& seq);
-#define _VTE_NOP(cmd) G_GNUC_UNUSED _VTE_CMD(cmd)
-#include "parser-cmd.hh"
-#undef _VTE_CMD
-#undef _VTE_NOP
+#define _VTE_CMD_HANDLER_R(cmd) \
+       /* inline */ bool cmd (vte::parser::Sequence const& seq);
+#include "parser-cmd-handlers.hh"
+#undef _VTE_CMD_HANDLER
+#undef _VTE_CMD_HANDLER_NOP
+#undef _VTE_CMD_HANDLER_R
 };
 
 } // namespace terminal
diff --git a/src/vteseq.cc b/src/vteseq.cc
index b0e5fbe4..4f2d6b83 100644
--- a/src/vteseq.cc
+++ b/src/vteseq.cc
@@ -2379,7 +2379,7 @@ Terminal::DA1(vte::parser::Sequence const& seq)
 
         reply(seq, VTE_REPLY_DECDA1R, {65, 1,
 #ifdef WITH_SIXEL
-                                       m_sixel_enabled ? 4 : -2 /* skip */,
+                                       4,
 #endif
                                        9});
 }
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]