[vte] parser: Correct charset designation sequences with final byte 7/14



commit bd53b684f319284321d8fab47f7f2bf01487ec4e
Author: Christian Persch <chpe src gnome org>
Date:   Fri Feb 5 22:43:03 2021 +0100

    parser: Correct charset designation sequences with final byte 7/14

 src/parser-charset-tables.hh | 21 ++++++---------------
 src/parser-charset.hh        |  1 +
 src/parser-test.cc           |  4 ++++
 src/parser.cc                | 21 +++++++++++++++------
 4 files changed, 26 insertions(+), 21 deletions(-)
---
diff --git a/src/parser-charset-tables.hh b/src/parser-charset-tables.hh
index 47be8eb2..fd15b66e 100644
--- a/src/parser-charset-tables.hh
+++ b/src/parser-charset-tables.hh
@@ -18,7 +18,6 @@
 #define IR(num)    VTE_CHARSET_ISO_2375_IR_##num
 #define DEC(name)  VTE_CHARSET_DEC_##name
 #define NRCS(name) VTE_CHARSET_##name##_NRCS
-#define EMPTY      VTE_CHARSET_EMPTY
 #define NA         VTE_CHARSET_NONE
 #define RET        VTE_CHARSET_RETURN
 
@@ -52,7 +51,7 @@ static uint8_t const charset_graphic_94[] = {
         IR(85), IR(86), IR(88), IR(89), IR(90), IR(91), IR(92), IR(93),
         /* 7/0..7/13 */
         IR(94), IR(95), IR(96), IR(98), IR(99), IR(102), IR(103), IR(121),
-        IR(122), IR(137), IR(141), IR(146), IR(128), IR(147), EMPTY
+        IR(122), IR(137), IR(141), IR(146), IR(128), IR(147),
 };
 
 /* 94-character graphic character sets, with second intermediate byte 2/1:
@@ -131,9 +130,9 @@ static uint8_t const charset_graphic_96[] = {
         /* 6/0..6/15 */
         IR(200), IR(201), IR(203), IR(204), IR(205), IR(206), IR(226), IR(208),
         IR(209), IR(227), IR(234), NA, NA, NA, NA, NA,
-        /* 7/0..7/14 */
+        /* 7/0..7/13 */
         NA, NA, NA, NA, NA, NA, NA, NA,
-        NA, NA, NA, NA, NA, IR(129), EMPTY
+        NA, NA, NA, NA, NA, IR(129),
 };
 
 /* Multibyte graphic character sets:
@@ -156,15 +155,8 @@ static uint8_t const charset_graphic_94_n[] = {
         /* 4/0..4/15 */
         IR(42), IR(58), IR(87_OR_168), IR(149), IR(159), IR(165), IR(169), IR(171),
         IR(172), IR(183), IR(184), IR(185), IR(186), IR(187), IR(202), IR(228),
-        /* 5/0..5/15 */
-        IR(229), IR(233), NA, NA, NA, NA, NA, NA,
-        NA, NA, NA, NA, NA, NA, NA, NA,
-        /* 6/0..6/15 */
-        NA, NA, NA, NA, NA, NA, NA, NA,
-        NA, NA, NA, NA, NA, NA, NA, NA,
-        /* 7/0..7/14 */
-        NA, NA, NA, NA, NA, NA, NA, NA,
-        NA, NA, NA, NA, NA, NA, EMPTY,
+        /* 5/0..5/1 */
+        IR(229), IR(233),
 };
 
 /* Multibyte graphic character sets, with third intermediate byte 2/1:
@@ -195,7 +187,7 @@ static uint8_t const charset_graphic_94_n_with_2_1[] = {
 static uint8_t const charset_control_c0[] = {
         /* 4/0..4/12 */
         IR(1), IR(7), IR(48), IR(26), IR(36), IR(106), IR(74), IR(104),
-        IR(130), IR(132), IR(134), IR(135), IR(140)
+        IR(130), IR(132), IR(134), IR(135), IR(140),
 };
 
 /* C1 control character sets:
@@ -258,6 +250,5 @@ static uint8_t const charset_ocs_with_2_15[] = {
 #undef IR
 #undef DEC
 #undef NRCS
-#undef EMPTY
 #undef NA
 #undef RET
diff --git a/src/parser-charset.hh b/src/parser-charset.hh
index 995bf1ca..b9a989ce 100644
--- a/src/parser-charset.hh
+++ b/src/parser-charset.hh
@@ -30,6 +30,7 @@ _VTE_CHARSET(NONE)
 /* See ECMA-35 § 14.4 for the meaning of this */
 _VTE_CHARSET(DRCS)
 
+/* See ECMA-35 § 14.1 (and ECMA-6 § 9.2) for the meaning of this */
 _VTE_CHARSET(EMPTY)
 
 /* Return to ISO-2022 */
diff --git a/src/parser-test.cc b/src/parser-test.cc
index 6b93e9af..1fa5a8ed 100644
--- a/src/parser-test.cc
+++ b/src/parser-test.cc
@@ -396,6 +396,10 @@ test_seq_esc_charset(uint32_t i[], /* intermediates */
 
                 if (f >= ts && f < (ts + ntable))
                         cs = table[f - ts];
+                else if (f == 0x7e &&
+                         cmd != VTE_CMD_DOCS &&
+                         defaultcs != VTE_CHARSET_DRCS)
+                        cs = VTE_CHARSET_EMPTY;
                 else
                         cs = defaultcs;
 
diff --git a/src/parser.cc b/src/parser.cc
index 58281dbb..1786ed7d 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -170,6 +170,16 @@ vte_parse_host_control(vte_seq_t const* seq)
         }
 }
 
+/* ECMA-35 § 14.1 specifies that the final character 7/14 always identifies
+ * an empty set. Note that this does not apply to DRCS sets (§ 14.4),
+ * since § 13.3.3 says that all the Ft (4/0..7/14) bytes are private-use.
+ */
+static inline constexpr unsigned int
+charset_empty_or_none(uint32_t raw)
+{
+        return raw == 0x7e ? VTE_CHARSET_EMPTY : VTE_CHARSET_NONE;
+}
+
 static unsigned int
 vte_parse_charset_94(uint32_t raw,
                      unsigned int intermediates)
@@ -217,7 +227,7 @@ vte_parse_charset_94(uint32_t raw,
                 break;
         }
 
-        return VTE_CHARSET_NONE;
+        return charset_empty_or_none(raw);
 }
 
 static unsigned int
@@ -245,7 +255,7 @@ vte_parse_charset_94_n(uint32_t raw,
                 break;
         }
 
-        return VTE_CHARSET_NONE;
+        return charset_empty_or_none(raw);
 }
 
 static unsigned int
@@ -267,7 +277,7 @@ vte_parse_charset_96(uint32_t raw,
                 return VTE_CHARSET_DRCS;
         }
 
-        return VTE_CHARSET_NONE;
+        return charset_empty_or_none(raw);
 }
 
 static unsigned int
@@ -277,7 +287,7 @@ vte_parse_charset_96_n(uint32_t raw,
         if (VTE_SEQ_INTERMEDIATE(intermediates) == VTE_SEQ_INTERMEDIATE_SPACE)
                 return VTE_CHARSET_DRCS;
 
-        return VTE_CHARSET_NONE;
+        return charset_empty_or_none(raw);
 }
 
 static unsigned int
@@ -299,7 +309,6 @@ vte_parse_charset_ocs(uint32_t raw,
                 if (remaining_intermediates == 0 &&
                     raw >= 0x30 && raw < (0x30 + G_N_ELEMENTS(charset_ocs_with_2_0)))
                         return charset_ocs_with_2_0[raw - 0x30];
-                /* Or should this return VTE_CHARSET_DRCS; ? */
                 break;
 
         case VTE_SEQ_INTERMEDIATE_BANG ... VTE_SEQ_INTERMEDIATE_DOT: /* OCS with standard return */
@@ -337,7 +346,7 @@ vte_parse_charset_control(uint32_t raw,
                 break;
         }
 
-        return VTE_CHARSET_NONE;
+        return charset_empty_or_none(raw);
 }
 
 static unsigned int


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]