[vte] parser: Correct charset designation sequences with final byte 7/14
- From: Christian Persch <chpe src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [vte] parser: Correct charset designation sequences with final byte 7/14
- Date: Fri, 5 Feb 2021 21:43:08 +0000 (UTC)
commit bd53b684f319284321d8fab47f7f2bf01487ec4e
Author: Christian Persch <chpe src gnome org>
Date: Fri Feb 5 22:43:03 2021 +0100
parser: Correct charset designation sequences with final byte 7/14
src/parser-charset-tables.hh | 21 ++++++---------------
src/parser-charset.hh | 1 +
src/parser-test.cc | 4 ++++
src/parser.cc | 21 +++++++++++++++------
4 files changed, 26 insertions(+), 21 deletions(-)
---
diff --git a/src/parser-charset-tables.hh b/src/parser-charset-tables.hh
index 47be8eb2..fd15b66e 100644
--- a/src/parser-charset-tables.hh
+++ b/src/parser-charset-tables.hh
@@ -18,7 +18,6 @@
#define IR(num) VTE_CHARSET_ISO_2375_IR_##num
#define DEC(name) VTE_CHARSET_DEC_##name
#define NRCS(name) VTE_CHARSET_##name##_NRCS
-#define EMPTY VTE_CHARSET_EMPTY
#define NA VTE_CHARSET_NONE
#define RET VTE_CHARSET_RETURN
@@ -52,7 +51,7 @@ static uint8_t const charset_graphic_94[] = {
IR(85), IR(86), IR(88), IR(89), IR(90), IR(91), IR(92), IR(93),
/* 7/0..7/13 */
IR(94), IR(95), IR(96), IR(98), IR(99), IR(102), IR(103), IR(121),
- IR(122), IR(137), IR(141), IR(146), IR(128), IR(147), EMPTY
+ IR(122), IR(137), IR(141), IR(146), IR(128), IR(147),
};
/* 94-character graphic character sets, with second intermediate byte 2/1:
@@ -131,9 +130,9 @@ static uint8_t const charset_graphic_96[] = {
/* 6/0..6/15 */
IR(200), IR(201), IR(203), IR(204), IR(205), IR(206), IR(226), IR(208),
IR(209), IR(227), IR(234), NA, NA, NA, NA, NA,
- /* 7/0..7/14 */
+ /* 7/0..7/13 */
NA, NA, NA, NA, NA, NA, NA, NA,
- NA, NA, NA, NA, NA, IR(129), EMPTY
+ NA, NA, NA, NA, NA, IR(129),
};
/* Multibyte graphic character sets:
@@ -156,15 +155,8 @@ static uint8_t const charset_graphic_94_n[] = {
/* 4/0..4/15 */
IR(42), IR(58), IR(87_OR_168), IR(149), IR(159), IR(165), IR(169), IR(171),
IR(172), IR(183), IR(184), IR(185), IR(186), IR(187), IR(202), IR(228),
- /* 5/0..5/15 */
- IR(229), IR(233), NA, NA, NA, NA, NA, NA,
- NA, NA, NA, NA, NA, NA, NA, NA,
- /* 6/0..6/15 */
- NA, NA, NA, NA, NA, NA, NA, NA,
- NA, NA, NA, NA, NA, NA, NA, NA,
- /* 7/0..7/14 */
- NA, NA, NA, NA, NA, NA, NA, NA,
- NA, NA, NA, NA, NA, NA, EMPTY,
+ /* 5/0..5/1 */
+ IR(229), IR(233),
};
/* Multibyte graphic character sets, with third intermediate byte 2/1:
@@ -195,7 +187,7 @@ static uint8_t const charset_graphic_94_n_with_2_1[] = {
static uint8_t const charset_control_c0[] = {
/* 4/0..4/12 */
IR(1), IR(7), IR(48), IR(26), IR(36), IR(106), IR(74), IR(104),
- IR(130), IR(132), IR(134), IR(135), IR(140)
+ IR(130), IR(132), IR(134), IR(135), IR(140),
};
/* C1 control character sets:
@@ -258,6 +250,5 @@ static uint8_t const charset_ocs_with_2_15[] = {
#undef IR
#undef DEC
#undef NRCS
-#undef EMPTY
#undef NA
#undef RET
diff --git a/src/parser-charset.hh b/src/parser-charset.hh
index 995bf1ca..b9a989ce 100644
--- a/src/parser-charset.hh
+++ b/src/parser-charset.hh
@@ -30,6 +30,7 @@ _VTE_CHARSET(NONE)
/* See ECMA-35 § 14.4 for the meaning of this */
_VTE_CHARSET(DRCS)
+/* See ECMA-35 § 14.1 (and ECMA-6 § 9.2) for the meaning of this */
_VTE_CHARSET(EMPTY)
/* Return to ISO-2022 */
diff --git a/src/parser-test.cc b/src/parser-test.cc
index 6b93e9af..1fa5a8ed 100644
--- a/src/parser-test.cc
+++ b/src/parser-test.cc
@@ -396,6 +396,10 @@ test_seq_esc_charset(uint32_t i[], /* intermediates */
if (f >= ts && f < (ts + ntable))
cs = table[f - ts];
+ else if (f == 0x7e &&
+ cmd != VTE_CMD_DOCS &&
+ defaultcs != VTE_CHARSET_DRCS)
+ cs = VTE_CHARSET_EMPTY;
else
cs = defaultcs;
diff --git a/src/parser.cc b/src/parser.cc
index 58281dbb..1786ed7d 100644
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -170,6 +170,16 @@ vte_parse_host_control(vte_seq_t const* seq)
}
}
+/* ECMA-35 § 14.1 specifies that the final character 7/14 always identifies
+ * an empty set. Note that this does not apply to DRCS sets (§ 14.4),
+ * since § 13.3.3 says that all the Ft (4/0..7/14) bytes are private-use.
+ */
+static inline constexpr unsigned int
+charset_empty_or_none(uint32_t raw)
+{
+ return raw == 0x7e ? VTE_CHARSET_EMPTY : VTE_CHARSET_NONE;
+}
+
static unsigned int
vte_parse_charset_94(uint32_t raw,
unsigned int intermediates)
@@ -217,7 +227,7 @@ vte_parse_charset_94(uint32_t raw,
break;
}
- return VTE_CHARSET_NONE;
+ return charset_empty_or_none(raw);
}
static unsigned int
@@ -245,7 +255,7 @@ vte_parse_charset_94_n(uint32_t raw,
break;
}
- return VTE_CHARSET_NONE;
+ return charset_empty_or_none(raw);
}
static unsigned int
@@ -267,7 +277,7 @@ vte_parse_charset_96(uint32_t raw,
return VTE_CHARSET_DRCS;
}
- return VTE_CHARSET_NONE;
+ return charset_empty_or_none(raw);
}
static unsigned int
@@ -277,7 +287,7 @@ vte_parse_charset_96_n(uint32_t raw,
if (VTE_SEQ_INTERMEDIATE(intermediates) == VTE_SEQ_INTERMEDIATE_SPACE)
return VTE_CHARSET_DRCS;
- return VTE_CHARSET_NONE;
+ return charset_empty_or_none(raw);
}
static unsigned int
@@ -299,7 +309,6 @@ vte_parse_charset_ocs(uint32_t raw,
if (remaining_intermediates == 0 &&
raw >= 0x30 && raw < (0x30 + G_N_ELEMENTS(charset_ocs_with_2_0)))
return charset_ocs_with_2_0[raw - 0x30];
- /* Or should this return VTE_CHARSET_DRCS; ? */
break;
case VTE_SEQ_INTERMEDIATE_BANG ... VTE_SEQ_INTERMEDIATE_DOT: /* OCS with standard return */
@@ -337,7 +346,7 @@ vte_parse_charset_control(uint32_t raw,
break;
}
- return VTE_CHARSET_NONE;
+ return charset_empty_or_none(raw);
}
static unsigned int
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]