[vte] terminal: Make pastification not replace something with nothing



commit 4bbd956a155f133c289e514a8f2e48c435096258
Author: Christian Persch <chpe src gnome org>
Date:   Sat Nov 27 19:45:07 2021 +0100

    terminal: Make pastification not replace something with nothing
    
    Replacing something with nothing may be dangerous in itself. So
    instead of omitting the forbidden controls from the pasted string,
    replace the controls with their control picture (or U+FFFD for
    the C1 controls which have no control pictures in unicode).

 src/pastify-test.cc | 29 +++++++++++--------
 src/pastify.cc      | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/pastify.hh      |  3 ++
 3 files changed, 101 insertions(+), 12 deletions(-)
---
diff --git a/src/pastify-test.cc b/src/pastify-test.cc
index 1a4c7f56..fd8ab6d9 100644
--- a/src/pastify-test.cc
+++ b/src/pastify-test.cc
@@ -63,16 +63,17 @@ test_pastify_brackets_c1(void)
 }
 
 static void
-test_pastify_control(std::string const& ctrl)
+test_pastify_control(std::string const& ctrl,
+                     std::string const& pict)
 {
-        test_pastify(ctrl, ""sv);
-        test_pastify(ctrl + ctrl, ""sv);
-        test_pastify("abc"s + ctrl, "abc"sv);
-        test_pastify("abc"s + ctrl + ctrl, "abc"sv);
-        test_pastify(ctrl + "abc"s, "abc"sv);
-        test_pastify(ctrl + ctrl + "abc"s, "abc"sv);
-        test_pastify("abc"s + ctrl + "abc"s, "abcabc"sv);
-        test_pastify("abc"s + ctrl + ctrl + "abc"s, "abcabc"sv);
+        test_pastify(ctrl, pict);
+        test_pastify(ctrl + ctrl, pict + pict);
+        test_pastify("abc"s + ctrl, "abc"s + pict);
+        test_pastify("abc"s + ctrl + ctrl, "abc"s + pict + pict);
+        test_pastify(ctrl + "abc"s, pict + "abc"s);
+        test_pastify(ctrl + ctrl + "abc"s, pict + pict + "abc"s);
+        test_pastify("abc"s + ctrl + "abc"s, "abc"s + pict + "abc"s);
+        test_pastify("abc"s + ctrl + ctrl + "abc"s, "abc"s + pict + pict + "abc"s);
 }
 
 static void
@@ -82,7 +83,10 @@ test_pastify_control_c0(void const* ptr)
         auto ctrl = ""s;
         ctrl.push_back(c);
 
-        test_pastify_control(ctrl);
+        auto pict = std::string{};
+        vte::terminal::append_control_picture(pict, c);
+
+        test_pastify_control(ctrl, pict);
 }
 
 static void
@@ -94,7 +98,10 @@ test_pastify_control_c1(void const* ptr)
         ctrl.push_back(0xc2);
         ctrl.push_back(c);
 
-        test_pastify_control(ctrl);
+        auto pict = std::string{};
+        vte::terminal::append_control_picture(pict, c);
+
+        test_pastify_control(ctrl, pict);
 }
 
 struct TestString {
diff --git a/src/pastify.cc b/src/pastify.cc
index a52e0e46..da9560db 100644
--- a/src/pastify.cc
+++ b/src/pastify.cc
@@ -70,6 +70,14 @@ pastify_string(std::string_view str,
                         break;
 
                 switch (str[run]) {
+                case 0x01 ... 0x09:
+                case 0x0b ... 0x0c:
+                case 0x0e ... 0x1f:
+                case 0x7f:
+                        append_control_picture(rv, str[run]);
+                        ++run;
+                        break;
+
                 case '\x0a':
                         rv.push_back('\x0d');
                         ++run;
@@ -77,6 +85,8 @@ pastify_string(std::string_view str,
                 case '\xc2': {
                         auto const c = next_char(run);
                         if (c >= 0x80 && c <= 0x9f) {
+                                append_control_picture(rv, c);
+
                                 /* Skip both bytes of a C1 */
                                 run += 2;
                         } else {
@@ -87,11 +97,11 @@ pastify_string(std::string_view str,
                         break;
                 }
                 default:
-                        /* Swallow this byte */
                         ++run;
                         break;
                 }
 
+                /* run is <= str.size() */
                 str = str.substr(run);
         }
 
@@ -105,4 +115,73 @@ pastify_string(std::string_view str,
         return rv;
 }
 
+/*
+ * append_control_picture:
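+ * @str: the string to append the UTF-8 encoded replacement to
+ * @ctrl: a C0 control (0x00..0x1f), DEL (0x7f), or a C1 control (0x80..0x9f)
+ *
+ * Appends the control picture for @ctrl to @str (or, if @ctrl has no
+ * control picture in Unicode, appends U+FFFD instead).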
+ */
+void
+append_control_picture(std::string& str,
+                       char32_t ctrl)
+{
+        switch (ctrl) {
+        case 0x00 ... 0x1f: /* C0 */
+                // U+2400 SYMBOL FOR NULL
+                // U+2401 SYMBOL FOR START OF HEADING
+                // U+2402 SYMBOL FOR START OF TEXT
+                // U+2403 SYMBOL FOR END OF TEXT
+                // U+2404 SYMBOL FOR END OF TRANSMISSION
+                // U+2405 SYMBOL FOR ENQUIRY
+                // U+2406 SYMBOL FOR ACKNOWLEDGE
+                // U+2407 SYMBOL FOR BELL
+                // U+2408 SYMBOL FOR BACKSPACE
+                // U+2409 SYMBOL FOR HORIZONTAL TABULATION
+                // U+240A SYMBOL FOR LINE FEED
+                // U+240B SYMBOL FOR VERTICAL TABULATION
+                // U+240C SYMBOL FOR FORM FEED
+                // U+240D SYMBOL FOR CARRIAGE RETURN
+                // U+240E SYMBOL FOR SHIFT OUT
+                // U+240F SYMBOL FOR SHIFT IN
+                // U+2410 SYMBOL FOR DATA LINK ESCAPE
+                // U+2411 SYMBOL FOR DEVICE CONTROL ONE
+                // U+2412 SYMBOL FOR DEVICE CONTROL TWO
+                // U+2413 SYMBOL FOR DEVICE CONTROL THREE
+                // U+2414 SYMBOL FOR DEVICE CONTROL FOUR
+                // U+2415 SYMBOL FOR NEGATIVE ACKNOWLEDGE
+                // U+2416 SYMBOL FOR SYNCHRONOUS IDLE
+                // U+2417 SYMBOL FOR END OF TRANSMISSION BLOCK
+                // U+2418 SYMBOL FOR CANCEL
+                // U+2419 SYMBOL FOR END OF MEDIUM
+                // U+241A SYMBOL FOR SUBSTITUTE
+                // U+241B SYMBOL FOR ESCAPE
+                // U+241C SYMBOL FOR FILE SEPARATOR
+                // U+241D SYMBOL FOR GROUP SEPARATOR
+                // U+241E SYMBOL FOR RECORD SEPARATOR
+                // U+241F SYMBOL FOR UNIT SEPARATOR
+                str.push_back('\xe2');
+                str.push_back('\x90');
+                str.push_back(ctrl + 0x80);
+                break;
+
+        case 0x7f: /* DEL */
+                str.append("\xe2\x90\xa1"); // U+2421 SYMBOL FOR DELETE
+                break;
+
+        case 0x80 ... 0x9f: /* C1 */
+                // Unfortunately, over 20 years after being first proposed, unicode
+                // **still** does not have control pictures for the C1 controls.
+                //
+                // Use U+FFFD instead.
+                str.append("\xef\xbf\xbd");
+                break;
+
+        default:
+                // This function may only be called for controls
+                __builtin_unreachable();
+        }
+}
+
 } // namespace vte::terminal
diff --git a/src/pastify.hh b/src/pastify.hh
index a0abf3ba..807205c0 100644
--- a/src/pastify.hh
+++ b/src/pastify.hh
@@ -27,4 +27,7 @@ std::string pastify_string(std::string_view str,
                            bool insert_brackets,
                            bool c1 = false);
 
+void append_control_picture(std::string& str,
+                            char32_t ctrl);
+
 } // namespace vte::terminal


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]