[vte] terminal: Add tests for text paste transformation



commit 7daaabbc6d8c217b01f94e28c43656fad1170391
Author: Christian Persch <chpe src gnome org>
Date:   Sat Nov 27 19:45:07 2021 +0100

    terminal: Add tests for text paste transformation
    
    Move the paste transformation to its own file, and add tests.

 src/meson.build     |  20 +++++-
 src/pastify-test.cc | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/pastify.cc      | 108 ++++++++++++++++++++++++++++
 src/pastify.hh      |  30 ++++++++
 src/vte.cc          |  65 ++---------------
 src/vteinternal.hh  |   2 +-
 src/widget.cc       |   2 +-
 src/widget.hh       |   2 +-
 8 files changed, 364 insertions(+), 63 deletions(-)
---
diff --git a/src/meson.build b/src/meson.build
index a1440da5..605dadbf 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -107,6 +107,11 @@ parser_sources += custom_target(
   ],
 )
 
+pastify_sources = files(
+  'pastify.cc',
+  'pastify.hh',
+)
+
 pcre2_glue_sources = files(
   'pcre2-glue.hh',
 )
@@ -159,7 +164,7 @@ vte_glue_sources = files(
   'vte-glue.hh',
 )
 
-libvte_common_sources = config_sources + debug_sources + glib_glue_sources + gtk_glue_sources + 
libc_glue_sources + modes_sources + pango_glue_sources + parser_sources + pcre2_glue_sources + pty_sources + 
refptr_sources + regex_sources + std_glue_sources + utf8_sources + vte_glue_sources + files(
+libvte_common_sources = config_sources + debug_sources + glib_glue_sources + gtk_glue_sources + 
libc_glue_sources + modes_sources + pango_glue_sources + parser_sources + pastify_sources + 
pcre2_glue_sources + pty_sources + refptr_sources + regex_sources + std_glue_sources + utf8_sources + 
vte_glue_sources + files(
   'attr.hh',
   'bidi.cc',
   'bidi.hh',
@@ -543,6 +548,18 @@ test_parser = executable(
   install: false,
 )
 
+test_pastify_sources = config_sources + pastify_sources + files(
+  'pastify-test.cc',
+)
+
+test_pastify = executable(
+  'test-pastify',
+  sources: test_pastify_sources,
+  dependencies: [glib_dep],
+  include_directories: top_inc,
+  install: false,
+)
+
 test_reaper_sources = config_sources + debug_sources + files(
   'reaper.cc',
   'reaper.hh'
@@ -665,6 +682,7 @@ test_env = [
 test_units = [
   ['modes', test_modes],
   ['parser', test_parser],
+  ['pastify', test_pastify],
   ['reaper', test_reaper],
   ['refptr', test_refptr],
   ['stream', test_stream],
diff --git a/src/pastify-test.cc b/src/pastify-test.cc
new file mode 100644
index 00000000..1a4c7f56
--- /dev/null
+++ b/src/pastify-test.cc
@@ -0,0 +1,198 @@
+/*
+ * Copyright © 2021 Christian Persch
+ *
+ * This library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include <glib.h>
+
+#include "pastify.hh"
+
+using namespace std::literals;
+
+static bool
+assert_streq(std::string_view const& str1,
+             std::string_view const& str2)
+{
+        auto s1 = std::string{str1};
+        auto s2 = std::string{str2};
+
+        g_assert_cmpstr(s1.c_str(), ==, s2.c_str());
+        return true;
+}
+
+static void
+test_pastify(std::string_view const& str,
+             std::string_view const& expected,
+             bool insert_brackets = false,
+             bool c1 = false)
+{
+        auto rv = vte::terminal::pastify_string(str, insert_brackets, c1);
+        assert_streq(rv, expected);
+
+        /* Check idempotence: pastifying unbracketed output again must not change it */
+        if (!insert_brackets) {
+                auto rv2 = vte::terminal::pastify_string(rv, false, false);
+                assert_streq(rv, rv2);
+        }
+}
+
+static void
+test_pastify_brackets_c0(void)
+{
+        test_pastify("0"sv, "\e[200~0\e[201~"sv, true, false);
+}
+
+static void
+test_pastify_brackets_c1(void)
+{
+        test_pastify("0"sv, "\xc2\x9b" "200~0\xc2\x9b" "201~"sv, true, true);
+}
+
+static void
+test_pastify_control(std::string const& ctrl)
+{
+        test_pastify(ctrl, ""sv);
+        test_pastify(ctrl + ctrl, ""sv);
+        test_pastify("abc"s + ctrl, "abc"sv);
+        test_pastify("abc"s + ctrl + ctrl, "abc"sv);
+        test_pastify(ctrl + "abc"s, "abc"sv);
+        test_pastify(ctrl + ctrl + "abc"s, "abc"sv);
+        test_pastify("abc"s + ctrl + "abc"s, "abcabc"sv);
+        test_pastify("abc"s + ctrl + ctrl + "abc"s, "abcabc"sv);
+}
+
+static void
+test_pastify_control_c0(void const* ptr)
+{
+        auto const c = *reinterpret_cast<unsigned char const*>(ptr);
+        auto ctrl = ""s;
+        ctrl.push_back(c);
+
+        test_pastify_control(ctrl);
+}
+
+static void
+test_pastify_control_c1(void const* ptr)
+{
+        auto const c = *reinterpret_cast<unsigned char const*>(ptr);
+
+        auto ctrl = ""s;
+        ctrl.push_back(0xc2);
+        ctrl.push_back(c);
+
+        test_pastify_control(ctrl);
+}
+
+struct TestString {
+public:
+        char const* m_str;
+        char const* m_expected;
+        int m_line;
+
+        TestString() = default;
+        consteval TestString(char const* str,
+                             char const* expected,
+                             int line = __builtin_LINE()) noexcept :
+                m_str(str),
+                m_expected(expected),
+                m_line(line)
+        {
+        }
+};
+
+consteval auto
+identity_test(char const *str,
+              int line = __builtin_LINE()) noexcept
+{
+        return TestString(str, str, line);
+}
+
+static void
+test_pastify_string(void const* ptr)
+{
+        auto str = reinterpret_cast<TestString const*>(ptr);
+        test_pastify(str->m_str, str->m_expected);
+}
+
+static constinit TestString const test_strings[] = {
+        /* Controls */
+        identity_test("\x09"), /* HT passes through */
+        identity_test("\x0d"), /* CR passes through */
+
+        /* Non-C1 but starting with a 0xC2 byte */
+        identity_test("abc\xc2\xa0xyz"),
+
+        /* CR/LF conversion */
+        TestString("\x0a", "\x0d"),
+        TestString("\x0d\x0a", "\x0d\x0d"),
+};
+
+int
+main(int argc,
+     char* argv[])
+{
+        g_test_init(&argc, &argv, nullptr);
+
+        g_test_add_func("/vte/pastify/brackets/c0", test_pastify_brackets_c0);
+        g_test_add_func("/vte/pastify/brackets/c1", test_pastify_brackets_c1);
+
+        /* C0 controls: one test per byte, each asserting the byte is elided */
+        for (auto c = 0; c < 0x20; ++c) {
+                /* Skip NUL, HT, LF, CR: pastify_string() does not elide these */
+                if (c == 0 || c == 0x09 || c == 0x0a || c == 0x0d)
+                        continue;
+
+                char path[64];
+                g_snprintf(path, sizeof(path), "/vte/pastify/controls/c0/%02x", c);
+
+                auto ptr = g_new(unsigned char, 1);
+                *ptr = c;
+
+                g_test_add_data_func_full(path, ptr, test_pastify_control_c0, g_free);
+        }
+
+        /* DEL (0x7f) is run through the same single-byte control test */
+        {
+                auto const path = "/vte/pastify/controls/c0/7f";
+                auto ptr = g_new(unsigned char, 1);
+                *ptr = 0x7f;
+
+                g_test_add_data_func_full(path, ptr, test_pastify_control_c0, g_free);
+        }
+
+        /* C1 controls; the test function prefixes each byte with 0xC2 */
+        for (auto c = 0x80; c < 0xa0; ++c) {
+                char path[64];
+                g_snprintf(path, sizeof(path), "/vte/pastify/controls/c1/%02x", c);
+
+                auto ptr = g_new(unsigned char, 1);
+                *ptr = c;
+
+                g_test_add_data_func_full(path, ptr, test_pastify_control_c1, g_free);
+        }
+
+        /* Extra test strings; each test path is keyed on the entry's source line */
+        for (auto i = 0u; i < G_N_ELEMENTS (test_strings); ++i) {
+                auto const* str = &test_strings[i];
+
+                char path[64];
+                g_snprintf(path, sizeof(path), "/vte/pastify/string/%d", str->m_line);
+                g_test_add_data_func(path, str, test_pastify_string);
+        }
+
+        return g_test_run();
+}
diff --git a/src/pastify.cc b/src/pastify.cc
new file mode 100644
index 00000000..a52e0e46
--- /dev/null
+++ b/src/pastify.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright © 2015, 2019, Egmont Koblinger
+ * Copyright © 2015, 2018, 2019, 2020, 2021 Christian Persch
+ *
+ * This library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "pastify.hh"
+
+namespace vte::terminal {
+
+using namespace std::literals;
+
+/*
+ * pastify_string:
+ * @str: the text to be pasted
+ * @insert_brackets: whether to wrap the result in bracketed paste controls
+ * @c1: whether to use C1 CSI (0xC2 0x9B) instead of ESC [ for the brackets
+ *
+ * Converts @str into a form safe for pasting to the child: elides the C0
+ * controls other than NUL, HT, CR and LF, as well as DEL and the UTF-8
+ * encoded C1 controls (0xC2 0x80 .. 0xC2 0x9F). LF is converted to CR,
+ * which more software is able to cope with (cough, pico, cough).
+ *
+ * If @insert_brackets is true, the result is wrapped in the bracketed
+ * paste controls (CSI 200 ~ before, CSI 201 ~ after).
+ * Returns: the transformed string.
+ */
+std::string
+pastify_string(std::string_view str,
+               bool insert_brackets,
+               bool c1)
+{
+        auto rv = std::string{};
+        /* Room for the text plus, if requested, the two 6-byte bracket sequences */
+        rv.reserve(str.size() + 1 + (insert_brackets ? 12 : 0));
+
+        if (insert_brackets) {
+                if (c1)
+                        rv.append("\xc2\x9b" "200~");
+                else
+                        rv.append("\e[200~");
+        }
+
+        /* C0 \ { NUL, HT, CR } + { DEL } + { C1 control start byte }; LF is matched so it can become CR */
+        auto const controls = "\x01\x02\x03\x04\x05\x06\x07\x08\x0a\x0b\x0c\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f\xc2"sv;
+        auto next_char = [&str](size_t pos) constexpr noexcept -> unsigned char
+        {
+                return pos + 1 < str.size() ? str[pos + 1] : 0;
+        };
+
+        while (str.size() != 0) {
+                auto run = str.find_first_of(controls, 0);
+
+                rv.append(str, 0, run);
+                if (run == str.npos)
+                        break;
+
+                switch (str[run]) {
+                case '\x0a':
+                        rv.push_back('\x0d');
+                        ++run;
+                        break;
+                case '\xc2': {
+                        auto const c = next_char(run);
+                        if (c >= 0x80 && c <= 0x9f) {
+                                /* Skip both bytes of a C1 */
+                                run += 2;
+                        } else {
+                                /* Move along, nothing to see here */
+                                rv.push_back('\xc2');
+                                ++run;
+                        }
+                        break;
+                }
+                default:
+                        /* Swallow this byte */
+                        ++run;
+                        break;
+                }
+
+                str = str.substr(run);
+        }
+
+        if (insert_brackets) {
+                if (c1)
+                        rv.append("\xc2\x9b" "201~");
+                else
+                        rv.append("\e[201~");
+        }
+
+        return rv;
+}
+
+} // namespace vte::terminal
diff --git a/src/pastify.hh b/src/pastify.hh
new file mode 100644
index 00000000..a0abf3ba
--- /dev/null
+++ b/src/pastify.hh
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2015, 2019, Egmont Koblinger
+ * Copyright © 2015, 2018, 2019, 2020, 2021 Christian Persch
+ *
+ * This library is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <string>
+#include <string_view>
+
+namespace vte::terminal {
+
+std::string pastify_string(std::string_view str,
+                           bool insert_brackets,
+                           bool c1 = false);
+
+} // namespace vte::terminal
diff --git a/src/vte.cc b/src/vte.cc
index fbcb8df4..c326dd54 100644
--- a/src/vte.cc
+++ b/src/vte.cc
@@ -67,6 +67,7 @@
 #include <pango/pango.h>
 #include "keymap.h"
 #include "marshal.h"
+#include "pastify.hh"
 #include "vtepty.h"
 #include "vtegtk.hh"
 #include "cxx-utils.hh"
@@ -5479,65 +5480,11 @@ Terminal::cell_is_selected_vis(vte::grid::column_t vcol,
 }
 
 void
-Terminal::widget_clipboard_text_received(std::string_view const& data)
-{
-       gchar *paste, *p;
-        gsize run;
-        unsigned char c;
-
-        auto const len = data.size();
-        auto text = data.data();
-
-        /* Convert newlines to carriage returns, which more software
-         * is able to cope with (cough, pico, cough).
-         * Filter out control chars except HT, CR (even stricter than xterm).
-         * Also filter out C1 controls: U+0080 (0xC2 0x80) - U+009F (0xC2 0x9F). */
-        p = paste = (gchar *) g_malloc(len + 1);
-        while (p != nullptr && text[0] != '\0') {
-                run = strcspn(text, "\x01\x02\x03\x04\x05\x06\x07"
-                              "\x08\x0A\x0B\x0C\x0E\x0F"
-                              "\x10\x11\x12\x13\x14\x15\x16\x17"
-                              "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
-                              "\x7F\xC2");
-                memcpy(p, text, run);
-                p += run;
-                text += run;
-                switch (text[0]) {
-                case '\x00':
-                        break;
-                case '\x0A':
-                        *p = '\x0D';
-                        p++;
-                        text++;
-                        break;
-                case '\xC2':
-                        c = text[1];
-                        if (c >= 0x80 && c <= 0x9F) {
-                                /* Skip both bytes of a C1 */
-                                text += 2;
-                        } else {
-                                /* Move along, nothing to see here */
-                                *p = '\xC2';
-                                p++;
-                                text++;
-                        }
-                        break;
-                default:
-                        /* Swallow this byte */
-                        text++;
-                        break;
-                }
-        }
-
-        bool const bracketed_paste = m_modes_private.XTERM_READLINE_BRACKETED_PASTE();
-        // FIXMEchpe can we not hardcode C0 controls here?
-        if (bracketed_paste)
-                feed_child("\e[200~"sv);
-        // FIXMEchpe add a way to avoid the extra string copy done here
-        feed_child(paste, p - paste);
-        if (bracketed_paste)
-                feed_child("\e[201~"sv);
-        g_free(paste);
+Terminal::widget_paste(std::string_view const& data)
+{
+        feed_child(vte::terminal::pastify_string(data,
+                                                 m_modes_private.XTERM_READLINE_BRACKETED_PASTE(),
+                                                 false /* C1 */));
 }
 
 bool
diff --git a/src/vteinternal.hh b/src/vteinternal.hh
index 2274b026..db5f6364 100644
--- a/src/vteinternal.hh
+++ b/src/vteinternal.hh
@@ -894,7 +894,7 @@ public:
         void widget_copy(vte::platform::ClipboardType selection,
                          vte::platform::ClipboardFormat format);
 
-        void widget_clipboard_text_received(std::string_view const& text);
+        void widget_paste(std::string_view const& text);
 
         std::optional<std::string_view> widget_clipboard_data_get(vte::platform::Clipboard const& clipboard,
                                                                   vte::platform::ClipboardFormat format);
diff --git a/src/widget.cc b/src/widget.cc
index c3a13ab8..c5181505 100644
--- a/src/widget.cc
+++ b/src/widget.cc
@@ -558,7 +558,7 @@ void
 Widget::clipboard_request_received_cb(Clipboard const& clipboard,
                                       std::string_view const& text)
 {
-        terminal()->widget_clipboard_text_received(text);
+        terminal()->widget_paste(text);
 }
 
 void
diff --git a/src/widget.hh b/src/widget.hh
index f38e7af4..634f6614 100644
--- a/src/widget.hh
+++ b/src/widget.hh
@@ -380,7 +380,7 @@ public:
         void clipboard_set_text(ClipboardType type,
                                 std::string_view const& str) noexcept;
 
-        void paste_text(std::string_view const& text) { m_terminal->widget_clipboard_text_received(text); }
+        void paste_text(std::string_view const& text) { m_terminal->widget_paste(text); }
         void paste(vte::platform::ClipboardType type) { clipboard_request_text(type); }
         void copy(vte::platform::ClipboardType type,
                   vte::platform::ClipboardFormat format) noexcept { m_terminal->widget_copy(type, format); }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]