[vte] terminal: Prepare for processing only part of a chunk



commit 21817903f532b715d5f68762a5ea3a039cc00422
Author: Christian Persch <chpe src gnome org>
Date:   Mon Oct 19 00:16:36 2020 +0200

    terminal: Prepare for processing only part of a chunk
    
    Copy the last byte of the preceding chunk (except for EOS chunks) to
    the next chunk's dataminusone. This will allow rewinding the stream
    during processing by one byte, without keeping the preceding chunk
    around.
    This is in preparation of upcoming refactoring.

 src/chunk.cc       |   8 +--
 src/chunk.hh       | 140 +++++++++++++++++++++++++++++++++++++++++++++++------
 src/parser-cat.cc  |  19 +++++---
 src/vte.cc         |  88 +++++++++++++++++++++------------
 src/vteinternal.hh |   4 +-
 5 files changed, 200 insertions(+), 59 deletions(-)
---
diff --git a/src/chunk.cc b/src/chunk.cc
index 5049aa64..d25c453a 100644
--- a/src/chunk.cc
+++ b/src/chunk.cc
@@ -26,9 +26,6 @@ namespace vte {
 
 namespace base {
 
-static_assert(sizeof(Chunk) <= Chunk::k_chunk_size - 2 *sizeof(void*), "Chunk too large");
-static_assert(offsetof(Chunk, data) == offsetof(Chunk, dataminusone) + 1, "Chunk layout wrong");
-
 void
 Chunk::recycle() noexcept
 {
@@ -39,7 +36,7 @@ Chunk::recycle() noexcept
 std::stack<std::unique_ptr<Chunk>, std::list<std::unique_ptr<Chunk>>> Chunk::g_free_chunks;
 
 Chunk::unique_type
-Chunk::get(void) noexcept
+Chunk::get(Chunk const* chain_to) noexcept
 {
         Chunk* chunk;
         if (!g_free_chunks.empty()) {
@@ -51,6 +48,9 @@ Chunk::get(void) noexcept
                 chunk = new Chunk();
         }
 
+        if (chain_to)
+                chunk->chain(chain_to);
+
         return Chunk::unique_type(chunk);
 }
 void
diff --git a/src/chunk.hh b/src/chunk.hh
index 65bce52a..eeb0a7d2 100644
--- a/src/chunk.hh
+++ b/src/chunk.hh
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2018 Christian Persch
+ * Copyright © 2018, 2020 Christian Persch
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -17,7 +17,9 @@
 
 #pragma once
 
+#include <cassert>
 #include <cstdint>
+#include <cstring>
 #include <list>
 #include <memory>
 #include <stack>
@@ -27,6 +29,12 @@ namespace vte {
 namespace base {
 
 class Chunk {
+        // A Chunk contains the raw data read from PTY.
+        //
+        // Data will be read in blocks and accumulated into chunks.
+        // Chunks will be processed in (potentially) multiple
+        // parts (by potentially multiple (sub)parsers).
+        //
 private:
         class Recycler {
         public:
@@ -39,24 +47,110 @@ private:
 
         void recycle() noexcept;
 
-        static unsigned int const k_max_free_chunks = 16;
+        static constexpr const unsigned k_max_free_chunks = 16u;
+        static constexpr const unsigned k_chunk_size = 0x2000u - 2 * sizeof(void*);
+        static constexpr const unsigned k_overlap_size = 1u;
 
         enum class Flags : uint8_t {
-                eSEALED = 1u << 0,
-                eEOS    = 1u << 1,
+                eSEALED  = 1u << 0,
+                eEOS     = 1u << 1,
+                eCHAINED = 1u << 2,
         };
 
+        uint8_t* m_data{nullptr};
+        size_t m_capacity{0};
+        size_t m_start{k_overlap_size};
+        size_t m_size{k_overlap_size};
+        uint8_t m_flags{0};
+
 public:
-        using unique_type = std::unique_ptr<Chunk, Recycler>;
 
-        static unsigned int const k_chunk_size = 0x2000;
+        // Returns: pointer to the raw data storage (includes space for pre-begin data)
+        inline constexpr uint8_t* data() const noexcept { return m_data; }
 
-        unsigned int len{0};
-        uint8_t m_flags{0};
-        uint8_t dataminusone;    /* Hack: Keep it right before data, so that data[-1] is valid and usable */
-        uint8_t data[k_chunk_size - 2 * sizeof(void*) - 2 - sizeof(unsigned int)];
+        // Returns: the storage capacity of data()
+        inline constexpr auto capacity() const noexcept { return m_capacity; }
+
+        // Returns: pointer to where to start reading available data (inside data())
+        inline constexpr uint8_t const* begin_reading() const noexcept { assert(m_start <= m_size); return 
data() + m_start; }
+
+        // Returns: pointer to the end of available data()
+        inline constexpr uint8_t const* end_reading() const noexcept { return data() + m_size; };
+
+        // Returns: how much data there is to read between begin_reading() and end_reading()
+        inline constexpr auto size_reading() const noexcept { return m_size - m_start; }
+
+        // Returns: whether there is any data to read
+        inline constexpr bool has_reading() const noexcept { return begin_reading() < end_reading(); }
+
+        // Sets the value returned by begin_reading(). To be used after
+        // processing some data, so that the next round knows where to start.
+        void set_begin_reading(uint8_t const* ptr) noexcept
+        {
+                assert(ptr >= data() &&
+                       (!chained() || ptr > data()) &&
+                       ptr <= data() + capacity());
+                m_start = unsigned(ptr - data());
+        }
+
+        // Returns: pointer to buffer to write data into
+        // Note that there is *always* at least one byte writable at begin_writing()-1
+        // to be used when reading from a PTY in CPKT mode.
+        inline constexpr uint8_t* begin_writing() const noexcept { assert(m_size > 0); return data() + 
m_size; }
+
+        // Returns: size of begin_writing() buffer
+        inline constexpr auto capacity_writing() const noexcept { return m_capacity - m_size; }
+
+        // Add to chunk size. To be called after writing data to begin_writing().
+        inline void add_size(ssize_t len)
+        {
+                assert(len >= 0 && size_t(len) <= capacity_writing());
+                m_size += len;
+        }
+
+        // Chain this Chunk to some other Chunk.
+        // If the other chunk isn't EOS, we
+        // copy the last k_overlap_size byte(s) from it to the start of
+        // the new chunk, and set the new chunk as chained.
+        // This will allow rewinding the stream during processing,
+        // without keeping the preceding chunk around.
+        void chain(Chunk const* previous)
+        {
+                assert(m_size == k_overlap_size && m_start == m_size); // or call reset() here?
+
+                if (!previous->eos()) {
+                        std::memcpy(m_data,
+                                    previous->m_data + previous->m_size - k_overlap_size,
+                                    k_overlap_size);
+
+                        set_chained();
+                }
+        }
+
+        // Special-case operator new, so that we can allocate
+        // the chunk data together with the instance.
+        void* operator new(std::size_t count)
+        {
+                assert(count < k_chunk_size);
+                return std::malloc(k_chunk_size);
+        }
+
+        // Special-case operator delete for pairing with operator new.
+        void operator delete(void* ptr)
+        {
+                std::free(ptr);
+        }
+
+        // Type to use when storing a Chunk, so that chunks can be recycled.
+        using unique_type = std::unique_ptr<Chunk, Recycler>;
+
+        Chunk()
+                : m_data{reinterpret_cast<uint8_t*>(this) + sizeof(*this)},
+                  m_capacity{k_chunk_size - sizeof(*this)}
+        {
+                std::memset(m_data, 0, k_overlap_size);
+        }
 
-        Chunk() = default;
         Chunk(Chunk const&) = delete;
         Chunk(Chunk&&) = delete;
         ~Chunk() = default;
@@ -64,24 +158,40 @@ public:
         Chunk& operator= (Chunk const&) = delete;
         Chunk& operator= (Chunk&&) = delete;
 
+        // Resets the chunk. Reset chunks will not be rewindable!
         void reset() noexcept
         {
-                len = 0;
+                std::memset(m_data, 0, k_overlap_size);
+                m_start = m_size = k_overlap_size;
                 m_flags = 0;
         }
 
-        inline constexpr size_t capacity() const noexcept { return sizeof(data); }
-        inline constexpr size_t remaining_capacity() const noexcept { return capacity() - len; }
+        // Returns: a new or recycled Chunk
+        static unique_type get(Chunk const* chain_to) noexcept;
 
-        static unique_type get() noexcept;
+        // Prune recycled chunks
         static void prune(unsigned int max_size = k_max_free_chunks) noexcept;
 
+        // Returns: whether the chunk is sealed, i.e. must not be used
+        // to write more data into
         inline constexpr bool sealed() const noexcept { return m_flags & (uint8_t)Flags::eSEALED; }
+
+        // Seal the chunk
         inline void set_sealed() noexcept { m_flags |= (uint8_t)Flags::eSEALED; }
 
+        // Returns: whether the chunk is an EOS (end-of-stream)
         inline constexpr bool eos() const noexcept { return m_flags & (uint8_t)Flags::eEOS; }
+
+        // Set the chunk EOS
         inline void set_eos() noexcept { m_flags |= (uint8_t)Flags::eEOS; }
 
+        // Returns: whether the chunk was chained to some other chunk
+        // and thus m_start may be set to < k_overlap_size.
+        inline constexpr bool chained() const noexcept { return m_flags & (uint8_t)Flags::eCHAINED; }
+
+        // Set the chunk as chained
+        inline void set_chained() noexcept { m_flags |= (uint8_t)Flags::eCHAINED; }
+
 private:
 
         /* Note that this is using the standard deleter, not Recycler */
diff --git a/src/parser-cat.cc b/src/parser-cat.cc
index 5fdb4b7c..4dfb472a 100644
--- a/src/parser-cat.cc
+++ b/src/parser-cat.cc
@@ -625,6 +625,8 @@ private:
         gsize m_cmd_stats[VTE_CMD_N];
         GArray* m_bench_times;
 
+        static constexpr const size_t k_buf_overlap = 1u;
+
         template<class Functor>
         void
         process_file_utf8(int fd,
@@ -633,14 +635,15 @@ private:
                 vte::parser::Parser parser{};
                 vte::parser::Sequence seq{parser};
 
-                gsize const buf_size = 16384;
-                guchar* buf = g_new0(guchar, buf_size);
+                auto const buf_size = size_t{16384};
+                auto buf = g_new0(uint8_t, buf_size);
 
                 auto start_time = g_get_monotonic_time();
 
                 vte::base::UTF8Decoder decoder;
 
-                gsize buf_start = 0;
+                std::memset(buf, 0, k_buf_overlap);
+                auto buf_start = k_buf_overlap;
                 for (;;) {
                         auto len = read(fd, buf + buf_start, buf_size - buf_start);
                         if (!len)
@@ -651,8 +654,10 @@ private:
                                 break;
                         }
 
-                        auto const bufend = buf + len;
-                        for (auto sptr = buf; sptr < bufend; ++sptr) {
+                        auto const bufstart = buf + buf_start;
+                        auto const bufend = bufstart + len;
+
+                        for (auto sptr = bufstart; sptr < bufend; ++sptr) {
                                 switch (decoder.decode(*sptr)) {
                                 case vte::base::UTF8Decoder::REJECT_REWIND:
                                         /* Rewind the stream.
@@ -679,11 +684,13 @@ private:
                                         }
                                         break;
                                 }
-
                                 default:
                                         break;
                                 }
                         }
+
+                        /* Chain buffers by copying data from end of buf to the start */
+                        std::memmove(buf, buf + buf_start + len - k_buf_overlap, k_buf_overlap);
                 }
 
         out:
diff --git a/src/vte.cc b/src/vte.cc
index bd3d4431..d3d8c83b 100644
--- a/src/vte.cc
+++ b/src/vte.cc
@@ -3446,18 +3446,23 @@ Terminal::process_incoming()
         auto context = ProcessingContext{*this};
 
         while (!m_incoming_queue.empty()) {
-                auto chunk = std::move(m_incoming_queue.front());
-                m_incoming_queue.pop();
+                auto& chunk = m_incoming_queue.front();
 
                 assert((bool)chunk);
-                g_assert_nonnull(chunk.get());
+
+                auto const start = chunk->begin_reading();
 
                 _VTE_DEBUG_IF(VTE_DEBUG_IO) {
-                        _vte_debug_hexdump("Incoming buffer", chunk->data, chunk->len);
+                        _vte_debug_print(VTE_DEBUG_IO,
+                                         "Processing chunk %p starting at offset %u\n",
+                                         (void*)chunk.get(),
+                                         unsigned(chunk->begin_reading() - chunk->data()));
+
+                        _vte_debug_hexdump("Incoming buffer",
+                                           chunk->begin_reading(),
+                                           chunk->size_reading());
                 }
 
-                bytes_processed += chunk->len;
-
                 switch (data_syntax()) {
                 case DataSyntax::eECMA48_UTF8:
                         process_incoming_utf8(context, *chunk);
@@ -3471,6 +3476,12 @@ Terminal::process_incoming()
                         g_assert_not_reached();
                         break;
                 }
+
+                bytes_processed += size_t(chunk->begin_reading() - start);
+
+                // If all data from this chunk has been processed, go to the next one
+                if (!chunk->has_reading())
+                        m_incoming_queue.pop();
         }
 
 #ifdef VTE_DEBUG
@@ -3551,12 +3562,12 @@ Terminal::process_incoming()
  */
 void
 Terminal::process_incoming_utf8(ProcessingContext& context,
-                                vte::base::Chunk const& chunk)
+                                vte::base::Chunk& chunk)
 {
         auto seq = vte::parser::Sequence{m_parser};
 
-        auto const* ip = chunk.data;
-        auto const* iend = chunk.data + chunk.len;
+        auto const iend = chunk.end_reading();
+        auto ip = chunk.begin_reading();
 
         for ( ; ip < iend; ++ip) {
 
@@ -3645,6 +3656,9 @@ Terminal::process_incoming_utf8(ProcessingContext& context,
                 }
         }
 
+        // Update start for data consumed
+        chunk.set_begin_reading(ip);
+
         if (chunk.eos()) {
                 m_eos_pending = true;
                 /* If there's an unfinished character in the queue, insert a replacement character */
@@ -3662,18 +3676,18 @@ Terminal::process_incoming_utf8(ProcessingContext& context,
  */
 void
 Terminal::process_incoming_pcterm(ProcessingContext& context,
-                                  vte::base::Chunk const& chunk)
+                                  vte::base::Chunk& chunk)
 {
         auto seq = vte::parser::Sequence{m_parser};
 
         auto& decoder = m_converter->decoder();
 
-        auto const* ip = chunk.data;
-        auto const* iend = chunk.data + chunk.len;
-
         auto eos = bool{false};
         auto flush = bool{false};
 
+        auto const iend = chunk.end_reading();
+        auto ip = chunk.begin_reading();
+
  start:
         while (ip < iend || flush) {
                 switch (decoder.decode(&ip, flush)) {
@@ -3765,6 +3779,9 @@ Terminal::process_incoming_pcterm(ProcessingContext& context,
                 return;
         }
 
+        // Update start for data consumed
+        chunk.set_begin_reading(ip);
+
         if (chunk.eos()) {
                 /* On EOS, we still need to flush the decoder before we can finish */
                 eos = flush = true;
@@ -3827,14 +3844,13 @@ Terminal::pty_io_read(int const fd,
                         /* No chunk, chunk sealed or at least ¾ full? Get a new chunk */
                        if (!chunk ||
                             chunk->sealed() ||
-                            chunk->len >= 3 * chunk->capacity() / 4) {
-                                m_incoming_queue.push(vte::base::Chunk::get());
-
+                            chunk->capacity_writing() < chunk->capacity() / 4) {
+                                m_incoming_queue.push(vte::base::Chunk::get(chunk));
                                 chunk = m_incoming_queue.back().get();
                        }
 
-                       rem = chunk->remaining_capacity();
-                       bp = chunk->data + chunk->len;
+                       rem = chunk->capacity_writing();
+                       bp = chunk->begin_writing();
                        len = 0;
                        do {
                                 /* We'd like to read (fd, bp, rem); but due to TIOCPKT mode
@@ -3842,12 +3858,12 @@ Terminal::pty_io_read(int const fd,
                                  * We need to see what that byte is, but otherwise drop it
                                  * and write continuously to chunk->data.
                                  */
-                                char pkt_header;
-                                char save = bp[-1];
+                                auto const save = bp[-1];
                                 errno = 0;
-                                int ret = read (fd, bp - 1, rem + 1);
-                                pkt_header = bp[-1];
+                                auto ret = read(fd, bp - 1, rem + 1);
+                                auto const pkt_header = bp[-1];
                                 bp[-1] = save;
+
                                switch (ret){
                                        case -1:
                                                err = errno;
@@ -3887,10 +3903,18 @@ Terminal::pty_io_read(int const fd,
                                }
                        } while (rem);
 out:
-                       chunk->len += len;
+                       chunk->add_size(len);
                        bytes += len;
                } while (bytes < max_bytes &&
-                        chunk->len == chunk->capacity());
+                         // This means that a read into a not-yet-¾-full
+                         // chunk used up all the available capacity, so
+                         // let's assume that we can read more and thus
+                         // we'll get a new chunk in the loop above and
+                         // continue on. (See commit 49a0cdf11.)
+                         // Note also that on EOS or error, this condition
+                         // is false (since there was capacity, but it wasn't
+                         // used up).
+                        chunk->capacity_writing() == 0);
 
                 /* We may have an empty chunk at the back of the queue, but
                  * that doesn't matter, we'll fill it next time.
@@ -3933,10 +3957,10 @@ out:
                _vte_debug_print(VTE_DEBUG_IO, "got PTY EOF\n");
 
                 /* Make a note of the EOS; but do not process it since there may be data
-                 * to be processed first in the incomding queue.
+                 * to be processed first in the incoming queue.
                  */
                 if (!chunk || chunk->sealed()) {
-                        m_incoming_queue.push(vte::base::Chunk::get());
+                        m_incoming_queue.push(vte::base::Chunk::get(chunk));
                         chunk = m_incoming_queue.back().get();
                 }
 
@@ -3973,20 +3997,20 @@ Terminal::feed(std::string_view const& data,
         vte::base::Chunk* chunk = nullptr;
         if (!m_incoming_queue.empty()) {
                 auto& achunk = m_incoming_queue.back();
-                if (length < achunk->remaining_capacity() && !achunk->sealed())
+                if (length < achunk->capacity_writing() && !achunk->sealed())
                         chunk = achunk.get();
         }
         if (chunk == nullptr) {
-                m_incoming_queue.push(vte::base::Chunk::get());
+                m_incoming_queue.push(vte::base::Chunk::get(nullptr));
                 chunk = m_incoming_queue.back().get();
         }
 
         /* Break the incoming data into chunks. */
         do {
-                auto rem = chunk->remaining_capacity();
+                auto rem = chunk->capacity_writing();
                 auto len = std::min(length, rem);
-                memcpy (chunk->data + chunk->len, ptr, len);
-                chunk->len += len;
+                memcpy (chunk->begin_writing(), ptr, len);
+                chunk->add_size(len);
                 length -= len;
                 if (length == 0)
                         break;
@@ -3994,7 +4018,7 @@ Terminal::feed(std::string_view const& data,
                 ptr += len;
 
                 /* Get another chunk for the remaining data */
-                m_incoming_queue.push(vte::base::Chunk::get());
+                m_incoming_queue.push(vte::base::Chunk::get(chunk));
                 chunk = m_incoming_queue.back().get();
         } while (true);
 
diff --git a/src/vteinternal.hh b/src/vteinternal.hh
index 35c66ff1..fe8de1a4 100644
--- a/src/vteinternal.hh
+++ b/src/vteinternal.hh
@@ -843,10 +843,10 @@ public:
         void time_process_incoming();
         void process_incoming();
         void process_incoming_utf8(ProcessingContext& context,
-                                   vte::base::Chunk const& chunk);
+                                   vte::base::Chunk& chunk);
         #ifdef WITH_ICU
         void process_incoming_pcterm(ProcessingContext& context,
-                                     vte::base::Chunk const& chunk);
+                                     vte::base::Chunk& chunk);
         #endif
         bool process(bool emit_adj_changed);
         inline bool is_processing() const { return m_active_terminals_link != nullptr; }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]