[gnome-text-editor] textregion: import text region for use in spell adapters
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-text-editor] textregion: import text region for use in spell adapters
- Date: Fri, 25 Jun 2021 23:23:41 +0000 (UTC)
commit 6cee70b724cd6e019916900384988ded612016f9
Author: Christian Hergert <chergert redhat com>
Date: Fri Jun 25 16:21:34 2021 -0700
textregion: import text region for use in spell adapters
This can be used to track regions of text similar to a piecetable using the
combination of a piecetable and a b+tree.
We will use it from the textbuffer spellcheck adapter to track ranges that
need to have spellcheck udpated.
src/cjhtextregion.c | 1295 ++++++++++++++++++++++++++++++++++++++++++++
src/cjhtextregionbtree.h | 555 +++++++++++++++++++
src/cjhtextregionprivate.h | 121 +++++
src/meson.build | 1 +
4 files changed, 1972 insertions(+)
---
diff --git a/src/cjhtextregion.c b/src/cjhtextregion.c
new file mode 100644
index 0000000..f7219b9
--- /dev/null
+++ b/src/cjhtextregion.c
@@ -0,0 +1,1295 @@
+/* cjhtextregion.c
+ *
+ * Copyright 2021 Christian Hergert <chergert redhat com>
+ *
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include "config.h"
+
+#include "cjhtextregionprivate.h"
+#include "cjhtextregionbtree.h"
+
+#ifndef G_DISABLE_ASSERT
+# define DEBUG_VALIDATE(a,b) G_STMT_START { if (a) cjh_text_region_node_validate(a,b); } G_STMT_END
+#else
+# define DEBUG_VALIDATE(a,b) G_STMT_START { } G_STMT_END
+#endif
+
+static inline void
+cjh_text_region_invalid_cache (CjhTextRegion *region)
+{
+ region->cached_result = NULL;
+ region->cached_result_offset = 0;
+}
+
+G_GNUC_UNUSED static void
+cjh_text_region_node_validate (CjhTextRegionNode *node,
+ CjhTextRegionNode *parent)
+{
+ gsize length = 0;
+ gsize length_in_parent = 0;
+
+ g_assert (node != NULL);
+ g_assert (UNTAG (node->tagged_parent) == parent);
+ g_assert (cjh_text_region_node_is_leaf (node) ||
+ UNTAG (node->tagged_parent) == node->tagged_parent);
+ g_assert (!parent || !cjh_text_region_node_is_leaf (parent));
+ g_assert (!parent || !SORTED_ARRAY_IS_EMPTY (&parent->branch.children));
+
+ if (parent != NULL)
+ {
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ if (child->node == node)
+ {
+ length_in_parent = child->length;
+ goto found;
+ }
+ });
+ g_assert_not_reached ();
+ }
+found:
+
+ if (parent != NULL)
+ g_assert_cmpint (length_in_parent, ==, cjh_text_region_node_length (node));
+
+ for (CjhTextRegionNode *iter = parent;
+ iter != NULL;
+ iter = cjh_text_region_node_get_parent (iter))
+ g_assert_false (cjh_text_region_node_is_leaf (iter));
+
+ if (cjh_text_region_node_is_leaf (node))
+ {
+ SORTED_ARRAY_FOREACH (&node->leaf.runs, CjhTextRegionRun, run, {
+ g_assert_cmpint (run->length, >, 0);
+ length += run->length;
+ });
+
+ if (node->leaf.prev != NULL)
+ g_assert_true (cjh_text_region_node_is_leaf (node->leaf.prev));
+
+ if (node->leaf.next != NULL)
+ g_assert_true (cjh_text_region_node_is_leaf (node->leaf.next));
+ }
+ else
+ {
+ SORTED_ARRAY_FOREACH (&node->branch.children, CjhTextRegionChild, child, {
+ CjhTextRegionChild *next = SORTED_ARRAY_FOREACH_PEEK (&node->branch.children);
+
+ g_assert_nonnull (child->node);
+ g_assert_cmpint (child->length, >, 0);
+ g_assert_cmpint (child->length, ==, cjh_text_region_node_length (child->node));
+ g_assert_true (cjh_text_region_node_get_parent (child->node) == node);
+
+ length += child->length;
+
+ if (next != NULL && next->node)
+ {
+ g_assert_cmpint (cjh_text_region_node_is_leaf (child->node), ==,
+ cjh_text_region_node_is_leaf (next->node));
+
+ if (cjh_text_region_node_is_leaf (child->node))
+ {
+ g_assert_true (child->node->leaf.next == next->node);
+ g_assert_true (child->node == next->node->leaf.prev);
+ }
+ else
+ {
+ g_assert_true (child->node->branch.next == next->node);
+ g_assert_true (child->node == next->node->branch.prev);
+ }
+ }
+ });
+ }
+
+ if (parent != NULL)
+ g_assert_cmpint (length_in_parent, ==, length);
+}
+
+static void
+cjh_text_region_split (CjhTextRegion *region,
+ gsize offset,
+ const CjhTextRegionRun *run,
+ CjhTextRegionRun *left,
+ CjhTextRegionRun *right)
+{
+ if (region->split_func != NULL)
+ region->split_func (offset, run, left, right);
+}
+
+static CjhTextRegionNode *
+cjh_text_region_node_new (CjhTextRegionNode *parent,
+ gboolean is_leaf)
+{
+ CjhTextRegionNode *node;
+
+ g_assert (UNTAG (parent) == parent);
+
+ node = g_new0 (CjhTextRegionNode, 1);
+ node->tagged_parent = TAG (parent, is_leaf);
+
+ if (is_leaf)
+ {
+ SORTED_ARRAY_INIT (&node->leaf.runs);
+ node->leaf.prev = NULL;
+ node->leaf.next = NULL;
+ }
+ else
+ {
+ SORTED_ARRAY_INIT (&node->branch.children);
+ }
+
+ g_assert (cjh_text_region_node_get_parent (node) == parent);
+
+ return node;
+}
+
+static void
+cjh_text_region_subtract_from_parents (CjhTextRegion *region,
+ CjhTextRegionNode *node,
+ gsize length)
+{
+ CjhTextRegionNode *parent = cjh_text_region_node_get_parent (node);
+
+ if (parent == NULL || length == 0)
+ return;
+
+ cjh_text_region_invalid_cache (region);
+
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ if (child->node == node)
+ {
+ g_assert (length <= child->length);
+ child->length -= length;
+ cjh_text_region_subtract_from_parents (region, parent, length);
+ return;
+ }
+ });
+
+ g_assert_not_reached ();
+}
+
+static void
+cjh_text_region_add_to_parents (CjhTextRegion *region,
+ CjhTextRegionNode *node,
+ gsize length)
+{
+ CjhTextRegionNode *parent = cjh_text_region_node_get_parent (node);
+
+ if (parent == NULL || length == 0)
+ return;
+
+ cjh_text_region_invalid_cache (region);
+
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ if (child->node == node)
+ {
+ child->length += length;
+ cjh_text_region_add_to_parents (region, parent, length);
+ return;
+ }
+ });
+
+ g_assert_not_reached ();
+}
+
+static inline gboolean
+cjh_text_region_node_is_root (CjhTextRegionNode *node)
+{
+ return node != NULL && cjh_text_region_node_get_parent (node) == NULL;
+}
+
+static CjhTextRegionNode *
+cjh_text_region_node_search_recurse (CjhTextRegionNode *node,
+ gsize offset,
+ gsize *offset_within_node)
+{
+ CjhTextRegionChild *last_child = NULL;
+
+ g_assert (node != NULL);
+ g_assert (offset_within_node != NULL);
+
+ /* If we reached a leaf, that is all we need to do */
+ if (cjh_text_region_node_is_leaf (node))
+ {
+ *offset_within_node = offset;
+ return node;
+ }
+
+ g_assert (!cjh_text_region_node_is_leaf (node));
+ g_assert (!SORTED_ARRAY_IS_EMPTY (&node->branch.children));
+ g_assert (offset <= cjh_text_region_node_length (node));
+
+ SORTED_ARRAY_FOREACH (&node->branch.children, CjhTextRegionChild, child, {
+ g_assert (child->length > 0);
+ g_assert (child->node != NULL);
+
+ if (offset < child->length)
+ return cjh_text_region_node_search_recurse (child->node, offset, offset_within_node);
+
+ offset -= child->length;
+ last_child = child;
+ });
+
+ /* We're right-most, so it belongs at the end. Add back the length we removed
+ * while trying to resolve within the parent branch.
+ */
+ g_assert (last_child != NULL);
+ g_assert (node->branch.next == NULL);
+ return cjh_text_region_node_search_recurse (last_child->node,
+ offset + last_child->length,
+ offset_within_node);
+}
+
+static CjhTextRegionNode *
+cjh_text_region_search (CjhTextRegion *region,
+ gsize offset,
+ gsize *offset_within_node)
+{
+ CjhTextRegionNode *result;
+
+ *offset_within_node = 0;
+
+ g_assert (region->cached_result == NULL ||
+ cjh_text_region_node_is_leaf (region->cached_result));
+
+ /* Try to reuse cached node to avoid traversal since in most cases
+ * an insert will be followed by another insert.
+ */
+ if (region->cached_result != NULL && offset >= region->cached_result_offset)
+ {
+ gsize calc_offset = region->cached_result_offset + cjh_text_region_node_length (region->cached_result);
+
+ if (offset < calc_offset ||
+ (offset == calc_offset && region->cached_result->leaf.next == NULL))
+ {
+ *offset_within_node = offset - region->cached_result_offset;
+ return region->cached_result;
+ }
+ }
+
+ if (offset == 0)
+ result = _cjh_text_region_get_first_leaf (region);
+ else
+ result = cjh_text_region_node_search_recurse (®ion->root, offset, offset_within_node);
+
+ /* Now save it for cached reuse */
+ if (result != NULL)
+ {
+ region->cached_result = result;
+ region->cached_result_offset = offset - *offset_within_node;
+ }
+
+ return result;
+}
+
+static void
+cjh_text_region_root_split (CjhTextRegion *region,
+ CjhTextRegionNode *root)
+{
+ CjhTextRegionNode *left;
+ CjhTextRegionNode *right;
+ CjhTextRegionChild new_child;
+
+ g_assert (region != NULL);
+ g_assert (!cjh_text_region_node_is_leaf (root));
+ g_assert (cjh_text_region_node_is_root (root));
+ g_assert (!SORTED_ARRAY_IS_EMPTY (&root->branch.children));
+
+ left = cjh_text_region_node_new (root, FALSE);
+ right = cjh_text_region_node_new (root, FALSE);
+
+ left->branch.next = right;
+ right->branch.prev = left;
+
+ SORTED_ARRAY_SPLIT2 (&root->branch.children, &left->branch.children, &right->branch.children);
+ SORTED_ARRAY_FOREACH (&left->branch.children, CjhTextRegionChild, child, {
+ cjh_text_region_node_set_parent (child->node, left);
+ });
+ SORTED_ARRAY_FOREACH (&right->branch.children, CjhTextRegionChild, child, {
+ cjh_text_region_node_set_parent (child->node, right);
+ });
+
+ g_assert (SORTED_ARRAY_IS_EMPTY (&root->branch.children));
+
+ new_child.node = right;
+ new_child.length = cjh_text_region_node_length (right);
+ SORTED_ARRAY_PUSH_HEAD (&root->branch.children, new_child);
+
+ new_child.node = left;
+ new_child.length = cjh_text_region_node_length (left);
+ SORTED_ARRAY_PUSH_HEAD (&root->branch.children, new_child);
+
+ g_assert (SORTED_ARRAY_LENGTH (&root->branch.children) == 2);
+
+ DEBUG_VALIDATE (root, NULL);
+ DEBUG_VALIDATE (left, root);
+ DEBUG_VALIDATE (right, root);
+}
+
+static CjhTextRegionNode *
+cjh_text_region_branch_split (CjhTextRegion *region,
+ CjhTextRegionNode *left)
+{
+ G_GNUC_UNUSED gsize old_length;
+ CjhTextRegionNode *parent;
+ CjhTextRegionNode *right;
+ gsize right_length = 0;
+ gsize left_length = 0;
+ guint i = 0;
+
+ g_assert (region != NULL);
+ g_assert (left != NULL);
+ g_assert (!cjh_text_region_node_is_leaf (left));
+ g_assert (!cjh_text_region_node_is_root (left));
+
+ old_length = cjh_text_region_node_length (left);
+
+ /*
+ * This operation should not change the height of the tree. Only
+ * splitting the root node can change the height of the tree. So
+ * here we add a new right node, and update the parent to point to
+ * it right after our node.
+ *
+ * Since no new items are added, lengths do not change and we do
+ * not need to update lengths up the hierarchy except for our two
+ * effected nodes (and their direct parent).
+ */
+
+ parent = cjh_text_region_node_get_parent (left);
+
+ /* Create a new node to split half the items into */
+ right = cjh_text_region_node_new (parent, FALSE);
+
+ /* Insert node into branches linked list */
+ right->branch.next = left->branch.next;
+ right->branch.prev = left;
+ if (right->branch.next != NULL)
+ right->branch.next->branch.prev = right;
+ left->branch.next = right;
+
+ SORTED_ARRAY_SPLIT (&left->branch.children, &right->branch.children);
+ SORTED_ARRAY_FOREACH (&right->branch.children, CjhTextRegionChild, child, {
+ cjh_text_region_node_set_parent (child->node, right);
+ });
+
+#ifndef G_DISABLE_ASSERT
+ SORTED_ARRAY_FOREACH (&left->branch.children, CjhTextRegionChild, child, {
+ g_assert (cjh_text_region_node_get_parent (child->node) == left);
+ });
+#endif
+
+ right_length = cjh_text_region_node_length (right);
+ left_length = cjh_text_region_node_length (left);
+
+ g_assert (right_length + left_length == old_length);
+ g_assert (SORTED_ARRAY_LENGTH (&parent->branch.children) < SORTED_ARRAY_CAPACITY
(&parent->branch.children));
+
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ i++;
+
+ if (child->node == left)
+ {
+ CjhTextRegionChild right_child;
+
+ right_child.node = right;
+ right_child.length = right_length;
+
+ child->length = left_length;
+
+ SORTED_ARRAY_INSERT_VAL (&parent->branch.children, i, right_child);
+
+ DEBUG_VALIDATE (left, parent);
+ DEBUG_VALIDATE (right, parent);
+ DEBUG_VALIDATE (parent, cjh_text_region_node_get_parent (parent));
+
+ return right;
+ }
+ });
+
+ g_assert_not_reached ();
+}
+
+static CjhTextRegionNode *
+cjh_text_region_leaf_split (CjhTextRegion *region,
+ CjhTextRegionNode *left)
+{
+ G_GNUC_UNUSED gsize length;
+ CjhTextRegionNode *parent;
+ CjhTextRegionNode *right;
+ gsize right_length;
+ guint i;
+
+ g_assert (region != NULL);
+ g_assert (left != NULL);
+ g_assert (cjh_text_region_node_is_leaf (left));
+
+ parent = cjh_text_region_node_get_parent (left);
+
+ g_assert (parent != left);
+ g_assert (!cjh_text_region_node_is_leaf (parent));
+ g_assert (!SORTED_ARRAY_IS_EMPTY (&parent->branch.children));
+ g_assert (!SORTED_ARRAY_IS_FULL (&parent->branch.children));
+
+ length = cjh_text_region_node_length (left);
+
+ g_assert (length > 0);
+
+ DEBUG_VALIDATE (parent, cjh_text_region_node_get_parent (parent));
+ DEBUG_VALIDATE (left, parent);
+
+ right = cjh_text_region_node_new (parent, TRUE);
+
+ SORTED_ARRAY_SPLIT (&left->leaf.runs, &right->leaf.runs);
+ right_length = cjh_text_region_node_length (right);
+
+ g_assert (length == right_length + cjh_text_region_node_length (left));
+ g_assert (cjh_text_region_node_is_leaf (left));
+ g_assert (cjh_text_region_node_is_leaf (right));
+
+ i = 0;
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ G_GNUC_UNUSED const CjhTextRegionChild *next = SORTED_ARRAY_FOREACH_PEEK (&parent->branch.children);
+
+ ++i;
+
+ g_assert (cjh_text_region_node_is_leaf (child->node));
+ g_assert (next == NULL || cjh_text_region_node_is_leaf (next->node));
+
+ if (child->node == left)
+ {
+ CjhTextRegionChild right_child;
+
+ g_assert (child->length >= right_length);
+ g_assert (next == NULL || left->leaf.next == next->node);
+
+ if (left->leaf.next != NULL)
+ left->leaf.next->leaf.prev = right;
+
+ right->leaf.prev = left;
+ right->leaf.next = left->leaf.next;
+ left->leaf.next = right;
+
+ right_child.node = right;
+ right_child.length = right_length;
+
+ child->length -= right_length;
+
+ g_assert (child->length > 0);
+ g_assert (right_child.length > 0);
+
+ SORTED_ARRAY_INSERT_VAL (&parent->branch.children, i, right_child);
+
+ g_assert (right != NULL);
+ g_assert (cjh_text_region_node_is_leaf (right));
+ g_assert (right->leaf.prev == left);
+ g_assert (left->leaf.next == right);
+
+ DEBUG_VALIDATE (left, parent);
+ DEBUG_VALIDATE (right, parent);
+ DEBUG_VALIDATE (parent, cjh_text_region_node_get_parent (parent));
+
+ return right;
+ }
+ });
+
+ g_assert_not_reached ();
+}
+
+static inline gboolean
+cjh_text_region_node_needs_split (CjhTextRegionNode *node)
+{
+ /*
+ * We want to split the tree node if there is not enough space to
+ * split a single entry into two AND add a new entry. That means we
+ * need two empty slots before we ever perform an insert.
+ */
+
+ if (!cjh_text_region_node_is_leaf (node))
+ return SORTED_ARRAY_LENGTH (&node->branch.children) >= (SORTED_ARRAY_CAPACITY (&node->branch.children) -
2);
+ else
+ return SORTED_ARRAY_LENGTH (&node->leaf.runs) >= (SORTED_ARRAY_CAPACITY (&node->leaf.runs) - 2);
+}
+
+static inline CjhTextRegionNode *
+cjh_text_region_node_split (CjhTextRegion *region,
+ CjhTextRegionNode *node)
+{
+ CjhTextRegionNode *parent;
+
+ g_assert (node != NULL);
+
+ cjh_text_region_invalid_cache (region);
+
+ parent = cjh_text_region_node_get_parent (node);
+
+ if (parent != NULL &&
+ cjh_text_region_node_needs_split (parent))
+ cjh_text_region_node_split (region, parent);
+
+ if (!cjh_text_region_node_is_leaf (node))
+ {
+ if (cjh_text_region_node_is_root (node))
+ {
+ cjh_text_region_root_split (region, node);
+ return ®ion->root;
+ }
+
+ return cjh_text_region_branch_split (region, node);
+ }
+ else
+ {
+ return cjh_text_region_leaf_split (region, node);
+ }
+}
+
+CjhTextRegion *
+_cjh_text_region_new (CjhTextRegionJoinFunc join_func,
+ CjhTextRegionSplitFunc split_func)
+{
+ CjhTextRegion *self;
+ CjhTextRegionNode *leaf;
+ CjhTextRegionChild child;
+
+ self = g_new0 (CjhTextRegion, 1);
+ self->length = 0;
+ self->join_func = join_func;
+ self->split_func = split_func;
+
+ /* The B+Tree has a root node (a branch) and a single leaf
+ * as a child to simplify how we do splits/rotations/etc.
+ */
+ leaf = cjh_text_region_node_new (&self->root, TRUE);
+
+ child.node = leaf;
+ child.length = 0;
+
+ SORTED_ARRAY_INIT (&self->root.branch.children);
+ SORTED_ARRAY_PUSH_HEAD (&self->root.branch.children, child);
+
+ return self;
+}
+
+static void
+cjh_text_region_node_free (CjhTextRegionNode *node)
+{
+ if (node == NULL)
+ return;
+
+ if (!cjh_text_region_node_is_leaf (node))
+ {
+ SORTED_ARRAY_FOREACH (&node->branch.children, CjhTextRegionChild, child, {
+ cjh_text_region_node_free (child->node);
+ });
+ }
+
+ g_free (node);
+}
+
+void
+_cjh_text_region_free (CjhTextRegion *region)
+{
+ if (region != NULL)
+ {
+ g_assert (cjh_text_region_node_is_root (®ion->root));
+ g_assert (!SORTED_ARRAY_IS_EMPTY (®ion->root.branch.children));
+
+ SORTED_ARRAY_FOREACH (®ion->root.branch.children, CjhTextRegionChild, child, {
+ cjh_text_region_node_free (child->node);
+ });
+
+ g_free (region);
+ }
+}
+
+static inline gboolean
+join_run (CjhTextRegion *region,
+ gsize offset,
+ const CjhTextRegionRun *left,
+ const CjhTextRegionRun *right,
+ CjhTextRegionRun *joined)
+{
+ gboolean join;
+
+ g_assert (region != NULL);
+ g_assert (left != NULL);
+ g_assert (right != NULL);
+ g_assert (joined != NULL);
+
+ if (region->join_func != NULL)
+ join = region->join_func (offset, left, right);
+ else
+ join = FALSE;
+
+ if (join)
+ {
+ joined->length = left->length + right->length;
+ joined->data = left->data;
+
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void
+_cjh_text_region_insert (CjhTextRegion *region,
+ gsize offset,
+ gsize length,
+ gpointer data)
+{
+ CjhTextRegionRun to_insert = { length, data };
+ CjhTextRegionNode *target;
+ CjhTextRegionNode *node;
+ CjhTextRegionNode *parent;
+ gsize offset_within_node = offset;
+ guint i;
+
+ g_assert (region != NULL);
+ g_assert (offset <= region->length);
+
+ if (length == 0)
+ return;
+
+ target = cjh_text_region_search (region, offset, &offset_within_node);
+
+ g_assert (cjh_text_region_node_is_leaf (target));
+ g_assert (offset_within_node <= cjh_text_region_node_length (target));
+
+ /* We should only hit this if we have an empty tree. */
+ if G_UNLIKELY (SORTED_ARRAY_IS_EMPTY (&target->leaf.runs))
+ {
+ g_assert (offset == 0);
+ SORTED_ARRAY_PUSH_HEAD (&target->leaf.runs, to_insert);
+ g_assert (cjh_text_region_node_length (target) == length);
+ goto inserted;
+ }
+
+ /* Split up to region->root if necessary */
+ if (cjh_text_region_node_needs_split (target))
+ {
+ DEBUG_VALIDATE (target, cjh_text_region_node_get_parent (target));
+
+ /* Split the target into two and then re-locate our position as
+ * we might need to be in another node.
+ *
+ * TODO: Potentially optimization here to look at prev/next to
+ * locate which we need. Complicated though since we don't
+ * have real offsets.
+ */
+ cjh_text_region_node_split (region, target);
+
+ target = cjh_text_region_search (region, offset, &offset_within_node);
+
+ g_assert (cjh_text_region_node_is_leaf (target));
+ g_assert (offset_within_node <= cjh_text_region_node_length (target));
+ DEBUG_VALIDATE (target, cjh_text_region_node_get_parent (target));
+ }
+
+ i = 0;
+ SORTED_ARRAY_FOREACH (&target->leaf.runs, CjhTextRegionRun, run, {
+ /*
+ * If this insert request would happen immediately after this run,
+ * we want to see if we can chain it to this run or the beginning
+ * of the next run.
+ *
+ * Note: We coudld also follow the the B+tree style linked-leaf to
+ * the next leaf and compare against it's first item. But that is
+ * out of scope for this prototype.
+ */
+
+ if (offset_within_node == 0)
+ {
+ if (!join_run (region, offset, &to_insert, run, run))
+ SORTED_ARRAY_INSERT_VAL (&target->leaf.runs, i, to_insert);
+ goto inserted;
+ }
+ else if (offset_within_node == run->length)
+ {
+ CjhTextRegionRun *next = SORTED_ARRAY_FOREACH_PEEK (&target->leaf.runs);
+
+ /* Try to chain to the end of this run or the beginning of the next */
+ if (!join_run (region, offset, run, &to_insert, run) &&
+ (next == NULL || !join_run (region, offset, &to_insert, next, next)))
+ SORTED_ARRAY_INSERT_VAL (&target->leaf.runs, i + 1, to_insert);
+ goto inserted;
+ }
+ else if (offset_within_node < run->length)
+ {
+ CjhTextRegionRun left;
+ CjhTextRegionRun right;
+
+ left.length = offset_within_node;
+ left.data = run->data;
+
+ right.length = run->length - offset_within_node;
+ right.data = run->data;
+
+ cjh_text_region_split (region, offset - offset_within_node, run, &left, &right);
+
+ *run = left;
+
+ if (!join_run (region, offset, &to_insert, &right, &to_insert))
+ SORTED_ARRAY_INSERT_VAL (&target->leaf.runs, i + 1, right);
+
+ if (!join_run (region, offset - offset_within_node, run, &to_insert, run))
+ SORTED_ARRAY_INSERT_VAL (&target->leaf.runs, i + 1, to_insert);
+
+ goto inserted;
+ }
+
+ offset_within_node -= run->length;
+
+ i++;
+ });
+
+ g_assert_not_reached ();
+
+inserted:
+
+ g_assert (target != NULL);
+
+ /*
+ * Now update each of the parent nodes in the tree so that they have
+ * an apprporiate length along with the child pointer. This allows them
+ * to calculate offsets while walking the tree (without derefrencing the
+ * child node) at the cost of us walking back up the tree.
+ */
+ for (parent = cjh_text_region_node_get_parent (target), node = target;
+ parent != NULL;
+ node = parent, parent = cjh_text_region_node_get_parent (node))
+ {
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ if (child->node == node)
+ {
+ child->length += length;
+ goto found_in_parent;
+ }
+ });
+
+ g_assert_not_reached ();
+
+ found_in_parent:
+ DEBUG_VALIDATE (node, parent);
+ continue;
+ }
+
+ region->length += length;
+
+ g_assert (region->length == cjh_text_region_node_length (®ion->root));
+}
+
+void
+_cjh_text_region_replace (CjhTextRegion *region,
+ gsize offset,
+ gsize length,
+ gpointer data)
+{
+ g_assert (region != NULL);
+
+ if (length == 0)
+ return;
+
+ /* TODO: This could be optimized to avoid possible splits
+ * by merging adjoining runs.
+ */
+
+ _cjh_text_region_remove (region, offset, length);
+ _cjh_text_region_insert (region, offset, length, data);
+
+ g_assert (region->length == cjh_text_region_node_length (®ion->root));
+}
+
+guint
+_cjh_text_region_get_length (CjhTextRegion *region)
+{
+ g_assert (region != NULL);
+
+ return region->length;
+}
+
+static void
+cjh_text_region_branch_compact (CjhTextRegion *region,
+ CjhTextRegionNode *node)
+{
+ CjhTextRegionNode *parent;
+ CjhTextRegionNode *left;
+ CjhTextRegionNode *right;
+ CjhTextRegionNode *target;
+ gsize added = 0;
+ gsize length;
+
+ g_assert (region != NULL);
+ g_assert (node != NULL);
+ g_assert (!cjh_text_region_node_is_leaf (node));
+
+ SORTED_ARRAY_FOREACH (&node->branch.children, CjhTextRegionChild, child, {
+ if (child->node == NULL)
+ {
+ g_assert (child->length == 0);
+ SORTED_ARRAY_FOREACH_REMOVE (&node->branch.children);
+ }
+ });
+
+ if (cjh_text_region_node_is_root (node))
+ return;
+
+ parent = cjh_text_region_node_get_parent (node);
+
+ g_assert (parent != NULL);
+ g_assert (!cjh_text_region_node_is_leaf (parent));
+
+ /* Reparent child in our stead if we can remove this node */
+ if (SORTED_ARRAY_LENGTH (&node->branch.children) == 1 &&
+ SORTED_ARRAY_LENGTH (&parent->branch.children) == 1)
+ {
+ CjhTextRegionChild *descendant = &SORTED_ARRAY_PEEK_HEAD (&node->branch.children);
+
+ g_assert (parent->branch.prev == NULL);
+ g_assert (parent->branch.next == NULL);
+ g_assert (node->branch.prev == NULL);
+ g_assert (node->branch.next == NULL);
+ g_assert (descendant->node != NULL);
+
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ if (child->node == node)
+ {
+ child->node = descendant->node;
+ cjh_text_region_node_set_parent (child->node, parent);
+
+ descendant->node = NULL;
+ descendant->length = 0;
+
+ goto compact_parent;
+ }
+ });
+
+ g_assert_not_reached ();
+ }
+
+ if (node->branch.prev == NULL && node->branch.next == NULL)
+ return;
+
+ if (SORTED_ARRAY_LENGTH (&node->branch.children) >= CJH_TEXT_REGION_MIN_BRANCHES)
+ return;
+
+ length = cjh_text_region_node_length (node);
+ cjh_text_region_subtract_from_parents (region, node, length);
+
+ /* Remove this node, we'll reparent the children with edges */
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ if (child->node == node)
+ {
+ SORTED_ARRAY_FOREACH_REMOVE (&parent->branch.children);
+ goto found;
+ }
+ });
+
+ g_assert_not_reached ();
+
+found:
+ left = node->branch.prev;
+ right = node->branch.next;
+
+ if (left != NULL)
+ left->branch.next = right;
+
+ if (right != NULL)
+ right->branch.prev = left;
+
+ if (left == NULL ||
+ (right != NULL &&
+ SORTED_ARRAY_LENGTH (&left->branch.children) > SORTED_ARRAY_LENGTH (&right->branch.children)))
+ {
+ target = right;
+
+ g_assert (target->branch.prev == left);
+
+ SORTED_ARRAY_FOREACH_REVERSE (&node->branch.children, CjhTextRegionChild, child, {
+ if (SORTED_ARRAY_LENGTH (&target->branch.children) >= CJH_TEXT_REGION_MAX_BRANCHES-1)
+ {
+ cjh_text_region_add_to_parents (region, target, added);
+ added = 0;
+ cjh_text_region_branch_split (region, target);
+ g_assert (target->branch.prev == left);
+ }
+
+ cjh_text_region_node_set_parent (child->node, target);
+ added += child->length;
+ SORTED_ARRAY_PUSH_HEAD (&target->branch.children, *child);
+
+ child->node = NULL;
+ child->length = 0;
+ });
+
+ cjh_text_region_add_to_parents (region, target, added);
+ }
+ else
+ {
+ target = left;
+
+ g_assert (target->branch.next == right);
+
+ SORTED_ARRAY_FOREACH (&node->branch.children, CjhTextRegionChild, child, {
+ if (SORTED_ARRAY_LENGTH (&target->branch.children) >= CJH_TEXT_REGION_MAX_BRANCHES-1)
+ {
+ cjh_text_region_add_to_parents (region, target, added);
+ added = 0;
+ target = cjh_text_region_branch_split (region, target);
+ }
+
+ cjh_text_region_node_set_parent (child->node, target);
+ added += child->length;
+ SORTED_ARRAY_PUSH_TAIL (&target->branch.children, *child);
+
+ child->node = NULL;
+ child->length = 0;
+ });
+
+ cjh_text_region_add_to_parents (region, target, added);
+ }
+
+ DEBUG_VALIDATE (left, cjh_text_region_node_get_parent (left));
+ DEBUG_VALIDATE (right, cjh_text_region_node_get_parent (right));
+ DEBUG_VALIDATE (parent, cjh_text_region_node_get_parent (parent));
+
+compact_parent:
+ if (parent != NULL)
+ cjh_text_region_branch_compact (region, parent);
+
+ cjh_text_region_node_free (node);
+}
+
+static void
+cjh_text_region_leaf_compact (CjhTextRegion *region,
+ CjhTextRegionNode *node)
+{
+ CjhTextRegionNode *parent;
+ CjhTextRegionNode *target;
+ CjhTextRegionNode *left;
+ CjhTextRegionNode *right;
+ gsize added = 0;
+
+ g_assert (region != NULL);
+ g_assert (node != NULL);
+ g_assert (cjh_text_region_node_is_leaf (node));
+ g_assert (SORTED_ARRAY_LENGTH (&node->leaf.runs) < CJH_TEXT_REGION_MIN_RUNS);
+
+ /* Short-circuit if we are the only node */
+ if (node->leaf.prev == NULL && node->leaf.next == NULL)
+ return;
+
+ parent = cjh_text_region_node_get_parent (node);
+ left = node->leaf.prev;
+ right = node->leaf.next;
+
+ g_assert (parent != NULL);
+ g_assert (!cjh_text_region_node_is_leaf (parent));
+ g_assert (left == NULL || cjh_text_region_node_is_leaf (left));
+ g_assert (right == NULL || cjh_text_region_node_is_leaf (right));
+
+ SORTED_ARRAY_FOREACH (&parent->branch.children, CjhTextRegionChild, child, {
+ if (child->node == node)
+ {
+ cjh_text_region_subtract_from_parents (region, node, child->length);
+ g_assert (child->length == 0);
+ SORTED_ARRAY_FOREACH_REMOVE (&parent->branch.children);
+ goto found;
+ }
+ });
+
+ g_assert_not_reached ();
+
+found:
+ if (left != NULL)
+ left->leaf.next = right;
+
+ if (right != NULL)
+ right->leaf.prev = left;
+
+ node->leaf.next = NULL;
+ node->leaf.prev = NULL;
+
+ if (left == NULL ||
+ (right != NULL &&
+ SORTED_ARRAY_LENGTH (&left->leaf.runs) > SORTED_ARRAY_LENGTH (&right->leaf.runs)))
+ {
+ target = right;
+
+ g_assert (target->leaf.prev == left);
+
+ SORTED_ARRAY_FOREACH_REVERSE (&node->leaf.runs, CjhTextRegionRun, run, {
+ if (SORTED_ARRAY_LENGTH (&target->leaf.runs) >= CJH_TEXT_REGION_MAX_RUNS-1)
+ {
+ cjh_text_region_add_to_parents (region, target, added);
+ added = 0;
+ cjh_text_region_node_split (region, target);
+ g_assert (target->leaf.prev == left);
+ }
+
+ added += run->length;
+ SORTED_ARRAY_PUSH_HEAD (&target->leaf.runs, *run);
+ });
+
+ cjh_text_region_add_to_parents (region, target, added);
+ }
+ else
+ {
+ target = left;
+
+ g_assert (target->leaf.next == right);
+
+ SORTED_ARRAY_FOREACH (&node->leaf.runs, CjhTextRegionRun, run, {
+ if (SORTED_ARRAY_LENGTH (&target->leaf.runs) >= CJH_TEXT_REGION_MAX_RUNS-1)
+ {
+ cjh_text_region_add_to_parents (region, target, added);
+ added = 0;
+
+ target = cjh_text_region_node_split (region, target);
+
+ left = target;
+ }
+
+ added += run->length;
+ SORTED_ARRAY_PUSH_TAIL (&target->leaf.runs, *run);
+ });
+
+ cjh_text_region_add_to_parents (region, target, added);
+ }
+
+ DEBUG_VALIDATE (left, cjh_text_region_node_get_parent (left));
+ DEBUG_VALIDATE (right, cjh_text_region_node_get_parent (right));
+ DEBUG_VALIDATE (parent, cjh_text_region_node_get_parent (parent));
+
+ cjh_text_region_branch_compact (region, parent);
+
+ cjh_text_region_node_free (node);
+}
+
+void
+_cjh_text_region_remove (CjhTextRegion *region,
+ gsize offset,
+ gsize length)
+{
+ CjhTextRegionNode *target;
+ gsize offset_within_node;
+ gsize to_remove = length;
+ gsize calc_offset;
+ guint i;
+
+ g_assert (region != NULL);
+ g_assert (length <= region->length);
+ g_assert (offset < region->length);
+ g_assert (length <= region->length - offset);
+
+ if (length == 0)
+ return;
+
+ target = cjh_text_region_search (region, offset, &offset_within_node);
+
+ g_assert (target != NULL);
+ g_assert (cjh_text_region_node_is_leaf (target));
+ g_assert (SORTED_ARRAY_LENGTH (&target->leaf.runs) > 0);
+ g_assert (offset >= offset_within_node);
+
+ calc_offset = offset - offset_within_node;
+
+ i = 0;
+ SORTED_ARRAY_FOREACH (&target->leaf.runs, CjhTextRegionRun, run, {
+ ++i;
+
+ g_assert (to_remove > 0);
+
+ if (offset_within_node >= run->length)
+ {
+ offset_within_node -= run->length;
+ calc_offset += run->length;
+ }
+ else if (offset_within_node > 0 && to_remove >= run->length - offset_within_node)
+ {
+ CjhTextRegionRun left;
+ CjhTextRegionRun right;
+
+ left.length = offset_within_node;
+ left.data = run->data;
+ right.length = run->length - left.length;
+ right.data = run->data;
+ cjh_text_region_split (region, calc_offset, run, &left, &right);
+
+ to_remove -= right.length;
+ calc_offset += left.length;
+ offset_within_node = 0;
+
+ *run = left;
+
+ if (to_remove == 0)
+ break;
+ }
+ else if (offset_within_node > 0 && to_remove < run->length - offset_within_node)
+ {
+ CjhTextRegionRun left;
+ CjhTextRegionRun right;
+ CjhTextRegionRun right2;
+ CjhTextRegionRun center;
+
+ left.length = offset_within_node;
+ left.data = run->data;
+ right.length = run->length - left.length;
+ right.data = run->data;
+ cjh_text_region_split (region, calc_offset, run, &left, &right);
+
+ center.length = to_remove;
+ center.data = run->data;
+ right2.length = run->length - offset_within_node - to_remove;
+ right2.data = run->data;
+ cjh_text_region_split (region, calc_offset + left.length, &right, ¢er, &right2);
+
+ *run = left;
+
+ if (!join_run (region, calc_offset, run, &right2, run))
+ SORTED_ARRAY_INSERT_VAL (&target->leaf.runs, i, right2);
+
+ offset_within_node = 0;
+ to_remove = 0;
+
+ break;
+ }
+ else if (offset_within_node == 0 && to_remove < run->length)
+ {
+ CjhTextRegionRun left;
+ CjhTextRegionRun right;
+
+ left.length = to_remove;
+ left.data = run->data;
+
+ right.length = run->length - to_remove;
+ right.data = run->data;
+
+ cjh_text_region_split (region, calc_offset, run, &left, &right);
+
+ to_remove = 0;
+ offset_within_node = 0;
+
+ *run = right;
+
+ break;
+ }
+ else if (offset_within_node == 0 && to_remove >= run->length)
+ {
+ to_remove -= run->length;
+
+ SORTED_ARRAY_FOREACH_REMOVE (&target->leaf.runs);
+
+ if (to_remove == 0)
+ break;
+ }
+ else
+ {
+ g_assert_not_reached ();
+ }
+
+ g_assert (to_remove > 0);
+ });
+
+ region->length -= length - to_remove;
+ cjh_text_region_subtract_from_parents (region, target, length - to_remove);
+
+ if (SORTED_ARRAY_LENGTH (&target->leaf.runs) < CJH_TEXT_REGION_MIN_RUNS)
+ cjh_text_region_leaf_compact (region, target);
+
+ g_assert (region->length == cjh_text_region_node_length (®ion->root));
+
+ if (to_remove > 0)
+ _cjh_text_region_remove (region, offset, to_remove);
+}
+
+void
+_cjh_text_region_foreach (CjhTextRegion *region,
+ CjhTextRegionForeachFunc func,
+ gpointer user_data)
+{
+ CjhTextRegionNode *leaf;
+ gsize offset = 0;
+
+ g_return_if_fail (region != NULL);
+ g_return_if_fail (func != NULL);
+
+ for (leaf = _cjh_text_region_get_first_leaf (region);
+ leaf != NULL;
+ leaf = leaf->leaf.next)
+ {
+ g_assert (leaf->leaf.next == NULL || leaf->leaf.next->leaf.prev == leaf);
+
+ SORTED_ARRAY_FOREACH (&leaf->leaf.runs, CjhTextRegionRun, run, {
+ if (func (offset, run, user_data))
+ return;
+ offset += run->length;
+ });
+ }
+}
+
+void
+_cjh_text_region_foreach_in_range (CjhTextRegion *region,
+ gsize begin,
+ gsize end,
+ CjhTextRegionForeachFunc func,
+ gpointer user_data)
+{
+ CjhTextRegionNode *leaf;
+ gsize position;
+ gsize offset_within_node = 0;
+
+ g_return_if_fail (region != NULL);
+ g_return_if_fail (func != NULL);
+ g_return_if_fail (begin <= region->length);
+ g_return_if_fail (end <= region->length);
+ g_return_if_fail (begin <= end);
+
+ if (begin == end || begin == region->length)
+ return;
+
+ if (begin == 0)
+ leaf = _cjh_text_region_get_first_leaf (region);
+ else
+ leaf = cjh_text_region_search (region, begin, &offset_within_node);
+
+ g_assert (offset_within_node < cjh_text_region_node_length (leaf));
+
+ position = begin - offset_within_node;
+
+ while (position < end)
+ {
+ SORTED_ARRAY_FOREACH (&leaf->leaf.runs, CjhTextRegionRun, run, {
+ if (offset_within_node >= run->length)
+ {
+ offset_within_node -= run->length;
+ }
+ else
+ {
+ offset_within_node = 0;
+ if (func (position, run, user_data))
+ return;
+ }
+
+ position += run->length;
+
+ if (position >= end)
+ break;
+ });
+
+ leaf = leaf->leaf.next;
+ }
+}
diff --git a/src/cjhtextregionbtree.h b/src/cjhtextregionbtree.h
new file mode 100644
index 0000000..53609a6
--- /dev/null
+++ b/src/cjhtextregionbtree.h
@@ -0,0 +1,555 @@
+/* cjhtextregionbtree.h
+ *
+ * Copyright 2021 Christian Hergert <chergert redhat com>
+ *
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef __CJH_TEXT_REGION_BTREE_H__
+#define __CJH_TEXT_REGION_BTREE_H__
+
+#include "cjhtextregionprivate.h"
+
+G_BEGIN_DECLS
+
+/* The following set of macros are used to create a queue similar to a
+ * double-ended linked list but using integers as indexes for items within the
+ * queue. Doing so allows for inserting or removing items from a b+tree node
+ * without having to memmove() data to maintain sorting orders.
+ */
+#define VAL_QUEUE_INVALID(Node) ((glib_typeof((Node)->head))-1)
+#define VAL_QUEUE_LENGTH(Node) ((Node)->length)
+#define VAL_QUEUE_EMPTY(Node) ((Node)->head == VAL_QUEUE_INVALID(Node))
+#define VAL_QUEUE_PEEK_HEAD(Node) ((Node)->head)
+#define VAL_QUEUE_PEEK_TAIL(Node) ((Node)->tail)
+#define VAL_QUEUE_IS_VALID(Node, ID) ((ID) != VAL_QUEUE_INVALID(Node))
+#define VAL_QUEUE_NODE(Type, N_Items) \
+ struct { \
+ Type length; \
+ Type head; \
+ Type tail; \
+ struct { \
+ Type prev; \
+ Type next; \
+ } items[N_Items]; \
+ }
+#define VAL_QUEUE_INIT(Node) \
+ G_STMT_START { \
+ (Node)->length = 0; \
+ (Node)->head = VAL_QUEUE_INVALID(Node); \
+ (Node)->tail = VAL_QUEUE_INVALID(Node); \
+ for (guint _i = 0; _i < G_N_ELEMENTS ((Node)->items); _i++) \
+ { \
+ (Node)->items[_i].next = VAL_QUEUE_INVALID(Node); \
+ (Node)->items[_i].prev = VAL_QUEUE_INVALID(Node); \
+ } \
+ } G_STMT_END
+#ifndef G_DISABLE_ASSERT
+# define _VAL_QUEUE_VALIDATE(Node) \
+ G_STMT_START { \
+ glib_typeof((Node)->head) count = 0; \
+ \
+ if ((Node)->tail != VAL_QUEUE_INVALID(Node)) \
+ g_assert_cmpint((Node)->items[(Node)->tail].next, ==, VAL_QUEUE_INVALID(Node)); \
+ if ((Node)->head != VAL_QUEUE_INVALID(Node)) \
+ g_assert_cmpint((Node)->items[(Node)->head].prev , ==, VAL_QUEUE_INVALID(Node)); \
+ \
+ for (glib_typeof((Node)->head) _viter = (Node)->head; \
+ VAL_QUEUE_IS_VALID(Node, _viter); \
+ _viter = (Node)->items[_viter].next) \
+ { \
+ count++; \
+ } \
+ \
+ g_assert_cmpint(count, ==, (Node)->length); \
+ } G_STMT_END
+#else
+# define _VAL_QUEUE_VALIDATE(Node) G_STMT_START { } G_STMT_END
+#endif
+#define VAL_QUEUE_PUSH_HEAD(Node, ID) \
+ G_STMT_START { \
+ (Node)->items[ID].prev = VAL_QUEUE_INVALID(Node); \
+ (Node)->items[ID].next = (Node)->head; \
+ if (VAL_QUEUE_IS_VALID(Node, (Node)->head)) \
+ (Node)->items[(Node)->head].prev = ID; \
+ (Node)->head = ID; \
+ if (!VAL_QUEUE_IS_VALID(Node, (Node)->tail)) \
+ (Node)->tail = ID; \
+ (Node)->length++; \
+ _VAL_QUEUE_VALIDATE(Node); \
+ } G_STMT_END
+#define VAL_QUEUE_PUSH_TAIL(Node, ID) \
+ G_STMT_START { \
+ (Node)->items[ID].prev = (Node)->tail; \
+ (Node)->items[ID].next = VAL_QUEUE_INVALID(Node); \
+ if (VAL_QUEUE_IS_VALID (Node, (Node)->tail)) \
+ (Node)->items[(Node)->tail].next = ID; \
+ (Node)->tail = ID; \
+ if (!VAL_QUEUE_IS_VALID(Node, (Node)->head)) \
+ (Node)->head = ID; \
+ (Node)->length++; \
+ _VAL_QUEUE_VALIDATE(Node); \
+ } G_STMT_END
+#define VAL_QUEUE_INSERT(Node, Nth, Val) \
+ G_STMT_START { \
+ g_assert_cmpint (VAL_QUEUE_LENGTH(Node),<,G_N_ELEMENTS((Node)->items)); \
+ \
+ if ((Nth) == 0) \
+ { \
+ VAL_QUEUE_PUSH_HEAD(Node, Val); \
+ } \
+ else if ((Nth) == (Node)->length) \
+ { \
+ VAL_QUEUE_PUSH_TAIL(Node, Val); \
+ } \
+ else \
+ { \
+ glib_typeof((Node)->head) ID; \
+ glib_typeof((Node)->head) _nth; \
+ \
+ g_assert_cmpint (VAL_QUEUE_LENGTH(Node), >, 0); \
+ g_assert (VAL_QUEUE_IS_VALID(Node, (Node)->head)); \
+ g_assert (VAL_QUEUE_IS_VALID(Node, (Node)->tail)); \
+ \
+ for (ID = (Node)->head, _nth = 0; \
+ _nth < (Nth) && VAL_QUEUE_IS_VALID(Node, ID); \
+ ID = (Node)->items[ID].next, ++_nth) \
+ { /* Do Nothing */ } \
+ \
+ g_assert (VAL_QUEUE_IS_VALID(Node, ID)); \
+ g_assert (VAL_QUEUE_IS_VALID(Node, (Node)->items[ID].prev)); \
+ \
+ (Node)->items[Val].prev = (Node)->items[ID].prev; \
+ (Node)->items[Val].next = ID; \
+ (Node)->items[(Node)->items[ID].prev].next = Val; \
+ (Node)->items[ID].prev = Val; \
+ \
+ (Node)->length++; \
+ \
+ _VAL_QUEUE_VALIDATE(Node); \
+ } \
+ } G_STMT_END
+#define VAL_QUEUE_POP_HEAD(Node,_pos) VAL_QUEUE_POP_NTH((Node), 0, _pos)
+#define VAL_QUEUE_POP_TAIL(Node,_pos) VAL_QUEUE_POP_NTH((Node), (Node)->length - 1, _pos)
+#define VAL_QUEUE_POP_AT(Node, _pos) \
+ G_STMT_START { \
+ g_assert (_pos != VAL_QUEUE_INVALID(Node)); \
+ g_assert (_pos < G_N_ELEMENTS ((Node)->items)); \
+ \
+ if ((Node)->items[_pos].prev != VAL_QUEUE_INVALID(Node)) \
+ (Node)->items[(Node)->items[_pos].prev].next = (Node)->items[_pos].next; \
+ if ((Node)->items[_pos].next != VAL_QUEUE_INVALID(Node)) \
+ (Node)->items[(Node)->items[_pos].next].prev = (Node)->items[_pos].prev; \
+ if ((Node)->head == _pos) \
+ (Node)->head = (Node)->items[_pos].next; \
+ if ((Node)->tail == _pos) \
+ (Node)->tail = (Node)->items[_pos].prev; \
+ \
+ (Node)->items[_pos].prev = VAL_QUEUE_INVALID((Node)); \
+ (Node)->items[_pos].next = VAL_QUEUE_INVALID((Node)); \
+ \
+ (Node)->length--; \
+ \
+ _VAL_QUEUE_VALIDATE(Node); \
+ } G_STMT_END
+#define VAL_QUEUE_POP_NTH(Node, Nth, _pos) \
+ G_STMT_START { \
+ _pos = VAL_QUEUE_INVALID(Node); \
+ \
+ if (Nth == 0) \
+ _pos = (Node)->head; \
+ else if (Nth >= (((Node)->length) - 1)) \
+ _pos = (Node)->tail; \
+ else \
+ VAL_QUEUE_NTH (Node, Nth, _pos); \
+ \
+ if (_pos != VAL_QUEUE_INVALID(Node)) \
+ VAL_QUEUE_POP_AT (Node, _pos); \
+ } G_STMT_END
+#define VAL_QUEUE_NTH(Node, Nth, _iter) \
+ G_STMT_START { \
+ glib_typeof((Node)->head) _nth; \
+ if (Nth == 0) \
+ _iter = (Node)->head; \
+ else if (Nth >= (((Node)->length) - 1)) \
+ _iter = (Node)->tail; \
+ else \
+ { \
+ for (_iter = (Node)->head, _nth = 0; \
+ _nth < (Nth); \
+ _iter = (Node)->items[_iter].next, ++_nth) \
+ { /* Do Nothing */ } \
+ } \
+ } G_STMT_END
+#define _VAL_QUEUE_MOVE(Node, Old, New) \
+ G_STMT_START { \
+ (Node)->items[New] = (Node)->items[Old]; \
+ if ((Node)->items[New].prev != VAL_QUEUE_INVALID(Node)) \
+ (Node)->items[(Node)->items[New].prev].next = New; \
+ if ((Node)->items[New].next != VAL_QUEUE_INVALID(Node)) \
+ (Node)->items[(Node)->items[New].next].prev = New; \
+ if ((Node)->head == Old) \
+ (Node)->head = New; \
+ if ((Node)->tail == Old) \
+ (Node)->tail = New; \
+ } G_STMT_END
+/*
+ * SORTED_ARRAY_FIELD:
+ * @TYPE: The type of the structure used by elements in the array
+ * @N_ITEMS: The maximum number of items in the array
+ *
+ * This creates a new inline structure that can be embedded within
+ * other super-structures.
+ *
+ * @N_ITEMS must be <= 254 or this macro will fail.
+ */
+#define SORTED_ARRAY_FIELD(TYPE,N_ITEMS) \
+ struct { \
+ TYPE items[N_ITEMS]; \
+ VAL_QUEUE_NODE(guint8, N_ITEMS) q; \
+ }
+/*
+ * SORTED_ARRAY_INIT:
+ * @FIELD: A pointer to a SortedArray
+ *
+ * This will initialize a node that has been previously registered
+ * using %SORTED_ARRAY_FIELD(). You must call this macro before
+ * using the SortedArray structure.
+ */
+#define SORTED_ARRAY_INIT(FIELD) \
+ G_STMT_START { \
+ G_STATIC_ASSERT (G_N_ELEMENTS((FIELD)->items) < 255); \
+ VAL_QUEUE_INIT(&(FIELD)->q); \
+ } G_STMT_END
+/*
+ * SORTED_ARRAY_LENGTH:
+ * @FIELD: A pointer to the SortedArray field.
+ *
+ * This macro will evaluate to the number of items inserted into
+ * the SortedArray.
+ */
+#define SORTED_ARRAY_LENGTH(FIELD) (VAL_QUEUE_LENGTH(&(FIELD)->q))
+/*
+ * SORTED_ARRAY_CAPACITY:
+ * @FIELD: A pointer to the SortedArray field.
+ *
+ * This macro will evaluate to the number of elements in the SortedArray.
+ * This is dependent on how the SortedArray was instantiated using
+ * the %SORTED_ARRAY_FIELD() macro.
+ */
+#define SORTED_ARRAY_CAPACITY(FIELD) (G_N_ELEMENTS((FIELD)->items))
+/*
+ * SORTED_ARRAY_IS_FULL:
+ * @FIELD: A pointer to the SortedArray field.
+ *
+ * This macro will evaluate to 1 if the SortedArray is at capacity.
+ * Otherwise, the macro will evaluate to 0.
+ */
+#define SORTED_ARRAY_IS_FULL(FIELD) (SORTED_ARRAY_LENGTH(FIELD) == SORTED_ARRAY_CAPACITY(FIELD))
+/*
+ * SORTED_ARRAY_IS_EMPTY:
+ * @FIELD: A SortedArray field
+ *
+ * This macro will evaluate to 1 if the SortedArray contains zero children.
+ */
+#define SORTED_ARRAY_IS_EMPTY(FIELD) (SORTED_ARRAY_LENGTH(FIELD) == 0)
+/*
+ * SORTED_ARRAY_INSERT_VAL:
+ * @FIELD: A pointer to a SortedArray field.
+ * @POSITION: the logical position at which to insert
+ * @ELEMENT: The element to insert
+ *
+ * This will insert a new item into the array. It is invalid API use
+ * to call this function while the SortedArray is at capacity. Check
+ * SORTED_ARRAY_IS_FULL() before using this function to be certain.
+ */
+#define SORTED_ARRAY_INSERT_VAL(FIELD,POSITION,ELEMENT) \
+ G_STMT_START { \
+ guint8 _pos; \
+ \
+ g_assert (POSITION <= SORTED_ARRAY_LENGTH(FIELD)); \
+ \
+ _pos = VAL_QUEUE_LENGTH(&(FIELD)->q); \
+ g_assert (_pos != VAL_QUEUE_INVALID(&(FIELD)->q)); \
+ (FIELD)->items[_pos] = ELEMENT; \
+ VAL_QUEUE_INSERT(&(FIELD)->q, POSITION, _pos); \
+ } G_STMT_END
+#define SORTED_ARRAY_REMOVE_INDEX(FIELD,POSITION,_ele) \
+ G_STMT_START { \
+ guint8 _pos; \
+ guint8 _len; \
+ \
+ VAL_QUEUE_POP_NTH(&(FIELD)->q, POSITION, _pos); \
+ _ele = (FIELD)->items[_pos]; \
+ _len = VAL_QUEUE_LENGTH(&(FIELD)->q); \
+ \
+ /* We must preserve our invariant of having no empty gaps \
+ * in the array so that se can place new items always at the \
+ * end (to avoid scanning for an empty spot). \
+ * Therefore we move our tail item into the removed slot and \
+ * adjust the iqueue positions (which are all O(1). \
+ */ \
+ \
+ if (_pos < _len) \
+ { \
+ (FIELD)->items[_pos] = (FIELD)->items[_len]; \
+ _VAL_QUEUE_MOVE(&(FIELD)->q, _len, _pos); \
+ } \
+ } G_STMT_END
+/* SORTED_ARRAY_FOREACH_REMOVE:
+ *
+ * This a form of SORTED_ARRAY_REMOVE_INDEX but to be used when you
+ * are within a SORTED_ARRAY_FOREACH() to avoid extra scanning.
+ */
+#define SORTED_ARRAY_FOREACH_REMOVE(FIELD) \
+ G_STMT_START { \
+ guint8 _pos = _current; \
+ guint8 _len = VAL_QUEUE_LENGTH(&(FIELD)->q); \
+ \
+ g_assert (_len > 0); \
+ g_assert (_pos < _len); \
+ VAL_QUEUE_POP_AT(&(FIELD)->q, _pos); \
+ g_assert (VAL_QUEUE_LENGTH(&(FIELD)->q) == _len-1); \
+ _len--; \
+ \
+ /* We must preserve our invariant of having no empty gaps \
+ * in the array so that se can place new items always at the \
+ * end (to avoid scanning for an empty spot). \
+ * Therefore we move our tail item into the removed slot and \
+ * adjust the iqueue positions (which are all O(1). \
+ */ \
+ \
+ if (_pos < _len) \
+ { \
+ (FIELD)->items[_pos] = (FIELD)->items[_len]; \
+ _VAL_QUEUE_MOVE(&(FIELD)->q, _len, _pos); \
+ \
+ /* We might need to change the iter if next position moved */ \
+ if (_aiter == _len) \
+ _aiter = _pos; \
+ } \
+ \
+ } G_STMT_END
+/*
+ * SORTED_ARRAY_FOREACH:
+ * @FIELD: A pointer to a SortedArray
+ * @Element: The type of the elements in @FIELD
+ * @Name: the name for a pointer of type @Element
+ * @LABlock: a {} tyle block to execute for each item. You may use
+ * "break" to exit the foreach.
+ *
+ * Calls @Block for every element stored in @FIELD. A pointer to
+ * each element will be provided as a variable named @Name.
+ */
+#define SORTED_ARRAY_FOREACH(FIELD, Element, Name, LABlock) \
+ G_STMT_START { \
+ for (glib_typeof((FIELD)->q.head) _aiter = (FIELD)->q.head; \
+ _aiter != VAL_QUEUE_INVALID(&(FIELD)->q); \
+ /* Do Nothing */) \
+ { \
+ G_GNUC_UNUSED glib_typeof((FIELD)->q.head) _current = _aiter; \
+ Element * Name = &(FIELD)->items[_aiter]; \
+ _aiter = (FIELD)->q.items[_aiter].next; \
+ LABlock \
+ } \
+ } G_STMT_END
+#define SORTED_ARRAY_FOREACH_REVERSE(FIELD, Element, Name, LABlock) \
+ G_STMT_START { \
+ for (glib_typeof((FIELD)->q.head) _aiter = (FIELD)->q.tail; \
+ _aiter != VAL_QUEUE_INVALID(&(FIELD)->q); \
+ /* Do Nothing */) \
+ { \
+ G_GNUC_UNUSED glib_typeof((FIELD)->q.head) _current = _aiter; \
+ Element * Name = &(FIELD)->items[_aiter]; \
+ _aiter = (FIELD)->q.items[_aiter].prev; \
+ LABlock \
+ } \
+ } G_STMT_END
+#define SORTED_ARRAY_FOREACH_PEEK(FIELD) \
+ (((FIELD)->q.items[_current].next != VAL_QUEUE_INVALID(&(FIELD)->q)) \
+ ? &(FIELD)->items[(FIELD)->q.items[_current].next] : NULL)
+#define SORTED_ARRAY_SPLIT(FIELD, SPLIT) \
+ G_STMT_START { \
+ guint8 _mid; \
+ \
+ SORTED_ARRAY_INIT(SPLIT); \
+ \
+ _mid = SORTED_ARRAY_LENGTH(FIELD) / 2; \
+ \
+ for (guint8 _z = 0; _z < _mid; _z++) \
+ { \
+ glib_typeof((FIELD)->items[0]) ele; \
+ SORTED_ARRAY_POP_TAIL(FIELD, ele); \
+ SORTED_ARRAY_PUSH_HEAD(SPLIT, ele); \
+ } \
+ } G_STMT_END
+#define SORTED_ARRAY_SPLIT2(FIELD, LEFT, RIGHT) \
+ G_STMT_START { \
+ guint8 mid; \
+ \
+ SORTED_ARRAY_INIT(LEFT); \
+ SORTED_ARRAY_INIT(RIGHT); \
+ \
+ mid = SORTED_ARRAY_LENGTH(FIELD) / 2; \
+ \
+ for (guint8 i = 0; i < mid; i++) \
+ { \
+ glib_typeof((FIELD)->items[0]) ele; \
+ SORTED_ARRAY_POP_TAIL(FIELD, ele); \
+ SORTED_ARRAY_PUSH_HEAD(RIGHT, ele); \
+ } \
+ \
+ while (!SORTED_ARRAY_IS_EMPTY(FIELD)) \
+ { \
+ glib_typeof((FIELD)->items[0]) ele; \
+ SORTED_ARRAY_POP_TAIL(FIELD, ele); \
+ SORTED_ARRAY_PUSH_HEAD(LEFT, ele); \
+ } \
+ } G_STMT_END
+#define SORTED_ARRAY_PEEK_HEAD(FIELD) ((FIELD)->items[VAL_QUEUE_PEEK_HEAD(&(FIELD)->q)])
+#define SORTED_ARRAY_POP_HEAD(FIELD,_ele) SORTED_ARRAY_REMOVE_INDEX(FIELD, 0, _ele)
+#define SORTED_ARRAY_POP_TAIL(FIELD,_ele) SORTED_ARRAY_REMOVE_INDEX(FIELD, SORTED_ARRAY_LENGTH(FIELD)-1,
_ele)
+#define SORTED_ARRAY_PUSH_HEAD(FIELD, ele) \
+ G_STMT_START { \
+ guint8 _pos = VAL_QUEUE_LENGTH(&(FIELD)->q); \
+ g_assert_cmpint (_pos, <, G_N_ELEMENTS ((FIELD)->items)); \
+ (FIELD)->items[_pos] = ele; \
+ VAL_QUEUE_PUSH_HEAD(&(FIELD)->q, _pos); \
+ } G_STMT_END
+#define SORTED_ARRAY_PUSH_TAIL(FIELD, ele) \
+ G_STMT_START { \
+ guint8 _pos = VAL_QUEUE_LENGTH(&(FIELD)->q); \
+ g_assert_cmpint (_pos, <, G_N_ELEMENTS ((FIELD)->items)); \
+ (FIELD)->items[_pos] = ele; \
+ VAL_QUEUE_PUSH_TAIL(&(FIELD)->q, _pos); \
+ } G_STMT_END
+
+#define CJH_TEXT_REGION_MAX_BRANCHES 26
+#define CJH_TEXT_REGION_MIN_BRANCHES (CJH_TEXT_REGION_MAX_BRANCHES/3)
+#define CJH_TEXT_REGION_MAX_RUNS 26
+#define CJH_TEXT_REGION_MIN_RUNS (CJH_TEXT_REGION_MAX_RUNS/3)
+
+typedef union _CjhTextRegionNode CjhTextRegionNode;
+typedef struct _CjhTextRegionBranch CjhTextRegionBranch;
+typedef struct _CjhTextRegionLeaf CjhTextRegionLeaf;
+typedef struct _CjhTextRegionChild CjhTextRegionChild;
+
+struct _CjhTextRegionChild
+{
+ CjhTextRegionNode *node;
+ gsize length;
+};
+
+struct _CjhTextRegionBranch
+{
+ CjhTextRegionNode *tagged_parent;
+ CjhTextRegionNode *prev;
+ CjhTextRegionNode *next;
+ SORTED_ARRAY_FIELD (CjhTextRegionChild, CJH_TEXT_REGION_MAX_BRANCHES) children;
+};
+
+struct _CjhTextRegionLeaf
+{
+ CjhTextRegionNode *tagged_parent;
+ CjhTextRegionNode *prev;
+ CjhTextRegionNode *next;
+ SORTED_ARRAY_FIELD (CjhTextRegionRun, CJH_TEXT_REGION_MAX_RUNS) runs;
+};
+
+union _CjhTextRegionNode
+{
+ /* pointer to the parent, low bit 0x1 means leaf node */
+ CjhTextRegionNode *tagged_parent;
+ struct _CjhTextRegionLeaf leaf;
+ struct _CjhTextRegionBranch branch;
+};
+
+struct _CjhTextRegion
+{
+ CjhTextRegionNode root;
+ CjhTextRegionJoinFunc join_func;
+ CjhTextRegionSplitFunc split_func;
+ gsize length;
+ CjhTextRegionNode *cached_result;
+ gsize cached_result_offset;
+};
+
+#define TAG(ptr,val) GSIZE_TO_POINTER(GPOINTER_TO_SIZE(ptr)|(gsize)val)
+#define UNTAG(ptr) GSIZE_TO_POINTER(GPOINTER_TO_SIZE(ptr) & ~(gsize)1)
+
+static inline CjhTextRegionNode *
+cjh_text_region_node_get_parent (CjhTextRegionNode *node)
+{
+ if (node == NULL)
+ return NULL;
+ return UNTAG (node->tagged_parent);
+}
+
+static inline gboolean
+cjh_text_region_node_is_leaf (CjhTextRegionNode *node)
+{
+ CjhTextRegionNode *parent = cjh_text_region_node_get_parent (node);
+
+ return parent != NULL && node->tagged_parent != parent;
+}
+
+static inline void
+cjh_text_region_node_set_parent (CjhTextRegionNode *node,
+ CjhTextRegionNode *parent)
+{
+ node->tagged_parent = TAG (parent, cjh_text_region_node_is_leaf (node));
+}
+
+static inline gsize
+cjh_text_region_node_length (CjhTextRegionNode *node)
+{
+ gsize length = 0;
+
+ g_assert (node != NULL);
+
+ if (cjh_text_region_node_is_leaf (node))
+ {
+ SORTED_ARRAY_FOREACH (&node->leaf.runs, CjhTextRegionRun, run, {
+ length += run->length;
+ });
+ }
+ else
+ {
+ SORTED_ARRAY_FOREACH (&node->branch.children, CjhTextRegionChild, child, {
+ length += child->length;
+ });
+ }
+
+ return length;
+}
+
+static inline CjhTextRegionNode *
+_cjh_text_region_get_first_leaf (CjhTextRegion *self)
+{
+ for (CjhTextRegionNode *iter = &self->root;
+ iter;
+ iter = SORTED_ARRAY_PEEK_HEAD (&iter->branch.children).node)
+ {
+ if (cjh_text_region_node_is_leaf (iter))
+ return iter;
+ }
+
+ g_assert_not_reached ();
+}
+
+G_END_DECLS
+
+#endif /* __CJH_TEXT_REGION_BTREE_H__ */
diff --git a/src/cjhtextregionprivate.h b/src/cjhtextregionprivate.h
new file mode 100644
index 0000000..d1f16bf
--- /dev/null
+++ b/src/cjhtextregionprivate.h
@@ -0,0 +1,121 @@
+/* cjhtextregionprivate.h
+ *
+ * Copyright 2021 Christian Hergert <chergert redhat com>
+ *
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef __CJH_TEXT_REGION_PRIVATE_H__
+#define __CJH_TEXT_REGION_PRIVATE_H__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct _CjhTextRegion CjhTextRegion;
+
+typedef struct _CjhTextRegionRun
+{
+ gsize length;
+ gpointer data;
+} CjhTextRegionRun;
+
+/*
+ * CjhTextRegionForeachFunc:
+ * @offset: the offset in characters within the text region
+ * @run: the run of text and data pointer
+ * @user_data: user data supplied
+ *
+ * Function callback to iterate through runs within a text region.
+ *
+ * Returns: %FALSE to coninue iteration, otherwise %TRUE to stop.
+ */
+typedef gboolean (*CjhTextRegionForeachFunc) (gsize offset,
+ const CjhTextRegionRun *run,
+ gpointer user_data);
+
+/*
+ * CjhTextRegionJoinFunc:
+ *
+ * This callback is used to determine if two runs can be joined together.
+ * This is useful when you have similar data pointers between two runs
+ * and seeing them as one run is irrelevant to the code using the
+ * text region.
+ *
+ * The default calllback for joining will return %FALSE so that no joins
+ * may occur.
+ *
+ * Returns: %TRUE if the runs can be joined; otherwise %FALSE
+ */
+typedef gboolean (*CjhTextRegionJoinFunc) (gsize offset,
+ const CjhTextRegionRun *left,
+ const CjhTextRegionRun *right);
+
+/*
+ * CjhTextRegionSplitFunc:
+ *
+ * This function is responsible for splitting a run into two runs.
+ * This can happen a delete happens in the middle of a run.
+ *
+ * By default, @left will contain the run prior to the delete, and
+ * @right will contain the run after the delete.
+ *
+ * You can use the run lengths to determine where the delete was made
+ * using @offset which is an absolute offset from the beginning of the
+ * region.
+ *
+ * If you would like to keep a single run after the deletion, then
+ * set @right to contain a length of zero and add it's previous
+ * length to @left.
+ *
+ * All the length in @left and @right must be accounted for.
+ *
+ * This function is useful when using CjhTextRegion as a piecetable
+ * where you want to adjust the data pointer to point at a new
+ * section of an original or change buffer.
+ */
+typedef void (*CjhTextRegionSplitFunc) (gsize offset,
+ const CjhTextRegionRun *run,
+ CjhTextRegionRun *left,
+ CjhTextRegionRun *right);
+
+CjhTextRegion *_cjh_text_region_new (CjhTextRegionJoinFunc join_func,
+ CjhTextRegionSplitFunc split_func);
+void _cjh_text_region_insert (CjhTextRegion *region,
+ gsize offset,
+ gsize length,
+ gpointer data);
+void _cjh_text_region_replace (CjhTextRegion *region,
+ gsize offset,
+ gsize length,
+ gpointer data);
+void _cjh_text_region_remove (CjhTextRegion *region,
+ gsize offset,
+ gsize length);
+guint _cjh_text_region_get_length (CjhTextRegion *region);
+void _cjh_text_region_foreach (CjhTextRegion *region,
+ CjhTextRegionForeachFunc func,
+ gpointer user_data);
+void _cjh_text_region_foreach_in_range (CjhTextRegion *region,
+ gsize begin,
+ gsize end,
+ CjhTextRegionForeachFunc func,
+ gpointer user_data);
+void _cjh_text_region_free (CjhTextRegion *region);
+
+G_END_DECLS
+
+#endif /* __CJH_TEXT_REGION_PRIVATE_H__ */
diff --git a/src/meson.build b/src/meson.build
index b047762..3ba3182 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -1,4 +1,5 @@
editor_sources = [
+ 'cjhtextregion.c',
'editor-animation.c',
'editor-application.c',
'editor-application-actions.c',
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]