[gtksourceview/wip/chergert/backport-fixes-for-pathological-data: 1/2] bufferoutputstream: improve fallback insertion performance
- From: Christian Hergert <chergert src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gtksourceview/wip/chergert/backport-fixes-for-pathological-data: 1/2] bufferoutputstream: improve fallback insertion performance
- Date: Wed, 3 Aug 2022 05:16:59 +0000 (UTC)
commit 8580818311446ecb2e5e6b783ae4c2f0d3dbda0b
Author: Christian Hergert <chergert redhat com>
Date: Mon Aug 1 14:48:12 2022 -0700
bufferoutputstream: improve fallback insertion performance
There are a number of cases where users accidentally open binary files and
we spend an inordinate amount of time converting that to hex character
fallbacks, one character at a time.
This tries to rectify the situation a bit by coalescing invalid characters
together. Particularly useful for those ranges of zeros opened in data
files.
GtkTextView itself still struggles with long paragraphs, but this at least
removes the loading time from the equation.
Fixes #284
gtksourceview/gtksourcebufferoutputstream.c | 102 ++++++++++++++++++++++------
1 file changed, 82 insertions(+), 20 deletions(-)
---
diff --git a/gtksourceview/gtksourcebufferoutputstream.c b/gtksourceview/gtksourcebufferoutputstream.c
index 77a2a113..44d6f7fa 100644
--- a/gtksourceview/gtksourcebufferoutputstream.c
+++ b/gtksourceview/gtksourcebufferoutputstream.c
@@ -566,32 +566,71 @@ apply_error_tag (GtkSourceBufferOutputStream *stream)
stream->priv->error_offset = -1;
}
+static const char *hex_fallback[] = {
+ "\\00", "\\01", "\\02", "\\03", "\\04", "\\05", "\\06", "\\07", "\\08", "\\09",
+ "\\0A", "\\0B", "\\0C", "\\0D", "\\0E", "\\0F", "\\10", "\\11", "\\12", "\\13",
+ "\\14", "\\15", "\\16", "\\17", "\\18", "\\19", "\\1A", "\\1B", "\\1C", "\\1D",
+ "\\1E", "\\1F", "\\20", "\\21", "\\22", "\\23", "\\24", "\\25", "\\26", "\\27",
+ "\\28", "\\29", "\\2A", "\\2B", "\\2C", "\\2D", "\\2E", "\\2F", "\\30", "\\31",
+ "\\32", "\\33", "\\34", "\\35", "\\36", "\\37", "\\38", "\\39", "\\3A", "\\3B",
+ "\\3C", "\\3D", "\\3E", "\\3F", "\\40", "\\41", "\\42", "\\43", "\\44", "\\45",
+ "\\46", "\\47", "\\48", "\\49", "\\4A", "\\4B", "\\4C", "\\4D", "\\4E", "\\4F",
+ "\\50", "\\51", "\\52", "\\53", "\\54", "\\55", "\\56", "\\57", "\\58", "\\59",
+ "\\5A", "\\5B", "\\5C", "\\5D", "\\5E", "\\5F", "\\60", "\\61", "\\62", "\\63",
+ "\\64", "\\65", "\\66", "\\67", "\\68", "\\69", "\\6A", "\\6B", "\\6C", "\\6D",
+ "\\6E", "\\6F", "\\70", "\\71", "\\72", "\\73", "\\74", "\\75", "\\76", "\\77",
+ "\\78", "\\79", "\\7A", "\\7B", "\\7C", "\\7D", "\\7E", "\\7F", "\\80", "\\81",
+ "\\82", "\\83", "\\84", "\\85", "\\86", "\\87", "\\88", "\\89", "\\8A", "\\8B",
+ "\\8C", "\\8D", "\\8E", "\\8F", "\\90", "\\91", "\\92", "\\93", "\\94", "\\95",
+ "\\96", "\\97", "\\98", "\\99", "\\9A", "\\9B", "\\9C", "\\9D", "\\9E", "\\9F",
+ "\\A0", "\\A1", "\\A2", "\\A3", "\\A4", "\\A5", "\\A6", "\\A7", "\\A8", "\\A9",
+ "\\AA", "\\AB", "\\AC", "\\AD", "\\AE", "\\AF", "\\B0", "\\B1", "\\B2", "\\B3",
+ "\\B4", "\\B5", "\\B6", "\\B7", "\\B8", "\\B9", "\\BA", "\\BB", "\\BC", "\\BD",
+ "\\BE", "\\BF", "\\C0", "\\C1", "\\C2", "\\C3", "\\C4", "\\C5", "\\C6", "\\C7",
+ "\\C8", "\\C9", "\\CA", "\\CB", "\\CC", "\\CD", "\\CE", "\\CF", "\\D0", "\\D1",
+ "\\D2", "\\D3", "\\D4", "\\D5", "\\D6", "\\D7", "\\D8", "\\D9", "\\DA", "\\DB",
+ "\\DC", "\\DD", "\\DE", "\\DF", "\\E0", "\\E1", "\\E2", "\\E3", "\\E4", "\\E5",
+ "\\E6", "\\E7", "\\E8", "\\E9", "\\EA", "\\EB", "\\EC", "\\ED", "\\EE", "\\EF",
+ "\\F0", "\\F1", "\\F2", "\\F3", "\\F4", "\\F5", "\\F6", "\\F7", "\\F8", "\\F9",
+ "\\FA", "\\FB", "\\FC", "\\FD", "\\FE", "\\FF",
+
+};
+
static void
insert_fallback (GtkSourceBufferOutputStream *stream,
- const gchar *buffer)
+ const char *buffer,
+ gsize count)
{
- guint8 out[4];
- guint8 v;
- const gchar hex[] = "0123456789ABCDEF";
+ g_assert (count > 0);
if (stream->priv->source_buffer == NULL)
{
return;
}
- /* If we are here it is because we are pointing to an invalid char so we
- * substitute it by an hex value.
- */
- v = *(guint8 *)buffer;
- out[0] = '\\';
- out[1] = hex[(v & 0xf0) >> 4];
- out[2] = hex[(v & 0x0f) >> 0];
- out[3] = '\0';
+ if (count > 1)
+ {
+ GString *str = g_string_new (NULL);
+
+ for (gsize i = 0; i < count; i++)
+ {
+ guint8 c = ((const guint8 *)buffer)[i];
+ g_string_append_len (str, hex_fallback[c], 3);
+ }
- gtk_text_buffer_insert (GTK_TEXT_BUFFER (stream->priv->source_buffer),
- &stream->priv->pos, (const gchar *)out, 3);
+ gtk_text_buffer_insert (GTK_TEXT_BUFFER (stream->priv->source_buffer),
+ &stream->priv->pos, str->str, str->len);
- ++stream->priv->n_fallback_errors;
+ g_string_free (str, TRUE);
+ }
+ else
+ {
+ guint8 c = ((const guint8 *)buffer)[0];
+ gtk_text_buffer_insert (GTK_TEXT_BUFFER (stream->priv->source_buffer),
+ &stream->priv->pos, hex_fallback[c], 3);
+ }
+
+ stream->priv->n_fallback_errors += count;
}
static void
@@ -619,6 +658,7 @@ validate_and_insert (GtkSourceBufferOutputStream *stream,
const gchar *end;
gboolean valid;
gsize nvalid;
+ gsize invalseq;
/* validate */
valid = g_utf8_validate (buffer, len, &end);
@@ -713,9 +753,31 @@ validate_and_insert (GtkSourceBufferOutputStream *stream,
stream->priv->error_offset = gtk_text_iter_get_offset (&stream->priv->pos);
}
- insert_fallback (stream, buffer);
- ++buffer;
- --len;
+ /* We failed hard if we got no characters valid. Try to scan ahead
+ * a bit and see where that stops so that we can insert them as
+ * a group instead of individually. Often, we have large sequences
+ * of invalid characters and this improves load time dramatically.
+ */
+ invalseq = 1;
+ if (!valid && nvalid == 0)
+ {
+ while (invalseq < len)
+ {
+ if (!g_utf8_validate (&buffer[invalseq], len - invalseq, NULL))
+ {
+ invalseq++;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ insert_fallback (stream, buffer, invalseq);
+
+ buffer += invalseq;
+ len -= invalseq;
}
g_free (free_text);
@@ -1108,7 +1170,7 @@ gtk_source_buffer_output_stream_flush (GOutputStream *stream,
text = ostream->priv->buffer;
while (ostream->priv->buflen != 0)
{
- insert_fallback (ostream, text);
+ insert_fallback (ostream, text, 1);
++text;
--ostream->priv->buflen;
}
@@ -1146,7 +1208,7 @@ gtk_source_buffer_output_stream_flush (GOutputStream *stream,
text = ostream->priv->iconv_buffer;
while (ostream->priv->iconv_buflen != 0)
{
- insert_fallback (ostream, text);
+ insert_fallback (ostream, text, 1);
++text;
--ostream->priv->iconv_buflen;
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]