[glib/wip/alexl/gmarkup-record: 1/2] gmarkup: Add record and playback
- From: Alexander Larsson <alexl src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/wip/alexl/gmarkup-record: 1/2] gmarkup: Add record and playback
- Date: Wed, 12 Sep 2018 11:54:42 +0000 (UTC)
commit a50ccb3c6c2f15aed2f189702b8faa49b96f9073
Author: Alexander Larsson <alexl redhat com>
Date: Sat Sep 8 23:58:09 2018 +0200
gmarkup: Add record and playback
g_markup_parse_context_record() parses an xml file, and on success
it returns a GBytes with binary data which essentially records
the callouts to GMarkupParser in a binary format.
You can then call g_markup_parse_context_replay() to replay
the parsing into any GMarkupParser (any number of times).
The binary format is more compact than xml, especially if
strings are repeated (because they are only stored once in the
file), and replay is faster than parsing.
For example, gtkprintunixdialog.ui is 70k, and the binary format
is 17kb, and parsing it is 8.75 times faster (goes from 0.32 msec
to 0.037 msec).
The main use for this is to pre-parse the ui files that GTK uses
for template-based widgets, because these are parsed each time
you instantiate a widget. Ideally we should auto-convert the
xml when we put it in GResources, and use this automatically.
glib/gmarkup.c | 584 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
glib/gmarkup.h | 12 ++
2 files changed, 596 insertions(+)
---
diff --git a/glib/gmarkup.c b/glib/gmarkup.c
index de6607a0e..f7a9536ae 100644
--- a/glib/gmarkup.c
+++ b/glib/gmarkup.c
@@ -35,6 +35,7 @@
#include "gtestutils.h"
#include "glibintl.h"
#include "gthread.h"
+#include "ghash.h"
/**
* SECTION:markup
@@ -2896,3 +2897,586 @@ failure:
return FALSE;
}
+
+/* Kinds of recorded callout. marshal_tree() writes the numeric value of
+ * these as the type tag of each record and g_markup_parse_context_replay()
+ * switches on it, so reordering the members changes the binary format. */
+typedef enum
+{
+ /* Start of an element: name, attribute count, attribute name/value pairs */
+ RECORD_TYPE_ELEMENT,
+ /* Written after the children of an element; carries no payload */
+ RECORD_TYPE_END_ELEMENT,
+ /* Text node: one string reference */
+ RECORD_TYPE_TEXT,
+ /* Passthrough node: one string reference */
+ RECORD_TYPE_PASSTHROUGH,
+} RecordTreeType;
+
+/* All strings are owned by the string table */
+typedef struct RecordDataTree RecordDataTree;
+
+/* One node of the tree that is built while recording a parse. */
+struct RecordDataTree {
+ /* Enclosing node; NULL for the root */
+ RecordDataTree *parent;
+ /* Kind of node; the root is created as RECORD_TYPE_ELEMENT */
+ RecordTreeType type;
+ /* Element name, or the text for text/passthrough nodes; NULL for the root */
+ const char *data;
+ /* NULL-terminated attribute names, filled in by record_start_element().
+ * The array is freed with the node, the strings are not. */
+ const char **attributes;
+ /* NULL-terminated attribute values, parallel to attributes */
+ const char **values;
+ /* Child nodes; record_data_tree_new() prepends, so the newest child is
+ * first and marshal_tree() walks the list from its last link */
+ GList *children;
+};
+
+/* Entry of the string table used while recording. */
+typedef struct {
+ /* Owned, NUL-terminated copy of the string; also used as the hash key */
+ char *string;
+ /* Number of times the string was looked up; compare_string() sorts on it */
+ int count;
+ /* Byte offset of the string inside the serialized string table; only
+ * valid once g_markup_parse_context_record() has assigned it */
+ int offset;
+} RecordDataString;
+
+
+/* Allocates a zeroed node of kind @type carrying @data. When @parent is
+ * non-NULL the node is prepended to the parent's child list. @data is
+ * stored as-is, not copied. */
+static RecordDataTree *
+record_data_tree_new (RecordDataTree *parent, RecordTreeType type, const char *data)
+{
+  RecordDataTree *node;
+
+  node = g_slice_new0 (RecordDataTree);
+  node->type = type;
+  node->data = data;
+  node->parent = parent;
+
+  if (parent != NULL)
+    parent->children = g_list_prepend (parent->children, node);
+
+  return node;
+}
+
+/* Frees @tree and, recursively, all of its children. The attribute and
+ * value arrays are released, the strings they point to are not. */
+static void
+record_data_tree_free (RecordDataTree *tree)
+{
+  GList *link;
+
+  for (link = tree->children; link != NULL; link = link->next)
+    record_data_tree_free (link->data);
+  g_list_free (tree->children);
+
+  g_free (tree->values);
+  g_free (tree->attributes);
+  g_slice_free (RecordDataTree, tree);
+}
+
+/* Releases a string table entry together with the string it owns. */
+static void
+record_data_string_free (RecordDataString *s)
+{
+  char *owned = s->string;
+
+  g_slice_free (RecordDataString, s);
+  g_free (owned);
+}
+
+/* Interns @str in @strings and returns the table's copy of it.
+ *
+ * A negative @len means @str is NUL-terminated; otherwise exactly @len
+ * bytes are used. Every call bumps the use count of the entry, a new
+ * entry starts at 1. The returned pointer belongs to the table. */
+static const char *
+record_data_string_lookup (GHashTable *strings, const char *str, gssize len)
+{
+  RecordDataString *entry;
+  char *terminated = NULL;
+
+  /* With an explicit length the input may lack a terminator, so work on
+   * a terminated copy */
+  if (len >= 0)
+    {
+      terminated = g_strndup (str, len);
+      str = terminated;
+    }
+
+  entry = g_hash_table_lookup (strings, str);
+  if (entry == NULL)
+    {
+      entry = g_slice_new (RecordDataString);
+      /* Reuse the copy made above when there is one */
+      entry->string = (terminated != NULL) ? terminated : g_strdup (str);
+      entry->count = 1;
+
+      g_hash_table_insert (strings, entry->string, entry);
+      return entry->string;
+    }
+
+  g_free (terminated);
+  entry->count++;
+  return entry->string;
+}
+
+/* State shared by the record_* parser callbacks (passed as user_data). */
+typedef struct {
+ /* String table: string -> RecordDataString */
+ GHashTable *strings;
+ /* Root of the recorded tree */
+ RecordDataTree *root;
+ /* Node that newly recorded children are attached to; moved down by
+ * record_start_element() and back up by record_end_element() */
+ RecordDataTree *current;
+} RecordData;
+
+/* start_element callback of the recording parser: adds an element node
+ * below the current node, makes it the current node and stores interned
+ * copies of the attribute names and values as NULL-terminated arrays. */
+static void
+record_start_element (GMarkupParseContext *context,
+                      const gchar *element_name,
+                      const gchar **names,
+                      const gchar **values,
+                      gpointer user_data,
+                      GError **error)
+{
+  RecordData *rec = user_data;
+  gsize count = g_strv_length ((char **)names);
+  RecordDataTree *node;
+  const char *name;
+  gsize idx;
+
+  name = record_data_string_lookup (rec->strings, element_name, -1);
+  node = record_data_tree_new (rec->current, RECORD_TYPE_ELEMENT, name);
+  rec->current = node;
+
+  node->attributes = g_new (const char *, count + 1);
+  node->values = g_new (const char *, count + 1);
+
+  for (idx = 0; idx < count; idx++)
+    {
+      node->attributes[idx] = record_data_string_lookup (rec->strings, names[idx], -1);
+      node->values[idx] = record_data_string_lookup (rec->strings, values[idx], -1);
+    }
+
+  node->attributes[count] = NULL;
+  node->values[count] = NULL;
+}
+
+/* end_element callback of the recording parser: the parent of the node
+ * being closed becomes the current node again. */
+static void
+record_end_element (GMarkupParseContext *context,
+                    const gchar *element_name,
+                    gpointer user_data,
+                    GError **error)
+{
+  RecordData *rec = user_data;
+  RecordDataTree *closed = rec->current;
+
+  rec->current = closed->parent;
+}
+
+/* text callback of the recording parser: interns the @text_len bytes of
+ * @text and adds them as a text node below the current node. */
+static void
+record_text (GMarkupParseContext *context,
+             const gchar *text,
+             gsize text_len,
+             gpointer user_data,
+             GError **error)
+{
+  RecordData *rec = user_data;
+  const char *interned;
+
+  interned = record_data_string_lookup (rec->strings, text, text_len);
+  record_data_tree_new (rec->current, RECORD_TYPE_TEXT, interned);
+}
+
+/* passthrough callback of the recording parser: interns the @text_len
+ * bytes of @passthrough_text and adds them as a passthrough node below
+ * the current node. */
+static void
+record_passthrough (GMarkupParseContext *context,
+                    const gchar *passthrough_text,
+                    gsize text_len,
+                    gpointer user_data,
+                    GError **error)
+{
+  RecordData *rec = user_data;
+  const char *interned;
+
+  interned = record_data_string_lookup (rec->strings, passthrough_text, text_len);
+  record_data_tree_new (rec->current, RECORD_TYPE_PASSTHROUGH, interned);
+}
+
+/* Callback table handed to the parse context while recording. The
+ * initializer is positional; any GMarkupParser member not listed here is
+ * zero-initialized. */
+static const GMarkupParser record_parser =
+{
+ record_start_element,
+ record_end_element,
+ record_text,
+ record_passthrough,
+};
+
+
+/* GCompareFunc for RecordDataString entries: orders by descending use
+ * count, so the most frequently used strings come first. */
+static gint
+compare_string (gconstpointer _a,
+                gconstpointer _b)
+{
+  const RecordDataString *lhs = _a;
+  const RecordDataString *rhs = _b;
+  gint difference;
+
+  difference = rhs->count - lhs->count;
+  return difference;
+}
+
+/* Appends @v to @str in a variable length, big-endian encoding similar
+ * to utf8. The leading bits of the first byte give the total size:
+ *
+ *  v size   byte 1    byte 2    byte 3    byte 4    byte 5
+ *   7 bit:  0xxxxxxx
+ *  14 bit:  10xxxxxx  xxxxxxxx
+ *  21 bit:  110xxxxx  xxxxxxxx  xxxxxxxx
+ *  28 bit:  1110xxxx  xxxxxxxx  xxxxxxxx  xxxxxxxx
+ *  32 bit:  11110000  xxxxxxxx  xxxxxxxx  xxxxxxxx  xxxxxxxx
+ */
+static void
+marshal_uint32 (GString *str,
+                guint32  v)
+{
+  guchar bytes[5];
+  gssize used;
+
+  if (v < 128)
+    {
+      bytes[0] = (guchar)v;
+      used = 1;
+    }
+  else if (v < (1<<14))
+    {
+      bytes[0] = (guchar)((v >> 8) | 0x80);
+      bytes[1] = (guchar)(v & 0xff);
+      used = 2;
+    }
+  else if (v < (1<<21))
+    {
+      bytes[0] = (guchar)((v >> 16) | 0xc0);
+      bytes[1] = (guchar)((v >> 8) & 0xff);
+      bytes[2] = (guchar)(v & 0xff);
+      used = 3;
+    }
+  else if (v < (1<<28))
+    {
+      bytes[0] = (guchar)((v >> 24) | 0xe0);
+      bytes[1] = (guchar)((v >> 16) & 0xff);
+      bytes[2] = (guchar)((v >> 8) & 0xff);
+      bytes[3] = (guchar)(v & 0xff);
+      used = 4;
+    }
+  else
+    {
+      bytes[0] = 0xf0;
+      bytes[1] = (guchar)((v >> 24) & 0xff);
+      bytes[2] = (guchar)((v >> 16) & 0xff);
+      bytes[3] = (guchar)((v >> 8) & 0xff);
+      bytes[4] = (guchar)(v & 0xff);
+      used = 5;
+    }
+
+  /* An explicit length keeps embedded zero bytes */
+  g_string_append_len (str, (const gchar *)bytes, used);
+}
+
+/* Appends a reference to @string, i.e. its offset in the string table,
+ * to @marshaled. @string must already be present in @strings. */
+static void
+marshal_string (GString *marshaled,
+                GHashTable *strings,
+                const char *string)
+{
+  RecordDataString *entry = g_hash_table_lookup (strings, string);
+
+  g_assert (entry != NULL);
+
+  marshal_uint32 (marshaled, entry->offset);
+}
+
+/* Serializes @tree and everything below it into @marshaled.
+ *
+ * The root (the node without parent) produces no record of its own, only
+ * its children are written. An element is written as its type tag, name,
+ * attribute count and name/value pairs, followed by its children and an
+ * end-element tag. Text and passthrough nodes are a type tag plus one
+ * string reference. Children are stored newest first, so they are walked
+ * from the last link backwards to restore document order. */
+static void
+marshal_tree (GString *marshaled,
+              GHashTable *strings,
+              RecordDataTree *tree)
+{
+  gboolean is_root = (tree->parent == NULL);
+  GList *child;
+  int idx;
+
+  if (!is_root)
+    {
+      switch (tree->type)
+        {
+        case RECORD_TYPE_TEXT:
+        case RECORD_TYPE_PASSTHROUGH:
+          /* Leaf records: the tag is the node type itself */
+          marshal_uint32 (marshaled, tree->type);
+          marshal_string (marshaled, strings, tree->data);
+          return;
+        case RECORD_TYPE_ELEMENT:
+          marshal_uint32 (marshaled, RECORD_TYPE_ELEMENT);
+          marshal_string (marshaled, strings, tree->data);
+          marshal_uint32 (marshaled, g_strv_length ((char **)tree->attributes));
+          for (idx = 0; tree->attributes[idx] != NULL; idx++)
+            {
+              marshal_string (marshaled, strings, tree->attributes[idx]);
+              marshal_string (marshaled, strings, tree->values[idx]);
+            }
+          break;
+        case RECORD_TYPE_END_ELEMENT:
+        default:
+          g_assert_not_reached ();
+          return;
+        }
+    }
+
+  for (child = g_list_last (tree->children); child != NULL; child = child->prev)
+    marshal_tree (marshaled, strings, child->data);
+
+  if (!is_root)
+    marshal_uint32 (marshaled, RECORD_TYPE_END_ELEMENT);
+}
+
+static guint32 demarshal_uint32 (const char **tree);
+
+/**
+ * g_markup_parse_context_record:
+ * @flags: flags for the parse context used while recording
+ * @text: the markup to parse
+ * @text_len: length of @text, passed on to g_markup_parse_context_parse()
+ * @error: return location for a #GError, or %NULL
+ *
+ * Parses @text and records the resulting #GMarkupParser callouts in a
+ * binary form that g_markup_parse_context_replay() can play back.
+ *
+ * The data consists of the magic bytes "GMU\0", the byte length of the
+ * string table (see marshal_uint32() for the integer encoding), the
+ * string table itself (every distinct string once, NUL-terminated, most
+ * used first) and finally the records written by marshal_tree().
+ *
+ * Returns: the recorded data, or %NULL with @error set if @text could not
+ *   be parsed or its string table is too large to be addressed
+ *
+ * Since: 2.60
+ */
+GBytes *
+g_markup_parse_context_record (GMarkupParseFlags flags,
+                               const gchar *text,
+                               gssize text_len,
+                               GError **error)
+{
+  GMarkupParseContext *ctx;
+  RecordData data = { 0 };
+  GList *string_table = NULL;
+  GList *l;
+  GString *marshaled;
+  GBytes *result = NULL;
+  gboolean parsed;
+  gsize offset;
+
+  data.strings = g_hash_table_new_full (g_str_hash, g_str_equal, NULL,
+                                        (GDestroyNotify)record_data_string_free);
+  data.root = record_data_tree_new (NULL, RECORD_TYPE_ELEMENT, NULL);
+  data.current = data.root;
+
+  ctx = g_markup_parse_context_new (&record_parser, flags,
+                                    &data, NULL);
+  parsed = g_markup_parse_context_parse (ctx, text, text_len, error);
+  g_markup_parse_context_free (ctx);
+
+  if (!parsed)
+    goto out;
+
+  /* Most used strings first, so they get the smallest offsets and with
+   * that the shortest encoding */
+  string_table = g_hash_table_get_values (data.strings);
+  string_table = g_list_sort (string_table, compare_string);
+
+  offset = 0;
+  for (l = string_table; l != NULL; l = l->next)
+    {
+      RecordDataString *s = l->data;
+      gsize size = strlen (s->string) + 1;
+
+      /* Offsets are kept in an int, refuse tables that do not fit
+       * instead of overflowing */
+      if (size > (gsize)G_MAXINT - offset)
+        {
+          g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE,
+                       _("Markup is too large to be recorded"));
+          goto out;
+        }
+
+      s->offset = (int)offset;
+      offset += size;
+    }
+
+  marshaled = g_string_new ("");
+  /* Magic marker */
+  g_string_append_len (marshaled, "GMU\0", 4);
+  marshal_uint32 (marshaled, (guint32)offset);
+
+  for (l = string_table; l != NULL; l = l->next)
+    {
+      RecordDataString *s = l->data;
+      /* + 1 to store the terminating NUL as well */
+      g_string_append_len (marshaled, s->string, strlen (s->string) + 1);
+    }
+
+  marshal_tree (marshaled, data.strings, data.root);
+
+  result = g_string_free_to_bytes (marshaled);
+
+out:
+  g_list_free (string_table);
+  record_data_tree_free (data.root);
+  g_hash_table_destroy (data.strings);
+
+  return result;
+}
+
+/* Reads one integer in the encoding written by marshal_uint32() from
+ * *@tree and advances *@tree past it.
+ *
+ * The size is taken from the first byte alone and there is no end
+ * pointer, so the caller has to make sure that all bytes of the value
+ * are readable. Any first byte of 0xf0 or above is treated as the 5 byte
+ * form.
+ *
+ * Bytes are widened to guint32 before shifting: shifting an int-promoted
+ * byte of 0x80 or more by 24 would overflow a signed int. */
+static guint32
+demarshal_uint32 (const char **tree)
+{
+  const guchar *p = (const guchar *)*tree;
+  guchar c = *p;
+
+  if (c < 128) /* 7 bit */
+    {
+      *tree += 1;
+      return c;
+    }
+  else if ((c & 0xc0) == 0x80) /* 14 bit */
+    {
+      *tree += 2;
+      return (guint32)(c & 0x3f) << 8 | (guint32)p[1];
+    }
+  else if ((c & 0xe0) == 0xc0) /* 21 bit */
+    {
+      *tree += 3;
+      return (guint32)(c & 0x1f) << 16 | (guint32)p[1] << 8 | (guint32)p[2];
+    }
+  else if ((c & 0xf0) == 0xe0) /* 28 bit */
+    {
+      *tree += 4;
+      return (guint32)(c & 0xf) << 24 | (guint32)p[1] << 16 |
+             (guint32)p[2] << 8 | (guint32)p[3];
+    }
+  else /* 32 bit */
+    {
+      *tree += 5;
+      return (guint32)p[1] << 24 | (guint32)p[2] << 16 |
+             (guint32)p[3] << 8 | (guint32)p[4];
+    }
+}
+
+/* Reads a string reference from *@tree, advancing it, and returns the
+ * referenced string, i.e. @strings plus the decoded offset. The offset
+ * is not checked against the size of the string table. */
+static const char *
+demarshal_string (const char **tree, const char *strings)
+{
+  return strings + demarshal_uint32 (tree);
+}
+
+/* Replays one RECORD_TYPE_ELEMENT record: decodes the element name and
+ * the attributes from *@tree (advancing it), pushes the name on the tag
+ * stack of @context and calls the parser's start_element callback, if it
+ * has one.
+ *
+ * Returns FALSE, with the callback's error propagated to @error, if the
+ * callback failed, TRUE otherwise.
+ *
+ * The attribute count comes from the data being replayed. Small arrays
+ * are placed on the stack to keep the common case free of allocations,
+ * larger ones go to the heap so that a big count cannot overflow the
+ * stack. */
+static gboolean
+replay_start_element (GMarkupParseContext *context,
+                      const char **tree,
+                      const char *strings,
+                      GError **error)
+{
+  enum { MAX_STACK_ATTRS = 64 };
+  const char *element_name;
+  guint32 i, n_attrs;
+  const gchar **attr_names;
+  const gchar **attr_values;
+  const gchar **heap_names = NULL;
+  const gchar **heap_values = NULL;
+  GError *tmp_error = NULL;
+
+  element_name = demarshal_string (tree, strings);
+  n_attrs = demarshal_uint32 (tree);
+
+  if (n_attrs < MAX_STACK_ATTRS)
+    {
+      attr_names = g_newa (const gchar *, n_attrs + 1);
+      attr_values = g_newa (const gchar *, n_attrs + 1);
+    }
+  else
+    {
+      attr_names = heap_names = g_new (const gchar *, (gsize)n_attrs + 1);
+      attr_values = heap_values = g_new (const gchar *, (gsize)n_attrs + 1);
+    }
+
+  for (i = 0; i < n_attrs; i++)
+    {
+      attr_names[i] = demarshal_string (tree, strings);
+      attr_values[i] = demarshal_string (tree, strings);
+    }
+  attr_names[i] = NULL;
+  attr_values[i] = NULL;
+
+  context->tag_stack = g_slist_concat (get_list_node (context, (char *)element_name), context->tag_stack);
+
+  if (context->parser->start_element)
+    (* context->parser->start_element) (context,
+                                        element_name,
+                                        attr_names,
+                                        attr_values,
+                                        context->user_data,
+                                        &tmp_error);
+
+  /* No-ops when the arrays live on the stack */
+  g_free (heap_names);
+  g_free (heap_values);
+
+  if (tmp_error)
+    {
+      propagate_error (context, error, tmp_error);
+      return FALSE;
+    }
+
+  return TRUE;
+}
+
+/* Replays one RECORD_TYPE_END_ELEMENT record: calls the parser's
+ * end_element callback (if it has one) for the element on top of the tag
+ * stack and then pops that element.
+ *
+ * The record has no payload, @tree and @strings are not used.
+ *
+ * Returns FALSE, with the callback's error propagated to @error, if the
+ * callback failed, TRUE otherwise. */
+static gboolean
+replay_end_element (GMarkupParseContext *context,
+ const char **tree,
+ const char *strings,
+ GError **error)
+{
+ GError *tmp_error = NULL;
+ GSList *node;
+
+ g_assert (context->tag_stack != NULL);
+
+ /* NOTE(review): helper defined elsewhere in this file; presumably ends a
+ * subparser pushed for this element - confirm */
+ possibly_finish_subparser (context);
+
+ if (context->parser->end_element)
+ (* context->parser->end_element) (context,
+ current_element (context),
+ context->user_data,
+ &tmp_error);
+
+ /* NOTE(review): helper defined elsewhere in this file; presumably checks
+ * that the callback did not leave a subparser pushed - confirm */
+ ensure_no_outstanding_subparser (context);
+
+ /* Pop the element before looking at the error, so the tag stack is
+ * updated on failure as well */
+ node = context->tag_stack;
+ context->tag_stack = g_slist_remove_link (context->tag_stack, node);
+ free_list_node (context, node);
+
+ if (tmp_error)
+ {
+ propagate_error (context, error, tmp_error);
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+/* Replays one RECORD_TYPE_TEXT record: decodes the string reference from
+ * *@tree (advancing it) and hands the string to the parser's text
+ * callback, if it has one.
+ *
+ * Returns FALSE, with the callback's error propagated to @error, if the
+ * callback failed, TRUE otherwise. */
+static gboolean
+replay_text (GMarkupParseContext *context,
+             const char **tree,
+             const char *strings,
+             GError **error)
+{
+  GError *callback_error = NULL;
+  const char *payload = demarshal_string (tree, strings);
+
+  if (context->parser->text != NULL)
+    context->parser->text (context,
+                           payload,
+                           strlen (payload),
+                           context->user_data,
+                           &callback_error);
+
+  if (callback_error == NULL)
+    return TRUE;
+
+  propagate_error (context, error, callback_error);
+  return FALSE;
+}
+
+/* Replays one RECORD_TYPE_PASSTHROUGH record: decodes the string
+ * reference from *@tree (advancing it) and hands the string to the
+ * parser's passthrough callback, if it has one.
+ *
+ * Returns FALSE, with the callback's error propagated to @error, if the
+ * callback failed, TRUE otherwise. */
+static gboolean
+replay_passthrough (GMarkupParseContext *context,
+                    const char **tree,
+                    const char *strings,
+                    GError **error)
+{
+  GError *callback_error = NULL;
+  const char *payload = demarshal_string (tree, strings);
+
+  if (context->parser->passthrough != NULL)
+    context->parser->passthrough (context,
+                                  payload,
+                                  strlen (payload),
+                                  context->user_data,
+                                  &callback_error);
+
+  if (callback_error == NULL)
+    return TRUE;
+
+  propagate_error (context, error, callback_error);
+  return FALSE;
+}
+
+/* Size in bytes of a marshal_uint32() value whose first byte is @first;
+ * mirrors the branches of demarshal_uint32(). */
+static gsize
+replay_uint32_encoded_size (guchar first)
+{
+  if (first < 0x80)
+    return 1;
+  if ((first & 0xc0) == 0x80)
+    return 2;
+  if ((first & 0xe0) == 0xc0)
+    return 3;
+  if ((first & 0xf0) == 0xe0)
+    return 4;
+  return 5;
+}
+
+/**
+ * g_markup_parse_context_replay:
+ * @context: the parse context whose parser receives the callouts
+ * @data: data produced by g_markup_parse_context_record()
+ * @data_size: size of @data in bytes
+ * @error: return location for a #GError, or %NULL
+ *
+ * Plays the callouts recorded in @data back into the parser of @context,
+ * in the recorded order.
+ *
+ * The header of @data is validated: the magic bytes, the string table
+ * length and the termination of the string table. Record type tags are
+ * checked as well. String offsets and attribute counts inside the
+ * records are not checked, so @data still has to come from a source that
+ * is trusted to produce well-formed records.
+ *
+ * Returns: %TRUE on success, %FALSE with @error set if @data is not valid
+ *   or if a parser callback failed
+ *
+ * Since: 2.60
+ */
+gboolean
+g_markup_parse_context_replay (GMarkupParseContext *context,
+                               const char *data,
+                               gsize data_size,
+                               GError **error)
+{
+  const char *data_end;
+  const char *strings;
+  const char *tree;
+  guint32 len;
+
+  data_end = data + data_size;
+
+  /* The smallest valid data is the 4 magic bytes followed by a 1 byte
+   * string table length */
+  if (data_size < 5 ||
+      !(data[0] == 'G' &&
+        data[1] == 'M' &&
+        data[2] == 'U' &&
+        data[3] == 0))
+    goto invalid;
+  data = data + 4;
+
+  if (replay_uint32_encoded_size ((guchar)*data) > (gsize)(data_end - data))
+    goto invalid;
+  len = demarshal_uint32 (&data);
+
+  if (len > (gsize)(data_end - data))
+    goto invalid;
+
+  /* Strings are read up to their NUL; a terminated table guarantees that
+   * this stops inside the table for every offset below len */
+  if (len > 0 && data[len - 1] != 0)
+    goto invalid;
+
+  strings = data;
+  tree = data + len;
+
+  while (tree < data_end)
+    {
+      gboolean res;
+      guint32 type;
+
+      if (replay_uint32_encoded_size ((guchar)*tree) > (gsize)(data_end - tree))
+        goto invalid;
+      type = demarshal_uint32 (&tree);
+
+      switch (type)
+        {
+        case RECORD_TYPE_ELEMENT:
+          res = replay_start_element (context, &tree, strings, error);
+          break;
+        case RECORD_TYPE_END_ELEMENT:
+          res = replay_end_element (context, &tree, strings, error);
+          break;
+        case RECORD_TYPE_TEXT:
+          res = replay_text (context, &tree, strings, error);
+          break;
+        case RECORD_TYPE_PASSTHROUGH:
+          res = replay_passthrough (context, &tree, strings, error);
+          break;
+        default:
+          /* Unknown tag: the data is corrupt, which is an input error
+           * and not a programming error */
+          goto invalid;
+        }
+
+      if (!res)
+        return FALSE;
+    }
+
+  return TRUE;
+
+invalid:
+  g_set_error (error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE,
+               _("Invalid gmarkup replay data"));
+  return FALSE;
+}
diff --git a/glib/gmarkup.h b/glib/gmarkup.h
index 96375b55a..130df3e6d 100644
--- a/glib/gmarkup.h
+++ b/glib/gmarkup.h
@@ -27,6 +27,7 @@
#include <glib/gerror.h>
#include <glib/gslist.h>
+#include <glib/gbytes.h>
G_BEGIN_DECLS
@@ -222,6 +223,17 @@ void g_markup_parse_context_get_position (GMarkupParseContext *c
GLIB_AVAILABLE_IN_ALL
gpointer g_markup_parse_context_get_user_data (GMarkupParseContext *context);
+GLIB_AVAILABLE_IN_2_60
+GBytes *g_markup_parse_context_record (GMarkupParseFlags flags,
+ const gchar *text,
+ gssize text_len,
+ GError **error);
+GLIB_AVAILABLE_IN_2_60
+gboolean g_markup_parse_context_replay (GMarkupParseContext *context,
+ const char *data,
+ gsize data_size,
+ GError **error);
+
/* useful when saving */
GLIB_AVAILABLE_IN_ALL
gchar* g_markup_escape_text (const gchar *text,
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]