[almanah] core: Add a new XML-based serialisation format which can represent links



commit a90413df6b1edc6a0bb24d938935b0b03d775038
Author: Philip Withnall <philip tecnocode co uk>
Date:   Thu Apr 14 00:59:10 2011 +0100

    core: Add a new XML-based serialisation format which can represent links
    
    The GTK+ default serialisation format can't represent hyperlinks using
    a GtkTextTag subclass as we were using. It's a better long-term move to have
    our own serialisation format that we control anyway. This defines a new
    data format version, and uses it as the default, converting entries as they
    are edited/written.

 data/Makefile.am   |    3 +-
 data/entry-2.0.rnc |   30 ++++
 src/entry.c        |  395 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 421 insertions(+), 7 deletions(-)
---
diff --git a/data/Makefile.am b/data/Makefile.am
index 4acfc31..69b1db0 100644
--- a/data/Makefile.am
+++ b/data/Makefile.am
@@ -55,7 +55,8 @@ EXTRA_DIST = \
 	$(ui_DATA)		\
 	$(desktop_in_files)	\
 	$(schemas_in_files)	\
-	$(convert_DATA)
+	$(convert_DATA)		\
+	entry-2.0.rnc
 CLEANFILES = \
 	$(desktop_DATA)		\
 	$(gsettings_SCHEMAS)
diff --git a/data/entry-2.0.rnc b/data/entry-2.0.rnc
new file mode 100644
index 0000000..51e2b22
--- /dev/null
+++ b/data/entry-2.0.rnc
@@ -0,0 +1,30 @@
+# RELAX NG Compact schema for the Almanah entry serialisation format, version 2.0.
+# Declarations in the compact syntax are not terminated by a semicolon.
+default namespace almanah = "http://www.gnome.org/almanah-diary/entry/2.0/"
+start = almanah_entry
+
+# The root element: a single <entry> wrapping the inline content of the entry.
+almanah_entry = element entry {
+	almanah_inline
+}
+
+# Inline content: text freely interleaved with any number of formatting and link elements.
+almanah_inline = mixed {
+	almanah_inline_underline* &
+	almanah_inline_bold* &
+	almanah_inline_italic* &
+	almanah_inline_link*
+}
+
+# Formatting elements; each may itself contain further inline content.
+almanah_inline_underline = element underline {
+	almanah_inline
+}
+
+almanah_inline_bold = element bold {
+	almanah_inline
+}
+
+almanah_inline_italic = element italic {
+	almanah_inline
+}
+
+# A hyperlink: the target is given by the 'uri' attribute, the link text by the inline content.
+almanah_inline_link = element link {
+	attribute uri { xsd:anyURI },
+	almanah_inline
+}
diff --git a/src/entry.c b/src/entry.c
index 4b80127..6740e1b 100644
--- a/src/entry.c
+++ b/src/entry.c
@@ -24,6 +24,7 @@
 
 #include "entry.h"
 #include "main.h"
+#include "widgets/hyperlink-tag.h"
 
 GQuark
 almanah_entry_error_quark (void)
@@ -36,12 +37,19 @@ typedef enum {
 	DATA_FORMAT_UNSET = 0,
 	/* Plain text or GtkTextBuffer's default serialisation format, as used in Almanah versions < 0.8.0 */
 	DATA_FORMAT_PLAIN_TEXT__GTK_TEXT_BUFFER = 1,
+	/* Custom XML serialisation format using schema data/entry-2.0.rnc. */
+	DATA_FORMAT_XML_2_0 = 2,
 } DataFormat;
 
 static void almanah_entry_finalize (GObject *object);
 static void almanah_entry_get_property (GObject *object, guint property_id, GValue *value, GParamSpec *pspec);
 static void almanah_entry_set_property (GObject *object, guint property_id, const GValue *value, GParamSpec *pspec);
 
+static guint8 *serialise_entry_xml_2_0 (GtkTextBuffer *register_buffer, GtkTextBuffer *content_buffer, const GtkTextIter *start,
+                                        const GtkTextIter *end, gsize *length, gpointer user_data);
+static gboolean deserialise_entry_xml_2_0 (GtkTextBuffer *register_buffer, GtkTextBuffer *content_buffer, GtkTextIter *iter, const guint8 *data,
+                                           gsize length, gboolean create_tags, gpointer user_data, GError **error);
+
 struct _AlmanahEntryPrivate {
 	GDate date;
 	guint8 *data;
@@ -249,6 +257,18 @@ almanah_entry_get_content (AlmanahEntry *self, GtkTextBuffer *text_buffer, gbool
 
 	/* Deserialise the data according to the version of the data format attached to the entry */
 	switch (priv->version) {
+		case DATA_FORMAT_XML_2_0: {
+			GdkAtom format_atom;
+			GtkTextIter start_iter;
+
+			format_atom = gtk_text_buffer_register_deserialize_format (text_buffer, "application/x-almanah-entry-xml",
+			                                                           (GtkTextBufferDeserializeFunc) deserialise_entry_xml_2_0,
+			                                                           NULL, NULL);
+			gtk_text_buffer_get_start_iter (text_buffer, &start_iter);
+
+			/* Try deserializing the serialized data */
+			return gtk_text_buffer_deserialize (text_buffer, text_buffer, format_atom, &start_iter, priv->data, priv->length, error);
+		}
 		case DATA_FORMAT_PLAIN_TEXT__GTK_TEXT_BUFFER: {
 			GdkAtom format_atom;
 			GtkTextIter start_iter;
@@ -301,14 +321,13 @@ almanah_entry_set_content (AlmanahEntry *self, GtkTextBuffer *text_buffer)
 	g_free (priv->data);
 
 	gtk_text_buffer_get_bounds (text_buffer, &start, &end);
-	format_atom = gtk_text_buffer_register_serialize_tagset (text_buffer, PACKAGE_NAME);
-	priv->data = gtk_text_buffer_serialize (text_buffer, text_buffer,
-						format_atom,
-						&start, &end,
-						&(priv->length));
+	format_atom = gtk_text_buffer_register_serialize_format (text_buffer, "application/x-almanah-entry-xml",
+	                                                         (GtkTextBufferSerializeFunc) serialise_entry_xml_2_0,
+	                                                         NULL, NULL);
+	priv->data = gtk_text_buffer_serialize (text_buffer, text_buffer, format_atom, &start, &end, &(priv->length));
 
 	/* Always serialise data in the latest format */
-	priv->version = DATA_FORMAT_PLAIN_TEXT__GTK_TEXT_BUFFER;
+	priv->version = DATA_FORMAT_XML_2_0;
 }
 
 /* NOTE: Designed for use on the stack */
@@ -384,3 +403,367 @@ almanah_entry_set_last_edited (AlmanahEntry *self, GDate *last_edited)
 
 	self->priv->last_edited = *last_edited;
 }
+
+/* Copied from GTK+'s gtktextbufferserialize.c, LGPLv2.1+:
+ * Copyright (C) 2001 Havoc Pennington
+ * Copyright (C) 2004 Nokia Corporation
+ *
+ * Computes the difference between two lists, comparing elements by pointer.
+ *
+ * On return, *added holds the elements of new_list which are not in old_list (in the reverse of their order in new_list), and *removed holds
+ * the elements of old_list which are not in new_list (in the same order as in old_list). Both output lists are newly allocated and only
+ * borrow the element pointers; the caller releases them with g_list_free().
+ */
+static void
+find_list_delta (GSList  *old_list,
+                 GSList  *new_list,
+                 GList  **added,
+                 GList  **removed)
+{
+  GSList *node;
+  GList *only_in_new = NULL;
+  GList *only_in_old = NULL;
+
+  /* Elements present in the new list but absent from the old one have been added */
+  for (node = new_list; node != NULL; node = node->next)
+    {
+      if (g_slist_find (old_list, node->data) == NULL)
+        only_in_new = g_list_prepend (only_in_new, node->data);
+    }
+
+  /* Elements present in the old list but absent from the new one have been removed */
+  for (node = old_list; node != NULL; node = node->next)
+    {
+      if (g_slist_find (new_list, node->data) == NULL)
+        only_in_old = g_list_prepend (only_in_old, node->data);
+    }
+
+  *added = only_in_new;
+
+  /* Prepending built the list back-to-front; reverse it to match the xml semantics */
+  *removed = g_list_reverse (only_in_old);
+}
+
+/* Maps a text tag to the name of the XML element used to serialise it.
+ *
+ * Hyperlink tags map to "link"; tags whose "name" property is "bold", "italic" or "underline" map to the element of the same name.
+ * Returns NULL for unknown/unhandled tags (including tags without a name). The returned string is static and must not be freed. */
+static const gchar *
+get_text_tag_element_name (GtkTextTag *tag)
+{
+	static const gchar * const named_elements[] = { "bold", "italic", "underline" };
+	const gchar *result = NULL;
+	gchar *tag_name = NULL;
+	guint idx;
+
+	/* Hyperlink tags are recognised by type rather than by name */
+	if (ALMANAH_IS_HYPERLINK_TAG (tag)) {
+		return "link";
+	}
+
+	g_object_get (G_OBJECT (tag), "name", &tag_name, NULL);
+
+	if (tag_name != NULL) {
+		/* Handle the normal tags: the element name is identical to the tag name */
+		for (idx = 0; idx < G_N_ELEMENTS (named_elements); idx++) {
+			if (strcmp (tag_name, named_elements[idx]) == 0) {
+				result = named_elements[idx];
+				break;
+			}
+		}
+
+		/* g_object_get() handed us a copy of the name */
+		g_free (tag_name);
+	}
+
+	return result;
+}
+
+/* Serialise the text between @start and @end into the XML 2.0 entry format (schema: data/entry-2.0.rnc).
+ *
+ * The output is an XML declaration followed by a single <entry> element containing the escaped text, with <bold>, <italic>, <underline> and
+ * <link uri="…"> elements wrapped around the runs of text which carry the corresponding tags. Tags for which get_text_tag_element_name()
+ * returns NULL are ignored.
+ *
+ * @register_buffer, @content_buffer and @user_data are unused. The number of bytes of markup (excluding the terminating nul) is stored in
+ * @length, and the newly allocated markup is returned; the caller owns it and frees it with g_free(). */
+static guint8 *
+serialise_entry_xml_2_0 (GtkTextBuffer *register_buffer, GtkTextBuffer *content_buffer, const GtkTextIter *start, const GtkTextIter *end, gsize *length,
+                         gpointer user_data)
+{
+	GString *markup;
+	GtkTextIter iter, old_iter;
+	GSList *active_tags, *old_tag_list;
+
+	markup = g_string_new (NULL);
+
+	/* Markup preamble. The start tag must be well-formed, or the deserialiser's markup parser will reject everything we write. */
+	g_string_append (markup,
+		"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+		"<entry xmlns=\"http://www.gnome.org/almanah-diary/entry/2.0/\">");
+
+	/* Serialise the text. We maintain a stack of the currently open tags so that the outputted markup is properly nested. We progress through
+	 * the text buffer between tag toggle points, comparing the list of open tags at each point to determine which tags have opened and which
+	 * have closed, and reflecting that in the markup as appropriate. */
+	active_tags = NULL;
+	old_tag_list = NULL;
+
+	iter = *start;
+	old_iter = iter;
+
+	/* After the first pass the iterator is always clamped to @end, so this condition only guards against @start lying after @end */
+	while (gtk_text_iter_compare (&iter, end) <= 0) {
+		GSList *new_tag_list;
+		GList *added, *removed;
+		const GList *i;
+
+		/* Append the text */
+		if (!gtk_text_iter_equal (&old_iter, &iter)) {
+			gchar *text, *escaped_text;
+
+			text = gtk_text_iter_get_slice (&old_iter, &iter);
+			escaped_text = g_markup_escape_text (text, -1);
+			g_free (text);
+
+			g_string_append (markup, escaped_text);
+			g_free (escaped_text);
+		}
+
+		/* Calculate which tags have been opened and closed */
+		new_tag_list = gtk_text_iter_get_tags (&iter);
+		find_list_delta (old_tag_list, new_tag_list, &added, &removed);
+
+		/* Handle removed tags first so that we retain proper nesting */
+		for (i = removed; i != NULL; i = i->next) {
+			GtkTextTag *tag;
+			const gchar *element_name;
+
+			tag = GTK_TEXT_TAG (i->data);
+			element_name = get_text_tag_element_name (tag);
+
+			/* Ignore unknown/unhandled tags */
+			if (element_name == NULL) {
+				continue;
+			}
+
+			/* Close the tag. It may already have been closed (and dropped from the stack) while unwinding to an earlier removed tag. */
+			if (g_slist_find (active_tags, tag)) {
+				/* Close all tags that were opened after this one (i.e. which are above this one in the stack), but ensure that
+				 * they're re-opened again afterwards by pushing them onto the added list. */
+				while (active_tags->data != tag) {
+					GtkTextTag *tag2;
+					const gchar *element_name2;
+
+					tag2 = GTK_TEXT_TAG (active_tags->data);
+					/* Only tags with a known element name are ever pushed onto the stack, so this is non-NULL */
+					element_name2 = get_text_tag_element_name (tag2);
+
+					active_tags = g_slist_remove (active_tags, tag2);
+
+					g_string_append (markup, "</");
+					g_string_append (markup, element_name2);
+					g_string_append_c (markup, '>');
+
+					/* Push the tag onto the added list iff it's not also being closed now */
+					if (g_list_find (removed, tag2) == NULL) {
+						added = g_list_prepend (added, tag2);
+					}
+				}
+
+				/* Close this tag */
+				active_tags = g_slist_remove (active_tags, active_tags->data);
+
+				g_string_append (markup, "</");
+				g_string_append (markup, element_name);
+				g_string_append_c (markup, '>');
+			}
+		}
+
+		/* Open the newly added (or temporarily closed) tags */
+		for (i = added; i != NULL; i = i->next) {
+			GtkTextTag *tag;
+			const gchar *element_name;
+
+			tag = GTK_TEXT_TAG (i->data);
+			element_name = get_text_tag_element_name (tag);
+
+			/* Ignore unknown/unhandled tags */
+			if (element_name == NULL) {
+				continue;
+			}
+
+			g_string_append_c (markup, '<');
+			g_string_append (markup, element_name);
+
+			/* Hyperlinks additionally carry their target as an attribute */
+			if (ALMANAH_IS_HYPERLINK_TAG (tag)) {
+				gchar *escaped_uri;
+
+				escaped_uri = g_markup_escape_text (almanah_hyperlink_tag_get_uri (ALMANAH_HYPERLINK_TAG (tag)), -1);
+				g_string_append (markup, " uri=\"");
+				g_string_append (markup, escaped_uri);
+				g_string_append_c (markup, '"');
+				g_free (escaped_uri);
+			}
+
+			g_string_append_c (markup, '>');
+
+			active_tags = g_slist_prepend (active_tags, tag);
+		}
+
+		g_list_free (added);
+		g_list_free (removed);
+
+		/* Swap the new and old tag lists */
+		g_slist_free (old_tag_list);
+		old_tag_list = new_tag_list;
+
+		if (gtk_text_iter_equal (&iter, end)) {
+			break;
+		}
+
+		/* Advance to the next toggle point. If it lies beyond the end of the range being serialised, stop at @end instead so that the
+		 * text between the last toggle and @end is still emitted rather than silently dropped. */
+		old_iter = iter;
+		gtk_text_iter_forward_to_tag_toggle (&iter, NULL);
+
+		if (gtk_text_iter_compare (&iter, end) > 0) {
+			iter = *end;
+		}
+	}
+
+	g_slist_free (old_tag_list);
+
+	/* Close any tags which remain open */
+	while (active_tags != NULL) {
+		GtkTextTag *tag;
+		const gchar *element_name;
+
+		tag = GTK_TEXT_TAG (active_tags->data);
+		element_name = get_text_tag_element_name (tag);
+
+		active_tags = g_slist_remove (active_tags, tag);
+
+		g_string_append (markup, "</");
+		g_string_append (markup, element_name);
+		g_string_append_c (markup, '>');
+	}
+
+	/* Markup postamble */
+	g_string_append (markup, "</entry>");
+
+	*length = markup->len;
+
+	/* Hand the character data over to the caller, freeing only the GString wrapper */
+	return (guint8*) g_string_free (markup, FALSE);
+}
+
+/* State shared between the markup parser callbacks while deserialising an entry. */
+typedef struct {
+	GtkTextBuffer *buffer; /* buffer which the parsed text is inserted into, and whose tag table is used */
+	GtkTextIter *iter; /* position in the buffer at which text is inserted */
+	gboolean in_entry; /* TRUE between the start and the end of the <entry> element */
+	GSList *active_tags; /* stack of GtkTextTags (topmost first) to apply to the next run of text */
+} DeserialiseContext;
+
+/* Markup parser callback for the start of an element.
+ *
+ * <entry> is only accepted as the outermost element, and every other element must be inside it; violations are reported through @error.
+ * <bold>, <italic> and <underline> push the tag of the same name from the buffer's tag table onto the stack of active tags, if such a tag
+ * exists. <link> requires a non-empty 'uri' attribute (otherwise @error is set); a new hyperlink tag is created for it, added to the tag
+ * table and pushed onto the stack. Any other element is ignored.
+ *
+ * @user_data is the DeserialiseContext. */
+static void
+start_element_cb (GMarkupParseContext *parse_context, const gchar *element_name, const gchar **attribute_names, const gchar **attribute_values,
+                  gpointer user_data, GError **error)
+{
+	DeserialiseContext *deserialise_context = (DeserialiseContext*) user_data;
+
+	if (strcmp (element_name, "entry") == 0) {
+		if (deserialise_context->in_entry) {
+			g_set_error_literal (error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE, "<entry> elements can only be at the top level.");
+			return;
+		}
+
+		deserialise_context->in_entry = TRUE;
+		return;
+	} else {
+		GtkTextTagTable *table;
+		GtkTextTag *tag = NULL;
+
+		if (!deserialise_context->in_entry) {
+			g_set_error_literal (error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE, "An <entry> element must be at the top level.");
+			return;
+		}
+
+		table = gtk_text_buffer_get_tag_table (deserialise_context->buffer);
+
+		if (strcmp (element_name, "bold") == 0 ||
+		    strcmp (element_name, "italic") == 0 ||
+		    strcmp (element_name, "underline") == 0) {
+			/* Just retrieve the predefined tag from the tag table (NULL if the buffer doesn't define it) */
+			tag = gtk_text_tag_table_lookup (table, element_name);
+		} else if (strcmp (element_name, "link") == 0) {
+			guint i;
+			const gchar *uri = NULL;
+
+			/* Extract the URI. The index must start from zero: the attribute arrays are NULL-terminated and scanned in order. */
+			for (i = 0; attribute_names[i] != NULL; i++) {
+				if (strcmp (attribute_names[i], "uri") == 0) {
+					uri = attribute_values[i];
+					break;
+				}
+			}
+
+			if (uri == NULL || *uri == '\0') {
+				g_set_error_literal (error, G_MARKUP_ERROR, G_MARKUP_ERROR_PARSE, "A <link> element must have a 'uri' attribute.");
+				return;
+			}
+
+			/* Create the tag and register it in the tag table */
+			tag = GTK_TEXT_TAG (almanah_hyperlink_tag_new (uri));
+			gtk_text_tag_table_add (table, tag);
+			g_object_unref (tag); /* the tag table keeps a reference */
+		}
+
+		/* Ignore unrecognised tags */
+
+		if (tag != NULL) {
+			/* Push the tag onto the stack of active tags which will be applied to the next text run */
+			deserialise_context->active_tags = g_slist_prepend (deserialise_context->active_tags, tag);
+		}
+	}
+}
+
+/* Markup parser callback for the end of an element.
+ *
+ * </entry> clears the in_entry flag. The end of a <bold>, <italic>, <underline> or <link> element pops the topmost tag off the stack of
+ * active tags, but only if the matching start element actually pushed one. Any other element is ignored.
+ *
+ * @user_data is the DeserialiseContext. */
+static void
+end_element_cb (GMarkupParseContext *parse_context, const gchar *element_name, gpointer user_data, GError **error)
+{
+	DeserialiseContext *deserialise_context = (DeserialiseContext*) user_data;
+
+	if (strcmp (element_name, "entry") == 0) {
+		/* We should be finished parsing now */
+		deserialise_context->in_entry = FALSE;
+		return;
+	}
+
+	if (strcmp (element_name, "bold") == 0 ||
+	    strcmp (element_name, "italic") == 0 ||
+	    strcmp (element_name, "underline") == 0) {
+		GtkTextTagTable *table;
+
+		/* The start of this element only pushed a tag if the buffer's tag table defines one with this name. If it doesn't, there is
+		 * nothing to pop; popping regardless would remove an unrelated enclosing tag or dereference an empty stack. */
+		table = gtk_text_buffer_get_tag_table (deserialise_context->buffer);
+
+		if (gtk_text_tag_table_lookup (table, element_name) == NULL) {
+			return;
+		}
+	} else if (strcmp (element_name, "link") != 0) {
+		/* Ignore unrecognised tags */
+		return;
+	}
+
+	/* Pop the topmost tag off the active tags stack, guarding against an empty stack */
+	if (deserialise_context->active_tags != NULL) {
+		deserialise_context->active_tags = g_slist_delete_link (deserialise_context->active_tags, deserialise_context->active_tags);
+	}
+}
+
+/* Markup parser callback for a run of character data.
+ *
+ * Inserts @text_len bytes of @text into the buffer at the context's insertion iter, then applies every tag currently on the stack of
+ * active tags to the range which was just inserted. @user_data is the DeserialiseContext. */
+static void
+text_cb (GMarkupParseContext *parse_context, const gchar *text, gsize text_len, gpointer user_data, GError **error)
+{
+	DeserialiseContext *ctx = user_data;
+	GtkTextBuffer *buffer = ctx->buffer;
+	GtkTextIter *insert_iter = ctx->iter;
+	GtkTextIter run_start;
+	const GSList *node;
+	gint offset_before;
+
+	/* Remember where the run begins as a character offset, then recover an iter for that position once the text has been inserted */
+	offset_before = gtk_text_iter_get_offset (insert_iter);
+	gtk_text_buffer_insert (buffer, insert_iter, text, text_len);
+	gtk_text_buffer_get_iter_at_offset (buffer, &run_start, offset_before);
+
+	/* Apply all the tags in the current active tags stack to the new run, which extends from run_start to the insertion iter */
+	node = ctx->active_tags;
+	while (node != NULL) {
+		gtk_text_buffer_apply_tag (buffer, GTK_TEXT_TAG (node->data), &run_start, insert_iter);
+		node = node->next;
+	}
+}
+
+/* Deserialise @length bytes of XML 2.0 entry markup from @data, inserting the text into @content_buffer at @iter and applying the tags
+ * described by the markup.
+ *
+ * @register_buffer, @create_tags and @user_data are unused. Returns TRUE on success; on failure returns FALSE with @error set by the markup
+ * parser or by the parser callbacks. Any text inserted before the failure was detected is left in the buffer. */
+static gboolean
+deserialise_entry_xml_2_0 (GtkTextBuffer *register_buffer, GtkTextBuffer *content_buffer, GtkTextIter *iter, const guint8 *data, gsize length,
+                           gboolean create_tags, gpointer user_data, GError **error)
+{
+	GMarkupParseContext *parse_context;
+	gboolean success;
+
+	DeserialiseContext deserialise_context = {
+		content_buffer,
+		iter,
+		FALSE,
+		NULL,
+	};
+
+	const GMarkupParser parser = {
+		start_element_cb,
+		end_element_cb,
+		text_cb,
+		NULL,
+		NULL,
+	};
+
+	parse_context = g_markup_parse_context_new (&parser, 0, &deserialise_context, NULL);
+
+	/* Feeding the data only reports errors in what has been seen so far; ending the parse is what detects a truncated document (e.g. one
+	 * with elements left open), which would otherwise be accepted silently. */
+	success = g_markup_parse_context_parse (parse_context, (const gchar*) data, length, error) &&
+	          g_markup_parse_context_end_parse (parse_context, error);
+
+	g_markup_parse_context_free (parse_context);
+
+	/* If parsing stopped with elements still open, their tags are still on the stack. The list only borrows the tags (the tag table owns
+	 * them), so just the list nodes need freeing. */
+	g_slist_free (deserialise_context.active_tags);
+
+	return success;
+}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]