Delayed load for info documents
- From: Rupert Swarbrick <rswarbrick gmail com>
- To: Gnome Doc Devel <gnome-doc-devel-list gnome org>
- Subject: Delayed load for info documents
- Date: Fri, 06 May 2011 16:24:52 +0100
Here's a massively rewritten version of the previous lazy info parsing
code. It follows the design of the Mallard code for yelp-info-document.c
and I've spent some time trying to find bugs in the threading: hopefully
it's all correct!
Rupert
>From 4bec5e319c2985c170cd39d93f64b1794a7da8af Mon Sep 17 00:00:00 2001
From: Rupert Swarbrick <rswarbrick gmail com>
Date: Wed, 4 May 2011 18:22:04 +0100
Subject: [PATCH] Parse info documents one page at a time.
---
libyelp/yelp-info-document.c | 664 +++++++++++++++++++++++--------------
libyelp/yelp-info-parser.c | 756 +++++++++++++++---------------------------
libyelp/yelp-info-parser.h | 34 ++-
stylesheets/info2html.xsl.in | 64 ++---
4 files changed, 732 insertions(+), 786 deletions(-)
diff --git a/libyelp/yelp-info-document.c b/libyelp/yelp-info-document.c
index 136b274..9c5d4d8 100644
--- a/libyelp/yelp-info-document.c
+++ b/libyelp/yelp-info-document.c
@@ -40,33 +40,54 @@
#define STYLESHEET DATADIR"/yelp/xslt/info2html.xsl"
typedef enum {
- INFO_STATE_BLANK, /* Brand new, run transform as needed */
- INFO_STATE_PARSING, /* Parsing/transforming document, please wait */
- INFO_STATE_PARSED, /* All done, if we ain't got it, it ain't here */
- INFO_STATE_STOP /* Stop everything now, object to be disposed */
+ INFO_STATE_BLANK,
+ INFO_STATE_THINKING,
+ INFO_STATE_IDLE,
+ INFO_STATE_STOP
} InfoState;
-typedef struct _YelpInfoDocumentPrivate YelpInfoDocumentPrivate;
-struct _YelpInfoDocumentPrivate {
- YelpUri *uri;
- InfoState state;
+/*
+ Info documents are prepared in two stages: reading and
+ parsing/transforming.
+
+ When the document is first created, we read in the file, calculate
+ offsets and generate a hash table of InfoPageData structures by
+ pageid.
- GMutex *mutex;
- GThread *thread;
+ When a page is requested, we then parse it properly and then run an
+ xslt transform.
+ */
- xmlDocPtr xmldoc;
- GtkTreeModel *sections;
+/*
+ InfoPageData stores the unparsed text until transforming, along with
+ links to neighbouring nodes.
+ */
+typedef struct _InfoPageData InfoPageData;
+struct _InfoPageData {
+ YelpInfoDocument *info;
+ gchar *page_id;
- gboolean process_running;
- gboolean transform_running;
+ UnprocessedSection* unprocessed;
- YelpTransform *transform;
- guint chunk_ready;
- guint finished;
- guint error;
+ YelpTransform *transform;
+ xmlDocPtr xmldoc;
- gchar *root_id;
- gchar *visit_prev_id;
+ guint chunk_ready;
+ guint finished;
+ guint error;
+};
+
+typedef struct _YelpInfoDocumentPrivate YelpInfoDocumentPrivate;
+struct _YelpInfoDocumentPrivate {
+ YelpUri *uri;
+ InfoState state;
+
+ GMutex *mutex;
+ GThread *thread;
+ gboolean running;
+
+ GSList *pending;
+ GHashTable *pages;
};
@@ -82,24 +103,30 @@ static gboolean info_request_page (YelpDocument
YelpDocumentCallback callback,
gpointer user_data);
-/* YelpTransform */
-static void transform_chunk_ready (YelpTransform *transform,
- gchar *chunk_id,
- YelpInfoDocument *info);
-static void transform_finished (YelpTransform *transform,
- YelpInfoDocument *info);
-static void transform_error (YelpTransform *transform,
- YelpInfoDocument *info);
-static void transform_finalized (YelpInfoDocument *info,
- gpointer transform);
-
-static void info_document_process (YelpInfoDocument *info);
-static gboolean info_sections_visit (GtkTreeModel *model,
- GtkTreePath *path,
- GtkTreeIter *iter,
- YelpInfoDocument *info);
-static void info_document_disconnect (YelpInfoDocument *info);
+static void read_document (YelpInfoDocument *info);
+static void do_pending_pages (YelpInfoDocument *info);
+static void try_transform_page (YelpInfoDocument *info,
+ const gchar *page_id);
+static void transform_page (InfoPageData *page_data);
+
+/* YelpTransform */
+static void transform_chunk_ready (YelpTransform *transform,
+ gchar *chunk_id,
+ InfoPageData *page_data);
+static void transform_finished (YelpTransform *transform,
+ InfoPageData *page_data);
+static void transform_error (YelpTransform *transform,
+ InfoPageData *page_data);
+static void transform_finalized (InfoPageData *page_data,
+ gpointer transform);
+
+/* InfoPageData */
+static InfoPageData* info_page_data_new (YelpInfoDocument *doc,
+ gchar *page_id,
+ UnprocessedSection *us);
+static void info_page_data_free (InfoPageData *pd);
+static void info_page_data_cancel (InfoPageData *page_data);
G_DEFINE_TYPE (YelpInfoDocument, yelp_info_document, YELP_TYPE_DOCUMENT);
#define GET_PRIV(object) (G_TYPE_INSTANCE_GET_PRIVATE ((object), YELP_TYPE_INFO_DOCUMENT, YelpInfoDocumentPrivate))
@@ -114,6 +141,7 @@ yelp_info_document_class_init (YelpInfoDocumentClass *klass)
object_class->finalize = yelp_info_document_finalize;
document_class->request_page = info_request_page;
+ // TODO: Should there also be an indexing step?
g_type_class_add_private (klass, sizeof (YelpInfoDocumentPrivate));
}
@@ -123,9 +151,19 @@ yelp_info_document_init (YelpInfoDocument *info)
{
YelpInfoDocumentPrivate *priv = GET_PRIV (info);
+ priv->uri = NULL;
priv->state = INFO_STATE_BLANK;
- priv->xmldoc = NULL;
+
priv->mutex = g_mutex_new ();
+ priv->thread = NULL;
+ priv->running = FALSE;
+
+ priv->pending = NULL;
+
+ priv->pages =
+ g_hash_table_new_full (g_str_hash, g_str_equal,
+ g_free,
+ (GDestroyNotify)info_page_data_free);
}
static void
@@ -138,14 +176,21 @@ yelp_info_document_dispose (GObject *object)
priv->uri = NULL;
}
- if (priv->sections) {
- g_object_unref (priv->sections);
- priv->sections = NULL;
+ if (priv->mutex) {
+ g_mutex_free (priv->mutex);
+ priv->mutex = NULL;
+ }
+
+ if (priv->thread) {
+ g_free (priv->thread);
+ priv->thread = NULL;
}
- if (priv->transform) {
- g_object_unref (priv->transform);
- priv->transform = NULL;
+ g_slist_free_full (priv->pending, g_free);
+
+ if (priv->pages) {
+ g_hash_table_destroy (priv->pages);
+ priv->pages = NULL;
}
G_OBJECT_CLASS (yelp_info_document_parent_class)->dispose (object);
@@ -154,17 +199,30 @@ yelp_info_document_dispose (GObject *object)
static void
yelp_info_document_finalize (GObject *object)
{
- YelpInfoDocumentPrivate *priv = GET_PRIV (object);
+ G_OBJECT_CLASS (yelp_info_document_parent_class)->finalize (object);
+}
- if (priv->xmldoc)
- xmlFreeDoc (priv->xmldoc);
+static InfoPageData*
+info_page_data_new (YelpInfoDocument *doc,
+ gchar *page_id,
+ UnprocessedSection *us)
+{
+ InfoPageData *ipd = g_new0 (InfoPageData, 1);
+ ipd->info = doc;
+ ipd->page_id = page_id;
+ ipd->unprocessed = us;
- g_free (priv->root_id);
- g_free (priv->visit_prev_id);
+ return ipd;
+}
- g_mutex_free (priv->mutex);
+/*
+ Free an InfoPageData: its page id, its unprocessed section, its
+ reference on the transform (if any) and the structure itself.
+ Used as the value destroy function of priv->pages. A NULL pd is
+ ignored.
+ */
+static void
+info_page_data_free (InfoPageData *pd)
+{
+ if (!pd) return;
+
+ g_free (pd->page_id);
+ unprocessed_section_free (pd->unprocessed);
+
+ /*
+ transform stays NULL until transform_page creates it and is reset
+ to NULL by info_page_data_cancel, so it must be checked here:
+ g_object_unref (NULL) is a GLib critical.
+ */
+ if (pd->transform)
+ g_object_unref (pd->transform);
- G_OBJECT_CLASS (yelp_info_document_parent_class)->finalize (object);
+ g_free (pd);
}
/******************************************************************************/
@@ -184,16 +242,14 @@ yelp_info_document_new (YelpUri *uri)
NULL);
g_free (doc_uri);
priv = GET_PRIV (info);
-
priv->uri = g_object_ref (uri);
+ yelp_document_set_page_id ((YelpDocument *) info, NULL, "Top");
+ yelp_document_set_page_id ((YelpDocument *) info, "Top", "Top");
+
return (YelpDocument *) info;
}
-
-/******************************************************************************/
-/** YelpDocument **************************************************************/
-
static gboolean
info_request_page (YelpDocument *document,
const gchar *page_id,
@@ -206,32 +262,40 @@ info_request_page (YelpDocument *document,
GError *error;
gboolean handled;
- if (page_id == NULL)
- page_id = priv->root_id;
+ debug_print (DB_FUNCTION, "entering (id = %s)\n", page_id);
+
+ if (page_id == NULL) page_id = "Top";
handled =
- YELP_DOCUMENT_CLASS (yelp_info_document_parent_class)->request_page (document,
- page_id,
- cancellable,
- callback,
- user_data);
+ YELP_DOCUMENT_CLASS (yelp_info_document_parent_class)->
+ request_page (document, page_id, cancellable,
+ callback, user_data);
+
if (handled) {
return TRUE;
}
g_mutex_lock (priv->mutex);
- switch (priv->state) {
- case INFO_STATE_BLANK:
- priv->state = INFO_STATE_PARSING;
- priv->process_running = TRUE;
+ if (priv->state == INFO_STATE_BLANK) {
+ priv->state = INFO_STATE_THINKING;
+ priv->running = TRUE;
g_object_ref (document);
- priv->thread = g_thread_create ((GThreadFunc) info_document_process,
+ priv->thread = g_thread_create ((GThreadFunc) read_document,
document, FALSE, NULL);
+ }
+
+ switch (priv->state) {
+ case INFO_STATE_THINKING:
+ priv->pending =
+ g_slist_prepend (priv->pending,
+ (gpointer) g_strdup (page_id));
break;
- case INFO_STATE_PARSING:
- break;
- case INFO_STATE_PARSED:
+ case INFO_STATE_IDLE:
+ try_transform_page (YELP_INFO_DOCUMENT (document), page_id);
+ break;
+
+ case INFO_STATE_BLANK:
case INFO_STATE_STOP:
docuri = yelp_uri_get_document_uri (priv->uri);
error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
@@ -246,247 +310,341 @@ info_request_page (YelpDocument *document,
}
g_mutex_unlock (priv->mutex);
- return TRUE;
-}
-
-/******************************************************************************/
-/** YelpTransform *************************************************************/
+ return FALSE;
+}
+/*
+ read_document is responsible for reading in all the pages
+ (unparsed), and populating the hash table with InfoPageData
+ structures.
+ */
static void
-transform_chunk_ready (YelpTransform *transform,
- gchar *chunk_id,
- YelpInfoDocument *info)
+read_document (YelpInfoDocument *info)
{
YelpInfoDocumentPrivate *priv = GET_PRIV (info);
- gchar *content;
+ GFile *file = NULL;
+ gchar *filepath = NULL;
+ GError *error;
- g_assert (transform == priv->transform);
+ file = yelp_uri_get_file (priv->uri);
+ if (file == NULL) {
+ gchar *uri = yelp_uri_get_canonical_uri (priv->uri);
+ error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
+ _("The file for uri '%s' does not exist."),
+ uri);
+ yelp_document_error_pending ((YelpDocument *) info, error);
+ g_free (uri);
+ g_error_free (error);
+ goto done;
+ }
- if (priv->state == INFO_STATE_STOP) {
- info_document_disconnect (info);
- return;
+ filepath = g_file_get_path (file);
+ g_object_unref (file);
+ if (!g_file_test (filepath, G_FILE_TEST_IS_REGULAR)) {
+ error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
+ _("The file '%s' does not exist."),
+ filepath);
+ yelp_document_error_pending ((YelpDocument *) info, error);
+ g_error_free (error);
+ goto done;
}
- content = yelp_transform_take_chunk (transform, chunk_id);
- yelp_document_give_contents (YELP_DOCUMENT (info),
- chunk_id,
- content,
- "application/xhtml+xml");
+ GHashTable *unparsed_sections;
- yelp_document_signal (YELP_DOCUMENT (info),
- chunk_id,
- YELP_DOCUMENT_SIGNAL_INFO,
- NULL);
- yelp_document_signal (YELP_DOCUMENT (info),
- chunk_id,
- YELP_DOCUMENT_SIGNAL_CONTENTS,
- NULL);
+ unparsed_sections = yelp_info_parser_read_file (filepath);
+ if (!unparsed_sections) {
+ error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
+ _("The file '%s' could not be parsed as "
+ "an info document."),
+ filepath);
+ yelp_document_error_pending ((YelpDocument *) info, error);
+ goto done;
+ }
+
+ /*
+ unparsed_sections is a hash of pageid -> OpaqueStruct, and we
+ need to wrap these structs up in InfoPageData structures, then
+ stick the result in priv->pages.
+ */
+ GHashTableIter iter;
+ gchar* page_id;
+ UnprocessedSection* section;
+
+ g_mutex_lock (priv->mutex);
+ g_hash_table_iter_init (&iter, unparsed_sections);
+ while (g_hash_table_iter_next (&iter,
+ (gpointer*)&page_id,
+ (gpointer*)&section)) {
+ /*
+ info_page_data_new takes ownership of the memory of page_id
+ and section
+ */
+ InfoPageData *ipd = info_page_data_new (info,
+ page_id, section);
+
+ register_section (YELP_DOCUMENT (info), section);
+
+ g_hash_table_insert (priv->pages, g_strdup(page_id), ipd);
+ }
+ g_hash_table_destroy (unparsed_sections);
+ priv->state = INFO_STATE_IDLE;
+ g_mutex_unlock (priv->mutex);
+
+ /*
+ Finally, we can deal with whatever backlog of pages is waiting.
+ */
+ do_pending_pages (info);
+
+done:
+ g_free (filepath);
+ priv->running = FALSE;
+ g_object_unref (info);
}
+/*
+ This assumes that the current state is INFO_STATE_IDLE.
+ */
static void
-transform_finished (YelpTransform *transform,
- YelpInfoDocument *info)
+do_pending_pages (YelpInfoDocument *info)
{
YelpInfoDocumentPrivate *priv = GET_PRIV (info);
- gchar *docuri;
+
+ g_mutex_lock (priv->mutex);
+ while (priv->pending) {
+ gchar *page_id = (gchar *) priv->pending->data;
+ try_transform_page (info, page_id);
+ g_free (page_id);
+ priv->pending = g_slist_delete_link (priv->pending, priv->pending);
+ }
+ g_mutex_unlock (priv->mutex);
+}
+
+/*
+ Should be called with the mutex held. Starts a transform thread for
+ the given page id
+*/
+static void
+try_transform_page (YelpInfoDocument *info, const gchar *page_id)
+{
+ YelpInfoDocumentPrivate *priv = GET_PRIV (info);
+ InfoPageData *page_data = NULL;
+ gchar *real_id = NULL;
GError *error;
- g_assert (transform == priv->transform);
+ debug_print (DB_FUNCTION, "entering\n");
- if (priv->state == INFO_STATE_STOP) {
- info_document_disconnect (info);
- return;
+ if (page_id)
+ real_id = yelp_document_get_page_id (YELP_DOCUMENT (info),
+ page_id);
+
+ if (real_id) {
+ page_data = g_hash_table_lookup (priv->pages, real_id);
+ g_free (real_id);
}
- info_document_disconnect (info);
- priv->state = INFO_STATE_PARSED;
- /* We want to free priv->xmldoc, but we can't free it before transform
- is finalized. Otherwise, we could crash when YelpTransform frees
- its libxslt resources.
- */
- g_object_weak_ref ((GObject *) transform,
- (GWeakNotify) transform_finalized,
- info);
-
- docuri = yelp_uri_get_document_uri (priv->uri);
- error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
- _("The requested page was not found in the document ‘%s’."),
- docuri);
- g_free (docuri);
- yelp_document_error_pending ((YelpDocument *) info, error);
- g_error_free (error);
+ if (!page_data) {
+ gchar *docuri = yelp_uri_get_document_uri (priv->uri);
+ error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
+ _("The page ‘%s’ was not found in the document ‘%s’."),
+ page_id, docuri);
+ g_free (docuri);
+ yelp_document_signal ((YelpDocument *) info, page_id,
+ YELP_DOCUMENT_SIGNAL_ERROR,
+ error);
+ g_error_free (error);
+ return;
+ }
+
+ transform_page (page_data);
}
+/* This is called with the mutex held */
static void
-transform_error (YelpTransform *transform,
- YelpInfoDocument *info)
+transform_page (InfoPageData *page_data)
{
- YelpInfoDocumentPrivate *priv = GET_PRIV (info);
+ gint params_i = 0;
+ gchar **params = NULL;
GError *error;
- g_assert (transform == priv->transform);
-
- if (priv->state == INFO_STATE_STOP) {
- info_document_disconnect (info);
+ /*
+ A transform might already be running. If so, leave it be!
+ Note, the other obvious approach would be to call
+ page_data_cancel on the existing transform and start
+ again. Besides the fact this is wasteful, it leaves a problem
+ with when to release xmldoc.
+ */
+ if (page_data->transform) return;
+
+ /*
+ There are two jobs to do. Firstly, we must parse the text in the
+ unprocessed section into xml. Secondly, it must be transformed
+ to xhtml.
+ */
+ UnprocessedSection *us = page_data->unprocessed;
+ g_assert (us);
+
+ page_data->xmldoc = yelp_info_parser_parse_section (us, NULL);
+ if (!page_data->xmldoc) {
+ error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
+ _("Cannot parse page '%s'."),
+ page_data->page_id);
+ yelp_document_error_pending ((YelpDocument *) page_data->info, error);
return;
}
- error = yelp_transform_get_error (transform);
- yelp_document_error_pending ((YelpDocument *) info, error);
- g_error_free (error);
+ page_data->transform = yelp_transform_new (STYLESHEET);
+ page_data->chunk_ready =
+ g_signal_connect (page_data->transform, "chunk-ready",
+ (GCallback) transform_chunk_ready,
+ page_data);
+ page_data->finished =
+ g_signal_connect (page_data->transform, "finished",
+ (GCallback) transform_finished,
+ page_data);
+ page_data->error =
+ g_signal_connect (page_data->transform, "error",
+ (GCallback) transform_error,
+ page_data);
+
+ params =
+ yelp_settings_get_all_params (yelp_settings_get_default (),
+ 0, &params_i);
+
+ /* transform_finalized gets called when we destroy transform and
+ * frees the xmldoc.
+ *
+ * We have to use a weak_ref since if we free the memory before
+ * that of transform, we might crash when YelpTransform frees its
+ * libxslt resources.
+ */
+ g_object_weak_ref ((GObject *) page_data->transform,
+ (GWeakNotify) transform_finalized,
+ page_data);
- info_document_disconnect (info);
+ yelp_transform_start (page_data->transform,
+ page_data->xmldoc,
+ NULL,
+ (const gchar * const *) params);
+
+ g_strfreev (params);
}
+/******************************************************************************/
+/** InfoPageData **************************************************************/
+
static void
-transform_finalized (YelpInfoDocument *info,
- gpointer transform)
+info_page_data_cancel (InfoPageData *page_data)
{
- YelpInfoDocumentPrivate *priv = GET_PRIV (info);
-
- if (priv->xmldoc)
- xmlFreeDoc (priv->xmldoc);
- priv->xmldoc = NULL;
-}
+ debug_print (DB_FUNCTION, "entering\n");
+ if (!page_data->transform) return;
+
+ if (page_data->chunk_ready) {
+ g_signal_handler_disconnect (page_data->transform, page_data->chunk_ready);
+ page_data->chunk_ready = 0;
+ }
+ if (page_data->finished) {
+ g_signal_handler_disconnect (page_data->transform, page_data->finished);
+ page_data->finished = 0;
+ }
+ if (page_data->error) {
+ g_signal_handler_disconnect (page_data->transform, page_data->error);
+ page_data->error = 0;
+ }
+ yelp_transform_cancel (page_data->transform);
+ g_object_unref (page_data->transform);
+ page_data->transform = NULL;
+}
/******************************************************************************/
-/** Threaded ******************************************************************/
+/** YelpTransform *************************************************************/
static void
-info_document_process (YelpInfoDocument *info)
+transform_chunk_ready (YelpTransform *transform,
+ gchar *chunk_id,
+ InfoPageData *page_data)
{
- YelpInfoDocumentPrivate *priv = GET_PRIV (info);
- GFile *file = NULL;
- gchar *filepath = NULL;
- GError *error;
- gint params_i = 0;
- gchar **params = NULL;
+ YelpInfoDocumentPrivate *priv = GET_PRIV (page_data->info);
+ gchar *content;
- file = yelp_uri_get_file (priv->uri);
- if (file == NULL) {
- error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
- _("The file does not exist."));
- yelp_document_error_pending ((YelpDocument *) info, error);
- g_error_free (error);
- goto done;
- }
+ g_assert (page_data && page_data->info &&
+ YELP_IS_INFO_DOCUMENT (page_data->info));
+ g_assert (transform == page_data->transform);
- filepath = g_file_get_path (file);
- g_object_unref (file);
- if (!g_file_test (filepath, G_FILE_TEST_IS_REGULAR)) {
- error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
- _("The file ‘%s’ does not exist."),
- filepath);
- yelp_document_error_pending ((YelpDocument *) info, error);
- g_error_free (error);
- goto done;
+ if (priv->state == INFO_STATE_STOP) {
+ info_page_data_cancel (page_data);
+ return;
}
- priv->sections = (GtkTreeModel *) yelp_info_parser_parse_file (filepath);
- gtk_tree_model_foreach (priv->sections,
- (GtkTreeModelForeachFunc) info_sections_visit,
- info);
- priv->xmldoc = yelp_info_parser_parse_tree ((GtkTreeStore *) priv->sections);
+ content = yelp_transform_take_chunk (transform, chunk_id);
- if (priv->xmldoc == NULL) {
- error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
- _("The file ‘%s’ could not be parsed because it is"
- " not a well-formed info page."),
- filepath);
- yelp_document_error_pending ((YelpDocument *) info, error);
- goto done;
- }
+ yelp_document_give_contents (YELP_DOCUMENT (page_data->info),
+ chunk_id,
+ content,
+ "application/xhtml+xml");
- g_mutex_lock (priv->mutex);
- if (priv->state == INFO_STATE_STOP) {
- g_mutex_unlock (priv->mutex);
- goto done;
- }
+ yelp_document_signal (YELP_DOCUMENT (page_data->info),
+ chunk_id,
+ YELP_DOCUMENT_SIGNAL_CONTENTS,
+ NULL);
+}
- priv->transform = yelp_transform_new (STYLESHEET);
- priv->chunk_ready =
- g_signal_connect (priv->transform, "chunk-ready",
- (GCallback) transform_chunk_ready,
- info);
- priv->finished =
- g_signal_connect (priv->transform, "finished",
- (GCallback) transform_finished,
- info);
- priv->error =
- g_signal_connect (priv->transform, "error",
- (GCallback) transform_error,
- info);
+static void
+transform_finished (YelpTransform *transform,
+ InfoPageData *page_data)
+{
+ YelpInfoDocumentPrivate *priv;
- params = yelp_settings_get_all_params (yelp_settings_get_default (), 0, &params_i);
+ debug_print (DB_FUNCTION, "entering\n");
- priv->transform_running = TRUE;
- yelp_transform_start (priv->transform,
- priv->xmldoc,
- NULL,
- (const gchar * const *) params);
- g_strfreev (params);
- g_mutex_unlock (priv->mutex);
+ g_assert (page_data && page_data->info &&
+ YELP_IS_INFO_DOCUMENT (page_data->info));
+ g_assert (transform == page_data->transform);
- done:
- g_free (filepath);
- priv->process_running = FALSE;
- g_object_unref (info);
+ priv = GET_PRIV (page_data->info);
+
+ if (priv->state == INFO_STATE_STOP) {
+ info_page_data_cancel (page_data);
+ return;
+ }
+
+ info_page_data_cancel (page_data);
+ transform_finalized (page_data, transform);
}
-static gboolean
-info_sections_visit (GtkTreeModel *model,
- GtkTreePath *path,
- GtkTreeIter *iter,
- YelpInfoDocument *info)
+static void
+transform_error (YelpTransform *transform,
+ InfoPageData *page_data)
{
- YelpInfoDocumentPrivate *priv = GET_PRIV (info);
- gchar *page_id, *title;
-
- gtk_tree_model_get (model, iter,
- INFO_PARSER_COLUMN_PAGE_NO, &page_id,
- INFO_PARSER_COLUMN_PAGE_NAME, &title,
- -1);
- yelp_document_set_page_id ((YelpDocument *) info, page_id, page_id);
- yelp_document_set_page_title ((YelpDocument *) info, page_id, title);
-
- if (priv->root_id == NULL) {
- priv->root_id = g_strdup (page_id);
- yelp_document_set_page_id ((YelpDocument *) info, NULL, page_id);
- }
- yelp_document_set_root_id ((YelpDocument *) info, page_id, priv->root_id);
+ YelpInfoDocumentPrivate *priv;
+ GError *error;
+
+ g_assert (page_data != NULL && page_data->info != NULL &&
+ YELP_IS_INFO_DOCUMENT (page_data->info));
+ g_assert (transform == page_data->transform);
- if (priv->visit_prev_id != NULL) {
- yelp_document_set_prev_id ((YelpDocument *) info, page_id, priv->visit_prev_id);
- yelp_document_set_next_id ((YelpDocument *) info, priv->visit_prev_id, page_id);
- g_free (priv->visit_prev_id);
+ priv = GET_PRIV (page_data->info);
+
+ if (priv->state == INFO_STATE_STOP) {
+ info_page_data_cancel (page_data);
+ return;
}
- priv->visit_prev_id = page_id;
- g_free (title);
- return FALSE;
+
+ error = yelp_transform_get_error (transform);
+ yelp_document_error_pending (YELP_DOCUMENT (page_data->info),
+ error);
+ g_error_free (error);
+
+ info_page_data_cancel (page_data);
}
static void
-info_document_disconnect (YelpInfoDocument *info)
+transform_finalized (InfoPageData *page_data,
+ gpointer transform)
{
- YelpInfoDocumentPrivate *priv = GET_PRIV (info);
- if (priv->chunk_ready) {
- g_signal_handler_disconnect (priv->transform, priv->chunk_ready);
- priv->chunk_ready = 0;
- }
- if (priv->finished) {
- g_signal_handler_disconnect (priv->transform, priv->finished);
- priv->finished = 0;
- }
- if (priv->error) {
- g_signal_handler_disconnect (priv->transform, priv->error);
- priv->error = 0;
- }
- yelp_transform_cancel (priv->transform);
- g_object_unref (priv->transform);
- priv->transform = NULL;
- priv->transform_running = FALSE;
+ if (page_data->xmldoc) xmlFreeDoc (page_data->xmldoc);
+ page_data->xmldoc = NULL;
}
diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c
index 5ecdc5a..33bbe44 100644
--- a/libyelp/yelp-info-parser.c
+++ b/libyelp/yelp-info-parser.c
@@ -32,29 +32,89 @@
#include "yelp-magic-decompressor.h"
#include "yelp-debug.h"
+static void yelp_info_parse_menu (xmlNodePtr root,
+ gchar *page_content,
+ gboolean notes);
+static gboolean get_menuoptions (gchar *line, gchar **title,
+ gchar **ref, gchar **desc,
+ gchar **xref);
+static void info_process_text_notes (xmlNodePtr node, gchar *content);
-GtkTreeIter * find_real_top (GtkTreeModel *model,
- GtkTreeIter *it);
-GtkTreeIter * find_real_sibling (GtkTreeModel *model,
- GtkTreeIter *it,
- GtkTreeIter *comp);
-xmlNodePtr yelp_info_parse_menu (GtkTreeStore *tree,
- xmlNodePtr *node,
- gchar *page_content,
- gboolean notes);
-gboolean get_menuoptions (gchar *line,
- gchar **title,
- gchar **ref,
- gchar **desc,
- gchar **xref);
-gboolean resolve_frag_id (GtkTreeModel *model,
- GtkTreePath *path,
- GtkTreeIter *iter,
- gpointer data);
-void info_process_text_notes (xmlNodePtr *node,
- gchar *content,
- GtkTreeStore
- *tree);
+/*
+ This structure is used to represent links. name is the name which
+ should appear in the text (with spaces etc.) and id is the mangled
+ name which is used as an id. Both strings are released with
+ g_free() by xref_free().
+ */
+typedef struct _XRef XRef;
+struct _XRef {
+ gchar *name, *id;
+};
+
+/*
+ This is created in the first pass through the info file.
+
+ contents is the text content of the section, without any
+ processing. The XRefs are so that we can build next/prev links and
+ the upward linktrail.
+
+ ancestry is a list whose elements are released with g_free() when
+ the section is freed; presumably it holds the ids of the enclosing
+ nodes -- TODO confirm against the code that fills it in.
+ */
+struct _UnprocessedSection {
+ gchar *contents; /* raw, unparsed section text */
+ XRef *node, *up, *prev, *next; /* this node and its neighbours */
+ GSList *ancestry;
+};
+
+/*
+ Allocate a zero-filled XRef, so name and id both start out NULL.
+ Release the result with xref_free().
+ */
+static XRef*
+xref_new (void)
+{
+ return g_new0 (XRef, 1);
+}
+
+/*
+ Free an XRef together with its name and id strings. A NULL xref is
+ ignored, in the same way as unprocessed_section_free ignores NULL.
+ */
+static void
+xref_free (XRef *xref)
+{
+ if (!xref) return;
+
+ g_free (xref->name);
+ g_free (xref->id);
+ g_free (xref);
+}
+
+/*
+ Allocate an UnprocessedSection whose four XRefs (node, up, prev,
+ next) are already allocated but empty. contents and ancestry start
+ out NULL. Release the result with unprocessed_section_free().
+ */
+static UnprocessedSection*
+unprocessed_section_new (void)
+{
+ UnprocessedSection *ret = g_new0 (UnprocessedSection, 1);
+ ret->node = xref_new ();
+ ret->up = xref_new ();
+ ret->prev = xref_new ();
+ ret->next = xref_new ();
+ return ret;
+}
+
+/*
+ Release everything held by an UnprocessedSection: the section text,
+ the node/up/prev/next XRefs, the ancestry list (elements are freed
+ with g_free) and finally the structure itself. Does nothing when
+ given NULL.
+ */
+void
+unprocessed_section_free (UnprocessedSection *us)
+{
+ if (us == NULL)
+ return;
+
+ XRef *links[] = { us->node, us->up, us->prev, us->next };
+ guint i;
+
+ g_free (us->contents);
+ for (i = 0; i < G_N_ELEMENTS (links); i++)
+ xref_free (links[i]);
+ g_slist_free_full (us->ancestry, g_free);
+ g_free (us);
+}
+
+/*
+ Register the section us with doc: its id is mapped to itself as a
+ page id, and whichever of the next, prev and up links carry an id
+ are recorded against it.
+ */
+void
+register_section (YelpDocument *doc, const UnprocessedSection *us)
+{
+ /*
+ Every part must be non-NULL, hence && rather than ||: with || a
+ NULL us would be dereferenced by the assertion itself.
+ */
+ g_assert (us && us->node && us->node->id);
+
+ const gchar *id = us->node->id;
+
+ yelp_document_set_page_id (doc, id, id);
+ if (us->next->id)
+ yelp_document_set_next_id (doc, id, us->next->id);
+ if (us->prev->id)
+ yelp_document_set_prev_id (doc, id, us->prev->id);
+ if (us->up->id)
+ yelp_document_set_up_id (doc, id, us->up->id);
+}
/*
Used to output the correct <heading level="?" /> tag.
@@ -608,491 +668,214 @@ get_value_after (const char* source, const char *key)
return get_value_after_ext (source, key, ",", "\n\x7f");
}
-static int
-node2page (GHashTable *nodes2pages, char *node)
-{
- gint page;
-
- if (g_hash_table_lookup_extended (nodes2pages, node,
- NULL, (gpointer*) &page))
- return page;
-
- /* This shouldn't happen: we should only ever have to look up pages
- * that exist. */
- g_return_val_if_reached (0);
-}
+/*
+ Convert a page name into something that can go in a xref. (At the
+ moment, this is just cleaning spaces).
-static GtkTreeIter
-*node2iter (GHashTable *nodes2iters, char *node)
+ Operates in place but returns a pointer to the string (so chains
+ with g_strdup are easier).
+ */
+static gchar*
+name2id (gchar* str)
{
- GtkTreeIter *iter;
-
- iter = g_hash_table_lookup (nodes2iters, node);
- d (if (!iter) debug_print (DB_WARN, "Could not retrieve iter for node !%s!\n", node));
- return iter;
+ return g_strdelimit (str, " ", '_');
}
-GtkTreeIter
-*find_real_top (GtkTreeModel *model, GtkTreeIter *it)
+/*
+ process_page is responsible for understanding enough of page_text to
+ make an UnprocessedSection structure out of it and insert it
+ correctly into usections.
+ */
+static void
+process_page (GHashTable *usections, char *page_text)
{
- GtkTreeIter *r = NULL;
- GtkTreeIter *tmp = NULL;
-
- if (!it)
- return NULL;
-
- r = gtk_tree_iter_copy (it);
- tmp = g_malloc0 (sizeof (GtkTreeIter));
- while (gtk_tree_model_iter_parent (model, tmp, r)) {
- gtk_tree_iter_free (r);
- r = gtk_tree_iter_copy (tmp);
- }
- g_free (tmp);
-
- return r;
-}
+ char **parts;
+ UnprocessedSection *section = unprocessed_section_new ();
-GtkTreeIter * find_real_sibling (GtkTreeModel *model,
- GtkTreeIter *it, GtkTreeIter *comp)
-{
- GtkTreeIter *r;
- GtkTreeIter *tmp = NULL;
- gboolean result = FALSE;
- gchar *title;
- gchar *reftitle;
+ /* Split out first line from text and find prev/next/up links. */
+ parts = g_strsplit (page_text, "\n", 3);
- if (!it) {
- return NULL;
+ section->node->name = get_value_after (parts[0], "Node: ");
+ if (!section->node->name) {
+ /* Section doesn't have a name, so nothing can link to it. */
+ goto cleanup;
}
+ section->node->id = name2id (g_strdup (section->node->name));
- r = gtk_tree_iter_copy (it);
- tmp = gtk_tree_iter_copy (it);
-
- reftitle = gtk_tree_model_get_string_from_iter (model, comp);
-
- result = gtk_tree_model_iter_parent (model, r, it);
- if (!result)
- return it;
-
- title = gtk_tree_model_get_string_from_iter (model, r);
-
- while (!g_str_equal (title, reftitle) && result) {
- gtk_tree_iter_free (tmp);
- tmp = gtk_tree_iter_copy (r);
- result = gtk_tree_model_iter_parent (model, r, tmp);
- if (result)
- title = gtk_tree_model_get_string_from_iter (model, r);
+ /*
+ Don't look for the parent if we're already at Top (you'll get
+ '(dir)', but that doesn't appear in *this* document)
+ */
+ if (strcmp (section->node->id, "Top"))
+ section->up->name = get_value_after (parts[0], "Up: ");
+
+ section->prev->name = get_value_after (parts[0], "Prev: ");
+ section->next->name = get_value_after (parts[0], "Next: ");
+
+ if (section->up->name)
+ section->up->id = name2id (g_strdup (section->up->name));
+ if (section->prev->name)
+ section->prev->id = name2id (g_strdup (section->prev->name));
+ if (section->next->name)
+ section->next->id = name2id (g_strdup (section->next->name));
+
+ if (section->next->id && g_str_equal (section->next->id, "Top")) {
+ g_free (section->next->id);
+ section->next->id = NULL;
}
- if (!g_str_equal (title, reftitle))
- {
- gtk_tree_iter_free (tmp);
- tmp = NULL;
- }
-
- gtk_tree_iter_free (r);
- g_free (title);
- g_free (reftitle);
- return tmp;
-
-}
-
-static void
-process_page (GtkTreeStore *tree,
- GHashTable *nodes2pages, GHashTable *nodes2iters,
- int *processed_table, char **page_list, char *page_text)
-{
- GtkTreeIter *iter;
-
- char **parts;
- char *node;
- char *up;
- char *prev;
- char *next;
- gchar *tmp;
-
- int page;
-
- /* split out the header line and the text */
- parts = g_strsplit (page_text, "\n", 3);
-
- node = get_value_after (parts[0], "Node: ");
- up = get_value_after (parts[0], "Up: ");
- prev = get_value_after (parts[0], "Prev: ");
- next = get_value_after (parts[0], "Next: ");
-
- if (next && g_str_equal (next, "Top")) {
- g_free (next);
- next = NULL;
- }
- if (g_str_equal (node, "Top") && prev != NULL) {
- g_free (prev);
- prev = NULL;
- }
-
- /* check to see if this page has been processed already */
- page = node2page (nodes2pages, node);
- if (processed_table[page]) {
- return;
- }
- processed_table[page] = 1;
-
- debug_print (DB_DEBUG, "-- Processing Page %s\n\tParent: %s\n", node, up);
-
- iter = g_slice_alloc0 (sizeof (GtkTreeIter));
- /* check to see if we need to process our parent and siblings */
- if (up && g_ascii_strncasecmp (up, "(dir)", 5) && strcmp (up, "Top"))
- {
- page = node2page (nodes2pages, up);
- if (!processed_table[page])
- {
- debug_print (DB_DEBUG, "%% Processing Node %s\n", up);
- process_page (tree, nodes2pages,
- nodes2iters, processed_table, page_list,
- page_list[page]);
- }
- }
- if (prev && g_ascii_strncasecmp (prev, "(dir)", 5))
- {
- if (strncmp (node, "Top", 3)) {
- /* Special case the Top node to always appear first */
- } else {
- page = node2page (nodes2pages, prev);
- if (!processed_table[page])
- {
- debug_print (DB_DEBUG, "%% Processing Node %s\n", prev);
- process_page (tree, nodes2pages,
- nodes2iters, processed_table, page_list,
- page_list[page]);
- }
- }
- }
-
- /* by this point our parent and older sibling should be processed */
- if (!up || !g_ascii_strcasecmp (up, "(dir)"))
- {
- debug_print (DB_DEBUG, "\t> no parent\n");
- if (!prev || !g_ascii_strcasecmp (prev, "(dir)"))
- {
- debug_print (DB_DEBUG, "\t> no previous\n");
- gtk_tree_store_append (tree, iter, NULL);
- }
- else if (prev) {
- GtkTreeIter *real;
- real = find_real_top (GTK_TREE_MODEL (tree),
- node2iter (nodes2iters, prev));
- if (real) {
- gtk_tree_store_insert_after (tree, iter, NULL,
- real);
- gtk_tree_iter_free (real);
- }
- else
- gtk_tree_store_append (tree, iter, NULL);
- }
- }
- else if (!prev || !g_ascii_strcasecmp (prev, "(dir)") || !strcmp (prev, up))
- {
- debug_print (DB_DEBUG, "\t> no previous\n");
- gtk_tree_store_append (tree, iter,
- node2iter (nodes2iters, up));
- }
- else if (up && prev)
- {
- GtkTreeIter *upit = node2iter (nodes2iters, up);
- GtkTreeIter *previt = node2iter (nodes2iters, prev);
- GtkTreeIter *nit = NULL;
- debug_print (DB_DEBUG, "+++ Parent: %s Previous: %s\n", up, prev);
-
- d (if (upit) debug_print (DB_DEBUG, "++++ Have parent node!\n"));
- d (if (previt) debug_print (DB_DEBUG, "++++ Have previous node!\n"));
- nit = find_real_sibling (GTK_TREE_MODEL (tree), previt, upit);
- if (nit) {
- gtk_tree_store_insert_after (tree, iter,
- upit,
- nit);
- gtk_tree_iter_free (nit);
- }
- else
- gtk_tree_store_append (tree, iter, upit);
- }
- else
- {
- debug_print (DB_DEBUG, "# node %s was not put in tree\n", node);
- return;
- }
-
- d (if (iter) debug_print (DB_DEBUG, "Have a valid iter, storing for %s\n", node));
-
- g_hash_table_insert (nodes2iters, g_strdup (node), iter);
- debug_print (DB_DEBUG, "size: %i\n", g_hash_table_size (nodes2iters));
-
- /*tmp = g_strdup_printf ("%i",
- node2page (nodes2pages, node));*/
- tmp = g_strdup (node);
- tmp = g_strdelimit (tmp, " ", '_');
- gtk_tree_store_set (tree, iter,
- INFO_PARSER_COLUMN_PAGE_NO, tmp,
- INFO_PARSER_COLUMN_PAGE_NAME, node,
- INFO_PARSER_COLUMN_PAGE_CONTENT, parts[2],
- -1);
+ if (section->prev->id && g_str_equal (section->node->id, "Top")) {
+ g_free (section->prev->id);
+ section->prev->id = NULL;
+ }
- g_free (tmp);
- g_free (node);
- g_free (up);
- g_free (prev);
- g_free (next);
- g_strfreev (parts);
-}
+ /* Check to see if this page has been processed already */
+ if (g_hash_table_lookup (usections, section->node->id)) {
+ goto cleanup;
+ }
-struct TagTableFix {
- GHashTable *nodes2pages; /* Build this... */
- GHashTable *pages2nodes; /* ... using this. */
-};
+ /* Set pointers to NULL so that the free-fest in cleanup doesn't
+ free the memory. */
+ section->contents = parts[2];
+ parts[2] = NULL;
-static void
-use_offset2page (gpointer o, gpointer p, gpointer ud)
-{
- struct TagTableFix* ttf = (struct TagTableFix*)ud;
+ g_hash_table_insert (usections,
+ g_strdup (section->node->id), section);
+ section = NULL;
- const gchar* node = g_hash_table_lookup (ttf->pages2nodes, p);
- if (node) {
- g_hash_table_insert (ttf->nodes2pages, g_strdup (node), p);
- }
+ cleanup:
+ unprocessed_section_free (section);
+ g_strfreev (parts);
}
/*
- We had a nodes2offsets hash table, but sometimes these things
- lie. How terribly rude. Anyway, use offsets2pages and pages2nodes
- (and injectivity!) to construct the nodes2pages hash table.
-*/
-static GHashTable *
-make_nodes2pages (GHashTable* offsets2pages,
- GHashTable* pages2nodes)
+ Returns a list of XRef*'s for the parents of usection, with the most
+ senior first. The elements are not copies: they point at the `up'
+ XRefs owned by the sections in usections.
+ */
+static GSList*
+get_parent_list (const UnprocessedSection *usection,
+ GHashTable *usections)
{
- struct TagTableFix ttf;
-
- ttf.nodes2pages =
- g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
- ttf.pages2nodes = pages2nodes;
+ GSList *ret = NULL;
- g_hash_table_foreach (offsets2pages, use_offset2page, &ttf);
+ while (usection->up->id && usection->up->name) {
+ ret = g_slist_prepend (ret, usection->up);
+ usection = g_hash_table_lookup (usections, usection->up->id);
+ if (!usection)
+ break;
+ }
- return ttf.nodes2pages;
+ return ret;
}
-/**
- * Parse file into a GtkTreeStore containing useful information that we can
- * later convert into a nice XML document or something else.
- */
-GtkTreeStore
-*yelp_info_parser_parse_file (char *file)
+GHashTable*
+yelp_info_parser_read_file (const gchar *file)
{
- gchar **page_list;
- char **ptr;
- int pages;
- int offset;
- GHashTable *offsets2pages = NULL;
- GHashTable *pages2nodes = NULL;
- GHashTable *nodes2pages = NULL;
- GHashTable *nodes2iters = NULL;
- int *processed_table;
- GtkTreeStore *tree;
- int pt;
-
- page_list = expanded_info_file (file);
- if (!page_list)
- return NULL;
-
- pages = 0;
- offset = 0;
-
- offsets2pages = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
- NULL);
- pages2nodes = g_hash_table_new_full (g_direct_hash, g_direct_equal, NULL,
- g_free);
-
- for (ptr = page_list; *ptr != NULL; ptr++)
- {
- gchar *name = NULL;
-
- g_hash_table_insert (offsets2pages,
- g_strdup_printf ("%i", offset),
- GINT_TO_POINTER (pages));
-
- name = get_value_after (*ptr, "Node: ");
- if (name)
- g_hash_table_insert (pages2nodes,
- GINT_TO_POINTER (pages), name);
-
- offset += strlen (*ptr);
- if (pages) offset += 2;
- pages++;
-
- pt = page_type (*ptr);
- if (pt == PAGE_INDIRECT) {
- g_warning ("Found an indirect page in a file "
- "we thought we'd expanded.");
- }
- }
-
- /* Now consolidate (and correct) the two hash tables */
- nodes2pages = make_nodes2pages (offsets2pages, pages2nodes);
-
- g_hash_table_destroy (offsets2pages);
- g_hash_table_destroy (pages2nodes);
+ gchar **page_list;
+ char **ptr;
+ GHashTable *usections;
- processed_table = g_malloc0 (pages * sizeof (int));
- tree = gtk_tree_store_new (INFO_PARSER_N_COLUMNS, G_TYPE_STRING, G_TYPE_STRING,
- G_TYPE_STRING);
- nodes2iters = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
- (GDestroyNotify) gtk_tree_iter_free);
+ page_list = expanded_info_file (file);
+ if (!page_list)
+ return NULL;
- pages = 0;
- for (ptr = page_list; *ptr != NULL; ptr++)
- {
- if (page_type (*ptr) != PAGE_NODE) continue;
- process_page (tree, nodes2pages, nodes2iters,
- processed_table, page_list, *ptr);
- }
+ /*
+ Don't provide destroy functions, because we want to grab the
+ contents in yelp-info-document.c and reuse them.
+ */
+ usections = g_hash_table_new (g_str_hash, g_str_equal);
- g_strfreev (page_list);
+ for (ptr = page_list; *ptr != NULL; ptr++) {
+ if (page_type (*ptr) == PAGE_NODE)
+ process_page (usections, *ptr);
+ }
- g_hash_table_destroy (nodes2iters);
- g_hash_table_destroy (nodes2pages);
+ g_strfreev (page_list);
- g_free (processed_table);
+ /*
+ Now we've read in each page, we can calculate a list of parents
+ for each page.
+ */
+ GHashTableIter iter;
+ const gchar *key;
+ UnprocessedSection *value;
+
+ g_hash_table_iter_init (&iter, usections);
+ while (g_hash_table_iter_next (&iter,
+ (const gpointer)&key,
+ (gpointer)&value)) {
+ value->ancestry = get_parent_list (value, usections);
+ }
- return tree;
+ return usections;
}
/* End Part 1 */
/* Part 2: Parse Tree into XML */
-static void
-parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
+
+xmlDocPtr
+yelp_info_parser_parse_section (const UnprocessedSection* usection,
+ GHashTable *usections)
{
- GtkTreeIter children, parent;
- xmlNodePtr newnode;
+ xmlDocPtr doc;
+ xmlNodePtr root, parents, parent;
+ gboolean notes = FALSE;
+ GSList *parent_list;
+ XRef *xref;
+
+ doc = xmlNewDoc (BAD_CAST "1.0");
+ root = xmlNewNode (NULL, BAD_CAST "Info");
+ xmlDocSetRootElement (doc, root);
+
+ if (strstr (usection->contents, "*Note") ||
+ strstr (usection->contents, "*note")) {
+ notes = TRUE;
+ }
- char *page_no = NULL;
- char *page_name = NULL;
- char *page_content = NULL;
- gboolean notes = FALSE;
+ if (strstr (usection->contents, "* Menu:")) {
+ yelp_info_parse_menu (root, usection->contents, notes);
+ }
+ else {
+ if (!notes) {
+ info_body_text (root, NULL, NULL, FALSE, usection->contents);
+ }
+ else {
+ info_process_text_notes (root, usection->contents);
+ }
+ }
- debug_print (DB_DEBUG, "Decended\n");
- do
- {
- gtk_tree_model_get (GTK_TREE_MODEL (tree), &iter,
- INFO_PARSER_COLUMN_PAGE_NO, &page_no,
- INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
- INFO_PARSER_COLUMN_PAGE_CONTENT, &page_content,
- -1);
- debug_print (DB_DEBUG, "Got Section: %s\n", page_name);
- if (strstr (page_content, "*Note") ||
- strstr (page_content, "*note")) {
- notes = TRUE;
- }
- if (strstr (page_content, "* Menu:")) {
- newnode = yelp_info_parse_menu (tree, node, page_content, notes);
- } else {
- newnode = xmlNewTextChild (*node, NULL,
- BAD_CAST "Section",
- NULL);
- if (!notes)
- info_body_text (newnode, NULL, NULL, FALSE, page_content);
-
- else {
- /* Handle notes here */
- info_process_text_notes (&newnode, page_content, tree);
- }
- }
- /* if we free the page content, now it's in the XML, we can
- * save some memory */
- g_free (page_content);
- page_content = NULL;
-
- if (gtk_tree_model_iter_parent (GTK_TREE_MODEL (tree), &parent, &iter)) {
- gchar *parent_id;
- gtk_tree_model_get (GTK_TREE_MODEL (tree), &parent,
- INFO_PARSER_COLUMN_PAGE_NO, &parent_id,
- -1);
- xmlNewProp (newnode, BAD_CAST "up", BAD_CAST parent_id);
- g_free (parent_id);
- }
-
- xmlNewProp (newnode, BAD_CAST "id",
- BAD_CAST page_no);
- xmlNewProp (newnode, BAD_CAST "name",
- BAD_CAST page_name);
- if (gtk_tree_model_iter_children (GTK_TREE_MODEL (tree),
- &children,
- &iter))
- parse_tree_level (tree, &newnode, children);
- g_free (page_no);
- g_free (page_name);
- }
- while (gtk_tree_model_iter_next (GTK_TREE_MODEL (tree), &iter));
- debug_print (DB_DEBUG, "Ascending\n");
-}
+ if (usection->next->id && usection->next->name) {
+ xmlNewProp (root, BAD_CAST "next", BAD_CAST usection->next->id);
+ xmlNewProp (root, BAD_CAST "next-name", BAD_CAST usection->next->name);
+ }
+ if (usection->prev->id && usection->prev->name) {
+ xmlNewProp (root, BAD_CAST "prev", BAD_CAST usection->prev->id);
+ xmlNewProp (root, BAD_CAST "prev-name", BAD_CAST usection->prev->name);
+ }
-xmlDocPtr
-yelp_info_parser_parse_tree (GtkTreeStore *tree)
-{
- xmlDocPtr doc;
- xmlNodePtr node;
- GtkTreeIter iter;
-
- /*
- xmlChar *xmlbuf;
- int bufsiz;
- */
-
- doc = xmlNewDoc (BAD_CAST "1.0");
- node = xmlNewNode (NULL, BAD_CAST "Info");
- xmlDocSetRootElement (doc, node);
-
- /* functions I will want:
- gtk_tree_model_get_iter_first;
- gtk_tree_model_iter_next;
- gtk_tree_model_iter_children;
- */
-
- if (gtk_tree_model_get_iter_first (GTK_TREE_MODEL (tree), &iter))
- parse_tree_level (tree, &node, iter);
- d (else debug_print (DB_DEBUG, "Empty tree?\n"));
-
- /*
- xmlDocDumpFormatMemory (doc, &xmlbuf, &bufsiz, 1);
- g_print ("XML follows:\n%s\n", xmlbuf);
- */
-
- return doc;
-}
+ xmlNewProp (root, BAD_CAST "id", BAD_CAST usection->node->id);
+ xmlNewProp (root, BAD_CAST "name", BAD_CAST usection->node->name);
-gboolean
-resolve_frag_id (GtkTreeModel *model, GtkTreePath *path, GtkTreeIter *iter,
- gpointer data)
-{
- gchar *page_no = NULL;
- gchar *page_name = NULL;
- gchar **xref = data;
-
- gtk_tree_model_get (GTK_TREE_MODEL (model), iter,
- INFO_PARSER_COLUMN_PAGE_NO, &page_no,
- INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
- -1);
- if (g_str_equal (page_name, *xref)) {
- g_free (*xref);
- *xref = g_strdup (page_name);
- *xref = g_strdelimit (*xref, " ", '_');
-
- g_free (page_name);
- g_free (page_no);
- return TRUE;
+ /*
+ Add
+ <parents><parent>id1</parent><parent>id2</parent></parents>
+ to allow us to make a linktrail in the xslt.
+ */
+
+ parent_list = usection->ancestry;
+
+ if (parent_list) {
+ parents = xmlNewChild (root, NULL, BAD_CAST "parents", NULL);
+ while (parent_list) {
+ xref = (XRef *)parent_list->data;
+ parent = xmlNewChild (parents, NULL, BAD_CAST "parent", NULL);
+ xmlNewProp (parent, BAD_CAST "id", BAD_CAST xref->id);
+ xmlNewProp (parent, BAD_CAST "name", BAD_CAST xref->name);
+ parent_list = g_slist_next (parent_list);
+ }
}
- g_free (page_name);
- g_free (page_no);
- return FALSE;
+ return doc;
}
gboolean
@@ -1155,26 +938,22 @@ first_non_space (gchar* str)
return str;
}
-xmlNodePtr
-yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
+static void
+yelp_info_parse_menu (xmlNodePtr root,
gchar *page_content, gboolean notes)
{
gchar **split;
gchar **menuitems;
gchar *tmp = NULL;
- xmlNodePtr newnode, menu_node, mholder = NULL;
+ xmlNodePtr menu_node, mholder = NULL;
int i=0;
split = g_strsplit (page_content, "* Menu:", 2);
- newnode = xmlNewChild (*node, NULL,
- BAD_CAST "Section", NULL);
-
-
if (!notes)
- info_body_text (newnode, NULL, NULL, FALSE, split[0]);
+ info_body_text (root, NULL, NULL, FALSE, split[0]);
else {
- info_process_text_notes (&newnode, split[0], tree);
+ info_process_text_notes (root, split[0]);
}
menuitems = g_strsplit (split[1], "\n", -1);
@@ -1205,7 +984,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
if (menuitems[0] != NULL) {
/* If there are any menu items, make the <menu> node */
- menu_node = xmlNewChild (newnode, NULL, BAD_CAST "menu", NULL);
+ menu_node = xmlNewChild (root, NULL, BAD_CAST "menu", NULL);
}
while (menuitems[i] != NULL) {
@@ -1227,8 +1006,8 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
if (menu) {
mholder = xmlNewChild (menu_node, NULL, BAD_CAST "menuholder", NULL);
- gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &xref);
-
+ name2id (xref);
+
if (ref == NULL) { /* A standard type menu */
/* title+2 skips the "* ". We know we haven't jumped over the
end of the string because strlen (title) >= 3 */
@@ -1311,12 +1090,10 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
}
g_strfreev (menuitems);
-
- return newnode;
}
void
-info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
+info_process_text_notes (xmlNodePtr node, gchar *content)
{
gchar **notes;
gchar **current;
@@ -1336,7 +1113,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
notes = g_regex_split_simple ("\\*[Nn]ote(?!_)", content, 0, 0);
for (current = notes; *current != NULL; current++) {
- gchar *url, **urls, **ulink;
+ gchar *url, **urls;
gchar *append;
gchar *alt_append, *alt_append1;
gchar *link_text;
@@ -1348,14 +1125,14 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
* start, so we can just add it and forget about it.
*/
first = FALSE;
- info_body_text (*node, ¶graph, NULL, TRUE, (*current));
+ info_body_text (node, ¶graph, NULL, TRUE, (*current));
continue;
}
/* If we got to here, we now gotta parse the note reference */
append = strchr (*current, ':');
if (!append) {
- info_body_text (*node, ¶graph, NULL, TRUE, *current);
+ info_body_text (node, ¶graph, NULL, TRUE, *current);
continue;
}
append++;
@@ -1370,7 +1147,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
}
alt_append1 = strchr (alt_append1, ',');
if (!append && !alt_append && !alt_append1) {
- info_body_text (*node, ¶graph, NULL, TRUE, *current);
+ info_body_text (node, ¶graph, NULL, TRUE, *current);
continue;
}
if (!append || alt_append || alt_append1) {
@@ -1416,12 +1193,13 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
/* Massive space. Fix. */
gchar *next = break_point;
gchar *url_copy;
+ gchar *old = url;
while (*next == ' ')
next++;
next--;
url_copy = g_strndup (url, break_point-url);
- g_free (url);
url = g_strconcat (url_copy, next, NULL);
+ g_free (old);
break_point = strchr (url, ' ');
g_free (url_copy);
} else {
@@ -1477,14 +1255,14 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
else
frag = g_strndup (url, tmp1 - url);
g_strstrip (frag);
- gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag);
+ name2id (frag);
href = g_strconcat ("xref:", frag, NULL);
g_free (frag);
}
/* Check we've got a valid paragraph node */
if (!paragraph) {
- paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL);
+ paragraph = xmlNewChild (node, NULL, BAD_CAST "para", NULL);
}
/*
@@ -1502,7 +1280,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
g_strfreev (urls);
/* Finally, we can add the following text as required */
- info_body_text (*node, ¶graph, NULL, TRUE, append);
+ info_body_text (node, ¶graph, NULL, TRUE, append);
g_free (url);
g_free (href);
diff --git a/libyelp/yelp-info-parser.h b/libyelp/yelp-info-parser.h
index d338a59..e659edc 100644
--- a/libyelp/yelp-info-parser.h
+++ b/libyelp/yelp-info-parser.h
@@ -26,6 +26,7 @@
#include <glib.h>
#include <gtk/gtk.h>
#include <libxml/tree.h>
+#include "yelp-document.h"
enum {
INFO_PARSER_COLUMN_PAGE_NO,
@@ -34,8 +35,37 @@ enum {
INFO_PARSER_N_COLUMNS
};
+/*
+ A structure for information about a section which has been read into
+ memory but not yet parsed to xml.
+ */
+typedef struct _UnprocessedSection UnprocessedSection;
+
+/*
+ Free the memory in the opaque UnprocessedSection structure.
+ */
+void unprocessed_section_free (UnprocessedSection *us);
+
+/*
+ Register a section with the document: calls
+ yelp_document_set_page_id and also registers up/next/prev links.
+ */
+void register_section (YelpDocument *doc,
+ const UnprocessedSection *us);
+
+/*
+ Read the given file into memory. Stores each (unparsed) section
+ keyed by page_id into a hash table. Returns NULL on failure.
-GtkTreeStore *yelp_info_parser_parse_file (char *file);
-xmlDocPtr yelp_info_parser_parse_tree (GtkTreeStore *tree);
+ The page_id and sections will not be freed on destroying the hash
+ table, so the caller must use g_free and unprocessed_section_free.
+ */
+GHashTable* yelp_info_parser_read_file (const gchar *file);
+
+/*
+ Parse a particular section to xml.
+ */
+xmlDocPtr yelp_info_parser_parse_section (const UnprocessedSection* usection,
+ GHashTable *usections);
#endif /* __YELP_INFO_PARSER_H__ */
diff --git a/stylesheets/info2html.xsl.in b/stylesheets/info2html.xsl.in
index a97b054..7f45cb5 100644
--- a/stylesheets/info2html.xsl.in
+++ b/stylesheets/info2html.xsl.in
@@ -12,37 +12,11 @@
<xsl:import href="@XSL_HTML@"/>
<xsl:include href="yelp-common.xsl"/>
-<xsl:template name="linktrails">
- <xsl:param name="up" select="@up"/>
- <xsl:variable name="upnode" select="/Info//Section[@id = $up]"/>
- <xsl:if test="$upnode/@up">
- <xsl:call-template name="linktrails">
- <xsl:with-param name="up" select="$upnode/@up"/>
- </xsl:call-template>
- </xsl:if>
- <a href="xref:{$upnode/@id}">
- <xsl:value-of select="$upnode/@name"/>
- </a>
- <xsl:text> » </xsl:text>
-</xsl:template>
-
-<xsl:template match="/">
- <xsl:for-each select="/Info/Section">
- <xsl:call-template name="html.output"/>
- </xsl:for-each>
-</xsl:template>
-
-<xsl:template mode="html.output.after.mode" match="Section">
- <xsl:for-each select="Section">
- <xsl:call-template name="html.output"/>
- </xsl:for-each>
-</xsl:template>
-
-<xsl:template mode="html.title.mode" match="Section">
+<xsl:template mode="html.title.mode" match="Info">
<xsl:value-of select="@name"/>
</xsl:template>
-<xsl:template mode="html.css.mode" match="Section">
+<xsl:template mode="html.css.mode" match="Info">
<xsl:param name="direction"/>
<xsl:param name="left"/>
<xsl:param name="right"/>
@@ -72,35 +46,41 @@ a.navbar-next::after {
</xsl:text>
</xsl:template>
-<xsl:template mode="html.header.mode" match="Section">
- <xsl:if test="@up">
+<xsl:template mode="html.header.mode" match="Info">
+ <xsl:if test="parents">
<div class="trails">
<div class="trail">
- <xsl:call-template name="linktrails"/>
+ <xsl:for-each select="parents//parent">
+ <a href="xref:{@id}">
+ <xsl:value-of select="@name"/>
+ </a>
+ <xsl:text> » </xsl:text>
+ </xsl:for-each>
</div>
</div>
</xsl:if>
</xsl:template>
-<xsl:template mode="html.body.mode" match="Section">
+<xsl:template mode="html.body.mode" match="Info">
+ <xsl:variable name="p" select="@prev-name"/>
+ <xsl:variable name="n" select="@next-name"/>
<div class="navbar">
- <xsl:variable name="preceding" select="(parent::Section[1] | preceding::Section[1])[last()]"/>
- <xsl:variable name="following" select="(Section[1] | following::Section[1])[1]"/>
- <xsl:if test="$preceding">
- <a class="navbar-prev" href="xref:{$preceding/@id}">
- <xsl:value-of select="$preceding/@name"/>
+ <xsl:if test="$p">
+ <a class="navbar-prev" href="xref:{@prev}">
+ <xsl:value-of select="@prev-name"/>
</a>
</xsl:if>
- <xsl:if test="$preceding and $following">
+ <xsl:if test="$p and $n">
<xsl:text>  |  </xsl:text>
</xsl:if>
- <xsl:if test="$following">
- <a class="navbar-next" href="xref:{$following/@id}">
- <xsl:value-of select="$following/@name"/>
+ <xsl:if test="$n">
+ <a class="navbar-next" href="xref:{@next}">
+ <xsl:value-of select="@next-name"/>
</a>
</xsl:if>
</div>
- <xsl:apply-templates select="node()[not(self::Section)]"/>
+
+ <xsl:apply-templates select="node()[not(self::Info)]"/>
</xsl:template>
--
1.7.4.4
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]