Delayed load for info documents



Here's a massively rewritten version of the previous lazy info parsing
code. yelp-info-document.c now follows the design of the Mallard code,
and I've spent some time trying to find bugs in the threading: hopefully
it's all correct!

Rupert

>From 4bec5e319c2985c170cd39d93f64b1794a7da8af Mon Sep 17 00:00:00 2001
From: Rupert Swarbrick <rswarbrick gmail com>
Date: Wed, 4 May 2011 18:22:04 +0100
Subject: [PATCH] Parse info documents one page at a time.

---
 libyelp/yelp-info-document.c |  664 +++++++++++++++++++++++--------------
 libyelp/yelp-info-parser.c   |  756 +++++++++++++++---------------------------
 libyelp/yelp-info-parser.h   |   34 ++-
 stylesheets/info2html.xsl.in |   64 ++---
 4 files changed, 732 insertions(+), 786 deletions(-)

diff --git a/libyelp/yelp-info-document.c b/libyelp/yelp-info-document.c
index 136b274..9c5d4d8 100644
--- a/libyelp/yelp-info-document.c
+++ b/libyelp/yelp-info-document.c
@@ -40,33 +40,54 @@
 #define STYLESHEET DATADIR"/yelp/xslt/info2html.xsl"
 
 typedef enum {
-    INFO_STATE_BLANK,   /* Brand new, run transform as needed */
-    INFO_STATE_PARSING, /* Parsing/transforming document, please wait */
-    INFO_STATE_PARSED,  /* All done, if we ain't got it, it ain't here */
-    INFO_STATE_STOP     /* Stop everything now, object to be disposed */
+    INFO_STATE_BLANK,
+    INFO_STATE_THINKING,
+    INFO_STATE_IDLE,
+    INFO_STATE_STOP
 } InfoState;
 
-typedef struct _YelpInfoDocumentPrivate  YelpInfoDocumentPrivate;
-struct _YelpInfoDocumentPrivate {
-    YelpUri       *uri;
-    InfoState    state;
+/*
+  Info documents are prepared in two stages: reading and
+  parsing/transforming.
+
+  When the document is first created, we read in the file, calculate
+  offsets and generate a hash table of InfoPageData structures by
+  pageid.
 
-    GMutex     *mutex;
-    GThread    *thread;
+  When a page is requested, we then parse it properly and then run an
+  xslt transform.
+ */
 
-    xmlDocPtr   xmldoc;
-    GtkTreeModel  *sections;
+/*
+  InfoPageData stores the unparsed text until transforming, along with
+  links to neighbouring nodes.
+ */
+typedef struct _InfoPageData InfoPageData;
+struct _InfoPageData {
+    YelpInfoDocument *info;
+    gchar            *page_id;
 
-    gboolean    process_running;
-    gboolean    transform_running;
+    UnprocessedSection* unprocessed;
 
-    YelpTransform *transform;
-    guint          chunk_ready;
-    guint          finished;
-    guint          error;
+    YelpTransform    *transform;
+    xmlDocPtr         xmldoc;
 
-    gchar   *root_id;
-    gchar   *visit_prev_id;
+    guint             chunk_ready;
+    guint             finished;
+    guint             error;
+};
+
+typedef struct _YelpInfoDocumentPrivate YelpInfoDocumentPrivate;
+struct _YelpInfoDocumentPrivate {
+    YelpUri     *uri;
+    InfoState    state;
+
+    GMutex      *mutex;
+    GThread     *thread;
+    gboolean     running;
+
+    GSList      *pending;
+    GHashTable  *pages;
 };
 
 
@@ -82,24 +103,30 @@ static gboolean       info_request_page                   (YelpDocument
                                                            YelpDocumentCallback  callback,
                                                            gpointer              user_data);
 
-/* YelpTransform */
-static void           transform_chunk_ready     (YelpTransform        *transform,
-                                                 gchar                *chunk_id,
-                                                 YelpInfoDocument     *info);
-static void           transform_finished        (YelpTransform        *transform,
-                                                 YelpInfoDocument     *info);
-static void           transform_error           (YelpTransform        *transform,
-                                                 YelpInfoDocument     *info);
-static void           transform_finalized       (YelpInfoDocument     *info,
-                                                 gpointer              transform);
-
-static void           info_document_process     (YelpInfoDocument     *info);
-static gboolean       info_sections_visit       (GtkTreeModel         *model,
-                                                 GtkTreePath          *path,
-                                                 GtkTreeIter          *iter,
-                                                 YelpInfoDocument     *info);
-static void           info_document_disconnect  (YelpInfoDocument     *info);
+static void read_document            (YelpInfoDocument *info);
+static void do_pending_pages         (YelpInfoDocument *info);
 
+static void try_transform_page       (YelpInfoDocument *info,
+                                      const gchar      *page_id);
+static void transform_page           (InfoPageData     *page_data);
+
+/* YelpTransform */
+static void transform_chunk_ready    (YelpTransform    *transform,
+                                      gchar            *chunk_id,
+                                      InfoPageData     *page_data);
+static void transform_finished       (YelpTransform    *transform,
+                                      InfoPageData     *page_data);
+static void transform_error          (YelpTransform    *transform,
+                                      InfoPageData     *page_data);
+static void transform_finalized      (InfoPageData     *page_data,
+                                      gpointer          transform);
+
+/* InfoPageData */
+static InfoPageData* info_page_data_new    (YelpInfoDocument     *doc,
+                                            gchar                *page_id,
+                                            UnprocessedSection   *us);
+static void          info_page_data_free   (InfoPageData         *pd);
+static void          info_page_data_cancel (InfoPageData         *page_data);
 
 G_DEFINE_TYPE (YelpInfoDocument, yelp_info_document, YELP_TYPE_DOCUMENT);
 #define GET_PRIV(object) (G_TYPE_INSTANCE_GET_PRIVATE ((object), YELP_TYPE_INFO_DOCUMENT, YelpInfoDocumentPrivate))
@@ -114,6 +141,7 @@ yelp_info_document_class_init (YelpInfoDocumentClass *klass)
     object_class->finalize = yelp_info_document_finalize;
 
     document_class->request_page = info_request_page;
+    // TODO: Should there also be an indexing step?
 
     g_type_class_add_private (klass, sizeof (YelpInfoDocumentPrivate));
 }
@@ -123,9 +151,19 @@ yelp_info_document_init (YelpInfoDocument *info)
 {
     YelpInfoDocumentPrivate *priv = GET_PRIV (info);
 
+    priv->uri = NULL;
     priv->state = INFO_STATE_BLANK;
-    priv->xmldoc = NULL;
+
     priv->mutex = g_mutex_new ();
+    priv->thread = NULL;
+    priv->running = FALSE;
+
+    priv->pending = NULL;
+
+    priv->pages =
+        g_hash_table_new_full (g_str_hash, g_str_equal,
+                               g_free,
+                               (GDestroyNotify)info_page_data_free);
 }
 
 static void
@@ -138,14 +176,21 @@ yelp_info_document_dispose (GObject *object)
         priv->uri = NULL;
     }
 
-    if (priv->sections) {
-        g_object_unref (priv->sections);
-        priv->sections = NULL;
+    if (priv->mutex) {
+        g_mutex_free (priv->mutex);
+        priv->mutex = NULL;
+    }
+
+    if (priv->thread) {
+        g_free (priv->thread);
+        priv->thread = NULL;
     }
 
-    if (priv->transform) {
-        g_object_unref (priv->transform);
-        priv->transform = NULL;
+    g_slist_free_full (priv->pending, g_free);
+
+    if (priv->pages) {
+        g_hash_table_destroy (priv->pages);
+        priv->pages = NULL;
     }
 
     G_OBJECT_CLASS (yelp_info_document_parent_class)->dispose (object);
@@ -154,17 +199,30 @@ yelp_info_document_dispose (GObject *object)
 static void
 yelp_info_document_finalize (GObject *object)
 {
-    YelpInfoDocumentPrivate *priv = GET_PRIV (object);
+    G_OBJECT_CLASS (yelp_info_document_parent_class)->finalize (object);
+}
 
-    if (priv->xmldoc)
-        xmlFreeDoc (priv->xmldoc);
+static InfoPageData*
+info_page_data_new (YelpInfoDocument *doc,
+                    gchar *page_id,
+                    UnprocessedSection *us)
+{
+    InfoPageData *ipd = g_new0 (InfoPageData, 1);
+    ipd->info = doc;
+    ipd->page_id = page_id;
+    ipd->unprocessed = us;
 
-    g_free (priv->root_id);
-    g_free (priv->visit_prev_id);
+    return ipd;
+}
 
-    g_mutex_free (priv->mutex);
+static void
+info_page_data_free (InfoPageData *pd)
+{
+    g_free (pd->page_id);
+    unprocessed_section_free (pd->unprocessed);
+    g_object_unref (pd->transform);
 
-    G_OBJECT_CLASS (yelp_info_document_parent_class)->finalize (object);
+    g_free (pd);
 }
 
 /******************************************************************************/
@@ -184,16 +242,14 @@ yelp_info_document_new (YelpUri *uri)
                                               NULL);
     g_free (doc_uri);
     priv = GET_PRIV (info);
-
     priv->uri = g_object_ref (uri);
 
+    yelp_document_set_page_id ((YelpDocument *) info, NULL, "Top");
+    yelp_document_set_page_id ((YelpDocument *) info, "Top", "Top");
+
     return (YelpDocument *) info;
 }
 
-
-/******************************************************************************/
-/** YelpDocument **************************************************************/
-
 static gboolean
 info_request_page (YelpDocument         *document,
                    const gchar          *page_id,
@@ -206,32 +262,40 @@ info_request_page (YelpDocument         *document,
     GError *error;
     gboolean handled;
 
-    if (page_id == NULL)
-        page_id = priv->root_id;
+    debug_print (DB_FUNCTION, "entering (id = %s)\n", page_id);
+
+    if (page_id == NULL) page_id = "Top";
 
     handled =
-        YELP_DOCUMENT_CLASS (yelp_info_document_parent_class)->request_page (document,
-                                                                             page_id,
-                                                                             cancellable,
-                                                                             callback,
-                                                                             user_data);
+        YELP_DOCUMENT_CLASS (yelp_info_document_parent_class)->
+        request_page (document, page_id, cancellable,
+                      callback, user_data);
+
     if (handled) {
         return TRUE;
     }
 
     g_mutex_lock (priv->mutex);
 
-    switch (priv->state) {
-    case INFO_STATE_BLANK:
-	priv->state = INFO_STATE_PARSING;
-	priv->process_running = TRUE;
+    if (priv->state == INFO_STATE_BLANK) {
+        priv->state = INFO_STATE_THINKING;
+        priv->running = TRUE;
         g_object_ref (document);
-	priv->thread = g_thread_create ((GThreadFunc) info_document_process,
+        priv->thread = g_thread_create ((GThreadFunc) read_document,
                                         document, FALSE, NULL);
+    }
+
+    switch (priv->state) {
+    case INFO_STATE_THINKING:
+        priv->pending =
+            g_slist_prepend (priv->pending,
+                             (gpointer) g_strdup (page_id));
 	break;
-    case INFO_STATE_PARSING:
-	break;
-    case INFO_STATE_PARSED:
+    case INFO_STATE_IDLE:
+        try_transform_page (YELP_INFO_DOCUMENT (document), page_id);
+        break;
+
+    case INFO_STATE_BLANK:
     case INFO_STATE_STOP:
         docuri = yelp_uri_get_document_uri (priv->uri);
         error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
@@ -246,247 +310,341 @@ info_request_page (YelpDocument         *document,
     }
 
     g_mutex_unlock (priv->mutex);
-    return TRUE;
-}
-
 
-/******************************************************************************/
-/** YelpTransform *************************************************************/
+    return FALSE;
+}
 
+/*
+  read_document is responsible for reading in all the pages
+  (unparsed), and populating the hash table with InfoPageData
+  structures.
+ */
 static void
-transform_chunk_ready (YelpTransform    *transform,
-                       gchar            *chunk_id,
-                       YelpInfoDocument *info)
+read_document (YelpInfoDocument *info)
 {
     YelpInfoDocumentPrivate *priv = GET_PRIV (info);
-    gchar *content;
+    GFile *file = NULL;
+    gchar *filepath = NULL;
+    GError *error;
 
-    g_assert (transform == priv->transform);
+    file = yelp_uri_get_file (priv->uri);
+    if (file == NULL) {
+        gchar *uri = yelp_uri_get_canonical_uri (priv->uri);
+        error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
+                             _("The file for uri '%s' does not exist."),
+                             uri);
+        yelp_document_error_pending ((YelpDocument *) info, error);
+        g_free (uri);
+        g_error_free (error);
+        goto done;
+    }
 
-    if (priv->state == INFO_STATE_STOP) {
-        info_document_disconnect (info);
-        return;
+    filepath = g_file_get_path (file);
+    g_object_unref (file);
+    if (!g_file_test (filepath, G_FILE_TEST_IS_REGULAR)) {
+        error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
+                             _("The file '%s' does not exist."),
+                             filepath);
+        yelp_document_error_pending ((YelpDocument *) info, error);
+        g_error_free (error);
+        goto done;
     }
 
-    content = yelp_transform_take_chunk (transform, chunk_id);
-    yelp_document_give_contents (YELP_DOCUMENT (info),
-                                 chunk_id,
-                                 content,
-                                 "application/xhtml+xml");
+    GHashTable *unparsed_sections;
 
-    yelp_document_signal (YELP_DOCUMENT (info),
-                          chunk_id,
-                          YELP_DOCUMENT_SIGNAL_INFO,
-                          NULL);
-    yelp_document_signal (YELP_DOCUMENT (info),
-                          chunk_id,
-                          YELP_DOCUMENT_SIGNAL_CONTENTS,
-                          NULL);
+    unparsed_sections = yelp_info_parser_read_file (filepath);
+    if (!unparsed_sections) {
+        error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
+                             _("The file '%s' could not be parsed as "
+                               "an info document."),
+                             filepath);
+        yelp_document_error_pending ((YelpDocument *) info, error);
+        goto done;
+    }
+
+    /*
+      unparsed_sections is a hash of pageid -> OpaqueStruct, and we
+      need to wrap these structs up in InfoPageData structures, then
+      stick the result in priv->pages.
+    */
+    GHashTableIter iter;
+    gchar* page_id;
+    UnprocessedSection* section;
+
+    g_mutex_lock (priv->mutex);
+    g_hash_table_iter_init (&iter, unparsed_sections);
+    while (g_hash_table_iter_next (&iter,
+                                   (gpointer*)&page_id,
+                                   (gpointer*)&section)) {
+        /*
+          info_page_data_new takes ownership of the memory of page_id
+          and section
+        */
+        InfoPageData *ipd = info_page_data_new (info,
+                                                page_id, section);
+
+        register_section (YELP_DOCUMENT (info), section);
+
+        g_hash_table_insert (priv->pages, g_strdup(page_id), ipd);
+    }
+    g_hash_table_destroy (unparsed_sections);
+    priv->state = INFO_STATE_IDLE;
+    g_mutex_unlock (priv->mutex);
+
+    /*
+      Finally, we can deal with whatever backlog of pages is waiting.
+     */
+    do_pending_pages (info);
+
+done:
+    g_free (filepath);
+    priv->running = FALSE;
+    g_object_unref (info);
 }
 
+/*
+  This assumes that the current state is INFO_STATE_IDLE.
+ */
 static void
-transform_finished (YelpTransform    *transform,
-                    YelpInfoDocument *info)
+do_pending_pages (YelpInfoDocument *info)
 {
     YelpInfoDocumentPrivate *priv = GET_PRIV (info);
-    gchar *docuri;
+
+    g_mutex_lock (priv->mutex);
+    while (priv->pending) {
+        gchar *page_id = (gchar *) priv->pending->data;
+        try_transform_page (info, page_id);
+        g_free (page_id);
+        priv->pending = g_slist_delete_link (priv->pending, priv->pending);
+    }
+    g_mutex_unlock (priv->mutex);
+}
+
+/*
+  Should be called with the mutex held. Starts a transform thread for
+  the given page id
+*/
+static void
+try_transform_page (YelpInfoDocument *info, const gchar *page_id)
+{
+    YelpInfoDocumentPrivate *priv = GET_PRIV (info);
+    InfoPageData *page_data = NULL;
+    gchar *real_id = NULL;
     GError *error;
 
-    g_assert (transform == priv->transform);
+    debug_print (DB_FUNCTION, "entering\n");
 
-    if (priv->state == INFO_STATE_STOP) {
-        info_document_disconnect (info);
-        return;
+    if (page_id)
+        real_id = yelp_document_get_page_id (YELP_DOCUMENT (info),
+                                             page_id);
+
+    if (real_id) {
+        page_data = g_hash_table_lookup (priv->pages, real_id);
+        g_free (real_id);
     }
 
-    info_document_disconnect (info);
-    priv->state = INFO_STATE_PARSED;
 
-    /* We want to free priv->xmldoc, but we can't free it before transform
-       is finalized.   Otherwise, we could crash when YelpTransform frees
-       its libxslt resources.
-     */
-    g_object_weak_ref ((GObject *) transform,
-                       (GWeakNotify) transform_finalized,
-                       info);
-
-    docuri = yelp_uri_get_document_uri (priv->uri);
-    error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
-                         _("The requested page was not found in the document ‘%s’."),
-                         docuri);
-    g_free (docuri);
-    yelp_document_error_pending ((YelpDocument *) info, error);
-    g_error_free (error);
+    if (!page_data) {
+        gchar *docuri = yelp_uri_get_document_uri (priv->uri);
+        error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
+                             _("The page ‘%s’ was not found in the document ‘%s’."),
+                             page_id, docuri);
+        g_free (docuri);
+        yelp_document_signal ((YelpDocument *) info, page_id,
+                              YELP_DOCUMENT_SIGNAL_ERROR,
+                              error);
+        g_error_free (error);
+        return;
+    }
+
+    transform_page (page_data);
 }
 
+/* This is called with the mutex held */
 static void
-transform_error (YelpTransform    *transform,
-                 YelpInfoDocument *info)
+transform_page (InfoPageData *page_data)
 {
-    YelpInfoDocumentPrivate *priv = GET_PRIV (info);
+    gint  params_i = 0;
+    gchar **params = NULL;
     GError *error;
 
-    g_assert (transform == priv->transform);
-
-    if (priv->state == INFO_STATE_STOP) {
-        info_document_disconnect (info);
+    /*
+      A transform might already be running. If so, leave it be!
+      Note, the other obvious approach would be to call
+      page_data_cancel on the existing transform and start
+      again. Besides the fact this is wasteful, it leaves a problem
+      with when to release xmldoc.
+    */
+    if (page_data->transform) return;
+
+    /*
+      There are two jobs to do. Firstly, we must parse the text in the
+      unprocessed section into xml. Secondly, it must be transformed
+      to xhtml.
+    */
+    UnprocessedSection *us = page_data->unprocessed;
+    g_assert (us);
+
+    page_data->xmldoc = yelp_info_parser_parse_section (us, NULL);
+    if (!page_data->xmldoc) {
+        error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
+                             _("Cannot parse page '%s'."),
+                             page_data->page_id);
+        yelp_document_error_pending ((YelpDocument *) page_data->info, error);
         return;
     }
 
-    error = yelp_transform_get_error (transform);
-    yelp_document_error_pending ((YelpDocument *) info, error);
-    g_error_free (error);
+    page_data->transform = yelp_transform_new (STYLESHEET);
+    page_data->chunk_ready =
+        g_signal_connect (page_data->transform, "chunk-ready",
+                          (GCallback) transform_chunk_ready,
+                          page_data);
+    page_data->finished =
+        g_signal_connect (page_data->transform, "finished",
+                          (GCallback) transform_finished,
+                          page_data);
+    page_data->error =
+        g_signal_connect (page_data->transform, "error",
+                          (GCallback) transform_error,
+                          page_data);
+
+    params =
+        yelp_settings_get_all_params (yelp_settings_get_default (),
+                                      0, &params_i);
+
+    /* transform_finalized gets called when we destroy transform and
+     * frees the xmldoc.
+     *
+     * We have to use a weak_ref since if we free the memory before
+     * that of transform, we might crash when YelpTransform frees its
+     * libxslt resources.
+     */
+    g_object_weak_ref ((GObject *) page_data->transform,
+                       (GWeakNotify) transform_finalized,
+                       page_data);
 
-    info_document_disconnect (info);
+    yelp_transform_start (page_data->transform,
+                          page_data->xmldoc,
+                          NULL,
+                          (const gchar * const *) params);
+
+    g_strfreev (params);
 }
 
+/******************************************************************************/
+/** InfoPageData **************************************************************/
+
 static void
-transform_finalized (YelpInfoDocument *info,
-                     gpointer          transform)
+info_page_data_cancel (InfoPageData *page_data)
 {
-    YelpInfoDocumentPrivate *priv = GET_PRIV (info);
- 
-    if (priv->xmldoc)
-	xmlFreeDoc (priv->xmldoc);
-    priv->xmldoc = NULL;
-}
+    debug_print (DB_FUNCTION, "entering\n");
 
+    if (!page_data->transform) return;
+
+    if (page_data->chunk_ready) {
+        g_signal_handler_disconnect (page_data->transform, page_data->chunk_ready);
+        page_data->chunk_ready = 0;
+    }
+    if (page_data->finished) {
+        g_signal_handler_disconnect (page_data->transform, page_data->finished);
+        page_data->finished = 0;
+    }
+    if (page_data->error) {
+        g_signal_handler_disconnect (page_data->transform, page_data->error);
+        page_data->error = 0;
+    }
+    yelp_transform_cancel (page_data->transform);
+    g_object_unref (page_data->transform);
+    page_data->transform = NULL;
+}
 
 
 /******************************************************************************/
-/** Threaded ******************************************************************/
+/** YelpTransform *************************************************************/
 
 static void
-info_document_process (YelpInfoDocument *info)
+transform_chunk_ready (YelpTransform    *transform,
+                       gchar            *chunk_id,
+                       InfoPageData     *page_data)
 {
-    YelpInfoDocumentPrivate *priv = GET_PRIV (info);
-    GFile *file = NULL;
-    gchar *filepath = NULL;
-    GError *error;
-    gint  params_i = 0;
-    gchar **params = NULL;
+    YelpInfoDocumentPrivate *priv = GET_PRIV (page_data->info);
+    gchar *content;
 
-    file = yelp_uri_get_file (priv->uri);
-    if (file == NULL) {
-        error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
-                             _("The file does not exist."));
-        yelp_document_error_pending ((YelpDocument *) info, error);
-        g_error_free (error);
-        goto done;
-    }
+    g_assert (page_data && page_data->info &&
+              YELP_IS_INFO_DOCUMENT (page_data->info));
+    g_assert (transform == page_data->transform);
 
-    filepath = g_file_get_path (file);
-    g_object_unref (file);
-    if (!g_file_test (filepath, G_FILE_TEST_IS_REGULAR)) {
-        error = g_error_new (YELP_ERROR, YELP_ERROR_NOT_FOUND,
-                             _("The file ‘%s’ does not exist."),
-                             filepath);
-        yelp_document_error_pending ((YelpDocument *) info, error);
-        g_error_free (error);
-        goto done;
+    if (priv->state == INFO_STATE_STOP) {
+        info_page_data_cancel (page_data);
+        return;
     }
 
-    priv->sections = (GtkTreeModel *) yelp_info_parser_parse_file (filepath);
-    gtk_tree_model_foreach (priv->sections,
-                            (GtkTreeModelForeachFunc) info_sections_visit,
-                            info);
-    priv->xmldoc = yelp_info_parser_parse_tree ((GtkTreeStore *) priv->sections);
+    content = yelp_transform_take_chunk (transform, chunk_id);
 
-    if (priv->xmldoc == NULL) {
-	error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
-                             _("The file ‘%s’ could not be parsed because it is"
-                               " not a well-formed info page."),
-                             filepath);
-	yelp_document_error_pending ((YelpDocument *) info, error);
-        goto done;
-    }
+    yelp_document_give_contents (YELP_DOCUMENT (page_data->info),
+                                 chunk_id,
+                                 content,
+                                 "application/xhtml+xml");
 
-    g_mutex_lock (priv->mutex);
-    if (priv->state == INFO_STATE_STOP) {
-	g_mutex_unlock (priv->mutex);
-	goto done;
-    }
+    yelp_document_signal (YELP_DOCUMENT (page_data->info),
+                          chunk_id,
+                          YELP_DOCUMENT_SIGNAL_CONTENTS,
+                          NULL);
+}
 
-    priv->transform = yelp_transform_new (STYLESHEET);
-    priv->chunk_ready =
-        g_signal_connect (priv->transform, "chunk-ready",
-                          (GCallback) transform_chunk_ready,
-                          info);
-    priv->finished =
-        g_signal_connect (priv->transform, "finished",
-                          (GCallback) transform_finished,
-                          info);
-    priv->error =
-        g_signal_connect (priv->transform, "error",
-                          (GCallback) transform_error,
-                          info);
+static void
+transform_finished (YelpTransform    *transform,
+                    InfoPageData     *page_data)
+{
+    YelpInfoDocumentPrivate *priv;
 
-    params = yelp_settings_get_all_params (yelp_settings_get_default (), 0, &params_i);
+    debug_print (DB_FUNCTION, "entering\n");
 
-    priv->transform_running = TRUE;
-    yelp_transform_start (priv->transform,
-                          priv->xmldoc,
-                          NULL,
-			  (const gchar * const *) params);
-    g_strfreev (params);
-    g_mutex_unlock (priv->mutex);
+    g_assert (page_data && page_data->info &&
+              YELP_IS_INFO_DOCUMENT (page_data->info));
+    g_assert (transform == page_data->transform);
 
- done:
-    g_free (filepath);
-    priv->process_running = FALSE;
-    g_object_unref (info);
+    priv = GET_PRIV (page_data->info);
+
+    if (priv->state == INFO_STATE_STOP) {
+        info_page_data_cancel (page_data);
+        return;
+    }
+
+    info_page_data_cancel (page_data);
+    transform_finalized (page_data, transform);
 }
 
-static gboolean
-info_sections_visit (GtkTreeModel     *model,
-                     GtkTreePath      *path,
-                     GtkTreeIter      *iter,
-                     YelpInfoDocument *info)
+static void
+transform_error (YelpTransform    *transform,
+                 InfoPageData     *page_data)
 {
-    YelpInfoDocumentPrivate *priv = GET_PRIV (info);
-    gchar *page_id, *title;
-
-    gtk_tree_model_get (model, iter,
-                        INFO_PARSER_COLUMN_PAGE_NO, &page_id,
-                        INFO_PARSER_COLUMN_PAGE_NAME, &title,
-                        -1);
-    yelp_document_set_page_id ((YelpDocument *) info, page_id, page_id);
-    yelp_document_set_page_title ((YelpDocument *) info, page_id, title);
-
-    if (priv->root_id == NULL) {
-        priv->root_id = g_strdup (page_id);
-        yelp_document_set_page_id ((YelpDocument *) info, NULL, page_id);
-    }
-    yelp_document_set_root_id ((YelpDocument *) info, page_id, priv->root_id);
+    YelpInfoDocumentPrivate *priv;
+    GError *error;
+
+    g_assert (page_data != NULL && page_data->info != NULL &&
+              YELP_IS_INFO_DOCUMENT (page_data->info));
+    g_assert (transform == page_data->transform);
 
-    if (priv->visit_prev_id != NULL) {
-        yelp_document_set_prev_id ((YelpDocument *) info, page_id, priv->visit_prev_id);
-        yelp_document_set_next_id ((YelpDocument *) info, priv->visit_prev_id, page_id);
-        g_free (priv->visit_prev_id);
+    priv = GET_PRIV (page_data->info);
+
+    if (priv->state == INFO_STATE_STOP) {
+        info_page_data_cancel (page_data);
+        return;
     }
-    priv->visit_prev_id = page_id;
-    g_free (title);
-    return FALSE;
+
+    error = yelp_transform_get_error (transform);
+    yelp_document_error_pending (YELP_DOCUMENT (page_data->info),
+                                 error);
+    g_error_free (error);
+
+    info_page_data_cancel (page_data);
 }
 
 static void
-info_document_disconnect (YelpInfoDocument *info)
+transform_finalized (InfoPageData *page_data,
+                     gpointer      transform)
 {
-    YelpInfoDocumentPrivate *priv = GET_PRIV (info);
-    if (priv->chunk_ready) {
-        g_signal_handler_disconnect (priv->transform, priv->chunk_ready);
-        priv->chunk_ready = 0;
-    }
-    if (priv->finished) {
-        g_signal_handler_disconnect (priv->transform, priv->finished);
-        priv->finished = 0;
-    }
-    if (priv->error) {
-        g_signal_handler_disconnect (priv->transform, priv->error);
-        priv->error = 0;
-    }
-    yelp_transform_cancel (priv->transform);
-    g_object_unref (priv->transform);
-    priv->transform = NULL;
-    priv->transform_running = FALSE;
+    if (page_data->xmldoc) xmlFreeDoc (page_data->xmldoc);
+    page_data->xmldoc = NULL;
 }
diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c
index 5ecdc5a..33bbe44 100644
--- a/libyelp/yelp-info-parser.c
+++ b/libyelp/yelp-info-parser.c
@@ -32,29 +32,89 @@
 #include "yelp-magic-decompressor.h"
 #include "yelp-debug.h"
 
+static void yelp_info_parse_menu    (xmlNodePtr root,
+                                     gchar *page_content,
+                                     gboolean notes);
+static gboolean get_menuoptions     (gchar *line, gchar **title,
+                                     gchar **ref, gchar **desc,
+                                     gchar **xref);
+static void info_process_text_notes (xmlNodePtr node, gchar *content);
 
-GtkTreeIter *         find_real_top                      (GtkTreeModel *model, 
-							  GtkTreeIter *it);
-GtkTreeIter *         find_real_sibling                  (GtkTreeModel *model,
-							  GtkTreeIter *it, 
-							  GtkTreeIter *comp);
-xmlNodePtr            yelp_info_parse_menu               (GtkTreeStore *tree,
-							  xmlNodePtr *node,
-							  gchar *page_content,
-							  gboolean notes);
-gboolean              get_menuoptions                    (gchar *line, 
-							  gchar **title, 
-							  gchar **ref, 
-							  gchar **desc, 
-							  gchar **xref);
-gboolean              resolve_frag_id                    (GtkTreeModel *model, 
-							  GtkTreePath *path, 
-							  GtkTreeIter *iter,
-							  gpointer data);
-void   		      info_process_text_notes            (xmlNodePtr *node, 
-							  gchar *content,
-							  GtkTreeStore
-							  *tree);
+/*
+  This structure is used to represent links. name is the name which
+  should appear in the text (with spaces etc.) and link is the mangled
+  name which is used as an id.
+ */
+typedef struct _XRef XRef;
+struct _XRef {
+  gchar *name, *id;
+};
+
+/*
+  This is created in the first pass through the info file.
+
+  contents is the text content of the section, without any
+  processing. The XRefs are so that we can build next/prev links and
+  the upward linktrail.
+ */
+struct _UnprocessedSection {
+    gchar  *contents;
+    XRef   *node, *up, *prev, *next;
+    GSList *ancestry;
+};
+
+static XRef*
+xref_new ()
+{
+  return g_new0 (XRef, 1);
+}
+
+static void
+xref_free (XRef *xref)
+{
+  g_free (xref->name);
+  g_free (xref->id);
+  g_free (xref);
+}
+
+static UnprocessedSection*
+unprocessed_section_new ()
+{
+  UnprocessedSection *ret = g_new0 (UnprocessedSection, 1);
+  ret->node = xref_new ();
+  ret->up = xref_new ();
+  ret->prev = xref_new ();
+  ret->next = xref_new ();
+  return ret;
+}
+
+void
+unprocessed_section_free (UnprocessedSection *us)
+{
+  if (!us) return;
+  g_free (us->contents);
+  xref_free (us->node);
+  xref_free (us->up);
+  xref_free (us->prev);
+  xref_free (us->next);
+  /* ancestry borrows the sections' own 'up' XRefs; free only the list. */
+  g_slist_free (us->ancestry);
+  g_free (us);
+}
+
+void
+register_section (YelpDocument *doc, const UnprocessedSection *us)
+{
+    g_assert (us && us->node->id);
+    /* Register the page under its own id, then any next/prev/up links. */
+    yelp_document_set_page_id (doc, us->node->id, us->node->id);
+    if (us->next->id)
+        yelp_document_set_next_id (doc, us->node->id, us->next->id);
+    if (us->prev->id)
+        yelp_document_set_prev_id (doc, us->node->id, us->prev->id);
+    if (us->up->id)
+        yelp_document_set_up_id (doc, us->node->id, us->up->id);
+}
 
 /*
   Used to output the correct <heading level="?" /> tag.
@@ -608,491 +668,214 @@ get_value_after (const char* source, const char *key)
   return get_value_after_ext (source, key, ",", "\n\x7f");
 }
 
-static int
-node2page (GHashTable *nodes2pages, char *node)
-{
-  gint page;
-
-  if (g_hash_table_lookup_extended (nodes2pages, node,
-                                    NULL, (gpointer*) &page))
-    return page;
-
-  /* This shouldn't happen: we should only ever have to look up pages
-   * that exist. */
-  g_return_val_if_reached (0);
-}
+/*
+  Convert a page name into something that can go in a xref. (At the
+  moment, this is just cleaning spaces).
 
-static GtkTreeIter
-*node2iter (GHashTable *nodes2iters, char *node)
+  Operates in place but returns a pointer to the string (so chains
+  with g_strdup are easier).
+ */
+static gchar*
+name2id (gchar* str)
 {
-	GtkTreeIter *iter;
-
-	iter = g_hash_table_lookup (nodes2iters, node);
-	d (if (!iter) debug_print (DB_WARN, "Could not retrieve iter for node !%s!\n", node));
-	return iter;
+  return g_strdelimit (str, " ", '_');
 }
 
-GtkTreeIter 
-*find_real_top (GtkTreeModel *model, GtkTreeIter *it)
+/*
+  process_page is responsible for understanding enough of page_text to
+  make an UnprocessedSection structure out of it and insert it
+  correctly into usections.
+ */
+static void
+process_page (GHashTable *usections, char *page_text)
 {
-  GtkTreeIter *r = NULL;
-  GtkTreeIter *tmp = NULL;
-  
-  if (!it)
-    return NULL;
-
-  r = gtk_tree_iter_copy (it);
-  tmp = g_malloc0 (sizeof (GtkTreeIter));
-  while (gtk_tree_model_iter_parent (model, tmp, r)) {
-    gtk_tree_iter_free (r);
-    r = gtk_tree_iter_copy (tmp);
-  }
-  g_free (tmp);
-
-  return r;
-}
+  char **parts;
+  UnprocessedSection *section = unprocessed_section_new ();
 
-GtkTreeIter * find_real_sibling (GtkTreeModel *model,
-				 GtkTreeIter *it, GtkTreeIter *comp)
-{
-  GtkTreeIter *r;
-  GtkTreeIter *tmp = NULL;
-  gboolean result = FALSE;
-  gchar *title;
-  gchar *reftitle;
+  /* Split out first line from text and find prev/next/up links. */
+  parts = g_strsplit (page_text, "\n", 3);
 
-  if (!it) {
-    return NULL;
+  section->node->name = get_value_after (parts[0], "Node: ");
+  if (!section->node->name) {
+    /* Section doesn't have a name, so nothing can link to it. */
+    goto cleanup;
   }
+  section->node->id = name2id (g_strdup (section->node->name));
 
-  r = gtk_tree_iter_copy (it);
-  tmp = gtk_tree_iter_copy (it);
-
-  reftitle = gtk_tree_model_get_string_from_iter (model, comp);
-
-  result = gtk_tree_model_iter_parent (model, r, it);
-  if (!result)
-    return it;
-
-  title = gtk_tree_model_get_string_from_iter (model, r);
-
-  while (!g_str_equal (title, reftitle) && result) {
-    gtk_tree_iter_free (tmp);
-    tmp = gtk_tree_iter_copy (r);
-    result = gtk_tree_model_iter_parent (model, r, tmp);
-    if (result)
-      title = gtk_tree_model_get_string_from_iter (model, r);
+  /*
+    Don't look for the parent if we're already at Top (you'll get
+    '(dir)', but that doesn't appear in *this* document)
+   */
+  if (strcmp (section->node->id, "Top"))
+    section->up->name = get_value_after (parts[0], "Up: ");
+
+  section->prev->name = get_value_after (parts[0], "Prev: ");
+  section->next->name = get_value_after (parts[0], "Next: ");
+
+  if (section->up->name)
+    section->up->id = name2id (g_strdup (section->up->name));
+  if (section->prev->name)
+    section->prev->id = name2id (g_strdup (section->prev->name));
+  if (section->next->name)
+    section->next->id = name2id (g_strdup (section->next->name));
+
+  if (section->next->id && g_str_equal (section->next->id, "Top")) {
+    g_free (section->next->id);
+    section->next->id = NULL;
   }
 
-  if (!g_str_equal (title, reftitle))
-    {
-      gtk_tree_iter_free (tmp);
-      tmp = NULL;
-    }
-
-  gtk_tree_iter_free (r);
-  g_free (title);
-  g_free (reftitle);
-  return tmp;
-
-}
-
-static void
-process_page (GtkTreeStore *tree,
-              GHashTable *nodes2pages, GHashTable *nodes2iters,
-              int *processed_table, char **page_list, char *page_text)
-{
-	GtkTreeIter *iter;
-	
-	char **parts;
-	char *node;
-	char *up;
-	char *prev;
-	char *next;
-	gchar *tmp;
-
-	int page;
-	
-	/* split out the header line and the text */
-	parts = g_strsplit (page_text, "\n", 3);
-
-	node = get_value_after (parts[0], "Node: ");
-	up = get_value_after (parts[0], "Up: ");
-	prev = get_value_after (parts[0], "Prev: ");
-	next = get_value_after (parts[0], "Next: ");
-
-	if (next && g_str_equal (next, "Top")) {
-	  g_free (next);
-	  next = NULL;
-	}
-	if (g_str_equal (node, "Top") && prev != NULL) {
-	  g_free (prev);
-	  prev = NULL;
-	}
-
-	/* check to see if this page has been processed already */
-	page = node2page (nodes2pages, node);
-	if (processed_table[page]) {
-		return;
-	}
-	processed_table[page] = 1;
-	
-	debug_print (DB_DEBUG, "-- Processing Page %s\n\tParent: %s\n", node, up);
-
-	iter = g_slice_alloc0 (sizeof (GtkTreeIter));
-	/* check to see if we need to process our parent and siblings */
-	if (up && g_ascii_strncasecmp (up, "(dir)", 5) && strcmp (up, "Top"))
-	{
-		page = node2page (nodes2pages, up);
-		if (!processed_table[page])
-		{
-		  debug_print (DB_DEBUG, "%% Processing Node %s\n", up);
-                  process_page (tree, nodes2pages,
-				nodes2iters, processed_table, page_list,
-				page_list[page]);
-		}
-	}
-	if (prev && g_ascii_strncasecmp (prev, "(dir)", 5))
-	  {
-	    if (strncmp (node, "Top", 3)) {
-	      /* Special case the Top node to always appear first */
-	    } else {
-	      page = node2page (nodes2pages, prev);
-	      if (!processed_table[page])
-		{
-		  debug_print (DB_DEBUG, "%% Processing Node %s\n", prev);
-		  process_page (tree, nodes2pages,
-				nodes2iters, processed_table, page_list,
-				page_list[page]);
-		}
-	    }
-	  }
-	
-	/* by this point our parent and older sibling should be processed */
-	if (!up || !g_ascii_strcasecmp (up, "(dir)"))
-	{
-	  debug_print (DB_DEBUG, "\t> no parent\n");
-		if (!prev || !g_ascii_strcasecmp (prev, "(dir)"))
-		{
-		  debug_print (DB_DEBUG, "\t> no previous\n");
-			gtk_tree_store_append (tree, iter, NULL);
-		}
-		else if (prev) {
-		  GtkTreeIter *real;
-		  real = find_real_top (GTK_TREE_MODEL (tree), 
-					node2iter (nodes2iters, prev));
-		  if (real) {
-		    gtk_tree_store_insert_after (tree, iter, NULL,
-						 real);
-		    gtk_tree_iter_free (real);
-		  }
-		  else 
-		    gtk_tree_store_append (tree, iter, NULL);
-		}
-	}
-	else if (!prev || !g_ascii_strcasecmp (prev, "(dir)") || !strcmp (prev, up))
-	{
-	  debug_print (DB_DEBUG, "\t> no previous\n");
-		gtk_tree_store_append (tree, iter,
-			node2iter (nodes2iters, up));
-	}
-	else if (up && prev)
-	{
-	  GtkTreeIter *upit = node2iter (nodes2iters, up);
-	  GtkTreeIter *previt = node2iter (nodes2iters, prev);
-	  GtkTreeIter *nit = NULL;
-	  debug_print (DB_DEBUG, "+++ Parent: %s Previous: %s\n", up, prev);
-	  
-	  d (if (upit) debug_print (DB_DEBUG, "++++ Have parent node!\n"));
-	  d (if (previt) debug_print (DB_DEBUG, "++++ Have previous node!\n"));
-	  nit = find_real_sibling (GTK_TREE_MODEL (tree), previt, upit);
-	  if (nit) {
-	    gtk_tree_store_insert_after (tree, iter,
-					 upit,
-					 nit);
-	    gtk_tree_iter_free (nit);
-	  }
-	  else
-	    gtk_tree_store_append (tree, iter, upit);
-	}
-	else
-	{
-	  debug_print (DB_DEBUG, "# node %s was not put in tree\n", node);
-	  return;
-	}
-
-	d (if (iter) debug_print (DB_DEBUG, "Have a valid iter, storing for %s\n", node));
-
-	g_hash_table_insert (nodes2iters, g_strdup (node), iter);
-	debug_print (DB_DEBUG, "size: %i\n", g_hash_table_size (nodes2iters));
-
-	/*tmp = g_strdup_printf ("%i",
-	  node2page (nodes2pages, node));*/
-	tmp = g_strdup (node);
-	tmp = g_strdelimit (tmp, " ", '_');
-	gtk_tree_store_set (tree, iter,
-			    INFO_PARSER_COLUMN_PAGE_NO, tmp,
-			    INFO_PARSER_COLUMN_PAGE_NAME, node,
-			    INFO_PARSER_COLUMN_PAGE_CONTENT, parts[2],
-			    -1);
+  if (section->prev->id && g_str_equal (section->node->id, "Top")) {
+    g_free (section->prev->id);
+    section->prev->id = NULL;
+  }
 
-	g_free (tmp);
-	g_free (node);
-	g_free (up);
-	g_free (prev);
-	g_free (next);
-	g_strfreev (parts);
-}
+  /* Check to see if this page has been processed already */
+  if (g_hash_table_lookup (usections, section->node->id)) {
+    goto cleanup;
+  }
 
-struct TagTableFix {
-  GHashTable *nodes2pages; /* Build this... */
-  GHashTable *pages2nodes; /* ... using this. */
-};
+  /* Set pointers to NULL so that the free-fest in cleanup doesn't
+     free the memory. */
+  section->contents = parts[2];
+  parts[2] = NULL;
 
-static void
-use_offset2page (gpointer o, gpointer p, gpointer ud)
-{
-  struct TagTableFix* ttf = (struct TagTableFix*)ud;
+  g_hash_table_insert (usections,
+                       g_strdup (section->node->id), section);
+  section = NULL;
 
-  const gchar* node = g_hash_table_lookup (ttf->pages2nodes, p);
-  if (node) {
-    g_hash_table_insert (ttf->nodes2pages, g_strdup (node), p);
-  }
+ cleanup:
+  unprocessed_section_free (section);
+  g_strfreev (parts);
 }
 
 /*
-  We had a nodes2offsets hash table, but sometimes these things
-  lie. How terribly rude. Anyway, use offsets2pages and pages2nodes
-  (and injectivity!) to construct the nodes2pages hash table.
-*/
-static GHashTable *
-make_nodes2pages (GHashTable* offsets2pages,
-                  GHashTable* pages2nodes)
+  Returns a list of XRef*'s for the parents of usection, with the most
+  senior first.
+ */
+static GSList*
+get_parent_list (const UnprocessedSection *usection,
+                 GHashTable *usections)
 {
-  struct TagTableFix ttf;
-
-  ttf.nodes2pages =
-    g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
-  ttf.pages2nodes = pages2nodes;
+  GSList *ret = NULL;
 
-  g_hash_table_foreach (offsets2pages, use_offset2page, &ttf);
+  while (usection->up->id && usection->up->name) {
+    ret = g_slist_prepend (ret, usection->up);
+    usection = g_hash_table_lookup (usections, usection->up->id);
+    if (!usection)
+      break;
+  }
 
-  return ttf.nodes2pages;
+  return ret;
 }
 
-/**
- * Parse file into a GtkTreeStore containing useful information that we can
- * later convert into a nice XML document or something else.
- */
-GtkTreeStore
-*yelp_info_parser_parse_file (char *file)
+GHashTable*
+yelp_info_parser_read_file (const gchar *file)
 {
-	gchar **page_list;
-	char **ptr;
-	int pages;
-	int offset;
-	GHashTable *offsets2pages = NULL;
-	GHashTable *pages2nodes = NULL;
-        GHashTable *nodes2pages = NULL;
-	GHashTable *nodes2iters = NULL;
-	int *processed_table;
-	GtkTreeStore *tree;
-	int pt;
-	
-	page_list = expanded_info_file (file);
-	if (!page_list)
-          return NULL;
-	
-	pages = 0;
-	offset = 0;
-
-	offsets2pages = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
-					       NULL);
-	pages2nodes = g_hash_table_new_full (g_direct_hash, g_direct_equal, NULL, 
-					     g_free);
-
-	for (ptr = page_list; *ptr != NULL; ptr++)
-	{
-	  gchar *name = NULL;
-
-          g_hash_table_insert (offsets2pages,
-                               g_strdup_printf ("%i", offset),
-                               GINT_TO_POINTER (pages));
-
-          name = get_value_after (*ptr, "Node: ");
-          if (name)
-            g_hash_table_insert (pages2nodes,
-                                 GINT_TO_POINTER (pages), name);
-		
-          offset += strlen (*ptr);
-          if (pages) offset += 2;
-          pages++;
-
-          pt = page_type (*ptr);
-          if (pt == PAGE_INDIRECT) {
-            g_warning ("Found an indirect page in a file "
-                       "we thought we'd expanded.");
-          }
-	}
-
-        /* Now consolidate (and correct) the two hash tables */
-        nodes2pages = make_nodes2pages (offsets2pages, pages2nodes);
-
-	g_hash_table_destroy (offsets2pages);
-        g_hash_table_destroy (pages2nodes);
+  gchar **page_list;
+  char **ptr;
+  GHashTable *usections;
 
-	processed_table = g_malloc0 (pages * sizeof (int));
-	tree = gtk_tree_store_new (INFO_PARSER_N_COLUMNS, G_TYPE_STRING, G_TYPE_STRING,
-			G_TYPE_STRING);
-	nodes2iters = g_hash_table_new_full (g_str_hash, g_str_equal, g_free,
-					     (GDestroyNotify) gtk_tree_iter_free);
+  page_list = expanded_info_file (file);
+  if (!page_list)
+    return NULL;
 
-	pages = 0;
-	for (ptr = page_list; *ptr != NULL; ptr++)
-	{
-	  if (page_type (*ptr) != PAGE_NODE) continue;
-	  process_page (tree, nodes2pages, nodes2iters,
-			processed_table, page_list, *ptr);
-	}
+  /*
+    Don't provide destroy functions, because we want to grab the
+    contents in yelp-info-document.c and reuse them.
+  */
+  usections = g_hash_table_new (g_str_hash, g_str_equal);
 
-	g_strfreev (page_list);
+  for (ptr = page_list; *ptr != NULL; ptr++) {
+    if (page_type (*ptr) == PAGE_NODE)
+      process_page (usections, *ptr);
+  }
 
-	g_hash_table_destroy (nodes2iters);
-	g_hash_table_destroy (nodes2pages);
+  g_strfreev (page_list);
 
-	g_free (processed_table);
+  /*
+    Now we've read in each page, we can calculate a list of parents
+    for each page.
+  */
+  GHashTableIter iter;
+  const gchar *key;
+  UnprocessedSection *value;
+
+  g_hash_table_iter_init (&iter, usections);
+  while (g_hash_table_iter_next (&iter,
+                                 (const gpointer)&key,
+                                 (gpointer)&value)) {
+    value->ancestry = get_parent_list (value, usections);
+  }
 
-	return tree;
+  return usections;
 }
 
 /* End Part 1 */
 /* Part 2: Parse Tree into XML */
-static void
-parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
+
+xmlDocPtr
+yelp_info_parser_parse_section (const UnprocessedSection* usection,
+                                GHashTable *usections)
 {
-    GtkTreeIter children, parent;
-	xmlNodePtr newnode;
+  xmlDocPtr doc;
+  xmlNodePtr root, parents, parent;
+  gboolean notes = FALSE;
+  GSList *parent_list;
+  XRef *xref;
+
+  doc = xmlNewDoc (BAD_CAST "1.0");
+  root = xmlNewNode (NULL, BAD_CAST "Info");
+  xmlDocSetRootElement (doc, root);
+
+  if (strstr (usection->contents, "*Note") ||
+      strstr (usection->contents, "*note")) {
+    notes = TRUE;
+  }
 
-	char *page_no = NULL;
-	char *page_name = NULL;
-	char *page_content = NULL;
-	gboolean notes = FALSE;
+  if (strstr (usection->contents, "* Menu:")) {
+    yelp_info_parse_menu (root, usection->contents, notes);
+  }
+  else {
+    if (!notes) {
+      info_body_text (root, NULL, NULL, FALSE, usection->contents);
+    }
+    else {
+      info_process_text_notes (root, usection->contents);
+    }
+  }
 
-	debug_print (DB_DEBUG, "Decended\n");
-	do
-	{
-		gtk_tree_model_get (GTK_TREE_MODEL (tree), &iter,
-				INFO_PARSER_COLUMN_PAGE_NO, &page_no,
-				INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
-				INFO_PARSER_COLUMN_PAGE_CONTENT, &page_content,
-				-1);
-		debug_print (DB_DEBUG, "Got Section: %s\n", page_name);
-		if (strstr (page_content, "*Note") || 
-		    strstr (page_content, "*note")) {
-		  notes = TRUE;
-		}
-		if (strstr (page_content, "* Menu:")) {
-		  newnode = yelp_info_parse_menu (tree, node, page_content, notes);
-		} else {
-		  newnode = xmlNewTextChild (*node, NULL,
-					     BAD_CAST "Section",
-					     NULL);
-		  if (!notes)
-		    info_body_text (newnode, NULL, NULL, FALSE, page_content);
-
-		  else {
-		    /* Handle notes here */
-		    info_process_text_notes (&newnode, page_content, tree);
-		  }
-		}
-		/* if we free the page content, now it's in the XML, we can
-		 * save some memory */
-		g_free (page_content);
-		page_content = NULL;
-
-                if (gtk_tree_model_iter_parent (GTK_TREE_MODEL (tree), &parent, &iter)) {
-                    gchar *parent_id;
-                    gtk_tree_model_get (GTK_TREE_MODEL (tree), &parent,
-                                        INFO_PARSER_COLUMN_PAGE_NO, &parent_id,
-                                        -1);
-                    xmlNewProp (newnode, BAD_CAST "up", BAD_CAST parent_id);
-                    g_free (parent_id);
-                }
-
-		xmlNewProp (newnode, BAD_CAST "id", 
-			    BAD_CAST page_no);
-		xmlNewProp (newnode, BAD_CAST "name", 
-			    BAD_CAST page_name);
-		if (gtk_tree_model_iter_children (GTK_TREE_MODEL (tree),
-				&children,
-				&iter))
-		  parse_tree_level (tree, &newnode, children);
-		g_free (page_no);
-		g_free (page_name);
-	}
-	while (gtk_tree_model_iter_next (GTK_TREE_MODEL (tree), &iter));
-	debug_print (DB_DEBUG, "Ascending\n");
-}
+  if (usection->next->id && usection->next->name) {
+    xmlNewProp (root, BAD_CAST "next", BAD_CAST usection->next->id);
+    xmlNewProp (root, BAD_CAST "next-name", BAD_CAST usection->next->name);
+  }
+  if (usection->prev->id && usection->prev->name) {
+    xmlNewProp (root, BAD_CAST "prev", BAD_CAST usection->prev->id);
+    xmlNewProp (root, BAD_CAST "prev-name", BAD_CAST usection->prev->name);
+  }
 
-xmlDocPtr
-yelp_info_parser_parse_tree (GtkTreeStore *tree)
-{
-	xmlDocPtr doc;
-	xmlNodePtr node;
-	GtkTreeIter iter;
-
-	/*
-	xmlChar *xmlbuf;
-	int bufsiz;
-	*/
-
-	doc = xmlNewDoc (BAD_CAST "1.0");
-	node = xmlNewNode (NULL, BAD_CAST "Info");
-	xmlDocSetRootElement (doc, node);
-
-	/* functions I will want:
-	gtk_tree_model_get_iter_first;
-	gtk_tree_model_iter_next;
-	gtk_tree_model_iter_children;
-	*/
-
-	if (gtk_tree_model_get_iter_first (GTK_TREE_MODEL (tree), &iter))
-		parse_tree_level (tree, &node, iter);
-	d (else debug_print (DB_DEBUG, "Empty tree?\n"));
-
-	/*
-	xmlDocDumpFormatMemory (doc, &xmlbuf, &bufsiz, 1);
-	g_print ("XML follows:\n%s\n", xmlbuf);
-	*/
-
-	return doc;
-}
+  xmlNewProp (root, BAD_CAST "id", BAD_CAST usection->node->id);
+  xmlNewProp (root, BAD_CAST "name", BAD_CAST usection->node->name);
 
-gboolean
-resolve_frag_id (GtkTreeModel *model, GtkTreePath *path, GtkTreeIter *iter,
-		 gpointer data)
-{
-  gchar *page_no = NULL;
-  gchar *page_name = NULL;
-  gchar **xref = data;
-
-  gtk_tree_model_get (GTK_TREE_MODEL (model), iter,
-		      INFO_PARSER_COLUMN_PAGE_NO, &page_no,
-		      INFO_PARSER_COLUMN_PAGE_NAME, &page_name,
-		      -1);
-  if (g_str_equal (page_name, *xref)) {
-    g_free (*xref);
-    *xref = g_strdup (page_name);
-    *xref = g_strdelimit (*xref, " ", '_');
-
-    g_free (page_name);
-    g_free (page_no);
-    return TRUE;
+  /*
+    Add
+      <parents><parent id="id1" name="name1"/>...</parents>
+    to allow us to make a linktrail in the xslt.
+   */
+
+  parent_list = usection->ancestry;
+
+  if (parent_list) {
+    parents = xmlNewChild (root, NULL, BAD_CAST "parents", NULL);
+    while (parent_list) {
+      xref = (XRef *)parent_list->data;
+      parent = xmlNewChild (parents, NULL, BAD_CAST "parent", NULL);
+      xmlNewProp (parent, BAD_CAST "id", BAD_CAST xref->id);
+      xmlNewProp (parent, BAD_CAST "name", BAD_CAST xref->name);
+      parent_list = g_slist_next (parent_list);
+    }
   }
-  g_free (page_name);
-  g_free (page_no);
 
-  return FALSE;
+  return doc;
 }
 
 gboolean
@@ -1155,26 +938,22 @@ first_non_space (gchar* str)
   return str;
 }
 
-xmlNodePtr
-yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node, 
+static void
+yelp_info_parse_menu (xmlNodePtr root,
 		      gchar *page_content, gboolean notes)
 {
   gchar **split;
   gchar **menuitems;
   gchar *tmp = NULL;
-  xmlNodePtr newnode, menu_node, mholder = NULL;
+  xmlNodePtr menu_node, mholder = NULL;
   int i=0;
 
   split = g_strsplit (page_content, "* Menu:", 2);
   
-  newnode = xmlNewChild (*node, NULL,
-			 BAD_CAST "Section", NULL);
-    
-
   if (!notes)
-    info_body_text (newnode, NULL, NULL, FALSE, split[0]);
+    info_body_text (root, NULL, NULL, FALSE, split[0]);
   else {
-    info_process_text_notes (&newnode, split[0], tree);
+    info_process_text_notes (root, split[0]);
   }
 
   menuitems = g_strsplit (split[1], "\n", -1);
@@ -1205,7 +984,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
 
   if (menuitems[0] != NULL) {
     /* If there are any menu items, make the <menu> node */
-    menu_node = xmlNewChild (newnode, NULL, BAD_CAST "menu", NULL);
+    menu_node = xmlNewChild (root, NULL, BAD_CAST "menu", NULL);
   }
 
   while (menuitems[i] != NULL) {
@@ -1227,8 +1006,8 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
 
     if (menu) {
       mholder = xmlNewChild (menu_node, NULL, BAD_CAST "menuholder", NULL);
-      gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &xref);
-      
+      name2id (xref);
+
       if (ref == NULL) { /* A standard type menu */
         /* title+2 skips the "* ". We know we haven't jumped over the
            end of the string because strlen (title) >= 3 */
@@ -1311,12 +1090,10 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
 
   }
   g_strfreev (menuitems);
-  
-  return newnode;
 }
 
 void
-info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
+info_process_text_notes (xmlNodePtr node, gchar *content)
 {
   gchar **notes;
   gchar **current;
@@ -1336,7 +1113,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
   notes = g_regex_split_simple ("\\*[Nn]ote(?!_)", content, 0, 0);
 
   for (current = notes; *current != NULL; current++) {
-    gchar *url, **urls, **ulink;
+    gchar *url, **urls;
     gchar *append;
     gchar *alt_append, *alt_append1;
     gchar *link_text;
@@ -1348,14 +1125,14 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
        * start, so we can just add it and forget about it.
        */
       first = FALSE;
-      info_body_text (*node, &paragraph, NULL, TRUE, (*current));
+      info_body_text (node, &paragraph, NULL, TRUE, (*current));
       continue;
     }
 
     /* If we got to here, we now gotta parse the note reference */
     append = strchr (*current, ':');
     if (!append) {
-      info_body_text (*node, &paragraph, NULL, TRUE, *current);
+      info_body_text (node, &paragraph, NULL, TRUE, *current);
       continue;
     }
     append++;
@@ -1370,7 +1147,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
     }
     alt_append1 = strchr (alt_append1, ',');
     if (!append && !alt_append && !alt_append1) {
-      info_body_text (*node, &paragraph, NULL, TRUE, *current);
+      info_body_text (node, &paragraph, NULL, TRUE, *current);
       continue;
     }
     if (!append || alt_append || alt_append1) {
@@ -1416,12 +1193,13 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
         /* Massive space.  Fix. */
         gchar *next = break_point;
         gchar *url_copy;
+        gchar *old = url;
         while (*next == ' ')
           next++;
         next--;
         url_copy = g_strndup (url, break_point-url);
-        g_free (url);
         url = g_strconcat (url_copy, next, NULL);
+        g_free (old);
         break_point = strchr (url, ' ');
         g_free (url_copy);
       } else {
@@ -1477,14 +1255,14 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
       else
         frag = g_strndup (url, tmp1 - url);
       g_strstrip (frag);
-      gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag);
+      name2id (frag);
       href = g_strconcat ("xref:", frag, NULL);
       g_free (frag);
     }
 
     /* Check we've got a valid paragraph node */
     if (!paragraph) {
-      paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL);
+      paragraph = xmlNewChild (node, NULL, BAD_CAST "para", NULL);
     }
 
     /*
@@ -1502,7 +1280,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
     g_strfreev (urls);
 
     /* Finally, we can add the following text as required */
-    info_body_text (*node, &paragraph, NULL, TRUE, append);
+    info_body_text (node, &paragraph, NULL, TRUE, append);
 
     g_free (url);
     g_free (href);
diff --git a/libyelp/yelp-info-parser.h b/libyelp/yelp-info-parser.h
index d338a59..e659edc 100644
--- a/libyelp/yelp-info-parser.h
+++ b/libyelp/yelp-info-parser.h
@@ -26,6 +26,7 @@
 #include <glib.h>
 #include <gtk/gtk.h>
 #include <libxml/tree.h>
+#include "yelp-document.h"
 
 enum {
     INFO_PARSER_COLUMN_PAGE_NO,
@@ -34,8 +35,37 @@ enum {
     INFO_PARSER_N_COLUMNS
 };
 
+/*
+  A structure for information about a section which has been read into
+  memory but not yet parsed to xml.
+ */
+typedef struct _UnprocessedSection UnprocessedSection;
+
+/*
+  Free the memory in the opaque UnprocessedSection structure.
+ */
+void unprocessed_section_free (UnprocessedSection *us);
+
+/*
+  Register a section with the document: calls
+  yelp_document_set_page_id and also registers up/next/prev links.
+ */
+void register_section (YelpDocument *doc,
+                       const UnprocessedSection *us);
+
+/*
+  Read the given file into memory. Stores each (unparsed) section
+  keyed by page_id into a hash table. Returns NULL on failure.
 
-GtkTreeStore          *yelp_info_parser_parse_file  (char           *file);
-xmlDocPtr	       yelp_info_parser_parse_tree  (GtkTreeStore   *tree);
+  The page_id and sections will not be freed on destroying the hash
+  table, so the caller must use g_free and unprocessed_section_free.
+ */
+GHashTable*    yelp_info_parser_read_file  (const gchar  *file);
+
+/*
+  Parse a particular section to xml.
+ */
+xmlDocPtr      yelp_info_parser_parse_section (const UnprocessedSection* usection,
+                                               GHashTable *usections);
 
 #endif /* __YELP_INFO_PARSER_H__ */
diff --git a/stylesheets/info2html.xsl.in b/stylesheets/info2html.xsl.in
index a97b054..7f45cb5 100644
--- a/stylesheets/info2html.xsl.in
+++ b/stylesheets/info2html.xsl.in
@@ -12,37 +12,11 @@
 <xsl:import href="@XSL_HTML@"/>
 <xsl:include href="yelp-common.xsl"/>
 
-<xsl:template name="linktrails">
-  <xsl:param name="up" select="@up"/>
-  <xsl:variable name="upnode" select="/Info//Section[@id = $up]"/>
-  <xsl:if test="$upnode/@up">
-    <xsl:call-template name="linktrails">
-      <xsl:with-param name="up" select="$upnode/@up"/>
-    </xsl:call-template>
-  </xsl:if>
-  <a href="xref:{$upnode/@id}">
-    <xsl:value-of select="$upnode/@name"/>
-  </a>
-  <xsl:text>&#x00A0;» </xsl:text>
-</xsl:template>
-
-<xsl:template match="/">
-  <xsl:for-each select="/Info/Section">
-    <xsl:call-template name="html.output"/>
-  </xsl:for-each>
-</xsl:template>
-
-<xsl:template mode="html.output.after.mode" match="Section">
-  <xsl:for-each select="Section">
-    <xsl:call-template name="html.output"/>
-  </xsl:for-each>
-</xsl:template>
-
-<xsl:template mode="html.title.mode" match="Section">
+<xsl:template mode="html.title.mode" match="Info">
   <xsl:value-of select="@name"/>
 </xsl:template>
 
-<xsl:template mode="html.css.mode" match="Section">
+<xsl:template mode="html.css.mode" match="Info">
   <xsl:param name="direction"/>
   <xsl:param name="left"/>
   <xsl:param name="right"/>
@@ -72,35 +46,41 @@ a.navbar-next::after {
 </xsl:text>
 </xsl:template>
 
-<xsl:template mode="html.header.mode" match="Section">
-  <xsl:if test="@up">
+<xsl:template mode="html.header.mode" match="Info">
+  <xsl:if test="parents"> <!-- The trail is emitted only when Info has a parents child element. -->
     <div class="trails">
       <div class="trail">
-        <xsl:call-template name="linktrails"/>
+        <xsl:for-each select="parents//parent"> <!-- One xref link per parent element, taken in document order. -->
+          <a href="xref:{@id}">
+            <xsl:value-of select="@name"/>
+          </a>
+          <xsl:text>&#x00A0;» </xsl:text>
+        </xsl:for-each>
       </div>
     </div>
   </xsl:if>
 </xsl:template>
 
-<xsl:template mode="html.body.mode" match="Section">
+<xsl:template mode="html.body.mode" match="Info">
+  <xsl:variable name="p" select="@prev-name"/> <!-- Empty node-set when Info has no prev-name attribute. -->
+  <xsl:variable name="n" select="@next-name"/> <!-- Empty node-set when Info has no next-name attribute. -->
   <div class="navbar">
-    <xsl:variable name="preceding" select="(parent::Section[1] | preceding::Section[1])[last()]"/>
-    <xsl:variable name="following" select="(Section[1] | following::Section[1])[1]"/>
-    <xsl:if test="$preceding">
-      <a class="navbar-prev" href="xref:{$preceding/@id}">
-        <xsl:value-of select="$preceding/@name"/>
+    <xsl:if test="$p">
+      <a class="navbar-prev" href="xref:{@prev}">
+        <xsl:value-of select="@prev-name"/>
       </a>
     </xsl:if>
-    <xsl:if test="$preceding and $following">
+    <xsl:if test="$p and $n">
       <xsl:text>&#x00A0;&#x00A0;|&#x00A0;&#x00A0;</xsl:text>
     </xsl:if>
-    <xsl:if test="$following">
-      <a class="navbar-next" href="xref:{$following/@id}">
-        <xsl:value-of select="$following/@name"/>
+    <xsl:if test="$n">
+      <a class="navbar-next" href="xref:{@next}">
+        <xsl:value-of select="@next-name"/>
       </a>
     </xsl:if>
   </div>
-  <xsl:apply-templates select="node()[not(self::Section)]"/>
+  <!-- Below the navbar: apply templates to every child node of Info that is not itself an Info element. -->
+  <xsl:apply-templates select="node()[not(self::Info)]"/>
 </xsl:template>
 
 
-- 
1.7.4.4



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]