Hi,
After disappearing for ages (sorry!), I've hacked together some patches
that seem to improve info file handling here.
I can open a bug report instead if that would be better, but maybe this
is a good place for someone to have a look at them?
Patches attached.
What they do:
- Use blank lines to detect paragraphs and output more semantic HTML
(real paragraph elements instead of one preformatted block).
- Parse the menus more helpfully and display them as a <ul>
- Correctly format multi-line links (on my system at least, there's
one at info:info so it's quite noticeable)
I'd love to hear if I've introduced any regressions. There are still
some problems that were already present before these patches. Most
notably, there seems to be a problem with spaces in links, or maybe with
links between different info files (to reproduce: open info:info, go to
"expert", then click on a texinfo link). I'll try to work out what's
going on soon.
Rupert
From 7059753590f6f6371b3b04880e82eeb6edb57ce0 Mon Sep 17 00:00:00 2001
From: Rupert Swarbrick <rswarbrick gmail com>
Date: Wed, 16 Jun 2010 10:32:20 +0100
Subject: [PATCH 1/4] Support for headings in info files.
---
libyelp/yelp-info-parser.c | 173 +++++++++++++++++++++++++++++++++++++-----
stylesheets/info2html.xsl.in | 17 ++++
2 files changed, 170 insertions(+), 20 deletions(-)
diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c
index 3310794..7d32905 100644
--- a/libyelp/yelp-info-parser.c
+++ b/libyelp/yelp-info-parser.c
@@ -1,4 +1,4 @@
-/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil -*- */
/*
* Copyright (C) 2005 Davyd Madeley <davyd madeley id au>
*
@@ -58,8 +58,13 @@ void fix_tag_table (gchar *offset,
TagTableFix *a);
void info_process_text_notes (xmlNodePtr *node,
gchar *content,
- GtkTreeStore *tree);
+ GtkTreeStore
+ *tree);
+/*
+ Used to output the correct <heading level="?" /> tag.
+ */
+static const gchar* level_headings[] = { NULL, "1", "2", "3" };
static GHashTable *
info_image_get_attributes (gchar const* string)
@@ -141,15 +146,144 @@ info_insert_image (xmlNodePtr parent, GMatchInfo *match_info)
}
/*
- Convert body text CONTENT to xml nodes, processing info image tags
- when found. IWBN add a regex match for *Note: here and call the
- *Note ==> <a href> logic of info_process_text_notes from here.
+ If every element of `str' is `ch' then return TRUE, else FALSE.
*/
-static xmlNodePtr
-info_body_text (xmlNodePtr parent, xmlNsPtr ns, gchar const *name, gchar const *content)
+static gboolean
+string_all_char_p (const gchar* str, gchar ch)
+{
+ for (; *str; str++) {
+ if (*str != ch) return FALSE;
+ }
+ return TRUE;
+}
+
+/*
+ If `line' is a line of '*', '=' or '-', return 1,2,3 respectively
+ for the heading level. If it's anything else, return 0.
+ */
+static int
+header_underline_level (const gchar* line)
+{
+ if (*line != '*' && *line != '=' && *line != '-')
+ return 0;
+
+ if (string_all_char_p (line, '*')) return 1;
+ if (string_all_char_p (line, '=')) return 2;
+ if (string_all_char_p (line, '-')) return 3;
+
+ return 0;
+}
+
+/*
+ Use g_strjoinv to join up the strings from `strings', but they might
+ not actually be a null-terminated array. `end' should be strings+n,
+ where I want the first n strings (strings+0, ..., strings+(n-1)). It
+ shouldn't point outside of the array allocated, but it can point at
+ the null string at the end.
+ */
+static gchar*
+join_strings_subset (const gchar *separator,
+ gchar** strings, gchar** end)
+{
+ g_assert(end > strings);
+
+ gchar *ptr = *end;
+ *end = NULL;
+
+ gchar *glob = g_strjoinv (separator, strings);
+ *end = ptr;
+ return glob;
+}
+
+/*
+ Create a text node, child of `parent', with the lines strictly
+ between `first' and `last'.
+*/
+static void
+lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns,
+ gboolean inline_p,
+ gchar** first, gchar** last)
{
- if (!strstr (content, INFO_C_IMAGE_TAG_OPEN))
- return xmlNewTextChild (parent, ns, BAD_CAST name, BAD_CAST content);
+ /* TODO? Currently we're copying the split strings again, which is
+ less efficient than somehow storing lengths and using a sort of
+ window on `content'. But that's much more difficult, so unless
+ there's a problem, let's go with the stupid approach. */
+ gchar *glob;
+ if (last > first) {
+ glob = join_strings_subset ("\n", first, last);
+ xmlNewTextChild (parent, ns,
+ inline_p ? BAD_CAST "para1" : BAD_CAST "para",
+ BAD_CAST glob);
+ g_free (glob);
+ }
+}
+
+/*
+ Convert body text CONTENT to xml nodes. This function is responsible
+ for spotting headings etc and splitting them out correctly.
+
+ If `inline_p' is true, end with a <para1> tag. Otherwise, end with a
+ <para> tag.
+
+ TODO: IWBN add a regex match for *Note: here and call the *Note ==>
+ <a href> logic of info_process_text_notes from here.
+ */
+static void
+info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns,
+ gboolean inline_p, const gchar *content)
+{
+ /* The easiest things to spot are headings: they look like a line of
+ * '*','=' or '-', corresponding to heading levels 1,2 or 3. To spot
+ * them, we split content into single lines and work with them. */
+ gchar **lines = g_strsplit (content, "\n", 0);
+ gchar **first = lines, **last = lines+1;
+ int header_level;
+ xmlNodePtr header_node;
+
+ /* Deal with the possibility that `content' is empty */
+ if (*lines == NULL) {
+ if (!inline_p) {
+ xmlNewTextChild (parent, NULL, BAD_CAST "para", BAD_CAST "");
+ }
+ return;
+ }
+
+ for (; *last; last++) {
+ header_level = header_underline_level (*last);
+ if (header_level) {
+ /* Write out any lines beforehand */
+ lines_subset_text_child (parent, ns, FALSE, first, last-1);
+ /* Now write out the actual header line */
+ header_node = xmlNewTextChild (parent, ns, BAD_CAST "header",
+ BAD_CAST *(last-1));
+ xmlNewProp (header_node, BAD_CAST "level",
+ BAD_CAST level_headings[header_level]);
+
+ first = last+1;
+ last = first+1;
+ }
+ }
+ /* Write out any lines left */
+ lines_subset_text_child (parent, ns, inline_p, first, last);
+
+ g_strfreev (lines);
+}
+
+/*
+ info_body_text is responsible for taking a hunk of the info page's
+ body and turning it into paragraph tags. It searches out images and
+ marks them up properly if necessary.
+
+ It uses info_body_parse_text to mark up the actual bits of text.
+ */
+static void
+info_body_text (xmlNodePtr parent, xmlNsPtr ns,
+ gboolean inline_p, gchar const *content)
+{
+ if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)) {
+ info_body_parse_text (parent, ns, inline_p, content);
+ return;
+ }
gint content_len = strlen (content);
gint pos = 0;
@@ -164,16 +298,15 @@ info_body_text (xmlNodePtr parent, xmlNsPtr ns, gchar const *name, gchar const *
&image_start, &image_end);
gchar *before = g_strndup (&content[pos], image_start - pos);
pos = image_end + 1;
- xmlNewTextChild (parent, NULL, BAD_CAST "para1", BAD_CAST (before));
+ info_body_parse_text (parent, NULL, TRUE, before);
g_free (before);
if (image_found)
info_insert_image (parent, match_info);
g_match_info_next (match_info, NULL);
}
gchar *after = g_strndup (&content[pos], content_len - pos);
- xmlNewTextChild (parent, NULL, BAD_CAST "para1", BAD_CAST (after));
+ info_body_parse_text (parent, NULL, TRUE, after);
g_free (after);
- return 0;
}
/* Part 1: Parse File Into Tree Store */
@@ -840,7 +973,7 @@ parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
BAD_CAST "Section",
NULL);
if (!notes)
- info_body_text (newnode, NULL, "para", page_content);
+ info_body_text (newnode, NULL, FALSE, page_content);
else {
/* Handle notes here */
@@ -1005,7 +1138,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
tmp = g_strconcat (split[0], "\n* Menu:", NULL);
if (!notes)
- info_body_text (newnode, NULL, "para", tmp);
+ info_body_text (newnode, NULL, FALSE, tmp);
else {
info_process_text_notes (&newnode, tmp, tree);
}
@@ -1119,7 +1252,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
* start, so we can just add it and forget about it.
*/
first = FALSE;
- info_body_text (holder, NULL, "para1", (*current_real));
+ info_body_text (holder, NULL, TRUE, (*current_real));
continue;
}
/* If we got to here, we now gotta parse the note reference */
@@ -1128,13 +1261,13 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
/* Special type of note that isn't really a note, but pretends
* it is
*/
- info_body_text (holder, NULL, "para1",
+ info_body_text (holder, NULL, TRUE,
g_strconcat ("*Note", *current_real, NULL));
continue;
}
append = strchr (*current_real, ':');
if (!append) {
- info_body_text (holder, NULL, "para1", *current_real);
+ info_body_text (holder, NULL, TRUE, *current_real);
continue;
}
append++;
@@ -1149,7 +1282,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
}
alt_append1 = strchr (alt_append1, ',');
if (!append && !alt_append && !alt_append1) {
- info_body_text (holder, NULL, "para1", *current_real);
+ info_body_text (holder, NULL, TRUE, *current_real);
continue;
}
if (!append || alt_append || alt_append1) {
@@ -1285,14 +1418,14 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
ref1 = xmlNewTextChild (holder, NULL, BAD_CAST "a",
BAD_CAST link_text);
if (*(ulink+1) != NULL)
- info_body_text (holder, NULL, "para", "");
+ info_body_text (holder, NULL, FALSE, "");
g_free (link_text);
xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
}
g_strfreev (urls);
/* Finally, we can add the text as required */
- info_body_text (holder, NULL, "para1", append);
+ info_body_text (holder, NULL, TRUE, append);
g_free (url);
g_free (href);
}
diff --git a/stylesheets/info2html.xsl.in b/stylesheets/info2html.xsl.in
index ec75878..c029148 100644
--- a/stylesheets/info2html.xsl.in
+++ b/stylesheets/info2html.xsl.in
@@ -115,6 +115,23 @@ a.navbar-next::after {
<xsl:value-of select="node()"/>
</xsl:template>
+<xsl:template match="header">
+ <xsl:choose>
+ <xsl:when test='@level = 1'>
+ <h1><xsl:value-of select="node()"/></h1>
+ </xsl:when>
+ <xsl:when test='@level = 2'>
+ <h2><xsl:value-of select="node()"/></h2>
+ </xsl:when>
+ <xsl:when test='@level = 3'>
+ <h3><xsl:value-of select="node()"/></h3>
+ </xsl:when>
+ <xsl:otherwise>
+ <h1>(Unknown heading level) <xsl:value-of select="node()"/></h1>
+ </xsl:otherwise>
+ </xsl:choose>
+</xsl:template>
+
<xsl:template match="spacing">
<xsl:value-of select="node()"/>
</xsl:template>
--
1.7.1
From d1369b91a2bbde04a94911123c4d583087b0b692 Mon Sep 17 00:00:00 2001
From: Rupert Swarbrick <rswarbrick gmail com>
Date: Fri, 3 Sep 2010 00:09:31 +0100
Subject: [PATCH 2/4] Display menus as <ul>'s, rather than the original text.
---
libyelp/yelp-info-parser.c | 87 +++++++++++++++++++++++++++++++++--------
stylesheets/info2html.xsl.in | 24 +++++++++--
2 files changed, 89 insertions(+), 22 deletions(-)
diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c
index 7d32905..1605ecf 100644
--- a/libyelp/yelp-info-parser.c
+++ b/libyelp/yelp-info-parser.c
@@ -248,7 +248,11 @@ info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns,
return;
}
+ /* Use a pair of pointers, first and last, which point to two lines,
+ * the chunk of the body we're displaying (inclusive) */
for (; *last; last++) {
+
+ /* Check for a header */
header_level = header_underline_level (*last);
if (header_level) {
/* Write out any lines beforehand */
@@ -1120,6 +1124,16 @@ get_menuoptions (gchar *line, gchar **title, gchar **ref, gchar **desc,
return TRUE;
}
+/* Find the first non-space character in str or return pointer to the
+ * '\0' if there isn't one. */
+static gchar*
+first_non_space (gchar* str)
+{
+ /* As long as str is null terminated, this is ok! */
+ while (*str == ' ') str++;
+ return str;
+}
+
xmlNodePtr
yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
gchar *page_content, gboolean notes)
@@ -1127,7 +1141,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
gchar **split;
gchar **menuitems;
gchar *tmp = NULL;
- xmlNodePtr newnode;
+ xmlNodePtr newnode, menu_node, mholder = NULL;
int i=0;
split = g_strsplit (page_content, "* Menu:", 2);
@@ -1136,37 +1150,69 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
BAD_CAST "Section", NULL);
- tmp = g_strconcat (split[0], "\n* Menu:", NULL);
if (!notes)
- info_body_text (newnode, NULL, FALSE, tmp);
+ info_body_text (newnode, NULL, FALSE, split[0]);
else {
- info_process_text_notes (&newnode, tmp, tree);
+ info_process_text_notes (&newnode, split[0], tree);
}
- g_free (tmp);
menuitems = g_strsplit (split[1], "\n", -1);
g_strfreev (split);
+ /* The output xml should look something like the following:
+
+ <menu>
+ <menuholder>
+ <a href="xref:Help-Inv">Help-Inv</a>
+ <para1>Invisible text in Emacs Info.</para1>
+ </menuholder>
+ <menuholder>
+ <a href="xref:Help-M">Help-M</a>
+ <para1>Menus.</para1>
+ </menuholder>
+ ...
+ </menu>
+
+ (from the top page of info:info). Note the absence of *'s and
+ ::'s on the links.
+
+ If there's a line with no "* Blah::", it looks like a child of
+ the previous menu item so (for i > 0) deal with that correctly by
+ not "closing" the <menuholder> tag until we find the next
+ start.
+ */
+
+ if (menuitems[0] != NULL) {
+ /* If there are any menu items, make the <menu> node */
+ menu_node = xmlNewChild (newnode, NULL, BAD_CAST "menu", NULL);
+ }
+
while (menuitems[i] != NULL) {
gboolean menu = FALSE;
gchar *title = NULL;
gchar *ref = NULL;
gchar *desc = NULL;
gchar *xref = NULL;
- xmlNodePtr mholder;
xmlNodePtr ref1;
menu = get_menuoptions (menuitems[i], &title, &ref, &desc, &xref);
-
+
+ if (menu && (*title == '\0' || *(title + 1) == '\0')) {
+ g_warning ("Info title unexpectedly short for menu item (%s)",
+ menuitems[i]);
+ menu = FALSE;
+ }
+
if (menu) {
- mholder = xmlNewChild (newnode, NULL, BAD_CAST "menuholder", NULL);
+ mholder = xmlNewChild (menu_node, NULL, BAD_CAST "menuholder", NULL);
gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &xref);
if (ref == NULL) { /* A standard type menu */
- tmp = g_strconcat (title, "::", NULL);
+ /* title+2 skips the "* ". We know we haven't jumped over the
+ end of the string because strlen (title) >= 3 */
ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a",
- BAD_CAST tmp);
- g_free (tmp);
+ BAD_CAST title+2);
+
tmp = g_strconcat ("xref:", xref, NULL);
xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp);
g_free (tmp);
@@ -1200,12 +1246,19 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
g_free (tmp);
g_free (sp);
}
- xmlNewTextChild (mholder, NULL, BAD_CAST "para",
- BAD_CAST desc);
- } else {
- xmlNewTextChild (newnode, NULL, BAD_CAST "para",
- BAD_CAST menuitems[i]);
-
+
+ tmp = g_strconcat ("\n", first_non_space (desc), NULL);
+ xmlNewTextChild (mholder, NULL, BAD_CAST "para1",
+ BAD_CAST tmp);
+ g_free (tmp);
+
+ }
+ else if (*(menuitems[i]) != '\0') {
+ tmp = g_strconcat ("\n", first_non_space (menuitems[i]), NULL);
+ xmlNewTextChild (mholder ? mholder : menu_node,
+ NULL, BAD_CAST "para1",
+ BAD_CAST tmp);
+ g_free (tmp);
}
i++;
g_free (title);
diff --git a/stylesheets/info2html.xsl.in b/stylesheets/info2html.xsl.in
index c029148..1117a80 100644
--- a/stylesheets/info2html.xsl.in
+++ b/stylesheets/info2html.xsl.in
@@ -47,7 +47,8 @@
<xsl:param name="left"/>
<xsl:param name="right"/>
<xsl:text>
-div.body { white-space: pre; font-family: monospace; }
+div.body { font-family: monospace; }
+span.fixed { white-space: pre; }
<!-- navbar from mal2html, possibly move to html.xsl -->
div.navbar {
margin: 0 0 1em 0;
@@ -106,13 +107,17 @@ a.navbar-next::after {
<!-- = Normal Matches = -->
<xsl:template match="para">
- <xsl:value-of select="node()"/>
- <xsl:text>
+ <span class="fixed">
+ <xsl:value-of select="node()"/>
+ <xsl:text>
</xsl:text>
+ </span>
</xsl:template>
<xsl:template match="para1">
- <xsl:value-of select="node()"/>
+ <span class="fixed">
+ <xsl:value-of select="node()"/>
+ </span>
</xsl:template>
<xsl:template match="header">
@@ -156,8 +161,17 @@ a.navbar-next::after {
</xsl:element>
</xsl:template>
+<xsl:template match="menu">
+ <xsl:element name="p">Menu:</xsl:element>
+ <xsl:element name="ul">
+ <xsl:apply-templates />
+ </xsl:element>
+</xsl:template>
+
<xsl:template match="menuholder">
- <xsl:apply-templates select="node()[not(self::menuholder)]"/>
+ <xsl:element name="li">
+ <xsl:apply-templates />
+ </xsl:element>
</xsl:template>
<xsl:template match="noteholder">
--
1.7.1
From 7ede37523e6fe60ae13fe5e9f98b356d9da4b4d4 Mon Sep 17 00:00:00 2001
From: Rupert Swarbrick <rswarbrick gmail com>
Date: Fri, 10 Sep 2010 11:42:23 +0100
Subject: [PATCH 3/4] Parse info files into paragraphs (separated by blank lines).
---
libyelp/yelp-info-parser.c | 454 ++++++++++++++++++++++--------------------
stylesheets/info2html.xsl.in | 15 +-
2 files changed, 248 insertions(+), 221 deletions(-)
diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c
index 1605ecf..a85f733 100644
--- a/libyelp/yelp-info-parser.c
+++ b/libyelp/yelp-info-parser.c
@@ -128,7 +128,8 @@ info_insert_image (xmlNodePtr parent, GMatchInfo *match_info)
source = (gchar*)g_hash_table_lookup (h, "src");
if (!h || !source || !*source)
- return xmlNewTextChild (parent, NULL, BAD_CAST "para1", BAD_CAST "[broken image]");
+ return xmlNewTextChild (parent, NULL, BAD_CAST "para",
+ BAD_CAST "[broken image]");
gchar *title = (gchar*)g_hash_table_lookup (h, "title");
gchar *text = (gchar*)g_hash_table_lookup (h, "text");
@@ -201,7 +202,6 @@ join_strings_subset (const gchar *separator,
*/
static void
lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns,
- gboolean inline_p,
gchar** first, gchar** last)
{
/* TODO? Currently we're copying the split strings again, which is
@@ -209,11 +209,10 @@ lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns,
window on `content'. But that's much more difficult, so unless
there's a problem, let's go with the stupid approach. */
gchar *glob;
+
if (last > first) {
glob = join_strings_subset ("\n", first, last);
- xmlNewTextChild (parent, ns,
- inline_p ? BAD_CAST "para1" : BAD_CAST "para",
- BAD_CAST glob);
+ xmlAddChild (parent, xmlNewText (BAD_CAST glob));
g_free (glob);
}
}
@@ -222,21 +221,24 @@ lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns,
Convert body text CONTENT to xml nodes. This function is responsible
for spotting headings etc and splitting them out correctly.
+ paragraph is as described in info_body_text, but cannot be null.
+
If `inline_p' is true, end with a <para1> tag. Otherwise, end with a
- <para> tag.
+ <para> tag.
TODO: IWBN add a regex match for *Note: here and call the *Note ==>
<a href> logic of info_process_text_notes from here.
*/
static void
-info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns,
+info_body_parse_text (xmlNodePtr parent, xmlNodePtr *paragraph,
+ xmlNsPtr ns,
gboolean inline_p, const gchar *content)
{
/* The easiest things to spot are headings: they look like a line of
* '*','=' or '-', corresponding to heading levels 1,2 or 3. To spot
* them, we split content into single lines and work with them. */
gchar **lines = g_strsplit (content, "\n", 0);
- gchar **first = lines, **last = lines+1;
+ gchar **first = lines, **last = lines;
int header_level;
xmlNodePtr header_node;
@@ -252,11 +254,27 @@ info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns,
* the chunk of the body we're displaying (inclusive) */
for (; *last; last++) {
+ /* Check for a blank line */
+ if (**last == '\0') {
+ if (last != first) {
+ if (!*paragraph) {
+ *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL);
+ }
+ lines_subset_text_child (*paragraph, ns, first, last);
+ }
+ /* On the next iteration, last==first both pointing at the next
+ line. */
+ first = last+1;
+ *paragraph = NULL;
+
+ continue;
+ }
+
/* Check for a header */
header_level = header_underline_level (*last);
if (header_level) {
/* Write out any lines beforehand */
- lines_subset_text_child (parent, ns, FALSE, first, last-1);
+ lines_subset_text_child (parent, ns, first, last-1);
/* Now write out the actual header line */
header_node = xmlNewTextChild (parent, ns, BAD_CAST "header",
BAD_CAST *(last-1));
@@ -264,11 +282,15 @@ info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns,
BAD_CAST level_headings[header_level]);
first = last+1;
- last = first+1;
+ last = first-1;
}
}
+
/* Write out any lines left */
- lines_subset_text_child (parent, ns, inline_p, first, last);
+ if (!*paragraph) {
+ *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL);
+ }
+ lines_subset_text_child (*paragraph, ns, first, last);
g_strfreev (lines);
}
@@ -278,14 +300,21 @@ info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns,
body and turning it into paragraph tags. It searches out images and
marks them up properly if necessary.
+ parent should be the node in which we're currently storing text and
+ paragraph a pointer to a <para> tag or NULL. At blank lines, we
+ finish with the current para tag and switch to a new one.
+
It uses info_body_parse_text to mark up the actual bits of text.
*/
static void
-info_body_text (xmlNodePtr parent, xmlNsPtr ns,
+info_body_text (xmlNodePtr parent, xmlNodePtr *paragraph, xmlNsPtr ns,
gboolean inline_p, gchar const *content)
{
+ xmlNodePtr thepara = NULL;
+ if (paragraph == NULL) paragraph = &thepara;
+
if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)) {
- info_body_parse_text (parent, ns, inline_p, content);
+ info_body_parse_text (parent, paragraph, ns, inline_p, content);
return;
}
@@ -293,6 +322,7 @@ info_body_text (xmlNodePtr parent, xmlNsPtr ns,
gint pos = 0;
GRegex *regex = g_regex_new ("(" INFO_C_IMAGE_TAG_OPEN_RE "((?:[^" INFO_TAG_1 "]|[^" INFO_C_TAG_0 "]+" INFO_TAG_1 ")*)" INFO_C_TAG_CLOSE_RE ")", 0, 0, NULL);
GMatchInfo *match_info;
+
g_regex_match (regex, content, 0, &match_info);
while (g_match_info_matches (match_info))
{
@@ -302,14 +332,18 @@ info_body_text (xmlNodePtr parent, xmlNsPtr ns,
&image_start, &image_end);
gchar *before = g_strndup (&content[pos], image_start - pos);
pos = image_end + 1;
- info_body_parse_text (parent, NULL, TRUE, before);
+ info_body_parse_text (parent, paragraph, NULL, TRUE, before);
g_free (before);
+
+ /* End the paragraph that was before */
+ *paragraph = NULL;
+
if (image_found)
info_insert_image (parent, match_info);
g_match_info_next (match_info, NULL);
}
gchar *after = g_strndup (&content[pos], content_len - pos);
- info_body_parse_text (parent, NULL, TRUE, after);
+ info_body_parse_text (parent, paragraph, NULL, TRUE, after);
g_free (after);
}
@@ -977,8 +1011,8 @@ parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter)
BAD_CAST "Section",
NULL);
if (!notes)
- info_body_text (newnode, NULL, FALSE, page_content);
-
+ info_body_text (newnode, NULL, NULL, FALSE, page_content);
+
else {
/* Handle notes here */
info_process_text_notes (&newnode, page_content, tree);
@@ -1151,7 +1185,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node,
if (!notes)
- info_body_text (newnode, NULL, FALSE, split[0]);
+ info_body_text (newnode, NULL, NULL, FALSE, split[0]);
else {
info_process_text_notes (&newnode, split[0], tree);
}
@@ -1277,212 +1311,208 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
{
gchar **notes;
gchar **current;
- xmlNodePtr holder;
xmlNodePtr ref1;
+ xmlNodePtr paragraph = NULL;
gboolean first = TRUE;
- notes = g_strsplit (content, "*Note", -1);
- holder = xmlNewChild (*node, NULL, BAD_CAST "noteholder", NULL);
+ /*
+ Split using the regular expression
+
+ \*[Nn]ote(?!_)
+
+ which deals with either case and the last bit is a lookahead so
+ that we don't split on things of the form *Note:_, which aren't
+ real notes.
+ */
+ notes = g_regex_split_simple ("\\*[Nn]ote(?!_)", content, 0, 0);
for (current = notes; *current != NULL; current++) {
- /* Since the notes can be either *Note or *note, we handle the second
- * variety here
- */
- gchar **subnotes;
- gchar **current_real;
-
- subnotes = g_strsplit (*current, "*note", -1);
- for (current_real = subnotes; *current_real != NULL; current_real++) {
- gchar *url, **urls, **ulink;
- gchar *append;
- gchar *alt_append, *alt_append1;
- gchar *link_text;
- gchar *href = NULL;
- gchar *break_point = NULL;
- gboolean broken = FALSE;
- if (first) {
- /* The first node is special. It doesn't have a note ref at the
- * start, so we can just add it and forget about it.
- */
- first = FALSE;
- info_body_text (holder, NULL, TRUE, (*current_real));
- continue;
- }
- /* If we got to here, we now gotta parse the note reference */
-
- if (*current_real[0] == '_') {
- /* Special type of note that isn't really a note, but pretends
- * it is
- */
- info_body_text (holder, NULL, TRUE,
- g_strconcat ("*Note", *current_real, NULL));
- continue;
- }
- append = strchr (*current_real, ':');
- if (!append) {
- info_body_text (holder, NULL, TRUE, *current_real);
- continue;
- }
- append++;
- alt_append = append;
- alt_append1 = alt_append;
- append = strchr (append, ':');
- alt_append = strchr (alt_append, '.');
- if (alt_append && g_str_has_prefix (alt_append, ".info")) {
- broken = TRUE;
- alt_append++;
- alt_append = strchr (alt_append, '.');
- }
- alt_append1 = strchr (alt_append1, ',');
- if (!append && !alt_append && !alt_append1) {
- info_body_text (holder, NULL, TRUE, *current_real);
- continue;
- }
- if (!append || alt_append || alt_append1) {
- if (!append) {
- if (alt_append) append = alt_append;
- else append = alt_append1;
- }
- if ((alt_append && alt_append < append))
- append = alt_append;
- if (alt_append1 && alt_append1 < append)
- append = alt_append1;
- }
- append++;
- url = g_strndup (*current_real, append - (*current_real));
-
- /* By now, we got 2 things. First, is append which is the (hopefully)
- * non-link text. Second, we got a url.
- * The url can be in several forms:
- * 1. linkend::
- * 2. linkend:(infofile)Linkend.
- * 3. Title: Linkend.
- * 4. Title: Linkend, (pretty sure this is just broken)
- * 5. Title: (infofile.info)Linkend.
- * All possibilities should have been picked up.
- * Here:
- * Clean up the split. Should be left with a real url and
- * a list of fragments that should be linked
- * Also goes through and removes extra spaces, leaving only one
- * space in place of many
+ gchar *url, **urls, **ulink;
+ gchar *append;
+ gchar *alt_append, *alt_append1;
+ gchar *link_text;
+ gchar *href = NULL;
+ gchar *break_point = NULL;
+ gboolean broken = FALSE;
+ if (first) {
+ /* The first node is special. It doesn't have a note ref at the
+ * start, so we can just add it and forget about it.
*/
- urls = g_strsplit (url, "\n", -1);
- break_point = strchr (url, '\n');
- while (break_point) {
- *break_point = ' ';
- break_point = strchr (++break_point, '\n');
+ first = FALSE;
+ info_body_text (*node, &paragraph, NULL, TRUE, (*current));
+ continue;
+ }
+
+ /* If we got to here, we now gotta parse the note reference */
+ append = strchr (*current, ':');
+ if (!append) {
+ info_body_text (*node, &paragraph, NULL, TRUE, *current);
+ continue;
+ }
+ append++;
+ alt_append = append;
+ alt_append1 = alt_append;
+ append = strchr (append, ':');
+ alt_append = strchr (alt_append, '.');
+ if (alt_append && g_str_has_prefix (alt_append, ".info")) {
+ broken = TRUE;
+ alt_append++;
+ alt_append = strchr (alt_append, '.');
+ }
+ alt_append1 = strchr (alt_append1, ',');
+ if (!append && !alt_append && !alt_append1) {
+ info_body_text (*node, &paragraph, NULL, TRUE, *current);
+ continue;
+ }
+ if (!append || alt_append || alt_append1) {
+ if (!append) {
+ if (alt_append) append = alt_append;
+ else append = alt_append1;
}
- break_point = strchr (url, ' ');
- while (break_point) {
- if (*(break_point+1) == ' ') {
- /* Massive space. Fix. */
- gchar *next = break_point;
- gchar *url_copy;
- while (*next == ' ')
- next++;
- next--;
- url_copy = g_strndup (url, break_point-url);
- g_free (url);
- url = g_strconcat (url_copy, next, NULL);
- break_point = strchr (url, ' ');
- g_free (url_copy);
- } else {
- break_point++;
- break_point = strchr (break_point, ' ');
- }
+ if ((alt_append && alt_append < append))
+ append = alt_append;
+ if (alt_append1 && alt_append1 < append)
+ append = alt_append1;
+ }
+ append++;
+ url = g_strndup (*current, append - (*current));
+
+ /* By now, we got 2 things. First, is append which is the (hopefully)
+ * non-link text. Second, we got a url.
+ * The url can be in several forms:
+ * 1. linkend::
+ * 2. linkend:(infofile)Linkend.
+ * 3. Title: Linkend.
+ * 4. Title: Linkend, (pretty sure this is just broken)
+ * 5. Title: (infofile.info)Linkend.
+ * All possibilities should have been picked up.
+ * Here:
+ * Clean up the split. Should be left with a real url and
+ * a list of fragments that should be linked
+ * Also goes through and removes extra spaces, leaving only one
+ * space in place of many
+ */
+ urls = g_strsplit (url, "\n", -1);
+ break_point = strchr (url, '\n');
+ while (break_point) {
+ *break_point = ' ';
+ break_point = strchr (++break_point, '\n');
+ }
+ break_point = strchr (url, ' ');
+ while (break_point) {
+ if (*(break_point+1) == ' ') {
+ /* Massive space. Fix. */
+ gchar *next = break_point;
+ gchar *url_copy;
+ while (*next == ' ')
+ next++;
+ next--;
+ url_copy = g_strndup (url, break_point-url);
+ g_free (url);
+ url = g_strconcat (url_copy, next, NULL);
+ break_point = strchr (url, ' ');
+ g_free (url_copy);
+ } else {
+ break_point++;
+ break_point = strchr (break_point, ' ');
}
- if (url[strlen(url)-1] == '.') { /* The 2nd or 3rd sort of link */
- gchar *stop = NULL;
- gchar *lurl = NULL;
- gchar *zloc = NULL;
- stop = strchr (url, ':');
- lurl = strchr (stop, '(');
- if (!lurl) { /* 3rd type of link */
- gchar *link;
- gint length;
- stop++;
- link = g_strdup (stop);
- link = g_strstrip (link);
- length = strlen (link) - 1;
- link[length] = '\0';
- href = g_strconcat ("xref:", link, NULL);
- link[length] = 'a';
- g_free (link);
-
-
- } else { /* 2nd type of link. Easy. Provided .info is neglected ;) */
- if (broken) {
- gchar *new_url;
- gchar *info;
- gchar *stripped;
-
- new_url = g_strdup (lurl);
- info = strstr (new_url, ".info)");
- stripped = g_strndup (new_url, info-new_url);
- info +=5;
- lurl = g_strconcat (stripped, info, NULL);
- g_free (stripped);
- g_free (new_url);
- }
- zloc = &(lurl[strlen(lurl)-1]);
- *zloc = '\0';
- href = g_strconcat ("info:", lurl, NULL);
- *zloc = 'a';
- }
- } else { /* First kind of link */
- gchar *tmp1;
- gchar *frag;
-
- tmp1 = strchr (url, ':');
- if (!tmp1)
- frag = g_strdup (url);
- else
- frag = g_strndup (url, tmp1 - url);
- g_strstrip (frag);
- gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag);
- href = g_strconcat ("xref:", frag, NULL);
- g_free (frag);
+ }
+ if (url[strlen(url)-1] == '.') { /* The 2nd or 3rd sort of link */
+ gchar *stop = NULL;
+ gchar *lurl = NULL;
+ gchar *zloc = NULL;
+ stop = strchr (url, ':');
+ lurl = strchr (stop, '(');
+ if (!lurl) { /* 3rd type of link */
+ gchar *link;
+ gint length;
+ stop++;
+ link = g_strdup (stop);
+ link = g_strstrip (link);
+ length = strlen (link) - 1;
+ link[length] = '\0';
+ href = g_strconcat ("xref:", link, NULL);
+ link[length] = 'a';
+ g_free (link);
+
+
+ } else { /* 2nd type of link. Easy. Provided .info is neglected ;) */
+ if (broken) {
+ gchar *new_url;
+ gchar *info;
+ gchar *stripped;
+
+ new_url = g_strdup (lurl);
+ info = strstr (new_url, ".info)");
+ stripped = g_strndup (new_url, info-new_url);
+ info +=5;
+ lurl = g_strconcat (stripped, info, NULL);
+ g_free (stripped);
+ g_free (new_url);
+ }
+ zloc = &(lurl[strlen(lurl)-1]);
+ *zloc = '\0';
+ href = g_strconcat ("info:", lurl, NULL);
+ *zloc = 'a';
}
- for (ulink = urls; *ulink != NULL; ulink++) {
- if (ulink == urls)
- link_text = g_strconcat ("*Note", *ulink, NULL);
- else {
- gchar *spacing = *ulink;
- gchar *tmp;
- gint count = 0;
- while (*spacing == ' ') {
- spacing++;
- count++;
- }
- if (spacing != *ulink) {
- if (count > 1)
- spacing-=2;
- tmp = g_strndup (*ulink, spacing-*ulink);
- if (count > 1)
- spacing+=2;
- xmlNewTextChild (holder, NULL, BAD_CAST "spacing",
- BAD_CAST tmp);
- g_free (tmp);
- link_text = g_strdup (spacing);
- } else {
- link_text = g_strdup (*ulink);
- }
- }
- ref1 = xmlNewTextChild (holder, NULL, BAD_CAST "a",
- BAD_CAST link_text);
- if (*(ulink+1) != NULL)
- info_body_text (holder, NULL, FALSE, "");
+ } else { /* First kind of link */
+ gchar *tmp1;
+ gchar *frag;
+
+ tmp1 = strchr (url, ':');
+ if (!tmp1)
+ frag = g_strdup (url);
+ else
+ frag = g_strndup (url, tmp1 - url);
+ g_strstrip (frag);
+ gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag);
+ href = g_strconcat ("xref:", frag, NULL);
+ g_free (frag);
+ }
- g_free (link_text);
- xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
+ /* Check we've got a valid paragraph node */
+ if (!paragraph) {
+ paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL);
+ }
+
+ for (ulink = urls; *ulink != NULL; ulink++) {
+ if (ulink == urls)
+ link_text = g_strconcat ("*Note", *ulink, NULL);
+ else {
+ gchar *spacing = *ulink;
+ gchar *tmp;
+ gint count = 0;
+ while (*spacing == ' ') {
+ spacing++;
+ count++;
+ }
+ if (spacing != *ulink) {
+ if (count > 1)
+ spacing-=2;
+ tmp = g_strndup (*ulink, spacing-*ulink);
+ if (count > 1)
+ spacing+=2;
+ xmlNewTextChild (paragraph, NULL, BAD_CAST "spacing",
+ BAD_CAST tmp);
+ g_free (tmp);
+ link_text = g_strdup (spacing);
+ } else {
+ link_text = g_strdup (*ulink);
+ }
}
- g_strfreev (urls);
- /* Finally, we can add the text as required */
- info_body_text (holder, NULL, TRUE, append);
- g_free (url);
- g_free (href);
+ ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a",
+ BAD_CAST link_text);
+ if (*(ulink+1) != NULL)
+ info_body_text (*node, &paragraph, NULL, FALSE, "");
+
+ g_free (link_text);
+ xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
}
- g_strfreev (subnotes);
+ g_strfreev (urls);
+ /* Finally, we can add the text as required */
+ info_body_text (*node, &paragraph, NULL, TRUE, append);
+ g_free (url);
+ g_free (href);
}
g_strfreev (notes);
}
diff --git a/stylesheets/info2html.xsl.in b/stylesheets/info2html.xsl.in
index 1117a80..a97b054 100644
--- a/stylesheets/info2html.xsl.in
+++ b/stylesheets/info2html.xsl.in
@@ -107,11 +107,12 @@ a.navbar-next::after {
<!-- = Normal Matches = -->
<xsl:template match="para">
- <span class="fixed">
- <xsl:value-of select="node()"/>
- <xsl:text>
- </xsl:text>
- </span>
+ <p>
+ <span class="fixed">
+ <!-- Apply templates for <a> tags and copy text straight through. -->
+ <xsl:apply-templates select="./text()|*"/>
+ </span>
+ </p>
</xsl:template>
<xsl:template match="para1">
@@ -174,8 +175,4 @@ a.navbar-next::after {
</xsl:element>
</xsl:template>
-<xsl:template match="noteholder">
- <xsl:apply-templates select="node()[not(self::noteholder)]"/>
-</xsl:template>
-
</xsl:stylesheet>
--
1.7.1
From 45762b7f91b57038f893df6e6221db0bd7fbe255 Mon Sep 17 00:00:00 2001
From: Rupert Swarbrick <rswarbrick gmail com>
Date: Sat, 11 Sep 2010 22:21:19 +0100
Subject: [PATCH 4/4] Render multi-line links correctly.
---
libyelp/yelp-info-parser.c | 50 +++++++++++++++-----------------------------
1 files changed, 17 insertions(+), 33 deletions(-)
diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c
index a85f733..d4ef7bc 100644
--- a/libyelp/yelp-info-parser.c
+++ b/libyelp/yelp-info-parser.c
@@ -1377,6 +1377,9 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
append++;
url = g_strndup (*current, append - (*current));
+ /* Save a copy of the unadulterated link text for later. */
+ link_text = g_strconcat ("*Note", url, NULL);
+
/* By now, we got 2 things. First, is append which is the (hopefully)
* non-link text. Second, we got a url.
* The url can be in several forms:
@@ -1475,42 +1478,23 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree)
paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL);
}
- for (ulink = urls; *ulink != NULL; ulink++) {
- if (ulink == urls)
- link_text = g_strconcat ("*Note", *ulink, NULL);
- else {
- gchar *spacing = *ulink;
- gchar *tmp;
- gint count = 0;
- while (*spacing == ' ') {
- spacing++;
- count++;
- }
- if (spacing != *ulink) {
- if (count > 1)
- spacing-=2;
- tmp = g_strndup (*ulink, spacing-*ulink);
- if (count > 1)
- spacing+=2;
- xmlNewTextChild (paragraph, NULL, BAD_CAST "spacing",
- BAD_CAST tmp);
- g_free (tmp);
- link_text = g_strdup (spacing);
- } else {
- link_text = g_strdup (*ulink);
- }
- }
- ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a",
- BAD_CAST link_text);
- if (*(ulink+1) != NULL)
- info_body_text (*node, &paragraph, NULL, FALSE, "");
+ /*
+ Now we're supposed to actually render the link. I have a list of
+ bits of URL and actually this is really easy - I want to have
+ the link *text* exactly the same as it appeared in the .info
+ file, so don't use the list of strings urls, instead use the
+ whole lot: url (complete with embedded newlines etc.)
+ */
+ ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a",
+ BAD_CAST link_text);
+ g_free (link_text);
+ xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
- g_free (link_text);
- xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href);
- }
g_strfreev (urls);
- /* Finally, we can add the text as required */
+
+ /* Finally, we can add the following text as required */
info_body_text (*node, &paragraph, NULL, TRUE, append);
+
g_free (url);
g_free (href);
}
--
1.7.1
Attachment:
pgp1Jm02G5iqm.pgp
Description: PGP signature