Hi,

After disappearing for ages (sorry!), I've hacked together some patches that seem to improve info file handling here. I can open a bug etc. if it'd be better, but maybe this is a good place for someone to have a look at them? Patches attached.

What they do:

- Use blank lines to detect paragraphs and output rather more semantic HTML.
- Parse the menus more helpfully and display them as a <ul>.
- Correctly format multi-line links (on my system at least, there's one at info:info, so it's quite noticeable).

I'd love to hear if I've introduced any regressions. There are still some problems (from before!). Most notably, there seems to be a problem with spaces in links, or maybe with links between different info files (open info:info, then go to "expert", then click on a texinfo link). I'll try to work out what's going on soon.

Rupert
From 7059753590f6f6371b3b04880e82eeb6edb57ce0 Mon Sep 17 00:00:00 2001 From: Rupert Swarbrick <rswarbrick gmail com> Date: Wed, 16 Jun 2010 10:32:20 +0100 Subject: [PATCH 1/4] Support for headings in info files. --- libyelp/yelp-info-parser.c | 173 +++++++++++++++++++++++++++++++++++++----- stylesheets/info2html.xsl.in | 17 ++++ 2 files changed, 170 insertions(+), 20 deletions(-) diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c index 3310794..7d32905 100644 --- a/libyelp/yelp-info-parser.c +++ b/libyelp/yelp-info-parser.c @@ -1,4 +1,4 @@ -/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil -*- */ /* * Copyright (C) 2005 Davyd Madeley <davyd madeley id au> * @@ -58,8 +58,13 @@ void fix_tag_table (gchar *offset, TagTableFix *a); void info_process_text_notes (xmlNodePtr *node, gchar *content, - GtkTreeStore *tree); + GtkTreeStore + *tree); +/* + Used to output the correct <heading level="?" /> tag. + */ +static const gchar* level_headings[] = { NULL, "1", "2", "3" }; static GHashTable * info_image_get_attributes (gchar const* string) @@ -141,15 +146,144 @@ info_insert_image (xmlNodePtr parent, GMatchInfo *match_info) } /* - Convert body text CONTENT to xml nodes, processing info image tags - when found. IWBN add a regex match for *Note: here and call the - *Note ==> <a href> logic of info_process_text_notes from here. + If every element of `str' is `ch' then return TRUE, else FALSE. */ -static xmlNodePtr -info_body_text (xmlNodePtr parent, xmlNsPtr ns, gchar const *name, gchar const *content) +static gboolean +string_all_char_p (const gchar* str, gchar ch) +{ + for (; *str; str++) { + if (*str != ch) return FALSE; + } + return TRUE; +} + +/* + If `line' is a line of '*', '=' or '-', return 1,2,3 respectively + for the heading level. If it's anything else, return 0. 
+ */ +static int +header_underline_level (const gchar* line) +{ + if (*line != '*' && *line != '=' && *line != '-') + return 0; + + if (string_all_char_p (line, '*')) return 1; + if (string_all_char_p (line, '=')) return 2; + if (string_all_char_p (line, '-')) return 3; + + return 0; +} + +/* + Use g_strjoinv to join up the strings from `strings', but they might + not actually be a null-terminated array. `end' should be strings+n, + where I want the first n strings (strings+0, ..., strings+(n-1)). It + shouldn't point outside of the array allocated, but it can point at + the null string at the end. + */ +static gchar* +join_strings_subset (const gchar *separator, + gchar** strings, gchar** end) +{ + g_assert(end > strings); + + gchar *ptr = *end; + *end = NULL; + + gchar *glob = g_strjoinv (separator, strings); + *end = ptr; + return glob; +} + +/* + Create a text node, child of `parent', with the lines strictly + between `first' and `last'. +*/ +static void +lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns, + gboolean inline_p, + gchar** first, gchar** last) { - if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)) - return xmlNewTextChild (parent, ns, BAD_CAST name, BAD_CAST content); + /* TODO? Currently we're copying the split strings again, which is + less efficient than somehow storing lengths and using a sort of + window on `content'. But that's much more difficult, so unless + there's a problem, let's go with the stupid approach. */ + gchar *glob; + if (last > first) { + glob = join_strings_subset ("\n", first, last); + xmlNewTextChild (parent, ns, + inline_p ? BAD_CAST "para1" : BAD_CAST "para", + BAD_CAST glob); + g_free (glob); + } +} + +/* + Convert body text CONTENT to xml nodes. This function is responsible + for spotting headings etc and splitting them out correctly. + + If `inline_p' is true, end with a <para1> tag. Otherwise, end with a + <para> tag. 
+ + TODO: IWBN add a regex match for *Note: here and call the *Note ==> + <a href> logic of info_process_text_notes from here. + */ +static void +info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns, + gboolean inline_p, const gchar *content) +{ + /* The easiest things to spot are headings: they look like a line of + * '*','=' or '-', corresponding to heading levels 1,2 or 3. To spot + * them, we split content into single lines and work with them. */ + gchar **lines = g_strsplit (content, "\n", 0); + gchar **first = lines, **last = lines+1; + int header_level; + xmlNodePtr header_node; + + /* Deal with the possibility that `content' is empty */ + if (*lines == NULL) { + if (!inline_p) { + xmlNewTextChild (parent, NULL, BAD_CAST "para", BAD_CAST ""); + } + return; + } + + for (; *last; last++) { + header_level = header_underline_level (*last); + if (header_level) { + /* Write out any lines beforehand */ + lines_subset_text_child (parent, ns, FALSE, first, last-1); + /* Now write out the actual header line */ + header_node = xmlNewTextChild (parent, ns, BAD_CAST "header", + BAD_CAST *(last-1)); + xmlNewProp (header_node, BAD_CAST "level", + BAD_CAST level_headings[header_level]); + + first = last+1; + last = first+1; + } + } + /* Write out any lines left */ + lines_subset_text_child (parent, ns, inline_p, first, last); + + g_strfreev (lines); +} + +/* + info_body_text is responsible for taking a hunk of the info page's + body and turning it into paragraph tags. It searches out images and + marks them up properly if necessary. + + It uses info_body_parse_text to mark up the actual bits of text. 
+ */ +static void +info_body_text (xmlNodePtr parent, xmlNsPtr ns, + gboolean inline_p, gchar const *content) +{ + if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)) { + info_body_parse_text (parent, ns, inline_p, content); + return; + } gint content_len = strlen (content); gint pos = 0; @@ -164,16 +298,15 @@ info_body_text (xmlNodePtr parent, xmlNsPtr ns, gchar const *name, gchar const * &image_start, &image_end); gchar *before = g_strndup (&content[pos], image_start - pos); pos = image_end + 1; - xmlNewTextChild (parent, NULL, BAD_CAST "para1", BAD_CAST (before)); + info_body_parse_text (parent, NULL, TRUE, before); g_free (before); if (image_found) info_insert_image (parent, match_info); g_match_info_next (match_info, NULL); } gchar *after = g_strndup (&content[pos], content_len - pos); - xmlNewTextChild (parent, NULL, BAD_CAST "para1", BAD_CAST (after)); + info_body_parse_text (parent, NULL, TRUE, after); g_free (after); - return 0; } /* Part 1: Parse File Into Tree Store */ @@ -840,7 +973,7 @@ parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter) BAD_CAST "Section", NULL); if (!notes) - info_body_text (newnode, NULL, "para", page_content); + info_body_text (newnode, NULL, FALSE, page_content); else { /* Handle notes here */ @@ -1005,7 +1138,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node, tmp = g_strconcat (split[0], "\n* Menu:", NULL); if (!notes) - info_body_text (newnode, NULL, "para", tmp); + info_body_text (newnode, NULL, FALSE, tmp); else { info_process_text_notes (&newnode, tmp, tree); } @@ -1119,7 +1252,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree) * start, so we can just add it and forget about it. 
*/ first = FALSE; - info_body_text (holder, NULL, "para1", (*current_real)); + info_body_text (holder, NULL, TRUE, (*current_real)); continue; } /* If we got to here, we now gotta parse the note reference */ @@ -1128,13 +1261,13 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree) /* Special type of note that isn't really a note, but pretends * it is */ - info_body_text (holder, NULL, "para1", + info_body_text (holder, NULL, TRUE, g_strconcat ("*Note", *current_real, NULL)); continue; } append = strchr (*current_real, ':'); if (!append) { - info_body_text (holder, NULL, "para1", *current_real); + info_body_text (holder, NULL, TRUE, *current_real); continue; } append++; @@ -1149,7 +1282,7 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree) } alt_append1 = strchr (alt_append1, ','); if (!append && !alt_append && !alt_append1) { - info_body_text (holder, NULL, "para1", *current_real); + info_body_text (holder, NULL, TRUE, *current_real); continue; } if (!append || alt_append || alt_append1) { @@ -1285,14 +1418,14 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree) ref1 = xmlNewTextChild (holder, NULL, BAD_CAST "a", BAD_CAST link_text); if (*(ulink+1) != NULL) - info_body_text (holder, NULL, "para", ""); + info_body_text (holder, NULL, FALSE, ""); g_free (link_text); xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href); } g_strfreev (urls); /* Finally, we can add the text as required */ - info_body_text (holder, NULL, "para1", append); + info_body_text (holder, NULL, TRUE, append); g_free (url); g_free (href); } diff --git a/stylesheets/info2html.xsl.in b/stylesheets/info2html.xsl.in index ec75878..c029148 100644 --- a/stylesheets/info2html.xsl.in +++ b/stylesheets/info2html.xsl.in @@ -115,6 +115,23 @@ a.navbar-next::after { <xsl:value-of select="node()"/> </xsl:template> +<xsl:template match="header"> + <xsl:choose> + <xsl:when test='@level = 1'> + <h1><xsl:value-of 
select="node()"/></h1> + </xsl:when> + <xsl:when test='@level = 2'> + <h2><xsl:value-of select="node()"/></h2> + </xsl:when> + <xsl:when test='@level = 3'> + <h3><xsl:value-of select="node()"/></h3> + </xsl:when> + <xsl:otherwise> + <h1>(Unknown heading level) <xsl:value-of select="node()"/></h1> + </xsl:otherwise> + </xsl:choose> +</xsl:template> + <xsl:template match="spacing"> <xsl:value-of select="node()"/> </xsl:template> -- 1.7.1
From d1369b91a2bbde04a94911123c4d583087b0b692 Mon Sep 17 00:00:00 2001 From: Rupert Swarbrick <rswarbrick gmail com> Date: Fri, 3 Sep 2010 00:09:31 +0100 Subject: [PATCH 2/4] Display menus as <ul>'s, rather than the original text. --- libyelp/yelp-info-parser.c | 87 +++++++++++++++++++++++++++++++++-------- stylesheets/info2html.xsl.in | 24 +++++++++-- 2 files changed, 89 insertions(+), 22 deletions(-) diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c index 7d32905..1605ecf 100644 --- a/libyelp/yelp-info-parser.c +++ b/libyelp/yelp-info-parser.c @@ -248,7 +248,11 @@ info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns, return; } + /* Use a pair of pointers, first and last, which point to two lines, + * the chunk of the body we're displaying (inclusive) */ for (; *last; last++) { + + /* Check for a header */ header_level = header_underline_level (*last); if (header_level) { /* Write out any lines beforehand */ @@ -1120,6 +1124,16 @@ get_menuoptions (gchar *line, gchar **title, gchar **ref, gchar **desc, return TRUE; } +/* Find the first non-space character in str or return pointer to the + * '\0' if there isn't one. */ +static gchar* +first_non_space (gchar* str) +{ + /* As long as str is null terminated, this is ok! 
*/ + while (*str == ' ') str++; + return str; +} + xmlNodePtr yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node, gchar *page_content, gboolean notes) @@ -1127,7 +1141,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node, gchar **split; gchar **menuitems; gchar *tmp = NULL; - xmlNodePtr newnode; + xmlNodePtr newnode, menu_node, mholder = NULL; int i=0; split = g_strsplit (page_content, "* Menu:", 2); @@ -1136,37 +1150,69 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node, BAD_CAST "Section", NULL); - tmp = g_strconcat (split[0], "\n* Menu:", NULL); if (!notes) - info_body_text (newnode, NULL, FALSE, tmp); + info_body_text (newnode, NULL, FALSE, split[0]); else { - info_process_text_notes (&newnode, tmp, tree); + info_process_text_notes (&newnode, split[0], tree); } - g_free (tmp); menuitems = g_strsplit (split[1], "\n", -1); g_strfreev (split); + /* The output xml should look something like the following: + + <menu> + <menuholder> + <a href="xref:Help-Inv">Help-Inv</a> + <para1>Invisible text in Emacs Info.</para1> + </menuholder> + <menuholder> + <a href="xref:Help-M">Help-M</a> + <para1>Menus.</para1> + </menuholder> + ... + </menu> + + (from the top page of info:info). Note the absence of *'s and + ::'s on the links. + + If there's a line with no "* Blah::", it looks like a child of + the previous menu item so (for i > 0) deal with that correctly by + not "closing" the <menuholder> tag until we find the next + start. 
+ */ + + if (menuitems[0] != NULL) { + /* If there are any menu items, make the <menu> node */ + menu_node = xmlNewChild (newnode, NULL, BAD_CAST "menu", NULL); + } + while (menuitems[i] != NULL) { gboolean menu = FALSE; gchar *title = NULL; gchar *ref = NULL; gchar *desc = NULL; gchar *xref = NULL; - xmlNodePtr mholder; xmlNodePtr ref1; menu = get_menuoptions (menuitems[i], &title, &ref, &desc, &xref); - + + if (menu && (*title == '\0' || *(title + 1) == '\0')) { + g_warning ("Info title unexpectedly short for menu item (%s)", + menuitems[i]); + menu = FALSE; + } + if (menu) { - mholder = xmlNewChild (newnode, NULL, BAD_CAST "menuholder", NULL); + mholder = xmlNewChild (menu_node, NULL, BAD_CAST "menuholder", NULL); gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &xref); if (ref == NULL) { /* A standard type menu */ - tmp = g_strconcat (title, "::", NULL); + /* title+2 skips the "* ". We know we haven't jumped over the + end of the string because strlen (title) >= 3 */ ref1 = xmlNewTextChild (mholder, NULL, BAD_CAST "a", - BAD_CAST tmp); - g_free (tmp); + BAD_CAST title+2); + tmp = g_strconcat ("xref:", xref, NULL); xmlNewProp (ref1, BAD_CAST "href", BAD_CAST tmp); g_free (tmp); @@ -1200,12 +1246,19 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node, g_free (tmp); g_free (sp); } - xmlNewTextChild (mholder, NULL, BAD_CAST "para", - BAD_CAST desc); - } else { - xmlNewTextChild (newnode, NULL, BAD_CAST "para", - BAD_CAST menuitems[i]); - + + tmp = g_strconcat ("\n", first_non_space (desc), NULL); + xmlNewTextChild (mholder, NULL, BAD_CAST "para1", + BAD_CAST tmp); + g_free (tmp); + + } + else if (*(menuitems[i]) != '\0') { + tmp = g_strconcat ("\n", first_non_space (menuitems[i]), NULL); + xmlNewTextChild (mholder ? 
mholder : menu_node, + NULL, BAD_CAST "para1", + BAD_CAST tmp); + g_free (tmp); } i++; g_free (title); diff --git a/stylesheets/info2html.xsl.in b/stylesheets/info2html.xsl.in index c029148..1117a80 100644 --- a/stylesheets/info2html.xsl.in +++ b/stylesheets/info2html.xsl.in @@ -47,7 +47,8 @@ <xsl:param name="left"/> <xsl:param name="right"/> <xsl:text> -div.body { white-space: pre; font-family: monospace; } +div.body { font-family: monospace; } +span.fixed { white-space: pre; } <!-- navbar from mal2html, possibly move to html.xsl --> div.navbar { margin: 0 0 1em 0; @@ -106,13 +107,17 @@ a.navbar-next::after { <!-- = Normal Matches = --> <xsl:template match="para"> - <xsl:value-of select="node()"/> - <xsl:text> + <span class="fixed"> + <xsl:value-of select="node()"/> + <xsl:text> </xsl:text> + </span> </xsl:template> <xsl:template match="para1"> - <xsl:value-of select="node()"/> + <span class="fixed"> + <xsl:value-of select="node()"/> + </span> </xsl:template> <xsl:template match="header"> @@ -156,8 +161,17 @@ a.navbar-next::after { </xsl:element> </xsl:template> +<xsl:template match="menu"> + <xsl:element name="p">Menu:</xsl:element> + <xsl:element name="ul"> + <xsl:apply-templates /> + </xsl:element> +</xsl:template> + <xsl:template match="menuholder"> - <xsl:apply-templates select="node()[not(self::menuholder)]"/> + <xsl:element name="li"> + <xsl:apply-templates /> + </xsl:element> </xsl:template> <xsl:template match="noteholder"> -- 1.7.1
From 7ede37523e6fe60ae13fe5e9f98b356d9da4b4d4 Mon Sep 17 00:00:00 2001 From: Rupert Swarbrick <rswarbrick gmail com> Date: Fri, 10 Sep 2010 11:42:23 +0100 Subject: [PATCH 3/4] Parse info files into paragraphs (separated by blank lines). --- libyelp/yelp-info-parser.c | 454 ++++++++++++++++++++++-------------------- stylesheets/info2html.xsl.in | 15 +- 2 files changed, 248 insertions(+), 221 deletions(-) diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c index 1605ecf..a85f733 100644 --- a/libyelp/yelp-info-parser.c +++ b/libyelp/yelp-info-parser.c @@ -128,7 +128,8 @@ info_insert_image (xmlNodePtr parent, GMatchInfo *match_info) source = (gchar*)g_hash_table_lookup (h, "src"); if (!h || !source || !*source) - return xmlNewTextChild (parent, NULL, BAD_CAST "para1", BAD_CAST "[broken image]"); + return xmlNewTextChild (parent, NULL, BAD_CAST "para", + BAD_CAST "[broken image]"); gchar *title = (gchar*)g_hash_table_lookup (h, "title"); gchar *text = (gchar*)g_hash_table_lookup (h, "text"); @@ -201,7 +202,6 @@ join_strings_subset (const gchar *separator, */ static void lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns, - gboolean inline_p, gchar** first, gchar** last) { /* TODO? Currently we're copying the split strings again, which is @@ -209,11 +209,10 @@ lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns, window on `content'. But that's much more difficult, so unless there's a problem, let's go with the stupid approach. */ gchar *glob; + if (last > first) { glob = join_strings_subset ("\n", first, last); - xmlNewTextChild (parent, ns, - inline_p ? BAD_CAST "para1" : BAD_CAST "para", - BAD_CAST glob); + xmlAddChild (parent, xmlNewText (BAD_CAST glob)); g_free (glob); } } @@ -222,21 +221,24 @@ lines_subset_text_child (xmlNodePtr parent, xmlNsPtr ns, Convert body text CONTENT to xml nodes. This function is responsible for spotting headings etc and splitting them out correctly. 
+ paragraph is as described in info_body_text, but cannot be null. + If `inline_p' is true, end with a <para1> tag. Otherwise, end with a - <para> tag. + <para> tag. TODO: IWBN add a regex match for *Note: here and call the *Note ==> <a href> logic of info_process_text_notes from here. */ static void -info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns, +info_body_parse_text (xmlNodePtr parent, xmlNodePtr *paragraph, + xmlNsPtr ns, gboolean inline_p, const gchar *content) { /* The easiest things to spot are headings: they look like a line of * '*','=' or '-', corresponding to heading levels 1,2 or 3. To spot * them, we split content into single lines and work with them. */ gchar **lines = g_strsplit (content, "\n", 0); - gchar **first = lines, **last = lines+1; + gchar **first = lines, **last = lines; int header_level; xmlNodePtr header_node; @@ -252,11 +254,27 @@ info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns, * the chunk of the body we're displaying (inclusive) */ for (; *last; last++) { + /* Check for a blank line */ + if (**last == '\0') { + if (last != first) { + if (!*paragraph) { + *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL); + } + lines_subset_text_child (*paragraph, ns, first, last); + } + /* On the next iteration, last==first both pointing at the next + line. 
*/ + first = last+1; + *paragraph = NULL; + + continue; + } + /* Check for a header */ header_level = header_underline_level (*last); if (header_level) { /* Write out any lines beforehand */ - lines_subset_text_child (parent, ns, FALSE, first, last-1); + lines_subset_text_child (parent, ns, first, last-1); /* Now write out the actual header line */ header_node = xmlNewTextChild (parent, ns, BAD_CAST "header", BAD_CAST *(last-1)); @@ -264,11 +282,15 @@ info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns, BAD_CAST level_headings[header_level]); first = last+1; - last = first+1; + last = first-1; } } + /* Write out any lines left */ - lines_subset_text_child (parent, ns, inline_p, first, last); + if (!*paragraph) { + *paragraph = xmlNewChild (parent, ns, BAD_CAST "para", NULL); + } + lines_subset_text_child (*paragraph, ns, first, last); g_strfreev (lines); } @@ -278,14 +300,21 @@ info_body_parse_text (xmlNodePtr parent, xmlNsPtr ns, body and turning it into paragraph tags. It searches out images and marks them up properly if necessary. + parent should be the node in which we're currently storing text and + paragraph a pointer to a <para> tag or NULL. At blank lines, we + finish with the current para tag and switch to a new one. + It uses info_body_parse_text to mark up the actual bits of text. 
*/ static void -info_body_text (xmlNodePtr parent, xmlNsPtr ns, +info_body_text (xmlNodePtr parent, xmlNodePtr *paragraph, xmlNsPtr ns, gboolean inline_p, gchar const *content) { + xmlNodePtr thepara = NULL; + if (paragraph == NULL) paragraph = &thepara; + if (!strstr (content, INFO_C_IMAGE_TAG_OPEN)) { - info_body_parse_text (parent, ns, inline_p, content); + info_body_parse_text (parent, paragraph, ns, inline_p, content); return; } @@ -293,6 +322,7 @@ info_body_text (xmlNodePtr parent, xmlNsPtr ns, gint pos = 0; GRegex *regex = g_regex_new ("(" INFO_C_IMAGE_TAG_OPEN_RE "((?:[^" INFO_TAG_1 "]|[^" INFO_C_TAG_0 "]+" INFO_TAG_1 ")*)" INFO_C_TAG_CLOSE_RE ")", 0, 0, NULL); GMatchInfo *match_info; + g_regex_match (regex, content, 0, &match_info); while (g_match_info_matches (match_info)) { @@ -302,14 +332,18 @@ info_body_text (xmlNodePtr parent, xmlNsPtr ns, &image_start, &image_end); gchar *before = g_strndup (&content[pos], image_start - pos); pos = image_end + 1; - info_body_parse_text (parent, NULL, TRUE, before); + info_body_parse_text (parent, paragraph, NULL, TRUE, before); g_free (before); + + /* End the paragraph that was before */ + *paragraph = NULL; + if (image_found) info_insert_image (parent, match_info); g_match_info_next (match_info, NULL); } gchar *after = g_strndup (&content[pos], content_len - pos); - info_body_parse_text (parent, NULL, TRUE, after); + info_body_parse_text (parent, paragraph, NULL, TRUE, after); g_free (after); } @@ -977,8 +1011,8 @@ parse_tree_level (GtkTreeStore *tree, xmlNodePtr *node, GtkTreeIter iter) BAD_CAST "Section", NULL); if (!notes) - info_body_text (newnode, NULL, FALSE, page_content); - + info_body_text (newnode, NULL, NULL, FALSE, page_content); + else { /* Handle notes here */ info_process_text_notes (&newnode, page_content, tree); @@ -1151,7 +1185,7 @@ yelp_info_parse_menu (GtkTreeStore *tree, xmlNodePtr *node, if (!notes) - info_body_text (newnode, NULL, FALSE, split[0]); + info_body_text (newnode, NULL, NULL, FALSE, 
split[0]); else { info_process_text_notes (&newnode, split[0], tree); } @@ -1277,212 +1311,208 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree) { gchar **notes; gchar **current; - xmlNodePtr holder; xmlNodePtr ref1; + xmlNodePtr paragraph = NULL; gboolean first = TRUE; - notes = g_strsplit (content, "*Note", -1); - holder = xmlNewChild (*node, NULL, BAD_CAST "noteholder", NULL); + /* + Split using the regular expression + + \*[Nn]ote(?!_) + + which deals with either case and the last bit is a lookahead so + that we don't split on things of the form *Note:_, which aren't + real notes. + */ + notes = g_regex_split_simple ("\\*[Nn]ote(?!_)", content, 0, 0); for (current = notes; *current != NULL; current++) { - /* Since the notes can be either *Note or *note, we handle the second - * variety here - */ - gchar **subnotes; - gchar **current_real; - - subnotes = g_strsplit (*current, "*note", -1); - for (current_real = subnotes; *current_real != NULL; current_real++) { - gchar *url, **urls, **ulink; - gchar *append; - gchar *alt_append, *alt_append1; - gchar *link_text; - gchar *href = NULL; - gchar *break_point = NULL; - gboolean broken = FALSE; - if (first) { - /* The first node is special. It doesn't have a note ref at the - * start, so we can just add it and forget about it. 
- */ - first = FALSE; - info_body_text (holder, NULL, TRUE, (*current_real)); - continue; - } - /* If we got to here, we now gotta parse the note reference */ - - if (*current_real[0] == '_') { - /* Special type of note that isn't really a note, but pretends - * it is - */ - info_body_text (holder, NULL, TRUE, - g_strconcat ("*Note", *current_real, NULL)); - continue; - } - append = strchr (*current_real, ':'); - if (!append) { - info_body_text (holder, NULL, TRUE, *current_real); - continue; - } - append++; - alt_append = append; - alt_append1 = alt_append; - append = strchr (append, ':'); - alt_append = strchr (alt_append, '.'); - if (alt_append && g_str_has_prefix (alt_append, ".info")) { - broken = TRUE; - alt_append++; - alt_append = strchr (alt_append, '.'); - } - alt_append1 = strchr (alt_append1, ','); - if (!append && !alt_append && !alt_append1) { - info_body_text (holder, NULL, TRUE, *current_real); - continue; - } - if (!append || alt_append || alt_append1) { - if (!append) { - if (alt_append) append = alt_append; - else append = alt_append1; - } - if ((alt_append && alt_append < append)) - append = alt_append; - if (alt_append1 && alt_append1 < append) - append = alt_append1; - } - append++; - url = g_strndup (*current_real, append - (*current_real)); - - /* By now, we got 2 things. First, is append which is the (hopefully) - * non-link text. Second, we got a url. - * The url can be in several forms: - * 1. linkend:: - * 2. linkend:(infofile)Linkend. - * 3. Title: Linkend. - * 4. Title: Linkend, (pretty sure this is just broken) - * 5. Title: (infofile.info)Linkend. - * All possibilities should have been picked up. - * Here: - * Clean up the split. 
Should be left with a real url and - * a list of fragments that should be linked - * Also goes through and removes extra spaces, leaving only one - * space in place of many + gchar *url, **urls, **ulink; + gchar *append; + gchar *alt_append, *alt_append1; + gchar *link_text; + gchar *href = NULL; + gchar *break_point = NULL; + gboolean broken = FALSE; + if (first) { + /* The first node is special. It doesn't have a note ref at the + * start, so we can just add it and forget about it. */ - urls = g_strsplit (url, "\n", -1); - break_point = strchr (url, '\n'); - while (break_point) { - *break_point = ' '; - break_point = strchr (++break_point, '\n'); + first = FALSE; + info_body_text (*node, &paragraph, NULL, TRUE, (*current)); + continue; + } + + /* If we got to here, we now gotta parse the note reference */ + append = strchr (*current, ':'); + if (!append) { + info_body_text (*node, &paragraph, NULL, TRUE, *current); + continue; + } + append++; + alt_append = append; + alt_append1 = alt_append; + append = strchr (append, ':'); + alt_append = strchr (alt_append, '.'); + if (alt_append && g_str_has_prefix (alt_append, ".info")) { + broken = TRUE; + alt_append++; + alt_append = strchr (alt_append, '.'); + } + alt_append1 = strchr (alt_append1, ','); + if (!append && !alt_append && !alt_append1) { + info_body_text (*node, &paragraph, NULL, TRUE, *current); + continue; + } + if (!append || alt_append || alt_append1) { + if (!append) { + if (alt_append) append = alt_append; + else append = alt_append1; } - break_point = strchr (url, ' '); - while (break_point) { - if (*(break_point+1) == ' ') { - /* Massive space. Fix. 
*/ - gchar *next = break_point; - gchar *url_copy; - while (*next == ' ') - next++; - next--; - url_copy = g_strndup (url, break_point-url); - g_free (url); - url = g_strconcat (url_copy, next, NULL); - break_point = strchr (url, ' '); - g_free (url_copy); - } else { - break_point++; - break_point = strchr (break_point, ' '); - } + if ((alt_append && alt_append < append)) + append = alt_append; + if (alt_append1 && alt_append1 < append) + append = alt_append1; + } + append++; + url = g_strndup (*current, append - (*current)); + + /* By now, we got 2 things. First, is append which is the (hopefully) + * non-link text. Second, we got a url. + * The url can be in several forms: + * 1. linkend:: + * 2. linkend:(infofile)Linkend. + * 3. Title: Linkend. + * 4. Title: Linkend, (pretty sure this is just broken) + * 5. Title: (infofile.info)Linkend. + * All possibilities should have been picked up. + * Here: + * Clean up the split. Should be left with a real url and + * a list of fragments that should be linked + * Also goes through and removes extra spaces, leaving only one + * space in place of many + */ + urls = g_strsplit (url, "\n", -1); + break_point = strchr (url, '\n'); + while (break_point) { + *break_point = ' '; + break_point = strchr (++break_point, '\n'); + } + break_point = strchr (url, ' '); + while (break_point) { + if (*(break_point+1) == ' ') { + /* Massive space. Fix. 
*/ + gchar *next = break_point; + gchar *url_copy; + while (*next == ' ') + next++; + next--; + url_copy = g_strndup (url, break_point-url); + g_free (url); + url = g_strconcat (url_copy, next, NULL); + break_point = strchr (url, ' '); + g_free (url_copy); + } else { + break_point++; + break_point = strchr (break_point, ' '); } - if (url[strlen(url)-1] == '.') { /* The 2nd or 3rd sort of link */ - gchar *stop = NULL; - gchar *lurl = NULL; - gchar *zloc = NULL; - stop = strchr (url, ':'); - lurl = strchr (stop, '('); - if (!lurl) { /* 3rd type of link */ - gchar *link; - gint length; - stop++; - link = g_strdup (stop); - link = g_strstrip (link); - length = strlen (link) - 1; - link[length] = '\0'; - href = g_strconcat ("xref:", link, NULL); - link[length] = 'a'; - g_free (link); - - - } else { /* 2nd type of link. Easy. Provided .info is neglected ;) */ - if (broken) { - gchar *new_url; - gchar *info; - gchar *stripped; - - new_url = g_strdup (lurl); - info = strstr (new_url, ".info)"); - stripped = g_strndup (new_url, info-new_url); - info +=5; - lurl = g_strconcat (stripped, info, NULL); - g_free (stripped); - g_free (new_url); - } - zloc = &(lurl[strlen(lurl)-1]); - *zloc = '\0'; - href = g_strconcat ("info:", lurl, NULL); - *zloc = 'a'; - } - } else { /* First kind of link */ - gchar *tmp1; - gchar *frag; - - tmp1 = strchr (url, ':'); - if (!tmp1) - frag = g_strdup (url); - else - frag = g_strndup (url, tmp1 - url); - g_strstrip (frag); - gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag); - href = g_strconcat ("xref:", frag, NULL); - g_free (frag); + } + if (url[strlen(url)-1] == '.') { /* The 2nd or 3rd sort of link */ + gchar *stop = NULL; + gchar *lurl = NULL; + gchar *zloc = NULL; + stop = strchr (url, ':'); + lurl = strchr (stop, '('); + if (!lurl) { /* 3rd type of link */ + gchar *link; + gint length; + stop++; + link = g_strdup (stop); + link = g_strstrip (link); + length = strlen (link) - 1; + link[length] = '\0'; + href = 
g_strconcat ("xref:", link, NULL); + link[length] = 'a'; + g_free (link); + + + } else { /* 2nd type of link. Easy. Provided .info is neglected ;) */ + if (broken) { + gchar *new_url; + gchar *info; + gchar *stripped; + + new_url = g_strdup (lurl); + info = strstr (new_url, ".info)"); + stripped = g_strndup (new_url, info-new_url); + info +=5; + lurl = g_strconcat (stripped, info, NULL); + g_free (stripped); + g_free (new_url); + } + zloc = &(lurl[strlen(lurl)-1]); + *zloc = '\0'; + href = g_strconcat ("info:", lurl, NULL); + *zloc = 'a'; } - for (ulink = urls; *ulink != NULL; ulink++) { - if (ulink == urls) - link_text = g_strconcat ("*Note", *ulink, NULL); - else { - gchar *spacing = *ulink; - gchar *tmp; - gint count = 0; - while (*spacing == ' ') { - spacing++; - count++; - } - if (spacing != *ulink) { - if (count > 1) - spacing-=2; - tmp = g_strndup (*ulink, spacing-*ulink); - if (count > 1) - spacing+=2; - xmlNewTextChild (holder, NULL, BAD_CAST "spacing", - BAD_CAST tmp); - g_free (tmp); - link_text = g_strdup (spacing); - } else { - link_text = g_strdup (*ulink); - } - } - ref1 = xmlNewTextChild (holder, NULL, BAD_CAST "a", - BAD_CAST link_text); - if (*(ulink+1) != NULL) - info_body_text (holder, NULL, FALSE, ""); + } else { /* First kind of link */ + gchar *tmp1; + gchar *frag; + + tmp1 = strchr (url, ':'); + if (!tmp1) + frag = g_strdup (url); + else + frag = g_strndup (url, tmp1 - url); + g_strstrip (frag); + gtk_tree_model_foreach (GTK_TREE_MODEL (tree), resolve_frag_id, &frag); + href = g_strconcat ("xref:", frag, NULL); + g_free (frag); + } - g_free (link_text); - xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href); + /* Check we've got a valid paragraph node */ + if (!paragraph) { + paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL); + } + + for (ulink = urls; *ulink != NULL; ulink++) { + if (ulink == urls) + link_text = g_strconcat ("*Note", *ulink, NULL); + else { + gchar *spacing = *ulink; + gchar *tmp; + gint count = 0; + while (*spacing 
== ' ') { + spacing++; + count++; + } + if (spacing != *ulink) { + if (count > 1) + spacing-=2; + tmp = g_strndup (*ulink, spacing-*ulink); + if (count > 1) + spacing+=2; + xmlNewTextChild (paragraph, NULL, BAD_CAST "spacing", + BAD_CAST tmp); + g_free (tmp); + link_text = g_strdup (spacing); + } else { + link_text = g_strdup (*ulink); + } } - g_strfreev (urls); - /* Finally, we can add the text as required */ - info_body_text (holder, NULL, TRUE, append); - g_free (url); - g_free (href); + ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a", + BAD_CAST link_text); + if (*(ulink+1) != NULL) + info_body_text (*node, &paragraph, NULL, FALSE, ""); + + g_free (link_text); + xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href); } - g_strfreev (subnotes); + g_strfreev (urls); + /* Finally, we can add the text as required */ + info_body_text (*node, &paragraph, NULL, TRUE, append); + g_free (url); + g_free (href); } g_strfreev (notes); } diff --git a/stylesheets/info2html.xsl.in b/stylesheets/info2html.xsl.in index 1117a80..a97b054 100644 --- a/stylesheets/info2html.xsl.in +++ b/stylesheets/info2html.xsl.in @@ -107,11 +107,12 @@ a.navbar-next::after { <!-- = Normal Matches = --> <xsl:template match="para"> - <span class="fixed"> - <xsl:value-of select="node()"/> - <xsl:text> - </xsl:text> - </span> + <p> + <span class="fixed"> + <!-- Apply templates for <a> tags and copy text straight through. --> + <xsl:apply-templates select="./text()|*"/> + </span> + </p> </xsl:template> <xsl:template match="para1"> @@ -174,8 +175,4 @@ a.navbar-next::after { </xsl:element> </xsl:template> -<xsl:template match="noteholder"> - <xsl:apply-templates select="node()[not(self::noteholder)]"/> -</xsl:template> - </xsl:stylesheet> -- 1.7.1
From 45762b7f91b57038f893df6e6221db0bd7fbe255 Mon Sep 17 00:00:00 2001 From: Rupert Swarbrick <rswarbrick gmail com> Date: Sat, 11 Sep 2010 22:21:19 +0100 Subject: [PATCH 4/4] Render multi-line links correctly. --- libyelp/yelp-info-parser.c | 50 +++++++++++++++----------------------------- 1 files changed, 17 insertions(+), 33 deletions(-) diff --git a/libyelp/yelp-info-parser.c b/libyelp/yelp-info-parser.c index a85f733..d4ef7bc 100644 --- a/libyelp/yelp-info-parser.c +++ b/libyelp/yelp-info-parser.c @@ -1377,6 +1377,9 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree) append++; url = g_strndup (*current, append - (*current)); + /* Save a copy of the unadulterated link text for later. */ + link_text = g_strconcat ("*Note", url, NULL); + /* By now, we got 2 things. First, is append which is the (hopefully) * non-link text. Second, we got a url. * The url can be in several forms: @@ -1475,42 +1478,23 @@ info_process_text_notes (xmlNodePtr *node, gchar *content, GtkTreeStore *tree) paragraph = xmlNewChild (*node, NULL, BAD_CAST "para", NULL); } - for (ulink = urls; *ulink != NULL; ulink++) { - if (ulink == urls) - link_text = g_strconcat ("*Note", *ulink, NULL); - else { - gchar *spacing = *ulink; - gchar *tmp; - gint count = 0; - while (*spacing == ' ') { - spacing++; - count++; - } - if (spacing != *ulink) { - if (count > 1) - spacing-=2; - tmp = g_strndup (*ulink, spacing-*ulink); - if (count > 1) - spacing+=2; - xmlNewTextChild (paragraph, NULL, BAD_CAST "spacing", - BAD_CAST tmp); - g_free (tmp); - link_text = g_strdup (spacing); - } else { - link_text = g_strdup (*ulink); - } - } - ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a", - BAD_CAST link_text); - if (*(ulink+1) != NULL) - info_body_text (*node, &paragraph, NULL, FALSE, ""); + /* + Now we're supposed to actually render the link.
I have a list of + bits of URL and actually this is really easy - I want to have + the link *text* exactly the same as it appeared in the .info + file, so don't use the list of strings urls, instead use the + whole lot: url (complete with embedded newlines etc.) + */ + ref1 = xmlNewTextChild (paragraph, NULL, BAD_CAST "a", + BAD_CAST link_text); + g_free (link_text); + xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href); - g_free (link_text); - xmlNewProp (ref1, BAD_CAST "href", BAD_CAST href); - } g_strfreev (urls); - /* Finally, we can add the text as required */ + + /* Finally, we can add the following text as required */ info_body_text (*node, &paragraph, NULL, TRUE, append); + g_free (url); + g_free (href); } -- 1.7.1
Attachment:
pgp1Jm02G5iqm.pgp
Description: PGP signature