[yelp] Initial implementation calling man -Z and parsing.
- From: Shaun McCance <shaunm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [yelp] Initial implementation calling man -Z and parsing.
- Date: Mon, 20 Dec 2010 15:46:57 +0000 (UTC)
commit 46a82ade3e6f0fac8f08b18e7fc23d8665f6f728
Author: Rupert Swarbrick <rswarbrick gmail com>
Date: Thu Dec 16 00:57:11 2010 +0000
Initial implementation calling man -Z and parsing.
Currently doesn't deal with 'special characters' (including umlauts
and ... hyphens!)
Also seems to fail with pretty simple formatting (see list of sections
in man man)
libyelp/yelp-man-document.c | 6 +-
libyelp/yelp-man-parser.c | 2057 +++++++++----------------------------------
libyelp/yelp-man-parser.h | 4 +-
stylesheets/man2html.xsl.in | 366 +-------
4 files changed, 470 insertions(+), 1963 deletions(-)
---
diff --git a/libyelp/yelp-man-document.c b/libyelp/yelp-man-document.c
index 14ac8cd..4fac05a 100644
--- a/libyelp/yelp-man-document.c
+++ b/libyelp/yelp-man-document.c
@@ -436,14 +436,10 @@ man_document_process (YelpManDocument *man)
}
parser = yelp_man_parser_new ();
- priv->xmldoc = yelp_man_parser_parse_file (parser, filepath, encoding);
+ priv->xmldoc = yelp_man_parser_parse_file (parser, filepath, &error);
yelp_man_parser_free (parser);
if (priv->xmldoc == NULL) {
- error = g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING,
- _("The file ‘%s’ could not be parsed because it is"
- " not a well-formed man page."),
- filepath);
yelp_document_error_pending ((YelpDocument *) man, error);
}
diff --git a/libyelp/yelp-man-parser.c b/libyelp/yelp-man-parser.c
index 49efe9f..bceb465 100644
--- a/libyelp/yelp-man-parser.c
+++ b/libyelp/yelp-man-parser.c
@@ -27,1796 +27,581 @@
#include <glib.h>
#include <glib/gi18n.h>
#include <libxml/tree.h>
+#include <gio/gio.h>
#include <string.h>
+#include <math.h>
+#include "yelp-error.h"
#include "yelp-man-parser.h"
-#include "yelp-magic-decompressor.h"
-
-#define PARSER_CUR (g_utf8_get_char (parser->cur) != '\0' \
- && (parser->cur - parser->buffer < parser->length))
-
-static void parser_parse_line (YelpManParser *parser);
-static void parser_handle_linetag (YelpManParser *parser);
-static void parser_ensure_P (YelpManParser *parser);
-static void parser_read_until (YelpManParser *parser,
- gchar delim);
-static void parser_escape_tags (YelpManParser *parser,
- gchar **tags,
- gint ntags);
-static xmlNodePtr parser_append_text (YelpManParser *parser);
-static xmlNodePtr parser_append_given_text (YelpManParser *parser,
- gchar *text);
-static void parser_append_given_text_handle_escapes
- (YelpManParser *parser,
- gchar *text,
- gboolean make_links);
-static xmlNodePtr parser_append_node (YelpManParser *parser,
- gchar *name);
-static xmlNodePtr parser_append_node_attr (YelpManParser *parser,
- gchar *name,
- gchar *attr,
- gchar *value);
-static void parser_stack_push_node (YelpManParser *parser,
- xmlNodePtr node);
-static xmlNodePtr parser_stack_pop_node (YelpManParser *parser,
- gchar *name);
-static void parser_parse_table (YelpManParser *parser);
-
-typedef struct _StackElem StackElem;
-struct _YelpManParser {
- xmlDocPtr doc; /* The top-level XML document */
- xmlNodePtr ins; /* The insertion node */
- xmlNodePtr th_node; /* The TH node, or NULL if it doesn't exist */
- GDataInputStream *stream; /* The GIO input stream to read from */
- gchar *buffer; /* The buffer, line at a time */
- gsize length; /* The buffer length */
+#define MAN_FONTS 8
- gchar *anc; /* The anchor point in the document */
- gchar *cur; /* Our current position in the document */
-
- gchar *token; /* see ignore flag; we ignore the parsing stream until
- * this string is found in the stream */
- gboolean make_links; /* Allow auto-generated hyperlinks to be disabled. */
- gboolean ignore; /* when true, ignore stream until "token" is found */
-
- GSList *nodeStack;
-};
-
-YelpManParser *
-yelp_man_parser_new (void)
+/* The format has two copies of the title like MAN(1) at the top,
+ * possibly with a string of text in between for the collection.
+ *
+ * Start with the parser on START, then HAVE_TITLE when we've read the
+ * first word with parentheses. At that point, stick new words into
+ * the "collection" tag. Then finally switch to BODY when we've seen
+ * the second copy of the one with parentheses.
+ */
+typedef enum ManParserState
{
- YelpManParser *parser = g_new0 (YelpManParser, 1);
+ START,
+ HAVE_TITLE,
+ BODY
+} ManParserState;
- return parser;
-}
-
-xmlDocPtr
-yelp_man_parser_parse_file (YelpManParser *parser,
- gchar *file,
- const gchar *encoding)
+/* See parse_body_text for how this is used. */
+typedef enum ManParserSectionState
{
- GFile *gfile;
- GConverter *converter;
- GFileInputStream *file_stream;
- GInputStream *stream;
- gchar *line;
- gsize len;
-
- gfile = g_file_new_for_path (file);
- file_stream = g_file_read (gfile, NULL, NULL);
- converter = (GConverter *) yelp_magic_decompressor_new ();
- stream = g_converter_input_stream_new ((GInputStream *) file_stream, converter);
- parser->stream = g_data_input_stream_new (stream);
-
- parser->doc = xmlNewDoc (BAD_CAST "1.0");
- parser->ins = xmlNewNode (NULL, BAD_CAST "Man");
- xmlDocSetRootElement (parser->doc, parser->ins);
-
- parser->make_links = TRUE;
-
- while ((parser->buffer = g_data_input_stream_read_line (parser->stream, &(parser->length), NULL, NULL)) != NULL) {
- /* convert this line from the encoding indicated to UTF-8 */
- if (!g_str_equal (encoding, "UTF-8")) {
- GError *converr = NULL;
- gchar *new_buffer = NULL;
- gsize bytes_written = 0;
-
- /* We are making the
- * assumption that there are no partial characters at the end of this
- * string, and therefore can use calls like g_convert() which do not
- * preserve state - someone tell me if I'm wrong here */
- new_buffer = g_convert (parser->buffer, parser->length, "UTF-8",
- encoding, NULL, &bytes_written, &converr);
- if (converr != NULL) {
- g_print ("Error occurred converting %s to UTF-8: %s\n",
- encoding, converr->message);
- g_error_free (converr);
- break;
- } else if (parser->buffer == NULL) {
- g_print ("parser->buffer == NULL\n");
- break;
- }
-
- g_free (parser->buffer);
- parser->buffer = new_buffer;
- parser->length = bytes_written;
- }
-
- parser_parse_line (parser);
-
- g_free (parser->buffer);
- }
+ SECTION_TITLE,
+ SECTION_BODY
+} ManParserSectionState;
- g_object_unref (parser->stream);
+struct _YelpManParser {
+ xmlDocPtr doc; /* The top-level XML document */
+ xmlNodePtr header; /* The header node */
+ xmlNodePtr section_node; /* The current section */
+ xmlNodePtr sheet_node; /* The current sheet */
- return parser->doc;
-}
+ GDataInputStream *stream; /* The GIO input stream to read from */
+ gchar *buffer; /* The buffer, line at a time */
+ gsize length; /* The buffer length */
-void
-yelp_man_parser_free (YelpManParser *parser)
-{
- g_free (parser);
-}
+ /* The width and height of a character according to troff. */
+ guint char_width;
+ guint char_height;
-/******************************************************************************/
+ /* Count the number of lines we've parsed (needed to get prologue) */
+ guint lines_parsed;
-static void
-parser_parse_line (YelpManParser *parser) {
- parser->anc = parser->buffer;
- parser->cur = parser->buffer;
-
- /* check to see if we are ignoring input */
- if (parser->ignore) {
- gchar *ptr;
- /* needs to be utf-8 compatible */
- ptr = strstr (parser->buffer, parser->token);
- if (ptr != NULL) {
- while (PARSER_CUR) {
- parser->cur = g_utf8_next_char (parser->cur);
- parser->anc = parser->cur;
- }
- g_free (parser->token);
- parser->ignore = FALSE;
- } else {
- /* return to get another line of input */
- return;
- }
- } else {
- switch (*(parser->buffer)) {
- case '.':
- parser_handle_linetag (parser);
- /* we are ignoring everything until parser->token,
- * so return and get next line */
- if (parser->ignore)
- return;
- break;
- case '\0':
- parser->ins = xmlDocGetRootElement (parser->doc);
- break;
- case '\'':
- parser->cur = parser->buffer + parser->length - 1;
- parser->anc = parser->cur;
- default:
- break;
- }
- }
-
- parser_read_until (parser, '\0');
-
- if (parser->cur != parser->anc)
- parser_append_text (parser);
-
- if (PARSER_CUR) {
- parser->cur = g_utf8_next_char (parser->cur);
- parser_append_text (parser);
- }
-}
+ /* The x f k name command sets the k'th register to be name. */
+ gchar* font_registers[MAN_FONTS];
-/* creates a single string from all the macro arguments */
-static gchar *
-args_concat_all (GSList *args)
-{
- GSList *ptr = NULL;
- gchar **str_array = NULL;
- gchar *retval = NULL;
- gint i = 0;
-
- if (!args)
- return NULL;
+ /* The current font. Should be the index of one of the
+ * font_registers. Starts at 0 (of course!)
+ */
+ guint current_font;
- str_array = g_malloc0 ((sizeof (gchar *)) * (g_slist_length (args)+1) );
+ /* See description of ManParserState above */
+ ManParserState state;
- ptr = args;
- while (ptr && ptr->data) {
- str_array[i++] = ptr->data;
- ptr = g_slist_next (ptr);
- }
-
- str_array[i] = NULL;
+ /* Vertical and horizontal position as far as the troff output is
+ * concerned. (Measured from top-left).
+ */
+ guint vpos, hpos;
- retval = g_strjoinv (" ", str_array);
+ /* Text accumulator (needed since it comes through in dribs &
+ * drabs...) */
+ GString *accumulator;
- g_free (str_array);
+ /* See parse_body_text for how this is used. */
+ ManParserSectionState section_state;
- return retval;
-}
+ /* The indent of the current sheet */
+ guint sheet_indent;
-/* handler to ignore a macro by reading until the null character */
-static void
-macro_ignore_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- while (PARSER_CUR) {
- parser->cur = g_utf8_next_char (parser->cur);
- parser->anc = parser->cur;
- }
-}
+ /* Set to TRUE if there's been a newline since the last text was
+ * parsed. */
+ gboolean newline;
+};
-static void
-macro_bold_small_italic_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- gchar *str = NULL;
+static gboolean parser_parse_line (YelpManParser *parser, GError **error);
+static gboolean parse_prologue_line (YelpManParser *parser, GError **error);
+
+/* Parsers for different types of line */
+typedef gboolean (*LineParser)(YelpManParser *, GError **);
+#define DECLARE_LINE_PARSER(name) \
+ static gboolean (name) (YelpManParser *parser, GError **error);
+
+DECLARE_LINE_PARSER (parse_xf);
+DECLARE_LINE_PARSER (parse_f);
+DECLARE_LINE_PARSER (parse_V);
+DECLARE_LINE_PARSER (parse_H);
+DECLARE_LINE_PARSER (parse_v);
+DECLARE_LINE_PARSER (parse_h);
+DECLARE_LINE_PARSER (parse_text);
+DECLARE_LINE_PARSER (parse_w);
+DECLARE_LINE_PARSER (parse_body_text);
+DECLARE_LINE_PARSER (parse_n);
+
+/* Declare a sort of alist registry of parsers for different lines. */
+struct LineParsePair
+{
+ const gchar *prefix;
+ LineParser handler;
+};
+static struct LineParsePair line_parsers[] = {
+ { "x f", parse_xf }, { "f", parse_f },
+ { "V", parse_V }, { "H", parse_H },
+ { "v", parse_v }, { "h", parse_h },
+ { "t", parse_text },
+ { "w", parse_w },
+ { "n", parse_n },
+ { NULL, NULL }
+};
- parser_ensure_P (parser);
- parser->ins = parser_append_node (parser, macro);
-
- if (args && args->data) {
- str = args_concat_all (args);
- parser_append_given_text_handle_escapes (parser, str, TRUE);
- g_free (str);
- }
-
- parser->ins = parser->ins->parent;
-}
+/******************************************************************************/
+/* Parser helper functions (managing the state of the various parsing
+ * bits) */
+static void finish_span (YelpManParser *parser);
-static void
-macro_roman_bold_small_italic_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- GSList *ptr = NULL;
- gchar a[2], b[2];
- gboolean toggle = TRUE;
-
- a[0] = macro[0];
- b[0] = macro[1];
- a[1] = b[1] = '\0';
-
- parser_ensure_P (parser);
-
- ptr = args;
- while (ptr && ptr->data) {
- if (toggle)
- parser->ins = parser_append_node (parser, a);
- else
- parser->ins = parser_append_node (parser, b);
-
- if (ptr->next) {
- gchar *tmp = ptr->next->data;
-
- if (tmp[0] == '(' && g_ascii_isdigit (tmp[1]) &&
- (tmp[2] == ')' || (g_ascii_isalpha (tmp[2]) && tmp[3] == ')'))) {
- tmp = g_strconcat (ptr->data, " ", tmp, NULL);
- parser_append_given_text_handle_escapes (parser, tmp, TRUE);
- g_free (tmp);
- parser->ins = parser->ins->parent;
- ptr = ptr->next->next;
- continue;
- }
- }
-
- parser_append_given_text_handle_escapes (parser, ptr->data, TRUE);
- parser->ins = parser->ins->parent;
-
- toggle = (toggle) ? 0 : 1;
- ptr = g_slist_next (ptr);
- }
-}
+/******************************************************************************/
-static void
-macro_new_paragraph_handler (YelpManParser *parser, gchar *macro, GSList *args)
+YelpManParser *
+yelp_man_parser_new (void)
{
- xmlNodePtr tmpNode;
-
- /* Clean up from 'lists'. If this is null we don't care. */
- tmpNode = parser_stack_pop_node (parser, "IP");
-
- tmpNode = parser_stack_pop_node (parser, "P");
- if (tmpNode != NULL) {
- parser->ins = tmpNode->parent;
- }
-
- parser_ensure_P (parser);
+ YelpManParser *parser = g_new0 (YelpManParser, 1);
+ parser->accumulator = g_string_sized_new (1024);
+ return parser;
}
-static void
-macro_insert_self_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- parser_append_node (parser, macro);
-}
+/*
+ This function is responsible for taking a path to a man file and
+ returning something in the groff intermediate output format for us
+ to use.
-static void
-macro_title_header_handler (YelpManParser *parser, gchar *macro, GSList *args)
+ If something goes wrong, we return NULL and set error to be a
+ YelpError describing the problem.
+*/
+static GInputStream*
+get_troff (gchar *path, GError **error)
{
- GSList *ptr = NULL;
- gchar *fields[5] = { "Title", "Section", "Date", "Commentary", "Name" };
- gint i;
-
- parser->ins = parser_append_node (parser, macro);
-
- ptr = args;
- for (i=0; i < 5; i++) {
- if (ptr && ptr->data) {
- parser->ins = parser_append_node (parser, fields[i]);
- parser_append_given_text_handle_escapes (parser, ptr->data, FALSE);
- parser->ins = parser->ins->parent;
- ptr = g_slist_next (ptr);
- } else
- break;
- }
+    /* Read end of the child's standard output. Deliberately not named
+     * "stdout": <stdio.h> may define that identifier as a macro. */
+    gint stdout_fd;
+    GError *err = NULL;
+    gchar *argv[] = { "man", "-Z", "-Tutf8", NULL, NULL };
- parser->ins = parser->ins->parent;
-}
+    /* The fourth slot is the page to format; the fifth stays NULL as
+     * the argv terminator. */
+    argv[3] = path;
-static void
-macro_section_header_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- static gint id = 0;
- GIOStatus retval;
- GError *error = NULL;
- gchar *str = NULL;
- gchar *macro_uc = g_strdup (macro);
- gchar *ptr;
- gchar idval[20];
-
- if (!args) {
- str = g_data_input_stream_read_line (parser->stream, NULL, NULL, &error);
- if (error) {
- g_warning ("%s\n", error->message);
- g_error_free (error);
- }
+    if (!g_spawn_async_with_pipes (NULL, argv, NULL,
+                                   G_SPAWN_SEARCH_PATH, NULL, NULL,
+                                   NULL, NULL, &stdout_fd, NULL, &err)) {
+        /* We failed to run the man program. Return a "Huh?" error.
+         * The spawn message is copied literally instead of being used
+         * as a printf format (it may contain '%'), and a NULL error
+         * pointer from the caller is tolerated. */
+        g_set_error_literal (error, YELP_ERROR, YELP_ERROR_UNKNOWN,
+                             err->message);
+        g_error_free (err);
+        return NULL;
}
- else
- str = args_concat_all (args);
-
- for (ptr = macro_uc; *ptr != '\0'; ptr++)
- /* FIXME: utf-8 */
- *ptr = g_ascii_toupper (*ptr);
-
- parser_stack_pop_node (parser, "IP");
- g_snprintf (idval, 20, "%d", ++id);
-
- /* Sections should be their own, well, section */
- parser->ins = xmlDocGetRootElement (parser->doc);
- parser->ins = parser_append_node_attr (parser, macro_uc, "id", idval);
- parser_append_given_text_handle_escapes (parser, str, FALSE);
- parser->ins = parser->ins->parent;
-
- if (str)
- g_free (str);
+    /* NOTE(review): g_unix_input_stream_new is declared in
+     * <gio/gunixinputstream.h>, which is not among the includes this
+     * patch shows -- confirm it is included, otherwise the call is
+     * implicitly declared and the returned pointer can be truncated. */
+    return (GInputStream*) g_unix_input_stream_new (stdout_fd, TRUE);
}
-static void
-macro_spacing_handler (YelpManParser *parser, gchar *macro, GSList *args)
+xmlDocPtr
+yelp_man_parser_parse_file (YelpManParser *parser,
+ gchar *path,
+ GError **error)
{
- parser->ins = parser_append_node (parser, macro);
+ GInputStream *troff_stream;
+ gchar *line;
+ gsize len;
+ gboolean ret;
+ xmlNodePtr root;
- if (args && args->data) {
- parser->ins = parser_append_node (parser, "Count");
- parser_append_given_text (parser, args->data);
- parser->ins = parser->ins->parent;
- }
+ troff_stream = get_troff (path, error);
+ if (!troff_stream) return NULL;
- parser->ins = parser->ins->parent;
-}
-
-/* this is used to define or redefine a macro until ".."
- * is reached. */
-static void
-macro_define_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- parser->ignore = TRUE;
- parser->token = g_strdup("..");
-}
-
-static void
-macro_tp_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- xmlNodePtr tmpNode = NULL;
- GError **errormsg = NULL;
-
- tmpNode = parser_stack_pop_node (parser, "IP");
+ parser->stream = g_data_input_stream_new (troff_stream);
- if (tmpNode != NULL)
- parser->ins = tmpNode->parent;
+ parser->doc = xmlNewDoc (BAD_CAST "1.0");
+ root = xmlNewNode (NULL, BAD_CAST "Man");
+ xmlDocSetRootElement (parser->doc, root);
- parser->ins = parser_append_node (parser, "IP");
+ parser->header = xmlNewNode (NULL, BAD_CAST "header");
+ xmlAddChild (root, parser->header);
- if (args && args->data) {
- parser->ins = parser_append_node (parser, "Indent");
- parser_append_given_text (parser, args->data);
- parser->ins = parser->ins->parent;
- }
+ while (1) {
+ parser->buffer =
+ g_data_input_stream_read_line (parser->stream,
+ &(parser->length),
+ NULL, NULL);
+ if (parser->buffer == NULL) break;
- g_free (parser->buffer);
+ ret = parser_parse_line (parser, error);
- parser->buffer = g_data_input_stream_read_line (parser->stream, &(parser->length), NULL, NULL);
- if (parser->buffer != NULL) {
- parser->ins = parser_append_node (parser, "Tag");
- parser_parse_line (parser);
- parser->ins = parser->ins->parent;
- }
+ g_free (parser->buffer);
- parser_stack_push_node (parser, parser->ins);
-}
-
-static void
-macro_ip_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- xmlNodePtr tmpNode;
-
- tmpNode = parser_stack_pop_node (parser, "IP");
-
- if (tmpNode != NULL)
- parser->ins = tmpNode->parent;
-
- parser->ins = parser_append_node (parser, macro);
-
- if (args && args->data) {
- parser->ins = parser_append_node (parser, "Tag");
- parser_append_given_text_handle_escapes (parser, args->data, TRUE);
- parser->ins = parser->ins->parent;
-
- if (args->next && args->next->data) {
- parser->ins = parser_append_node (parser, "Indent");
- parser_append_given_text_handle_escapes (parser, args->next->data, TRUE);
- parser->ins = parser->ins->parent;
- }
+ if (!ret) {
+ xmlFreeDoc (parser->doc);
+ parser->doc = NULL;
+ break;
+ }
}
- parser_stack_push_node (parser, parser->ins);
-}
-
-static void
-macro_hanging_paragraph_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- parser_stack_pop_node (parser, "IP");
-
- parser->ins = parser_append_node (parser, macro);
+ g_object_unref (parser->stream);
- if (args && args->data) {
- parser->ins = parser_append_node (parser, "Indent");
- parser_append_given_text (parser, args->data);
- parser->ins = parser->ins->parent;
- }
+ return parser->doc;
}
-static xmlNodePtr
-create_th_node (YelpManParser *parser)
+/* Releases a parser created by yelp_man_parser_new: the font register
+ * names, the text accumulator and the parser struct itself.
+ * Passing NULL is a no-op. */
+void
+yelp_man_parser_free (YelpManParser *parser)
{
- /* Create a TH node if we don't have one already */
- if (!parser->th_node) {
- parser->th_node = parser_append_node (parser, "TH");
+    guint k;
+    if (parser) {
+        for (k=0; k<MAN_FONTS; k++)
+            g_free (parser->font_registers[k]);
+        /* Kept inside the NULL guard: parser->accumulator must not be
+         * read when parser itself is NULL. */
+        g_string_free (parser->accumulator, TRUE);
+        g_free (parser);
}
- return parser->th_node;
}
+/******************************************************************************/
+
+/* Sets the k'th font register to be name. Copies name, so free it
+ * afterwards. k should be in [0,MAN_FONTS). It seems that man always
+ * gives us ones at least 1, but groff_out(5) says non-negative.
+ */
static void
-macro_title_handler (YelpManParser *parser, gchar *macro, GSList *args)
+set_font_register (YelpManParser *parser, guint k, const gchar* name)
{
- gchar *str = NULL;
-
- parser->ins = create_th_node (parser);
-
- if (args && args->data) {
- parser->ins = parser_append_node (parser, "Title");
- parser_append_given_text (parser, args->data);
- parser->ins = parser->ins->parent;
+    /* font_registers has MAN_FONTS slots, so the valid indices are
+     * 0 .. MAN_FONTS-1; k == MAN_FONTS is already out of bounds. */
+    if (k >= MAN_FONTS) {
+        g_warning ("Tried to set nonexistant font register %u to %s",
+                   k, name);
+        return;
}
-
- if (args && args->next && args->next->data) {
- parser->ins = parser_append_node (parser, "Section");
- parser_append_given_text (parser, args->next->data);
- }
- parser->ins = parser->th_node->parent;
+    /* Drop whatever name was stored before, then keep a private copy. */
+    g_free (parser->font_registers[k]);
+    parser->font_registers[k] = g_strdup (name);
}
-static void
-macro_os_handler (YelpManParser *parser, gchar *macro, GSList *args)
+/* Returns the name stored in the register selected by
+ * parser->current_font. The string is owned by the parser. If the
+ * index is out of range or the register was never set, warns and
+ * returns "". */
+static const gchar*
+get_font (const YelpManParser *parser)
{
- gchar *str = NULL;
- xmlNodePtr new_ins = parser->ins;
-
- parser->ins = create_th_node (parser);
+    guint k = parser->current_font;
+    /* Valid indices are 0 .. MAN_FONTS-1; the range test must come
+     * first so the array is never read out of bounds. */
+    if (k >= MAN_FONTS ||
+        parser->font_registers[k] == NULL) {
+
+        g_warning ("Tried to get nonexistant font register %u", k);
- if (args && args->data) {
- parser->ins = parser_append_node (parser, "Os");
- parser_append_given_text (parser, args->data);
+        return "";
}
- parser->ins = parser->th_node->parent;
+    return parser->font_registers[k];
}
-static void
-macro_date_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- gchar *str = NULL;
-
- parser->ins = create_th_node (parser);
-
- if (args && args->data) {
-
- str = args_concat_all (args);
-
- parser->ins = parser_append_node (parser, "Date");
- parser_append_given_text (parser, str);
+/******************************************************************************/
- g_free (str);
- }
+/*
+ Convenience macros to scan a string, checking for the correct number
+ of things read.
- parser->ins = parser->th_node->parent;
-}
+ Also to raise an error. Add an %s to the end of the format string,
+ which automatically gets given parser->buffer.
+ */
+/* SSCANF evaluates to TRUE when scanning parser->buffer did NOT yield
+ * exactly `num` conversions, i.e. when the line failed to parse. */
+#define SSCANF(fmt,num,...) \
+    (sscanf (parser->buffer, (fmt), __VA_ARGS__) != (num))
+/* Builds a new YELP_ERROR_PROCESSING GError; parser->buffer is appended
+ * as the last format argument, so the format must end with a %s. */
+#define PARSE_ERROR(...) \
+    g_error_new (YELP_ERROR, YELP_ERROR_PROCESSING, \
+                 __VA_ARGS__, parser->buffer)
+/* Stores a parse error in `error` (which may be NULL) and returns FALSE
+ * from the enclosing function. Wrapped so that it expands to a single
+ * statement and is safe in an unbraced if/else. */
+#define RAISE_PARSE_ERROR(...) \
+    G_STMT_START { \
+        g_set_error (error, YELP_ERROR, YELP_ERROR_PROCESSING, \
+                     __VA_ARGS__, parser->buffer); \
+        return FALSE; \
+    } G_STMT_END
-static void
-macro_url_handler (YelpManParser *parser, gchar *macro, GSList *args)
+static gboolean
+parser_parse_line (YelpManParser *parser, GError **error)
{
- xmlNodePtr tmpNode = NULL;
-
- if (g_str_equal (macro, "UR")) {
- /* If someone wants to do automatic hyperlink wizardry outside
- * for the parser, then this should instead generate a tag.
- */
- if (args && args->data) {
- if (g_str_equal (args->data, ":"))
- parser->make_links = FALSE;
- else {
- parser->ins = parser_append_node (parser, macro);
-
- parser_stack_push_node (parser, parser->ins);
-
- parser->ins = parser_append_node (parser, "URI");
- parser_append_given_text (parser, args->data);
- parser->ins = parser->ins->parent;
- }
- }
- }
- else if (g_str_equal (macro, "UE")) {
-
- if (parser->make_links) {
- tmpNode = parser_stack_pop_node (parser, "UR");
-
- if (tmpNode == NULL)
- g_warning ("Found unexpected tag: '%s'\n", macro);
- else
- parser->ins = tmpNode->parent;
- } else
- parser->make_links = TRUE;
-
- }
- else if (g_str_equal (macro, "UN")) {
-
- if (args && args->data) {
- parser->ins = parser_append_node (parser, macro);
- parser_append_given_text (parser, args->data);
- parser->ins = parser->ins->parent;
- }
-
+ if (parser->lines_parsed < 3)
+ return parse_prologue_line (parser, error);
+
+ const struct LineParsePair *p = line_parsers;
+ while (p->handler != NULL) {
+ if (g_str_has_prefix (parser->buffer, p->prefix)) {
+ return p->handler(parser, error);
+ }
+ p++;
}
+ return TRUE;
}
-/* relative margin indent; FIXME: this takes a parameter that tells
- * how many indents to do, which needs to be implemented to fix
- * some man page formatting options */
-/*static void
-macro_rs_re_handler (YelpManParser *parser, gchar *macro, GSList *args)
+static gboolean
+parse_prologue_line (YelpManParser *parser, GError **error)
{
- xmlNodePtr tmpNode;
-
- if (g_str_equal (macro, "RS")) {
- parser->ins = parser_append_node (parser, macro);
+ parser->lines_parsed++;
+ if (parser->lines_parsed != 2) return TRUE;
- parser_stack_push_node (parser, parser->ins);
+ /* This is the interesting line, which should look like
+ x res 240 24 40
+ The interesting bits are the 24 and the 40, which are the
+ width and height of a character as far as -Tutf8 is
+ concerned.
+ */
+ if (SSCANF ("x %*s %*u %u %u", 2,
+ &parser->char_width, &parser->char_height)) {
+ RAISE_PARSE_ERROR ("Wrong 'x res' line from troff: %s");
+ }
- if (args && args->data) {
- parser->ins = parser_append_node (parser, "Indent");
- parser_append_given_text (parser, args->data);
- parser->ins = parser->ins->parent;
- }
- }
- else if (g_str_equal (macro, "RE")) {
- parser_stack_pop_node (parser, "IP");
+ return TRUE;
+}
- tmpNode = parser_stack_pop_node (parser, "RS");
+/* Handles a line starting with "x f": scans a register number and a
+ * font name from parser->buffer and stores the name in that font
+ * register. Returns FALSE with `error` set if the line does not scan. */
+static gboolean
+parse_xf (YelpManParser *parser, GError **error)
+{
+    /* "%10s" stores up to 10 characters plus the terminating NUL, so
+     * the buffer needs 11 bytes. */
+    gchar name[11];
+    guint k;
- if (tmpNode == NULL)
- d (g_warning ("Found unexpected tag: '%s'\n", macro));
- else
- parser->ins = tmpNode->parent;
+    if (SSCANF ("x f%*s %u %10s", 2, &k, name)) {
+        RAISE_PARSE_ERROR ("Invalid 'x f' line from troff: %s");
}
-}*/
+    set_font_register (parser, k, name);
+    return TRUE;
+}
-static void
-macro_mandoc_list_handler (YelpManParser *parser, gchar *macro, GSList *args)
+static gboolean
+parse_f (YelpManParser *parser, GError **error)
{
- xmlNodePtr tmpNode;
-
- if (g_str_equal (macro, "Bl")) {
-
- parser->ins = parser_append_node (parser, macro);
-
- if (args && args->data) {
- gchar *listtype = (gchar *)args->data;
-
- if (g_str_equal (listtype, "-hang") ||
- g_str_equal (listtype, "-ohang") ||
- g_str_equal (listtype, "-tag") ||
- g_str_equal (listtype, "-diag") ||
- g_str_equal (listtype, "-inset")
- ) {
- listtype++;
- xmlNewProp (parser->ins, BAD_CAST "listtype",
- BAD_CAST listtype);
- /* TODO: check for -width, -offset, -compact */
- } else if (g_str_equal (listtype, "-column")) {
- /* TODO: support this */;
- } else if (g_str_equal (listtype, "-item") ||
- g_str_equal (listtype, "-bullet") ||
- g_str_equal (listtype, "-hyphen") ||
- g_str_equal (listtype, "-dash")
- ) {
- listtype++;
- xmlNewProp (parser->ins, BAD_CAST "listtype",
- BAD_CAST listtype);
- /* TODO: check for -offset, -compact */
- }
- }
-
- parser_stack_push_node (parser, parser->ins);
+ guint k;
+ if (SSCANF ("f%u", 1, &k)) {
+ RAISE_PARSE_ERROR ("Invalid font line from troff: %s");
}
- else if (g_str_equal (macro, "El")) {
-
- tmpNode = parser_stack_pop_node (parser, "It");
+ finish_span (parser);
- if (tmpNode != NULL)
- parser->ins = tmpNode->parent;
+ parser->current_font = k;
- tmpNode = parser_stack_pop_node (parser, "Bl");
-
- if (tmpNode == NULL)
- g_warning ("Found unexpected tag: '%s'\n", macro);
- else
- parser->ins = tmpNode->parent;
- }
+ return TRUE;
}
-static void
-macro_verbatim_handler (YelpManParser *parser, gchar *macro, GSList *args)
+static gboolean
+parse_v (YelpManParser *parser, GError **error)
{
- xmlNodePtr tmpNode;
-
- if (g_str_equal (macro, "nf") || g_str_equal (macro, "Vb")) {
- parser->ins = parser_append_node (parser, "Verbatim");
- parser_stack_push_node (parser, parser->ins);
- }
- else if (g_str_equal (macro, "fi") || g_str_equal (macro, "Ve")) {
- tmpNode = parser_stack_pop_node (parser, "Verbatim");
-
- if (tmpNode == NULL)
- g_warning ("Found unexpected tag: '%s'\n", macro);
- else
- parser->ins = tmpNode->parent;
+ guint dy;
+ if (SSCANF ("v%u", 1, &dy)) {
+ RAISE_PARSE_ERROR ("Invalid v line from troff: %s");
}
+ parser->vpos += dy;
+ return TRUE;
}
-static void
-macro_reference_handler (YelpManParser *parser, gchar *macro, GSList *args)
+static gboolean
+parse_h (YelpManParser *parser, GError **error)
{
- if (g_str_equal (macro, "so")) {
- gchar *basename = NULL;
- gchar *link = NULL;
-
- if (args && args->data) {
- basename = g_strrstr((const gchar *)args->data, "/");
-
- if (basename) {
- basename++;
- link = g_strdup_printf ("man:%s", basename);
- } else {
- link = g_strdup_printf ("man:%s", (const gchar *)args->data);
- basename = (gchar *)args->data;
- }
-
- parser->ins = create_th_node (parser);
- parser->ins = parser_append_node (parser, "Title");
- parser_append_given_text (parser, "REFERENCE");
- parser->ins = parser->ins->parent;
- parser->ins = parser->ins->parent;
-
- parser->ins = parser_append_node_attr (parser, "SH", "id", "9999");
- parser_append_given_text (parser, "REFERENCE");
- parser->ins = parser->ins->parent;
-
- parser_append_given_text (parser, "See ");
- parser->ins = parser_append_node (parser, "UR");
- parser->ins = parser_append_node (parser, "URI");
- parser_append_given_text (parser, link);
- parser->ins = parser->ins->parent;
- parser_append_given_text (parser, basename);
- parser->ins = parser->ins->parent;
-
- g_free (link);
- }
+ guint dx;
+ if (SSCANF ("h%u", 1, &dx)) {
+ RAISE_PARSE_ERROR ("Invalid h line from troff: %s");
}
+ parser->hpos += dx;
+ return TRUE;
}
-
-/* many mandoc macros have their arguments parsed so that other
- * macros can be called to operate on their arguments. This table
- * indicates which macros are _parsed_ for other callable macros,
- * and which are _callable_ from other macros: see mdoc(7) for more
- * details
- */
-
-#define MANDOC_NONE 0x01
-#define MANDOC_PARSED 0x01
-#define MANDOC_CALLABLE 0x02
-
-struct MandocMacro {
- gchar *macro;
- gint flags;
-};
-
-static struct MandocMacro manual_macros[] = {
- { "Ad", MANDOC_PARSED | MANDOC_CALLABLE },
- { "An", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Ar", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Cd", MANDOC_NONE },
- { "Cm", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Dv", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Er", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Ev", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Fa", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Fd", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Fl", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Fn", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Ic", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Li", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Nd", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Nm", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Op", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Ot", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Pa", MANDOC_PARSED | MANDOC_CALLABLE },
- { "St", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Tn", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Va", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Vt", MANDOC_PARSED | MANDOC_CALLABLE },
- { "Xr", MANDOC_PARSED | MANDOC_CALLABLE },
- { NULL, MANDOC_NONE }
-};
static gboolean
-is_mandoc_manual_macro_parsed (gchar *macro)
+parse_V (YelpManParser *parser, GError **error)
{
- gint i;
-
- for (i=0; manual_macros[i].macro != NULL; i++) {
- if (g_str_equal (macro, manual_macros[i].macro) &&
- (manual_macros[i].flags & MANDOC_PARSED) == MANDOC_PARSED
- ) {
- return TRUE;
- }
+ guint y;
+ if (SSCANF ("V%u", 1, &y)) {
+ RAISE_PARSE_ERROR ("Invalid V line from troff: %s");
}
-
- return FALSE;
+ parser->vpos = y;
+ return TRUE;
}
static gboolean
-is_mandoc_manual_macro_callable (gchar *macro)
+parse_H (YelpManParser *parser, GError **error)
{
- gint i;
-
- for (i=0; manual_macros[i].macro != NULL; i++) {
- if (g_str_equal (macro, manual_macros[i].macro) &&
- (manual_macros[i].flags & MANDOC_CALLABLE) == MANDOC_CALLABLE
- ) {
- return TRUE;
- }
+ guint x;
+ if (SSCANF ("H%u", 1, &x)) {
+ RAISE_PARSE_ERROR ("Invalid H line from troff: %s");
}
-
- return FALSE;
+ parser->hpos = x;
+ return TRUE;
}
-static void
-macro_mandoc_utility_handler (YelpManParser *parser, gchar *macro, GSList *args)
+static gboolean
+parse_text (YelpManParser *parser, GError **error)
{
- GSList *ptr = NULL;
- gchar *str = NULL;
- gchar *manpage, *uri;
+ gchar *text, *section, *tmp;
+ xmlNodePtr node;
- g_return_if_fail (macro != NULL);
-
- if (is_mandoc_manual_macro_parsed (macro)) {
- parser->ins = parser_append_node (parser, macro);
-
- ptr = args;
- while (ptr && ptr->data) {
- if (is_mandoc_manual_macro_callable ((gchar *)ptr->data)) {
- macro_mandoc_utility_handler (parser, (gchar *)ptr->data, ptr->next);
- break;
- } else {
- parser_append_given_text_handle_escapes (parser, (gchar *)ptr->data, TRUE);
- }
- ptr = ptr->next;
- if (ptr && ptr->data)
- parser_append_given_text (parser, " ");
- }
-
- parser->ins = parser->ins->parent;
- } else {
- parser->ins = parser_append_node (parser, macro);
- str = args_concat_all (args);
- parser->ins = parser->ins->parent;
-
- g_free (str);
- }
+ g_assert (parser->buffer[0] == 't');
- return;
-
- if (g_str_equal (macro, "Op")) {
-
- } else if (g_str_equal (macro, "Nm")) {
-
- if (str) {
- parser_ensure_P (parser);
-
- parser->ins = parser_append_node (parser, "B");
- parser_append_given_text_handle_escapes (parser, str, TRUE);
- parser->ins = parser->ins->parent;
- }
- }
- else if (g_str_equal (macro, "Nd")) {
-
- if (str) {
- parser_append_given_text (parser, " -- ");
- parser_append_given_text_handle_escapes (parser, str, TRUE);
- }
- }
- else if (g_str_equal (macro, "Xr")) {
-
- if (args && args->data && args->next && args->next->data) {
-
- manpage = g_strdup_printf ("%s(%s)", (gchar *)args->data, (gchar *)args->next->data);
- uri = g_strdup_printf ("man:%s", manpage);
-
- parser_ensure_P (parser);
-
- parser->ins = parser_append_node (parser, "UR");
- parser->ins = parser_append_node (parser, "URI");
- parser_append_given_text (parser, uri);
- parser->ins = parser->ins->parent;
- parser_append_given_text (parser, manpage);
- parser->ins = parser->ins->parent;
-
- ptr = args->next->next;
-
- while (ptr && ptr->data) {
- parser_append_given_text (parser, ptr->data);
- ptr = g_slist_next (ptr);
- }
-
- g_free (uri);
- g_free (manpage);
- }
- }
+ if (parser->state == START) {
+ /* With a bit of luck, this will be the tBLAH(1) line. Can't
+ * use sscanf to chop it up since that needs whitespace. */
+ section = strchr (parser->buffer + 1, '(');
+ if (!section)
+ RAISE_PARSE_ERROR ("Expected t line with title. Got %s");
+ text = g_strndup (parser->buffer + 1,
+ section - (parser->buffer + 1));
- g_free (str);
-}
+ // Skip over the (
+ section++;
-static void
-macro_mandoc_listitem_handler (YelpManParser *parser, gchar *macro, GSList *args)
-{
- GSList *ptr = NULL;
- xmlNodePtr tmpNode;
-
- tmpNode = parser_stack_pop_node (parser, "It");
-
- if (tmpNode != NULL)
- parser->ins = tmpNode->parent;
-
- parser->ins = parser_append_node (parser, macro);
-
- if (args && args->data) {
- parser->ins = parser_append_node (parser, "ItTag");
-
- ptr = args;
- while (ptr && ptr->data) {
- if (is_mandoc_manual_macro_callable ((gchar *)ptr->data)) {
- macro_mandoc_utility_handler (parser, (gchar *)ptr->data, ptr->next);
- break;
- } else {
- parser_append_given_text (parser, (gchar *)ptr->data);
- }
- ptr = ptr->next;
- if (ptr && ptr->data)
- parser_append_given_text (parser, " ");
- }
-
- parser->ins = parser->ins->parent;
- }
+ tmp = strchr (section, ')');
+ if (!tmp || (*(tmp+1) != '\0'))
+ RAISE_PARSE_ERROR ("Strange format for t title line: %s");
+ section = g_strndup (section, tmp - section);
- parser_stack_push_node (parser, parser->ins);
-}
+ parser->state = HAVE_TITLE;
-/* the handler functions for each macro all have this form:
- * - the calling function, parser_handle_linetag owns the "macro", and "args"
- * parameters, so do not free them.
- */
-typedef void (*MacroFunc)(YelpManParser *parser, gchar *macro, GSList *args);
+ xmlNewTextChild (parser->header,
+ NULL, BAD_CAST "title", text);
+ xmlNewTextChild (parser->header,
+ NULL, BAD_CAST "section", section);
-struct MacroHandler {
- gchar *macro;
- MacroFunc handler;
-};
+ g_free (text);
+ g_free (section);
-/* We are calling all of these macros, when in reality some of them are
- * requests (lowercase, defined by groff system), and some of them are
- * macros (varying case, defined by man/mdoc/ms/tbl extensions)
- *
- * A great resource to figure out what each of these does is the groff
- * info page. Also groff(7), man(7), and mdoc(7) are useful as well.
- */
-static struct MacroHandler macro_handlers[] = {
- { "\\\"", macro_ignore_handler }, /* groff: comment */
- { "ad", macro_ignore_handler }, /* groff: set adjusting mode */
- { "Ad", macro_mandoc_utility_handler }, /* mandoc: Address */
- { "An", macro_mandoc_utility_handler }, /* mandoc: Author name */
- { "Ar", macro_mandoc_utility_handler }, /* mandoc: Command line argument */
- { "B", macro_bold_small_italic_handler }, /* man: set bold font */
- { "Bd", macro_ignore_handler }, /* mandoc: Begin-display block */
- { "BI", macro_roman_bold_small_italic_handler }, /* man: bold italic font */
- { "Bl", macro_mandoc_list_handler }, /* mandoc: begin list */
- { "bp", macro_ignore_handler }, /* groff: break page */
- { "br", macro_insert_self_handler }, /* groff: line break */
- { "BR", macro_roman_bold_small_italic_handler }, /* man: set bold roman font */
- { "Cd", macro_mandoc_utility_handler }, /* mandoc: Configuration declaration */
- { "Cm", macro_mandoc_utility_handler }, /* mandoc: Command line argument modifier */
- { "ce", macro_ignore_handler }, /* groff: center text */
- { "Dd", macro_date_handler }, /* mandoc: Document date */
- { "de", macro_define_handler }, /* groff: define macro */
- { "ds", macro_ignore_handler }, /* groff: define string variable */
- { "D1", macro_ignore_handler }, /* mandoc: Indent and display one text line */
- { "Dl", macro_ignore_handler }, /* mandoc: Indent and display one line of literal text */
- { "Dt", macro_title_handler }, /* mandoc: Document title */
- { "Dv", macro_mandoc_utility_handler }, /* mandoc: Defined variable */
- { "Ed", macro_ignore_handler }, /* mandoc: End-display block */
- { "El", macro_mandoc_list_handler }, /* mandoc: end list */
- { "Er", macro_mandoc_utility_handler }, /* mandoc: Error number */
- { "Ev", macro_mandoc_utility_handler }, /* mandoc: Environment variable */
- { "Fa", macro_mandoc_utility_handler }, /* mandoc: Function argument */
- { "Fd", macro_mandoc_utility_handler }, /* mandoc: Function declaration */
- { "fi", macro_verbatim_handler }, /* groff: activate fill mode */
- { "Fl", macro_mandoc_utility_handler }, /* mandoc: ? */
- { "Fn", macro_mandoc_utility_handler }, /* mandoc: Function call */
- { "ft", macro_ignore_handler }, /* groff: change font */
- { "HP", macro_hanging_paragraph_handler }, /* man: paragraph with hanging left indentation */
- { "hy", macro_ignore_handler }, /* groff: enable hyphenation */
- { "I", macro_bold_small_italic_handler }, /* man: set italic font */
- { "Ic", macro_mandoc_utility_handler }, /* mandoc: Interactive Command */
- { "ie", macro_ignore_handler }, /* groff: else portion of if-else */
- { "if", macro_ignore_handler }, /* groff: if statement */
- { "ig", macro_ignore_handler }, /* groff: comment until '..' or '.END' */
- { "ih", macro_ignore_handler }, /* ? */
- { "IX", macro_ignore_handler }, /* ms: print index to stderr */
- { "IB", macro_roman_bold_small_italic_handler }, /* man: set italic bold font */
- { "IP", macro_ip_handler }, /* man: indented paragraph */
- { "IR", macro_roman_bold_small_italic_handler }, /* man: set italic roman font */
- { "It", macro_mandoc_listitem_handler }, /* mandoc: item in list */
- { "Li", macro_mandoc_utility_handler }, /* mandoc: Literal text */
- { "LP", macro_new_paragraph_handler }, /* man: line break and left margin and indentation are reset */
- { "na", macro_ignore_handler }, /* groff: disable adjusting */
- { "Nd", macro_mandoc_utility_handler }, /* mandoc: description of utility/program */
- { "ne", macro_ignore_handler }, /* groff: force space at bottom of page */
- { "nf", macro_verbatim_handler }, /* groff: no fill mode */
- { "nh", macro_ignore_handler }, /* groff: disable hyphenation */
- { "Nd", macro_mandoc_utility_handler }, /* mandoc: ? */
- { "Nm", macro_mandoc_utility_handler }, /* mandoc: Command/utility/program name*/
- { "Op", macro_mandoc_utility_handler }, /* mandoc: Option */
- { "Os", macro_os_handler }, /* mandoc: Operating System */
- { "Ot", macro_mandoc_utility_handler }, /* mandoc: Old style function type (Fortran) */
- { "P", macro_new_paragraph_handler }, /* man: line break and left margin and indentation are reset */
- { "Pa", macro_mandoc_utility_handler }, /* mandoc: Pathname or filename */
- { "PP", macro_new_paragraph_handler }, /* man: line break and left margin and indentation are reset */
- { "Pp", macro_new_paragraph_handler }, /* man: line break and left margin and indentation are reset */
- { "ps", macro_ignore_handler }, /* groff: change type size */
- { "RB", macro_roman_bold_small_italic_handler }, /* man: set roman bold font */
- { "RE", macro_ignore_handler }, /* man: move left margin back to NNN */
- { "RI", macro_roman_bold_small_italic_handler }, /* man: set roman italic font */
- { "RS", macro_ignore_handler }, /* man: move left margin to right by NNN */
- { "SH", macro_section_header_handler }, /* man: unnumbered section heading */
- { "Sh", macro_section_header_handler }, /* man: unnumbered section heading */
- { "SM", macro_bold_small_italic_handler }, /* man: set font size one SMaller */
- { "so", macro_reference_handler }, /* groff: include file */
- { "sp", macro_spacing_handler }, /* groff: */
- { "SS", macro_section_header_handler }, /* man: unnumbered subsection heading */
- { "Ss", macro_section_header_handler }, /* man: unnumbered subsection heading */
- { "St", macro_mandoc_utility_handler }, /* mandoc: Standards (-p1003.2, -p1003.1 or -ansiC) */
- { "TH", macro_title_header_handler }, /* man: set title of man page */
- { "TP", macro_tp_handler }, /* man: set indented paragraph with label */
- { "UR", macro_url_handler }, /* man: URL start hyperlink */
- { "UE", macro_url_handler }, /* man: URL end hyperlink */
- { "UN", macro_ignore_handler }, /* ? */
- { "TE", macro_ignore_handler }, /* ms: table */
- { "Tn", macro_mandoc_utility_handler }, /* mandoc: Trade or type name (small Caps). */
- { "ti", macro_ignore_handler }, /* groff: temporary indent */
- { "tr", macro_ignore_handler }, /* groff: translate characters */
- { "TS", macro_ignore_handler }, /* ms: table with optional header */
- { "Va", macro_mandoc_utility_handler }, /* mandoc: Variable name */
- { "Vb", macro_verbatim_handler }, /* pod2man: start of verbatim text */
- { "Ve", macro_verbatim_handler }, /* pod2man: end of verbatim text */
- { "Vt", macro_mandoc_utility_handler }, /* mandoc: Variable type (Fortran only) */
- { "Xr", macro_mandoc_utility_handler }, /* mandoc: Manual page cross reference */
- { NULL, NULL }
-};
+ /* The accumulator should currently be "". */
+ g_assert (parser->accumulator &&
+ *(parser->accumulator->str) == '\0');
-static void
-parser_handle_linetag (YelpManParser *parser) {
- gchar c, *str, *ptr, *arg;
- GSList *arglist = NULL;
- GSList *listptr = NULL;
- MacroFunc handler_func = NULL;
-
- static GHashTable *macro_hash = NULL;
-
- /* check if we've created the hash of macros yet. If not, make it */
- if (!macro_hash) {
- gint i;
-
- macro_hash = g_hash_table_new (g_str_hash, g_str_equal);
-
- for (i=0; macro_handlers[i].macro != NULL; i++) {
- g_hash_table_insert (macro_hash,
- macro_handlers[i].macro,
- macro_handlers[i].handler);
- }
+ return TRUE;
}
+ if (parser->state == HAVE_TITLE) {
+ /* We expect (maybe!) to get some lines tThe wh24
+ * tCollection. We've found (and can ignore!) the second
+ * title line if there's a (). */
+ if (strchr (parser->buffer+1, '(') &&
+ strchr (parser->buffer+1, ')')) {
+ parser->state = BODY;
- /* FIXME: figure out a better way to handle these cases */
- /* special case, if the line is simply ".\0" then return */
- if (g_utf8_get_char (g_utf8_next_char (parser->cur)) == '\0') {
- parser->cur = g_utf8_next_char (parser->cur);
- parser->cur = g_utf8_next_char (parser->cur);
- parser->anc = parser->cur;
- return;
- }
- /* special case, if the line is simply "..\0" then return */
- else if (g_utf8_get_char (g_utf8_next_char(parser->cur)) == '.' &&
- g_utf8_get_char (g_utf8_next_char (g_utf8_next_char (parser->cur+2))) == '\0') {
- parser->cur = g_utf8_next_char (parser->cur);
- parser->cur = g_utf8_next_char (parser->cur);
- parser->cur = g_utf8_next_char (parser->cur);
- parser->anc = parser->cur;
- }
-
- /* skip any spaces after the control character . */
- while (PARSER_CUR && g_utf8_get_char (parser->cur) == ' ')
- parser->cur = g_utf8_next_char (parser->cur);
-
- while (PARSER_CUR
- && g_utf8_get_char (parser->cur) != ' '
- && ( (g_utf8_get_char (parser->cur) != '\\') ||
- (
- (g_utf8_get_char(parser->cur) == '\\') &&
- (g_utf8_get_char(g_utf8_next_char (parser->cur)) == '\"')
- )
- )
- && g_utf8_get_char (parser->cur) != '\0') {
- if (
- (g_utf8_get_char (parser->cur) == '\\') &&
- (g_utf8_get_char (g_utf8_next_char (parser->cur)) == '\"')
- ) {
- parser->cur = g_utf8_next_char (g_utf8_next_char (parser->cur));
- break;
- }
- parser->cur = g_utf8_next_char (parser->cur);
- }
+ xmlNewTextChild (parser->header,
+ NULL, BAD_CAST "collection",
+ parser->accumulator->str);
+ g_string_truncate (parser->accumulator, 0);
- /* copy the macro/request into str */
- c = *(parser->cur);
- *(parser->cur) = '\0';
- str = g_strdup (parser->anc + 1); /* skip control character '.' by adding one */
- *(parser->cur) = c;
- parser->anc = parser->cur;
-
- /* FIXME: need to handle escaped characters */
- /* perform argument parsing and store argument in a singly linked list */
- while (PARSER_CUR && g_utf8_get_char (parser->cur) != '\0') {
- ptr = NULL;
- arg = NULL;
-
- /* skip any whitespace */
- while (PARSER_CUR && g_utf8_get_char (parser->cur) == ' ') {
- parser->cur = g_utf8_next_char (parser->cur);
- parser->anc = parser->cur;
- }
-
-get_argument:
- /* search until we hit whitespace or an " */
- while (PARSER_CUR &&
- g_utf8_get_char (parser->cur) != '\0' &&
- g_utf8_get_char (parser->cur) != ' ' &&
- g_utf8_get_char (parser->cur) != '\"')
- parser->cur = g_utf8_next_char (parser->cur);
-
- /* this checks for escaped spaces */
- if (PARSER_CUR &&
- ((parser->cur - parser->buffer) > 0) &&
- g_utf8_get_char (parser->cur) == ' ' &&
- g_utf8_get_char (g_utf8_prev_char (parser->cur)) == '\\') {
- parser->cur = g_utf8_next_char (parser->cur);
- goto get_argument;
- }
-
- if (g_utf8_get_char (parser->cur) == '\0' &&
- (parser->cur == parser->anc))
- break;
-
- if (g_utf8_get_char (parser->cur) == '\"' &&
- g_utf8_get_char (g_utf8_prev_char (parser->cur)) == ' ') {
- /* quoted argument */
- ptr = strchr (parser->cur+1, '\"');
- if (ptr != NULL) {
- c = *(ptr);
- *(ptr) = '\0';
- arg = g_strdup (parser->anc+1);
- *(ptr) = c;
- parser->cur = ptr;
- parser->anc = ++parser->cur;
- } else {
- /* unmatched double quote: include the " as part of the argument */
- parser->cur++;
- goto get_argument;
- }
- }
- else if (*(parser->cur) == '\"') {
- /* quote in the middle of an argument */
- c = *(parser->cur+1);
- *(parser->cur+1) = '\0';
- arg = g_strdup (parser->anc);
- *(parser->cur+1) = c;
- parser->anc = ++parser->cur;
- }
- else if (*(parser->cur) == ' ') {
- /* normal space separated argument */
- c = *(parser->cur);
- *(parser->cur) = '\0';
- arg = g_strdup (parser->anc);
- *(parser->cur) = c;
- parser->anc = ++parser->cur;
- }
- else if (*(parser->cur) == '\0' && *(parser->cur-1) != ' ') {
- /* special case for EOL */
- c = *(parser->cur);
- *(parser->cur) = '\0';
- arg = g_strdup (parser->anc);
- *(parser->cur) = c;
- parser->anc = parser->cur;
- } else
- ; /* FIXME: do we need to handle this case? */
-
- arglist = g_slist_append (arglist, arg);
- }
-
- /*g_print ("handling macro (%s)\n", str);
-
- listptr = arglist;
- while (listptr && listptr->data) {
- g_print (" arg = %s\n", (gchar *)listptr->data);
- listptr = g_slist_next (listptr);
- }
- */
-
- /* lookup the macro handler and call that function */
- handler_func = g_hash_table_lookup (macro_hash, str);
- if (handler_func)
- (*handler_func) (parser, str, arglist);
-
- /* in case macro is not defined in hash table, ignore rest of line */
- else
- macro_ignore_handler (parser, str, arglist);
-
- g_free (str);
-
- listptr = arglist;
- while (listptr && listptr->data) {
- g_free (listptr->data);
- listptr = g_slist_next (listptr);
- }
-
- return;
-
- if (0) {
- }
- /* Table (tbl) macros */
- else if (g_str_equal (str, "TS")) {
- parser->ins = parser_append_node (parser, "TABLE");
- g_free (str);
-
- parser_stack_push_node (parser, parser->ins);
- g_free (parser->buffer);
- parser_parse_table (parser);
- }
- else if (g_str_equal (str, "TE")) {
- /* We should only see this from within parser_parse_table */
- g_warning ("Found unexpected tag: '%s'\n", str);
- g_free (str);
- }
- /* "ie" and "if" are conditional macros in groff
- * "ds" is to define a variable; see groff(7)
- * ignore anything between the \{ \}, otherwise ignore until
- * the end of the linee*/
- else if (g_str_equal (str, "ds") || g_str_equal (str, "ie")
- || g_str_equal (str, "if")) {
- /* skip any remaining spaces */
- while (PARSER_CUR && (*parser->cur == ' '))
- parser->anc = ++parser->cur;
-
- /* skip the "stringvar" or "cond"; see groff(7) */
- while (PARSER_CUR && (*parser->cur != ' '))
- parser->anc = ++parser->cur;
-
- /* skip any remaining spaces */
- while (PARSER_CUR && (*parser->cur == ' '))
- parser->anc = ++parser->cur;
-
- /* check to see if the next two characters are the
- * special "\{" sequence */
- if (*parser->cur == '\\' && *(parser->cur+1) == '{') {
- parser->ignore = TRUE;
- parser->token = g_strdup ("\\}");
- } else {
- /* otherwise just ignore till the end of the line */
- while (PARSER_CUR)
- parser->anc = ++parser->cur;
- }
- }
- /* else conditional macro */
- else if (g_str_equal (str, "el")) {
- /* check to see if the next two characters are the
- * special "\{" sequence */
- parser->ignore = 0;
- if (*parser->cur == '\\' && *(parser->cur+1) == '{') {
- parser->ignore = TRUE;
- parser->token = g_strdup ("\\}");
- } else {
- /* otherwise just ignore till the end of the line */
- while (PARSER_CUR)
- parser->anc = ++parser->cur;
- }
- }
-
-}
+ return TRUE;
+ }
-static void
-parser_ensure_P (YelpManParser *parser)
-{
- if (xmlStrEqual (parser->ins->name, BAD_CAST "Man")) {
- parser->ins = parser_append_node (parser, "P");
- parser_stack_push_node (parser, parser->ins);
- }
-}
+ g_string_append (parser->accumulator, parser->buffer+1);
-static void
-parser_read_until (YelpManParser *parser,
- gchar delim)
-{
- gchar c;
-
- while (PARSER_CUR
- && g_utf8_get_char (parser->cur) != '\0'
- && g_utf8_get_char (parser->cur) != delim) {
- parser->cur = g_utf8_next_char (parser->cur);
+ return TRUE;
}
- if (parser->anc == parser->cur)
- return;
-
- c = *(parser->cur);
- *(parser->cur) = '\0';
- parser_append_given_text_handle_escapes (parser, parser->anc, TRUE);
- *(parser->cur) = c;
-
- parser->anc = parser->cur;
+ return parse_body_text (parser, error);
}
-static void
-parser_escape_tags (YelpManParser *parser,
- gchar **tags,
- gint ntags)
+/*
+ w is a sort of prefix argument. It indicates a space, so we register
+ that here, then call parser_parse_line again on the rest of the
+ string to deal with that.
+ */
+static gboolean
+parse_w (YelpManParser *parser, GError **error)
{
- gint i;
- xmlNodePtr node = NULL;
- xmlNodePtr cur = parser->ins;
- GSList *path = NULL;
-
- /* Find the top node we can escape from */
- while (cur && cur != (xmlNodePtr)parser->doc &&
- cur->parent && cur->parent != (xmlNodePtr) parser->doc) {
- for (i = 0; i < ntags; i++)
- if (!xmlStrcmp (cur->name, BAD_CAST tags[i])) {
- node = cur;
- break;
- }
- path = g_slist_prepend (path, cur);
- cur = cur->parent;
- }
+ gboolean ret;
- /* Walk back down, reproducing nodes we aren't escaping */
- if (node) {
- GSList *c = path;
- while (c && (xmlNodePtr) c->data != node)
- c = g_slist_next (c);
-
- parser->ins = node->parent;
- parser_ensure_P (parser);
-
- while ((c = c->next)) {
- gboolean insert = TRUE;
- cur = (xmlNodePtr) c->data;
-
- for (i = 0; i < ntags; i++)
- if (!xmlStrcmp (cur->name, BAD_CAST tags[i])) {
- insert = FALSE;
- break;
- }
- if (insert)
- parser->ins = parser_append_node (parser, (gchar *) cur->name);
- }
+ if (parser->state != START) {
+ g_string_append_c (parser->accumulator, ' ');
}
-}
-
-static void
-parser_append_given_text_handle_escapes (YelpManParser *parser, gchar *text, gboolean make_links)
-{
- gchar *escape[] = { "fI", "fB" };
- gchar *baseptr, *ptr, *anc, *str;
- gint c, len;
-
- g_return_if_fail (parser != NULL);
-
- if (!text)
- return;
-
- baseptr = g_strdup (text);
- ptr = baseptr;
- anc = baseptr;
- len = strlen (baseptr);
- while (ptr && *ptr != '\0') {
-
- if (*ptr == '\\') {
-
- c = *ptr;
- *ptr = '\0';
- parser_append_given_text (parser, anc);
- *ptr = c;
-
- anc = ++ptr;
-
- switch (*ptr) {
- case '\0':
- break;
- case '-':
- case '\\':
- ptr++;
- c = *ptr;
- *ptr = '\0';
- parser_append_given_text (parser, anc);
- *ptr = c;
- anc = ptr;
- break;
- case 'f':
- ptr++;
- if ((ptr - baseptr) > len || *ptr == '\0') break;
- ptr++;
-
- c = *(ptr);
- *(ptr) = '\0';
- str = g_strdup (anc);
- *(ptr) = c;
-
- parser_ensure_P (parser);
- parser_escape_tags (parser, escape, 2);
-
- /* the \f escape sequence changes the font - R is Roman,
- * B is Bold, and I is italic */
- if (g_str_equal (str, "fI") || g_str_equal (str, "fB"))
- parser->ins = parser_append_node (parser, str);
- else if (!g_str_equal (str, "fR") && !g_str_equal (str, "fP"))
- g_warning ("No rule matching the tag '%s'\n", str);
-
- g_free (str);
- anc = ptr;
- break;
- case '(':
- ptr++;
- if ((ptr - baseptr) > len || *ptr == '\0') break;
- ptr++;
- if ((ptr - baseptr) > len || *ptr == '\0') break;
- ptr++;
-
- c = *(ptr);
- *(ptr) = '\0';
- str = g_strdup (anc);
- *(ptr) = c;
-
- if (g_str_equal (str, "(co"))
- parser_append_given_text (parser, "©");
- else if (g_str_equal (str, "(bu"))
- parser_append_given_text (parser, "•");
- else if (g_str_equal (str, "(em"))
- parser_append_given_text (parser, "—");
-
- g_free (str);
- anc = ptr;
- break;
- case '*':
- ptr++;
- if ((ptr - baseptr) > len || *ptr == '\0') break;
-
- if (*(ptr) == 'R') {
- parser_append_given_text (parser, "®");
- ptr++;
- } else if (*(ptr) == '=') {
- parser_append_given_text (parser, "--");
- ptr++;
- } else if (*(ptr) == '(') {
- ptr++;
- if ((ptr - baseptr) > len || *ptr == '\0') break;
- ptr++;
- if ((ptr - baseptr) > len || *ptr == '\0') break;
- ptr++;
-
- c = *(ptr);
- *(ptr) = '\0';
- str = g_strdup (anc);
- *(ptr) = c;
-
- if (g_str_equal (str, "*(Tm"))
- parser_append_given_text (parser, "™");
- else if (g_str_equal (str, "*(lq"))
- parser_append_given_text (parser, "“");
- else if (g_str_equal (str, "*(rq"))
- parser_append_given_text (parser, "”");
-
- g_free (str);
- }
-
- anc = ptr;
- break;
- case 'e':
- anc = ++ptr;
- parser_append_given_text (parser, "\\");
- break;
- case '&':
- anc = ++ptr;
- break;
- case 's':
- /* this handles (actually ignores) the groff macros \s[+-][0-9] */
- ptr++;
- if (*(ptr) == '+' || *(ptr) == '-') {
- ptr++;
- if (g_ascii_isdigit (*ptr)) {
- ptr++;
- }
- } else if (g_ascii_isdigit (*ptr)) {
- ptr++;
- }
- anc = ptr;
- break;
- case '"':
- /* Marks comments till end of line. so we can ignore it. */
- while (ptr && *ptr != '\0')
- ptr++;
- anc = ptr;
- break;
- case '^':
- case '|':
- /* 1/12th and 1/16th em respectively - ignore this and simply output a space */
- anc = ++ptr;
- break;
- default:
- ptr++;
- c = *(ptr);
- *(ptr) = '\0';
- parser_append_given_text (parser, anc);
- *(ptr) = c;
-
- anc++;
- break;
- }
-
- }
- else if ((make_links) && (*ptr == '(')) {
- gchar *space_pos;
- gchar *url;
- gchar c;
- gchar *name_end;
- gchar *num_start;
- gchar *num_end;
-
-
- space_pos = ptr;
-
- while (space_pos != anc && *(space_pos - 1) != ' ') {
- space_pos--;
- }
- name_end = space_pos;
-
- if (space_pos != ptr &&
- g_ascii_isdigit(*(ptr+1)) &&
- (*(ptr+2) == ')' || (g_ascii_isalpha (*(ptr+2)) && *(ptr+3) == ')'))) {
- num_start = ptr;
- if (*(ptr+2) == ')')
- num_end = ptr + 2;
- else
- num_end = ptr + 3;
-
- ptr+=3;
-
- parser_ensure_P (parser);
-
- ptr = space_pos;
-
- c = (*ptr);
- *ptr = '\0';
- parser_append_given_text (parser, anc);
- *ptr = c;
- anc = ptr;
- ptr = num_start;
-
- c = *name_end;
- *name_end = '\0';
- *num_end = '\0';
- url = g_strdup_printf ("man:%s(%s)", anc, num_start + 1);
-
-
- parser->ins = parser_append_node (parser, "UR");
-
- parser->ins = parser_append_node (parser, "URI");
- parser_append_given_text (parser, url);
- parser->ins = parser->ins->parent;
-
- parser_append_given_text (parser, anc);
- parser->ins = parser->ins->parent;
-
- *name_end = c;
- *num_end = ')';
- anc = ptr;
-
- g_free (url);
-
- } else {
- ptr++;
- }
- }
- else {
- ptr++;
- }
-
- } /* end while */
-
- c = *(ptr);
- *(ptr) = '\0';
- parser_append_given_text (parser, anc);
- parser_append_given_text (parser, "\n");
- *(ptr) = c;
-
- g_free (baseptr);
+ parser->buffer++;
+ ret = parser_parse_line (parser, error);
+ parser->buffer--;
+ return ret;
}
-static xmlNodePtr
-parser_append_text (YelpManParser *parser)
+static gboolean
+parse_body_text (YelpManParser *parser, GError **error)
{
- xmlNodePtr node;
- gchar c;
+ gchar tmp[64];
- if (parser->anc == parser->cur)
- return NULL;
+ /*
+ It's this function which is responsible for trying to get *some*
+ semantic information back out of the manual page.
- c = *(parser->cur);
- *(parser->cur) = '\0';
+ The highest-level chopping up is into sections. We use the
+ heuristic that if either
+ (1) We haven't got a section yet or
+ (2) text starts a line (hpos=0)
+ then it's a section title.
- if (g_utf8_get_char (parser->anc) != '\0')
- parser_ensure_P (parser);
+ It's possible to have spaces in section titles, so we carry on
+ accumulating the section title until the next newline.
+ */
+ if (parser->section_state != SECTION_TITLE && parser->hpos == 0) {
+ g_string_truncate (parser->accumulator, 0);
+ /* End the current sheet & section */
+ parser->section_state = SECTION_TITLE;
+ parser->sheet_node = NULL;
- node = xmlNewText (BAD_CAST parser->anc);
- xmlAddChild (parser->ins, node);
+ parser->section_node =
+ xmlAddChild (xmlDocGetRootElement (parser->doc),
+ xmlNewNode (NULL, BAD_CAST "section"));
+ }
+ if (parser->section_state == SECTION_TITLE) goto do_append;
- *(parser->cur) = c;
+ /*
+ Here we've got real body text! If newline is true, this is the
+ first word on a line.
- parser->anc = parser->cur;
+ In which case, we check to see whether hpos agrees with the
+ current sheet's indent. If so (or if there isn't a sheet yet!),
+ we just add to the accumulator. If not, start a new sheet with
+ the correct indent.
- return node;
-}
+ If we aren't the first word on the line, just add to the
+ accumulator.
+ */
+ if ((!parser->sheet_node) ||
+ (parser->newline && (parser->hpos != parser->sheet_indent))) {
+ /* We don't need to worry about finishing the current sheet,
+ since the accumulator etc. get cleared on newlines and we
+ know we're at the start of a line.
+ */
+ parser->sheet_node =
+ xmlAddChild (parser->section_node,
+ xmlNewNode (NULL, BAD_CAST "sheet"));
+ parser->sheet_indent = parser->hpos;
-static xmlNodePtr
-parser_append_given_text (YelpManParser *parser,
- gchar *text)
-{
- xmlNodePtr node;
+ /* The indent is specified in em's. */
+ snprintf (tmp, 64, "%d",
+ (int)(parser->hpos / ((float)parser->char_width) / 1.5));
+ xmlNewProp (parser->sheet_node, BAD_CAST "indent", tmp);
+ }
- parser_ensure_P (parser);
+ do_append:
+ g_string_append (parser->accumulator, parser->buffer+1);
- node = xmlNewText (BAD_CAST text);
- xmlAddChild (parser->ins, node);
+ /* Move hpos forward per char */
+ parser->hpos += strlen (parser->buffer+1) * parser->char_width;
- return node;
-}
+ parser->newline = FALSE;
-static xmlNodePtr
-parser_append_node (YelpManParser *parser,
- gchar *name)
-{
- if (!name)
- return NULL;
-
- return xmlNewChild (parser->ins, NULL, BAD_CAST name, NULL);
+ return TRUE;
}
-static xmlNodePtr
-parser_append_node_attr (YelpManParser *parser,
- gchar *name,
- gchar *attr,
- gchar *value)
+static gboolean
+parse_n (YelpManParser *parser, GError **error)
{
- xmlNodePtr node = NULL;
-
- node = xmlNewChild (parser->ins, NULL, BAD_CAST name, NULL);
- xmlNewProp (node, BAD_CAST attr, BAD_CAST value);
+ xmlNodePtr node;
- return node;
-}
-
-static void
-parser_stack_push_node (YelpManParser *parser,
- xmlNodePtr node)
-{
- parser->nodeStack = g_slist_prepend (parser->nodeStack, node);
-}
+ /* Don't care about newlines in the header bit */
+ if (parser->state != BODY) return TRUE;
-static xmlNodePtr
-parser_stack_pop_node (YelpManParser *parser,
- gchar *name)
-{
- xmlNodePtr popped;
+ if (parser->section_state == SECTION_TITLE) {
+ g_strchomp (parser->accumulator->str);
+ xmlNewTextChild (parser->section_node, NULL,
+ BAD_CAST "title", parser->accumulator->str);
+ g_string_truncate (parser->accumulator, 0);
- if (parser->nodeStack == NULL)
- return NULL;
-
- popped = (xmlNodePtr) parser->nodeStack->data;
-
- if (!xmlStrEqual (BAD_CAST name, popped->name))
- return NULL;
-
- parser->nodeStack = g_slist_remove (parser->nodeStack, popped);
- return popped;
-}
+ parser->section_state = SECTION_BODY;
+ }
+ else if (parser->sheet_node != NULL) {
+ /*
+ In the body of a section, when we get to a newline we should
+ have an accumulator with text in it and a non-null sheet
+ (hopefully!).
-/*
- * Table (tbl) macro package parsing
- */
+ We know the current font, so add a span for that font
+ containing the relevant text. Then add a <br/> tag.
+ */
+ finish_span (parser);
+ node = xmlNewNode (NULL, BAD_CAST "br");
+ xmlAddChild (parser->sheet_node, node);
+ }
-static void
-parser_handle_table_options (YelpManParser *parser)
-{
- /* FIXME: do something with the options */
- g_free (parser->buffer);
+ parser->newline = TRUE;
- return;
+ return TRUE;
}
static void
-parser_handle_row_options (YelpManParser *parser)
+finish_span (YelpManParser *parser)
{
- /* FIXME: do something with these options */
-
- do {
- parser->anc = parser->buffer;
- parser->cur = parser->buffer;
-
- parser_read_until (parser, '.');
-
- if (*(parser->cur) == '.') {
- g_free (parser->buffer);
- break;
- }
-
- g_free (parser->buffer);
-
- } while ((parser->buffer =
- g_data_input_stream_read_line (parser->stream, &(parser->length), NULL, NULL))
- != NULL);
-}
+ xmlNodePtr node;
-static void
-parser_parse_table (YelpManParser *parser)
-{
- xmlNodePtr table_start;
- gboolean empty_row;
-
- table_start = parser->ins;
-
- parser->buffer = g_data_input_stream_read_line (parser->stream, &(parser->length), NULL, NULL);
- if (parser->buffer != NULL) {
- parser->anc = parser->buffer;
- parser->cur = parser->buffer;
-
- parser_read_until (parser, ';');
-
- if (*(parser->cur) == ';') {
- parser_handle_table_options (parser);
-
- parser->buffer = g_data_input_stream_read_line (parser->stream, &(parser->length), NULL, NULL);
- if (parser->buffer != NULL) {
- parser->anc = parser->buffer;
- parser->cur = parser->buffer;
-
- parser_read_until (parser, '\0');
- } else
- return;
- }
-
- parser_handle_row_options (parser);
-
- /* Now this is where we go through all the rows */
- while ((parser->buffer = g_data_input_stream_read_line (parser->stream, &(parser->length), NULL, NULL)) != NULL) {
- parser->anc = parser->buffer;
- parser->cur = parser->buffer;
-
- empty_row = FALSE;
-
- switch (*(parser->buffer)) {
- case '.':
- if (*(parser->buffer + 1) == 'T'
- && *(parser->buffer + 2) == 'E') {
- if (parser_stack_pop_node (parser, "TABLE") == NULL)
- g_warning ("Found unexpected tag: 'TE'\n");
- else {
- parser->ins = table_start;
-
- parser->anc = parser->buffer + 3;
- parser->cur = parser->buffer + 3;
- return;
- }
- } else if (*(parser->buffer + 1) == 'T'
- && *(parser->buffer + 2) == 'H') {
- /* Do nothing */
- empty_row = TRUE;
- } else {
- parser_handle_linetag (parser);
- break;
- }
- case '\0':
- empty_row = TRUE;
- break;
- default:
- break;
- }
-
- if (!empty_row) {
- parser->ins = parser_append_node (parser, "ROW");
- while (PARSER_CUR && *(parser->cur) != '\0') {
- parser_read_until (parser, '\t');
- parser->ins = parser_append_node (parser, "CELL");
- parser_append_text (parser);
- parser->ins = parser->ins->parent;
- parser->anc++;
- parser->cur++;
- }
- }
-
- g_free (parser->buffer);
-
- parser->ins = table_start;
- }
+ if (parser->accumulator->str[0] != '\0') {
+ node = xmlNewTextChild (parser->sheet_node, NULL,
+ BAD_CAST "span",
+ parser->accumulator->str);
+ xmlNewProp (node, BAD_CAST "class", get_font (parser));
+ g_string_truncate (parser->accumulator, 0);
}
}
diff --git a/libyelp/yelp-man-parser.h b/libyelp/yelp-man-parser.h
index 1901f1b..963dfbb 100644
--- a/libyelp/yelp-man-parser.h
+++ b/libyelp/yelp-man-parser.h
@@ -30,8 +30,8 @@ typedef struct _YelpManParser YelpManParser;
YelpManParser * yelp_man_parser_new (void);
xmlDocPtr yelp_man_parser_parse_file (YelpManParser *parser,
- gchar *file,
- const gchar *encoding);
+ gchar *path,
+ GError **error);
void yelp_man_parser_free (YelpManParser *parser);
#endif /* __YELP_MAN_PARSER_H__ */
diff --git a/stylesheets/man2html.xsl.in b/stylesheets/man2html.xsl.in
index 4b21bae..45aea88 100644
--- a/stylesheets/man2html.xsl.in
+++ b/stylesheets/man2html.xsl.in
@@ -17,349 +17,75 @@
<xsl:param name="linktrail" select="''"/>
<xsl:template mode="html.title.mode" match="Man">
- <xsl:value-of select="TH/Title"/>
-</xsl:template>
-
-<xsl:template mode="html.css.mode" match="Man">
- <xsl:param name="direction"/>
- <xsl:param name="left"/>
- <xsl:param name="right"/>
-<xsl:text>
-body { font-family: monospace; }
-div.hgroup { font-family: sans-serif; }
-</xsl:text>
-</xsl:template>
-
-<xsl:template mode="html.header.mode" match="Man">
- <xsl:call-template name="html.linktrail"/>
+ <xsl:value-of select="header/title"/>
</xsl:template>
<xsl:template mode="html.body.mode" match="Man">
- <xsl:apply-templates select="TH"/>
- <xsl:apply-templates select="SH"/>
-</xsl:template>
-
-<xsl:template name="html.linktrail">
- <div class="linktrail" id="linktrail">
- <xsl:call-template name="html.linktrail.one">
- <xsl:with-param name="str" select="$linktrail"/>
- </xsl:call-template>
- </div>
-</xsl:template>
-
-<xsl:template name="html.linktrail.one">
- <xsl:param name="str"/>
- <xsl:variable name="id" select="substring-before($str, '|')"/>
- <xsl:variable name="post_id" select="substring-after($str, '|')"/>
-
- <span class="linktrail">
- <a class="linktrail" href="x-yelp-toc:{$id}">
- <xsl:choose>
- <xsl:when test="contains($post_id, '|')">
- <xsl:value-of select="substring-before($post_id, '|')"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="$post_id"/>
- </xsl:otherwise>
- </xsl:choose>
- </a>
- </span>
-
- <xsl:if test="contains($post_id, '|')">
- <xsl:call-template name="html.linktrail.one">
- <xsl:with-param name="str" select="substring-after($post_id, '|')"/>
- </xsl:call-template>
- </xsl:if>
+ <xsl:apply-templates select="header"/>
+ <xsl:apply-templates select="section"/>
</xsl:template>
<!-- ======================================================================= -->
-<xsl:template match="br">
- <xsl:apply-templates/><br/>
-</xsl:template>
-
-<!-- ignore anything in the Indent,Count,sp element for now -->
-<xsl:template match="Indent" />
-<xsl:template match="Count" />
-<xsl:template match="sp" />
-
-<xsl:template match="B | fB">
- <b><xsl:apply-templates/></b>
-</xsl:template>
-
-<xsl:template match="CELL">
- <td><xsl:apply-templates/></td>
-</xsl:template>
-
-<xsl:template match="I | fI">
- <i><xsl:apply-templates/></i>
-</xsl:template>
-
-<xsl:template match="R | fR">
- <span class="R"><xsl:apply-templates/></span>
-</xsl:template>
-
-<xsl:template match="Verbatim">
- <pre>
- <xsl:choose>
- <xsl:when test="node()[1]/self::text()">
- <xsl:variable name="node" select="node()[1]"/>
- <xsl:choose>
- <xsl:when test="starts-with(string($node), '&#x000A;')">
- <xsl:value-of select="substring-after(string($node), '&#x000A;')"/>
- <xsl:apply-templates select="node()[position() != 1]"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:value-of select="string($node)"/>
- <xsl:apply-templates select="node()[position() != 1]"/>
- </xsl:otherwise>
- </xsl:choose>
- </xsl:when>
- <xsl:otherwise>
- <xsl:apply-templates />
- </xsl:otherwise>
- </xsl:choose>
- </pre>
-</xsl:template>
-
-<xsl:template match="IP">
- <xsl:choose>
- <xsl:when test="preceding-sibling::*[1][self::IP]"/>
- <xsl:otherwise>
- <dl>
- <xsl:apply-templates mode="IP.mode" select="."/>
- </dl>
- </xsl:otherwise>
- </xsl:choose>
-</xsl:template>
-
-<xsl:template mode="IP.mode" match="IP">
- <dt>
- <xsl:choose>
- <xsl:when test="Tag">
- <xsl:apply-templates select="Tag"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:apply-templates/>
- </xsl:otherwise>
- </xsl:choose>
- </dt>
- <dd>
- <xsl:apply-templates select="Tag/following-sibling::node()"/>
- </dd>
- <xsl:apply-templates mode="IP.mode"
- select="following-sibling::*[1][self::IP]"/>
-</xsl:template>
-
-<xsl:template match="P">
- <p><xsl:apply-templates/></p>
-</xsl:template>
-
-<xsl:template match="ROW">
- <tr><xsl:apply-templates/></tr>
-</xsl:template>
-
-<xsl:template match="SS">
- <xsl:variable name="nextSH" select="following-sibling::SH[1]"/>
- <xsl:variable name="nextSS"
- select="following-sibling::SS[not($nextSH) or
- following-sibling::SH[1] = $nextSH][1]"/>
- <div class="sect sect-SS">
- <div class="hgroup">
- <h3 class="title"><xsl:apply-templates/></h3>
- </div>
- <div class="inner">
- <xsl:choose>
- <xsl:when test="$nextSS">
- <xsl:apply-templates
- select="following-sibling::*[following-sibling::SS[1] = $nextSS and
- following-sibling::SS[1]/@id = $nextSS/@id]"/>
- </xsl:when>
- <xsl:when test="$nextSH">
- <xsl:apply-templates
- select="following-sibling::*[following-sibling::SH[1] = $nextSH and
- following-sibling::SH[1]/@id = $nextSH/@id]"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:apply-templates select="following-sibling::*"/>
- </xsl:otherwise>
- </xsl:choose>
- </div>
- </div>
-</xsl:template>
-
-<xsl:template match="SH">
- <xsl:variable name="nextSH" select="following-sibling::SH[1]"/>
- <xsl:variable name="nextSS"
- select="following-sibling::SS[not($nextSH) or
- following-sibling::SH[1] = $nextSH]"/>
- <div class="sect sect-SH">
- <div class="hgroup">
- <h2 class="title"><xsl:apply-templates/></h2>
- </div>
- <div class="inner">
- <xsl:choose>
- <xsl:when test="$nextSS">
- <xsl:apply-templates
- select="following-sibling::*[following-sibling::SS[1] = $nextSS[1] and
- following-sibling::SS[1]/@id = $nextSS[1]/@id]"/>
- <xsl:apply-templates select="$nextSS"/>
- </xsl:when>
- <xsl:when test="$nextSH">
- <xsl:apply-templates
- select="following-sibling::*[following-sibling::SH[1] = $nextSH and
- following-sibling::SH[1]/@id = $nextSH/@id]"/>
- </xsl:when>
- <xsl:otherwise>
- <xsl:apply-templates select="following-sibling::*"/>
- </xsl:otherwise>
- </xsl:choose>
- </div>
- </div>
-</xsl:template>
-
-<xsl:template match="TABLE">
- <table><xsl:apply-templates/></table>
-</xsl:template>
-
-<xsl:template match="Tag">
- <span class="Tag"><xsl:apply-templates/></span>
-</xsl:template>
-
-<xsl:template match="TH">
+<xsl:template match="header">
<div class="hgroup">
<h1 class="title">
- <span class="Title">
- <xsl:apply-templates select="Title/node()"/>
- </span>
- <span class="Section">
- <xsl:text>(</xsl:text>
- <xsl:apply-templates select="Section/node()"/>
- <xsl:text>)</xsl:text>
- </span>
+ <xsl:value-of select="title"/>
+ <xsl:text>(</xsl:text>
+ <xsl:value-of select="section"/>
+ <xsl:text>)</xsl:text>
</h1>
+ <h3 style="text-align: right;">
+ <xsl:value-of select="collection"/>
+ </h3>
</div>
</xsl:template>
-<xsl:template match="UR">
- <a>
- <xsl:attribute name="href">
- <xsl:value-of select="URI" />
- </xsl:attribute>
- <xsl:apply-templates/>
- </a>
-</xsl:template>
-
-<xsl:template match="URI"/>
-
-<xsl:template match="UN">
- <a name="text()" id="text()"/>
-</xsl:template>
-
-<!-- these are all for mdoc (BSD) man page support -->
-
-<!-- these are just printed out -->
-<xsl:template match="An | Dv | Er | Ev | Ic | Li | St">
- <xsl:text>
-</xsl:text>
- <xsl:apply-templates/>
-</xsl:template>
-
-<!-- these are italicized -->
-<xsl:template match="Ad | Ar | Fa | Ot | Pa | Va | Vt">
- <i><xsl:apply-templates/></i>
+<xsl:template match="br">
+ <br/>
</xsl:template>
-<!-- these are bold -->
-<xsl:template match="Cd | Cm | Fd | Ic | Nm">
- <b><xsl:apply-templates/></b>
-</xsl:template>
+<xsl:template match="section">
+ <div class="section" style="padding-top: 1em;">
+ <h2>
+ <xsl:value-of select="title"/>
+ </h2>
-<!-- Function call - TODO need to do the ( , ) here -->
-<xsl:template match="Fn | Fo | Fc">
- <i><xsl:apply-templates/></i>
+ <xsl:apply-templates select="sheet"/>
+ </div>
</xsl:template>
-<!-- Cross reference -->
-<xsl:template match="Xr">
- <xsl:variable name="manpage" select="substring-before(string(.), ' ')"/>
- <xsl:variable name="section" select="substring-before(substring-after(string(.), ' '), ' ')"/>
- <xsl:variable name="extra" select="substring-after(substring-after(string(.), ' '), ' ')"/>
- <a>
- <xsl:attribute name="href">
- <xsl:text>man:</xsl:text>
- <xsl:value-of select="$manpage"/>
- <xsl:text>(</xsl:text>
- <xsl:value-of select="$section"/>
- <xsl:text>)</xsl:text>
+<xsl:template match="sheet">
+ <xsl:element name="div">
+ <xsl:attribute name="style">
+ <xsl:text>padding-left: </xsl:text>
+ <xsl:value-of select="@indent"/>
+ <xsl:text>em;</xsl:text>
</xsl:attribute>
- <xsl:value-of select="$manpage"/>
- <xsl:text>(</xsl:text>
- <xsl:value-of select="$section"/>
- <xsl:text>)</xsl:text>
- </a>
- <xsl:value-of select="$extra"/>
-</xsl:template>
-
-<!-- Option -->
-<xsl:template match="Op | Oo | Oc">
- <xsl:text> [</xsl:text>
- <xsl:apply-templates/>
- <xsl:text>]</xsl:text>
-</xsl:template>
-
-<!-- Trade or type name (small Caps). -->
-<xsl:template match="Tn">
- <xsl:variable name="txt" select="string(child::text())"/>
- <xsl:text> </xsl:text>
- <xsl:value-of select="translate($txt, 'abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')"/>
- <xsl:apply-templates select="*"/>
-</xsl:template>
-<xsl:template match="Nd">
- <xsl:text> - </xsl:text>
- <xsl:apply-templates />
+ <p>
+ <xsl:apply-templates select="span|br"/>
+ </p>
+ </xsl:element>
</xsl:template>
-<xsl:template match="Fl">
- <xsl:text>-</xsl:text>
- <b><xsl:apply-templates select="child::text()"/></b>
- <xsl:apply-templates select="*"/>
-</xsl:template>
-
-<xsl:template match="Bl">
- <dl>
- <xsl:for-each select="It">
- <xsl:choose>
- <xsl:when test="ItTag">
- <dt><xsl:apply-templates select="ItTag"/></dt>
- <dd>
- <xsl:apply-templates select="ItTag/following-sibling::node()"/>
- </dd>
- </xsl:when>
- <xsl:otherwise>
- <dt>
- <xsl:text>•</xsl:text>
- </dt>
- <dd>
- <xsl:apply-templates />
- </dd>
- </xsl:otherwise>
- </xsl:choose>
- </xsl:for-each>
- </dl>
-</xsl:template>
-
-<xsl:template match="ItTag">
- <xsl:apply-templates/>
-</xsl:template>
+<xsl:template match="span">
+ <xsl:element name="span">
+ <xsl:choose>
+ <xsl:when test="@class = 'B'">
+ <xsl:attribute name="style">
+ font-weight: 700;
+ </xsl:attribute>
+ </xsl:when>
+ <xsl:when test="@class = 'I'">
+ <xsl:attribute name="style">
+ font-style: italic;
+ </xsl:attribute>
+ </xsl:when>
+ </xsl:choose>
-<xsl:template match="*">
- <xsl:message>
- <xsl:text>Unmatched element: </xsl:text>
- <xsl:value-of select="local-name(.)"/>
- </xsl:message>
- <xsl:apply-templates/>
+ <xsl:value-of select="."/>
+ </xsl:element>
</xsl:template>
</xsl:stylesheet>
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]