[yelp] Treat 'C' and 'N' lines more carefully. Yay! Russian works properly at last!



commit c02ee6105f5fce508bf74935c538058a6e417e7e
Author: Rupert Swarbrick <rswarbrick gmail com>
Date:   Thu Dec 16 00:46:23 2010 +0000

    Treat 'C' and 'N' lines more carefully. Yay! Russian works properly at last!

 libyelp/yelp-man-parser.c |  121 +++++++++++++++++++++++++++++---------------
 1 files changed, 80 insertions(+), 41 deletions(-)
---
diff --git a/libyelp/yelp-man-parser.c b/libyelp/yelp-man-parser.c
index 350dc81..a821d3a 100644
--- a/libyelp/yelp-man-parser.c
+++ b/libyelp/yelp-man-parser.c
@@ -182,6 +182,10 @@ static void new_sheet (YelpManParser *parser);
 static void register_title (YelpManParser *parser,
                             const gchar* name, const gchar* section);
 static void right_truncate_common (gchar *dst, const gchar *src);
+static gboolean cheeky_call_parse_line (YelpManParser *parser,
+                                        GError **error,
+                                        gchar first_char,
+                                        const gchar *text);
 
 /******************************************************************************/
 /* Translations for the 'C' command. This is indeed hackish, but the
@@ -607,7 +611,13 @@ parse_text (YelpManParser *parser, GError **error)
     gchar *text, *section, *tmp;
     const gchar *acc;
 
-    g_assert (parser->buffer[0] == 't');
+    /*
+      Sneakily, parse_C and parse_N may call this with a buffer that
+      does not start with 't'; only a real 't' line resets N_count.
+    */
+    if (parser->buffer[0] == 't') {
+        parser->N_count = 0;
+    }
 
     if (parser->state == START) {
         /* This should be the 'Title String(1)' line. It might come in
@@ -669,29 +679,6 @@ parse_text (YelpManParser *parser, GError **error)
     }
 }
 
-/*
-  w is a sort of prefix argument. It indicates a space, so we register
-  that here, then call parser_parse_line again on the rest of the
-  string to deal with that.
- */
-static gboolean
-parse_w (YelpManParser *parser, GError **error)
-{
-    gboolean ret;
-
-    if (parser->state != START) {
-        g_string_append_c (parser->accumulator, ' ');
-    }
-    
-    parser->buffer++;
-    parser->last_char_was_space = TRUE;
-
-    ret = parser_parse_line (parser, error);
-
-    parser->buffer--;
-    return ret;
-}
-
 static gboolean
 parse_body_text (YelpManParser *parser, GError **error)
 {
@@ -708,7 +695,8 @@ parse_body_text (YelpManParser *parser, GError **error)
       It's possible to have spaces in section titles, so we carry on
       accumulating the section title until the next newline.
     */
-    if (parser->section_state != SECTION_TITLE && parser->hpos == 0) {
+    if (parser->section_state == SECTION_BODY &&
+        (!parser->section_node || (parser->hpos == 0))) {
         g_string_truncate (parser->accumulator, 0);
         /* End the current sheet & section */
         parser->section_state = SECTION_TITLE;
@@ -726,12 +714,35 @@ parse_body_text (YelpManParser *parser, GError **error)
 
     /* Move hpos forward per char */
     parser->hpos += strlen (parser->buffer+1) * parser->char_width;
+
     parser->last_char_was_space = FALSE;
-    parser->N_count = 0;
 
     return TRUE;
 }
 
+/*
+  w is a sort of prefix argument. It indicates a space, so we register
+  that here, then call parser_parse_line again on the rest of the
+  string to deal with that.
+ */
+static gboolean
+parse_w (YelpManParser *parser, GError **error)
+{
+    gboolean ret;
+
+    if (parser->state != START) {
+        g_string_append_c (parser->accumulator, ' ');
+    }
+
+    parser->buffer++;
+    parser->last_char_was_space = TRUE;
+
+    ret = parser_parse_line (parser, error);
+
+    parser->buffer--;
+    return ret;
+}
+
 static gboolean
 parse_n (YelpManParser *parser, GError **error)
 {
@@ -767,12 +778,13 @@ parse_n (YelpManParser *parser, GError **error)
                          parser->accumulator->str);
         g_string_truncate (parser->accumulator, 0);
         parser->state = BODY;
+        parser->section_state = SECTION_BODY;
         return TRUE;
     }
 
     /* parser->state == BODY */
-
     if (parser->section_state == SECTION_TITLE) {
+
         g_strchomp (parser->accumulator->str);
         xmlNewTextChild (parser->section_node, NULL,
                          BAD_CAST "title", parser->accumulator->str);
@@ -824,6 +836,8 @@ static gboolean
 parse_N (YelpManParser *parser, GError **error)
 {
     gint n;
+    gchar tmp[2];
+
     if (SSCANF ("N%i", 1, &n)) {
         RAISE_PARSE_ERROR ("Strange format for N line: %s");
     }
@@ -840,13 +854,15 @@ parse_N (YelpManParser *parser, GError **error)
     if (n < 0) {
         append_nbsps (parser, -n);
         parser->N_count += -n;
-    }
-    else {
-        g_string_append_c (parser->accumulator, (gchar)n);
-        parser->N_count++;
+        return TRUE;
     }
 
-    return TRUE;
+    parser->N_count++;
+
+    tmp[0] = (gchar)n;
+    tmp[1] = '\0';
+
+    return cheeky_call_parse_line (parser, error, 'N', tmp);
 }
 
 static void
@@ -887,17 +903,13 @@ parse_C (YelpManParser *parser, GError **error)
         code = 65533; /* Unicode replacement character */
     }
 
-    deal_with_newlines (parser);
-    parser->last_char_was_space = FALSE;
-
     /* Output buffer must be length >= 6. 16 >= 6, so we're ok. */
     len = g_unichar_to_utf8 (code, name);
     name[len] = '\0';
-    g_string_append (parser->accumulator, name);
 
     parser->N_count++;
 
-    return TRUE;
+    return cheeky_call_parse_line (parser, error, 'C', name);
 }
 
 static void
@@ -967,10 +979,10 @@ parse_p (YelpManParser *parser, GError **error)
 static void
 new_sheet (YelpManParser *parser)
 {
-   /* We don't need to worry about finishing the current sheet,
-      since the accumulator etc. get cleared on newlines and we
-      know we're at the start of a line.
-   */
+    /* We don't need to worry about finishing the current sheet,
+       since the accumulator etc. get cleared on newlines and we
+       know we're at the start of a line.
+    */
     parser->sheet_node =
         xmlAddChild (parser->section_node,
                      xmlNewNode (NULL, BAD_CAST "sheet"));
@@ -1007,3 +1019,30 @@ right_truncate_common (gchar *dst, const gchar *src)
         src--;
     }
 }
+
+static gboolean
+cheeky_call_parse_line (YelpManParser *parser, GError **error,
+                        gchar first_char, const gchar* text)
+{
+    /* Do a cunning trick. parse_text does all sorts of work that we
+     * don't want to duplicate in parse_N and parse_C, so build a
+     * temporary buffer and feed it back to parse_text. Tada! Start
+     * it with first_char ("C" or "N") rather than "t" so that
+     * parse_text can tell the difference.
+     */
+    gchar *tmp;
+    gboolean ret;
+    guint len = strlen (text);
+
+    tmp = parser->buffer;
+    parser->buffer = g_new (gchar, 2 + len);
+    parser->buffer[0] = first_char;
+    strncpy (parser->buffer + 1, text, len + 1);
+
+    ret = parse_text (parser, error);
+
+    g_free (parser->buffer);
+    parser->buffer = tmp;
+
+    return ret;
+}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]