[tracker] tracker-extract: Try harder when getting EPub contents



commit 3e993a9e59a32f5ce29206c8e3b917ae3a2ca346
Author: Bastien Nocera <hadess hadess net>
Date:   Fri Aug 29 00:01:31 2014 +0200

    tracker-extract: Try harder when getting EPub contents
    
    GMarkup is not a general-purpose XML parser and fails on some EPub
    content files. Instead of aborting at the first parse error, log the
    error, use a fresh parse context for each content file, and keep
    going so that the index is populated with *some* data.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=735645

 src/tracker-extract/tracker-extract-epub.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-epub.c b/src/tracker-extract/tracker-extract-epub.c
index 40d9fdc..64b0859 100644
--- a/src/tracker-extract/tracker-extract-epub.c
+++ b/src/tracker-extract/tracker-extract-epub.c
@@ -551,7 +551,6 @@ extract_opf_contents (const gchar *uri,
                       GList       *content_files)
 {
        OPFContentData content_data = { 0 };
-       GMarkupParseContext *context;
        TrackerConfig *config;
        GError *error = NULL;
        GList *l;
@@ -562,7 +561,6 @@ extract_opf_contents (const gchar *uri,
        };
 
        config = tracker_main_get_config ();
-       context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
 
        content_data.contents = g_string_new ("");
        content_data.limit = (gsize) tracker_config_get_max_bytes (config);
@@ -570,28 +568,30 @@ extract_opf_contents (const gchar *uri,
        g_debug ("Extracting up to %" G_GSIZE_FORMAT " bytes of content", content_data.limit);
 
        for (l = content_files; l; l = l->next) {
+               GMarkupParseContext *context;
                gchar *path;
 
+               context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
+
                /* Page file is relative to OPF file location */
                path = g_build_filename (content_prefix, l->data, NULL);
                tracker_gsf_parse_xml_in_zip (uri, path, context, &error);
 
                if (error) {
-                       g_warning ("Error extracting EPUB contents (%s): %s\n",
-                                  path, error->message);
-                       g_free (path);
-                       break;
+                       g_warning ("Error extracting EPUB contents (%s): %s",
+                                  path, error->message);
+                       g_clear_error (&error);
                }
                g_free (path);
 
+               g_markup_parse_context_free (context);
+
                if (content_data.limit <= 0) {
                        /* Reached plain text extraction limit */
                        break;
                }
        }
 
-       g_markup_parse_context_free (context);
-
        return g_string_free (content_data.contents, FALSE);
 }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]