[tracker] tracker-extract: Try harder when getting EPub contents
- From: Bastien Nocera <hadess src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract: Try harder when getting EPub contents
- Date: Sun, 31 Aug 2014 21:15:47 +0000 (UTC)
commit 3e993a9e59a32f5ce29206c8e3b917ae3a2ca346
Author: Bastien Nocera <hadess hadess net>
Date: Fri Aug 29 00:01:31 2014 +0200
tracker-extract: Try harder when getting EPub contents
GMarkup is really not that good at parsing XML, so we need to try
harder to ignore errors parsing the contents of EPub files, and
populate the index with *some* data.
https://bugzilla.gnome.org/show_bug.cgi?id=735645
src/tracker-extract/tracker-extract-epub.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-epub.c b/src/tracker-extract/tracker-extract-epub.c
index 40d9fdc..64b0859 100644
--- a/src/tracker-extract/tracker-extract-epub.c
+++ b/src/tracker-extract/tracker-extract-epub.c
@@ -551,7 +551,6 @@ extract_opf_contents (const gchar *uri,
GList *content_files)
{
OPFContentData content_data = { 0 };
- GMarkupParseContext *context;
TrackerConfig *config;
GError *error = NULL;
GList *l;
@@ -562,7 +561,6 @@ extract_opf_contents (const gchar *uri,
};
config = tracker_main_get_config ();
- context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
content_data.contents = g_string_new ("");
content_data.limit = (gsize) tracker_config_get_max_bytes (config);
@@ -570,28 +568,30 @@ extract_opf_contents (const gchar *uri,
g_debug ("Extracting up to %" G_GSIZE_FORMAT " bytes of content", content_data.limit);
for (l = content_files; l; l = l->next) {
+ GMarkupParseContext *context;
gchar *path;
+ context = g_markup_parse_context_new (&xml_parser, 0, &content_data, NULL);
+
/* Page file is relative to OPF file location */
path = g_build_filename (content_prefix, l->data, NULL);
tracker_gsf_parse_xml_in_zip (uri, path, context, &error);
if (error) {
- g_warning ("Error extracting EPUB contents (%s): %s\n",
- path, error->message);
- g_free (path);
- break;
+ g_warning ("Error extracting EPUB contents (%s): %s",
+ path, error->message);
+ g_clear_error (&error);
}
g_free (path);
+ g_markup_parse_context_free (context);
+
if (content_data.limit <= 0) {
/* Reached plain text extraction limit */
break;
}
}
- g_markup_parse_context_free (context);
-
return g_string_free (content_data.contents, FALSE);
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]