[tracker] tracker-extract, msoffice-xml: If max bytes reached don't even start parser
- From: Aleksander Morgado <aleksm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract, msoffice-xml: If max bytes reached don't even start parser
- Date: Wed, 24 Nov 2010 12:24:46 +0000 (UTC)
commit b636b3c7b2ef656f5cb5b4ae065eb053adea7c7a
Author: Aleksander Morgado <aleksander lanedo com>
Date: Wed Nov 24 13:07:27 2010 +0100
tracker-extract, msoffice-xml: If max bytes reached don't even start parser
src/tracker-extract/tracker-extract-msoffice-xml.c | 47 +++++++++-----------
1 files changed, 21 insertions(+), 26 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-msoffice-xml.c b/src/tracker-extract/tracker-extract-msoffice-xml.c
index 7746aec..79d2755 100644
--- a/src/tracker-extract/tracker-extract-msoffice-xml.c
+++ b/src/tracker-extract/tracker-extract-msoffice-xml.c
@@ -606,13 +606,11 @@ msoffice_xml_content_types_parse_start (GMarkupParseContext *context,
gpointer user_data,
GError **error)
{
- MsOfficeXMLParserInfo *info;
+ MsOfficeXMLParserInfo *info = user_data;
const gchar *part_name = NULL;
const gchar *content_type = NULL;
gint i;
- info = user_data;
-
if (g_ascii_strcasecmp (element_name, "Override") != 0) {
return;
}
@@ -641,32 +639,29 @@ msoffice_xml_content_types_parse_start (GMarkupParseContext *context,
return;
}
- /* Content part? */
- switch (info->file_type) {
- case FILE_TYPE_DOCX:
- if (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") == 0) {
- xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA);
- }
- break;
-
- case FILE_TYPE_PPTX:
- case FILE_TYPE_PPSX:
- if ((g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.presentationml.slide+xml") == 0) ||
- (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml") == 0)) {
- xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA);
- }
- break;
+ /* If the file type is unknown, skip trying to extract content */
+ if (info->file_type == FILE_TYPE_INVALID) {
+ g_message ("Invalid file type, not extracting content from '%s'",
+ part_name + 1);
+ return;
+ }
- case FILE_TYPE_XLSX:
- if ((g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml") == 0) ||
- (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml") == 0)) {
+ /* Content part? */
+ if ((info->file_type == FILE_TYPE_DOCX &&
+ g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml") == 0) ||
+ ((info->file_type == FILE_TYPE_PPTX || info->file_type == FILE_TYPE_PPSX) &&
+ (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.presentationml.slide+xml") == 0 ||
+ g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml") == 0)) ||
+ (info->file_type == FILE_TYPE_XLSX &&
+ (g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml") == 0 ||
+ g_ascii_strcasecmp (content_type, "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml") == 0))) {
+ /* If reached max bytes to extract, don't even start parsing the file... just return */
+ if (info->bytes_pending == 0) {
+ g_debug ("Skipping '%s' as already reached max bytes to extract",
+ part_name + 1);
+ } else {
xml_read (info, part_name + 1, MS_OFFICE_XML_TAG_DOCUMENT_TEXT_DATA);
}
- break;
-
- case FILE_TYPE_INVALID:
- g_message ("Invalid file type:'%d'", info->file_type);
- break;
}
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]