[tracker] Probably fixes GB#616199: Avoid second nie:title in msoffice/xml and oasis documents



commit 5835ecb3e9aea1a699e9e2df61e39d8324794393
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Mon Jul 19 13:17:33 2010 +0200

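    Probably fixes GB#616199: Avoid a second nie:title in msoffice/xml and oasis documents

    A title_already_set flag is added to the MsOffice XML and OASIS parser
    info structs. The first title found is emitted as nie:title; any further
    title in the same document is skipped and reported with g_warning().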

 src/tracker-extract/tracker-extract-msoffice.c |   13 +++++++++++--
 src/tracker-extract/tracker-extract-oasis.c    |   12 ++++++++++--
 2 files changed, 21 insertions(+), 4 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-msoffice.c b/src/tracker-extract/tracker-extract-msoffice.c
index ad8dcd9..7edd0f3 100644
--- a/src/tracker-extract/tracker-extract-msoffice.c
+++ b/src/tracker-extract/tracker-extract-msoffice.c
@@ -163,6 +163,7 @@ typedef struct {
 	gboolean preserve_attribute_present;
 	const gchar *uri;
 	GString *content;
+	gboolean title_already_set;
 } MsOfficeXMLParserInfo;
 
 typedef struct {
@@ -1918,8 +1919,14 @@ xml_text_handler_document_data (GMarkupParseContext  *context,
 		break;
 
 	case MS_OFFICE_XML_TAG_TITLE:
-		tracker_sparql_builder_predicate (info->metadata, "nie:title");
-		tracker_sparql_builder_object_unvalidated (info->metadata, text);
+		if (info->title_already_set) {
+			g_warning ("Avoiding additional title (%s) in MsOffice XML document '%s'",
+			           text, info->uri);
+		} else {
+			info->title_already_set = TRUE;
+			tracker_sparql_builder_predicate (info->metadata, "nie:title");
+			tracker_sparql_builder_object_unvalidated (info->metadata, text);
+		}
 		break;
 
 	case MS_OFFICE_XML_TAG_SUBJECT:
@@ -2030,6 +2037,7 @@ xml_read (MsOfficeXMLParserInfo *parser_info,
 	info.preserve_attribute_present = FALSE;
 	info.uri = parser_info->uri;
 	info.content = parser_info->content;
+	info.title_already_set = parser_info->title_already_set;
 
 	switch (type) {
 	case MS_OFFICE_XML_TAG_DOCUMENT_CORE_DATA: {
@@ -2221,6 +2229,7 @@ extract_msoffice_xml (const gchar          *uri,
 	info.preserve_attribute_present = FALSE;
 	info.uri = uri;
 	info.content = g_string_new ("");
+	info.title_already_set = FALSE;
 
 	context = g_markup_parse_context_new (&parser, 0, &info, NULL);
 
diff --git a/src/tracker-extract/tracker-extract-oasis.c b/src/tracker-extract/tracker-extract-oasis.c
index 76985fa..b1534f2 100644
--- a/src/tracker-extract/tracker-extract-oasis.c
+++ b/src/tracker-extract/tracker-extract-oasis.c
@@ -44,6 +44,7 @@ typedef struct {
 	TrackerSparqlBuilder *metadata;
 	ODTTagType current;
 	const gchar *uri;
+	gboolean title_already_set;
 } ODTParseInfo;
 
 static void xml_start_element_handler (GMarkupParseContext   *context,
@@ -149,6 +150,7 @@ extract_oasis (const gchar          *uri,
 	info.metadata = metadata;
 	info.current = ODT_TAG_TYPE_UNKNOWN;
 	info.uri = uri;
+	info.title_already_set = FALSE;
 
 	/* Create parsing context */
 	context = g_markup_parse_context_new (&parser, 0, &info, NULL);
@@ -245,8 +247,14 @@ xml_text_handler (GMarkupParseContext  *context,
 
 	switch (data->current) {
 	case ODT_TAG_TYPE_TITLE:
-		tracker_sparql_builder_predicate (metadata, "nie:title");
-		tracker_sparql_builder_object_unvalidated (metadata, text);
+		if (data->title_already_set) {
+			g_warning ("Avoiding additional title (%s) in OASIS document '%s'",
+			           text, data->uri);
+		} else {
+			data->title_already_set = TRUE;
+			tracker_sparql_builder_predicate (metadata, "nie:title");
+			tracker_sparql_builder_object_unvalidated (metadata, text);
+		}
 		break;
 
 	case ODT_TAG_TYPE_SUBJECT:



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]