[tracker] tracker-extract: Protect all single valued properties in oasis extractor
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract: Protect all single valued properties in oasis extractor
- Date: Mon, 14 Mar 2016 22:27:39 +0000 (UTC)
commit cd1b468d2b4eda10bdc5bfa53017e0d5302eb6d6
Author: Carlos Garnacho <carlosg gnome org>
Date: Mon Mar 14 01:06:20 2016 +0100
tracker-extract: Protect all single valued properties in oasis extractor
In case of malformed documents or unexpected input, avoid creating SPARQL
that would break cardinality constraints; warn nicely instead.
src/tracker-extract/tracker-extract-oasis.c | 105 ++++++++++++++++++++-------
1 files changed, 78 insertions(+), 27 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-oasis.c b/src/tracker-extract/tracker-extract-oasis.c
index a580b72..5e4bea0 100644
--- a/src/tracker-extract/tracker-extract-oasis.c
+++ b/src/tracker-extract/tracker-extract-oasis.c
@@ -58,7 +58,14 @@ typedef struct {
TrackerSparqlBuilder *metadata;
ODTTagType current;
const gchar *uri;
- gboolean title_already_set;
+ guint has_title : 1;
+ guint has_subject : 1;
+ guint has_publisher : 1;
+ guint has_comment : 1;
+ guint has_generator : 1;
+ guint has_word_count : 1;
+ guint has_page_count : 1;
+ guint has_content_created : 1;
} ODTMetadataParseInfo;
typedef struct {
@@ -163,7 +170,7 @@ tracker_extract_get_metadata (TrackerExtractInfo *extract_info)
{
TrackerSparqlBuilder *metadata;
TrackerConfig *config;
- ODTMetadataParseInfo info;
+ ODTMetadataParseInfo info = { 0 };
ODTFileType file_type;
GFile *file;
gchar *uri;
@@ -201,7 +208,6 @@ tracker_extract_get_metadata (TrackerExtractInfo *extract_info)
info.metadata = metadata;
info.current = ODT_TAG_TYPE_UNKNOWN;
info.uri = uri;
- info.title_already_set = FALSE;
/* Create parsing context */
context = g_markup_parse_context_new (&parser, 0, &info, NULL);
@@ -263,11 +269,23 @@ xml_start_element_handler_metadata (GMarkupParseContext *context,
for (a = attribute_names, v = attribute_values; *a; ++a, ++v) {
if (g_ascii_strcasecmp (*a, "meta:word-count") == 0) {
- tracker_sparql_builder_predicate (metadata, "nfo:wordCount");
- tracker_sparql_builder_object_unvalidated (metadata, *v);
+ if (data->has_word_count) {
+ g_warning ("Avoiding additional word count (%s) in OASIS document '%s'",
+ *v, data->uri);
+ } else {
+ data->has_word_count = TRUE;
+ tracker_sparql_builder_predicate (metadata, "nfo:wordCount");
+ tracker_sparql_builder_object_unvalidated (metadata, *v);
+ }
} else if (g_ascii_strcasecmp (*a, "meta:page-count") == 0) {
- tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
- tracker_sparql_builder_object_unvalidated (metadata, *v);
+ if (data->has_page_count) {
+ g_warning ("Avoiding additional page count (%s) in OASIS document '%s'",
+ *v, data->uri);
+ } else {
+ data->has_page_count = TRUE;
+ tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
+ tracker_sparql_builder_object_unvalidated (metadata, *v);
+ }
}
}
@@ -311,31 +329,43 @@ xml_text_handler_metadata (GMarkupParseContext *context,
switch (data->current) {
case ODT_TAG_TYPE_TITLE:
- if (data->title_already_set) {
+ if (data->has_title) {
g_warning ("Avoiding additional title (%s) in OASIS document '%s'",
text, data->uri);
} else {
- data->title_already_set = TRUE;
+ data->has_title = TRUE;
tracker_sparql_builder_predicate (metadata, "nie:title");
tracker_sparql_builder_object_unvalidated (metadata, text);
}
break;
case ODT_TAG_TYPE_SUBJECT:
- tracker_sparql_builder_predicate (metadata, "nie:subject");
- tracker_sparql_builder_object_unvalidated (metadata, text);
+ if (data->has_subject) {
+ g_warning ("Avoiding additional subject (%s) in OASIS document '%s'",
+ text, data->uri);
+ } else {
+ data->has_subject = TRUE;
+ tracker_sparql_builder_predicate (metadata, "nie:subject");
+ tracker_sparql_builder_object_unvalidated (metadata, text);
+ }
break;
case ODT_TAG_TYPE_AUTHOR:
- tracker_sparql_builder_predicate (metadata, "nco:publisher");
+ if (data->has_publisher) {
+ g_warning ("Avoiding additional publisher (%s) in OASIS document '%s'",
+ text, data->uri);
+ } else {
+ data->has_publisher = TRUE;
+ tracker_sparql_builder_predicate (metadata, "nco:publisher");
- tracker_sparql_builder_object_blank_open (metadata);
- tracker_sparql_builder_predicate (metadata, "a");
- tracker_sparql_builder_object (metadata, "nco:Contact");
+ tracker_sparql_builder_object_blank_open (metadata);
+ tracker_sparql_builder_predicate (metadata, "a");
+ tracker_sparql_builder_object (metadata, "nco:Contact");
- tracker_sparql_builder_predicate (metadata, "nco:fullname");
- tracker_sparql_builder_object_unvalidated (metadata, text);
- tracker_sparql_builder_object_blank_close (metadata);
+ tracker_sparql_builder_predicate (metadata, "nco:fullname");
+ tracker_sparql_builder_object_unvalidated (metadata, text);
+ tracker_sparql_builder_object_blank_close (metadata);
+ }
break;
case ODT_TAG_TYPE_KEYWORDS: {
@@ -357,22 +387,43 @@ xml_text_handler_metadata (GMarkupParseContext *context,
}
case ODT_TAG_TYPE_COMMENTS:
- tracker_sparql_builder_predicate (metadata, "nie:comment");
- tracker_sparql_builder_object_unvalidated (metadata, text);
+ if (data->has_comment) {
+ g_warning ("Avoiding additional comment (%s) in OASIS document '%s'",
+ text, data->uri);
+ } else {
+ data->has_comment = TRUE;
+ tracker_sparql_builder_predicate (metadata, "nie:comment");
+ tracker_sparql_builder_object_unvalidated (metadata, text);
+ }
break;
case ODT_TAG_TYPE_CREATED:
- date = tracker_date_guess (text);
- if (date) {
- tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
- tracker_sparql_builder_object_unvalidated (metadata, date);
- g_free (date);
+ if (data->has_content_created) {
+ g_warning ("Avoiding additional creation time (%s) in OASIS document '%s'",
+ text, data->uri);
+ } else {
+ date = tracker_date_guess (text);
+ if (date) {
+ data->has_content_created = TRUE;
+ tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+ tracker_sparql_builder_object_unvalidated (metadata, date);
+ g_free (date);
+ } else {
+ g_warning ("Could not parse creation time (%s) in OASIS document '%s'",
+ text, data->uri);
+ }
}
break;
case ODT_TAG_TYPE_GENERATOR:
- tracker_sparql_builder_predicate (metadata, "nie:generator");
- tracker_sparql_builder_object_unvalidated (metadata, text);
+ if (data->has_generator) {
+ g_warning ("Avoiding additional creation time (%s) in OASIS document '%s'",
+ text, data->uri);
+ } else {
+ data->has_generator = TRUE;
+ tracker_sparql_builder_predicate (metadata, "nie:generator");
+ tracker_sparql_builder_object_unvalidated (metadata, text);
+ }
break;
default:
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]