[tracker] tracker-extract: Protect all single valued properties in abiword extractor



commit 3c420e0383b6e17416ba0296e80befeefe78cb80
Author: Carlos Garnacho <carlosg gnome org>
Date:   Mon Mar 14 01:01:15 2016 +0100

    tracker-extract: Protect all single valued properties in abiword extractor
    
    In case of malformed documents or unexpected input, avoid creating SPARQL
    that would break cardinality constraints; warn nicely instead.

 src/tracker-extract/tracker-extract-abw.c |   49 +++++++++++++++++++++++-----
 1 files changed, 40 insertions(+), 9 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-abw.c b/src/tracker-extract/tracker-extract-abw.c
index a9b8a7b..621e534 100644
--- a/src/tracker-extract/tracker-extract-abw.c
+++ b/src/tracker-extract/tracker-extract-abw.c
@@ -50,9 +50,14 @@ struct AbwParserData {
        TrackerSparqlBuilder *metadata;
        TrackerSparqlBuilder *preupdate;
        GString *content;
+       gchar *uri;
 
        guint cur_tag;
-       guint in_text : 1;
+       guint in_text       : 1;
+       guint has_title     : 1;
+       guint has_subject   : 1;
+       guint has_comment   : 1;
+       guint has_generator : 1;
 };
 
 static void
@@ -99,12 +104,24 @@ abw_parser_text (GMarkupParseContext *context,
 
        switch (data->cur_tag) {
        case ABW_PARSER_TAG_TITLE:
-               tracker_sparql_builder_predicate (data->metadata, "nie:title");
-               tracker_sparql_builder_object_unvalidated (data->metadata, str);
+               if (data->has_title) {
+                       g_warning ("Avoiding additional title (%s) in Abiword document '%s'",
+                                  str, data->uri);
+               } else {
+                       data->has_title = TRUE;
+                       tracker_sparql_builder_predicate (data->metadata, "nie:title");
+                       tracker_sparql_builder_object_unvalidated (data->metadata, str);
+               }
                break;
        case ABW_PARSER_TAG_SUBJECT:
-               tracker_sparql_builder_predicate (data->metadata, "nie:subject");
-               tracker_sparql_builder_object_unvalidated (data->metadata, str);
+               if (data->has_subject) {
+                       g_warning ("Avoiding additional subject (%s) in Abiword document '%s'",
+                                  str, data->uri);
+               } else {
+                       data->has_subject = TRUE;
+                       tracker_sparql_builder_predicate (data->metadata, "nie:subject");
+                       tracker_sparql_builder_object_unvalidated (data->metadata, str);
+               }
                break;
        case ABW_PARSER_TAG_CREATOR:
                tracker_sparql_builder_predicate (data->metadata, "nco:creator");
@@ -118,12 +135,24 @@ abw_parser_text (GMarkupParseContext *context,
                tracker_sparql_builder_object_blank_close (data->metadata);
                break;
        case ABW_PARSER_TAG_DESCRIPTION:
-               tracker_sparql_builder_predicate (data->metadata, "nie:comment");
-               tracker_sparql_builder_object_unvalidated (data->metadata, str);
+               if (data->has_comment) {
+                       g_warning ("Avoiding additional comment (%s) in Abiword document '%s'",
+                                  str, data->uri);
+               } else {
+                       data->has_comment = TRUE;
+                       tracker_sparql_builder_predicate (data->metadata, "nie:comment");
+                       tracker_sparql_builder_object_unvalidated (data->metadata, str);
+               }
                break;
        case ABW_PARSER_TAG_GENERATOR:
-               tracker_sparql_builder_predicate (data->metadata, "nie:generator");
-               tracker_sparql_builder_object_unvalidated (data->metadata, str);
+               if (data->has_generator) {
+                       g_warning ("Avoiding additional generator (%s) in Abiword document '%s'",
+                                  str, data->uri);
+               } else {
+                       data->has_generator = TRUE;
+                       tracker_sparql_builder_predicate (data->metadata, "nie:generator");
+                       tracker_sparql_builder_object_unvalidated (data->metadata, str);
+               }
                break;
        case ABW_PARSER_TAG_KEYWORDS:
        {
@@ -218,6 +247,7 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
                GMarkupParseContext *context;
                AbwParserData data = { 0 };
 
+               data.uri = g_file_get_uri (f);
                data.metadata = metadata;
                data.preupdate = preupdate;
 
@@ -241,6 +271,7 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
                }
 
                g_markup_parse_context_free (context);
+               g_free (data.uri);
        }
 
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]