[tracker] tracker-extract: Protect all single valued properties in html extractor



commit 3059043f9547a2bc56251b0ebf3e4d439b6939b1
Author: Carlos Garnacho <carlosg gnome org>
Date:   Mon Mar 14 01:06:44 2016 +0100

    tracker-extract: Protect all single valued properties in html extractor
    
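    In case of malformed documents or unexpected input, avoid generating SPARQL
    that would break cardinality constraints. Only the first value found for a
    single-valued property (nie:license, nie:description) is inserted; any
    further occurrences are ignored, because there's so much broken HTML around.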

 src/tracker-extract/tracker-extract-html.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-html.c b/src/tracker-extract/tracker-extract-html.c
index b03a4a5..29033c6 100644
--- a/src/tracker-extract/tracker-extract-html.c
+++ b/src/tracker-extract/tracker-extract-html.c
@@ -39,6 +39,8 @@ typedef struct {
        TrackerSparqlBuilder *metadata;
        tag_type current;
        guint in_body : 1;
+       guint has_license : 1;
+       guint has_description : 1;
        GString *title;
        GString *plain_text;
        guint n_bytes_remaining;
@@ -109,9 +111,10 @@ parser_start_element (void           *data,
 
                        href = lookup_attribute (attrs, "href");
 
-                       if (href) {
+                       if (href && !pd->has_license) {
                                tracker_sparql_builder_predicate (pd->metadata, "nie:license");
                                tracker_sparql_builder_object_unvalidated (pd->metadata, href);
+                               pd->has_license = TRUE;
                        }
                }
        } else if (g_ascii_strcasecmp (name, "title") == 0) {
@@ -138,9 +141,10 @@ parser_start_element (void           *data,
 
                        desc = lookup_attribute (attrs,"content");
 
-                       if (desc) {
+                       if (desc && !pd->has_description) {
                                tracker_sparql_builder_predicate (pd->metadata, "nie:description");
                                tracker_sparql_builder_object_unvalidated (pd->metadata, desc);
+                               pd->has_description = TRUE;
                        }
                }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]