[tracker] tracker-extract: Protect all single valued properties in html extractor
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract: Protect all single valued properties in html extractor
- Date: Mon, 14 Mar 2016 22:27:44 +0000 (UTC)
commit 3059043f9547a2bc56251b0ebf3e4d439b6939b1
Author: Carlos Garnacho <carlosg gnome org>
Date: Mon Mar 14 01:06:44 2016 +0100
tracker-extract: Protect all single valued properties in html extractor
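In case of malformed documents or unexpected input, avoid generating SPARQL
that would break cardinality constraints: only the first value found for a
single-valued property (nie:license, nie:description) is inserted, and any
further occurrences are ignored, because there is so much broken HTML around.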
src/tracker-extract/tracker-extract-html.c | 8 ++++++--
1 files changed, 6 insertions(+), 2 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-html.c b/src/tracker-extract/tracker-extract-html.c
index b03a4a5..29033c6 100644
--- a/src/tracker-extract/tracker-extract-html.c
+++ b/src/tracker-extract/tracker-extract-html.c
@@ -39,6 +39,8 @@ typedef struct {
TrackerSparqlBuilder *metadata;
tag_type current;
guint in_body : 1;
+ guint has_license : 1;
+ guint has_description : 1;
GString *title;
GString *plain_text;
guint n_bytes_remaining;
@@ -109,9 +111,10 @@ parser_start_element (void *data,
href = lookup_attribute (attrs, "href");
- if (href) {
+ if (href && !pd->has_license) {
tracker_sparql_builder_predicate (pd->metadata, "nie:license");
tracker_sparql_builder_object_unvalidated (pd->metadata, href);
+ pd->has_license = TRUE;
}
}
} else if (g_ascii_strcasecmp (name, "title") == 0) {
@@ -138,9 +141,10 @@ parser_start_element (void *data,
desc = lookup_attribute (attrs,"content");
- if (desc) {
+ if (desc && !pd->has_description) {
tracker_sparql_builder_predicate (pd->metadata, "nie:description");
tracker_sparql_builder_object_unvalidated (pd->metadata, desc);
+ pd->has_description = TRUE;
}
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]