[tracker] tracker-extract: Use safer method to insert tags in PDF module
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract: Use safer method to insert tags in PDF module
- Date: Mon, 14 Mar 2016 22:28:25 +0000 (UTC)
commit 38bdc9d9e91273802ebb95111ed7ca745b66c537
Author: Carlos Garnacho <carlosg gnome org>
Date: Mon Mar 14 02:51:08 2016 +0100
tracker-extract: Use safer method to insert tags in PDF module
The current way of inserting the nao:hasTag relationship on the
extracted file involves one join operation per tag being inserted.
This has performance implications, plus we can feasibly hit the
sqlite limit of 64 tables in joins.
Instead, insert the tags in separate inserts; that will be as fast
as it gets, plus there's no limit on the number of tags.
src/tracker-extract/tracker-extract-pdf.c | 61 +++++++++++++----------------
1 files changed, 27 insertions(+), 34 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-pdf.c b/src/tracker-extract/tracker-extract-pdf.c
index b750678..6402ddd 100644
--- a/src/tracker-extract/tracker-extract-pdf.c
+++ b/src/tracker-extract/tracker-extract-pdf.c
@@ -298,6 +298,7 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
GError *error = NULL;
TrackerSparqlBuilder *metadata, *preupdate;
const gchar *graph;
+ const gchar *urn;
TrackerXmpData *xd = NULL;
PDFData pd = { 0 }; /* actual data */
PDFData md = { 0 }; /* for merging */
@@ -306,7 +307,6 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
gchar *content, *uri;
guint n_bytes;
GPtrArray *keywords;
- GString *where;
guint i;
GFile *file;
gchar *filename;
@@ -318,6 +318,7 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
metadata = tracker_extract_info_get_metadata_builder (info);
preupdate = tracker_extract_info_get_preupdate_builder (info);
graph = tracker_extract_info_get_graph (info);
+ urn = tracker_extract_info_get_urn (info);
file = tracker_extract_info_get_file (info);
filename = g_file_get_path (file);
@@ -413,7 +414,7 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
pd.creation_date = tracker_date_to_string ((time_t) creation_date);
}
- keywords = g_ptr_array_new ();
+ keywords = g_ptr_array_new_with_free_func ((GDestroyNotify) g_free);
if (xml && *xml &&
(xd = tracker_xmp_new (xml, strlen (xml), uri)) != NULL) {
@@ -680,34 +681,22 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
write_pdf_data (pd, metadata, keywords);
}
- where = g_string_new ("");
-
for (i = 0; i < keywords->len; i++) {
- gchar *p, *escaped, *var;
+ gchar *escaped, *subject;
+ const gchar *p;
p = g_ptr_array_index (keywords, i);
escaped = tracker_sparql_escape_string (p);
- var = g_strdup_printf ("tag%d", i + 1);
+ subject = g_strdup_printf ("_:tag%d", i + 1);
/* ensure tag with specified label exists */
- tracker_sparql_builder_append (preupdate, "INSERT { ");
-
- if (graph) {
- tracker_sparql_builder_append (preupdate, "GRAPH <");
- tracker_sparql_builder_append (preupdate, graph);
- tracker_sparql_builder_append (preupdate, "> { ");
- }
-
- tracker_sparql_builder_append (preupdate,
- "_:tag a nao:Tag ; nao:prefLabel \"");
- tracker_sparql_builder_append (preupdate, escaped);
- tracker_sparql_builder_append (preupdate, "\"");
-
- if (graph) {
- tracker_sparql_builder_append (preupdate, " } ");
- }
-
- tracker_sparql_builder_append (preupdate, " }\n");
+ tracker_sparql_builder_insert_open (preupdate, graph);
+ tracker_sparql_builder_subject (preupdate, subject);
+ tracker_sparql_builder_predicate (preupdate, "a");
+ tracker_sparql_builder_object (preupdate, "nao:Tag");
+ tracker_sparql_builder_predicate (preupdate, "nao:prefLabel");
+ tracker_sparql_builder_object_unvalidated (preupdate, escaped);
+ tracker_sparql_builder_insert_close (preupdate);
tracker_sparql_builder_append (preupdate,
"WHERE { FILTER (NOT EXISTS { "
"?tag a nao:Tag ; nao:prefLabel \"");
@@ -716,20 +705,24 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
"\" }) }\n");
/* associate file with tag */
- tracker_sparql_builder_predicate (metadata, "nao:hasTag");
- tracker_sparql_builder_object_variable (metadata, var);
-
- g_string_append_printf (where, "?%s a nao:Tag ; nao:prefLabel \"%s\" .\n", var, escaped);
-
- g_free (var);
+ tracker_sparql_builder_insert_open (preupdate, graph);
+ tracker_sparql_builder_subject_iri (preupdate, urn);
+ tracker_sparql_builder_predicate (preupdate, "nao:hasTag");
+ tracker_sparql_builder_object (preupdate, "?tag");
+ tracker_sparql_builder_insert_close (preupdate);
+ tracker_sparql_builder_where_open (preupdate);
+ tracker_sparql_builder_subject (preupdate, "?tag");
+ tracker_sparql_builder_predicate (preupdate, "a");
+ tracker_sparql_builder_object (preupdate, "nao:Tag");
+ tracker_sparql_builder_predicate (preupdate, "nao:prefLabel");
+ tracker_sparql_builder_object_unvalidated (preupdate, escaped);
+ tracker_sparql_builder_where_close (preupdate);
+
+ g_free (subject);
g_free (escaped);
- g_free (p);
}
g_ptr_array_free (keywords, TRUE);
- tracker_extract_info_set_where_clause (info, where->str);
- g_string_free (where, TRUE);
-
tracker_sparql_builder_predicate (metadata, "nfo:pageCount");
tracker_sparql_builder_object_int64 (metadata, poppler_document_get_n_pages (document));
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]