[tracker-miners/wip/carlosg/move-perf] tracker-miner-fs: Do not use unrestricted queries moving files
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker-miners/wip/carlosg/move-perf] tracker-miner-fs: Do not use unrestricted queries moving files
- Date: Sat, 6 Aug 2022 18:19:51 +0000 (UTC)
commit 5bb579e9a309d55108a5cbceef79220fa664edc4
Author: Carlos Garnacho <carlosg gnome org>
Date: Sat Aug 6 18:08:12 2022 +0200
tracker-miner-fs: Do not use unrestricted queries moving files
When a directory is renamed and a recursive renaming of the
nfo:FileDataObjects starts, we use overly generic queries to
mirror all the data from the old nfo:FileDataObjects to the new
ones with the updated URIs.
This { ?s ?p ?o } query results in the triples virtual table being
queried in the most generic way possible, thus all values are
returned. This makes the cost of the updates that handle these
recursive moves scale with the number of elements in the database.
The use of unrestricted queries is actually discouraged in the Tracker
documentation [1], so avoid falling into the very trap we warn about.
Since we do know the data we expect to exist in a nfo:FileDataObject
in both the tracker:FileSystem and content graphs, make the relevant
queries completely specific about the properties being moved from
the old nfo:FileDataObject to the new one.
The performance improvement when handling move events ranges from
barely noticeable to massive, depending on the database size. For
~130K elements indexed, moving a directory containing 180 files
went down from 48-50s to ~2s.
[1]
https://gnome.pages.gitlab.gnome.org/tracker/docs/developer/performance.html#avoid-queries-with-unrestricted-predicates
Closes: https://gitlab.gnome.org/GNOME/tracker-miners/-/issues/228
src/miners/fs/tracker-miner-files.c | 66 +++++++++++++++++++++++++++++--------
1 file changed, 52 insertions(+), 14 deletions(-)
---
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 5b3cfd62d..47edc5562 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -2141,6 +2141,36 @@ miner_files_move_file (TrackerMinerFS *fs,
}
}
+#define FS_PROPERTIES \
+ " nfo:fileSize ?fileSize ;" \
+ " nfo:fileLastModified ?fileLastModified ;" \
+ " nfo:fileLastAccessed ?fileLastAccessed ;" \
+ " nfo:fileCreated ?fileCreated ;" \
+ " nie:dataSource ?dataSource ;" \
+ " nie:interpretedAs ?interpretedAs ;" \
+ " tracker:extractorHash ?extractorHash ."
+#define FS_WHERE \
+ " ?f a nfo:FileDataObject ;" \
+ " nfo:fileSize ?fileSize ;" \
+ " nfo:fileLastModified ?fileLastModified ;" \
+ " nfo:fileLastAccessed ?fileLastAccessed ." \
+ " OPTIONAL { ?f nfo:fileCreated ?fileCreated } ." \
+ " OPTIONAL { ?f nie:dataSource ?dataSource } ." \
+ " OPTIONAL { ?f nie:interpretedAs ?interpretedAs } ." \
+ " OPTIONAL { ?f tracker:extractorHash ?extractorHash } ."
+
+#define GRAPH_PROPERTIES \
+ " nfo:fileSize ?fileSize ;" \
+ " nfo:fileLastModified ?fileLastModified ;" \
+ " nie:dataSource ?dataSource ;" \
+ " nie:interpretedAs ?interpretedAs ."
+#define GRAPH_WHERE \
+ " ?f a nfo:FileDataObject ; " \
+ " nfo:fileSize ?fileSize ;" \
+ " nfo:fileLastModified ?fileLastModified ;" \
+ " OPTIONAL { ?f nie:dataSource ?dataSource } ." \
+ " OPTIONAL { ?f nie:interpretedAs ?interpretedAs } ."
+
/* Update nie:isStoredAs in the nie:InformationElement */
g_string_append_printf (sparql,
"DELETE { "
@@ -2167,10 +2197,10 @@ miner_files_move_file (TrackerMinerFS *fs,
" nfo:fileName \"%s\" ; "
" nie:url \"%s\" "
" %s ; "
- " ?p ?o . "
+ FS_PROPERTIES
"} WHERE { "
- " <%s> ?p ?o ; "
- " FILTER (?p != nfo:fileName && ?p != nie:url && ?p !=
nfo:belongsToContainer) . "
+ " BIND (<%s> AS ?f) ."
+ FS_WHERE
"} ",
source_uri,
uri, display_name, uri, container_clause,
@@ -2184,14 +2214,14 @@ miner_files_move_file (TrackerMinerFS *fs,
"} INSERT {"
" GRAPH ?g {"
" <%s> a nfo:FileDataObject ; "
- " nfo:fileName \"%s\" ; "
- " ?p ?o "
+ " nfo:fileName \"%s\" ; "
+ GRAPH_PROPERTIES
" }"
"} WHERE {"
" GRAPH ?g {"
- " <%s> ?p ?o "
+ " BIND (<%s> AS ?f) ."
+ GRAPH_WHERE
" }"
- " FILTER (?p != nfo:fileName) . "
"}",
source_uri, uri, display_name, source_uri);
g_free (container_clause);
@@ -2224,13 +2254,15 @@ miner_files_move_file (TrackerMinerFS *fs,
"} INSERT { "
" ?new_url a nfo:FileDataObject ; "
" nie:url ?new_url ; "
- " ?p ?o . "
+ " nfo:belongsToContainer ?belongsToContainer ;"
+ " nfo:fileName ?fileName ;"
+ FS_PROPERTIES
"} WHERE { "
- " ?f a nfo:FileDataObject ;"
- " ?p ?o . "
+ FS_WHERE
+ " ?f nfo:fileName ?fileName ;"
+ " OPTIONAL { ?f nfo:belongsToContainer ?belongsToContainer } ."
" BIND (CONCAT (\"%s/\", SUBSTR (STR (?f), STRLEN (\"%s/\") + 1)) AS
?new_url) ."
" FILTER (STRSTARTS (STR (?f), \"%s/\")) . "
- " FILTER (?p != nie:url) . "
"} ",
uri, source_uri, source_uri);
/* Update nfo:FileDataObject in data graphs */
@@ -2242,12 +2274,13 @@ miner_files_move_file (TrackerMinerFS *fs,
"} INSERT {"
" GRAPH ?g {"
" ?new_url a nfo:FileDataObject ; "
- " ?p ?o ."
+ " nfo:fileName ?fileName ;"
+ GRAPH_PROPERTIES
" }"
"} WHERE {"
" GRAPH ?g {"
- " ?f a nfo:FileDataObject ;"
- " ?p ?o ."
+ GRAPH_WHERE
+ " ?f nfo:fileName ?fileName ."
" BIND (CONCAT (\"%s/\", SUBSTR (STR (?f), STRLEN (\"%s/\") + 1))
AS ?new_url) ."
" FILTER (STRSTARTS (STR (?f), \"%s/\")) . "
" }"
@@ -2255,6 +2288,11 @@ miner_files_move_file (TrackerMinerFS *fs,
uri, source_uri, source_uri);
}
+#undef FS_PROPERTIES
+#undef FS_WHERE
+#undef GRAPH_PROPERTIES
+#undef GRAPH_WHERE
+
tracker_sparql_buffer_push_sparql (buffer, file, sparql->str);
g_free (uri);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]