[tracker-miners/wip/carlosg/move-perf] tracker-miner-fs: Do not use unrestricted queries moving files




commit 5bb579e9a309d55108a5cbceef79220fa664edc4
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sat Aug 6 18:08:12 2022 +0200

    tracker-miner-fs: Do not use unrestricted queries moving files
    
    When a directory is renamed and the recursive renaming of its
    nfo:FileDataObjects starts, we use very generic queries to
    mirror all the data from the old nfo:FileDataObjects to the new
    ones with the updated URIs.
    
    This { ?s ?p ?o } query results in the triples virtual table being
    queried in the most generic way possible, thus all values are
    returned. This makes the updates that handle these recursive moves
    dependent on the number of elements in the database.
    
    The use of unrestricted queries is actually discouraged in the Tracker
    documentation [1], so avoid falling into our own foretold mistakes.
    Since we do know the data we expect to exist in an nfo:FileDataObject
    in both the tracker:FileSystem and content graphs, make the relevant
    queries completely specific about the properties being moved from
    the old nfo:FileDataObject to the new one.
    
    The performance improvement when handling move events goes from
    barely noticeable to massive, depending on the database size. For
    ~130K elements indexed, moving a directory containing 180 files
    went down from 48-50s to ~2s.
    
    [1] https://gnome.pages.gitlab.gnome.org/tracker/docs/developer/performance.html#avoid-queries-with-unrestricted-predicates
    
    Closes: https://gitlab.gnome.org/GNOME/tracker-miners/-/issues/228

 src/miners/fs/tracker-miner-files.c | 66 +++++++++++++++++++++++++++++--------
 1 file changed, 52 insertions(+), 14 deletions(-)
---
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 5b3cfd62d..47edc5562 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -2141,6 +2141,36 @@ miner_files_move_file (TrackerMinerFS      *fs,
                }
        }
 
+#define FS_PROPERTIES \
+       "  nfo:fileSize ?fileSize ;" \
+       "  nfo:fileLastModified ?fileLastModified ;" \
+       "  nfo:fileLastAccessed ?fileLastAccessed ;" \
+       "  nfo:fileCreated ?fileCreated ;" \
+       "  nie:dataSource ?dataSource ;" \
+       "  nie:interpretedAs ?interpretedAs ;" \
+       "  tracker:extractorHash ?extractorHash ."
+#define FS_WHERE \
+       "  ?f a nfo:FileDataObject ;" \
+       "    nfo:fileSize ?fileSize ;" \
+       "    nfo:fileLastModified ?fileLastModified ;" \
+       "    nfo:fileLastAccessed ?fileLastAccessed ." \
+       "  OPTIONAL { ?f nfo:fileCreated ?fileCreated } ." \
+       "  OPTIONAL { ?f nie:dataSource ?dataSource } ." \
+       "  OPTIONAL { ?f nie:interpretedAs ?interpretedAs } ." \
+       "  OPTIONAL { ?f tracker:extractorHash ?extractorHash } ."
+
+#define GRAPH_PROPERTIES \
+       "  nfo:fileSize ?fileSize ;" \
+       "  nfo:fileLastModified ?fileLastModified ;" \
+       "  nie:dataSource ?dataSource ;" \
+       "  nie:interpretedAs ?interpretedAs ."
+#define GRAPH_WHERE \
+       "  ?f a nfo:FileDataObject ; " \
+       "    nfo:fileSize ?fileSize ;" \
+       "    nfo:fileLastModified ?fileLastModified ;" \
+       "  OPTIONAL { ?f nie:dataSource ?dataSource } ." \
+       "  OPTIONAL { ?f nie:interpretedAs ?interpretedAs } ."
+
        /* Update nie:isStoredAs in the nie:InformationElement */
        g_string_append_printf (sparql,
                                "DELETE { "
@@ -2167,10 +2197,10 @@ miner_files_move_file (TrackerMinerFS      *fs,
                                "       nfo:fileName \"%s\" ; "
                                "       nie:url \"%s\" "
                                "       %s ; "
-                               "       ?p ?o . "
+                               FS_PROPERTIES
                                "} WHERE { "
-                               "  <%s> ?p ?o ; "
-                               "  FILTER (?p != nfo:fileName && ?p != nie:url && ?p != 
nfo:belongsToContainer) . "
+                               "  BIND (<%s> AS ?f) ."
+                               FS_WHERE
                                "} ",
                                source_uri,
                                uri, display_name, uri, container_clause,
@@ -2184,14 +2214,14 @@ miner_files_move_file (TrackerMinerFS      *fs,
                                "} INSERT {"
                                "  GRAPH ?g {"
                                "    <%s> a nfo:FileDataObject ; "
-                               "         nfo:fileName \"%s\" ; "
-                               "         ?p ?o "
+                               "      nfo:fileName \"%s\" ; "
+                               GRAPH_PROPERTIES
                                "  }"
                                "} WHERE {"
                                "  GRAPH ?g {"
-                               "    <%s> ?p ?o "
+                               "    BIND (<%s> AS ?f) ."
+                               GRAPH_WHERE
                                "  }"
-                               "  FILTER (?p != nfo:fileName) . "
                                "}",
                                source_uri, uri, display_name, source_uri);
        g_free (container_clause);
@@ -2224,13 +2254,15 @@ miner_files_move_file (TrackerMinerFS      *fs,
                                        "} INSERT { "
                                        "  ?new_url a nfo:FileDataObject ; "
                                        "       nie:url ?new_url ; "
-                                       "       ?p ?o . "
+                                       "       nfo:belongsToContainer ?belongsToContainer ;"
+                                       "       nfo:fileName ?fileName ;"
+                                       FS_PROPERTIES
                                        "} WHERE { "
-                                       "  ?f a nfo:FileDataObject ;"
-                                       "     ?p ?o . "
+                                       FS_WHERE
+                                       "  ?f nfo:fileName ?fileName ;"
+                                       "  OPTIONAL { ?f nfo:belongsToContainer ?belongsToContainer } ."
                                        "  BIND (CONCAT (\"%s/\", SUBSTR (STR (?f), STRLEN (\"%s/\") + 1)) AS 
?new_url) ."
                                        "  FILTER (STRSTARTS (STR (?f), \"%s/\")) . "
-                                       "  FILTER (?p != nie:url) . "
                                        "} ",
                                        uri, source_uri, source_uri);
                /* Update nfo:FileDataObject in data graphs */
@@ -2242,12 +2274,13 @@ miner_files_move_file (TrackerMinerFS      *fs,
                                        "} INSERT {"
                                        "  GRAPH ?g {"
                                        "    ?new_url a nfo:FileDataObject ; "
-                                       "             ?p ?o ."
+                                       "      nfo:fileName ?fileName ;"
+                                       GRAPH_PROPERTIES
                                        "  }"
                                        "} WHERE {"
                                        "  GRAPH ?g {"
-                                       "    ?f a nfo:FileDataObject ;"
-                                       "       ?p ?o ."
+                                       GRAPH_WHERE
+                                       "    ?f nfo:fileName ?fileName ."
                                        "    BIND (CONCAT (\"%s/\", SUBSTR (STR (?f), STRLEN (\"%s/\") + 1)) 
AS ?new_url) ."
                                        "    FILTER (STRSTARTS (STR (?f), \"%s/\")) . "
                                        "  }"
@@ -2255,6 +2288,11 @@ miner_files_move_file (TrackerMinerFS      *fs,
                                        uri, source_uri, source_uri);
        }
 
+#undef FS_PROPERTIES
+#undef FS_WHERE
+#undef GRAPH_PROPERTIES
+#undef GRAPH_WHERE
+
        tracker_sparql_buffer_push_sparql (buffer, file, sparql->str);
 
        g_free (uri);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]