[tracker-miners/wip/carlosg/fix-flaky-test] tracker-miner-fs: Preserve URN of nfo:Folders across updates



commit 311253bfe040c3e4111958928b1574b41f124fa2
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sat Jun 6 19:43:52 2020 +0200

    tracker-miner-fs: Preserve URN of nfo:Folders across updates
    
    Commit ad34c7d673 tried to handle updates on folders like we do with
    any other nie:InformationElement (i.e. deleting it and waiting for
    a fresh one), with the added trickiness that child files must point
    to the nfo:Folder, old and new.
    
    This worked for the most part, but still there was a race condition
    it could not handle, and triggered randomly by the corresponding test
    (commit 8d1de376d), it goes like:
    
    - child file is updated
    - update is requested on the parent (indexed) folder
    - both things go in that order up to TrackerMiner::process_file(),
      the order is right and neither task blocks, both get processed
      asynchronously "simultaneously"
    - In the async g_file_query_info() processing, both tasks flip
      order.
    - The task for the parent folder completes first, it changes the
      nfo:belongsToContainer relation of all children, but the new file
      is still not inserted. The SPARQL is queued up.
    - The task for the child file completes second, it relates to the
      parent folder, but when looking up its URN the old one is found
      (as we didn't process the SPARQL for the parent folder yet).
    
    This order happening naturally in the event queue would result in
    the queue handling "blocked" until the parent folder is processed,
    here we go with it, and end up triggering random test failures like:
    
    Traceback (most recent call last):
       File "/builds/GNOME/tracker-miners/tests/functional-tests/miner-basic.py", line 378, in 
test_10_folder_update
         self.assertEqual(self.__get_parent_urn(document),
     AssertionError: 'urn:bnode:87544fe5-06e6-487e-93a7-cc8f72e544af' != 
'urn:bnode:a9c70d9c-c365-4fe6-8048-889eb6d9ca9e'
     - urn:bnode:87544fe5-06e6-487e-93a7-cc8f72e544af
     + urn:bnode:a9c70d9c-c365-4fe6-8048-889eb6d9ca9e
    
    Actually, dropping the nie:InformationElement is not that crucial
    with nfo:Folders (since we know they'll become a nfo:Folder again),
    preserving those avoids the need to update nfo:belongsToContainer
    in children altogether, and makes the race condition moot (as the
    URN is preserved across the update).
    
    Also, update the test to the new expectatives, the nfo:Folder URN
    is no longer different.

 src/miners/fs/tracker-miner-files.c   | 38 +++++++++--------------------------
 tests/functional-tests/miner-basic.py |  2 --
 2 files changed, 10 insertions(+), 30 deletions(-)
---
diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c
index 3054c3135..0d618fe49 100644
--- a/src/miners/fs/tracker-miner-files.c
+++ b/src/miners/fs/tracker-miner-files.c
@@ -2299,7 +2299,7 @@ process_file_cb (GObject      *object,
        TrackerResource *resource, *element_resource;
        ProcessFileData *data;
        const gchar *mime_type;
-       gchar *parent_urn, *update_relations_sparql = NULL;
+       gchar *parent_urn;
        gchar *delete_properties_sparql = NULL, *mount_point_sparql;
        GFileInfo *file_info;
        guint64 time_;
@@ -2338,8 +2338,11 @@ process_file_cb (GObject      *object,
        mime_type = g_file_info_get_content_type (file_info);
 
        data->mime_type = g_strdup (mime_type);
+       is_directory = (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY ?
+                       TRUE : FALSE);
 
-       if (tracker_miner_fs_get_urn (TRACKER_MINER_FS (data->miner), file)) {
+       if (!is_directory &&
+           tracker_miner_fs_get_urn (TRACKER_MINER_FS (data->miner), file)) {
                /* Update: delete all information elements for the given data object
                 * and delete dataSources, so we ensure the file is extracted again.
                 */
@@ -2363,9 +2366,6 @@ process_file_cb (GObject      *object,
 
        tracker_resource_add_uri (resource, "rdf:type", "nfo:FileDataObject");
 
-       is_directory = (g_file_info_get_file_type (file_info) == G_FILE_TYPE_DIRECTORY ?
-                       TRUE : FALSE);
-
        parent = g_file_get_parent (file);
        parent_urn = tracker_miner_fs_query_urn (TRACKER_MINER_FS (data->miner), parent);
        g_object_unref (parent);
@@ -2399,28 +2399,11 @@ process_file_cb (GObject      *object,
                                                                   mime_type,
                                                                   is_directory);
 
-       if (element_resource && is_directory &&
+       if (is_directory &&
            tracker_miner_fs_get_urn (TRACKER_MINER_FS (data->miner), file)) {
-               /* Directories need to have the child nfo:FileDataObjects
-                * updated to point to the new nfo:Folder.
-                */
-               update_relations_sparql =
-                       g_strdup_printf ("DELETE {"
-                                        "  GRAPH " DEFAULT_GRAPH " {"
-                                        "    ?u nfo:belongsToContainer ?ie "
-                                        "  }"
-                                        "} INSERT {"
-                                        "  GRAPH " DEFAULT_GRAPH " {"
-                                        "    ?u nfo:belongsToContainer %s "
-                                        "  }"
-                                        "} WHERE {"
-                                        "  GRAPH " DEFAULT_GRAPH " {"
-                                        "    ?u nfo:belongsToContainer ?ie . "
-                                        "    ?ie nie:isStoredAs <%s> "
-                                        "  }"
-                                        "}",
-                                        tracker_resource_get_identifier (element_resource),
-                                        uri);
+               /* Preserve URN for nfo:Folders */
+               tracker_resource_set_identifier (element_resource,
+                                                tracker_miner_fs_get_urn (TRACKER_MINER_FS (data->miner), 
file));
        }
 
        mount_point_sparql = update_mount_point_sparql (data);
@@ -2437,8 +2420,7 @@ process_file_cb (GObject      *object,
                                                                      DEFAULT_GRAPH);
        }
 
-       sparql_str = g_strdup_printf ("%s %s %s %s %s",
-                                     update_relations_sparql ? update_relations_sparql : "",
+       sparql_str = g_strdup_printf ("%s %s %s %s",
                                      delete_properties_sparql ? delete_properties_sparql : "",
                                      sparql_update_str,
                                      ie_update_str ? ie_update_str : "",
diff --git a/tests/functional-tests/miner-basic.py b/tests/functional-tests/miner-basic.py
index d11332188..396d7c662 100755
--- a/tests/functional-tests/miner-basic.py
+++ b/tests/functional-tests/miner-basic.py
@@ -370,8 +370,6 @@ class MinerCrawlTest(fixtures.TrackerMinerTest):
             self.miner_fs.index_file(directory_uri)
 
         new_urn = self.__get_file_urn(directory)
-        # After the update, the InformationElement should be brand new
-        assert(urn != new_urn)
         # Ensure that children remain consistent, old and new ones
         self.assertEqual(new_urn,
                          self.__get_parent_urn(self.path("test-monitored/file1.txt")))


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]