[tracker-miners/wip/carlosg/monitor-event-fixes: 2/3] libtracker-miner: Allow multiple tasks per file




commit 109f524820f51edb4a326d5ad82f50693e1de1f9
Author: Carlos Garnacho <carlosg gnome org>
Date:   Sun Aug 9 14:45:09 2020 +0200

    libtracker-miner: Allow multiple tasks per file
    
    This may happen in situations like browser downloads, where we cannot
    avoid multiple events on the same file. Given the race conditions involved
    in ignoring those, it's better to always process it. And even though we
    could stop processing in these situations (eg. stop in should_wait), there's
    no impediment to letting these several updates go through in the same SPARQL
    update.
    
    So handle properly those situations, instead of spewing a warning and maybe
    falling in those race conditions.

 src/libtracker-miner/tracker-task-pool.c        | 64 +++++++++++++++----------
 tests/libtracker-miner/tracker-task-pool-test.c | 33 +++++++------
 2 files changed, 54 insertions(+), 43 deletions(-)
---
diff --git a/src/libtracker-miner/tracker-task-pool.c b/src/libtracker-miner/tracker-task-pool.c
index be3acb5d1..ee8e09237 100644
--- a/src/libtracker-miner/tracker-task-pool.c
+++ b/src/libtracker-miner/tracker-task-pool.c
@@ -33,7 +33,8 @@ typedef struct _TrackerTaskPoolPrivate TrackerTaskPoolPrivate;
 
 struct _TrackerTaskPoolPrivate
 {
-       GHashTable *tasks;
+       GPtrArray *tasks;
+       GHashTable *tasks_by_file;
        guint limit;
 };
 
@@ -53,7 +54,8 @@ tracker_task_pool_finalize (GObject *object)
        TrackerTaskPoolPrivate *priv;
 
        priv = tracker_task_pool_get_instance_private (TRACKER_TASK_POOL (object));
-       g_hash_table_unref (priv->tasks);
+       g_ptr_array_unref (priv->tasks);
+       g_hash_table_unref (priv->tasks_by_file);
 
        G_OBJECT_CLASS (tracker_task_pool_parent_class)->finalize (object);
 }
@@ -141,10 +143,13 @@ tracker_task_pool_init (TrackerTaskPool *pool)
        TrackerTaskPoolPrivate *priv;
 
        priv = tracker_task_pool_get_instance_private (pool);
-       priv->tasks = g_hash_table_new_full (g_file_hash,
-                                            (GEqualFunc) file_equal,
-                                            NULL,
-                                            (GDestroyNotify) tracker_task_unref);
+       priv->tasks =
+               g_ptr_array_new_with_free_func ((GDestroyNotify) tracker_task_unref);
+       priv->tasks_by_file =
+               g_hash_table_new_full (g_file_hash,
+                                      (GEqualFunc) file_equal,
+                                      NULL,
+                                      (GDestroyNotify) g_list_free);
        priv->limit = 0;
 }
 
@@ -195,7 +200,7 @@ tracker_task_pool_get_size (TrackerTaskPool *pool)
        g_return_val_if_fail (TRACKER_IS_TASK_POOL (pool), 0);
 
        priv = tracker_task_pool_get_instance_private (pool);
-       return g_hash_table_size (priv->tasks);
+       return priv->tasks->len;
 }
 
 gboolean
@@ -206,7 +211,7 @@ tracker_task_pool_limit_reached (TrackerTaskPool *pool)
        g_return_val_if_fail (TRACKER_IS_TASK_POOL (pool), FALSE);
 
        priv = tracker_task_pool_get_instance_private (pool);
-       return (g_hash_table_size (priv->tasks) >= priv->limit);
+       return (priv->tasks->len >= priv->limit);
 }
 
 void
@@ -214,6 +219,7 @@ tracker_task_pool_add (TrackerTaskPool *pool,
                        TrackerTask     *task)
 {
        TrackerTaskPoolPrivate *priv;
+       GList *other_tasks;
        GFile *file;
 
        g_return_if_fail (TRACKER_IS_TASK_POOL (pool));
@@ -222,19 +228,14 @@ tracker_task_pool_add (TrackerTaskPool *pool,
 
        file = tracker_task_get_file (task);
 
-       if (g_hash_table_contains (priv->tasks, file)) {
-               /* This is bad! We use the task's associated GFile as the key for the
-                * hash table, so if there's already a value we are about to overwrite
-                * it. This suggests there's a bug in the tracker-miner-fs.c code.
-                */
-               g_warning ("Multiple update tasks for file %s", g_file_get_uri (file));
-       };
+       g_ptr_array_add (priv->tasks, tracker_task_ref (task));
 
-       g_hash_table_insert (priv->tasks,
-                            tracker_task_get_file (task),
-                            tracker_task_ref (task));
+       other_tasks = g_hash_table_lookup (priv->tasks_by_file, file);
+       g_hash_table_steal (priv->tasks_by_file, file);
+       other_tasks = g_list_prepend (other_tasks, task);
+       g_hash_table_insert (priv->tasks_by_file, file, other_tasks);
 
-       if (g_hash_table_size (priv->tasks) == priv->limit) {
+       if (priv->tasks->len == priv->limit) {
                g_object_notify (G_OBJECT (pool), "limit-reached");
        }
 }
@@ -244,14 +245,25 @@ tracker_task_pool_remove (TrackerTaskPool *pool,
                           TrackerTask     *task)
 {
        TrackerTaskPoolPrivate *priv;
+       GList *other_tasks;
+       GFile *file;
 
        g_return_val_if_fail (TRACKER_IS_TASK_POOL (pool), FALSE);
 
        priv = tracker_task_pool_get_instance_private (pool);
 
-       if (g_hash_table_remove (priv->tasks,
-                                tracker_task_get_file (task))) {
-               if (g_hash_table_size (priv->tasks) == priv->limit - 1) {
+       file = tracker_task_get_file (task);
+       other_tasks = g_hash_table_lookup (priv->tasks_by_file, file);
+       g_hash_table_steal (priv->tasks_by_file, file);
+       other_tasks = g_list_remove (other_tasks, task);
+
+       if (other_tasks)
+               g_hash_table_insert (priv->tasks_by_file, file, other_tasks);
+       else
+               g_hash_table_remove (priv->tasks_by_file, file);
+
+       if (g_ptr_array_remove_fast (priv->tasks, task)) {
+               if (priv->tasks->len == priv->limit - 1) {
                        /* We've gone below the threshold again */
                        g_object_notify (G_OBJECT (pool), "limit-reached");
                }
@@ -268,16 +280,16 @@ tracker_task_pool_foreach (TrackerTaskPool *pool,
                            gpointer         user_data)
 {
        TrackerTaskPoolPrivate *priv;
-       GHashTableIter iter;
        TrackerTask *task;
+       guint i;
 
        g_return_if_fail (TRACKER_IS_TASK_POOL (pool));
        g_return_if_fail (func != NULL);
 
        priv = tracker_task_pool_get_instance_private (pool);
-       g_hash_table_iter_init (&iter, priv->tasks);
 
-       while (g_hash_table_iter_next (&iter, NULL, (gpointer *) &task)) {
+       for (i = 0; i < priv->tasks->len; i++) {
+               task = g_ptr_array_index (priv->tasks, i);
                (func) (task, user_data);
        }
 }
@@ -292,7 +304,7 @@ tracker_task_pool_find (TrackerTaskPool *pool,
        g_return_val_if_fail (G_IS_FILE (file), FALSE);
 
        priv = tracker_task_pool_get_instance_private (pool);
-       return g_hash_table_contains (priv->tasks, file);
+       return g_hash_table_contains (priv->tasks_by_file, file);
 }
 
 /* Task */
diff --git a/tests/libtracker-miner/tracker-task-pool-test.c b/tests/libtracker-miner/tracker-task-pool-test.c
index be41309cd..c40266394 100644
--- a/tests/libtracker-miner/tracker-task-pool-test.c
+++ b/tests/libtracker-miner/tracker-task-pool-test.c
@@ -37,7 +37,7 @@ test_task_pool_limit_set (void)
         g_object_unref (pool);
 }
 
-static void
+static TrackerTask *
 add_task (TrackerTaskPool *pool,
           const gchar     *filename,
           gint             expected_size,
@@ -51,19 +51,15 @@ add_task (TrackerTaskPool *pool,
         g_assert_cmpint (tracker_task_pool_get_size (pool), ==, expected_size);
         g_assert (tracker_task_pool_limit_reached (pool) == hit_limit);
 
-        g_object_unref (tracker_task_get_file (task));
-        tracker_task_unref (task);
+        return task;
 }
 
 static void
 remove_task (TrackerTaskPool *pool,
-             const gchar     *filename,
+             TrackerTask     *task,
              gint             expected_size,
              gboolean         hit_limit)
 {
-        TrackerTask *task;
-
-        task = tracker_task_new (g_file_new_for_path (filename), NULL, NULL);
         tracker_task_pool_remove (pool, task);
 
         g_assert_cmpint (tracker_task_pool_get_size (pool), ==, expected_size);
@@ -77,28 +73,31 @@ static void
 test_task_pool_add_remove (void)
 {
         TrackerTaskPool *pool;
+        TrackerTask *a, *b, *c, *d, *nonexistent;
 
         pool = tracker_task_pool_new (3);
 
         /* Additions ... */
-        add_task (pool, "/dev/null", 1, FALSE);
-        add_task (pool, "/dev/null2", 2, FALSE);
-        add_task (pool, "/dev/null3", 3, TRUE);
+        a = add_task (pool, "/dev/null", 1, FALSE);
+        b = add_task (pool, "/dev/null2", 2, FALSE);
+        c = add_task (pool, "/dev/null3", 3, TRUE);
 
         /* We can go over the limit */
-        add_task (pool, "/dev/null4", 4, TRUE);
+        d = add_task (pool, "/dev/null4", 4, TRUE);
 
         /* Remove something that doesn't exist */
-        remove_task (pool, "/dev/null/imNotInThePool", 4, TRUE);
+        nonexistent = tracker_task_new (g_file_new_for_path ("/dev/null/imNotInThePool"), NULL, NULL);
+        remove_task (pool, nonexistent, 4, TRUE);
 
         /* Removals ... (in different order)*/
-        remove_task (pool, "/dev/null4", 3, TRUE);
-        remove_task (pool, "/dev/null2", 2, FALSE);
-        remove_task (pool, "/dev/null3", 1, FALSE);
-        remove_task (pool, "/dev/null", 0, FALSE);
+        remove_task (pool, d, 3, TRUE);
+        remove_task (pool, c, 2, FALSE);
+        remove_task (pool, b, 1, FALSE);
+        remove_task (pool, a, 0, FALSE);
 
         /* Remove in empty queue */
-        remove_task (pool, "/dev/null/random", 0, FALSE);
+        nonexistent = tracker_task_new (g_file_new_for_path ("/dev/null/random"), NULL, NULL);
+        remove_task (pool, nonexistent, 0, FALSE);
 
         g_object_unref (pool);
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]