[gtk/wip/otte/sortlistmodel: 147/154] timsort: Add change tracking to gtk_tim_sort_step()



commit 37046131ce794b3034b5d2c6505d7a8496a92f64
Author: Benjamin Otte <otte redhat com>
Date:   Sun Jul 12 04:20:19 2020 +0200

    timsort: Add change tracking to gtk_tim_sort_step()

 gtk/gtktim1sortmodel.c  |   2 +-
 gtk/gtktim2sortmodel.c  |  49 +++++++++++++++-
 gtk/gtktimsort-impl.c   | 147 ++++++++++++++++++++----------------------------
 gtk/gtktimsort.c        |  53 ++++++++++++++---
 gtk/gtktimsortprivate.h |   3 +-
 testsuite/gtk/timsort.c |  41 ++++++++++++++
 6 files changed, 196 insertions(+), 99 deletions(-)
---
diff --git a/gtk/gtktim1sortmodel.c b/gtk/gtktim1sortmodel.c
index 832a2976f4..4cb0d1f710 100644
--- a/gtk/gtktim1sortmodel.c
+++ b/gtk/gtktim1sortmodel.c
@@ -188,7 +188,7 @@ gtk_tim1_sort_model_resort (GtkTim1SortModel *self,
                      self->sorter);
   gtk_tim_sort_set_already_sorted (&sort, already_sorted);
 
-  while (gtk_tim_sort_step (&sort));
+  while (gtk_tim_sort_step (&sort, NULL));
 
   gtk_tim_sort_finish (&sort);
 }
diff --git a/gtk/gtktim2sortmodel.c b/gtk/gtktim2sortmodel.c
index 7203843e63..f0bfc7e448 100644
--- a/gtk/gtktim2sortmodel.c
+++ b/gtk/gtktim2sortmodel.c
@@ -151,15 +151,58 @@ gtk_tim2_sort_model_stop_sorting (GtkTim2SortModel *self)
   g_clear_handle_id (&self->sort_cb, g_source_remove);
 }
 
+static gboolean
+gtk_tim2_sort_model_sort_step (GtkTim2SortModel *self,
+                               guint            *out_position,
+                               guint            *out_n_items)
+{
+  gint64 end_time = g_get_monotonic_time ();
+  gboolean result = FALSE;
+  GtkTimSortRun change;
+  SortItem *start_change, *end_change;
+
+  /* 1 millisecond */
+  end_time += 1000;
+  end_change = sort_array_get_data (&self->items);
+  start_change = end_change + sort_array_get_size (&self->items);
+
+  while (gtk_tim_sort_step (&self->sort, &change))
+    {
+      result = TRUE;
+      if (change.len)
+        {
+          start_change = MIN (start_change, (SortItem *) change.base);
+          end_change = MAX (end_change, ((SortItem *) change.base) + change.len);
+        }
+     
+      if (g_get_monotonic_time () >= end_time)
+        break;
+    }
+
+  if (start_change < end_change)
+    {
+      *out_position = start_change - sort_array_get_data (&self->items);
+      *out_n_items = end_change - start_change;
+    }
+  else
+    {
+      *out_position = 0;
+      *out_n_items = 0;
+    }
+
+  return result;
+}
+
 static gboolean
 gtk_tim2_sort_model_sort_cb (gpointer data)
 {
   GtkTim2SortModel *self = data;
+  guint pos, n_items;
 
-  if (gtk_tim_sort_step (&self->sort))
+  if (gtk_tim2_sort_model_sort_step (self, &pos, &n_items))
     {
-      guint n_items = sort_array_get_size (&self->items);
-      g_list_model_items_changed (G_LIST_MODEL (self), 0, n_items, n_items);
+      if (n_items)
+        g_list_model_items_changed (G_LIST_MODEL (self), pos, n_items, n_items);
       return G_SOURCE_CONTINUE;
     }
 
diff --git a/gtk/gtktimsort-impl.c b/gtk/gtktimsort-impl.c
index 8878db6a41..3e60706b21 100644
--- a/gtk/gtktimsort-impl.c
+++ b/gtk/gtktimsort-impl.c
@@ -82,14 +82,18 @@ static void gtk_tim_sort(reverse_range) (GtkTimSort *self,
  *          the specified array
  */
 static gsize
-gtk_tim_sort(prepare_run) (GtkTimSort *self)
+gtk_tim_sort(prepare_run) (GtkTimSort    *self,
+                           GtkTimSortRun *out_change)
 {
   gsize run_hi = 1;
   char *cur;
   char *next;
 
   if (self->size <= run_hi)
-    return self->size;
+    {
+      gtk_tim_sort_set_change (out_change, NULL, 0);
+      return self->size;
+    }
 
   cur = INCPTR (self->base);
   next = INCPTR (cur);
@@ -105,6 +109,7 @@ gtk_tim_sort(prepare_run) (GtkTimSort *self)
           next = INCPTR (next);
         }
       gtk_tim_sort(reverse_range) (self, self->base, run_hi);
+      gtk_tim_sort_set_change (out_change, self->base, run_hi);
     }
   else                          /* Ascending */
     {
@@ -114,6 +119,7 @@ gtk_tim_sort(prepare_run) (GtkTimSort *self)
           cur = next;
           next = INCPTR (next);
         }
+      gtk_tim_sort_set_change (out_change, NULL, 0);
     }
 
   return run_hi;
@@ -135,13 +141,16 @@ gtk_tim_sort(prepare_run) (GtkTimSort *self)
  * @param start the index of the first element in the range that is
  *        not already known to be sorted ({@code lo <= start <= hi})
  */
-static void gtk_tim_sort(binary_sort) (GtkTimSort *self,
-                                       gpointer    a,
-                                       gsize       hi,
-                                       gsize       start)
+static void gtk_tim_sort(binary_sort) (GtkTimSort    *self,
+                                       gpointer       a,
+                                       gsize          hi,
+                                       gsize          start,
+                                       GtkTimSortRun *inout_change)
 {
   DEFINE_TEMP (pivot);
   char *startp;
+  char *change_min = ELEM (a, hi);
+  char *change_max = a;
 
   g_assert (start <= hi);
 
@@ -186,22 +195,39 @@ static void gtk_tim_sort(binary_sort) (GtkTimSort *self,
        * Slide elements over to make room to make room for pivot.
        */
       n = startp - leftp;               /* The number of bytes to move */
+      if (n == 0)
+        continue;
 
       ASSIGN (pivot, startp);
       memmove (INCPTR (leftp), leftp, n);         /* POP: overlaps */
 
       /* a[left] = pivot; */
       ASSIGN (leftp, pivot);
+
+      change_min = MIN (change_min, leftp);
+      change_max = MAX (change_max, ELEM (startp, 1));
+    }
+
+  if (change_max > (char *) a)
+    {
+      g_assert (change_min < ELEM (a, hi));
+      if (inout_change && inout_change->len)
+        {
+          change_max = MAX (change_max, ELEM (inout_change->base, inout_change->len));
+          change_min = MIN (change_min, (char *) inout_change->base);
+        }
+      gtk_tim_sort_set_change (inout_change, change_min, (change_max - change_min) / WIDTH);
     }
 }
 
 static gboolean
-gtk_tim_sort(merge_append) (GtkTimSort *self)
+gtk_tim_sort(merge_append) (GtkTimSort    *self,
+                            GtkTimSortRun *out_change)
 {
   /* Identify next run */
   gsize run_len;
 
-  run_len = gtk_tim_sort(prepare_run) (self);
+  run_len = gtk_tim_sort(prepare_run) (self, out_change);
   if (run_len == 0)
     return FALSE;
 
@@ -209,7 +235,7 @@ gtk_tim_sort(merge_append) (GtkTimSort *self)
   if (run_len < self->min_run)
     {
       gsize force = MIN (self->size, self->min_run);
-      gtk_tim_sort(binary_sort) (self, self->base, force, run_len);
+      gtk_tim_sort(binary_sort) (self, self->base, force, run_len, out_change);
       run_len = force;
     }
   /* Push run onto pending-run stack, and maybe merge */
@@ -218,71 +244,6 @@ gtk_tim_sort(merge_append) (GtkTimSort *self)
   return TRUE;
 }
 
-#if 0
-static int gtk_tim_sort(timsort) (void *a, gsize nel)
-{
-  int err = SUCCESS;
-  GtkTimSort self;
-  gsize self->min_run;
-
-  g_assert (a || !nel || !width);
-  g_assert (c);
-
-  if (nel < 2 || !width)
-    return err;                 /* Arrays of size 0 and 1 are always sorted */
-
-  /* If array is small, do a "mini-TimSort" with no merges */
-  if (nel < MIN_MERGE)
-    {
-      gsize initRunLen =
-        gtk_tim_sort(prepare_run) (a, nel, CMPARGS (c, carg));
-      gtk_tim_sort(binary_sort) (self, a, nel, initRunLen, CMPARGS (c, carg));
-      return err;
-    }
-
-  /*
-   * March over the array once, left to right, finding natural runs,
-   * extending short natural runs to self->min_run elements, and merging runs
-   * to maintain stack invariant.
-   */
-  if ((err = timsort_init (&self, a, nel, CMPARGS (c, carg))))
-    return err;
-
-  do
-    {
-      /* Identify next run */
-      gsize run_len =
-        gtk_tim_sort(prepare_run) (a, nel, CMPARGS (c, carg));
-
-      /* If run is short, extend to min(self->min_run, nel) */
-      if (run_len < self->min_run)
-        {
-          gsize force = nel <= self->min_run ? nel : self->min_run;
-          gtk_tim_sort(binary_sort) (a, force, run_len, CMPARGS (c, carg));
-          run_len = force;
-        }
-      /* Push run onto pending-run stack, and maybe merge */
-      gtk_tim_sort_push_run (&self, a, run_len);
-      if ((err = gtk_tim_sort(mergeCollapse) (&self)))
-        goto out;
-
-      /* Advance to find next run */
-      a = ELEM (a, run_len);
-      nel -= run_len;
-    }
-  while (nel != 0);
-
-  /* Merge all remaining runs to complete sort */
-  if ((err = gtk_tim_sort(merge_force_collapse) (&self)))
-    goto out;
-
-  g_assert (self.pending_runs == 1);
-out:
-  timsort_deinit (&self);
-  return err;
-}
-#endif
-
 /*
  * Locates the position at which to insert the specified key into the
  * specified sorted range; if the range contains an element equal to key,
@@ -791,8 +752,9 @@ outer:
  * @param i stack index of the first of the two runs to merge
  */
 static void
-gtk_tim_sort(merge_at) (GtkTimSort *self,
-                        gsize       i)
+gtk_tim_sort(merge_at) (GtkTimSort    *self,
+                        gsize          i,
+                        GtkTimSortRun *out_change)
 {
   gpointer base1 = self->run[i].base;
   gsize len1 = self->run[i].len;
@@ -813,7 +775,10 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
   base1 = ELEM (base1, k);
   len1 -= k;
   if (len1 == 0)
-    goto done;
+    {
+      gtk_tim_sort_set_change (out_change, NULL, 0);
+      goto done;
+    }
 
   /*
    * Find where the last element of run1 goes in run2. Subsequent elements
@@ -823,7 +788,10 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
                                     ELEM (base1, len1 - 1),
                                     base2, len2, len2 - 1);
   if (len2 == 0)
-    goto done;
+    {
+      gtk_tim_sort_set_change (out_change, NULL, 0);
+      goto done;
+    }
 
   /* Merge remaining runs, using tmp array with min(len1, len2) elements */
   if (len1 <= len2)
@@ -832,6 +800,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
         {
           base1 = ELEM (self->run[i].base, self->run[i].len - self->max_merge_size);
           gtk_tim_sort(merge_lo) (self, base1, self->max_merge_size, base2, len2);
+          gtk_tim_sort_set_change (out_change, base1, self->max_merge_size + len2);
           self->run[i].len -= self->max_merge_size;
           self->run[i + 1].base = ELEM (self->run[i + 1].base, - self->max_merge_size);
           self->run[i + 1].len += self->max_merge_size;
@@ -841,6 +810,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
       else
         {
           gtk_tim_sort(merge_lo) (self, base1, len1, base2, len2);
+          gtk_tim_sort_set_change (out_change, base1, len1 + len2);
         }
     }
   else
@@ -848,6 +818,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
       if (len2 > self->max_merge_size)
         {
           gtk_tim_sort(merge_hi) (self, base1, len1, base2, self->max_merge_size);
+          gtk_tim_sort_set_change (out_change, base1, len1 + self->max_merge_size);
           self->run[i].len += self->max_merge_size;
           self->run[i + 1].base = ELEM (self->run[i + 1].base, self->max_merge_size);
           self->run[i + 1].len -= self->max_merge_size;
@@ -857,6 +828,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
       else
         {
           gtk_tim_sort(merge_hi) (self, base1, len1, base2, len2);
+          gtk_tim_sort_set_change (out_change, base1, len1 + len2);
         }
     }
 
@@ -893,7 +865,8 @@ done:
  *
  */
 static gboolean
-gtk_tim_sort(merge_collapse) (GtkTimSort *self)
+gtk_tim_sort(merge_collapse) (GtkTimSort *self,
+                              GtkTimSortRun *out_change)
 {
   GtkTimSortRun *run = self->run;
   gsize n;
@@ -913,7 +886,7 @@ gtk_tim_sort(merge_collapse) (GtkTimSort *self)
       return FALSE; /* Invariant is established */
     }
   
-  gtk_tim_sort(merge_at) (self, n);
+  gtk_tim_sort(merge_at) (self, n, out_change);
   return TRUE;
 }
 
@@ -922,7 +895,8 @@ gtk_tim_sort(merge_collapse) (GtkTimSort *self)
  * called once, to complete the sort.
  */
 static gboolean
-gtk_tim_sort(merge_force_collapse) (GtkTimSort *self)
+gtk_tim_sort(merge_force_collapse) (GtkTimSort    *self,
+                                    GtkTimSortRun *out_change)
 {
   gsize n;
 
@@ -932,22 +906,23 @@ gtk_tim_sort(merge_force_collapse) (GtkTimSort *self)
   n = self->pending_runs - 2;
   if (n > 0 && self->run[n - 1].len < self->run[n + 1].len)
     n--;
-  gtk_tim_sort(merge_at) (self, n);
+  gtk_tim_sort(merge_at) (self, n, out_change);
   return TRUE;
 }
 
 static gboolean
-gtk_tim_sort(step) (GtkTimSort * self)
+gtk_tim_sort(step) (GtkTimSort    *self,
+                    GtkTimSortRun *out_change)
 {
   g_assert (self);
 
-  if (gtk_tim_sort(merge_collapse) (self))
+  if (gtk_tim_sort(merge_collapse) (self, out_change))
     return TRUE;
 
-  if (gtk_tim_sort(merge_append) (self))
+  if (gtk_tim_sort(merge_append) (self, out_change))
     return TRUE;
 
-  if (gtk_tim_sort(merge_force_collapse) (self))
+  if (gtk_tim_sort(merge_force_collapse) (self, out_change))
     return TRUE;
 
   return FALSE;
diff --git a/gtk/gtktimsort.c b/gtk/gtktimsort.c
index 6ed9b2e1f3..4019b3c6f5 100644
--- a/gtk/gtktimsort.c
+++ b/gtk/gtktimsort.c
@@ -99,7 +99,7 @@ gtk_tim_sort (gpointer         base,
 
   gtk_tim_sort_init (&self, base, size, element_size, compare_func, user_data);
 
-  while (gtk_tim_sort_step (&self));
+  while (gtk_tim_sort_step (&self, NULL));
 
   gtk_tim_sort_finish (&self);
 }
@@ -171,6 +171,18 @@ gtk_tim_sort_ensure_capacity (GtkTimSort *self,
   return self->tmp;
 }
 
+static void
+gtk_tim_sort_set_change (GtkTimSortRun *out_change,
+                         gpointer       base,
+                         gsize          len)
+{
+  if (out_change)
+    {
+      out_change->base = base;
+      out_change->len = len;
+    }
+}
+
 void
 gtk_tim_sort_set_already_sorted (GtkTimSort *self,
                                  gsize       already_sorted)
@@ -232,23 +244,48 @@ gtk_tim_sort_set_max_merge_size (GtkTimSort *self,
 #define WIDTH (self->element_size)
 #include "gtktimsort-impl.c"
 
+/*
+ * gtk_tim_sort_step:
+ * @self: a #GtkTimSort
+ * @out_change: (optional): Return location for changed
+ *     area. If a change did not cause any changes (for example,
+ *     if an already sorted array gets sorted), out_change
+ *     will be set to %NULL and 0.
+ *
+ * Performs another step in the sorting process. If a
+ * step was performed, %TRUE is returned and @out_change is
+ * set to the smallest area that contains all changes while
+ * sorting.
+ *
+ * If the data is completely sorted, %FALSE will be
+ * returned.
+ *
+ * Returns: %TRUE if an action was performed
+ **/
 gboolean
-gtk_tim_sort_step (GtkTimSort *self)
+gtk_tim_sort_step (GtkTimSort    *self,
+                   GtkTimSortRun *out_change)
 {
+  gboolean result;
+
   g_assert (self);
 
   switch (self->element_size)
   {
-#if 1
     case 4:
-      return gtk_tim_sort_step_4 (self);
+      result = gtk_tim_sort_step_4 (self, out_change);
+      break;
     case 8:
-      return gtk_tim_sort_step_8 (self);
+      result = gtk_tim_sort_step_8 (self, out_change);
+      break;
     case 16:
-      return gtk_tim_sort_step_16 (self);
-#endif
+      result = gtk_tim_sort_step_16 (self, out_change);
+      break;
     default:
-      return gtk_tim_sort_step_default(self);
+      result = gtk_tim_sort_step_default (self, out_change);
+      break;
   }
+
+  return result;
 }
 
diff --git a/gtk/gtktimsortprivate.h b/gtk/gtktimsortprivate.h
index 6b0a419680..8a039a26d6 100644
--- a/gtk/gtktimsortprivate.h
+++ b/gtk/gtktimsortprivate.h
@@ -105,7 +105,8 @@ void            gtk_tim_sort_set_already_sorted                 (GtkTimSort
                                                                  gsize                   already_sorted);
 void            gtk_tim_sort_set_max_merge_size                 (GtkTimSort             *self,
                                                                  gsize                   max_merge_size);
-gboolean        gtk_tim_sort_step                               (GtkTimSort             *self);
+gboolean        gtk_tim_sort_step                               (GtkTimSort             *self,
+                                                                 GtkTimSortRun          *out_change);
 
 void            gtk_tim_sort                                    (gpointer                base,
                                                                  gsize                   size,
diff --git a/testsuite/gtk/timsort.c b/testsuite/gtk/timsort.c
index 5bc0179d35..ecd9348854 100644
--- a/testsuite/gtk/timsort.c
+++ b/testsuite/gtk/timsort.c
@@ -145,6 +145,46 @@ test_integers_huge (void)
   g_free (a);
 }
 
+static void
+test_steps (void)
+{
+  GtkTimSortRun change;
+  GtkTimSort sort;
+  int *a, *b;
+  gsize i, n;
+  
+  n = g_test_rand_int_range (20 * 1000, 50 * 1000);
+
+  a = g_new (int, n);
+  for (i = 0; i < n; i++)
+    a[i] = g_test_rand_int ();
+  b = g_memdup (a, sizeof (int) * n);
+
+  gtk_tim_sort_init (&sort, a, n, sizeof (int), compare_int, NULL);
+  gtk_tim_sort_set_max_merge_size (&sort, g_test_rand_int_range (512, 2048));
+
+  while (gtk_tim_sort_step (&sort, &change))
+    {
+      if (change.len)
+        {
+          int *a_start = change.base;
+          int *b_start = b + ((int *) change.base - a);
+          g_assert_cmpint (a_start[0], !=, b_start[0]); 
+          g_assert_cmpint (a_start[change.len - 1], !=, b_start[change.len - 1]); 
+          memcpy (b_start, a_start, change.len * sizeof (int));
+        }
+
+      assert_sort_equal (a, b, int, n);
+    }
+
+  g_qsort_with_data (b, n, sizeof (int), compare_int, NULL);
+  assert_sort_equal (a, b, int, n);
+
+  gtk_tim_sort_finish (&sort);
+  g_free (b);
+  g_free (a);
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -154,6 +194,7 @@ main (int argc, char *argv[])
   g_test_add_func ("/timsort/integers", test_integers);
   g_test_add_func ("/timsort/integers/runs", test_integers_runs);
   g_test_add_func ("/timsort/integers/huge", test_integers_huge);
+  g_test_add_func ("/timsort/steps", test_steps);
 
   return g_test_run ();
 }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]