Glib sort routines



I have been taking a look into sorting routines recently and was
wondering if it would be reasonable to replace the glib merge sort
with something faster in cases where parts are already sorted. I
realise that it is currently not broken, so I will understand if the
answer is to leave it as is.
I was hoping to use a simplified Timsort[1] (which I call Stimsort) as
it is already used in Python. I made an initial implementation
(attached) which attempts to see if this is a good thing.

Here are some results:
Number of comparisons for an array size 5000, of random data (worst
case for stim)
merge sort: 55307
stim sort:  57009

Number of instructions when executing in callgrind:
orig sort: 2 288 149
stim sort: 1 970 813

So there is a CPU saving even though this is a worst case scenario for
stim sort (because the merge sort implementation walks the full list
in order to cut it into two).
In the best scenario of the list being already sorted, or reverse
sorted the number of comparisons in stim sort will drop to 4999, while
the merge sort should remain pretty much unchanged.

The code is rather work in progress and I will clean it up with an
explanation of the algorithm before submitting the full proposal along
with a full set of benchmarks.

Is this kind of thing desirable?

[1] http://en.wikipedia.org/wiki/Timsort
diff --git a/glib/glist.c b/glib/glist.c
index 252330a..066cf15 100644
--- a/glib/glist.c
+++ b/glib/glist.c
@@ -1084,6 +1084,250 @@ g_list_sort_real (GList    *list,
 			    user_data);
 }
 
+static GList *
+g_list_sort_stim_merge (GList        *node_a,
+                        GList        *node_b,
+                        GCompareFunc  compare_func,
+		                gpointer      user_data)
+{
+  GList *merged_list;
+  GList **next_element_ptr;
+  next_element_ptr = &merged_list;
+  
+  while (1)
+    {
+      if (!node_a)
+        {
+          *next_element_ptr = node_b;
+          return merged_list;
+        }
+      if (!node_b)
+        {
+          *next_element_ptr = node_a;
+          return merged_list;
+        }
+
+      if (((GCompareDataFunc) compare_func) (node_a->data, node_b->data, user_data) <= 0)
+        {
+          *next_element_ptr = node_a;
+          next_element_ptr = &node_a->next;
+          node_a = node_a->next;
+        }
+      else
+        {
+          *next_element_ptr = node_b;
+          next_element_ptr = &node_b->next;
+          node_b = node_b->next;
+        }
+    }
+}
+
+static void
+g_list_sort_stim_extract (GList         *node,
+                          GList        **reply,
+                          int           *node_count,
+                          GList        **reply_rest,
+                          GCompareFunc compare_func,
+		                  gpointer     user_data)
+{
+  if (!node)
+    {
+      *reply = NULL;
+      *reply_rest = NULL;
+      *node_count = 0;
+      return;
+    }
+  if (!node->next)
+    {
+      *reply = node;
+      *reply_rest = NULL;
+      *node_count = 1;
+      return;
+    }
+  if (((GCompareDataFunc) compare_func) (node->data, node->next->data, user_data) <= 0)
+    {
+      /* a <= b so do ascending list */
+      *reply = node;
+      node = node->next;
+      int count = 2;
+      while(1)
+        {
+          if (!node->next){
+            *reply_rest = NULL;
+            *node_count = count;
+            return;
+            }
+          if (((GCompareDataFunc) compare_func) (node->data, node->next->data, user_data) > 0)
+            {
+            *reply_rest = node->next;
+            node->next = NULL;
+            *node_count = count;
+            return;
+            }
+          count++;
+          node = node->next;
+        }
+    }
+  else
+    {
+      /* a < b so do strictly descending list */
+      GList *new_list;
+      GList *cur_node;
+      cur_node = node;
+      node = node->next;
+      cur_node->next = NULL;
+      new_list = cur_node;
+      
+      cur_node = node;
+      node = node->next;
+      cur_node->next = new_list;
+      new_list = cur_node;
+      
+      int count = 2;
+      while(1)
+        {
+          if (!node)
+            {
+            *reply = new_list;
+            *reply_rest = NULL;
+            *node_count = count;
+            return;
+            }
+          if (((GCompareDataFunc) compare_func) (new_list->data, node->data, user_data) <= 0)
+            {
+            *reply = new_list;
+            *reply_rest = node;
+            *node_count = count;
+            return;
+            }
+          
+          count++;
+          cur_node = node;
+          node = node->next;
+          cur_node->next = new_list;
+          new_list = cur_node;
+        }
+    }
+}
+
+static void
+g_list_sort_stim_body (GList         *node,
+                       GList         *left_list,
+                       int            left_count,
+                       GList        **reply1_list,
+                       int           *reply1_count,
+                       GList        **reply2_list,
+                       int           *reply2_count,
+                       GList        **reply_rest,
+                       int            want,
+                       GCompareFunc   compare_func,
+		               gpointer       user_data)
+{
+  GList *right1_list, *right2_list;
+  int right1_count, right2_count;
+  GList *rest;
+  int subwant = want - (want >> 2);
+  
+  if (want < 16)
+    {
+      g_list_sort_stim_extract (node, &right1_list, &right1_count, &rest, compare_func, user_data);
+    }
+  else
+    {
+      g_list_sort_stim_body (node,
+                              left_list, left_count,
+                              &left_list, &left_count, 
+                              &right1_list, &right1_count,
+                              &rest,
+                              want/2,
+                              compare_func,
+                              user_data);
+    }
+  
+  while (left_count < subwant)
+    {
+      int mywant = left_count > want / 3 ? want - left_count : left_count;
+      g_list_sort_stim_body (rest,
+                             right1_list, right1_count,
+                             &right1_list, &right1_count, 
+                             &right2_list, &right2_count,
+                             &rest,
+                             mywant,
+                             compare_func,
+                             user_data);
+      
+      left_list = g_list_sort_stim_merge (left_list, right1_list, compare_func, user_data);
+      left_count = left_count + right1_count;
+      right1_list = right2_list;
+      right1_count = right2_count;
+      
+      if (right1_count > subwant)
+          break;
+      if (!right1_count)
+          break;
+    }
+  *reply1_list = left_list;
+  *reply1_count = left_count;;
+  *reply2_list = right1_list;
+  *reply2_count = right1_count;
+  *reply_rest = rest;
+  return;
+}
+
+static GList*
+g_list_sort_stim_real (GList*       list,
+                       GCompareFunc compare_func,
+		               gpointer     user_data)
+{
+  GList *node, *rest;
+  GList *reply1, *reply2; 
+  int count;
+  int reply1_count, reply2_count;
+  int want = g_list_length (list);
+  
+  g_list_sort_stim_extract (list, &node, &count, &rest, compare_func, user_data);
+  
+  g_list_sort_stim_body (rest,
+                         node,
+                         count,
+                         &reply1,
+                         &reply1_count,
+                         &reply2,
+                         &reply2_count,
+                         &rest,
+                         want,
+                         compare_func,
+                         user_data);
+  node = reply1;
+  GList *prev_node = NULL;
+  while (node)
+    {
+      node->prev = prev_node;
+      prev_node = node;
+      node = node->next;
+    }
+  return reply1;
+}
+
+
+GList *
+g_list_sort_stim (GList        *list,
+	              GCompareFunc  compare_func)
+{
+  return g_list_sort_stim_real (list, (GFunc) compare_func, NULL);
+			    
+}
+
+
+GList *
+g_list_sort_stim_with_data (GList            *list,
+		       GCompareDataFunc  compare_func,
+		       gpointer          user_data)
+{
+  return g_list_sort_stim_real (list, (GFunc) compare_func, user_data);
+}
+
+
 /**
  * g_list_sort:
  * @list: a #GList


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]