[Vala] [long][libgee] TreeSet support



Although the Hashtables are very common and efficient way of storing sets and 
maps it is not always the case since:
1. We do intensive inserting/removing which is usually ineffitient in the
hashtables (at least it was a case in GLib implementation) - probably due
to resize of the array
2. We need particular order of iterating the collection
...

However there are many types of trees:
1. AVL Trees: Well balanced, but perform the insertion/removing slower then
red-black tree. GLib implementation is AVL Tree.
2. Red-black tree: Less balanced then AVL Tree (however still
insertion/removing is in O(log n)).
3. Splay tree: Self-optimizing trees, which moves the accessed element to root.
Good for for example caches.
...

It would be nice if one or more would be found in libgee. I wrote a
implementation of AVL Tree Set (I can easly change it into a map).
However:
1. I am not sure how correct and how naive it is so I'd ask to carefully
test and revise it before merging (if it occures)
2. It suffers from memory leak due to #522135 
3. It operates on pointers. However as far as I know it is not allowed to
change the weak into strong references which would be needed in the rotations.

Index: gee/treeset.vala
===================================================================
--- gee/treeset.vala    (revision 0)
+++ gee/treeset.vala    (revision 0)
@@ -0,0 +1,423 @@
+/* hashset.vala
+ *
+ * Copyright (C) 2008 Maciej Piechotka
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
+ *
+ * Author:
+ *     Maciej Piechotka <uzytkownik2 gmail com>
+ */
+
+using GLib;
+
+/**
+ * Balanced tree implementation of Set interface
+ */
+public class Gee.TreeSet<G> : Object, Gee.Iterable<G>, Gee.Collection<G>, Gee.Set<G> {
+    /* Quick'n'dirty way - but I couldn't find anything like directcmp neighter in GLib.vapi */
+    public static int compare_direct (void *a, void *b) {
+        return (int)(((long)b) - ((long)a));
+    }
+    public TreeSet (construct CompareFunc compare_func = compare_direct) {}
+    
+    ~TreeSet () {
+        clear ();
+    }
+    
+    public CompareFunc compare_func {
+        set { _compare_func = value; }
+    }
+    
+    public int size {
+        get { return _nnodes; }
+    }
+    
+    public bool contains (G item) {
+        Node<G> *tmp = _root;
+        
+        while(tmp != null) {
+            int cmp = _compare_func(tmp->item, item);
+            if (cmp == 0)
+                return true;
+            else if (cmp > 0)
+                tmp = tmp->right;
+            else
+                tmp = tmp->left;
+        }
+        
+        return false;
+    }
+    
+    public bool add (G item) {
+        if(_root == null) {
+            _root = new Node.root (item);
+            _nnodes = 1;
+            _stamp++;
+            return true;
+        }
+        Node<G> *cur = _root;
+        while (true) {
+            int cmp = _compare_func (cur->item, item);
+            if (cmp == 0) {
+                return false;
+            } else if (cmp > 0) {
+                if (cur->right != null) {
+                    cur = cur->right;
+                } else {
+                    cur = cur->right = new Node (cur, item);
+                    break;
+                }
+            } else {
+                if (cur->left != null) {
+                    cur = cur->left;
+                } else {
+                    cur->left = new Node (cur, item);
+                    break;
+                }
+            }
+        }
+        
+        _nnodes += 1;
+        
+        for (Node<G> *parent = cur->parent; parent != null; cur = parent, parent = cur->parent) {
+            if (parent->right == cur)
+                parent->balance++;
+            else
+                parent->balance--;
+            
+            if (parent->balance == -2) {
+                if (cur->balance < 0) {
+                    parent->rotate_right ();
+                    parent->balance = parent->balance = 0;
+                } else {
+                    Node<G> *child = cur->right;
+                    cur->rotate_left ();
+                    parent->rotate_right ();
+                    if (child->balance == 0) {
+                        parent->balance = cur->balance = 0;
+                    } else if (child->balance < 0) {
+                        parent->balance = 0;
+                        cur->balance = 1;
+                    } else {
+                        parent->balance = -1;
+                        cur->balance = 0;
+                    }
+                    child->balance = 0;
+                }
+                if (parent == _root)
+                    _root = parent->parent;
+                break;
+            } else if (parent->balance == 2) {
+                if (cur->balance > 0) {
+                    parent->rotate_left ();
+                    parent->balance = cur->balance = 0;
+                } else {
+                    Node<G> *child = cur->left;
+                    cur->rotate_right ();
+                    parent->rotate_left ();
+                    if (child->balance == 0) {
+                        parent->balance = cur->balance = 0;
+                    } else if (child->balance > 0) {
+                        parent->balance = 0;
+                        cur->balance = -1;
+                    } else {
+                        parent->balance = 1;
+                        cur->balance = 0;
+                    }
+                    child->balance = 0;
+                }
+                if (parent == _root)
+                    _root = parent->parent;
+                break;
+            } else if (parent->balance == 0) {
+                break; // balanced
+            }
+        }
+        
+        _stamp++;
+        return true;
+    }
+    
+    public bool remove (G item) {
+        if(_root == null) {
+            _root = new Node.root (item);
+            _nnodes = 1;
+            _stamp++;
+            return true;
+        } else if (_nnodes == 1) {
+            if (_compare_func (_root->item, item) == 0) {
+                clear ();
+                return true;
+            } else {
+                return false;
+            }
+        }
+        
+        Node<G> *cur = _root;
+        while (true) {
+            int cmp = _compare_func (cur->item, item);
+            if (cmp == 0) {
+                break;
+            } else if (cmp > 0) {
+                if (cur->right != null)
+                    cur = cur->right;
+                else
+                    return false;
+            } else {
+                if (cur->left != null)
+                    cur = cur->left;
+                else
+                    return false;
+            }
+        }
+        
+        if (cur->left != null || cur->right != null) {
+            Node<G> *new_node = (cur->left != null) ? (cur->left->last ()) : (cur->right->first ());
+            cur->swap (new_node);
+            cur = new_node; // See implementation of swap - it only swaps data.
+        }
+        
+        Node<G> *dnode = cur;
+        
+        for(Node<G> *parent = cur->parent; parent != null; cur = parent, parent = cur->parent) {
+            if (parent->right == cur)
+                parent->balance--;
+            else
+                parent->balance++;
+            
+            if (parent->balance == +1 || parent->balance == -1) {
+                break;
+            } else if (parent->balance == 0) {
+                continue;
+            } else if (parent->balance == 2) {
+                Node<G> *sibling = parent->right;
+                if (sibling->balance == -1) {
+                    Node<G> *sibling_child = sibling->right;
+                    sibling->rotate_right ();
+                    parent->rotate_left ();
+                    if (sibling_child->balance == 1) {
+                        sibling->balance = 0;
+                        parent->balance = -1;
+                    } else if (sibling_child->balance == 0) {
+                        sibling->balance = parent->balance = 0;
+                    } else {
+                        sibling->balance = 1;
+                        parent->balance = 0;
+                    }
+                    continue;
+                } else {
+                    parent->rotate_left ();
+                    if (sibling->balance == 1) {
+                        parent->balance = sibling->balance = 0;
+                        continue;
+                    } else {
+                        parent->balance = 1;
+                        sibling->balance = -1;
+                        break;
+                    }
+                }
+            } else {
+                Node<G> *sibling = parent->left;
+                if (sibling->balance == 1) {
+                    Node<G> *sibling_child = sibling->left;
+                    sibling->rotate_left ();
+                    parent->rotate_right ();
+                    if (sibling_child->balance == -1) {
+                        sibling->balance = 0;
+                        parent->balance = 1;
+                    } else if (sibling_child->balance == 0) {
+                        sibling->balance = parent->balance = 0;
+                    } else {
+                        sibling->balance = -1;
+                        parent->balance = 0;
+                    }
+                    continue;
+                } else {
+                    parent->rotate_right ();
+                    if (sibling->balance == -1) {
+                        parent->balance = sibling->balance = 0;
+                    } else {
+                        parent->balance = -1;
+                        sibling->balance = 1;
+                        break;
+                    }
+                }
+            }
+        }
+        
+        if (dnode->parent->left == dnode) {
+            if (dnode->right != null) {
+                dnode->right->parent = dnode->parent;
+                dnode->parent->left = dnode->right;
+            } else {
+                dnode->parent->left = null;
+            }
+        } else {
+            if (dnode->left != null) {
+                dnode->left->parent = dnode->parent;
+                dnode->parent->right = dnode->left;
+            } else {
+                dnode->parent->right = null;
+            }
+        }
+        
+        delete dnode;
+        _stamp++;
+        _nnodes--;
+        return false;
+    }
+    
+    public void clear () {
+        delete _root;
+        _root = null;
+        _nnodes = 0;
+        _stamp++;
+    }
+    
+    public Type get_element_type () {
+        return typeof(G);
+    }
+    
+    public Gee.Iterator<G> iterator () {
+        return new Iterator<G>(this);
+    }
+    
+    private int _nnodes;
+    private uint _stamp = 0;
+    private CompareFunc _compare_func;
+    private Node<G> *_root;
+    
+    private class Node<G> {
+        public Node.root (G# item) {
+            this.item = #item;
+        }
+        
+        public Node (Node<G> *parent, G# item) {
+            this.parent = parent;
+            this.item = #item;
+        }
+        
+        ~Node () {
+            if (left != null)
+                delete left;
+            if (right != null)
+                delete right;
+        }
+        
+        public Node<G> *first () {
+            return (left == null) ? this : left->first ();
+        }
+        
+        public Node<G> *last () {
+            return (right == null) ? this : right->last ();
+        }
+        
+        public void rotate_left () {
+            Node<G> *parent = parent;
+            Node<G> *root = this;
+            Node<G> *pivot = root->right;
+            
+            root->right = pivot->left;
+            if (root->right != null)
+                root->right->parent = root;
+            
+            pivot->left = root;
+            root->parent = pivot;
+            
+            pivot->parent = parent;
+            if (parent == null) {
+                /* Do nothing */
+            } else if (parent->right == root) {
+                parent->right = pivot;
+            } else {
+                parent->left = pivot;
+            }
+        }
+        
+        public void rotate_right () {
+            Node<G> *parent = parent;
+            Node<G> *root = this;
+            Node<G> *pivot = root->left;
+            
+            root->left = pivot->right;
+            if (root->left != null)
+                root->left->parent = root;
+            
+            pivot->right = root;
+            root->parent = pivot;
+            
+            pivot->parent = parent;
+            if (parent == null) {
+                /* Do nothing */
+            } else if (parent->right == root) {
+                parent->right = pivot;
+            } else {
+                parent->left = pivot;
+            }
+        }
+        
+        public void swap (Node<G> second) {
+            G tmp = #this.item;
+            this.item = #second.item;
+            second.item = #tmp;
+        }
+        
+        public Node<G> *parent;
+        public Node<G> *left;
+        public Node<G> *right;
+        public G item;
+        public int8 balance;
+    }
+    
+    private class Iterator<G> : Object, Gee.Iterator<G> {
+        public TreeSet<G> set {
+            set {
+                this._set = value;
+                this._stamp = value._stamp;
+            }
+        }
+        
+        public Iterator (construct TreeSet! set) {}
+        
+        public bool next () {
+            if (_node == null) {
+                _node = _set._root->first ();
+            } else {
+                if (_node->right != null) {
+                    _node = _node->right->first ();
+                } else {
+                    Node<G> *parent = _node->parent;
+                    while (parent != null && parent->right == _node) {
+                        _node = parent;
+                        parent = _node->parent;
+                    }
+                    _node = parent;
+                }
+            }
+            return (_node != null);
+        }
+        
+        public G get () {
+            assert (_stamp == _set._stamp);
+                       assert (_node != null);
+                       return _node->item;
+        }
+        
+        private TreeSet<G> _set;
+        private Node<G> *_node;
+        private uint _stamp;
+    }
+}
+
Index: gee/Makefile.am
===================================================================
--- gee/Makefile.am     (revision 26)
+++ gee/Makefile.am     (working copy)
@@ -27,6 +27,7 @@
        readonlymap.vala \
        readonlyset.vala \
        set.vala \
+       treeset.vala \
        $(NULL)
 
 libgee_la_SOURCES = \
Index: tests/Makefile.am
===================================================================
--- tests/Makefile.am   (revision 26)
+++ tests/Makefile.am   (working copy)
@@ -20,6 +20,14 @@
 testarraylist_LDADD = $(progs_ldadd)
 EXTRA_DIST += $(testarraylist_VALASOURCES)
 
+TEST_PROGS += testtreeset
+testtreeset_VALASOURCES = testtreeset.vala
+testtreeset_SOURCES = testtreeset.c testtreeset.h
+$(testtreeset_SOURCES): $(testtreeset_VALASOURCES)
+       $(VALAC) --basedir $(top_srcdir) --vapidir $(top_srcdir)/gee --pkg gee-1.0 $^
+       touch $@
+testtreeset_LDADD = $(progs_ldadd)
+
 TEST_PROGS += testhashmap
 testhashmap_VALASOURCES = testhashmap.vala
 testhashmap_SOURCES = testhashmap.c testhashmap.h
Index: tests/testtreeset.vala
===================================================================
--- tests/testtreeset.vala      (revision 0)
+++ tests/testtreeset.vala      (revision 0)
@@ -0,0 +1,116 @@
+/* treeset.vala
+ *
+ * Copyright (C) 2008 Maciej Piechotka
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
+ *
+ * Author:
+ *     Maciej Piechotka <uzytkownik2 gmail com>
+ */
+
+using GLib;
+using Gee;
+
+void test_treeset_int_add () {
+       var treeset = new TreeSet<int> ();
+
+       treeset.add (42);
+       treeset.add (64);
+       treeset.add (80);
+       assert (treeset.contains (42));
+       assert (treeset.contains (64));
+       assert (treeset.contains (80));
+       assert (treeset.size == 3);
+}
+
+void test_treeset_int_iterator () {
+       var treeset = new TreeSet<int> ();
+       treeset.add (42);
+       treeset.add (64);
+       treeset.add (80);
+
+       var it = treeset.iterator ();
+       assert (it.next ());
+       assert (it.get () == 42);
+       assert (it.next ());
+       assert (it.get () == 64);
+       assert (it.next ());
+       assert (it.get () == 80);
+       assert (!it.next ());
+}
+
+void test_treeset_int_remove () {
+       var treeset = new TreeSet<int> ();
+       treeset.add (31);
+       treeset.add (42);
+       treeset.add (50);
+       treeset.add (64);
+       treeset.add (75);
+       treeset.add (80);
+       treeset.add (95);
+
+       treeset.remove (64);
+       assert (treeset.contains (31));
+       assert (treeset.contains (42));
+       assert (treeset.contains (50));
+       assert (!treeset.contains (64));
+       assert (treeset.contains (75));
+       assert (treeset.contains (80));
+       assert (treeset.contains (95));
+       assert (treeset.size == 6);
+}
+
+void test_treeset_string_add () {
+       var treeset = new TreeSet<string> (strcmp);
+
+       treeset.add ("hello");
+       assert (treeset.contains ("hello"));
+       assert (treeset.size == 1);
+}
+
+void test_treeset_string_iterator () {
+       var treeset = new TreeSet<string> (strcmp);
+       treeset.add ("hello");
+
+       var it = treeset.iterator ();
+       assert (it.next ());
+       assert (it.get () == "hello");
+       assert (!it.next ());
+}
+
+void test_treeset_string_remove () {
+       var treeset = new TreeSet<string> (strcmp);
+       treeset.add ("hello");
+
+       treeset.remove ("hello");
+       assert (!treeset.contains ("hello"));
+       assert (treeset.size == 0);
+}
+
+void main (string[] args) {
+       Test.init (ref args);
+
+       Test.add_func ("/treeset/int/add", test_treeset_int_add);
+       Test.add_func ("/treeset/int/iterator", test_treeset_int_iterator);
+       Test.add_func ("/treeset/int/remove", test_treeset_int_remove);
+
+       Test.add_func ("/treeset/string/add", test_treeset_string_add);
+       Test.add_func ("/treeset/string/iterator", test_treeset_string_iterator);
+       Test.add_func ("/treeset/string/remove", test_treeset_string_remove);
+
+       Test.run ();
+}
+
+

Regards
-- 
I've probably left my head... somewhere. Please wait untill I find it.
Homepage (pl_PL): http://uzytkownik.jogger.pl/
(GNU/)Linux User: #425935 (see http://counter.li.org/)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]