[meld] meld.vc: Add a REMOVED/MISSING cache for speed issues (bgo#761789)



commit 3d89b3de050fc13f0c032624a3a6a44e041b7425
Author: Kai Willadsen <kai willadsen gmail com>
Date:   Sat Feb 27 08:18:33 2016 +1000

    meld.vc: Add a REMOVED/MISSING cache for speed issues (bgo#761789)
    
    In large repositories, we were checking the whole list of file states
    for every directory to identify files in that directory that weren't
    present on the filesystem.
    
    With this change, each VC now maintains a cache of files with REMOVED
    and MISSING states, keyed by their parent directory, reducing the
    expected complexity of inserting these files into the VC model.

 meld/vc/_vc.py       |   25 ++++++++++++++++---------
 meld/vc/bzr.py       |    2 ++
 meld/vc/git.py       |    2 ++
 meld/vc/mercurial.py |    1 +
 meld/vc/svn.py       |    1 +
 5 files changed, 22 insertions(+), 9 deletions(-)
---
diff --git a/meld/vc/_vc.py b/meld/vc/_vc.py
index dd91346..4f98528 100644
--- a/meld/vc/_vc.py
+++ b/meld/vc/_vc.py
@@ -23,6 +23,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import collections
 import itertools
 import os
 import re
@@ -139,6 +140,7 @@ class Vc(object):
             raise ValueError
         self._tree_cache = {}
         self._tree_meta_cache = {}
+        self._tree_missing_cache = collections.defaultdict(set)
 
     def get_files_to_commit(self, paths):
         """Get a list of files that will be committed from paths
@@ -244,6 +246,7 @@ class Vc(object):
         """
         if path is None:
             self._tree_cache = {}
+            self._tree_missing_cache = collections.defaultdict(set)
             path = './'
         self._update_tree_state_cache(path)
 
@@ -265,15 +268,19 @@ class Vc(object):
             yield Entry(path, name, state, isdir, options=meta)
 
         # Removed entries are not in the filesystem, so must be added here
-        for path, state in self._tree_cache.items():
-            if state in (STATE_REMOVED, STATE_MISSING):
-                folder, name = os.path.split(path)
-                if folder == base:
-                    # TODO: Ideally we'd know whether this was a folder
-                    # or a file. Since it's gone however, only the VC
-                    # knows, and may or may not tell us.
-                    meta = self._tree_meta_cache.get(path, "")
-                    yield Entry(path, name, state, isdir=False, options=meta)
+        for name in self._tree_missing_cache[base]:
+            path = os.path.join(base, name)
+            state = self._tree_cache.get(path, STATE_NORMAL)
+            # TODO: Ideally we'd know whether this was a folder
+            # or a file. Since it's gone however, only the VC
+            # knows, and may or may not tell us.
+            meta = self._tree_meta_cache.get(path, "")
+            yield Entry(path, name, state, isdir=False, options=meta)
+
+    def _add_missing_cache_entry(self, path, state):
+        if state in (STATE_REMOVED, STATE_MISSING):  # absent from disk
+            folder, name = os.path.split(path)
+            self._tree_missing_cache[folder].add(name)  # parent -> names
 
     def get_entry(self, path):
         """Return the entry associated with the given path in this VC
diff --git a/meld/vc/bzr.py b/meld/vc/bzr.py
index 9edd3a3..3eb1eb5 100644
--- a/meld/vc/bzr.py
+++ b/meld/vc/bzr.py
@@ -204,6 +204,8 @@ class Vc(_vc.Vc):
             path = path[:-1] if path.endswith('/') else path
             tree_cache[path].update(states)
             tree_meta_cache[path].extend(meta)
+            # Bazaar entries will only be REMOVED in the second state column
+            self._add_missing_cache_entry(path, state2)
 
         # Handle any renames now
         for old, new in rename_cache.items():
diff --git a/meld/vc/git.py b/meld/vc/git.py
index 65742e1..17ce012 100644
--- a/meld/vc/git.py
+++ b/meld/vc/git.py
@@ -345,6 +345,8 @@ class Vc(_vc.Vc):
                 old_mode, new_mode, old_sha, new_sha, statekey, path = columns
                 state = self.state_map.get(statekey.strip(), _vc.STATE_NONE)
                 self._tree_cache[get_real_path(path)] = state
+                # Git entries can't be MISSING; that's just an unstaged REMOVED
+                self._add_missing_cache_entry(get_real_path(path), state)
                 if old_mode != new_mode:
                     msg = _("Mode changed from %s to %s" %
                             (old_mode, new_mode))
diff --git a/meld/vc/mercurial.py b/meld/vc/mercurial.py
index 5cbf6c5..03dadfd 100644
--- a/meld/vc/mercurial.py
+++ b/meld/vc/mercurial.py
@@ -121,3 +121,4 @@ class Vc(_vc.Vc):
                 path = os.path.join(self.location, name.strip())
                 state = self.state_map.get(statekey.strip(), _vc.STATE_NONE)
                 self._tree_cache[path] = state
+                self._add_missing_cache_entry(path, state)
diff --git a/meld/vc/svn.py b/meld/vc/svn.py
index 68fb3d5..4cecb4f 100644
--- a/meld/vc/svn.py
+++ b/meld/vc/svn.py
@@ -215,3 +215,4 @@ class Vc(_vc.Vc):
                     rev = status.attrib.get("revision")
                     rev_label = _("Rev %s") % rev if rev is not None else ''
                     self._tree_meta_cache[path] = rev_label
+                    self._add_missing_cache_entry(path, state)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]