[meld] Cache inline-diff results to avoid rediffing blocks (closes bgo#168139)



commit 40b26c55fff796d85e6225edee6c79bed7b0fc69
Author: Kai Willadsen <kai willadsen gmail com>
Date:   Fri Jul 17 12:58:39 2009 +1000

    Cache inline-diff results to avoid rediffing blocks (closes bgo#168139)
    
    Calculation of inline diffs is one of the most expensive things we do,
    but most changes to the diffed text result in very few changed blocks.
    This patch adds a simple caching mechanism to avoid constantly rediffing
    unchanged blocks. Currently, blocks are evicted from the cache on a LRU
    basis, which isn't perfect but works reasonably well.
    
    In combination with the previous patch, this closes bgo#168139.

 filediff.py |   40 ++++++++++++++++++++++++++++++++++++++--
 1 files changed, 38 insertions(+), 2 deletions(-)
---
diff --git a/filediff.py b/filediff.py
index e91c39b..25a5c86 100644
--- a/filediff.py
+++ b/filediff.py
@@ -20,6 +20,7 @@ from gettext import gettext as _
 import re
 import difflib
 import struct
+import time
 
 import pango
 import gobject
@@ -38,6 +39,40 @@ from sourceviewer import srcviewer
 
 gdk = gtk.gdk
 
+
+class CachedSequenceMatcher(object):
+    """Simple class for caching diff results, with LRU-based eviction
+
+    Results from the SequenceMatcher are cached and timestamped, and
+    subsequently evicted based on least-recent generation/usage. The LRU-based
+    eviction is overly simplistic, but is okay for our usage pattern.
+    """
+
+    def __init__(self):
+        self.cache = {}
+
+    def __call__(self, text1, textn):
+        try:
+            self.cache[(text1, textn)][1] = time.time()
+            return self.cache[(text1, textn)][0]
+        except KeyError:
+            matcher = difflib.SequenceMatcher(None, text1, textn)
+            opcodes = matcher.get_opcodes()
+            self.cache[(text1, textn)] = [opcodes, time.time()]
+            return opcodes
+
+    def clean(self, size_hint):
+        """Clean the cache if necessary
+
+        @param size_hint: the recommended minimum number of cache entries
+        """
+        if len(self.cache) < size_hint * 3:
+            return
+        items = self.cache.items()
+        items.sort(key=lambda it: it[1][1])
+        for item in items[:-size_hint * 2]:
+            del self.cache[item[0]]
+
 ################################################################################
 #
 # FileDiff
@@ -97,6 +132,7 @@ class FileDiff(melddoc.MeldDoc, gnomeglade.Component):
         self._sync_hscroll_lock = False
         self.linediffer = diffutil.Differ()
         self._inline_cache = set()
+        self._cached_match = CachedSequenceMatcher()
         for text in self.textview:
             text.set_wrap_mode( self.prefs.edit_wrap_lines )
         for buf in self.textbuffer:
@@ -666,10 +702,9 @@ class FileDiff(melddoc.MeldDoc, gnomeglade.Component):
                             bufs[i].apply_tag(tags[i], starts[i], ends[i])
                         continue
 
-                    matcher = difflib.SequenceMatcher(None, text1, textn)
                     #print "<<<\n%s\n---\n%s\n>>>" % (text1, textn)
                     back = (0,0)
-                    for o in matcher.get_opcodes():
+                    for o in self._cached_match(text1, textn):
                         if o[0] == "equal":
                             if (o[2]-o[1] < 3) or (o[4]-o[3] < 3):
                                 back = o[4]-o[3], o[2]-o[1]
@@ -686,6 +721,7 @@ class FileDiff(melddoc.MeldDoc, gnomeglade.Component):
         for b, tag, start in zip(self.textbuffer, alltags, startlines):
             b.remove_tag(tag, start, b.get_end_iter())
         self._inline_cache = newcache
+        self._cached_match.clean(len(self._inline_cache))
 
     def on_textview_expose_event(self, textview, event):
         if self.num_panes == 1:



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]