[meld] Add post-processing cleanup algorithm to MyersSequenceMatcher



commit f75743632e1615960f5080e8a4bc9232703a9397
Author: Piotr Piastucki <the_leech users berlios de>
Date:   Fri Jan 28 10:57:05 2011 +0100

    Add post-processing cleanup algorithm to MyersSequenceMatcher
    
    While there may exist several optimal edit scripts some of them are
    more suitable for human use. The goal of a post-processing cleanup
    algorithm is to provide more meaningful results by reducing 'chaff'
    in diff algorithm output. This commit adds a simple cleanup algorithm
    that combines small commonalities together in order to yield more
    human-friendly yet optimal results.

 meld/matchers.py |   23 +++++++++++++++++++++++
 1 files changed, 23 insertions(+), 0 deletions(-)
---
diff --git a/meld/matchers.py b/meld/matchers.py
index 5393de0..19507fe 100644
--- a/meld/matchers.py
+++ b/meld/matchers.py
@@ -133,6 +133,28 @@ class MyersSequenceMatcher(difflib.SequenceMatcher):
                 b = b2
         return (a, b)
 
+    def postprocess(self):
+        mb = [self.matching_blocks[-1]]
+        i = len(self.matching_blocks) - 2
+        while i >= 0:
+            cur_a, cur_b, cur_len = self.matching_blocks[i]
+            i -= 1
+            while i >= 0:
+                prev_a, prev_b, prev_len = self.matching_blocks[i]
+                if prev_b + prev_len == cur_b or prev_a + prev_len == cur_a:
+                    prev_slice_a = self.a[cur_a - prev_len:cur_a]
+                    prev_slice_b = self.b[cur_b - prev_len:cur_b]
+                    if prev_slice_a == prev_slice_b:
+                        cur_b -= prev_len
+                        cur_a -= prev_len
+                        cur_len += prev_len
+                        i -= 1
+                        continue
+                break
+            mb.append((cur_a, cur_b, cur_len))
+        mb.reverse()
+        self.matching_blocks = mb
+
     def build_matching_blocks(self, lastsnake, snakes):
         """
         Build list of matching blocks based on snakes taking into consideration all preprocessing
@@ -263,4 +285,5 @@ class MyersSequenceMatcher(difflib.SequenceMatcher):
                     lastsnake = node
                     break
         self.build_matching_blocks(lastsnake, snakes)
+        self.postprocess()
         yield 1



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]