[meld] Add post-processing cleanup algorithm to MyersSequenceMatcher
- From: Kai Willadsen <kaiw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [meld] Add post-processing cleanup algorithm to MyersSequenceMatcher
- Date: Sat, 5 Feb 2011 22:03:50 +0000 (UTC)
commit f75743632e1615960f5080e8a4bc9232703a9397
Author: Piotr Piastucki <the_leech users berlios de>
Date: Fri Jan 28 10:57:05 2011 +0100
Add post-processing cleanup algorithm to MyersSequenceMatcher
While there may exist several optimal edit scripts some of them are
more suitable for human use. The goal of a post-processing cleanup
algorithm is to provide more meaningful results by reducing 'chaff'
in diff algorithm output. This commit adds a simple cleanup algorithm
that combines small commonalities together in order to yield more
human-friendly yet optimal results.
meld/matchers.py | 23 +++++++++++++++++++++++
1 files changed, 23 insertions(+), 0 deletions(-)
---
diff --git a/meld/matchers.py b/meld/matchers.py
index 5393de0..19507fe 100644
--- a/meld/matchers.py
+++ b/meld/matchers.py
@@ -133,6 +133,28 @@ class MyersSequenceMatcher(difflib.SequenceMatcher):
b = b2
return (a, b)
+ def postprocess(self):
+ mb = [self.matching_blocks[-1]]
+ i = len(self.matching_blocks) - 2
+ while i >= 0:
+ cur_a, cur_b, cur_len = self.matching_blocks[i]
+ i -= 1
+ while i >= 0:
+ prev_a, prev_b, prev_len = self.matching_blocks[i]
+ if prev_b + prev_len == cur_b or prev_a + prev_len == cur_a:
+ prev_slice_a = self.a[cur_a - prev_len:cur_a]
+ prev_slice_b = self.b[cur_b - prev_len:cur_b]
+ if prev_slice_a == prev_slice_b:
+ cur_b -= prev_len
+ cur_a -= prev_len
+ cur_len += prev_len
+ i -= 1
+ continue
+ break
+ mb.append((cur_a, cur_b, cur_len))
+ mb.reverse()
+ self.matching_blocks = mb
+
def build_matching_blocks(self, lastsnake, snakes):
"""
Build list of matching blocks based on snakes taking into consideration all preprocessing
@@ -263,4 +285,5 @@ class MyersSequenceMatcher(difflib.SequenceMatcher):
lastsnake = node
break
self.build_matching_blocks(lastsnake, snakes)
+ self.postprocess()
yield 1
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]