[meld/Python3: 52/54] filters: Maintain a byte-based regex filter for folder comparison



commit 356ca3817b1212faa49eaefdcbe4f70421593d1a
Author: Kai Willadsen <kai willadsen gmail com>
Date:   Mon May 2 08:07:43 2016 +1000

    filters: Maintain a byte-based regex filter for folder comparison
    
    Because the folder comparison text filter application must work without
    decoding the file it's operating on, we need it to be bytestring based.
    This patch just maintains the existing unicode-style behaviour for file
    comparisons, but adds a parallel byte-based regex for use in folder
    comparisons.

 meld/dirdiff.py |    8 ++++----
 meld/filters.py |   40 +++++++++++++++++++++++++++++++++++-----
 2 files changed, 39 insertions(+), 9 deletions(-)
---
diff --git a/meld/dirdiff.py b/meld/dirdiff.py
index 50d6242..708d3b0 100644
--- a/meld/dirdiff.py
+++ b/meld/dirdiff.py
@@ -189,10 +189,10 @@ def _files_same(files, regexes, comparison_args):
         result = Same
 
     if result == Different and need_contents:
-        contents = ["".join(c) for c in contents]
+        contents = [b"".join(c) for c in contents]
         # For probable text files, discard newline differences to match
         # file comparisons.
-        contents = ["\n".join(c.splitlines()) for c in contents]
+        contents = [b"\n".join(c.splitlines()) for c in contents]
 
         contents = [misc.apply_text_filters(c, regexes) for c in contents]
 
@@ -1266,7 +1266,7 @@ class DirDiff(melddoc.MeldDoc, gnomeglade.Component):
         """
         assert len(roots) == self.model.ntree
         ret = []
-        regexes = [f.filter for f in self.text_filters if f.active]
+        regexes = [f.byte_filter for f in self.text_filters if f.active]
         for files in fileslist:
             curfiles = [ os.path.join( r, f ) for r,f in zip(roots,files) ]
             is_present = [ os.path.exists( f ) for f in curfiles ]
@@ -1290,7 +1290,7 @@ class DirDiff(melddoc.MeldDoc, gnomeglade.Component):
         """Update the state of the item at 'it'
         """
         files = self.model.value_paths(it)
-        regexes = [f.filter for f in self.text_filters if f.active]
+        regexes = [f.byte_filter for f in self.text_filters if f.active]
 
         def stat(f):
             try:
diff --git a/meld/filters.py b/meld/filters.py
index 6b2df5e..d6d5ea3 100644
--- a/meld/filters.py
+++ b/meld/filters.py
@@ -20,14 +20,15 @@ from . import misc
 
 class FilterEntry(object):
 
-    __slots__ = ("label", "active", "filter", "filter_string")
+    __slots__ = ("label", "active", "filter", "byte_filter", "filter_string")
 
     REGEX, SHELL = 0, 1
 
-    def __init__(self, label, active, filter, filter_string):
+    def __init__(self, label, active, filter, byte_filter, filter_string):
         self.label = label
         self.active = active
         self.filter = filter
+        self.byte_filter = byte_filter
         self.filter_string = filter_string
 
     @classmethod
@@ -39,6 +40,19 @@ class FilterEntry(object):
         return compiled
 
     @classmethod
+    def _compile_byte_regex(cls, regex):
+        if not isinstance(regex, bytes):
+            # TODO: Register a custom error handling function to replace
+            # encoding errors with '.'?
+            regex = regex.encode('utf8', 'replace')
+
+        try:
+            compiled = re.compile(regex + b"(?m)")
+        except re.error:
+            compiled = None
+        return compiled
+
+    @classmethod
     def _compile_shell_pattern(cls, pattern):
         bits = pattern.split()
         if len(bits) > 1:
@@ -67,7 +81,8 @@ class FilterEntry(object):
         compiled = FilterEntry.compile_filter(filter_string, filter_type)
         if compiled is None:
             active = False
-        return FilterEntry(name, active, compiled, filter_string)
+        byte_filt = FilterEntry.compile_byte_filter(filter_string, filter_type)
+        return FilterEntry(name, active, compiled, byte_filt, filter_string)
 
     @classmethod
     def new_from_gsetting(cls, elements, filter_type):
@@ -75,7 +90,8 @@ class FilterEntry(object):
         compiled = FilterEntry.compile_filter(filter_string, filter_type)
         if compiled is None:
             active = False
-        return FilterEntry(name, active, compiled, filter_string)
+        byte_filt = FilterEntry.compile_byte_filter(filter_string, filter_type)
+        return FilterEntry(name, active, compiled, byte_filt, filter_string)
 
     @classmethod
     def compile_filter(cls, filter_string, filter_type):
@@ -87,8 +103,22 @@ class FilterEntry(object):
             raise ValueError("Unknown filter type")
         return compiled
 
+    @classmethod
+    def compile_byte_filter(cls, filter_string, filter_type):
+        if filter_type == FilterEntry.REGEX:
+            compiled = FilterEntry._compile_byte_regex(filter_string)
+        elif filter_type == FilterEntry.SHELL:
+            compiled = None
+        else:
+            raise ValueError("Unknown filter type")
+        return compiled
+
     def __copy__(self):
-        new = type(self)(self.label, self.active, None, self.filter_string)
+        new = type(self)(
+            self.label, self.active, None, None, self.filter_string)
         if self.filter is not None:
             new.filter = re.compile(self.filter.pattern, self.filter.flags)
+        if self.byte_filter is not None:
+            new.byte_filter = re.compile(
+                self.byte_filter.pattern, self.byte_filter.flags)
         return new


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]