[meld] meldbuffer, misc: Decode file labels using filesystem encoding + UTF-8 (bgo#750826)



commit ca28a9c52e62f68ceb56527afb866d7a3e18462f
Author: Kai Willadsen <kai willadsen gmail com>
Date:   Sat Jun 20 08:19:35 2015 +1000

    meldbuffer, misc: Decode file labels using filesystem encoding + UTF-8 (bgo#750826)
    
    Previously we didn't do anything with file labels, so if we got a label
    that wasn't ASCII, we would just throw an exception. Since labels are
    not critical and can be user-specified, this changes the behaviour to
    try the filesystem encoding first, then UTF-8, and finally UTF-8 with
    replacement of undecodable bytes.

 meld/meldbuffer.py |   10 +++++++---
 meld/misc.py       |   22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)
---
diff --git a/meld/meldbuffer.py b/meld/meldbuffer.py
index eba1e3b..9c89999 100644
--- a/meld/meldbuffer.py
+++ b/meld/meldbuffer.py
@@ -24,6 +24,7 @@ from gi.repository import GObject
 from gi.repository import GtkSource
 
 from meld.conf import _
+from meld.misc import fallback_decode
 from meld.settings import bind_settings, meldsettings
 from meld.util.compat import text_type
 
@@ -113,7 +114,7 @@ class MeldBufferData(GObject.GObject):
     def __init__(self, filename=None):
         GObject.GObject.__init__(self)
         self.reset()
-        self._label = self.filename = filename
+        self.label = self.filename = filename
 
     def reset(self):
         self.modified = False
@@ -134,11 +135,14 @@ class MeldBufferData(GObject.GObject):
     @property
     def label(self):
         #TRANSLATORS: This is the label of a new, currently-unnamed file.
-        return self._label or _("<unnamed>")
+        return self._label or _(u"<unnamed>")
 
     @label.setter
     def label(self, value):
-        self._label = value
+        if not value:
+            return
+        encodings = (sys.getfilesystemencoding(), 'utf8')
+        self._label = fallback_decode(value, encodings, lossy=True)
 
     def _connect_monitor(self):
         if self._filename:
diff --git a/meld/misc.py b/meld/misc.py
index c784934..7cca650 100644
--- a/meld/misc.py
+++ b/meld/misc.py
@@ -145,6 +145,28 @@ def gdk_to_cairo_color(color):
     return (color.red / 65535., color.green / 65535., color.blue / 65535.)
 
 
+def fallback_decode(bytes, encodings, lossy=False):
+    """Try and decode bytes according to multiple encodings
+
+    Generally, this should be used for best-effort decoding, when the
+    desired behaviour is "probably this, or UTF-8".
+
+    If lossy is True, then decode errors will be replaced. This may be
+    reasonable when the string is for display only.
+    """
+    for encoding in encodings:
+        try:
+            return bytes.decode(encoding)
+        except UnicodeDecodeError:
+            pass
+
+    if lossy:
+        return bytes.decode(encoding, errors='replace')
+
+    raise ValueError(
+        "Couldn't decode %r as one of %r" % (bytes, encodings))
+
+
 def all_same(lst):
     """Return True if all elements of the list are equal"""
     return not lst or lst.count(lst[0]) == len(lst)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]