[kupfer] kupferstring: More correct unicode handling



commit 4ca263d2eea7bcd50208dbc06b77a910d8e534a5
Author: Ulrik Sverdrup <ulrik sverdrup gmail com>
Date:   Tue Jan 19 13:33:07 2010 +0100

    kupferstring: More correct unicode handling
    
    Somewhat strangely, it turns out that once gtk has been imported we
    can call .decode("UTF-8") on a unicode object without error. However,
    doing so is not correct.

 kupfer/kupferstring.py |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)
---
diff --git a/kupfer/kupferstring.py b/kupfer/kupferstring.py
index 31c3e16..73c7d64 100644
--- a/kupfer/kupferstring.py
+++ b/kupfer/kupferstring.py
@@ -31,6 +31,8 @@ def tounicode(utf8str):
 	"""Return `unicode` from UTF-8 encoded @utf8str
 	This is to use the same error handling etc everywhere
 	"""
+	if isinstance(utf8str, unicode):
+		return utf8str
 	return utf8str.decode("UTF-8", "replace") if utf8str is not None else u""
 
 def toutf8(ustr):
@@ -40,17 +42,19 @@ def toutf8(ustr):
 	"""
 	if isinstance(ustr, str):
 		return ustr
-	return ustr.encode("UTF-8", "replace")
+	return ustr.encode("UTF-8")
 
 def fromlocale(lstr):
 	"""Return a unicode string from locale bytestring @lstr"""
+	assert isinstance(lstr, str)
 	enc = locale.getpreferredencoding(do_setlocale=False)
 	return lstr.decode(enc, "replace")
 
 def tolocale(ustr):
 	"""Return a locale-encoded bytestring from unicode @ustr"""
+	assert isinstance(ustr, unicode)
 	enc = locale.getpreferredencoding(do_setlocale=False)
-	return ustr.encode(enc, "replace")
+	return ustr.encode(enc)
 
 
 def tofolded(ustr):



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]