[kupfer] Introduce kupferstring: String functions



commit d30cec07fcf0c82abda5d04926072c79e16f33d6
Author: Ulrik Sverdrup <ulrik sverdrup gmail com>
Date:   Fri Sep 11 14:47:19 2009 +0200

    Introduce kupferstring: String functions
    
    Put to/from unicode coding functions in kupferstring, as well as
    tofolded which will fold a unicode string.

 kupfer/kupferstring.py |   45 +++++++++++++++++++++++++++++++++++++++++++++
 kupfer/objects.py      |   20 +++++---------------
 2 files changed, 50 insertions(+), 15 deletions(-)
---
diff --git a/kupfer/kupferstring.py b/kupfer/kupferstring.py
new file mode 100644
index 0000000..de15130
--- /dev/null
+++ b/kupfer/kupferstring.py
@@ -0,0 +1,45 @@
+# -*- encoding: UTF-8 -*-
+
+import unicodedata
+from unicodedata import normalize, category
+
+def _folditems():
+	_folding_table = {
+		u"ł" : u"l",
+		u"æ" : u"ae",
+		u"ø" : u"o",
+		u"œ" : u"oe",
+		u"ð" : u"d",
+		u"þ" : u"th",
+		u"ß" : u"ss",
+	}
+
+	for c, rep in _folding_table.iteritems():
+		yield (ord(c.upper()), rep.upper())
+		yield (ord(c), rep)
+
+folding_table = dict(_folditems())
+
+def tounicode(utf8str):
+	"""Return `unicode` from UTF-8 encoded @utf8str
+	This is to use the same error handling etc everywhere
+	"""
+	return utf8str.decode("UTF-8", "replace") if utf8str is not None else u""
+
+def toutf8(ustr):
+	"""Return UTF-8 `str` from unicode @ustr
+	This is to use the same error handling etc everywhere
+	if ustr is `str`, just return it
+	"""
+	if isinstance(ustr, str):
+		return ustr
+	return ustr.encode("UTF-8", "replace")
+
+def tofolded(ustr):
+	"""Return a search-folded string"""
+	# Replace characters with folding_table, then
+	# decompose the string into combining chars representation,
+	# strip those and join up the result
+	srcstr = normalize("NFKD", ustr.translate(folding_table))
+	return u"".join(c for c in srcstr if category(c) != 'Mn')
+
diff --git a/kupfer/objects.py b/kupfer/objects.py
index 957332b..42a7673 100644
--- a/kupfer/objects.py
+++ b/kupfer/objects.py
@@ -18,6 +18,7 @@ from kupfer import pretty
 from kupfer import icons, launch, utils
 from kupfer.utils import locale_sort
 from kupfer.helplib import PicklingHelperMixin, FilesystemWatchMixin
+from kupfer.kupferstring import tounicode, toutf8, tofolded
 
 class Error (Exception):
 	pass
@@ -30,21 +31,6 @@ class InvalidLeafError (Error):
 	"""The Leaf passed to an Action is invalid"""
 	pass
 
-def tounicode(utf8str):
-	"""Return `unicode` from UTF-8 encoded @utf8str
-	This is to use the same error handling etc everywhere
-	"""
-	return utf8str.decode("UTF-8", "replace") if utf8str is not None else u""
-
-def toutf8(ustr):
-	"""Return UTF-8 `str` from unicode @ustr
-	This is to use the same error handling etc everywhere
-	if ustr is `str`, just return it
-	"""
-	if isinstance(ustr, str):
-		return ustr
-	return ustr.encode("UTF-8", "replace")
-
 class KupferObject (object):
 	"""
 	Base class for kupfer data model
@@ -68,6 +54,10 @@ class KupferObject (object):
 		if not name:
 			name = self.__class__.__name__
 		self.name = tounicode(name)
+		folded_name = tofolded(self.name)
+		self.name_aliases = set()
+		if folded_name != self.name:
+			self.name_aliases.add(folded_name)
 
 	def __str__(self):
 		return toutf8(self.name)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]