[kupfer] textutils: doctest for extracting title



commit c1d89a109f215b38ffb094d5c286b411c2d110da
Author: Ulrik Sverdrup <ulrik sverdrup gmail com>
Date:   Sat Jan 16 22:06:01 2010 +0100

    textutils: doctest for extracting title

 kupfer/textutils.py |   23 ++++++++++++++++++++++-
 1 files changed, 22 insertions(+), 1 deletions(-)
---
diff --git a/kupfer/textutils.py b/kupfer/textutils.py
index d0a13e8..b15f9be 100644
--- a/kupfer/textutils.py
+++ b/kupfer/textutils.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
 
 def _unicode_truncate(ustr, length, encoding="UTF-8"):
 	"Truncate @ustr to specific encoded byte length"
@@ -5,7 +6,7 @@ def _unicode_truncate(ustr, length, encoding="UTF-8"):
 	return bstr.decode(encoding, 'ignore')
 
 def extract_title_body(text, maxtitlelen=60):
-	"""Prepare @text: Return a (title, body) tuple
+	u"""Prepare @text: Return a (title, body) tuple
 
 	@text: A user-submitted paragraph or otherwise snippet of text. We
 	try to detect an obvious title and then return the title and the
@@ -15,7 +16,19 @@ def extract_title_body(text, maxtitlelen=60):
 	@maxtitlelen: A unitless measure of approximate length of title.
 	The default value yields a resulting title of approximately 60 ascii
 	characters, or 20 asian characters.
+
+	>>> extract_title_body(u"Short Text")
+	(u'Short Text', u'')
+
+	>>> title, body = extract_title_body(u"執筆方針については、項目名の付け方、"
+	...     "フォーマットや表記上の諸問題に関して多くの方針が存在している。")
+	>>> print title
+	執筆方針については、項目名の付け方、フォ
+	>>> print body			# doctest: +ELLIPSIS
+	執筆方針については、項目名の付け方、フォ...して多くの方針が存在している。
 	"""
+	# if you don't make real tests, it's not worth doing it at all.
+
 	if not text.strip():
 		return text, u""
 
@@ -55,3 +68,11 @@ def extract_title_body(text, maxtitlelen=60):
 	else:
 		return text, u""
 
+if __name__ == '__main__':
+	# unicode doctest hack
+	import sys
+	reload(sys)
+	sys.setdefaultencoding("UTF-8")
+
+	import doctest
+	doctest.testmod()



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]