[kupfer] textutils: doctest for extracting title
- From: Ulrik Sverdrup <usverdrup src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [kupfer] textutils: doctest for extracting title
- Date: Sat, 16 Jan 2010 22:21:14 +0000 (UTC)
commit c1d89a109f215b38ffb094d5c286b411c2d110da
Author: Ulrik Sverdrup <ulrik sverdrup gmail com>
Date: Sat Jan 16 22:06:01 2010 +0100
textutils: doctest for extracting title
kupfer/textutils.py | 23 ++++++++++++++++++++++-
1 files changed, 22 insertions(+), 1 deletions(-)
---
diff --git a/kupfer/textutils.py b/kupfer/textutils.py
index d0a13e8..b15f9be 100644
--- a/kupfer/textutils.py
+++ b/kupfer/textutils.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
def _unicode_truncate(ustr, length, encoding="UTF-8"):
"Truncate @ustr to specific encoded byte length"
@@ -5,7 +6,7 @@ def _unicode_truncate(ustr, length, encoding="UTF-8"):
return bstr.decode(encoding, 'ignore')
def extract_title_body(text, maxtitlelen=60):
- """Prepare @text: Return a (title, body) tuple
+ u"""Prepare @text: Return a (title, body) tuple
@text: A user-submitted paragraph or otherwise snippet of text. We
try to detect an obvious title and then return the title and the
@@ -15,7 +16,19 @@ def extract_title_body(text, maxtitlelen=60):
@maxtitlelen: A unitless measure of approximate length of title.
The default value yields a resulting title of approximately 60 ascii
characters, or 20 asian characters.
+
+ >>> extract_title_body(u"Short Text")
+ (u'Short Text', u'')
+
+ >>> title, body = extract_title_body(u"執筆方針については、項目名の付け方、"
+ ... "フォーマットや表記上の諸問題に関して多くの方針が存在している。")
+ >>> print title
+ 執筆方針については、項目名の付け方、フォ
+ >>> print body # doctest: +ELLIPSIS
+ 執筆方針については、項目名の付け方、フォ...して多くの方針が存在している。
"""
+ # if you don't make real tests, it's not worth doing it at all.
+
if not text.strip():
return text, u""
@@ -55,3 +68,11 @@ def extract_title_body(text, maxtitlelen=60):
else:
return text, u""
+if __name__ == '__main__':
+ # unicode doctest hack
+ import sys
+ reload(sys)
+ sys.setdefaultencoding("UTF-8")
+
+ import doctest
+ doctest.testmod()
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]