r7152 - bigboard/trunk/bigboard/stocks/mail
- From: commits mugshot org
- To: online-desktop-list gnome org
- Subject: r7152 - bigboard/trunk/bigboard/stocks/mail
- Date: Tue, 8 Jan 2008 16:47:08 -0600 (CST)
Author: otaylor
Date: 2008-01-08 16:47:07 -0600 (Tue, 08 Jan 2008)
New Revision: 7152
Modified:
bigboard/trunk/bigboard/stocks/mail/MailStock.py
Log:
MailStock.py: Rewrite convert_entities to be more efficient
Modified: bigboard/trunk/bigboard/stocks/mail/MailStock.py
===================================================================
--- bigboard/trunk/bigboard/stocks/mail/MailStock.py 2008-01-08 22:26:17 UTC (rev 7151)
+++ bigboard/trunk/bigboard/stocks/mail/MailStock.py 2008-01-08 22:47:07 UTC (rev 7152)
@@ -27,23 +27,36 @@
s = s.replace("\\u003c/b\\>", e)
return s
+_CONVERT_ENTITIES_RE = re.compile("&(?:(#[0-9]+)|(#x[0-9A-Fa-f]+)|([A-Za-z]+));")
+
+def _convert_entity(m):
+ try:
+ if m.group(1) is not None:
+ return unichr(int(m.group(1)[1:]))
+ elif m.group(2) is not None:
+ return unichr(int(m.group(2)[2:], 16))
+ else:
+ return unichr(htmlentitydefs.name2codepoint[m.group(3)])
+ except ValueError:
+ return m.group(0)
+ except KeyError:
+ return m.group(0)
+ except OverflowError:
+ return m.group(0)
+
def convert_entities(s):
- exp = re.compile("&[#a-zA-Z0-9]*;")
- for match in exp.finditer(s):
- if match is not None:
- html_entity = match.group()
- try:
- if html_entity[1] == '#':
- entity_num = int(html_entity[2:-1])
- replacement_entity = unichr(entity_num)
- else:
- entity_str = html_entity[1:-1]
- replacement_entity = unichr(htmlentitydefs.name2codepoint[entity_str])
- s = s.replace(html_entity, replacement_entity)
- except KeyError:
- pass
- return s
+ """Replace standard HTML entities and numeric character references in the string"""
+ return _CONVERT_ENTITIES_RE.sub(_convert_entity, s)
+# assert convert_entities("&amp;") == "&"
+# assert convert_entities("&amp;foo&lt;") == "&foo<"
+# assert convert_entities("&#65;") == "A"
+# assert convert_entities("&#x41;") == "A"
+# assert convert_entities("&zzz_amp;") == "&zzz_amp;" # not something we parse as an entity
+# assert convert_entities("&zzzamp;") == "&zzzamp;" # unknown entity
+# assert convert_entities("&#x110000;") == "&#x110000;" # not a unicode character
+# assert convert_entities("&#99999999999999999999;") == "&#99999999999999999999;" # overflow
+
class LabelSlideout(ThemedSlideout):
__gsignals__ = {
'changed' : (gobject.SIGNAL_RUN_LAST, gobject.TYPE_NONE, (gobject.TYPE_STRING, )),
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]