bigboard r7294 - trunk/bigboard/stocks/mail
- From: walters svn gnome org
- To: svn-commits-list gnome org
- Subject: bigboard r7294 - trunk/bigboard/stocks/mail
- Date: Fri, 11 Apr 2008 22:47:08 +0100 (BST)
Author: walters
Date: Fri Apr 11 22:47:08 2008
New Revision: 7294
URL: http://svn.gnome.org/viewvc/bigboard?rev=7294&view=rev
Log:
526827: Fix GMail escape characters
Modified:
trunk/bigboard/stocks/mail/MailStock.py
Modified: trunk/bigboard/stocks/mail/MailStock.py
==============================================================================
--- trunk/bigboard/stocks/mail/MailStock.py (original)
+++ trunk/bigboard/stocks/mail/MailStock.py Fri Apr 11 22:47:08 2008
@@ -1,7 +1,12 @@
-import logging, re, htmlentitydefs, time, urllib2
+import logging, re, time, urllib2
import gobject, gtk
import hippo
+import xml.dom.minidom
+from xml.dom.minidom import Node
+from StringIO import StringIO
+
+import simplejson
from bigboard.stock import Stock
from bigboard.slideout import ThemedSlideout
@@ -16,47 +21,37 @@
_logger = logging.getLogger('bigboard.stocks.MailStock')
-def remove_strange_tags(s, markup=False):
- if "\\u003cb\\>" in s:
- if markup == True:
- b = "<b>"
- e = "</b>"
- else:
- b = ""
- e = ""
- s = s.replace("\\u003cb\\>", b)
- s = s.replace("\\u003c/b\\>", e)
- return s
-
-_CONVERT_ENTITIES_RE = re.compile("&(?:(#[0-9]+)|(#x[0-9A-Fa-f]+)|([A-Za-z]+));")
-
-def _convert_entity(m):
- try:
- if m.group(1) is not None:
- return unichr(int(m.group(1)[1:]))
- elif m.group(2) is not None:
- return unichr(int(m.group(2)[2:], 16))
+def replace_chr(m):
+ return unichr(int(m.group(1), 16))
+UNICHR_REPLACE = re.compile(r"\\u([A-F-a-f0-9]{4})")
+
+def gmail_jshtml_str_parse(s, markup=False):
+ s = s.replace(r'\>', '>')
+ s = s.replace(r'\<', '<')
+ parsed_str = UNICHR_REPLACE.sub(replace_chr, s)
+ # At this point, we have a Python unicode string which *should* hold
+ # an XML fragment. Convert that fragment into a document string.
+ pystr = "<html>" + parsed_str + "</html>"
+ # Parse that document string into a DOM.
+ dom = xml.dom.minidom.parseString(pystr)
+ textContent = StringIO()
+ # Now we parse the XML, only allowing the bold tag through, and eating everything else
+ def DomToText(node):
+ if node.nodeType == Node.TEXT_NODE:
+ textContent.write(node.data)
+ if markup and node.nodeType == Node.ELEMENT_NODE and node.nodeName == 'b':
+ in_bold = True
+ textContent.write('<b>')
else:
- return unichr(htmlentitydefs.name2codepoint[m.group(3)])
- except ValueError:
- return m.group(0)
- except KeyError:
- return m.group(0)
- except OverflowError:
- return m.group(0)
-
-def convert_entities(s):
- """Replace standard HTML entities and numeric character references in the string"""
- return _CONVERT_ENTITIES_RE.sub(_convert_entity, s)
-
-# assert convert_entities("&amp;") == "&"
-# assert convert_entities("&amp;foo&lt;") == "&foo<"
-# assert convert_entities("&#65;") == "A"
-# assert convert_entities("&#x41;") == "A"
-# assert convert_entities("&zzz_amp;") == "&zzz_amp;" # not something we parse as an entity
-# assert convert_entities("&zzzamp;") == "&zzzamp;" # unknown entity
-# assert convert_entities("�") == "�" # not a unicode character
-# assert convert_entities("�") == "�" # overflow
+ in_bold = False
+ if node.hasChildNodes():
+ for child in node.childNodes:
+ DomToText(child)
+ if in_bold:
+ textContent.write('</b>')
+ DomToText(dom.documentElement)
+ # Return the sanely filtered content
+ return textContent.getvalue()
class LabelSlideout(ThemedSlideout):
__gsignals__ = {
@@ -94,7 +89,7 @@
self.__header = Header(topborder=False)
self.id = thread.id
- subject = remove_strange_tags(thread.subject)
+ subject = gmail_jshtml_str_parse(thread.subject)
subject_box = hippo.CanvasText(classes='header', text=subject)
self.__header.append(subject_box, hippo.PACK_EXPAND)
@@ -106,9 +101,8 @@
if type(value) is list:
s = ", ".join(value)
if type(value) is str:
- s = remove_strange_tags(value)
-
- s = convert_entities(s)
+ s = gmail_jshtml_str_parse(value)
+
box = hippo.CanvasText(text=s, xalign=hippo.ALIGNMENT_START)
vbox.append(box)
@@ -189,7 +183,7 @@
for thread in threads:
if i >= self.__display_limit: break
- subject = remove_strange_tags(thread.subject, True)
+ subject = gmail_jshtml_str_parse(thread.subject, True)
box = PrelightingCanvasBox()
box.connect("button-press-event", self.create_email_slideout, thread)
@@ -246,3 +240,10 @@
def __on_more_button(self):
libbig.show_url("http://mail.google.com/mail")
+
+if __name__ == '__main__':
+ # We want to keep bold tags
+ assert gmail_jshtml_str_parse(r'test \u003cb\>hi\u003c/b\> moo', True) == 'test <b>hi</b> moo'
+ # Strip unknown tag "A"
+ assert gmail_jshtml_str_parse(r'test \u003ca\>hi\u003c/a\> moo', True) == 'test hi moo'
+
\ No newline at end of file
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]