[snowy] Port note XML->HTML transform to lxml library

From: Sanford Armstrong <sharm src gnome org>
To: commits-list gnome org
Cc:
Subject: [snowy] Port note XML->HTML transform to lxml library
Date: Mon, 4 Oct 2010 19:47:48 +0000 (UTC)
commit caa0360d6bd5431b4202e2d8c351488961f55e3d
Author: Sandy Armstrong <sanfordarmstrong gmail com>
Date:   Wed Sep 22 04:51:15 2010 -0700

    Port note XML->HTML transform to lxml library
    
    This appears to work around some sort of issue that developed
    on the GNOME server, resulting in hangs when parsing the XSLT.
    
    We'd been wanting to switch to lxml anyway, and this change
    appears to fix the problem.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=631189

 data/note2xhtml.xsl |    5 ++-
 notes/views.py      |   77 ++++++++++++++++++++------------------------------
 2 files changed, 34 insertions(+), 48 deletions(-)
---
diff --git a/data/note2xhtml.xsl b/data/note2xhtml.xsl
index 7784816..abbdc5b 100644
--- a/data/note2xhtml.xsl
+++ b/data/note2xhtml.xsl
@@ -3,6 +3,7 @@
 		xmlns:tomboy="http://beatniksoftware.com/tomboy"
 		xmlns:size="http://beatniksoftware.com/tomboy/size"
 		xmlns:link="http://beatniksoftware.com/tomboy/link"
+		xmlns:tomboyonline="http://tomboy-online.org/stuff"
                 version='1.0'>
 
 <xsl:output method="html" indent="no" />
@@ -85,8 +86,8 @@
 
 <xsl:template match="link:internal">
 	<xsl:choose>
-		<xsl:when test="@id">
-			<a href="{$base-user-url}{@id}" class="link-internal">
+		<xsl:when test="tomboyonline:get_url_for_title(string(.))">
+			<a href="{tomboyonline:get_url_for_title(string(.))}" class="link-internal">
 				<xsl:value-of select="node()"/>
 			</a>
 		</xsl:when>
diff --git a/notes/views.py b/notes/views.py
index 1ae8bc3..de82df2 100644
--- a/notes/views.py
+++ b/notes/views.py
@@ -58,62 +58,47 @@ def note_list(request, username,
 
 def note_detail(request, username, note_id, slug='',
                 template_name='notes/note_detail.html'):
-    def clean_content(xml, author):
-        """
-        Adds an id attribute to <link:internal> tags so that URLs can be
-        constructed by the XSLT.
-        """
-        from xml.dom import minidom
-        doc = minidom.parseString(xml)
-
-        for link in doc.getElementsByTagName('link:internal'):
-            if len(link.childNodes) < 1: continue
-
-            title = link.childNodes[0].nodeValue
-            try:
-                note = Note.objects.get(author=author, title=title)
-            except ObjectDoesNotExist:
-                continue
-
-            link.setAttribute("id", str(note.pk))
-        
-        return doc.toxml()
-
     author = get_object_or_404(User, username=username)
     note = get_object_or_404(Note, pk=note_id, author=author)
 
     if request.user != author and note.permissions == 0:
         return HttpResponseForbidden()
-        
+
     if note.slug != slug:
         return HttpResponseRedirect(note.get_absolute_url())
-    
+
     # break this out into a function
-    import libxslt
-    import libxml2
+    from lxml import etree
     import os.path
 
-    style, doc, result = None, None, None
- 
-    try:
-        styledoc = libxml2.parseFile(os.path.join(settings.PROJECT_ROOT,
-                                                  'data/note2xhtml.xsl'))
-        style = libxslt.parseStylesheetDoc(styledoc)
-    
-        template = CONTENT_TEMPLATES.get(note.content_version, DEFAULT_CONTENT_TEMPLATE)
-        complete_xml = template.replace('%%%CONTENT%%%', note.content.encode('UTF-8'))
-        doc = libxml2.parseDoc(clean_content(complete_xml, author).encode('UTF-8'))
-
-        result = style.applyStylesheet(doc,
-            {'base-user-url': "'%s'" % reverse('note_index', kwargs={'username': author.username})}
-        )
-
-        # libxml2 doesn't munge encodings, so forcibly decode from UTF-8
-        body = unicode(style.saveResultToString(result), 'UTF-8')
-    finally:
-        if style != None: style.freeStylesheet()
-        if doc != None: doc.freeDoc()
-        if result != None: result.freeDoc()
+    # Extension function for XSL. Called twice per link,
+    # so we keep a little cache to save on lookups
+    link_cache = {}
+    def get_url_for_title(dummy, link_text):
+        if link_text in link_cache:
+            return link_cache[link_text]
+        try:
+            note = Note.objects.get(author=author, title=link_text)
+            note_url = note.get_absolute_url()
+            link_cache[link_text] = note_url
+            return note_url
+        except ObjectDoesNotExist:
+            return None
+
+    ns = etree.FunctionNamespace("http://tomboy-online.org/stuff")
+    ns.prefix = "tomboyonline"
+    ns['get_url_for_title'] = get_url_for_title
+
+    style = etree.parse(os.path.join(settings.PROJECT_ROOT,
+                                     'data/note2xhtml.xsl'))
+    transform = etree.XSLT(style)
+
+    template = CONTENT_TEMPLATES.get(note.content_version, DEFAULT_CONTENT_TEMPLATE)
+    complete_xml = template.replace('%%%CONTENT%%%', note.content)
+    doc = etree.fromstring(complete_xml)
+
+    result = transform(doc)
+    body = str(result)
 
     return render_to_response(template_name,
                               {'title': note.title,
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]