[library-web] mallard: fix getting plain title



commit a5d3c6c24ef4fcbb990c72dab70c0da5d8c8e698
Author: Frédéric Péters <fpeters 0d be>
Date:   Sun Jan 26 14:25:39 2014 +0000

    mallard: fix getting plain title

 src/modtypes/mallard.py |   47 ++++++++++++++++++++---------------------------
 1 files changed, 20 insertions(+), 27 deletions(-)
---
diff --git a/src/modtypes/mallard.py b/src/modtypes/mallard.py
index 056c67c..9b1aa4d 100644
--- a/src/modtypes/mallard.py
+++ b/src/modtypes/mallard.py
@@ -45,11 +45,21 @@ MAL_NS = 'http://projectmallard.org/1.0/'
 class NotAMallardPageException(Exception):
     pass
 
+
+def plain_text(tag):
+    text = [tag.text]
+    for child in tag.getchildren():
+        text.append(child.text)
+        text.append(child.tail)
+    return ''.join([x for x in text if x])
+
+
 class MallardPage:
     id = None
     link_title = None
     sort_title = None
     desc = None
+    title = None
     info_nodes = None
     sections = None
 
@@ -89,12 +99,14 @@ class MallardPage:
 
         desc_tag = tree.find('//{%s}desc' % MAL_NS)
         if desc_tag is not None:
-            desc = [desc_tag.text]
-            for child in desc_tag.getchildren():
-                desc.append(child.text)
-                desc.append(child.tail)
-            desc.append(desc_tag.tail)
-            self.desc = ''.join([x for x in desc if x])
+            self.desc = plain_text(desc_tag)
+
+        title_text = tree.find("{%s}info/{%s}title[@type='text']" % (MAL_NS, MAL_NS))
+        if title_text is not None:
+            self.title = plain_text(title_text)
+        else:
+            self.title = plain_text(self.title_tag)
+        self.title = self.title.strip()
 
     def page_info(self, element):
         if element is None:
@@ -293,8 +305,8 @@ class MallardModule(DocModule):
             index_doc_page = os.path.join(lang_dirname, 'index.page')
             if os.path.exists(index_doc_page):
                 mallard_page = MallardPage(index_doc_page)
-                if mallard_page.link_title is not None:
-                    doc.title[lang] = mallard_page.link_title.text
+                if mallard_page.title:
+                    doc.title[lang] = mallard_page.title
                 if mallard_page.desc is not None:
                     doc.abstract[lang] = mallard_page.desc
 
@@ -323,25 +335,6 @@ class MallardModule(DocModule):
                     logging.warn('failed processing %s, not a mallard page' % xml_file)
                     doc_pages.remove(doc_page)
                     continue
-                if doc_page == 'index.page':
-                    def get_plain(x):
-                        # XXX: this should be removed, Mallard provides a text
-                        # version of the title in <info>
-                        if x.text:
-                            t = x.text
-                        else:
-                            t = ''
-                        for element in x.getchildren():
-                            if element.tag == '{%s}media' % MAL_NS:
-                                continue
-                            if not element.text:
-                                continue
-                            t += ' '
-                            t += element.text
-                            if element.tail:
-                                t += element.tail
-                        return t.strip()
-                    doc.title[lang] = get_plain(page.link_title)
             if not lang in doc.languages:
                 continue
             temporary = tempfile.NamedTemporaryFile()


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]