[library-web] modularize



commit 37328d0abb095657c6a3e21b9a497903b702fbeb
Author: Frédéric Péters <fpeters 0d be>
Date:   Wed Aug 5 10:46:11 2009 +0200

    modularize

 src/document.py              |  249 ++++++++
 src/lgo.py                   | 1323 +-----------------------------------------
 src/modtypes/base.py         |  206 +++++++
 src/modtypes/gnomedocbook.py |  387 ++++++++++++
 src/modtypes/gtkdoc.py       |  207 +++++++
 src/modtypes/htmlfiles.py    |  112 ++++
 src/overlay.py               |  185 ++++++
 src/utils.py                 |   57 ++
 8 files changed, 1422 insertions(+), 1304 deletions(-)
---
diff --git a/src/document.py b/src/document.py
new file mode 100644
index 0000000..6a0c902
--- /dev/null
+++ b/src/document.py
@@ -0,0 +1,249 @@
+# libgo - script to build library.gnome.org
+# Copyright (C) 2007-2009  Frederic Peters
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+import logging
+import os
+import stat
+import subprocess
+
+try:
+    import elementtree.ElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+try:
+    import html5lib
+except ImportError:
+    html5lib = None
+
+from utils import version_cmp, download
+
+
+def assert_elementtree_node(node):
+    '''Assert an ElementTree node can be serialized'''
+    try:
+        ET.ElementTree(node).write('/dev/null')
+    except:
+        raise AssertionError('node cannot be serialized')
+
+class Document:
+    '''Base class for documents displayed on library.gnome.org'''
+
+    channel = None # one of ('users', 'devel', 'about', 'admin')
+    module = None
+    path = None
+    category = None
+    toc_id = None
+    subsection = None
+    weight = 0.5
+    single_page_alternative = False
+
+    title = None # indexed on language, most recent version
+    abstract = None # indexed on language, most recent version
+    href = None # for external docs, indexed on language
+
+    languages = None # list of available languages
+    versions = None # list of available versions
+    version_keywords = None
+    version_mapping = None # mapping of two-number version to full-numbers version
+                           # like {'2.18': '2.18.3', '2.19': '2.19.90'}
+    tarballs = None # indexed on two-number version number
+
+    def __init__(self):
+        self.title = {}
+        self.abstract = {}
+        self.versions = []
+        self.version_keywords = {}
+        self.version_mapping = {}
+        self.keywords = []
+        self.tarballs = {}
+
+    def create_element(self, parent, language, original_language = None):
+        if not language in self.languages:
+            return
+        doc = ET.SubElement(parent, 'document')
+        if language == 'C':
+            language = 'en'
+        if not original_language:
+            original_language = language
+        href_language = None
+        if self.module:
+            doc.set('modulename', self.module)
+        if self.path:
+            doc.set('path', self.path)
+        elif self.href:
+            if self.href.get(original_language) and self.href.get(original_language) != '-':
+                href_language = original_language
+                doc.set('href', self.href.get(original_language))
+            else:
+                href_language = 'en'
+                doc.set('href', self.href.get('en'))
+        else:
+            logging.error('no path and no href in module %s ' % self.module)
+            return
+        title = self.title.get(original_language)
+        if not title:
+            title = self.title.get(language)
+            if not title:
+                title = self.module
+        ET.SubElement(doc, 'title').text = title
+
+        abstract = self.abstract.get(original_language)
+        if not abstract:
+            abstract = self.abstract.get(language)
+        if abstract:
+            ET.SubElement(doc, 'abstract').text = abstract
+
+        doc.set('channel', self.channel)
+        doc.set('weight', str(self.weight))
+
+        if href_language:
+            doc.set('lang', href_language)
+        else:
+            doc.set('lang', language)
+
+        #if self.category:
+        #    doc.set('category', self.category)
+        if self.toc_id:
+            doc.set('toc_id', self.toc_id)
+
+        langs = ET.SubElement(doc, 'other-languages')
+        for l in self.languages:
+            if l == language or l == 'C':
+                continue
+            ET.SubElement(langs, 'lang').text = l
+
+        if self.versions:
+            versions = ET.SubElement(doc, 'versions')
+            for v in sorted(self.versions, version_cmp):
+                version = ET.SubElement(versions, 'version')
+                version.set('href', v)
+                if v in self.version_mapping:
+                    version.text = self.version_mapping[v]
+                else:
+                    version.text = v
+                if v in self.version_keywords:
+                    version.set('keyword', self.version_keywords[v])
+
+        if self.keywords:
+            keywords = ET.SubElement(doc, 'keywords')
+            for k in self.keywords:
+                keyword = ET.SubElement(keywords, 'keyword')
+                keyword.text = k
+
+        if self.tarballs:
+            tarballs = ET.SubElement(doc, 'tarballs')
+            for k in reversed(sorted(self.tarballs.keys(), version_cmp)):
+                tarball = ET.SubElement(tarballs, 'tarball')
+                tarball.text = self.tarballs[k]
+                tarball.set('version', k)
+
+        if self.single_page_alternative:
+            doc.set('single_page_alternative', 'true')
+
+
+        assert_elementtree_node(doc)
+
+
+class RemoteDocument(Document):
+    '''Class for documentation files downloaded from remote servers and
+    formatted according to local layout
+
+    Sample description:
+
+    <document doc_module="deployment-guide" channel="admin" category="guides">
+      <local/> <!-- this is the important part -->
+      <title>Desktop Administrators' Guide to GNOME Lockdown and Preconfiguration</title>
+      <href>http://sayamindu.randomink.org/soc/deployment_guide/deployment_guide.html</href>
+    </document>
+    '''
+    html2html_xsl_file = os.path.join(data_dir, 'xslt', 'html2html.xsl')
+
+    def __init__(self, overlay):
+        Document.__init__(self)
+        self.overlay = overlay
+        if 'doc_module' in overlay.attrib:
+            self.module = overlay.attrib['doc_module']
+        self.channel = overlay.attrib['channel']
+        self.category = overlay.attrib['category']
+        self.toc_id = self.category
+        self.title = {}
+        self.href = {}
+        self.abstract = {}
+        for title in overlay.findall('title'):
+            lang = title.attrib.get(
+                    '{http://www.w3.org/XML/1998/namespace}lang', 'en')
+            self.title[lang] = title.text
+        for abstract in overlay.findall('abstract'):
+            lang = abstract.attrib.get(
+                    '{http://www.w3.org/XML/1998/namespace}lang', 'en')
+            self.abstract[lang] = abstract.text
+        for href in overlay.findall('href'):
+            if href.text == '-':
+                continue
+            lang = href.attrib.get(
+                    '{http://www.w3.org/XML/1998/namespace}lang', 'en')
+            self.href[lang] = href.text
+        self.languages = self.title.keys()
+
+        if overlay.find('subsection') is not None:
+            self.subsection = overlay.find('subsection').text
+
+        if overlay.find('local') is not None:
+            self.remote_to_local(overlay.find('local').attrib)
+
+    def remote_to_local(self, attribs):
+        web_output_dir = os.path.join(app.config.output_dir, self.channel, self.module)
+        mtime_xsl = os.stat(self.html2html_xsl_file)[stat.ST_MTIME]
+
+        for lang in self.href:
+            if self.href[lang] == '-':
+                continue
+            filename = self.download(self.href[lang])
+            if not filename:
+                continue
+            dst = os.path.join(web_output_dir, 'index.html.%s' % lang)
+            if os.path.exists(dst) and (
+                    os.stat(dst)[stat.ST_MTIME] > max(mtime_xsl, os.stat(filename)[stat.ST_MTIME])):
+                continue
+
+            parser = html5lib.HTMLParser()
+            doc = parser.parse(open(filename))
+            doc.childNodes[-1].attributes['xmlns'] = 'http://www.w3.org/1999/xhtml'
+            cmd = ['xsltproc', '--output', dst,
+                    '--stringparam', 'libgo.originalhref', self.href[lang],
+                    '--stringparam', 'libgo.channel', self.channel,
+                    '--nonet', '--xinclude', self.html2html_xsl_file, '-']
+            for k in attribs:
+                cmd.insert(-2, '--stringparam')
+                cmd.insert(-2, 'libgo.%s' % k)
+                cmd.insert(-2, attribs[k])
+
+            logging.debug('executing %s' % ' '.join(cmd))
+            xsltproc = subprocess.Popen(cmd, stdin = subprocess.PIPE)
+            xsltproc.communicate(doc.toxml())
+            xsltproc.wait()
+            if xsltproc.returncode:
+                logging.warn('%s failed with error %d' % (' '.join(cmd), xsltproc.returncode))
+
+        self.path = '/' + os.path.join(self.channel, self.module) + '/'
+
+    def download(self, href):
+        # TODO: add some support (think <local update="daily"/>) so the file
+        # can be "watched" for changes
+        return download(href)
+
diff --git a/src/lgo.py b/src/lgo.py
index e9dc1aa..cd61138 100755
--- a/src/lgo.py
+++ b/src/lgo.py
@@ -36,1319 +36,32 @@ import subprocess
 import dbm
 import shutil
 import socket
+import __builtin__
 
 try:
     from pysqlite2 import dbapi2 as sqlite
 except ImportError:
     sqlite = None
 
-try:
-    import html5lib
-except ImportError:
-    html5lib = None
+data_dir = os.path.join(os.path.dirname(__file__), '../data')
+__builtin__.__dict__['data_dir'] = data_dir
 
 from config import Config
 import errors
 import utils
+from utils import version_cmp, is_version_number, download
 
-app = None
-data_dir = os.path.join(os.path.dirname(__file__), '../data')
-
-licence_modules = ['fdl', 'gpl', 'lgpl']
-
-# timeout for downloads, so it doesn't hang on connecting to sourceforge
-socket.setdefaulttimeout(10)
-
-def assert_elementtree_node(node):
-    '''Assert en ElementTree node can be serialized'''
-    try:
-        ET.ElementTree(node).write('/dev/null')
-    except:
-        raise AssertionError('node cannot be serialized')
-
-def version_cmp(x, y):
-    # returns < 0 if x < y, 0 if x == y, and > 0 if x > y
-    if x == 'nightly' and y == 'nightly':
-        return 0
-    elif x == 'nightly':
-        return 1
-    elif y == 'nightly':
-        return -1
-    try:
-        return cmp([int(j) for j in x.split('.')], [int(k) for k in y.split('.')])
-    except ValueError:
-        logging.warning('failure in version_cmp: %r vs %r' % (x, y))
-        return 0
-
-def is_version_number(v):
-    return re.match('\d+\.\d+', v) is not None
-
-def download(href):
-    parsed_url = urllib2.urlparse.urlparse(href)
-    if parsed_url[0] == 'file':
-        return parsed_url[2]
-    filename = '/'.join(parsed_url[1:3])
-    cache_filename = os.path.join(app.config.download_cache_dir, filename)
-    cache_dir = os.path.split(cache_filename)[0]
-    if not os.path.exists(cache_dir):
-        os.makedirs(cache_dir)
-    if not os.path.exists(cache_filename):
-        logging.info('downloading %s' % href)
-        try:
-            s = urllib2.urlopen(href).read()
-        except urllib2.HTTPError, e:
-            logging.warning('error %s downloading %s' % (e.code, href))
-            return None
-        except urllib2.URLError, e:
-            logging.warning('error (URLError) downloading %s' % href)
-            return None
-        open(cache_filename, 'w').write(s)
-    return cache_filename
-
-
-class Document:
-    '''Base class for documents displayed on library.gnome.org'''
-
-    channel = None # one of ('users', 'devel', 'about', 'admin')
-    module = None
-    path = None
-    category = None
-    toc_id = None
-    subsection = None
-    weight = 0.5
-    single_page_alternative = False
-
-    title = None # indexed on language, most recent version
-    abstract = None # indexed on language, most recent version
-    href = None # for external docs, indexed on language
-
-    languages = None # list of available languages
-    versions = None # list of available versions
-    version_keywords = None
-    version_mapping = None # mapping of two-number version to full-numbers version
-                           # like {'2.18': '2.18.3', '2.19': '2.19.90'}
-    tarballs = None # indexed on two-number version number
-
-    def __init__(self):
-        self.title = {}
-        self.abstract = {}
-        self.versions = []
-        self.version_keywords = {}
-        self.version_mapping = {}
-        self.keywords = []
-        self.tarballs = {}
-
-    def create_element(self, parent, language, original_language = None):
-        if not language in self.languages:
-            return
-        doc = ET.SubElement(parent, 'document')
-        if language == 'C':
-            language = 'en'
-        if not original_language:
-            original_language = language
-        href_language = None
-        if self.module:
-            doc.set('modulename', self.module)
-        if self.path:
-            doc.set('path', self.path)
-        elif self.href:
-            if self.href.get(original_language) and self.href.get(original_language) != '-':
-                href_language = original_language
-                doc.set('href', self.href.get(original_language))
-            else:
-                href_language = 'en'
-                doc.set('href', self.href.get('en'))
-        else:
-            logging.error('no path and no href in module %s ' % self.module)
-            return
-        title = self.title.get(original_language)
-        if not title:
-            title = self.title.get(language)
-            if not title:
-                title = self.module
-        ET.SubElement(doc, 'title').text = title
-
-        abstract = self.abstract.get(original_language)
-        if not abstract:
-            abstract = self.abstract.get(language)
-        if abstract:
-            ET.SubElement(doc, 'abstract').text = abstract
-
-        doc.set('channel', self.channel)
-        doc.set('weight', str(self.weight))
-
-        if href_language:
-            doc.set('lang', href_language)
-        else:
-            doc.set('lang', language)
-
-        #if self.category:
-        #    doc.set('category', self.category)
-        if self.toc_id:
-            doc.set('toc_id', self.toc_id)
-
-        langs = ET.SubElement(doc, 'other-languages')
-        for l in self.languages:
-            if l == language or l == 'C':
-                continue
-            ET.SubElement(langs, 'lang').text = l
-
-        if self.versions:
-            versions = ET.SubElement(doc, 'versions')
-            for v in sorted(self.versions, version_cmp):
-                version = ET.SubElement(versions, 'version')
-                version.set('href', v)
-                if v in self.version_mapping:
-                    version.text = self.version_mapping[v]
-                else:
-                    version.text = v
-                if v in self.version_keywords:
-                    version.set('keyword', self.version_keywords[v])
-
-        if self.keywords:
-            keywords = ET.SubElement(doc, 'keywords')
-            for k in self.keywords:
-                keyword = ET.SubElement(keywords, 'keyword')
-                keyword.text = k
-
-        if self.tarballs:
-            tarballs = ET.SubElement(doc, 'tarballs')
-            for k in reversed(sorted(self.tarballs.keys(), version_cmp)):
-                tarball = ET.SubElement(tarballs, 'tarball')
-                tarball.text = self.tarballs[k]
-                tarball.set('version', k)
-
-        if self.single_page_alternative:
-            doc.set('single_page_alternative', 'true')
-
-
-        assert_elementtree_node(doc)
-
-
-class RemoteDocument(Document):
-    '''Class for documentation files downloaded from remote servers and
-    formatted according to local layout
-
-    Sample description:
-
-    <document doc_module="deployment-guide" channel="admin" category="guides">
-      <local/> <!-- this is the important part -->
-      <title>Desktop Administrators' Guide to GNOME Lockdown and Preconfiguration</title>
-      <href>http://sayamindu.randomink.org/soc/deployment_guide/deployment_guide.html</href>
-    </document>
-    '''
-    html2html_xsl_file = os.path.join(data_dir, 'xslt', 'html2html.xsl')
-
-    def __init__(self, overlay):
-        Document.__init__(self)
-        self.overlay = overlay
-        if 'doc_module' in overlay.attrib:
-            self.module = overlay.attrib['doc_module']
-        self.channel = overlay.attrib['channel']
-        self.category = overlay.attrib['category']
-        self.toc_id = self.category
-        self.title = {}
-        self.href = {}
-        self.abstract = {}
-        for title in overlay.findall('title'):
-            lang = title.attrib.get(
-                    '{http://www.w3.org/XML/1998/namespace}lang', 'en')
-            self.title[lang] = title.text
-        for abstract in overlay.findall('abstract'):
-            lang = abstract.attrib.get(
-                    '{http://www.w3.org/XML/1998/namespace}lang', 'en')
-            self.abstract[lang] = abstract.text
-        for href in overlay.findall('href'):
-            if href.text == '-':
-                continue
-            lang = href.attrib.get(
-                    '{http://www.w3.org/XML/1998/namespace}lang', 'en')
-            self.href[lang] = href.text
-        self.languages = self.title.keys()
-
-        if overlay.find('subsection') is not None:
-            self.subsection = overlay.find('subsection').text
-
-        if overlay.find('local') is not None:
-            self.remote_to_local(overlay.find('local').attrib)
-
-    def remote_to_local(self, attribs):
-        web_output_dir = os.path.join(app.config.output_dir, self.channel, self.module)
-        mtime_xsl = os.stat(self.html2html_xsl_file)[stat.ST_MTIME]
-
-        for lang in self.href:
-            if self.href[lang] == '-':
-                continue
-            filename = self.download(self.href[lang])
-            if not filename:
-                continue
-            dst = os.path.join(web_output_dir, 'index.html.%s' % lang)
-            if os.path.exists(dst) and (
-                    os.stat(dst)[stat.ST_MTIME] > max(mtime_xsl, os.stat(filename)[stat.ST_MTIME])):
-                continue
-
-            parser = html5lib.HTMLParser()
-            doc = parser.parse(open(filename))
-            doc.childNodes[-1].attributes['xmlns'] = 'http://www.w3.org/1999/xhtml'
-            cmd = ['xsltproc', '--output', dst,
-                    '--stringparam', 'libgo.originalhref', self.href[lang],
-                    '--stringparam', 'libgo.channel', self.channel,
-                    '--nonet', '--xinclude', self.html2html_xsl_file, '-']
-            for k in attribs:
-                cmd.insert(-2, '--stringparam')
-                cmd.insert(-2, 'libgo.%s' % k)
-                cmd.insert(-2, attribs[k])
-
-            logging.debug('executing %s' % ' '.join(cmd))
-            xsltproc = subprocess.Popen(cmd, stdin = subprocess.PIPE)
-            xsltproc.communicate(doc.toxml())
-            xsltproc.wait()
-            if xsltproc.returncode:
-                logging.warn('%s failed with error %d' % (' '.join(cmd), xsltproc.returncode))
-
-        self.path = '/' + os.path.join(self.channel, self.module) + '/'
-
-    def download(self, href):
-        # TODO: add some support (think <local update="daily"/>) so the file
-        # can be "watched" for changes
-        return download(href)
-
-
-class SubIndex:
-    def __init__(self, node):
-        self.id = node.attrib.get('id')
-        self.weight = node.attrib.get('weight')
-        self.sections = node.find('sections').text.split()
-        self.title = {}
-        self.abstract = {}
-
-        for title in node.findall('title'):
-            lang = title.attrib.get(
-                    '{http://www.w3.org/XML/1998/namespace}lang', 'en')
-            self.title[lang] = title.text
-        for abstract in node.findall('abstract'):
-            lang = abstract.attrib.get(
-                    '{http://www.w3.org/XML/1998/namespace}lang', 'en')
-            self.abstract[lang] = abstract.text
-
-    def create_element(self, parent, channel, language):
-        index = ET.SubElement(parent, 'index')
-        if language == 'C':
-            language = 'en'
-        index.set('id', self.id)
-        index.set('lang', language)
-        index.set('channel', channel)
-        index.set('weigth', self.weight)
-
-        title = self.title.get(language)
-        if not title:
-            title = self.title.get('en')
-            if not title:
-                title = self.id
-        ET.SubElement(index, 'title').text = title
-
-        abstract = self.abstract.get(language)
-        if not abstract:
-            abstract = self.abstract.get('en')
-        if abstract:
-            ET.SubElement(index, 'abstract').text = abstract
-
-        return index
-
-
-class DocModule:
-    '''Base class for documentation shipped in tarballs'''
-    makefile_am = None
-
-    filename = None
-    dirname = None
-    modulename = None
-
-    related_xsl_files = None
-    mtime_xslt_files = 0
-
-    nightly = False
-
-    def __init__(self, tar, tarinfo, makefile_am, nightly = False):
-        self.dirname = os.path.dirname(tarinfo.name)
-        if makefile_am:
-            self.makefile_am = makefile_am
-            self.modulename = re.findall(r'DOC_MODULE\s?=\s?(.*)', makefile_am)[0].strip()
-            if '@' in self.modulename:
-                logging.warning('variadic module name: %s' % self.modulename)
-                # don't go interpreting autotools variables, as in this path
-                # lays madness, instead simply cut out the variable, such as
-                # gnome-scan- API_VERSION@ becomes gnome-scan.
-                self.modulename = self.modulename.split('@')[0].strip('-')
-            # remove the version part, so libsoup-2.4 is handled just like
-            # another version of libsoup
-            self.modulename = re.sub('-\d+\.\d+$', '', self.modulename)
-        self.version = os.path.splitext(tar.name)[0].split('-')[-1]
-        if self.version.endswith('.tar'):
-            self.version = self.version[:-4]
-
-        if nightly or self.version == 'nightly':
-            self.nightly = True
-            self.version = 'nightly'
-            self.one_dot_version = 'nightly'
-        else:
-            self.one_dot_version = re.match(r'\d+\.\d+', self.version).group()
-
-        if self.related_xsl_files:
-            self.mtime_xslt_files = max([os.stat(
-                        os.path.join(data_dir, 'xslt', x))[stat.ST_MTIME] \
-                        for x in self.related_xsl_files])
-
-    def extract(self):
-        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
-        if not os.path.exists(ext_dirname):
-            os.makedirs(ext_dirname)
-
-        base_tarball_name = os.path.basename(self.filename).rsplit('-', 1)[0]
-
-        if not os.path.exists(os.path.join(ext_dirname, self.dirname)):
-            logging.debug('extracting %s' % self.dirname)
-            tar = tarfile.open(self.filename, 'r')
-            for tarinfo in tar.getmembers():
-                if not os.path.split(tarinfo.name)[0].startswith(self.dirname):
-                    continue
-                dest = os.path.join(ext_dirname, tarinfo.name)
-                if tarinfo.isdir() and not os.path.exists(dest):
-                    os.makedirs(dest)
-                elif tarinfo.isreg():
-                    if not os.path.exists(os.path.dirname(dest)):
-                        os.makedirs(os.path.dirname(dest))
-                    open(dest, 'w').write(tar.extractfile(tarinfo).read())
-            tar.close()
-
-    def setup_path(self):
-        if self.modulename in licence_modules:
-            # special casing the licences, they do not go in a
-            # versioned path
-            self.path = '/' + os.path.join(self.channel, self.modulename) + '/'
-        else:
-            self.path = '/' + os.path.join(self.channel, self.modulename,
-                    self.one_dot_version) + '/'
-
-    def get_libgo_document(self, doc_linguas):
-        try:
-            doc = [x for x in app.documents if \
-                    x.module == self.modulename and x.channel == self.channel][0]
-        except IndexError:
-            doc = Document()
-            doc.filename = self.dirname
-            doc.module = self.modulename
-            doc.channel = self.channel
-            doc.languages = doc_linguas
-            doc.path = self.path
-            doc._last_version = self.version
-            doc.versions = [self.one_dot_version]
-            app.documents.append(doc)
-        else:
-            if doc._last_version == self.version:
-                # file was already processed in a previous moduleset
-                return None
-
-            if not self.nightly:
-                # a more recent version may already have been installed; probably
-                # because the same module is being mentioned as an extra tarball
-                # with an inferior version number; don't update path in this
-                # situation.  (see bug #530517 for the mention of this problem)
-                if version_cmp(doc._last_version, self.version) <= 0:
-                    doc._last_version = self.version
-                    if int(self.one_dot_version.split('.')[1]) % 2 == 0:
-                        # update path to point to the latest version (but no
-                        # development versions)
-                        doc.path = self.path
-
-            if not self.one_dot_version in doc.versions:
-                doc.versions.append(self.one_dot_version)
-            for lang in doc_linguas:
-                if not lang in doc.languages:
-                    doc.languages.append(lang)
-
-        doc.version_mapping[self.one_dot_version] = self.version
-
-        # only keep authorised languages
-        if app.config.languages:
-            for lang in doc.languages[:]:
-                if lang not in app.config.languages:
-                    doc.languages.remove(lang)
-
-        return doc
-
-    def install_version_symlinks(self, doc):
-        '''Create stable and devel symlinks'''
-
-        if self.nightly:
-            return
-
-        if self.channel not in ('users', 'devel', 'admin'):
-            return
-
-        if not self.one_dot_version in doc.versions:
-            # this version doesn't appear in available versions, probably it
-            # had been depreciated
-            return
-
-        web_output_dir = os.path.join(app.config.output_dir, self.channel, self.modulename)
-        development_release = (int(self.one_dot_version.split('.')[1]) % 2 == 1) or (
-                int(self.one_dot_version.split('.')[0]) == 0)
-
-        if development_release:
-            keyword = 'unstable'
-        else:
-            keyword = 'stable'
-
-        path = os.path.join(web_output_dir, keyword)
-        installed = False
-        if os.path.islink(path):
-            currently_marked = os.readlink(path)
-            if currently_marked == 'stable':
-                currently_marked = os.readlink(os.path.join(web_output_dir, 'stable'))
-            if version_cmp(self.version, currently_marked) >= 0:
-                # install new symlink
-                os.unlink(path)
-                os.symlink(self.one_dot_version, path)
-                installed = True
-        else:
-            os.symlink(self.one_dot_version, path)
-            installed = True
-
-        if installed:
-            if not development_release:
-                if doc.path == '/%s/%s/%s/' % (self.channel, self.modulename, self.one_dot_version):
-                    # set default path to use the keyword
-                    doc.path = '/%s/%s/stable/' % (self.channel, self.modulename)
-
-                # if there is no unstable link, create it even for a stable release
-                # (or if stable is newer)
-                path = os.path.join(web_output_dir, 'unstable')
-                if os.path.islink(path):
-                    currently_unstable = os.readlink(path)
-                    if currently_unstable == 'stable' or \
-                            version_cmp(self.version, currently_unstable) >= 0:
-                        os.unlink(path)
-                        os.symlink('stable', path)
-                else:
-                    os.symlink('stable', path)
-            elif not os.path.exists(os.path.join(web_output_dir, 'stable')):
-                # update default path to have codename if there is no stable
-                # release
-                doc.path = '/%s/%s/unstable/' % (self.channel, self.modulename)
-
-            for k in doc.version_keywords.keys():
-                if doc.version_keywords.get(k) == keyword:
-                    del doc.version_keywords[k]
-            doc.version_keywords[self.one_dot_version] = keyword
-
-    def create_tarball_symlink(self, doc):
-        tarball_dest = '../%s-html-%s.tar.gz' % (self.modulename, self.version)
-        in_version_path = os.path.join(app.config.output_dir, self.channel,
-                self.modulename, self.one_dot_version,
-                '%s-html.tar.gz' % self.modulename)
-        if os.path.islink(in_version_path):
-            os.unlink(in_version_path)
-        os.symlink(tarball_dest, in_version_path)
-
-
-class GtkDocModule(DocModule):
-    '''Class for documentation shipped in a tarball and using gtk-doc
-
-    process() publishes the module in
-    <output_dir>/<channel>/<module>/<one_dot_version>/: the file named by
-    DOC_MAIN_SGML_FILE in Makefile.am is run through xsltproc, and the HTML
-    files shipped in the tarball are used instead when that run ends with
-    return code 6.
-    '''
-
-    gtkdoc_xsl_file = os.path.join(data_dir, 'xslt', 'gtk-doc.xsl')
-    html2html_xsl_file = os.path.join(data_dir, 'xslt', 'html2html.xsl')
-
-    related_xsl_files = ['gtk-doc.xsl', 'heading.xsl']
-
-    def setup_channel(self):
-        '''Set self.channel from the overlay, proposing 'devel' to it'''
-        self.channel = app.overlay.get_channel_overlay(self.modulename, 'devel')
-
-    def __str__(self):
-        return 'gtk-doc module at %s' % self.dirname
-
-    def _copy_if_newer(self, src, dst):
-        '''Copy src to dst unless dst exists and is not older than src'''
-        if not os.path.exists(dst) or \
-                os.stat(src)[stat.ST_MTIME] > os.stat(dst)[stat.ST_MTIME]:
-            # byte for byte copy, images must not go through text mode
-            shutil.copyfile(src, dst)
-
-    def _transform_shipped_html(self, shipped_html_dir, web_output_dir):
-        '''Restyle the HTML files of the tarball with html2html.xsl
-
-        Each .html file is parsed with html5lib, serialized as XHTML in a
-        temporary file and handed to xsltproc; other files are copied
-        unchanged.
-        '''
-        logging.debug('transforming files shipped with tarball')
-        parser = html5lib.HTMLParser()
-
-        for filename in os.listdir(shipped_html_dir):
-            src = os.path.join(shipped_html_dir, filename)
-            dst = os.path.join(web_output_dir, filename)
-            if not filename.endswith('.html'):
-                shutil.copyfile(src, dst)
-                continue
-            html_file = open(src)
-            try:
-                html_doc = parser.parse(html_file)
-            finally:
-                html_file.close()
-            html_doc.childNodes[-1].attributes['xmlns'] = 'http://www.w3.org/1999/xhtml'
-            temporary = tempfile.NamedTemporaryFile()
-            try:
-                temporary.write(html_doc.childNodes[-1].toxml().encode('utf-8'))
-                temporary.flush()
-
-                cmd = ['xsltproc', '--output', dst,
-                        '--nonet', '--xinclude',
-                        '--stringparam', 'libgo.h2hmode', 'gtk-doc',
-                        self.html2html_xsl_file,
-                        temporary.name]
-                rc = subprocess.call(cmd)
-            finally:
-                # closing the temporary file also removes it
-                temporary.close()
-            if rc != 0:
-                logging.warn('%s failed with error %d' % (' '.join(cmd), rc))
-
-    def _copy_shipped_html(self, shipped_html_dir, web_output_dir):
-        '''Copy the files shipped in the tarball html/ directory as they are'''
-        logging.debug('copying files shipped with tarball')
-        for filename in os.listdir(shipped_html_dir):
-            self._copy_if_newer(os.path.join(shipped_html_dir, filename),
-                    os.path.join(web_output_dir, filename))
-
-    def process(self):
-        '''Generate the web pages of the module and register the document
-
-        Generation is skipped when <module>.devhelp in the output directory
-        is more recent than both the tarball and the XSL files (and
-        app.rebuild_all is not set).  The document obtained from
-        get_libgo_document() is then tagged as 'api' documentation, gets
-        its English title from index.xml.en or from the devhelp file, and
-        the tarball and version symlinks are installed.
-        '''
-        doc_module = self.modulename
-        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
-        shipped_html_dir = os.path.join(ext_dirname, self.dirname, 'html')
-
-        # raises IndexError if Makefile.am has no DOC_MAIN_SGML_FILE line
-        main_sgml_file = re.findall(r'DOC_MAIN_SGML_FILE\s?=\s?(.*)',
-                self.makefile_am)[0].strip()
-        main_sgml_file = main_sgml_file.replace('$(DOC_MODULE)', doc_module)
-
-        try:
-            html_images = re.findall(r'HTML_IMAGES\s+=\s+(.*)', self.makefile_am)[0].split()
-        except IndexError:
-            html_images = []
-        html_images = [x.replace('$(srcdir)/', '') for x in html_images]
-
-        web_output_dir = os.path.join(app.config.output_dir, self.channel,
-                doc_module, self.one_dot_version)
-        if not os.path.exists(web_output_dir):
-            os.makedirs(web_output_dir)
-
-        # the devhelp file is used as the witness of a previous generation
-        devhelp_path = os.path.join(web_output_dir, '%s.devhelp' % doc_module)
-        if not app.rebuild_all and os.path.exists(devhelp_path):
-            mtime = os.stat(devhelp_path)[stat.ST_MTIME]
-        else:
-            mtime = 0
-
-        if mtime > max(self.mtime_tarball, self.mtime_xslt_files):
-            logging.debug('using already generated doc')
-        else:
-            logging.info('generating doc in %s' % web_output_dir[len(app.config.output_dir):])
-            cmd = ['xsltproc', '--output', web_output_dir + '/',
-                    '--nonet', '--xinclude',
-                    '--stringparam', 'libgo.lang', 'en',
-                    '--stringparam', 'gtkdoc.bookname', doc_module,
-                    '--stringparam', 'gtkdoc.version', '"(~lgo)"',
-                    '--stringparam', 'libgo.channel', self.channel,
-                    self.gtkdoc_xsl_file,
-                    os.path.join(ext_dirname, self.dirname, main_sgml_file)]
-            logging.debug('executing %s' % ' '.join(cmd))
-            xsltproc = subprocess.Popen(cmd, stdin = subprocess.PIPE, stderr = subprocess.PIPE)
-            stdout, stderr = xsltproc.communicate()
-            if re.findall('XInclude error : could not load.*and no fallback was found', stderr):
-                # handled below like a genuine return code 6
-                logging.warn('XInclude error, creating fake xsltproc return code')
-                xsltproc.returncode = 6
-
-            if xsltproc.returncode != 0:
-                logging.warn('%s failed with error %d' % (' '.join(cmd), xsltproc.returncode))
-                if xsltproc.returncode == 6:
-                    # build failed, probably because it has inline references in
-                    # documentation and would require a full module build to get
-                    # them properly.  (happens with GTK+)
-                    if html5lib:
-                        # convert files to XML, then process them with xsltproc
-                        # to get library.gnome.org look
-                        self._transform_shipped_html(shipped_html_dir, web_output_dir)
-                    else:
-                        # simply copy files shipped in tarball
-                        self._copy_shipped_html(shipped_html_dir, web_output_dir)
-
-            if html_images:
-                # and copy images/
-                logging.debug('copying images')
-                for html_image in html_images:
-                    src = os.path.join(ext_dirname, self.dirname, html_image)
-                    if not os.path.exists(src):
-                        continue
-                    # images are flattened into the output directory
-                    self._copy_if_newer(src,
-                            os.path.join(web_output_dir, os.path.basename(html_image)))
-
-            # in any case, copy png files from gtk-doc
-            for src in glob.glob('/usr/share/gtk-doc/data/*.png'):
-                self._copy_if_newer(src,
-                        os.path.join(web_output_dir, os.path.basename(src)))
-
-        doc = self.get_libgo_document(['en'])
-        if not doc:
-            return
-
-        doc.keywords.append('gtk-doc')
-        doc.category = 'api'
-        doc.toc_id = 'api'
-
-        devhelp_file = [x for x in os.listdir(web_output_dir) if x.endswith('.devhelp')]
-        index_xml_path = os.path.join(web_output_dir, 'index.xml.en')
-        if os.path.exists(index_xml_path):
-            tree = ET.parse(index_xml_path)
-            # title element, with or without the XHTML namespace
-            title_node = tree.find('title')
-            if title_node is None:
-                title_node = tree.find('{http://www.w3.org/1999/xhtml}title')
-            if title_node is not None:
-                doc.title['en'] = title_node.text
-        elif devhelp_file:
-            tree = ET.parse(os.path.join(web_output_dir, devhelp_file[0]))
-            doc.title['en'] = tree.getroot().attrib['title']
-
-        if app.config.create_tarballs:
-            self.create_tarball(doc)
-            # NOTE(review): the symlink is created even when create_tarball()
-            # gave up because the tarball has no html/ directory; it is then
-            # dangling unless an older tarball exists -- confirm this is wanted
-            self.create_tarball_symlink(doc)
-
-        self.install_version_symlinks(doc)
-
-    def create_tarball(self, doc):
-        '''Pack the html/ directory shipped in the tarball as a doc tarball
-
-        The archive is <output_dir>/<channel>/<module>/
-        <module>-html-<version>.tar.gz; it is left alone when more recent
-        than the source tarball.  Its name is recorded in doc.tarballs,
-        unless the source tarball has no (or an empty) html/ directory, in
-        which case nothing is done.
-        '''
-        web_output_dir = os.path.join(app.config.output_dir, self.channel,
-                self.modulename)
-        tarball_name = '%s-html-%s.tar.gz' % (self.modulename, self.version)
-        tarball_filepath = os.path.join(web_output_dir, tarball_name)
-        if os.path.exists(tarball_filepath):
-            mtime = os.stat(tarball_filepath)[stat.ST_MTIME]
-        else:
-            mtime = 0
-
-        if mtime > self.mtime_tarball:
-            logging.debug('using already generated tarball')
-        else:
-            logging.info('generating doc tarball in %s' % tarball_filepath[
-                    len(app.config.output_dir):])
-            shipped_html = os.path.join(app.config.private_dir, 'extracts', self.dirname, 'html')
-            if not os.path.exists(shipped_html) or not os.listdir(shipped_html):
-                logging.warning('tarball shipped without html/, too bad')
-                return
-            base_tarball_dir = '%s-html-%s' % (self.modulename, self.version)
-            tar = tarfile.open(tarball_filepath, 'w:gz')
-            try:
-                for base, dirs, files in os.walk(shipped_html):
-                    base_dir = '%s/%s' % (base_tarball_dir, base[len(shipped_html):])
-                    for filename in files:
-                        orig_file = os.path.join(base, filename)
-                        tarinfo = tar.gettarinfo(orig_file, '%s/%s' % (base_dir, filename))
-                        fileobj = open(orig_file, 'rb')
-                        try:
-                            tar.addfile(tarinfo, fileobj)
-                        finally:
-                            fileobj.close()
-            finally:
-                tar.close()
-
-        doc.tarballs[self.one_dot_version] = tarball_name
-
-
-class GnomeDocUtilsModule(DocModule):
-    '''Class for documentation shipped in a tarball and using gnome-doc-utils
-
-    The DocBook sources of every language are formatted with db2html.xsl
-    into <output_dir>/<channel>/<module>/<one_dot_version>/, and optionally
-    packed into a tarball of plain HTML files.
-    '''
-    db2html_xsl_file = os.path.join(data_dir, 'xslt', 'db2html.xsl')
-    category = None
-
-    related_xsl_files = ['db2html.xsl', 'heading.xsl']
-
-    def __init__(self, tar, tarinfo, makefile_am, nightly):
-        '''Initialize the module, working around a templated module name
-
-        When the module name is the unexpanded '@PACKAGE_NAME@' placeholder
-        it is replaced by the base name of the first .omf.in member of the
-        tarball found under self.dirname.  If there is no such member an
-        error is logged and the placeholder is kept.
-        '''
-        DocModule.__init__(self, tar, tarinfo, makefile_am, nightly)
-        if self.modulename == '@PACKAGE_NAME@':
-            # ekiga has this, use another way, looking at omf files
-            omf_files = [x.name for x in tar.getmembers() if \
-                       x.name.startswith(self.dirname) and x.name.endswith('.omf.in')]
-            if not omf_files:
-                logging.error('failed to get DOC_MODULE for %s' % tarinfo.name)
-                # nothing to derive the name from
-                return
-            self.modulename = os.path.split(omf_files[0])[-1][:-len('.omf.in')]
-
-    def setup_channel(self):
-        '''Set self.channel (and self.category) from the OMF file and overlay
-
-        The category is read from the resource/subject element of the first
-        .omf.in file found in the module directory or in its C/
-        subdirectory.  'devel' is proposed to the overlay for the
-        development and programming categories, 'users' otherwise.
-        '''
-        # get category from omf file
-        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
-        omf_file = glob.glob(os.path.join(ext_dirname, self.dirname) + '/*.omf.in')
-        if not omf_file:
-            omf_file = glob.glob(os.path.join(ext_dirname, self.dirname) + '/C/*.omf.in')
-        if omf_file:
-            try:
-                self.category = ET.parse(omf_file[0]).find('resource/subject').attrib['category']
-            except (AttributeError, IndexError, KeyError):
-                # AttributeError: no resource/subject element (find() gave
-                # None); KeyError: element without category attribute
-                pass
-
-        channel = 'users'
-        if self.category and (self.category.startswith('GNOME|Development') or
-                self.category.startswith('GNOME|Applications|Programming')):
-            channel = 'devel'
-
-        self.channel = app.overlay.get_channel_overlay(self.modulename, channel)
-        if self.channel == 'misc' and not self.category:
-            self.category = 'GNOME|Other'
-
-    def __str__(self):
-        return 'gnome-doc-utils module at %s' % self.dirname
-
-    def process(self):
-        '''Format the documentation in all its languages
-
-        Languages come from DOC_LINGUAS in Makefile.am ('en' is always
-        added) and are restricted to app.config.languages when that is
-        set.  Figures come from DOC_FIGURES, or from the C/figures and
-        C/images directories when the variable is missing or lists a
-        missing file.  For each language the single page and the default
-        renderings are produced with xsltproc, figures are copied, and the
-        generated index.xml.<lang> is read back for title and abstract.  A
-        language whose index.xml.<lang> is up-to-date is not formatted
-        again.  The HTML tarball is only created if the loop over
-        languages was not interrupted by depreciated documentation.
-        '''
-        doc_module = self.modulename
-        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
-
-        try:
-            doc_linguas = re.findall(r'DOC_LINGUAS\s+=[\t ](.*)',
-                    self.makefile_am)[0].split()
-            if 'en' not in doc_linguas:
-                doc_linguas.append('en')
-        except IndexError:
-            doc_linguas = ['en']
-
-        try:
-            doc_figures = re.findall(r'DOC_FIGURES\s+=\s+(.*)',
-                    self.makefile_am)[0].split()
-            figures_dirname = os.path.join(ext_dirname, self.dirname, 'C')
-            for doc_figure in doc_figures:
-                if not os.path.exists(os.path.join(figures_dirname, doc_figure)):
-                    logging.warning('figure (%s) listed but not present, going to autodiscover' % \
-                        doc_figure)
-                    # share the handler of a missing DOC_FIGURES variable
-                    raise IndexError()
-        except IndexError:
-            figures_dirname = os.path.join(ext_dirname, self.dirname, 'C', 'figures')
-            images_dirname = os.path.join(ext_dirname, self.dirname, 'C', 'images')
-            doc_figures = []
-            if os.path.exists(figures_dirname):
-                doc_figures += [os.path.join('figures', x) for x in \
-                        os.listdir(figures_dirname) \
-                        if os.path.splitext(x)[1] in ('.png', '.jpg', '.jpeg')]
-            if os.path.exists(images_dirname):
-                doc_figures += [os.path.join('images', x) for x in \
-                        os.listdir(images_dirname) \
-                        if os.path.splitext(x)[1] in ('.png', '.jpg', '.jpeg')]
-
-        doc_linguas.sort()
-        if app.config.languages:
-            # iterate over a copy, the list is modified in the loop
-            for lang in doc_linguas[:]:
-                if lang not in app.config.languages + ['C']:
-                    doc_linguas.remove(lang)
-
-        doc = self.get_libgo_document(doc_linguas)
-        if not doc:
-            return
-
-        if self.category:
-            doc.category = self.category
-            doc.toc_id = app.toc_mapping.get(doc.category)
-
-        web_output_dir = os.path.join(app.config.output_dir, self.channel,
-                doc_module, self.one_dot_version)
-
-        quirks = app.overlay.get_quirks(self)
-        if 'flat-rendering' not in quirks:
-            doc.single_page_alternative = True
-
-        logging.info('generating doc in %s' % web_output_dir[len(app.config.output_dir):])
-        if not os.path.exists(web_output_dir):
-            os.makedirs(web_output_dir)
-
-        if app.config.create_tarballs:
-            # temporary_tarball_dir and tarball_filepath only exist in this
-            # case; below they are only used under create_tarball
-            temporary_tarball_dir = tempfile.mkdtemp()
-            tarball_name = '%s-html-%s.tar.gz' % (self.modulename, self.version)
-            tarball_filepath = os.path.join(app.config.output_dir, self.channel,
-                    self.modulename, tarball_name)
-
-            if os.path.exists(tarball_filepath):
-                # doc already exists, keep it in tarballs dictionary as it may
-                # be skipped in this run, if the documentation is already
-                # uptodate.
-                doc.tarballs[self.one_dot_version] = tarball_name
-
-        base_tarball_name = os.path.basename(self.filename).rsplit('-', 1)[0]
-
-        # turned off as soon as one language is found up-to-date while the
-        # tarball exists
-        create_tarball = app.config.create_tarballs
-
-        for lang in doc.languages:
-            if lang == 'en' and not os.path.exists(os.path.join(ext_dirname, self.dirname, 'en')):
-                lang_dirname = os.path.join(ext_dirname, self.dirname, 'C')
-            else:
-                lang_dirname = os.path.join(ext_dirname, self.dirname, lang)
-
-            xml_file = os.path.join(lang_dirname, doc_module + '.xml')
-            if not os.path.exists(xml_file):
-                # the document had a translation available in a previous
-                # version, and it got removed
-                continue
-
-            xml_index_file = os.path.join(web_output_dir, 'index.xml.%s' % lang)
-            skip_html_files = False
-            if not app.rebuild_all and (
-                    app.rebuild_language is None or
-                    lang != app.rebuild_language) and os.path.exists(xml_index_file):
-                mtime = os.stat(xml_index_file)[stat.ST_MTIME]
-                if mtime > max(self.mtime_tarball, self.mtime_xslt_files):
-                    try:
-                        self.process_xml_index(xml_index_file, doc, lang)
-                    except errors.DepreciatedDocumentation:
-                        logging.info('skipped %s (%s) (depreciated documentation)' % (
-                                    self.modulename, self.one_dot_version))
-                        break
-
-                    if not create_tarball or os.path.exists(tarball_filepath):
-                        logging.debug('using already generated doc in %s' % lang)
-                        create_tarball = False
-                        continue
-                    # web pages are fine, only the tarball rendering is needed
-                    skip_html_files = True
-
-            if 'missing-id-on-top-book-element' in quirks:
-                # Evolution documentation top element is currently <book
-                # lang="en"> but the gnome-doc-utils stylesheets are
-                # looking for the id # attribute to get filename.
-                # -- http://bugzilla.gnome.org/show_bug.cgi?id=462811
-                t = open(xml_file).read()
-                open(xml_file + '.fixed', 'w').write(t.replace('\n<book ', '\n<book id="index" '))
-                xml_file = xml_file + '.fixed'
-
-            if 'correct-article-index' in quirks:
-                # 2.20 release notes had <sect1 id="index"><title>Introduction...
-                # instead of <sect1 id="intro"><title>Introduction... and that
-                # prevented a correct id="index" on top <article>
-                # older release notes had <article id="article"
-                t = open(xml_file).read()
-                open(xml_file + '.fixed', 'w').write(
-                        re.sub(r'(<article.*?)(\sid=".*?")', r'\1', t # remove id on <article>
-                        ).replace('<article ', '<article id="index" '
-                        ).replace('<sect1 id="index"', '<sect1 id="intro"'))
-                xml_file = xml_file + '.fixed'
-
-            if not skip_html_files:
-                # format docbook into html files
-                cmd = ['xsltproc', '--output', web_output_dir + '/',
-                        '--nonet', '--xinclude',
-                        '--stringparam', 'libgo.lang', lang,
-                        '--stringparam', 'libgo.channel', self.channel,
-                        self.db2html_xsl_file, xml_file]
-
-                # extra parameters are inserted at index 5, right after
-                # '--xinclude'
-                if self.nightly:
-                    cmd[5:5] = ['--param', 'libgo.nightly', 'true()']
-                onepage_cmd = cmd[:]
-
-                # 1st, generate a single page with all documentation, and
-                # rename it to $modulename.html.$lang (unless a single page
-                # is the default rendering)
-                if 'flat-rendering' not in quirks:
-                    onepage_cmd.insert(5, '--stringparam')
-                    onepage_cmd.insert(6, 'db.chunk.max_depth')
-                    onepage_cmd.insert(7, '0')
-                    logging.debug('executing %s' % ' '.join(onepage_cmd))
-                    rc = subprocess.call(onepage_cmd)
-                    if rc != 0:
-                        logging.warn('%s failed with error %d' % (' '.join(onepage_cmd), rc))
-                    index_html = os.path.join(web_output_dir, 'index.html.%s' % lang)
-                    if os.path.exists(index_html):
-                        os.rename(index_html,
-                            os.path.join(web_output_dir, '%s.html.%s' % (doc_module, lang)))
-
-                # 2nd, generate default rendering, to index.html.$lang
-                if 'flat-rendering' in quirks:
-                    cmd.insert(5, '--stringparam')
-                    cmd.insert(6, 'db.chunk.max_depth')
-                    cmd.insert(7, '0')
-                    cmd.insert(5, '--stringparam')
-                    cmd.insert(6, 'db.chunk.autotoc_depth')
-                    cmd.insert(7, '1')
-                if 'languages-in-sidebar' in quirks:
-                    cmd.insert(5, '--stringparam')
-                    cmd.insert(6, 'libgo.languages_in_sidebar')
-                    cmd.insert(7, ','.join(sorted(doc_linguas)))
-                logging.debug('executing %s' % ' '.join(cmd))
-                rc = subprocess.call(cmd)
-                if rc != 0:
-                    logging.warn('%s failed with error %d' % (' '.join(cmd), rc))
-
-                if not os.path.exists(xml_index_file):
-                    logging.warn('no index file were created for %s' % doc_module)
-                    continue
-
-            if create_tarball:
-                # another formatting, to ship in tarball
-                cmd = ['xsltproc', '--output', os.path.join(temporary_tarball_dir, lang) + '/',
-                        '--nonet', '--xinclude',
-                        '--stringparam', 'libgo.lang', lang,
-                        '--param', 'libgo.tarball', 'true()',
-                        '--stringparam', 'db.chunk.extension', '.html',
-                        '--stringparam', 'theme.icon.admon.path', '',
-                        '--stringparam', 'theme.icon.nav.previous', 'nav-previous.png',
-                        '--stringparam', 'theme.icon.nav.next', 'nav-next.png',
-                        self.db2html_xsl_file, xml_file]
-                logging.debug('executing %s' % ' '.join(cmd))
-                rc = subprocess.call(cmd)
-                if rc != 0:
-                    logging.warn('%s failed with error %d' % (' '.join(cmd), rc))
-
-            if doc_figures:
-                # and copy images/
-                logging.debug('copying figures')
-                for doc_figure in doc_figures:
-                    src = os.path.join(lang_dirname, doc_figure)
-                    if not os.path.exists(src):
-                        # fallback to image from C locale.
-                        src = os.path.join(ext_dirname, self.dirname, 'C', doc_figure)
-                        if not os.path.exists(src):
-                            continue
-                    dst = os.path.join(web_output_dir, doc_figure + '.%s' % lang)
-                    if not os.path.exists(os.path.split(dst)[0]):
-                        os.makedirs(os.path.split(dst)[0])
-                    # byte for byte copy, figures are images
-                    shutil.copyfile(src, dst)
-
-                    if create_tarball:
-                        # for tarball, adds symlink, it will be followed when
-                        # creating the tarball
-                        dst = os.path.join(temporary_tarball_dir, lang, doc_figure)
-                        if not os.path.exists(os.path.split(dst)[0]):
-                            os.makedirs(os.path.split(dst)[0])
-                        if not os.path.exists(dst):
-                            os.symlink(os.path.abspath(src), dst)
-
-            try:
-                tree = self.process_xml_index(xml_index_file, doc, lang)
-            except errors.DepreciatedDocumentation:
-                logging.info('skipped %s (%s) (depreciated documentation)' % (
-                            self.modulename, self.one_dot_version))
-                break
-
-            # most documentation have @id == 'index', which is perfect for web
-            # publishing, for others, create a symlink from index html  lang
-            html_index_file = tree.getroot().attrib.get('index')
-            if not html_index_file:
-                logging.warn('empty html index file for module %s' % doc_module)
-            elif html_index_file != 'index':
-                link_html_index_file = os.path.join(
-                        web_output_dir, 'index.html.%s' % lang)
-                try:
-                    os.symlink('%s.html.%s' % (html_index_file, lang), link_html_index_file)
-                except OSError:
-                    logging.warn('failed to create symlink to index file for module %s' % doc_module)
-
-                if create_tarball:
-                    dst = os.path.join(temporary_tarball_dir, lang, 'index.html')
-                    os.symlink('%s.html' % html_index_file, dst)
-
-            if create_tarball:
-                # also copy some static files from data/skin/
-                for filename in os.listdir(os.path.join(data_dir, 'skin')):
-                    if not (filename.startswith('nav-') or filename.startswith('admon-')):
-                        continue
-                    src = os.path.join(data_dir, 'skin', filename)
-                    dst = os.path.join(temporary_tarball_dir, lang, os.path.basename(filename))
-                    os.symlink(os.path.abspath(src), dst)
-        else:
-            # only reached when the loop was not left with break, that is
-            # when the documentation was not found to be depreciated
-            if create_tarball:
-                self.create_tarball(doc, temporary_tarball_dir, tarball_filepath)
-                self.create_tarball_symlink(doc)
-
-        if app.config.create_tarballs:
-            shutil.rmtree(temporary_tarball_dir)
-
-        self.install_version_symlinks(doc)
-
-    def process_xml_index(self, xml_index_file, doc, lang):
-        '''Update doc title and abstract for lang from an index.xml file
-
-        Returns the parsed tree.  Raises errors.DepreciatedDocumentation,
-        after removing this version from doc, when the title is 'Problem
-        showing document'.
-        '''
-        tree = ET.parse(xml_index_file)
-
-        title_node_text = None
-        if tree.find('title') is not None:
-            title_node_text = tree.find('title').text
-        elif tree.find('{http://www.w3.org/1999/xhtml}title') is not None:
-            title_node_text = tree.find('{http://www.w3.org/1999/xhtml}title').text
-
-        if title_node_text == 'Problem showing document':
-            # title used in gnome-panel for depreciated documentation (such
-            # as window-list applet, moved to user guide); abort now, and
-            # remove this version.  Note it would be much easier if this
-            # could be detected earlier, perhaps with a marker in
-            # Makefile.am or the OMF file.
-            doc.versions.remove(self.one_dot_version)
-            if len(doc.versions) == 0:
-                # there were no other version, remove it completely
-                app.documents.remove(doc)
-            else:
-                # there was another version, fix up path to point to that one
-                previous_one_dot_version = re.match(r'\d+\.\d+',
-                        doc.versions[-1]).group()
-                if doc.version_keywords.get(previous_one_dot_version) != 'stable':
-                    # set link to /module//previous.version/
-                    doc.path = '/' + os.path.join(self.channel, self.modulename,
-                            previous_one_dot_version) + '/'
-                else:
-                    # set link to /module/stable/
-                    doc.path = '/' + os.path.join(self.channel, self.modulename,
-                            'stable') + '/'
-            raise errors.DepreciatedDocumentation()
-
-        quirks = app.overlay.get_quirks(self)
-
-        # NOTE(review): 'and' binds tighter than 'or', so for a module that
-        # is not nightly the title is assigned even when no title text was
-        # found (it is None then) -- confirm this is what is wanted
-        if not self.nightly or not doc.title.get(lang) and title_node_text:
-            doc.title[lang] = title_node_text
-
-        if tree.find('abstract') is not None and tree.find('abstract').text:
-            doc.abstract[lang] = tree.find('abstract').text
-        elif tree.find('{http://www.w3.org/1999/xhtml}abstract') is not None and \
-                tree.find('{http://www.w3.org/1999/xhtml}abstract').text:
-            doc.abstract[lang] = tree.find('{http://www.w3.org/1999/xhtml}abstract').text
-
-        if 'no-title-and-abstract-from-document' in quirks:
-            if doc.title.get(lang):
-                del doc.title[lang]
-            if doc.abstract.get(lang):
-                del doc.abstract[lang]
-
-        return tree
-
-    def create_tarball(self, doc, temporary_dir, tarball_filepath):
-        '''Pack the content of temporary_dir into tarball_filepath
-
-        Members are stored under a <module>-html-<version>/ directory;
-        symlinks with an absolute target are replaced by the file they
-        point to.  The tarball base name is recorded in doc.tarballs.
-        '''
-        logging.info('generating doc tarball in %s' % tarball_filepath[
-                len(app.config.output_dir):])
-        base_tarball_dir = '%s-html-%s' % (self.modulename, self.version)
-        tar = tarfile.open(tarball_filepath, 'w:gz')
-        try:
-            for base, dirs, files in os.walk(temporary_dir):
-                base_dir = '%s/%s' % (base_tarball_dir, base[len(temporary_dir):])
-                for filename in files:
-                    orig_file = os.path.join(base, filename)
-                    if os.path.islink(orig_file):
-                        if os.readlink(orig_file)[0] == '/':
-                            orig_file = os.path.realpath(orig_file)
-                    tarinfo = tar.gettarinfo(orig_file, '%s/%s' % (base_dir, filename))
-                    fileobj = open(orig_file, 'rb')
-                    try:
-                        tar.addfile(tarinfo, fileobj)
-                    finally:
-                        fileobj.close()
-        finally:
-            tar.close()
-
-        doc.tarballs[self.one_dot_version] = os.path.basename(tarball_filepath)
-
-
-class HtmlFilesModule(DocModule):
-    '''Class for documentation shipped in a tarball as HTML files
-
-    The module is described by an element (tarball_doc_elem): its
-    doc_module, channel and category attributes and its optional
-    transform-mode and index children are used.
-    '''
-    transform_mode = None
-
-    html2html_xsl_file = os.path.join(data_dir, 'xslt', 'html2html.xsl')
-
-    related_xsl_files = ['html2html.xsl', 'heading.xsl']
-
-    def __init__(self, tar, tarinfo, tarball_doc_elem):
-        DocModule.__init__(self, tar, tarinfo, None, False)
-        self.tarball_doc_elem = tarball_doc_elem
-        self.modulename = self.tarball_doc_elem.attrib.get('doc_module')
-        transform_mode_elem = self.tarball_doc_elem.find('transform-mode')
-        if transform_mode_elem is not None:
-            self.transform_mode = transform_mode_elem.text
-
-    def setup_channel(self):
-        '''Set self.channel from the channel attribute of the element'''
-        self.channel = self.tarball_doc_elem.attrib.get('channel')
-
-    def __str__(self):
-        return 'HTML files module at %s' % self.dirname
-
-    def _copy_if_newer(self, src, dst):
-        '''Copy src to dst unless dst exists and is not older than src'''
-        if not os.path.exists(dst) or \
-                os.stat(src)[stat.ST_MTIME] > os.stat(dst)[stat.ST_MTIME]:
-            # byte for byte copy, files must not go through text mode
-            shutil.copyfile(src, dst)
-
-    def _transform_html_file(self, parser, src, dst):
-        '''Restyle the HTML file src into dst with html2html.xsl
-
-        The file is parsed with the given html5lib parser and serialized as
-        XHTML in a temporary file, which is the input given to xsltproc.
-        '''
-        html_file = open(src)
-        try:
-            html_doc = parser.parse(html_file)
-        finally:
-            html_file.close()
-        html_doc.childNodes[-1].attributes['xmlns'] = 'http://www.w3.org/1999/xhtml'
-        temporary = tempfile.NamedTemporaryFile()
-        try:
-            temporary.write(html_doc.childNodes[-1].toxml().encode('utf-8'))
-            temporary.flush()
-
-            cmd = ['xsltproc', '--output', dst, '--nonet', '--xinclude']
-            if self.transform_mode:
-                cmd += ['--stringparam', 'libgo.h2hmode', self.transform_mode]
-            cmd += [self.html2html_xsl_file, temporary.name]
-            rc = subprocess.call(cmd)
-        finally:
-            # closing the temporary file also removes it
-            temporary.close()
-        if rc != 0:
-            logging.warn('%s failed with error %d' % (' '.join(cmd), rc))
-
-    def process(self):
-        '''Publish the files of the module and register the document
-
-        Files are taken from the top level of the extracted module
-        directory: .html files are restyled with xsltproc when html5lib is
-        available and copied otherwise, directories are copied again
-        entirely, other files are copied when newer.  All of this is
-        skipped when index.html in the output directory is more recent than
-        the tarball and the XSL files (and app.rebuild_all is not set).
-        '''
-        doc_module = self.modulename
-        src_dir = os.path.join(app.config.private_dir, 'extracts', self.dirname)
-
-        web_output_dir = os.path.join(app.config.output_dir, self.channel,
-                doc_module, self.one_dot_version)
-        if not os.path.exists(web_output_dir):
-            os.makedirs(web_output_dir)
-
-        index_path = os.path.join(web_output_dir, 'index.html')
-        if not app.rebuild_all and os.path.exists(index_path):
-            mtime = os.stat(index_path)[stat.ST_MTIME]
-        else:
-            mtime = 0
-
-        if mtime > max(self.mtime_tarball, self.mtime_xslt_files):
-            logging.debug('using already generated doc')
-        else:
-            logging.info('generating doc in %s' % web_output_dir[len(app.config.output_dir):])
-
-            filenames = os.listdir(src_dir)
-            html_filenames = [x for x in filenames if x.endswith('.html')]
-
-            if html5lib:
-                # convert files to XML, then process them with xsltproc
-                # to get library.gnome.org look
-                logging.debug('transforming files shipped with tarball')
-                parser = html5lib.HTMLParser()
-                for filename in html_filenames:
-                    self._transform_html_file(parser,
-                            os.path.join(src_dir, filename),
-                            os.path.join(web_output_dir, filename))
-            else:
-                # simply copy HTML files shipped in tarball
-                logging.debug('copying files shipped with tarball')
-                for filename in html_filenames:
-                    self._copy_if_newer(os.path.join(src_dir, filename),
-                            os.path.join(web_output_dir, filename))
-
-            # copy non-html files
-            for filename in filenames:
-                if filename.endswith('.html'):
-                    continue
-                src = os.path.join(src_dir, filename)
-                dst = os.path.join(web_output_dir, filename)
-                if os.path.isdir(src):
-                    # copytree() wants to create dst itself
-                    if os.path.exists(dst):
-                        shutil.rmtree(dst)
-                    shutil.copytree(src, dst)
-                else:
-                    self._copy_if_newer(src, dst)
-
-        doc = self.get_libgo_document(['en'])
-        if not doc:
-            return
-
-        doc.category = self.tarball_doc_elem.get('category')
-        doc.toc_id = doc.category
-
-        index_elem = self.tarball_doc_elem.find('index')
-        if index_elem is not None:
-            # index.html is made a link to the page named in <index>
-            if os.path.islink(index_path):
-                os.unlink(index_path)
-            os.symlink(index_elem.text, index_path)
-
-        self.install_version_symlinks(doc)
-
-
-class Formatter(logging.Formatter):
-    '''Class used for formatting log messages
-
-    Messages are rendered as '<first letter of level name>: <message>'.
-    When the TERM environment variable is 'screen', messages whose level
-    name starts with 'I' are also written to stdout wrapped in an
-    ESC k ... ESC \\ sequence.
-    '''
-
-    def __init__(self):
-        term = os.environ.get('TERM', '')
-        self.is_screen = (term == 'screen')
-        logging.Formatter.__init__(self)
-
-    def format(self, record):
-        '''Return the one line representation of record'''
-        # getMessage() merges record.args into the message, so calls such as
-        # logging.info('x=%s', x) are rendered properly; messages logged
-        # without arguments are left untouched
-        message = record.getMessage()
-        if self.is_screen and record.levelname[0] == 'I':
-            sys.stdout.write('\033klgo: %s\033\\' % message)
-            sys.stdout.flush()
-        return '%c: %s' % (record.levelname[0], message)
-
-
-class Overlay:
-    def __init__(self, overlay_file):
-        self.tree = ET.parse(overlay_file)
-        self.modified_docs = {}
-        self.new_docs = []
-        self.more_tarball_docs = {}
-        self.quirks = {}
-
-        for doc in self.tree.findall('/documents/document'):
-            if 'doc_module' in doc.attrib:
-                # modifying an existing document
-                self.modified_docs[(
-                        doc.attrib['doc_module'], doc.attrib['channel'])] = doc
-
-            if not 'doc_module' in doc.attrib or (
-                    doc.find('new') is not None or doc.find('local') is not None):
-                # new document
-                self.new_docs.append(doc)
-
-            if 'matching_tarball' in doc.attrib:
-                tarball = doc.attrib['matching_tarball']
-                if not tarball in self.more_tarball_docs:
-                    self.more_tarball_docs[tarball] = []
-                tarball_docs = self.more_tarball_docs[tarball]
-                tarball_docs.append(doc)
-
-        self.toc_mapping = {}
-        for mapping in self.tree.findall('/subsections/map'):
-            channel = mapping.attrib.get('channel')
-            sectionid = mapping.attrib.get('id')
-            subsection = mapping.attrib.get('subsection')
-            self.toc_mapping[(channel, sectionid)] = subsection
-
-        for quirks in self.tree.findall('/quirks'):
-            self.quirks[(quirks.attrib['doc_module'], quirks.attrib['channel'])] = quirks
-
-    def apply(self, document):
-        if (document.channel, document.toc_id) in self.toc_mapping:
-            document.subsection = self.toc_mapping[(document.channel, document.toc_id)]
-
-        key = (document.module, document.channel)
-        overlay = self.modified_docs.get(key)
-        if overlay is None:
-            return
-        if overlay.find('title') is not None:
-            for title in overlay.findall('title'):
-                lang = title.attrib.get(
-                        '{http://www.w3.org/XML/1998/namespace}lang', 'en')
-                document.title[lang] = title.text
-            for lang in document.languages:
-                if not document.title.get(lang):
-                    document.title[lang] = document.title.get('en')
-        if overlay.find('abstract') is not None:
-            for abstract in overlay.findall('abstract'):
-                lang = abstract.attrib.get(
-                        '{http://www.w3.org/XML/1998/namespace}lang', 'en')
-                document.abstract[lang] = abstract.text
-            for lang in document.languages:
-                if not document.abstract.get(lang):
-                    document.abstract[lang] = document.abstract.get('en')
-        if overlay.find('subsection') is not None:
-            document.subsection = overlay.find('subsection').text
-        if overlay.find('category') is not None:
-            document.toc_id = overlay.find('category').text
-        if overlay.attrib.get('weight'):
-            document.weight = overlay.attrib.get('weight')
-
-        if overlay.find('keywords') is not None:
-            for keyword in overlay.findall('keywords/keyword'):
-                document.keywords.append(keyword.text)
-
-    def get_channel_overlay(self, module, current_channel):
-        for doc in self.tree.findall('/documents/document'):
-            if doc.attrib.get('doc_module') != module:
-                continue
-            if doc.attrib.get('old-channel') == current_channel:
-                return doc.attrib.get('channel')
-        return current_channel
-
-    def get_new_docs(self):
-        l = []
-        for overlay in self.new_docs:
-            doc = RemoteDocument(overlay)
-            self.apply(doc)
-            l.append(doc)
-        return l
+from document import Document
+from overlay import Overlay
 
-    def get_section_weight(self, section_id):
-        for section in self.tree.findall('subsections/subsection'):
-            if section.attrib.get('id') == section_id:
-                return float(section.attrib.get('weight', 0.5))
-        return 0.5
+from modtypes.gnomedocbook import GnomeDocbookModule
+from modtypes.gtkdoc import GtkDocModule
 
-    def get_subindexes(self, channel):
-        for subindexes in self.tree.findall('subsections/subindexes'):
-            if subindexes.attrib.get('channel') != channel:
-                return
 
-            return [SubIndex(x) for x in subindexes.findall('subindex')]
+app = None
 
-    def get_quirks(self, doc):
-        key = (doc.modulename, doc.channel)
-        quirks = self.quirks.get(key)
-        if quirks is None:
-            return []
-        q = []
-        for quirk in quirks.findall('quirk'):
-            min_version = quirk.attrib.get('appears-in')
-            max_version = quirk.attrib.get('fixed-in')
-            if min_version and version_cmp(min_version, doc.version) > 0:
-                continue
-            if max_version and version_cmp(max_version, doc.version) <= 0:
-                continue
-            q.append(quirk.text)
-        return q
+# timeout for downloads, so it doesn't hang on connecting to sourceforge
+socket.setdefaulttimeout(10)
 
 
 class FtpDotGnomeDotOrg:
@@ -1447,8 +160,8 @@ class Lgo:
         self.options, args = parser.parse_args()
 
         logging.basicConfig(level = 10 + logging.CRITICAL - self.options.verbose*10,
-            formatter = Formatter())
-        logging.getLogger().handlers[0].setFormatter(Formatter())
+            formatter = utils.LogFormatter())
+        logging.getLogger().handlers[0].setFormatter(utils.LogFormatter())
 
         self.debug = (self.options.verbose >= 5)
         self.rebuild_all = self.options.rebuild_all
@@ -1691,7 +404,7 @@ class Lgo:
 
         gduxrefs = ET.Element('gduxrefs')
         for doc_module in doc_modules:
-            if not isinstance(doc_module, GnomeDocUtilsModule):
+            if not isinstance(doc_module, GnomeDocbookModule):
                 continue
             element = ET.SubElement(gduxrefs, 'doc')
             element.set('id', doc_module.modulename)
@@ -1779,7 +492,7 @@ class Lgo:
                 makefile_am = makefile_am.replace('\\\n', ' ')
                 if 'DOC_MODULE' in makefile_am and regex_gdu.findall(makefile_am):
                     logging.debug('found usage of gnome-doc-utils in %s' % tarinfo.name)
-                    doc = GnomeDocUtilsModule(tar, tarinfo, makefile_am, nightly)
+                    doc = GnomeDocbookModule(tar, tarinfo, makefile_am, nightly)
                 elif 'include $(top_srcdir)/gtk-doc.make' in makefile_am or \
                         ('gtkdoc-scan' in makefile_am and not 'gtk-doc' in tarinfo.name):
                     logging.debug('found usage of gtk-doc in %s' % tarinfo.name)
@@ -1788,7 +501,7 @@ class Lgo:
                         os.path.basename(filename).startswith('gtk-doc-'):
                     logging.debug('found gtk-doc almost gnome-doc-utils manual in %s' % tarinfo.name)
                     makefile_am += '\nDOC_MODULE = gtk-doc-manual\n'
-                    doc = GnomeDocUtilsModule(tar, tarinfo, makefile_am, nightly)
+                    doc = GnomeDocbookModule(tar, tarinfo, makefile_am, nightly)
                 else:
                     continue
 
@@ -2023,7 +736,7 @@ class Lgo:
                     '--stringparam', 'libgo.lang', lang,
                     '--stringparam', 'libgo.channel', 'about',
                     '--param', 'db2html.navbar.bottom', 'false()',
-                    GnomeDocUtilsModule.db2html_xsl_file, xml_file]
+                    GnomeDocbookModule.db2html_xsl_file, xml_file]
             logging.debug('executing %s' % ' '.join(cmd))
             rc = subprocess.call(cmd)
             if rc != 0:
@@ -2100,5 +813,7 @@ class Lgo:
 
 if __name__ == '__main__':
     app = Lgo()
+    app.Document = Document
+    __builtin__.__dict__['app'] = app
     app.run()
 
diff --git a/src/modtypes/__init__.py b/src/modtypes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/modtypes/base.py b/src/modtypes/base.py
new file mode 100644
index 0000000..b29e89f
--- /dev/null
+++ b/src/modtypes/base.py
@@ -0,0 +1,206 @@
+import logging
+import os
+import re
+import stat
+import tarfile
+
+from utils import version_cmp, is_version_number, download
+
+licence_modules = ['fdl', 'gpl', 'lgpl']
+
+
+class DocModule:
+    '''Base class for documentation shipped in tarballs'''
+    makefile_am = None
+
+    filename = None
+    dirname = None
+    modulename = None
+
+    related_xsl_files = None
+    mtime_xslt_files = 0
+
+    nightly = False
+
+    def __init__(self, tar, tarinfo, makefile_am, nightly = False):
+        self.dirname = os.path.dirname(tarinfo.name)
+        if makefile_am:
+            self.makefile_am = makefile_am
+            self.modulename = re.findall(r'DOC_MODULE\s?=\s?(.*)', makefile_am)[0].strip()
+            if '@' in self.modulename:
+                logging.warning('variadic module name: %s' % self.modulename)
+                # don't go interpreting autotools variables, as in this path
+                # lays madness, instead simply cut out the variable, such as
+                # gnome-scan- API_VERSION@ becomes gnome-scan.
+                self.modulename = self.modulename.split('@')[0].strip('-')
+            # remove the version part, so libsoup-2.4 is handled just like
+            # another version of libsoup
+            self.modulename = re.sub('-\d+\.\d+$', '', self.modulename)
+        self.version = os.path.splitext(tar.name)[0].split('-')[-1]
+        if self.version.endswith('.tar'):
+            self.version = self.version[:-4]
+
+        if nightly or self.version == 'nightly':
+            self.nightly = True
+            self.version = 'nightly'
+            self.one_dot_version = 'nightly'
+        else:
+            self.one_dot_version = re.match(r'\d+\.\d+', self.version).group()
+
+        if self.related_xsl_files:
+            self.mtime_xslt_files = max([os.stat(
+                        os.path.join(data_dir, 'xslt', x))[stat.ST_MTIME] \
+                        for x in self.related_xsl_files])
+
+    def extract(self):
+        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
+        if not os.path.exists(ext_dirname):
+            os.makedirs(ext_dirname)
+
+        base_tarball_name = os.path.basename(self.filename).rsplit('-', 1)[0]
+
+        if not os.path.exists(os.path.join(ext_dirname, self.dirname)):
+            logging.debug('extracting %s' % self.dirname)
+            tar = tarfile.open(self.filename, 'r')
+            for tarinfo in tar.getmembers():
+                if not os.path.split(tarinfo.name)[0].startswith(self.dirname):
+                    continue
+                dest = os.path.join(ext_dirname, tarinfo.name)
+                if tarinfo.isdir() and not os.path.exists(dest):
+                    os.makedirs(dest)
+                elif tarinfo.isreg():
+                    if not os.path.exists(os.path.dirname(dest)):
+                        os.makedirs(os.path.dirname(dest))
+                    open(dest, 'w').write(tar.extractfile(tarinfo).read())
+            tar.close()
+
+    def setup_path(self):
+        if self.modulename in licence_modules:
+            # special casing the licences, they do not go in a
+            # versioned path
+            self.path = '/' + os.path.join(self.channel, self.modulename) + '/'
+        else:
+            self.path = '/' + os.path.join(self.channel, self.modulename,
+                    self.one_dot_version) + '/'
+
+    def get_libgo_document(self, doc_linguas):
+        try:
+            doc = [x for x in app.documents if \
+                    x.module == self.modulename and x.channel == self.channel][0]
+        except IndexError:
+            doc = app.Document()
+            doc.filename = self.dirname
+            doc.module = self.modulename
+            doc.channel = self.channel
+            doc.languages = doc_linguas
+            doc.path = self.path
+            doc._last_version = self.version
+            doc.versions = [self.one_dot_version]
+            app.documents.append(doc)
+        else:
+            if doc._last_version == self.version:
+                # file was already processed in a previous moduleset
+                return None
+
+            if not self.nightly:
+                # a more recent version may already have been installed; probably
+                # because the same module is being mentioned as an extra tarball
+                # with an inferior version number; don't update path in this
+                # situation.  (see bug #530517 for the mention of this problem)
+                if version_cmp(doc._last_version, self.version) <= 0:
+                    doc._last_version = self.version
+                    if int(self.one_dot_version.split('.')[1]) % 2 == 0:
+                        # update path to point to the latest version (but no
+                        # development versions)
+                        doc.path = self.path
+
+            if not self.one_dot_version in doc.versions:
+                doc.versions.append(self.one_dot_version)
+            for lang in doc_linguas:
+                if not lang in doc.languages:
+                    doc.languages.append(lang)
+
+        doc.version_mapping[self.one_dot_version] = self.version
+
+        # only keep authorised languages
+        if app.config.languages:
+            for lang in doc.languages[:]:
+                if lang not in app.config.languages:
+                    doc.languages.remove(lang)
+
+        return doc
+
+    def install_version_symlinks(self, doc):
+        '''Create stable and unstable symlinks
+
+        The symlinks are created in the module directory of the output
+        directory and point to a major.minor version directory (or to the
+        "stable" link itself, for "unstable").  doc.path and
+        doc.version_keywords are updated when a link gets installed.
+        Nothing is done for nightly builds, for channels other than users,
+        devel and admin, and for versions missing from doc.versions.
+        '''
+
+        if self.nightly:
+            return
+
+        if self.channel not in ('users', 'devel', 'admin'):
+            return
+
+        if not self.one_dot_version in doc.versions:
+            # this version doesn't appear in available versions, probably it
+            # had been deprecated
+            return
+
+        web_output_dir = os.path.join(app.config.output_dir, self.channel, self.modulename)
+        # development releases have an odd minor number, or 0 as major number
+        development_release = (int(self.one_dot_version.split('.')[1]) % 2 == 1) or (
+                int(self.one_dot_version.split('.')[0]) == 0)
+
+        if development_release:
+            keyword = 'unstable'
+        else:
+            keyword = 'stable'
+
+        path = os.path.join(web_output_dir, keyword)
+        installed = False
+        if os.path.islink(path):
+            currently_marked = os.readlink(path)
+            if currently_marked == 'stable':
+                # link pointing to the stable link, look at the version
+                # behind it
+                currently_marked = os.readlink(os.path.join(web_output_dir, 'stable'))
+            if version_cmp(self.version, currently_marked) >= 0:
+                # install new symlink
+                os.unlink(path)
+                os.symlink(self.one_dot_version, path)
+                installed = True
+        else:
+            os.symlink(self.one_dot_version, path)
+            installed = True
+
+        if installed:
+            if not development_release:
+                if doc.path == '/%s/%s/%s/' % (self.channel, self.modulename, self.one_dot_version):
+                    # set default path to use the keyword
+                    doc.path = '/%s/%s/stable/' % (self.channel, self.modulename)
+
+                # if there is no unstable link, create it even for a stable release
+                # (or if stable is newer)
+                path = os.path.join(web_output_dir, 'unstable')
+                if os.path.islink(path):
+                    currently_unstable = os.readlink(path)
+                    if currently_unstable == 'stable' or \
+                            version_cmp(self.version, currently_unstable) >= 0:
+                        os.unlink(path)
+                        os.symlink('stable', path)
+                else:
+                    os.symlink('stable', path)
+            elif not os.path.exists(os.path.join(web_output_dir, 'stable')):
+                # update default path to have codename if there is no stable
+                # release
+                doc.path = '/%s/%s/unstable/' % (self.channel, self.modulename)
+
+            # a keyword is only attached to a single version, remove it from
+            # the version that had it before
+            for k in doc.version_keywords.keys():
+                if doc.version_keywords.get(k) == keyword:
+                    del doc.version_keywords[k]
+            doc.version_keywords[self.one_dot_version] = keyword
+
+    def create_tarball_symlink(self, doc):
+        tarball_dest = '../%s-html-%s.tar.gz' % (self.modulename, self.version)
+        in_version_path = os.path.join(app.config.output_dir, self.channel,
+                self.modulename, self.one_dot_version,
+                '%s-html.tar.gz' % self.modulename)
+        if os.path.islink(in_version_path):
+            os.unlink(in_version_path)
+        os.symlink(tarball_dest, in_version_path)
+
diff --git a/src/modtypes/gnomedocbook.py b/src/modtypes/gnomedocbook.py
new file mode 100644
index 0000000..40e18f4
--- /dev/null
+++ b/src/modtypes/gnomedocbook.py
@@ -0,0 +1,387 @@
+import glob
+import logging
+import os
+import re
+import shutil
+import stat
+import subprocess
+import tarfile
+import tempfile
+
+try:
+    import elementtree.ElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+try:
+    import html5lib
+except ImportError:
+    html5lib = None
+
+
+from base import DocModule
+
+class GnomeDocbookModule(DocModule):
+    '''Class for documentation shipped in a tarball and using gnome-doc-utils'''
+    db2html_xsl_file = os.path.join(data_dir, 'xslt', 'db2html.xsl')
+    category = None
+
+    related_xsl_files = ['db2html.xsl', 'heading.xsl']
+
+    def __init__(self, tar, tarinfo, makefile_am, nightly):
+        DocModule.__init__(self, tar, tarinfo, makefile_am, nightly)
+        if self.modulename == '@PACKAGE_NAME@':
+            # ekiga has this, use another way, looking at omf files
+            try:
+                omf_file = [x.name for x in tar.getmembers() if \
+                           x.name.startswith(self.dirname) and x.name.endswith('.omf.in')][0]
+            except IndexError:
+                logging.error('failed to get DOC_MODULE for %s' % tarinfo.name)
+            self.modulename = os.path.split(omf_file)[-1][:-len('.omf.in')]
+
+    def setup_channel(self):
+        # get category from omf file
+        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
+        omf_file = glob.glob(os.path.join(ext_dirname, self.dirname) + '/*.omf.in')
+        if not omf_file:
+            omf_file = glob.glob(os.path.join(ext_dirname, self.dirname) + '/C/*.omf.in')
+        if omf_file:
+            try:
+                self.category = ET.parse(omf_file[0]).find('resource/subject').attrib['category']
+            except (IndexError, KeyError):
+                pass
+
+        channel = 'users'
+        if self.category and (self.category.startswith('GNOME|Development') or
+                self.category.startswith('GNOME|Applications|Programming')):
+            channel = 'devel'
+
+        self.channel = app.overlay.get_channel_overlay(self.modulename, channel)
+        if self.channel == 'misc' and not self.category:
+            self.category = 'GNOME|Other'
+
+    def __str__(self):
+        return 'gnome-doc-utils module at %s' % self.dirname
+
+    def process(self):
+        '''Generate the HTML files of the module, for all its languages
+
+        Languages and figures are read from the Makefile.am, the docbook
+        files are formatted with xsltproc into the output directory of the
+        module, figures are copied along and, when app.config.create_tarballs
+        is set, a tarball of the HTML files is also prepared.  Version
+        symlinks are installed at the end.
+        '''
+        doc_module = self.modulename
+        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
+
+        # languages, from DOC_LINGUAS, 'en' being always present
+        try:
+            doc_linguas = re.findall(r'DOC_LINGUAS\s+=[\t ](.*)',
+                    self.makefile_am)[0].split()
+            if not 'en' in doc_linguas:
+                doc_linguas.append('en')
+        except IndexError:
+            doc_linguas = ['en']
+
+        # figures, from DOC_FIGURES; IndexError is raised both when the
+        # variable is missing and (explicitly) when a listed figure doesn't
+        # exist, figures are looked for in the C/figures and C/images
+        # directories in those cases
+        try:
+            doc_figures = re.findall('DOC_FIGURES\s+=\s+(.*)',
+                    self.makefile_am)[0].split()
+            figures_dirname = os.path.join(ext_dirname, self.dirname, 'C')
+            for doc_figure in doc_figures:
+                if not os.path.exists(os.path.join(figures_dirname, doc_figure)):
+                    logging.warning('figure (%s) listed but not present, going to autodiscover' % \
+                        doc_figure)
+                    raise IndexError()
+        except IndexError:
+            figures_dirname = os.path.join(ext_dirname, self.dirname, 'C', 'figures')
+            images_dirname = os.path.join(ext_dirname, self.dirname, 'C', 'images')
+            doc_figures = []
+            if os.path.exists(figures_dirname):
+                doc_figures += [os.path.join('figures', x) for x in \
+                        os.listdir(figures_dirname) \
+                        if os.path.splitext(x)[1] in ('.png', '.jpg', '.jpeg')]
+            if os.path.exists(images_dirname):
+                doc_figures += [os.path.join('images', x) for x in \
+                        os.listdir(images_dirname) \
+                        if os.path.splitext(x)[1] in ('.png', '.jpg', '.jpeg')]
+
+        # only keep authorised languages (and C)
+        doc_linguas.sort()
+        if app.config.languages:
+            for lang in doc_linguas[:]:
+                if lang not in app.config.languages + ['C']:
+                    doc_linguas.remove(lang)
+
+        doc = self.get_libgo_document(doc_linguas)
+        if not doc:
+            return
+
+        if self.category:
+            doc.category = self.category
+            doc.toc_id = app.toc_mapping.get(doc.category)
+
+        web_output_dir = os.path.join(app.config.output_dir, self.channel,
+                doc_module, self.one_dot_version)
+        
+        quirks = app.overlay.get_quirks(self)
+        if not 'flat-rendering' in quirks:
+            doc.single_page_alternative = True
+
+        logging.info('generating doc in %s' % web_output_dir[len(app.config.output_dir):])
+        if not os.path.exists(web_output_dir):
+            os.makedirs(web_output_dir)
+
+        # temporary_tarball_dir and tarball_filepath are only defined when
+        # tarballs are to be created
+        if app.config.create_tarballs:
+            temporary_tarball_dir = tempfile.mkdtemp()
+            tarball_name = '%s-html-%s.tar.gz' % (self.modulename, self.version)
+            tarball_filepath = os.path.join(app.config.output_dir, self.channel,
+                    self.modulename, tarball_name)
+
+            if os.path.exists(tarball_filepath):
+                # doc already exists, keep it in tarballs dictionary as it may
+                # be skipped in this run, if the documentation is already
+                # uptodate.
+                doc.tarballs[self.one_dot_version] = tarball_name
+
+        base_tarball_name = os.path.basename(self.filename).rsplit('-', 1)[0]
+
+        create_tarball = app.config.create_tarballs
+
+        for lang in doc.languages:
+            # English documentation is in the C directory, unless there is
+            # an en directory
+            if lang == 'en' and not os.path.exists(os.path.join(ext_dirname, self.dirname, 'en')):
+                lang_dirname = os.path.join(ext_dirname, self.dirname, 'C')
+            else:
+                lang_dirname = os.path.join(ext_dirname, self.dirname, lang)
+
+            xml_file = os.path.join(lang_dirname, doc_module + '.xml')
+            if not os.path.exists(xml_file):
+                # the document had a translation available in a previous
+                # version, and it got removed
+                continue
+
+            xml_index_file = os.path.join(web_output_dir, 'index.xml.%s' % lang)
+            skip_html_files = False
+            # an existing index file, newer than both the tarball and the
+            # stylesheets, is reused unless a rebuild was asked
+            if not app.rebuild_all and (
+                    app.rebuild_language is None or
+                    lang != app.rebuild_language) and os.path.exists(xml_index_file):
+                mtime = os.stat(xml_index_file)[stat.ST_MTIME]
+                if mtime > max(self.mtime_tarball, self.mtime_xslt_files):
+                    try:
+                        tree = self.process_xml_index(xml_index_file, doc, lang)
+                    except errors.DepreciatedDocumentation:
+                        logging.info('skipped %s (%s) (depreciated documentation)' % (
+                                    self.modulename, self.one_dot_version))
+                        break
+
+                    # tarball_filepath is only evaluated when create_tarball
+                    # is true
+                    if not create_tarball or os.path.exists(tarball_filepath):
+                        logging.debug('using already generated doc in %s' % lang)
+                        create_tarball = False
+                        continue
+                    # html files are uptodate but the tarball is missing
+                    skip_html_files = True
+
+            if 'missing-id-on-top-book-element' in quirks:
+                # Evolution documentation top element is currently <book
+                # lang="en"> but the gnome-doc-utils stylesheets are
+                # looking for the id # attribute to get filename.
+                # -- http://bugzilla.gnome.org/show_bug.cgi?id=462811
+                t = open(xml_file).read()
+                open(xml_file + '.fixed', 'w').write(t.replace('\n<book ', '\n<book id="index" '))
+                xml_file = xml_file + '.fixed'
+
+            if 'correct-article-index' in quirks:
+                # 2.20 release notes had <sect1 id="index"><title>Introduction...
+                # instead of <sect1 id="intro"><title>Introduction... and that
+                # prevented a correct id="index" on top <article>
+                # older release notes had <article id="article"
+                t = open(xml_file).read()
+                open(xml_file + '.fixed', 'w').write(
+                        re.sub('(<article.*?)(\sid=".*?")', r'\1', t # remove id on <article>
+                        ).replace('<article ', '<article id="index" '
+                        ).replace('<sect1 id="index"', '<sect1 id="intro"'))
+                xml_file = xml_file + '.fixed'
+
+            if not skip_html_files:
+                # format docbook into html files
+                cmd = ['xsltproc', '--output', web_output_dir + '/',
+                        '--nonet', '--xinclude',
+                        '--stringparam', 'libgo.lang', lang,
+                        '--stringparam', 'libgo.channel', self.channel,
+                        self.db2html_xsl_file, xml_file]
+
+                # additional options are inserted at index 5, that is right
+                # after --xinclude
+                if self.nightly:
+                    cmd[5:5] = ['--param', 'libgo.nightly', 'true()']
+                onepage_cmd = cmd[:]
+
+                # 1st, generate a single page with all documentation, and
+                # rename it to $modulename.html.$lang (unless a single page
+                # is the default rendering)
+                if not 'flat-rendering' in quirks:
+                    onepage_cmd.insert(5, '--stringparam')
+                    onepage_cmd.insert(6, 'db.chunk.max_depth')
+                    onepage_cmd.insert(7, '0')
+                    logging.debug('executing %s' % ' '.join(onepage_cmd))
+                    rc = subprocess.call(onepage_cmd)
+                    if rc != 0:
+                        logging.warn('%s failed with error %d' % (' '.join(onepage_cmd), rc))
+                    index_html = os.path.join(web_output_dir, 'index.html.%s' % lang)
+                    if os.path.exists(index_html):
+                        os.rename(index_html,
+                            os.path.join(web_output_dir, '%s.html.%s' % (doc_module, lang)))
+
+                # 2nd, generate default rendering, to index.html.$lang
+                if 'flat-rendering' in quirks:
+                    cmd.insert(5, '--stringparam')
+                    cmd.insert(6, 'db.chunk.max_depth')
+                    cmd.insert(7, '0')
+                    cmd.insert(5, '--stringparam')
+                    cmd.insert(6, 'db.chunk.autotoc_depth')
+                    cmd.insert(7, '1')
+                if 'languages-in-sidebar' in quirks:
+                    cmd.insert(5, '--stringparam')
+                    cmd.insert(6, 'libgo.languages_in_sidebar')
+                    cmd.insert(7, ','.join(sorted(doc_linguas)))
+                logging.debug('executing %s' % ' '.join(cmd))
+                rc = subprocess.call(cmd)
+                if rc != 0:
+                    logging.warn('%s failed with error %d' % (' '.join(cmd), rc))
+                
+                if not os.path.exists(xml_index_file):
+                    logging.warn('no index file were created for %s' % doc_module)
+                    continue
+
+            if create_tarball:
+                # another formatting, to ship in tarball
+                cmd = ['xsltproc', '--output', os.path.join(temporary_tarball_dir, lang) + '/',
+                        '--nonet', '--xinclude',
+                        '--stringparam', 'libgo.lang', lang,
+                        '--param', 'libgo.tarball', 'true()',
+                        '--stringparam', 'db.chunk.extension', '.html',
+                        '--stringparam', 'theme.icon.admon.path', '',
+                        '--stringparam', 'theme.icon.nav.previous', 'nav-previous.png',
+                        '--stringparam', 'theme.icon.nav.next', 'nav-next.png',
+                        self.db2html_xsl_file, xml_file]
+                logging.debug('executing %s' % ' '.join(cmd))
+                rc = subprocess.call(cmd)
+                if rc != 0:
+                    logging.warn('%s failed with error %d' % (' '.join(cmd), rc))
+
+            if doc_figures:
+                # and copy images/
+                logging.debug('copying figures')
+                for doc_figure in doc_figures:
+                    src = os.path.join(lang_dirname, doc_figure)
+                    if not os.path.exists(src):
+                        # fallback to image from C locale.
+                        src = os.path.join(ext_dirname, self.dirname, 'C', doc_figure)
+                        if not os.path.exists(src):
+                            continue
+                    # figures get the language as extension, like html files
+                    dst = os.path.join(web_output_dir, doc_figure + '.%s' % lang)
+                    if not os.path.exists(os.path.split(dst)[0]):
+                        os.makedirs(os.path.split(dst)[0])
+                    open(dst, 'w').write(open(src, 'r').read())
+
+                    if create_tarball:
+                        # for tarball, adds symlink, it will be followed when
+                        # creating the tarball
+                        dst = os.path.join(temporary_tarball_dir, lang, doc_figure)
+                        if not os.path.exists(os.path.split(dst)[0]):
+                            os.makedirs(os.path.split(dst)[0])
+                        if not os.path.exists(dst):
+                            os.symlink(os.path.abspath(src), dst)
+
+            try:
+                tree = self.process_xml_index(xml_index_file, doc, lang)
+            except errors.DepreciatedDocumentation:
+                logging.info('skipped %s (%s) (depreciated documentation)' % (
+                            self.modulename, self.one_dot_version))
+                break
+
+            # most documentation have @id == 'index', which is perfect for web
+            # publishing, for others, create a symlink from index.html.$lang
+            html_index_file = tree.getroot().attrib.get('index')
+            if not html_index_file:
+                logging.warn('empty html index file for module %s' % doc_module)
+            elif html_index_file != 'index':
+                link_html_index_file = os.path.join(
+                        web_output_dir, 'index.html.%s' % lang)
+                try:
+                    os.symlink('%s.html.%s' % (html_index_file, lang), link_html_index_file)
+                except OSError:
+                    logging.warn('failed to create symlink to index file for module %s' % doc_module)
+
+                if create_tarball:
+                    dst = os.path.join(temporary_tarball_dir, lang, 'index.html')
+                    os.symlink('%s.html' % html_index_file, dst)
+
+            if create_tarball:
+                # also copy some static files from data/skin/
+                for filename in os.listdir(os.path.join(data_dir, 'skin')):
+                    if not (filename.startswith('nav-') or filename.startswith('admon-')):
+                        continue
+                    src = os.path.join(data_dir, 'skin', filename)
+                    dst = os.path.join(temporary_tarball_dir, lang, os.path.basename(filename))
+                    os.symlink(os.path.abspath(src), dst)
+        else:
+            # else clause of the for loop, only executed when the loop was
+            # not interrupted by a break
+            if create_tarball:
+                self.create_tarball(doc, temporary_tarball_dir, tarball_filepath)
+                self.create_tarball_symlink(doc)
+
+        if app.config.create_tarballs:
+            shutil.rmtree(temporary_tarball_dir)
+
+        self.install_version_symlinks(doc)
+
+    def process_xml_index(self, xml_index_file, doc, lang):
+        '''Parse xml_index_file and update doc's title and abstract for lang.
+
+        Returns the parsed ElementTree.  Raises
+        errors.DepreciatedDocumentation, after removing this version from
+        doc, when the title is the 'Problem showing document' placeholder.
+        '''
+        xhtml_ns = '{http://www.w3.org/1999/xhtml}'
+        tree = ET.parse(xml_index_file)
+
+        # plain <title> wins, the XHTML-namespaced one is the fallback
+        title_node = tree.find('title')
+        if title_node is None:
+            title_node = tree.find(xhtml_ns + 'title')
+        title_node_text = None
+        if title_node is not None:
+            title_node_text = title_node.text
+
+        if title_node_text == 'Problem showing document':
+            # title used in gnome-panel for depreciated documentation (such
+            # as window-list applet, moved to user guide); abort now, and
+            # remove this version.  Note it would be much easier if this
+            # could be detected earlier, perhaps with a marker in
+            # Makefile.am or the OMF file.
+            doc.versions.remove(self.one_dot_version)
+            if not doc.versions:
+                # that was the only version, drop the document altogether
+                app.documents.remove(doc)
+            else:
+                # another version remains, point the path at it: through
+                # /stable/ when it is the stable one, by number otherwise
+                previous_one_dot_version = re.match(r'\d+\.\d+',
+                        doc.versions[-1]).group()
+                target = previous_one_dot_version
+                if doc.version_keywords.get(previous_one_dot_version) == 'stable':
+                    target = 'stable'
+                doc.path = '/' + os.path.join(self.channel, self.modulename,
+                        target) + '/'
+            raise errors.DepreciatedDocumentation()
+
+        quirks = app.overlay.get_quirks(self)
+
+        # 'and' binds tighter than 'or': non-nightly modules always take the
+        # title of the document, nightly ones only fill in a missing title
+        if (not self.nightly) or (not doc.title.get(lang) and title_node_text):
+            doc.title[lang] = title_node_text
+
+        # first non-empty abstract, plain element before the XHTML one
+        for tag in ('abstract', xhtml_ns + 'abstract'):
+            abstract_node = tree.find(tag)
+            if abstract_node is not None and abstract_node.text:
+                doc.abstract[lang] = abstract_node.text
+                break
+
+        if 'no-title-and-abstract-from-document' in quirks:
+            # the quirk discards whatever is set for this language
+            for texts in (doc.title, doc.abstract):
+                if texts.get(lang):
+                    del texts[lang]
+
+        return tree
+
+    def create_tarball(self, doc, temporary_dir, tarball_filepath):
+        '''Archive the files under temporary_dir into tarball_filepath.
+
+        Members are stored below a '<modulename>-html-<version>' directory.
+        Symlinks with an absolute target are replaced by the file they point
+        to, other symlinks are stored as symlinks.  The tarball name is then
+        recorded in doc.tarballs for this version.
+        '''
+        logging.info('generating doc tarball in %s' % tarball_filepath[
+                len(app.config.output_dir):])
+        base_tarball_dir = '%s-html-%s' % (self.modulename, self.version)
+        tar = tarfile.open(tarball_filepath, 'w:gz')
+        try:
+            for base, dirs, files in os.walk(temporary_dir):
+                base_dir = '%s/%s' % (base_tarball_dir, base[len(temporary_dir):])
+                for filename in files:
+                    orig_file = os.path.join(base, filename)
+                    if os.path.islink(orig_file) and \
+                            os.readlink(orig_file).startswith('/'):
+                        orig_file = os.path.realpath(orig_file)
+                    # add() opens and closes the member file itself
+                    tar.add(orig_file, '%s/%s' % (base_dir, filename),
+                            recursive=False)
+        finally:
+            # close the archive even when a member could not be added
+            tar.close()
+
+        doc.tarballs[self.one_dot_version] = os.path.basename(tarball_filepath)
+
+
diff --git a/src/modtypes/gtkdoc.py b/src/modtypes/gtkdoc.py
new file mode 100644
index 0000000..ad0851b
--- /dev/null
+++ b/src/modtypes/gtkdoc.py
@@ -0,0 +1,207 @@
+import glob
+import logging
+import os
+import re
+import stat
+import subprocess
+import tarfile
+import tempfile
+
+try:
+    import elementtree.ElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+try:
+    import html5lib
+except ImportError:
+    html5lib = None
+
+from base import DocModule
+
+
+class GtkDocModule(DocModule):
+    '''Class for documentation shipped in a tarball and using gtk-doc'''
+
+    gtkdoc_xsl_file = os.path.join(data_dir, 'xslt', 'gtk-doc.xsl')
+    html2html_xsl_file = os.path.join(data_dir, 'xslt', 'html2html.xsl')
+
+    related_xsl_files = ['gtk-doc.xsl', 'heading.xsl']
+
+    def setup_channel(self):
+        '''Set self.channel from the overlay, passing 'devel' as current channel.'''
+        overlay = app.overlay
+        self.channel = overlay.get_channel_overlay(self.modulename, 'devel')
+
+    def __str__(self):
+        '''Describe the module by its kind and directory.'''
+        location = self.dirname
+        return 'gtk-doc module at %s' % location
+
+    def process(self):
+        '''Generate the HTML for this gtk-doc module and register the document.
+
+        xsltproc is run with gtk-doc.xsl on the main SGML file, unless the
+        .devhelp file found in the output directory is newer than both
+        self.mtime_tarball and self.mtime_xslt_files.  When xsltproc exits
+        with code 6, or reports an XInclude error on stderr, the HTML files
+        shipped in the html/ directory of the tarball are used instead:
+        passed through html2html.xsl if html5lib is available, copied as
+        they are otherwise.
+
+        Images and gtk-doc PNG files are then copied, the title of the
+        document is read from the generated files, and the tarball and
+        version symlinks are created.
+        '''
+        doc_module = self.modulename
+        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
+
+        # name of the main SGML file, from the DOC_MAIN_SGML_FILE line of
+        # self.makefile_am; raises IndexError if there is no such line
+        main_sgml_file = re.findall(r'DOC_MAIN_SGML_FILE\s?=\s?(.*)',
+                self.makefile_am)[0].strip()
+        main_sgml_file = main_sgml_file.replace('$(DOC_MODULE)', doc_module)
+
+        # images listed in HTML_IMAGES, if any, without their $(srcdir)/ prefix
+        try:
+            html_images = re.findall('HTML_IMAGES\s+=\s+(.*)', self.makefile_am)[0].split()
+        except IndexError:
+            html_images = []
+        html_images = [x.replace('$(srcdir)/', '') for x in html_images]
+
+        web_output_dir = os.path.join(app.config.output_dir, self.channel,
+                doc_module, self.one_dot_version)
+        if not os.path.exists(web_output_dir):
+            os.makedirs(web_output_dir)
+
+        # the .devhelp file serves as timestamp of the generated doc; a
+        # mtime of 0 forces the generation
+        if not app.rebuild_all and os.path.exists(
+                os.path.join(web_output_dir, '%s.devhelp' % doc_module)):
+            mtime = os.stat(os.path.join(web_output_dir, '%s.devhelp' % doc_module))[stat.ST_MTIME]
+        else:
+            mtime = 0
+
+        if mtime > max(self.mtime_tarball, self.mtime_xslt_files):
+            logging.debug('using already generated doc')
+        else:
+            logging.info('generating doc in %s' % web_output_dir[len(app.config.output_dir):])
+            cmd = ['xsltproc', '--output', web_output_dir + '/',
+                    '--nonet', '--xinclude',
+                    '--stringparam', 'libgo.lang', 'en',
+                    '--stringparam', 'gtkdoc.bookname', doc_module,
+                    '--stringparam', 'gtkdoc.version', '"(~lgo)"',
+                    '--stringparam', 'libgo.channel', self.channel,
+                    self.gtkdoc_xsl_file,
+                    os.path.join(ext_dirname, self.dirname, main_sgml_file)]
+            logging.debug('executing %s' % ' '.join(cmd))
+            # only stderr is captured; stdout is not piped, so the stdout
+            # value returned by communicate() is None
+            xsltproc = subprocess.Popen(cmd, stdin = subprocess.PIPE, stderr = subprocess.PIPE)
+            stdout, stderr = xsltproc.communicate()
+            if re.findall('XInclude error : could not load.*and no fallback was found', stderr):
+                # treat the XInclude error like a failed build, whatever
+                # the actual return code was
+                logging.warn('XInclude error, creating fake xsltproc return code')
+                xsltproc.returncode = 6
+
+            # return codes other than 0 and 6 are logged and nothing more
+            if xsltproc.returncode != 0:
+                logging.warn('%s failed with error %d' % (' '.join(cmd), xsltproc.returncode))
+                if xsltproc.returncode == 6:
+                    # build failed, probably because it has inline references in
+                    # documentation and would require a full module build to get
+                    # them properly.  (happens with GTK+)
+
+                    if html5lib:
+                        # convert files to XML, then process them with xsltproc
+                        # to get library.gnome.org look
+
+                        logging.debug('transforming files shipped with tarball')
+                        parser = html5lib.HTMLParser()
+
+                        for filename in os.listdir(os.path.join(
+                                    ext_dirname, self.dirname, 'html')):
+                            src = os.path.join(
+                                    ext_dirname, self.dirname, 'html', filename)
+                            dst = os.path.join(web_output_dir, filename)
+                            if not filename.endswith('.html'):
+                                # anything but HTML is copied unchanged
+                                open(dst, 'w').write(open(src, 'r').read())
+                                continue
+                            # serialize the parsed HTML as XHTML into a
+                            # temporary file that xsltproc can read
+                            doc = parser.parse(open(src))
+                            doc.childNodes[-1].attributes['xmlns'] = 'http://www.w3.org/1999/xhtml'
+                            temporary = tempfile.NamedTemporaryFile()
+                            temporary.write(doc.childNodes[-1].toxml().encode('utf-8'))
+                            temporary.flush()
+
+                            # NOTE(review): temporary.name is presumably an
+                            # absolute path, in which case os.path.join()
+                            # returns it alone
+                            cmd = ['xsltproc', '--output', dst,
+                                    '--nonet', '--xinclude',
+                                    '--stringparam', 'libgo.h2hmode', 'gtk-doc',
+                                    self.html2html_xsl_file,
+                                    os.path.join(ext_dirname,
+                                            self.dirname, temporary.name)]
+                            # the return code is stored but not checked
+                            rc = subprocess.call(cmd)
+                    else:
+                        # simply copy files shipped in tarball
+                        logging.debug('copying files shipped with tarball')
+                        for filename in os.listdir(os.path.join(
+                                    ext_dirname, self.dirname, 'html')):
+                            src = os.path.join(ext_dirname, self.dirname, 'html', filename)
+                            dst = os.path.join(web_output_dir, filename)
+                            if not os.path.exists(os.path.split(dst)[0]):
+                                os.makedirs(os.path.split(dst)[0])
+                            # copy only what is missing or outdated
+                            if not os.path.exists(dst) or \
+                                    os.stat(src)[stat.ST_MTIME] > os.stat(dst)[stat.ST_MTIME]:
+                                open(dst, 'w').write(open(src, 'r').read())
+
+            if html_images:
+                # and copy images/
+                logging.debug('copying images')
+                for html_image in html_images:
+                    src = os.path.join(ext_dirname, self.dirname, html_image)
+                    if not os.path.exists(src):
+                        continue
+                    # images all land at the top of the output directory
+                    dst = os.path.join(web_output_dir, os.path.basename(html_image))
+                    if not os.path.exists(os.path.split(dst)[0]):
+                        os.makedirs(os.path.split(dst)[0])
+                    if not os.path.exists(dst) or \
+                            os.stat(src)[stat.ST_MTIME] > os.stat(dst)[stat.ST_MTIME]:
+                        open(dst, 'w').write(open(src, 'r').read())
+
+            # in any case, copy png files from gtk-doc
+            for src in glob.glob('/usr/share/gtk-doc/data/*.png'):
+                dst = os.path.join(web_output_dir, os.path.basename(src))
+                if not os.path.exists(dst) or \
+                        os.stat(src)[stat.ST_MTIME] > os.stat(dst)[stat.ST_MTIME]:
+                    open(dst, 'w').write(open(src, 'r').read())
+
+        doc = self.get_libgo_document(['en'])
+        if not doc:
+            return
+
+        doc.keywords.append('gtk-doc')
+        doc.category = 'api'
+        doc.toc_id = 'api'
+
+        # the title comes from index.xml.en when it exists, from the first
+        # .devhelp file found otherwise
+        devhelp_file = [x for x in os.listdir(web_output_dir) if x.endswith('.devhelp')]
+        if os.path.exists(os.path.join(web_output_dir, 'index.xml.en')):
+            tree = ET.parse(os.path.join(web_output_dir, 'index.xml.en'))
+            if tree.find('title') is not None:
+                doc.title['en'] = tree.find('title').text
+            elif tree.find('{http://www.w3.org/1999/xhtml}title') is not None:
+                doc.title['en'] = tree.find('{http://www.w3.org/1999/xhtml}title').text
+        elif devhelp_file:
+            tree = ET.parse(os.path.join(web_output_dir, devhelp_file[0]))
+            doc.title['en'] = tree.getroot().attrib['title']
+
+        if app.config.create_tarballs:
+            self.create_tarball(doc)
+            self.create_tarball_symlink(doc)
+
+        self.install_version_symlinks(doc)
+
+    def create_tarball(self, doc):
+        '''Create the tarball of the HTML files shipped with the module.
+
+        The archive is built from the html/ directory of the extracted
+        tarball, with members stored below '<modulename>-html-<version>'.
+        It is left alone when it exists and is newer than
+        self.mtime_tarball.  Its name is recorded in doc.tarballs, except
+        when there is no html/ content to archive; the method then returns
+        early.
+        '''
+        web_output_dir = os.path.join(app.config.output_dir, self.channel,
+                self.modulename)
+        tarball_name = '%s-html-%s.tar.gz' % (self.modulename, self.version)
+        tarball_filepath = os.path.join(web_output_dir, tarball_name)
+        if os.path.exists(tarball_filepath):
+            mtime = os.stat(tarball_filepath)[stat.ST_MTIME]
+        else:
+            mtime = 0
+
+        if mtime > self.mtime_tarball:
+            logging.debug('using already generated tarball')
+        else:
+            logging.info('generating doc tarball in %s' % tarball_filepath[
+                    len(app.config.output_dir):])
+            shipped_html = os.path.join(app.config.private_dir, 'extracts', self.dirname, 'html')
+            if not os.path.exists(shipped_html) or not os.listdir(shipped_html):
+                logging.warning('tarball shipped without html/, too bad')
+                return
+            base_tarball_dir = '%s-html-%s' % (self.modulename, self.version)
+            tar = tarfile.open(tarball_filepath, 'w:gz')
+            try:
+                for base, dirs, files in os.walk(shipped_html):
+                    base_dir = '%s/%s' % (base_tarball_dir, base[len(shipped_html):])
+                    for filename in files:
+                        # add() opens and closes the member file itself
+                        tar.add(os.path.join(base, filename),
+                                '%s/%s' % (base_dir, filename),
+                                recursive=False)
+            finally:
+                # close the archive even when a member could not be added
+                tar.close()
+
+        doc.tarballs[self.one_dot_version] = tarball_name
+
+
+
diff --git a/src/modtypes/htmlfiles.py b/src/modtypes/htmlfiles.py
new file mode 100644
index 0000000..4c54da9
--- /dev/null
+++ b/src/modtypes/htmlfiles.py
@@ -0,0 +1,112 @@
+import logging
+import os
+import shutil
+import stat
+import subprocess
+import tempfile
+
+try:
+    import html5lib
+except ImportError:
+    html5lib = None
+
+from base import DocModule
+
+class HtmlFilesModule(DocModule):
+    '''Class for documentation shipped in a tarball as HTML files'''
+    transform_mode = None
+
+    html2html_xsl_file = os.path.join(data_dir, 'xslt', 'html2html.xsl')
+
+    related_xsl_files = ['html2html.xsl', 'heading.xsl']
+
+    def __init__(self, tar, tarinfo, tarball_doc_elem):
+        '''Set up the module from the element describing it.
+
+        The module name is the doc_module attribute of tarball_doc_elem;
+        the transform mode is the text of its <transform-mode> child, when
+        there is one.
+        '''
+        DocModule.__init__(self, tar, tarinfo, None, False)
+        self.tarball_doc_elem = tarball_doc_elem
+        self.modulename = tarball_doc_elem.attrib.get('doc_module')
+        transform_mode = tarball_doc_elem.find('transform-mode')
+        if transform_mode is not None:
+            self.transform_mode = transform_mode.text
+
+    def setup_channel(self):
+        '''Take the channel from the channel attribute of the module element.'''
+        attributes = self.tarball_doc_elem.attrib
+        self.channel = attributes.get('channel')
+ 
+    def __str__(self):
+        '''Describe the module by its kind and directory.'''
+        location = self.dirname
+        return 'HTML files module at %s' % location
+
+    def process(self):
+        '''Publish the HTML files of the module and register the document.
+
+        Unless index.html in the output directory is newer than both
+        self.mtime_tarball and self.mtime_xslt_files, the .html files found
+        at the top of the extracted directory are passed through
+        html2html.xsl (when html5lib is available) or copied as they are,
+        and every other file or directory is copied alongside.
+
+        The document category comes from the category attribute of the
+        module element; an <index> child, if present, gives the target of
+        the index.html symlink.
+        '''
+        doc_module = self.modulename
+        ext_dirname = os.path.join(app.config.private_dir, 'extracts')
+
+        web_output_dir = os.path.join(app.config.output_dir, self.channel,
+                doc_module, self.one_dot_version)
+        if not os.path.exists(web_output_dir):
+            os.makedirs(web_output_dir)
+
+        # index.html serves as timestamp of the generated doc; a mtime of 0
+        # forces the generation
+        if not app.rebuild_all and os.path.exists(os.path.join(web_output_dir, 'index.html')):
+            mtime = os.stat(os.path.join(web_output_dir, 'index.html'))[stat.ST_MTIME]
+        else:
+            mtime = 0
+
+        if mtime > max(self.mtime_tarball, self.mtime_xslt_files):
+            logging.debug('using already generated doc')
+        else:
+            logging.info('generating doc in %s' % web_output_dir[len(app.config.output_dir):])
+
+            if html5lib:
+                # convert files to XML, then process them with xsltproc
+                # to get library.gnome.org look
+
+                logging.debug('transforming files shipped with tarball')
+                parser = html5lib.HTMLParser()
+
+                for filename in os.listdir(os.path.join(ext_dirname, self.dirname)):
+                    src = os.path.join(ext_dirname, self.dirname, filename)
+                    dst = os.path.join(web_output_dir, filename)
+                    if not filename.endswith('.html'):
+                        continue
+                    # serialize the parsed HTML as XHTML into a temporary
+                    # file that xsltproc can read
+                    doc = parser.parse(open(src))
+                    doc.childNodes[-1].attributes['xmlns'] = 'http://www.w3.org/1999/xhtml'
+                    temporary = tempfile.NamedTemporaryFile()
+                    temporary.write(doc.childNodes[-1].toxml().encode('utf-8'))
+                    temporary.flush()
+
+                    # NOTE(review): temporary.name is presumably an absolute
+                    # path, in which case os.path.join() returns it alone
+                    cmd = ['xsltproc', '--output', dst,
+                            '--nonet', '--xinclude',
+                            self.html2html_xsl_file,
+                            os.path.join(ext_dirname,
+                                    self.dirname, temporary.name)]
+                    if self.transform_mode:
+                        # each insert lands just before the stylesheet, so
+                        # the three arguments end up in this order
+                        cmd.insert(-2, '--stringparam')
+                        cmd.insert(-2, 'libgo.h2hmode')
+                        cmd.insert(-2, self.transform_mode)
+                    # the return code is stored but not checked
+                    rc = subprocess.call(cmd)
+            else:
+                # simply copy HTML files shipped in tarball
+                logging.debug('copying files shipped with tarball')
+                for filename in os.listdir(os.path.join(ext_dirname, self.dirname)):
+                    src = os.path.join(ext_dirname, self.dirname, filename)
+                    dst = os.path.join(web_output_dir, filename)
+                    if not filename.endswith('.html'):
+                        continue
+                    # copy only what is missing or outdated
+                    if not os.path.exists(dst) or \
+                            os.stat(src)[stat.ST_MTIME] > os.stat(dst)[stat.ST_MTIME]:
+                        open(dst, 'w').write(open(src, 'r').read())
+
+            # copy non-html files
+            for filename in os.listdir(os.path.join(ext_dirname, self.dirname)):
+                src = os.path.join(ext_dirname, self.dirname, filename)
+                dst = os.path.join(web_output_dir, filename)
+                if filename.endswith('.html'):
+                    continue
+                if os.path.isdir(src):
+                    # directories are replaced wholesale, as copytree()
+                    # wants a destination that does not exist yet
+                    if os.path.exists(dst):
+                        shutil.rmtree(dst)
+                    shutil.copytree(src, dst)
+                else:
+                    if not os.path.exists(dst) or \
+                            os.stat(src)[stat.ST_MTIME] > os.stat(dst)[stat.ST_MTIME]:
+                        open(dst, 'w').write(open(src, 'r').read())
+
+        doc = self.get_libgo_document(['en'])
+        if not doc:
+            return
+
+        doc.category = self.tarball_doc_elem.get('category')
+        doc.toc_id = doc.category
+
+        if self.tarball_doc_elem.find('index') is not None:
+            # (re)create index.html as a symlink to the declared index file;
+            # only an existing symlink is removed first
+            path = os.path.join(web_output_dir, 'index.html')
+            if os.path.islink(path):
+                os.unlink(path)
+            os.symlink(self.tarball_doc_elem.find('index').text, path)
+
+        self.install_version_symlinks(doc)
+
diff --git a/src/overlay.py b/src/overlay.py
new file mode 100644
index 0000000..97e9fe6
--- /dev/null
+++ b/src/overlay.py
@@ -0,0 +1,185 @@
+# libgo - script to build library.gnome.org
+# Copyright (C) 2007-2009  Frederic Peters
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+try:
+    import elementtree.ElementTree as ET
+except ImportError:
+    import xml.etree.ElementTree as ET
+
+from document import RemoteDocument
+from utils import version_cmp, is_version_number, download
+
+
+class SubIndex:
+    '''A sub-index read from an overlay node, with per-language texts.'''
+
+    def __init__(self, node):
+        '''Read id, weight, sections, titles and abstracts from node.'''
+        lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'
+        attributes = node.attrib
+        self.id = attributes.get('id')
+        self.weight = attributes.get('weight')
+        self.sections = node.find('sections').text.split()
+        self.title = {}
+        self.abstract = {}
+
+        # elements without an xml:lang attribute are filed under 'en'
+        for tag, texts in (('title', self.title), ('abstract', self.abstract)):
+            for child in node.findall(tag):
+                texts[child.attrib.get(lang_attr, 'en')] = child.text
+
+    def create_element(self, parent, channel, language):
+        '''Append an <index> element for this sub-index to parent, return it.
+
+        The 'C' language is handled as 'en'.
+        '''
+        if language == 'C':
+            language = 'en'
+        index = ET.SubElement(parent, 'index')
+        index.set('id', self.id)
+        index.set('lang', language)
+        index.set('channel', channel)
+        # NOTE(review): 'weigth' looks like a misspelling of 'weight'; kept
+        # as is since the consumers of this attribute are not visible here
+        index.set('weigth', self.weight)
+
+        # the title falls back to the English one, then to the id
+        title = self.title.get(language) or self.title.get('en') or self.id
+        ET.SubElement(index, 'title').text = title
+
+        # the abstract falls back to English and is left out when empty
+        abstract = self.abstract.get(language) or self.abstract.get('en')
+        if abstract:
+            ET.SubElement(index, 'abstract').text = abstract
+
+        return index
+
+
+class Overlay:
+    def __init__(self, overlay_file):
+        '''Parse overlay_file and index its documents, mappings and quirks.'''
+        self.tree = ET.parse(overlay_file)
+        self.modified_docs = {}
+        self.new_docs = []
+        self.more_tarball_docs = {}
+        self.quirks = {}
+        self.toc_mapping = {}
+
+        for doc in self.tree.findall('/documents/document'):
+            attributes = doc.attrib
+            has_module = 'doc_module' in attributes
+            if has_module:
+                # modifying an existing document
+                key = (attributes['doc_module'], attributes['channel'])
+                self.modified_docs[key] = doc
+
+            flagged_new = (doc.find('new') is not None or
+                    doc.find('local') is not None)
+            if not has_module or flagged_new:
+                # new document
+                self.new_docs.append(doc)
+
+            if 'matching_tarball' in attributes:
+                # documents are grouped by the tarball they match
+                self.more_tarball_docs.setdefault(
+                        attributes['matching_tarball'], []).append(doc)
+
+        for mapping in self.tree.findall('/subsections/map'):
+            key = (mapping.attrib.get('channel'), mapping.attrib.get('id'))
+            self.toc_mapping[key] = mapping.attrib.get('subsection')
+
+        for quirks in self.tree.findall('/quirks'):
+            key = (quirks.attrib['doc_module'], quirks.attrib['channel'])
+            self.quirks[key] = quirks
+
+    def apply(self, document):
+        '''Apply the subsection mapping and overlay overrides to document.
+
+        Overrides come from the overlay element registered for the
+        (module, channel) of the document; nothing more is done when there
+        is none.
+        '''
+        lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'
+
+        toc_key = (document.channel, document.toc_id)
+        if toc_key in self.toc_mapping:
+            document.subsection = self.toc_mapping[toc_key]
+
+        overlay = self.modified_docs.get((document.module, document.channel))
+        if overlay is None:
+            return
+
+        # titles and abstracts are overridden the same way
+        for tag in ('title', 'abstract'):
+            nodes = overlay.findall(tag)
+            if not nodes:
+                continue
+            texts = getattr(document, tag)
+            for node in nodes:
+                texts[node.attrib.get(lang_attr, 'en')] = node.text
+            # languages left without a text get the English one
+            for lang in document.languages:
+                if not texts.get(lang):
+                    texts[lang] = texts.get('en')
+
+        subsection = overlay.find('subsection')
+        if subsection is not None:
+            document.subsection = subsection.text
+        category = overlay.find('category')
+        if category is not None:
+            document.toc_id = category.text
+        weight = overlay.attrib.get('weight')
+        if weight:
+            document.weight = weight
+
+        if overlay.find('keywords') is not None:
+            for keyword in overlay.findall('keywords/keyword'):
+                document.keywords.append(keyword.text)
+
+    def get_channel_overlay(self, module, current_channel):
+        '''Return the channel module is moved to, or current_channel.
+
+        The first overlay document for module whose old-channel attribute
+        is current_channel provides the new channel.
+        '''
+        for doc in self.tree.findall('/documents/document'):
+            attributes = doc.attrib
+            if attributes.get('doc_module') == module and \
+                    attributes.get('old-channel') == current_channel:
+                return attributes.get('channel')
+        return current_channel
+
+    def get_new_docs(self):
+        '''Return a RemoteDocument, overlay applied, for each new document.'''
+        documents = []
+        for overlay_node in self.new_docs:
+            document = RemoteDocument(overlay_node)
+            self.apply(document)
+            documents.append(document)
+        return documents
+
+    def get_section_weight(self, section_id):
+        '''Return the weight of subsection section_id as a float.
+
+        0.5 is used when the subsection is unknown or has no weight.
+        '''
+        default_weight = 0.5
+        for section in self.tree.findall('subsections/subsection'):
+            if section.attrib.get('id') != section_id:
+                continue
+            return float(section.attrib.get('weight', default_weight))
+        return default_weight
+
+    def get_subindexes(self, channel):
+        '''Return the SubIndex objects declared for channel.
+
+        They are built from the <subindex> children of the first
+        <subindexes> element whose channel attribute is channel.  Returns
+        None when no <subindexes> element matches.
+        '''
+        for subindexes in self.tree.findall('subsections/subindexes'):
+            if subindexes.attrib.get('channel') != channel:
+                # keep looking: returning here would hide the sub-indexes
+                # of every channel but the first one listed
+                continue
+            return [SubIndex(x) for x in subindexes.findall('subindex')]
+        return None
+
+    def get_quirks(self, doc):
+        '''Return the names of the quirks that apply to doc.
+
+        Quirks are looked up by (doc.modulename, doc.channel).  A quirk is
+        left out when its appears-in version is greater than doc.version,
+        or when its fixed-in version is lower than or equal to it.
+        '''
+        quirks = self.quirks.get((doc.modulename, doc.channel))
+        if quirks is None:
+            return []
+        active = []
+        for quirk in quirks.findall('quirk'):
+            min_version = quirk.attrib.get('appears-in')
+            max_version = quirk.attrib.get('fixed-in')
+            if min_version and version_cmp(min_version, doc.version) > 0:
+                applies = False    # doc predates the quirk
+            elif max_version and version_cmp(max_version, doc.version) <= 0:
+                applies = False    # quirk already fixed in this version
+            else:
+                applies = True
+            if applies:
+                active.append(quirk.text)
+        return active
+
+
diff --git a/src/utils.py b/src/utils.py
index 6850da9..1e008bc 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -15,7 +15,10 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+import logging
 import os
+import re
+import urllib2
 
 class FakeTarFile:
     '''
@@ -57,3 +60,57 @@ class FakeTarInfo:
 
     def __repr__(self):
         return '<FakeTarInfo \'%s\'>' % self.name
+
+def version_cmp(x, y):
+    '''Compare two dotted version strings, cmp()-style.
+
+    Returns < 0 if x < y, 0 if x == y, and > 0 if x > y.  'nightly' is
+    greater than any other version.  Versions with a non-numeric part
+    cannot be compared; a warning is logged and 0 is returned.
+    '''
+    x_nightly = (x == 'nightly')
+    y_nightly = (y == 'nightly')
+    if x_nightly or y_nightly:
+        # 0 when both are nightly, 1 when only x is, -1 when only y is
+        return int(x_nightly) - int(y_nightly)
+    try:
+        x_parts = [int(part) for part in x.split('.')]
+        y_parts = [int(part) for part in y.split('.')]
+    except ValueError:
+        logging.warning('failure in version_cmp: %r vs %r' % (x, y))
+        return 0
+    return cmp(x_parts, y_parts)
+
+def is_version_number(v):
+    '''Tell whether v starts with digits, a dot and more digits.'''
+    match = re.match(r'\d+\.\d+', v)
+    return match is not None
+
+def download(href):
+    '''Return the path of a local copy of href, or None if it can't be had.
+
+    For file: URLs the path part of the URL is returned directly.  Other
+    URLs are fetched once into app.config.download_cache_dir, in a file
+    named after their host and path, and served from there afterwards.
+    HTTP and URL errors are logged as warnings.
+    '''
+    parsed_url = urllib2.urlparse.urlparse(href)
+    if parsed_url[0] == 'file':
+        return parsed_url[2]
+    filename = '/'.join(parsed_url[1:3])
+    cache_filename = os.path.join(app.config.download_cache_dir, filename)
+    cache_dir = os.path.split(cache_filename)[0]
+    if not os.path.exists(cache_dir):
+        os.makedirs(cache_dir)
+    if not os.path.exists(cache_filename):
+        logging.info('downloading %s' % href)
+        try:
+            s = urllib2.urlopen(href).read()
+        except urllib2.HTTPError, e:
+            logging.warning('error %s downloading %s' % (e.code, href))
+            return None
+        except urllib2.URLError:
+            logging.warning('error (URLError) downloading %s' % href)
+            return None
+        # binary mode, as the payload is written back byte for byte; the
+        # file is closed explicitly so it is complete on return
+        cache_file = open(cache_filename, 'wb')
+        try:
+            cache_file.write(s)
+        finally:
+            cache_file.close()
+    return cache_filename
+
+class LogFormatter(logging.Formatter):
+    '''Class used for formatting log messages'''
+
+    def __init__(self):
+        '''Note whether $TERM is 'screen'.'''
+        term = os.environ.get('TERM', '')
+        self.is_screen = (term == 'screen')
+        logging.Formatter.__init__(self)
+
+    def format(self, record):
+        '''Return record formatted as '<level initial>: <message>'.
+
+        When $TERM is 'screen', info-level messages are also written to
+        stdout wrapped in an escape sequence (presumably the one setting
+        the window title in screen).
+        '''
+        # imported here as this module does not import sys at top level;
+        # without it the branch below fails with NameError
+        import sys
+        if self.is_screen and record.levelname[0] == 'I':
+            sys.stdout.write('\033klgo: %s\033\\' % record.msg)
+            sys.stdout.flush()
+        return '%c: %s' % (record.levelname[0], record.msg)
+



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]