library.gnome.org index



Hello, 

Draft script attached, example output on http://lgo.0d.be/.

There is also some work on the xslt files, based on gnome-doc-utils
trunk; see http://lgo.0d.be/test/.  There are some errors in the generated
HTML; I'll have a look at them, unless Shaun is faster.

Obvious ones are <h2 class="author"/> on the manual index page;
less obvious ones are cross-references, such as at the bottom of
http://lgo.0d.be/test/eog-print.html.en
  <a class="xref" href="eog-print.html.en#eog-print-image"
    title="Printing an Image"/>



        Frederic
#! /usr/bin/env python

import os
import re
import sys

class Documentation(object):
    """Plain attribute holder describing one index page of a document.

    Instances are created empty.  The scanning code in this script then
    attaches ``lang``, ``filepath``, ``module``, ``version``, ``url``,
    ``title`` and ``abstract`` to each instance.
    """


# Content of an <h1> element; "." does not match newlines here, so the
# whole element has to sit on a single line.
title_re = re.compile(r'<h1>(.*?)</h1>')
# Content of the <h3 class="abstract"> element.  DOTALL lets the abstract
# span several lines; the match is non-greedy so that it stops at the first
# closing "</h3" instead of running on to the last one in the page.
abstract_re = re.compile(r'<h3 class="abstract">(.*?)</h3', re.DOTALL)

docs = {} # indexed on module; each value is a list of Documentation objects
languages = {} # language code -> True, used as a set of languages seen

# Walk the tree below the current directory.  A directory is treated as a
# document when it holds an English index page; every index.html.<lang>
# file found next to it is recorded as one translation of that document.
for base, dirs, filenames in os.walk('.'):
    if 'index.html.en' not in filenames:
        continue
    if base == '.':
        continue

    # The last two path components are taken as <module>/<version>.  A
    # directory that is not nested deeply enough cannot be split that way,
    # so skip it instead of aborting the whole run with a ValueError.
    parts = base.rsplit(os.path.sep, 2)
    if len(parts) != 3:
        continue
    module, version = parts[1:]

    for filename in filenames:
        if not filename.startswith('index.html.'):
            continue

        doc = Documentation()

        # language code is whatever follows the last dot
        doc.lang = filename.rsplit('.', 1)[-1]
        doc.filepath = os.path.join(base, filename)
        doc.module = module
        doc.version = version
        doc.url = doc.filepath[1:] # stripping leading dot

        # Close the handle explicitly rather than leaving it to the
        # garbage collector; many files may be visited in one run.
        fd = open(doc.filepath)
        try:
            content = fd.read()
        finally:
            fd.close()

        titles = title_re.findall(content)
        if not titles:
            # a page without a title is not recorded at all
            continue
        doc.title = titles[0]

        abstracts = abstract_re.findall(content)
        if abstracts:
            doc.abstract = abstracts[0].strip()
        else:
            doc.abstract = None

        docs.setdefault(module, []).append(doc)

        languages[doc.lang] = True


def _version_key(version):
    """Turn a dotted version string into a tuple that orders numerically.

    Numeric components are compared as integers, so that '2.10' sorts
    after '2.9'.  Non-numeric components sort after every numeric one and
    are compared with each other as plain strings.
    """
    key = []
    for part in version.split('.'):
        if part.isdigit():
            key.append((0, int(part), ''))
        else:
            key.append((1, 0, part))
    return tuple(key)


def cmpv(x, y):
    """Compare two objects by their dotted ``version`` attribute.

    Returns -1, 0 or 1 when the version of x is respectively lower than,
    equal to or higher than the version of y, which makes the function
    usable as a comparison function for sorting.
    """
    a = _version_key(x.version)
    b = _version_key(y.version)
    # same result as cmp(a, b), without depending on the cmp builtin
    return (a > b) - (a < b)

def included_file(key, lang):
    """Return the content of the HTML snippet ``key`` for language ``lang``.

    The file ``<key>.html.<lang>`` is used when it exists; otherwise the
    English file ``<key>.html.en`` is read instead.  If that one cannot be
    opened either, the error raised by open() propagates to the caller.
    """
    path = '%s.html.%s' % (key, lang)
    if not os.path.exists(path):
        # no translation of this snippet, fall back to English
        path = '%s.html.en' % key

    # try/finally closes the handle deterministically while staying
    # compatible with interpreters that predate the "with" statement
    fd = open(path)
    try:
        return fd.read()
    finally:
        fd.close()


# Write one index.html.<lang> per language seen during the scan.  Each
# index lists every module once, linking to the most recent version that
# is available in that language.
for lang in languages:
    out = open('index.html.%s' % lang, 'w')
    try:
        print >> out, included_file('snippets/index_top', lang)
        print >> out, included_file('header', lang)

        print >> out, '<dl class="doc-index">'

        for module in sorted(docs):
            versions = docs[module]
            versions.sort(cmpv)

            in_lang = [x for x in versions if x.lang == lang]
            in_english = [x for x in versions if x.lang == 'en']
            if in_lang:
                # the latest version in this language may be older than
                # the latest one in English, include it as is nevertheless
                d = in_lang[-1]
            elif in_english:
                # not available in this language, fallback to English
                d = in_english[-1]
            else:
                # No English page was recorded for this module either (the
                # scan drops pages without a title); link to the latest
                # page in any language rather than failing on an empty list.
                d = versions[-1]

            print >> out, '  <dt><a href="%s">%s</a></dt>' % (d.url, d.title)
            if d.abstract:
                print >> out, '  <dd><p>%s</p>' % d.abstract
                print >> out, '</dd>\n'

        print >> out, '</dl>'
        print >> out, included_file('footer', lang)
    finally:
        # close the index even when an include is missing or a write fails
        out.close()



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]