gimp-help-2 r2595 - in branches/xml2po-support: . stylesheets stylesheets/migrate stylesheets/zh_CN tools tools/migrate



Author: ulfehlert
Date: Sun Nov  9 17:34:50 2008
New Revision: 2595
URL: http://svn.gnome.org/viewvc/gimp-help-2?rev=2595&view=rev

Log:
2008-11-09  Ulf-D. Ehlert  <ulfehlert svn gnome org>

	* stylesheets/zh_CN
	* stylesheets/profile.xsl
	* tools/profile-xml.sh: removed

	* stylesheets/migrate/convert-glossary.xsl
	* tools/migrate/convert-glossary.py
	* tools/migrate.sh: added code to migrate glossary


Added:
   branches/xml2po-support/stylesheets/migrate/
   branches/xml2po-support/stylesheets/migrate/convert-glossary.xsl   (contents, props changed)
   branches/xml2po-support/tools/migrate/
   branches/xml2po-support/tools/migrate/convert-glossary.py   (contents, props changed)
Removed:
   branches/xml2po-support/stylesheets/profile.xsl
   branches/xml2po-support/stylesheets/zh_CN/
   branches/xml2po-support/tools/profile-xml.sh
Modified:
   branches/xml2po-support/ChangeLog
   branches/xml2po-support/tools/migrate.sh

Added: branches/xml2po-support/stylesheets/migrate/convert-glossary.xsl
==============================================================================
--- (empty file)
+++ branches/xml2po-support/stylesheets/migrate/convert-glossary.xsl	Sun Nov  9 17:34:50 2008
@@ -0,0 +1,133 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+    This is a simple stylesheet to merge the glossary files.
+    Additionally, the "glossterm" tags will be unified.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+  <!-- Generate DocBook instance with correct DOCTYPE -->
+  <xsl:output method="xml" encoding="utf-8"
+    doctype-public="-//OASIS//DTD DocBook XML V4.5//EN"
+    doctype-system="http://www.docbook.org/xml/4.5/docbookx.dtd"/>
+
+  <xsl:template match="glossary"> <!-- rebuild the root element, keeping all of its attributes -->
+    <!-- The xmlns:xi declaration on this literal result element is copied to the output root. -->
+    <glossary xmlns:xi="http://www.w3.org/2001/XInclude">
+      <xsl:for-each select="@*">
+        <xsl:attribute name="{name()}">
+          <xsl:value-of select="." />
+        </xsl:attribute>
+      </xsl:for-each>
+      <xsl:apply-templates/>
+    </glossary>
+  </xsl:template>
+
+  <xsl:template match="glossdiv"> <!-- the glossdiv wrapper itself is not reproduced (see the commented-out xsl:element); only its glossentry children are processed, sorted by @id -->
+    <!--xsl:element name="{local-name(.)}">
+      <xsl:if test="@lang">
+        <xsl:attribute name="lang">
+          <xsl:value-of select="@lang" />
+        </xsl:attribute>
+      </xsl:if-->
+      <!-- XXX: it seems that sorting does not work with
+           multiple source files... -->
+      <xsl:apply-templates select="glossentry">
+        <xsl:sort select="@id" />
+      </xsl:apply-templates>
+    <!--/xsl:element-->
+  </xsl:template>
+
+  <xsl:template match="glossentry"> <!-- copy entries whose single glossterm already holds phrase children; rebuild every other entry with unified glossterms -->
+    <xsl:choose>
+      <xsl:when test="count(glossterm) = 1 and
+                      glossterm/phrase and
+                      not(glossterm/@lang)">
+        <!-- <glossterm> <phrase lang="xx"> <phrase lang="yy"> ... -->
+        <xsl:copy-of select="."/>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:element name="{local-name(.)}"> <!-- new glossentry: id is always written, lang only if present -->
+          <xsl:attribute name="id">
+            <xsl:value-of select="@id" />
+          </xsl:attribute>
+          <xsl:if test="@lang">
+            <xsl:attribute name="lang">
+              <xsl:value-of select="@lang" />
+            </xsl:attribute>
+          </xsl:if>
+          <xsl:choose>
+            <xsl:when test="count(glossterm) > 1">
+              <!-- <glossterm lang="xx">... <glossterm lang="yy"> ... -->
+              <xsl:call-template name="glossterms" /> <!-- one merged glossterm first, then all non-glossterm children -->
+              <xsl:apply-templates select="*[local-name() != 'glossterm']" />
+            </xsl:when>
+            <xsl:otherwise>
+              <xsl:apply-templates />
+            </xsl:otherwise>
+          </xsl:choose>
+        </xsl:element>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <xsl:template match="glossterm"> <!-- rewrite a glossterm so that its text sits in phrase children, each with a lang attribute where one is known -->
+    <xsl:element name="{local-name()}">
+      <xsl:choose>
+        <xsl:when test="phrase">
+          <!-- <glossterm lang="xx"> <phrase> ... -->
+          <xsl:variable name="termlang" select="@lang"/>
+          <xsl:for-each select="phrase">
+            <xsl:element name="phrase">
+              <xsl:choose> <!-- the phrase's own lang wins; otherwise fall back to the glossterm's lang -->
+                <xsl:when test="@lang">
+                  <xsl:attribute name="lang">
+                    <xsl:value-of select="@lang" />
+                  </xsl:attribute>
+                </xsl:when>
+                <xsl:when test="$termlang">
+                  <xsl:attribute name="lang">
+                    <xsl:value-of select="$termlang" />
+                  </xsl:attribute>
+                </xsl:when>
+              </xsl:choose>
+              <xsl:value-of select="." /> <!-- string value only: markup nested in the phrase is not copied -->
+            </xsl:element>
+          </xsl:for-each>
+        </xsl:when>
+        <xsl:otherwise>
+          <!-- <glossterm lang="xx"> ... </glossterm> -->
+          <xsl:element name="phrase">
+            <xsl:if test="@lang">
+              <xsl:attribute name="lang">
+                <xsl:value-of select="@lang" />
+              </xsl:attribute>
+            </xsl:if>
+            <xsl:value-of select="." />
+          </xsl:element>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:element>
+  </xsl:template>
+
+  <!-- Merge the <glossterm lang="xx"> children of the current node into one <glossterm> holding a <phrase lang="xx"> for each of them. -->
+  <xsl:template name="glossterms">
+    <xsl:element name="glossterm">
+      <xsl:for-each select="glossterm">
+        <xsl:element name="phrase">
+          <xsl:attribute name="lang"> <!-- written unconditionally, so it is empty when the glossterm has no lang -->
+            <xsl:value-of select="@lang" />
+          </xsl:attribute>
+          <xsl:value-of select="." />
+        </xsl:element>
+      </xsl:for-each>
+    </xsl:element>
+  </xsl:template>
+
+  <xsl:template match="simplelist|anchor" /> <!-- empty template: these elements produce no output when templates are applied to them -->
+
+  <!--xsl:template match="glossdef|indexterm|title"-->
+  <xsl:template match="*"> <!-- fallback: copy any other element unchanged, together with its whole subtree -->
+    <xsl:copy-of select="." />
+  </xsl:template>
+
+</xsl:stylesheet>

Modified: branches/xml2po-support/tools/migrate.sh
==============================================================================
--- branches/xml2po-support/tools/migrate.sh	(original)
+++ branches/xml2po-support/tools/migrate.sh	Sun Nov  9 17:34:50 2008
@@ -31,9 +31,11 @@
     mv $oldsrcdir $srcdir
 fi
 
+# src/preface/authors.xml
 if [ -e $srcdir/preface/titles.xml ] &&
    [ -e stylesheets/authors_docbook.xsl ] &&
-   [ -e stylesheets/authors.xml ]; then
+   [ -e stylesheets/authors.xml ]
+then
     echo "Creating src/preface/authors.xml:"
     echo xsltproc --nonet \
         --output $srcdir/preface/authors.xml \
@@ -43,6 +45,7 @@
     echo >&2 "ERROR: Cannot make $srcdir/preface/authors.xml"
 fi
 
+# split
 echo "Splitting the source XML:"
 echo "Warning: the following files and directories will be skipped:"
 echo "$exclude_patterns" | sed -e 's/ /, /g; s/^/    /' >&2
@@ -58,6 +61,7 @@
 test -e $srcdir/preface/authors.xml && rm -f $srcdir/preface/authors.xml
 echo
 
+# oldsrc
 echo Saving source directory ...
 mv -vi "$srcdir" "$oldsrcdir" && \
 mv -vi "$xmldir"/en "$srcdir" && \
@@ -65,8 +69,29 @@
 ln -vs $PWD/"$srcdir" "$xmldir"/en
 echo
 
+# src/glossary/glossary.xml
+if [ -e $oldsrcdir/glossary/glossary.xml ] &&
+   [ -e stylesheets/migrate/convert-glossary.xsl ] &&
+   [ -e tools/migrate/convert-glossary.py ]
+then
+    echo "Creating $srcdir/glossary/glossary.xml:"
+    test -d $srcdir/glossary || mkdir $srcdir/glossary
+    xsltproc --nonet --xinclude \
+        stylesheets/migrate/convert-glossary.xsl \
+        $oldsrcdir/glossary/glossary.xml \
+    | tools/migrate/convert-glossary.py --lang "$LINGUAS" \
+    | xmllint --nonet --format - \
+    > $srcdir/glossary/glossary.xml
+    echo "Splitting $srcdir/glossary/glossary.xml:"
+    $SPLIT --lang="$LINGUAS" --file="$srcdir/glossary/glossary.xml" \
+           --dest="$xmldir"/'*'/glossary/
+else
+    echo >&2 "ERROR: Cannot make $srcdir/glossary/glossary.xml"
+fi
+
 test "$1" = "split" && exit 0
 
+# xmllint
 echo "Reformatting English XML files:"
 find $srcdir/ -type f -name '*.xml' |
 while read xmlfile; do
@@ -83,6 +108,7 @@
 
 test "$1" = "xmllint" && exit 0
 
+# pot
 echo "Creating POT files"
 time \
 find $srcdir -name '*.xml' |
@@ -100,6 +126,7 @@
 
 test "$1" = "pot" && exit 0
 
+# po
 echo "Creating PO files"
 time \
 find $srcdir -name '*.xml' |
@@ -115,12 +142,13 @@
         #$XML2PO --language $lang --reuse=$xmlfile --output="$pofile" \
         #        "$srcfile" 2>&1 | grep -vE 'image file .* not found'
         ($XML2PO --language $lang --reuse=$xmlfile --output='-' \
-                 "$srcfile" | msguniq | msgcat -w80 - > "$pofile") 2>&1 \
+                 "$srcfile" | msguniq --use-first | msgcat -w80 - > "$pofile") 2>&1 \
         | grep -vE 'image file .* not found'
     done
 done
 echo
 
+# check
 echo Simple check: searching for empty files...
 trap "rm -f 'empty files'" HUP INT QUIT PIPE TERM
 find ${podir} ${potdir} -type f -size 0 | sort | tee "empty files"

Added: branches/xml2po-support/tools/migrate/convert-glossary.py
==============================================================================
--- (empty file)
+++ branches/xml2po-support/tools/migrate/convert-glossary.py	Sun Nov  9 17:34:50 2008
@@ -0,0 +1,149 @@
+#!/usr/bin/env python
+# -*- coding: utf8 -*-
+"""
+Regroup the glossentry nodes of a resolved glossary (read from stdin) by id.
+"""
+
+import sys
+import re
+import xml.dom.minidom
+import logging
+
+logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
+Logger = logging.getLogger("migrate.glossary")
+
+def usage(exit_code=0):
+    """Write a usage message to stderr, then exit with status exit_code."""
+    sys.stderr.write("Usage:\n    %s [--lang LANGUAGES] < RESOLVED_GLOSSARY_XML\n"
+                     % sys.argv[0])
+    sys.exit(exit_code)
+
+def get_lang(elem):
+    """Return the nearest "lang" value from elem upwards ("" if there is none)."""
+    while elem is not None and hasattr(elem, "getAttribute"):  # stop at Document
+        lang = elem.getAttribute("lang")
+        if lang:
+            return lang
+        elem = elem.parentNode
+    return ""
+
+def main():
+    """Read the resolved glossary from stdin, regroup entries by id, print it."""
+    languages = ["en","de","es","fr","hr","it","ko","nl","no","pl","ru","sv"]
+    if sys.argv[1:]:
+        if sys.argv[1] in ("-h", "--help"):
+            usage()
+        elif sys.argv[1] in ("-l", "--lang"):
+            if len(sys.argv) < 3 or not sys.argv[2]: usage(64)
+            languages = re.split('[;, ]', sys.argv[2])
+        else:
+            usage(64)
+
+    # read XML file
+    filename = sys.stdin
+    doc = xml.dom.minidom.parse(filename)
+
+    # document element is last child of document root
+    old_glossary = doc.childNodes[-1]
+    assert old_glossary.nodeName == "glossary"
+    assert old_glossary.getAttribute("lang")
+
+    # this script requires a sequence of 'glossentry' nodes (no 'glossdiv' nodes!)
+    # with unified 'glossterm's -- see "stylesheets/migrate/convert-glossary.xsl"
+    glossentries = [child for child in old_glossary.childNodes
+                          if child.nodeName == "glossentry"]
+    assert glossentries
+
+    # dict of lists of glossentries
+    entry_by_id = {}
+    # for matching glossary ids
+    localized_id_regex = re.compile("(.*)-([a-z]{2}(_[A-Z]{2})?)$")
+
+    for glossentry in glossentries:
+        id = glossentry.getAttribute("id")
+        assert id
+        # match and split ids like "glossary-foo-xx"
+        # with a language suffix "xx"
+        match = localized_id_regex.search(id)
+        if match:
+            saved_id = id
+            id, lang = match.group(1,2)
+            if lang == get_lang(glossentry):
+                glossentry.setAttribute("id", id)
+                assert id == glossentry.getAttribute("id")
+                Logger.warn("changed id '%s' to '%s'" % (saved_id, id))
+            else:
+                # Oops - that's probably "glossary-plug-in"
+                id = saved_id
+
+        entry = dict(node=glossentry, term=None)
+        # typically there's more than one glossentry with this id
+        if id in entry_by_id:
+            entry_by_id[id].append(entry)
+        else:
+            entry_by_id[id] = [entry]
+
+        # unified glossterms:
+        # <glossterm>
+        #   <phrase lang="en"> ... </phrase>
+        #   <phrase lang="xx"> ... </phrase>
+        #   <phrase lang="yy"> ... </phrase>
+        #   ...
+        # </glossterm>
+        for glossterm in [child for child in glossentry.childNodes
+                           if child.nodeName == "glossterm"]:
+            for phrase in [child for child in glossterm.childNodes
+                                 if child.nodeName == "phrase"]:
+                lang = get_lang(phrase)
+                if lang.find("en") < 0: continue
+                phrase.normalize()
+                assert len(phrase.childNodes) == 1  # should be a text node
+                term = phrase.childNodes[0].nodeValue.strip()
+                entry_by_id[id][-1]['term'] = term.encode("UTF-8")
+
+    impl = xml.dom.minidom.getDOMImplementation()
+    result = impl.createDocument(
+            None,
+            "glossary",
+            impl.createDocumentType(
+                    "glossary",
+                    "-//OASIS//DTD DocBook XML V4.5//EN",
+                    "http://www.docbook.org/xml/4.5/docbookx.dtd"))
+    result.encoding = "UTF-8"
+    new_glossary = result.childNodes[-1]
+    assert new_glossary.nodeName == "glossary"
+    for attr in ('id', 'lang'):
+        new_glossary.setAttribute(attr, old_glossary.getAttribute(attr))
+
+    # append title and indexterm nodes
+    for child in (child for child in old_glossary.childNodes
+                        if child.nodeType == xml.dom.Node.ELEMENT_NODE
+                        if child.nodeName != "glossentry"):
+        new_glossary.appendChild(child.cloneNode(True))
+
+    for id in sorted(entry_by_id.keys()):
+        Logger.debug("%d %s" % (len(entry_by_id[id]), id))
+        all_langs = []
+        en_node = None
+        for entry in entry_by_id[id]:
+            node, term = entry['node'], entry['term']
+            langs = get_lang(node)
+            Logger.debug("    %s" % langs)
+            assert (langs.find("en") < 0) == (term is None)
+            all_langs.extend(langs.split(';'))
+            node = new_glossary.appendChild(node.cloneNode(True))
+            if term: en_node = node
+        if not en_node:
+            Logger.warn("removed %s (glossentry for %s)" % \
+                        (id, ",".join(all_langs)))
+            continue  # NOTE(review): the clones were already appended above -- confirm
+        missing_langs = [lang for lang in languages if lang not in all_langs]
+        if missing_langs:
+            en_node.setAttribute("lang", get_lang(en_node) +
+                                         ';' + ';'.join(missing_langs))
+
+    print result.toxml("UTF-8")
+
+# Main program start
+if __name__ == '__main__':
+    main()



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]