[gimp-help-2] [xml2po] Add xml2po files from gnome-doc-utils

From: Ulf-D. Ehlert <ulfehlert src gnome org>
To: svn-commits-list gnome org
Cc:
Subject: [gimp-help-2] [xml2po] Add xml2po files from gnome-doc-utils
Date: Thu, 17 Dec 2009 20:01:30 +0000 (UTC)
commit 0734cfee0f4128925223bfcaa1cf620e25cad376
Author: Ulf-D. Ehlert <ulfehlert svn gnome org>
Date:   Wed Dec 16 20:21:14 2009 +0100

    [xml2po] Add xml2po files from gnome-doc-utils
    
    Update xml2po based on gnome-doc-utils v0.18.0 (git 2009-12-09).

 Makefile.GNU                   |    2 +-
 tools/.gitignore               |    2 +
 tools/xml2po.py                |  193 +++++++++++
 tools/xml2po/.gitignore        |    2 +
 tools/xml2po/__init__.py       |  686 ++++++++++++++++++++++++++++++++++++++++
 tools/xml2po/modes/.gitignore  |    2 +
 tools/xml2po/modes/basic.py    |   80 +++++
 tools/xml2po/modes/docbook.py  |  207 ++++++++++++
 8 files changed, 1173 insertions(+), 1 deletions(-)
---
diff --git a/Makefile.GNU b/Makefile.GNU
index 4e84670..a2051fa 100644
--- a/Makefile.GNU
+++ b/Makefile.GNU
@@ -28,7 +28,7 @@ XSLTFLAGS = --nonet
 XMLLINT      = xmllint
 XMLLINTFLAGS = --nonet
 
-XML2PO = tools/xml2po
+XML2PO = tools/xml2po.py
 
 MSGWIDTH      = 79
 MSGUNIQ       = msguniq 
diff --git a/tools/.gitignore b/tools/.gitignore
new file mode 100644
index 0000000..52e4e61
--- /dev/null
+++ b/tools/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+*.pyo
diff --git a/tools/xml2po.py b/tools/xml2po.py
new file mode 100755
index 0000000..8e26ca7
--- /dev/null
+++ b/tools/xml2po.py
@@ -0,0 +1,193 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2004, 2005, 2006 Danilo Šegan <danilo gnome org>.
+# Copyright (c) 2009 Claude Paroz <claude 2xlibre net>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+# xml2po -- translate XML documents
+VERSION = "0.18.0"
+
+# Versioning system (I have used this for a long time, so let's explain it to
+# those Linux-versioning-scheme addicts):
+#   1.0.* are unstable, development versions
+#   1.1 will be first stable release (release 1), and 1.1.* bugfix releases
+#   2.0.* will be unstable-feature-development stage (milestone 1)
+#   2.1.* unstable development betas (milestone 2)
+#   2.2 second stable release (release 2), and 2.2.* bugfix releases
+#   ...
+#
+import sys
+import os
+import getopt
+import tempfile
+
+NULL_STRING = '/dev/null'
+if not os.path.exists('/dev/null'): NULL_STRING = 'NUL'
+
+def usage (with_help = False):  # Print the usage line to stderr; with_help also prints the option list and examples.
+    print >> sys.stderr, "Usage:  %s [OPTIONS] [XMLFILE]..." % (sys.argv[0])
+    if with_help:
+        print >> sys.stderr, """
+OPTIONS may be some of:
+    -a    --automatic-tags     Automatically decides if tags are to be considered
+                                 "final" or not
+    -k    --keep-entities      Don't expand entities
+    -e    --expand-all-entities  Expand ALL entities (including SYSTEM ones)
+    -m    --mode=TYPE          Treat tags as type TYPE (default: docbook)
+    -o    --output=FILE        Print resulting text (XML or POT) to FILE
+    -p    --po-file=FILE       Specify PO file containing translation, and merge
+    -r    --reuse=FILE         Specify translated XML file with the same structure
+    -t    --translation=FILE   Specify MO file containing translation, and merge
+    -u    --update-translation=LANG.po   Updates a PO file using msgmerge program
+
+    -l    --language=LANG      Set language of the translation to LANG
+          --mark-untranslated  Set 'xml:lang="C"' on untranslated tags
+
+    -v    --version            Output version of the xml2po program
+
+    -h    --help               Output this message
+
+EXAMPLES:
+    To create a POTemplate book.pot from input files chapter1.xml and
+    chapter2.xml, run the following:
+        %(command)s -o book.pot chapter1.xml chapter2.xml
+
+    After translating book.pot into de.po, merge the translations back,
+    using -p option for each XML file:
+        %(command)s -p de.po chapter1.xml > chapter1.de.xml
+        %(command)s -p de.po chapter2.xml > chapter2.de.xml
+""" % {'command': sys.argv[0]}
+
+
+def main(argv):  # Parse argv (options + XML files) and run the pot / merge / update / reuse operation.
+    if not argv:
+        usage()
+        sys.exit(2)
+
+    name = os.path.join(os.path.dirname(__file__), '..')
+    if os.path.exists(os.path.join(name, 'tests')):
+        print >> sys.stderr, 'Running from source folder, modifying PYTHONPATH'
+        sys.path.insert(0, name)
+
+    from xml2po import Main
+
+    # Default parameters
+    default_mode = 'docbook'
+    operation = 'pot' # 'pot', 'merge', 'update'
+    output  = '-' # this means to stdout
+    options = {
+        'mark_untranslated'   : False,
+        'expand_entities'     : True,
+        'expand_all_entities' : False,
+    }
+    origxml = ''
+    mofile = None
+    mofile_tmppath = None
+
+    try: opts, remaining_args = getopt.getopt(argv, 'avhkem:t:o:p:u:r:l:',
+                               ['automatic-tags','version', 'help', 'keep-entities', 'expand-all-entities', 'mode=', 'translation=',
+                                'output=', 'po-file=', 'update-translation=', 'reuse=', 'language=', 'mark-untranslated' ])
+    except getopt.GetoptError:
+        usage(True)
+        sys.exit(2)
+
+    for opt, arg in opts:
+        if opt in ('-m', '--mode'):
+            default_mode = arg
+        elif opt in ('-a', '--automatic-tags'):
+            default_mode = 'basic'
+        elif opt in ('-k', '--keep-entities'):
+            options['expand_entities'] = False
+        elif opt in ('--mark-untranslated',):
+            options['mark_untranslated'] = True
+        elif opt in ('-e', '--expand-all-entities'):
+            options['expand_all_entities'] = True
+        elif opt in ('-l', '--language'):
+            options['translationlanguage'] = arg
+        elif opt in ('-t', '--translation'):
+            mofile = arg
+            operation = 'merge'
+            if 'translationlanguage' not in options:  # default language: MO file name without directory and extension
+                options['translationlanguage'] = os.path.split(os.path.splitext(mofile)[0])[1]
+        elif opt in ('-r', '--reuse'):
+            origxml = arg
+        elif opt in ('-u', '--update-translation'):
+            operation = 'update'
+            po_to_update = arg
+        elif opt in ('-p', '--po-file'):
+            mofile_handle, mofile_tmppath = tempfile.mkstemp()  # temporary MO file, compiled from the PO file below
+            os.close(mofile_handle)
+            pofile = arg
+            operation = 'merge'
+            if 'translationlanguage' not in options:  # default language: PO file name without directory and extension
+                options['translationlanguage'] = os.path.split(os.path.splitext(pofile)[0])[1]
+            os.system("msgfmt -o %s %s >%s" % (mofile_tmppath, pofile, NULL_STRING)) and sys.exit(7)  # non-zero msgfmt status -> exit 7
+            mofile = mofile_tmppath
+        elif opt in ('-o', '--output'):
+            output = arg
+        elif opt in ('-v', '--version'):
+            print VERSION
+            sys.exit(0)
+        elif opt in ('-h', '--help'):
+            usage(True)
+            sys.exit(0)
+
+    if operation == 'update' and output != "-":
+        print >> sys.stderr, "Option '-o' is not yet supported when updating translations directly. Ignoring this option."
+
+    # Treat remaining arguments as XML files (pop() takes from the end, so the order is reversed)
+    filenames = []
+    while remaining_args:
+        filenames.append(remaining_args.pop())
+
+    try:
+        xml2po_main = Main(default_mode, operation, output, options)
+    except IOError:
+        print >> sys.stderr, "Error: cannot open file %s for writing." % (output)
+        sys.exit(5)
+
+    if operation == 'merge':
+        if len(filenames) != 1:  # exactly one XML file is needed; zero files used to crash on filenames[0] below
+            print  >> sys.stderr, "Error: You can merge translations with only one XML file at a time."
+            sys.exit(2)
+
+        if not mofile:
+            print >> sys.stderr, "Error: You must specify MO file when merging translations."
+            sys.exit(3)
+
+        xml2po_main.merge(mofile, filenames[0])
+
+    elif operation == 'update':
+        xml2po_main.update(filenames, po_to_update)
+
+    elif origxml:
+        xml2po_main.reuse(origxml, filenames[0])
+
+    else:
+        # Standard POT producing
+        xml2po_main.to_pot(filenames)
+
+    if mofile_tmppath:  # remove the temporary MO file created for -p
+        os.remove(mofile_tmppath)
+
+# Main program start: run main() when executed as a script; importing this file raises NotImplementedError
+if __name__ == '__main__':
+    main(sys.argv[1:])
+else:
+    raise NotImplementedError
diff --git a/tools/xml2po/.gitignore b/tools/xml2po/.gitignore
new file mode 100644
index 0000000..52e4e61
--- /dev/null
+++ b/tools/xml2po/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+*.pyo
diff --git a/tools/xml2po/__init__.py b/tools/xml2po/__init__.py
new file mode 100644
index 0000000..23486f7
--- /dev/null
+++ b/tools/xml2po/__init__.py
@@ -0,0 +1,686 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2004, 2005, 2006 Danilo Šegan <danilo gnome org>.
+# Copyright (c) 2009 Claude Paroz <claude 2xlibre net>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+import os
+import sys
+import re
+import subprocess
+import tempfile
+import gettext
+import libxml2
+
+NULL_STRING = '/dev/null'
+if not os.path.exists('/dev/null'): NULL_STRING = 'NUL'
+
+# Utility functions
+def escapePoString(text):  # Backslash-escape backslashes, double quotes, newlines and tabs in text.
+    return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t")
+
+def unEscapePoString(text):  # Undo only the double-quote and backslash escapes; escaped newlines and tabs are left as they are.
+    return text.replace('\\"', '"').replace('\\\\','\\')
+
+class NoneTranslations:  # Translations stand-in whose every lookup returns None (added as gettext fallback in Main.merge).
+    def gettext(self, message):
+        return None
+
+    def lgettext(self, message):
+        return None
+
+    def ngettext(self, msgid1, msgid2, n):
+        return None
+
+    def lngettext(self, msgid1, msgid2, n):
+        return None
+
+    def ugettext(self, message):
+        return None
+
+    def ungettext(self, msgid1, msgid2, n):
+        return None
+
+class MessageOutput:
+    """ Class to abstract po/pot file """
+    def __init__(self, app):
+        self.app = app
+        self.messages = []  # escaped msgids, in output order
+        self.comments = {}  # escaped msgid -> comment text
+        self.linenos = {}  # escaped msgid -> list of (filename, tag, lineno) tuples
+        self.nowrap = {}  # escaped msgid -> True when recorded with spacepreserve
+        self.translations = []  # escaped strings collected while in msgstr mode
+        self.do_translations = False  # when True, outputMessage keeps duplicates and outputAll fills msgstr from self.translations
+        self.output_msgstr = False # this is msgid mode for outputMessage; True is for msgstr mode
+
+    def translationsFollow(self):
+        """Switch to msgstr mode: strings passed to outputMessage from now on are stored as translations."""
+        self.output_msgstr = True
+
+    def setFilename(self, filename):  # Remember the file name that outputMessage records in message references.
+        self.filename = filename
+
+    def outputMessage(self, text, lineno = 0, comment = None, spacepreserve = False, tag = None):
+        """Record a non-blank string: as a translation in msgstr mode, otherwise as a msgid with its location."""
+        if (text.strip() != ''):
+            t = escapePoString(text)
+            if self.output_msgstr:
+                self.translations.append(t)
+                return
+
+            if self.do_translations or (not t in self.messages):  # new msgid (duplicates are kept when do_translations is set)
+                self.messages.append(t)
+                if spacepreserve:
+                    self.nowrap[t] = True
+                if t in self.linenos:  # dict membership test; .keys() built a fresh list on every call
+                    self.linenos[t].append((self.filename, tag, lineno))
+                else:
+                    self.linenos[t] = [ (self.filename, tag, lineno) ]
+                if (not self.do_translations) and comment and not t in self.comments:
+                    self.comments[t] = comment
+            else:  # msgid already known: only add another reference (and the first comment seen)
+                if t in self.linenos:
+                    self.linenos[t].append((self.filename, tag, lineno))
+                else:
+                    self.linenos[t] = [ (self.filename, tag, lineno) ]
+                if comment and not t in self.comments:
+                    self.comments[t] = comment
+
+    def outputHeader(self, out):  # Write the PO header entry to out, with the current local time as POT-Creation-Date.
+        import time
+        out.write("""msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\\n"
+"POT-Creation-Date: %s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
+"Last-Translator: FULL NAME <EMAIL ADDRESS>\\n"
+"Language-Team: LANGUAGE <LL li org>\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=UTF-8\\n"
+"Content-Transfer-Encoding: 8bit\\n"
+
+""" % (time.strftime("%Y-%m-%d %H:%M%z")))
+
+    def outputAll(self, out):  # Write the header, then one entry per message: comment, references, no-wrap flag, msgid, msgstr.
+        self.outputHeader(out)
+
+        for k in self.messages:
+            if k in self.comments:
+                out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. ")))
+            references = ""
+            for reference in self.linenos[k]:  # each reference is a (filename, tag, lineno) tuple
+                references += "%s:%d(%s) " % (reference[0], reference[2], reference[1])
+            out.write("#: %s\n" % (references.strip()))
+            if k in self.nowrap and self.nowrap[k]:
+                out.write("#, no-wrap\n")
+            out.write("msgid \"%s\"\n" % (k))
+            translation = ""
+            if self.do_translations:
+                if len(self.translations)>0:
+                    translation = self.translations.pop(0)  # translations are consumed in the order they were recorded
+            if translation == k:  # a translation identical to the msgid is written as empty
+                translation = ""
+            out.write("msgstr \"%s\"\n\n" % (translation))
+
+class XMLDocument(object):
+    def __init__(self, filename, app):  # Parse filename with libxml2 and cache the settings taken from app and its mode.
+        self.app = app
+        self.expand_entities = self.app.options.get('expand_entities')
+        self.ignored_tags = self.app.current_mode.getIgnoredTags()
+        ctxt = libxml2.createFileParserCtxt(filename)
+        ctxt.lineNumbers(1)  # presumably makes node.lineNo() usable -- confirm against the libxml2 docs
+        if self.app.options.get('expand_all_entities'):
+            ctxt.replaceEntities(1)
+        ctxt.parseDocument()
+        self.doc = ctxt.doc()
+        if self.doc.name != filename:
+            raise Exception("Error: I tried to open '%s' but got '%s' -- how did that happen?" % (filename, self.doc.name))
+        if self.app.msg:
+            self.app.msg.setFilename(filename)
+        self.isFinalNode = self.app.current_mode.isFinalNode  # shortcut to the mode's final-node test
+
+    def generate_messages(self):  # Point app.msg at this document's name, then serialize the whole document.
+        self.app.msg.setFilename(self.doc.name)
+        self.doSerialize(self.doc)
+
+    def normalizeNode(self, node):  # Normalize whitespace in place for node and, for elements, all of its children.
+        #print >>sys.stderr, "<%s> (%s) [%s]" % (node.name, node.type, node.serialize('utf-8'))
+        if not node:
+            return
+        elif self.app.isSpacePreserveNode(node):  # space-preserving nodes are left untouched
+            return
+        elif node.isText():
+            if node.isBlankNode():
+                if self.app.options.get('expand_entities') or \
+                  (not (node.prev and not node.prev.isBlankNode() and node.next and not node.next.isBlankNode()) ):
+                    #print >>sys.stderr, "BLANK"
+                    node.setContent('')
+            else:
+                node.setContent(re.sub('\s+',' ', node.content))  # collapse each whitespace run to a single space
+
+        elif node.children and node.type == 'element':
+            child = node.children
+            while child:
+                self.normalizeNode(child)
+                child = child.next
+
+    def normalizeString(self, text, spacepreserve = False):
+        """Normalizes string to be used as key for gettext lookup.
+
+        Removes unnecessary whitespace; text is returned unchanged if spacepreserve is set or parsing fails."""
+        if spacepreserve:
+            return text
+        try:
+            # Lets add document DTD so entities are resolved
+            dtd = self.doc.intSubset()
+            tmp = dtd.serialize('utf-8')
+            tmp = tmp + '<norm>%s</norm>' % text
+        except:
+            tmp = '<norm>%s</norm>' % text  # fall back to the bare wrapper when the internal subset cannot be serialized
+
+        try:
+            ctxt = libxml2.createDocParserCtxt(tmp)
+            if self.app.options.get('expand_entities'):
+                ctxt.replaceEntities(1)
+            ctxt.parseDocument()
+            tree = ctxt.doc()
+            newnode = tree.getRootElement()
+        except:
+            print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (text)
+            return text
+
+        self.normalizeNode(newnode)
+
+        result = ''
+        child = newnode.children
+        while child:
+            result += child.serialize('utf-8')
+            child = child.next
+
+        result = re.sub('^ ','', result)  # drop one leading and one trailing space
+        result = re.sub(' $','', result)
+        tree.freeDoc()
+
+        return result
+
+    def stringForEntity(self, node):
+        """Return node's serialization re-parsed inside a <norm> wrapper, with entities replaced if expand_entities is set."""
+        text = node.serialize('utf-8')
+        try:
+            # Lets add document DTD so entities are resolved
+            dtd = self.doc.intSubset()
+            tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text
+            next = True
+        except:
+            tmp = '<norm>%s</norm>' % text
+            next = False
+
+        ctxt = libxml2.createDocParserCtxt(tmp)
+        if self.expand_entities:
+            ctxt.replaceEntities(1)
+        ctxt.parseDocument()
+        tree = ctxt.doc()
+        if next:  # with a DTD prepended, the second top-level node is used (presumably the <norm> element)
+            newnode = tree.children.next
+        else:
+            newnode = tree.children
+
+        result = ''
+        child = newnode.children
+        while child:
+            result += child.serialize('utf-8')
+            child = child.next
+        tree.freeDoc()
+        return result
+
+
+    def myAttributeSerialize(self, node):  # Serialize node's children recursively; a node without children is serialized directly.
+        result = ''
+        if node.children:
+            child = node.children
+            while child:
+                if child.type=='text':
+                    result += self.doc.encodeEntitiesReentrant(child.content)
+                elif child.type=='entity_ref':
+                    if not self.expand_entities:  # keep the reference as &name; when entities are not expanded
+                        result += '&' + child.name + ';'
+                    else:
+                        result += child.content.decode('utf-8')
+                else:
+                    result += self.myAttributeSerialize(child)
+                child = child.next
+        else:
+            result = node.serialize('utf-8')
+        return result
+
+    def startTagForNode(self, node):  # Return the node name followed by its attributes (without angle brackets); 0 for a false node.
+        if not node:
+            return 0
+
+        result = node.name
+        params = ''
+        if node.properties:
+            for p in node.properties:
+                if p.type == 'attribute':
+                    try:
+                        nsprop = p.ns().name + ":" + p.name
+                    except:
+                        nsprop = p.name  # fall back to the plain name when the prefixed name cannot be built
+                    params += " %s=\"%s\"" % (nsprop, self.myAttributeSerialize(p))
+        return result+params
+
+    def endTagForNode(self, node):  # Return node.name, or False for a false node.
+        if not node:
+            return False
+        return node.name
+
+    def ignoreNode(self, node):  # True for ignored tags, DTD nodes and comments -- unless the mode reports the node as final.
+        if self.isFinalNode(node):
+            return False
+        if node.name in self.ignored_tags or node.type in ('dtd', 'comment'):
+            return True
+        return False
+
+    def getCommentForNode(self, node):
+        """Return the stripped text of the comment that directly precedes node, or None.
+
+        Only whitespace-only text nodes may lie between the comment and node."""
+        prev = node.prev
+        while prev and prev.type == 'text' and prev.content.strip() == '':
+            prev = prev.prev
+        if prev and prev.type == 'comment':
+            return prev.content.strip()
+        else:
+            return None
+
+    def replaceAttributeContentsWithText(self, node, text):  # Set the content of the given (attribute) node to text.
+        node.setContent(text)
+
+    def replaceNodeContentsWithText(self, node, text):
+        """Replaces all subnodes of a node with contents of text treated as XML (plain setContent if node has no children)."""
+
+        if node.children:
+            starttag = self.startTagForNode(node)
+            endtag = self.endTagForNode(node)
+
+            # Lets add document DTD so entities are resolved
+            tmp = '<?xml version="1.0" encoding="utf-8" ?>'
+            try:
+                dtd = self.doc.intSubset()
+                tmp = tmp + dtd.serialize('utf-8')
+            except libxml2.treeError:
+                pass
+
+            content = '<%s>%s</%s>' % (starttag, text, endtag)  # translated text wrapped in a copy of node's own tag
+            tmp = tmp + content.encode('utf-8')
+
+            newnode = None
+            try:
+                ctxt = libxml2.createDocParserCtxt(tmp)
+                ctxt.replaceEntities(0)
+                ctxt.parseDocument()
+                newnode = ctxt.doc()
+            except:
+                pass
+
+            if not newnode:  # the translation is not well-formed XML: report it and leave node unchanged
+                print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
+                return
+
+            newelem = newnode.getRootElement()
+
+            if newelem and newelem.children:
+                free = node.children
+                while free:
+                    next = free.next
+                    free.unlinkNode()
+                    free = next
+
+                if node:
+                    copy = newelem.copyNodeList()
+                    next = node.next
+                    node.replaceNode(copy)  # use the copy made above instead of copying the node list a second time
+                    node.next = next
+
+            else:
+                # In practice, this happens with tags such as "<para>    </para>" (only whitespace in between)
+                pass
+        else:
+            node.setContent(text)
+
+    def autoNodeIsFinal(self, node):
+        """Returns True for a non-blank text node or a node with a non-blank direct text child; cached on the node."""
+        if hasattr(node, '__autofinal__'):
+            return node.__autofinal__
+        if node.name in self.ignored_tags:  # ignored tags are never final
+            node.__autofinal__ = False
+            return False
+        if node.isText() and node.content.strip()!='':
+            node.__autofinal__ = True
+            return True
+        final = False
+        child = node.children
+        while child:
+            if child.type in ['text'] and  child.content.strip()!='':
+                final = True
+                break
+            child = child.next
+
+        node.__autofinal__ = final
+        return final
+
+
+    def worthOutputting(self, node, noauto = False):
+        """Returns True if node is "worth outputting", otherwise False.
+
+        Node is "worth outputting", if none of the parents
+        isFinalNode, and it contains non-blank text and entities.
+        """
+        if noauto and hasattr(node, '__worth__'):  # result cached by an earlier call
+            return node.__worth__
+        elif not noauto and hasattr(node, '__autoworth__'):  # cached result of the automatic variant
+            return node.__autoworth__
+        worth = True
+        parent = node.parent
+        final = self.isFinalNode(node) and node.name not in self.ignored_tags
+        while not final and parent:
+            if self.isFinalNode(parent):
+                final = True # reset if we've got to one final tag
+            if final and (parent.name not in self.ignored_tags) and self.worthOutputting(parent):
+                worth = False
+                break
+            parent = parent.parent
+        if not worth:
+            node.__worth__ = False
+            return False
+
+        if noauto:
+            node.__worth__ = worth
+            return worth
+        else:  # without noauto the final answer comes from autoNodeIsFinal
+            node.__autoworth__ = self.autoNodeIsFinal(node)
+            return node.__autoworth__
+
+    def processAttribute(self, node, attr):  # Translate attr in place (merge) or record its text as a message (other operations).
+        if not node or not attr or not self.worthOutputting(node=node, noauto=True):
+            return
+
+        outtxt = self.normalizeString(attr.content)
+        if self.app.operation == 'merge':
+            translation = self.app.getTranslation(outtxt)
+            if translation is not None:  # None means no translation was found: leave the attribute untouched
+                self.replaceAttributeContentsWithText(attr, translation.encode('utf-8'))
+        else:
+            self.app.msg.outputMessage(outtxt, node.lineNo(),  "", spacepreserve=False, tag = node.name + ":" + attr.name)
+
+    def processElementTag(self, node, replacements, restart = False):
+        """Process an element node; returns the tuple (starttag, outtxt, endtag, translation)."""
+        if node.type != 'element':
+            raise Exception("You must pass node with node.type=='element'.")
+
+        # Translate attributes if needed
+        if node.properties and self.app.current_mode.getTreatedAttributes():
+            for p in node.properties:
+                if p.name in self.app.current_mode.getTreatedAttributes():
+                    self.processAttribute(node, p)
+
+        outtxt = ''
+        if restart:  # restart begins a fresh placeholder list instead of extending the caller's
+            myrepl = []
+        else:
+            myrepl = replacements
+
+        submsgs = []
+
+        child = node.children
+        while child:
+            if (self.isFinalNode(child)) or (child.type == 'element' and self.worthOutputting(child)):
+                myrepl.append(self.processElementTag(child, myrepl, True))
+                outtxt += '<placeholder-%d/>' % (len(myrepl))  # placeholders are numbered from 1
+            else:
+                if child.type == 'element':
+                    (starttag, content, endtag, translation) = self.processElementTag(child, myrepl, False)
+                    outtxt += '<%s>%s</%s>' % (starttag, content, endtag)
+                else:
+                    outtxt += self.doSerialize(child)
+            child = child.next
+
+        if self.app.operation == 'merge':
+            norm_outtxt = self.normalizeString(outtxt, self.app.isSpacePreserveNode(node))
+            translation = self.app.getTranslation(norm_outtxt)
+        else:
+            translation = outtxt.decode('utf-8')
+
+        starttag = self.startTagForNode(node)
+        endtag = self.endTagForNode(node)
+
+        worth = self.worthOutputting(node)
+        if not translation:  # no translation available: fall back to the original text
+            translation = outtxt.decode('utf-8')
+            if worth and self.app.options.get('mark_untranslated'):
+                node.setLang('C')
+
+        if restart or worth:
+            for i, repl in enumerate(myrepl):  # substitute each placeholder with the child's tags around its translation
+                replacement = '<%s>%s</%s>' % (repl[0], repl[3], repl[2])
+                translation = translation.replace('<placeholder-%d/>' % (i+1), replacement)
+
+            if worth:
+                if self.app.operation == 'merge':
+                    self.replaceNodeContentsWithText(node, translation)
+                else:
+                    norm_outtxt = self.normalizeString(outtxt, self.app.isSpacePreserveNode(node))
+                    self.app.msg.outputMessage(norm_outtxt, node.lineNo(), self.getCommentForNode(node), self.app.isSpacePreserveNode(node), tag = node.name)
+
+        return (starttag, outtxt, endtag, translation)
+
+
+    def isExternalGeneralParsedEntity(self, node):  # True if the debug dump of node mentions EXTERNAL_GENERAL_PARSED_ENTITY.
+        try:
+            # it would be nice if debugDumpNode could use StringIO, but it apparently cannot
+            tmp = tempfile.TemporaryFile()
+            node.debugDumpNode(tmp,0)
+            tmp.seek(0)
+            tmpstr = tmp.read()
+            tmp.close()
+        except:
+            # We fail silently, and replace all entities if we cannot
+            # write .xml2po-entitychecking
+            # !!! This is not very nice thing to do, but I don't know if
+            #     raising an exception is any better
+            return False
+        return tmpstr.find('EXTERNAL_GENERAL_PARSED_ENTITY') != -1
+
+    def doSerialize(self, node):
+        """Serializes a node and its children, emitting PO messages along the way.
+
+        node is the node to serialize; ignored nodes yield '' and element
+        nodes are handled by processElementTag.
+        """
+
+        if self.ignoreNode(node):
+            return ''
+        elif not node.children:
+            return node.serialize("utf-8")
+        elif node.type == 'entity_ref':
+            if self.isExternalGeneralParsedEntity(node):
+                return node.serialize('utf-8')
+            else:
+                return self.stringForEntity(node) #content #content #serialize("utf-8")
+        elif node.type == 'entity_decl':
+            return node.serialize('utf-8') #'<%s>%s</%s>' % (startTagForNode(node), node.content, node.name)
+        elif node.type == 'text':
+            return node.serialize('utf-8')
+        elif node.type == 'element':
+            repl = []
+            (starttag, content, endtag, translation) = self.processElementTag(node, repl, True)
+            return '<%s>%s</%s>' % (starttag, content, endtag)
+        else:  # any other node type: concatenate the serialization of its children
+            child = node.children
+            outtxt = ''
+            while child:
+                outtxt += self.doSerialize(child)
+                child = child.next
+            return outtxt
+
+def xml_error_handler(arg, ctxt):
+    # Deliberately does nothing: registered as the libxml2 error handler in Main.__init__ to deactivate error messages
+    pass
+
+class Main(object):
+    def __init__(self, mode, operation, output, options):  # Silence libxml2 errors, load the mode and pick the output stream.
+        libxml2.registerErrorHandler(xml_error_handler, None)
+        self.operation = operation
+        self.options = options
+        self.msg = None  # MessageOutput, created by to_pot() and reuse()
+        self.gt = None  # gettext catalog, loaded by merge()
+        self.current_mode = self.load_mode(mode)()
+        # Prepare output
+        if operation == 'update':  # update() later feeds this temporary file to msgmerge
+            self.out = tempfile.TemporaryFile()
+        elif output == '-':
+            self.out = sys.stdout
+        else:
+            self.out = file(output, 'w')
+
+    def load_mode(self, modename):  # Return the <modename>XmlMode class from xml2po.modes.<modename>; falls back to 'basic', exits 1 if that fails too.
+        try:
+            module = __import__('xml2po.modes.%s' % modename, globals(), locals(), ['%sXmlMode' % modename])
+            return getattr(module, '%sXmlMode' % modename)
+        except (ImportError, AttributeError):
+            if modename == 'basic':
+                sys.stderr.write("Unable to find xml2po modes. Please check your xml2po installation.\n")
+                sys.exit(1)
+            else:
+                sys.stderr.write("Unable to load mode '%s'. Falling back to 'basic' mode with automatic detection (-a).\n" % modename)
+                return self.load_mode('basic')
+
+    def to_pot(self, xmlfiles):
+        """ Produce a pot file from the list of 'xmlfiles'; raises IOError for an unreadable file, exits 1 on a parse error """
+        self.msg = MessageOutput(self)
+        for xmlfile in xmlfiles:
+            if not os.access(xmlfile, os.R_OK):
+                raise IOError("Unable to read file '%s'" % xmlfile)
+            try:
+                doc = XMLDocument(xmlfile, self)
+            except Exception, e:
+                print >> sys.stderr, "Unable to parse XML file '%s': %s" % (xmlfile, str(e))
+                sys.exit(1)
+            self.current_mode.preProcessXml(doc.doc, self.msg)
+            doc.generate_messages()
+        self.output_po()
+
+    def merge(self, mofile, xmlfile):
+        """ Merge translations from mofile into xmlfile and write the translated XML to self.out (exits 1 on failure) """
+        if not os.access(xmlfile, os.R_OK):
+            raise IOError("Unable to read file '%s'" % xmlfile)
+        try:
+            doc = XMLDocument(xmlfile, self)
+        except Exception, e:
+            print >> sys.stderr, str(e)
+            sys.exit(1)
+
+        try:
+            mfile = open(mofile, "rb")
+        except IOError:
+            print >> sys.stderr, "Can't open MO file '%s'." % (mofile)
+            sys.exit(1)  # without a catalog nothing can be merged (mfile would be undefined below)
+        self.gt = gettext.GNUTranslations(mfile)
+        self.gt.add_fallback(NoneTranslations())  # untranslated messages are looked up as None
+        # Has preProcessXml use cases for merge?  (self.current_mode.preProcessXml(doc.doc, self.msg) is not called here)
+
+        doc.doSerialize(doc.doc)
+        tcmsg = self.current_mode.getStringForTranslators()
+        outtxt = self.getTranslation(tcmsg)
+        self.current_mode.postProcessXmlTranslation(doc.doc, self.options.get('translationlanguage'), outtxt)
+        self.out.write(doc.doc.serialize('utf-8', 1))
+
+    def reuse(self, origxml, xmlfile):
+        """ Produce a po file with msgids from xmlfile and translations taken from origxml (same structure) """
+        self.msg = MessageOutput(self)
+        self.msg.do_translations = True
+        if not os.access(xmlfile, os.R_OK):
+            raise IOError("Unable to read file '%s'" % xmlfile)
+        if not os.access(origxml, os.R_OK):
+            raise IOError("Unable to read file '%s'" % origxml)  # name the file that actually failed the check
+        try:
+            doc = XMLDocument(xmlfile, self)
+        except Exception, e:
+            print >> sys.stderr, str(e)
+            sys.exit(1)
+        doc.generate_messages()
+
+        self.msg.translationsFollow()  # strings from origxml are recorded as translations
+        try:
+            doc = XMLDocument(origxml, self)
+        except Exception, e:
+            print >> sys.stderr, str(e)
+            sys.exit(1)
+        doc.generate_messages()
+        self.output_po()
+
+    def update(self, xmlfiles, lang_file):
+        """ Regenerate the pot from xmlfiles and msgmerge it into the existing po file (lang_file) """
+        if not os.access(lang_file, os.W_OK):
+            raise IOError("'%s' does not exist or is not writable." % lang_file)
+        self.to_pot(xmlfiles)
+        lang = os.path.basename(lang_file).split(".")[0]  # file name up to its first dot
+
+        sys.stderr.write("Merging translations for %s: \n" % (lang))
+        self.out.seek(0)  # rewind the pot just written so msgmerge can read it on stdin
+        merge_cmd = subprocess.Popen(["msgmerge", "-o", ".tmp.%s.po" % lang, lang_file, "-"],
+                                     stdin=self.out, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        cmdout, cmderr = merge_cmd.communicate()
+        if merge_cmd.returncode:
+             raise Exception("Error during msgmerge command.")
+        else:
+            result = subprocess.call(["mv", ".tmp.%s.po" % lang, lang_file])
+            if result:
+                raise Exception("Error: cannot rename file.")
+            else:
+                subprocess.call(["msgfmt", "-cv", "-o", NULL_STRING, lang_file])  # msgfmt run with its output file set to NULL_STRING
+
+    def getTranslation(self, text):
+        """Returns a translation via gettext for specified snippet.
+
+        Blank text, or any text while no catalog (self.gt) is loaded, is returned unchanged.
+        """
+        #print >>sys.stderr,"getTranslation('%s')" % (text.encode('utf-8'))
+        if not text or text.strip() == '':
+            return text
+        if self.gt:
+            res = self.gt.ugettext(text.decode('utf-8'))
+            return res
+
+        return text
+
+    def output_po(self):
+        """ Add the mode's getStringForTranslators() message (if any), then write the po/pot file to self.out """
+        tcmsg = self.current_mode.getStringForTranslators()
+        tccom = self.current_mode.getCommentForTranslators()
+        if tcmsg:
+            self.msg.outputMessage(tcmsg, lineno=0, comment=tccom)
+
+        self.msg.outputAll(self.out)
+
+    # **** XML utility functions ****
+    def isSpacePreserveNode(self, node):  # True if node.getSpacePreserve() == 1 or its tag is in the mode's space-preserve tags.
+        if node.getSpacePreserve() == 1:
+            return True
+        else:
+            return node.name in self.current_mode.getSpacePreserveTags()
+
diff --git a/tools/xml2po/modes/.gitignore b/tools/xml2po/modes/.gitignore
new file mode 100644
index 0000000..52e4e61
--- /dev/null
+++ b/tools/xml2po/modes/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+*.pyo
diff --git a/tools/xml2po/modes/__init__.py b/tools/xml2po/modes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tools/xml2po/modes/basic.py b/tools/xml2po/modes/basic.py
new file mode 100644
index 0000000..e2ef7a4
--- /dev/null
+++ b/tools/xml2po/modes/basic.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo gnome org>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+# Basic default class; inherit from it to construct other special-handling classes
+#
+
+class basicXmlMode:
+    """Base class with default handling; inherit from it to build special-handling modes."""
+    def getIgnoredTags(self):
+        """Return the list of tag names to be ignored."""
+        return ['itemizedlist', 'orderedlist', 'variablelist', 'varlistentry']
+
+    def getFinalTags(self):
+        """Return the list of tag names to be considered 'final'."""
+        text_tags = ['para', 'title', 'releaseinfo', 'revnumber', 'date']
+        list_tags = ['itemizedlist', 'orderedlist', 'variablelist', 'varlistentry']
+        return text_tags + list_tags + ['term']
+
+    def isFinalNode(self, node):
+        """Return True if node is a final tag, or has children that are all final.
+
+        Blank nodes and comments among the children are not taken into account."""
+        if node.type == 'element' and node.name in self.getFinalTags():
+            return True
+        if not node.children:
+            return False
+        kid = node.children
+        while kid:
+            if not (kid.isBlankNode() or kid.type == 'comment' or self.isFinalNode(kid)):
+                return False
+            kid = kid.next
+        return True
+
+    def getSpacePreserveTags(self):
+        """Return the list of tag names in which spaces are to be preserved."""
+        return []
+
+    def getTreatedAttributes(self):
+        """Return the list of tag attributes whose content is to be translated."""
+        return []
+
+    def preProcessXml(self, doc, msg):
+        """Hook to preprocess a document and perhaps add some messages; does nothing here."""
+        return None
+
+    def postProcessXmlTranslation(self, doc, language, translators):
+        """Hook to set a language and translators in the "doc" tree; does nothing here.
+
+        "translators" is a string consisting of translator credits.
+        "language" is a simple string.
+        "doc" is a libxml2.xmlDoc instance."""
+        return None
+
+    def getStringForTranslators(self):
+        """Return None, or a string to be added to PO files.
+
+        A common example is 'translator-credits'."""
+        return None
+
+    def getCommentForTranslators(self):
+        """Return None, or a comment to go next to the string for crediting translators.
+
+        It should explain the format of the string provided by getStringForTranslators()."""
+        return None
diff --git a/tools/xml2po/modes/docbook.py b/tools/xml2po/modes/docbook.py
new file mode 100644
index 0000000..276a9d9
--- /dev/null
+++ b/tools/xml2po/modes/docbook.py
@@ -0,0 +1,207 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo gnome org>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+# This implements special instructions for handling DocBook XML documents
+# in a better way.
+#
+#  This means:
+#   - better handling of nested complicated tags (i.e. definitions of
+#     ignored-tags and final-tags)
+#   - support for merging translator-credits back into DocBook articles
+#   - support for setting a language
+#
+
+# We use "currentXmlMode" class name for all modes
+#  -- it might be better to have it named docbookXmlMode, but it will make loading harder;
+#     it is also not necessary until we start supporting extracting strings from more
+#     than one document type at the same time
+#
+import re
+import libxml2
+import os
+import sys
+try:
+    # Hashlib is new in Python 2.5
+    from hashlib import md5 as md5_new
+except ImportError:
+    from md5 import new as md5_new
+
+from basic import basicXmlMode
+
+class docbookXmlMode(basicXmlMode):
+    """Class for special handling of DocBook document types.
+
+    It sets lang attribute on article or book elements, and adds translators
+    as copyright holders to articleinfo/bookinfo."""
+    def __init__(self):
+        # Tags in these two lists are reported both as ignored and as final tags.
+        self.lists = ['itemizedlist', 'orderedlist', 'variablelist', 'segmentedlist',
+                      'simplelist', 'calloutlist', 'varlistentry']
+        self.objects = ['figure', 'textobject', 'imageobject', 'mediaobject', 'screenshot']
+
+    def getIgnoredTags(self):
+        "Returns array of tags to be ignored."
+        return self.objects + self.lists
+
+    def getFinalTags(self):
+        "Returns array of tags to be considered 'final'."
+        return ['para', 'formalpara', 'simpara',
+                'releaseinfo', 'revnumber', 'title',
+                'date', 'term', 'programlisting'] + self.objects + self.lists
+
+    def getSpacePreserveTags(self):
+        "Returns array of tags in which spaces are to be preserved."
+        return ['classsynopsisinfo', 'computeroutput', 'funcsynopsisinfo',
+                'literallayout', 'programlisting', 'screen', 'synopsis', 'userinput']
+
+    def getStringForTranslators(self):
+        """Returns string which will be used to credit translators."""
+        return "translator-credits"
+
+    def getCommentForTranslators(self):
+        """Returns a comment to be added next to string for crediting translators."""
+        return """Put one translator per line, in the form of NAME <EMAIL>, YEAR1, YEAR2"""
+
+    def _find_articleinfo(self, node):
+        """Returns the first articleinfo/bookinfo found depth-first at or below node, or None."""
+        if node.name == 'articleinfo' or node.name == 'bookinfo':
+            return node
+        child = node.children
+        while child:
+            ret = self._find_articleinfo(child)
+            if ret:
+                return ret
+            child = child.next
+        return None
+
+    def _find_lastcopyright(self, node):
+        """Returns the last "copyright" child of node, or its last child if there is none.
+
+        Returns None when node has no children at all."""
+        if not node.children:
+            return None
+        last = node.lastChild()
+        tmp = last
+        while tmp:
+            if tmp.name == "copyright":
+                return tmp
+            tmp = tmp.prev
+        return last
+
+    def _md5_for_file(self, filename):
+        """Returns the hexadecimal MD5 digest of the contents of filename."""
+        digest = md5_new()
+        infile = open(filename, "rb")
+        try:
+            # Read in chunks of 4096 bytes instead of loading the whole file.
+            chunk = infile.read(4096)
+            while chunk:
+                digest.update(chunk)
+                chunk = infile.read(4096)
+        finally:
+            # Close the file even if reading fails.
+            infile.close()
+        return digest.hexdigest()
+
+    def _output_images(self, node, msg):
+        """Adds an "@@image" message (file name and md5) for each imagedata element under node."""
+        if node and node.type=='element' and node.name=='imagedata':
+            # Use .fileref to construct new message
+            attr = node.prop("fileref")
+            if attr:
+                # fileref is looked up relative to the directory of msg.filename
+                fullpath = os.path.join(os.path.dirname(msg.filename), attr)
+                if os.path.exists(fullpath):
+                    md5sum = self._md5_for_file(fullpath)
+                else:
+                    md5sum = "THIS FILE DOESN'T EXIST"
+                    sys.stderr.write("Warning: image file '%s' not found.\n" % fullpath)
+
+                msg.outputMessage("@@image: '%s'; md5=%s" % (attr, md5sum), node.lineNo(),
+                                  "When image changes, this message will be marked fuzzy or untranslated for you.\n"+
+                                  "It doesn't matter what you translate it to: it's not used at all.")
+        elif node and node.children:
+            child = node.children
+            while child:
+                self._output_images(child,msg)
+                child = child.next
+
+    def preProcessXml(self, doc, msg):
+        """Add additional messages of interest here."""
+        root = doc.getRootElement()
+        self._output_images(root,msg)
+
+    def postProcessXmlTranslation(self, doc, language, translators):
+        """Sets a language and translators in "doc" tree.
+
+        "language" is set as the lang attribute of the article or book element.
+        "translators" is a string consisting of "Name <email>, years" entries,
+        one translator per line; each matching line becomes a copyright
+        element (year and holder) in articleinfo/bookinfo."""
+
+        root = doc.getRootElement()
+        # DocBook documents can be something other than article, handle that as well in the future
+        while root and root.name != 'article' and root.name != 'book':
+            root = root.next
+        if not root:
+            # Neither an article nor a book was found: nothing to set.
+            return
+        root.setProp('lang', language)
+
+        # Credits that are missing or left untranslated are not added to the document.
+        if not translators or translators == self.getStringForTranslators():
+            return
+
+        # Now, let's find 'articleinfo' (it can be something else, but this goes along with 'article')
+        ai = self._find_articleinfo(root)
+        if not ai:
+            return
+
+        # Now, let's do one translator at a time
+        for line in translators.split("\n"):
+            match = re.match(r"^([^<,]+)\s*(?:<([^>,]+)>)?,\s*(.*)$", line.strip())
+            if not match:
+                continue
+            name, email, years = match.groups()
+            # Insert after the last existing copyright (or after the last child of ai).
+            last = self._find_lastcopyright(ai)
+            copy = libxml2.newNode("copyright")
+            if last:
+                copy = last.addNextSibling(copy)
+            else:
+                ai.addChild(copy)
+            if years:
+                copy.newChild(None, "year", years.encode('utf-8'))
+            # The name group of the pattern cannot be empty, so a holder always exists.
+            holder = name
+            if email:
+                holder = name + "(%s)" % email
+            copy.newChild(None, "holder", holder.encode('utf-8'))
+
+# Perform some tests when run standalone
+if __name__ == '__main__':
+    mode = docbookXmlMode()
+    print("Ignored tags       : " + repr(mode.getIgnoredTags()))
+    print("Final tags         : " + repr(mode.getFinalTags()))
+    print("Space-preserve tags: " + repr(mode.getSpacePreserveTags()))
+
+    print("Credits from string: '%s'" % mode.getStringForTranslators())
+    print("Explanation for credits:\n\t'%s'" % mode.getCommentForTranslators())
+
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]