[gimp-help-2] [xml2po] Add xml2po files from gnome-doc-utils
- From: Ulf-D. Ehlert <ulfehlert src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [gimp-help-2] [xml2po] Add xml2po files from gnome-doc-utils
- Date: Thu, 17 Dec 2009 20:01:30 +0000 (UTC)
commit 0734cfee0f4128925223bfcaa1cf620e25cad376
Author: Ulf-D. Ehlert <ulfehlert svn gnome org>
Date: Wed Dec 16 20:21:14 2009 +0100
[xml2po] Add xml2po files from gnome-doc-utils
Update xml2po based on gnome-doc-utils v0.18.0 (git 2009-12-09).
Makefile.GNU | 2 +-
tools/.gitignore | 2 +
tools/xml2po.py | 193 +++++++++++
tools/xml2po/.gitignore | 2 +
tools/xml2po/__init__.py | 686 ++++++++++++++++++++++++++++++++++++++++
tools/xml2po/modes/.gitignore | 2 +
tools/xml2po/modes/basic.py | 80 +++++
tools/xml2po/modes/docbook.py | 207 ++++++++++++
8 files changed, 1173 insertions(+), 1 deletions(-)
---
diff --git a/Makefile.GNU b/Makefile.GNU
index 4e84670..a2051fa 100644
--- a/Makefile.GNU
+++ b/Makefile.GNU
@@ -28,7 +28,7 @@ XSLTFLAGS = --nonet
XMLLINT = xmllint
XMLLINTFLAGS = --nonet
-XML2PO = tools/xml2po
+XML2PO = tools/xml2po.py
MSGWIDTH = 79
MSGUNIQ = msguniq
diff --git a/tools/.gitignore b/tools/.gitignore
new file mode 100644
index 0000000..52e4e61
--- /dev/null
+++ b/tools/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+*.pyo
diff --git a/tools/xml2po.py b/tools/xml2po.py
new file mode 100755
index 0000000..8e26ca7
--- /dev/null
+++ b/tools/xml2po.py
@@ -0,0 +1,193 @@
+#!/usr/bin/python
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2004, 2005, 2006 Danilo Šegan <danilo gnome org>.
+# Copyright (c) 2009 Claude Paroz <claude 2xlibre net>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+# xml2po -- translate XML documents
+VERSION = "0.18.0"
+
+# Versioning system (I have used this for a long time, so let's explain it to
+# those Linux-versioning-scheme addicts):
+# 1.0.* are unstable, development versions
+# 1.1 will be first stable release (release 1), and 1.1.* bugfix releases
+# 2.0.* will be unstable-feature-development stage (milestone 1)
+# 2.1.* unstable development betas (milestone 2)
+# 2.2 second stable release (release 2), and 2.2.* bugfix releases
+# ...
+#
+import sys
+import os
+import getopt
+import tempfile
+
+# Name of the null device that msgfmt's standard output is redirected to;
+# 'NUL' is used when '/dev/null' does not exist.
+NULL_STRING = '/dev/null'
+if not os.path.exists('/dev/null'): NULL_STRING = 'NUL'
+
+def usage (with_help = False):
+    """Print the command synopsis to stderr.
+
+    When with_help is true the option list and usage examples follow
+    the synopsis.
+    """
+    program = sys.argv[0]
+    print >> sys.stderr, "Usage: %s [OPTIONS] [XMLFILE]..." % program
+    if not with_help:
+        return
+    help_text = """
+OPTIONS may be some of:
+ -a --automatic-tags Automatically decides if tags are to be considered
+ "final" or not
+ -k --keep-entities Don't expand entities
+ -e --expand-all-entities Expand ALL entities (including SYSTEM ones)
+ -m --mode=TYPE Treat tags as type TYPE (default: docbook)
+ -o --output=FILE Print resulting text (XML or POT) to FILE
+ -p --po-file=FILE Specify PO file containing translation, and merge
+ -r --reuse=FILE Specify translated XML file with the same structure
+ -t --translation=FILE Specify MO file containing translation, and merge
+ -u --update-translation=LANG.po Updates a PO file using msgmerge program
+
+ -l --language=LANG Set language of the translation to LANG
+ --mark-untranslated Set 'xml:lang="C"' on untranslated tags
+
+ -v --version Output version of the xml2po program
+
+ -h --help Output this message
+
+EXAMPLES:
+ To create a POTemplate book.pot from input files chapter1.xml and
+ chapter2.xml, run the following:
+ %(command)s -o book.pot chapter1.xml chapter2.xml
+
+ After translating book.pot into de.po, merge the translations back,
+ using -p option for each XML file:
+ %(command)s -p de.po chapter1.xml > chapter1.de.xml
+ %(command)s -p de.po chapter2.xml > chapter2.de.xml
+""" % {'command': program}
+    print >> sys.stderr, help_text
+
+
+def main(argv):
+    """Run the xml2po command line.
+
+    argv is the argument list without the program name.  The options are
+    parsed, an xml2po.Main object is built and one operation is run: POT
+    extraction (the default), merge (-p / -t), PO update (-u) or reuse of
+    an already translated XML file (-r).
+
+    Exit codes passed to sys.exit(): 0 after --version / --help, 2 for
+    usage errors, 3 when merging without an MO file, 5 when the output
+    file cannot be opened and 7 when msgfmt cannot compile the PO file.
+    """
+    if not argv:
+        usage()
+        sys.exit(2)
+
+    name = os.path.join(os.path.dirname(__file__), '..')
+    if os.path.exists(os.path.join(name, 'tests')):
+        print >> sys.stderr, 'Running from source folder, modifying PYTHONPATH'
+        sys.path.insert(0, name)
+
+    # Imported here because sys.path may just have been adjusted above.
+    from xml2po import Main
+    import subprocess
+
+    # Default parameters
+    default_mode = 'docbook'
+    operation = 'pot' # 'pot', 'merge', 'update'
+    output = '-' # this means to stdout
+    options = {
+        'mark_untranslated' : False,
+        'expand_entities' : True,
+        'expand_all_entities' : False,
+    }
+    origxml = ''
+    mofile = None
+    mofile_tmppath = None
+
+    try:
+        opts, remaining_args = getopt.getopt(argv, 'avhkem:t:o:p:u:r:l:',
+            ['automatic-tags','version', 'help', 'keep-entities', 'expand-all-entities', 'mode=', 'translation=',
+             'output=', 'po-file=', 'update-translation=', 'reuse=', 'language=', 'mark-untranslated' ])
+    except getopt.GetoptError:
+        usage(True)
+        sys.exit(2)
+
+    # Everything below may create the temporary MO file, so it runs inside
+    # try/finally: the file is removed on every exit path, sys.exit() included.
+    try:
+        for opt, arg in opts:
+            if opt in ('-m', '--mode'):
+                default_mode = arg
+            elif opt in ('-a', '--automatic-tags'):
+                default_mode = 'basic'
+            elif opt in ('-k', '--keep-entities'):
+                options['expand_entities'] = False
+            elif opt in ('--mark-untranslated',):
+                options['mark_untranslated'] = True
+            elif opt in ('-e', '--expand-all-entities'):
+                options['expand_all_entities'] = True
+            elif opt in ('-l', '--language'):
+                options['translationlanguage'] = arg
+            elif opt in ('-t', '--translation'):
+                mofile = arg
+                operation = 'merge'
+                if 'translationlanguage' not in options:
+                    options['translationlanguage'] = os.path.split(os.path.splitext(mofile)[0])[1]
+            elif opt in ('-r', '--reuse'):
+                origxml = arg
+            elif opt in ('-u', '--update-translation'):
+                operation = 'update'
+                po_to_update = arg
+            elif opt in ('-p', '--po-file'):
+                pofile = arg
+                operation = 'merge'
+                if 'translationlanguage' not in options:
+                    options['translationlanguage'] = os.path.split(os.path.splitext(pofile)[0])[1]
+                if mofile_tmppath is None:
+                    mofile_handle, mofile_tmppath = tempfile.mkstemp()
+                    os.close(mofile_handle)
+                # Run msgfmt with an argument list rather than a shell
+                # command line, so file names containing spaces or shell
+                # metacharacters are passed through unchanged.
+                devnull = open(NULL_STRING, 'w')
+                try:
+                    try:
+                        status = subprocess.call(['msgfmt', '-o', mofile_tmppath, pofile],
+                                                 stdout=devnull)
+                    except OSError:
+                        # msgfmt could not be started at all.
+                        status = 1
+                finally:
+                    devnull.close()
+                if status:
+                    sys.exit(7)
+                mofile = mofile_tmppath
+            elif opt in ('-o', '--output'):
+                output = arg
+            elif opt in ('-v', '--version'):
+                print VERSION
+                sys.exit(0)
+            elif opt in ('-h', '--help'):
+                usage(True)
+                sys.exit(0)
+
+        if operation == 'update' and output != "-":
+            print >> sys.stderr, "Option '-o' is not yet supported when updating translations directly. Ignoring this option."
+
+        # Treat remaining arguments as XML files.  pop() takes them from the
+        # end of the list, so they are collected in reverse order.
+        filenames = []
+        while remaining_args:
+            filenames.append(remaining_args.pop())
+
+        try:
+            xml2po_main = Main(default_mode, operation, output, options)
+        except IOError:
+            print >> sys.stderr, "Error: cannot open file %s for writing." % (output)
+            sys.exit(5)
+
+        if operation == 'merge':
+            if len(filenames) > 1:
+                print >> sys.stderr, "Error: You can merge translations with only one XML file at a time."
+                sys.exit(2)
+
+            if not mofile:
+                print >> sys.stderr, "Error: You must specify MO file when merging translations."
+                sys.exit(3)
+
+            if not filenames:
+                print >> sys.stderr, "Error: You must specify an XML file when merging translations."
+                sys.exit(2)
+
+            xml2po_main.merge(mofile, filenames[0])
+
+        elif operation == 'update':
+            xml2po_main.update(filenames, po_to_update)
+
+        elif origxml:
+            if not filenames:
+                print >> sys.stderr, "Error: You must specify an XML file when reusing translations."
+                sys.exit(2)
+            xml2po_main.reuse(origxml, filenames[0])
+
+        else:
+            # Standard POT producing
+            xml2po_main.to_pot(filenames)
+    finally:
+        if mofile_tmppath and os.path.exists(mofile_tmppath):
+            os.remove(mofile_tmppath)
+
+# Main program start
+# This file is a command-line entry point only: running it calls main()
+# with the command-line arguments, importing it raises NotImplementedError.
+if __name__ == '__main__':
+ main(sys.argv[1:])
+else:
+ raise NotImplementedError
diff --git a/tools/xml2po/.gitignore b/tools/xml2po/.gitignore
new file mode 100644
index 0000000..52e4e61
--- /dev/null
+++ b/tools/xml2po/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+*.pyo
diff --git a/tools/xml2po/__init__.py b/tools/xml2po/__init__.py
new file mode 100644
index 0000000..23486f7
--- /dev/null
+++ b/tools/xml2po/__init__.py
@@ -0,0 +1,686 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2004, 2005, 2006 Danilo Šegan <danilo gnome org>.
+# Copyright (c) 2009 Claude Paroz <claude 2xlibre net>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+import os
+import sys
+import re
+import subprocess
+import tempfile
+import gettext
+import libxml2
+
+# Name of the null device, passed to msgfmt as its output file when a PO
+# file is only being checked; 'NUL' is used when '/dev/null' does not exist.
+NULL_STRING = '/dev/null'
+if not os.path.exists('/dev/null'): NULL_STRING = 'NUL'
+
+# Utility functions
+def escapePoString(text):
+    """Return text with backslash, double quote, newline and tab written as PO escape sequences."""
+    # The backslash rule runs first so that the backslashes introduced by
+    # the later rules are not doubled again.
+    escaped = text
+    for raw, quoted in (('\\', '\\\\'), ('"', "\\\""), ("\n", "\\n"), ("\t", "\\t")):
+        escaped = escaped.replace(raw, quoted)
+    return escaped
+
+def unEscapePoString(text):
+    """Undo the double-quote and backslash escaping of a PO string.
+
+    Escaped newlines and tabs are left as they are.
+    """
+    unquoted = text.replace('\\"', '"')
+    return unquoted.replace('\\\\', '\\')
+
+class NoneTranslations:
+    """Translations stand-in whose every lookup answers None.
+
+    Each method accepts the arguments of its gettext counterpart and
+    ignores them.
+    """
+
+    def gettext(self, message):
+        """Return None for any message."""
+
+    def lgettext(self, message):
+        """Return None for any message."""
+
+    def ngettext(self, msgid1, msgid2, n):
+        """Return None for any plural lookup."""
+
+    def lngettext(self, msgid1, msgid2, n):
+        """Return None for any plural lookup."""
+
+    def ugettext(self, message):
+        """Return None for any message."""
+
+    def ungettext(self, msgid1, msgid2, n):
+        """Return None for any plural lookup."""
+
+class MessageOutput:
+    """ Class to abstract po/pot file """
+    def __init__(self, app):
+        self.app = app
+        # Escaped msgids, in the order they were first seen.
+        self.messages = []
+        # msgid -> extracted comment shown to translators.
+        self.comments = {}
+        # msgid -> list of (filename, tag, lineno) references.
+        self.linenos = {}
+        # msgid -> True when the message must not be re-wrapped.
+        self.nowrap = {}
+        # Escaped translations collected once translationsFollow() was called.
+        self.translations = []
+        self.do_translations = False
+        self.output_msgstr = False # this is msgid mode for outputMessage; True is for msgstr mode
+        # Set by setFilename(); initialised here so that a message recorded
+        # before any file was named does not raise AttributeError.
+        self.filename = None
+
+    def translationsFollow(self):
+        """Indicate that what follows are translations."""
+        self.output_msgstr = True
+
+    def setFilename(self, filename):
+        """Remember the file name used in the references of later messages."""
+        self.filename = filename
+
+    def outputMessage(self, text, lineno = 0, comment = None, spacepreserve = False, tag = None):
+        """Adds a string to the list of messages.
+
+        Blank text is ignored.  In msgstr mode the escaped text is only
+        appended to the translations.  Otherwise the message is recorded
+        (once, unless do_translations is set) together with a
+        (filename, tag, lineno) reference, the no-wrap flag and, when
+        do_translations is off, the first comment seen for it.
+        """
+        if text.strip() == '':
+            return
+
+        t = escapePoString(text)
+        if self.output_msgstr:
+            self.translations.append(t)
+            return
+
+        if self.do_translations or (t not in self.messages):
+            self.messages.append(t)
+            if spacepreserve:
+                self.nowrap[t] = True
+
+        # Every occurrence adds a reference, whether the message is new or not.
+        self.linenos.setdefault(t, []).append((self.filename, tag, lineno))
+
+        if comment and (not self.do_translations) and t not in self.comments:
+            self.comments[t] = comment
+
+    def outputHeader(self, out):
+        """Write the PO header entry, stamped with the current time, to out."""
+        import time
+        out.write("""msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\\n"
+"POT-Creation-Date: %s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
+"Last-Translator: FULL NAME <EMAIL ADDRESS>\\n"
+"Language-Team: LANGUAGE <LL li org>\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=UTF-8\\n"
+"Content-Transfer-Encoding: 8bit\\n"
+
+""" % (time.strftime("%Y-%m-%d %H:%M%z")))
+
+    def outputAll(self, out):
+        """Write the header followed by one PO entry per recorded message to out.
+
+        When do_translations is set, translations are consumed in order and
+        a translation equal to its msgid is written as an empty msgstr.
+        """
+        self.outputHeader(out)
+
+        for k in self.messages:
+            if k in self.comments:
+                out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. ")))
+            references = " ".join(["%s:%d(%s)" % (filename, lineno, tag)
+                                   for (filename, tag, lineno) in self.linenos[k]])
+            out.write("#: %s\n" % (references.strip()))
+            if k in self.nowrap and self.nowrap[k]:
+                out.write("#, no-wrap\n")
+            out.write("msgid \"%s\"\n" % (k))
+            translation = ""
+            if self.do_translations:
+                if len(self.translations)>0:
+                    translation = self.translations.pop(0)
+                if translation == k:
+                    translation = ""
+            out.write("msgstr \"%s\"\n\n" % (translation))
+
+class XMLDocument(object):
+ def __init__(self, filename, app):
+ """Parse the XML file 'filename' with libxml2 and keep the document in self.doc.
+
+ app is read for its options dictionary, its current_mode object and
+ its msg collector, which is told the file name when it is set.
+ Raises Exception when the parsed document's name differs from filename.
+ """
+ self.app = app
+ self.expand_entities = self.app.options.get('expand_entities')
+ self.ignored_tags = self.app.current_mode.getIgnoredTags()
+ ctxt = libxml2.createFileParserCtxt(filename)
+ # node.lineNo() is used later for the message references.
+ ctxt.lineNumbers(1)
+ if self.app.options.get('expand_all_entities'):
+ ctxt.replaceEntities(1)
+ ctxt.parseDocument()
+ self.doc = ctxt.doc()
+ if self.doc.name != filename:
+ raise Exception("Error: I tried to open '%s' but got '%s' -- how did that happen?" % (filename, self.doc.name))
+ if self.app.msg:
+ self.app.msg.setFilename(filename)
+ # Shortcut to the mode's test for "final" nodes.
+ self.isFinalNode = self.app.current_mode.isFinalNode
+
+ def generate_messages(self):
+ """Walk the whole document and hand its messages to self.app.msg."""
+ # The file name is set again here because another document may have
+ # been parsed since this one was constructed.
+ self.app.msg.setFilename(self.doc.name)
+ self.doSerialize(self.doc)
+
+ def normalizeNode(self, node):
+ """Normalize whitespace in node and its descendants, in place.
+
+ Nothing is changed for a missing node or for a node that
+ app.isSpacePreserveNode() reports as space-preserving.  In text
+ nodes every run of whitespace becomes a single space; blank text
+ nodes are emptied, except between two non-blank siblings when
+ entities are not expanded.
+ """
+ #print >>sys.stderr, "<%s> (%s) [%s]" % (node.name, node.type, node.serialize('utf-8'))
+ if not node:
+ return
+ elif self.app.isSpacePreserveNode(node):
+ return
+ elif node.isText():
+ if node.isBlankNode():
+ if self.app.options.get('expand_entities') or \
+ (not (node.prev and not node.prev.isBlankNode() and node.next and not node.next.isBlankNode()) ):
+ #print >>sys.stderr, "BLANK"
+ node.setContent('')
+ else:
+ node.setContent(re.sub('\s+',' ', node.content))
+
+ elif node.children and node.type == 'element':
+ # Recurse into every child of an element.
+ child = node.children
+ while child:
+ self.normalizeNode(child)
+ child = child.next
+
+ def normalizeString(self, text, spacepreserve = False):
+ """Normalizes string to be used as key for gettext lookup.
+
+ Removes all unnecessary whitespace.  The text is wrapped in a <norm>
+ element (preceded by the document's internal DTD subset when that can
+ be serialized), parsed, passed through normalizeNode() and serialized
+ again with one leading and one trailing space removed.
+
+ Returns text unchanged when spacepreserve is true, and also (after
+ printing an error to stderr) when the wrapped text cannot be parsed."""
+ if spacepreserve:
+ return text
+ try:
+ # Lets add document DTD so entities are resolved
+ dtd = self.doc.intSubset()
+ tmp = dtd.serialize('utf-8')
+ tmp = tmp + '<norm>%s</norm>' % text
+ except:
+ # The internal subset could not be used: parse the bare wrapper.
+ tmp = '<norm>%s</norm>' % text
+
+ try:
+ ctxt = libxml2.createDocParserCtxt(tmp)
+ if self.app.options.get('expand_entities'):
+ ctxt.replaceEntities(1)
+ ctxt.parseDocument()
+ tree = ctxt.doc()
+ newnode = tree.getRootElement()
+ except:
+ print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (text)
+ return text
+
+ self.normalizeNode(newnode)
+
+ # Serialize the children only, leaving out the <norm> wrapper itself.
+ result = ''
+ child = newnode.children
+ while child:
+ result += child.serialize('utf-8')
+ child = child.next
+
+ result = re.sub('^ ','', result)
+ result = re.sub(' $','', result)
+ tree.freeDoc()
+
+ return result
+
+ def stringForEntity(self, node):
+ """Replaces entities in the node.
+
+ The serialized node is wrapped in a <norm> element, parsed again
+ (with entity replacement when self.expand_entities is set) and the
+ serialized children of that wrapper are returned as one string."""
+ text = node.serialize('utf-8')
+ try:
+ # Lets add document DTD so entities are resolved
+ dtd = self.doc.intSubset()
+ tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text
+ # The DTD comes first, so <norm> is the second child of the new tree.
+ next = True
+ except:
+ tmp = '<norm>%s</norm>' % text
+ next = False
+
+ ctxt = libxml2.createDocParserCtxt(tmp)
+ if self.expand_entities:
+ ctxt.replaceEntities(1)
+ ctxt.parseDocument()
+ tree = ctxt.doc()
+ if next:
+ newnode = tree.children.next
+ else:
+ newnode = tree.children
+
+ result = ''
+ child = newnode.children
+ while child:
+ result += child.serialize('utf-8')
+ child = child.next
+ tree.freeDoc()
+ return result
+
+
+ def myAttributeSerialize(self, node):
+ """Return the value of an attribute node as a string.
+
+ Text children are entity-encoded.  Entity references are written as
+ '&name;' when entities are not expanded, otherwise their content is
+ used.  Any other child is serialized recursively, and a node without
+ children is serialized as a whole.
+ """
+ result = ''
+ if node.children:
+ child = node.children
+ while child:
+ if child.type=='text':
+ result += self.doc.encodeEntitiesReentrant(child.content)
+ elif child.type=='entity_ref':
+ if not self.expand_entities:
+ result += '&' + child.name + ';'
+ else:
+ result += child.content.decode('utf-8')
+ else:
+ result += self.myAttributeSerialize(child)
+ child = child.next
+ else:
+ result = node.serialize('utf-8')
+ return result
+
+ def startTagForNode(self, node):
+ """Return the inside of node's start tag: its name followed by its attributes.
+
+ The angle brackets are not included.  Returns 0 when node is missing.
+ """
+ if not node:
+ return 0
+
+ result = node.name
+ params = ''
+ if node.properties:
+ for p in node.properties:
+ if p.type == 'attribute':
+ try:
+ nsprop = p.ns().name + ":" + p.name
+ except:
+ # Asking for the namespace failed: use the bare attribute name.
+ nsprop = p.name
+ params += " %s=\"%s\"" % (nsprop, self.myAttributeSerialize(p))
+ return result+params
+
+ def endTagForNode(self, node):
+     """Return the name to use in node's end tag, or False when node is missing."""
+     if node:
+         return node.name
+     return False
+
+ def ignoreNode(self, node):
+     """Tell whether node is to be skipped during serialization.
+
+     Final nodes are never skipped; otherwise a node is skipped when its
+     name is one of the ignored tags or it is a DTD or comment node.
+     """
+     if self.isFinalNode(node):
+         return False
+     return node.name in self.ignored_tags or node.type in ('dtd', 'comment')
+
+ def getCommentForNode(self, node):
+     """Return the stripped text of the comment directly preceding node, or None.
+
+     Only whitespace-only text nodes may stand between the comment and node.
+     """
+     sibling = node.prev
+     # Step backwards over whitespace-only text nodes.
+     while sibling and sibling.type == 'text' and sibling.content.strip() == '':
+         sibling = sibling.prev
+     if not sibling or sibling.type != 'comment':
+         return None
+     return sibling.content.strip()
+
+ def replaceAttributeContentsWithText(self, node, text):
+ """Set the content of the attribute node to text."""
+ node.setContent(text)
+
+ def replaceNodeContentsWithText(self, node, text):
+     """Replaces all subnodes of a node with contents of text treated as XML.
+
+     text is wrapped in a copy of node's own start and end tags, preceded
+     by the document's internal DTD subset when there is one, and parsed
+     without entity replacement.  When parsing fails an error is printed
+     to stderr and node is left untouched.  A node without children just
+     gets text as its content.
+     """
+     if not node.children:
+         node.setContent(text)
+         return
+
+     starttag = self.startTagForNode(node)
+     endtag = self.endTagForNode(node)
+
+     # Lets add document DTD so entities are resolved
+     tmp = '<?xml version="1.0" encoding="utf-8" ?>'
+     try:
+         dtd = self.doc.intSubset()
+         tmp = tmp + dtd.serialize('utf-8')
+     except libxml2.treeError:
+         pass
+
+     content = '<%s>%s</%s>' % (starttag, text, endtag)
+     tmp = tmp + content.encode('utf-8')
+
+     newnode = None
+     try:
+         ctxt = libxml2.createDocParserCtxt(tmp)
+         ctxt.replaceEntities(0)
+         ctxt.parseDocument()
+         newnode = ctxt.doc()
+     except:
+         # Any parser failure is reported below through the empty newnode.
+         pass
+
+     if not newnode:
+         print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
+         return
+
+     newelem = newnode.getRootElement()
+
+     if newelem and newelem.children:
+         # Detach the old children, then put a copy of the freshly parsed
+         # element in place of node.
+         free = node.children
+         while free:
+             next = free.next
+             free.unlinkNode()
+             free = next
+
+         # Only one copy is made; an earlier second copy was never used.
+         next = node.next
+         node.replaceNode(newelem.copyNodeList())
+         node.next = next
+
+     else:
+         # In practice, this happens with tags such as "<para> </para>" (only whitespace in between)
+         pass
+
+ def autoNodeIsFinal(self, node):
+ """Returns True if node is a non-blank text node or has a non-blank text child.
+
+ Nodes whose name is one of the ignored tags are never final.  The
+ answer is stored on the node as __autofinal__ and reused by later calls."""
+ if hasattr(node, '__autofinal__'):
+ return node.__autofinal__
+ if node.name in self.ignored_tags:
+ node.__autofinal__ = False
+ return False
+ if node.isText() and node.content.strip()!='':
+ node.__autofinal__ = True
+ return True
+ final = False
+ child = node.children
+ while child:
+ if child.type in ['text'] and child.content.strip()!='':
+ final = True
+ break
+ child = child.next
+
+ node.__autofinal__ = final
+ return final
+
+
+ def worthOutputting(self, node, noauto = False):
+ """Returns True if node is "worth outputting", otherwise False.
+
+ Node is "worth outputting", if none of the parents
+ isFinalNode, and it contains non-blank text and entities.
+
+ With noauto set, the content check (autoNodeIsFinal) is skipped and
+ only the parents decide.  Results are stored on the node, as
+ __worth__ for noauto calls and as __autoworth__ otherwise.
+ """
+ if noauto and hasattr(node, '__worth__'):
+ return node.__worth__
+ elif not noauto and hasattr(node, '__autoworth__'):
+ return node.__autoworth__
+ worth = True
+ parent = node.parent
+ final = self.isFinalNode(node) and node.name not in self.ignored_tags
+ # Climb the ancestors until one that is final, not ignored and itself
+ # worth outputting is found: this node is then part of that ancestor.
+ while not final and parent:
+ if self.isFinalNode(parent):
+ final = True # reset if we've got to one final tag
+ if final and (parent.name not in self.ignored_tags) and self.worthOutputting(parent):
+ worth = False
+ break
+ parent = parent.parent
+ if not worth:
+ node.__worth__ = False
+ return False
+
+ if noauto:
+ node.__worth__ = worth
+ return worth
+ else:
+ node.__autoworth__ = self.autoNodeIsFinal(node)
+ return node.__autoworth__
+
+ def processAttribute(self, node, attr):
+     """Translate or extract the content of one attribute of node.
+
+     Nothing happens when node or attr is missing or when node is not
+     worth outputting.  In merge mode the attribute content is replaced
+     by its translation; an attribute for which no translation is found
+     keeps its original content.  In the other modes the normalized
+     content is recorded as a message tagged "<node name>:<attr name>".
+     """
+     if not node or not attr or not self.worthOutputting(node=node, noauto=True):
+         return
+
+     outtxt = self.normalizeString(attr.content)
+     if self.app.operation == 'merge':
+         translation = self.app.getTranslation(outtxt)
+         # A missing translation comes back as None; calling .encode() on
+         # it would raise AttributeError, so leave the attribute as it is.
+         if translation is not None:
+             self.replaceAttributeContentsWithText(attr, translation.encode('utf-8'))
+     else:
+         self.app.msg.outputMessage(outtxt, node.lineNo(), "", spacepreserve=False,
+                                    tag = node.name + ":" + attr.name)
+
+ def processElementTag(self, node, replacements, restart = False):
+ """Process node with node.type == 'element'.
+
+ replacements is the list that collects the results of child elements
+ which are translated on their own; with restart set a fresh list is
+ started for this node instead.  Such children appear in the text of
+ this node as <placeholder-N/>, N counting from 1.
+
+ Returns the tuple (starttag, outtxt, endtag, translation).
+ Raises Exception when node is not an element."""
+ if node.type != 'element':
+ raise Exception("You must pass node with node.type=='element'.")
+
+ # Translate attributes if needed
+ if node.properties and self.app.current_mode.getTreatedAttributes():
+ for p in node.properties:
+ if p.name in self.app.current_mode.getTreatedAttributes():
+ self.processAttribute(node, p)
+
+ outtxt = ''
+ if restart:
+ myrepl = []
+ else:
+ myrepl = replacements
+
+ submsgs = []
+
+ child = node.children
+ while child:
+ if (self.isFinalNode(child)) or (child.type == 'element' and self.worthOutputting(child)):
+ # This child becomes a message of its own; leave a placeholder here.
+ myrepl.append(self.processElementTag(child, myrepl, True))
+ outtxt += '<placeholder-%d/>' % (len(myrepl))
+ else:
+ if child.type == 'element':
+ (starttag, content, endtag, translation) = self.processElementTag(child, myrepl, False)
+ outtxt += '<%s>%s</%s>' % (starttag, content, endtag)
+ else:
+ outtxt += self.doSerialize(child)
+ child = child.next
+
+ if self.app.operation == 'merge':
+ norm_outtxt = self.normalizeString(outtxt, self.app.isSpacePreserveNode(node))
+ translation = self.app.getTranslation(norm_outtxt)
+ else:
+ translation = outtxt.decode('utf-8')
+
+ starttag = self.startTagForNode(node)
+ endtag = self.endTagForNode(node)
+
+ worth = self.worthOutputting(node)
+ if not translation:
+ # No translation available: fall back to the original text.
+ translation = outtxt.decode('utf-8')
+ if worth and self.app.options.get('mark_untranslated'):
+ node.setLang('C')
+
+ if restart or worth:
+ # Put the processed child elements back in place of their placeholders.
+ for i, repl in enumerate(myrepl):
+ replacement = '<%s>%s</%s>' % (repl[0], repl[3], repl[2])
+ translation = translation.replace('<placeholder-%d/>' % (i+1), replacement)
+
+ if worth:
+ if self.app.operation == 'merge':
+ self.replaceNodeContentsWithText(node, translation)
+ else:
+ norm_outtxt = self.normalizeString(outtxt, self.app.isSpacePreserveNode(node))
+ self.app.msg.outputMessage(norm_outtxt, node.lineNo(), self.getCommentForNode(node), self.app.isSpacePreserveNode(node), tag = node.name)
+
+ return (starttag, outtxt, endtag, translation)
+
+
+ def isExternalGeneralParsedEntity(self, node):
+ """Tell whether the debug dump of node mentions EXTERNAL_GENERAL_PARSED_ENTITY.
+
+ The dump is written to a temporary file and read back.  Returns False
+ when anything in that process fails.
+ """
+ try:
+ # it would be nice if debugDumpNode could use StringIO, but it apparently cannot
+ tmp = tempfile.TemporaryFile()
+ node.debugDumpNode(tmp,0)
+ tmp.seek(0)
+ tmpstr = tmp.read()
+ tmp.close()
+ except:
+ # We fail silently, and replace all entities if we cannot
+ # write the temporary dump file
+ # !!! This is not very nice thing to do, but I don't know if
+ # raising an exception is any better
+ return False
+ return tmpstr.find('EXTERNAL_GENERAL_PARSED_ENTITY') != -1
+
+ def doSerialize(self, node):
+ """Serializes a node and its children, emitting PO messages along the way.
+
+ node is the node to serialize.  Ignored nodes give an empty string.
+ Elements are handed to processElementTag() and returned with their
+ surrounding tags; for node types not handled explicitly the
+ serializations of the children are concatenated.
+ """
+
+ if self.ignoreNode(node):
+ return ''
+ elif not node.children:
+ return node.serialize("utf-8")
+ elif node.type == 'entity_ref':
+ if self.isExternalGeneralParsedEntity(node):
+ return node.serialize('utf-8')
+ else:
+ return self.stringForEntity(node) #content #content #serialize("utf-8")
+ elif node.type == 'entity_decl':
+ return node.serialize('utf-8') #'<%s>%s</%s>' % (startTagForNode(node), node.content, node.name)
+ elif node.type == 'text':
+ return node.serialize('utf-8')
+ elif node.type == 'element':
+ repl = []
+ (starttag, content, endtag, translation) = self.processElementTag(node, repl, True)
+ return '<%s>%s</%s>' % (starttag, content, endtag)
+ else:
+ child = node.children
+ outtxt = ''
+ while child:
+ outtxt += self.doSerialize(child)
+ child = child.next
+ return outtxt
+
+def xml_error_handler(arg, ctxt):
+    """Ignore the reported error, which deactivates error messages from the validation."""
+    return None
+
+class Main(object):
+ def __init__(self, mode, operation, output, options):
+ """Set up the application object.
+
+ mode is the name of the mode to load, operation the operation name
+ that the methods check ('merge' and 'update' are tested for), output
+ a file name or '-' for standard output, and options the dictionary of
+ option values.  Opening the output file may fail with the error
+ raised by file().
+ """
+ # Silence libxml2's own error reporting.
+ libxml2.registerErrorHandler(xml_error_handler, None)
+ self.operation = operation
+ self.options = options
+ self.msg = None
+ self.gt = None
+ self.current_mode = self.load_mode(mode)()
+ # Prepare output
+ if operation == 'update':
+ # The POT is kept in a temporary file and fed to msgmerge by update().
+ self.out = tempfile.TemporaryFile()
+ elif output == '-':
+ self.out = sys.stdout
+ else:
+ self.out = file(output, 'w')
+
+ def load_mode(self, modename):
+ """Return the class '<modename>XmlMode' from the module xml2po.modes.<modename>.
+
+ When the module or the class cannot be found, the 'basic' mode is
+ loaded instead after a warning on stderr; when 'basic' itself cannot
+ be loaded the program exits with status 1.
+ """
+ try:
+ module = __import__('xml2po.modes.%s' % modename, globals(), locals(), ['%sXmlMode' % modename])
+ return getattr(module, '%sXmlMode' % modename)
+ except (ImportError, AttributeError):
+ if modename == 'basic':
+ sys.stderr.write("Unable to find xml2po modes. Please check your xml2po installation.\n")
+ sys.exit(1)
+ else:
+ sys.stderr.write("Unable to load mode '%s'. Falling back to 'basic' mode with automatic detection (-a).\n" % modename)
+ return self.load_mode('basic')
+
+ def to_pot(self, xmlfiles):
+ """ Produce a pot file from the list of 'xmlfiles'
+
+ Raises IOError for a file that is not readable; a file that cannot
+ be parsed is reported on stderr and ends the program with status 1.
+ """
+ self.msg = MessageOutput(self)
+ for xmlfile in xmlfiles:
+ if not os.access(xmlfile, os.R_OK):
+ raise IOError("Unable to read file '%s'" % xmlfile)
+ try:
+ doc = XMLDocument(xmlfile, self)
+ except Exception, e:
+ print >> sys.stderr, "Unable to parse XML file '%s': %s" % (xmlfile, str(e))
+ sys.exit(1)
+ # Let the mode inspect the document before messages are extracted.
+ self.current_mode.preProcessXml(doc.doc, self.msg)
+ doc.generate_messages()
+ self.output_po()
+
+ def merge(self, mofile, xmlfile):
+     """ Merge translations from mofile into xmlfile to generate a translated XML file
+
+     The translated document is written to self.out.  Raises IOError when
+     xmlfile is not readable.  An XML file that cannot be parsed or an MO
+     file that cannot be opened is reported on stderr and ends the
+     program with status 1.
+     """
+     if not os.access(xmlfile, os.R_OK):
+         raise IOError("Unable to read file '%s'" % xmlfile)
+     try:
+         doc = XMLDocument(xmlfile, self)
+     except Exception, e:
+         print >> sys.stderr, str(e)
+         sys.exit(1)
+
+     try:
+         mfile = open(mofile, "rb")
+     except IOError:
+         # Without the catalog there is nothing to merge; stop here instead
+         # of running into a NameError on mfile below.
+         print >> sys.stderr, "Can't open MO file '%s'." % (mofile)
+         sys.exit(1)
+     try:
+         # GNUTranslations reads the whole catalog while it is constructed,
+         # so the file can be closed right afterwards.
+         self.gt = gettext.GNUTranslations(mfile)
+     finally:
+         mfile.close()
+     # Messages missing from the catalog come back as None.
+     self.gt.add_fallback(NoneTranslations())
+     # Has preProcessXml use cases for merge?
+     #self.current_mode.preProcessXml(doc.doc, self.msg)
+
+     doc.doSerialize(doc.doc)
+     tcmsg = self.current_mode.getStringForTranslators()
+     outtxt = self.getTranslation(tcmsg)
+     self.current_mode.postProcessXmlTranslation(doc.doc, self.options.get('translationlanguage'), outtxt)
+     self.out.write(doc.doc.serialize('utf-8', 1))
+
+ def reuse(self, origxml, xmlfile):
+     """ Produce a po file from xmlfile pot and using translations from origxml
+
+     The messages of xmlfile become the msgids and the messages of
+     origxml, taken in the same order, become the msgstrs.  Raises
+     IOError when either file is not readable; a file that cannot be
+     parsed is reported on stderr and ends the program with status 1.
+     """
+     self.msg = MessageOutput(self)
+     self.msg.do_translations = True
+     if not os.access(xmlfile, os.R_OK):
+         raise IOError("Unable to read file '%s'" % xmlfile)
+     if not os.access(origxml, os.R_OK):
+         # Name the file that actually failed the check.
+         raise IOError("Unable to read file '%s'" % origxml)
+     try:
+         doc = XMLDocument(xmlfile, self)
+     except Exception, e:
+         print >> sys.stderr, str(e)
+         sys.exit(1)
+     doc.generate_messages()
+
+     # From here on the collector stores messages as translations.
+     self.msg.translationsFollow()
+     try:
+         doc = XMLDocument(origxml, self)
+     except Exception, e:
+         print >> sys.stderr, str(e)
+         sys.exit(1)
+     doc.generate_messages()
+     self.output_po()
+
+ def update(self, xmlfiles, lang_file):
+     """ Merge the produced pot with an existing po file (lang_file)
+
+     The POT generated from xmlfiles is piped into msgmerge, the result
+     replaces lang_file and msgfmt is then run to check the updated file.
+     Raises IOError when lang_file is missing or not writable and
+     Exception when msgmerge fails or the result cannot be moved in place.
+     """
+     import shutil
+
+     if not os.access(lang_file, os.W_OK):
+         raise IOError("'%s' does not exist or is not writable." % lang_file)
+     self.to_pot(xmlfiles)
+     lang = os.path.basename(lang_file).split(".")[0]
+     merged_file = ".tmp.%s.po" % lang
+
+     sys.stderr.write("Merging translations for %s: \n" % (lang))
+     # Rewind the temporary POT so that msgmerge reads it from the start.
+     self.out.seek(0)
+     merge_cmd = subprocess.Popen(["msgmerge", "-o", merged_file, lang_file, "-"],
+                                  stdin=self.out, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+     cmdout, cmderr = merge_cmd.communicate()
+     if merge_cmd.returncode:
+         # Show what msgmerge complained about before giving up.
+         if cmderr:
+             sys.stderr.write(cmderr)
+         raise Exception("Error during msgmerge command.")
+
+     # shutil.move is used instead of an external 'mv' command, which is
+     # not available on every platform.
+     try:
+         shutil.move(merged_file, lang_file)
+     except EnvironmentError:
+         raise Exception("Error: cannot rename file.")
+
+     subprocess.call(["msgfmt", "-cv", "-o", NULL_STRING, lang_file])
+
+ def getTranslation(self, text):
+     """Look up the translation of a snippet through gettext.
+
+     text is the string to look for.  It is returned unchanged when it
+     is empty or blank, or when no catalog has been loaded.
+     """
+     #print >>sys.stderr,"getTranslation('%s')" % (text.encode('utf-8'))
+     blank = (not text) or text.strip() == ''
+     if blank or not self.gt:
+         return text
+     return self.gt.ugettext(text.decode('utf-8'))
+
+ def output_po(self):
+ """ Write the resulting po/pot file to specified output """
+ # The mode may supply an extra message (with a comment for translators)
+ # that is added to the collected messages before they are written.
+ tcmsg = self.current_mode.getStringForTranslators()
+ tccom = self.current_mode.getCommentForTranslators()
+ if tcmsg:
+ self.msg.outputMessage(tcmsg, lineno=0, comment=tccom)
+
+ self.msg.outputAll(self.out)
+
+ # **** XML utility functions ****
+ def isSpacePreserveNode(self, node):
+     """Tell whether whitespace inside node has to be kept as it is.
+
+     That is the case when the node reports space preservation itself or
+     when its name is one of the mode's space-preserving tags.
+     """
+     preserve_tags_hit = lambda: node.name in self.current_mode.getSpacePreserveTags()
+     return node.getSpacePreserve() == 1 or preserve_tags_hit()
+
diff --git a/tools/xml2po/modes/.gitignore b/tools/xml2po/modes/.gitignore
new file mode 100644
index 0000000..52e4e61
--- /dev/null
+++ b/tools/xml2po/modes/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+*.pyo
diff --git a/tools/xml2po/modes/__init__.py b/tools/xml2po/modes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tools/xml2po/modes/basic.py b/tools/xml2po/modes/basic.py
new file mode 100644
index 0000000..e2ef7a4
--- /dev/null
+++ b/tools/xml2po/modes/basic.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo gnome org>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+# Basic default class; inherit from it to construct other special-handling classes
+#
+
+class basicXmlMode:
+    """Base mode: default answers for the hooks a document-type mode can override."""
+
+    def getIgnoredTags(self):
+        """Return the list of tag names that are ignored."""
+        ignored = ['itemizedlist', 'orderedlist', 'variablelist', 'varlistentry']
+        return ignored
+
+    def getFinalTags(self):
+        """Return the list of tag names that are considered 'final'."""
+        final = ['para', 'title', 'releaseinfo', 'revnumber',
+                 'date', 'itemizedlist', 'orderedlist',
+                 'variablelist', 'varlistentry', 'term']
+        return final
+
+    def isFinalNode(self, node):
+        """Tell whether node is final.
+
+        An element named in getFinalTags() is final.  Any other node is
+        final when it has children and each of them is blank, a comment
+        or final itself.
+        """
+        #node.type =='text' or not node.children or
+        if node.type == 'element' and node.name in self.getFinalTags():
+            return True
+        child = node.children
+        if not child:
+            return False
+        while child:
+            if not (child.isBlankNode() or child.type == 'comment' or self.isFinalNode(child)):
+                return False
+            child = child.next
+        return True
+
+    def getSpacePreserveTags(self):
+        """Return the list of tag names in which spaces are preserved (none here)."""
+        return []
+
+    def getTreatedAttributes(self):
+        """Return the list of attribute names whose content is translated (none here)."""
+        return []
+
+    def preProcessXml(self, doc, msg):
+        """Hook run on a document before extraction; does nothing here."""
+        return None
+
+    def postProcessXmlTranslation(self, doc, language, translators):
+        """Hook for setting the language and translators in the "doc" tree; does nothing here.
+
+        "translators" is a string of translator credits.
+        "language" is a simple string.
+        "doc" is a libxml2.xmlDoc instance."""
+        return None
+
+    def getStringForTranslators(self):
+        """Return None or a string to be added to PO files.
+
+        A common example is 'translator-credits'."""
+        return None
+
+    def getCommentForTranslators(self):
+        """Return a comment to go with the string for crediting translators.
+
+        It should explain the format of the string provided by getStringForTranslators()."""
+        return None
diff --git a/tools/xml2po/modes/docbook.py b/tools/xml2po/modes/docbook.py
new file mode 100644
index 0000000..276a9d9
--- /dev/null
+++ b/tools/xml2po/modes/docbook.py
@@ -0,0 +1,207 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2004, 2005, 2006 Danilo Segan <danilo gnome org>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+# This implements special instructions for handling DocBook XML documents
+# in a better way.
+#
+# This means:
+# — better handling of nested complicated tags (i.e. definitions of
+# ignored-tags and final-tags)
+# — support for merging translator-credits back into DocBook articles
+# — support for setting a language
+#
+
+# We use "currentXmlMode" class name for all modes
+# -- it might be better to have it named docbookXmlMode, but it will make loading harder;
+# it is also not necessary until we start supporting extracting strings from more
+# than one document type at the same time
+#
+import re
+import libxml2
+import os
+import sys
+try:
+ # Hashlib is new in Python 2.5
+ from hashlib import md5 as md5_new
+except ImportError:
+ from md5 import new as md5_new
+
+from basic import basicXmlMode
+
+class docbookXmlMode(basicXmlMode):
+ """Class for special handling of DocBook document types.
+
+ preProcessXml() emits one "@@image" message, carrying an MD5 checksum,
+ for every 'imagedata' element that has a 'fileref' attribute.
+ postProcessXmlTranslation() sets the 'lang' attribute on the 'article'
+ or 'book' element and adds one 'copyright' element per credited
+ translator inside 'articleinfo' or 'bookinfo'."""
+ def __init__(self):
+ # Tag groups reused by both getIgnoredTags() and getFinalTags().
+ self.lists = ['itemizedlist', 'orderedlist', 'variablelist',
+ 'segmentedlist', 'simplelist', 'calloutlist', 'varlistentry' ]
+ self.objects = [ 'figure', 'textobject', 'imageobject', 'mediaobject',
+ 'screenshot' ]
+
+ def getIgnoredTags(self):
+ """Returns array of tags to be ignored.
+
+ The array is the object tags followed by the list tags set up
+ in __init__()."""
+ return self.objects + self.lists
+
+ def getFinalTags(self):
+ """Returns array of tags to be considered 'final'.
+
+ The array is a fixed set of text-carrying tags followed by the
+ object tags and the list tags set up in __init__()."""
+ return ['para', 'formalpara', 'simpara',
+ 'releaseinfo', 'revnumber', 'title',
+ 'date', 'term', 'programlisting'] + self.objects + self.lists
+
+ def getSpacePreserveTags(self):
+ "Returns array of tags in which spaces are to be preserved."
+ return [
+ 'classsynopsisinfo',
+ 'computeroutput',
+ 'funcsynopsisinfo',
+ 'literallayout',
+ 'programlisting',
+ 'screen',
+ 'synopsis',
+ 'userinput'
+ ]
+
+ def getStringForTranslators(self):
+ """Returns string which will be used to credit translators."""
+ return "translator-credits"
+
+ def getCommentForTranslators(self):
+ """Returns a comment to be added next to string for crediting translators."""
+ return """Put one translator per line, in the form of NAME <EMAIL>, YEAR1, YEAR2"""
+
+ def _find_articleinfo(self, node):
+ """Returns the first node named 'articleinfo' or 'bookinfo', or None.
+
+ "node" itself is tested first, then its children are searched
+ depth-first in sibling order. Only the node name is compared."""
+ if node.name == 'articleinfo' or node.name == 'bookinfo':
+ return node
+ child = node.children
+ while child:
+ ret = self._find_articleinfo(child)
+ if ret:
+ return ret
+ child = child.next
+ return None
+
+ def _find_lastcopyright(self, node):
+ """Returns the child of "node" after which a new copyright goes.
+
+ Returns None when "node" has no children. Otherwise the children
+ are scanned backwards from the last one and the first node named
+ "copyright" that is met is returned; when there is none, the last
+ child of "node" is returned instead."""
+ if not node.children:
+ return None
+ last = node.lastChild()
+ tmp = last
+ while tmp:
+ if tmp.name == "copyright":
+ last = tmp
+ break
+ tmp = tmp.prev
+ return last
+
+ def _md5_for_file(self, filename):
+ """Returns the hexadecimal MD5 digest of the content of "filename".
+
+ The file is opened in binary mode and read in 4096-byte chunks."""
+ # NOTE(review): the local names "hash" and "input" shadow builtins, and
+ # the file is not closed if read() raises -- consider try/finally.
+ hash = md5_new()
+ input = open(filename, "rb")
+ read = input.read(4096)
+ while read:
+ hash.update(read)
+ read = input.read(4096)
+ input.close()
+ return hash.hexdigest()
+
+ def _output_images(self, node, msg):
+ """Emits an "@@image" message for the images found at or below "node".
+
+ The tree is walked recursively. For an 'imagedata' element with a
+ non-empty 'fileref' property, the file is looked up relative to the
+ directory of msg.filename and msg.outputMessage() is called with the
+ fileref, the MD5 checksum of the file and the line number of the
+ element. Children of an 'imagedata' element are not visited."""
+ if node and node.type=='element' and node.name=='imagedata':
+ # Use .fileref to construct new message
+ attr = node.prop("fileref")
+ if attr:
+ dir = os.path.dirname(msg.filename)
+ fullpath = os.path.join(dir, attr)
+ if os.path.exists(fullpath):
+ hash = self._md5_for_file(fullpath)
+ else:
+ # A placeholder takes the place of the checksum and a warning goes to stderr.
+ hash = "THIS FILE DOESN'T EXIST"
+ print >>sys.stderr, "Warning: image file '%s' not found." % fullpath
+
+ # NOTE(review): presumably still inside "if attr:" (otherwise "hash" could be
+ # unbound); indentation is lost in this copy -- confirm against upstream.
+ msg.outputMessage("@@image: '%s'; md5=%s" % (attr, hash), node.lineNo(),
+ "When image changes, this message will be marked fuzzy or untranslated for you.\n"+
+ "It doesn't matter what you translate it to: it's not used at all.")
+ elif node and node.children:
+ child = node.children
+ while child:
+ self._output_images(child,msg)
+ child = child.next
+
+
+ def preProcessXml(self, doc, msg):
+ """Add additional messages of interest here.
+
+ Currently this emits the image messages produced by
+ _output_images() for the whole tree below the root element of "doc"."""
+ root = doc.getRootElement()
+ self._output_images(root,msg)
+
+ def postProcessXmlTranslation(self, doc, language, translators):
+ """Sets a language and translators in "doc" tree.
+
+ "translators" is a string consisting of "Name <email>, years" pairs
+ of each translator, separated by newlines.
+
+ The 'lang' property of the 'article' or 'book' element is set to
+ "language"; when no such element is found nothing is changed.
+ Nothing more is done when "translators" is empty or still equal to
+ getStringForTranslators(). Otherwise one 'copyright' element with
+ 'year' and 'holder' children is added per line that matches the
+ expected form; other lines are skipped."""
+
+ root = doc.getRootElement()
+ # DocBook documents can be something other than article, handle that as well in the future
+ # Only the root element and its following siblings are examined here.
+ while root and root.name != 'article' and root.name != 'book':
+ root = root.next
+ if root and (root.name == 'article' or root.name == 'book'):
+ root.setProp('lang', language)
+ else:
+ return
+
+ if translators == self.getStringForTranslators():
+ return
+ elif translators:
+ # Now, let's find 'articleinfo' (it can be something else, but this goes along with 'article')
+ ai = self._find_articleinfo(root)
+ if not ai:
+ return
+
+ # Now, let's do one translator at a time
+ lines = translators.split("\n")
+ for line in lines:
+ line = line.strip()
+ # Groups: 1 = name (keeps any whitespace before '<'), 2 = optional email, 3 = years.
+ match = re.match(r"^([^<,]+)\s*(?:<([^>,]+)>)?,\s*(.*)$", line)
+ if match:
+ # Looked up again for every line, so each new element lands after the one added before it.
+ last = self._find_lastcopyright(ai)
+ copy = libxml2.newNode("copyright")
+ if last:
+ copy = last.addNextSibling(copy)
+ else:
+ ai.addChild(copy)
+ # NOTE(review): the encode() calls below suggest "translators" is a unicode string -- confirm with caller.
+ if match.group(3):
+ copy.newChild(None, "year", match.group(3).encode('utf-8'))
+ if match.group(1) and match.group(2):
+ holder = match.group(1)+"(%s)" % match.group(2)
+ elif match.group(1):
+ holder = match.group(1)
+ # NOTE(review): group 1 cannot be empty when the pattern matches, so the two branches below look unreachable.
+ elif match.group(2):
+ holder = match.group(2)
+ else:
+ holder = "???"
+ copy.newChild(None, "holder", holder.encode('utf-8'))
+
+# Perform some tests when run standalone: print the tag lists and the
+# translator-credit strings of a docbookXmlMode instance (no XML is parsed).
+if __name__ == '__main__':
+ test = docbookXmlMode()
+ print "Ignored tags : " + repr(test.getIgnoredTags())
+ print "Final tags : " + repr(test.getFinalTags())
+ print "Space-preserve tags: " + repr(test.getSpacePreserveTags())
+
+ print "Credits from string: '%s'" % test.getStringForTranslators()
+ print "Explanation for credits:\n\t'%s'" % test.getCommentForTranslators()
+
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]