[gnome-doc-utils] [xml2po] Partial rewrite of xml2po as a Python module
- From: Claude Paroz <claudep src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [gnome-doc-utils] [xml2po] Partial rewrite of xml2po as a Python module
- Date: Sat, 1 Aug 2009 21:56:23 +0000 (UTC)
commit 72b4feeda4bceab3ab1cd82758cfff024df721b0
Author: Claude Paroz <claude 2xlibre net>
Date: Sat Aug 1 23:42:42 2009 +0200
[xml2po] Partial rewrite of xml2po as a Python module
Basically, the XML processing has been left untouched, with very small
tweakings (boolean instead of 1/0, python syntax, etc.). The main idea was to
separate the command line processing (xml2po.py.in -> xml2po) and the main
processing (moved as a module in /usr/lib/python??/site-packages/xml2po, with
/modes as a subdirectory). It should now be possible to call xml2po from any
python code with an "import xml2po" command, without resorting to command line.
bootstrap.make | 2 +-
configure.in | 5 +-
xml2po/.gitignore | 1 -
xml2po/Makefile.am | 17 +-
xml2po/modes/Makefile.am | 4 -
xml2po/tests/relnotes/test.sh | 4 +-
xml2po/tests/test.py | 12 +-
xml2po/xml2po.py | 889 ----------------------------------
xml2po/xml2po/__init__.py | 700 ++++++++++++++++++++++++++
xml2po/xml2po/modes/Makefile.am | 4 +
xml2po/{ => xml2po}/modes/basic.py | 10 +-
xml2po/{ => xml2po}/modes/docbook.py | 8 +-
xml2po/{ => xml2po}/modes/gs.py | 2 +-
xml2po/{ => xml2po}/modes/mallard.py | 0
xml2po/{ => xml2po}/modes/ubuntu.py | 3 -
xml2po/{ => xml2po}/modes/xhtml.py | 0
xml2po/xml2po/xml2po.py.in | 187 +++++++
17 files changed, 915 insertions(+), 933 deletions(-)
---
diff --git a/bootstrap.make b/bootstrap.make
index e0aa84e..66afeb6 100644
--- a/bootstrap.make
+++ b/bootstrap.make
@@ -1,4 +1,4 @@
-_xml2po = PYTHONPATH="$(shell pwd)/$(top_srcdir)/xml2po/modes:$(shell pwd)/$(top_builddir)/xml2po/modes:$(PYTHONPATH)" "$(shell pwd)/$(top_builddir)/xml2po/xml2po"
+_xml2po = PYTHONPATH="$(shell pwd)/$(top_builddir)/xml2po:$(PYTHONPATH)" "$(shell pwd)/$(top_builddir)/xml2po/xml2po/xml2po"
_db2html = $(top_srcdir)/xslt/docbook/html/db2html.xsl
_db2omf = $(top_srcdir)/xslt/docbook/omf/db2omf.xsl
diff --git a/configure.in b/configure.in
index b1b217f..bd8d459 100644
--- a/configure.in
+++ b/configure.in
@@ -28,7 +28,7 @@ AC_ARG_ENABLE([build-utils],
AM_CONDITIONAL(ENABLE_BUILD_UTILS, test x$enable_build_utils = xyes)
if test x$enable_build_utils = xyes; then
-AM_PATH_PYTHON([2.0])
+AM_PATH_PYTHON([2.4])
fi
PKG_CHECK_MODULES(GNOME_DOC_UTILS,
@@ -65,7 +65,8 @@ tools/gnome-doc-utils.pc
xslt/Makefile
xml2po/Makefile
xml2po/xml2po.pc
-xml2po/modes/Makefile
+xml2po/xml2po/Makefile
+xml2po/xml2po/modes/Makefile
xml2po/examples/Makefile
xslt/common/Makefile
xslt/docbook/Makefile
diff --git a/xml2po/.gitignore b/xml2po/.gitignore
index ed94985..690b961 100644
--- a/xml2po/.gitignore
+++ b/xml2po/.gitignore
@@ -2,6 +2,5 @@
Makefile
Makefile.in
README
-xml2po
xml2po.1
xml2po.pc
diff --git a/xml2po/Makefile.am b/xml2po/Makefile.am
index 61a6d39..6226ffb 100644
--- a/xml2po/Makefile.am
+++ b/xml2po/Makefile.am
@@ -1,20 +1,10 @@
-SUBDIRS = modes examples
-
-nodist_bin_SCRIPTS = xml2po
-CLEANFILES = xml2po
+SUBDIRS = xml2po examples
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = xml2po.pc
man_MANS = xml2po.1
-xml2po: xml2po.py
- sed -e "s/^VERSION =.*/VERSION = \"@VERSION@\"/" \
- -e "s+^submodes_path =.*+submodes_path = \"$(pythondir)/xml2po\"+" \
- -e "s+^#!.*python.*+#!$(PYTHON)+" \
- < $(srcdir)/xml2po.py > xml2po
- chmod +x xml2po
-
$(srcdir)/README: README.in
 sed -e "s/\@VERSION\@/@VERSION@/g" < README.in > README
@@ -29,7 +19,6 @@ EXTRA_DIST = \
README \
README.in \
TODO \
- xml2po.py \
- xml2po.pc.in \
- xml2po.1.xml \
+ xml2po.pc.in \
+ xml2po.1.xml \
xml2po.1
diff --git a/xml2po/tests/relnotes/test.sh b/xml2po/tests/relnotes/test.sh
index c9f12d0..7691fa2 100755
--- a/xml2po/tests/relnotes/test.sh
+++ b/xml2po/tests/relnotes/test.sh
@@ -1,9 +1,7 @@
#!/bin/sh
ALLFILES=`cat XMLFILES`
-XML2PO=../../xml2po
+XML2PO="../../xml2po/xml2po"
($XML2PO $ALLFILES | sed 's/"POT-Creation-Date: .*$/"POT-Creation-Date: \\n"/' | diff -u release-notes.pot -) || echo "Problem with POT extraction"
for i in $ALLFILES; do
($XML2PO -p el.po $i | diff -u el/$i -) || echo "Problem with merging $i"
done
-
-
\ No newline at end of file
diff --git a/xml2po/tests/test.py b/xml2po/tests/test.py
index 1875790..95c74f6 100755
--- a/xml2po/tests/test.py
+++ b/xml2po/tests/test.py
@@ -25,25 +25,23 @@ if len(sys.argv) > 1:
for opt in sys.argv[2:]:
myopts += " " + opt
output = input.replace(".xml", ".xml.out")
- fullcommand = "PYTHONPATH=../modes ../xml2po %s %s | sed 's/\"POT-Creation-Date: .*$/\"POT-Creation-Date: \\\\n\"/' | diff -u %s -" % (myopts, input, pot)
+ fullcommand = "../xml2po/xml2po %s %s | sed 's/\"POT-Creation-Date: .*$/\"POT-Creation-Date: \\\\n\"/' | diff -u %s -" % (myopts, input, pot)
#print >>sys.stderr, fullcommand
ret = os.system(fullcommand)
if ret:
print "Problem: extraction from '%s'" % (input)
- fullcommand = "PYTHONPATH=../modes ../xml2po -p %s %s %s | diff -u %s -" % (po, myopts, input, output)
+ fullcommand = "../xml2po/xml2po -p %s %s %s | diff -u %s -" % (po, myopts, input, output)
#print >>sys.stderr, fullcommand
ret = os.system(fullcommand)
if ret:
print "Problem: merging translation into '%s'" % (input)
else:
for t in SIMPLETESTS:
- if SIMPLETESTS[t].has_key("options"):
- myopts = SIMPLETESTS[t]["options"]
- else: myopts = ""
+ myopts = SIMPLETESTS[t].get("options", "")
if os.system("%s %s %s" % (sys.argv[0], t, myopts)):
print "WARNING: Test %s failed." % (t)
-
+
for t in OTHERTESTS:
- if os.system("cd %s && PYTHONPATH=../../modes ./%s" % (t[0], t[1])):
+ if os.system("cd %s && ./%s" % (t[0], t[1])):
print "WARNING: Test %s failed." % (t[0])
diff --git a/xml2po/xml2po/__init__.py b/xml2po/xml2po/__init__.py
new file mode 100644
index 0000000..763aa8d
--- /dev/null
+++ b/xml2po/xml2po/__init__.py
@@ -0,0 +1,700 @@
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2004, 2005, 2006 Danilo Šegan <danilo gnome org>.
+# Copyright (c) 2009 Claude Paroz <claude 2xlibre net>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+import os
+import sys
+import re
+import subprocess
+import tempfile
+import gettext
+import libxml2
+
+# Path of the platform's null device ('/dev/null' on POSIX, 'nul' on Windows).
+# os.devnull replaces the former probe of the filesystem for '/dev/null'.
+NULL_STRING = os.devnull
+
+# Utility functions
+def escapePoString(text):
+    """Escape backslashes, double quotes, newlines and tabs for a PO string."""
+    # Backslashes are handled first so that the backslashes introduced by
+    # the later replacements are not doubled again.
+    substitutions = (
+        ('\\', '\\\\'),
+        ('"', "\\\""),
+        ("\n", "\\n"),
+        ("\t", "\\t"),
+    )
+    for raw, escaped in substitutions:
+        text = text.replace(raw, escaped)
+    return text
+
+def unEscapePoString(text):
+    """Undo the double-quote and backslash escaping of a PO string.
+
+    Only escaped quotes and doubled backslashes are reverted; escaped
+    newlines and tabs are left as they are.
+    """
+    unquoted = text.replace('\\"', '"')
+    return unquoted.replace('\\\\', '\\')
+
+class NoneTranslations:
+    """Fallback catalog whose every lookup answers None (no translation)."""
+
+    def gettext(self, message):
+        return None
+
+    def ngettext(self, msgid1, msgid2, n):
+        return None
+
+    # The locale-encoded and unicode variants share the same signatures
+    # and the same answer as the two methods above.
+    lgettext = gettext
+    ugettext = gettext
+    lngettext = ngettext
+    ungettext = ngettext
+
+class MessageOutput:
+    """Collect translatable messages and write them out as a po/pot file.
+
+    Messages are stored already escaped for PO output.  In msgid mode (the
+    default) outputMessage() records source strings together with their
+    references and comments; once translationsFollow() has been called it
+    records msgstr values instead, which outputAll() pairs with the msgids
+    in order of appearance.
+    """
+    def __init__(self, app):
+        self.app = app
+        self.filename = None    # current source file, see setFilename()
+        self.messages = []      # escaped msgids, in output order
+        self.comments = {}      # msgid -> comment for translators
+        self.linenos = {}       # msgid -> list of (filename, tag, lineno)
+        self.nowrap = {}        # msgid -> True when "#, no-wrap" is needed
+        self.translations = []  # escaped msgstrs collected in msgstr mode
+        self.do_translations = False # True: keep duplicate msgids, skip comments
+        self.output_msgstr = False # this is msgid mode for outputMessage; True is for msgstr mode
+
+    def translationsFollow(self):
+        """Indicate that what follows are translations."""
+        self.output_msgstr = True
+
+    def setFilename(self, filename):
+        """Set the file name recorded in the references of later messages."""
+        self.filename = filename
+
+    def outputMessage(self, text, lineno = 0, comment = None, spacepreserve = False, tag = None):
+        """Adds a string to the list of messages.
+
+        Blank text is ignored.  In msgstr mode the escaped text is only
+        appended to the list of translations.  Otherwise the reference
+        (filename, tag, lineno) is always recorded, and the message itself
+        is appended when it is new or when do_translations asks for
+        duplicates to be kept.  The first non-empty comment seen for a
+        message is kept, unless do_translations is set.
+        """
+        if text.strip() == '':
+            return
+
+        t = escapePoString(text)
+        if self.output_msgstr:
+            self.translations.append(t)
+            return
+
+        # Every msgid stored in self.messages also gets an entry in
+        # self.linenos (just below), so the dictionary gives the same
+        # "already seen" answer as scanning the list, in constant time.
+        if self.do_translations or t not in self.linenos:
+            self.messages.append(t)
+            if spacepreserve:
+                self.nowrap[t] = True
+
+        self.linenos.setdefault(t, []).append((self.filename, tag, lineno))
+        if (not self.do_translations) and comment and t not in self.comments:
+            self.comments[t] = comment
+
+    def outputHeader(self, out):
+        """Write the PO header entry, stamped with the current time, to out."""
+        import time
+        out.write("""msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\\n"
+"POT-Creation-Date: %s\\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
+"Last-Translator: FULL NAME <EMAIL ADDRESS>\\n"
+"Language-Team: LANGUAGE <LL@li.org>\\n"
+"MIME-Version: 1.0\\n"
+"Content-Type: text/plain; charset=UTF-8\\n"
+"Content-Transfer-Encoding: 8bit\\n"
+
+""" % (time.strftime("%Y-%m-%d %H:%M%z")))
+
+    def outputAll(self, out):
+        """Write the header and every collected message to the file-like out.
+
+        When do_translations is set, each msgid consumes the next collected
+        translation; a translation identical to its msgid is written as an
+        empty msgstr.
+        """
+        self.outputHeader(out)
+
+        for k in self.messages:
+            if k in self.comments:
+                out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. ")))
+            # Each reference keeps its trailing space, as before.
+            references = "".join(["%s:%d(%s) " % (reference[0], reference[2], reference[1])
+                                  for reference in self.linenos[k]])
+            out.write("#: %s\n" % (references))
+            if k in self.nowrap and self.nowrap[k]:
+                out.write("#, no-wrap\n")
+            out.write("msgid \"%s\"\n" % (k))
+            translation = ""
+            if self.do_translations and len(self.translations) > 0:
+                translation = self.translations.pop(0)
+            if translation == k:
+                translation = ""
+            out.write("msgstr \"%s\"\n\n" % (translation))
+
+class XMLDocument(object):
+ """A libxml2 document plus the tree walking used to extract or merge messages.
+
+ 'app' is the driving application object; this class reads its options,
+ current_mode, operation and msg attributes and calls its getTranslation()
+ and isSpacePreserveNode() methods.
+ """
+ def __init__(self, filename, app):
+ """Parse 'filename' with libxml2, with line numbers enabled.
+
+ Raises Exception when the name of the parsed document differs from
+ 'filename'.
+ """
+ self.app = app
+ self.expand_entities = self.app.options.get('expand_entities')
+ self.ignored_tags = self.app.current_mode.getIgnoredTags()
+ ctxt = libxml2.createFileParserCtxt(filename)
+ ctxt.lineNumbers(1)
+ if self.app.options.get('expand_all_entities'):
+ ctxt.replaceEntities(1)
+ ctxt.parseDocument()
+ self.doc = ctxt.doc()
+ if self.doc.name != filename:
+ raise Exception("Error: I tried to open '%s' but got '%s' -- how did that happen?" % (filename, self.doc.name))
+ if self.app.msg:
+ self.app.msg.setFilename(filename)
+
+ def generate_messages(self):
+ """Register the document name with app.msg and serialize the whole tree."""
+ self.app.msg.setFilename(self.doc.name)
+ self.doSerialize(self.doc)
+
+ def normalizeNode(self, node):
+ """Normalize whitespace of the text nodes found in 'node', recursively.
+
+ Nodes for which app.isSpacePreserveNode() is true are left untouched.
+ """
+ #print >>sys.stderr, "<%s> (%s) [%s]" % (node.name, node.type, node.serialize('utf-8'))
+ if not node:
+ return
+ elif self.app.isSpacePreserveNode(node):
+ return
+ elif node.isText():
+ if node.isBlankNode():
+ if self.app.options.get('expand_entities') or \
+ (not (node.prev and not node.prev.isBlankNode() and node.next and not node.next.isBlankNode()) ):
+ #print >>sys.stderr, "BLANK"
+ node.setContent('')
+ else:
+ node.setContent(re.sub('\s+',' ', node.content))
+
+ elif node.children and node.type == 'element':
+ child = node.children
+ while child:
+ self.normalizeNode(child)
+ child = child.next
+
+ def normalizeString(self, text, spacepreserve = False):
+ """Normalizes string to be used as key for gettext lookup.
+
+ Removes all unnecessary whitespace.  The text is returned unchanged
+ when 'spacepreserve' is set or when it cannot be parsed as XML."""
+ if spacepreserve:
+ return text
+ try:
+ # Lets add document DTD so entities are resolved
+ dtd = self.doc.intSubset()
+ tmp = dtd.serialize('utf-8')
+ tmp = tmp + '<norm>%s</norm>' % text
+ except:
+ tmp = '<norm>%s</norm>' % text
+
+ try:
+ ctxt = libxml2.createDocParserCtxt(tmp)
+ if self.app.options.get('expand_entities'):
+ ctxt.replaceEntities(1)
+ ctxt.parseDocument()
+ tree = ctxt.doc()
+ newnode = tree.getRootElement()
+ except:
+ print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (text)
+ return text
+
+ self.normalizeNode(newnode)
+
+ result = ''
+ child = newnode.children
+ while child:
+ result += child.serialize('utf-8')
+ child = child.next
+
+ # Drop a single leading and a single trailing space
+ result = re.sub('^ ','', result)
+ result = re.sub(' $','', result)
+ tree.freeDoc()
+
+ return result
+
+ def stringForEntity(self, node):
+ """Replaces entities in the node."""
+ text = node.serialize('utf-8')
+ try:
+ # Lets add document DTD so entities are resolved
+ dtd = self.doc.intSubset()
+ tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text
+ next = True
+ except:
+ tmp = '<norm>%s</norm>' % text
+ next = False
+
+ ctxt = libxml2.createDocParserCtxt(tmp)
+ if self.expand_entities:
+ ctxt.replaceEntities(1)
+ ctxt.parseDocument()
+ tree = ctxt.doc()
+ # With a DTD prepended, the <norm> element is the second child of the tree
+ if next:
+ newnode = tree.children.next
+ else:
+ newnode = tree.children
+
+ result = ''
+ child = newnode.children
+ while child:
+ result += child.serialize('utf-8')
+ child = child.next
+ tree.freeDoc()
+ return result
+
+
+ def myAttributeSerialize(self, node):
+ """Serialize the children of an attribute node to a string.
+
+ Entity references are kept as '&name;' unless expand_entities is set.
+ """
+ result = ''
+ if node.children:
+ child = node.children
+ while child:
+ if child.type=='text':
+ result += self.doc.encodeEntitiesReentrant(child.content)
+ elif child.type=='entity_ref':
+ if not self.expand_entities:
+ result += '&' + child.name + ';'
+ else:
+ result += child.content.decode('utf-8')
+ else:
+ result += self.myAttributeSerialize(child)
+ child = child.next
+ else:
+ result = node.serialize('utf-8')
+ return result
+
+ def startTagForNode(self, node):
+ """Return the node name followed by its serialized attributes (no angle brackets).
+
+ Returns 0 when 'node' is false.
+ """
+ if not node:
+ return 0
+
+ result = node.name
+ params = ''
+ if node.properties:
+ for p in node.properties:
+ if p.type == 'attribute':
+ try:
+ nsprop = p.ns().name + ":" + p.name
+ except:
+ nsprop = p.name
+ params += " %s=\"%s\"" % (nsprop, self.myAttributeSerialize(p))
+ return result+params
+
+ def endTagForNode(self, node):
+ """Return the node name, or False when 'node' is false."""
+ if not node:
+ return False
+ return node.name
+
+ def isFinalNode(self, node):
+ """Return True for elements listed in the mode's final tags, and for nodes
+ with children whose non-blank, non-comment children are all final."""
+ #node.type =='text' or not node.children or
+ if node.type == 'element' and node.name in self.app.current_mode.getFinalTags():
+ return True
+ elif node.children:
+ final_children = True
+ child = node.children
+ while child and final_children:
+ if not child.isBlankNode() and child.type != 'comment' and not self.isFinalNode(child):
+ final_children = False
+ child = child.next
+ if final_children:
+ return True
+ return False
+
+ def ignoreNode(self, node):
+ """Return True for non-final nodes that are ignored tags, DTDs or comments."""
+ if self.isFinalNode(node):
+ return False
+ if node.name in self.ignored_tags or node.type in ('dtd', 'comment'):
+ return True
+ return False
+
+ def getCommentForNode(self, node):
+ """Walk through previous siblings until a comment is found, or other element.
+
+ Only whitespace is allowed between comment and current node."""
+ prev = node.prev
+ while prev and prev.type == 'text' and prev.content.strip() == '':
+ prev = prev.prev
+ if prev and prev.type == 'comment':
+ return prev.content.strip()
+ else:
+ return None
+
+ def replaceAttributeContentsWithText(self, node, text):
+ """Set the content of the attribute node to 'text'."""
+ node.setContent(text)
+
+ def replaceNodeContentsWithText(self, node, text):
+ """Replaces all subnodes of a node with contents of text treated as XML."""
+
+ if node.children:
+ starttag = self.startTagForNode(node)
+ endtag = self.endTagForNode(node)
+
+ # Lets add document DTD so entities are resolved
+ tmp = '<?xml version="1.0" encoding="utf-8" ?>'
+ try:
+ dtd = self.doc.intSubset()
+ tmp = tmp + dtd.serialize('utf-8')
+ except libxml2.treeError:
+ pass
+
+ content = '<%s>%s</%s>' % (starttag, text, endtag)
+ tmp = tmp + content.encode('utf-8')
+
+ newnode = None
+ try:
+ ctxt = libxml2.createDocParserCtxt(tmp)
+ ctxt.replaceEntities(0)
+ ctxt.parseDocument()
+ newnode = ctxt.doc()
+ except:
+ pass
+
+ if not newnode:
+ print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
+ return
+
+ newelem = newnode.getRootElement()
+
+ if newelem and newelem.children:
+ free = node.children
+ while free:
+ next = free.next
+ free.unlinkNode()
+ free = next
+
+ if node:
+ # A single copy of the parsed element replaces the node
+ next = node.next
+ node.replaceNode(newelem.copyNodeList())
+ node.next = next
+
+ else:
+ # In practice, this happens with tags such as "<para> </para>" (only whitespace in between)
+ pass
+ else:
+ node.setContent(text)
+
+ def autoNodeIsFinal(self, node):
+ """Returns True if node is text node, contains non-whitespace text nodes or entities."""
+ # The answer is cached on the node in the __autofinal__ attribute
+ if hasattr(node, '__autofinal__'):
+ return node.__autofinal__
+ if node.name in self.ignored_tags:
+ node.__autofinal__ = False
+ return False
+ if node.isText() and node.content.strip()!='':
+ node.__autofinal__ = True
+ return True
+ final = False
+ child = node.children
+ while child:
+ if child.type in ['text'] and child.content.strip()!='':
+ final = True
+ break
+ child = child.next
+
+ node.__autofinal__ = final
+ return final
+
+
+ def worthOutputting(self, node, noauto = False):
+ """Returns True if node is "worth outputting", otherwise False.
+
+ Node is "worth outputting", if none of the parents
+ isFinalNode, and it contains non-blank text and entities.
+ """
+ # Answers are cached on the node (__worth__ / __autoworth__)
+ if noauto and hasattr(node, '__worth__'):
+ return node.__worth__
+ elif not noauto and hasattr(node, '__autoworth__'):
+ return node.__autoworth__
+ worth = True
+ parent = node.parent
+ final = self.isFinalNode(node) and node.name not in self.ignored_tags
+ while not final and parent:
+ if self.isFinalNode(parent):
+ final = True # reset if we've got to one final tag
+ if final and (parent.name not in self.ignored_tags) and self.worthOutputting(parent):
+ worth = False
+ break
+ parent = parent.parent
+ if not worth:
+ node.__worth__ = False
+ return False
+
+ if noauto:
+ node.__worth__ = worth
+ return worth
+ else:
+ node.__autoworth__ = self.autoNodeIsFinal(node)
+ return node.__autoworth__
+
+ def processAttribute(self, node, attr):
+ """Translate (merge operation) or record as a message the value of 'attr'."""
+ if not node or not attr or not self.worthOutputting(node=node, noauto=True):
+ return
+
+ outtxt = self.normalizeString(attr.content)
+ if self.app.operation == 'merge':
+ translation = self.app.getTranslation(outtxt)
+ self.replaceAttributeContentsWithText(attr, translation.encode('utf-8'))
+ else:
+ self.app.msg.outputMessage(outtxt, node.lineNo(), "", spacepreserve=False,
+ tag = node.name + ":" + attr.name)
+
+ def processElementTag(self, node, replacements, restart = False):
+ """Process node with node.type == 'element'.
+
+ Returns the tuple (starttag, outtxt, endtag, translation).  Children
+ that are messages of their own are represented in outtxt by
+ '<placeholder-N/>' markers, N being 1-based.
+ """
+ if node.type != 'element':
+ raise Exception("You must pass node with node.type=='element'.")
+
+ # Translate attributes if needed
+ if node.properties and self.app.current_mode.getTreatedAttributes():
+ for p in node.properties:
+ if p.name in self.app.current_mode.getTreatedAttributes():
+ self.processAttribute(node, p)
+
+ outtxt = ''
+ if restart:
+ myrepl = []
+ else:
+ myrepl = replacements
+
+ child = node.children
+ while child:
+ if (self.isFinalNode(child)) or (child.type == 'element' and self.worthOutputting(child)):
+ myrepl.append(self.processElementTag(child, myrepl, True))
+ outtxt += '<placeholder-%d/>' % (len(myrepl))
+ else:
+ if child.type == 'element':
+ (starttag, content, endtag, translation) = self.processElementTag(child, myrepl, False)
+ outtxt += '<%s>%s</%s>' % (starttag, content, endtag)
+ else:
+ outtxt += self.doSerialize(child)
+ child = child.next
+
+ if self.app.operation == 'merge':
+ norm_outtxt = self.normalizeString(outtxt, self.app.isSpacePreserveNode(node))
+ translation = self.app.getTranslation(norm_outtxt)
+ else:
+ translation = outtxt.decode('utf-8')
+
+ starttag = self.startTagForNode(node)
+ endtag = self.endTagForNode(node)
+
+ worth = self.worthOutputting(node)
+ if not translation:
+ translation = outtxt.decode('utf-8')
+ if worth and self.app.options.get('mark_untranslated'):
+ node.setLang('C')
+
+ if restart or worth:
+ # enumerate() only accepts a start index from Python 2.6 on, so the
+ # 1-based placeholder number is computed by hand.
+ for i, repl in enumerate(myrepl):
+ replacement = '<%s>%s</%s>' % (repl[0], repl[3], repl[2])
+ translation = translation.replace('<placeholder-%d/>' % (i + 1), replacement)
+
+ if worth:
+ if self.app.operation == 'merge':
+ self.replaceNodeContentsWithText(node, translation)
+ else:
+ norm_outtxt = self.normalizeString(outtxt, self.app.isSpacePreserveNode(node))
+ self.app.msg.outputMessage(norm_outtxt, node.lineNo(), self.getCommentForNode(node), self.app.isSpacePreserveNode(node), tag = node.name)
+
+ return (starttag, outtxt, endtag, translation)
+
+
+ def isExternalGeneralParsedEntity(self, node):
+ """Return True when the debug dump of 'node' mentions EXTERNAL_GENERAL_PARSED_ENTITY.
+
+ Returns False when the dump cannot be produced.
+ """
+ try:
+ # it would be nice if debugDumpNode could use StringIO, but it apparently cannot
+ tmp = tempfile.TemporaryFile()
+ node.debugDumpNode(tmp,0)
+ tmp.seek(0)
+ tmpstr = tmp.read()
+ tmp.close()
+ except:
+ # We fail silently, and replace all entities if we cannot
+ # write .xml2po-entitychecking
+ # !!! This is not very nice thing to do, but I don't know if
+ # raising an exception is any better
+ return False
+ return tmpstr.find('EXTERNAL_GENERAL_PARSED_ENTITY') != -1
+
+ def doSerialize(self, node):
+ """Serializes a node and its children, emitting PO messages along the way.
+
+ node is the node to serialize; the serialized text is returned, an
+ empty string being returned for ignored nodes.
+ """
+
+ if self.ignoreNode(node):
+ return ''
+ elif not node.children:
+ return node.serialize("utf-8")
+ elif node.type == 'entity_ref':
+ if self.isExternalGeneralParsedEntity(node):
+ return node.serialize('utf-8')
+ else:
+ return self.stringForEntity(node) #content #content #serialize("utf-8")
+ elif node.type == 'entity_decl':
+ return node.serialize('utf-8') #'<%s>%s</%s>' % (startTagForNode(node), node.content, node.name)
+ elif node.type == 'text':
+ return node.serialize('utf-8')
+ elif node.type == 'element':
+ repl = []
+ (starttag, content, endtag, translation) = self.processElementTag(node, repl, True)
+ return '<%s>%s</%s>' % (starttag, content, endtag)
+ else:
+ child = node.children
+ outtxt = ''
+ while child:
+ outtxt += self.doSerialize(child)
+ child = child.next
+ return outtxt
+
+
+def xml_error_handler(arg, ctxt):
+    """Do-nothing error callback, used to deactivate error messages from the validation."""
+    return None
+
+class Main(object):
+    """Driver of the xml2po operations: to_pot, merge, reuse and update.
+
+    mode      -- name of a module of xml2po.modes defining '<mode>XmlMode'
+    operation -- name of the operation ('update' makes the pot go to a
+                 temporary file; 'merge' is tested by XMLDocument)
+    output    -- name of the output file, or '-' for standard output
+    options   -- dictionary of options, read with .get()
+    """
+    def __init__(self, mode, operation, output, options):
+        libxml2.registerErrorHandler(xml_error_handler, None)
+        self.operation = operation
+        self.options = options
+        self.msg = None    # MessageOutput, created by to_pot() and reuse()
+        self.gt = None     # gettext catalog, loaded by merge()
+        self.current_mode = self.load_mode(mode)()
+        # Prepare output
+        if operation == 'update':
+            self.out = tempfile.TemporaryFile()
+        elif output == '-':
+            self.out = sys.stdout
+        else:
+            self.out = file(output, 'w')
+
+    def load_mode(self, modename):
+        """Return the class '<modename>XmlMode' of module xml2po.modes.<modename>.
+
+        Falls back to the 'basic' mode when the requested mode cannot be
+        loaded, and exits with status 1 when 'basic' itself is missing.
+        """
+        classname = '%sXmlMode' % modename
+        try:
+            # Positional arguments: __import__ accepts keywords only from Python 2.5 on
+            module = __import__('xml2po.modes.%s' % modename, {}, {}, [classname])
+            return getattr(module, classname)
+        except (ImportError, AttributeError):
+            if modename == 'basic':
+                sys.stderr.write("Unable to find xml2po modes. Please check your xml2po installation.\n")
+                sys.exit(1)
+            else:
+                sys.stderr.write("Unable to load mode '%s'. Falling back to 'basic' mode with automatic detection (-a).\n" % modename)
+                return self.load_mode('basic')
+
+    def to_pot(self, xmlfiles):
+        """ Produce a pot file from the list of 'xmlfiles' """
+        self.msg = MessageOutput(self)
+        for xmlfile in xmlfiles:
+            if not os.access(xmlfile, os.R_OK):
+                raise IOError("Unable to read file '%s'" % xmlfile)
+            try:
+                doc = XMLDocument(xmlfile, self)
+            except Exception, e:
+                print >> sys.stderr, "Unable to parse XML file '%s': %s" % (xmlfile, str(e))
+                sys.exit(1)
+            self.current_mode.preProcessXml(doc.doc, self.msg)
+            doc.generate_messages()
+        self.output_po()
+
+    def merge(self, mofile, xmlfile):
+        """ Merge translations from mofile into xmlfile to generate a translated XML file """
+        if not os.access(xmlfile, os.R_OK):
+            raise IOError("Unable to read file '%s'" % xmlfile)
+        try:
+            doc = XMLDocument(xmlfile, self)
+        except Exception, e:
+            print >> sys.stderr, str(e)
+            sys.exit(1)
+
+        try:
+            mfile = open(mofile, "rb")
+        except IOError:
+            # Nothing can be merged without the catalog: stop here, as for
+            # an unparsable XML file, instead of going on without 'mfile'.
+            print >> sys.stderr, "Can't open MO file '%s'." % (mofile)
+            sys.exit(1)
+        try:
+            # The catalog is read when GNUTranslations is instantiated
+            self.gt = gettext.GNUTranslations(mfile)
+        finally:
+            mfile.close()
+        self.gt.add_fallback(NoneTranslations())
+        # Has preProcessXml use cases for merge?
+        #self.current_mode.preProcessXml(doc.doc, self.msg)
+
+        doc.doSerialize(doc.doc)
+        tcmsg = self.current_mode.getStringForTranslators()
+        outtxt = self.getTranslation(tcmsg)
+        self.current_mode.postProcessXmlTranslation(doc.doc, self.options.get('translationlanguage'), outtxt)
+        self.out.write(doc.doc.serialize('utf-8', 1))
+
+    def reuse(self, origxml, xmlfile):
+        """ Produce a po file from xmlfile pot and using translations from origxml """
+        self.msg = MessageOutput(self)
+        self.msg.do_translations = True
+        if not os.access(xmlfile, os.R_OK):
+            raise IOError("Unable to read file '%s'" % xmlfile)
+        if not os.access(origxml, os.R_OK):
+            raise IOError("Unable to read file '%s'" % origxml)
+        try:
+            doc = XMLDocument(xmlfile, self)
+        except Exception, e:
+            print >> sys.stderr, str(e)
+            sys.exit(1)
+        doc.generate_messages()
+
+        # Messages of the second document are recorded as translations
+        self.msg.translationsFollow()
+        try:
+            doc = XMLDocument(origxml, self)
+        except Exception, e:
+            print >> sys.stderr, str(e)
+            sys.exit(1)
+        doc.generate_messages()
+        self.output_po()
+
+    def update(self, xmlfiles, lang_file):
+        """ Merge the produced pot with an existing po file (lang_file)
+
+        Raises IOError when lang_file is not writable, and Exception when
+        msgmerge fails or its result cannot be moved over lang_file.
+        """
+        if not os.access(lang_file, os.W_OK):
+            raise IOError("'%s' does not exist or is not writable." % lang_file)
+        self.to_pot(xmlfiles)
+        lang = os.path.basename(lang_file).split(".")[0]
+        tmp_po = ".tmp.%s.po" % lang
+
+        sys.stderr.write("Merging translations for %s: \n" % (lang))
+        # The pot was written to the temporary file self.out; rewind it and
+        # feed it to msgmerge on standard input.
+        self.out.seek(0)
+        merge_cmd = subprocess.Popen(["msgmerge", "-o", tmp_po, lang_file, "-"],
+                                     stdin=self.out, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        cmdout, cmderr = merge_cmd.communicate()
+        if merge_cmd.returncode:
+            # Show what msgmerge complained about before giving up
+            sys.stderr.write(cmderr)
+            raise Exception("Error during msgmerge command.")
+        else:
+            result = subprocess.call(["mv", tmp_po, lang_file])
+            if result:
+                raise Exception("Error: cannot rename file.")
+            else:
+                subprocess.call(["msgfmt", "-cv", "-o", NULL_STRING, lang_file])
+
+    def getTranslation(self, text):
+        """Returns a translation via gettext for specified snippet.
+
+        text should be a string to look for.  It is returned unchanged
+        when it is empty or blank, or when no catalog has been loaded.
+        """
+        #print >>sys.stderr,"getTranslation('%s')" % (text.encode('utf-8'))
+        if not text or text.strip() == '':
+            return text
+        if self.gt:
+            res = self.gt.ugettext(text.decode('utf-8'))
+            return res
+
+        return text
+
+    def output_po(self):
+        """ Write the resulting po/pot file to specified output """
+        tcmsg = self.current_mode.getStringForTranslators()
+        tccom = self.current_mode.getCommentForTranslators()
+        if tcmsg:
+            self.msg.outputMessage(tcmsg, lineno=0, comment=tccom)
+
+        self.msg.outputAll(self.out)
+
+    # **** XML utility functions ****
+    def isSpacePreserveNode(self, node):
+        """Return True when node asks for space preservation itself or when its
+        name is one of the space-preserving tags of the current mode."""
+        if node.getSpacePreserve() == 1:
+            return True
+        else:
+            return node.name in self.current_mode.getSpacePreserveTags()
+
+
diff --git a/xml2po/xml2po/modes/Makefile.am b/xml2po/xml2po/modes/Makefile.am
new file mode 100644
index 0000000..04e7a33
--- /dev/null
+++ b/xml2po/xml2po/modes/Makefile.am
@@ -0,0 +1,4 @@
+modesdir = $(pythondir)/xml2po/modes
+modes_DATA = __init__.py basic.py docbook.py gs.py mallard.py ubuntu.py xhtml.py
+
+EXTRA_DIST = $(modes_DATA)
diff --git a/xml2po/modes/basic.py b/xml2po/xml2po/modes/basic.py
similarity index 86%
rename from xml2po/modes/basic.py
rename to xml2po/xml2po/modes/basic.py
index c3081d8..7765a1c 100644
--- a/xml2po/modes/basic.py
+++ b/xml2po/xml2po/modes/basic.py
@@ -17,18 +17,20 @@
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
-# Abstract class; inherit from it to construct other special-handling classes
+# Basic default class; inherit from it to construct other special-handling classes
#
class basicXmlMode:
"""Abstract class for special handling of document types."""
def getIgnoredTags(self):
"Returns array of tags to be ignored."
- return []
+ return ['itemizedlist', 'orderedlist', 'variablelist', 'varlistentry']
def getFinalTags(self):
"Returns array of tags to be considered 'final'."
- return []
+ return ['para', 'title', 'releaseinfo', 'revnumber',
+ 'date', 'itemizedlist', 'orderedlist',
+ 'variablelist', 'varlistentry', 'term']
def getSpacePreserveTags(self):
"Returns array of tags in which spaces are to be preserved."
@@ -44,7 +46,7 @@ class basicXmlMode:
def postProcessXmlTranslation(self, doc, language, translators):
"""Sets a language and translators in "doc" tree.
-
+
"translators" is a string consisted of translator credits.
"language" is a simple string.
"doc" is a libxml2.xmlDoc instance."""
diff --git a/xml2po/modes/docbook.py b/xml2po/xml2po/modes/docbook.py
similarity index 99%
rename from xml2po/modes/docbook.py
rename to xml2po/xml2po/modes/docbook.py
index 8422e0f..276a9d9 100644
--- a/xml2po/modes/docbook.py
+++ b/xml2po/xml2po/modes/docbook.py
@@ -132,7 +132,7 @@ class docbookXmlMode(basicXmlMode):
else:
hash = "THIS FILE DOESN'T EXIST"
print >>sys.stderr, "Warning: image file '%s' not found." % fullpath
-
+
msg.outputMessage("@@image: '%s'; md5=%s" % (attr, hash), node.lineNo(),
"When image changes, this message will be marked fuzzy or untranslated for you.\n"+
"It doesn't matter what you translate it to: it's not used at all.")
@@ -150,7 +150,7 @@ class docbookXmlMode(basicXmlMode):
def postProcessXmlTranslation(self, doc, language, translators):
"""Sets a language and translators in "doc" tree.
-
+
"translators" is a string consisted of "Name <email>, years" pairs
of each translator, separated by newlines."""
@@ -162,7 +162,7 @@ class docbookXmlMode(basicXmlMode):
root.setProp('lang', language)
else:
return
-
+
if translators == self.getStringForTranslators():
return
elif translators:
@@ -204,4 +204,4 @@ if __name__ == '__main__':
print "Credits from string: '%s'" % test.getStringForTranslators()
print "Explanation for credits:\n\t'%s'" % test.getCommentForTranslators()
-
+
diff --git a/xml2po/modes/gs.py b/xml2po/xml2po/modes/gs.py
similarity index 99%
rename from xml2po/modes/gs.py
rename to xml2po/xml2po/modes/gs.py
index 6a8fc17..ba2fbc7 100644
--- a/xml2po/modes/gs.py
+++ b/xml2po/xml2po/modes/gs.py
@@ -45,7 +45,7 @@ class gsXmlMode(basicXmlMode):
def postProcessXmlTranslation(self, doc, language, translators):
"""Sets a language and translators in "doc" tree.
-
+
"translators" is a string consisted of translator credits.
"language" is a simple string.
"doc" is a libxml2.xmlDoc instance."""
diff --git a/xml2po/modes/mallard.py b/xml2po/xml2po/modes/mallard.py
similarity index 100%
rename from xml2po/modes/mallard.py
rename to xml2po/xml2po/modes/mallard.py
diff --git a/xml2po/modes/ubuntu.py b/xml2po/xml2po/modes/ubuntu.py
similarity index 97%
rename from xml2po/modes/ubuntu.py
rename to xml2po/xml2po/modes/ubuntu.py
index 05649f1..bbd7986 100644
--- a/xml2po/modes/ubuntu.py
+++ b/xml2po/xml2po/modes/ubuntu.py
@@ -20,6 +20,3 @@ class ubuntuXmlMode (docbookXmlMode):
except:
newent = doc.addDocEntity('language', libxml2.XML_INTERNAL_GENERAL_ENTITY, None, None, language)
-
-
-
diff --git a/xml2po/modes/xhtml.py b/xml2po/xml2po/modes/xhtml.py
similarity index 100%
rename from xml2po/modes/xhtml.py
rename to xml2po/xml2po/modes/xhtml.py
diff --git a/xml2po/xml2po/xml2po.py.in b/xml2po/xml2po/xml2po.py.in
new file mode 100644
index 0000000..5da1389
--- /dev/null
+++ b/xml2po/xml2po/xml2po.py.in
@@ -0,0 +1,187 @@
+#!/usr/bin/python -u
+# -*- encoding: utf-8 -*-
+# Copyright (c) 2004, 2005, 2006 Danilo Šegan <danilo gnome org>.
+# Copyright (c) 2009 Claude Paroz <claude 2xlibre net>.
+#
+# This file is part of xml2po.
+#
+# xml2po is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# xml2po is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with xml2po; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+# xml2po -- translate XML documents
+VERSION = "1.0.5"
+
+# Versioning system (I use this for a long time, so lets explain it to
+# those Linux-versioning-scheme addicts):
+# 1.0.* are unstable, development versions
+# 1.1 will be first stable release (release 1), and 1.1.* bugfix releases
+# 2.0.* will be unstable-feature-development stage (milestone 1)
+# 2.1.* unstable development betas (milestone 2)
+# 2.2 second stable release (release 2), and 2.2.* bugfix releases
+# ...
+#
+import sys
+import os
+import getopt
+
+NULL_STRING = '/dev/null'
+if not os.path.exists('/dev/null'): NULL_STRING = 'NUL'
+
+def usage (with_help = False):
+ print >> sys.stderr, "Usage: %s [OPTIONS] [XMLFILE]..." % (sys.argv[0])
+ if with_help:
+ print >> sys.stderr, """
+OPTIONS may be some of:
+ -a --automatic-tags Automatically decides if tags are to be considered
+ "final" or not
+ -k --keep-entities Don't expand entities
+ -e --expand-all-entities Expand ALL entities (including SYSTEM ones)
+ -m --mode=TYPE Treat tags as type TYPE (default: docbook)
+ -o --output=FILE Print resulting text (XML or POT) to FILE
+ -p --po-file=FILE Specify PO file containing translation, and merge
+ Overwrites temporary file .xml2po.mo.
+ -r --reuse=FILE Specify translated XML file with the same structure
+ -t --translation=FILE Specify MO file containing translation, and merge
+ -u --update-translation=LANG.po Updates a PO file using msgmerge program
+
+ -l --language=LANG Set language of the translation to LANG
+ --mark-untranslated Set 'xml:lang="C"' on untranslated tags
+
+ -v --version Output version of the xml2po program
+
+ -h --help Output this message
+
+EXAMPLES:
+ To create a POTemplate book.pot from input files chapter1.xml and
+ chapter2.xml, run the following:
+ %(command)s -o book.pot chapter1.xml chapter2.xml
+
+ After translating book.pot into de.po, merge the translations back,
+ using -p option for each XML file:
+ %(command)s -p de.po chapter1.xml > chapter1.de.xml
+ %(command)s -p de.po chapter2.xml > chapter2.de.xml
+""" % {'command': sys.argv[0]}
+
+
+def main(argv):
+ if not argv:
+ usage()
+ sys.exit(2)
+
+ name = os.path.join(os.path.dirname(__file__), '..')
+ if os.path.exists(os.path.join(name, 'tests')):
+ print >> sys.stderr, 'Running from source folder, modifying PYTHONPATH'
+ sys.path.insert(0, name)
+
+ from xml2po import Main
+
+ # Default parameters
+ default_mode = 'docbook'
+ operation = 'pot' # 'pot', 'merge', 'update'
+ output = '-' # this means to stdout
+ options = {
+ 'mark_untranslated' : False,
+ 'expand_entities' : True,
+ 'expand_all_entities' : False,
+ }
+ origxml = ''
+ mofile = ''
+
+ try: opts, remaining_args = getopt.getopt(argv, 'avhkem:t:o:p:u:r:l:',
+ ['automatic-tags','version', 'help', 'keep-entities', 'expand-all-entities', 'mode=', 'translation=',
+ 'output=', 'po-file=', 'update-translation=', 'reuse=', 'language=', 'mark-untranslated' ])
+ except getopt.GetoptError:
+ usage(True)
+ sys.exit(2)
+
+ for opt, arg in opts:
+ if opt in ('-m', '--mode'):
+ default_mode = arg
+ if opt in ('-a', '--automatic-tags'):
+ default_mode = 'basic'
+ elif opt in ('-k', '--keep-entities'):
+ options['expand_entities'] = False
+ elif opt in ('--mark-untranslated',):
+ options['mark_untranslated'] = True
+ elif opt in ('-e', '--expand-all-entities'):
+ options['expand_all_entities'] = True
+ elif opt in ('-l', '--language'):
+ options['translationlanguage'] = arg
+ elif opt in ('-t', '--translation'):
+ mofile = arg
+ operation = 'merge'
+ if 'translationlanguage' not in options:
+ options['translationlanguage'] = os.path.split(os.path.splitext(mofile)[0])[1]
+ elif opt in ('-r', '--reuse'):
+ origxml = arg
+ elif opt in ('-u', '--update-translation'):
+ operation = 'update'
+ po_to_update = arg
+ elif opt in ('-p', '--po-file'):
+ mofile = ".xml2po.mo"
+ pofile = arg
+ operation = 'merge'
+ if 'translationlanguage' not in options:
+ options['translationlanguage'] = os.path.split(os.path.splitext(pofile)[0])[1]
+ os.system("msgfmt -o %s %s >%s" % (mofile, pofile, NULL_STRING)) and sys.exit(7)
+ elif opt in ('-o', '--output'):
+ output = arg
+ elif opt in ('-v', '--version'):
+ print VERSION
+ sys.exit(0)
+ elif opt in ('-h', '--help'):
+ usage(True)
+ sys.exit(0)
+
+ if operation == 'update' and output != "-":
+ print >> sys.stderr, "Option '-o' is not yet supported when updating translations directly. Ignoring this option."
+
+ # Treat remaining arguments as XML files
+ filenames = []
+ while remaining_args:
+ filenames.append(remaining_args.pop())
+
+ try:
+ xml2po_main = Main(default_mode, operation, output, options)
+ except IOError:
+ print >> sys.stderr, "Error: cannot open file %s for writing." % (output)
+ sys.exit(5)
+
+ if operation == 'merge':
+ if len(filenames) > 1:
+ print >> sys.stderr, "Error: You can merge translations with only one XML file at a time."
+ sys.exit(2)
+
+ if not mofile:
+ print >> sys.stderr, "Error: You must specify MO file when merging translations."
+ sys.exit(3)
+
+ xml2po_main.merge(mofile, filenames[0])
+
+ elif operation == 'update':
+ xml2po_main.update(filenames, po_to_update)
+
+ elif origxml:
+ xml2po_main.reuse(origxml, filenames[0])
+
+ else:
+ # Standard POT producing
+ xml2po_main.to_pot(filenames)
+
+# Main program start
+if __name__ == '__main__':
+ main(sys.argv[1:])
+else:
+ raise NotImplementedError
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]