gimp-help-2 r2568 - in branches/xml2po-support: . tools
- From: ulfehlert svn gnome org
- To: svn-commits-list gnome org
- Subject: gimp-help-2 r2568 - in branches/xml2po-support: . tools
- Date: Wed, 1 Oct 2008 18:16:33 +0000 (UTC)
Author: ulfehlert
Date: Wed Oct 1 18:16:33 2008
New Revision: 2568
URL: http://svn.gnome.org/viewvc/gimp-help-2?rev=2568&view=rev
Log:
2008-10-01 Ulf-D. Ehlert <ulfehlert svn gnome org>
* tools/split_xml_multi_lang.py: minor changes including
two minor bugfixes
Modified:
branches/xml2po-support/ChangeLog
branches/xml2po-support/tools/split_xml_multi_lang.py
Modified: branches/xml2po-support/tools/split_xml_multi_lang.py
==============================================================================
--- branches/xml2po-support/tools/split_xml_multi_lang.py (original)
+++ branches/xml2po-support/tools/split_xml_multi_lang.py Wed Oct 1 18:16:33 2008
@@ -39,6 +39,10 @@
logging.basicConfig(level=logging.INFO, format="%(levelname)-8s %(message)s")
Logger = logging.getLogger("splitxml")
+# Constants
+RECURSIVE = True
+NONRECURSIVE = False
+
# these tags are considered NOT FINAL
sections = ('sect1', 'sect2', 'sect3', 'sect4', 'section', 'bibliodiv',
'book', 'part', 'chapter', 'preface', 'legalnotice')
@@ -87,7 +91,7 @@
"""FIXME"""
def __init__(self, node):
- self.logger = logging.getLogger("splitxml.node")
+ self.logger = logging.getLogger("splitxml")
assert isinstance(node, xml.dom.minidom.Node)
self._node = node
@@ -139,7 +143,7 @@
self.dest = {}
self.seqnum = 0
- self.logger = logging.getLogger("splitxml.doc")
+ self.logger = logging.getLogger("splitxml")
self.logger.info("Parsing %s" % filename)
try:
@@ -200,14 +204,22 @@
continue
if child.nodeType != xml.dom.minidom.Node.ELEMENT_NODE:
for lang in self.languages:
- clone = child.cloneNode(False)
+ clone = child.cloneNode(NONRECURSIVE)
self.dest[lang].appendChild(clone)
else:
- if not 'en' in self.get_langs(child):
+ try:
+ self.doc_languages = self.get_langs(child)
+ if not 'en' in self.doc_languages:
+ raise RuntimeError("No English document element")
+ except (AttributeError, RuntimeError):
+ #sys.exit("No English document element")
self.logger.error("No English document element")
sys.exit(74)
+ # Now we know that the document element has a valid "lang"
+ # attribute, and so has every element (via parent nodes).
+ self.logger.debug("languages = %s" % languages)
source = self.vectorize(child)
- clones = self.append_clones(source, self.dest, False)
+ clones = self.append_clones(source, self.dest, NONRECURSIVE)
return self.split(child, source, clones)
# Never reached, since "parse(filename)" catched it...
@@ -221,23 +233,31 @@
TODO: describe the algorithm(?)
"""
self.logger.debug("split(%s)" % (elem.nodeName))
- assert source and dest
+ assert elem.isSameNode(source['en'])
+
+ # keep track of the visited nodes
self.seqnum += 1
for child in elem.childNodes:
# (1) skip this node if we don't need it (e.g. comments)
if self.ignore(child):
- self.logger.debug("ignoring %s %s" % (child.nodeType, child.nodeName))
+ #self.logger.debug("ignoring %s %s" % (child.nodeType, child.nodeName))
+ pass
# (2) append non-empty text nodes to the destination nodes
elif self.text(child):
if child.nodeValue.strip():
# we should never be here, the (parent) node
# should be final then...
- self.logger.warn("TEXT in %s" % elem.nodeName)
+ logger.warn("TEXT in %s: <%s>" % \
+ (elem.nodeName, child.nodeValue.strip()))
+ # XXX: better just skip this text?
for lang in self.languages:
- dest[lang].appendChild(child)
+ dest[lang].appendChild(child.cloneNode(NONRECURSIVE))
+ elif self.logger.isEnabledFor(logging.DEBUG):
+ for lang in self.languages:
+ dest[lang].appendChild(child.cloneNode(NONRECURSIVE))
# (3) skip every non-English element
elif self.skip(child):
@@ -255,13 +275,15 @@
# (4a) append recursively (localized) clones of nodes we don't
# need/want to process any further (para, phrase, etc.)
if self.final(child):
- self.logger.debug("adding cloned final %s" % child.nodeName)
- clones = self.append_clones(copies, dest, True)
+ self.logger.debug("split(%s): adding cloned final %s" % \
+ (elem.nodeName, child.nodeName))
+ clones = self.append_clones(copies, dest, RECURSIVE)
# (4b) append non-recursively (localized) clones of nodes and
# process child recursively (sect[1-4], note, etc.)
else:
- self.logger.debug("adding cloned %s" % child.nodeName)
- clones = self.append_clones(copies, dest, False)
+ self.logger.debug("split(%s): adding cloned %s" % \
+ (elem.nodeName, child.nodeName))
+ clones = self.append_clones(copies, dest, NONRECURSIVE)
self.split(child, copies, clones)
return dest
@@ -320,17 +342,18 @@
Elements with a "seqnum" attribute will be removed from the list.
"""
siblings = []
- this = element
- while this.previousSibling: this = this.previousSibling
- while this:
+ sibl = element
+ # The English element is not necessarily the first one,
+ # so we start with the very first sibling:
+ while sibl.previousSibling: sibl = sibl.previousSibling
+ while sibl:
# TODO: add test for 'lang' attribute here(!?)
- if this.nodeType == element.nodeType \
- and this.nodeName == element.nodeName \
- and not this.isSameNode(element):
- if not (this.hasAttributes() and
- this.attributes.get("seqnum")):
- siblings.append(this)
- this = this.nextSibling
+ if sibl.nodeType == element.nodeType \
+ and sibl.nodeName == element.nodeName \
+ and not sibl.isSameNode(element):
+ if not sibl.getAttribute("seqnum"):
+ siblings.append(sibl)
+ sibl = sibl.nextSibling
return siblings
def append_clones(self, element, parent, recursive):
@@ -342,7 +365,8 @@
clones = dict([(key, element[key].cloneNode(recursive))
for key in element])
for lang in clones:
- clones[lang].removeAttribute("seqnum")
+ if not self.logger.isEnabledFor(logging.DEBUG):
+ clones[lang].removeAttribute("seqnum")
parent[lang].appendChild(clones[lang])
return clones
@@ -371,7 +395,6 @@
elif name in text_final_nodes:
return self.has_nonempty_text(node)
elif name in ('procedure', 'step'):
- self.logger.debug("final(%s): %s" % (name, (self.get_langs(node) == 1)))
return self.get_langs(node) == 1
else:
self.logger.warn("don't know what to do with '%s', assuming final" % name)
@@ -416,20 +439,18 @@
"""Get a list of languages specified by the "lang"
attribute for a given element
- Returnes all languages if element does not have a
- "lang" attribute.
- """
- # get value as string (e.g. "en;de;fr" or "")
- lang_attr = elem.getAttribute("lang")
-
- if lang_attr:
- langs = lang_attr.strip(';').split(';')
- if all:
- return langs
- else:
- return [k for k in langs if k in self.languages]
- else:
- return self.languages
+ If that element does not have a "lang" attribute,
+ the parent nodes will be searched until an attribute
+ is found. (The document/root element is guaranteed to
+ have a "lang" attribute...).
+ """
+ # get value as string (e.g. "en;de;fr")
+ langs = elem.getAttribute("lang")
+ while not langs:
+ elem = elem.parentNode
+ langs = elem.getAttribute("lang")
+ langs = langs.strip(';').split(';')
+ return [lang for lang in langs if lang in self.languages]
################################################################
@@ -446,7 +467,7 @@
# parse command line
usage = "usage: %prog [options] [FILE [DIR]]"
- version = "%prog 0.3 2008-09-06"
+ version = "%prog 0.4 2008-10-01"
cmdline = optparse.OptionParser(usage=usage, version=version)
cmdline.set_defaults(languages= ",".join(languages))
@@ -475,7 +496,7 @@
options.filename = args[0]
if len(args) == 2:
options.destdir = args[1]
- else:
+ elif len(args) > 2:
cmdline.error("too many arguments")
if not options.filename:
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]