[xml] Python XML doc displayer
- From: David Turvene <dturvene comcast net>
- To: xml gnome org
- Subject: [xml] Python XML doc displayer
- Date: Fri, 03 Dec 2004 15:03:47 -0500
I'm learning libxml2 because it seems to be robust and actively
maintained. Also, I like the rich python support. In one of my
learning exercises, I emulate xmlDebugDumpDocument by recursively
descending the document tree. The program is pasted below. I ran it
against several libxml2-2.6.16/doc/*.xml files for testing. My
comments/questions:
1) How does one get the document header properties (e.g. 'version',
'standalone', the DTD refentry) via python?
2) I had a problem testing whether a node has a namespace. The libxml2.py
code throws an exception when xmlNode::ns() is called on a node that has no
namespace, and I didn't see a test ('hasNs'?) for namespace presence.
3) Is there a better way to walk the node tree?
Dave Turvene
------------------------------ snip, snip
-------------------------------------
#!/usr/bin/env python
# XML Document Node pretty printer, see inline __doc__ for more info
# Using: libxml2-2.6.16-2, libxml2-python-2.6.16-2
#
# 041202 Dave Turvene
import os, sys, string
from optparse import OptionParser
import libxml2, libxml2mod
# Global offset indicators for pretty printing.
#   prefix     - indentation string written in front of the node currently
#                being displayed; DispNodeRecurs extends it on entry and
#                trims it again on exit.
#   prefixincr - the string appended to prefix for each extra level of depth.
prefix = ''
prefixincr = ' '
def DispNode(self):
    """
    Display one XML node on stdout, trying to match the output of
    'xmllint --debug' or xmlDebugDumpDocument.

    self is a node object providing get_type(), get_name() and
    get_content(); element nodes must additionally expose the raw
    libxml2 object as self._o.

    Every node produces a header made of the current global prefix, the
    upper-cased node type and a single space, followed by type-specific
    detail:

      document_xml  a second line with URL=<name>
      element       the name, qualified as <ns>:<name> when a namespace
                    is attached
      text          a second line with content=<text>; whitespace-only
                    or empty text is shown as a bare 'content='
      attribute     the attribute name
      pi            the target name, a blank line, then content=:<data>:
      dtd           the name and the content
      anything else 'UNKNOWN TYPE:' and the type string

    Nothing is returned.  The output is written with sys.stdout.write so
    that the function behaves the same on Python 2 and Python 3.
    """
    node_type = self.get_type()
    # detail lines are indented one step deeper than the node header
    detail_indent = prefix + prefixincr

    if node_type == 'document_xml':
        # Need to recover the version, encoding and standalone
        # properties for this document
        detail = "\n%sURL=%s" % (prefix, self.get_name())
    elif node_type == 'element':
        # Hack to test if there is a namespace.  This is taken from the
        # xmlNode class code, but doesn't throw an exception with no
        # namespace
        ns = libxml2mod.xmlNodeGetNs(self._o)
        if ns is None:
            detail = "%s" % (self.get_name(),)
        else:
            detail = "%s:%s" % (libxml2.xmlNs(ns).get_name(),
                                self.get_name())
    elif node_type == 'text':
        # simple text node; the truthiness test keeps empty or missing
        # content from reaching .isspace()
        content = self.get_content()
        if content and not content.isspace():
            detail = "\n%scontent=%s" % (detail_indent, content)
        else:
            detail = "\n%scontent=" % (detail_indent,)
    elif node_type == 'attribute':
        # attribute node, these are found by get_properties
        detail = "%s" % (self.get_name(),)
    elif node_type == 'pi':
        detail = "%s\n\n%scontent=:%s:" % (self.get_name(), detail_indent,
                                           self.get_content())
    elif node_type == 'dtd':
        # Still need to work on this
        detail = "%s %s" % (self.get_name(), self.get_content())
    else:
        detail = "UNKNOWN TYPE: %s" % (node_type,)

    sys.stdout.write("%s%s %s\n" % (prefix, node_type.upper(), detail))
def DispNodeRecurs(self):
    """
    Recursively display a node, then its properties and its children.

    self is a node object providing get_properties(), lsCountNode(),
    get_children() and whatever DispNode needs; siblings are reached
    through get_next().

    The global pretty-printing prefix is lengthened by prefixincr while
    this node and its subtree are displayed and shortened again before
    returning, even when displaying the subtree raises, so a failed walk
    does not leave the indentation corrupted for later calls.
    """
    # global pretty printing prefix (prefixincr is only read)
    global prefix

    # increase the pretty printing offset upon entry
    prefix += prefixincr
    try:
        # display my information
        DispNode(self)

        # Display properties/attributes for self; each one is walked
        # recursively as a node in its own right
        attr = self.get_properties()
        while attr is not None:
            DispNodeRecurs(attr)
            attr = attr.get_next()

        # Display children of self, but only when lsCountNode() reports
        # a positive count
        if self.lsCountNode() > 0:
            child = self.get_children()
            while child is not None:
                DispNodeRecurs(child)
                child = child.get_next()
    finally:
        # unwind, so decrease the pretty printing offset
        prefix = prefix[len(prefixincr):]
def XmlTreeDisp(argv):
    """Run libxml2 parser on an XML file, then do a depth-first walk
    of the node tree.  Send the output to stdout, progress/debug messages go
    to stderr.  There is an option to run debugDumpDocument to a file for
    comparison.

    argv is the argument list handed to optparse as-is.  Recognised
    options:

      -i, --infile FILE   XML input file (default ./schema/test.xsd)
      -t, --test FILE     also write the debugDumpDocument output to FILE
      -d, --debug         enable libxml2 memory debugging and report the
                          result once the document has been freed

    The parsed document is freed and the comparison file is closed even
    if the walk or the dump raises; the exception then propagates to the
    caller.
    """
    # Command line parsing
    definputfile = './schema/test.xsd'
    parser = OptionParser()
    parser.add_option('-i', '--infile',
                      dest='inxmlfile',
                      default=definputfile,
                      help='XML input file, default to ' + definputfile)
    parser.add_option('-t', '--test',
                      dest='testfile',
                      help='dump xmlDebugDumpDocument to a file for comparison')
    parser.add_option('-d', '--debug',
                      action='store_true',
                      dest='debug',
                      help='enable debugging, such as it is')
    # Anything that is not an option (for instance the program name when
    # the caller passes sys.argv) ends up in the positional list, which
    # is not used.
    options, _ = parser.parse_args(argv)

    if options.debug:
        libxml2.debugMemory(1)

    # Parse the input xml document
    doc = libxml2.parseFile(options.inxmlfile)
    try:
        # depth-first walk of the xml tree
        sys.stderr.write("%s %s\n" % (">> Running pretty printer against ",
                                      options.inxmlfile))
        DispNodeRecurs(doc)

        # If option is set, write xmlDebugDumpDocument for comparison
        if options.testfile is not None:
            sys.stderr.write("%s %s\n" % (
                ">> Running debugDumpDocument, writing to ",
                options.testfile))
            fout = open(options.testfile, 'w')
            try:
                doc.debugDumpDocument(fout)
            finally:
                fout.close()
    finally:
        # clean myself up and get outta town
        doc.freeDoc()

    libxml2.cleanupParser()
    if options.debug:
        leaked = libxml2.debugMemory(1)
        if leaked != 0:
            sys.stderr.write("Memory leak %d bytes\n" % (leaked,))
            libxml2.dumpMemory()
        else:
            sys.stderr.write("Memory checks OK\n")
# Script entry point: hand the full command line to the tree displayer.
if __name__ == "__main__":
    XmlTreeDisp(sys.argv)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]