[sysadmin-bin: 95/168] Add RDF writing capabilities to semi_rdf.py
- From: Andrea Veri <av src gnome org>
- To: gnome-sysadmin gnome org,commits-list gnome org
- Subject: [sysadmin-bin: 95/168] Add RDF writing capabilities to semi_rdf.py
- Date: Thu, 24 May 2012 19:59:26 +0000 (UTC)
commit 3dc8c66ad369aa3f25d0cdbe895c8a37f7681a16
Author: Owen W. Taylor <otaylor fishsoup net>
Date: Fri Apr 24 18:09:27 2009 -0400
Add RDF writing capabilities to semi_rdf.py
Support writing out a set of RDF nodes to an XML/RDF file; we can
use this for munging an RDF file or combining multiple files into
one.
We tag rdf:resource values with the string subclass UrlResource so
we can write them out again that way.
semi_rdf.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 files changed, 94 insertions(+), 5 deletions(-)
---
diff --git a/semi_rdf.py b/semi_rdf.py
index 5e9dfba..6079f85 100755
--- a/semi_rdf.py
+++ b/semi_rdf.py
@@ -22,14 +22,19 @@
import re
import sys
import xml.sax
+from xml.sax.saxutils import escape, quoteattr
RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
XML = "http://www.w3.org/XML/1998/namespace"
WHITESPACE_RE = re.compile("\s+")
+# Marker type: a plain string that is known to hold an rdf:resource URL
+class UrlResource(str):
+    """String subclass used to tag a value as an rdf:resource URL."""
+
class Node:
- def __init__(self, name, about):
+ def __init__(self, name, about=None):
self.name = name
self.about = None
self.properties = []
@@ -45,6 +50,12 @@ class Node:
return value
+    def add_property(self, name, lang, value):
+        """Append a (name, lang, value) entry to this node's property list."""
+        entry = (name, lang, value)
+        self.properties.append(entry)
+
+    def remove_property(self, name):
+        """Drop every property whose name equals *name*.
+
+        The list is rebuilt with a comprehension so self.properties always
+        stays a real list; filter() returns a lazy iterator on Python 3,
+        which would break the append() calls made elsewhere on this list.
+        """
+        self.properties = [prop for prop in self.properties if prop[0] != name]
+
class RdfHandler(xml.sax.handler.ContentHandler):
def __init__(self):
self.nodes = []
@@ -81,15 +92,15 @@ class RdfHandler(xml.sax.handler.ContentHandler):
if attrname == (XML, "lang"):
pass
elif attrname == (RDF, "resource"):
- resource = attributes.getValue(attrname)
+ resource = UrlResource(attributes.getValue(attrname))
elif attrname == (RDF, "parseType"):
parseType = attributes.getValue(attrname)
if parseType == "resource":
if node == None:
- node = Node(None, None)
+ node = Node(None)
else:
if node == None:
- node = Node(None, None)
+ node = Node(None)
node.properties.append((attrname, lang, attributes.getValue(attrname)))
self.__property_stack.append((name, lang, resource))
if node is not None:
@@ -97,7 +108,7 @@ class RdfHandler(xml.sax.handler.ContentHandler):
self.__node_stack.append(node)
self.__depth += 1
else:
- node = Node(name, None)
+ node = Node(name)
self.__node_stack.append(node)
for attrname in attributes.getNames():
if attrname == (RDF, "about"):
@@ -148,3 +159,81 @@ def read_rdf(f):
return handler.nodes
+def qualname(name, namespaces):
+    """Return the XML qualified name for a (namespace URI, local name) pair.
+
+    A None namespace gives the bare local name; otherwise the prefix stored
+    for the URI in *namespaces* is joined to the local name with a colon.
+    """
+    uri = name[0]
+    if uri is not None:
+        ns_prefix = namespaces[uri]
+        return ns_prefix + ":" + name[1]
+    return name[1]
+
+def _dump_node(f, node, lang, namespaces, depth=0):
+    """Write *node* and, recursively, its properties to *f* as XML.
+
+    f          -- object with a write() method
+    node       -- Node to serialise; when node.name is None no element is
+                  written for the node itself, only its properties
+    lang       -- xml:lang value in effect in the enclosing element, or None
+    namespaces -- mapping from namespace URI to prefix, as used by qualname()
+    depth      -- number of prefix characters to indent this node by
+    """
+    prefix = " " * depth
+    has_element = node.name is not None
+
+    if has_element:
+        f.write(prefix)
+        f.write('<%s' % qualname(node.name, namespaces))
+        if node.about is not None:
+            f.write(' rdf:about=%s' % quoteattr(node.about))
+        f.write(">\n")
+
+    for n, l, v in node.properties:
+        f.write(prefix)
+        f.write(' ')
+        f.write('<%s' % qualname(n, namespaces))
+        if l != lang:
+            # A property without a language inside a language-tagged scope
+            # is written as xml:lang="", which resets the language in XML;
+            # passing None to quoteattr() would raise instead.
+            f.write(' xml:lang=%s' % quoteattr(l if l is not None else ""))
+
+        if isinstance(v, UrlResource):
+            # Tested before basestring because UrlResource is itself a str.
+            f.write(' rdf:resource=%s' % quoteattr(v))
+            f.write("/>\n")
+        elif isinstance(v, basestring):
+            f.write(">")
+            f.write(escape(v))
+            f.write('</%s>\n' % qualname(n, namespaces))
+        elif v is None:
+            f.write("/>\n")
+        else:
+            # Remaining case: the value is a nested node.  An unnamed one
+            # is flagged with parseType so only its properties follow.
+            if isinstance(v, Node) and v.name is None:
+                f.write(' rdf:parseType="resource"')
+            f.write(">\n")
+            _dump_node(f, v, l, namespaces, depth+4)
+            f.write(prefix)
+            f.write(' ')
+            f.write('</%s>\n' % qualname(n, namespaces))
+
+    if has_element:
+        f.write(prefix)
+        f.write('</%s>\n' % qualname(node.name, namespaces))
+
+def dump_rdf(nodes, f):
+    """Write *nodes* to *f* as an XML document with an rdf:RDF root element.
+
+    nodes -- iterable of Node objects; a node that occurs as the value of
+             another node's property is written nested inside that property
+             instead of at top level
+    f     -- object with a write() method
+
+    Top-level nodes are written in the order they appear in *nodes*.
+    """
+    namespaces = {
+        RDF: 'rdf',
+        XML: 'xml',
+    }
+    predefined = len(namespaces)
+
+    def register(uri):
+        # Allocate the next free "nsN" prefix for a namespace not seen yet
+        if uri is not None and uri not in namespaces:
+            namespaces[uri] = "ns" + str(len(namespaces) - predefined + 1)
+
+    given = list(nodes)
+
+    # Walk the given nodes first, then any nested node that was not part of
+    # the input, so every name _dump_node() will emit has a prefix.
+    queue = list(given)
+    seen = set(queue)
+    nested = set()
+    position = 0
+    while position < len(queue):
+        node = queue[position]
+        position += 1
+        if node.name:
+            register(node.name[0])
+        for n, _lang, v in node.properties:
+            register(n[0])
+            if isinstance(v, Node):
+                # A set tolerates a node referenced by several properties,
+                # or one that is absent from *nodes*.
+                nested.add(v)
+                if v not in seen:
+                    seen.add(v)
+                    queue.append(v)
+
+    f.write("<?xml version='1.0' encoding='UTF-8'?>\n")
+    f.write('<rdf:RDF')
+    for url, name in sorted(namespaces.items(), key=lambda item: item[1]):
+        # The xml prefix is bound implicitly and must not be declared
+        if name != 'xml':
+            f.write('\n xmlns:%s=%s' % (name, quoteattr(url)))
+    f.write('>\n')
+
+    written = set()
+    for node in given:
+        if node in nested or node in written:
+            continue
+        written.add(node)
+        _dump_node(f, node, None, namespaces)
+
+    f.write('</rdf:RDF>\n')
+
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]