[sysadmin-bin: 95/168] Add RDF writing capabilities to semi_rdf.py



commit 3dc8c66ad369aa3f25d0cdbe895c8a37f7681a16
Author: Owen W. Taylor <otaylor fishsoup net>
Date:   Fri Apr 24 18:09:27 2009 -0400

    Add RDF writing capabilities to semi_rdf.py
    
    Support writing out a set of RDF nodes to an XML/RDF file; we can
    use this for munging an RDF file or combining multiple files into
    one.
    
    We tag rdf:resource values with the string subclass UrlResource so
    we can write them out again that way.

 semi_rdf.py |   99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 files changed, 94 insertions(+), 5 deletions(-)
---
diff --git a/semi_rdf.py b/semi_rdf.py
index 5e9dfba..6079f85 100755
--- a/semi_rdf.py
+++ b/semi_rdf.py
@@ -22,14 +22,19 @@
 import re
 import sys
 import xml.sax
+from xml.sax.saxutils import escape, quoteattr
 
 RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
 XML = "http://www.w3.org/XML/1998/namespace";
 
 WHITESPACE_RE = re.compile("\s+")
 
+# Tag strings as urls
+class UrlResource(str):
+    pass
+
 class Node:
-    def __init__(self, name, about):
+    def __init__(self, name, about=None):
         self.name  = name
         self.about = None
         self.properties = []
@@ -45,6 +50,12 @@ class Node:
 
         return value
 
+    def add_property(self, name, lang, value):
+        self.properties.append((name, lang, value))
+
+    def remove_property(self, name):
+        self.properties = filter(lambda x: x[0] != name, self.properties)
+
 class RdfHandler(xml.sax.handler.ContentHandler):
     def __init__(self):
         self.nodes = []
@@ -81,15 +92,15 @@ class RdfHandler(xml.sax.handler.ContentHandler):
                 if attrname == (XML, "lang"):
                     pass
                 elif attrname == (RDF, "resource"):
-                    resource = attributes.getValue(attrname)
+                    resource = UrlResource(attributes.getValue(attrname))
                 elif attrname == (RDF, "parseType"):
                     parseType = attributes.getValue(attrname)
                     if parseType == "resource":
                         if node == None:
-                            node = Node(None, None)
+                            node = Node(None)
                 else:
                     if node == None:
-                        node = Node(None, None)
+                        node = Node(None)
                     node.properties.append((attrname, lang, attributes.getValue(attrname)))
             self.__property_stack.append((name, lang, resource))
             if node is not None:
@@ -97,7 +108,7 @@ class RdfHandler(xml.sax.handler.ContentHandler):
                 self.__node_stack.append(node)
                 self.__depth += 1
         else:
-            node = Node(name, None)
+            node = Node(name)
             self.__node_stack.append(node)
             for attrname in attributes.getNames():
                 if attrname == (RDF, "about"):
@@ -148,3 +159,81 @@ def read_rdf(f):
 
     return handler.nodes
 
+def qualname(name, namespaces):
+    if name[0] is None:
+        return name[1]
+    else:
+        return namespaces[name[0]] + ":" + name[1]
+
+def _dump_node(f, node, lang, namespaces, depth=0):
+    prefix = " " * depth
+    if node.name != None:
+        f.write(prefix)
+        f.write('<%s' % qualname(node.name, namespaces))
+        if node.about is not None:
+            f.write(' rdf:about=%s' % quoteattr(node.about))
+        f.write(">\n")
+
+    for n, l, v in node.properties:
+        f.write(prefix)
+        f.write('  ')
+        f.write('<%s' % qualname(n, namespaces))
+        if l != lang:
+            f.write(' xml:lang=%s' % quoteattr(l))
+        if isinstance(v, UrlResource):
+            f.write(' rdf:resource=%s' % quoteattr(v))
+        elif isinstance(v, Node) and v.name == None:
+            f.write(' rdf:parseType="resource"')
+
+        if isinstance(v, UrlResource):
+            f.write("/>\n")
+        elif isinstance(v, basestring):
+            f.write(">")
+            f.write(escape(v))
+            f.write('</%s>\n' % qualname(n, namespaces))
+        elif v == None:
+            f.write("/>\n");
+        else:
+            f.write(">\n")
+            _dump_node(f, v, l, namespaces, depth+4)
+            f.write(prefix)
+            f.write('  ')
+            f.write('</%s>\n' % qualname(n, namespaces))
+
+    if node.name != None:
+        f.write(prefix)
+        f.write('</%s>\n' % qualname(node.name, namespaces))
+
+def dump_rdf(nodes, f):
+    namespaces = {
+        RDF: 'rdf',
+        XML: 'xml'
+    };
+
+    namespace_count = 0
+    toplevel_nodes = set(nodes)
+
+    for node in nodes:
+        if node.name and node.name[0] is not None and not node.name[0] in namespaces:
+            namespace_count += 1
+            namespaces[node.name[0]] = "ns" + str(namespace_count)
+        for n, l, v in node.properties:
+            if n[0] is not None and not n[0] in namespaces:
+                namespace_count += 1
+                namespaces[n[0]] = "ns" + str(namespace_count)
+            if isinstance(v, Node):
+                toplevel_nodes.remove(v)
+
+    f.write("<?xml version='1.0' encoding='UTF-8'?>\n")
+    f.write('<rdf:RDF')
+    items = sorted(namespaces.items(), lambda x, y: cmp(x[1], y[1]))
+    for url, name in items:
+        if name != 'xml':
+            f.write('\n    xmlns:%s="%s"' % (name, url))
+    f.write('>\n')
+
+    for node in toplevel_nodes:
+        _dump_node(f, node, None, namespaces)
+
+    f.write('</rdf:RDF>\n')
+



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]