[libxml2] Python binding for xmlRegisterInputCallback



commit 48da90bc4a3809728d08ee9ee8da21d044fbca82
Author: Alexey Neyman <stilor att net>
Date:   Mon Feb 25 15:54:25 2013 +0800

    Python binding for xmlRegisterInputCallback
    
    It is possible to make xmlIO handle any protocol by means of
    xmlRegisterInputCallbacks(). However, that function is currently only
    available in the C API. So, the natural solution seems to be implementing
    Python bindings for xmlRegisterInputCallbacks().
    
    * python/generator.py: skip xmlPopInputCallbacks
    * python/libxml.c python/libxml.py python/libxml_wrap.h: implement the
      wrappers
    * python/tests/input_callback.py python/tests/Makefile.am: also add a test case

 python/generator.py            |    2 +
 python/libxml.c                |   90 ++++++++++++++++++++++++++++
 python/libxml.py               |   26 ++++++++-
 python/libxml_wrap.h           |    2 +
 python/tests/Makefile.am       |    1 +
 python/tests/input_callback.py |  128 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 248 insertions(+), 1 deletions(-)
---
diff --git a/python/generator.py b/python/generator.py
index 767c4bb..83d100c 100755
--- a/python/generator.py
+++ b/python/generator.py
@@ -339,6 +339,8 @@ def skip_function(name):
         return 1
     if name == "xmlValidateAttributeDecl":
         return 1
+    if name == "xmlPopInputCallbacks":
+       return 1
 
     return 0
 
diff --git a/python/libxml.c b/python/libxml.c
index a556160..9c013a6 100644
--- a/python/libxml.c
+++ b/python/libxml.c
@@ -724,6 +724,94 @@ libxml_xmlSetEntityLoader(ATTRIBUTE_UNUSED PyObject *self, PyObject *args) {
     return(py_retval);
 }
 
+/************************************************************************
+ *                                                                     *
+ *             Input callback registration                             *
+ *                                                                     *
+ ************************************************************************/
+static PyObject *pythonInputOpenCallbackObject;
+static int pythonInputCallbackID = -1;
+
+static int
+pythonInputMatchCallback(ATTRIBUTE_UNUSED const char *URI)
+{
+    /* Match callback: accept every URI unconditionally. Whether the URI is
+     * really supported is decided later, by the open callback.  */
+    return 1;
+}
+
+static void *
+pythonInputOpenCallback(const char *URI)
+{
+    /* Pass the URI to the registered Python callable; the object it returns
+     * is what this open callback hands back.  */
+    PyObject *stream = PyObject_CallFunction(pythonInputOpenCallbackObject,
+           (char *)"s", URI);
+    /* Py_None means "no stream": release it and return NULL instead.  */
+    if (stream != Py_None)
+       return stream;
+    Py_DECREF(stream);
+    return NULL;
+}
+
+PyObject *
+libxml_xmlRegisterInputCallback(ATTRIBUTE_UNUSED PyObject *self,
+                                PyObject *args) {
+    PyObject *cb;
+
+    if (!PyArg_ParseTuple(args,
+               (const char *)"O:libxml_xmlRegisterInputCallback", &cb))
+       return(NULL);
+
+    if (!PyCallable_Check(cb)) {
+       PyErr_SetString(PyExc_ValueError, "input callback is not callable");
+       return(NULL);
+    }
+
+    /* Only one C-level handler is ever registered: the match/open callback
+     * API carries no data object, so distinct Python callables cannot be
+     * told apart here and the Python module keeps its own list.  While a
+     * handler is already registered, cb is ignored and the call succeeds.  */
+    if (pythonInputCallbackID == -1) {
+       pythonInputCallbackID = xmlRegisterInputCallbacks(
+               pythonInputMatchCallback, pythonInputOpenCallback,
+               xmlPythonFileReadRaw, xmlPythonFileCloseRaw);
+       if (pythonInputCallbackID == -1)
+           return PyErr_NoMemory();
+       pythonInputOpenCallbackObject = cb;
+       Py_INCREF(pythonInputOpenCallbackObject);
+    }
+
+    Py_INCREF(Py_None);
+    return(Py_None);
+}
+
+PyObject *
+libxml_xmlUnregisterInputCallback(ATTRIBUTE_UNUSED PyObject *self,
+                                ATTRIBUTE_UNUSED PyObject *args) {
+    int ret;
+
+    ret = xmlPopInputCallbacks();
+    if (pythonInputCallbackID != -1) {
+       /* libxml's API only offers a pop, not removal by ID, so all we can
+        * do is check afterwards that the Python handler was the one popped.  */
+       if (pythonInputCallbackID == ret) {
+           pythonInputCallbackID = -1;
+           Py_DECREF(pythonInputOpenCallbackObject);
+           pythonInputOpenCallbackObject = NULL;
+       } else {
+           PyErr_SetString(PyExc_AssertionError, "popped non-python input callback");
+           return(NULL);
+       }
+    } else if (ret == -1) {
+       /* xmlPopInputCallbacks() returned -1: nothing was left to pop */
+       PyErr_SetString(PyExc_IndexError, "no input callbacks to pop");
+       return(NULL);
+    }
+
+    Py_INCREF(Py_None);
+    return(Py_None);
+}
 
 /************************************************************************
  *                                                                     *
@@ -3693,6 +3781,8 @@ static PyMethodDef libxmlMethods[] = {
     {(char *) "getObjDesc", libxml_getObjDesc, METH_VARARGS, NULL},
     {(char *) "compareNodesEqual", libxml_compareNodesEqual, METH_VARARGS, NULL},
     {(char *) "nodeHash", libxml_nodeHash, METH_VARARGS, NULL},
+    {(char *) "xmlRegisterInputCallback", libxml_xmlRegisterInputCallback, METH_VARARGS, NULL},
+    {(char *) "xmlUnregisterInputCallback", libxml_xmlUnregisterInputCallback, METH_VARARGS, NULL},
     {NULL, NULL, 0, NULL}
 };
 
diff --git a/python/libxml.py b/python/libxml.py
index 013d65c..193f97a 100644
--- a/python/libxml.py
+++ b/python/libxml.py
@@ -719,11 +719,35 @@ class xmlTextReaderCore:
             return arg
 
 #
-# The cleanup now goes though a wrappe in libxml.c
+# The cleanup now goes though a wrapper in libxml.c
 #
 def cleanupParser():
     libxml2mod.xmlPythonCleanupParser()
 
+#
+# Python-side front end for xmlRegisterInputCallbacks.
+# The C match/open callbacks cannot carry a data object, so the
+# registered Python callables are kept in a list here and tried
+# newest-first; the first result that is not None is returned.
+#
+__input_callbacks = []
+def registerInputCallback(func):
+    def findOpenCallback(URI):
+        for handler in reversed(__input_callbacks):
+            stream = handler(URI)
+            if stream is not None:
+                return stream
+    libxml2mod.xmlRegisterInputCallback(findOpenCallback)
+    __input_callbacks.append(func)
+
+def popInputCallbacks():
+    # Python-level callbacks go first; once that list is empty (including
+    # right after removing its last entry) the C-level pop is invoked too.
+    if __input_callbacks:
+        __input_callbacks.pop()
+    if not __input_callbacks:
+        libxml2mod.xmlUnregisterInputCallback()
+
 # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
 #
 # Everything before this line comes from libxml.py 
diff --git a/python/libxml_wrap.h b/python/libxml_wrap.h
index eaa5e96..ac5a626 100644
--- a/python/libxml_wrap.h
+++ b/python/libxml_wrap.h
@@ -247,3 +247,5 @@ PyObject * libxml_xmlSchemaValidCtxtPtrWrap(xmlSchemaValidCtxtPtr valid);
 #endif /* LIBXML_SCHEMAS_ENABLED */
 PyObject * libxml_xmlErrorPtrWrap(xmlErrorPtr error);
 PyObject * libxml_xmlSchemaSetValidErrors(PyObject * self, PyObject * args);
+PyObject * libxml_xmlRegisterInputCallback(PyObject *self, PyObject *args);
+PyObject * libxml_xmlUnregisterInputCallback(PyObject *self, PyObject *args);
diff --git a/python/tests/Makefile.am b/python/tests/Makefile.am
index ab079bb..95ebead 100644
--- a/python/tests/Makefile.am
+++ b/python/tests/Makefile.am
@@ -19,6 +19,7 @@ PYTESTS=      \
     xpath.py   \
     outbuf.py  \
     inbuf.py   \
+    input_callback.py \
     resolver.py \
     regexp.py  \
     reader.py  \
diff --git a/python/tests/input_callback.py b/python/tests/input_callback.py
new file mode 100644
index 0000000..982f940
--- /dev/null
+++ b/python/tests/input_callback.py
@@ -0,0 +1,128 @@
+#!/usr/bin/python -u
+#
+# This tests custom input callbacks
+#
+import sys
+import StringIO
+import libxml2
+
+# We implement a new scheme, py://strings/ that will reference this dictionary
+pystrings = {
+    'catalogs/catalog.xml' :
+'''<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE catalog PUBLIC "-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN" "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd">
+<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
+  <rewriteSystem systemIdStartString="http://example.com/dtds/" rewritePrefix="../dtds/"/>
+</catalog>''',
+
+    'xml/sample.xml' :
+'''<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE root SYSTEM "http://example.com/dtds/sample.dtd">
+<root>&sample.entity;</root>''',
+
+    'dtds/sample.dtd' :
+'''
+<!ELEMENT root (#PCDATA)>
+<!ENTITY sample.entity "replacement text">'''
+}
+
+def verify_doc(doc):
+    root = doc.getRootElement()
+    if root.name != 'root':
+        raise ValueError("name")
+    if root.content != 'replacement text':
+        raise ValueError("content")
+
+prefix = "py://strings/"
+def my_input_cb(URI):
+    # Only the py://strings/ scheme is ours; decline anything else with None.
+    if not URI.startswith(prefix):
+        return None
+    path = URI[len(prefix):]
+    if path not in pystrings:
+        print "my_input_cb: path does not exist, '%s'" % path
+        return None
+    print "my_input_cb: loading '%s'" % URI
+    return StringIO.StringIO(pystrings[path])
+
+opts = libxml2.XML_PARSE_DTDLOAD | libxml2.XML_PARSE_NONET | libxml2.XML_PARSE_COMPACT
+startURL = prefix + "xml/sample.xml"
+catURL = prefix + "catalogs/catalog.xml"
+
+# Check that we cannot read the custom scheme without a custom callback
+print
+print "Test 1: Expecting failure to load (custom scheme not handled)"
+try:
+    doc = libxml2.readFile(startURL, None, opts)
+    print "Read custom scheme without registering handler succeeded?"
+    sys.exit(1)
+except libxml2.treeError, e:
+    pass
+
+# Register the handler and load the same document; no catalog yet for its DTD
+print
+print "Test 2: Expecting failure to load (no catalog - cannot load DTD)"
+libxml2.registerInputCallback(my_input_cb)
+doc = libxml2.readFile(startURL, None, opts)
+try:
+    verify_doc(doc)
+    print "Doc was loaded?"
+except ValueError, e:
+    if str(e) != "content":
+        print "Doc verify failed"
+doc.freeDoc()
+
+# Register a catalog (also served via py://strings/) and retry
+print
+print "Test 3: Expecting successful loading"
+parser = libxml2.createURLParserCtxt(startURL, opts)
+parser.addLocalCatalog(catURL)
+parser.parseDocument()
+doc = parser.doc()
+verify_doc(doc)
+doc.freeDoc()
+
+# Unregister the custom callback after the parser has already been created
+print
+print "Test 4: Expect failure to read (custom callback unregistered during read)"
+parser = libxml2.createURLParserCtxt(startURL, opts)
+libxml2.popInputCallbacks()
+parser.addLocalCatalog(catURL)
+parser.parseDocument()
+doc = parser.doc()
+try:
+    verify_doc(doc)
+    print "Doc was loaded?"
+except ValueError, e:
+    if str(e) != "content":
+        print "Doc verify failed"
+doc.freeDoc()
+
+# Try to load the document again
+print
+print "Test 5: Expect failure to load (callback unregistered)"
+try:
+    doc = libxml2.readFile(startURL, None, opts)
+    print "Read custom scheme without registering handler succeeded?"
+    sys.exit(1)
+except libxml2.treeError, e:
+    pass
+
+# Standard I/O should nevertheless still be readable...
+print
+print "Test 6: Expect successful loading using standard I/O"
+doc = libxml2.readFile("tst.xml", None, opts)
+doc.freeDoc()
+
+# Now pop ALL input callbacks, should fail to load even standard I/O
+print
+print "Test 7: Remove all input callbacks, expect failure to load using standard I/O"
+try:
+    while True:
+        libxml2.popInputCallbacks()
+except IndexError, e:
+    print "Popped all input callbacks: " + str(e)
+try:
+    doc = libxml2.readFile("tst.xml", None, opts)
+except libxml2.treeError, e:
+    pass


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]