diff --git a/python/generator.py b/python/generator.py
index 767c4bb..83d100c 100755
--- a/python/generator.py
+++ b/python/generator.py
@@ -339,6 +339,8 @@ def skip_function(name):
         return 1
     if name == "xmlValidateAttributeDecl":
         return 1
+    if name == "xmlPopInputCallbacks":
+        return 1
     return 0
 
 
diff --git a/python/libxml.c b/python/libxml.c
index 831b070..8c66fed 100644
--- a/python/libxml.c
+++ b/python/libxml.c
@@ -731,6 +731,120 @@ libxml_xmlSetEntityLoader(ATTRIBUTE_UNUSED PyObject *self, PyObject *args) {
 
     return(py_retval);
 }
+/************************************************************************
+ *                                                                      *
+ *              Input callback registration                             *
+ *                                                                      *
+ ************************************************************************/
+/* Python callable used to open URIs; a reference is held while registered. */
+static PyObject *pythonInputOpenCallbackObject;
+/* ID returned by xmlRegisterInputCallbacks(), -1 while not registered. */
+static int pythonInputCallbackID = -1;
+
+/* Match callback handed to libxml2: accepts every URI. */
+static int
+pythonInputMatchCallback(ATTRIBUTE_UNUSED const char *URI)
+{
+    /* Always return success, real decision whether URI is supported will be
+     * made in open callback. */
+    return 1;
+}
+
+/*
+ * Open callback handed to libxml2: calls the registered Python callable
+ * with the URI. A Py_None result is released and reported as NULL; any
+ * other result is returned as is, without releasing it.
+ */
+static void *
+pythonInputOpenCallback(const char *URI)
+{
+    PyObject *ret;
+
+    ret = PyObject_CallFunction(pythonInputOpenCallbackObject,
+            (char *)"s", URI);
+    /* NOTE(review): when the Python callable raises, ret is NULL and is
+     * returned below with the Python error indicator presumably still set;
+     * confirm that callers cope with that. */
+    if (ret == Py_None) {
+        Py_DECREF(Py_None);
+        return NULL;
+    }
+    return ret;
+}
+
+/*
+ * Python entry point xmlRegisterInputCallback(cb).
+ * Raises ValueError when cb is not callable and MemoryError when
+ * xmlRegisterInputCallbacks() returns -1. The libxml2 callbacks are
+ * installed, and cb stored, only while none is installed yet; otherwise
+ * the call changes nothing. Returns None.
+ */
+PyObject *
+libxml_xmlRegisterInputCallback(ATTRIBUTE_UNUSED PyObject *self,
+                                PyObject *args) {
+    PyObject *cb;
+
+    if (!PyArg_ParseTuple(args,
+                (const char *)"O:libxml_xmlRegisterInputCallback", &cb))
+        return(NULL);
+
+    if (!PyCallable_Check(cb)) {
+        PyErr_SetString(PyExc_ValueError, "input callback is not callable");
+        return(NULL);
+    }
+
+    /* Python module registers a single callback and manages the list of
+     * all callbacks internally. This is necessitated by xmlInputMatchCallback
+     * API, which does not allow for passing of data objects to discriminate
+     * different Python methods. */
+    if (pythonInputCallbackID == -1) {
+        pythonInputCallbackID = xmlRegisterInputCallbacks(
+                pythonInputMatchCallback, pythonInputOpenCallback,
+                xmlPythonFileReadRaw, xmlPythonFileCloseRaw);
+        if (pythonInputCallbackID == -1)
+            return PyErr_NoMemory();
+        pythonInputOpenCallbackObject = cb;
+        Py_INCREF(pythonInputOpenCallbackObject);
+    }
+
+    Py_INCREF(Py_None);
+    return(Py_None);
+}
+
+/*
+ * Python entry point xmlUnregisterInputCallback().
+ * Calls xmlPopInputCallbacks(). While the Python callbacks are installed,
+ * the popped ID must be theirs (they are then marked as removed),
+ * otherwise AssertionError is raised. While they are not installed,
+ * IndexError is raised when xmlPopInputCallbacks() returns -1.
+ * Returns None on success.
+ */
+PyObject *
+libxml_xmlUnregisterInputCallback(ATTRIBUTE_UNUSED PyObject *self,
+                                ATTRIBUTE_UNUSED PyObject *args) {
+    int ret;
+
+    ret = xmlPopInputCallbacks();
+    if (pythonInputCallbackID != -1) {
+        /* Assert that the right input callback was popped. libxml's API does not
+         * allow removal by ID, so all that could be done is an assert. */
+        if (pythonInputCallbackID == ret) {
+            pythonInputCallbackID = -1;
+            Py_DECREF(pythonInputOpenCallbackObject);
+            pythonInputOpenCallbackObject = NULL;
+        } else {
+            PyErr_SetString(PyExc_AssertionError, "popped non-python input callback");
+            return(NULL);
+        }
+    } else if (ret == -1) {
+        /* No more callbacks to pop */
+        PyErr_SetString(PyExc_IndexError, "no input callbacks to pop");
+        return(NULL);
+    }
+
+    Py_INCREF(Py_None);
+    return(Py_None);
+}
 
 /************************************************************************
  *                                                                      *
@@ -3700,6 +3814,8 @@ static PyMethodDef libxmlMethods[] = {
     {(char *) "getObjDesc", libxml_getObjDesc, METH_VARARGS, NULL},
     {(char *) "compareNodesEqual", libxml_compareNodesEqual, METH_VARARGS, NULL},
     {(char *) "nodeHash", libxml_nodeHash, METH_VARARGS, NULL},
+    {(char *) "xmlRegisterInputCallback", libxml_xmlRegisterInputCallback, METH_VARARGS, NULL},
+    {(char *) "xmlUnregisterInputCallback", libxml_xmlUnregisterInputCallback, METH_VARARGS, NULL},
     {NULL, NULL, 0, NULL}
 };
 
diff --git a/python/libxml.py b/python/libxml.py
index 013d65c..193f97a 100644
--- a/python/libxml.py
+++ b/python/libxml.py
@@ -719,11 +719,43 @@ class xmlTextReaderCore:
         return arg
 
 #
-# The cleanup now goes though a wrappe in libxml.c
+# The cleanup now goes through a wrapper in libxml.c
 #
 def cleanupParser():
     libxml2mod.xmlPythonCleanupParser()
 
+#
+# The interface to xmlRegisterInputCallbacks.
+# Since this API does not allow passing a data object along with
+# match/open callbacks, it is necessary to maintain a list of all
+# Python callbacks.
+#
+__input_callbacks = []
+def registerInputCallback(func):
+    """Register func as an input callback. It is called with the URI
+       to open and returns the object the data is then read from, or
+       None when it does not handle that URI. Callbacks registered
+       last are asked first. """
+    def findOpenCallback(URI):
+        # Newest callback first; the first result that is not None wins
+        for cb in reversed(__input_callbacks):
+            o = cb(URI)
+            if o is not None:
+                return o
+    libxml2mod.xmlRegisterInputCallback(findOpenCallback)
+    __input_callbacks.append(func)
+
+def popInputCallbacks():
+    """Remove the Python input callback registered last, if any. When
+       no Python callback is left after that, call
+       libxml2mod.xmlUnregisterInputCallback() as well. """
+    # First pop python-level callbacks, when no more available - start
+    # popping built-in ones.
+    if len(__input_callbacks) > 0:
+        __input_callbacks.pop()
+    if len(__input_callbacks) == 0:
+        libxml2mod.xmlUnregisterInputCallback()
+
 # WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
 #
 # Everything before this line comes from libxml.py
diff --git a/python/libxml_wrap.h b/python/libxml_wrap.h
index eaa5e96..ac5a626 100644
--- a/python/libxml_wrap.h
+++ b/python/libxml_wrap.h
@@ -247,3 +247,5 @@ PyObject * libxml_xmlSchemaValidCtxtPtrWrap(xmlSchemaValidCtxtPtr valid);
 #endif /* LIBXML_SCHEMAS_ENABLED */
 PyObject * libxml_xmlErrorPtrWrap(xmlErrorPtr error);
 PyObject * libxml_xmlSchemaSetValidErrors(PyObject * self, PyObject * args);
+PyObject * libxml_xmlRegisterInputCallback(PyObject *self, PyObject *args);
+PyObject * libxml_xmlUnregisterInputCallback(PyObject *self, PyObject *args);
diff --git a/python/tests/Makefile.am b/python/tests/Makefile.am
index ab079bb..95ebead 100644
--- a/python/tests/Makefile.am
+++ b/python/tests/Makefile.am
@@ -19,6 +19,7 @@ PYTESTS= \
     xpath.py \
     outbuf.py \
     inbuf.py \
+    input_callback.py \
     resolver.py \
     regexp.py \
     reader.py \
diff --git a/python/tests/input_callback.py b/python/tests/input_callback.py
new file mode 100644
index 0000000..982f940
--- /dev/null
+++ b/python/tests/input_callback.py
@@ -0,0 +1,131 @@
+#!/usr/bin/python -u
+#
+# This tests custom input callbacks
+#
+import sys
+import StringIO
+import libxml2
+
+# We implement a new scheme, py://strings/ that will reference this dictionary
+# NOTE(review): the XML payloads below are blank in this copy of the patch,
+# although verify_doc() expects a 'root' element holding 'replacement text';
+# the markup was presumably lost in transit - restore it before running.
+pystrings = {
+    'catalogs/catalog.xml' :
+'''
+
+
+
+''',
+
+    'xml/sample.xml' :
+'''
+
+&sample.entity;''',
+
+    'dtds/sample.dtd' :
+'''
+
+'''
+}
+
+def verify_doc(doc):
+    e = doc.getRootElement()
+    if e.name != 'root':
+        raise ValueError("name")
+    if e.content != 'replacement text':
+        raise ValueError("content")
+
+prefix = "py://strings/"
+def my_input_cb(URI):
+    # startswith() returns a bool (never -1): decline URIs of other schemes
+    if not URI.startswith(prefix):
+        return None
+    path = URI[len(prefix):]
+    if path not in pystrings:
+        print "my_input_cb: path does not exist, '%s'" % path
+        return None
+    print "my_input_cb: loading '%s'" % URI
+    return StringIO.StringIO(pystrings[path])
+
+opts = libxml2.XML_PARSE_DTDLOAD | libxml2.XML_PARSE_NONET | libxml2.XML_PARSE_COMPACT
+startURL = prefix + "xml/sample.xml"
+catURL = prefix + "catalogs/catalog.xml"
+
+# Check that we cannot read custom scheme without custom callback
+print
+print "Test 1: Expecting failure to load (custom scheme not handled)"
+try:
+    doc = libxml2.readFile(startURL, None, opts)
+    print "Read custom scheme without registering handler succeeded?"
+    sys.exit(1)
+except libxml2.treeError, e:
+    pass
+
+# Register handler and try to load the same entity
+print
+print "Test 2: Expecting failure to load (no catalog - cannot load DTD)"
+libxml2.registerInputCallback(my_input_cb)
+doc = libxml2.readFile(startURL, None, opts)
+try:
+    verify_doc(doc)
+    print "Doc was loaded?"
+except ValueError, e:
+    if str(e) != "content":
+        print "Doc verify failed"
+doc.freeDoc()
+
+# Register a catalog (also accessible via py://strings/) and retry
+print
+print "Test 3: Expecting successful loading"
+parser = libxml2.createURLParserCtxt(startURL, opts)
+parser.addLocalCatalog(catURL)
+parser.parseDocument()
+doc = parser.doc()
+verify_doc(doc)
+doc.freeDoc()
+
+# Unregister custom callback when parser is already created
+print
+print "Test 4: Expect failure to read (custom callback unregistered during read)"
+parser = libxml2.createURLParserCtxt(startURL, opts)
+libxml2.popInputCallbacks()
+parser.addLocalCatalog(catURL)
+parser.parseDocument()
+doc = parser.doc()
+try:
+    verify_doc(doc)
+    print "Doc was loaded?"
+except ValueError, e:
+    if str(e) != "content":
+        print "Doc verify failed"
+doc.freeDoc()
+
+# Try to load the document again
+print
+print "Test 5: Expect failure to load (callback unregistered)"
+try:
+    doc = libxml2.readFile(startURL, None, opts)
+    print "Read custom scheme without registering handler succeeded?"
+    sys.exit(1)
+except libxml2.treeError, e:
+    pass
+
+# But should be able to read standard I/O yet...
+print
+print "Test 6: Expect successful loading using standard I/O"
+doc = libxml2.readFile("tst.xml", None, opts)
+doc.freeDoc()
+
+# Now pop ALL input callbacks, should fail to load even standard I/O
+print
+print "Test 7: Remove all input callbacks, expect failure to load using standard I/O"
+try:
+    while True:
+        libxml2.popInputCallbacks()
+except IndexError, e:
+    print "Popped all input callbacks: " + str(e)
+try:
+    doc = libxml2.readFile("tst.xml", None, opts)
+except libxml2.treeError, e:
+    pass