[libxslt] Reorganize fuzzing code



commit 311da8c8864e4f4f838434d769e0644cc02c9da9
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Tue Apr 30 14:10:19 2019 +0200

    Reorganize fuzzing code
    
    - Move core fuzzing code into a single file fuzz.c
    - Add tests for fuzz targets
    - Reduce XSLT operation limit

 .travis.yml              |   1 +
 tests/fuzz/.gitignore    |   1 +
 tests/fuzz/Makefile.am   |  16 +-
 tests/fuzz/fuzz.c        | 371 +++++++++++++++++++++++++++++++++++++++++++++++
 tests/fuzz/fuzz.h        |  35 +++++
 tests/fuzz/testTargets.c |  86 +++++++++++
 tests/fuzz/xpath.c       | 208 +-------------------------
 tests/fuzz/xslt.c        | 130 +----------------
 8 files changed, 519 insertions(+), 329 deletions(-)
---
diff --git a/.travis.yml b/.travis.yml
index 45e013a3..172d1e2f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -26,6 +26,7 @@ script:
     for target in libxslt libexslt xsltproc python tests/plugins; do
         make -j2 -C $target V=1
     done
+    make -j2 -C tests/fuzz testTargets
     make tests | tee test.log
     ! grep -qv '^## Running' test.log
 git:
diff --git a/tests/fuzz/.gitignore b/tests/fuzz/.gitignore
index 15b2878d..7515dea5 100644
--- a/tests/fuzz/.gitignore
+++ b/tests/fuzz/.gitignore
@@ -1,3 +1,4 @@
 /corpus/
+/testTargets
 /xpath
 /xslt
diff --git a/tests/fuzz/Makefile.am b/tests/fuzz/Makefile.am
index afb5e322..522f994d 100644
--- a/tests/fuzz/Makefile.am
+++ b/tests/fuzz/Makefile.am
@@ -2,22 +2,34 @@ LIBXSLT_LIBS = $(top_builddir)/libxslt/libxslt.la \
                $(top_builddir)/libexslt/libexslt.la
 
 EXTRA_PROGRAMS = xpath xslt
+check_PROGRAMS = testTargets
 EXTRA_DIST = xpath.dict xpath.xml xslt.dict xslt.xml seed
 CLEANFILES = $(EXTRA_PROGRAMS)
 AM_CPPFLAGS = -I$(top_srcdir)
 AM_CFLAGS = $(LIBXML_CFLAGS)
-AM_LDFLAGS = -fsanitize=fuzzer
 DEPENDENCIES = $(LIBXSLT_LIBS)
 LDADD = $(LIBXSLT_LIBS) \
         $(LIBGCRYPT_LIBS) $(LIBXML_LIBS) $(EXTRA_LIBS) $(M_LIBS)
 
+xpath_SOURCES = xpath.c fuzz.c
+xpath_LDFLAGS = -fsanitize=fuzzer
+
+xslt_SOURCES = xslt.c fuzz.c
+xslt_LDFLAGS = -fsanitize=fuzzer
+
+testTargets_SOURCES = testTargets.c fuzz.c
+
 $(top_builddir)/libxslt/libxslt.la:
        cd $(top_builddir)/libxslt && $(MAKE) libxslt.la
 
 $(top_builddir)/libexslt/libexslt.la: $(top_builddir)/libxslt/libxslt.la
        cd $(top_builddir)/libexslt && $(MAKE) libexslt.la
 
-.PHONY: fuzz-xpath fuzz-xslt
+.PHONY: tests fuzz-xpath fuzz-xslt
+
+tests: $(check_PROGRAMS)
+       @echo '## Running fuzz target tests'
+       @./testTargets $(srcdir)
 
 fuzz-xpath: xpath$(EXEEXT)
        @mkdir -p corpus/xpath
diff --git a/tests/fuzz/fuzz.c b/tests/fuzz/fuzz.c
new file mode 100644
index 00000000..0ef89db5
--- /dev/null
+++ b/tests/fuzz/fuzz.c
@@ -0,0 +1,371 @@
+/*
+ * fuzz.c: Fuzz targets for libxslt
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "fuzz.h"
+
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+#include <libxslt/extensions.h>
+#include <libxslt/functions.h>
+#include <libxslt/security.h>
+#include <libxslt/transform.h>
+#include <libxslt/xslt.h>
+#include <libxslt/xsltInternals.h>
+#include <libxslt/xsltutils.h>
+#include <libexslt/exslt.h>
+
+#if defined(_WIN32)
+  #define DIR_SEP '\\'
+#else
+  #define DIR_SEP '/'
+#endif
+
+static xmlDocPtr doc;
+static xsltSecurityPrefsPtr sec;
+static xsltTransformContextPtr tctxt;
+static xmlHashTablePtr saxonExtHash;
+
+static void
+xsltFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
+                  ...) {
+}
+
+static void
+xsltFuzzInit(void) {
+    /* Init libxml2, libxslt and libexslt */
+    xmlInitParser();
+    xmlXPathInit();
+    xsltInit();
+    exsltRegisterAll();
+
+    /* Suppress error messages */
+    xmlSetGenericErrorFunc(NULL, xsltFuzzErrorFunc);
+    xsltSetGenericErrorFunc(NULL, xsltFuzzErrorFunc);
+
+    /* Disallow I/O */
+    sec = xsltNewSecurityPrefs();
+    xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_FILE, xsltSecurityForbid);
+    xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid);
+    xsltSetSecurityPrefs(sec, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid);
+    xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_NETWORK, xsltSecurityForbid);
+    xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid);
+}
+
+static xmlDocPtr
+xsltFuzzLoadDoc(const char *argv0, const char *dir, const char *filename) {
+    char *path;
+
+    if (dir != NULL) {
+        path = malloc(strlen(dir) + 1 + strlen(filename) + 1);
+        sprintf(path, "%s/%s", dir, filename);
+    } else {
+        const char *end;
+        size_t dirLen;
+
+        end = strrchr(argv0, DIR_SEP);
+        dirLen = (end == NULL) ? 0 : end - argv0 + 1;
+        path = malloc(dirLen + strlen(filename) + 1);
+        memcpy(path, argv0, dirLen);
+        path[dirLen] = '\0';
+        strcat(path, filename);
+    }
+
+    doc = xmlReadFile(path, NULL, 0);
+    if (doc == NULL)
+        fprintf(stderr, "Error: unable to parse file '%s'\n", path);
+    free(path);
+
+    return doc;
+}
+
+/* XPath fuzzer
+ *
+ * This fuzz target parses and evaluates XPath expressions in an (E)XSLT
+ * context using a static XML document. It heavily exercises the libxml2
+ * XPath engine (xpath.c), a few other parts of libxml2, and most of
+ * libexslt.
+ *
+ * Some EXSLT functions need the transform context to create RVTs for
+ * node-sets. A couple of functions also access the stylesheet. The
+ * XPath context from the transform context is used to parse and
+ * evaluate expressions.
+ *
+ * All these objects are created once at startup. After fuzzing each input,
+ * they're reset as cheaply as possible.
+ *
+ * TODO
+ *
+ * - Some expressions can create lots of temporary node sets (RVTs) which
+ *   aren't freed until the whole expression has been evaluated, leading to
+ *   excessive memory usage. Cleaning them up earlier would require
+ *   callbacks from the XPath engine, for example after evaluating a
+ *   predicate expression, which doesn't seem feasible. Terminating the
+ *   evaluation after creating a certain number of RVTs is a simple
+ *   workaround.
+ * - Register a custom xsl:decimal-format declaration for format-number().
+ * - Some functions add strings to the stylesheet or transform context
+ *   dictionary, for example via xsltGetQName, requiring a cleanup of the
+ *   dicts after fuzzing each input. This behavior seems questionable.
+ *   Extension functions shouldn't needlessly modify the transform context
+ *   or stylesheet.
+ * - Register xsl:keys and fuzz the key() function.
+ * - Add a few custom func:functions.
+ * - Fuzz the document() function with external documents.
+ */
+
+int
+xsltFuzzXPathInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
+                  const char *dir) {
+    const char *xmlFilename = "xpath.xml";
+    xsltStylesheetPtr style;
+    xmlXPathContextPtr xpctxt;
+
+    xsltFuzzInit();
+
+    /* Load XML document */
+    doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename);
+    if (doc == NULL)
+        return -1;
+
+    style = xsltNewStylesheet();
+    tctxt = xsltNewTransformContext(style, doc);
+    xsltSetCtxtSecurityPrefs(sec, tctxt);
+
+    /*
+     * Some extension functions need the current instruction.
+     *
+     * - format-number() for namespaces.
+     * - document() for the base URL.
+     * - maybe others?
+     *
+     * For fuzzing, it's enough to use the source document's root element.
+     */
+    tctxt->inst = xmlDocGetRootElement(doc);
+
+    saxonExtHash = (xmlHashTablePtr)
+        xsltStyleGetExtData(style, SAXON_NAMESPACE);
+
+    /* Set up XPath context */
+    xpctxt = tctxt->xpathCtxt;
+
+    /* Resource limits to avoid timeouts and call stack overflows */
+    xpctxt->maxParserDepth = 15;
+    xpctxt->maxDepth = 100;
+    xpctxt->opLimit = 500000;
+
+    /* Test namespaces used in xpath.xml */
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");
+
+    /* EXSLT namespaces */
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
+    xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);
+
+    /* Register variables */
+    xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
+    xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
+    xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
+                             xmlXPathNewString(BAD_CAST "var"));
+    xmlXPathRegisterVariable(
+            xpctxt, BAD_CAST "n",
+            xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));
+
+    return 0;
+}
+
+xmlXPathObjectPtr
+xsltFuzzXPath(const char *data, size_t size) {
+    xmlXPathContextPtr xpctxt = tctxt->xpathCtxt;
+    xmlChar *xpathExpr;
+
+    /* Null-terminate */
+    xpathExpr = malloc(size + 1);
+    memcpy(xpathExpr, data, size);
+    xpathExpr[size] = 0;
+
+    /*
+     * format-number() can still cause memory errors with invalid UTF-8 in
+     * prefixes or suffixes. This shouldn't be exploitable in practice, but
+     * should be fixed. Check UTF-8 validity for now.
+     */
+    if (xmlCheckUTF8(xpathExpr) == 0) {
+        free(xpathExpr);
+        return NULL;
+    }
+
+    /* Compile and return early if the expression is invalid */
+    xmlXPathCompExprPtr compExpr = xmlXPathCtxtCompile(xpctxt, xpathExpr);
+    free(xpathExpr);
+    if (compExpr == NULL)
+        return NULL;
+
+    /* Initialize XPath evaluation context and evaluate */
+    xpctxt->node = (xmlNodePtr) doc; /* Maybe test different context nodes? */
+    xpctxt->contextSize = 1;
+    xpctxt->proximityPosition = 1;
+    xpctxt->opCount = 0;
+    xmlXPathObjectPtr xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
+    xmlXPathFreeCompExpr(compExpr);
+
+    /* Clean object cache */
+    xmlXPathContextSetCache(xpctxt, 0, 0, 0);
+    xmlXPathContextSetCache(xpctxt, 1, -1, 0);
+
+    /* Clean dictionaries */
+    if (xmlDictSize(tctxt->dict) > 0) {
+        xmlDictFree(tctxt->dict);
+        xmlDictFree(tctxt->style->dict);
+        tctxt->style->dict = xmlDictCreate();
+        tctxt->dict = xmlDictCreateSub(tctxt->style->dict);
+    }
+
+    /* Clean saxon:expression cache */
+    if (xmlHashSize(saxonExtHash) > 0) {
+        /* There doesn't seem to be a cheaper way with the public API. */
+        xsltShutdownCtxtExts(tctxt);
+        xsltInitCtxtExts(tctxt);
+        saxonExtHash = (xmlHashTablePtr)
+            xsltStyleGetExtData(tctxt->style, SAXON_NAMESPACE);
+    }
+
+    return xpathObj;
+}
+
+void
+xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) {
+    xmlXPathFreeObject(obj);
+
+    /* Some XSLT extension functions create RVTs. */
+    xsltFreeRVTs(tctxt);
+}
+
+void
+xsltFuzzXPathCleanup(void) {
+    xsltStylesheetPtr style = tctxt->style;
+
+    xmlXPathRegisteredNsCleanup(tctxt->xpathCtxt);
+    xsltFreeSecurityPrefs(sec);
+    sec = NULL;
+    xsltFreeTransformContext(tctxt);
+    tctxt = NULL;
+    xsltFreeStylesheet(style);
+    style = NULL;
+    xmlFreeDoc(doc);
+    doc = NULL;
+}
+
+/*
+ * XSLT fuzzer
+ *
+ * This is a rather naive fuzz target using a static XML document.
+ *
+ * TODO
+ *
+ * - Improve seed corpus
+ * - Mutate multiple input documents: source, xsl:import, xsl:include
+ * - format-number() with xsl:decimal-format
+ * - Better coverage for xsl:key and key() function
+ * - EXSLT func:function
+ * - xsl:document
+ */
+
+int
+xsltFuzzXsltInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
+                 const char *dir) {
+    const char *xmlFilename = "xslt.xml";
+
+    xsltFuzzInit();
+
+    /* Load XML document */
+    doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename);
+    if (doc == NULL)
+        return -1;
+
+    return 0;
+}
+
+static void
+xsltSetXPathResourceLimits(xmlXPathContextPtr ctxt) {
+    ctxt->maxParserDepth = 15;
+    ctxt->maxDepth = 100;
+    ctxt->opLimit = 100000;
+}
+
+xmlChar *
+xsltFuzzXslt(const char *data, size_t size) {
+    xmlDocPtr xsltDoc;
+    xmlDocPtr result;
+    xmlNodePtr xsltRoot;
+    xsltStylesheetPtr sheet;
+    xsltTransformContextPtr ctxt;
+    xmlChar *ret = NULL;
+    int retLen;
+
+    xsltDoc = xmlReadMemory(data, size, NULL, NULL, 0);
+    if (xsltDoc == NULL)
+        return NULL;
+    xsltRoot = xmlDocGetRootElement(xsltDoc);
+    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
+    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
+    xmlNewNs(xsltRoot, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
+    xmlNewNs(xsltRoot, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
+    xmlNewNs(xsltRoot, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
+    xmlNewNs(xsltRoot, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
+    xmlNewNs(xsltRoot, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
+    xmlNewNs(xsltRoot, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
+    xmlNewNs(xsltRoot, SAXON_NAMESPACE, BAD_CAST "saxon");
+
+    sheet = xsltNewStylesheet();
+    if (sheet == NULL) {
+        xmlFreeDoc(xsltDoc);
+        return NULL;
+    }
+    xsltSetXPathResourceLimits(sheet->xpathCtxt);
+    sheet->xpathCtxt->opCount = 0;
+    if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) {
+        xsltFreeStylesheet(sheet);
+        xmlFreeDoc(xsltDoc);
+        return NULL;
+    }
+
+    ctxt = xsltNewTransformContext(sheet, doc);
+    xsltSetCtxtSecurityPrefs(sec, ctxt);
+    ctxt->maxTemplateDepth = 100;
+    ctxt->opLimit = 20000;
+    xsltSetXPathResourceLimits(ctxt->xpathCtxt);
+    ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;
+
+    result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
+    if (result != NULL)
+        xsltSaveResultToString(&ret, &retLen, result, sheet);
+
+    xmlFreeDoc(result);
+    xsltFreeTransformContext(ctxt);
+    xsltFreeStylesheet(sheet);
+
+    return ret;
+}
+
+void
+xsltFuzzXsltCleanup(void) {
+    xsltFreeSecurityPrefs(sec);
+    sec = NULL;
+    xmlFreeDoc(doc);
+    doc = NULL;
+}
diff --git a/tests/fuzz/fuzz.h b/tests/fuzz/fuzz.h
new file mode 100644
index 00000000..7dff3dbe
--- /dev/null
+++ b/tests/fuzz/fuzz.h
@@ -0,0 +1,35 @@
+/*
+ * fuzz.h: Header for fuzz targets
+ *
+ * See Copyright for the status of this software.
+ */
+
+#ifndef __XML_XSLT_TESTS_FUZZ_H__
+#define __XML_XSLT_TESTS_FUZZ_H__
+
+#include <stddef.h>
+#include <libxml/xmlstring.h>
+#include <libxml/xpath.h>
+
+int
+xsltFuzzXPathInit(int *argc_p, char ***argv_p, const char *dir);
+
+xmlXPathObjectPtr
+xsltFuzzXPath(const char *data, size_t size);
+
+void
+xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj);
+
+void
+xsltFuzzXPathCleanup(void);
+
+int
+xsltFuzzXsltInit(int *argc_p, char ***argv_p, const char *dir);
+
+xmlChar *
+xsltFuzzXslt(const char *data, size_t size);
+
+void
+xsltFuzzXsltCleanup(void);
+
+#endif
diff --git a/tests/fuzz/testTargets.c b/tests/fuzz/testTargets.c
new file mode 100644
index 00000000..114304bd
--- /dev/null
+++ b/tests/fuzz/testTargets.c
@@ -0,0 +1,86 @@
+/*
+ * testTargets.c: Test the fuzz targets
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdio.h>
+
+#include "fuzz.h"
+#include <libxml/globals.h>
+
+int
+testXPath(int argc, char **argv) {
+    xmlXPathObjectPtr obj;
+    const char expr[] = "count(//node())";
+    int ret = 0;
+
+    if (xsltFuzzXPathInit(&argc, &argv, argv[1]) != 0) {
+        xsltFuzzXPathCleanup();
+        return 1;
+    }
+
+    obj = xsltFuzzXPath(expr, sizeof(expr) - 1);
+    if ((obj == NULL) || (obj->type != XPATH_NUMBER)) {
+        fprintf(stderr, "Expression doesn't evaluate to number\n");
+        ret = 1;
+    } else if (obj->floatval != 39.0) {
+        fprintf(stderr, "Expression returned %f, expected %f\n",
+                obj->floatval, 39.0);
+        ret = 1;
+    }
+
+    xsltFuzzXPathFreeObject(obj);
+    xsltFuzzXPathCleanup();
+
+    return ret;
+}
+
+int
+testXslt(int argc, char **argv) {
+    xmlChar *result;
+    const char styleBuf[] =
+        "<xsl:stylesheet"
+        " xmlns:xsl='http://www.w3.org/1999/XSL/Transform'"
+        " version='1.0'"
+        " extension-element-prefixes='"
+        "  exsl exslt crypto date dyn math set str saxon"
+        "'>\n"
+        "<xsl:output omit-xml-declaration='yes'/>\n"
+        "<xsl:template match='/'>\n"
+        " <r><xsl:value-of select='count(//node())'/></r>\n"
+        "</xsl:template>\n"
+        "</xsl:stylesheet>\n";
+    int ret = 0;
+
+    if (xsltFuzzXsltInit(&argc, &argv, argv[1]) != 0) {
+        xsltFuzzXsltCleanup();
+        return 1;
+    }
+
+    result = xsltFuzzXslt(styleBuf, sizeof(styleBuf) - 1);
+    if (result == NULL) {
+        fprintf(stderr, "Result is NULL\n");
+        ret = 1;
+    } else if (xmlStrcmp(result, BAD_CAST "<r>39</r>\n") != 0) {
+        fprintf(stderr, "Stylesheet returned\n%sexpected \n%s\n",
+                result, "<r>39</r>");
+        ret = 1;
+    }
+
+    xmlFree(result);
+    xsltFuzzXsltCleanup();
+
+    return ret;
+}
+
+int main(int argc, char **argv) {
+    int ret = 0;
+
+    if (testXPath(argc, argv) != 0)
+        ret = 1;
+    if (testXslt(argc, argv) != 0)
+        ret = 1;
+
+    return ret;
+}
diff --git a/tests/fuzz/xpath.c b/tests/fuzz/xpath.c
index 9bbfe26c..475cb073 100644
--- a/tests/fuzz/xpath.c
+++ b/tests/fuzz/xpath.c
@@ -2,217 +2,19 @@
  * xpath.c: libFuzzer target for XPath expressions
  *
  * See Copyright for the status of this software.
- *
- * This fuzz target parses and evaluates XPath expressions in an (E)XSLT
- * context using a static XML document. It heavily exercises the libxml2
- * XPath engine (xpath.c), a few other parts of libxml2, and most of
- * libexslt.
- *
- * Some EXSLT functions need the transform context to create RVTs for
- * node-sets. A couple of functions also access the stylesheet. The
- * XPath context from the transform context is used to parse and
- * evaluate expressions.
- *
- * All these objects are created once at startup. After fuzzing each input,
- * they're reset as cheaply as possible.
- *
- * TODO
- *
- * - Some expressions can create lots of temporary node sets (RVTs) which
- *   aren't freed until the whole expression was evaluated, leading to
- *   extensive memory usage. Cleaning them up earlier would require
- *   callbacks from the XPath engine, for example after evaluating a
- *   predicate expression, which doesn't seem feasible. Terminating the
- *   evaluation after creating a certain number of RVTs is a simple
- *   workaround.
- * - Register a custom xsl:decimal-format declaration for format-number().
- * - Some functions add strings to the stylesheet or transform context
- *   dictionary, for example via xsltGetQName, requiring a clean up of the
- *   dicts after fuzzing each input. This behavior seems questionable.
- *   Extension functions shouldn't needlessly modify the transform context
- *   or stylesheet.
- * - Register xsl:keys and fuzz the key() function.
- * - Add a few custom func:functions.
- * - Fuzz the document() function with external documents.
  */
 
-#include <libgen.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <libxml/tree.h>
-#include <libxml/parser.h>
-#include <libxml/xpath.h>
-#include <libxml/xpathInternals.h>
-#include <libxslt/extensions.h>
-#include <libxslt/functions.h>
-#include <libxslt/security.h>
-#include <libxslt/transform.h>
-#include <libxslt/xsltutils.h>
-#include <libexslt/exslt.h>
-
-static xmlDocPtr doc;
-static xsltTransformContextPtr tctxt;
-static xmlHashTablePtr saxonExtHash;
-
-static void
-xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
-                 ...) {
-}
+#include "fuzz.h"
 
 int
-LLVMFuzzerInitialize(int *argc_p ATTRIBUTE_UNUSED,
-                     char ***argv_p ATTRIBUTE_UNUSED) {
-    const char *xmlFilename = "xpath.xml";
-    const char *dir;
-    char *argv0;
-    char *xmlPath;
-    xsltSecurityPrefsPtr sec;
-    xsltStylesheetPtr style;
-    xmlXPathContextPtr xpctxt;
-
-    /* Init libxml2 and libexslt */
-    xmlInitParser();
-    xmlXPathInit();
-    exsltRegisterAll();
-
-    /* Load XML document */
-    argv0 = strdup((*argv_p)[0]);
-    dir = dirname(argv0);
-    xmlPath = malloc(strlen(dir) + 1 + strlen(xmlFilename) + 1);
-    sprintf(xmlPath, "%s/%s", dir, xmlFilename);
-    doc = xmlReadFile(xmlPath, NULL, 0);
-    free(xmlPath);
-    free(argv0);
-    if (doc == NULL) {
-        fprintf(stderr, "Error: unable to parse file \"%s\"\n", xmlPath);
-        return -1;
-    }
-
-    /* Suppress error messages */
-    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
-    xsltSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
-
-    style = xsltNewStylesheet();
-    tctxt = xsltNewTransformContext(style, doc);
-
-    /* Disallow I/O */
-    sec = xsltNewSecurityPrefs();
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_FILE, xsltSecurityForbid);
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid);
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid);
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_NETWORK, xsltSecurityForbid);
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid);
-    xsltSetCtxtSecurityPrefs(sec, tctxt);
-
-    /*
-     * Some extension functions need the current instruction.
-     *
-     * - format-number() for namespaces.
-     * - document() for the base URL.
-     * - maybe others?
-     *
-     * For fuzzing, it's enough to use the source document's root element.
-     */
-    tctxt->inst = xmlDocGetRootElement(doc);
-
-    saxonExtHash = (xmlHashTablePtr)
-        xsltStyleGetExtData(style, SAXON_NAMESPACE);
-
-    /* Set up XPath context */
-    xpctxt = tctxt->xpathCtxt;
-
-    /* Resource limits to avoid timeouts and call stack overflows */
-    xpctxt->maxParserDepth = 15;
-    xpctxt->maxDepth = 100;
-    xpctxt->opLimit = 500000;
-
-    /* Test namespaces used in xpath.xml */
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");
-
-    /* EXSLT namespaces */
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
-    xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);
-
-    /* Register variables */
-    xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
-    xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
-    xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
-                             xmlXPathNewString(BAD_CAST "var"));
-    xmlXPathRegisterVariable(
-            xpctxt, BAD_CAST "n",
-            xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));
-
-    return 0;
+LLVMFuzzerInitialize(int *argc_p, char ***argv_p) {
+    return xsltFuzzXPathInit(argc_p, argv_p, NULL);
 }
 
 int
 LLVMFuzzerTestOneInput(const char *data, size_t size) {
-    xmlXPathContextPtr xpctxt = tctxt->xpathCtxt;
-    xmlChar *xpathExpr;
-
-    /* Null-terminate */
-    xpathExpr = malloc(size + 1);
-    memcpy(xpathExpr, data, size);
-    xpathExpr[size] = 0;
-
-    /*
-     * format-number() can still cause memory errors with invalid UTF-8 in
-     * prefixes or suffixes. This shouldn't be exploitable in practice, but
-     * should be fixed. Check UTF-8 validity for now.
-     */
-    if (xmlCheckUTF8(xpathExpr) == 0) {
-        free(xpathExpr);
-        return 0;
-    }
-
-    /* Compile and return early if the expression is invalid */
-    xmlXPathCompExprPtr compExpr = xmlXPathCtxtCompile(xpctxt, xpathExpr);
-    free(xpathExpr);
-    if (compExpr == NULL)
-        return 0;
-
-    /* Initialize XPath evaluation context and evaluate */
-    xpctxt->node = (xmlNodePtr) doc; /* Maybe test different context nodes? */
-    xpctxt->contextSize = 1;
-    xpctxt->proximityPosition = 1;
-    xpctxt->opCount = 0;
-    xmlXPathObjectPtr xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
-    xmlXPathFreeObject(xpathObj);
-    xmlXPathFreeCompExpr(compExpr);
-
-    /* Some XSLT extension functions create RVTs. */
-    xsltFreeRVTs(tctxt);
-
-    /* Clean object cache */
-    xmlXPathContextSetCache(xpctxt, 0, 0, 0);
-    xmlXPathContextSetCache(xpctxt, 1, -1, 0);
-
-    /* Clean dictionaries */
-    if (xmlDictSize(tctxt->dict) > 0) {
-        xmlDictFree(tctxt->dict);
-        xmlDictFree(tctxt->style->dict);
-        tctxt->style->dict = xmlDictCreate();
-        tctxt->dict = xmlDictCreateSub(tctxt->style->dict);
-    }
-
-    /* Clean saxon:expression cache */
-    if (xmlHashSize(saxonExtHash) > 0) {
-        /* There doesn't seem to be a cheaper way with the public API. */
-        xsltShutdownCtxtExts(tctxt);
-        xsltInitCtxtExts(tctxt);
-        saxonExtHash = (xmlHashTablePtr)
-            xsltStyleGetExtData(tctxt->style, SAXON_NAMESPACE);
-    }
+    xmlXPathObjectPtr xpathObj = xsltFuzzXPath(data, size);
+    xsltFuzzXPathFreeObject(xpathObj);
 
     return 0;
 }
diff --git a/tests/fuzz/xslt.c b/tests/fuzz/xslt.c
index 0d7bfebf..42ba7881 100644
--- a/tests/fuzz/xslt.c
+++ b/tests/fuzz/xslt.c
@@ -2,138 +2,20 @@
  * xslt.c: libFuzzer target for XSLT stylesheets
  *
  * See Copyright for the status of this software.
- *
- * This is a rather naive fuzz target using a static XML document.
- *
- * TODO
- *
- * - Improve seed corpus
- * - Mutate multiple input documents: source, xsl:import, xsl:include
- * - format-number() with xsl:decimal-format
- * - Better coverage for xsl:key and key() function
- * - EXSLT func:function
- * - xsl:document
  */
 
-#include <libgen.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <libxml/tree.h>
-#include <libxml/parser.h>
-#include <libxslt/security.h>
-#include <libxslt/transform.h>
-#include <libxslt/xslt.h>
-#include <libxslt/xsltInternals.h>
-#include <libxslt/xsltutils.h>
-#include <libexslt/exslt.h>
-
-static xmlDocPtr doc;
-static xsltSecurityPrefsPtr sec;
-
-static void
-errorFunc(void *ctx, const char *msg, ...) {
-    /* Discard error messages. */
-}
+#include "fuzz.h"
+#include <libxml/globals.h>
 
 int
-LLVMFuzzerInitialize(int *argc_p ATTRIBUTE_UNUSED,
-                     char ***argv_p ATTRIBUTE_UNUSED) {
-    const char *xmlFilename = "xslt.xml";
-    const char *dir;
-    char *argv0;
-    char *xmlPath;
-
-    /* Init libraries */
-    xmlInitParser();
-    xmlXPathInit();
-    xsltInit();
-    exsltRegisterAll();
-
-    /* Load XML document */
-    argv0 = strdup((*argv_p)[0]);
-    dir = dirname(argv0);
-    xmlPath = malloc(strlen(dir) + 1 + strlen(xmlFilename) + 1);
-    sprintf(xmlPath, "%s/%s", dir, xmlFilename);
-    doc = xmlReadFile(xmlPath, NULL, 0);
-    free(xmlPath);
-    free(argv0);
-    if (doc == NULL) {
-        fprintf(stderr, "Error: unable to parse file \"%s\"\n", xmlPath);
-        return -1;
-    }
-
-    /* Suppress error messages */
-    xmlSetGenericErrorFunc(NULL, errorFunc);
-    xsltSetGenericErrorFunc(NULL, errorFunc);
-
-    /* Disallow I/O */
-    sec = xsltNewSecurityPrefs();
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_FILE, xsltSecurityForbid);
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid);
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid);
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_NETWORK, xsltSecurityForbid);
-    xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid);
-
-    return 0;
-}
-
-static void
-xsltSetXPathResourceLimits(xmlXPathContextPtr ctxt) {
-    ctxt->maxParserDepth = 15;
-    ctxt->maxDepth = 100;
-    ctxt->opLimit = 100000;
+LLVMFuzzerInitialize(int *argc_p, char ***argv_p) {
+    return xsltFuzzXsltInit(argc_p, argv_p, NULL);
 }
 
 int
 LLVMFuzzerTestOneInput(const char *data, size_t size) {
-    xmlDocPtr xsltDoc;
-    xmlDocPtr result;
-    xmlNodePtr xsltRoot;
-    xsltStylesheetPtr sheet;
-    xsltTransformContextPtr ctxt;
-
-    xsltDoc = xmlReadMemory(data, size, NULL, NULL, 0);
-    if (xsltDoc == NULL)
-        return 0;
-    xsltRoot = xmlDocGetRootElement(xsltDoc);
-    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
-    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
-    xmlNewNs(xsltRoot, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
-    xmlNewNs(xsltRoot, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
-    xmlNewNs(xsltRoot, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
-    xmlNewNs(xsltRoot, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
-    xmlNewNs(xsltRoot, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
-    xmlNewNs(xsltRoot, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
-    xmlNewNs(xsltRoot, SAXON_NAMESPACE, BAD_CAST "saxon");
-
-    sheet = xsltNewStylesheet();
-    if (sheet == NULL) {
-        xmlFreeDoc(xsltDoc);
-        return 0;
-    }
-    xsltSetXPathResourceLimits(sheet->xpathCtxt);
-    sheet->xpathCtxt->opCount = 0;
-    if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) {
-        xsltFreeStylesheet(sheet);
-        xmlFreeDoc(xsltDoc);
-        return 0;
-    }
-
-    ctxt = xsltNewTransformContext(sheet, doc);
-    xsltSetCtxtSecurityPrefs(sec, ctxt);
-    ctxt->maxTemplateDepth = 100;
-    ctxt->opLimit = 200000;
-    xsltSetXPathResourceLimits(ctxt->xpathCtxt);
-    ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;
-
-    result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
-
-    xmlFreeDoc(result);
-    xsltFreeTransformContext(ctxt);
-    xsltFreeStylesheet(sheet);
+    xmlChar *result = xsltFuzzXslt(data, size);
+    xmlFree(result);
 
     return 0;
 }
-


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]