[libxml2] Add XPath and XPointer fuzzer



commit ad26a60f958fce976eff2cfc980f999e404c730c
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Thu Aug 6 13:20:01 2020 +0200

    Add XPath and XPointer fuzzer

 fuzz/.gitignore    |   5 +-
 fuzz/Makefile.am   |  23 ++++++-
 fuzz/fuzz.c        |  24 ++++----
 fuzz/fuzz.h        |   6 ++
 fuzz/xpath.c       |  49 +++++++++++++++
 fuzz/xpath.dict    |  94 +++++++++++++++++++++++++++++
 fuzz/xpath.options |   3 +
 fuzz/xpathSeed.c   | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 363 insertions(+), 12 deletions(-)
---
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
index eecb4aea4..92e291350 100644
--- a/fuzz/.gitignore
+++ b/fuzz/.gitignore
@@ -5,9 +5,12 @@ regexp
 schema
 schemaSeed
 seed/html*
-seed/xml*
 seed/schema*
+seed/xml*
+seed/xpath*
 testFuzzer
 uri
 xml
 xmlSeed
+xpath
+xpathSeed
diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am
index 9a1225dba..6d31c2273 100644
--- a/fuzz/Makefile.am
+++ b/fuzz/Makefile.am
@@ -1,4 +1,5 @@
-EXTRA_PROGRAMS = html htmlSeed regexp uri schema schemaSeed xml xmlSeed
+EXTRA_PROGRAMS = html htmlSeed regexp uri schema schemaSeed xml xmlSeed \
+                 xpath xpathSeed
 check_PROGRAMS = testFuzzer
 CLEANFILES = $(EXTRA_PROGRAMS)
 AM_CPPFLAGS = -I$(top_srcdir)/include
@@ -133,3 +134,23 @@ fuzz-schema: schema$(EXEEXT) seed/schema.stamp
            -timeout=20 \
            corpus/schema seed/schema
 
+# XPath fuzzer
+
+xpathSeed_SOURCES = xpathSeed.c fuzz.c
+# The XPath test data is part of the source tree, so use top_srcdir.
+seed/xpath.stamp: xpathSeed$(EXEEXT)
+       @mkdir -p seed/xpath
+       @./xpathSeed$(EXEEXT) "$(top_srcdir)/test/XPath"
+       @touch seed/xpath.stamp
+
+xpath_SOURCES = xpath.c fuzz.c
+xpath_LDFLAGS = -fsanitize=fuzzer
+
+fuzz-xpath: xpath$(EXEEXT) seed/xpath.stamp
+       @mkdir -p corpus/xpath
+       ./xpath$(EXEEXT) \
+           -dict=xpath.dict \
+           -max_len=10000 \
+           -timeout=20 \
+           corpus/xpath seed/xpath
+
diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c
index ba7c9cad5..0155efe50 100644
--- a/fuzz/fuzz.c
+++ b/fuzz/fuzz.c
@@ -122,20 +122,24 @@ xmlFuzzReadRemaining(size_t *size) {
 }
 
 /*
- * Write a random-length string to stdout in a format similar to
+ * xmlFuzzWriteString:
+ * @out:  output file
+ * @str:  string to write
+ *
+ * Write a random-length string to file in a format similar to
  * FuzzedDataProvider. Backslash followed by newline marks the end of the
  * string. Two backslashes are used to escape a backslash.
  */
-static void
-xmlFuzzWriteString(const char *str) {
+void
+xmlFuzzWriteString(FILE *out, const char *str) {
     for (; *str; str++) {
         int c = (unsigned char) *str;
-        putchar(c);
+        putc(c, out);
         if (c == '\\')
-            putchar(c);
+            putc(c, out);
     }
-    putchar('\\');
-    putchar('\n');
+    putc('\\', out);
+    putc('\n', out);
 }
 
 /**
@@ -150,7 +154,7 @@ xmlFuzzWriteString(const char *str) {
  *
  * Returns a zero-terminated string or NULL if the fuzz data is exhausted.
  */
-static const char *
+const char *
 xmlFuzzReadString(size_t *size) {
     const char *out = fuzzData.outPtr;
 
@@ -217,8 +221,8 @@ xmlFuzzEntityRecorder(const char *URL, const char *ID,
         }
     } while (len > 0);
 
-    xmlFuzzWriteString(URL);
-    xmlFuzzWriteString((char *) xmlBufContent(in->buf->buffer));
+    xmlFuzzWriteString(stdout, URL);
+    xmlFuzzWriteString(stdout, (char *) xmlBufContent(in->buf->buffer));
 
     xmlFreeInputStream(in);
 
diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h
index 7e7fc29c2..1cb788f5c 100644
--- a/fuzz/fuzz.h
+++ b/fuzz/fuzz.h
@@ -36,6 +36,12 @@ xmlFuzzReadInt(void);
 const char *
 xmlFuzzReadRemaining(size_t *size);
 
+void
+xmlFuzzWriteString(FILE *out, const char *str);
+
+const char *
+xmlFuzzReadString(size_t *size);
+
 xmlParserInputPtr
 xmlFuzzEntityRecorder(const char *URL, const char *ID, xmlParserCtxtPtr ctxt);
 
diff --git a/fuzz/xpath.c b/fuzz/xpath.c
new file mode 100644
index 000000000..1017adaa4
--- /dev/null
+++ b/fuzz/xpath.c
@@ -0,0 +1,49 @@
+/*
+ * xpath.c: a libFuzzer target to test XPath and XPointer expressions.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <libxml/parser.h>
+#include <libxml/xpointer.h>
+#include "fuzz.h"
+
+int
+LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
+                     char ***argv ATTRIBUTE_UNUSED) {
+    xmlInitParser();
+    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+    /* Parser is initialized; generic errors now go to xmlFuzzErrorFunc. */
+    return 0;
+}
+
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    xmlDocPtr doc;
+    const char *expr, *xml;
+    size_t exprSize, xmlSize;
+
+    /* The input holds two strings: an XPointer expression, then the XML
+     * document it is evaluated against. */
+    xmlFuzzDataInit(data, size);
+    expr = xmlFuzzReadString(&exprSize);
+    xml = xmlFuzzReadString(&xmlSize);
+
+    doc = xmlParseMemory(xml, xmlSize);
+    if (doc != NULL) {
+        xmlXPathContextPtr xpctxt = xmlXPathNewContext(doc);
+        /* Context allocation can fail; skip evaluation instead of crashing */
+        if (xpctxt != NULL) {
+            /* Resource limits to avoid timeouts and call stack overflows */
+            xpctxt->maxParserDepth = 15;
+            xpctxt->maxDepth = 100;
+            xpctxt->opLimit = 500000;
+            xmlXPathFreeObject(xmlXPtrEval(BAD_CAST expr, xpctxt));
+            xmlXPathFreeContext(xpctxt);
+        }
+    }
+    xmlFreeDoc(doc);
+    xmlFuzzDataCleanup();
+    return(0);
+}
+
diff --git a/fuzz/xpath.dict b/fuzz/xpath.dict
new file mode 100644
index 000000000..4fe375fbc
--- /dev/null
+++ b/fuzz/xpath.dict
@@ -0,0 +1,94 @@
+# XML
+
+elem_a="<a></a>"
+elem_b="<b></b>"
+elem_c="<c></c>"
+elem_d="<d></d>"
+elem_empty="<a/>"
+elem_ns_a="<a:a xmlns:a='a'></a:a>"
+elem_ns_b="<b:b xmlns:b='b'></b:b>"
+
+attr_a=" a='a'"
+attr_b=" b='b'"
+
+ns_decl=" xmlns:a='a'"
+ns_default=" xmlns='a'"
+ns_prefix_a="a:"
+ns_prefix_b="b:"
+
+cdata_section="<![CDATA[ ]]>"
+
+comment="<!-- -->"
+
+pi="<?a?>"
+
+# XPath
+
+axis_ancestor="ancestor::"
+axis_ancestor_or_self="ancestor-or-self::"
+axis_attribute="attribute::"
+axis_attribute_abbrev="@"
+axis_child="child::"
+axis_descendant="descendant::"
+axis_descendant_or_self="descendant-or-self::"
+axis_following="following::"
+axis_following_sibling="following-sibling::"
+axis_namespace="namespace::"
+axis_parent="parent::"
+axis_preceding="preceding::"
+axis_preceding_sibling="preceding-sibling::"
+axis_self="self::"
+
+node_test_ns="a:"
+
+val_num="=(1.0)"
+val_str_sq="=('a')"
+val_str_dq="=(\"a\")"
+val_node_set="=(*)"
+val_elem="=(b)"
+
+step_root="/"
+step_descendant="//"
+step_any="//*"
+step_any_l="*//"
+step_elem="//b"
+step_ns_elem="//a:a"
+step_comment="//comment()"
+step_node="//node()"
+step_node_l="node()//"
+step_pi="//processing-instruction()"
+step_text="//text()"
+step_parent="../"
+
+op_plus="+1"
+op_minus=" - 1"
+op_neg="-"
+op_mul="*1"
+op_div=" div 1"
+op_mod=" mod 1"
+op_and=" and 1"
+op_or=" or 1"
+op_ne="!=1"
+op_lt="<1"
+op_gt=">1"
+op_le="<=1"
+op_ge=">=1"
+op_predicate_num="[1]"
+op_predicate_last="[last()]"
+op_predicate_str="['a']"
+op_predicate="[1=1]"
+op_arg_num=",1"
+op_arg_str=",'a'"
+op_arg_node=",*"
+op_union="|//b"
+
+var_num="=$f"
+var_bool="=$b"
+var_str="=$s"
+var_node_set="=$n"
+
+# Unicode
+
+utf8_2="\xC3\x84"
+utf8_3="\xE2\x80\x9C"
+utf8_4="\xF0\x9F\x98\x80"
diff --git a/fuzz/xpath.options b/fuzz/xpath.options
new file mode 100644
index 000000000..02d5e976f
--- /dev/null
+++ b/fuzz/xpath.options
@@ -0,0 +1,3 @@
+[libfuzzer]
+max_len = 10000
+timeout = 20
diff --git a/fuzz/xpathSeed.c b/fuzz/xpathSeed.c
new file mode 100644
index 000000000..3b6129846
--- /dev/null
+++ b/fuzz/xpathSeed.c
@@ -0,0 +1,171 @@
+/*
+ * xpathSeed.c: Generate the XPath and XPointer seed corpus for fuzzing.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <glob.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include "fuzz.h"
+
+#define PATH_SIZE 256
+#define EXPR_SIZE 4500
+
+typedef struct  {
+    const char *name;   /* stem of the seed file names ("<name>-<counter>") */
+    const char *prefix; /* test files whose names start with this are used */
+    char *data;         /* XML document text stored in every seed file */
+    int counter;        /* number appended to the next seed file name */
+} xpathTestXml;
+
+static int
+processXml(const char *testDir, xpathTestXml *xml, const char *subdir,
+           int xptr);
+
+int
+main(int argc, char **argv) {
+    xpathTestXml seed;
+    char docsGlob[PATH_SIZE];
+    glob_t docs;
+    size_t idx, len;
+    int status = 0;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: xpathSeed [TESTDIR]\n");
+        return(1);
+    }
+
+    /* Files below TESTDIR/expr are paired with a fixed one-element document */
+    seed.name = "expr";
+    seed.prefix = "";
+    seed.data = "<d></d>";
+    seed.counter = 1;
+    if (processXml(argv[1], &seed, "expr", 0) != 0)
+        status = 1;
+
+    /* Every regular file in TESTDIR/docs is paired with the files in */
+    /* TESTDIR/tests and TESTDIR/xptr whose names start with its basename */
+    len = snprintf(docsGlob, sizeof(docsGlob), "%s/docs/*", argv[1]);
+    if ((len >= PATH_SIZE) || (glob(docsGlob, 0, NULL, &docs) != 0))
+        return(1);
+
+    for (idx = 0; idx < docs.gl_pathc; idx++) {
+        char *docPath = docs.gl_pathv[idx];
+        struct stat info;
+        FILE *docFile;
+
+        if ((stat(docPath, &info) != 0) || (!S_ISREG(info.st_mode)))
+            continue;
+        len = info.st_size;
+        docFile = fopen(docPath, "rb");
+        if (docFile == NULL) {
+            status = 1;
+            continue;
+        }
+
+        seed.data = xmlMalloc(len + 1);
+        if (seed.data == NULL) {
+            status = 1;
+        } else {
+            if (fread(seed.data, 1, len, docFile) != len) {
+                status = 1;
+            } else {
+                /* Terminate the buffer so it can be written as a string */
+                seed.data[len] = 0;
+                seed.name = basename(docPath);
+                seed.prefix = seed.name;
+                seed.counter = 1;
+
+                if (processXml(argv[1], &seed, "tests", 0) != 0)
+                    status = 1;
+                if (processXml(argv[1], &seed, "xptr", 1) != 0)
+                    status = 1;
+            }
+            xmlFree(seed.data);
+        }
+        fclose(docFile);
+    }
+
+    globfree(&docs);
+
+    return(status);
+}
+
+/*
+ * For each regular file matching "<testDir>/<subdir>/<prefix>*", write one
+ * seed file "seed/xpath/<name>-<counter>" per line: the expression (wrapped
+ * in "xpointer(...)" unless @xptr is set), then xml->data.
+ * Returns 0 on success, including when nothing matches, and -1 on error.
+ */
+static int
+processXml(const char *testDir, xpathTestXml *xml, const char *subdir,
+           int xptr) {
+    char pattern[PATH_SIZE];
+    glob_t globbuf;
+    size_t i, size;
+    int ret = 0, res;
+
+    size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
+                    testDir, subdir, xml->prefix);
+    if (size >= PATH_SIZE)
+        return(-1);
+    /* A document without test files in this subdir is not a failure */
+    res = glob(pattern, 0, NULL, &globbuf);
+    if (res == GLOB_NOMATCH)
+        return(0);
+    if (res != 0)
+        return(-1);
+
+    for (i = 0; i < globbuf.gl_pathc; i++) {
+        char *path = globbuf.gl_pathv[i];
+        struct stat statbuf;
+        FILE *in;
+        char expr[EXPR_SIZE];
+
+        if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
+            continue;
+        printf("## Processing %s\n", path);
+        in = fopen(path, "rb");
+        if (in == NULL) {
+            ret = -1;
+            continue;
+        }
+        /* fgets returns a pointer; NULL means end of file or read error */
+        while (fgets(expr, EXPR_SIZE, in) != NULL) {
+            char outPath[PATH_SIZE];
+            FILE *out;
+            int j;
+            for (j = 0; expr[j] != 0; j++)
+                if (expr[j] == '\r' || expr[j] == '\n')
+                    break;
+            expr[j] = 0;
+            size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
+                            xml->name, xml->counter);
+            if (size >= PATH_SIZE) {
+                ret = -1;
+                continue;
+            }
+            out = fopen(outPath, "wb");
+            if (out == NULL) {
+                ret = -1;
+                continue;
+            }
+            if (xptr) {
+                xmlFuzzWriteString(out, expr);
+            } else {
+                char xptrExpr[EXPR_SIZE+100];
+                snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
+                xmlFuzzWriteString(out, xptrExpr);
+            }
+            xmlFuzzWriteString(out, xml->data);
+            fclose(out);
+            xml->counter++;
+        }
+        fclose(in);
+    }
+    globfree(&globbuf);
+    return(ret);
+}
+


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]