[libxml2] Add a couple of libFuzzer targets



commit 00ed736eecf93aeab49089abb06e0e5fc0e7e091
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Fri Jun 5 12:49:25 2020 +0200

    Add a couple of libFuzzer targets
    
    - XML fuzzer
      Currently tests the pull parser, push parser and reader, as well as
      serialization. Supports splitting fuzz data into multiple documents
      for things like external DTDs or entities. The seed corpus is built
      from parts of the test suite.
    
    - Regexp fuzzer
      Seed corpus was statically generated from test suite.
    
    - URI fuzzer
      Tests parsing and most other functions from uri.c.

 Makefile.am                   |   3 +-
 configure.ac                  |   2 +-
 fuzz/.gitignore               |   7 ++
 fuzz/Makefile.am              |  75 ++++++++++++
 fuzz/README                   |  19 +++
 fuzz/fuzz.c                   | 274 ++++++++++++++++++++++++++++++++++++++++++
 fuzz/fuzz.h                   |  55 +++++++++
 fuzz/regexp.c                 |  40 ++++++
 fuzz/regexp.dict              |  16 +++
 fuzz/seed/regexp/branch-1     | Bin 0 -> 21 bytes
 fuzz/seed/regexp/branch-10    | Bin 0 -> 22 bytes
 fuzz/seed/regexp/branch-11    | Bin 0 -> 21 bytes
 fuzz/seed/regexp/branch-12    | Bin 0 -> 22 bytes
 fuzz/seed/regexp/branch-13    | Bin 0 -> 22 bytes
 fuzz/seed/regexp/branch-2     | Bin 0 -> 21 bytes
 fuzz/seed/regexp/branch-3     | Bin 0 -> 22 bytes
 fuzz/seed/regexp/branch-4     | Bin 0 -> 22 bytes
 fuzz/seed/regexp/branch-5     | Bin 0 -> 23 bytes
 fuzz/seed/regexp/branch-6     | Bin 0 -> 23 bytes
 fuzz/seed/regexp/branch-7     | Bin 0 -> 23 bytes
 fuzz/seed/regexp/branch-8     | Bin 0 -> 21 bytes
 fuzz/seed/regexp/branch-9     | Bin 0 -> 22 bytes
 fuzz/seed/regexp/bug316338-1  | Bin 0 -> 71 bytes
 fuzz/seed/regexp/bug316338-10 | Bin 0 -> 487 bytes
 fuzz/seed/regexp/bug316338-11 | Bin 0 -> 85 bytes
 fuzz/seed/regexp/bug316338-12 | Bin 0 -> 88 bytes
 fuzz/seed/regexp/bug316338-13 | Bin 0 -> 89 bytes
 fuzz/seed/regexp/bug316338-14 | Bin 0 -> 93 bytes
 fuzz/seed/regexp/bug316338-15 | Bin 0 -> 500 bytes
 fuzz/seed/regexp/bug316338-16 | Bin 0 -> 1355 bytes
 fuzz/seed/regexp/bug316338-2  | Bin 0 -> 74 bytes
 fuzz/seed/regexp/bug316338-3  | Bin 0 -> 75 bytes
 fuzz/seed/regexp/bug316338-4  | Bin 0 -> 79 bytes
 fuzz/seed/regexp/bug316338-5  | Bin 0 -> 487 bytes
 fuzz/seed/regexp/bug316338-6  | Bin 0 -> 71 bytes
 fuzz/seed/regexp/bug316338-7  | Bin 0 -> 74 bytes
 fuzz/seed/regexp/bug316338-8  | Bin 0 -> 75 bytes
 fuzz/seed/regexp/bug316338-9  | Bin 0 -> 79 bytes
 fuzz/seed/regexp/bug420596-1  | Bin 0 -> 25 bytes
 fuzz/seed/regexp/bug420596-2  | Bin 0 -> 25 bytes
 fuzz/seed/regexp/bug420596-3  | Bin 0 -> 25 bytes
 fuzz/seed/regexp/bug420596-4  | Bin 0 -> 25 bytes
 fuzz/seed/regexp/bug420596-5  | Bin 0 -> 23 bytes
 fuzz/seed/regexp/bug420596-6  | Bin 0 -> 23 bytes
 fuzz/seed/regexp/bug420596-7  | Bin 0 -> 23 bytes
 fuzz/seed/regexp/bug420596-8  | Bin 0 -> 23 bytes
 fuzz/seed/regexp/content-1    | Bin 0 -> 17 bytes
 fuzz/seed/regexp/content-10   | Bin 0 -> 33 bytes
 fuzz/seed/regexp/content-2    | Bin 0 -> 17 bytes
 fuzz/seed/regexp/content-3    | Bin 0 -> 18 bytes
 fuzz/seed/regexp/content-4    | Bin 0 -> 17 bytes
 fuzz/seed/regexp/content-5    | Bin 0 -> 32 bytes
 fuzz/seed/regexp/content-6    | Bin 0 -> 33 bytes
 fuzz/seed/regexp/content-7    | Bin 0 -> 34 bytes
 fuzz/seed/regexp/content-8    | Bin 0 -> 35 bytes
 fuzz/seed/regexp/content-9    | Bin 0 -> 40 bytes
 fuzz/seed/regexp/hard-1       | Bin 0 -> 37 bytes
 fuzz/seed/regexp/hard-10      | Bin 0 -> 28 bytes
 fuzz/seed/regexp/hard-2       | Bin 0 -> 38 bytes
 fuzz/seed/regexp/hard-3       | Bin 0 -> 41 bytes
 fuzz/seed/regexp/hard-4       | Bin 0 -> 37 bytes
 fuzz/seed/regexp/hard-5       | Bin 0 -> 40 bytes
 fuzz/seed/regexp/hard-6       | Bin 0 -> 39 bytes
 fuzz/seed/regexp/hard-7       | Bin 0 -> 29 bytes
 fuzz/seed/regexp/hard-8       | Bin 0 -> 33 bytes
 fuzz/seed/regexp/hard-9       | Bin 0 -> 27 bytes
 fuzz/seed/regexp/ncname-1     | Bin 0 -> 19 bytes
 fuzz/seed/regexp/ncname-2     | Bin 0 -> 21 bytes
 fuzz/seed/regexp/ncname-3     | Bin 0 -> 23 bytes
 fuzz/seed/regexp/ncname-4     | Bin 0 -> 21 bytes
 fuzz/seed/regexp/ncname-5     | Bin 0 -> 23 bytes
 fuzz/seed/regexp/ranges-1     | Bin 0 -> 8 bytes
 fuzz/seed/regexp/ranges-10    | Bin 0 -> 18 bytes
 fuzz/seed/regexp/ranges-11    | Bin 0 -> 16 bytes
 fuzz/seed/regexp/ranges-12    | Bin 0 -> 19 bytes
 fuzz/seed/regexp/ranges-2     | Bin 0 -> 9 bytes
 fuzz/seed/regexp/ranges-3     | Bin 0 -> 10 bytes
 fuzz/seed/regexp/ranges-4     | Bin 0 -> 11 bytes
 fuzz/seed/regexp/ranges-5     | Bin 0 -> 12 bytes
 fuzz/seed/regexp/ranges-6     | Bin 0 -> 13 bytes
 fuzz/seed/regexp/ranges-7     | Bin 0 -> 14 bytes
 fuzz/seed/regexp/ranges-8     | Bin 0 -> 15 bytes
 fuzz/seed/regexp/ranges-9     | Bin 0 -> 17 bytes
 fuzz/seed/regexp/ranges2-1    | Bin 0 -> 17 bytes
 fuzz/seed/regexp/ranges2-10   | Bin 0 -> 42 bytes
 fuzz/seed/regexp/ranges2-11   | Bin 0 -> 43 bytes
 fuzz/seed/regexp/ranges2-12   | Bin 0 -> 43 bytes
 fuzz/seed/regexp/ranges2-2    | Bin 0 -> 18 bytes
 fuzz/seed/regexp/ranges2-3    | Bin 0 -> 17 bytes
 fuzz/seed/regexp/ranges2-4    | Bin 0 -> 18 bytes
 fuzz/seed/regexp/ranges2-5    | Bin 0 -> 19 bytes
 fuzz/seed/regexp/ranges2-6    | Bin 0 -> 20 bytes
 fuzz/seed/regexp/ranges2-7    | Bin 0 -> 18 bytes
 fuzz/seed/regexp/ranges2-8    | Bin 0 -> 18 bytes
 fuzz/seed/regexp/ranges2-9    | Bin 0 -> 41 bytes
 fuzz/seed/regexp/xpath-1      | Bin 0 -> 181 bytes
 fuzz/seed/regexp/xpath-10     | Bin 0 -> 212 bytes
 fuzz/seed/regexp/xpath-11     | Bin 0 -> 181 bytes
 fuzz/seed/regexp/xpath-12     | Bin 0 -> 183 bytes
 fuzz/seed/regexp/xpath-13     | Bin 0 -> 183 bytes
 fuzz/seed/regexp/xpath-14     | Bin 0 -> 182 bytes
 fuzz/seed/regexp/xpath-15     | Bin 0 -> 191 bytes
 fuzz/seed/regexp/xpath-16     | Bin 0 -> 181 bytes
 fuzz/seed/regexp/xpath-17     | Bin 0 -> 241 bytes
 fuzz/seed/regexp/xpath-18     | Bin 0 -> 204 bytes
 fuzz/seed/regexp/xpath-19     | Bin 0 -> 262 bytes
 fuzz/seed/regexp/xpath-2      | Bin 0 -> 241 bytes
 fuzz/seed/regexp/xpath-20     | Bin 0 -> 181 bytes
 fuzz/seed/regexp/xpath-21     | Bin 0 -> 277 bytes
 fuzz/seed/regexp/xpath-22     | Bin 0 -> 337 bytes
 fuzz/seed/regexp/xpath-23     | Bin 0 -> 277 bytes
 fuzz/seed/regexp/xpath-24     | Bin 0 -> 279 bytes
 fuzz/seed/regexp/xpath-25     | Bin 0 -> 282 bytes
 fuzz/seed/regexp/xpath-26     | Bin 0 -> 281 bytes
 fuzz/seed/regexp/xpath-27     | Bin 0 -> 281 bytes
 fuzz/seed/regexp/xpath-28     | Bin 0 -> 287 bytes
 fuzz/seed/regexp/xpath-29     | Bin 0 -> 295 bytes
 fuzz/seed/regexp/xpath-3      | Bin 0 -> 181 bytes
 fuzz/seed/regexp/xpath-30     | Bin 0 -> 308 bytes
 fuzz/seed/regexp/xpath-31     | Bin 0 -> 277 bytes
 fuzz/seed/regexp/xpath-32     | Bin 0 -> 279 bytes
 fuzz/seed/regexp/xpath-33     | Bin 0 -> 279 bytes
 fuzz/seed/regexp/xpath-34     | Bin 0 -> 278 bytes
 fuzz/seed/regexp/xpath-35     | Bin 0 -> 287 bytes
 fuzz/seed/regexp/xpath-4      | Bin 0 -> 183 bytes
 fuzz/seed/regexp/xpath-5      | Bin 0 -> 186 bytes
 fuzz/seed/regexp/xpath-6      | Bin 0 -> 185 bytes
 fuzz/seed/regexp/xpath-7      | Bin 0 -> 185 bytes
 fuzz/seed/regexp/xpath-8      | Bin 0 -> 191 bytes
 fuzz/seed/regexp/xpath-9      | Bin 0 -> 199 bytes
 fuzz/seed/uri/dot             |   1 +
 fuzz/seed/uri/full            | Bin 0 -> 36 bytes
 fuzz/testFuzzer.c             |  55 +++++++++
 fuzz/uri.c                    |  45 +++++++
 fuzz/xml.c                    |  90 ++++++++++++++
 fuzz/xml.dict                 |  76 ++++++++++++
 fuzz/xmlSeed.c                |  94 +++++++++++++++
 137 files changed, 850 insertions(+), 2 deletions(-)
---
diff --git a/Makefile.am b/Makefile.am
index be1a883d..2a9d4709 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -2,7 +2,7 @@
 
 ACLOCAL_AMFLAGS = -I m4
 
-SUBDIRS = include . doc example xstc $(PYTHON_SUBDIR)
+SUBDIRS = include . doc example fuzz xstc $(PYTHON_SUBDIR)
 
 DIST_SUBDIRS = include . doc example python xstc
 
@@ -210,6 +210,7 @@ runtests: runtest$(EXEEXT) testrecurse$(EXEEXT) testapi$(EXEEXT) \
            $(CHECKER) ./runxmlconf$(EXEEXT)
        @(if [ "$(PYTHON_SUBDIR)" != "" ] ; then cd python ; \
            $(MAKE) tests ; fi)
+       @cd fuzz; $(MAKE) tests
 
 check: all runtests
 
diff --git a/configure.ac b/configure.ac
index 5f95fee0..3a3d91d3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1714,7 +1714,7 @@ rm -f COPYING.LIB COPYING
 ln -s $srcdir/Copyright COPYING
 
 # keep on one line for cygwin c.f. #130896
-AC_CONFIG_FILES([libxml2.spec:libxml.spec.in Makefile include/Makefile include/libxml/Makefile doc/Makefile 
doc/examples/Makefile doc/devhelp/Makefile example/Makefile python/Makefile python/tests/Makefile 
xstc/Makefile include/libxml/xmlversion.h libxml-2.0.pc libxml-2.0-uninstalled.pc libxml2-config.cmake])
+AC_CONFIG_FILES([libxml2.spec:libxml.spec.in Makefile include/Makefile include/libxml/Makefile doc/Makefile 
doc/examples/Makefile doc/devhelp/Makefile example/Makefile fuzz/Makefile python/Makefile 
python/tests/Makefile xstc/Makefile include/libxml/xmlversion.h libxml-2.0.pc libxml-2.0-uninstalled.pc 
libxml2-config.cmake])
 AC_CONFIG_FILES([python/setup.py], [chmod +x python/setup.py])
 AC_CONFIG_FILES([xml2-config], [chmod +x xml2-config])
 AC_OUTPUT
diff --git a/fuzz/.gitignore b/fuzz/.gitignore
new file mode 100644
index 00000000..28b71084
--- /dev/null
+++ b/fuzz/.gitignore
@@ -0,0 +1,7 @@
+corpus/
+regexp
+seed/xml*
+testFuzzer
+uri
+xml
+xmlSeed
diff --git a/fuzz/Makefile.am b/fuzz/Makefile.am
new file mode 100644
index 00000000..0e7391ba
--- /dev/null
+++ b/fuzz/Makefile.am
@@ -0,0 +1,75 @@
+EXTRA_PROGRAMS = regexp uri xml xmlSeed
+check_PROGRAMS = testFuzzer
+CLEANFILES = $(EXTRA_PROGRAMS)
+AM_CPPFLAGS = -I$(top_srcdir)/include
+DEPENDENCIES = $(top_builddir)/libxml2.la
+LDADD = $(STATIC_BINARIES) $(top_builddir)/libxml2.la $(THREAD_LIBS) $(Z_LIBS) $(LZMA_LIBS) $(ICONV_LIBS) 
$(M_LIBS) $(WIN32_EXTRA_LIBADD)
+
+PARSER_FUZZER_MAX_LEN = 100000
+XML_SEED_CORPUS_SRC = \
+    $(top_srcdir)/test/* \
+    $(top_srcdir)/test/errors/*.xml \
+    $(top_srcdir)/test/errors10/*.xml \
+    $(top_srcdir)/test/namespaces/* \
+    $(top_srcdir)/test/valid/*.xml \
+    $(top_srcdir)/test/xmlid/* \
+    $(top_srcdir)/test/VC/* \
+    $(top_srcdir)/test/VCM/*
+
+xmlSeed_SOURCES = xmlSeed.c fuzz.c
+
+# Build the XML seed corpus from the test suite. xmlSeed is run from the
+# directory of each input file, presumably so that relative references
+# inside the file resolve. A subshell with cd is used instead of
+# pushd/popd, which are bash builtins and not available in a POSIX /bin/sh.
+seed/xml.stamp: xmlSeed$(EXEEXT)
+       @mkdir -p seed/xml
+       @for i in $(XML_SEED_CORPUS_SRC); do \
+           if [ -f $$i ]; then \
+               echo Processing seed $$i; \
+               base=$$(basename $$i); \
+               outfile=$(abs_builddir)/seed/xml/$$base; \
+               (cd $$(dirname $$i) && \
+                   $(abs_builddir)/xmlSeed$(EXEEXT) $$base > $$outfile); \
+               if [ "$$(wc -c < $$outfile)" -gt $(PARSER_FUZZER_MAX_LEN) ]; then \
+                   rm $$outfile; \
+               fi; \
+           fi; \
+       done
+       @touch seed/xml.stamp
+
+testFuzzer_SOURCES = testFuzzer.c fuzz.c
+
+tests: testFuzzer$(EXEEXT)
+       @echo "## Running fuzzer tests"
+       @./testFuzzer$(EXEEXT)
+
+xml_SOURCES = xml.c fuzz.c
+xml_LDFLAGS = -fsanitize=fuzzer
+
+fuzz-xml: xml$(EXEEXT) seed/xml.stamp
+       @mkdir -p corpus/xml
+       ./xml$(EXEEXT) \
+           -dict=xml.dict \
+           -max_len=$(PARSER_FUZZER_MAX_LEN) \
+           -timeout=20 \
+           corpus/xml seed/xml
+
+regexp_SOURCES = regexp.c fuzz.c
+regexp_LDFLAGS = -fsanitize=fuzzer
+
+fuzz-regexp: regexp$(EXEEXT)
+       @mkdir -p corpus/regexp
+       ./regexp$(EXEEXT) \
+           -dict=regexp.dict \
+           -max_len=10000 \
+           -timeout=20 \
+           corpus/regexp $(srcdir)/seed/regexp
+
+uri_SOURCES = uri.c fuzz.c
+uri_LDFLAGS = -fsanitize=fuzzer
+
+fuzz-uri: uri$(EXEEXT)
+       @mkdir -p corpus/uri
+       ./uri$(EXEEXT) \
+           -max_len=10000 \
+           -timeout=2 \
+           corpus/uri $(srcdir)/seed/uri
+
diff --git a/fuzz/README b/fuzz/README
new file mode 100644
index 00000000..f675ad82
--- /dev/null
+++ b/fuzz/README
@@ -0,0 +1,19 @@
+libFuzzer instructions for libxml2
+==================================
+
+Set compiler and options:
+
+    export CC=clang
+    export CFLAGS="-g -fsanitize=fuzzer-no-link,address,undefined \
+        -fno-sanitize-recover=all \
+        -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION"
+
+Build libxml2 with instrumentation:
+
+    ./configure --without-python
+    make
+
+Run fuzzers:
+
+    make -C fuzz fuzz-xml
+
diff --git a/fuzz/fuzz.c b/fuzz/fuzz.c
new file mode 100644
index 00000000..6955f280
--- /dev/null
+++ b/fuzz/fuzz.c
@@ -0,0 +1,274 @@
+/*
+ * fuzz.c: Common functions for fuzzing.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <libxml/hash.h>
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+#include <libxml/tree.h>
+#include <libxml/xmlIO.h>
+#include "fuzz.h"
+
+typedef struct {
+    const char *data; /* Entity content, as returned by xmlFuzzReadString */
+    size_t size;      /* Content length in bytes, without the terminator */
+} xmlFuzzEntityInfo;
+
+/*
+ * State of the fuzz data provider. Single static instance for now; it is
+ * set up by xmlFuzzDataInit and released by xmlFuzzDataCleanup.
+ */
+static struct {
+    /* Original data, exactly as passed to xmlFuzzDataInit */
+    const char *data;
+    size_t size;
+
+    /* Remaining data: read cursor and number of bytes not yet consumed */
+    const char *ptr;
+    size_t remaining;
+
+    /* Buffer for unescaped strings, allocated with size + 1 bytes */
+    char *outBuf;
+    char *outPtr; /* Free space at end of buffer */
+
+    xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
+
+    /* The first entity is the main entity. NULL until one has been read. */
+    const char *mainUrl;
+    xmlFuzzEntityInfo *mainEntity;
+} fuzzData;
+
+/**
+ * xmlFuzzErrorFunc:
+ * @ctx:  user data supplied when the handler was installed (unused)
+ * @msg:  the error message (unused)
+ * @...:  additional arguments for the message (unused)
+ *
+ * An error function that simply discards all errors. Fuzz targets install
+ * it with xmlSetGenericErrorFunc so that reported errors are dropped.
+ */
+void
+xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
+                 ...) {
+}
+
+/**
+ * xmlFuzzDataInit:
+ * @data:  raw fuzz input
+ * @size:  size of the fuzz input in bytes
+ *
+ * Prepare the static fuzz data provider for a new input. The resources
+ * allocated here are released by xmlFuzzDataCleanup.
+ */
+void
+xmlFuzzDataInit(const char *data, size_t size) {
+    /* Nothing has been consumed yet: the cursor covers the whole input. */
+    fuzzData.data = fuzzData.ptr = data;
+    fuzzData.size = fuzzData.remaining = size;
+
+    /* Output buffer for unescaped strings, initially empty. */
+    fuzzData.outBuf = fuzzData.outPtr = xmlMalloc(size + 1);
+
+    /* No entities are known until xmlFuzzReadEntities runs. */
+    fuzzData.entities = xmlHashCreate(8);
+    fuzzData.mainEntity = NULL;
+    fuzzData.mainUrl = NULL;
+}
+
+/**
+ * xmlFreeEntityEntry:
+ * @value:  the xmlFuzzEntityInfo stored in the hash table
+ * @name:  the URL the entry was stored under (unused)
+ *
+ * Deallocator passed to xmlHashFree to release a single entity entry.
+ * Only the info struct is freed; the entity content it points to is not
+ * owned by the entry.
+ */
+static void
+xmlFreeEntityEntry(void *value, const xmlChar *name ATTRIBUTE_UNUSED) {
+    xmlFree(value);
+}
+
+/**
+ * xmlFuzzDataCleanup:
+ *
+ * Cleanup fuzz data provider. Frees the output buffer and the entity
+ * table together with its entries, then clears the pointers kept in the
+ * static state so that nothing refers to freed memory afterwards.
+ */
+void
+xmlFuzzDataCleanup(void) {
+    xmlFree(fuzzData.outBuf);
+    xmlHashFree(fuzzData.entities, xmlFreeEntityEntry);
+
+    /* The main URL and all entity data pointed into the freed buffer. */
+    fuzzData.outBuf = NULL;
+    fuzzData.outPtr = NULL;
+    fuzzData.entities = NULL;
+    fuzzData.mainUrl = NULL;
+    fuzzData.mainEntity = NULL;
+}
+
+/**
+ * xmlFuzzReadInt:
+ *
+ * Read an integer from the fuzz data. The bytes are copied verbatim, so
+ * the value depends on the size and byte order of int on the host.
+ *
+ * Returns the integer, or 0 if fewer than sizeof(int) bytes remain. In
+ * that case no data is consumed.
+ */
+int
+xmlFuzzReadInt(void) {
+    int ret;
+
+    if (fuzzData.remaining < sizeof(ret))
+        return(0);
+
+    /* memcpy avoids an unaligned read from the input buffer. */
+    memcpy(&ret, fuzzData.ptr, sizeof(ret));
+    fuzzData.ptr += sizeof(ret);
+    fuzzData.remaining -= sizeof(ret);
+
+    return(ret);
+}
+
+/**
+ * xmlFuzzReadString:
+ * @size:  receives the length of the string in bytes, not counting the
+ *         terminator; left untouched if NULL is returned
+ *
+ * Read a random-length string from the fuzz data.
+ *
+ * The format is similar to libFuzzer's FuzzedDataProvider but treats
+ * backslash followed by newline as end of string. This makes the fuzz data
+ * more readable. A backslash character is escaped with another backslash.
+ * A backslash followed by any other character, or found at the very end
+ * of the data, is copied unchanged. If the data ends before a terminator
+ * is seen, the bytes read so far form the last string.
+ *
+ * The unescaped string is appended to the shared output buffer, so the
+ * returned pointer refers into fuzzData.outBuf rather than to a separate
+ * allocation.
+ *
+ * Returns a zero-terminated string or NULL if the fuzz data is exhausted,
+ * i.e. no bytes could be read. An explicitly terminated empty string is
+ * returned as "" and not as NULL.
+ */
+static const char *
+xmlFuzzReadString(size_t *size) {
+    const char *out = fuzzData.outPtr; /* start of the new string */
+
+    while (fuzzData.remaining > 0) {
+        int c = *fuzzData.ptr++;
+        fuzzData.remaining--;
+
+        if ((c == '\\') && (fuzzData.remaining > 0)) {
+            int c2 = *fuzzData.ptr; /* peek only; consumed below if needed */
+
+            if (c2 == '\n') { /* backslash-newline: end of string */
+                fuzzData.ptr++;
+                fuzzData.remaining--;
+                *size = fuzzData.outPtr - out;
+                *fuzzData.outPtr++ = '\0';
+                return(out);
+            }
+            if (c2 == '\\') { /* escaped backslash: skip the second one */
+                fuzzData.ptr++;
+                fuzzData.remaining--;
+            }
+        }
+
+        *fuzzData.outPtr++ = c;
+    }
+
+    if (fuzzData.outPtr > out) { /* unterminated, non-empty last string */
+        *size = fuzzData.outPtr - out;
+        *fuzzData.outPtr++ = '\0';
+        return(out);
+    }
+
+    return(NULL);
+}
+
+/**
+ * xmlFuzzReadEntities:
+ *
+ * Read entities like the main XML file, external DTDs, external parsed
+ * entities from fuzz data.
+ *
+ * The remaining fuzz data is consumed as alternating URL and content
+ * strings. Each URL is registered once; a later entity whose URL is
+ * already known is ignored. The first entity registered becomes the main
+ * entity. Reading stops when the data is exhausted or when an entry
+ * cannot be allocated or stored.
+ */
+void
+xmlFuzzReadEntities(void) {
+    size_t num = 0;
+
+    while (1) {
+        const char *url, *entity;
+        size_t urlSize, entitySize;
+        xmlFuzzEntityInfo *entityInfo;
+
+        url = xmlFuzzReadString(&urlSize);
+        if (url == NULL) break;
+
+        entity = xmlFuzzReadString(&entitySize);
+        if (entity == NULL) break;
+
+        /* Only the first entity for a given URL is kept. */
+        if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) != NULL)
+            continue;
+
+        entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo));
+        if (entityInfo == NULL)
+            break;
+        entityInfo->data = entity;
+        entityInfo->size = entitySize;
+
+        /* The table takes ownership only if the insertion succeeded. */
+        if (xmlHashAddEntry(fuzzData.entities, (xmlChar *)url,
+                            entityInfo) != 0) {
+            xmlFree(entityInfo);
+            break;
+        }
+
+        if (num == 0) {
+            fuzzData.mainUrl = url;
+            fuzzData.mainEntity = entityInfo;
+        }
+
+        num++;
+    }
+}
+
+/**
+ * xmlFuzzMainEntity:
+ * @size:  receives the size of the main entity in bytes; not written if
+ *         there is no main entity
+ *
+ * Returns the content of the main entity, or NULL if none was read.
+ */
+const char *
+xmlFuzzMainEntity(size_t *size) {
+    const xmlFuzzEntityInfo *mainEntity = fuzzData.mainEntity;
+
+    if (mainEntity != NULL) {
+        *size = mainEntity->size;
+        return(mainEntity->data);
+    }
+
+    return(NULL);
+}
+
+/**
+ * xmlFuzzEntityLoader:
+ * @URL:  the URL of the entity to load
+ * @ID:  the public ID of the entity (unused)
+ * @ctxt:  the parser context requesting the entity
+ *
+ * The entity loader for fuzz data. Entities are looked up by URL in the
+ * table filled by xmlFuzzReadEntities and served from memory.
+ *
+ * Returns a new parser input for the entity, or NULL if the URL is NULL
+ * or unknown, or if the input could not be allocated.
+ */
+xmlParserInputPtr
+xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
+                    xmlParserCtxtPtr ctxt) {
+    xmlParserInputPtr input;
+    xmlFuzzEntityInfo *entity;
+
+    if (URL == NULL)
+        return(NULL);
+    entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL);
+    if (entity == NULL)
+        return(NULL);
+
+    input = xmlNewInputStream(ctxt);
+    if (input == NULL)
+        return(NULL);
+    input->filename = NULL;
+    input->buf = xmlParserInputBufferCreateMem(entity->data, entity->size,
+                                               XML_CHAR_ENCODING_NONE);
+    if (input->buf == NULL) {
+        /* Do not hand a half-initialized input back to the parser. */
+        xmlFreeInputStream(input);
+        return(NULL);
+    }
+    input->base = input->cur = xmlBufContent(input->buf->buffer);
+    input->end = input->base + entity->size;
+
+    return(input);
+}
+
+/**
+ * xmlFuzzExtractStrings:
+ * @data:  input data
+ * @size:  size of the input data in bytes
+ * @strings:  array that receives the extracted strings
+ * @numStrings:  number of slots in @strings
+ *
+ * Extract C strings from input data. Use exact-size allocations to detect
+ * potential memory errors.
+ *
+ * The input is split at zero bytes. Each extracted string is a separate
+ * allocation that the caller must release with xmlFree. Slots that are
+ * not filled, because the input holds fewer strings or because an
+ * allocation failed, are set to NULL.
+ *
+ * Returns the number of strings stored in @strings.
+ */
+size_t
+xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
+                      size_t numStrings) {
+    const char *start = data;
+    const char *end = data + size;
+    size_t i = 0, ret;
+
+    while (i < numStrings) {
+        size_t strSize = end - start;
+        const char *zero = NULL;
+
+        /* Do not hand a possibly invalid pointer to memchr for no data. */
+        if (strSize > 0)
+            zero = memchr(start, 0, strSize);
+        if (zero != NULL)
+            strSize = zero - start;
+
+        strings[i] = xmlMalloc(strSize + 1);
+        if (strings[i] == NULL)
+            break;
+        if (strSize > 0)
+            memcpy(strings[i], start, strSize);
+        strings[i][strSize] = '\0';
+
+        i++;
+        if (zero != NULL)
+            start = zero + 1;
+        else
+            break;
+    }
+
+    ret = i;
+
+    /* Clear the unused slots so callers can free all of them blindly. */
+    while (i < numStrings) {
+        strings[i] = NULL;
+        i++;
+    }
+
+    return(ret);
+}
+
diff --git a/fuzz/fuzz.h b/fuzz/fuzz.h
new file mode 100644
index 00000000..1093be14
--- /dev/null
+++ b/fuzz/fuzz.h
@@ -0,0 +1,55 @@
+/*
+ * fuzz.h: Common functions and macros for fuzzing.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#ifndef __XML_FUZZERCOMMON_H__
+#define __XML_FUZZERCOMMON_H__
+
+#include <stddef.h>
+#include <libxml/parser.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int
+LLVMFuzzerInitialize(int *argc, char ***argv);
+
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size);
+
+void
+xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED,
+                 ...);
+
+void
+xmlFuzzDataInit(const char *data, size_t size);
+
+void
+xmlFuzzDataCleanup(void);
+
+int
+xmlFuzzReadInt(void);
+
+void
+xmlFuzzReadEntities(void);
+
+const char *
+xmlFuzzMainEntity(size_t *size);
+
+xmlParserInputPtr
+xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED,
+                    xmlParserCtxtPtr ctxt);
+
+size_t
+xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
+                      size_t numStrings);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __XML_FUZZERCOMMON_H__ */
+
diff --git a/fuzz/regexp.c b/fuzz/regexp.c
new file mode 100644
index 00000000..ed13f637
--- /dev/null
+++ b/fuzz/regexp.c
@@ -0,0 +1,40 @@
+/*
+ * regexp.c: a libFuzzer target to test the regexp module.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <libxml/xmlregexp.h>
+#include "fuzz.h"
+
+/**
+ * LLVMFuzzerInitialize:
+ * @argc:  pointer to the argument count (unused)
+ * @argv:  pointer to the argument vector (unused)
+ *
+ * Install xmlFuzzErrorFunc as the generic error handler so that error
+ * messages are discarded.
+ *
+ * Returns 0.
+ */
+int
+LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
+                     char ***argv ATTRIBUTE_UNUSED) {
+    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+    return(0);
+}
+
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    xmlRegexpPtr regexp;
+    char *str[2] = { NULL, NULL }; /* [0]: regexp, [1]: string to match */
+    size_t numStrings; /* only read by the disabled block below */
+
+    numStrings = xmlFuzzExtractStrings(data, size, str, 2);
+
+    regexp = xmlRegexpCompile(BAD_CAST str[0]);
+    /*
+     * xmlRegexpExec has pathological performance in too many cases, so
+     * the compiled regexp is not executed for now and only compilation
+     * is exercised.
+     */
+#if 0
+    if ((regexp != NULL) && (numStrings >= 2)) {
+        xmlRegexpExec(regexp, BAD_CAST str[1]);
+    }
+#endif
+    xmlRegFreeRegexp(regexp);
+
+    xmlFree(str[0]);
+    xmlFree(str[1]); /* NULL if the input held a single string */
+
+    return 0;
+}
+
diff --git a/fuzz/regexp.dict b/fuzz/regexp.dict
new file mode 100644
index 00000000..06b74a6c
--- /dev/null
+++ b/fuzz/regexp.dict
@@ -0,0 +1,16 @@
+quant_any="*"
+quant_opt="?"
+quant_some="+"
+quant_num="{1,2}"
+
+branch="|a"
+pos_group="[a]"
+neg_group="[^a]"
+
+cat_letter="\\p{L}"
+cat_mark="\\p{M}"
+cat_number="\\p{N}"
+cat_punct="\\p{P}"
+cat_sym="\\p{S}"
+cat_sep="\\p{Z}"
+cat_other="\\p{C}"
diff --git a/fuzz/seed/regexp/branch-1 b/fuzz/seed/regexp/branch-1
new file mode 100644
index 00000000..ded775ea
Binary files /dev/null and b/fuzz/seed/regexp/branch-1 differ
diff --git a/fuzz/seed/regexp/branch-10 b/fuzz/seed/regexp/branch-10
new file mode 100644
index 00000000..6700d775
Binary files /dev/null and b/fuzz/seed/regexp/branch-10 differ
diff --git a/fuzz/seed/regexp/branch-11 b/fuzz/seed/regexp/branch-11
new file mode 100644
index 00000000..d83f9181
Binary files /dev/null and b/fuzz/seed/regexp/branch-11 differ
diff --git a/fuzz/seed/regexp/branch-12 b/fuzz/seed/regexp/branch-12
new file mode 100644
index 00000000..b44dba57
Binary files /dev/null and b/fuzz/seed/regexp/branch-12 differ
diff --git a/fuzz/seed/regexp/branch-13 b/fuzz/seed/regexp/branch-13
new file mode 100644
index 00000000..64e50a00
Binary files /dev/null and b/fuzz/seed/regexp/branch-13 differ
diff --git a/fuzz/seed/regexp/branch-2 b/fuzz/seed/regexp/branch-2
new file mode 100644
index 00000000..8293d81d
Binary files /dev/null and b/fuzz/seed/regexp/branch-2 differ
diff --git a/fuzz/seed/regexp/branch-3 b/fuzz/seed/regexp/branch-3
new file mode 100644
index 00000000..696af9be
Binary files /dev/null and b/fuzz/seed/regexp/branch-3 differ
diff --git a/fuzz/seed/regexp/branch-4 b/fuzz/seed/regexp/branch-4
new file mode 100644
index 00000000..83179988
Binary files /dev/null and b/fuzz/seed/regexp/branch-4 differ
diff --git a/fuzz/seed/regexp/branch-5 b/fuzz/seed/regexp/branch-5
new file mode 100644
index 00000000..6b6db8b9
Binary files /dev/null and b/fuzz/seed/regexp/branch-5 differ
diff --git a/fuzz/seed/regexp/branch-6 b/fuzz/seed/regexp/branch-6
new file mode 100644
index 00000000..4f477902
Binary files /dev/null and b/fuzz/seed/regexp/branch-6 differ
diff --git a/fuzz/seed/regexp/branch-7 b/fuzz/seed/regexp/branch-7
new file mode 100644
index 00000000..6334f725
Binary files /dev/null and b/fuzz/seed/regexp/branch-7 differ
diff --git a/fuzz/seed/regexp/branch-8 b/fuzz/seed/regexp/branch-8
new file mode 100644
index 00000000..f77a8f4d
Binary files /dev/null and b/fuzz/seed/regexp/branch-8 differ
diff --git a/fuzz/seed/regexp/branch-9 b/fuzz/seed/regexp/branch-9
new file mode 100644
index 00000000..acd0eeca
Binary files /dev/null and b/fuzz/seed/regexp/branch-9 differ
diff --git a/fuzz/seed/regexp/bug316338-1 b/fuzz/seed/regexp/bug316338-1
new file mode 100644
index 00000000..9f0a504a
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-1 differ
diff --git a/fuzz/seed/regexp/bug316338-10 b/fuzz/seed/regexp/bug316338-10
new file mode 100644
index 00000000..60685bbe
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-10 differ
diff --git a/fuzz/seed/regexp/bug316338-11 b/fuzz/seed/regexp/bug316338-11
new file mode 100644
index 00000000..72a7956c
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-11 differ
diff --git a/fuzz/seed/regexp/bug316338-12 b/fuzz/seed/regexp/bug316338-12
new file mode 100644
index 00000000..85416ee2
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-12 differ
diff --git a/fuzz/seed/regexp/bug316338-13 b/fuzz/seed/regexp/bug316338-13
new file mode 100644
index 00000000..c91d4fea
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-13 differ
diff --git a/fuzz/seed/regexp/bug316338-14 b/fuzz/seed/regexp/bug316338-14
new file mode 100644
index 00000000..a164b423
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-14 differ
diff --git a/fuzz/seed/regexp/bug316338-15 b/fuzz/seed/regexp/bug316338-15
new file mode 100644
index 00000000..750c76de
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-15 differ
diff --git a/fuzz/seed/regexp/bug316338-16 b/fuzz/seed/regexp/bug316338-16
new file mode 100644
index 00000000..23c5d230
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-16 differ
diff --git a/fuzz/seed/regexp/bug316338-2 b/fuzz/seed/regexp/bug316338-2
new file mode 100644
index 00000000..5468d06d
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-2 differ
diff --git a/fuzz/seed/regexp/bug316338-3 b/fuzz/seed/regexp/bug316338-3
new file mode 100644
index 00000000..76e1e0bc
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-3 differ
diff --git a/fuzz/seed/regexp/bug316338-4 b/fuzz/seed/regexp/bug316338-4
new file mode 100644
index 00000000..e0f65a42
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-4 differ
diff --git a/fuzz/seed/regexp/bug316338-5 b/fuzz/seed/regexp/bug316338-5
new file mode 100644
index 00000000..fcfaa97a
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-5 differ
diff --git a/fuzz/seed/regexp/bug316338-6 b/fuzz/seed/regexp/bug316338-6
new file mode 100644
index 00000000..ce00a15b
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-6 differ
diff --git a/fuzz/seed/regexp/bug316338-7 b/fuzz/seed/regexp/bug316338-7
new file mode 100644
index 00000000..127fe1f6
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-7 differ
diff --git a/fuzz/seed/regexp/bug316338-8 b/fuzz/seed/regexp/bug316338-8
new file mode 100644
index 00000000..fe8bb8b2
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-8 differ
diff --git a/fuzz/seed/regexp/bug316338-9 b/fuzz/seed/regexp/bug316338-9
new file mode 100644
index 00000000..3d56e5d7
Binary files /dev/null and b/fuzz/seed/regexp/bug316338-9 differ
diff --git a/fuzz/seed/regexp/bug420596-1 b/fuzz/seed/regexp/bug420596-1
new file mode 100644
index 00000000..4426933e
Binary files /dev/null and b/fuzz/seed/regexp/bug420596-1 differ
diff --git a/fuzz/seed/regexp/bug420596-2 b/fuzz/seed/regexp/bug420596-2
new file mode 100644
index 00000000..474d2b6e
Binary files /dev/null and b/fuzz/seed/regexp/bug420596-2 differ
diff --git a/fuzz/seed/regexp/bug420596-3 b/fuzz/seed/regexp/bug420596-3
new file mode 100644
index 00000000..09c75cb6
Binary files /dev/null and b/fuzz/seed/regexp/bug420596-3 differ
diff --git a/fuzz/seed/regexp/bug420596-4 b/fuzz/seed/regexp/bug420596-4
new file mode 100644
index 00000000..65d561ea
Binary files /dev/null and b/fuzz/seed/regexp/bug420596-4 differ
diff --git a/fuzz/seed/regexp/bug420596-5 b/fuzz/seed/regexp/bug420596-5
new file mode 100644
index 00000000..b6785803
Binary files /dev/null and b/fuzz/seed/regexp/bug420596-5 differ
diff --git a/fuzz/seed/regexp/bug420596-6 b/fuzz/seed/regexp/bug420596-6
new file mode 100644
index 00000000..3a05d82f
Binary files /dev/null and b/fuzz/seed/regexp/bug420596-6 differ
diff --git a/fuzz/seed/regexp/bug420596-7 b/fuzz/seed/regexp/bug420596-7
new file mode 100644
index 00000000..88e16605
Binary files /dev/null and b/fuzz/seed/regexp/bug420596-7 differ
diff --git a/fuzz/seed/regexp/bug420596-8 b/fuzz/seed/regexp/bug420596-8
new file mode 100644
index 00000000..4575a925
Binary files /dev/null and b/fuzz/seed/regexp/bug420596-8 differ
diff --git a/fuzz/seed/regexp/content-1 b/fuzz/seed/regexp/content-1
new file mode 100644
index 00000000..5acbf864
Binary files /dev/null and b/fuzz/seed/regexp/content-1 differ
diff --git a/fuzz/seed/regexp/content-10 b/fuzz/seed/regexp/content-10
new file mode 100644
index 00000000..f131454c
Binary files /dev/null and b/fuzz/seed/regexp/content-10 differ
diff --git a/fuzz/seed/regexp/content-2 b/fuzz/seed/regexp/content-2
new file mode 100644
index 00000000..4e6b663e
Binary files /dev/null and b/fuzz/seed/regexp/content-2 differ
diff --git a/fuzz/seed/regexp/content-3 b/fuzz/seed/regexp/content-3
new file mode 100644
index 00000000..b13fc8db
Binary files /dev/null and b/fuzz/seed/regexp/content-3 differ
diff --git a/fuzz/seed/regexp/content-4 b/fuzz/seed/regexp/content-4
new file mode 100644
index 00000000..47c5d6de
Binary files /dev/null and b/fuzz/seed/regexp/content-4 differ
diff --git a/fuzz/seed/regexp/content-5 b/fuzz/seed/regexp/content-5
new file mode 100644
index 00000000..f93860eb
Binary files /dev/null and b/fuzz/seed/regexp/content-5 differ
diff --git a/fuzz/seed/regexp/content-6 b/fuzz/seed/regexp/content-6
new file mode 100644
index 00000000..e5c6e14b
Binary files /dev/null and b/fuzz/seed/regexp/content-6 differ
diff --git a/fuzz/seed/regexp/content-7 b/fuzz/seed/regexp/content-7
new file mode 100644
index 00000000..4868dd2f
Binary files /dev/null and b/fuzz/seed/regexp/content-7 differ
diff --git a/fuzz/seed/regexp/content-8 b/fuzz/seed/regexp/content-8
new file mode 100644
index 00000000..a3a87d0b
Binary files /dev/null and b/fuzz/seed/regexp/content-8 differ
diff --git a/fuzz/seed/regexp/content-9 b/fuzz/seed/regexp/content-9
new file mode 100644
index 00000000..91f0d9e9
Binary files /dev/null and b/fuzz/seed/regexp/content-9 differ
diff --git a/fuzz/seed/regexp/hard-1 b/fuzz/seed/regexp/hard-1
new file mode 100644
index 00000000..ba00382e
Binary files /dev/null and b/fuzz/seed/regexp/hard-1 differ
diff --git a/fuzz/seed/regexp/hard-10 b/fuzz/seed/regexp/hard-10
new file mode 100644
index 00000000..7db28fa5
Binary files /dev/null and b/fuzz/seed/regexp/hard-10 differ
diff --git a/fuzz/seed/regexp/hard-2 b/fuzz/seed/regexp/hard-2
new file mode 100644
index 00000000..ed38b91b
Binary files /dev/null and b/fuzz/seed/regexp/hard-2 differ
diff --git a/fuzz/seed/regexp/hard-3 b/fuzz/seed/regexp/hard-3
new file mode 100644
index 00000000..7b16da0c
Binary files /dev/null and b/fuzz/seed/regexp/hard-3 differ
diff --git a/fuzz/seed/regexp/hard-4 b/fuzz/seed/regexp/hard-4
new file mode 100644
index 00000000..2ece886a
Binary files /dev/null and b/fuzz/seed/regexp/hard-4 differ
diff --git a/fuzz/seed/regexp/hard-5 b/fuzz/seed/regexp/hard-5
new file mode 100644
index 00000000..870a3ec5
Binary files /dev/null and b/fuzz/seed/regexp/hard-5 differ
diff --git a/fuzz/seed/regexp/hard-6 b/fuzz/seed/regexp/hard-6
new file mode 100644
index 00000000..06aa7d0d
Binary files /dev/null and b/fuzz/seed/regexp/hard-6 differ
diff --git a/fuzz/seed/regexp/hard-7 b/fuzz/seed/regexp/hard-7
new file mode 100644
index 00000000..50a9ec39
Binary files /dev/null and b/fuzz/seed/regexp/hard-7 differ
diff --git a/fuzz/seed/regexp/hard-8 b/fuzz/seed/regexp/hard-8
new file mode 100644
index 00000000..0991129f
Binary files /dev/null and b/fuzz/seed/regexp/hard-8 differ
diff --git a/fuzz/seed/regexp/hard-9 b/fuzz/seed/regexp/hard-9
new file mode 100644
index 00000000..5bd1d890
Binary files /dev/null and b/fuzz/seed/regexp/hard-9 differ
diff --git a/fuzz/seed/regexp/ncname-1 b/fuzz/seed/regexp/ncname-1
new file mode 100644
index 00000000..608eb9a9
Binary files /dev/null and b/fuzz/seed/regexp/ncname-1 differ
diff --git a/fuzz/seed/regexp/ncname-2 b/fuzz/seed/regexp/ncname-2
new file mode 100644
index 00000000..cfb9b960
Binary files /dev/null and b/fuzz/seed/regexp/ncname-2 differ
diff --git a/fuzz/seed/regexp/ncname-3 b/fuzz/seed/regexp/ncname-3
new file mode 100644
index 00000000..07a6a081
Binary files /dev/null and b/fuzz/seed/regexp/ncname-3 differ
diff --git a/fuzz/seed/regexp/ncname-4 b/fuzz/seed/regexp/ncname-4
new file mode 100644
index 00000000..87e937f4
Binary files /dev/null and b/fuzz/seed/regexp/ncname-4 differ
diff --git a/fuzz/seed/regexp/ncname-5 b/fuzz/seed/regexp/ncname-5
new file mode 100644
index 00000000..ad294560
Binary files /dev/null and b/fuzz/seed/regexp/ncname-5 differ
diff --git a/fuzz/seed/regexp/ranges-1 b/fuzz/seed/regexp/ranges-1
new file mode 100644
index 00000000..71448f23
Binary files /dev/null and b/fuzz/seed/regexp/ranges-1 differ
diff --git a/fuzz/seed/regexp/ranges-10 b/fuzz/seed/regexp/ranges-10
new file mode 100644
index 00000000..91aed3cf
Binary files /dev/null and b/fuzz/seed/regexp/ranges-10 differ
diff --git a/fuzz/seed/regexp/ranges-11 b/fuzz/seed/regexp/ranges-11
new file mode 100644
index 00000000..76eb5deb
Binary files /dev/null and b/fuzz/seed/regexp/ranges-11 differ
diff --git a/fuzz/seed/regexp/ranges-12 b/fuzz/seed/regexp/ranges-12
new file mode 100644
index 00000000..9c3bc663
Binary files /dev/null and b/fuzz/seed/regexp/ranges-12 differ
diff --git a/fuzz/seed/regexp/ranges-2 b/fuzz/seed/regexp/ranges-2
new file mode 100644
index 00000000..9369f7a5
Binary files /dev/null and b/fuzz/seed/regexp/ranges-2 differ
diff --git a/fuzz/seed/regexp/ranges-3 b/fuzz/seed/regexp/ranges-3
new file mode 100644
index 00000000..58a3a081
Binary files /dev/null and b/fuzz/seed/regexp/ranges-3 differ
diff --git a/fuzz/seed/regexp/ranges-4 b/fuzz/seed/regexp/ranges-4
new file mode 100644
index 00000000..da7e9dab
Binary files /dev/null and b/fuzz/seed/regexp/ranges-4 differ
diff --git a/fuzz/seed/regexp/ranges-5 b/fuzz/seed/regexp/ranges-5
new file mode 100644
index 00000000..83ad4a82
Binary files /dev/null and b/fuzz/seed/regexp/ranges-5 differ
diff --git a/fuzz/seed/regexp/ranges-6 b/fuzz/seed/regexp/ranges-6
new file mode 100644
index 00000000..3bc9758f
Binary files /dev/null and b/fuzz/seed/regexp/ranges-6 differ
diff --git a/fuzz/seed/regexp/ranges-7 b/fuzz/seed/regexp/ranges-7
new file mode 100644
index 00000000..fa890384
Binary files /dev/null and b/fuzz/seed/regexp/ranges-7 differ
diff --git a/fuzz/seed/regexp/ranges-8 b/fuzz/seed/regexp/ranges-8
new file mode 100644
index 00000000..96f0bb69
Binary files /dev/null and b/fuzz/seed/regexp/ranges-8 differ
diff --git a/fuzz/seed/regexp/ranges-9 b/fuzz/seed/regexp/ranges-9
new file mode 100644
index 00000000..8e3fc43a
Binary files /dev/null and b/fuzz/seed/regexp/ranges-9 differ
diff --git a/fuzz/seed/regexp/ranges2-1 b/fuzz/seed/regexp/ranges2-1
new file mode 100644
index 00000000..044a8eb9
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-1 differ
diff --git a/fuzz/seed/regexp/ranges2-10 b/fuzz/seed/regexp/ranges2-10
new file mode 100644
index 00000000..19e2aa2d
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-10 differ
diff --git a/fuzz/seed/regexp/ranges2-11 b/fuzz/seed/regexp/ranges2-11
new file mode 100644
index 00000000..89be181d
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-11 differ
diff --git a/fuzz/seed/regexp/ranges2-12 b/fuzz/seed/regexp/ranges2-12
new file mode 100644
index 00000000..42ebdd31
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-12 differ
diff --git a/fuzz/seed/regexp/ranges2-2 b/fuzz/seed/regexp/ranges2-2
new file mode 100644
index 00000000..026f7b84
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-2 differ
diff --git a/fuzz/seed/regexp/ranges2-3 b/fuzz/seed/regexp/ranges2-3
new file mode 100644
index 00000000..83e78a9c
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-3 differ
diff --git a/fuzz/seed/regexp/ranges2-4 b/fuzz/seed/regexp/ranges2-4
new file mode 100644
index 00000000..847b4e84
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-4 differ
diff --git a/fuzz/seed/regexp/ranges2-5 b/fuzz/seed/regexp/ranges2-5
new file mode 100644
index 00000000..349168d3
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-5 differ
diff --git a/fuzz/seed/regexp/ranges2-6 b/fuzz/seed/regexp/ranges2-6
new file mode 100644
index 00000000..5d2a4076
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-6 differ
diff --git a/fuzz/seed/regexp/ranges2-7 b/fuzz/seed/regexp/ranges2-7
new file mode 100644
index 00000000..74fbafb4
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-7 differ
diff --git a/fuzz/seed/regexp/ranges2-8 b/fuzz/seed/regexp/ranges2-8
new file mode 100644
index 00000000..125bfa91
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-8 differ
diff --git a/fuzz/seed/regexp/ranges2-9 b/fuzz/seed/regexp/ranges2-9
new file mode 100644
index 00000000..f2cf1288
Binary files /dev/null and b/fuzz/seed/regexp/ranges2-9 differ
diff --git a/fuzz/seed/regexp/xpath-1 b/fuzz/seed/regexp/xpath-1
new file mode 100644
index 00000000..3bc17926
Binary files /dev/null and b/fuzz/seed/regexp/xpath-1 differ
diff --git a/fuzz/seed/regexp/xpath-10 b/fuzz/seed/regexp/xpath-10
new file mode 100644
index 00000000..e4f4b0cd
Binary files /dev/null and b/fuzz/seed/regexp/xpath-10 differ
diff --git a/fuzz/seed/regexp/xpath-11 b/fuzz/seed/regexp/xpath-11
new file mode 100644
index 00000000..318e0ccf
Binary files /dev/null and b/fuzz/seed/regexp/xpath-11 differ
diff --git a/fuzz/seed/regexp/xpath-12 b/fuzz/seed/regexp/xpath-12
new file mode 100644
index 00000000..f204295b
Binary files /dev/null and b/fuzz/seed/regexp/xpath-12 differ
diff --git a/fuzz/seed/regexp/xpath-13 b/fuzz/seed/regexp/xpath-13
new file mode 100644
index 00000000..70fccd59
Binary files /dev/null and b/fuzz/seed/regexp/xpath-13 differ
diff --git a/fuzz/seed/regexp/xpath-14 b/fuzz/seed/regexp/xpath-14
new file mode 100644
index 00000000..357ce2b5
Binary files /dev/null and b/fuzz/seed/regexp/xpath-14 differ
diff --git a/fuzz/seed/regexp/xpath-15 b/fuzz/seed/regexp/xpath-15
new file mode 100644
index 00000000..2a10a837
Binary files /dev/null and b/fuzz/seed/regexp/xpath-15 differ
diff --git a/fuzz/seed/regexp/xpath-16 b/fuzz/seed/regexp/xpath-16
new file mode 100644
index 00000000..1f3089fb
Binary files /dev/null and b/fuzz/seed/regexp/xpath-16 differ
diff --git a/fuzz/seed/regexp/xpath-17 b/fuzz/seed/regexp/xpath-17
new file mode 100644
index 00000000..a9d542fb
Binary files /dev/null and b/fuzz/seed/regexp/xpath-17 differ
diff --git a/fuzz/seed/regexp/xpath-18 b/fuzz/seed/regexp/xpath-18
new file mode 100644
index 00000000..651eb9d4
Binary files /dev/null and b/fuzz/seed/regexp/xpath-18 differ
diff --git a/fuzz/seed/regexp/xpath-19 b/fuzz/seed/regexp/xpath-19
new file mode 100644
index 00000000..fefea8f1
Binary files /dev/null and b/fuzz/seed/regexp/xpath-19 differ
diff --git a/fuzz/seed/regexp/xpath-2 b/fuzz/seed/regexp/xpath-2
new file mode 100644
index 00000000..81e5fba0
Binary files /dev/null and b/fuzz/seed/regexp/xpath-2 differ
diff --git a/fuzz/seed/regexp/xpath-20 b/fuzz/seed/regexp/xpath-20
new file mode 100644
index 00000000..1f3089fb
Binary files /dev/null and b/fuzz/seed/regexp/xpath-20 differ
diff --git a/fuzz/seed/regexp/xpath-21 b/fuzz/seed/regexp/xpath-21
new file mode 100644
index 00000000..706a7025
Binary files /dev/null and b/fuzz/seed/regexp/xpath-21 differ
diff --git a/fuzz/seed/regexp/xpath-22 b/fuzz/seed/regexp/xpath-22
new file mode 100644
index 00000000..a246f84c
Binary files /dev/null and b/fuzz/seed/regexp/xpath-22 differ
diff --git a/fuzz/seed/regexp/xpath-23 b/fuzz/seed/regexp/xpath-23
new file mode 100644
index 00000000..02753beb
Binary files /dev/null and b/fuzz/seed/regexp/xpath-23 differ
diff --git a/fuzz/seed/regexp/xpath-24 b/fuzz/seed/regexp/xpath-24
new file mode 100644
index 00000000..331105cd
Binary files /dev/null and b/fuzz/seed/regexp/xpath-24 differ
diff --git a/fuzz/seed/regexp/xpath-25 b/fuzz/seed/regexp/xpath-25
new file mode 100644
index 00000000..ce3da443
Binary files /dev/null and b/fuzz/seed/regexp/xpath-25 differ
diff --git a/fuzz/seed/regexp/xpath-26 b/fuzz/seed/regexp/xpath-26
new file mode 100644
index 00000000..b3bf8c23
Binary files /dev/null and b/fuzz/seed/regexp/xpath-26 differ
diff --git a/fuzz/seed/regexp/xpath-27 b/fuzz/seed/regexp/xpath-27
new file mode 100644
index 00000000..74bbe468
Binary files /dev/null and b/fuzz/seed/regexp/xpath-27 differ
diff --git a/fuzz/seed/regexp/xpath-28 b/fuzz/seed/regexp/xpath-28
new file mode 100644
index 00000000..b38a709e
Binary files /dev/null and b/fuzz/seed/regexp/xpath-28 differ
diff --git a/fuzz/seed/regexp/xpath-29 b/fuzz/seed/regexp/xpath-29
new file mode 100644
index 00000000..104d4e54
Binary files /dev/null and b/fuzz/seed/regexp/xpath-29 differ
diff --git a/fuzz/seed/regexp/xpath-3 b/fuzz/seed/regexp/xpath-3
new file mode 100644
index 00000000..6d7be85f
Binary files /dev/null and b/fuzz/seed/regexp/xpath-3 differ
diff --git a/fuzz/seed/regexp/xpath-30 b/fuzz/seed/regexp/xpath-30
new file mode 100644
index 00000000..b681ff14
Binary files /dev/null and b/fuzz/seed/regexp/xpath-30 differ
diff --git a/fuzz/seed/regexp/xpath-31 b/fuzz/seed/regexp/xpath-31
new file mode 100644
index 00000000..cd87b0e8
Binary files /dev/null and b/fuzz/seed/regexp/xpath-31 differ
diff --git a/fuzz/seed/regexp/xpath-32 b/fuzz/seed/regexp/xpath-32
new file mode 100644
index 00000000..c5cac32a
Binary files /dev/null and b/fuzz/seed/regexp/xpath-32 differ
diff --git a/fuzz/seed/regexp/xpath-33 b/fuzz/seed/regexp/xpath-33
new file mode 100644
index 00000000..89e3fcdc
Binary files /dev/null and b/fuzz/seed/regexp/xpath-33 differ
diff --git a/fuzz/seed/regexp/xpath-34 b/fuzz/seed/regexp/xpath-34
new file mode 100644
index 00000000..b65a3d6f
Binary files /dev/null and b/fuzz/seed/regexp/xpath-34 differ
diff --git a/fuzz/seed/regexp/xpath-35 b/fuzz/seed/regexp/xpath-35
new file mode 100644
index 00000000..252a70c2
Binary files /dev/null and b/fuzz/seed/regexp/xpath-35 differ
diff --git a/fuzz/seed/regexp/xpath-4 b/fuzz/seed/regexp/xpath-4
new file mode 100644
index 00000000..30718c57
Binary files /dev/null and b/fuzz/seed/regexp/xpath-4 differ
diff --git a/fuzz/seed/regexp/xpath-5 b/fuzz/seed/regexp/xpath-5
new file mode 100644
index 00000000..06ad88ef
Binary files /dev/null and b/fuzz/seed/regexp/xpath-5 differ
diff --git a/fuzz/seed/regexp/xpath-6 b/fuzz/seed/regexp/xpath-6
new file mode 100644
index 00000000..66787728
Binary files /dev/null and b/fuzz/seed/regexp/xpath-6 differ
diff --git a/fuzz/seed/regexp/xpath-7 b/fuzz/seed/regexp/xpath-7
new file mode 100644
index 00000000..e69ad856
Binary files /dev/null and b/fuzz/seed/regexp/xpath-7 differ
diff --git a/fuzz/seed/regexp/xpath-8 b/fuzz/seed/regexp/xpath-8
new file mode 100644
index 00000000..a8120ccd
Binary files /dev/null and b/fuzz/seed/regexp/xpath-8 differ
diff --git a/fuzz/seed/regexp/xpath-9 b/fuzz/seed/regexp/xpath-9
new file mode 100644
index 00000000..c037ce7f
Binary files /dev/null and b/fuzz/seed/regexp/xpath-9 differ
diff --git a/fuzz/seed/uri/dot b/fuzz/seed/uri/dot
new file mode 100644
index 00000000..945c9b46
--- /dev/null
+++ b/fuzz/seed/uri/dot
@@ -0,0 +1 @@
+.
\ No newline at end of file
diff --git a/fuzz/seed/uri/full b/fuzz/seed/uri/full
new file mode 100644
index 00000000..808e58a1
Binary files /dev/null and b/fuzz/seed/uri/full differ
diff --git a/fuzz/testFuzzer.c b/fuzz/testFuzzer.c
new file mode 100644
index 00000000..f6be7b8f
--- /dev/null
+++ b/fuzz/testFuzzer.c
@@ -0,0 +1,68 @@
+/*
+ * testFuzzer.c: Test program for the custom entity loader used to fuzz
+ * with multiple inputs.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xmlstring.h>
+#include "fuzz.h"
+
+/*
+ * Feed a fuzz input holding three entities (main document, external DTD
+ * and external text entity) through the fuzz data helpers, parse the main
+ * entity with entity substitution and DTD loading enabled, and compare
+ * the serialized document with the expected output.
+ *
+ * Returns 0 if the output matches, 1 otherwise.
+ */
+int
+main(void) {
+    /* Every entity name and every entity body ends in backslash-newline. */
+    static const char data[] =
+        "doc.xml\\\n"
+        "<!DOCTYPE doc SYSTEM \"doc.dtd\">\n"
+        "<doc>&ent;</doc>\\\n"
+        "doc.dtd\\\n"
+        "<!ELEMENT doc (#PCDATA)>\n"
+        "<!ENTITY ent SYSTEM \"ent.txt\">\\\n"
+        "ent.txt\\\n"
+        "Hello, world!\\\n";
+    static const xmlChar expected[] =
+        "<?xml version=\"1.0\"?>\n"
+        "<!DOCTYPE doc SYSTEM \"doc.dtd\">\n"
+        "<doc>Hello, world!</doc>\n";
+    const char *docBuffer;
+    size_t docSize;
+    xmlDocPtr doc;
+    xmlChar *out = NULL;
+    int ret = 0;
+
+    xmlSetExternalEntityLoader(xmlFuzzEntityLoader);
+
+    xmlFuzzDataInit(data, sizeof(data) - 1);
+    xmlFuzzReadEntities();
+    docBuffer = xmlFuzzMainEntity(&docSize);
+    doc = xmlReadMemory(docBuffer, docSize, NULL, NULL,
+                        XML_PARSE_NOENT | XML_PARSE_DTDLOAD);
+
+    xmlDocDumpMemory(doc, &out, NULL);
+    if (xmlStrcmp(out, expected) != 0) {
+        /* out stays NULL if nothing was serialized; passing NULL to %s
+         * is undefined behavior, so print a placeholder instead. */
+        fprintf(stderr, "Expected:\n%sGot:\n%s", expected,
+                out != NULL ? (const char *) out : "(null)\n");
+        ret = 1;
+    }
+
+    xmlFree(out);
+    xmlFreeDoc(doc);
+    xmlFuzzDataCleanup();
+
+    return(ret);
+}
+
diff --git a/fuzz/uri.c b/fuzz/uri.c
new file mode 100644
index 00000000..69d0439f
--- /dev/null
+++ b/fuzz/uri.c
@@ -0,0 +1,62 @@
+/*
+ * uri.c: a libFuzzer target to test the URI module.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <libxml/uri.h>
+#include "fuzz.h"
+
+/*
+ * Serialize a parsed URI, throw the string away and release the URI.
+ */
+static void
+saveAndFreeURI(xmlURIPtr uri) {
+    xmlChar *saved = xmlSaveUri(uri);
+
+    xmlFree(saved);
+    xmlFreeURI(uri);
+}
+
+/*
+ * libFuzzer entry point. Up to two strings are extracted from the fuzz
+ * data. The first one is passed to the URI functions that take a single
+ * string; if a second one was extracted, both are passed to the functions
+ * that take two. Always returns 0.
+ */
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    char *str[2] = { NULL, NULL };
+    char *first, *second;
+    size_t count;
+
+    count = xmlFuzzExtractStrings(data, size, str, 2);
+    first = str[0];
+    second = str[1];
+
+    /* Parse and serialize, both in normal and in raw mode. */
+    saveAndFreeURI(xmlParseURI(first));
+    saveAndFreeURI(xmlParseURIRaw(first, 1));
+
+    /* String-to-string helpers working on a single input. */
+    xmlFree(xmlURIUnescapeString(first, -1, NULL));
+    xmlFree(xmlURIEscape(BAD_CAST first));
+    xmlFree(xmlCanonicPath(BAD_CAST first));
+    xmlFree(xmlPathToURI(BAD_CAST first));
+
+    /* Helpers that need two inputs. */
+    if (count > 1) {
+        xmlFree(xmlBuildURI(BAD_CAST second, BAD_CAST first));
+        xmlFree(xmlBuildRelativeURI(BAD_CAST second, BAD_CAST first));
+        xmlFree(xmlURIEscapeStr(BAD_CAST first, BAD_CAST second));
+    }
+
+    /* Normalization rewrites the buffer in place, so it runs last. */
+    xmlNormalizeURIPath(first);
+
+    xmlFree(first);
+    xmlFree(second);
+
+    return 0;
+}
+
diff --git a/fuzz/xml.c b/fuzz/xml.c
new file mode 100644
index 00000000..50dd967d
--- /dev/null
+++ b/fuzz/xml.c
@@ -0,0 +1,105 @@
+/*
+ * xml.c: a libFuzzer target to test several XML parser interfaces.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xmlerror.h>
+#include <libxml/xmlreader.h>
+#include "fuzz.h"
+
+/*
+ * One-time libFuzzer setup: initialize the parser and install the fuzzing
+ * error handler and entity loader. Both arguments are unused.
+ */
+int
+LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
+                     char ***argv ATTRIBUTE_UNUSED) {
+    xmlInitParser();
+    xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc);
+    xmlSetExternalEntityLoader(xmlFuzzEntityLoader);
+
+    return 0;
+}
+
+/*
+ * libFuzzer entry point. Reads the parser options and the entities from
+ * the fuzz data, then runs the main entity through the pull parser (plus
+ * the serializer), the push parser and the reader. Always returns 0.
+ */
+int
+LLVMFuzzerTestOneInput(const char *data, size_t size) {
+    static const size_t maxChunkSize = 128;
+    xmlDocPtr doc;
+    xmlParserCtxtPtr ctxt;
+    xmlTextReaderPtr reader;
+    xmlChar *out;
+    const char *docBuffer;
+    size_t docSize, consumed, chunkSize;
+    int opts, outSize;
+
+    xmlFuzzDataInit(data, size);
+    opts = xmlFuzzReadInt();
+    /* XML_PARSE_HUGE still causes timeouts. */
+    opts &= ~XML_PARSE_HUGE;
+
+    xmlFuzzReadEntities();
+    docBuffer = xmlFuzzMainEntity(&docSize);
+    if (docBuffer == NULL) {
+        xmlFuzzDataCleanup();
+        return(0);
+    }
+
+    /* Pull parser */
+
+    doc = xmlReadMemory(docBuffer, docSize, NULL, NULL, opts);
+    /* Also test the serializer. */
+    xmlDocDumpMemory(doc, &out, &outSize);
+    xmlFree(out);
+    xmlFreeDoc(doc);
+
+    /* Push parser */
+
+    ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
+    /* Context creation can fail; ctxt->myDoc must not be read then. */
+    if (ctxt != NULL) {
+        xmlCtxtUseOptions(ctxt, opts);
+
+        /* Feed the document in pieces of at most maxChunkSize bytes. */
+        for (consumed = 0; consumed < docSize; consumed += chunkSize) {
+            chunkSize = docSize - consumed;
+            if (chunkSize > maxChunkSize)
+                chunkSize = maxChunkSize;
+            xmlParseChunk(ctxt, docBuffer + consumed, chunkSize, 0);
+        }
+
+        /* Signal the end of the input, then release the result. */
+        xmlParseChunk(ctxt, NULL, 0, 1);
+        xmlFreeDoc(ctxt->myDoc);
+        xmlFreeParserCtxt(ctxt);
+    }
+
+    /* Reader */
+
+    reader = xmlReaderForMemory(docBuffer, docSize, NULL, NULL, opts);
+    while (xmlTextReaderRead(reader) == 1) {
+        if (xmlTextReaderNodeType(reader) == XML_ELEMENT_NODE) {
+            int i, n = xmlTextReaderAttributeCount(reader);
+            /* Visit every attribute and read through its value. */
+            for (i=0; i<n; i++) {
+                xmlTextReaderMoveToAttributeNo(reader, i);
+                while (xmlTextReaderReadAttributeValue(reader) == 1);
+            }
+        }
+    }
+    xmlFreeTextReader(reader);
+
+    /* Cleanup */
+
+    xmlFuzzDataCleanup();
+
+    return(0);
+}
+
diff --git a/fuzz/xml.dict b/fuzz/xml.dict
new file mode 100644
index 00000000..2573aea6
--- /dev/null
+++ b/fuzz/xml.dict
@@ -0,0 +1,76 @@
+xml_decl="<?xml version='1.0'?>"
+xml_decl_latin1="<?xml version='1.0' encoding='ISO-8859-1'?>"
+
+elem_start_end="<a></a>"
+elem_empty="<a/>"
+elem_ns_start_end="<a:a xmlns:a='a'></a:a>"
+elem_ns_empty="<a:a xmlns:a='a'/>"
+
+attr=" a='a'"
+
+ns_decl=" xmlns:a='a'"
+ns_default=" xmlns='a'"
+ns_prefix="a:"
+
+cdata_section="<![CDATA[ ]]>"
+
+comment="<!-- -->"
+
+pi="<?a?>"
+
+elem_decl_any="<!ELEMENT a ANY>"
+elem_decl_empty="<!ELEMENT a EMPTY>"
+elem_decl_children="<!ELEMENT a (a)>"
+elem_decl_mixed="<!ELEMENT a (#PCDATA|a)>"
+elem_children_choice="|a"
+elem_children_seq=",a"
+elem_children_sub_choice="|(a)"
+elem_children_sub_seq=",(a)"
+elem_quant_any="*"
+elem_quant_opt="?"
+elem_quant_some="+"
+
+attlist_decl_cdata_req="<!ATTLIST a a CDATA #REQUIRED>"
+attlist_decl_cdata_imp="<!ATTLIST a a CDATA #IMPLIED>"
+attlist_decl_cdata_def="<!ATTLIST a a CDATA 'a'>"
+attlist_decl_cdata_fix="<!ATTLIST a a CDATA #FIXED 'a'>"
+attlist_decl_id="<!ATTLIST a a ID #IMPLIED>"
+attlist_decl_idref="<!ATTLIST a a IDREF #IMPLIED>"
+attlist_decl_idrefs="<!ATTLIST a a IDREFS #IMPLIED>"
+attlist_decl_entity="<!ATTLIST a a ENTITY #IMPLIED>"
+attlist_decl_entities="<!ATTLIST a a ENTITIES #IMPLIED>"
+attlist_decl_nmtoken="<!ATTLIST a a NMTOKEN #IMPLIED>"
+attlist_decl_nmtokens="<!ATTLIST a a NMTOKENS #IMPLIED>"
+attlist_decl_enum="<!ATTLIST a a (a) #IMPLIED>"
+attlist_decl_notation="<!ATTLIST a a NOTATION (a) #IMPLIED>"
+
+include_sect="<![INCLUDE[ ]]>"
+ignore_sect="<![IGNORE[ ]]>"
+
+ge_decl="<!ENTITY a 'a'>"
+ge_decl_system="<!ENTITY a SYSTEM 'a'>"
+ge_decl_system_ndata="<!ENTITY a SYSTEM 'a' NDATA a>"
+ge_decl_public="<!ENTITY a PUBLIC 'a' 'a'>"
+ge_decl_public_ndata="<!ENTITY a PUBLIC 'a' 'a' NDATA a>"
+
+pe_decl="<!ENTITY % a 'a'>"
+pe_decl_system="<!ENTITY % a SYSTEM 'a'>"
+pe_decl_public="<!ENTITY % a PUBLIC 'a' 'a'>"
+
+char_ref_dec="&#60;"
+char_ref_hex="&#x3c;"
+char_ref_quoted="&#38;#60;"
+
+ge_ref_lt="&lt;"
+ge_ref_gt="&gt;"
+ge_ref_amp="&amp;"
+ge_ref_apos="&apos;"
+ge_ref_quot="&quot;"
+ge_ref="&a;"
+ge_ref_quoted="&#38;a;"
+
+pe_ref="%a;"
+pe_ref_quoted="&#37;a;"
+
+notation_decl_public="<!NOTATION a PUBLIC 'a'>"
+notation_decl_system="<!NOTATION a SYSTEM 'a'>"
diff --git a/fuzz/xmlSeed.c b/fuzz/xmlSeed.c
new file mode 100644
index 00000000..e1afcc63
--- /dev/null
+++ b/fuzz/xmlSeed.c
@@ -0,0 +1,118 @@
+/*
+ * xmlSeed.c: Generate the XML seed corpus for fuzzing.
+ *
+ * See Copyright for the status of this software.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <libxml/hash.h>
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+#include <libxml/xmlIO.h>
+#include <libxml/xmlerror.h>
+#include "fuzz.h"
+
+/* URLs that entityLoader has already written to stdout. */
+static xmlHashTablePtr entities;
+
+/*
+ * Generic error handler that drops every message.
+ */
+static void
+errorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED, ...) {
+    /* Discard error messages. */
+}
+
+/*
+ * Write a random-length string in a format similar to FuzzedDataProvider.
+ * Backslash followed by newline marks the end of the string. Two
+ * backslashes are used to escape a backslash.
+ *
+ * str is a NUL-terminated string; output goes to stdout.
+ */
+static void
+writeEscaped(const char *str) {
+    for (; *str; str++) {
+        int c = (unsigned char) *str;
+        putchar(c);
+        if (c == '\\')
+            putchar(c);
+    }
+    putchar('\\');
+    putchar('\n');
+}
+
+/*
+ * A custom entity loader that writes all external DTDs or entities to a
+ * single file in the format expected by xmlFuzzEntityLoader.
+ *
+ * The first time a URL is loaded, its whole content is read and written
+ * to stdout together with the URL, and the URL is recorded in the
+ * entities table. The input used for reading is freed and a freshly
+ * loaded input is returned to the parser. Later loads of the same URL
+ * are passed through without writing anything.
+ *
+ * Returns NULL if the resource cannot be loaded or read.
+ */
+static xmlParserInputPtr
+entityLoader(const char *URL, const char *ID, xmlParserCtxtPtr context) {
+    xmlParserInputPtr in;
+    static const int chunkSize = 16384;
+    int len;
+
+    in = xmlNoNetExternalEntityLoader(URL, ID, context);
+    if (in == NULL)
+        return(NULL);
+
+    if (xmlHashLookup(entities, (const xmlChar *) URL) != NULL)
+        return(in);
+
+    /* Pull the complete resource into the input buffer. */
+    do {
+        len = xmlParserInputBufferGrow(in->buf, chunkSize);
+        if (len < 0) {
+            fprintf(stderr, "Error reading %s\n", URL);
+            xmlFreeInputStream(in);
+            return(NULL);
+        }
+    } while (len > 0);
+
+    writeEscaped(URL);
+    writeEscaped((char *) xmlBufContent(in->buf->buffer));
+
+    xmlFreeInputStream(in);
+
+    xmlHashAddEntry(entities, (const xmlChar *) URL, "seen");
+
+    return(xmlNoNetExternalEntityLoader(URL, ID, context));
+}
+
+/*
+ * Parse the file given on the command line with entity substitution and
+ * DTD loading enabled. The parser options are written to stdout as a raw
+ * int first; entityLoader then appends every resource it loads.
+ *
+ * Returns 1 if the argument count is wrong, 0 otherwise.
+ */
+int
+main(int argc, char **argv) {
+    int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: xmlSeed [FILE]\n");
+        /* Without exactly one FILE argument there is nothing to parse. */
+        return(1);
+    }
+
+    fwrite(&opts, sizeof(opts), 1, stdout);
+
+    entities = xmlHashCreate(4);
+    xmlSetGenericErrorFunc(NULL, errorFunc);
+    xmlSetExternalEntityLoader(entityLoader);
+    xmlFreeDoc(xmlReadFile(argv[1], NULL, opts));
+    xmlHashFree(entities, NULL);
+
+    return(0);
+}
+


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]