[libxml2] Fix parse failure when 4-byte character in UTF-16 BE is split across a chunk



commit 03bb929390a5cac451f720ab581817684ecddb8e
Author: David Kilzer <ddkilzer apple com>
Date:   Wed Jul 7 18:23:18 2021 -0700

    Fix parse failure when 4-byte character in UTF-16 BE is split across a chunk
    
    This makes the logic in UTF16BEToUTF8() match UTF16LEToUTF8().
    
    * encoding.c:
    (UTF16LEToUTF8):
    - Fix comment to describe what the code does.
    (UTF16BEToUTF8):
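    - Fix undefined behavior, mirroring the fix that was applied to
      UTF16LEToUTF8() in 2f9382033e.
    - Add a bounds check to the while() loop, mirroring the check that
      was added to UTF16LEToUTF8() in be803967db.
    - Do not return -2 when (in >= inend); break out of the loop
      instead, so that a surrogate pair split across a chunk boundary
      no longer causes a parse failure.  This fixes the bug and mirrors
      the change applied to UTF16LEToUTF8() in 496a1cf592.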
    - Inline (<< 8) statements to match UTF16LEToUTF8().
    
    Add the following tests and results:
    
      test/text-4-byte-UTF-16-BE-offset.xml
      test/text-4-byte-UTF-16-BE.xml
      test/text-4-byte-UTF-16-LE-offset.xml
      test/text-4-byte-UTF-16-LE.xml

 encoding.c                                         |  23 +++++++++++----------
 result/noent/text-4-byte-UTF-16-BE-offset.xml      | Bin 0 -> 4244 bytes
 result/noent/text-4-byte-UTF-16-BE-offset.xml.sax2 |  21 +++++++++++++++++++
 result/noent/text-4-byte-UTF-16-BE.xml             | Bin 0 -> 4238 bytes
 result/noent/text-4-byte-UTF-16-BE.xml.sax2        |  21 +++++++++++++++++++
 result/noent/text-4-byte-UTF-16-LE-offset.xml      | Bin 0 -> 4244 bytes
 result/noent/text-4-byte-UTF-16-LE-offset.xml.sax2 |  21 +++++++++++++++++++
 result/noent/text-4-byte-UTF-16-LE.xml             | Bin 0 -> 4238 bytes
 result/noent/text-4-byte-UTF-16-LE.xml.sax2        |  21 +++++++++++++++++++
 result/text-4-byte-UTF-16-BE-offset.xml            | Bin 0 -> 4244 bytes
 result/text-4-byte-UTF-16-BE-offset.xml.rde        |   5 +++++
 result/text-4-byte-UTF-16-BE-offset.xml.rdr        |   5 +++++
 result/text-4-byte-UTF-16-BE-offset.xml.sax        |  21 +++++++++++++++++++
 result/text-4-byte-UTF-16-BE-offset.xml.sax2       |  21 +++++++++++++++++++
 result/text-4-byte-UTF-16-BE.xml                   | Bin 0 -> 4238 bytes
 result/text-4-byte-UTF-16-BE.xml.rde               |   5 +++++
 result/text-4-byte-UTF-16-BE.xml.rdr               |   5 +++++
 result/text-4-byte-UTF-16-BE.xml.sax               |  21 +++++++++++++++++++
 result/text-4-byte-UTF-16-BE.xml.sax2              |  21 +++++++++++++++++++
 result/text-4-byte-UTF-16-LE-offset.xml            | Bin 0 -> 4244 bytes
 result/text-4-byte-UTF-16-LE-offset.xml.rde        |   5 +++++
 result/text-4-byte-UTF-16-LE-offset.xml.rdr        |   5 +++++
 result/text-4-byte-UTF-16-LE-offset.xml.sax        |  21 +++++++++++++++++++
 result/text-4-byte-UTF-16-LE-offset.xml.sax2       |  21 +++++++++++++++++++
 result/text-4-byte-UTF-16-LE.xml                   | Bin 0 -> 4238 bytes
 result/text-4-byte-UTF-16-LE.xml.rde               |   5 +++++
 result/text-4-byte-UTF-16-LE.xml.rdr               |   5 +++++
 result/text-4-byte-UTF-16-LE.xml.sax               |  21 +++++++++++++++++++
 result/text-4-byte-UTF-16-LE.xml.sax2              |  21 +++++++++++++++++++
 test/text-4-byte-UTF-16-BE-offset.xml              | Bin 0 -> 4244 bytes
 test/text-4-byte-UTF-16-BE.xml                     | Bin 0 -> 4238 bytes
 test/text-4-byte-UTF-16-LE-offset.xml              | Bin 0 -> 4244 bytes
 test/text-4-byte-UTF-16-LE.xml                     | Bin 0 -> 4238 bytes
 33 files changed, 304 insertions(+), 11 deletions(-)
---
diff --git a/encoding.c b/encoding.c
index 5e50c153..5d28e4f1 100644
--- a/encoding.c
+++ b/encoding.c
@@ -527,7 +527,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen,
            in++;
        }
         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
-           if (in >= inend) {           /* (in > inend) shouldn't happens */
+           if (in >= inend) {           /* handle split multi-byte characters */
                break;
            }
            if (xmlLittleEndian) {
@@ -744,38 +744,39 @@ UTF16BEToUTF8(unsigned char* out, int *outlen,
 {
     unsigned char* outstart = out;
     const unsigned char* processed = inb;
-    unsigned char* outend = out + *outlen;
+    unsigned char* outend;
     unsigned short* in = (unsigned short*) inb;
     unsigned short* inend;
     unsigned int c, d, inlen;
     unsigned char *tmp;
     int bits;
 
+    if (*outlen == 0) {
+        *inlenb = 0;
+        return(0);
+    }
+    outend = out + *outlen;
     if ((*inlenb % 2) == 1)
         (*inlenb)--;
     inlen = *inlenb / 2;
     inend= in + inlen;
-    while (in < inend) {
+    while ((in < inend) && (out - outstart + 5 < *outlen)) {
        if (xmlLittleEndian) {
            tmp = (unsigned char *) in;
            c = *tmp++;
-           c = c << 8;
-           c = c | (unsigned int) *tmp;
+           c = (c << 8) | (unsigned int) *tmp;
            in++;
        } else {
            c= *in++;
        }
         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
-           if (in >= inend) {           /* (in > inend) shouldn't happens */
-               *outlen = out - outstart;
-               *inlenb = processed - inb;
-               return(-2);
+           if (in >= inend) {           /* handle split multi-byte characters */
+                break;
            }
            if (xmlLittleEndian) {
                tmp = (unsigned char *) in;
                d = *tmp++;
-               d = d << 8;
-               d = d | (unsigned int) *tmp;
+               d = (d << 8) | (unsigned int) *tmp;
                in++;
            } else {
                d= *in++;
diff --git a/result/noent/text-4-byte-UTF-16-BE-offset.xml b/result/noent/text-4-byte-UTF-16-BE-offset.xml
new file mode 100644
index 00000000..8a314d9a
Binary files /dev/null and b/result/noent/text-4-byte-UTF-16-BE-offset.xml differ
diff --git a/result/noent/text-4-byte-UTF-16-BE-offset.xml.sax2 b/result/noent/text-4-byte-UTF-16-BE-offset.xml.sax2
new file mode 100644
index 00000000..835014be
--- /dev/null
+++ b/result/noent/text-4-byte-UTF-16-BE-offset.xml.sax2
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(body, NULL, NULL, 0, 0, 0)
+SAX.characters(
+ , 2)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 197)
+SAX.endElementNs(body, NULL, NULL)
+SAX.endDocument()
diff --git a/result/noent/text-4-byte-UTF-16-BE.xml b/result/noent/text-4-byte-UTF-16-BE.xml
new file mode 100644
index 00000000..3f3e3ea4
Binary files /dev/null and b/result/noent/text-4-byte-UTF-16-BE.xml differ
diff --git a/result/noent/text-4-byte-UTF-16-BE.xml.sax2 b/result/noent/text-4-byte-UTF-16-BE.xml.sax2
new file mode 100644
index 00000000..e57e2798
--- /dev/null
+++ b/result/noent/text-4-byte-UTF-16-BE.xml.sax2
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(body, NULL, NULL, 0, 0, 0)
+SAX.characters(
+, 1)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 193)
+SAX.endElementNs(body, NULL, NULL)
+SAX.endDocument()
diff --git a/result/noent/text-4-byte-UTF-16-LE-offset.xml b/result/noent/text-4-byte-UTF-16-LE-offset.xml
new file mode 100644
index 00000000..8a314d9a
Binary files /dev/null and b/result/noent/text-4-byte-UTF-16-LE-offset.xml differ
diff --git a/result/noent/text-4-byte-UTF-16-LE-offset.xml.sax2 b/result/noent/text-4-byte-UTF-16-LE-offset.xml.sax2
new file mode 100644
index 00000000..835014be
--- /dev/null
+++ b/result/noent/text-4-byte-UTF-16-LE-offset.xml.sax2
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(body, NULL, NULL, 0, 0, 0)
+SAX.characters(
+ , 2)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 197)
+SAX.endElementNs(body, NULL, NULL)
+SAX.endDocument()
diff --git a/result/noent/text-4-byte-UTF-16-LE.xml b/result/noent/text-4-byte-UTF-16-LE.xml
new file mode 100644
index 00000000..3f3e3ea4
Binary files /dev/null and b/result/noent/text-4-byte-UTF-16-LE.xml differ
diff --git a/result/noent/text-4-byte-UTF-16-LE.xml.sax2 b/result/noent/text-4-byte-UTF-16-LE.xml.sax2
new file mode 100644
index 00000000..e57e2798
--- /dev/null
+++ b/result/noent/text-4-byte-UTF-16-LE.xml.sax2
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(body, NULL, NULL, 0, 0, 0)
+SAX.characters(
+, 1)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 193)
+SAX.endElementNs(body, NULL, NULL)
+SAX.endDocument()
diff --git a/result/text-4-byte-UTF-16-BE-offset.xml b/result/text-4-byte-UTF-16-BE-offset.xml
new file mode 100644
index 00000000..8a314d9a
Binary files /dev/null and b/result/text-4-byte-UTF-16-BE-offset.xml differ
diff --git a/result/text-4-byte-UTF-16-BE-offset.xml.rde b/result/text-4-byte-UTF-16-BE-offset.xml.rde
new file mode 100644
index 00000000..6b813330
--- /dev/null
+++ b/result/text-4-byte-UTF-16-BE-offset.xml.rde
@@ -0,0 +1,5 @@
+0 1 body 0 0
+1 3 #text 0 1 
+ 
🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
 
🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓�
 
���🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓��
 
��🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓���
 �🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
+
+0 15 body 0 0
diff --git a/result/text-4-byte-UTF-16-BE-offset.xml.rdr b/result/text-4-byte-UTF-16-BE-offset.xml.rdr
new file mode 100644
index 00000000..6b813330
--- /dev/null
+++ b/result/text-4-byte-UTF-16-BE-offset.xml.rdr
@@ -0,0 +1,5 @@
+0 1 body 0 0
+1 3 #text 0 1 
+ 
🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
 
🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓�
 
���🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓��
 
��🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓���
 �🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
+
+0 15 body 0 0
diff --git a/result/text-4-byte-UTF-16-BE-offset.xml.sax b/result/text-4-byte-UTF-16-BE-offset.xml.sax
new file mode 100644
index 00000000..4b3acbf6
--- /dev/null
+++ b/result/text-4-byte-UTF-16-BE-offset.xml.sax
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(body)
+SAX.characters(
+ , 2)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 197)
+SAX.endElement(body)
+SAX.endDocument()
diff --git a/result/text-4-byte-UTF-16-BE-offset.xml.sax2 b/result/text-4-byte-UTF-16-BE-offset.xml.sax2
new file mode 100644
index 00000000..835014be
--- /dev/null
+++ b/result/text-4-byte-UTF-16-BE-offset.xml.sax2
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(body, NULL, NULL, 0, 0, 0)
+SAX.characters(
+ , 2)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 197)
+SAX.endElementNs(body, NULL, NULL)
+SAX.endDocument()
diff --git a/result/text-4-byte-UTF-16-BE.xml b/result/text-4-byte-UTF-16-BE.xml
new file mode 100644
index 00000000..3f3e3ea4
Binary files /dev/null and b/result/text-4-byte-UTF-16-BE.xml differ
diff --git a/result/text-4-byte-UTF-16-BE.xml.rde b/result/text-4-byte-UTF-16-BE.xml.rde
new file mode 100644
index 00000000..e3c48dd0
--- /dev/null
+++ b/result/text-4-byte-UTF-16-BE.xml.rde
@@ -0,0 +1,5 @@
+0 1 body 0 0
+1 3 #text 0 1 
+🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓�
 
���🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓��
 
��🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓���
 
�🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
 🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
+
+0 15 body 0 0
diff --git a/result/text-4-byte-UTF-16-BE.xml.rdr b/result/text-4-byte-UTF-16-BE.xml.rdr
new file mode 100644
index 00000000..e3c48dd0
--- /dev/null
+++ b/result/text-4-byte-UTF-16-BE.xml.rdr
@@ -0,0 +1,5 @@
+0 1 body 0 0
+1 3 #text 0 1 
+🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓�
 
���🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓��
 
��🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓���
 
�🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
 🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
+
+0 15 body 0 0
diff --git a/result/text-4-byte-UTF-16-BE.xml.sax b/result/text-4-byte-UTF-16-BE.xml.sax
new file mode 100644
index 00000000..301e8504
--- /dev/null
+++ b/result/text-4-byte-UTF-16-BE.xml.sax
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(body)
+SAX.characters(
+, 1)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 193)
+SAX.endElement(body)
+SAX.endDocument()
diff --git a/result/text-4-byte-UTF-16-BE.xml.sax2 b/result/text-4-byte-UTF-16-BE.xml.sax2
new file mode 100644
index 00000000..e57e2798
--- /dev/null
+++ b/result/text-4-byte-UTF-16-BE.xml.sax2
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(body, NULL, NULL, 0, 0, 0)
+SAX.characters(
+, 1)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 193)
+SAX.endElementNs(body, NULL, NULL)
+SAX.endDocument()
diff --git a/result/text-4-byte-UTF-16-LE-offset.xml b/result/text-4-byte-UTF-16-LE-offset.xml
new file mode 100644
index 00000000..8a314d9a
Binary files /dev/null and b/result/text-4-byte-UTF-16-LE-offset.xml differ
diff --git a/result/text-4-byte-UTF-16-LE-offset.xml.rde b/result/text-4-byte-UTF-16-LE-offset.xml.rde
new file mode 100644
index 00000000..6b813330
--- /dev/null
+++ b/result/text-4-byte-UTF-16-LE-offset.xml.rde
@@ -0,0 +1,5 @@
+0 1 body 0 0
+1 3 #text 0 1 
+ 
🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
 
🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓�
 
���🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓��
 
��🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓���
 �🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
+
+0 15 body 0 0
diff --git a/result/text-4-byte-UTF-16-LE-offset.xml.rdr b/result/text-4-byte-UTF-16-LE-offset.xml.rdr
new file mode 100644
index 00000000..6b813330
--- /dev/null
+++ b/result/text-4-byte-UTF-16-LE-offset.xml.rdr
@@ -0,0 +1,5 @@
+0 1 body 0 0
+1 3 #text 0 1 
+ 
🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
 
🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓�
 
���🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓��
 
��🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓���
 �🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
+
+0 15 body 0 0
diff --git a/result/text-4-byte-UTF-16-LE-offset.xml.sax b/result/text-4-byte-UTF-16-LE-offset.xml.sax
new file mode 100644
index 00000000..4b3acbf6
--- /dev/null
+++ b/result/text-4-byte-UTF-16-LE-offset.xml.sax
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(body)
+SAX.characters(
+ , 2)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 197)
+SAX.endElement(body)
+SAX.endDocument()
diff --git a/result/text-4-byte-UTF-16-LE-offset.xml.sax2 b/result/text-4-byte-UTF-16-LE-offset.xml.sax2
new file mode 100644
index 00000000..835014be
--- /dev/null
+++ b/result/text-4-byte-UTF-16-LE-offset.xml.sax2
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(body, NULL, NULL, 0, 0, 0)
+SAX.characters(
+ , 2)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 197)
+SAX.endElementNs(body, NULL, NULL)
+SAX.endDocument()
diff --git a/result/text-4-byte-UTF-16-LE.xml b/result/text-4-byte-UTF-16-LE.xml
new file mode 100644
index 00000000..3f3e3ea4
Binary files /dev/null and b/result/text-4-byte-UTF-16-LE.xml differ
diff --git a/result/text-4-byte-UTF-16-LE.xml.rde b/result/text-4-byte-UTF-16-LE.xml.rde
new file mode 100644
index 00000000..e3c48dd0
--- /dev/null
+++ b/result/text-4-byte-UTF-16-LE.xml.rde
@@ -0,0 +1,5 @@
+0 1 body 0 0
+1 3 #text 0 1 
+🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓�
 
���🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓��
 
��🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓���
 
�🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
 🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
+
+0 15 body 0 0
diff --git a/result/text-4-byte-UTF-16-LE.xml.rdr b/result/text-4-byte-UTF-16-LE.xml.rdr
new file mode 100644
index 00000000..e3c48dd0
--- /dev/null
+++ b/result/text-4-byte-UTF-16-LE.xml.rdr
@@ -0,0 +1,5 @@
+0 1 body 0 0
+1 3 #text 0 1 
+🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓�
 
���🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓��
 
��🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓���
 
�🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
 🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓🥓
+
+0 15 body 0 0
diff --git a/result/text-4-byte-UTF-16-LE.xml.sax b/result/text-4-byte-UTF-16-LE.xml.sax
new file mode 100644
index 00000000..301e8504
--- /dev/null
+++ b/result/text-4-byte-UTF-16-LE.xml.sax
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElement(body)
+SAX.characters(
+, 1)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 193)
+SAX.endElement(body)
+SAX.endDocument()
diff --git a/result/text-4-byte-UTF-16-LE.xml.sax2 b/result/text-4-byte-UTF-16-LE.xml.sax2
new file mode 100644
index 00000000..e57e2798
--- /dev/null
+++ b/result/text-4-byte-UTF-16-LE.xml.sax2
@@ -0,0 +1,21 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.startElementNs(body, NULL, NULL, 0, 0, 0)
+SAX.characters(
+, 1)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 300)
+SAX.characters(🥓🥓🥓🥓🥓🥓🥓��, 193)
+SAX.endElementNs(body, NULL, NULL)
+SAX.endDocument()
diff --git a/test/text-4-byte-UTF-16-BE-offset.xml b/test/text-4-byte-UTF-16-BE-offset.xml
new file mode 100644
index 00000000..04f02186
Binary files /dev/null and b/test/text-4-byte-UTF-16-BE-offset.xml differ
diff --git a/test/text-4-byte-UTF-16-BE.xml b/test/text-4-byte-UTF-16-BE.xml
new file mode 100644
index 00000000..5a6405b2
Binary files /dev/null and b/test/text-4-byte-UTF-16-BE.xml differ
diff --git a/test/text-4-byte-UTF-16-LE-offset.xml b/test/text-4-byte-UTF-16-LE-offset.xml
new file mode 100644
index 00000000..8a314d9a
Binary files /dev/null and b/test/text-4-byte-UTF-16-LE-offset.xml differ
diff --git a/test/text-4-byte-UTF-16-LE.xml b/test/text-4-byte-UTF-16-LE.xml
new file mode 100644
index 00000000..3f3e3ea4
Binary files /dev/null and b/test/text-4-byte-UTF-16-LE.xml differ


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]