[libxml2] Avoid potential integer overflow in xmlstring.c



commit 6010a5369f55dab2b724045fc9b68cb14313da35
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Fri Jan 28 16:27:12 2022 +0100

    Avoid potential integer overflow in xmlstring.c
    
    For historical reasons, the string API operates with int indices which
    can overflow, especially on 64-bit systems. libxml2 always made the
    tacit assumption that strings will never be larger than INT_MAX bytes.
    It should be considered a bug if any part of the code can produce
    larger strings, whether they are externally visible or not.
    
    Likewise, API users are expected not to supply strings larger than
    INT_MAX bytes. This requirement isn't documented. But even if it were,
    we must handle larger strings passed in by accident without causing
    memory errors.
    
    - xmlStrndup, xmlCharStrndup, xmlUTF8Strndup
      Avoid integer overflow if len == INT_MAX.
    
    - xmlStrlen, xmlUTF8Strlen, xmlUTF8Strsize, xmlUTF8Strloc
      Avoid integer overflow by using size_t for index. If an input string
      larger than INT_MAX bytes is detected, these functions now return 0
      instead of a wrong and possibly negative value.
    
    - xmlCheckUTF8
      Avoid integer overflow by limiting index range.
    
    - xmlStrncat, xmlStrncatNew, xmlEscapeFormatString
      Avoid integer overflow. Return NULL instead of producing strings
      larger than INT_MAX bytes.

 xmlstring.c | 61 +++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 33 insertions(+), 28 deletions(-)
---
diff --git a/xmlstring.c b/xmlstring.c
index 3f5a5a2c..5a6875f5 100644
--- a/xmlstring.c
+++ b/xmlstring.c
@@ -18,6 +18,7 @@
 
 #include <stdlib.h>
 #include <string.h>
+#include <limits.h>
 #include <libxml/xmlmemory.h>
 #include <libxml/parserInternals.h>
 #include <libxml/xmlstring.h>
@@ -42,7 +43,7 @@ xmlStrndup(const xmlChar *cur, int len) {
     xmlChar *ret;
 
     if ((cur == NULL) || (len < 0)) return(NULL);
-    ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlMallocAtomic(((size_t) len + 1) * sizeof(xmlChar));
     if (ret == NULL) {
         xmlErrMemory(NULL, NULL);
         return(NULL);
@@ -87,7 +88,7 @@ xmlCharStrndup(const char *cur, int len) {
     xmlChar *ret;
 
     if ((cur == NULL) || (len < 0)) return(NULL);
-    ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlMallocAtomic(((size_t) len + 1) * sizeof(xmlChar));
     if (ret == NULL) {
         xmlErrMemory(NULL, NULL);
         return(NULL);
@@ -423,14 +424,14 @@ xmlStrsub(const xmlChar *str, int start, int len) {
 
 int
 xmlStrlen(const xmlChar *str) {
-    int len = 0;
+    size_t len = 0;
 
     if (str == NULL) return(0);
     while (*str != 0) { /* non input consuming */
         str++;
         len++;
     }
-    return(len);
+    return(len > INT_MAX ? 0 : len);
 }
 
 /**
@@ -460,9 +461,9 @@ xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
         return(xmlStrndup(add, len));
 
     size = xmlStrlen(cur);
-    if (size < 0)
+    if ((size < 0) || (size > INT_MAX - len))
         return(NULL);
-    ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlRealloc(cur, ((size_t) size + len + 1) * sizeof(xmlChar));
     if (ret == NULL) {
         xmlErrMemory(NULL, NULL);
         return(cur);
@@ -500,9 +501,9 @@ xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
         return(xmlStrndup(str2, len));
 
     size = xmlStrlen(str1);
-    if (size < 0)
+    if ((size < 0) || (size > INT_MAX - len))
         return(NULL);
-    ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlMalloc(((size_t) size + len + 1) * sizeof(xmlChar));
     if (ret == NULL) {
         xmlErrMemory(NULL, NULL);
         return(xmlStrndup(str1, size));
@@ -667,7 +668,7 @@ xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
  */
 int
 xmlUTF8Strlen(const xmlChar *utf) {
-    int ret = 0;
+    size_t ret = 0;
 
     if (utf == NULL)
         return(-1);
@@ -694,7 +695,7 @@ xmlUTF8Strlen(const xmlChar *utf) {
         }
         ret++;
     }
-    return(ret);
+    return(ret > INT_MAX ? 0 : ret);
 }
 
 /**
@@ -796,26 +797,28 @@ xmlCheckUTF8(const unsigned char *utf)
      *    1110xxxx 10xxxxxx 10xxxxxx                    valid 3-byte
      *    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx           valid 4-byte
      */
-    for (ix = 0; (c = utf[ix]);) {      /* string is 0-terminated */
+    while ((c = utf[0])) {      /* string is 0-terminated */
+        ix = 0;
         if ((c & 0x80) == 0x00) {      /* 1-byte code, starts with 10 */
-            ix++;
+            ix = 1;
        } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
-           if ((utf[ix+1] & 0xc0 ) != 0x80)
+           if ((utf[1] & 0xc0 ) != 0x80)
                return 0;
-           ix += 2;
+           ix = 2;
        } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
-           if (((utf[ix+1] & 0xc0) != 0x80) ||
-               ((utf[ix+2] & 0xc0) != 0x80))
+           if (((utf[1] & 0xc0) != 0x80) ||
+               ((utf[2] & 0xc0) != 0x80))
                    return 0;
-           ix += 3;
+           ix = 3;
        } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
-           if (((utf[ix+1] & 0xc0) != 0x80) ||
-               ((utf[ix+2] & 0xc0) != 0x80) ||
-               ((utf[ix+3] & 0xc0) != 0x80))
+           if (((utf[1] & 0xc0) != 0x80) ||
+               ((utf[2] & 0xc0) != 0x80) ||
+               ((utf[3] & 0xc0) != 0x80))
                    return 0;
-           ix += 4;
+           ix = 4;
        } else                          /* unknown encoding */
            return 0;
+        utf += ix;
       }
       return(1);
 }
@@ -836,6 +839,7 @@ int
 xmlUTF8Strsize(const xmlChar *utf, int len) {
     const xmlChar *ptr=utf;
     int ch;
+    size_t ret;
 
     if (utf == NULL)
         return(0);
@@ -852,7 +856,8 @@ xmlUTF8Strsize(const xmlChar *utf, int len) {
                 ptr++;
            }
     }
-    return (ptr - utf);
+    ret = ptr - utf;
+    return (ret > INT_MAX ? 0 : ret);
 }
 
 
@@ -872,11 +877,8 @@ xmlUTF8Strndup(const xmlChar *utf, int len) {
 
     if ((utf == NULL) || (len < 0)) return(NULL);
     i = xmlUTF8Strsize(utf, len);
-    ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
+    ret = (xmlChar *) xmlMallocAtomic(((size_t) i + 1) * sizeof(xmlChar));
     if (ret == NULL) {
-        xmlGenericError(xmlGenericErrorContext,
-                "malloc of %ld byte failed\n",
-                (len + 1) * (long)sizeof(xmlChar));
         return(NULL);
     }
     memcpy(ret, utf, i * sizeof(xmlChar));
@@ -928,14 +930,15 @@ xmlUTF8Strpos(const xmlChar *utf, int pos) {
  */
 int
 xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
-    int i, size;
+    size_t i;
+    int size;
     int ch;
 
     if (utf==NULL || utfchar==NULL) return -1;
     size = xmlUTF8Strsize(utfchar, 1);
         for(i=0; (ch=*utf) != 0; i++) {
             if (xmlStrncmp(utf, utfchar, size)==0)
-                return(i);
+                return(i > INT_MAX ? 0 : i);
             utf++;
             if ( ch & 0x80 ) {
                 /* if not simple ascii, verify proper format */
@@ -1022,6 +1025,8 @@ xmlEscapeFormatString(xmlChar **msg)
     if (count == 0)
         return(*msg);
 
+    if ((count > INT_MAX) || (msgLen > INT_MAX - count))
+        return(NULL);
     resultLen = msgLen + count + 1;
     result = (xmlChar *) xmlMallocAtomic(resultLen * sizeof(xmlChar));
     if (result == NULL) {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]