[libxml2] Add support for some non-standard escapes in regular expressions.



commit ec8ff95ce3c92caaa23e70b5df80418d83abd83d
Author: Damjan Jovanovic <damjan jov gmail com>
Date:   Sat May 29 16:36:44 2021 +0200

    Add support for some non-standard escapes in regular expressions.
    
    This adds support for some non-standard escape sequences observed
    in Microsoft's MSXML DLLs and used by Windows apps, and thus
    needed by Wine. Some are also used in other XML implementations,
    e.g. Java's.
    
    This isn't intended to be final. We will probably want a way for
    the caller to toggle these non-standard escape sequences on and
    off as needed.
    
    Further discussion: https://gitlab.gnome.org/GNOME/libxml2/-/issues/260

 xmlregexp.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)
---
diff --git a/xmlregexp.c b/xmlregexp.c
index f9aac42f..92bae6fb 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -4969,7 +4969,10 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
        (cur == '|') || (cur == '.') || (cur == '?') || (cur == '*') ||
        (cur == '+') || (cur == '(') || (cur == ')') || (cur == '{') ||
        (cur == '}') || (cur == 0x2D) || (cur == 0x5B) || (cur == 0x5D) ||
-       (cur == 0x5E)) {
+       (cur == 0x5E) || (cur == '!') || (cur == '"') || (cur == '#') ||
+       (cur == '$') || (cur == '%') || (cur == ',') || (cur == '/') ||
+       (cur == ':') || (cur == ';') || (cur == '=') || (cur == '>') ||
+       (cur == '@') || (cur == '`') || (cur == '~') || (cur == 'u')) {
        if (ctxt->atom == NULL) {
            ctxt->atom = xmlRegNewAtom(ctxt, XML_REGEXP_CHARVAL);
            if (ctxt->atom != NULL) {
@@ -4983,6 +4986,22 @@ xmlFAParseCharClassEsc(xmlRegParserCtxtPtr ctxt) {
                    case 't':
                        ctxt->atom->codepoint = '\t';
                        break;
+                   case 'u':
+                   {
+                       char hex_buffer[5];
+                       int loop;
+                       for (loop = 0; loop < 4; loop++) {
+                           NEXT;
+                           if (!('0' <= CUR && CUR <= '9') && !('a' <= CUR && CUR <= 'f') && !('A' <= CUR && CUR <= 'F')) {
+                               ERROR("Expecting hex digit");
+                               return;
+                           }
+                           hex_buffer[loop] = CUR;
+                       }
+                       hex_buffer[4] = 0;
+                       sscanf(hex_buffer, "%x", &ctxt->atom->codepoint);
+                       break;
+                   }
                    default:
                        ctxt->atom->codepoint = cur;
                }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]