[xml] [PATCH] Do normalize string-based datatype value in RelaxNG facet checking



Original patch is from Jan Pokorný <jpokorny redhat com>.
This version improves it according to review feedback and adds test files.
---
 include/libxml/schemasInternals.h | 92 +++++++++++++++++++--------------------
 result/relaxng/pattern3_1         |  0
 result/relaxng/pattern3_1.err     |  1 +
 test/relaxng/pattern3.rng         | 11 +++++
 test/relaxng/pattern3_1.xml       |  1 +
 xmlschemastypes.c                 | 10 ++++-
 6 files changed, 68 insertions(+), 47 deletions(-)
 create mode 100644 result/relaxng/pattern3_1
 create mode 100644 result/relaxng/pattern3_1.err
 create mode 100644 test/relaxng/pattern3.rng
 create mode 100644 test/relaxng/pattern3_1.xml

diff --git a/include/libxml/schemasInternals.h b/include/libxml/schemasInternals.h
index 4f0ca9a..c7cf552 100644
--- a/include/libxml/schemasInternals.h
+++ b/include/libxml/schemasInternals.h
@@ -28,52 +28,52 @@ extern "C" {
 
 typedef enum {
     XML_SCHEMAS_UNKNOWN = 0,
-    XML_SCHEMAS_STRING,
-    XML_SCHEMAS_NORMSTRING,
-    XML_SCHEMAS_DECIMAL,
-    XML_SCHEMAS_TIME,
-    XML_SCHEMAS_GDAY,
-    XML_SCHEMAS_GMONTH,
-    XML_SCHEMAS_GMONTHDAY,
-    XML_SCHEMAS_GYEAR,
-    XML_SCHEMAS_GYEARMONTH,
-    XML_SCHEMAS_DATE,
-    XML_SCHEMAS_DATETIME,
-    XML_SCHEMAS_DURATION,
-    XML_SCHEMAS_FLOAT,
-    XML_SCHEMAS_DOUBLE,
-    XML_SCHEMAS_BOOLEAN,
-    XML_SCHEMAS_TOKEN,
-    XML_SCHEMAS_LANGUAGE,
-    XML_SCHEMAS_NMTOKEN,
-    XML_SCHEMAS_NMTOKENS,
-    XML_SCHEMAS_NAME,
-    XML_SCHEMAS_QNAME,
-    XML_SCHEMAS_NCNAME,
-    XML_SCHEMAS_ID,
-    XML_SCHEMAS_IDREF,
-    XML_SCHEMAS_IDREFS,
-    XML_SCHEMAS_ENTITY,
-    XML_SCHEMAS_ENTITIES,
-    XML_SCHEMAS_NOTATION,
-    XML_SCHEMAS_ANYURI,
-    XML_SCHEMAS_INTEGER,
-    XML_SCHEMAS_NPINTEGER,
-    XML_SCHEMAS_NINTEGER,
-    XML_SCHEMAS_NNINTEGER,
-    XML_SCHEMAS_PINTEGER,
-    XML_SCHEMAS_INT,
-    XML_SCHEMAS_UINT,
-    XML_SCHEMAS_LONG,
-    XML_SCHEMAS_ULONG,
-    XML_SCHEMAS_SHORT,
-    XML_SCHEMAS_USHORT,
-    XML_SCHEMAS_BYTE,
-    XML_SCHEMAS_UBYTE,
-    XML_SCHEMAS_HEXBINARY,
-    XML_SCHEMAS_BASE64BINARY,
-    XML_SCHEMAS_ANYTYPE,
-    XML_SCHEMAS_ANYSIMPLETYPE
+    XML_SCHEMAS_STRING = 1,
+    XML_SCHEMAS_NORMSTRING = 2,
+    XML_SCHEMAS_DECIMAL = 3,
+    XML_SCHEMAS_TIME = 4,
+    XML_SCHEMAS_GDAY = 5,
+    XML_SCHEMAS_GMONTH = 6,
+    XML_SCHEMAS_GMONTHDAY = 7,
+    XML_SCHEMAS_GYEAR = 8,
+    XML_SCHEMAS_GYEARMONTH = 9,
+    XML_SCHEMAS_DATE = 10,
+    XML_SCHEMAS_DATETIME = 11,
+    XML_SCHEMAS_DURATION = 12,
+    XML_SCHEMAS_FLOAT = 13,
+    XML_SCHEMAS_DOUBLE = 14,
+    XML_SCHEMAS_BOOLEAN = 15,
+    XML_SCHEMAS_TOKEN = 16,
+    XML_SCHEMAS_LANGUAGE = 17,
+    XML_SCHEMAS_NMTOKEN = 18,
+    XML_SCHEMAS_NMTOKENS = 19,
+    XML_SCHEMAS_NAME = 20,
+    XML_SCHEMAS_QNAME = 21,
+    XML_SCHEMAS_NCNAME = 22,
+    XML_SCHEMAS_ID = 23,
+    XML_SCHEMAS_IDREF = 24,
+    XML_SCHEMAS_IDREFS = 25,
+    XML_SCHEMAS_ENTITY = 26,
+    XML_SCHEMAS_ENTITIES = 27,
+    XML_SCHEMAS_NOTATION = 28,
+    XML_SCHEMAS_ANYURI = 29,
+    XML_SCHEMAS_INTEGER = 30,
+    XML_SCHEMAS_NPINTEGER = 31,
+    XML_SCHEMAS_NINTEGER = 32,
+    XML_SCHEMAS_NNINTEGER = 33,
+    XML_SCHEMAS_PINTEGER = 34,
+    XML_SCHEMAS_INT = 35,
+    XML_SCHEMAS_UINT = 36,
+    XML_SCHEMAS_LONG = 37,
+    XML_SCHEMAS_ULONG = 38,
+    XML_SCHEMAS_SHORT = 39,
+    XML_SCHEMAS_USHORT = 40,
+    XML_SCHEMAS_BYTE = 41,
+    XML_SCHEMAS_UBYTE = 42,
+    XML_SCHEMAS_HEXBINARY = 43,
+    XML_SCHEMAS_BASE64BINARY = 44,
+    XML_SCHEMAS_ANYTYPE = 45,
+    XML_SCHEMAS_ANYSIMPLETYPE = 46
 } xmlSchemaValType;
 
 /*
diff --git a/result/relaxng/pattern3_1 b/result/relaxng/pattern3_1
new file mode 100644
index 0000000..e69de29
diff --git a/result/relaxng/pattern3_1.err b/result/relaxng/pattern3_1.err
new file mode 100644
index 0000000..b1a9803
--- /dev/null
+++ b/result/relaxng/pattern3_1.err
@@ -0,0 +1 @@
+./test/relaxng/pattern3_1.xml validates
diff --git a/test/relaxng/pattern3.rng b/test/relaxng/pattern3.rng
new file mode 100644
index 0000000..fa4434f
--- /dev/null
+++ b/test/relaxng/pattern3.rng
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<grammar xmlns="http://relaxng.org/ns/structure/1.0"
+datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
+  <start>
+    <element name="test">
+    <data type="token">
+    <param name="pattern">[a-z]+</param>
+    </data>
+    </element>
+  </start>
+</grammar>
diff --git a/test/relaxng/pattern3_1.xml b/test/relaxng/pattern3_1.xml
new file mode 100644
index 0000000..f559cd3
--- /dev/null
+++ b/test/relaxng/pattern3_1.xml
@@ -0,0 +1 @@
+<test> ooo </test>
diff --git a/xmlschemastypes.c b/xmlschemastypes.c
index ff64f50..3c9514c 100644
--- a/xmlschemastypes.c
+++ b/xmlschemastypes.c
@@ -5315,7 +5315,15 @@ xmlSchemaValidateFacetInternal(xmlSchemaFacetPtr facet,
            */
            if (value == NULL)
                return(-1);
-           ret = xmlRegexpExec(facet->regexp, value);
+           /*
+           * If string-derived type, regexp must be tested on the value space of
+           * the datatype.
+           * See https://www.w3.org/TR/xmlschema-2/#rf-pattern
+           */
+           const int stringType = val && ((val->type >= XML_SCHEMAS_STRING && val->type <= XML_SCHEMAS_NORMSTRING)
+                                           || (val->type >= XML_SCHEMAS_TOKEN && val->type <= XML_SCHEMAS_NCNAME));
+           ret = xmlRegexpExec(facet->regexp,
+                               (stringType && val->value.str) ? val->value.str : value);
            if (ret == 1)
                return(0);
            if (ret == 0)
-- 
2.7.0



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]