[libxml2] 492317 Fix Relax-NG validation problems



commit 1ba2aca3ebc3b47653a86849746b168a4e0bd8c6
Author: Daniel Veillard <veillard redhat com>
Date:   Mon Aug 31 16:47:39 2009 +0200

    492317 Fix Relax-NG validation problems
    
    * relaxng.c xmlregexp.c: fix a subtle problem when checking for a
      compilable content model if the same element name is used in several
      branches of a choice. Handled by adding a special flag to the regexp
      compilation to detect transitions with different atoms using the
      same strings.
    * test/relaxng/492317* result/relaxng/492317*: add the test to the
      regression suite

 relaxng.c                   |   21 +++++++++++++
 result/relaxng/492317_0.err |    1 +
 result/relaxng/492317_1.err |    1 +
 result/relaxng/492317_2.err |    3 ++
 result/relaxng/492317_err   |    1 +
 test/relaxng/492317.rng     |   16 ++++++++++
 test/relaxng/492317_0.xml   |    4 ++
 test/relaxng/492317_1.xml   |    4 ++
 test/relaxng/492317_2.xml   |    4 ++
 xmlregexp.c                 |   68 ++++++++++++++++++++++++++++++++++++------
 10 files changed, 113 insertions(+), 10 deletions(-)
---
diff --git a/relaxng.c b/relaxng.c
index 6b83cfd..ea739de 100644
--- a/relaxng.c
+++ b/relaxng.c
@@ -2854,6 +2854,10 @@ xmlRelaxNGCleanupTypes(void)
  * 									*
  ************************************************************************/
 
+/* from automata.c but not exported */
+void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
+
+
 static int xmlRelaxNGTryCompile(xmlRelaxNGParserCtxtPtr ctxt,
                                 xmlRelaxNGDefinePtr def);
 
@@ -3037,6 +3041,17 @@ xmlRelaxNGCompile(xmlRelaxNGParserCtxtPtr ctxt, xmlRelaxNGDefinePtr def)
                 ctxt->am = xmlNewAutomata();
                 if (ctxt->am == NULL)
                     return (-1);
+
+                /*
+                 * assume identical strings but not same pointer are different
+                 * atoms, needed for non-determinism detection
+                 * That way if 2 elements with the same name are in a choice
+                 * branch the automata is found non-deterministic and
+                 * we fallback to the normal validation which does the right
+                 * thing of exploring both choices.
+                 */
+                xmlAutomataSetFlags(ctxt->am, 1);
+
                 ctxt->state = xmlAutomataGetInitState(ctxt->am);
                 while (list != NULL) {
                     xmlRelaxNGCompile(ctxt, list);
@@ -3068,6 +3083,7 @@ xmlRelaxNGCompile(xmlRelaxNGParserCtxtPtr ctxt, xmlRelaxNGDefinePtr def)
                 ctxt->am = xmlNewAutomata();
                 if (ctxt->am == NULL)
                     return (-1);
+                xmlAutomataSetFlags(ctxt->am, 1);
                 ctxt->state = xmlAutomataGetInitState(ctxt->am);
                 while (list != NULL) {
                     xmlRelaxNGCompile(ctxt, list);
@@ -3076,6 +3092,11 @@ xmlRelaxNGCompile(xmlRelaxNGParserCtxtPtr ctxt, xmlRelaxNGDefinePtr def)
                 xmlAutomataSetFinalState(ctxt->am, ctxt->state);
                 def->contModel = xmlAutomataCompile(ctxt->am);
                 if (!xmlRegexpIsDeterminist(def->contModel)) {
+#ifdef DEBUG_COMPILE
+                    xmlGenericError(xmlGenericErrorContext,
+                        "Content model not determinist %s\n",
+                                    def->name);
+#endif
                     /*
                      * we can only use the automata if it is determinist
                      */
diff --git a/result/relaxng/492317_0 b/result/relaxng/492317_0
new file mode 100644
index 0000000..e69de29
diff --git a/result/relaxng/492317_0.err b/result/relaxng/492317_0.err
new file mode 100644
index 0000000..9b8db15
--- /dev/null
+++ b/result/relaxng/492317_0.err
@@ -0,0 +1 @@
+./test/relaxng/492317_0.xml validates
diff --git a/result/relaxng/492317_1 b/result/relaxng/492317_1
new file mode 100644
index 0000000..e69de29
diff --git a/result/relaxng/492317_1.err b/result/relaxng/492317_1.err
new file mode 100644
index 0000000..177ee7b
--- /dev/null
+++ b/result/relaxng/492317_1.err
@@ -0,0 +1 @@
+./test/relaxng/492317_1.xml validates
diff --git a/result/relaxng/492317_2 b/result/relaxng/492317_2
new file mode 100644
index 0000000..e69de29
diff --git a/result/relaxng/492317_2.err b/result/relaxng/492317_2.err
new file mode 100644
index 0000000..e8b22e7
--- /dev/null
+++ b/result/relaxng/492317_2.err
@@ -0,0 +1,3 @@
+./test/relaxng/492317_2.xml:2: element child: Relax-NG validity error : Element child failed to validate attributes
+./test/relaxng/492317_2.xml:1: element root: Relax-NG validity error : Element root failed to validate content
+./test/relaxng/492317_2.xml fails to validate
diff --git a/result/relaxng/492317_err b/result/relaxng/492317_err
new file mode 100644
index 0000000..1f07539
--- /dev/null
+++ b/result/relaxng/492317_err
@@ -0,0 +1 @@
+./test/relaxng/492317.rng validates
diff --git a/result/relaxng/492317_valid b/result/relaxng/492317_valid
new file mode 100644
index 0000000..e69de29
diff --git a/test/relaxng/492317.rng b/test/relaxng/492317.rng
new file mode 100644
index 0000000..343f294
--- /dev/null
+++ b/test/relaxng/492317.rng
@@ -0,0 +1,16 @@
+<element name="root" xmlns="http://relaxng.org/ns/structure/1.0">
+  <choice>
+    <element name="child">
+      <attribute name="type">
+        <value>Foo</value>
+      </attribute>
+      <!-- Define stuff that's only valid when type is "Foo" -->
+    </element>
+    <element name="child">
+      <attribute name="type">
+        <value>Bar</value>
+      </attribute>
+      <!-- Define stuff that's only valid when type is "Bar" -->
+    </element>
+  </choice>
+</element>
diff --git a/test/relaxng/492317_0.xml b/test/relaxng/492317_0.xml
new file mode 100644
index 0000000..6ab9d80
--- /dev/null
+++ b/test/relaxng/492317_0.xml
@@ -0,0 +1,4 @@
+<root>
+  <child type="Foo">
+  </child>
+</root>
diff --git a/test/relaxng/492317_1.xml b/test/relaxng/492317_1.xml
new file mode 100644
index 0000000..d325ac2
--- /dev/null
+++ b/test/relaxng/492317_1.xml
@@ -0,0 +1,4 @@
+<root>
+  <child type="Bar">
+  </child>
+</root>
diff --git a/test/relaxng/492317_2.xml b/test/relaxng/492317_2.xml
new file mode 100644
index 0000000..33bbc5d
--- /dev/null
+++ b/test/relaxng/492317_2.xml
@@ -0,0 +1,4 @@
+<root>
+  <child type="">
+  </child>
+</root>
diff --git a/xmlregexp.c b/xmlregexp.c
index 0644d0b..ac6d8bc 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -233,6 +233,8 @@ struct _xmlAutomataState {
 typedef struct _xmlAutomata xmlRegParserCtxt;
 typedef xmlRegParserCtxt *xmlRegParserCtxtPtr;
 
+#define AM_AUTOMATA_RNG 1
+
 struct _xmlAutomata {
     xmlChar *string;
     xmlChar *cur;
@@ -260,6 +262,7 @@ struct _xmlAutomata {
 
     int determinist;
     int negs;
+    int flags;
 };
 
 struct _xmlRegexp {
@@ -271,6 +274,7 @@ struct _xmlRegexp {
     int nbCounters;
     xmlRegCounter *counters;
     int determinist;
+    int flags;
     /*
      * That's the compact form for determinists automatas
      */
@@ -353,6 +357,8 @@ static int xmlRegCheckCharacter(xmlRegAtomPtr atom, int codepoint);
 static int xmlRegCheckCharacterRange(xmlRegAtomType type, int codepoint,
                   int neg, int start, int end, const xmlChar *blockName);
 
+void xmlAutomataSetFlags(xmlAutomataPtr am, int flags);
+
 /************************************************************************
  *									*
  * 		Regexp memory error handler				*
@@ -434,6 +440,7 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
     ret->nbCounters = ctxt->nbCounters;
     ret->counters = ctxt->counters;
     ret->determinist = ctxt->determinist;
+    ret->flags = ctxt->flags;
     if (ret->determinist == -1) {
         xmlRegexpIsDeterminist(ret);
     }
@@ -2428,6 +2435,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
  * xmlFAEqualAtoms:
  * @atom1:  an atom
  * @atom2:  an atom
+ * @deep: if not set only compare string pointers
  *
  * Compares two atoms to check whether they are the same exactly
  * this is used to remove equivalent transitions
@@ -2435,7 +2443,7 @@ xmlFACompareAtomTypes(xmlRegAtomType type1, xmlRegAtomType type2) {
  * Returns 1 if same and 0 otherwise
  */
 static int
-xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
+xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
     int ret = 0;
 
     if (atom1 == atom2)
@@ -2450,8 +2458,11 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
 	    ret = 0;
 	    break;
         case XML_REGEXP_STRING:
-	    ret = xmlStrEqual((xmlChar *)atom1->valuep,
-	                      (xmlChar *)atom2->valuep);
+            if (!deep)
+                ret = (atom1->valuep == atom2->valuep);
+            else
+                ret = xmlStrEqual((xmlChar *)atom1->valuep,
+                                  (xmlChar *)atom2->valuep);
 	    break;
         case XML_REGEXP_CHARVAL:
 	    ret = (atom1->codepoint == atom2->codepoint);
@@ -2469,6 +2480,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
  * xmlFACompareAtoms:
  * @atom1:  an atom
  * @atom2:  an atom
+ * @deep: if not set only compare string pointers
  *
  * Compares two atoms to check whether they intersect in some ways,
  * this is used by xmlFAComputesDeterminism and xmlFARecurseDeterminism only
@@ -2476,7 +2488,7 @@ xmlFAEqualAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
  * Returns 1 if yes and 0 otherwise
  */
 static int
-xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
+xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2, int deep) {
     int ret = 1;
 
     if (atom1 == atom2)
@@ -2502,8 +2514,11 @@ xmlFACompareAtoms(xmlRegAtomPtr atom1, xmlRegAtomPtr atom2) {
     }
     switch (atom1->type) {
         case XML_REGEXP_STRING:
-	    ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
-	                                 (xmlChar *)atom2->valuep);
+            if (!deep)
+                ret = (atom1->valuep != atom2->valuep);
+            else
+                ret = xmlRegStrEqualWildcard((xmlChar *)atom1->valuep,
+                                             (xmlChar *)atom2->valuep);
 	    break;
         case XML_REGEXP_EPSILON:
 	    goto not_determinist;
@@ -2566,9 +2581,14 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
     int res;
     int transnr, nbTrans;
     xmlRegTransPtr t1;
+    int deep = 1;
 
     if (state == NULL)
 	return(ret);
+
+    if (ctxt->flags & AM_AUTOMATA_RNG)
+        deep = 0;
+
     /*
      * don't recurse on transitions potentially added in the course of
      * the elimination.
@@ -2592,7 +2612,7 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
 	}
 	if (t1->to != to)
 	    continue;
-	if (xmlFACompareAtoms(t1->atom, atom)) {
+	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
 	    ret = 0;
 	    /* mark the transition as non-deterministic */
 	    t1->nd = 1;
@@ -2616,6 +2636,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
     xmlRegTransPtr t1, t2, last;
     int i;
     int ret = 1;
+    int deep = 1;
 
 #ifdef DEBUG_REGEXP_GRAPH
     printf("xmlFAComputesDeterminism\n");
@@ -2624,6 +2645,9 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
     if (ctxt->determinist != -1)
 	return(ctxt->determinist);
 
+    if (ctxt->flags & AM_AUTOMATA_RNG)
+        deep = 0;
+
     /*
      * First cleanup the automata removing cancelled transitions
      */
@@ -2651,7 +2675,11 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
 		    continue;
 		if (t2->atom != NULL) {
 		    if (t1->to == t2->to) {
-			if (xmlFAEqualAtoms(t1->atom, t2->atom) &&
+                        /*
+                         * Here we use deep because we want to keep the
+                         * transitions which indicate a conflict
+                         */
+			if (xmlFAEqualAtoms(t1->atom, t2->atom, deep) &&
                             (t1->counter == t2->counter) &&
                             (t1->count == t2->count))
 			    t2->to = -1; /* eliminated */
@@ -2688,8 +2716,11 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
 		if (t2->to == -1) /* eliminated */
 		    continue;
 		if (t2->atom != NULL) {
-		    /* not determinist ! */
-		    if (xmlFACompareAtoms(t1->atom, t2->atom)) {
+                    /*
+                     * But here we don't use deep because we want to
+                     * find transitions which indicate a conflict
+                     */
+		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
 			ret = 0;
 			/* mark the transitions as non-deterministic ones */
 			t1->nd = 1;
@@ -5477,10 +5508,12 @@ xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
     am->nbStates = comp->nbStates;
     am->states = comp->states;
     am->determinist = -1;
+    am->flags = comp->flags;
     ret = xmlFAComputesDeterminism(am);
     am->atoms = NULL;
     am->states = NULL;
     xmlFreeAutomata(am);
+    comp->determinist = ret;
     return(ret);
 }
 
@@ -5558,6 +5591,7 @@ xmlNewAutomata(void) {
 	xmlFreeAutomata(ctxt);
 	return(NULL);
     }
+    ctxt->flags = 0;
 
     return(ctxt);
 }
@@ -5576,6 +5610,20 @@ xmlFreeAutomata(xmlAutomataPtr am) {
 }
 
 /**
+ * xmlAutomataSetFlags
+ * @am: an automata
+ * @flags:  a set of internal flags
+ *
+ * Set some flags on the automata
+ */
+void
+xmlAutomataSetFlags(xmlAutomataPtr am, int flags) {
+    if (am == NULL)
+	return;
+    am->flags |= flags;
+}
+
+/**
  * xmlAutomataGetInitState:
  * @am: an automata
  *



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]