[libxml2] Fix exponential runtime in xmlFARecurseDeterminism



commit 68eadabd0055cba39c4ea1acfa8931d0d10a44e5
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Sat Jul 11 21:32:10 2020 +0200

    Fix exponential runtime in xmlFARecurseDeterminism
    
    In order to prevent visiting a state twice, states must be marked as
    visited for the whole duration of graph traversal because states might
    be reached by different paths. Otherwise state graphs like the
    following can lead to exponential runtime:
    
      ->O-->O-->O-->O-->O->
         \ / \ / \ / \ /
          O   O   O   O
    
    Reset the "visited" flag only after the graph was traversed.
    
    xmlFAComputesDeterminism still has massive performance problems when
    handling fuzzed input. By design, it has quadratic time complexity in
    the number of reachable states. Some issues might also stem from
    redundant epsilon transitions. With this fix, fuzzing regexes with a
    maximum length of 100 becomes feasible at least.
    
    Found with libFuzzer.

 xmlregexp.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)
---
diff --git a/xmlregexp.c b/xmlregexp.c
index dbf3bf2c2..f971f0c82 100644
--- a/xmlregexp.c
+++ b/xmlregexp.c
@@ -2658,7 +2658,6 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
            state->markd = XML_REGEXP_MARK_VISITED;
            res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
                                           to, atom);
-           state->markd = 0;
            if (res == 0) {
                ret = 0;
                /* t1->nd = 1; */
@@ -2676,6 +2675,30 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
     return(ret);
 }
 
+/**
+ * xmlFAFinishRecurseDeterminism:
+ * @ctxt:  a regexp parser context
+ *
+ * Reset flags after checking determinism.
+ */
+static void
+xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
+    int transnr, nbTrans;
+
+    if (state == NULL)
+       return;
+    if (state->markd != XML_REGEXP_MARK_VISITED)
+       return;
+    state->markd = 0;
+
+    nbTrans = state->nbTrans;
+    for (transnr = 0; transnr < nbTrans; transnr++) {
+       xmlRegTransPtr t1 = &state->trans[transnr];
+       if ((t1->atom == NULL) && (t1->to >= 0))
+           xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
+    }
+}
+
 /**
  * xmlFAComputesDeterminism:
  * @ctxt:  a regexp parser context
@@ -2789,6 +2812,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
                     */
                    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
                                                   t2->to, t2->atom);
+                    xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
                    /* don't shortcut the computation so all non deterministic
                       transition get marked down
                    if (ret == 0)


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]