[libxml2] Merge duplicate code paths handling PE references



commit 03904159f8ada9f18ca110de7f0fc24b994b315d
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Mon Jun 5 21:16:00 2017 +0200

    Merge duplicate code paths handling PE references
    
    xmlParsePEReference is essentially a subset of
    xmlParserHandlePEReference, so make xmlParserHandlePEReference call
    xmlParsePEReference. The code paths in these functions differed
    slightly, but the code from xmlParserHandlePEReference seems more solid
    and tested.

 parser.c |  189 +++++++++++++------------------------------------------------
 1 files changed, 41 insertions(+), 148 deletions(-)
---
diff --git a/parser.c b/parser.c
index 7ef6028..0e51223 100644
--- a/parser.c
+++ b/parser.c
@@ -2533,11 +2533,6 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
  */
 void
 xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
-    const xmlChar *name;
-    xmlEntityPtr entity = NULL;
-    xmlParserInputPtr input;
-
-    if (RAW != '%') return;
     switch(ctxt->instate) {
        case XML_PARSER_CDATA_SECTION:
            return;
@@ -2592,128 +2587,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
             return;
     }
 
-    NEXT;
-    name = xmlParseName(ctxt);
-    if (xmlParserDebugEntities)
-       xmlGenericError(xmlGenericErrorContext,
-               "PEReference: %s\n", name);
-    if (name == NULL) {
-       xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
-    } else {
-       if (RAW == ';') {
-           NEXT;
-           if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
-               entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
-           if (ctxt->instate == XML_PARSER_EOF)
-               return;
-           if (entity == NULL) {
-
-               /*
-                * [ WFC: Entity Declared ]
-                * In a document without any DTD, a document with only an
-                * internal DTD subset which contains no parameter entity
-                * references, or a document with "standalone='yes'", ...
-                * ... The declaration of a parameter entity must precede
-                * any reference to it...
-                */
-               if ((ctxt->standalone == 1) ||
-                   ((ctxt->hasExternalSubset == 0) &&
-                    (ctxt->hasPErefs == 0))) {
-                   xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
-                        "PEReference: %%%s; not found\n", name);
-               } else {
-                   /*
-                    * [ VC: Entity Declared ]
-                    * In a document with an external subset or external
-                    * parameter entities with "standalone='no'", ...
-                    * ... The declaration of a parameter entity must precede
-                    * any reference to it...
-                    */
-                   if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
-                       xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
-                                        "PEReference: %%%s; not found\n",
-                                        name, NULL);
-                   } else
-                       xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
-                                     "PEReference: %%%s; not found\n",
-                                     name, NULL);
-                   ctxt->valid = 0;
-               }
-               xmlParserEntityCheck(ctxt, 0, NULL, 0);
-           } else if (ctxt->input->free != deallocblankswrapper) {
-                   input = xmlNewBlanksWrapperInputStream(ctxt, entity);
-                   if (xmlPushInput(ctxt, input) < 0)
-                       return;
-           } else {
-               if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
-                   (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
-                   xmlChar start[4];
-                   xmlCharEncoding enc;
-
-                   /*
-                    * Note: external parameter entities will not be loaded, it
-                    * is not required for a non-validating parser, unless the
-                    * option of validating, or substituting entities were
-                    * given. Doing so is far more secure as the parser will
-                    * only process data coming from the document entity by
-                    * default.
-                    */
-                    if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
-                       ((ctxt->options & XML_PARSE_NOENT) == 0) &&
-                       ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
-                       ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
-                       ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
-                       (ctxt->replaceEntities == 0) &&
-                       (ctxt->validate == 0))
-                       return;
-
-                   /*
-                    * handle the extra spaces added before and after
-                    * c.f. http://www.w3.org/TR/REC-xml#as-PE
-                    * this is done independently.
-                    */
-                   input = xmlNewEntityInputStream(ctxt, entity);
-                   if (xmlPushInput(ctxt, input) < 0)
-                       return;
-
-                   /*
-                    * Get the 4 first bytes and decode the charset
-                    * if enc != XML_CHAR_ENCODING_NONE
-                    * plug some encoding conversion routines.
-                    * Note that, since we may have some non-UTF8
-                    * encoding (like UTF16, bug 135229), the 'length'
-                    * is not known, but we can calculate based upon
-                    * the amount of data in the buffer.
-                    */
-                   GROW
-                    if (ctxt->instate == XML_PARSER_EOF)
-                        return;
-                   if ((ctxt->input->end - ctxt->input->cur)>=4) {
-                       start[0] = RAW;
-                       start[1] = NXT(1);
-                       start[2] = NXT(2);
-                       start[3] = NXT(3);
-                       enc = xmlDetectCharEncoding(start, 4);
-                       if (enc != XML_CHAR_ENCODING_NONE) {
-                           xmlSwitchEncoding(ctxt, enc);
-                       }
-                   }
-
-                   if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
-                       (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
-                       (IS_BLANK_CH(NXT(5)))) {
-                       xmlParseTextDecl(ctxt);
-                   }
-               } else {
-                   xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
-                            "PEReference: %s is not a parameter entity\n",
-                                     name);
-               }
-           }
-       } else {
-           xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
-       }
-    }
+    xmlParsePEReference(ctxt);
 }
 
 /*
@@ -8057,12 +7931,14 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
     NEXT;
     name = xmlParseName(ctxt);
     if (name == NULL) {
-       xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
-                      "xmlParsePEReference: no name\n");
+       xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
        return;
     }
+    if (xmlParserDebugEntities)
+       xmlGenericError(xmlGenericErrorContext,
+               "PEReference: %s\n", name);
     if (RAW != ';') {
-       xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
+       xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
         return;
     }
 
@@ -8104,10 +7980,15 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
             * ... The declaration of a parameter entity must
             * precede any reference to it...
             */
-           xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
-                         "PEReference: %%%s; not found\n",
-                         name, NULL);
-           ctxt->valid = 0;
+            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
+                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
+                                 "PEReference: %%%s; not found\n",
+                                 name, NULL);
+            } else
+                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
+                              "PEReference: %%%s; not found\n",
+                              name, NULL);
+            ctxt->valid = 0;
        }
        xmlParserEntityCheck(ctxt, 0, NULL, 0);
     } else {
@@ -8124,6 +8005,9 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
            if (xmlPushInput(ctxt, input) < 0)
                return;
        } else {
+            xmlChar start[4];
+            xmlCharEncoding enc;
+
            if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                ((ctxt->options & XML_PARSE_NOENT) == 0) &&
                ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
@@ -8133,27 +8017,36 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
                (ctxt->validate == 0))
                return;
 
-           /*
-            * TODO !!!
-            * handle the extra spaces added before and after
-            * c.f. http://www.w3.org/TR/REC-xml#as-PE
-            */
            input = xmlNewEntityInputStream(ctxt, entity);
            if (xmlPushInput(ctxt, input) < 0)
                return;
+            /*
+             * Get the 4 first bytes and decode the charset
+             * if enc != XML_CHAR_ENCODING_NONE
+             * plug some encoding conversion routines.
+             * Note that, since we may have some non-UTF8
+             * encoding (like UTF16, bug 135229), the 'length'
+             * is not known, but we can calculate based upon
+             * the amount of data in the buffer.
+             */
+           GROW
+            if (ctxt->instate == XML_PARSER_EOF)
+                return;
+            if ((ctxt->input->end - ctxt->input->cur)>=4) {
+                start[0] = RAW;
+                start[1] = NXT(1);
+                start[2] = NXT(2);
+                start[3] = NXT(3);
+                enc = xmlDetectCharEncoding(start, 4);
+                if (enc != XML_CHAR_ENCODING_NONE) {
+                    xmlSwitchEncoding(ctxt, enc);
+                }
+            }
+
            if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
                (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
                (IS_BLANK_CH(NXT(5)))) {
                xmlParseTextDecl(ctxt);
-               if (ctxt->errNo ==
-                   XML_ERR_UNSUPPORTED_ENCODING) {
-                   /*
-                    * The XML REC instructs us to stop parsing
-                    * right here
-                    */
-                   xmlHaltParser(ctxt);
-                   return;
-               }
            }
        }
     }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]