Re: [xml] ctxt->token patches



Hi Daniel,

  Hum, can you tell where this occurred?

It seems beneficial to test for ctxt->token in front
of a large list of other "else if" conditionals.

I've attached a step-2 patch which removes all remaining 
ctxt->token uses in parser.c and parserInternals.c (except
the initializing to 0). htmlparser.c and docbookparser.c will
be treated in the next patch.

This second patch eats about 60% of the performance gains
of the first, but I fully agree that it's nonsense to leave voodoo
statements in the code.

To proceed further in performance enhancement I need to
do better profiling and perhaps look for more intrusive
changes in the parser.

Regards,
Peter Jacobi



*** after-step1\parser.c        Fri Jun 28 12:48:06 2002
--- parser.c    Fri Jun 28 16:29:41 2002
***************
*** 316,322 ****
      if (*(ctxt->input->cur) == '\n') {                                        \
        ctxt->input->line++; ctxt->input->col = 1;                      \
      } else ctxt->input->col++;                                                \
!     ctxt->token = 0; ctxt->input->cur += l;                           \
      if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);   \
    } while (0)
  
--- 316,322 ----
      if (*(ctxt->input->cur) == '\n') {                                        \
        ctxt->input->line++; ctxt->input->col = 1;                      \
      } else ctxt->input->col++;                                                \
!     ctxt->input->cur += l;                            \
      if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);   \
    } while (0)
  
***************
*** 341,352 ****
  xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
      int res = 0;
  
-     if (ctxt->token != 0) {
-       if (!IS_BLANK(ctxt->token))
-           return(0);
-       ctxt->token = 0;
-       res++;
-     }
      /*
       * It's Okay to use CUR/NEXT here since all the blanks are on
       * the ASCII range.
--- 341,346 ----
***************
*** 465,475 ****
      unsigned int val = 0;
      int count = 0;
  
-     if (ctxt->token != 0) {
-       val = ctxt->token;
-         ctxt->token = 0;
-         return(val);
-     }
      /*
       * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
       */
--- 459,464 ----
***************
*** 754,762 ****
      xmlEntityPtr entity = NULL;
      xmlParserInputPtr input;
  
-     if (ctxt->token != 0) {
-         return;
-     } 
      if (RAW != '%') return;
      switch(ctxt->instate) {
        case XML_PARSER_CDATA_SECTION:
--- 743,748 ----
***************
*** 2363,2370 ****
       * OK loop until we reach one of the ending char or a size limit.
       */
      c = CUR_CHAR(l);
!     while (((NXT(0) != limit) && /* checked */
!          (c != '<')) || (ctxt->token != 0)) {
        if (c == 0) break;
        if (c == '&') {
            if (NXT(1) == '#') {
--- 2349,2356 ----
       * OK loop until we reach one of the ending char or a size limit.
       */
      c = CUR_CHAR(l);
!     while ((NXT(0) != limit) && /* checked */
!          (c != '<')) {
        if (c == 0) break;
        if (c == '&') {
            if (NXT(1) == '#') {
***************
*** 2685,2691 ****
       * Accelerated common case where input don't need to be
       * modified before passing it to the handler.
       */
!     if ((ctxt->token == 0) && (!cdata)) {
        in = ctxt->input->cur;
        do {
  get_more:
--- 2671,2677 ----
       * Accelerated common case where input don't need to be
       * modified before passing it to the handler.
       */
!     if (!cdata) {
        in = ctxt->input->cur;
        do {
  get_more:
***************
*** 2777,2784 ****
      SHRINK;
      GROW;
      cur = CUR_CHAR(l);
!     while (((cur != '<') || (ctxt->token == '<')) && /* checked */
!       ((cur != '&') || (ctxt->token == '&')) && 
        (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
        if ((cur == ']') && (NXT(1) == ']') &&
            (NXT(2) == '>')) {
--- 2763,2770 ----
      SHRINK;
      GROW;
      cur = CUR_CHAR(l);
!     while ((cur != '<') && /* checked */
!       (cur != '&') && 
        (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
        if ((cur == ']') && (NXT(1) == ']') &&
            (NXT(2) == '>')) {
***************
*** 4938,4944 ****
               (NXT(2) != '>'))) {
            const xmlChar *check = CUR_PTR;
            int cons = ctxt->input->consumed;
-           int tok = ctxt->token;
  
            if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
                xmlParseConditionalSections(ctxt);
--- 4924,4929 ----
***************
*** 4955,4962 ****
            while ((RAW == 0) && (ctxt->inputNr > 1))
                xmlPopInput(ctxt);
  
!           if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
!               (tok == ctxt->token)) {
                ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
--- 4940,4946 ----
            while ((RAW == 0) && (ctxt->inputNr > 1))
                xmlPopInput(ctxt);
  
!           if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
                ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
***************
*** 5248,5254 ****
           (RAW == '%') || IS_BLANK(CUR)) {
        const xmlChar *check = CUR_PTR;
        int cons = ctxt->input->consumed;
-       int tok = ctxt->token;
  
        GROW;
          if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
--- 5232,5237 ----
***************
*** 5266,5273 ****
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);
  
!       if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
!           (tok == ctxt->token)) {
            ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
--- 5249,5255 ----
        while ((RAW == 0) && (ctxt->inputNr > 1))
            xmlPopInput(ctxt);
  
!       if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
            ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
***************
*** 6869,6883 ****
        const xmlChar *cur = ctxt->input->cur;
  
        /*
-        * Handle  possible processed charrefs.
-        */
-       if (ctxt->token != 0) {
-           xmlParseCharData(ctxt, 0);
-       }
-       /*
         * First case : a Processing Instruction.
         */
!       else if ((*cur == '<') && (cur[1] == '?')) {
            xmlParsePI(ctxt);
        }
  
--- 6851,6859 ----
        const xmlChar *cur = ctxt->input->cur;
  
        /*
         * First case : a Processing Instruction.
         */
!       if ((*cur == '<') && (cur[1] == '?')) {
            xmlParsePI(ctxt);
        }
  
***************
*** 8549,8568 ****
              case XML_PARSER_CONTENT: {
                const xmlChar *test;
                int cons;
-               int tok;
- 
-                 /*
-                * Handle preparsed entities and charRef
-                */
-               if (ctxt->token != 0) {
-                   xmlChar current[2] = { 0 , 0 } ;
- 
-                   current[0] = (xmlChar) ctxt->token;
-                   if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
-                       (ctxt->sax->characters != NULL))
-                       ctxt->sax->characters(ctxt->userData, current, 1);
-                   ctxt->token = 0;
-               }
                if ((avail < 2) && (ctxt->inputNr == 1))
                    goto done;
                cur = ctxt->input->cur[0];
--- 8525,8530 ----
***************
*** 8570,8576 ****
  
                test = CUR_PTR;
                cons = ctxt->input->consumed;
-               tok = ctxt->token;
                if ((cur == '<') && (next == '?')) {
                    if ((!terminate) &&
                        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
--- 8532,8537 ----
***************
*** 8660,8667 ****
                 */
                while ((RAW == 0) && (ctxt->inputNr > 1))
                    xmlPopInput(ctxt);
!               if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
!                   (tok == ctxt->token)) {
                    ctxt->errNo = XML_ERR_INTERNAL_ERROR;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
--- 8621,8627 ----
                 */
                while ((RAW == 0) && (ctxt->inputNr > 1))
                    xmlPopInput(ctxt);
!               if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
                    ctxt->errNo = XML_ERR_INTERNAL_ERROR;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
*** after-step1\parserInternals.c       Fri Jun 28 10:23:45 2002
--- parserInternals.c   Fri Jun 28 16:29:41 2002
***************
*** 1110,1117 ****
       *   literal #xD, an XML processor must pass to the application
       *   the single character #xA. 
       */
!     if (ctxt->token != 0) ctxt->token = 0;
!     else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        if ((*ctxt->input->cur == 0) &&
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
            (ctxt->instate != XML_PARSER_COMMENT)) {
--- 1110,1116 ----
       *   literal #xD, an XML processor must pass to the application
       *   the single character #xA. 
       */
!     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        if ((*ctxt->input->cur == 0) &&
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
            (ctxt->instate != XML_PARSER_COMMENT)) {
***************
*** 2781,2791 ****
             (c != end2) && (c != end3)) {
        GROW;
        if (c == 0) break;
!         if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
            int val = xmlParseCharRef(ctxt);
            COPY_BUF(0,buffer,nbchars,val);
            NEXTL(l);
!       } else if ((c == '&') && (ctxt->token != '&') &&
                   (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
--- 2780,2790 ----
             (c != end2) && (c != end3)) {
        GROW;
        if (c == 0) break;
!         if ((c == '&') && (NXT(1) == '#')) {
            int val = xmlParseCharRef(ctxt);
            COPY_BUF(0,buffer,nbchars,val);
            NEXTL(l);
!       } else if ((c == '&') &&
                   (what & XML_SUBSTITUTE_REF)) {
            if (xmlParserDebugEntities)
                xmlGenericError(xmlGenericErrorContext,
***************
*** 3317,3545 ****
        deprecated = 1;
      }
  
- #if 0
-     xmlParserInputPtr input;
-     xmlChar *name;
-     xmlEntityPtr ent = NULL;
- 
-     if (ctxt->token != 0) {
-         return;
-     } 
-     if (RAW != '&') return;
-     GROW;
-     if ((RAW == '&') && (NXT(1) == '#')) {
-       switch(ctxt->instate) {
-           case XML_PARSER_ENTITY_DECL:
-           case XML_PARSER_PI:
-           case XML_PARSER_CDATA_SECTION:
-           case XML_PARSER_COMMENT:
-           case XML_PARSER_SYSTEM_LITERAL:
-               /* we just ignore it there */
-               return;
-           case XML_PARSER_START_TAG:
-               return;
-           case XML_PARSER_END_TAG:
-               return;
-           case XML_PARSER_EOF:
-               ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
-               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                   ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
-               ctxt->wellFormed = 0;
-               ctxt->disableSAX = 1;
-               return;
-           case XML_PARSER_PROLOG:
-           case XML_PARSER_START:
-           case XML_PARSER_MISC:
-               ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
-               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                   ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
-               ctxt->wellFormed = 0;
-               ctxt->disableSAX = 1;
-               return;
-           case XML_PARSER_EPILOG:
-               ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
-               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                   ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
-               ctxt->wellFormed = 0;
-               ctxt->disableSAX = 1;
-               return;
-           case XML_PARSER_DTD:
-               ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
-               if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                   ctxt->sax->error(ctxt->userData, 
-                          "CharRef are forbidden in DTDs!\n");
-               ctxt->wellFormed = 0;
-               ctxt->disableSAX = 1;
-               return;
-           case XML_PARSER_ENTITY_VALUE:
-               /*
-                * NOTE: in the case of entity values, we don't do the
-                *       substitution here since we need the literal
-                *       entity value to be able to save the internal
-                *       subset of the document.
-                *       This will be handled by xmlStringDecodeEntities
-                */
-               return;
-           case XML_PARSER_CONTENT:
-               return;
-           case XML_PARSER_ATTRIBUTE_VALUE:
-               /* ctxt->token = xmlParseCharRef(ctxt); */
-               return;
-             case XML_PARSER_IGNORE:
-               return;
-       }
-       return;
-     }
- 
-     switch(ctxt->instate) {
-       case XML_PARSER_CDATA_SECTION:
-           return;
-       case XML_PARSER_PI:
-         case XML_PARSER_COMMENT:
-       case XML_PARSER_SYSTEM_LITERAL:
-         case XML_PARSER_CONTENT:
-           return;
-       case XML_PARSER_START_TAG:
-           return;
-       case XML_PARSER_END_TAG:
-           return;
-         case XML_PARSER_EOF:
-           ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
-           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-               ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
-           ctxt->wellFormed = 0;
-           ctxt->disableSAX = 1;
-           return;
-         case XML_PARSER_PROLOG:
-       case XML_PARSER_START:
-       case XML_PARSER_MISC:
-           ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
-           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-               ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
-           ctxt->wellFormed = 0;
-           ctxt->disableSAX = 1;
-           return;
-         case XML_PARSER_EPILOG:
-           ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
-           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-               ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
-           ctxt->wellFormed = 0;
-           ctxt->disableSAX = 1;
-           return;
-       case XML_PARSER_ENTITY_VALUE:
-           /*
-            * NOTE: in the case of entity values, we don't do the
-            *       substitution here since we need the literal
-            *       entity value to be able to save the internal
-            *       subset of the document.
-            *       This will be handled by xmlStringDecodeEntities
-            */
-           return;
-         case XML_PARSER_ATTRIBUTE_VALUE:
-           /*
-            * NOTE: in the case of attributes values, we don't do the
-            *       substitution here unless we are in a mode where
-            *       the parser is explicitly asked to substitute
-            *       entities. The SAX callback is called with values
-            *       without entity substitution.
-            *       This will then be handled by xmlStringDecodeEntities
-            */
-           return;
-       case XML_PARSER_ENTITY_DECL:
-           /*
-            * we just ignore it there
-            * the substitution will be done once the entity is referenced
-            */
-           return;
-         case XML_PARSER_DTD:
-           ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
-           if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-               ctxt->sax->error(ctxt->userData, 
-                      "Entity references are forbidden in DTDs!\n");
-           ctxt->wellFormed = 0;
-           ctxt->disableSAX = 1;
-           return;
-         case XML_PARSER_IGNORE:
-           return;
-     }
- 
- /* TODO: this seems not reached anymore .... Verify ... */
- xmlGenericError(xmlGenericErrorContext,
-       "Reached deprecated section in xmlParserHandleReference()\n");
- xmlGenericError(xmlGenericErrorContext,
-       "Please forward the document to daniel veillard com\n");
- xmlGenericError(xmlGenericErrorContext,
-       "indicating the version: %s, thanks !\n", xmlParserVersion);
-     NEXT;
-     name = xmlScanName(ctxt);
-     if (name == NULL) {
-       ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
-       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-           ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
-       ctxt->wellFormed = 0;
-       ctxt->disableSAX = 1;
-       ctxt->token = '&';
-       return;
-     }
-     if (NXT(xmlStrlen(name)) != ';') {
-       ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
-       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-           ctxt->sax->error(ctxt->userData, 
-                            "Entity reference: ';' expected\n");
-       ctxt->wellFormed = 0;
-       ctxt->disableSAX = 1;
-       ctxt->token = '&';
-       xmlFree(name);
-       return;
-     }
-     SKIP(xmlStrlen(name) + 1);
-     if (ctxt->sax != NULL) {
-       if (ctxt->sax->getEntity != NULL)
-           ent = ctxt->sax->getEntity(ctxt->userData, name);
-     }
- 
-     /*
-      * [ WFC: Entity Declared ]
-      * the Name given in the entity reference must match that in an entity
-      * declaration, except that well-formed documents need not declare any
-      * of the following entities: amp, lt, gt, apos, quot. 
-      */
-     if (ent == NULL)
-       ent = xmlGetPredefinedEntity(name);
-     if (ent == NULL) {
-         ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
-       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-           ctxt->sax->error(ctxt->userData, 
-                            "Entity reference: entity %s not declared\n",
-                            name);
-       ctxt->wellFormed = 0;
-       ctxt->disableSAX = 1;
-       xmlFree(name);
-       return;
-     }
- 
-     /*
-      * [ WFC: Parsed Entity ]
-      * An entity reference must not contain the name of an unparsed entity
-      */
-     if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
-         ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
-       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-           ctxt->sax->error(ctxt->userData, 
-                        "Entity reference to unparsed entity %s\n", name);
-       ctxt->wellFormed = 0;
-       ctxt->disableSAX = 1;
-     }
- 
-     if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
-         ctxt->token = ent->content[0];
-       xmlFree(name);
-       return;
-     }
-     input = xmlNewEntityInputStream(ctxt, ent);
-     xmlPushInput(ctxt, input);
-     xmlFree(name);
- #endif
      return;
  }
  
--- 3316,3321 ----

Attachment: ctxt-token-patch-2.zip
Description: Zip archive



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]