Hi Daniel,
Hum, can you tell where this occurred?
It seems beneficial to test for ctxt->token in front of a large list of other "else if" conditionals. I've attached a step-2 patch which removes all remaining uses of ctxt->token in parser.c and parserInternals.c (except its initialization to 0). htmlparser.c and docbookparser.c will be treated in the next patch. This second patch eats about 60% of the performance gains of the first, but I fully agree that it's nonsense to leave voodoo statements in the code. To proceed further with performance enhancement, I need to do better profiling and perhaps look for more intrusive changes in the parser. Regards, Peter Jacobi
*** after-step1\parser.c Fri Jun 28 12:48:06 2002 --- parser.c Fri Jun 28 16:29:41 2002 *************** *** 316,322 **** if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ } else ctxt->input->col++; \ ! ctxt->token = 0; ctxt->input->cur += l; \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ } while (0) --- 316,322 ---- if (*(ctxt->input->cur) == '\n') { \ ctxt->input->line++; ctxt->input->col = 1; \ } else ctxt->input->col++; \ ! ctxt->input->cur += l; \ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \ } while (0) *************** *** 341,352 **** xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { int res = 0; - if (ctxt->token != 0) { - if (!IS_BLANK(ctxt->token)) - return(0); - ctxt->token = 0; - res++; - } /* * It's Okay to use CUR/NEXT here since all the blanks are on * the ASCII range. --- 341,346 ---- *************** *** 465,475 **** unsigned int val = 0; int count = 0; - if (ctxt->token != 0) { - val = ctxt->token; - ctxt->token = 0; - return(val); - } /* * Using RAW/CUR/NEXT is okay since we are working on ASCII range here */ --- 459,464 ---- *************** *** 754,762 **** xmlEntityPtr entity = NULL; xmlParserInputPtr input; - if (ctxt->token != 0) { - return; - } if (RAW != '%') return; switch(ctxt->instate) { case XML_PARSER_CDATA_SECTION: --- 743,748 ---- *************** *** 2363,2370 **** * OK loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); ! while (((NXT(0) != limit) && /* checked */ ! (c != '<')) || (ctxt->token != 0)) { if (c == 0) break; if (c == '&') { if (NXT(1) == '#') { --- 2349,2356 ---- * OK loop until we reach one of the ending char or a size limit. */ c = CUR_CHAR(l); ! while ((NXT(0) != limit) && /* checked */ ! (c != '<')) { if (c == 0) break; if (c == '&') { if (NXT(1) == '#') { *************** *** 2685,2691 **** * Accelerated common case where input don't need to be * modified before passing it to the handler. */ ! 
if ((ctxt->token == 0) && (!cdata)) { in = ctxt->input->cur; do { get_more: --- 2671,2677 ---- * Accelerated common case where input don't need to be * modified before passing it to the handler. */ ! if (!cdata) { in = ctxt->input->cur; do { get_more: *************** *** 2777,2784 **** SHRINK; GROW; cur = CUR_CHAR(l); ! while (((cur != '<') || (ctxt->token == '<')) && /* checked */ ! ((cur != '&') || (ctxt->token == '&')) && (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { --- 2763,2770 ---- SHRINK; GROW; cur = CUR_CHAR(l); ! while ((cur != '<') && /* checked */ ! (cur != '&') && (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ { if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) { *************** *** 4938,4944 **** (NXT(2) != '>'))) { const xmlChar *check = CUR_PTR; int cons = ctxt->input->consumed; - int tok = ctxt->token; if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { xmlParseConditionalSections(ctxt); --- 4924,4929 ---- *************** *** 4955,4962 **** while ((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); ! if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && ! (tok == ctxt->token)) { ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, --- 4940,4946 ---- while ((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); ! if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, *************** *** 5248,5254 **** (RAW == '%') || IS_BLANK(CUR)) { const xmlChar *check = CUR_PTR; int cons = ctxt->input->consumed; - int tok = ctxt->token; GROW; if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { --- 5232,5237 ---- *************** *** 5266,5273 **** while ((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); ! 
if ((CUR_PTR == check) && (cons == ctxt->input->consumed) && ! (tok == ctxt->token)) { ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, --- 5249,5255 ---- while ((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); ! if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) { ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, *************** *** 6869,6883 **** const xmlChar *cur = ctxt->input->cur; /* - * Handle possible processed charrefs. - */ - if (ctxt->token != 0) { - xmlParseCharData(ctxt, 0); - } - /* * First case : a Processing Instruction. */ ! else if ((*cur == '<') && (cur[1] == '?')) { xmlParsePI(ctxt); } --- 6851,6859 ---- const xmlChar *cur = ctxt->input->cur; /* * First case : a Processing Instruction. */ ! if ((*cur == '<') && (cur[1] == '?')) { xmlParsePI(ctxt); } *************** *** 8549,8568 **** case XML_PARSER_CONTENT: { const xmlChar *test; int cons; - int tok; - - /* - * Handle preparsed entities and charRef - */ - if (ctxt->token != 0) { - xmlChar current[2] = { 0 , 0 } ; - - current[0] = (xmlChar) ctxt->token; - if ((ctxt->sax != NULL) && (!ctxt->disableSAX) && - (ctxt->sax->characters != NULL)) - ctxt->sax->characters(ctxt->userData, current, 1); - ctxt->token = 0; - } if ((avail < 2) && (ctxt->inputNr == 1)) goto done; cur = ctxt->input->cur[0]; --- 8525,8530 ---- *************** *** 8570,8576 **** test = CUR_PTR; cons = ctxt->input->consumed; - tok = ctxt->token; if ((cur == '<') && (next == '?')) { if ((!terminate) && (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) --- 8532,8537 ---- *************** *** 8660,8667 **** */ while ((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); ! if ((cons == ctxt->input->consumed) && (test == CUR_PTR) && ! 
(tok == ctxt->token)) { ctxt->errNo = XML_ERR_INTERNAL_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, --- 8621,8627 ---- */ while ((RAW == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt); ! if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) { ctxt->errNo = XML_ERR_INTERNAL_ERROR; if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData,
*** after-step1\parserInternals.c Fri Jun 28 10:23:45 2002 --- parserInternals.c Fri Jun 28 16:29:41 2002 *************** *** 1110,1117 **** * literal #xD, an XML processor must pass to the application * the single character #xA. */ ! if (ctxt->token != 0) ctxt->token = 0; ! else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && (ctxt->instate != XML_PARSER_COMMENT)) { --- 1110,1116 ---- * literal #xD, an XML processor must pass to the application * the single character #xA. */ ! if (ctxt->charset == XML_CHAR_ENCODING_UTF8) { if ((*ctxt->input->cur == 0) && (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) && (ctxt->instate != XML_PARSER_COMMENT)) { *************** *** 2781,2791 **** (c != end2) && (c != end3)) { GROW; if (c == 0) break; ! if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) { int val = xmlParseCharRef(ctxt); COPY_BUF(0,buffer,nbchars,val); NEXTL(l); ! } else if ((c == '&') && (ctxt->token != '&') && (what & XML_SUBSTITUTE_REF)) { if (xmlParserDebugEntities) xmlGenericError(xmlGenericErrorContext, --- 2780,2790 ---- (c != end2) && (c != end3)) { GROW; if (c == 0) break; ! if ((c == '&') && (NXT(1) == '#')) { int val = xmlParseCharRef(ctxt); COPY_BUF(0,buffer,nbchars,val); NEXTL(l); ! 
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) { if (xmlParserDebugEntities) xmlGenericError(xmlGenericErrorContext, *************** *** 3317,3545 **** deprecated = 1; } - #if 0 - xmlParserInputPtr input; - xmlChar *name; - xmlEntityPtr ent = NULL; - - if (ctxt->token != 0) { - return; - } - if (RAW != '&') return; - GROW; - if ((RAW == '&') && (NXT(1) == '#')) { - switch(ctxt->instate) { - case XML_PARSER_ENTITY_DECL: - case XML_PARSER_PI: - case XML_PARSER_CDATA_SECTION: - case XML_PARSER_COMMENT: - case XML_PARSER_SYSTEM_LITERAL: - /* we just ignore it there */ - return; - case XML_PARSER_START_TAG: - return; - case XML_PARSER_END_TAG: - return; - case XML_PARSER_EOF: - ctxt->errNo = XML_ERR_CHARREF_AT_EOF; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "CharRef at EOF\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - return; - case XML_PARSER_PROLOG: - case XML_PARSER_START: - case XML_PARSER_MISC: - ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - return; - case XML_PARSER_EPILOG: - ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - return; - case XML_PARSER_DTD: - ctxt->errNo = XML_ERR_CHARREF_IN_DTD; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "CharRef are forbidden in DTDs!\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - return; - case XML_PARSER_ENTITY_VALUE: - /* - * NOTE: in the case of entity values, we don't do the - * substitution here since we need the literal - * entity value to be able to save the internal - * subset of the document. 
- * This will be handled by xmlStringDecodeEntities - */ - return; - case XML_PARSER_CONTENT: - return; - case XML_PARSER_ATTRIBUTE_VALUE: - /* ctxt->token = xmlParseCharRef(ctxt); */ - return; - case XML_PARSER_IGNORE: - return; - } - return; - } - - switch(ctxt->instate) { - case XML_PARSER_CDATA_SECTION: - return; - case XML_PARSER_PI: - case XML_PARSER_COMMENT: - case XML_PARSER_SYSTEM_LITERAL: - case XML_PARSER_CONTENT: - return; - case XML_PARSER_START_TAG: - return; - case XML_PARSER_END_TAG: - return; - case XML_PARSER_EOF: - ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "Reference at EOF\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - return; - case XML_PARSER_PROLOG: - case XML_PARSER_START: - case XML_PARSER_MISC: - ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "Reference in prolog!\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - return; - case XML_PARSER_EPILOG: - ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "Reference in epilog!\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - return; - case XML_PARSER_ENTITY_VALUE: - /* - * NOTE: in the case of entity values, we don't do the - * substitution here since we need the literal - * entity value to be able to save the internal - * subset of the document. - * This will be handled by xmlStringDecodeEntities - */ - return; - case XML_PARSER_ATTRIBUTE_VALUE: - /* - * NOTE: in the case of attributes values, we don't do the - * substitution here unless we are in a mode where - * the parser is explicitly asked to substitute - * entities. The SAX callback is called with values - * without entity substitution. 
- * This will then be handled by xmlStringDecodeEntities - */ - return; - case XML_PARSER_ENTITY_DECL: - /* - * we just ignore it there - * the substitution will be done once the entity is referenced - */ - return; - case XML_PARSER_DTD: - ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Entity references are forbidden in DTDs!\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - return; - case XML_PARSER_IGNORE: - return; - } - - /* TODO: this seems not reached anymore .... Verify ... */ - xmlGenericError(xmlGenericErrorContext, - "Reached deprecated section in xmlParserHandleReference()\n"); - xmlGenericError(xmlGenericErrorContext, - "Please forward the document to daniel veillard com\n"); - xmlGenericError(xmlGenericErrorContext, - "indicating the version: %s, thanks !\n", xmlParserVersion); - NEXT; - name = xmlScanName(ctxt); - if (name == NULL) { - ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, "Entity reference: no name\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - ctxt->token = '&'; - return; - } - if (NXT(xmlStrlen(name)) != ';') { - ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Entity reference: ';' expected\n"); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - ctxt->token = '&'; - xmlFree(name); - return; - } - SKIP(xmlStrlen(name) + 1); - if (ctxt->sax != NULL) { - if (ctxt->sax->getEntity != NULL) - ent = ctxt->sax->getEntity(ctxt->userData, name); - } - - /* - * [ WFC: Entity Declared ] - * the Name given in the entity reference must match that in an entity - * declaration, except that well-formed documents need not declare any - * of the following entities: amp, lt, gt, apos, quot. 
- */ - if (ent == NULL) - ent = xmlGetPredefinedEntity(name); - if (ent == NULL) { - ctxt->errNo = XML_ERR_UNDECLARED_ENTITY; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Entity reference: entity %s not declared\n", - name); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - xmlFree(name); - return; - } - - /* - * [ WFC: Parsed Entity ] - * An entity reference must not contain the name of an unparsed entity - */ - if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) { - ctxt->errNo = XML_ERR_UNPARSED_ENTITY; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Entity reference to unparsed entity %s\n", name); - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; - } - - if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) { - ctxt->token = ent->content[0]; - xmlFree(name); - return; - } - input = xmlNewEntityInputStream(ctxt, ent); - xmlPushInput(ctxt, input); - xmlFree(name); - #endif return; } --- 3316,3321 ----
Attachment:
ctxt-token-patch-2.zip
Description: Zip archive