[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

Re: [xml] ctxt->token patches



Hi Daniel,

>   Hum, can you tell where this occurred?

It seems beneficial to test for ctxt->token in front
of a large list of other "else if" conditionals.

I've attached a step-2 patch which removes all remaining 
ctxt->token uses in parser.c and parserInternals.c (except
the initializing to 0). htmlparser.c and docbookparser.c will
be treated in the next patch.

This second patch eats about 60% of the performance gains
of the first, but I fully agree that it's nonsense to leave voodoo
statements in the code.

To proceed further in performance enhancement I need to
do better profiling and perhaps look for more intrusive
changes in the parser.

Regards,
Peter Jacobi



*** after-step1\parser.c	Fri Jun 28 12:48:06 2002
--- parser.c	Fri Jun 28 16:29:41 2002
***************
*** 316,322 ****
      if (*(ctxt->input->cur) == '\n') {					\
  	ctxt->input->line++; ctxt->input->col = 1;			\
      } else ctxt->input->col++;						\
!     ctxt->token = 0; ctxt->input->cur += l;				\
      if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    } while (0)
  
--- 316,322 ----
      if (*(ctxt->input->cur) == '\n') {					\
  	ctxt->input->line++; ctxt->input->col = 1;			\
      } else ctxt->input->col++;						\
!     ctxt->input->cur += l;				\
      if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    } while (0)
  
***************
*** 341,352 ****
  xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
      int res = 0;
  
-     if (ctxt->token != 0) {
- 	if (!IS_BLANK(ctxt->token))
- 	    return(0);
- 	ctxt->token = 0;
- 	res++;
-     }
      /*
       * It's Okay to use CUR/NEXT here since all the blanks are on
       * the ASCII range.
--- 341,346 ----
***************
*** 465,475 ****
      unsigned int val = 0;
      int count = 0;
  
-     if (ctxt->token != 0) {
- 	val = ctxt->token;
-         ctxt->token = 0;
-         return(val);
-     }
      /*
       * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
       */
--- 459,464 ----
***************
*** 754,762 ****
      xmlEntityPtr entity = NULL;
      xmlParserInputPtr input;
  
-     if (ctxt->token != 0) {
-         return;
-     }	
      if (RAW != '%') return;
      switch(ctxt->instate) {
  	case XML_PARSER_CDATA_SECTION:
--- 743,748 ----
***************
*** 2363,2370 ****
       * OK loop until we reach one of the ending char or a size limit.
       */
      c = CUR_CHAR(l);
!     while (((NXT(0) != limit) && /* checked */
! 	   (c != '<')) || (ctxt->token != 0)) {
  	if (c == 0) break;
  	if (c == '&') {
  	    if (NXT(1) == '#') {
--- 2349,2356 ----
       * OK loop until we reach one of the ending char or a size limit.
       */
      c = CUR_CHAR(l);
!     while ((NXT(0) != limit) && /* checked */
! 	   (c != '<')) {
  	if (c == 0) break;
  	if (c == '&') {
  	    if (NXT(1) == '#') {
***************
*** 2685,2691 ****
       * Accelerated common case where input don't need to be
       * modified before passing it to the handler.
       */
!     if ((ctxt->token == 0) && (!cdata)) {
  	in = ctxt->input->cur;
  	do {
  get_more:
--- 2671,2677 ----
       * Accelerated common case where input don't need to be
       * modified before passing it to the handler.
       */
!     if (!cdata) {
  	in = ctxt->input->cur;
  	do {
  get_more:
***************
*** 2777,2784 ****
      SHRINK;
      GROW;
      cur = CUR_CHAR(l);
!     while (((cur != '<') || (ctxt->token == '<')) && /* checked */
! 	((cur != '&') || (ctxt->token == '&')) && 
  	(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
  	if ((cur == ']') && (NXT(1) == ']') &&
  	    (NXT(2) == '>')) {
--- 2763,2770 ----
      SHRINK;
      GROW;
      cur = CUR_CHAR(l);
!     while ((cur != '<') && /* checked */
! 	(cur != '&') && 
  	(IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
  	if ((cur == ']') && (NXT(1) == ']') &&
  	    (NXT(2) == '>')) {
***************
*** 4938,4944 ****
  	       (NXT(2) != '>'))) {
  	    const xmlChar *check = CUR_PTR;
  	    int cons = ctxt->input->consumed;
- 	    int tok = ctxt->token;
  
  	    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
  		xmlParseConditionalSections(ctxt);
--- 4924,4929 ----
***************
*** 4955,4962 ****
  	    while ((RAW == 0) && (ctxt->inputNr > 1))
  		xmlPopInput(ctxt);
  
! 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
! 		(tok == ctxt->token)) {
  		ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
  		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
  		    ctxt->sax->error(ctxt->userData,
--- 4940,4946 ----
  	    while ((RAW == 0) && (ctxt->inputNr > 1))
  		xmlPopInput(ctxt);
  
! 	    if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  		ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
  		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
  		    ctxt->sax->error(ctxt->userData,
***************
*** 5248,5254 ****
  	   (RAW == '%') || IS_BLANK(CUR)) {
  	const xmlChar *check = CUR_PTR;
  	int cons = ctxt->input->consumed;
- 	int tok = ctxt->token;
  
  	GROW;
          if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
--- 5232,5237 ----
***************
*** 5266,5273 ****
  	while ((RAW == 0) && (ctxt->inputNr > 1))
  	    xmlPopInput(ctxt);
  
! 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
! 	    (tok == ctxt->token)) {
  	    ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
  	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
  		ctxt->sax->error(ctxt->userData,
--- 5249,5255 ----
  	while ((RAW == 0) && (ctxt->inputNr > 1))
  	    xmlPopInput(ctxt);
  
! 	if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
  	    ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
  	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
  		ctxt->sax->error(ctxt->userData,
***************
*** 6869,6883 ****
  	const xmlChar *cur = ctxt->input->cur;
  
  	/*
- 	 * Handle  possible processed charrefs.
- 	 */
- 	if (ctxt->token != 0) {
- 	    xmlParseCharData(ctxt, 0);
- 	}
- 	/*
  	 * First case : a Processing Instruction.
  	 */
! 	else if ((*cur == '<') && (cur[1] == '?')) {
  	    xmlParsePI(ctxt);
  	}
  
--- 6851,6859 ----
  	const xmlChar *cur = ctxt->input->cur;
  
  	/*
  	 * First case : a Processing Instruction.
  	 */
! 	if ((*cur == '<') && (cur[1] == '?')) {
  	    xmlParsePI(ctxt);
  	}
  
***************
*** 8549,8568 ****
              case XML_PARSER_CONTENT: {
  		const xmlChar *test;
  		int cons;
- 		int tok;
- 
-                 /*
- 		 * Handle preparsed entities and charRef
- 		 */
- 		if (ctxt->token != 0) {
- 		    xmlChar current[2] = { 0 , 0 } ;
- 
- 		    current[0] = (xmlChar) ctxt->token;
- 		    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
- 			(ctxt->sax->characters != NULL))
- 			ctxt->sax->characters(ctxt->userData, current, 1);
- 		    ctxt->token = 0;
- 		}
  		if ((avail < 2) && (ctxt->inputNr == 1))
  		    goto done;
  		cur = ctxt->input->cur[0];
--- 8525,8530 ----
***************
*** 8570,8576 ****
  
  		test = CUR_PTR;
  	        cons = ctxt->input->consumed;
- 	        tok = ctxt->token;
  	        if ((cur == '<') && (next == '?')) {
  		    if ((!terminate) &&
  		        (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
--- 8532,8537 ----
***************
*** 8660,8667 ****
  		 */
  		while ((RAW == 0) && (ctxt->inputNr > 1))
  		    xmlPopInput(ctxt);
! 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
! 		    (tok == ctxt->token)) {
  		    ctxt->errNo = XML_ERR_INTERNAL_ERROR;
  		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
  			ctxt->sax->error(ctxt->userData,
--- 8621,8627 ----
  		 */
  		while ((RAW == 0) && (ctxt->inputNr > 1))
  		    xmlPopInput(ctxt);
! 		if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
  		    ctxt->errNo = XML_ERR_INTERNAL_ERROR;
  		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
  			ctxt->sax->error(ctxt->userData,
*** after-step1\parserInternals.c	Fri Jun 28 10:23:45 2002
--- parserInternals.c	Fri Jun 28 16:29:41 2002
***************
*** 1110,1117 ****
       *   literal #xD, an XML processor must pass to the application
       *   the single character #xA. 
       */
!     if (ctxt->token != 0) ctxt->token = 0;
!     else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
  	if ((*ctxt->input->cur == 0) &&
  	    (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
  	    (ctxt->instate != XML_PARSER_COMMENT)) {
--- 1110,1116 ----
       *   literal #xD, an XML processor must pass to the application
       *   the single character #xA. 
       */
!     if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
  	if ((*ctxt->input->cur == 0) &&
  	    (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
  	    (ctxt->instate != XML_PARSER_COMMENT)) {
***************
*** 2781,2791 ****
             (c != end2) && (c != end3)) {
  	GROW;
  	if (c == 0) break;
!         if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
  	    int val = xmlParseCharRef(ctxt);
  	    COPY_BUF(0,buffer,nbchars,val);
  	    NEXTL(l);
! 	} else if ((c == '&') && (ctxt->token != '&') &&
  		   (what & XML_SUBSTITUTE_REF)) {
  	    if (xmlParserDebugEntities)
  		xmlGenericError(xmlGenericErrorContext,
--- 2780,2790 ----
             (c != end2) && (c != end3)) {
  	GROW;
  	if (c == 0) break;
!         if ((c == '&') && (NXT(1) == '#')) {
  	    int val = xmlParseCharRef(ctxt);
  	    COPY_BUF(0,buffer,nbchars,val);
  	    NEXTL(l);
! 	} else if ((c == '&') &&
  		   (what & XML_SUBSTITUTE_REF)) {
  	    if (xmlParserDebugEntities)
  		xmlGenericError(xmlGenericErrorContext,
***************
*** 3317,3545 ****
  	deprecated = 1;
      }
  
- #if 0
-     xmlParserInputPtr input;
-     xmlChar *name;
-     xmlEntityPtr ent = NULL;
- 
-     if (ctxt->token != 0) {
-         return;
-     }	
-     if (RAW != '&') return;
-     GROW;
-     if ((RAW == '&') && (NXT(1) == '#')) {
- 	switch(ctxt->instate) {
- 	    case XML_PARSER_ENTITY_DECL:
- 	    case XML_PARSER_PI:
- 	    case XML_PARSER_CDATA_SECTION:
- 	    case XML_PARSER_COMMENT:
- 	    case XML_PARSER_SYSTEM_LITERAL:
- 		/* we just ignore it there */
- 		return;
- 	    case XML_PARSER_START_TAG:
- 		return;
- 	    case XML_PARSER_END_TAG:
- 		return;
- 	    case XML_PARSER_EOF:
- 		ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
- 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 		    ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
- 		ctxt->wellFormed = 0;
- 		ctxt->disableSAX = 1;
- 		return;
- 	    case XML_PARSER_PROLOG:
- 	    case XML_PARSER_START:
- 	    case XML_PARSER_MISC:
- 		ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
- 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 		    ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
- 		ctxt->wellFormed = 0;
- 		ctxt->disableSAX = 1;
- 		return;
- 	    case XML_PARSER_EPILOG:
- 		ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
- 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 		    ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
- 		ctxt->wellFormed = 0;
- 		ctxt->disableSAX = 1;
- 		return;
- 	    case XML_PARSER_DTD:
- 		ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
- 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 		    ctxt->sax->error(ctxt->userData, 
- 		           "CharRef are forbidden in DTDs!\n");
- 		ctxt->wellFormed = 0;
- 		ctxt->disableSAX = 1;
- 		return;
- 	    case XML_PARSER_ENTITY_VALUE:
- 	        /*
- 		 * NOTE: in the case of entity values, we don't do the
- 		 *       substitution here since we need the literal
- 		 *       entity value to be able to save the internal
- 		 *       subset of the document.
- 		 *       This will be handled by xmlStringDecodeEntities
- 		 */
- 		return;
- 	    case XML_PARSER_CONTENT:
- 		return;
- 	    case XML_PARSER_ATTRIBUTE_VALUE:
- 		/* ctxt->token = xmlParseCharRef(ctxt); */
- 		return;
-             case XML_PARSER_IGNORE:
- 	        return;
- 	}
- 	return;
-     }
- 
-     switch(ctxt->instate) {
- 	case XML_PARSER_CDATA_SECTION:
- 	    return;
- 	case XML_PARSER_PI:
-         case XML_PARSER_COMMENT:
- 	case XML_PARSER_SYSTEM_LITERAL:
-         case XML_PARSER_CONTENT:
- 	    return;
- 	case XML_PARSER_START_TAG:
- 	    return;
- 	case XML_PARSER_END_TAG:
- 	    return;
-         case XML_PARSER_EOF:
- 	    ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
- 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 	        ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
- 	    ctxt->wellFormed = 0;
- 	    ctxt->disableSAX = 1;
- 	    return;
-         case XML_PARSER_PROLOG:
- 	case XML_PARSER_START:
- 	case XML_PARSER_MISC:
- 	    ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
- 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 	        ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
- 	    ctxt->wellFormed = 0;
- 	    ctxt->disableSAX = 1;
- 	    return;
-         case XML_PARSER_EPILOG:
- 	    ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
- 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 	        ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
- 	    ctxt->wellFormed = 0;
- 	    ctxt->disableSAX = 1;
- 	    return;
- 	case XML_PARSER_ENTITY_VALUE:
- 	    /*
- 	     * NOTE: in the case of entity values, we don't do the
- 	     *       substitution here since we need the literal
- 	     *       entity value to be able to save the internal
- 	     *       subset of the document.
- 	     *       This will be handled by xmlStringDecodeEntities
- 	     */
- 	    return;
-         case XML_PARSER_ATTRIBUTE_VALUE:
- 	    /*
- 	     * NOTE: in the case of attributes values, we don't do the
- 	     *       substitution here unless we are in a mode where
- 	     *       the parser is explicitly asked to substitute
- 	     *       entities. The SAX callback is called with values
- 	     *       without entity substitution.
- 	     *       This will then be handled by xmlStringDecodeEntities
- 	     */
- 	    return;
- 	case XML_PARSER_ENTITY_DECL:
- 	    /*
- 	     * we just ignore it there
- 	     * the substitution will be done once the entity is referenced
- 	     */
- 	    return;
-         case XML_PARSER_DTD:
- 	    ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
- 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 		ctxt->sax->error(ctxt->userData, 
- 		       "Entity references are forbidden in DTDs!\n");
- 	    ctxt->wellFormed = 0;
- 	    ctxt->disableSAX = 1;
- 	    return;
-         case XML_PARSER_IGNORE:
- 	    return;
-     }
- 
- /* TODO: this seems not reached anymore .... Verify ... */
- xmlGenericError(xmlGenericErrorContext,
- 	"Reached deprecated section in xmlParserHandleReference()\n");
- xmlGenericError(xmlGenericErrorContext,
- 	"Please forward the document to daniel veillard com\n");
- xmlGenericError(xmlGenericErrorContext,
- 	"indicating the version: %s, thanks !\n", xmlParserVersion);
-     NEXT;
-     name = xmlScanName(ctxt);
-     if (name == NULL) {
- 	ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
- 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 	    ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
- 	ctxt->wellFormed = 0;
- 	ctxt->disableSAX = 1;
- 	ctxt->token = '&';
- 	return;
-     }
-     if (NXT(xmlStrlen(name)) != ';') {
- 	ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
- 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 	    ctxt->sax->error(ctxt->userData, 
- 	                     "Entity reference: ';' expected\n");
- 	ctxt->wellFormed = 0;
- 	ctxt->disableSAX = 1;
- 	ctxt->token = '&';
- 	xmlFree(name);
- 	return;
-     }
-     SKIP(xmlStrlen(name) + 1);
-     if (ctxt->sax != NULL) {
- 	if (ctxt->sax->getEntity != NULL)
- 	    ent = ctxt->sax->getEntity(ctxt->userData, name);
-     }
- 
-     /*
-      * [ WFC: Entity Declared ]
-      * the Name given in the entity reference must match that in an entity
-      * declaration, except that well-formed documents need not declare any
-      * of the following entities: amp, lt, gt, apos, quot. 
-      */
-     if (ent == NULL)
- 	ent = xmlGetPredefinedEntity(name);
-     if (ent == NULL) {
-         ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
- 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 	    ctxt->sax->error(ctxt->userData, 
- 			     "Entity reference: entity %s not declared\n",
- 			     name);
- 	ctxt->wellFormed = 0;
- 	ctxt->disableSAX = 1;
- 	xmlFree(name);
- 	return;
-     }
- 
-     /*
-      * [ WFC: Parsed Entity ]
-      * An entity reference must not contain the name of an unparsed entity
-      */
-     if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
-         ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
- 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- 	    ctxt->sax->error(ctxt->userData, 
- 			 "Entity reference to unparsed entity %s\n", name);
- 	ctxt->wellFormed = 0;
- 	ctxt->disableSAX = 1;
-     }
- 
-     if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
-         ctxt->token = ent->content[0];
- 	xmlFree(name);
- 	return;
-     }
-     input = xmlNewEntityInputStream(ctxt, ent);
-     xmlPushInput(ctxt, input);
-     xmlFree(name);
- #endif
      return;
  }
  
--- 3316,3321 ----

Attachment: ctxt-token-patch-2.zip
Description: Zip archive



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]