Count entity references while parsing and abort with XML_ERR_ENTITY_LOOP
when the amount of entity expansion is out of proportion to the input
consumed (see xmlParserEntityCheck() in the first parser.c hunk).

NOTE(review): this file was re-broken into lines from a whitespace-collapsed
copy.  Leading indentation and blank context lines are reconstructed, so
apply it with "patch -l".  The "#include <zlib.h>" context line of the first
parser.c hunk had lost its header name; confirm it against the tree.

Index: entities.c
===================================================================
--- entities.c (revision 3771)
+++ entities.c (working copy)
@@ -102,7 +102,7 @@ xmlFreeEntity(xmlEntityPtr entity)
         dict = entity->doc->dict;
 
 
-    if ((entity->children) && (entity->owner == 1) &&
+    if ((entity->children) && (entity->owner != 0) &&
         (entity == (xmlEntityPtr) entity->children->parent))
         xmlFreeNodeList(entity->children);
     if (dict != NULL) {
Index: parserInternals.c
===================================================================
--- parserInternals.c (revision 3771)
+++ parserInternals.c (working copy)
@@ -1670,6 +1670,8 @@ xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
     ctxt->depth = 0;
     ctxt->charset = XML_CHAR_ENCODING_UTF8;
     ctxt->catalogs = NULL;
+    ctxt->nbentities = 0;
+    ctxt->sizeentities = 0;
     xmlInitNodeInfoSeq(&ctxt->node_seq);
     return(0);
 }
--- include/libxml/parser.h.orig 2008-08-28 21:31:24.000000000 +0200
+++ include/libxml/parser.h 2008-08-28 20:55:09.000000000 +0200
@@ -297,6 +297,8 @@ struct _xmlParserCtxt {
      */
     xmlError lastError;
     xmlParserMode parseMode; /* the parser mode */
+    unsigned long nbentities; /* number of entity references */
+    unsigned long sizeentities; /* size of parsed entities */
 };
 
 /**
--- parser.c.orig 2006-04-23 11:39:15.000000000 +0200
+++ parser.c 2008-08-28 21:36:27.000000000 +0200
@@ -80,6 +80,95 @@
 #include <zlib.h>
 #endif
 
+static void
+xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
+
+/************************************************************************
+ *                                                                      *
+ *      Arbitrary limits set in the parser.                             *
+ *                                                                      *
+ ************************************************************************/
+
+#define XML_PARSER_BIG_ENTITY 1000
+#define XML_PARSER_LOT_ENTITY 5000
+
+/*
+ * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
+ *    replacement over the size in byte of the input indicates that you have
+ *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
+ *    replacements per byte of input.
+ */
+#define XML_PARSER_NON_LINEAR 10
+
+/*
+ * xmlParserEntityCheck
+ *
+ * Check for non-linear entity expansion: @size is the replacement size
+ * built so far, or 0 to use the reference count stored in @ent->owner.
+ * This is not a limitation of the parser but a safety boundary feature.
+ * Returns 1 if XML_ERR_ENTITY_LOOP is (or already was) raised, else 0.
+ */
+static int
+xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
+                     xmlEntityPtr ent)
+{
+    unsigned long consumed = 0;
+
+    if (ctxt == NULL)
+        return (0);
+    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
+        return (1);
+    if (size != 0) {
+        /*
+         * Do the check based on the replacement size of the entity
+         */
+        if (size < XML_PARSER_BIG_ENTITY)
+            return(0);
+
+        /*
+         * A limit on the amount of text data reasonably used
+         */
+        if (ctxt->input != NULL) {
+            consumed = ctxt->input->consumed +
+                       (ctxt->input->cur - ctxt->input->base);
+        }
+        consumed += ctxt->sizeentities;
+
+        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
+            (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
+            return (0);
+    } else if (ent != NULL) {
+        /*
+         * use the number of parsed entities in the replacement
+         */
+        size = ent->owner;
+
+        /*
+         * The amount of data parsed counting entities size only once
+         */
+        if (ctxt->input != NULL) {
+            consumed = ctxt->input->consumed +
+                (ctxt->input->cur - ctxt->input->base);
+        }
+        consumed += ctxt->sizeentities;
+
+        /*
+         * Check the density of entities for the amount of data
+         * knowing an entity reference will take at least 3 bytes
+         */
+        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
+            return (0);
+    } else {
+        /*
+         * strange we got no data for checking just return
+         */
+        return (0);
+    }
+
+    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
+    return (1);
+}
+
 /**
  * xmlParserMaxDepth:
  *
@@ -2212,6 +2301,10 @@ xmlStringLenDecodeEntities(xmlParserCtxt
                         "String decoding Entity Reference: %.30s\n",
                         str);
             ent = xmlParseStringEntityRef(ctxt, &str);
+            if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
+                goto int_error;
+            if (ent != NULL)
+                ctxt->nbentities += ent->owner;
             if ((ent != NULL) &&
                 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                 if (ent->content != NULL) {
@@ -2236,6 +2329,10 @@ xmlStringLenDecodeEntities(xmlParserCtxt
                         buffer[nbchars++] = *current++;
                         if (nbchars >
                             buffer_size - XML_PARSER_BUFFER_SIZE) {
+                            if (xmlParserEntityCheck(ctxt, nbchars, ent)) {
+                                xmlFree(rep);
+                                goto int_error;
+                            }
                             growBuffer(buffer);
                         }
                     }
@@ -2258,6 +2355,10 @@ xmlStringLenDecodeEntities(xmlParserCtxt
                 xmlGenericError(xmlGenericErrorContext,
                         "String decoding PE Reference: %.30s\n", str);
             ent = xmlParseStringPEReference(ctxt, &str);
+            if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
+                goto int_error;
+            if (ent != NULL)
+                ctxt->nbentities += ent->owner;
             if (ent != NULL) {
                 xmlChar *rep;
 
@@ -2271,6 +2372,10 @@ xmlStringLenDecodeEntities(xmlParserCtxt
                         buffer[nbchars++] = *current++;
                         if (nbchars >
                             buffer_size - XML_PARSER_BUFFER_SIZE) {
+                            if (xmlParserEntityCheck(ctxt, nbchars, ent)) {
+                                xmlFree(rep);
+                                goto int_error;
+                            }
                             growBuffer(buffer);
                         }
                     }
@@ -2294,6 +2399,9 @@ xmlStringLenDecodeEntities(xmlParserCtxt
 
 mem_error:
     xmlErrMemory(ctxt, NULL);
+int_error:
+    if (buffer != NULL)
+        xmlFree(buffer);
     return(NULL);
 }
 
@@ -3100,6 +3208,9 @@ xmlParseAttValueComplex(xmlParserCtxtPtr
                 }
             } else {
                 ent = xmlParseEntityRef(ctxt);
+                ctxt->nbentities++;
+                if (ent != NULL)
+                    ctxt->nbentities += ent->owner;
                 if ((ent != NULL) &&
                     (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
                     if (len > buf_size - 10) {
@@ -4342,6 +4453,7 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt
     int isParameter = 0;
     xmlChar *orig = NULL;
     int skipped;
+    unsigned long oldnbent = ctxt->nbentities;
 
     /* GROW; done in the caller */
     if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
@@ -4551,6 +4663,11 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt
             }
         }
         if (cur != NULL) {
+            if ((cur->owner != 0) || (cur->children == NULL)) {
+                cur->owner = ctxt->nbentities - oldnbent;
+                if (cur->owner == 0)
+                    cur->owner = 1;
+            }
             if (cur->orig != NULL)
                 xmlFree(orig);
             else
@@ -5976,7 +6093,8 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
                         (ent->children == NULL)) {
                         ent->children = list;
                         ent->last = list;
-                        ent->owner = 1;
+                        if (ent->owner == 0)
+                            ent->owner = 1;
                         list->parent = (xmlNodePtr) ent;
                     } else {
                         xmlFreeNodeList(list);
@@ -5985,6 +6103,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
                     xmlFreeNodeList(list);
                 }
             } else {
+                unsigned long oldnbent = ctxt->nbentities;
                 /*
                  * 4.3.2: An internal general parsed entity is well-formed
                  * if its replacement text matches the production labeled
@@ -6007,6 +6126,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
                     ret = xmlParseBalancedChunkMemoryInternal(ctxt,
                                        value, user_data, &list);
                     ctxt->depth--;
+
                 } else if (ent->etype ==
                            XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
                     ctxt->depth++;
@@ -6019,6 +6139,24 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
                     xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
                                  "invalid entity type found\n", NULL);
                 }
+                /*
+                 * Store the number of entities needing parsing for entity
+                 * content and run the checks
+                 */
+                if ((ent->owner != 0) || (ent->children == NULL)) {
+                    ent->owner = ctxt->nbentities - oldnbent;
+                    if (ent->owner == 0)
+                        ent->owner = 1;
+                }
+                if (ret == XML_ERR_ENTITY_LOOP) {
+                    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
+                    xmlFreeNodeList(list);
+                    return;
+                }
+                if (xmlParserEntityCheck(ctxt, 0, ent)) {
+                    xmlFreeNodeList(list);
+                    return;
+                }
                 if (ret == XML_ERR_ENTITY_LOOP) {
                     xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
                     return;
@@ -6037,7 +6175,8 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
                             (ctxt->parseMode == XML_PARSE_READER)) {
                             list->parent = (xmlNodePtr) ent;
                             list = NULL;
-                            ent->owner = 1;
+                            if (ent->owner == 0)
+                                ent->owner = 1;
                         } else {
                             ent->owner = 0;
                             while (list != NULL) {
@@ -6054,7 +6193,8 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
 #endif /* LIBXML_LEGACY_ENABLED */
                         }
                     } else {
-                        ent->owner = 1;
+                        if (ent->owner == 0)
+                            ent->owner = 1;
                         while (list != NULL) {
                             list->parent = (xmlNodePtr) ent;
                             if (list->next == NULL)
@@ -6074,6 +6214,8 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
                     list = NULL;
                 }
             }
+        } else if (ent->owner != 1) {
+            ctxt->nbentities += ent->owner;
         }
         if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
             (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
@@ -6176,7 +6318,8 @@ xmlParseReference(xmlParserCtxtPtr ctxt)
                             break;
                         cur = next;
                     }
-                    ent->owner = 1;
+                    if (ent->owner == 0)
+                        ent->owner = 1;
 #ifdef LIBXML_LEGACY_ENABLED
                     if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
                         xmlAddEntityReference(ent, firstChild, nw);
@@ -6291,6 +6434,11 @@ xmlParseEntityRef(xmlParserCtxtPtr ctxt)
         if (RAW == ';') {
             NEXT;
             /*
+             * Increase the number of entity references parsed
+             */
+            ctxt->nbentities++;
+
+            /*
              * Ask first SAX for entity resolution, otherwise try the
              * predefined set.
              */
@@ -6462,6 +6610,10 @@ xmlParseStringEntityRef(xmlParserCtxtPtr
         if (*ptr == ';') {
             ptr++;
             /*
+             * Increase the number of entity references parsed
+             */
+            ctxt->nbentities++;
+            /*
              * Ask first SAX for entity resolution, otherwise try the
              * predefined set.
              */
@@ -6623,6 +6775,11 @@ xmlParsePEReference(xmlParserCtxtPtr ctx
     } else {
         if (RAW == ';') {
             NEXT;
+            /*
+             * Increase the number of entity references parsed
+             */
+            ctxt->nbentities++;
+
             if ((ctxt->sax != NULL) &&
                 (ctxt->sax->getParameterEntity != NULL))
                 entity = ctxt->sax->getParameterEntity(ctxt->userData,
@@ -6753,6 +6910,11 @@ xmlParseStringPEReference(xmlParserCtxtP
         if (cur == ';') {
             ptr++;
             cur = *ptr;
+            /*
+             * Increase the number of entity references parsed
+             */
+            ctxt->nbentities++;
+
             if ((ctxt->sax != NULL) &&
                 (ctxt->sax->getParameterEntity != NULL))
                 entity = ctxt->sax->getParameterEntity(ctxt->userData,
@@ -11358,11 +11520,30 @@ xmlParseExternalEntityPrivate(xmlDocPtr
         }
         ret = XML_ERR_OK;
     }
+
+    /*
+     * Record in the parent context the number of entity replacements
+     * done when parsing that reference.
+     */
+    oldctxt->nbentities += ctxt->nbentities;
+    /*
+     * Also record the size of the entity parsed
+     */
+    if (ctxt->input != NULL) {
+        oldctxt->sizeentities += ctxt->input->consumed;
+        oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
+    }
+    /*
+     * And record the last error if any
+     */
+    if (ctxt->lastError.code != XML_ERR_OK)
+        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
+
     if (sax != NULL)
         ctxt->sax = oldsax;
     oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
     oldctxt->node_seq.length = ctxt->node_seq.length;
     oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
     ctxt->node_seq.maximum = 0;
     ctxt->node_seq.length = 0;
     ctxt->node_seq.buffer = NULL;
@@ -11587,6 +11768,17 @@ xmlParseBalancedChunkMemoryInternal(xmlP
         ctxt->myDoc->last = last;
     }
 
+    /*
+     * Record in the parent context the number of entity replacements
+     * done when parsing that reference.
+     */
+    oldctxt->nbentities += ctxt->nbentities;
+    /*
+     * Also record the last error if any
+     */
+    if (ctxt->lastError.code != XML_ERR_OK)
+        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
+
     ctxt->sax = oldsax;
     ctxt->dict = NULL;
     ctxt->attsDefault = NULL;
@@ -12883,6 +13075,8 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
     ctxt->depth = 0;
     ctxt->charset = XML_CHAR_ENCODING_UTF8;
     ctxt->catalogs = NULL;
+    ctxt->nbentities = 0;
+    ctxt->sizeentities = 0;
     xmlInitNodeInfoSeq(&ctxt->node_seq);
 
     if (ctxt->attsDefault != NULL) {