[libxml2] Cleanup some of the parser code
- From: Daniel Veillard <veillard src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libxml2] Cleanup some of the parser code
- Date: Wed, 15 Aug 2012 02:18:16 +0000 (UTC)
commit 1f972e9f2816e078d68954281a70e6e2448dc88b
Author: Daniel Veillard <veillard redhat com>
Date: Wed Aug 15 10:16:37 2012 +0800
Cleanup some of the parser code
Prefetching assumptions about the amount of data read in GROW
should be backed up with test for 0 termination when at the
end of the buffer.
parser.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++------
1 files changed, 47 insertions(+), 6 deletions(-)
---
diff --git a/parser.c b/parser.c
index 5b075ce..9a57b01 100644
--- a/parser.c
+++ b/parser.c
@@ -198,6 +198,17 @@ unsigned int xmlParserMaxDepth = 256;
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
+/**
+ * XML_PARSER_CHUNK_SIZE
+ *
+ * When calling GROW that's the minimal amount of data
+ * the parser expected to have received. It is not a hard
+ * limit but an optimization when reading strings like Names
+ * It is not strictly needed as long as inputs available characters
+ * are followed by 0, which should be provided by the I/O level
+ */
+#define XML_PARSER_CHUNK_SIZE 100
+
/*
* List of XML prefixed PI allowed by W3C specs
*/
@@ -3253,7 +3264,7 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
((c >= 0x10000) && (c <= 0xEFFFF))
)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
@@ -3279,7 +3290,7 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
(c == '_') || (c == ':') ||
(IS_COMBINING(c)) ||
(IS_EXTENDER(c)))) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF)
@@ -3288,6 +3299,13 @@ xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
len += l;
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
}
}
if ((len > XML_MAX_NAME_LENGTH) &&
@@ -3384,7 +3402,7 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
if ((len > XML_MAX_NAME_LENGTH) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) {
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
@@ -3398,6 +3416,13 @@ xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
len += l;
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
}
if ((len > XML_MAX_NAME_LENGTH) &&
((ctxt->options & XML_PARSE_HUGE) == 0)) {
@@ -3630,13 +3655,20 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
c = CUR_CHAR(l);
while (xmlIsNameChar(ctxt, c)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
}
COPY_BUF(l,buf,len,c);
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF)
+ return(NULL);
+ c = CUR_CHAR(l);
+ }
if (len >= XML_MAX_NAMELEN) {
/*
* Okay someone managed to make a huge token, so he's ready to pay
@@ -3652,7 +3684,7 @@ xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
}
memcpy(buffer, buf, len);
while (xmlIsNameChar(ctxt, c)) {
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF) {
@@ -7993,7 +8025,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
(IS_CHAR(c))) {
xmlBufferAdd(buf, ctxt->input->cur, l);
- if (count++ > 100) {
+ if (count++ > XML_PARSER_CHUNK_SIZE) {
count = 0;
GROW;
if (ctxt->instate == XML_PARSER_EOF) {
@@ -8003,6 +8035,15 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
}
NEXTL(l);
c = CUR_CHAR(l);
+ if (c == 0) {
+ count = 0;
+ GROW;
+ if (ctxt->instate == XML_PARSER_EOF) {
+ xmlBufferFree(buf);
+ return(-1);
+ }
+ c = CUR_CHAR(l);
+ }
}
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]