libxml2 r3704 - trunk
- From: veillard svn gnome org
- To: svn-commits-list gnome org
- Subject: libxml2 r3704 - trunk
- Date: Wed, 12 Mar 2008 21:43:39 +0000 (GMT)
Author: veillard
Date: Wed Mar 12 21:43:39 2008
New Revision: 3704
URL: http://svn.gnome.org/viewvc/libxml2?rev=3704&view=rev
Log:
* HTMLparser.c: patch from Arnold Hendriks improving parsing of
html within html bogus data, still not a complete fix though
Daniel
Modified:
trunk/ChangeLog
trunk/HTMLparser.c
Modified: trunk/HTMLparser.c
==============================================================================
--- trunk/HTMLparser.c (original)
+++ trunk/HTMLparser.c Wed Mar 12 21:43:39 2008
@@ -3423,7 +3423,7 @@
*
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
*
- * Returns 0 in case of success and -1 in case of error.
+ * Returns 0 in case of success, -1 in case of error and 1 if discarded
*/
static int
@@ -3436,6 +3436,7 @@
int maxatts;
int meta = 0;
int i;
+ int discardtag = 0;
if ((ctxt == NULL) || (ctxt->input == NULL)) {
htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR,
@@ -3480,14 +3481,14 @@
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <html> tag\n",
name, NULL);
- return 0;
+ discardtag = 1;
}
if ((ctxt->nameNr != 1) &&
(xmlStrEqual(name, BAD_CAST"head"))) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <head> tag\n",
name, NULL);
- return 0;
+ discardtag = 1;
}
if (xmlStrEqual(name, BAD_CAST"body")) {
int indx;
@@ -3496,9 +3497,7 @@
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"htmlParseStartTag: misplaced <body> tag\n",
name, NULL);
- while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
- NEXT;
- return 0;
+ discardtag = 1;
}
}
}
@@ -3597,12 +3596,14 @@
/*
* SAX: Start of Element !
*/
- htmlnamePush(ctxt, name);
- if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
- if (nbatts != 0)
- ctxt->sax->startElement(ctxt->userData, name, atts);
- else
- ctxt->sax->startElement(ctxt->userData, name, NULL);
+ if (!discardtag) {
+ htmlnamePush(ctxt, name);
+ if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) {
+ if (nbatts != 0)
+ ctxt->sax->startElement(ctxt->userData, name, atts);
+ else
+ ctxt->sax->startElement(ctxt->userData, name, NULL);
+ }
}
if (atts != NULL) {
@@ -3612,7 +3613,7 @@
}
}
- return 0;
+ return(discardtag);
}
/**
@@ -3991,7 +3992,7 @@
failed = htmlParseStartTag(ctxt);
name = ctxt->name;
- if (failed || (name == NULL)) {
+ if ((failed == -1) || (name == NULL)) {
if (CUR == '>')
NEXT;
return;
@@ -4893,7 +4894,7 @@
failed = htmlParseStartTag(ctxt);
name = ctxt->name;
- if (failed ||
+ if ((failed == -1) ||
(name == NULL)) {
if (CUR == '>')
NEXT;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]