[xml] [PATCH] Javascript wrapped up in comments
- From: The Dude <reflexionsniveau web de>
- To: xml gnome org
- Subject: [xml] [PATCH] Javascript wrapped up in comments
- Date: Thu, 08 Nov 2001 14:06:29 +0100
Hi,
yet another little fix for HTMLparser.c:
The following Javascript is not parsed correctly:
<script>
<!--
self.document.write("<b></b>");
-->
</script>
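The attached patch fixes this: when htmlParseScript encounters "<!--" it now stops and hands the comment-wrapped JavaScript to htmlParseComment (which is moved above htmlParseScript so that it is defined before use).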
Greetings, Bastian Kleineidam
--
.~.
/V\ Unleash the power. Use Linux.
/( )\
^^-^^
--- ../libxml2-2.4.9.orig/HTMLparser.c Tue Oct 30 04:35:05 2001
+++ HTMLparser.c Thu Nov 8 13:56:38 2001
@@ -2269,6 +2269,91 @@
}
/**
+ * htmlParseComment:
+ * @ctxt: an HTML parser context
+ *
+ * Parse an XML (SGML) comment <!-- .... -->
+ *
+ * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+ */
+static void
+htmlParseComment(htmlParserCtxtPtr ctxt) {
+ xmlChar *buf = NULL;
+ int len;
+ int size = HTML_PARSER_BUFFER_SIZE;
+ int q, ql;
+ int r, rl;
+ int cur, l;
+ xmlParserInputState state;
+
+ /*
+ * Check that there is a comment right here.
+ */
+ if ((RAW != '<') || (NXT(1) != '!') ||
+ (NXT(2) != '-') || (NXT(3) != '-')) return;
+
+ state = ctxt->instate;
+ ctxt->instate = XML_PARSER_COMMENT;
+ SHRINK;
+ SKIP(4);
+ buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
+ if (buf == NULL) {
+ xmlGenericError(xmlGenericErrorContext,
+ "malloc of %d byte failed\n", size);
+ ctxt->instate = state;
+ return;
+ }
+ q = CUR_CHAR(ql);
+ NEXTL(ql);
+ r = CUR_CHAR(rl);
+ NEXTL(rl);
+ cur = CUR_CHAR(l);
+ len = 0;
+ while (IS_CHAR(cur) &&
+ ((cur != '>') ||
+ (r != '-') || (q != '-'))) {
+ if (len + 5 >= size) {
+ size *= 2;
+ buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+ if (buf == NULL) {
+ xmlGenericError(xmlGenericErrorContext,
+ "realloc of %d byte failed\n", size);
+ ctxt->instate = state;
+ return;
+ }
+ }
+ COPY_BUF(ql,buf,len,q);
+ q = r;
+ ql = rl;
+ r = cur;
+ rl = l;
+ NEXTL(l);
+ cur = CUR_CHAR(l);
+ if (cur == 0) {
+ SHRINK;
+ GROW;
+ cur = CUR_CHAR(l);
+ }
+ }
+ buf[len] = 0;
+ if (!IS_CHAR(cur)) {
+ ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
+ if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+ ctxt->sax->error(ctxt->userData,
+ "Comment not terminated \n<!--%.50s\n", buf);
+ ctxt->wellFormed = 0;
+ xmlFree(buf);
+ } else {
+ NEXT;
+ if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
+ (!ctxt->disableSAX))
+ ctxt->sax->comment(ctxt->userData, buf);
+ xmlFree(buf);
+ }
+ ctxt->instate = state;
+}
+
+/**
* htmlParseScript:
* @ctxt: an HTML parser context
*
@@ -2293,12 +2378,18 @@
htmlParseScript(htmlParserCtxtPtr ctxt) {
xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 1];
int nbchar = 0;
+ int comment = 0;
xmlChar cur;
SHRINK;
cur = CUR;
while (IS_CHAR(cur)) {
- if ((cur == '<') && (NXT(1) == '/')) {
+ if ((cur == '<') && (NXT(1) == '!') && (NXT(2) == '-') &&
+ (NXT(3) == '-')) {
+ comment = 1;
+ break;
+ }
+ else if ((cur == '<') && (NXT(1) == '/')) {
/*
* One should break here, the specification is clear:
* Authors should therefore escape "</" within the content.
@@ -2338,6 +2429,13 @@
ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
}
}
+
+ if (comment) {
+ /*
+ * Javascript is wrapped up in a comment
+ */
+ htmlParseComment(ctxt);
+ }
}
@@ -2469,91 +2567,6 @@
}
}
return(URI);
-}
-
-/**
- * htmlParseComment:
- * @ctxt: an HTML parser context
- *
- * Parse an XML (SGML) comment <!-- .... -->
- *
- * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
- */
-static void
-htmlParseComment(htmlParserCtxtPtr ctxt) {
- xmlChar *buf = NULL;
- int len;
- int size = HTML_PARSER_BUFFER_SIZE;
- int q, ql;
- int r, rl;
- int cur, l;
- xmlParserInputState state;
-
- /*
- * Check that there is a comment right here.
- */
- if ((RAW != '<') || (NXT(1) != '!') ||
- (NXT(2) != '-') || (NXT(3) != '-')) return;
-
- state = ctxt->instate;
- ctxt->instate = XML_PARSER_COMMENT;
- SHRINK;
- SKIP(4);
- buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
- if (buf == NULL) {
- xmlGenericError(xmlGenericErrorContext,
- "malloc of %d byte failed\n", size);
- ctxt->instate = state;
- return;
- }
- q = CUR_CHAR(ql);
- NEXTL(ql);
- r = CUR_CHAR(rl);
- NEXTL(rl);
- cur = CUR_CHAR(l);
- len = 0;
- while (IS_CHAR(cur) &&
- ((cur != '>') ||
- (r != '-') || (q != '-'))) {
- if (len + 5 >= size) {
- size *= 2;
- buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
- if (buf == NULL) {
- xmlGenericError(xmlGenericErrorContext,
- "realloc of %d byte failed\n", size);
- ctxt->instate = state;
- return;
- }
- }
- COPY_BUF(ql,buf,len,q);
- q = r;
- ql = rl;
- r = cur;
- rl = l;
- NEXTL(l);
- cur = CUR_CHAR(l);
- if (cur == 0) {
- SHRINK;
- GROW;
- cur = CUR_CHAR(l);
- }
- }
- buf[len] = 0;
- if (!IS_CHAR(cur)) {
- ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
- if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
- ctxt->sax->error(ctxt->userData,
- "Comment not terminated \n<!--%.50s\n", buf);
- ctxt->wellFormed = 0;
- xmlFree(buf);
- } else {
- NEXT;
- if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
- (!ctxt->disableSAX))
- ctxt->sax->comment(ctxt->userData, buf);
- xmlFree(buf);
- }
- ctxt->instate = state;
}
/**
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]