[xml] [PATCH] Javascript wrapped up in comments



Hi,

yet another little fix for HTMLparser.c:
The following JavaScript is not parsed correctly:

<script>
<!--
self.document.write("<b></b>");
-->
</script>

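The attached patch fixes this: when htmlParseScript() meets "<!--" inside the
script content, it leaves its parsing loop and calls htmlParseComment(), so
JavaScript wrapped up in a comment is parsed as a comment. To make this
possible, htmlParseComment() is moved (otherwise unchanged) above
htmlParseScript() in the file.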

Greetings, Bastian Kleineidam

--
 .~.
 /V\    Unleash the power. Use Linux.
/( )\
^^-^^
--- ../libxml2-2.4.9.orig/HTMLparser.c  Tue Oct 30 04:35:05 2001
+++ HTMLparser.c        Thu Nov  8 13:56:38 2001
@@ -2269,6 +2269,91 @@
 }
 
 /**
+ * htmlParseComment:
+ * @ctxt:  an HTML parser context
+ *
+ * Parse an XML (SGML) comment <!-- .... -->
+ *
+ * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+ */
+static void
+htmlParseComment(htmlParserCtxtPtr ctxt) {
+    xmlChar *buf = NULL;
+    int len;
+    int size = HTML_PARSER_BUFFER_SIZE;
+    int q, ql;
+    int r, rl;
+    int cur, l;
+    xmlParserInputState state;
+
+    /*
+     * Check that there is a comment right here.
+     */
+    if ((RAW != '<') || (NXT(1) != '!') ||
+        (NXT(2) != '-') || (NXT(3) != '-')) return;
+
+    state = ctxt->instate;
+    ctxt->instate = XML_PARSER_COMMENT;
+    SHRINK;
+    SKIP(4);
+    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
+    if (buf == NULL) {
+       xmlGenericError(xmlGenericErrorContext,
+               "malloc of %d byte failed\n", size);
+       ctxt->instate = state;
+       return;
+    }
+    q = CUR_CHAR(ql);
+    NEXTL(ql);
+    r = CUR_CHAR(rl);
+    NEXTL(rl);
+    cur = CUR_CHAR(l);
+    len = 0;
+    while (IS_CHAR(cur) &&
+           ((cur != '>') ||
+           (r != '-') || (q != '-'))) {
+       if (len + 5 >= size) {
+           size *= 2;
+           buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
+           if (buf == NULL) {
+               xmlGenericError(xmlGenericErrorContext,
+                       "realloc of %d byte failed\n", size);
+               ctxt->instate = state;
+               return;
+           }
+       }
+       COPY_BUF(ql,buf,len,q);
+       q = r;
+       ql = rl;
+       r = cur;
+       rl = l;
+       NEXTL(l);
+       cur = CUR_CHAR(l);
+       if (cur == 0) {
+           SHRINK;
+           GROW;
+           cur = CUR_CHAR(l);
+       }
+    }
+    buf[len] = 0;
+    if (!IS_CHAR(cur)) {
+       ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
+       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+           ctxt->sax->error(ctxt->userData,
+                            "Comment not terminated \n<!--%.50s\n", buf);
+       ctxt->wellFormed = 0;
+       xmlFree(buf);
+    } else {
+        NEXT;
+       if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
+           (!ctxt->disableSAX))
+           ctxt->sax->comment(ctxt->userData, buf);
+       xmlFree(buf);
+    }
+    ctxt->instate = state;
+}
+
+/**
  * htmlParseScript:
  * @ctxt:  an HTML parser context
  *
@@ -2293,12 +2378,18 @@
 htmlParseScript(htmlParserCtxtPtr ctxt) {
     xmlChar buf[HTML_PARSER_BIG_BUFFER_SIZE + 1];
     int nbchar = 0;
+    int comment = 0;
     xmlChar cur;
 
     SHRINK;
     cur = CUR;
     while (IS_CHAR(cur)) {
-       if ((cur == '<') && (NXT(1) == '/')) {
+       if ((cur == '<') && (NXT(1) == '!') && (NXT(2) == '-') &&
+           (NXT(3) == '-')) {
+           comment = 1;
+           break;
+       }
+       else if ((cur == '<') && (NXT(1) == '/')) {
            /*
             * One should break here, the specification is clear:
             * Authors should therefore escape "</" within the content.
@@ -2338,6 +2429,13 @@
            ctxt->sax->cdataBlock(ctxt->userData, buf, nbchar);
        }
     }
+
+    if (comment) {
+       /*
+        * Javascript is wrapped up in a comment
+        */
+        htmlParseComment(ctxt);
+    }
 }
 
 
@@ -2469,91 +2567,6 @@
        }
     }
     return(URI);
-}
-
-/**
- * htmlParseComment:
- * @ctxt:  an HTML parser context
- *
- * Parse an XML (SGML) comment <!-- .... -->
- *
- * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
- */
-static void
-htmlParseComment(htmlParserCtxtPtr ctxt) {
-    xmlChar *buf = NULL;
-    int len;
-    int size = HTML_PARSER_BUFFER_SIZE;
-    int q, ql;
-    int r, rl;
-    int cur, l;
-    xmlParserInputState state;
-
-    /*
-     * Check that there is a comment right here.
-     */
-    if ((RAW != '<') || (NXT(1) != '!') ||
-        (NXT(2) != '-') || (NXT(3) != '-')) return;
-
-    state = ctxt->instate;
-    ctxt->instate = XML_PARSER_COMMENT;
-    SHRINK;
-    SKIP(4);
-    buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
-    if (buf == NULL) {
-       xmlGenericError(xmlGenericErrorContext,
-               "malloc of %d byte failed\n", size);
-       ctxt->instate = state;
-       return;
-    }
-    q = CUR_CHAR(ql);
-    NEXTL(ql);
-    r = CUR_CHAR(rl);
-    NEXTL(rl);
-    cur = CUR_CHAR(l);
-    len = 0;
-    while (IS_CHAR(cur) &&
-           ((cur != '>') ||
-           (r != '-') || (q != '-'))) {
-       if (len + 5 >= size) {
-           size *= 2;
-           buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
-           if (buf == NULL) {
-               xmlGenericError(xmlGenericErrorContext,
-                       "realloc of %d byte failed\n", size);
-               ctxt->instate = state;
-               return;
-           }
-       }
-       COPY_BUF(ql,buf,len,q);
-       q = r;
-       ql = rl;
-       r = cur;
-       rl = l;
-       NEXTL(l);
-       cur = CUR_CHAR(l);
-       if (cur == 0) {
-           SHRINK;
-           GROW;
-           cur = CUR_CHAR(l);
-       }
-    }
-    buf[len] = 0;
-    if (!IS_CHAR(cur)) {
-       ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
-       if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-           ctxt->sax->error(ctxt->userData,
-                            "Comment not terminated \n<!--%.50s\n", buf);
-       ctxt->wellFormed = 0;
-       xmlFree(buf);
-    } else {
-        NEXT;
-       if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
-           (!ctxt->disableSAX))
-           ctxt->sax->comment(ctxt->userData, buf);
-       xmlFree(buf);
-    }
-    ctxt->instate = state;
 }
 
 /**


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]