[libxml2] Improve HTML escaping of attribute on output



commit 7d4c529a334845621e2f805c8ed0e154b3350cec
Author: Daniel Veillard <veillard redhat com>
Date:   Wed Sep 5 11:45:32 2012 +0800

    Improve HTML escaping of attribute on output
    
    Handle special cases of &{...} constructs as hinted in the spec
      http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
    and special values such as <!-- ... --> comments used for server side includes.
    This is limited to attribute values in HTML content.

 HTMLtree.c |   13 ++++++---
 entities.c |   86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 save.h     |    1 +
 tree.c     |   11 +++++++-
 4 files changed, 98 insertions(+), 13 deletions(-)
---
diff --git a/HTMLtree.c b/HTMLtree.c
index 1f9dbec..530fd53 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -690,9 +690,10 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
     xmlChar *value;
 
     /*
-     * TODO: The html output method should not escape a & character
-     *       occurring in an attribute value immediately followed by
-     *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
+     * The html output method should not escape a & character
+     * occurring in an attribute value immediately followed by
+     * a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
+     * This is implemented in xmlEncodeAttributeEntities
      */
 
     if (cur == NULL) {
@@ -720,7 +721,11 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
 
 		while (IS_BLANK_CH(*tmp)) tmp++;
 
-		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
+		/*
+		 * The < and > have already been escaped at the entity level,
+		 * and URI-escaping them here would break server side includes.
+		 */
+		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
 		if (escaped != NULL) {
 		    xmlBufWriteQuotedString(buf->buffer, escaped);
 		    xmlFree(escaped);
diff --git a/entities.c b/entities.c
index 7d06820..f9e9cd9 100644
--- a/entities.c
+++ b/entities.c
@@ -537,11 +537,11 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
     buffer_size = new_size;						\
 }
 
-
 /**
- * xmlEncodeEntitiesReentrant:
+ * xmlEncodeEntitiesInternal:
  * @doc:  the document containing the string
  * @input:  A string to convert to XML.
+ * @attr: are we handling an attribute value
  *
  * Do a global encoding of a string, replacing the predefined entities
  * and non ASCII values with their entities and CharRef counterparts.
@@ -550,8 +550,8 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
  *
  * Returns A newly allocated string with the substitution done.
  */
-xmlChar *
-xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
+static xmlChar *
+xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
     const xmlChar *cur = input;
     xmlChar *buffer = NULL;
     xmlChar *out = NULL;
@@ -568,7 +568,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
     buffer_size = 1000;
     buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
     if (buffer == NULL) {
-        xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed");
+        xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
 	return(NULL);
     }
     out = buffer;
@@ -585,6 +585,27 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
 	 * By default one have to encode at least '<', '>', '"' and '&' !
 	 */
 	if (*cur == '<') {
+	    const xmlChar *end;
+
+	    /*
+	     * Special handling of server side include in HTML attributes
+	     */
+	    if (html && attr &&
+	        (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
+	        ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
+	        while (cur != end) {
+		    *out++ = *cur++;
+		    indx = out - buffer;
+		    if (indx + 100 > buffer_size) {
+			growBufferReentrant();
+			out = &buffer[indx];
+		    }
+		}
+		*out++ = *cur++;
+		*out++ = *cur++;
+		*out++ = *cur++;
+		continue;
+	    }
 	    *out++ = '&';
 	    *out++ = 'l';
 	    *out++ = 't';
@@ -595,6 +616,22 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
 	    *out++ = 't';
 	    *out++ = ';';
 	} else if (*cur == '&') {
+	    /*
+	     * Special handling of &{...} construct from HTML 4, see
+	     * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
+	     */
+	    if (html && attr && (cur[1] == '{') && (strchr(cur, '}'))) {
+	        while (*cur != '}') {
+		    *out++ = *cur++;
+		    indx = out - buffer;
+		    if (indx + 100 > buffer_size) {
+			growBufferReentrant();
+			out = &buffer[indx];
+		    }
+		}
+		*out++ = *cur++;
+		continue;
+	    }
 	    *out++ = '&';
 	    *out++ = 'a';
 	    *out++ = 'm';
@@ -627,7 +664,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
 
 		if (*cur < 0xC0) {
 		    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
-			    "xmlEncodeEntitiesReentrant : input not UTF-8");
+			    "xmlEncodeEntities: input not UTF-8");
 		    if (doc != NULL)
 			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
 		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
@@ -660,7 +697,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
 		}
 		if ((l == 1) || (!IS_CHAR(val))) {
 		    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
-			"xmlEncodeEntitiesReentrant : char out of range\n");
+			"xmlEncodeEntities: char out of range\n");
 		    if (doc != NULL)
 			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
 		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
@@ -694,12 +731,45 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
     return(buffer);
 
 mem_error:
-    xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: realloc failed");
+    xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
     xmlFree(buffer);
     return(NULL);
 }
 
 /**
+ * xmlEncodeAttributeEntities:
+ * @doc:  the document containing the string
+ * @input:  A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * and non ASCII values with their entities and CharRef counterparts for
+ * attribute values.
+ *
+ * Returns A newly allocated string with the substitution done.
+ */
+xmlChar *
+xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
+    return xmlEncodeEntitiesInternal(doc, input, 1);
+}
+
+/**
+ * xmlEncodeEntitiesReentrant:
+ * @doc:  the document containing the string
+ * @input:  A string to convert to XML.
+ *
+ * Do a global encoding of a string, replacing the predefined entities
+ * and non ASCII values with their entities and CharRef counterparts.
+ * Contrary to xmlEncodeEntities, this routine is reentrant, and result
+ * must be deallocated.
+ *
+ * Returns A newly allocated string with the substitution done.
+ */
+xmlChar *
+xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
+    return xmlEncodeEntitiesInternal(doc, input, 0);
+}
+
+/**
  * xmlEncodeSpecialChars:
  * @doc:  the document containing the string
  * @input:  A string to convert to XML.
diff --git a/save.h b/save.h
index f24f273..2c32a10 100644
--- a/save.h
+++ b/save.h
@@ -25,6 +25,7 @@ void xmlBufDumpNotationTable(xmlBufPtr buf, xmlNotationTablePtr table);
 void xmlBufDumpElementDecl(xmlBufPtr buf, xmlElementPtr elem);
 void xmlBufDumpAttributeDecl(xmlBufPtr buf, xmlAttributePtr attr);
 void xmlBufDumpEntityDecl(xmlBufPtr buf, xmlEntityPtr ent);
+xmlChar *xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input);
 #endif
 
 #ifdef __cplusplus
diff --git a/tree.c b/tree.c
index df6f608..145a689 100644
--- a/tree.c
+++ b/tree.c
@@ -42,6 +42,7 @@
 #endif
 
 #include "buf.h"
+#include "save.h"
 
 int __xmlRegisterCallbacks = 0;
 
@@ -1661,9 +1662,14 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
     xmlNodePtr node = list;
     xmlChar *ret = NULL;
     xmlEntityPtr ent;
+    int attr;
 
     if (list == NULL)
         return (NULL);
+    if ((list->parent != NULL) && (list->parent->type == XML_ATTRIBUTE_NODE))
+        attr = 1;
+    else
+        attr = 0;
 
     while (node != NULL) {
         if ((node->type == XML_TEXT_NODE) ||
@@ -1673,7 +1679,10 @@ xmlNodeListGetString(xmlDocPtr doc, xmlNodePtr list, int inLine)
             } else {
                 xmlChar *buffer;
 
-                buffer = xmlEncodeEntitiesReentrant(doc, node->content);
+		if (attr)
+		    buffer = xmlEncodeAttributeEntities(doc, node->content);
+		else
+		    buffer = xmlEncodeEntitiesReentrant(doc, node->content);
                 if (buffer != NULL) {
                     ret = xmlStrcat(ret, buffer);
                     xmlFree(buffer);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]