Re: [xslt] bug in escaping cdata inside cdata inside cdata-section-element?



On Thu, 2003-09-25 at 14:54, Daniel Veillard wrote:
> On Thu, Sep 25, 2003 at 03:10:01PM -0400, Peter Pawlowski wrote:
> > I have some input XML which contains CDATA elements escaped inside of 
> > other CDATA elements:
> > 
> > 
> > <?xml version="1.0" encoding="UTF-8"?>
> > <collection>
> >   <test><![CDATA[
> >     <![CDATA[abc]]]>]&gt;<![CDATA[
> >   ]]></test>
> > </collection>
> > 
> > 
> > not the most attractive XML, I'll admit. but when I apply this simple XSL
> > to just copy the input (and add 'test' to the list of 
> > cdata-section-elements):
> > 
> > 
> > <?xml version="1.0"?>
> > <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
> >   version="1.0"
> > >
> >   <xsl:output method="xml" encoding="UTF-8" 
> >     cdata-section-elements="test" 
> >   />
> > 
> >   <xsl:template match="/">
> >     <xsl:copy-of select="*" />
> >   </xsl:template>
> > </xsl:stylesheet>
> > 
> > 
> > I get this output:
> > 
> > 
> > <?xml version="1.0" encoding="UTF-8"?>
> > <collection>
> >   <test><![CDATA[
> >     <![CDATA[abc]]>
> >   ]]></test>
> > </collection>
> > 
> > 
> > since the ']]>' has not been re-escaped, this is now broken XML.
> > 
> > my version:
> > 
> > [pawlowski]$ libxslt/xsltproc -V
> > Using libxml 20509, libxslt 10033 and libexslt 722
> > xsltproc was compiled against libxml 20509, libxslt 10033 and 
> > libexslt 722
> > libxslt 10033 was compiled against libxml 20509
> > libexslt 722 was compiled against libxml 20509
> 
>   Right, this looks like a bug, can you bugzilla it so it doesn't
> get lost, the bug is actually in the libxml2 serialization layer.
>      http://xmlsoft.org/XSLT/bugs.html
> 
>    thanks !

Daniel,

A patch and simple test case are attached.  It had to be fixed in three
places in xmlNodeDumpOutputInternal and xhtmlNodeDumpOutput.  You may or
may not want to pull the CDATA escaping code into a function, but I
didn't bother.  It's only 16 lines.

--
Shaun

Index: tree.c
===================================================================
RCS file: /cvs/gnome/libxml2/tree.c,v
retrieving revision 1.277
diff -c -r1.277 tree.c
*** tree.c	24 Sep 2003 21:23:55 -0000	1.277
--- tree.c	26 Sep 2003 00:45:14 -0000
***************
*** 7273,7278 ****
--- 7273,7279 ----
  	    xmlNodePtr cur, int level, int format, const char *encoding) {
      int i;
      xmlNodePtr tmp;
+     xmlChar *start, *end;
  
      if (cur == NULL) {
  #ifdef DEBUG_TREE
***************
*** 7356,7365 ****
  	return;
      }
      if (cur->type == XML_CDATA_SECTION_NODE) {
!         xmlOutputBufferWriteString(buf, "<![CDATA[");
! 	if (cur->content != NULL)
! 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
!         xmlOutputBufferWriteString(buf, "]]>");
  	return;
      }
      if (cur->type == XML_ATTRIBUTE_NODE) {
--- 7357,7378 ----
  	return;
      }
      if (cur->type == XML_CDATA_SECTION_NODE) {
! 	start = end = cur->content;
! 	while (*end != '\0') {
! 	    if (*end == ']' && *(end + 1) == ']' && *(end + 2) == '>') {
! 		end = end + 2;
! 		xmlOutputBufferWriteString(buf, "<![CDATA[");
! 		xmlOutputBufferWrite(buf, end - start, (const char *)start);
! 		xmlOutputBufferWriteString(buf, "]]>");
! 		start = end;
! 	    }
! 	    end++;
! 	}
! 	if (start != end) {
! 	    xmlOutputBufferWriteString(buf, "<![CDATA[");
! 	    xmlOutputBufferWriteString(buf, (const char *)start);
! 	    xmlOutputBufferWriteString(buf, "]]>");
! 	}
  	return;
      }
      if (cur->type == XML_ATTRIBUTE_NODE) {
***************
*** 7810,7815 ****
--- 7823,7829 ----
              int level, int format, const char *encoding) {
      int i;
      xmlNodePtr tmp;
+     xmlChar *start, *end;
  
      if (cur == NULL) {
  #ifdef DEBUG_TREE
***************
*** 7893,7902 ****
  	return;
      }
      if (cur->type == XML_CDATA_SECTION_NODE) {
!         xmlOutputBufferWriteString(buf, "<![CDATA[");
! 	if (cur->content != NULL)
! 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
!         xmlOutputBufferWriteString(buf, "]]>");
  	return;
      }
  
--- 7907,7928 ----
  	return;
      }
      if (cur->type == XML_CDATA_SECTION_NODE) {
! 	start = end = cur->content;
! 	while (*end != '\0') {
! 	    if (*end == ']' && *(end + 1) == ']' && *(end + 2) == '>') {
! 		end = end + 2;
! 		xmlOutputBufferWriteString(buf, "<![CDATA[");
! 		xmlOutputBufferWrite(buf, end - start, (const char *)start);
! 		xmlOutputBufferWriteString(buf, "]]>");
! 		start = end;
! 	    }
! 	    end++;
! 	}
! 	if (start != end) {
! 	    xmlOutputBufferWriteString(buf, "<![CDATA[");
! 	    xmlOutputBufferWriteString(buf, (const char *)start);
! 	    xmlOutputBufferWriteString(buf, "]]>");
! 	}
  	return;
      }
  
***************
*** 7989,7999 ****
  		    (xmlStrchr(child->content, '&') == NULL)) {
  		    xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
  		} else {
! 		    xmlOutputBufferWriteString(buf, "<![CDATA[");
! 		    if (child->content != NULL)
! 			xmlOutputBufferWriteString(buf,
! 				(const char *)child->content);
! 		    xmlOutputBufferWriteString(buf, "]]>");
  		}
  	    } else {
  		xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
--- 8015,8039 ----
  		    (xmlStrchr(child->content, '&') == NULL)) {
  		    xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);
  		} else {
! 		    start = end = child->content;
! 		    while (*end != '\0') {
! 			if (*end == ']' &&
! 			    *(end + 1) == ']' &&
! 			    *(end + 2) == '>') {
! 			    end = end + 2;
! 			    xmlOutputBufferWriteString(buf, "<![CDATA[");
! 			    xmlOutputBufferWrite(buf, end - start,
! 						 (const char *)start);
! 			    xmlOutputBufferWriteString(buf, "]]>");
! 			    start = end;
! 			}
! 			end++;
! 		    }
! 		    if (start != end) {
! 			xmlOutputBufferWriteString(buf, "<![CDATA[");
! 			xmlOutputBufferWriteString(buf, (const char *)start);
! 			xmlOutputBufferWriteString(buf, "]]>");
! 		    }
  		}
  	    } else {
  		xhtmlNodeDumpOutput(buf, doc, child, 0, 0, encoding);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]