[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]

[xml] xmlregexp patch



xmlregexp does not appear to handle multi-byte UTF-8 characters in
character ranges (e.g. [&#65;-&#256;]). Run testSchemas with the
attached test files (rx.xsd and rx.xml). The attached patch (against
latest CVS) fixes the problem (at least for my little test). I also
ran the regression tests and this patch doesn't break anything.

Charlie B.

<?xml version="1.0"?>
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <xsd:element name="comment" type="xsd:string"/>
  <!-- Stock Keeping Unit, a code for identifying products -->
  <xsd:simpleType name="SKU">
    <xsd:restriction base="xsd:string">
      <xsd:pattern value="\d{3}-[&#65;-&#256;]{2}"/>
    </xsd:restriction>
  </xsd:simpleType>
  <xsd:element name="Item" minOccurs="0" maxOccurs="unbounded">
    <xsd:complexType>
      <xsd:attribute name="partNum" type="SKU" use="required"/>
    </xsd:complexType>
  </xsd:element>
</xsd:schema>
<Item partNum="926-A&#199;"/>
Index: xmlregexp.c
===================================================================
RCS file: /cvs/gnome/libxml2/xmlregexp.c,v
retrieving revision 1.27
diff -c -r1.27 xmlregexp.c
*** xmlregexp.c	26 Dec 2003 06:03:14 -0000	1.27
--- xmlregexp.c	26 Dec 2003 16:12:15 -0000
***************
*** 3533,3539 ****
   */
  static void
  xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
!     int cur;
      int start = -1;
      int end = -1;
  
--- 3533,3539 ----
   */
  static void
  xmlFAParseCharRange(xmlRegParserCtxtPtr ctxt) {
!     int cur, len;
      int start = -1;
      int end = -1;
  
***************
*** 3560,3572 ****
  		return;
  	}
  	end = start;
      } else if ((cur != 0x5B) && (cur != 0x5D)) {
! 	end = start = cur;
      } else {
  	ERROR("Expecting a char range");
  	return;
      }
!     NEXT;
      if (start == '-') {
  	return;
      }
--- 3560,3573 ----
  		return;
  	}
  	end = start;
+         len = 1;
      } else if ((cur != 0x5B) && (cur != 0x5D)) {
!         end = start = CUR_SCHAR(ctxt->cur, len);
      } else {
  	ERROR("Expecting a char range");
  	return;
      }
!     NEXTL(len);
      if (start == '-') {
  	return;
      }
***************
*** 3593,3605 ****
  		ERROR("Invalid escape value");
  		return;
  	}
      } else if ((cur != 0x5B) && (cur != 0x5D)) {
! 	end = cur;
      } else {
  	ERROR("Expecting the end of a char range");
  	return;
      }
!     NEXT;
      /* TODO check that the values are acceptable character ranges for XML */
      if (end < start) {
  	ERROR("End of range is before start of range");
--- 3594,3607 ----
  		ERROR("Invalid escape value");
  		return;
  	}
+         len = 1;
      } else if ((cur != 0x5B) && (cur != 0x5D)) {
!         end = CUR_SCHAR(ctxt->cur, len);
      } else {
  	ERROR("Expecting the end of a char range");
  	return;
      }
!     NEXTL(len);
      /* TODO check that the values are acceptable character ranges for XML */
      if (end < start) {
  	ERROR("End of range is before start of range");


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]