[libxslt] Fix numbering in non-Latin scripts



commit de6d869a8ef5ca327231fb73489f4c9024d8757a
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Sat Apr 27 14:33:29 2019 +0200

    Fix numbering in non-Latin scripts
    
    The `token` type wasn't wide enough to hold a Unicode code point.

 libxslt/numbers.c         | 24 +++++++++--------
 tests/docs/bug-219.xml    | 22 +++++++++++++++
 tests/general/bug-219.out | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 tests/general/bug-219.xsl | 17 ++++++++++++
 4 files changed, 120 insertions(+), 11 deletions(-)
---
diff --git a/libxslt/numbers.c b/libxslt/numbers.c
index 75c31eba..0a2a51cb 100644
--- a/libxslt/numbers.c
+++ b/libxslt/numbers.c
@@ -36,7 +36,7 @@
 
 #define SYMBOL_QUOTE           ((xmlChar)'\'')
 
-#define DEFAULT_TOKEN          (xmlChar)'0'
+#define DEFAULT_TOKEN          '0'
 #define DEFAULT_SEPARATOR      "."
 
 #define MAX_TOKENS             1024
@@ -45,7 +45,7 @@ typedef struct _xsltFormatToken xsltFormatToken;
 typedef xsltFormatToken *xsltFormatTokenPtr;
 struct _xsltFormatToken {
     xmlChar    *separator;
-    xmlChar     token;
+    int                 token;
     int                 width;
 };
 
@@ -107,20 +107,22 @@ xsltUTF8Charcmp(xmlChar *utf1, xmlChar *utf2) {
      (xsltUTF8Charcmp((letter), (self)->patternSeparator) == 0))
 
 #define IS_DIGIT_ZERO(x) xsltIsDigitZero(x)
-#define IS_DIGIT_ONE(x) xsltIsDigitZero((xmlChar)(x)-1)
+#define IS_DIGIT_ONE(x) xsltIsDigitZero((x)-1)
 
 static int
 xsltIsDigitZero(unsigned int ch)
 {
     /*
      * Reference: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
+     *
+     * There are many more digit ranges in newer Unicode versions. These
+     * are only the zeros that match Digit in XML 1.0 (IS_DIGIT macro).
      */
     switch (ch) {
     case 0x0030: case 0x0660: case 0x06F0: case 0x0966:
     case 0x09E6: case 0x0A66: case 0x0AE6: case 0x0B66:
     case 0x0C66: case 0x0CE6: case 0x0D66: case 0x0E50:
-    case 0x0E60: case 0x0F20: case 0x1040: case 0x17E0:
-    case 0x1810: case 0xFF10:
+    case 0x0ED0: case 0x0F20:
        return TRUE;
     default:
        return FALSE;
@@ -383,13 +385,13 @@ xsltNumberFormatTokenize(const xmlChar *format,
                ix += len;
                val = xmlStringCurrentChar(NULL, format+ix, &len);
            } else {
-                tokens->tokens[tokens->nTokens].token = (xmlChar)'0';
+                tokens->tokens[tokens->nTokens].token = '0';
                 tokens->tokens[tokens->nTokens].width = 1;
             }
-       } else if ( (val == (xmlChar)'A') ||
-                   (val == (xmlChar)'a') ||
-                   (val == (xmlChar)'I') ||
-                   (val == (xmlChar)'i') ) {
+       } else if ( (val == 'A') ||
+                   (val == 'a') ||
+                   (val == 'I') ||
+                   (val == 'i') ) {
            tokens->tokens[tokens->nTokens].token = val;
            ix += len;
            val = xmlStringCurrentChar(NULL, format+ix, &len);
@@ -400,7 +402,7 @@ xsltNumberFormatTokenize(const xmlChar *format,
             *  not support a numbering sequence that starts with that
             *  token, it must use a format token of 1."
             */
-           tokens->tokens[tokens->nTokens].token = (xmlChar)'0';
+           tokens->tokens[tokens->nTokens].token = '0';
            tokens->tokens[tokens->nTokens].width = 1;
        }
        /*
diff --git a/tests/docs/bug-219.xml b/tests/docs/bug-219.xml
new file mode 100644
index 00000000..65497811
--- /dev/null
+++ b/tests/docs/bug-219.xml
@@ -0,0 +1,22 @@
+<test>
+    <formats>
+        <format>&#x0660;&#x0661;</format>
+        <format>&#x06F0;&#x06F1;</format>
+        <format>&#x0966;&#x0967;</format>
+        <format>&#x09E6;&#x09E7;</format>
+        <format>&#x0A66;&#x0A67;</format>
+        <format>&#x0AE6;&#x0AE7;</format>
+        <format>&#x0B66;&#x0B67;</format>
+        <format>&#x0C66;&#x0C67;</format>
+        <format>&#x0CE6;&#x0CE7;</format>
+        <format>&#x0D66;&#x0D67;</format>
+        <format>&#x0E50;&#x0E51;</format>
+        <format>&#x0ED0;&#x0ED1;</format>
+        <format>&#x0F20;&#x0F21;</format>
+    </formats>
+    <values>
+        <value>0</value>
+        <value>9</value>
+       <value>1234567890</value>
+    </values>
+</test>
diff --git a/tests/general/bug-219.out b/tests/general/bug-219.out
new file mode 100644
index 00000000..908043cc
--- /dev/null
+++ b/tests/general/bug-219.out
@@ -0,0 +1,68 @@
+<?xml version="1.0"?>
+<results>
+  <format f="&#x660;&#x661;">
+    <value v="0">٠٠</value>
+    <value v="9">٠٩</value>
+    <value v="1234567890">١٢٣٤٥٦٧٨٩٠</value>
+  </format>
+  <format f="&#x6F0;&#x6F1;">
+    <value v="0">۰۰</value>
+    <value v="9">۰۹</value>
+    <value v="1234567890">۱۲۳۴۵۶۷۸۹۰</value>
+  </format>
+  <format f="&#x966;&#x967;">
+    <value v="0">००</value>
+    <value v="9">०९</value>
+    <value v="1234567890">१२३४५६७८९०</value>
+  </format>
+  <format f="&#x9E6;&#x9E7;">
+    <value v="0">০০</value>
+    <value v="9">০৯</value>
+    <value v="1234567890">১২৩৪৫৬৭৮৯০</value>
+  </format>
+  <format f="&#xA66;&#xA67;">
+    <value v="0">੦੦</value>
+    <value v="9">੦੯</value>
+    <value v="1234567890">੧੨੩੪੫੬੭੮੯੦</value>
+  </format>
+  <format f="&#xAE6;&#xAE7;">
+    <value v="0">૦૦</value>
+    <value v="9">૦૯</value>
+    <value v="1234567890">૧૨૩૪૫૬૭૮૯૦</value>
+  </format>
+  <format f="&#xB66;&#xB67;">
+    <value v="0">୦୦</value>
+    <value v="9">୦୯</value>
+    <value v="1234567890">୧୨୩୪୫୬୭୮୯୦</value>
+  </format>
+  <format f="&#xC66;&#xC67;">
+    <value v="0">౦౦</value>
+    <value v="9">౦౯</value>
+    <value v="1234567890">౧౨౩౪౫౬౭౮౯౦</value>
+  </format>
+  <format f="&#xCE6;&#xCE7;">
+    <value v="0">೦೦</value>
+    <value v="9">೦೯</value>
+    <value v="1234567890">೧೨೩೪೫೬೭೮೯೦</value>
+  </format>
+  <format f="&#xD66;&#xD67;">
+    <value v="0">൦൦</value>
+    <value v="9">൦൯</value>
+    <value v="1234567890">൧൨൩൪൫൬൭൮൯൦</value>
+  </format>
+  <format f="&#xE50;&#xE51;">
+    <value v="0">๐๐</value>
+    <value v="9">๐๙</value>
+    <value v="1234567890">๑๒๓๔๕๖๗๘๙๐</value>
+  </format>
+  <format f="&#xED0;&#xED1;">
+    <value v="0">໐໐</value>
+    <value v="9">໐໙</value>
+    <value v="1234567890">໑໒໓໔໕໖໗໘໙໐</value>
+  </format>
+  <format f="&#xF20;&#xF21;">
+    <value v="0">༠༠</value>
+    <value v="9">༠༩</value>
+    <value v="1234567890">༡༢༣༤༥༦༧༨༩༠</value>
+  </format>
+</results>
diff --git a/tests/general/bug-219.xsl b/tests/general/bug-219.xsl
new file mode 100644
index 00000000..e291994d
--- /dev/null
+++ b/tests/general/bug-219.xsl
@@ -0,0 +1,17 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+    <xsl:output indent="yes"/>
+    <xsl:template match="test">
+        <results>
+            <xsl:for-each select="formats/format">
+                <format f="{.}">
+                    <xsl:variable name="f" select="."/>
+                    <xsl:for-each select="/test/values/value">
+                        <value v="{.}">
+                            <xsl:number value="." format="{$f}"/>
+                        </value>
+                    </xsl:for-each>
+                </format>
+            </xsl:for-each>
+        </results>
+    </xsl:template>
+</xsl:stylesheet>


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]