[xslt] Patch for xsl:sort lang support




Attached is a patch that provides support for language-dependent sorting using the lang attribute of xsl:sort. The patch is against libxslt-1.1.23, but it applies to 1.1.24 as well. See also http://bugzilla.gnome.org/show_bug.cgi?id=152501

I tested the patch on several Linux distros and on Windows XP. It should work on BSD-based systems too, although some minor changes may be needed. On Linux I'm using strxfrm_l, so I changed configure to define _GNU_SOURCE if glibc is detected. On Windows I'm using _wcsxfrm_l because _strxfrm_l doesn't support UTF-8. I put the bulk of the locale-handling code in a new file called xsltlocale.c.

I also included a test case, but it still needs work because the glibc and MSVCRT sort orders differ when using the French locale.

The changes to namespaces.c and transform.c are unrelated to the xsl:sort lang support. They are included because of bug #377440: http://bugzilla.gnome.org/show_bug.cgi?id=377440

Nick


--
aevum gmbh
rumfordstr. 4
80469 münchen
germany

tel: +49 89 3838 0653
http://aevum.de/
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/configure.in libxslt-1.1.23-nik2/configure.in
--- libxslt-1.1.23/configure.in	2008-04-08 19:18:06.000000000 +0200
+++ libxslt-1.1.23-nik2/configure.in	2008-05-14 17:47:43.000000000 +0200
@@ -105,6 +105,60 @@ AC_PATH_PROG(TAR, tar, /bin/tar)
 AC_STDC_HEADERS
 AM_PROG_LIBTOOL
 
+AC_MSG_CHECKING([for GNU extensions])
+AC_TRY_COMPILE(
+  [#include <features.h>],
+[
+#ifndef __GNU_LIBRARY__
+#error GNU library not found
+#endif
+],
+  [AC_MSG_RESULT(yes); AC_DEFINE(_GNU_SOURCE, 1, [Enable GNU extensions])],
+  [AC_MSG_RESULT(no)]
+)
+
+AC_MSG_CHECKING([for working xlocale.h])
+AC_TRY_RUN(
+[
+#include <locale.h>
+#include <xlocale.h>
+#include <string.h>
+#include <stdlib.h>
+
+#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
+#define locale_t __locale_t
+#define newlocale __newlocale
+#define freelocale __freelocale
+#define strxfrm_l __strxfrm_l
+#define LC_COLLATE_MASK (1 << LC_COLLATE)
+#endif
+
+int main()
+{
+    locale_t locale;
+    const char *src[2] = { "\xc3\x84rger", "Zeppelin" };
+    char *dst[2];
+    size_t len, r;
+    int i;
+    
+    locale = newlocale(LC_COLLATE_MASK, "en_US.utf8", NULL);
+    if (locale == NULL) exit(1);
+    for (i=0; i<2; ++i) {
+        len = strxfrm_l(NULL, src[i], 0, locale) + 1;
+        dst[i] = malloc(len);
+        if(dst[i] == NULL) exit(1);
+        r = strxfrm_l(dst[i], src[i], len, locale);
+        if(r >= len) exit(1);
+    }
+    if (strcmp(dst[0], dst[1]) >= 0) exit(1);
+    
+    exit(0);
+}
+],
+  [AC_MSG_RESULT(yes); AC_DEFINE(HAVE_XLOCALE_H, 1, [Have working xlocale.h])],
+  [AC_MSG_RESULT(no)]
+)
+
 dnl
 dnl Math detection
 dnl
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/extra.c libxslt-1.1.23-nik2/libxslt/extra.c
--- libxslt-1.1.23/libxslt/extra.c	2007-01-03 16:11:57.000000000 +0100
+++ libxslt-1.1.23-nik2/libxslt/extra.c	2008-05-14 17:47:43.000000000 +0200
@@ -15,7 +15,6 @@
 
 #include <string.h>
 #ifdef HAVE_TIME_H
-#define __USE_XOPEN
 #include <time.h>
 #endif
 #ifdef HAVE_STDLIB_H
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/Makefile.am libxslt-1.1.23-nik2/libxslt/Makefile.am
--- libxslt-1.1.23/libxslt/Makefile.am	2007-01-03 16:11:57.000000000 +0100
+++ libxslt-1.1.23-nik2/libxslt/Makefile.am	2008-05-14 17:47:43.000000000 +0200
@@ -24,11 +24,13 @@ xsltinc_HEADERS = 			\
 	security.h			\
 	xsltInternals.h			\
 	xsltconfig.h			\
-	xsltexports.h
+	xsltexports.h			\
+	xsltlocale.h
 
 libxslt_la_SOURCES = 			\
 	attrvt.c			\
 	xslt.c				\
+	xsltlocale.c			\
 	xsltutils.c			\
 	pattern.c			\
 	templates.c			\
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/namespaces.c libxslt-1.1.23-nik2/libxslt/namespaces.c
--- libxslt-1.1.23/libxslt/namespaces.c	2008-04-03 07:24:18.000000000 +0200
+++ libxslt-1.1.23-nik2/libxslt/namespaces.c	2008-05-14 17:47:43.000000000 +0200
@@ -554,7 +554,7 @@ xsltGetSpecialNamespace(xsltTransformCon
 	    *  the old way was use xmlSearchNsByHref() and to let change
 	    *  the prefix.
 	    */
-#if 0
+#if 1
 	    ns = xmlSearchNsByHref(target->doc, target, nsName);
 	    if (ns != NULL)
 		return(ns);
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/preproc.c libxslt-1.1.23-nik2/libxslt/preproc.c
--- libxslt-1.1.23/libxslt/preproc.c	2007-01-03 16:11:57.000000000 +0100
+++ libxslt-1.1.23-nik2/libxslt/preproc.c	2008-05-14 17:47:43.000000000 +0200
@@ -391,6 +391,8 @@ xsltFreeStylePreComp(xsltStylePreCompPtr
             break;
         case XSLT_FUNC_SORT: {
 		xsltStyleItemSortPtr item = (xsltStyleItemSortPtr) comp;
+		if (item->locale != NULL)
+		    xsltFreeLocale(item->locale);
 		if (item->comp != NULL)
 		    xmlXPathFreeCompExpr(item->comp);
 	    }
@@ -487,6 +489,8 @@ xsltFreeStylePreComp(xsltStylePreCompPtr
 	    break;
     }
 #else    
+    if (comp->locale != NULL)
+	xsltFreeLocale(comp->locale);
     if (comp->comp != NULL)
 	xmlXPathFreeCompExpr(comp->comp);
     if (comp->nsList != NULL)
@@ -728,6 +732,12 @@ xsltSortComp(xsltStylesheetPtr style, xm
     comp->lang = xsltEvalStaticAttrValueTemplate(style, inst,
 				 (const xmlChar *)"lang",
 				 NULL, &comp->has_lang);
+    if (comp->lang != NULL) {
+	comp->locale = xsltNewLocale(comp->lang);
+    }
+    else {
+        comp->locale = NULL;
+    }
 
     comp->select = xsltGetCNsProp(style, inst,(const xmlChar *)"select", XSLT_NAMESPACE);
     if (comp->select == NULL) {
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/transform.c libxslt-1.1.23-nik2/libxslt/transform.c
--- libxslt-1.1.23/libxslt/transform.c	2007-06-17 22:45:22.000000000 +0200
+++ libxslt-1.1.23-nik2/libxslt/transform.c	2008-05-14 17:47:43.000000000 +0200
@@ -1542,7 +1542,7 @@ xsltCopyTreeInternal(xsltTransformContex
 			    * Search by namespace name.
 			    * REVISIT TODO: Currently disabled.
 			    */
-#if 0
+#if 1
 			    ns = xmlSearchNsByHref(insert->doc,
 				insert, (*curns)->href);
 #endif
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/xsltInternals.h libxslt-1.1.23-nik2/libxslt/xsltInternals.h
--- libxslt-1.1.23/libxslt/xsltInternals.h	2008-01-24 16:23:15.000000000 +0100
+++ libxslt-1.1.23-nik2/libxslt/xsltInternals.h	2008-05-14 17:47:43.000000000 +0200
@@ -21,6 +21,7 @@
 #include <libxml/xmlstring.h>
 #include <libxslt/xslt.h>
 #include "xsltexports.h"
+#include "xsltlocale.h"
 #include "numbersInternals.h"
 
 #ifdef __cplusplus
@@ -1044,6 +1045,7 @@ struct _xsltStyleItemSort {
     int      descending;	/* sort */
     const xmlChar *lang;	/* sort */
     int      has_lang;		/* sort */
+    xsltLocale locale;		/* sort */
     const xmlChar *case_order;	/* sort */
     int      lower_first;	/* sort */
 
@@ -1380,6 +1382,7 @@ struct _xsltStylePreComp {
     int      descending;	/* sort */
     const xmlChar *lang;	/* sort */
     int      has_lang;		/* sort */
+    xsltLocale locale;		/* sort */
     const xmlChar *case_order;	/* sort */
     int      lower_first;	/* sort */
 
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/xsltlocale.c libxslt-1.1.23-nik2/libxslt/xsltlocale.c
--- libxslt-1.1.23/libxslt/xsltlocale.c	1970-01-01 01:00:00.000000000 +0100
+++ libxslt-1.1.23-nik2/libxslt/xsltlocale.c	2008-05-14 18:08:15.000000000 +0200
@@ -0,0 +1,450 @@
+/*
+ * xsltlocale.c: locale handling
+ *
+ * Reference:
+ * RFC 3066: Tags for the Identification of Languages
+ * http://www.ietf.org/rfc/rfc3066.txt
+ * ISO 639-1, ISO 3166-1
+ *
+ * Author: Nick Wellnhofer
+ */
+
+#define IN_LIBXSLT
+#include "libxslt.h"
+
+#include <ctype.h>
+#include <string.h>
+#include <libxml/xmlmemory.h>
+
+#include "xsltlocale.h"
+#include "xsltutils.h"
+
+#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
+#define newlocale __newlocale
+#define freelocale __freelocale
+#define strxfrm_l __strxfrm_l
+#define LC_COLLATE_MASK (1 << LC_COLLATE)
+#endif
+
+/**
+ * xsltNewLocale:
+ * @languageTag: RFC 3066 language tag
+ *
+ * Creates a new locale of an opaque system dependent type based on the
+ * language tag. Three-letter language codes (ISO 639-2 Alpha-3) are not
+ * supported.
+ *
+ * Returns the locale or NULL on error or if no matching locale was found
+ */
+xsltLocale
+xsltNewLocale(const xmlChar *languageTag) {
+#ifdef XSLT_LOCALE_XLOCALE
+    xsltLocale locale;
+    char localeName[23]; /* 8*lang + "-" + 8*region + ".utf8\0" */
+    const xmlChar *p = languageTag;
+    const char *region = NULL;
+    char *q = localeName;
+    int c, i, llen;
+    
+    /* Convert something like "pt-br" to "pt_BR.utf8" */
+    
+    if (languageTag == NULL)
+    	return(NULL);
+    
+    for (i=0; i<8 && isalpha(*p); ++i)
+	*q++ = tolower(*p++);
+    
+    if (i == 0)
+    	return(NULL);
+    
+    llen = i;
+    *q++ = '_';
+    
+    if (*p) {
+    	if (*p++ != '-')
+    	    return(NULL);
+	
+	for (i=0; i<8 && isalpha(*p); ++i)
+	    *q++ = toupper(*p++);
+    
+    	if (i == 0 || *p)
+    	    return(NULL);
+    	
+        memcpy(q, ".utf8", 6);
+        locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
+        if (locale != NULL)
+            return(locale);
+        
+        q = localeName + llen + 1;
+    }
+    
+    /* Try to find most common country for language */
+    
+    if (llen != 2)
+        return(NULL);
+
+    c = localeName[1];
+    
+    /* This is based on the locales from glibc 2.3.3 */
+    
+    switch (localeName[0]) {
+        case 'a':
+            if (c == 'a' || c == 'm') region = "ET";
+            else if (c == 'f') region = "ZA";
+            else if (c == 'n') region = "ES";
+            else if (c == 'r') region = "AE";
+            else if (c == 'z') region = "AZ";
+            break;
+        case 'b':
+            if (c == 'e') region = "BY";
+            else if (c == 'g') region = "BG";
+            else if (c == 'n') region = "BD";
+            else if (c == 'r') region = "FR";
+            else if (c == 's') region = "BA";
+            break;
+        case 'c':
+            if (c == 'a') region = "ES";
+            else if (c == 's') region = "CZ";
+            else if (c == 'y') region = "GB";
+            break;
+        case 'd':
+            if (c == 'a') region = "DK";
+            else if (c == 'e') region = "DE";
+            break;
+        case 'e':
+            if (c == 'l') region = "GR";
+            else if (c == 'n') region = "US";
+            else if (c == 's' || c == 'u') region = "ES";
+            else if (c == 't') region = "EE";
+            break;
+        case 'f':
+            if (c == 'a') region = "IR";
+            else if (c == 'i') region = "FI";
+            else if (c == 'o') region = "FO";
+            else if (c == 'r') region = "FR";
+            break;
+        case 'g':
+            if (c == 'a') region = "IE";
+            else if (c == 'l') region = "ES";
+            else if (c == 'v') region = "GB";
+            break;
+        case 'h':
+            if (c == 'e') region = "IL";
+            else if (c == 'i') region = "IN";
+            else if (c == 'r') region = "HT";
+            else if (c == 'u') region = "HU";
+            break;
+        case 'i':
+            if (c == 'd') region = "ID";
+            else if (c == 's') region = "IS";
+            else if (c == 't') region = "IT";
+            else if (c == 'w') region = "IL";
+            break;
+        case 'j':
+            if (c == 'a') region = "JP";
+            break;
+        case 'k':
+            if (c == 'l') region = "GL";
+            else if (c == 'o') region = "KR";
+            else if (c == 'w') region = "GB";
+            break;
+        case 'l':
+            if (c == 't') region = "LT";
+            else if (c == 'v') region = "LV";
+            break;
+        case 'm':
+            if (c == 'k') region = "MK";
+            else if (c == 'l' || c == 'r') region = "IN";
+            else if (c == 'n') region = "MN";
+            else if (c == 's') region = "MY";
+            else if (c == 't') region = "MT";
+            break;
+        case 'n':
+            if (c == 'b' || c == 'n' || c == 'o') region = "NO";
+            else if (c == 'e') region = "NP";
+            else if (c == 'l') region = "NL";
+            break;
+        case 'o':
+            if (c == 'm') region = "ET";
+            break;
+        case 'p':
+            if (c == 'a') region = "IN";
+            else if (c == 'l') region = "PL";
+            else if (c == 't') region = "PT";
+            break;
+        case 'r':
+            if (c == 'o') region = "RO";
+            else if (c == 'u') region = "RU";
+            break;
+        case 's':
+            switch (c) {
+                case 'e': region = "NO"; break;
+                case 'h': region = "YU"; break;
+                case 'k': region = "SK"; break;
+                case 'l': region = "SI"; break;
+                case 'o': region = "ET"; break;
+                case 'q': region = "AL"; break;
+                case 't': region = "ZA"; break;
+                case 'v': region = "SE"; break;
+            }
+            break;
+        case 't':
+            if (c == 'a' || c == 'e') region = "IN";
+            else if (c == 'h') region = "TH";
+            else if (c == 'i') region = "ER";
+            else if (c == 'r') region = "TR";
+            else if (c == 't') region = "RU";
+            break;
+        case 'u':
+            if (c == 'k') region = "UA";
+            else if (c == 'r') region = "PK";
+            break;
+        case 'v':
+            if (c == 'i') region = "VN";
+            break;
+        case 'w':
+            if (c == 'a') region = "BE";
+            break;
+        case 'x':
+            if (c == 'h') region = "ZA";
+            break;
+        case 'z':
+            if (c == 'h') region = "CN";
+            else if (c == 'u') region = "ZA";
+            break;
+    }
+    
+    if (region == NULL)
+        return(NULL);
+    
+    *q++ = region[0];
+    *q++ = region[1];
+    memcpy(q, ".utf8", 6);
+    locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
+    
+    return(locale);
+#endif
+
+#ifdef XSLT_LOCALE_MSVCRT
+    const char *localeName = NULL;
+    int c;
+    
+    /* We only look at the language and ignore the region. I think Windows
+       doesn't care about the region for LC_COLLATE anyway. */
+    
+    if (languageTag == NULL ||
+        !languageTag[0] ||
+        !languageTag[1] ||
+        languageTag[2] && languageTag[2] != '-')
+    	return(NULL);
+    
+    c = tolower(languageTag[1]);
+    
+    switch (tolower(languageTag[0])) {
+        case 'c':
+            if (c == 's') localeName = "csy"; /* Czech */
+            break;
+        case 'd':
+            if (c == 'a') localeName = "dan"; /* Danish */
+            else if (c == 'e') localeName = "deu"; /* German */
+            break;
+        case 'e':
+            if (c == 'l') localeName = "ell"; /* Greek */
+            else if (c == 'n') localeName = "english";
+            else if (c == 's') localeName = "esp"; /* Spanish */
+            break;
+        case 'f':
+            if (c == 'i') localeName = "fin"; /* Finnish */
+            else if (c == 'r') localeName = "fra"; /* French */
+            break;
+        case 'h':
+            if (c == 'u') localeName = "hun"; /* Hungarian */
+            break;
+        case 'i':
+            if (c == 's') localeName = "isl"; /* Icelandic */
+            else if (c == 't') localeName = "ita"; /* Italian */
+            break;
+        case 'j':
+            if (c == 'a') localeName = "jpn"; /* Japanese */
+            break;
+        case 'k':
+            if (c == 'o') localeName = "kor"; /* Korean */
+            break;
+        case 'n':
+            if (c == 'l') localeName = "nld"; /* Dutch */
+            else if (c == 'o') localeName = "norwegian";
+            break;
+        case 'p':
+            if (c == 'l') localeName = "plk"; /* Polish */
+            else if (c == 't') localeName = "ptg"; /* Portuguese */
+            break;
+        case 'r':
+            if (c == 'u') localeName = "rus"; /* Russian */
+            break;
+        case 's':
+            if (c == 'k') localeName = "sky"; /* Slovak */
+            else if (c == 'v') localeName = "sve"; /* Swedish */
+            break;
+        case 't':
+            if (c == 'r') localeName = "trk"; /* Turkish */
+            break;
+        case 'z':
+            if (c == 'h') localeName = "chinese";
+            break;
+    }
+    
+    if (localeName == NULL)
+        return(NULL);
+
+    return(_create_locale(LC_COLLATE, localeName));
+#endif
+
+#ifdef XSLT_LOCALE_NONE
+    return(NULL);
+#endif
+}
+
+/**
+ * xsltFreeLocale:
+ *
+ * Frees a locale created with xsltNewLocale
+ */
+void
+xsltFreeLocale(xsltLocale locale) {
+#ifdef XSLT_LOCALE_XLOCALE
+    freelocale(locale);
+#endif
+
+#ifdef XSLT_LOCALE_MSVCRT
+    _free_locale(locale);
+#endif
+}
+
+/**
+ * xsltStrxfrm:
+ * @locale: locale created with xsltNewLocale
+ * @string: UTF-8 string to transform
+ *
+ * Transforms a string according to locale. The transformed string must then be
+ * compared with xsltLocaleStrcmp and freed with xmlFree.
+ *
+ * Returns the transformed string or NULL on error
+ */
+xsltLocaleChar *
+xsltStrxfrm(xsltLocale locale, const xmlChar *string)
+{
+#ifdef XSLT_LOCALE_NONE
+    return(NULL);
+#else
+    size_t xstrlen, r;
+    xsltLocaleChar *xstr;
+    
+#ifdef XSLT_LOCALE_XLOCALE
+    xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
+    xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
+    if (xstr == NULL) {
+	xsltTransformError(NULL, NULL, NULL,
+	    "xsltStrxfrm : out of memory error\n");
+	return(NULL);
+    }
+
+    r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
+#endif
+
+#ifdef XSLT_LOCALE_MSVCRT
+    wchar_t *wcs;
+    wchar_t dummy;
+    int wcslen;
+    int i, j;
+    
+    /* convert UTF8 to Windows wide chars (UTF16) */
+    
+    wcslen = xmlUTF8Strlen(string);
+    if (wcslen < 0) {
+	xsltTransformError(NULL, NULL, NULL,
+	    "xsltStrxfrm : invalid UTF-8 string\n");
+        return(NULL);
+    }
+    wcs = (wchar_t *) xmlMalloc(sizeof(wchar_t) * (wcslen + 1));
+    if (wcs == NULL) {
+	xsltTransformError(NULL, NULL, NULL,
+	    "xsltStrxfrm : out of memory error\n");
+	return(NULL);
+    }
+
+    for (i=0, j=0; i<wcslen; ++i) {
+        int len = 4; /* not really, but string is already checked */
+        int c = xmlGetUTF8Char(string, &len);
+#if 0        
+        if (c < 0) {
+	    xsltTransformError(NULL, NULL, NULL,
+	        "xsltStrxfrm : invalid UTF-8 string\n");
+            xmlFree(wcs);
+            return(NULL);
+        }
+#endif
+
+        if (c == (wchar_t)c) {
+            wcs[j] = (wchar_t)c;
+            ++j;
+        }
+        
+        string += len;
+    }
+    
+    wcs[j] = 0;
+    
+    /* _wcsxfrm_l needs a dummy strDest because it always writes at least one
+       terminating zero wchar */
+    xstrlen = _wcsxfrm_l(&dummy, wcs, 0, locale);
+    if (xstrlen == INT_MAX) {
+	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
+        xmlFree(wcs);
+        return(NULL);
+    }
+    ++xstrlen;
+    xstr = (wchar_t *) xmlMalloc(sizeof(wchar_t) * xstrlen);
+    if (xstr == NULL) {
+	xsltTransformError(NULL, NULL, NULL,
+	    "xsltStrxfrm : out of memory error\n");
+        xmlFree(wcs);
+	return(NULL);
+    }
+
+    r = _wcsxfrm_l(xstr, wcs, xstrlen, locale);
+    
+    xmlFree(wcs);
+#endif /* XSLT_LOCALE_MSVCRT */
+    
+    if (r >= xstrlen) {
+	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
+        xmlFree(xstr);
+        return(NULL);
+    }
+
+    return(xstr);
+#endif /* XSLT_LOCALE_NONE */
+}
+
+/**
+ * xsltLocaleStrcmp:
+ * @str1: a string transformed with xsltStrxfrm
+ * @str2: a string transformed with xsltStrxfrm
+ *
+ * Compares two strings transformed with xsltStrxfrm
+ *
+ * Returns a value < 0 if str1 sorts before str2,
+ *         a value > 0 if str1 sorts after str2,
+ *         0 if str1 and str2 are equal wrt sorting
+ */
+int
+xsltLocaleStrcmp(const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
+#ifdef XSLT_LOCALE_MSVCRT
+    if (str1 == str2) return(0);
+    if (str1 == NULL) return(-1);
+    if (str2 == NULL) return(1);
+    return(wcscmp(str1, str2));
+#else
+    return(xmlStrcmp(str1, str2));
+#endif
+}
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/xsltlocale.h libxslt-1.1.23-nik2/libxslt/xsltlocale.h
--- libxslt-1.1.23/libxslt/xsltlocale.h	1970-01-01 01:00:00.000000000 +0100
+++ libxslt-1.1.23-nik2/libxslt/xsltlocale.h	2008-05-14 18:06:45.000000000 +0200
@@ -0,0 +1,55 @@
+/*
+ * Summary: Locale handling
+ * Description: Interfaces for locale handling. Needed for language dependent
+ *              sorting.
+ *
+ * Copy: See Copyright for the status of this software.
+ *
+ * Author: Nick Wellnhofer
+ */
+
+#ifndef __XML_XSLTLOCALE_H__
+#define __XML_XSLTLOCALE_H__
+
+#include <libxml/xmlstring.h>
+
+#ifdef HAVE_XLOCALE_H
+
+#define XSLT_LOCALE_XLOCALE
+
+#include <locale.h>
+#include <xlocale.h>
+
+#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
+typedef __locale_t xsltLocale;
+#else
+typedef locale_t xsltLocale;
+#endif
+typedef xmlChar xsltLocaleChar;
+
+#else
+#if defined(_MSC_VER) || defined (__MINGW32__) && defined(__MSVCRT__)
+
+#define XSLT_LOCALE_MSVCRT
+
+#include <locale.h>
+
+typedef _locale_t xsltLocale;
+typedef wchar_t xsltLocaleChar;
+
+#else
+
+#define XSLT_LOCALE_NONE
+
+typedef void *xsltLocale;
+typedef xmlChar xsltLocaleChar;
+
+#endif
+#endif
+
+xsltLocale xsltNewLocale(const xmlChar *langName);
+void xsltFreeLocale(xsltLocale locale);
+xsltLocaleChar *xsltStrxfrm(xsltLocale locale, const xmlChar *string);
+int xsltLocaleStrcmp(const xsltLocaleChar *str1, const xsltLocaleChar *str2);
+
+#endif /* __XML_XSLTLOCALE_H__ */
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/libxslt/xsltutils.c libxslt-1.1.23-nik2/libxslt/xsltutils.c
--- libxslt-1.1.23/libxslt/xsltutils.c	2008-04-03 09:26:50.000000000 +0200
+++ libxslt-1.1.23-nik2/libxslt/xsltutils.c	2008-05-14 17:46:55.000000000 +0200
@@ -1039,6 +1039,12 @@ xsltComputeSortResult(xsltTransformConte
 		}
 	    } else {
 		if (res->type == XPATH_STRING) {
+		    if (comp->locale != NULL) {
+			xmlChar *str = res->stringval;
+			res->stringval = (xmlChar *) xsltStrxfrm(comp->locale, str);
+			xmlFree(str);
+		    }
+
 		    results[i] = res;
 		} else {
 #ifdef WITH_XSLT_DEBUG_PROCESS
@@ -1191,6 +1197,10 @@ xsltDefaultSortFunction(xsltTransformCon
 				results[j + incr]->floatval)
 			    tst = 1;
 			else tst = -1;
+		    } else if(comp->locale != NULL) {
+			tst = xsltLocaleStrcmp(
+			    (xsltLocaleChar *) results[j]->stringval,
+			    (xsltLocaleChar *) results[j + incr]->stringval); 
 		    } else {
 			tst = xmlStrcmp(results[j]->stringval,
 				     results[j + incr]->stringval); 
@@ -1245,6 +1255,10 @@ xsltDefaultSortFunction(xsltTransformCon
 					res[j + incr]->floatval)
 				    tst = 1;
 				else tst = -1;
+			    } else if(comp->locale != NULL) {
+				tst = xsltLocaleStrcmp(
+				    (xsltLocaleChar *) res[j]->stringval,
+				    (xsltLocaleChar *) res[j + incr]->stringval); 
 			    } else {
 				tst = xmlStrcmp(res[j]->stringval,
 					     res[j + incr]->stringval); 
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/tests/REC/test-10-2.xml libxslt-1.1.23-nik2/tests/REC/test-10-2.xml
--- libxslt-1.1.23/tests/REC/test-10-2.xml	1970-01-01 01:00:00.000000000 +0100
+++ libxslt-1.1.23-nik2/tests/REC/test-10-2.xml	2008-05-14 17:47:43.000000000 +0200
@@ -0,0 +1,30 @@
+<?xml version="1.0"?>
+<test>
+    <strings lang="de">
+        <string>z</string>
+        <string>ßy</string>
+        <string>ssz</string>
+        <string>ä</string>
+        <string>a</string>
+    </strings>
+    <strings lang="fr">
+        <string>côté</string>
+        <string>coté</string>
+        <string>côte</string>
+        <string>cote</string>
+    </strings>
+    <strings lang="sk">
+        <string>i</string>
+        <string>ch</string>
+        <string>h</string>
+        <string>ci</string>
+        <string>cg</string>
+    </strings>
+    <strings lang="sv">
+        <string>ä</string>
+        <string>z</string>
+        <string>vb</string>
+        <string>wa</string>
+        <string>a</string>
+    </strings>
+</test>
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/tests/REC/test-10-2.xsl libxslt-1.1.23-nik2/tests/REC/test-10-2.xsl
--- libxslt-1.1.23/tests/REC/test-10-2.xsl	1970-01-01 01:00:00.000000000 +0100
+++ libxslt-1.1.23-nik2/tests/REC/test-10-2.xsl	2008-05-14 17:47:43.000000000 +0200
@@ -0,0 +1,49 @@
+<?xml version="1.0"?>
+
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+
+<xsl:template match="*">
+    <xsl:copy>
+        <xsl:apply-templates select="node()"/>
+    </xsl:copy>
+</xsl:template>
+
+<xsl:template match="strings">
+    <xsl:copy>
+        <xsl:copy-of select="@*"/>
+        <xsl:text>&#xa;</xsl:text>
+        <xsl:choose>
+            <xsl:when test="@lang='de'">
+                <xsl:apply-templates select="string">
+                    <xsl:sort lang="de"/>
+                </xsl:apply-templates>
+            </xsl:when>
+            <xsl:when test="@lang='fr'">
+                <xsl:apply-templates select="string">
+                    <xsl:sort lang="fr"/>
+                </xsl:apply-templates>
+            </xsl:when>
+            <xsl:when test="@lang='sk'">
+                <xsl:apply-templates select="string">
+                    <xsl:sort lang="sk"/>
+                </xsl:apply-templates>
+            </xsl:when>
+            <xsl:when test="@lang='sv'">
+                <xsl:apply-templates select="string">
+                    <xsl:sort lang="sv"/>
+                </xsl:apply-templates>
+            </xsl:when>
+        </xsl:choose>
+        <xsl:text>    </xsl:text>
+    </xsl:copy>
+</xsl:template>
+
+<xsl:template match="string">
+    <xsl:text>        </xsl:text>
+    <xsl:copy>
+        <xsl:value-of select="text()"/>
+    </xsl:copy>
+    <xsl:text>&#xa;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/win32/Makefile.mingw libxslt-1.1.23-nik2/win32/Makefile.mingw
--- libxslt-1.1.23/win32/Makefile.mingw	2007-01-03 16:11:57.000000000 +0100
+++ libxslt-1.1.23-nik2/win32/Makefile.mingw	2008-05-14 17:47:43.000000000 +0200
@@ -87,6 +87,7 @@ XSLT_OBJS = $(XSLT_INTDIR)/attributes.o\
 	$(XSLT_INTDIR)/transform.o\
 	$(XSLT_INTDIR)/variables.o\
 	$(XSLT_INTDIR)/xslt.o\
+	$(XSLT_INTDIR)/xsltlocale.o\
 	$(XSLT_INTDIR)/xsltutils.o
 XSLT_SRCS = $(subst .o,.c,$(subst $(XSLT_INTDIR),$(XSLT_SRCDIR),$(XSLT_OBJS)))
 
@@ -107,6 +108,7 @@ XSLT_OBJS_A = $(XSLT_INTDIR_A)/attribute
 	$(XSLT_INTDIR_A)/transform.o\
 	$(XSLT_INTDIR_A)/variables.o\
 	$(XSLT_INTDIR_A)/xslt.o\
+	$(XSLT_INTDIR_A)/xsltlocale.o\
 	$(XSLT_INTDIR_A)/xsltutils.o
 
 # Libexslt object files.
diff -purN --exclude Makefile.in --exclude '*.m4' --exclude 'config.h.in*' --exclude configure --exclude '*config.h' --exclude '*.spec' --exclude NEWS libxslt-1.1.23/win32/Makefile.msvc libxslt-1.1.23-nik2/win32/Makefile.msvc
--- libxslt-1.1.23/win32/Makefile.msvc	2007-08-03 15:41:02.000000000 +0200
+++ libxslt-1.1.23-nik2/win32/Makefile.msvc	2008-05-14 17:47:43.000000000 +0200
@@ -53,6 +53,7 @@ CPPFLAGS = /nologo
 CC = cl.exe
 CFLAGS = /nologo /D "WIN32" /D "_WINDOWS" /D "_MBCS" /W3 $(CRUNTIME) /D "_REENTRANT"
 CFLAGS = $(CFLAGS) /I$(BASEDIR) /I$(XSLT_SRCDIR) /I$(INCPREFIX)
+CFLAGS = $(CFLAGS) /D_CRT_SECURE_NO_DEPRECATE /D_CRT_NONSTDC_NO_DEPRECATE
 
 # The linker and its options.
 LD = link.exe
@@ -90,6 +91,7 @@ XSLT_OBJS = $(XSLT_INTDIR)\attributes.ob
 	$(XSLT_INTDIR)\transform.obj\
 	$(XSLT_INTDIR)\variables.obj\
 	$(XSLT_INTDIR)\xslt.obj\
+	$(XSLT_INTDIR)\xsltlocale.obj\
 	$(XSLT_INTDIR)\xsltutils.obj\
 	$(XSLT_INTDIR)\attrvt.obj
 
@@ -110,6 +112,7 @@ XSLT_OBJS_A = $(XSLT_INTDIR_A)\attribute
 	$(XSLT_INTDIR_A)\transform.obj\
 	$(XSLT_INTDIR_A)\variables.obj\
 	$(XSLT_INTDIR_A)\xslt.obj\
+	$(XSLT_INTDIR_A)\xsltlocale.obj\
 	$(XSLT_INTDIR_A)\xsltutils.obj\
 	$(XSLT_INTDIR_A)\attrvt.obj
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]