[glib] Correct collation key generation on OS X



commit 525b33c05f37625864b41c03cb95ea6b822c1ede
Author: Carlos Sánchez de La Lama <csanchezdll gmail com>
Date:   Mon Jul 20 12:49:51 2015 +0200

    Correct collation key generation on OS X
    
    This fixes bug #673047.

 glib/gunicollate.c   |   38 +++++++++++++++++++++-----------------
 glib/tests/collate.c |   20 ++++++++++++++++++++
 2 files changed, 41 insertions(+), 17 deletions(-)
---
diff --git a/glib/gunicollate.c b/glib/gunicollate.c
index f725720..4fca634 100644
--- a/glib/gunicollate.c
+++ b/glib/gunicollate.c
@@ -215,26 +215,30 @@ collate_key_to_string (UCCollationValue *key,
                        gsize             key_len)
 {
   gchar *result;
-  gsize result_len = 0;
-  const gsize start = 2 * sizeof (void *) / sizeof (UCCollationValue);
-  gsize i;
+  gsize result_len;
+  long *lkey = (long *) key;
 
-  /* The first codes should be skipped: the same string on the same
-   * system can get different values at runtime in those positions,
-   * and they do not sort correctly.  The exact size of the prefix
-   * depends on whether we are building 64 or 32 bit.
+  /* UCCollationValue format:
+   *
+   * UCCollateOptions (32/64 bits)
+   * SizeInBytes      (32/64 bits)
+   * Value            (8 bits array)
+   *
+   * UCCollateOptions: ordering option mask of the collator
+   * used to create the key. Size changes on 32-bit / 64-bit
+   * hosts. On 64-bits also the extra half-word seems to have
+   * some extra (unknown) meaning.
+   * SizeInBytes: size of the whole structure, in bytes
+   * (including UCCollateOptions and SizeInBytes fields). Size
+   * changes on 32-bit & 64-bit hosts.
+   * Value: array of bytes containing the comparison weights.
+   * Seems to have several sub-strings separated by \001 and \002
+   * chars. Also, experience shows this is directly strcmp-able.
    */
-  if (key_len <= start)
-    return g_strdup ("");
-
-  for (i = start; i < key_len; i++)
-    result_len += utf8_encode (NULL, g_htonl (key[i] + 1));
 
+  result_len = lkey[1];
   result = g_malloc (result_len + 1);
-  result_len = 0;
-  for (i = start; i < key_len; i++)
-    result_len += utf8_encode (result + result_len, g_htonl (key[i] + 1));
-
+  memcpy (result, &lkey[2], result_len);
   result[result_len] = '\0';
 
   return result;
diff --git a/glib/tests/collate.c b/glib/tests/collate.c
index 24a913d..0c0e1bb 100644
--- a/glib/tests/collate.c
+++ b/glib/tests/collate.c
@@ -1,3 +1,5 @@
+#include "config.h"
+
 #include <glib.h>
 #include <locale.h>
 #include <stdlib.h>
@@ -231,6 +233,23 @@ const gchar *sorted2[] = {
 };
 
 const gchar *file_sorted2[] = {
+  /* Filename collation in OS X follows Finder style which gives
+   * a slightly different order from usual Linux locales. */
+#ifdef HAVE_CARBON
+  "a-.a",
+  "a.a",
+  "aa.a",
+  "file:foo",
+  "file0000",
+  "file000x",
+  "file1",
+  "file5",
+  "file10",
+  "file26",
+  "file0027",
+  "file027",
+  "file100",
+#else
   "a.a",
   "a-.a",
   "aa.a",
@@ -244,6 +263,7 @@ const gchar *file_sorted2[] = {
   "file0027",
   "file100",
   "file:foo",
+#endif
   NULL
 };
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]