[evolution-data-server/cursor-staging: 7/19] Added utility ECollator to libedataserver.

From: Tristan Van Berkom <tvb src gnome org>
To: commits-list gnome org
Cc:
Subject: [evolution-data-server/cursor-staging: 7/19] Added utility ECollator to libedataserver.
Date: Wed, 23 Oct 2013 19:06:24 +0000 (UTC)
commit 252a29f8d278f1f1f3bce7f2126d7c1aceecfc14
Author: Tristan Van Berkom <tristanvb openismus com>
Date:   Tue Apr 30 16:09:35 2013 +0900

    Added utility ECollator to libedataserver.
    
    The ECollator API wraps up the required ICU APIs into something more
    convenient for usage in EDS. The provided features allow for rich
    locale sensitive sorting as well as knowledge of the user's native
    alphabet.
    
    Some auxiliary objects are needed to access features in ICU which are
    available only in C++:
    
        EAlphabetIndex
    
        A private accessory used internally by ECollator to implement AlphabeticIndex
        features. This is in a separate file since we need to use C++ APIs to use
        the AlphabeticIndex, so we compile it into a separate archive to be statically
        linked in with libedataserver.la.
    
        ETransliterator
    
        Added this glue to call icu::Transliterator C++ APIs on behalf of ECollator.

 configure.ac                                |   26 +-
 libedataserver/Makefile.am                  |   16 +-
 libedataserver/e-alphabet-index-private.cpp |  158 +++++++
 libedataserver/e-alphabet-index-private.h   |   62 +++
 libedataserver/e-collator.c                 |  650 +++++++++++++++++++++++++++
 libedataserver/e-collator.h                 |   96 ++++
 libedataserver/e-transliterator-private.cpp |   92 ++++
 libedataserver/e-transliterator-private.h   |   56 +++
 libedataserver/libedataserver.h             |    1 +
 9 files changed, 1139 insertions(+), 18 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 90bf762..44d8379 100644
--- a/configure.ac
+++ b/configure.ac
@@ -175,7 +175,6 @@ dnl *********************************************
 dnl Figure out early if we'll need a C++ compiler
 dnl *********************************************
 
-evo_with_cxx=no
 EVO_PHONENUMBER_ARGS
 
 dnl ******************************
@@ -214,22 +213,15 @@ dnl       -Wstrict-aliasing=2
 AM_CFLAGS="$WARNING_FLAGS -fno-strict-aliasing"
 AC_SUBST(AM_CFLAGS)
 
-if test "x$evo_with_cxx" = xyes; then
-       AC_PROG_CXX
+dnl C++ Compiler flags, needed for ICU C++ access and libphonenumber usage
+AC_PROG_CXX
 
-       AC_LANG_PUSH([C++])
-       AS_COMPILER_FLAGS(CXX_WARNING_FLAGS, [$proposed_cxx_warning_flags])
-       AC_SUBST(CXX_WARNING_FLAGS)
-       AM_CXXFLAGS="$CXX_WARNING_FLAGS"
-       AC_SUBST(AM_CXXFLAGS)
-       AC_LANG_POP([C++])
-else
-       dnl Autoconf requires those automake conditionals to be defined when
-       dnl generating the config files, but apparently it forgets to initialize
-       dnl them dnl if AC_PROG_CXX never gets called. Therefore we do it manually.
-       am__fastdepCXX_TRUE='#'
-       am__fastdepCXX_FALSE=
-fi
+AC_LANG_PUSH([C++])
+AS_COMPILER_FLAGS(CXX_WARNING_FLAGS, [$proposed_cxx_warning_flags])
+AC_SUBST(CXX_WARNING_FLAGS)
+AM_CXXFLAGS="$CXX_WARNING_FLAGS"
+AC_SUBST(AM_CXXFLAGS)
+AC_LANG_POP([C++])
 
 dnl Permits linking of C++ based libraries using the C linker if needed.
 AC_SUBST([predeps_CXX])
diff --git a/libedataserver/Makefile.am b/libedataserver/Makefile.am
index 0dc50c4..a016e6c 100644
--- a/libedataserver/Makefile.am
+++ b/libedataserver/Makefile.am
@@ -16,6 +16,7 @@ ENUM_GENERATED = e-source-enumtypes.h e-source-enumtypes.c
 BUILT_SOURCES = $(ENUM_GENERATED)
 
 lib_LTLIBRARIES = libedataserver-1.2.la
+noinst_LTLIBRARIES = libedataserver-private.la
 
 libedataserver_1_2_la_CPPFLAGS = \
        $(AM_CPPFLAGS)                                                  \
@@ -36,14 +37,17 @@ libedataserver_1_2_la_CPPFLAGS = \
        $(GIO_UNIX_CFLAGS)                                              \
        $(SOUP_CFLAGS)                                                  \
        $(CODE_COVERAGE_CFLAGS)                                         \
+       $(ICU_CFLAGS)                                                   \
        $(NULL)
 
 libedataserver_1_2_la_SOURCES =                \
        $(BUILT_SOURCES)                \
+       e-alphabet-index-private.h      \
        e-cancellable-locks.c           \
        e-categories.c                  \
        e-client.c                      \
        e-client-private.h              \
+       e-collator.c                    \
        e-credentials.c                 \
        e-flag.c                        \
        e-gdbus-templates.c             \
@@ -86,6 +90,7 @@ libedataserver_1_2_la_SOURCES =               \
        e-source-webdav.c               \
        e-debug-log.c                   \
        e-time-utils.c                  \
+       e-transliterator-private.h      \
        e-uid.c                         \
        e-url.c                         \
        e-data-server-util.c            \
@@ -102,7 +107,9 @@ libedataserver_1_2_la_LIBADD =                              \
        $(GIO_UNIX_LIBS)                                \
        $(ICONV_LIBS)                                   \
        $(SOCKET_LIBS)                                  \
-       $(SOUP_LIBS)
+       $(SOUP_LIBS)                                    \
+       $(ICU_LIBS)                                     \
+       $(NULL)
 
 libedataserver_1_2_la_LDFLAGS = \
        -version-info $(LIBEDATASERVER_CURRENT):$(LIBEDATASERVER_REVISION):$(LIBEDATASERVER_AGE) 
$(NO_UNDEFINED) \
@@ -116,6 +123,7 @@ libedataserverinclude_HEADERS =             \
        e-cancellable-locks.h           \
        e-categories.h                  \
        e-client.h                      \
+       e-collator.h                    \
        e-credentials.h                 \
        e-flag.h                        \
        e-gdbus-templates.h             \
@@ -167,6 +175,12 @@ libedataserverinclude_HEADERS =            \
        e-xml-hash-utils.h              \
        eds-version.h
 
+# We put the C++ code into a separate static library, so that we can use
+# the C linker for libedataserver.
+libedataserver_private_la_SOURCES = e-alphabet-index-private.cpp e-transliterator-private.cpp
+libedataserver_private_la_CPPFLAGS = $(libedataserver_1_2_la_CPPFLAGS)
+libedataserver_1_2_la_LIBADD += @predeps_CXX@ libedataserver-private.la @postdeps_CXX@
+
 %-$(API_VERSION).pc: %.pc
         cp $< $@
 
diff --git a/libedataserver/e-alphabet-index-private.cpp b/libedataserver/e-alphabet-index-private.cpp
new file mode 100644
index 0000000..8eb2056
--- /dev/null
+++ b/libedataserver/e-alphabet-index-private.cpp
@@ -0,0 +1,158 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU Lesser General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Tristan Van Berkom <tristanvb openismus com>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "e-alphabet-index-private.h"
+
+/* C++ standard library */
+#include <string>
+#include <memory>
+
+/* system headers */
+#include <langinfo.h>
+#include <locale.h>
+
+/* ICU headers */
+#include <unicode/alphaindex.h>
+
+using icu::AlphabeticIndex;
+using icu::Locale;
+
+/* Backing structure of the opaque EAlphabetIndex type: it only
+ * carries the ICU AlphabeticIndex so that C code can hold on to it.
+ */
+struct _EAlphabetIndex {
+       AlphabeticIndex *priv; /* created with 'new' by the constructor, released with 'delete' by the free function */
+};
+
+/* Creates an EAlphabetIndex for a given language code (normally
+ * language codes are 2 letter codes, eg. 'en' = English, 'es' = Spanish).
+ *
+ * Returns NULL (after a GLib critical) if 'language' is NULL, otherwise
+ * a newly allocated index to release with _e_alphabet_index_cxx_free().
+ *
+ * If ICU reports an error while building the index, a warning is logged
+ * and the index is still returned, so existing callers keep working.
+ */
+EAlphabetIndex *
+_e_alphabet_index_cxx_new_for_language (const gchar *language)
+{
+       UErrorCode status = U_ZERO_ERROR;
+       EAlphabetIndex *alphabet_index;
+
+       g_return_val_if_fail (language != NULL, NULL);
+
+       alphabet_index = g_slice_new (EAlphabetIndex);
+       alphabet_index->priv = new AlphabeticIndex (Locale (language), status);
+
+       /* Do not swallow a failed construction silently */
+       if (U_FAILURE (status))
+               g_warning ("Failed to create alphabetic index for language '%s' (%s)",
+                          language, u_errorName (status));
+
+       return alphabet_index;
+}
+
+/* Releases an EAlphabetIndex together with the ICU index it
+ * wraps; passing NULL does nothing.
+ */
+void
+_e_alphabet_index_cxx_free (EAlphabetIndex *alphabet_index)
+{
+       if (alphabet_index == NULL)
+               return;
+
+       delete alphabet_index->priv;
+       g_slice_free (EAlphabetIndex, alphabet_index);
+}
+
+/* Looks up the bucket index under which the UTF-8 'word' sorts.
+ *
+ * Returns -1 (after a GLib critical) when either argument is NULL.
+ */
+gint
+_e_alphabet_index_cxx_get_index (EAlphabetIndex  *alphabet_index,
+                                const gchar     *word)
+{
+       UErrorCode icu_status = U_ZERO_ERROR;
+
+       g_return_val_if_fail (alphabet_index != NULL, -1);
+       g_return_val_if_fail (word != NULL, -1);
+
+       UnicodeString uword = icu::UnicodeString::fromUTF8 (word);
+
+       return alphabet_index->priv->getBucketIndex (uword, icu_status);
+}
+
+/* Fetch the list of labels in the alphabetic index.
+ *
+ * Returns a NULL terminated array of UTF-8 labels, one for each
+ * alphabetic index position; 'n_labels' is set to the number of
+ * labels stored in it. The returned array of strings can be freed
+ * with g_strfreev().
+ *
+ * The underflow, overflow and inflow parameters will be
+ * set to the appropriate indexes (these refer to positions in the
+ * returned labels), or to -1 when the index has no such bucket.
+ *
+ * Returns NULL (after a GLib critical) if any argument is NULL.
+ */
+gchar **
+_e_alphabet_index_cxx_get_labels (EAlphabetIndex  *alphabet_index,
+                                 gint            *n_labels,
+                                 gint            *underflow,
+                                 gint            *inflow,
+                                 gint            *overflow)
+{
+       UErrorCode status = U_ZERO_ERROR;
+       gchar **labels = NULL;
+       gint count, i;
+
+       g_return_val_if_fail (alphabet_index != NULL, NULL);
+       g_return_val_if_fail (n_labels != NULL, NULL);
+       g_return_val_if_fail (underflow != NULL, NULL);
+       g_return_val_if_fail (inflow != NULL, NULL);
+       g_return_val_if_fail (overflow != NULL, NULL);
+
+       count = alphabet_index->priv->getBucketCount (status);
+
+       /* Never size the array from a failed or nonsensical count */
+       if (U_FAILURE (status) || count < 0)
+               count = 0;
+
+       labels = g_new0 (gchar *, count + 1);
+
+       /* In case they are missing, they should be set to -1 */
+       *underflow = *inflow = *overflow = -1;
+
+       /* Iterate over the AlphabeticIndex and collect UTF-8 versions
+        * of the bucket labels
+        */
+       alphabet_index->priv->resetBucketIterator (status);
+
+       /* The 'i < count' bound guarantees we never write beyond the
+        * array, whatever the iterator decides to report.
+        */
+       for (i = 0; i < count && alphabet_index->priv->nextBucket (status); i++) {
+               UAlphabeticIndexLabelType label_type;
+               UnicodeString ustring;
+               std::string utf8;
+
+               label_type = alphabet_index->priv->getBucketLabelType ();
+
+               switch (label_type) {
+               case U_ALPHAINDEX_UNDERFLOW: *underflow = i; break;
+               case U_ALPHAINDEX_INFLOW:    *inflow    = i; break;
+               case U_ALPHAINDEX_OVERFLOW:  *overflow  = i; break;
+               case U_ALPHAINDEX_NORMAL:  /* do nothing */  break;
+               }
+
+               /* This is annoyingly heavy but not a function called
+                * very often, this could be improved by calling icu::UnicodeString::toUTF8()
+                * and implementing ICU's ByteSink class using glib's memory allocator.
+                */
+               ustring = alphabet_index->priv->getBucketLabel ();
+               ustring.toUTF8String (utf8);
+               labels[i] = g_strdup (utf8.c_str ());
+       }
+
+       /* Report the labels actually stored; this equals the bucket
+        * count whenever the iteration ran to completion.
+        */
+       *n_labels = i;
+
+       return labels;
+}
diff --git a/libedataserver/e-alphabet-index-private.h b/libedataserver/e-alphabet-index-private.h
new file mode 100644
index 0000000..4e01f24
--- /dev/null
+++ b/libedataserver/e-alphabet-index-private.h
@@ -0,0 +1,62 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU Lesser General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Tristan Van Berkom <tristanvb openismus com>
+ */
+
+#if !defined (__LIBEDATASERVER_H_INSIDE__) && !defined (LIBEDATASERVER_COMPILATION)
+#error "Only <libedataserver/libedataserver.h> should be included directly."
+#endif
+
+#ifndef E_ALPHABET_INDEX_PRIVATE_H
+#define E_ALPHABET_INDEX_PRIVATE_H
+
+#include <glib-object.h>
+
+G_BEGIN_DECLS
+
+#if __GNUC__ >= 4
+#  define E_ALPHABET_INDEX_LOCAL __attribute__ ((visibility ("hidden")))
+#else
+#  define E_ALPHABET_INDEX_LOCAL
+#endif
+
+/**
+ * EAlphabetIndex:
+ *
+ * A private opaque type describing an alphabetic index
+ *
+ * Since: 3.10
+ **/
+typedef struct _EAlphabetIndex EAlphabetIndex;
+
+/* defined in e-alphabet-index-private.cpp, and used by e-collator.c */
+
+E_ALPHABET_INDEX_LOCAL EAlphabetIndex *_e_alphabet_index_cxx_new_for_language (const gchar     *language);
+E_ALPHABET_INDEX_LOCAL void            _e_alphabet_index_cxx_free             (EAlphabetIndex  
*alphabet_index);
+E_ALPHABET_INDEX_LOCAL gint            _e_alphabet_index_cxx_get_index        (EAlphabetIndex  
*alphabet_index,
+                                                                              const gchar     *word);
+E_ALPHABET_INDEX_LOCAL gchar         **_e_alphabet_index_cxx_get_labels       (EAlphabetIndex  
*alphabet_index,
+                                                                              gint            *n_labels,
+                                                                              gint            *underflow,
+                                                                              gint            *inflow,
+                                                                              gint            *overflow);
+
+G_END_DECLS
+
+#endif /* E_ALPHABET_INDEX_PRIVATE_H */
diff --git a/libedataserver/e-collator.c b/libedataserver/e-collator.c
new file mode 100644
index 0000000..6885e5b
--- /dev/null
+++ b/libedataserver/e-collator.c
@@ -0,0 +1,650 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU Lesser General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Tristan Van Berkom <tristanvb openismus com>
+ */
+
+/**
+ * SECTION: e-collator
+ * @include: libedataserver/libedataserver.h
+ * @short_description: Collation services for locale sensitive sorting
+ *
+ * The #ECollator is a wrapper object around ICU collation services and
+ * provides features to sort words in locale specific ways. The collator
+ * also provides some API for determining features of the active alphabet
+ * in the user's locale, and which words should be sorted under which
+ * letter in the user's alphabet.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+/* ICU includes */
+#include <unicode/uclean.h>
+#include <unicode/ucol.h>
+#include <unicode/ustring.h>
+
+#include "e-collator.h"
+#include "e-alphabet-index-private.h"
+#include "e-transliterator-private.h"
+
+/* Number of UChar units in the on-stack buffer tried first when converting UTF-8 input */
+#define CONVERT_BUFFER_LEN        512
+/* Byte size of the on-stack buffer tried first when generating an ICU sort key */
+#define COLLATION_KEY_BUFFER_LEN  1024
+/* Byte size of the on-stack buffer receiving the canonicalized locale name */
+#define LOCALE_BUFFER_LEN         256
+
+/* Set to 1 to compile print_available_locales() and call it from e_collator_new() */
+#define ENABLE_DEBUGGING 0
+
+G_DEFINE_QUARK (e-collator-error-quark, e_collator_error)
+
+G_DEFINE_BOXED_TYPE (ECollator,
+                    e_collator,
+                    e_collator_ref, 
+                    e_collator_unref)
+
+/* Instance data of an ECollator; the lifetime is managed through
+ * e_collator_ref() and e_collator_unref().
+ */
+struct _ECollator
+{
+       UCollator       *coll;           /* ICU collator opened in e_collator_new() */
+       volatile gint    ref_count;      /* only touched through g_atomic_int_*() */
+
+       EAlphabetIndex  *alpha_index;    /* alphabetic index built for the locale's language code */
+       gchar          **labels;         /* labels fetched from alpha_index, released with g_strfreev() */
+       gint             n_labels;       /* label count reported along with 'labels' */
+       gint             underflow;      /* underflow position as reported by the alphabetic index */
+       gint             inflow;         /* inflow position as reported by the alphabetic index */
+       gint             overflow;       /* overflow position as reported by the alphabetic index */
+
+       ETransliterator *transliterator; /* only created for the "zh" language, NULL otherwise */
+};
+
+/*****************************************************
+ *                ICU Helper Functions               *
+ *****************************************************/
+#if ENABLE_DEBUGGING
+/* Debugging aid: prints on stderr every locale known to ICU, with
+ * its display name and the keywords ICU lists for it.
+ */
+static void
+print_available_locales (void)
+{
+       UErrorCode status = U_ZERO_ERROR;
+       UChar result[100];
+       gchar printable[100 * 4];
+       gint count, i;
+
+       u_init (&status);
+
+       g_printerr ("List of available locales (default locale is: %s)\n", uloc_getDefault());
+
+       count = uloc_countAvailable();
+       for (i = 0; i < count; i++) {
+               const gchar *locale_id = uloc_getAvailable (i);
+               UEnumeration *keywords;
+               const gchar *keyword;
+
+               /* ICU functions return early once the status holds a failure,
+                * so every locale starts over with a clean status.
+                */
+               status = U_ZERO_ERROR;
+               uloc_getDisplayName (locale_id, NULL, result, G_N_ELEMENTS (result), &status);
+
+               /* Only read 'result' if it was written and NUL terminated */
+               if (U_FAILURE (status) || status == U_STRING_NOT_TERMINATED_WARNING)
+                       printable[0] = '\0';
+               else
+                       u_austrcpy (printable, result);
+
+               /* print result */
+               g_printerr ("\t%s - %s", locale_id, printable);
+
+               status = U_ZERO_ERROR;
+               keywords = uloc_openKeywords (locale_id, &status);
+               if (keywords) {
+                       UErrorCode kstatus = U_ZERO_ERROR;
+
+                       g_printerr ("[");
+
+                       while ((keyword = uenum_next (keywords, NULL, &kstatus)) != NULL)
+                               g_printerr (" %s ", keyword);
+
+                       g_printerr ("]");
+
+                       uenum_close (keywords);
+               }
+               g_printerr ("\n");
+       }
+}
+#endif
+
+/* Converts a POSIX locale name into an ICU locale name, appending the
+ * 'phonebook' collation keyword for the "de" and "fi" languages.
+ *
+ * @posix_locale:  the locale name to interpret
+ * @language_code: (allow-none): receives a newly allocated copy of the
+ *                 locale's language code on success
+ * @error:         set from the E_COLLATOR_ERROR domain on failure
+ *
+ * Returns a newly allocated ICU locale name to release with g_free(),
+ * or NULL with @error set.
+ */
+static gchar *
+canonicalize_locale (const gchar  *posix_locale,
+                    gchar       **language_code,
+                    GError      **error)
+{
+       UErrorCode status = U_ZERO_ERROR;
+       gchar  locale_buffer[LOCALE_BUFFER_LEN];
+       gchar  language_buffer[8];
+       gchar *icu_locale = NULL;
+       gchar *final_locale;
+       gint   len;
+       const gchar *collation_type = NULL;
+
+       len = uloc_canonicalize (posix_locale, locale_buffer, LOCALE_BUFFER_LEN, &status);
+
+       if (status == U_BUFFER_OVERFLOW_ERROR) {
+               /* ICU signals a too small buffer with an error status while
+                * still returning the required length: retry with a clean
+                * status in a buffer which also has room for the terminator.
+                */
+               status = U_ZERO_ERROR;
+               icu_locale = g_malloc (len + 1);
+
+               uloc_canonicalize (posix_locale, icu_locale, len + 1, &status);
+       } else if (U_SUCCESS (status)) {
+               /* g_strndup() terminates the copy even when the result
+                * filled the stack buffer completely.
+                */
+               icu_locale = g_strndup (locale_buffer, len);
+       }
+
+       if (U_FAILURE (status)) {
+               g_set_error (error, E_COLLATOR_ERROR,
+                            E_COLLATOR_ERROR_INVALID_LOCALE,
+                            "Failed to interpret locale '%s' (%s)",
+                            posix_locale,
+                            u_errorName (status));
+               g_free (icu_locale);
+               return NULL;
+       }
+
+       status = U_ZERO_ERROR;
+       len = uloc_getLanguage (icu_locale, language_buffer, sizeof (language_buffer), &status);
+
+       if (U_FAILURE (status)) {
+               g_set_error (error, E_COLLATOR_ERROR,
+                            E_COLLATOR_ERROR_INVALID_LOCALE,
+                            "Failed to interpret language for locale '%s': %s",
+                            icu_locale,
+                            u_errorName (status));
+               g_free (icu_locale);
+               return NULL;
+       }
+
+       /* Add 'phonebook' tailoring to certain locales; a language which
+        * fills the whole buffer is not NUL terminated, so it must not
+        * be handed to strcmp().
+        */
+       if (len < (gint) sizeof (language_buffer) &&
+           (strcmp (language_buffer, "de") == 0 ||
+            strcmp (language_buffer, "fi") == 0)) {
+
+               collation_type = "phonebook";
+       }
+
+       if (collation_type != NULL) {
+               final_locale = g_strconcat (icu_locale, "@collation=", collation_type, NULL);
+               g_free (icu_locale);
+       } else {
+               /* Hand over ownership of the canonical name as is */
+               final_locale = icu_locale;
+       }
+
+       /* Bounded copy: safe even if the language buffer is unterminated */
+       if (language_code)
+               *language_code = g_strndup (language_buffer, len);
+
+       return final_locale;
+}
+
+/* All purpose character encoding function, encodes text
+ * to a UChar string from UTF-8 and first ensures that the string
+ * is valid UTF-8.
+ *
+ * @string:     the text to convert
+ * @buffer:     a caller provided buffer which is tried first
+ * @buffer_len: the capacity of @buffer, in UChar units
+ * @result_len: receives the length of the converted text, in UChar units
+ * @free_me:    receives a newly allocated buffer when @buffer was too
+ *              small; the caller must initialize it to NULL and g_free()
+ *              it once done with the returned string
+ * @error:      set from the E_COLLATOR_ERROR domain on failure
+ *
+ * Returns the converted string, which lives either in @buffer or
+ * in *@free_me, or NULL with @error set.
+ */
+static const UChar *
+convert_to_ustring (const gchar  *string,
+                   UChar        *buffer,
+                   gint          buffer_len,
+                   gint         *result_len,
+                   UChar       **free_me,
+                   GError      **error)
+{
+       UErrorCode status = U_ZERO_ERROR;
+       const gchar *source_utf8;
+       gchar *alloc_utf8 = NULL;
+       gint   converted_len = 0;
+       UChar *converted_buffer;
+
+       /* First make sure we're dealing with utf8 */
+       if (g_utf8_validate (string, -1, NULL))
+               source_utf8 = string;
+       else {
+               alloc_utf8 = e_util_utf8_make_valid (string);
+               source_utf8 = alloc_utf8;
+       }
+
+       /* First pass, try converting to UChar in the given buffer */
+       converted_buffer = u_strFromUTF8Lenient (buffer,
+                                                buffer_len,
+                                                &converted_len,
+                                                source_utf8,
+                                                -1,
+                                                &status);
+
+       /* Set the result length right away... */
+       *result_len = converted_len;
+
+       /* Second pass: a too small buffer is reported by ICU as an error
+        * status along with the required length, so this case has to be
+        * recognized before the generic failure check. Retry with a clean
+        * status in a buffer big enough for the text and its terminator.
+        */
+       if (status == U_BUFFER_OVERFLOW_ERROR) {
+               status = U_ZERO_ERROR;
+               *free_me = g_new (UChar, converted_len + 1);
+
+               converted_buffer = u_strFromUTF8Lenient (*free_me,
+                                                        converted_len + 1,
+                                                        NULL,
+                                                        source_utf8,
+                                                        -1,
+                                                        &status);
+
+               if (U_FAILURE (status)) {
+                       g_free (*free_me);
+                       *free_me = NULL;
+               }
+       }
+
+       g_free (alloc_utf8);
+
+       if (U_FAILURE (status)) {
+               g_set_error (error, E_COLLATOR_ERROR,
+                            E_COLLATOR_ERROR_CONVERSION,
+                            "Error occured while converting character encoding (%s)",
+                            u_errorName (status));
+               return NULL;
+       }
+
+       return converted_buffer;
+}
+
+/*****************************************************
+ *                        API                        *
+ *****************************************************/
+
+/**
+ * e_collator_new:
+ * @locale: The locale under which to sort
+ * @error: (allow-none): A location to store a #GError from the #E_COLLATOR_ERROR domain
+ *
+ * Builds an #ECollator which sorts according to @locale. Release
+ * the returned collator with e_collator_unref() when done with it.
+ *
+ * Returns: (transfer full): A newly created #ECollator, or %NULL
+ * with @error set if the collator could not be created.
+ *
+ * Since: 3.12
+ */
+ECollator *
+e_collator_new (const gchar     *locale,
+               GError         **error)
+{
+       ECollator *self = NULL;
+       UCollator *icu_coll;
+       UErrorCode icu_status = U_ZERO_ERROR;
+       gchar     *canonical;
+       gchar     *language = NULL;
+
+       g_return_val_if_fail (locale && locale[0], NULL);
+
+#if ENABLE_DEBUGGING
+       print_available_locales ();
+#endif
+
+       canonical = canonicalize_locale (locale, &language, error);
+       if (canonical == NULL)
+               return NULL;
+
+       icu_coll = ucol_open (canonical, &icu_status);
+
+       if (U_SUCCESS (icu_status)) {
+               ucol_setStrength (icu_coll, UCOL_DEFAULT_STRENGTH);
+
+               self = g_slice_new0 (ECollator);
+               self->coll = icu_coll;
+               self->ref_count = 1;
+
+               /* Chinese names are transliterated so that they sort
+                * interleaved with latin names in a latin AlphabeticIndex
+                */
+               if (g_strcmp0 (language, "zh") == 0)
+                       self->transliterator = _e_transliterator_cxx_new ("Han-Latin");
+
+               self->alpha_index = _e_alphabet_index_cxx_new_for_language (language);
+               self->labels = _e_alphabet_index_cxx_get_labels (self->alpha_index,
+                                                                &self->n_labels,
+                                                                &self->underflow,
+                                                                &self->inflow,
+                                                                &self->overflow);
+       } else {
+               g_set_error (error, E_COLLATOR_ERROR,
+                            E_COLLATOR_ERROR_OPEN,
+                            "Unable to open collator for locale '%s' (%s)",
+                            canonical,
+                            u_errorName (icu_status));
+
+               ucol_close (icu_coll);
+       }
+
+       g_free (language);
+       g_free (canonical);
+
+       return self;
+}
+
+/**
+ * e_collator_ref:
+ * @collator: An #ECollator
+ *
+ * Atomically adds one reference to @collator.
+ *
+ * Returns: (transfer full): @collator
+ *
+ * Since: 3.12
+ */
+ECollator *
+e_collator_ref (ECollator *collator)
+{
+       g_return_val_if_fail (collator != NULL, NULL);
+
+       g_atomic_int_add (&collator->ref_count, 1);
+
+       return collator;
+}
+
+/**
+ * e_collator_unref:
+ * @collator: An #ECollator
+ *
+ * Atomically drops one reference from @collator. Once the last
+ * reference is gone the collator and everything it owns is freed.
+ *
+ * Since: 3.12
+ */
+void
+e_collator_unref (ECollator *collator)
+{
+       g_return_if_fail (collator != NULL);
+
+       /* Somebody else still holds a reference */
+       if (!g_atomic_int_dec_and_test (&collator->ref_count))
+               return;
+
+       if (collator->coll)
+               ucol_close (collator->coll);
+
+       _e_alphabet_index_cxx_free (collator->alpha_index);
+       g_strfreev (collator->labels);
+
+       /* A transliterator only exists for the locales which need
+        * specialized sorting, notably Chinese locales
+        */
+       if (collator->transliterator)
+               _e_transliterator_cxx_free (collator->transliterator);
+
+       g_slice_free (ECollator, collator);
+}
+
+/**
+ * e_collator_generate_key:
+ * @collator: An #ECollator
+ * @str: The string to generate a collation key for
+ * @error: (allow-none): A location to store a #GError from the #E_COLLATOR_ERROR domain
+ *
+ * Generates a collation key for @str, the result of comparing
+ * two collation keys with strcmp() will be the same result
+ * of calling e_collator_collate() on the same original strings.
+ *
+ * The key starts with the alphabetic index of @str formatted as
+ * "%03d-", followed by the ICU sort key.
+ *
+ * This function will first ensure that @str is valid UTF-8 encoded.
+ *
+ * Returns: (transfer full): A collation key for @str, or %NULL on failure with @error set.
+ *
+ * Since: 3.12
+ */
+gchar *
+e_collator_generate_key (ECollator    *collator,
+                        const gchar  *str,
+                        GError      **error)
+{
+       UChar  source_buffer[CONVERT_BUFFER_LEN];
+       UChar *free_me = NULL;
+       const UChar *source;
+       gchar stack_buffer[COLLATION_KEY_BUFFER_LEN];
+       gchar index_prefix[16];
+       gchar *collation_key;
+       gint key_len, prefix_len, source_len = 0;
+       gint alphabet_index;
+       gchar *translit_str = NULL;
+       const gchar *input_str;
+
+       g_return_val_if_fail (collator != NULL, NULL);
+       g_return_val_if_fail (str != NULL, NULL);
+
+       /* We may need to perform a conversion before generating the sort key */
+       if (collator->transliterator) {
+               translit_str = _e_transliterator_cxx_transliterate (collator->transliterator, str);
+               input_str = translit_str;
+       } else {
+               input_str = str;
+       }
+
+       source = convert_to_ustring (input_str,
+                                    source_buffer,
+                                    CONVERT_BUFFER_LEN,
+                                    &source_len,
+                                    &free_me,
+                                    error);
+
+       if (!source) {
+               g_free (translit_str);
+               return NULL;
+       }
+
+       /* Get the numerical index for this string */
+       alphabet_index = _e_alphabet_index_cxx_get_index (collator->alpha_index, input_str);
+
+       /* Format the alphabetic index prefix once and measure it, rather
+        * than assuming it is 4 characters long: the buffer is large
+        * enough for any gint, so the prefix is never truncated.
+        */
+       prefix_len = g_snprintf (index_prefix, sizeof (index_prefix), "%03d-", alphabet_index);
+
+       /* First try to generate a key in a predefined buffer size */
+       key_len = ucol_getSortKey (collator->coll, source, source_len,
+                                  (guchar *) stack_buffer, COLLATION_KEY_BUFFER_LEN);
+
+       /* Room for the prefix, the sort key and a trailing null character */
+       collation_key = g_malloc (prefix_len + key_len + 1);
+       memcpy (collation_key, index_prefix, prefix_len);
+
+       if (key_len > COLLATION_KEY_BUFFER_LEN) {
+               /* Stack buffer wasn't large enough, regenerate the sort
+                * key directly behind the prefix
+                */
+               ucol_getSortKey (collator->coll, source, source_len,
+                                (guchar *) (collation_key + prefix_len), key_len);
+       } else {
+               memcpy (collation_key + prefix_len, stack_buffer, key_len);
+       }
+
+       /* Just being paranoid, make sure we're null terminated since the API
+        * doesn't specify if the result length is null character inclusive
+        */
+       collation_key[prefix_len + key_len] = '\0';
+
+       g_free (free_me);
+       g_free (translit_str);
+
+       return collation_key;
+}
+
+/**
+ * e_collator_generate_key_for_index:
+ * @collator: An #ECollator
+ * @index: An index into the alphabetic labels
+ *
+ * Generates a sort key for the given alphabetic @index.
+ *
+ * The generated sort key is guaranteed to sort below
+ * the sort keys of all words which begin with any variant
+ * of the letter found at @index.
+ *
+ * As an example, take a latin alphabet in which index 5 is the
+ * letter 'E': the key generated for index 5 sorts below the keys
+ * of words starting with 'e', 'E', 'é', 'É', 'è' or 'È', and
+ * above the keys of words starting with 'd' or 'D'.
+ *
+ * Returns: (transfer full): A sort key for the given index
+ *
+ * Since: 3.12
+ */
+gchar *
+e_collator_generate_key_for_index (ECollator       *collator,
+                                  gint             index)
+{
+       gchar *index_key;
+
+       g_return_val_if_fail (collator != NULL, NULL);
+       g_return_val_if_fail (index >= 0 && index < collator->n_labels, NULL);
+
+       index_key = g_strdup_printf ("%03d", index);
+
+       return index_key;
+}
+
+/**
+ * e_collator_collate:
+ * @collator: An #ECollator
+ * @str_a: A string to compare
+ * @str_b: The string to compare with @str_a
+ * @result: (out): A location to store the comparison result
+ * @error: (allow-none): A location to store a #GError from the #E_COLLATOR_ERROR domain
+ *
+ * Compares @str_a with @str_b, the order of strings is determined by the parameters of @collator.
+ *
+ * A sort key is generated for each string with e_collator_generate_key()
+ * and the two keys are then compared.
+ *
+ * The @result will be set to integer less than, equal to, or greater than zero if @str_a is found,
+ * respectively, to be less than, to match, or be greater than @str_b. If the comparison
+ * fails, @result is left untouched.
+ *
+ * This function will first ensure that both strings are valid UTF-8.
+ *
+ * Returns: %TRUE on success, otherwise if %FALSE is returned then @error will be set.
+ *
+ * Since: 3.12
+ */
+gboolean
+e_collator_collate (ECollator    *collator,
+                   const gchar  *str_a,
+                   const gchar  *str_b,
+                   gint         *result,
+                   GError      **error)
+{
+       gchar *sort_key_a, *sort_key_b;
+
+       /* The return value is a gboolean success flag, so a failed
+        * precondition must yield FALSE: returning -1 here would be
+        * taken for TRUE by any caller testing the result.
+        */
+       g_return_val_if_fail (collator != NULL, FALSE);
+       g_return_val_if_fail (str_a != NULL, FALSE);
+       g_return_val_if_fail (str_b != NULL, FALSE);
+       g_return_val_if_fail (result != NULL, FALSE);
+
+       sort_key_a = e_collator_generate_key (collator, str_a, error);
+       if (!sort_key_a)
+               return FALSE;
+
+       sort_key_b = e_collator_generate_key (collator, str_b, error);
+       if (!sort_key_b) {
+               /* Don't leak the first key when the second one can't be generated */
+               g_free (sort_key_a);
+               return FALSE;
+       }
+
+       *result = strcmp (sort_key_a, sort_key_b);
+
+       g_free (sort_key_a);
+       g_free (sort_key_b);
+
+       return TRUE;
+}
+
+/**
+ * e_collator_get_index_labels:
+ * @collator: An #ECollator
+ * @n_labels: (allow-none) (out): The number of labels/indexes available for @collator
+ * @underflow: (allow-none) (out): The underflow index, for any words which sort below the active alphabet(s)
+ * @inflow: (allow-none) (out): The inflow index, for any words which sort between the active alphabets
+ *    (if there is more than one)
+ * @overflow: (allow-none) (out): The overflow index, for any words which sort above the active alphabet(s)
+ *
+ * Retrieves the displayable labels of the active alphabet along with
+ * the positions of the special indexes. Each output argument may be
+ * %NULL if the caller is not interested in that value.
+ *
+ * Returns: (array zero-terminated=1) (element-type utf8) (transfer none):
+ *   The array of displayable labels for each index in the active alphabet(s).
+ *
+ * Since: 3.12
+ */
+const gchar *const  *
+e_collator_get_index_labels (ECollator       *collator,
+                            gint            *n_labels,
+                            gint            *underflow,
+                            gint            *inflow,
+                            gint            *overflow)
+{
+       const gchar *const *labels;
+
+       g_return_val_if_fail (collator != NULL, NULL);
+
+       /* Only report the values which the caller asked for */
+       if (n_labels != NULL)
+               *n_labels = collator->n_labels;
+
+       if (underflow != NULL)
+               *underflow = collator->underflow;
+
+       if (inflow != NULL)
+               *inflow = collator->inflow;
+
+       if (overflow != NULL)
+               *overflow = collator->overflow;
+
+       /* The labels stay owned by the collator */
+       labels = (const gchar *const *) collator->labels;
+
+       return labels;
+}
+
+/**
+ * e_collator_get_index:
+ * @collator: An #ECollator
+ * @str: A string
+ *
+ * Looks up the alphabetic index, as reported by
+ * e_collator_get_index_labels(), which @str belongs to.
+ *
+ * If @collator has a transliterator, the lookup is performed on the
+ * transliterated form of @str.
+ *
+ * Returns: The alphabetic index under which @str would sort
+ *
+ * Since: 3.12
+ */
+gint
+e_collator_get_index (ECollator       *collator,
+                     const gchar     *str)
+{
+       gchar *converted = NULL;
+       const gchar *lookup_str = str;
+       gint bucket;
+
+       g_return_val_if_fail (collator != NULL, -1);
+       g_return_val_if_fail (str != NULL, -1);
+
+       /* With a transliterator in place, the index is looked up for the converted text */
+       if (collator->transliterator) {
+               converted = _e_transliterator_cxx_transliterate (collator->transliterator, str);
+               lookup_str = converted;
+       }
+
+       bucket = _e_alphabet_index_cxx_get_index (collator->alpha_index, lookup_str);
+
+       /* g_free() ignores NULL, so this is safe when nothing was converted */
+       g_free (converted);
+
+       return bucket;
+}
diff --git a/libedataserver/e-collator.h b/libedataserver/e-collator.h
new file mode 100644
index 0000000..933c6d1
--- /dev/null
+++ b/libedataserver/e-collator.h
@@ -0,0 +1,96 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU Lesser General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Tristan Van Berkom <tristanvb openismus com>
+ */
+#if !defined (__LIBEDATASERVER_H_INSIDE__) && !defined (LIBEDATASERVER_COMPILATION)
+#error "Only <libedataserver/libedataserver.h> should be included directly."
+#endif
+
+#include <glib.h>
+#include <libedataserver/e-source-enumtypes.h>
+#include <libedataserver/e-data-server-util.h>
+
+#ifndef E_COLLATOR_H
+#define E_COLLATOR_H
+
+/**
+ * E_COLLATOR_ERROR:
+ *
+ * An error domain for collation errors
+ *
+ * Since: 3.12
+ */
+#define E_COLLATOR_ERROR (e_collator_error_quark ())
+
+#define E_TYPE_COLLATOR (e_collator_get_type ())
+
+G_BEGIN_DECLS
+
+/**
+ * ECollatorError:
+ * @E_COLLATOR_ERROR_OPEN: An error occurred trying to open a collator and access collation data.
+ * @E_COLLATOR_ERROR_CONVERSION: An error occurred converting character encodings.
+ * @E_COLLATOR_ERROR_INVALID_LOCALE: A malformed locale name was given to e_collator_new().
+ *
+ * Errors from the #E_COLLATOR_ERROR domain.
+ *
+ * Since: 3.12
+ */
+typedef enum {
+       E_COLLATOR_ERROR_OPEN,
+       E_COLLATOR_ERROR_CONVERSION,
+       E_COLLATOR_ERROR_INVALID_LOCALE
+} ECollatorError;
+
+/**
+ * ECollator:
+ *
+ * An opaque object used for locale specific string comparisons
+ * and sort ordering.
+ *
+ * Since: 3.12
+ */
+typedef struct _ECollator ECollator;
+
+GType                e_collator_get_type         (void);
+GQuark               e_collator_error_quark      (void);
+ECollator           *e_collator_new              (const gchar     *locale,
+                                                 GError         **error);
+ECollator           *e_collator_ref              (ECollator       *collator);
+void                 e_collator_unref            (ECollator       *collator);
+gchar               *e_collator_generate_key     (ECollator       *collator,
+                                                 const gchar     *str,
+                                                 GError         **error);
+gchar               *e_collator_generate_key_for_index
+                                                 (ECollator       *collator,
+                                                 gint             index);
+gboolean             e_collator_collate          (ECollator       *collator,
+                                                 const gchar     *str_a,
+                                                 const gchar     *str_b,
+                                                 gint            *result,
+                                                 GError         **error);
+const gchar *const  *e_collator_get_index_labels (ECollator       *collator,
+                                                 gint            *n_labels,
+                                                 gint            *underflow,
+                                                 gint            *inflow,
+                                                 gint            *overflow);
+gint                 e_collator_get_index        (ECollator       *collator,
+                                                 const gchar     *str);
+
+G_END_DECLS
+
+#endif /* E_COLLATOR_H */
diff --git a/libedataserver/e-transliterator-private.cpp b/libedataserver/e-transliterator-private.cpp
new file mode 100644
index 0000000..9e6b4a4
--- /dev/null
+++ b/libedataserver/e-transliterator-private.cpp
@@ -0,0 +1,92 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU Lesser General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Tristan Van Berkom <tristanvb openismus com>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "e-transliterator-private.h"
+
+/* C++ standard library */
+#include <string>
+#include <memory>
+
+/* system headers */
+#include <langinfo.h>
+#include <locale.h>
+
+/* ICU headers */
+#include <unicode/translit.h>
+
+using icu::Transliterator;
+
+struct _ETransliterator {
+       Transliterator *priv; /* wrapped ICU transliterator, deleted by _e_transliterator_cxx_free() */
+};
+
+/* Creates an ETransliterator wrapping the ICU transliterator
+ * identified by 'transliterator_id', applied in the forward direction.
+ *
+ * Returns NULL if ICU fails to create the requested transliterator,
+ * otherwise the result must be freed with _e_transliterator_cxx_free().
+ */
+ETransliterator *
+_e_transliterator_cxx_new (const gchar *transliterator_id)
+{
+       UErrorCode status = U_ZERO_ERROR;
+       Transliterator *icu_transliterator;
+       ETransliterator *transliterator;
+
+       g_return_val_if_fail (transliterator_id != NULL, NULL);
+
+       icu_transliterator = Transliterator::createInstance (transliterator_id, UTRANS_FORWARD, status);
+
+       /* Never hand out a wrapper without a usable ICU object:
+        * _e_transliterator_cxx_transliterate() dereferences it
+        * unconditionally and would crash on a NULL pointer.
+        */
+       if (U_FAILURE (status) || icu_transliterator == NULL) {
+               g_warning ("Failed to create transliterator '%s': %s",
+                          transliterator_id, u_errorName (status));
+               delete icu_transliterator;
+               return NULL;
+       }
+
+       transliterator = g_slice_new (ETransliterator);
+       transliterator->priv = icu_transliterator;
+
+       return transliterator;
+}
+
+/* Releases an ETransliterator together with the ICU
+ * transliterator it wraps; a NULL argument is ignored.
+ */
+void
+_e_transliterator_cxx_free (ETransliterator *transliterator)
+{
+       if (transliterator == NULL)
+               return;
+
+       delete transliterator->priv;
+       g_slice_free (ETransliterator, transliterator);
+}
+
+/* Transliterates the UTF-8 string 'str' and returns the result
+ * as a newly allocated UTF-8 string, to be freed with g_free().
+ *
+ * Returns NULL only if one of the arguments is NULL.
+ */
+gchar *
+_e_transliterator_cxx_transliterate (ETransliterator  *transliterator,
+                                    const gchar      *str)
+{
+       /* Spell out the icu:: namespace: only icu::Transliterator is
+        * imported by this file, and ICU builds which do not pull their
+        * namespace into the global scope reject a bare UnicodeString.
+        */
+       icu::UnicodeString transform;
+       std::string targetUTF8;
+
+       g_return_val_if_fail (transliterator != NULL, NULL);
+       g_return_val_if_fail (str != NULL, NULL);
+
+       /* Convert to ICU's string type and transliterate it in place */
+       transform = icu::UnicodeString::fromUTF8 (str);
+       transliterator->priv->transliterate (transform);
+
+       /* toUTF8String() appends to the string it is given, there
+        * is no need to assign its return value back
+        */
+       transform.toUTF8String (targetUTF8);
+
+       return g_strdup (targetUTF8.c_str ());
+}
diff --git a/libedataserver/e-transliterator-private.h b/libedataserver/e-transliterator-private.h
new file mode 100644
index 0000000..491f83c
--- /dev/null
+++ b/libedataserver/e-transliterator-private.h
@@ -0,0 +1,56 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU Lesser General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Tristan Van Berkom <tristanvb openismus com>
+ */
+
+#if !defined (__LIBEDATASERVER_H_INSIDE__) && !defined (LIBEDATASERVER_COMPILATION)
+#error "Only <libedataserver/libedataserver.h> should be included directly."
+#endif
+
+#ifndef E_TRANSLITERATOR_PRIVATE_H
+#define E_TRANSLITERATOR_PRIVATE_H
+
+#include <glib-object.h>
+
+G_BEGIN_DECLS
+
+#if __GNUC__ >= 4
+#  define E_TRANSLITERATOR_LOCAL __attribute__ ((visibility ("hidden")))
+#else
+#  define E_TRANSLITERATOR_LOCAL
+#endif
+
+/**
+ * ETransliterator:
+ *
+ * A private opaque type wrapping an ICU transliterator
+ *
+ * Since: 3.12
+ **/
+typedef struct _ETransliterator ETransliterator;
+
+/* defined in e-transliterator-private.cpp, and used by e-collator.c */
+E_TRANSLITERATOR_LOCAL ETransliterator *_e_transliterator_cxx_new             (const gchar      
*transliterator_id);
+E_TRANSLITERATOR_LOCAL void             _e_transliterator_cxx_free            (ETransliterator  
*transliterator);
+E_TRANSLITERATOR_LOCAL gchar           *_e_transliterator_cxx_transliterate   (ETransliterator  
*transliterator,
+                                                                              const gchar      *str);
+
+G_END_DECLS
+
+#endif /* E_TRANSLITERATOR_PRIVATE_H */
diff --git a/libedataserver/libedataserver.h b/libedataserver/libedataserver.h
index e9e5b8d..a4a6b08 100644
--- a/libedataserver/libedataserver.h
+++ b/libedataserver/libedataserver.h
@@ -24,6 +24,7 @@
 #include <libedataserver/e-cancellable-locks.h>
 #include <libedataserver/e-categories.h>
 #include <libedataserver/e-client.h>
+#include <libedataserver/e-collator.h>
 #include <libedataserver/e-credentials.h>
 #include <libedataserver/e-data-server-util.h>
 #include <libedataserver/e-debug-log.h>
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]