[evolution-data-server/openismus-work-3-8] EBookSqlite: Full support for transliterated queries



commit e59b5723fe3fd15bf9154bbd8c106904c1b55e50
Author: Tristan Van Berkom <tristan upstairslabs com>
Date:   Thu Dec 5 16:35:05 2013 +0900

    EBookSqlite: Full support for transliterated queries

 addressbook/libedata-book/e-book-sqlite.c |  251 +++++++++++++++++++++++++----
 1 files changed, 223 insertions(+), 28 deletions(-)
---
diff --git a/addressbook/libedata-book/e-book-sqlite.c b/addressbook/libedata-book/e-book-sqlite.c
index a5afb93..a57089b 100644
--- a/addressbook/libedata-book/e-book-sqlite.c
+++ b/addressbook/libedata-book/e-book-sqlite.c
@@ -261,6 +261,7 @@ ebsql_init_debug (void)
 #define EBSQL_SUFFIX_SORT_KEY        "localized"
 #define EBSQL_SUFFIX_PHONE           "phone"
 #define EBSQL_SUFFIX_COUNTRY         "country"
+#define EBSQL_SUFFIX_TRANSLIT        "translit"
 
 /* Track EBookIndexType's in a bit mask  */
 #define INDEX_FLAG(type)  (1 << E_BOOK_INDEX_##type)
@@ -331,6 +332,7 @@ struct _EBookSqlitePrivate {
        GCancellable   *cancel;          /* User passed GCancellable, we abort an operation if cancelled */
 
        ECollator      *collator;        /* The ECollator to create sort keys for any sortable fields */
+       ETransliterator *transliterator; /* For transliterated queries */
 
        /* SQLite resources  */
        sqlite3        *db;
@@ -645,6 +647,12 @@ summary_field_list_columns (SummaryField *field,
                columns = g_slist_prepend (columns, info);
        }
 
+       /* Transliterated value column */
+       if (field->type != G_TYPE_BOOLEAN && (field->index & INDEX_FLAG (TRANSLIT)) != 0) {
+               info    = column_info_new (field, folderid, EBSQL_SUFFIX_TRANSLIT, "TEXT", NULL, "TINDEX");
+               columns = g_slist_prepend (columns, info);
+       }
+
        return g_slist_reverse (columns);
 }
 
@@ -1988,9 +1996,13 @@ format_multivalues (EBookSqlite *ebsql)
                                g_string_append (string, ";suffix");
                        if ((ebsql->priv->summary_fields[i].index & INDEX_FLAG (PHONE)) != 0)
                                g_string_append (string, ";phone");
+                       if ((ebsql->priv->summary_fields[i].index & INDEX_FLAG (TRANSLIT)) != 0)
+                               g_string_append (string, ";translit");
                }
        }
 
+
+
        return g_string_free (string, FALSE);
 }
 
@@ -2080,6 +2092,10 @@ ebsql_introspect_summary (EBookSqlite *ebsql,
                        computed = INDEX_FLAG (SORT_KEY);
                        freeme   = g_strndup (col, p - col);
                        col      = freeme;
+               } else if ((p = strstr (col, "_" EBSQL_SUFFIX_TRANSLIT)) != NULL) {
+                       computed = INDEX_FLAG (TRANSLIT);
+                       freeme   = g_strndup (col, p - col);
+                       col      = freeme;
                }
 
                /* First check exception fields */
@@ -2160,6 +2176,8 @@ ebsql_introspect_summary (EBookSqlite *ebsql,
                                                iter->index |= INDEX_FLAG (SUFFIX);
                                        } else if (strcmp (params[j], "phone") == 0) {
                                                iter->index |= INDEX_FLAG (PHONE);
+                                       } else if (strcmp (params[j], "translit") == 0) {
+                                               iter->index |= INDEX_FLAG (TRANSLIT);
                                        }
                                }
                        }
@@ -3157,6 +3175,9 @@ ebsql_prepare_multi_insert (EBookSqlite *ebsql,
                g_string_append (string, ", value_" EBSQL_SUFFIX_COUNTRY);
        }
 
+       if ((field->index & INDEX_FLAG (TRANSLIT)) != 0)
+               g_string_append (string, ", value_" EBSQL_SUFFIX_TRANSLIT);
+
        g_string_append (string, ") VALUES (:uid, :value");
 
        if ((field->index & INDEX_FLAG (SUFFIX)) != 0)
@@ -3167,6 +3188,9 @@ ebsql_prepare_multi_insert (EBookSqlite *ebsql,
                g_string_append (string, ", :value_" EBSQL_SUFFIX_COUNTRY);
        }
 
+       if ((field->index & INDEX_FLAG (TRANSLIT)) != 0)
+               g_string_append (string, ", :value_" EBSQL_SUFFIX_TRANSLIT);
+
        g_string_append_c (string, ')');
 
        stmt = ebsql_prepare_statement (ebsql, string->str, error);
@@ -3184,7 +3208,7 @@ ebsql_run_multi_insert_one (EBookSqlite *ebsql,
                            GError **error)
 {
        gchar *normal = e_util_utf8_normalize (value);
-       gchar *str;
+       gchar *tmp, *str;
        gint ret, param_idx = 1;
 
        /* :uid */
@@ -3218,6 +3242,20 @@ ebsql_run_multi_insert_one (EBookSqlite *ebsql,
 
        }
 
+       if (ret == SQLITE_OK && (field->index & INDEX_FLAG (TRANSLIT)) != 0) {
+
+               if (value) {
+                       tmp = e_transliterator_transliterate (ebsql->priv->transliterator, value);
+                       str = e_util_utf8_normalize (tmp);
+                       g_free (tmp);
+               } else {
+                       str = NULL;
+               }
+
+               /* :value_translit */
+               ret = sqlite3_bind_text (stmt, param_idx++, str, -1, g_free);
+       }
+
        /* Run the statement */
        return ebsql_complete_statement (ebsql, stmt, ret, error);
 }
@@ -3311,6 +3349,12 @@ ebsql_prepare_insert (EBookSqlite *ebsql,
                                g_string_append (string, field->dbname);
                                g_string_append (string, "_" EBSQL_SUFFIX_COUNTRY);
                        }
+
+                       if ((field->index & INDEX_FLAG (TRANSLIT)) != 0) {
+                               g_string_append (string, ", ");
+                               g_string_append (string, field->dbname);
+                               g_string_append (string, "_" EBSQL_SUFFIX_TRANSLIT);
+                       }
                }
        }
        g_string_append (string, ", vcard, bdata)");
@@ -3347,6 +3391,9 @@ ebsql_prepare_insert (EBookSqlite *ebsql,
                                g_string_append_printf (string, ", :%s_" EBSQL_SUFFIX_COUNTRY, field->dbname);
                        }
 
+                       if ((field->index & INDEX_FLAG (TRANSLIT)) != 0)
+                               g_string_append_printf (string, ", :%s_" EBSQL_SUFFIX_TRANSLIT, 
field->dbname);
+
                } else if (field->type != E_TYPE_CONTACT_ATTR_LIST)
                        g_warn_if_reached ();
        }
@@ -3495,6 +3542,20 @@ ebsql_run_insert (EBookSqlite *ebsql,
                                        sqlite3_bind_int (stmt, param_idx++, country_code);
                        }
 
+                       if (ret == SQLITE_OK &&
+                           (field->index & INDEX_FLAG (TRANSLIT)) != 0) {
+                               
+                               if (val) {
+                                       gchar *tmp = e_transliterator_transliterate 
(ebsql->priv->transliterator, val);
+                                       str = e_util_utf8_normalize (tmp);
+                                       g_free (tmp);
+                               } else {
+                                       str = NULL;
+                               }
+
+                               ret = sqlite3_bind_text (stmt, param_idx++, str, -1, g_free);
+                       }
+
                        g_free (val);
                } else if (field->type == G_TYPE_BOOLEAN) {
                        gboolean val;
@@ -4419,24 +4480,6 @@ query_preflight_check (PreflightContext  *context,
                                                EBSQL_STATUS_STR (context->status)));
                        break;
 
-               case E_BOOK_QUERY_TRANSLIT_IS:
-               case E_BOOK_QUERY_TRANSLIT_CONTAINS:
-               case E_BOOK_QUERY_TRANSLIT_BEGINS_WITH:
-               case E_BOOK_QUERY_TRANSLIT_ENDS_WITH:
-
-                       /* These queries are not supported via the SQLite at all,
-                        * This is a bug and needs investigation, the compare_vcard()
-                        * function is not called, I suspect because of character encoding
-                        * issues.
-                        */
-                       context->status = MAX (context->status, PREFLIGHT_UNSUPPORTED);
-                       EBSQL_NOTE (PREFLIGHT,
-                                   g_printerr ("PREFLIGHT CHECK: "
-                                               "Query `%s' needs fallback search, new status: %s\n",
-                                               EBSQL_QUERY_TYPE_STR (field_test),
-                                               EBSQL_STATUS_STR (context->status)));
-                       break;
-
                case E_BOOK_QUERY_EQUALS_PHONE_NUMBER:
                case E_BOOK_QUERY_EQUALS_NATIONAL_PHONE_NUMBER:
                case E_BOOK_QUERY_EQUALS_SHORT_PHONE_NUMBER:
@@ -4487,6 +4530,24 @@ query_preflight_check (PreflightContext  *context,
                                }
                        }
                        break;
+
+               case E_BOOK_QUERY_TRANSLIT_IS:
+               case E_BOOK_QUERY_TRANSLIT_CONTAINS:
+               case E_BOOK_QUERY_TRANSLIT_BEGINS_WITH:
+               case E_BOOK_QUERY_TRANSLIT_ENDS_WITH:
+
+                       /* These only only searchable in the summary with the E_BOOK_INDEX_TRANSLIT index */
+                       if (test->field == NULL ||
+                           (test->field->index & INDEX_FLAG (TRANSLIT)) == 0) {
+
+                               context->status = MAX (context->status, PREFLIGHT_NOT_SUMMARIZED);
+                               EBSQL_NOTE (PREFLIGHT,
+                                           g_printerr ("PREFLIGHT CHECK: "
+                                                       "Query `%s' needs fallback search, new status: %s\n",
+                                                       EBSQL_QUERY_TYPE_STR (field_test),
+                                                       EBSQL_STATUS_STR (context->status)));
+                       }
+                       break;
                }
 
                if (test->field &&
@@ -4522,6 +4583,7 @@ query_preflight_substitute_full_name (PreflightContext *context,
                SummaryField *family_name, *given_name, *nickname;
                QueryElement *element;
                QueryFieldTest *test;
+               gboolean need_translit = FALSE;
 
                element = g_ptr_array_index (context->constraints, i);
 
@@ -4532,10 +4594,30 @@ query_preflight_substitute_full_name (PreflightContext *context,
                if (test->field_id != E_CONTACT_FULL_NAME)
                        continue;
 
+               /* If it's transliterated, we won't do this */
+               if (element->query >= E_BOOK_QUERY_TRANSLIT_IS &&
+                   element->query <= E_BOOK_QUERY_TRANSLIT_ENDS_WITH)
+                       need_translit = TRUE;
+
                family_name = summary_field_get (ebsql, E_CONTACT_FAMILY_NAME);
                given_name  = summary_field_get (ebsql, E_CONTACT_GIVEN_NAME);
                nickname    = summary_field_get (ebsql, E_CONTACT_NICKNAME);
 
+               /* Only OR them in if they are also indexed for transliteration */
+               if (need_translit) {
+                       if (family_name &&
+                           (family_name->index & INDEX_FLAG (TRANSLIT)) == 0)
+                               family_name = NULL;
+
+                       if (given_name &&
+                           (given_name->index & INDEX_FLAG (TRANSLIT)) == 0)
+                               given_name = NULL;
+
+                       if (nickname &&
+                           (nickname->index & INDEX_FLAG (TRANSLIT)) == 0)
+                               nickname = NULL;
+               }
+
                /* If any of these are in the summary, then we'll construct
                 * a grouped OR statment for this E_CONTACT_FULL_NAME test */
                if (family_name || given_name || nickname) {
@@ -4625,7 +4707,8 @@ typedef void (* GenerateFieldTest) (EBookSqlite      *ebsql,
  * with %Q or %q
  */
 static gchar *
-ebsql_normalize_for_like (QueryFieldTest *test,
+ebsql_normalize_for_like (EBookSqlite *ebsql,
+                         QueryFieldTest *test,
                          gboolean reverse_string,
                          gboolean *escape_needed)
 {
@@ -4642,6 +4725,13 @@ ebsql_normalize_for_like (QueryFieldTest *test,
        if (test->field_id == E_CONTACT_UID ||
            test->field_id == E_CONTACT_REV) {
                normal = test->value;
+       } else if (test->query >= E_BOOK_QUERY_TRANSLIT_CONTAINS &&
+                  test->query <= E_BOOK_QUERY_TRANSLIT_ENDS_WITH) {
+               gchar *tmp = e_transliterator_transliterate (ebsql->priv->transliterator, test->value);
+               freeme = e_util_utf8_normalize (tmp);
+               g_free (tmp);
+
+               normal = freeme;
        } else {
                freeme = e_util_utf8_normalize (test->value);
                normal = freeme;
@@ -4711,7 +4801,7 @@ field_test_query_contains (EBookSqlite        *ebsql,
        gboolean need_escape;
        gchar *escaped;
 
-       escaped = ebsql_normalize_for_like (test, FALSE, &need_escape);
+       escaped = ebsql_normalize_for_like (ebsql, test, FALSE, &need_escape);
 
        g_string_append_c (string, '(');
 
@@ -4739,7 +4829,7 @@ field_test_query_begins_with (EBookSqlite        *ebsql,
        gboolean need_escape;
        gchar *escaped;
 
-       escaped = ebsql_normalize_for_like (test, FALSE, &need_escape);
+       escaped = ebsql_normalize_for_like (ebsql, test, FALSE, &need_escape);
 
        g_string_append_c (string, '(');
        ebsql_string_append_column (string, field, NULL);
@@ -4768,7 +4858,7 @@ field_test_query_ends_with (EBookSqlite        *ebsql,
 
        if ((field->index & INDEX_FLAG (SUFFIX)) != 0) {
 
-               escaped = ebsql_normalize_for_like (test,
+               escaped = ebsql_normalize_for_like (ebsql, test,
                                                    TRUE, &need_escape);
 
                g_string_append_c (string, '(');
@@ -4782,7 +4872,7 @@ field_test_query_ends_with (EBookSqlite        *ebsql,
 
        } else {
 
-               escaped = ebsql_normalize_for_like (test,
+               escaped = ebsql_normalize_for_like (ebsql, test,
                                                    FALSE, &need_escape);
                g_string_append_c (string, '(');
 
@@ -4912,6 +5002,106 @@ field_test_query_regex_normal (EBookSqlite        *ebsql,
 }
 
 static void
+field_test_query_translit_is (EBookSqlite        *ebsql,
+                             GString            *string,
+                             QueryFieldTest     *test)
+{
+       SummaryField *field = test->field;
+       gchar *normal, *translit;
+
+       translit = e_transliterator_transliterate (ebsql->priv->transliterator, test->value);
+       normal = e_util_utf8_normalize (translit);
+
+       ebsql_string_append_column (string, field, EBSQL_SUFFIX_TRANSLIT);
+       ebsql_string_append_printf (string, " = %Q", normal);
+
+       g_free (translit);
+       g_free (normal);
+}
+
+static void
+field_test_query_translit_contains (EBookSqlite        *ebsql,
+                                   GString            *string,
+                                   QueryFieldTest     *test)
+{
+       SummaryField *field = test->field;
+       gboolean need_escape;
+       gchar *escaped;
+
+       escaped = ebsql_normalize_for_like (ebsql, test, FALSE, &need_escape);
+
+       g_string_append_c (string, '(');
+
+       ebsql_string_append_column (string, field, EBSQL_SUFFIX_TRANSLIT);
+       g_string_append (string, " IS NOT NULL AND ");
+       ebsql_string_append_column (string, field, EBSQL_SUFFIX_TRANSLIT);
+       g_string_append (string, " LIKE '%");
+       g_string_append (string, escaped);
+       g_string_append (string, "%'");
+
+       if (need_escape)
+               g_string_append (string, EBSQL_ESCAPE_SEQUENCE);
+
+       g_string_append_c (string, ')');
+
+       g_free (escaped);
+}
+
+static void
+field_test_query_translit_begins_with (EBookSqlite        *ebsql,
+                                      GString            *string,
+                                      QueryFieldTest     *test)
+{
+       SummaryField *field = test->field;
+       gboolean need_escape;
+       gchar *escaped;
+
+       escaped = ebsql_normalize_for_like (ebsql, test, FALSE, &need_escape);
+
+       g_string_append_c (string, '(');
+       ebsql_string_append_column (string, field, EBSQL_SUFFIX_TRANSLIT);
+       g_string_append (string, " IS NOT NULL AND ");
+
+       ebsql_string_append_column (string, field, EBSQL_SUFFIX_TRANSLIT);
+       g_string_append (string, " LIKE \'");
+       g_string_append (string, escaped);
+       g_string_append (string, "%\'");
+
+       if (need_escape)
+               g_string_append (string, EBSQL_ESCAPE_SEQUENCE);
+       g_string_append_c (string, ')');
+
+       g_free (escaped);
+}
+
+static void
+field_test_query_translit_ends_with (EBookSqlite        *ebsql,
+                                    GString            *string,
+                                    QueryFieldTest     *test)
+{
+       SummaryField *field = test->field;
+       gboolean need_escape;
+       gchar *escaped;
+
+       escaped = ebsql_normalize_for_like (ebsql, test, FALSE, &need_escape);
+       g_string_append_c (string, '(');
+
+       ebsql_string_append_column (string, field, EBSQL_SUFFIX_TRANSLIT);
+       g_string_append (string, " IS NOT NULL AND ");
+
+       ebsql_string_append_column (string, field, EBSQL_SUFFIX_TRANSLIT);
+       g_string_append (string, " LIKE \'%");
+       g_string_append (string, escaped);
+       g_string_append (string, "\'");
+
+       if (need_escape)
+               g_string_append (string, EBSQL_ESCAPE_SEQUENCE);
+
+       g_string_append_c (string, ')');
+       g_free (escaped);
+}
+
+static void
 field_test_query_exists (EBookSqlite        *ebsql,
                         GString            *string,
                         QueryFieldTest     *test)
@@ -4936,10 +5126,10 @@ static const GenerateFieldTest field_test_func_table[] = {
        field_test_query_eqphone_short,    /* E_BOOK_QUERY_EQUALS_SHORT_PHONE_NUMBER */
        field_test_query_regex_normal,     /* E_BOOK_QUERY_REGEX_NORMAL */
        NULL /* Requires fallback */,      /* E_BOOK_QUERY_REGEX_RAW  */
-       NULL /* Requires fallback */,      /* E_BOOK_QUERY_TRANSLIT_IS  */
-       NULL /* Requires fallback */,      /* E_BOOK_QUERY_TRANSLIT_CONTAINS  */
-       NULL /* Requires fallback */,      /* E_BOOK_QUERY_TRANSLIT_BEGINS_WITH  */
-       NULL /* Requires fallback */,      /* E_BOOK_QUERY_TRANSLIT_ENDS_WITH  */
+       field_test_query_translit_is,      /* E_BOOK_QUERY_TRANSLIT_IS  */
+       field_test_query_translit_contains,/* E_BOOK_QUERY_TRANSLIT_CONTAINS  */
+       field_test_query_translit_begins_with,/* E_BOOK_QUERY_TRANSLIT_BEGINS_WITH  */
+       field_test_query_translit_ends_with,/* E_BOOK_QUERY_TRANSLIT_ENDS_WITH  */
        field_test_query_exists,           /* BOOK_QUERY_EXISTS */
 };
 
@@ -5812,6 +6002,9 @@ e_book_sqlite_finalize (GObject *object)
        if (priv->collator)
                e_collator_unref (priv->collator);
 
+       if (ebsql->priv->transliterator)
+               e_transliterator_unref (ebsql->priv->transliterator);
+
        g_mutex_clear (&priv->lock);
        g_mutex_clear (&priv->updates_lock);
 
@@ -5858,6 +6051,8 @@ e_book_sqlite_init (EBookSqlite *ebsql)
                                             EBookSqlitePrivate);
        g_mutex_init (&ebsql->priv->lock);
        g_mutex_init (&ebsql->priv->updates_lock);
+
+       ebsql->priv->transliterator = e_transliterator_new ("Any-Latin");
 }
 
 /**********************************************************


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]