[evolution-data-server/openismus-work-3-8: 103/116] EBookBackendSqliteDB: Support transliterated queries.
- From: Tristan Van Berkom <tvb src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evolution-data-server/openismus-work-3-8: 103/116] EBookBackendSqliteDB: Support transliterated queries.
- Date: Tue, 1 Oct 2013 00:53:33 +0000 (UTC)
commit ae0228e234636e3ed7e557346540a1fb1465cea9
Author: Tristan Van Berkom <tristan van berkom gmail com>
Date: Tue Aug 27 21:49:16 2013 +0200
EBookBackendSqliteDB: Support transliterated queries.
Currently only supported for string types (not for multivalue strings,
in which case we still use the EBookBackendSexp fallback).
.../libedata-book/e-book-backend-sqlitedb.c | 180 +++++++++++++++++--
1 files changed, 161 insertions(+), 19 deletions(-)
---
diff --git a/addressbook/libedata-book/e-book-backend-sqlitedb.c
b/addressbook/libedata-book/e-book-backend-sqlitedb.c
index e044225..63f1797 100644
--- a/addressbook/libedata-book/e-book-backend-sqlitedb.c
+++ b/addressbook/libedata-book/e-book-backend-sqlitedb.c
@@ -90,8 +90,9 @@ struct _EBookBackendSqliteDBPrivate {
guint have_attr_list : 1;
IndexFlags attr_list_indexes;
- ECollator *collator; /* The ECollator to create sort keys for all fields */
- gchar *locale; /* The current locale */
+ ETransliterator *transliterator;
+ ECollator *collator; /* The ECollator to create sort keys for all fields */
+ gchar *locale; /* The current locale */
};
G_DEFINE_TYPE (EBookBackendSqliteDB, e_book_backend_sqlitedb, G_TYPE_OBJECT)
@@ -261,6 +262,7 @@ e_book_backend_sqlitedb_init (EBookBackendSqliteDB *ebsdb)
{
ebsdb->priv = E_BOOK_BACKEND_SQLITEDB_GET_PRIVATE (ebsdb);
+ ebsdb->priv->transliterator = e_transliterator_new ("Any-Latin");
ebsdb->priv->store_vcard = TRUE;
ebsdb->priv->in_transaction = 0;
@@ -865,8 +867,9 @@ introspect_summary (EBookBackendSqliteDB *ebsdb,
gchar *p;
IndexFlags computed = 0;
- /* Ignore the 'localized' columns */
- if (g_str_has_suffix (col, "_localized"))
+ /* Ignore the 'localized' & 'translit' columns */
+ if (g_str_has_suffix (col, "_localized") ||
+ g_str_has_suffix (col, "_translit"))
continue;
/* Check if we're parsing a reverse field */
@@ -1017,6 +1020,14 @@ create_contacts_table (EBookBackendSqliteDB *ebsdb,
g_string_append (string, "_localized TEXT, ");
}
+ /* For any string columns, also create a transliterated data column
+ * for transliterated queries
+ */
+ if (ebsdb->priv->summary_fields[i].field != E_CONTACT_REV) {
+ g_string_append (string, ebsdb->priv->summary_fields[i].dbname);
+ g_string_append (string, "_translit TEXT, ");
+ }
+
} else if (ebsdb->priv->summary_fields[i].type == G_TYPE_BOOLEAN) {
g_string_append (string, ebsdb->priv->summary_fields[i].dbname);
g_string_append (string, " INTEGER, ");
@@ -1060,6 +1071,9 @@ create_contacts_table (EBookBackendSqliteDB *ebsdb,
g_string_append (string, ebsdb->priv->summary_fields[i].dbname);
g_string_append (string, "_localized TEXT, ");
+ g_string_append (string, ebsdb->priv->summary_fields[i].dbname);
+ g_string_append (string, "_translit TEXT, ");
+
success = book_backend_sql_exec (
ebsdb->priv->db, string->str, NULL, NULL , error);
@@ -1135,8 +1149,8 @@ create_contacts_table (EBookBackendSqliteDB *ebsdb,
sqlite3_free (stmt);
g_free (tmp);
- /* For any indexed column, also index the localized column */
- if (ebsdb->priv->summary_fields[i].field != E_CONTACT_REV) {
+ /* For any indexed column, also index the localized & transliterated columns */
+ if (success && ebsdb->priv->summary_fields[i].field != E_CONTACT_REV) {
tmp = g_strdup_printf (
"INDEX_%s_localized_%s",
summary_dbname_from_field (ebsdb,
ebsdb->priv->summary_fields[i].field),
@@ -1148,6 +1162,19 @@ create_contacts_table (EBookBackendSqliteDB *ebsdb,
sqlite3_free (stmt);
g_free (tmp);
}
+
+ if (success && ebsdb->priv->summary_fields[i].field != E_CONTACT_REV) {
+ tmp = g_strdup_printf (
+ "INDEX_%s_translit_%s",
+ summary_dbname_from_field (ebsdb,
ebsdb->priv->summary_fields[i].field),
+ folderid);
+ stmt = sqlite3_mprintf (
+ "CREATE INDEX IF NOT EXISTS %Q ON %Q (%s_translit)", tmp, folderid,
+ summary_dbname_from_field (ebsdb,
ebsdb->priv->summary_fields[i].field));
+ success = book_backend_sql_exec (ebsdb->priv->db, stmt, NULL, NULL, error);
+ sqlite3_free (stmt);
+ g_free (tmp);
+ }
}
if (success &&
@@ -2078,6 +2105,10 @@ insert_stmt_from_contact (EBookBackendSqliteDB *ebsdb,
g_string_append (string, ", ");
g_string_append (string, ebsdb->priv->summary_fields[i].dbname);
g_string_append (string, "_localized");
+
+ g_string_append (string, ", ");
+ g_string_append (string, ebsdb->priv->summary_fields[i].dbname);
+ g_string_append (string, "_translit");
}
if ((ebsdb->priv->summary_fields[i].index & INDEX_SUFFIX) != 0) {
@@ -2104,6 +2135,7 @@ insert_stmt_from_contact (EBookBackendSqliteDB *ebsdb,
gchar *val;
gchar *normal;
gchar *localized = NULL;
+ gchar *transliterated = NULL, *tmp;
if (i > 0)
g_string_append (string, ", ");
@@ -2113,12 +2145,20 @@ insert_stmt_from_contact (EBookBackendSqliteDB *ebsdb,
/* Special exception, never normalize/localize the UID or REV string */
if (ebsdb->priv->summary_fields[i].field != E_CONTACT_UID &&
ebsdb->priv->summary_fields[i].field != E_CONTACT_REV) {
+
normal = e_util_utf8_normalize (val);
- if (val)
+ if (val) {
localized = e_collator_generate_key (ebsdb->priv->collator, val,
NULL);
- else
+
+ tmp = e_transliterator_transliterate (ebsdb->priv->transliterator,
val);
+ transliterated = e_util_utf8_normalize (tmp);
+ g_free (tmp);
+
+ } else {
localized = g_strdup ("");
+ transliterated = g_strdup ("");
+ }
} else
normal = g_strdup (val);
@@ -2132,6 +2172,11 @@ insert_stmt_from_contact (EBookBackendSqliteDB *ebsdb,
g_string_append (string, ", ");
g_string_append (string, str);
sqlite3_free (str);
+
+ str = sqlite3_mprintf ("%Q", transliterated);
+ g_string_append (string, ", ");
+ g_string_append (string, str);
+ sqlite3_free (str);
}
if ((ebsdb->priv->summary_fields[i].index & INDEX_SUFFIX) != 0) {
@@ -2151,6 +2196,7 @@ insert_stmt_from_contact (EBookBackendSqliteDB *ebsdb,
g_free (normal);
g_free (val);
g_free (localized);
+ g_free (transliterated);
} else if (ebsdb->priv->summary_fields[i].type == G_TYPE_BOOLEAN) {
gboolean val;
@@ -3158,12 +3204,13 @@ func_check_translit (struct _ESExp *f,
struct _ESExpResult **argv,
gpointer data)
{
- /* Transliterated strings are not in the summary
- */
- ESExpResult *r;
+ ESExpResult *const r = func_check (f, argc, argv, data);
- r = e_sexp_result_new (f, ESEXP_RES_INT);
- r->value.number = 0;
+ /* Currently we don't support transliterated queries
+ * for multi-valued fields
+ */
+ if ((r->value.number & CHECK_IS_LIST_ATTR) != 0)
+ r->value.number = 0;
return r;
}
@@ -3374,14 +3421,19 @@ typedef enum {
MATCH_PHONE_NUMBER,
MATCH_NATIONAL_PHONE_NUMBER,
MATCH_SHORT_PHONE_NUMBER,
- MATCH_REGEX
+ MATCH_REGEX,
+ MATCH_TRANSLIT_CONTAINS,
+ MATCH_TRANSLIT_IS,
+ MATCH_TRANSLIT_BEGINS_WITH,
+ MATCH_TRANSLIT_ENDS_WITH,
} MatchType;
typedef enum {
CONVERT_NOTHING = 0,
CONVERT_NORMALIZE = (1 << 0),
CONVERT_REVERSE = (1 << 1),
- CONVERT_PHONE = (1 << 2)
+ CONVERT_PHONE = (1 << 2),
+ CONVERT_TRANSLIT = (1 << 3)
} ConvertFlags;
static gchar *
@@ -3421,15 +3473,23 @@ convert_string_value (EBookBackendSqliteDB *ebsdb,
gboolean escape_modifier_needed = FALSE;
const gchar *escape_modifier = " ESCAPE '^'";
gchar *computed = NULL;
- gchar *normal;
+ gchar *normal, *tmp;
const gchar *ptr;
g_return_val_if_fail (value != NULL, NULL);
- if ((flags & CONVERT_NORMALIZE) && match != MATCH_REGEX)
+ /* This is getting to be a mess, if we are transliterating,
+ * then we wont be normalizing as well, or regexing either
+ */
+ if ((flags & CONVERT_TRANSLIT) != 0) {
+ tmp = e_transliterator_transliterate (ebsdb->priv->transliterator, value);
+ normal = e_util_utf8_normalize (tmp);
+ g_free (tmp);
+ } else if ((flags & CONVERT_NORMALIZE) && match != MATCH_REGEX) {
normal = e_util_utf8_normalize (value);
- else
+ } else {
normal = g_strdup (value);
+ }
/* Just assume each character must be escaped. The result of this function
* is discarded shortly after calling this function. Therefore it's
@@ -3442,12 +3502,16 @@ convert_string_value (EBookBackendSqliteDB *ebsdb,
switch (match) {
case MATCH_CONTAINS:
case MATCH_ENDS_WITH:
+ case MATCH_TRANSLIT_CONTAINS:
+ case MATCH_TRANSLIT_ENDS_WITH:
case MATCH_SHORT_PHONE_NUMBER:
g_string_append_c (str, '%');
break;
case MATCH_BEGINS_WITH:
case MATCH_IS:
+ case MATCH_TRANSLIT_BEGINS_WITH:
+ case MATCH_TRANSLIT_IS:
case MATCH_PHONE_NUMBER:
case MATCH_NATIONAL_PHONE_NUMBER:
case MATCH_REGEX:
@@ -3478,11 +3542,15 @@ convert_string_value (EBookBackendSqliteDB *ebsdb,
switch (match) {
case MATCH_CONTAINS:
case MATCH_BEGINS_WITH:
+ case MATCH_TRANSLIT_CONTAINS:
+ case MATCH_TRANSLIT_BEGINS_WITH:
g_string_append_c (str, '%');
break;
case MATCH_ENDS_WITH:
case MATCH_IS:
+ case MATCH_TRANSLIT_ENDS_WITH:
+ case MATCH_TRANSLIT_IS:
case MATCH_PHONE_NUMBER:
case MATCH_NATIONAL_PHONE_NUMBER:
case MATCH_SHORT_PHONE_NUMBER:
@@ -3529,6 +3597,7 @@ field_name_and_query_term (EBookBackendSqliteDB *ebsdb,
} else {
gboolean suffix_search = FALSE;
gboolean phone_search = FALSE;
+ gboolean translit_search = FALSE;
/* If its a suffix search and we have reverse data to search... */
if (match == MATCH_ENDS_WITH &&
@@ -3549,6 +3618,13 @@ field_name_and_query_term (EBookBackendSqliteDB *ebsdb,
(ebsdb->priv->summary_fields[summary_index].index & INDEX_PREFIX) == 0)
suffix_search = TRUE;
+ /* Or perhaps we are doing a transliterated search, if none of the above apply */
+ else if (match == MATCH_TRANSLIT_CONTAINS ||
+ match == MATCH_TRANSLIT_IS ||
+ match == MATCH_TRANSLIT_BEGINS_WITH ||
+ match == MATCH_TRANSLIT_ENDS_WITH)
+ translit_search = TRUE;
+
if (suffix_search) {
/* Special case for suffix matching:
* o Reverse the string
@@ -3616,6 +3692,29 @@ field_name_and_query_term (EBookBackendSqliteDB *ebsdb,
}
}
+
+ } else if (translit_search) {
+
+ if (ebsdb->priv->summary_fields[summary_index].type == E_TYPE_CONTACT_ATTR_LIST) {
+ /* We havent yet supported transliterated multivalue queries, this
+ * should not be reached
+ */
+ g_warn_if_reached ();
+ } else
+ field_name = g_strconcat (
+ "summary.",
+ ebsdb->priv->summary_fields[summary_index].dbname,
+ "_translit", NULL);
+
+ if (ebsdb->priv->summary_fields[summary_index].field == E_CONTACT_UID ||
+ ebsdb->priv->summary_fields[summary_index].field == E_CONTACT_REV)
+ /* We should not be supporting transliteration queries on UID & REV */
+ g_warn_if_reached ();
+ else
+ value = convert_string_value (
+ ebsdb, query_term_input, region,
+ CONVERT_TRANSLIT, match);
+
} else {
if (ebsdb->priv->summary_fields[summary_index].type == E_TYPE_CONTACT_ATTR_LIST) {
field_name = g_strdup ("multi.value");
@@ -3659,6 +3758,7 @@ field_oper (MatchType match)
{
switch (match) {
case MATCH_IS:
+ case MATCH_TRANSLIT_IS:
case MATCH_PHONE_NUMBER:
case MATCH_NATIONAL_PHONE_NUMBER:
return "=";
@@ -3669,6 +3769,9 @@ field_oper (MatchType match)
case MATCH_CONTAINS:
case MATCH_BEGINS_WITH:
case MATCH_ENDS_WITH:
+ case MATCH_TRANSLIT_CONTAINS:
+ case MATCH_TRANSLIT_BEGINS_WITH:
+ case MATCH_TRANSLIT_ENDS_WITH:
case MATCH_SHORT_PHONE_NUMBER:
break;
}
@@ -3869,6 +3972,41 @@ func_regex (struct _ESExp *f,
return convert_match_exp (f, argc, argv, data, MATCH_REGEX);
}
+static ESExpResult *
+func_translit_contains (struct _ESExp *f,
+ gint argc,
+ struct _ESExpResult **argv,
+ gpointer data)
+{
+ return convert_match_exp (f, argc, argv, data, MATCH_TRANSLIT_CONTAINS);
+}
+
+static ESExpResult *
+func_translit_is (struct _ESExp *f,
+ gint argc,
+ struct _ESExpResult **argv,
+ gpointer data)
+{
+ return convert_match_exp (f, argc, argv, data, MATCH_TRANSLIT_IS);
+}
+
+static ESExpResult *
+func_translit_beginswith (struct _ESExp *f,
+ gint argc,
+ struct _ESExpResult **argv,
+ gpointer data)
+{
+ return convert_match_exp (f, argc, argv, data, MATCH_TRANSLIT_BEGINS_WITH);
+}
+
+static ESExpResult *
+func_translit_endswith (struct _ESExp *f,
+ gint argc,
+ struct _ESExpResult **argv,
+ gpointer data)
+{
+ return convert_match_exp (f, argc, argv, data, MATCH_TRANSLIT_ENDS_WITH);
+}
/* 'builtin' functions */
static struct {
@@ -3886,7 +4024,11 @@ static struct {
{ "eqphone", func_eqphone, 0 },
{ "eqphone_national", func_eqphone_national, 0 },
{ "eqphone_short", func_eqphone_short, 0 },
- { "regex_normal", func_regex, 0 }
+ { "regex_normal", func_regex, 0 },
+ { "translit_contains", func_translit_contains, 0 },
+ { "translit_is", func_translit_is, 0 },
+ { "translit_beginswith", func_translit_beginswith, 0 },
+ { "translit_endswith", func_translit_endswith, 0 }
};
static gchar *
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]