[tracker] libtracker-data: new 'tracker:normalize' method
- From: Aleksander Morgado <aleksm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker] libtracker-data: new 'tracker:normalize' method
- Date: Tue, 21 Jan 2014 11:19:05 +0000 (UTC)
commit 8e00e18156328725c03210edb1a7585922c32984
Author: Aleksander Morgado <aleksander aleksander es>
Date: Wed Jan 15 14:16:05 2014 +0100
libtracker-data: new 'tracker:normalize' method
https://bugzilla.gnome.org/show_bug.cgi?id=722254
This method allows normalizing the strings used in SPARQL queries. It expects
two arguments: First, the string to be normalized, and second, one of "nfc",
"nfd", "nfkc" or "nfkd" specifying the type of normalization to apply to the
string.
Example:
1) First, insert a new element which has accents in the nie:title. In the
example we insert the word 'école' which in UTF-8 NFC looks like
"0xC3 0xA9 0x63 0x6F 0x6C 0x65":
$ tracker-sparql -u -q "
INSERT { <abc> a nie:InformationElement .
<abc> nie:title 'école' }"
2) Second, hexdump the result of querying nie:title; we should get the original
string, UTF-8 encoded and NFC-normalized:
$ tracker-sparql -q "
SELECT ?title
WHERE { <abc> nie:title ?title }" | hexdump
0000000 6552 7573 746c 3a73 200a c320 63a9 6c6f
0000010 0a65 000a
0000013
3) Third, explicitly apply NFC normalization; we should get the same output:
$ tracker-sparql -q "
SELECT tracker:normalize(?title,'nfc')
WHERE { <abc> nie:title ?title }" | hexdump
0000000 6552 7573 746c 3a73 200a c320 63a9 6c6f
0000010 0a65 000a
0000013
4) Last, apply an NFKD decomposition (identical to NFD for this string); the
expected decomposed string should look like "0x65 0xCC 0x81 0x63 0x6F 0x6C 0x65":
$ tracker-sparql -q "
SELECT tracker:normalize(?title,'nfkd')
WHERE { <abc> nie:title ?title }" | hexdump
0000000 6552 7573 746c 3a73 200a 6520 81cc 6f63
0000010 656c 0a0a
0000014
src/libtracker-data/tracker-db-interface-sqlite.c | 108 ++++++++++++++++++++
src/libtracker-data/tracker-sparql-expression.vala | 9 ++
2 files changed, 117 insertions(+), 0 deletions(-)
---
diff --git a/src/libtracker-data/tracker-db-interface-sqlite.c b/src/libtracker-data/tracker-db-interface-sqlite.c
index 5ac09d1..2581d45 100644
--- a/src/libtracker-data/tracker-db-interface-sqlite.c
+++ b/src/libtracker-data/tracker-db-interface-sqlite.c
@@ -615,6 +615,50 @@ function_sparql_case_fold (sqlite3_context *context,
sqlite3_result_text16 (context, zOutput, written * 2, free);
}
+static void
+function_sparql_normalize (sqlite3_context *context,
+ int argc,
+ sqlite3_value *argv[])
+{
+ const gchar *nfstr;
+ const uint16_t *zInput;
+ uint16_t *zOutput;
+ size_t written = 0;
+ int nInput;
+ uninorm_t nf;
+
+ if (argc != 2) {
+ sqlite3_result_error (context, "Invalid argument count", -1);
+ return;
+ }
+
+ zInput = sqlite3_value_text16 (argv[0]);
+
+ if (!zInput) {
+ return;
+ }
+
+ nfstr = sqlite3_value_text (argv[1]);
+ if (g_ascii_strcasecmp (nfstr, "nfc") == 0)
+ nf = UNINORM_NFC;
+ else if (g_ascii_strcasecmp (nfstr, "nfd") == 0)
+ nf = UNINORM_NFD;
+ else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0)
+ nf = UNINORM_NFKC;
+ else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0)
+ nf = UNINORM_NFKD;
+ else {
+ sqlite3_result_error (context, "Invalid normalization specified, options are 'nfc', 'nfd',
'nfkc' or 'nfkd'", -1);
+ return;
+ }
+
+ nInput = sqlite3_value_bytes16 (argv[0]);
+
+ zOutput = u16_normalize (nf, zInput, nInput/2, NULL, &written);
+
+ sqlite3_result_text16 (context, zOutput, written * 2, free);
+}
+
#elif HAVE_LIBICU
static void
@@ -701,6 +745,66 @@ function_sparql_case_fold (sqlite3_context *context,
sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
}
+static void
+function_sparql_normalize (sqlite3_context *context,
+ int argc,
+ sqlite3_value *argv[])
+{
+ const gchar *nfstr;
+ const uint16_t *zInput;
+ uint16_t *zOutput;
+ int nInput;
+ int nOutput;
+ UNormalizationMode nf;
+ UErrorCode status = U_ZERO_ERROR;
+
+ if (argc != 2) {
+ sqlite3_result_error (context, "Invalid argument count", -1);
+ return;
+ }
+
+ zInput = sqlite3_value_text16 (argv[0]);
+
+ if (!zInput) {
+ return;
+ }
+
+ nfstr = sqlite3_value_text (argv[1]);
+ if (g_ascii_strcasecmp (nfstr, "nfc") == 0)
+ nf = UNORM_NFC;
+ else if (g_ascii_strcasecmp (nfstr, "nfd") == 0)
+ nf = UNORM_NFD;
+ else if (g_ascii_strcasecmp (nfstr, "nfkc") == 0)
+ nf = UNORM_NFKC;
+ else if (g_ascii_strcasecmp (nfstr, "nfkd") == 0)
+ nf = UNORM_NFKD;
+ else {
+ sqlite3_result_error (context, "Invalid normalization specified", -1);
+ return;
+ }
+
+ nInput = sqlite3_value_bytes16 (argv[0]);
+
+ nOutput = nInput * 2 + 2;
+ zOutput = sqlite3_malloc (nOutput);
+
+ if (!zOutput) {
+ return;
+ }
+
+ unorm_normalize (zInput, nInput/2, nf, 0, zOutput, nOutput/2, &status);
+ if (!U_SUCCESS (status)) {
+ char zBuf[128];
+ sqlite3_snprintf (128, zBuf, "ICU error: unorm_normalize: %s", u_errorName (status));
+ zBuf[127] = '\0';
+ sqlite3_free (zOutput);
+ sqlite3_result_error (context, zBuf, -1);
+ return;
+ }
+
+ sqlite3_result_text16 (context, zOutput, -1, sqlite3_free);
+}
+
#endif
static inline int
@@ -825,6 +929,10 @@ open_database (TrackerDBInterface *db_interface,
db_interface, &function_sparql_case_fold,
NULL, NULL);
+ sqlite3_create_function (db_interface->db, "SparqlNormalize", 2, SQLITE_ANY,
+ db_interface, &function_sparql_normalize,
+ NULL, NULL);
+
sqlite3_create_function (db_interface->db, "SparqlFormatTime", 1, SQLITE_ANY,
db_interface, &function_sparql_format_time,
NULL, NULL);
diff --git a/src/libtracker-data/tracker-sparql-expression.vala b/src/libtracker-data/tracker-sparql-expression.vala
index aa59b02..a2981af 100644
--- a/src/libtracker-data/tracker-sparql-expression.vala
+++ b/src/libtracker-data/tracker-sparql-expression.vala
@@ -487,6 +487,15 @@ class Tracker.Sparql.Expression : Object {
translate_expression_as_string (sql);
sql.append (")");
return PropertyType.STRING;
+ } else if (uri == TRACKER_NS + "normalize") {
+ // conversion to string
+ sql.append ("SparqlNormalize (");
+ translate_expression_as_string (sql);
+ sql.append (", ");
+ expect (SparqlTokenType.COMMA);
+ translate_expression_as_string (sql);
+ sql.append (")");
+ return PropertyType.STRING;
} else if (uri == FN_NS + "contains") {
// fn:contains('A','B') => 'A' GLOB '*B*'
sql.append ("(");
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]