[tracker/wip/carlosg/tracker-3.0-api-breaks: 88/100] libtracker-sparql: Add TrackerSparqlConnectionFlags to configure FTS
- From: Carlos Garnacho <carlosg src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/wip/carlosg/tracker-3.0-api-breaks: 88/100] libtracker-sparql: Add TrackerSparqlConnectionFlags to configure FTS
- Date: Mon, 17 Feb 2020 18:16:13 +0000 (UTC)
commit 043f9ca9bcc5a4a56f45e6eb9196e0bb5d2f7ecb
Author: Carlos Garnacho <carlosg gnome org>
Date: Tue Feb 4 22:14:41 2020 +0100
libtracker-sparql: Add TrackerSparqlConnectionFlags to configure FTS
Those flags are based on the FTS gschema settings, although kind of
reduced. The flags are propagated down the FTS parser, and stored/
checked as main DB metadata, so we catch changes in those and rebuild
FTS tokens accordingly.
Readonly connections will not be able to issue any changes, and will
go with the settings as stored in the metadata table.
src/libtracker-data/tracker-db-interface-sqlite.c | 2 +-
src/libtracker-data/tracker-db-manager.c | 36 ++++++++++++++++-
src/libtracker-data/tracker-db-manager.h | 4 ++
src/libtracker-direct/tracker-direct.c | 28 +++++++++++--
src/libtracker-fts/tracker-fts-tokenizer.c | 49 +++++++++--------------
src/libtracker-fts/tracker-fts-tokenizer.h | 9 +++--
src/libtracker-fts/tracker-fts.c | 9 +++--
src/libtracker-fts/tracker-fts.h | 9 +++--
src/libtracker-sparql/tracker-connection.h | 12 +++++-
9 files changed, 108 insertions(+), 50 deletions(-)
---
diff --git a/src/libtracker-data/tracker-db-interface-sqlite.c b/src/libtracker-data/tracker-db-interface-sqlite.c
index 90a7662e0..61c3a1c11 100644
--- a/src/libtracker-data/tracker-db-interface-sqlite.c
+++ b/src/libtracker-data/tracker-db-interface-sqlite.c
@@ -2089,7 +2089,7 @@ tracker_db_interface_sqlite_fts_init (TrackerDBInterface *db_interface,
#if HAVE_TRACKER_FTS
GStrv fts_columns;
- tracker_fts_init_db (db_interface->db, db_interface, properties);
+ tracker_fts_init_db (db_interface->db, db_interface, db_interface->flags, properties);
if (create &&
!tracker_fts_create_table (db_interface->db, database, "fts5",
diff --git a/src/libtracker-data/tracker-db-manager.c b/src/libtracker-data/tracker-db-manager.c
index a8c4d844c..a037bd63d 100644
--- a/src/libtracker-data/tracker-db-manager.c
+++ b/src/libtracker-data/tracker-db-manager.c
@@ -70,6 +70,11 @@
#define TOSTRING(x) TOSTRING1(x)
#define TRACKER_PARSER_VERSION_STRING TOSTRING(TRACKER_PARSER_VERSION)
+#define FTS_FLAGS (TRACKER_DB_MANAGER_FTS_ENABLE_STEMMER | \
+ TRACKER_DB_MANAGER_FTS_ENABLE_UNACCENT | \
+ TRACKER_DB_MANAGER_FTS_ENABLE_STOP_WORDS | \
+ TRACKER_DB_MANAGER_FTS_IGNORE_NUMBERS)
+
typedef enum {
TRACKER_DB_VERSION_UNKNOWN, /* Unknown */
TRACKER_DB_VERSION_0_6_6, /* before indexer-split */
@@ -673,6 +678,19 @@ tracker_db_manager_new (TrackerDBManagerFlags flags,
if ((flags & TRACKER_DB_MANAGER_REMOVE_ALL) != 0) {
return db_manager;
}
+ } else {
+ GValue value = G_VALUE_INIT;
+ TrackerDBManagerFlags fts_flags = 0;
+
+ if (tracker_db_manager_get_metadata (db_manager, "fts-flags", &value)) {
+ fts_flags = g_ascii_strtoll (g_value_get_string (&value), NULL, 10);
+ g_value_unset (&value);
+ }
+
+ /* Readonly connections should go with the FTS flags as stored
+ * in metadata.
+ */
+ db_manager->flags = (db_manager->flags & ~(FTS_FLAGS)) | fts_flags;
}
/* Set general database options */
@@ -1215,8 +1233,20 @@ tracker_db_manager_get_tokenizer_changed (TrackerDBManager *db_manager)
{
GValue value = G_VALUE_INIT;
const gchar *version;
+ TrackerDBManagerFlags flags;
gboolean changed;
+ if (!tracker_db_manager_get_metadata (db_manager, "fts-flags", &value))
+ return TRUE;
+
+ flags = g_ascii_strtoll (g_value_get_string (&value), NULL, 10);
+ g_value_unset (&value);
+
+ if ((db_manager->flags & TRACKER_DB_MANAGER_READONLY) == 0 &&
+ flags != (db_manager->flags & FTS_FLAGS)) {
+ return TRUE;
+ }
+
if (!tracker_db_manager_get_metadata (db_manager, "parser-version", &value))
return TRUE;
@@ -1234,9 +1264,13 @@ tracker_db_manager_tokenizer_update (TrackerDBManager *db_manager)
g_value_init (&value, G_TYPE_STRING);
g_value_set_string (&value, TRACKER_PARSER_VERSION_STRING);
-
tracker_db_manager_set_metadata (db_manager, "parser-version", &value);
g_value_unset (&value);
+
+ g_value_init (&value, G_TYPE_INT64);
+ g_value_set_int64 (&value, (db_manager->flags & FTS_FLAGS));
+ tracker_db_manager_set_metadata (db_manager, "fts-flags", &value);
+ g_value_unset (&value);
}
void
diff --git a/src/libtracker-data/tracker-db-manager.h b/src/libtracker-data/tracker-db-manager.h
index 388e9267b..276b78eae 100644
--- a/src/libtracker-data/tracker-db-manager.h
+++ b/src/libtracker-data/tracker-db-manager.h
@@ -43,6 +43,10 @@ typedef enum {
TRACKER_DB_MANAGER_READONLY = 1 << 3,
TRACKER_DB_MANAGER_DO_NOT_CHECK_ONTOLOGY = 1 << 4,
TRACKER_DB_MANAGER_ENABLE_MUTEXES = 1 << 5,
+ TRACKER_DB_MANAGER_FTS_ENABLE_STEMMER = 1 << 6,
+ TRACKER_DB_MANAGER_FTS_ENABLE_UNACCENT = 1 << 7,
+ TRACKER_DB_MANAGER_FTS_ENABLE_STOP_WORDS = 1 << 8,
+ TRACKER_DB_MANAGER_FTS_IGNORE_NUMBERS = 1 << 9,
} TrackerDBManagerFlags;
typedef struct _TrackerDBManager TrackerDBManager;
diff --git a/src/libtracker-direct/tracker-direct.c b/src/libtracker-direct/tracker-direct.c
index 1009701dc..842510055 100644
--- a/src/libtracker-direct/tracker-direct.c
+++ b/src/libtracker-direct/tracker-direct.c
@@ -201,6 +201,25 @@ set_up_thread_pools (TrackerDirectConnection *conn,
return TRUE;
}
+static TrackerDBManagerFlags
+translate_flags (TrackerSparqlConnectionFlags flags)
+{
+ TrackerDBManagerFlags db_flags = TRACKER_DB_MANAGER_ENABLE_MUTEXES;
+
+ if ((flags & TRACKER_SPARQL_CONNECTION_FLAGS_READONLY) != 0)
+ db_flags |= TRACKER_DB_MANAGER_READONLY;
+ if ((flags & TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_STEMMER) != 0)
+ db_flags |= TRACKER_DB_MANAGER_FTS_ENABLE_STEMMER;
+ if ((flags & TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_UNACCENT) != 0)
+ db_flags |= TRACKER_DB_MANAGER_FTS_ENABLE_UNACCENT;
+ if ((flags & TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_STOP_WORDS) != 0)
+ db_flags |= TRACKER_DB_MANAGER_FTS_ENABLE_STOP_WORDS;
+ if ((flags & TRACKER_SPARQL_CONNECTION_FLAGS_FTS_IGNORE_NUMBERS) != 0)
+ db_flags |= TRACKER_DB_MANAGER_FTS_IGNORE_NUMBERS;
+
+ return db_flags;
+}
+
static gboolean
tracker_direct_connection_initable_init (GInitable *initable,
GCancellable *cancellable,
@@ -208,7 +227,7 @@ tracker_direct_connection_initable_init (GInitable *initable,
{
TrackerDirectConnectionPrivate *priv;
TrackerDirectConnection *conn;
- TrackerDBManagerFlags db_flags = TRACKER_DB_MANAGER_ENABLE_MUTEXES;
+ TrackerDBManagerFlags db_flags;
GHashTable *namespaces;
GHashTableIter iter;
gchar *prefix, *ns;
@@ -221,10 +240,11 @@ tracker_direct_connection_initable_init (GInitable *initable,
if (!set_up_thread_pools (conn, error))
return FALSE;
+ db_flags = translate_flags (priv->flags);
+
/* Init data manager */
- if (priv->flags & TRACKER_SPARQL_CONNECTION_FLAGS_READONLY) {
- db_flags |= TRACKER_DB_MANAGER_READONLY;
- } else if (!priv->ontology) {
+ if (!priv->ontology &&
+ (db_flags & TRACKER_DB_MANAGER_READONLY) == 0) {
gchar *filename;
/* If the connection is read/write, and no ontology is specified,
diff --git a/src/libtracker-fts/tracker-fts-tokenizer.c b/src/libtracker-fts/tracker-fts-tokenizer.c
index a58a8bacd..6367b0e1c 100644
--- a/src/libtracker-fts/tracker-fts-tokenizer.c
+++ b/src/libtracker-fts/tracker-fts-tokenizer.c
@@ -40,12 +40,7 @@ typedef struct TrackerTokenizerFunctionData TrackerTokenizerFunctionData;
struct TrackerTokenizerData {
TrackerLanguage *language;
- int max_word_length;
- int max_words;
- gboolean enable_stemmer;
- gboolean enable_unaccent;
- gboolean ignore_numbers;
- gboolean ignore_stop_words;
+ TrackerDBManagerFlags flags;
};
struct TrackerTokenizer {
@@ -58,6 +53,9 @@ struct TrackerTokenizerFunctionData {
gchar **property_names;
};
+#define MAX_WORD_LENGTH 200
+#define MAX_WORDS 10000
+
static int
tracker_tokenizer_create (void *data,
const char **argv,
@@ -113,14 +111,14 @@ tracker_tokenizer_tokenize (Fts5Tokenizer *fts5_tokenizer,
(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX));
tracker_parser_reset (tokenizer->parser, text, length,
- data->max_word_length,
- data->enable_stemmer,
- data->enable_unaccent,
- data->ignore_stop_words,
- TRUE,
- data->ignore_numbers);
-
- while (n_tokens < data->max_words) {
+ MAX_WORD_LENGTH,
+ !!(data->flags & TRACKER_DB_MANAGER_FTS_ENABLE_STEMMER),
+ !!(data->flags & TRACKER_DB_MANAGER_FTS_ENABLE_UNACCENT),
+ !!(data->flags & TRACKER_DB_MANAGER_FTS_ENABLE_STOP_WORDS),
+ TRUE,
+ !!(data->flags & TRACKER_DB_MANAGER_FTS_IGNORE_NUMBERS));
+
+ while (n_tokens < MAX_WORDS) {
token = tracker_parser_next (tokenizer->parser,
&pos,
&start, &end,
@@ -157,23 +155,13 @@ static const fts5_tokenizer tracker_tokenizer_module = {
};
static TrackerTokenizerData *
-tracker_tokenizer_data_new (void)
+tracker_tokenizer_data_new (TrackerDBManagerFlags flags)
{
TrackerTokenizerData *p;
- TrackerFTSConfig *config;
-
- config = tracker_fts_config_new ();
p = g_new0 (TrackerTokenizerData, 1);
p->language = tracker_language_new (NULL);
- p->max_word_length = tracker_fts_config_get_max_word_length (config);
- p->enable_stemmer = tracker_fts_config_get_enable_stemmer (config);
- p->enable_unaccent = tracker_fts_config_get_enable_unaccent (config);
- p->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
- p->max_words = tracker_fts_config_get_max_words_to_index (config);
- p->ignore_stop_words = tracker_fts_config_get_ignore_stop_words (config);
-
- g_object_unref (config);
+ p->flags = flags;
return p;
}
@@ -440,9 +428,10 @@ tracker_tokenizer_function_data_free (TrackerTokenizerFunctionData *data)
}
gboolean
-tracker_tokenizer_initialize (sqlite3 *db,
- TrackerDBInterface *interface,
- const gchar **property_names)
+tracker_tokenizer_initialize (sqlite3 *db,
+ TrackerDBInterface *interface,
+ TrackerDBManagerFlags flags,
+ const gchar **property_names)
{
TrackerTokenizerData *data;
TrackerTokenizerFunctionData *func_data;
@@ -454,7 +443,7 @@ tracker_tokenizer_initialize (sqlite3 *db,
if (!api)
return FALSE;
- data = tracker_tokenizer_data_new ();
+ data = tracker_tokenizer_data_new (flags);
tokenizer = (fts5_tokenizer *) &tracker_tokenizer_module;
api->xCreateTokenizer (api, "TrackerTokenizer", data, tokenizer,
tracker_tokenizer_data_free);
diff --git a/src/libtracker-fts/tracker-fts-tokenizer.h b/src/libtracker-fts/tracker-fts-tokenizer.h
index 457476c5d..0065167a0 100644
--- a/src/libtracker-fts/tracker-fts-tokenizer.h
+++ b/src/libtracker-fts/tracker-fts-tokenizer.h
@@ -21,13 +21,14 @@
#include <sqlite3.h>
#include <glib.h>
-#include <libtracker-data/tracker-db-interface.h>
+#include <libtracker-data/tracker-db-manager.h>
#ifndef __TRACKER_FTS_TOKENIZER_H__
#define __TRACKER_FTS_TOKENIZER_H__
-gboolean tracker_tokenizer_initialize (sqlite3 *db,
- TrackerDBInterface *interface,
- const gchar **property_names);
+gboolean tracker_tokenizer_initialize (sqlite3 *db,
+ TrackerDBInterface *interface,
+ TrackerDBManagerFlags flags,
+ const gchar **property_names);
#endif /* __TRACKER_FTS_TOKENIZER_H__ */
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index 4abc14121..15743b00e 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -59,9 +59,10 @@ get_fts_properties (GHashTable *tables)
}
gboolean
-tracker_fts_init_db (sqlite3 *db,
- TrackerDBInterface *interface,
- GHashTable *tables)
+tracker_fts_init_db (sqlite3 *db,
+ TrackerDBInterface *interface,
+ TrackerDBManagerFlags flags,
+ GHashTable *tables)
{
gchar **property_names;
gboolean retval;
@@ -75,7 +76,7 @@ tracker_fts_init_db (sqlite3 *db,
#endif
property_names = get_fts_properties (tables);
- retval = tracker_tokenizer_initialize (db, interface, (const gchar **) property_names);
+ retval = tracker_tokenizer_initialize (db, interface, flags, (const gchar **) property_names);
g_strfreev (property_names);
return retval;
diff --git a/src/libtracker-fts/tracker-fts.h b/src/libtracker-fts/tracker-fts.h
index e8d3a91f9..059f81823 100644
--- a/src/libtracker-fts/tracker-fts.h
+++ b/src/libtracker-fts/tracker-fts.h
@@ -25,13 +25,14 @@
#include <sqlite3.h>
#include <glib.h>
-#include <libtracker-data/tracker-db-interface.h>
+#include <libtracker-data/tracker-db-manager.h>
G_BEGIN_DECLS
-gboolean tracker_fts_init_db (sqlite3 *db,
- TrackerDBInterface *interface,
- GHashTable *tables);
+gboolean tracker_fts_init_db (sqlite3 *db,
+ TrackerDBInterface *interface,
+ TrackerDBManagerFlags flags,
+ GHashTable *tables);
gboolean tracker_fts_create_table (sqlite3 *db,
const gchar *database,
gchar *table_name,
diff --git a/src/libtracker-sparql/tracker-connection.h b/src/libtracker-sparql/tracker-connection.h
index 61c2161e7..01cd9f076 100644
--- a/src/libtracker-sparql/tracker-connection.h
+++ b/src/libtracker-sparql/tracker-connection.h
@@ -31,12 +31,20 @@
* TrackerSparqlConnectionFlags:
* @TRACKER_SPARQL_CONNECTION_FLAGS_NONE: No flags.
* @TRACKER_SPARQL_CONNECTION_FLAGS_READONLY: Connection is readonly.
+ * @TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_STEMMER: Word stemming is applied to FTS search terms.
+ * @TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_UNACCENT: Unaccenting is applied to FTS search terms.
+ * @TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_STOP_WORDS: FTS Search terms are filtered through a stop word list.
+ * @TRACKER_SPARQL_CONNECTION_FLAGS_FTS_IGNORE_NUMBERS: Ignore numbers in FTS search terms.
*
* Connection flags to modify #TrackerSparqlConnection behavior.
*/
typedef enum {
- TRACKER_SPARQL_CONNECTION_FLAGS_NONE = 0,
- TRACKER_SPARQL_CONNECTION_FLAGS_READONLY = 1 << 0,
+ TRACKER_SPARQL_CONNECTION_FLAGS_NONE = 0,
+ TRACKER_SPARQL_CONNECTION_FLAGS_READONLY = 1 << 0,
+ TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_STEMMER = 1 << 1,
+ TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_UNACCENT = 1 << 2,
+ TRACKER_SPARQL_CONNECTION_FLAGS_FTS_ENABLE_STOP_WORDS = 1 << 3,
+ TRACKER_SPARQL_CONNECTION_FLAGS_FTS_IGNORE_NUMBERS = 1 << 4,
} TrackerSparqlConnectionFlags;
/**
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]