[tracker] Fixes GB#560220: New FTS config option to enable/disable unaccenting



commit 2145eeb70fbe860371b8f7a020b7733b951f8027
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Mon May 24 17:50:59 2010 +0200

    Fixes GB#560220: New FTS config option to enable/disable unaccenting
    
    	* Interactive tracker-parser tester also modified to read the
    	proper configuration values from tracker-fts.cfg

 docs/manpages/tracker-fts.cfg.5                  |    4 ++
 src/libtracker-fts/tracker-fts-config.c          |   44 ++++++++++++++++++
 src/libtracker-fts/tracker-fts-config.h          |    3 +
 src/libtracker-fts/tracker-fts.c                 |    8 +++
 src/libtracker-fts/tracker-parser-glib.c         |    5 ++-
 src/libtracker-fts/tracker-parser-libicu.c       |    5 ++-
 src/libtracker-fts/tracker-parser-libunistring.c |    5 ++-
 src/libtracker-fts/tracker-parser.h              |    1 +
 tests/libtracker-fts/tracker-parser-test.c       |   54 ++++++++++++++--------
 tests/libtracker-fts/tracker-parser.c            |   30 +++++-------
 10 files changed, 119 insertions(+), 40 deletions(-)
---
diff --git a/docs/manpages/tracker-fts.cfg.5 b/docs/manpages/tracker-fts.cfg.5
index 93587cf..efc8987 100644
--- a/docs/manpages/tracker-fts.cfg.5
+++ b/docs/manpages/tracker-fts.cfg.5
@@ -28,6 +28,10 @@ Set to true if stemming should be applied to each word. Stemming is the process
 for reducing inflected and derived words to their stem, base or root form.
 
 .TP
+.B EnableUnaccent=true
+Set to true if combining diacritical marks should be removed from each word.
+
+.TP
 .B IgnoreNumbers=true
 Set to true if words starting with numbers should be ignored.
 
diff --git a/src/libtracker-fts/tracker-fts-config.c b/src/libtracker-fts/tracker-fts-config.c
index 6d53bc4..f3b1faa 100644
--- a/src/libtracker-fts/tracker-fts-config.c
+++ b/src/libtracker-fts/tracker-fts-config.c
@@ -41,12 +41,14 @@
 #define DEFAULT_IGNORE_NUMBERS       TRUE
 #define DEFAULT_IGNORE_STOP_WORDS    TRUE
 #define DEFAULT_ENABLE_STEMMER       FALSE  /* As per GB#526346, disabled */
+#define DEFAULT_ENABLE_UNACCENT      TRUE
 
 typedef struct {
 	/* Indexing */
 	gint min_word_length;
 	gint max_word_length;
 	gboolean enable_stemmer;
+	gboolean enable_unaccent;
 	gboolean ignore_numbers;
 	gboolean ignore_stop_words;
 	gint max_words_to_index;
@@ -81,6 +83,7 @@ enum {
 	PROP_MIN_WORD_LENGTH,
 	PROP_MAX_WORD_LENGTH,
 	PROP_ENABLE_STEMMER,
+	PROP_ENABLE_UNACCENT,
 	PROP_IGNORE_NUMBERS,
 	PROP_IGNORE_STOP_WORDS,
 
@@ -92,6 +95,7 @@ static ObjectToKeyFile conversions[] = {
 	{ G_TYPE_INT,     "min-word-length",    GROUP_INDEXING, "MinWordLength"   },
 	{ G_TYPE_INT,     "max-word-length",    GROUP_INDEXING, "MaxWordLength"   },
 	{ G_TYPE_BOOLEAN, "enable-stemmer",     GROUP_INDEXING, "EnableStemmer"   },
+	{ G_TYPE_BOOLEAN, "enable-unaccent",    GROUP_INDEXING, "EnableUnaccent"  },
 	{ G_TYPE_BOOLEAN, "ignore-numbers",     GROUP_INDEXING, "IgnoreNumbers"   },
 	{ G_TYPE_BOOLEAN, "ignore-stop-words",  GROUP_INDEXING, "IgnoreStopWords" },
 	{ G_TYPE_INT,     "max-words-to-index", GROUP_INDEXING, "MaxWordsToIndex" },
@@ -136,6 +140,13 @@ tracker_fts_config_class_init (TrackerFTSConfigClass *klass)
 	                                                       DEFAULT_ENABLE_STEMMER,
 	                                                       G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
 	g_object_class_install_property (object_class,
+	                                 PROP_ENABLE_UNACCENT,
+	                                 g_param_spec_boolean ("enable-unaccent",
+	                                                       "Enable Unaccent",
+	                                                       " Flag to enable word unaccenting (default=TRUE)",
+	                                                       DEFAULT_ENABLE_UNACCENT,
+	                                                       G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+	g_object_class_install_property (object_class,
 	                                 PROP_IGNORE_NUMBERS,
 	                                 g_param_spec_boolean ("ignore-numbers",
 	                                                       "Ignore numbers",
@@ -187,6 +198,10 @@ config_set_property (GObject      *object,
 		tracker_fts_config_set_enable_stemmer (TRACKER_FTS_CONFIG (object),
 		                                       g_value_get_boolean (value));
 		break;
+	case PROP_ENABLE_UNACCENT:
+		tracker_fts_config_set_enable_unaccent (TRACKER_FTS_CONFIG (object),
+		                                        g_value_get_boolean (value));
+		break;
 	case PROP_IGNORE_NUMBERS:
 		tracker_fts_config_set_ignore_numbers (TRACKER_FTS_CONFIG (object),
 		                                       g_value_get_boolean (value));
@@ -227,6 +242,9 @@ config_get_property (GObject    *object,
 	case PROP_ENABLE_STEMMER:
 		g_value_set_boolean (value, priv->enable_stemmer);
 		break;
+	case PROP_ENABLE_UNACCENT:
+		g_value_set_boolean (value, priv->enable_unaccent);
+		break;
 	case PROP_IGNORE_NUMBERS:
 		g_value_set_boolean (value, priv->ignore_numbers);
 		break;
@@ -449,6 +467,18 @@ tracker_fts_config_get_enable_stemmer (TrackerFTSConfig *config)
 }
 
 gboolean
+tracker_fts_config_get_enable_unaccent (TrackerFTSConfig *config)
+{
+	TrackerFTSConfigPrivate *priv;
+
+	g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_ENABLE_UNACCENT);
+
+	priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+	return priv->enable_unaccent;
+}
+
+gboolean
 tracker_fts_config_get_ignore_numbers (TrackerFTSConfig *config)
 {
 	TrackerFTSConfigPrivate *priv;
@@ -535,6 +565,20 @@ tracker_fts_config_set_enable_stemmer (TrackerFTSConfig *config,
 }
 
 void
+tracker_fts_config_set_enable_unaccent (TrackerFTSConfig *config,
+					gboolean          value)
+{
+	TrackerFTSConfigPrivate *priv;
+
+	g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));
+
+	priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+	priv->enable_unaccent = value;
+	g_object_notify (G_OBJECT (config), "enable-unaccent");
+}
+
+void
 tracker_fts_config_set_ignore_numbers (TrackerFTSConfig *config,
                                        gboolean          value)
 {
diff --git a/src/libtracker-fts/tracker-fts-config.h b/src/libtracker-fts/tracker-fts-config.h
index aabb71a..de75fb8 100644
--- a/src/libtracker-fts/tracker-fts-config.h
+++ b/src/libtracker-fts/tracker-fts-config.h
@@ -51,6 +51,7 @@ gboolean          tracker_fts_config_save                   (TrackerFTSConfig *c
 gint              tracker_fts_config_get_min_word_length    (TrackerFTSConfig *config);
 gint              tracker_fts_config_get_max_word_length    (TrackerFTSConfig *config);
 gboolean          tracker_fts_config_get_enable_stemmer     (TrackerFTSConfig *config);
+gboolean          tracker_fts_config_get_enable_unaccent    (TrackerFTSConfig *config);
 gboolean          tracker_fts_config_get_ignore_numbers     (TrackerFTSConfig *config);
 gboolean          tracker_fts_config_get_ignore_stop_words  (TrackerFTSConfig *config);
 gint              tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config);
@@ -60,6 +61,8 @@ void              tracker_fts_config_set_max_word_length    (TrackerFTSConfig *c
                                                              gint              value);
 void              tracker_fts_config_set_enable_stemmer     (TrackerFTSConfig *config,
                                                              gboolean          value);
+void              tracker_fts_config_set_enable_unaccent    (TrackerFTSConfig *config,
+                                                             gboolean          value);
 void              tracker_fts_config_set_ignore_numbers     (TrackerFTSConfig *config,
                                                              gboolean          value);
 void              tracker_fts_config_set_ignore_stop_words  (TrackerFTSConfig *config,
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index e6a8326..3f42bcd 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -2331,6 +2331,7 @@ struct fulltext_vtab {
   int nColumn;			   /* number of columns in virtual table */
   TrackerParser *parser;	   /* tokenizer for inserts and queries */
   gboolean enable_stemmer;
+  gboolean enable_unaccent;
   gboolean ignore_numbers;
   gboolean ignore_stop_words;
   int max_words;
@@ -3372,6 +3373,7 @@ static int constructVtab(
   min_len = tracker_fts_config_get_min_word_length (config);
   max_len = tracker_fts_config_get_max_word_length (config);
   v->enable_stemmer = tracker_fts_config_get_enable_stemmer (config);
+  v->enable_unaccent = tracker_fts_config_get_enable_unaccent (config);
   v->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
 
   /* disable stop words if TRACKER_FTS_STOP_WORDS is set to 0 - used by tests
@@ -3397,6 +3399,9 @@ static int constructVtab(
   g_object_set_qdata_full (object, quark_fulltext_vtab, v,
                            (GDestroyNotify) fulltext_vtab_destroy);
 
+  /* Config no longer needed */
+  g_object_unref (config);
+
   return SQLITE_OK;
 }
 
@@ -3676,6 +3681,7 @@ static void snippetOffsetsOfColumn(
                         zDoc,
                         nDoc,
                         pVtab->enable_stemmer,
+                        pVtab->enable_unaccent,
                         pVtab->ignore_stop_words,
                         TRUE,
                         pVtab->ignore_numbers);
@@ -4379,6 +4385,7 @@ static int tokenizeSegment(
                         pSegment,
                         nSegment,
                         v->enable_stemmer,
+                        v->enable_unaccent,
                         v->ignore_stop_words,
                         FALSE,
                         v->ignore_numbers);
@@ -4838,6 +4845,7 @@ int Catid,
                         zText,
                         strlen (zText),
                         v->enable_stemmer,
+                        v->enable_unaccent,
                         v->ignore_stop_words,
                         TRUE,
                         v->ignore_numbers);
diff --git a/src/libtracker-fts/tracker-parser-glib.c b/src/libtracker-fts/tracker-parser-glib.c
index fd7d1bd..2c324bb 100644
--- a/src/libtracker-fts/tracker-parser-glib.c
+++ b/src/libtracker-fts/tracker-parser-glib.c
@@ -72,6 +72,7 @@ struct TrackerParser {
 
 	TrackerLanguage       *language;
 	gboolean               enable_stemmer;
+	gboolean               enable_unaccent;
 	gboolean               ignore_stop_words;
 	guint                  max_word_length;
 	gboolean               ignore_reserved_words;
@@ -456,6 +457,7 @@ tracker_parser_reset (TrackerParser *parser,
                       const gchar   *txt,
                       gint           txt_size,
                       gboolean       enable_stemmer,
+                      gboolean       enable_unaccent,
                       gboolean       ignore_stop_words,
                       gboolean       ignore_reserved_words,
                       gboolean       ignore_numbers)
@@ -470,6 +472,7 @@ tracker_parser_reset (TrackerParser *parser,
 	parser->encoding = get_encoding (txt);
 
 	parser->enable_stemmer = enable_stemmer;
+	parser->enable_unaccent = enable_unaccent;
 	parser->ignore_stop_words = ignore_stop_words;
 
 	parser->txt_size = txt_size;
@@ -533,7 +536,7 @@ tracker_parser_process_word (TrackerParser *parser,
 		tracker_parser_message_hex ("ORIGINAL word",
 		                            word, bytes);
 
-		if (do_strip) {
+		if (parser->enable_unaccent && do_strip) {
 			stripped_word = tracker_parser_unaccent_utf8_word (word,
 			                                                   bytes,
 			                                                   &len);
diff --git a/src/libtracker-fts/tracker-parser-libicu.c b/src/libtracker-fts/tracker-parser-libicu.c
index d3fdda4..4814281 100644
--- a/src/libtracker-fts/tracker-parser-libicu.c
+++ b/src/libtracker-fts/tracker-parser-libicu.c
@@ -55,6 +55,7 @@ struct TrackerParser {
 
 	TrackerLanguage       *language;
 	gboolean               enable_stemmer;
+	gboolean               enable_unaccent;
 	guint                  max_word_length;
 	gboolean               ignore_stop_words;
 	gboolean               ignore_reserved_words;
@@ -318,6 +319,7 @@ tracker_parser_reset (TrackerParser *parser,
                       const gchar   *txt,
                       gint           txt_size,
                       gboolean       enable_stemmer,
+                      gboolean       enable_unaccent,
                       gboolean       ignore_stop_words,
                       gboolean       ignore_reserved_words,
                       gboolean       ignore_numbers)
@@ -331,6 +333,7 @@ tracker_parser_reset (TrackerParser *parser,
 	g_return_if_fail (txt != NULL);
 
 	parser->enable_stemmer = enable_stemmer;
+	parser->enable_unaccent = enable_unaccent;
 	parser->ignore_stop_words = ignore_stop_words;
 
 	parser->txt_size = txt_size;
@@ -486,7 +489,7 @@ process_word_uchar (TrackerParser         *parser,
 	}
 
 	/* UNAC stripping needed? (for non-CJK and non-ASCII) */
-	if (type == TRACKER_PARSER_WORD_TYPE_OTHER_UNAC) {
+	if (parser->enable_unaccent && type == TRACKER_PARSER_WORD_TYPE_OTHER_UNAC) {
 		gsize stripped_word_length;
 
 		/* Get unaccented string in UTF-8 */
diff --git a/src/libtracker-fts/tracker-parser-libunistring.c b/src/libtracker-fts/tracker-parser-libunistring.c
index a5fe3ab..02b89a9 100644
--- a/src/libtracker-fts/tracker-parser-libunistring.c
+++ b/src/libtracker-fts/tracker-parser-libunistring.c
@@ -54,6 +54,7 @@ struct TrackerParser {
 
 	TrackerLanguage       *language;
 	gboolean               enable_stemmer;
+	gboolean               enable_unaccent;
 	guint                  max_word_length;
 	gboolean               ignore_stop_words;
 	gboolean               ignore_reserved_words;
@@ -277,6 +278,7 @@ tracker_parser_reset (TrackerParser *parser,
                       const gchar   *txt,
                       gint           txt_size,
                       gboolean       enable_stemmer,
+                      gboolean       enable_unaccent,
                       gboolean       ignore_stop_words,
                       gboolean       ignore_reserved_words,
                       gboolean       ignore_numbers)
@@ -285,6 +287,7 @@ tracker_parser_reset (TrackerParser *parser,
 	g_return_if_fail (txt != NULL);
 
 	parser->enable_stemmer = enable_stemmer;
+	parser->enable_unaccent = enable_unaccent;
 	parser->ignore_stop_words = ignore_stop_words;
 
 	parser->txt_size = txt_size;
@@ -407,7 +410,7 @@ process_word_utf8 (TrackerParser         *parser,
 	normalized[new_word_length] = '\0';
 
 	/* UNAC stripping needed? (for non-CJK and non-ASCII) */
-	if (type == TRACKER_PARSER_WORD_TYPE_OTHER_UNAC) {
+	if (parser->enable_unaccent && type == TRACKER_PARSER_WORD_TYPE_OTHER_UNAC) {
 		gsize stripped_word_length;
 
 		stripped = tracker_parser_unaccent_utf8_word (normalized,
diff --git a/src/libtracker-fts/tracker-parser.h b/src/libtracker-fts/tracker-parser.h
index 21ab427..b84d534 100644
--- a/src/libtracker-fts/tracker-parser.h
+++ b/src/libtracker-fts/tracker-parser.h
@@ -36,6 +36,7 @@ void           tracker_parser_reset           (TrackerParser   *parser,
                                                const gchar     *txt,
                                                gint             txt_size,
                                                gboolean         enable_stemmer,
+                                               gboolean         enable_unaccent,
                                                gboolean         ignore_stop_words,
                                                gboolean         ignore_reserved_words,
                                                gboolean         ignore_numbers);
diff --git a/tests/libtracker-fts/tracker-parser-test.c b/tests/libtracker-fts/tracker-parser-test.c
index 47edcbf..8975f41 100644
--- a/tests/libtracker-fts/tracker-parser-test.c
+++ b/tests/libtracker-fts/tracker-parser-test.c
@@ -53,6 +53,7 @@ typedef struct {
 	/* Default parser configuration to use */
 	gint              max_word_length;
 	gboolean          enable_stemmer;
+	gboolean          enable_unaccent;
 	gboolean          ignore_stop_words;
 	gboolean          ignore_reserved_words;
 	gboolean          ignore_numbers;
@@ -77,6 +78,7 @@ test_common_setup (TrackerParserTestFixture *fixture,
 	/* Default conf parameters */
 	fixture->max_word_length = 50;
 	fixture->enable_stemmer = TRUE;
+	fixture->enable_unaccent = TRUE;
 	fixture->ignore_stop_words = TRUE;
 	fixture->ignore_reserved_words = TRUE;
 	fixture->ignore_numbers = TRUE;
@@ -131,6 +133,7 @@ expected_nwords_check (TrackerParserTestFixture *fixture,
 	                      testdata->str,
 	                      strlen (testdata->str),
 	                      fixture->enable_stemmer,
+	                      fixture->enable_unaccent,
 	                      fixture->ignore_stop_words,
 	                      fixture->ignore_reserved_words,
 	                      testdata->ignore_numbers);
@@ -157,6 +160,7 @@ struct TestDataExpectedWord {
 	const gchar  *str;
 	const gchar  *expected;
 	gboolean      enable_stemmer;
+	gboolean      enable_unaccent;
 };
 
 /* Common expected_word test method */
@@ -177,6 +181,7 @@ expected_word_check (TrackerParserTestFixture *fixture,
 	                      testdata->str,
 	                      strlen (testdata->str),
 	                      testdata->enable_stemmer,
+	                      testdata->enable_unaccent,
 	                      fixture->ignore_stop_words,
 	                      fixture->ignore_reserved_words,
 	                      fixture->ignore_numbers);
@@ -198,38 +203,49 @@ expected_word_check (TrackerParserTestFixture *fixture,
 #ifdef HAVE_UNAC
 /* Normalization-related tests (unaccenting) */
 static const TestDataExpectedWord test_data_normalization[] = {
-	{ "école",                "ecole", FALSE },
-	{ "ÉCOLE",                "ecole", FALSE },
-	{ "École",                "ecole", FALSE },
+	{ "école",                "ecole", FALSE, TRUE  },
+	{ "ÉCOLE",                "ecole", FALSE, TRUE  },
+	{ "École",                "ecole", FALSE, TRUE  },
 #ifdef FULL_UNICODE_TESTS /* glib/pango doesn't like NFD strings */
-	{ "e" "\xCC\x81" "cole",  "ecole", FALSE },
-	{ "E" "\xCC\x81" "COLE",  "ecole", FALSE },
-	{ "E" "\xCC\x81" "cole",  "ecole", FALSE },
+	{ "e" "\xCC\x81" "cole",  "ecole", FALSE, TRUE  },
+	{ "E" "\xCC\x81" "COLE",  "ecole", FALSE, TRUE  },
+	{ "E" "\xCC\x81" "cole",  "ecole", FALSE, TRUE  },
 #endif
-	{ NULL,                   NULL,    FALSE }
+	{ NULL,                   NULL,    FALSE, FALSE }
 };
 
 /* Unaccenting-related tests */
 static const TestDataExpectedWord test_data_unaccent[] = {
-	{ "Murciélago", "murcielago", FALSE },
-	{ "camión",     "camion",     FALSE },
-	{ "desagüe",    "desague",    FALSE },
-	{ NULL,         NULL,         FALSE }
+	{ "Murciélago", "murcielago", FALSE, TRUE  },
+	{ "camión",     "camion",     FALSE, TRUE  },
+	{ "desagüe",    "desague",    FALSE, TRUE  },
+	{ "Murciélago", "murciélago", FALSE, FALSE },
+	{ "camión",     "camión",     FALSE, FALSE },
+	{ "desagüe",    "desagüe",    FALSE, FALSE },
+	{ NULL,         NULL,         FALSE, FALSE }
 };
 #else
 /* Normalization-related tests (not unaccenting) */
 static const TestDataExpectedWord test_data_normalization[] = {
-	{ "école",                "école", FALSE },
-	{ "ÉCOLE",                "école", FALSE },
-	{ "École",                "école", FALSE },
+	{ "école",                "école", FALSE, FALSE },
+	{ "ÉCOLE",                "école", FALSE, FALSE },
+	{ "École",                "école", FALSE, FALSE },
 #ifdef FULL_UNICODE_TESTS /* glib/pango doesn't like NFD strings */
-	{ "e" "\xCC\x81" "cole",  "école", FALSE },
-	{ "E" "\xCC\x81" "COLE",  "école", FALSE },
-	{ "E" "\xCC\x81" "cole",  "école", FALSE },
+	{ "e" "\xCC\x81" "cole",  "école", FALSE, FALSE },
+	{ "E" "\xCC\x81" "COLE",  "école", FALSE, FALSE },
+	{ "E" "\xCC\x81" "cole",  "école", FALSE, FALSE },
 #endif
-	{ NULL,                   NULL,    FALSE }
-};
+	{ "école",                "école", FALSE, TRUE  },
+	{ "ÉCOLE",                "école", FALSE, TRUE  },
+	{ "École",                "école", FALSE, TRUE  },
+#ifdef FULL_UNICODE_TESTS /* glib/pango doesn't like NFD strings */
+	{ "e" "\xCC\x81" "cole",  "école", FALSE, TRUE  },
+	{ "E" "\xCC\x81" "COLE",  "école", FALSE, TRUE  },
+	{ "E" "\xCC\x81" "cole",  "école", FALSE, TRUE  },
 #endif
+	{ NULL,                   NULL,    FALSE, FALSE }
+};
+#endif /* !HAVE_UNAC */
 
 /* Stemming-related tests */
 static const TestDataExpectedWord test_data_stemming[] = {
diff --git a/tests/libtracker-fts/tracker-parser.c b/tests/libtracker-fts/tracker-parser.c
index 1c38215..0aaf6c4 100644
--- a/tests/libtracker-fts/tracker-parser.c
+++ b/tests/libtracker-fts/tracker-parser.c
@@ -29,10 +29,6 @@
 #include <libtracker-fts/tracker-fts-config.h>
 #include <libtracker-common/tracker-common.h>
 
-
-#define DEFAULT_MAX_WORD_LENGTH 30
-
-static gint      max_word_length = DEFAULT_MAX_WORD_LENGTH;
 static gchar    *text;
 static gchar    *filename;
 static gboolean  verbose;
@@ -46,12 +42,6 @@ static const GOptionEntry options [] = {
 		NULL
 	},
 	{
-		"max-word-length", 'm', 0,
-		G_OPTION_ARG_INT, &max_word_length,
-		"Maximum word length to consider",
-		NULL
-	},
-	{
 		"text", 't', 0,
 		G_OPTION_ARG_STRING, &text,
 		"Specific text to parse",
@@ -119,13 +109,17 @@ load_file_contents (void)
 static gboolean
 run_parsing (void)
 {
+	TrackerFTSConfig *config;
 	TrackerLanguage *language;
-	TrackerParser   *parser;
-	GTimer          *timer;
+	TrackerParser *parser;
+	GTimer *timer;
 
 	/* Initialize timing */
 	timer = g_timer_new ();
 
+	/* Read config file */
+	config = tracker_fts_config_new ();
+
 	/* Setup language for parser */
 	language = tracker_language_new (NULL);
 	if (!language) {
@@ -135,22 +129,22 @@ run_parsing (void)
 
 	/* Create the parser */
 	parser = tracker_parser_new (language,
-	                             max_word_length);
+	                             tracker_fts_config_get_max_word_length (config));
 	if (!parser) {
 		g_printerr ("Parser creation failed!\n");
 		g_object_unref (language);
 		return FALSE;
 	}
 
-	/* Reset the parser with our string */
+	/* Reset the parser with our string, reading the current FTS config */
 	tracker_parser_reset (parser,
 	                      text,
 	                      strlen (text),
+	                      tracker_fts_config_get_enable_stemmer (config),
+	                      tracker_fts_config_get_enable_unaccent (config),
+	                      tracker_fts_config_get_ignore_stop_words (config),
 	                      TRUE,
-	                      TRUE,
-	                      TRUE,
-	                      TRUE);
-
+	                      tracker_fts_config_get_ignore_numbers (config));
 
 	/* Loop through all words! */
 	while (1) {



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]