[tracker/fts4-fixes: 1/4] fts: remove word length limits in the tokenizer



commit 0347ed1acab86cf2179e1dba7394aa4871610085
Author: Carlos Garnacho <carlos lanedo com>
Date:   Thu Feb 14 17:41:53 2013 +0100

    fts: remove word length limits in the tokenizer
    
    All words are now indexed regardless of length. The only exception,
    as always, is words in the stop word list, which filters the most
    common/unhelpful words out of the database.

 src/libtracker-fts/tracker-fts-tokenizer.c |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)
---
diff --git a/src/libtracker-fts/tracker-fts-tokenizer.c b/src/libtracker-fts/tracker-fts-tokenizer.c
index 2e12326..2f1f870 100644
--- a/src/libtracker-fts/tracker-fts-tokenizer.c
+++ b/src/libtracker-fts/tracker-fts-tokenizer.c
@@ -34,7 +34,6 @@ typedef struct TrackerCursor TrackerCursor;
 struct TrackerTokenizer {
   sqlite3_tokenizer base;
   TrackerLanguage *language;
-  int max_word_length;
   int max_words;
   gboolean enable_stemmer;
   gboolean enable_unaccent;
@@ -70,7 +69,6 @@ static int trackerCreate(
 
   config = tracker_fts_config_new ();
 
-  p->max_word_length = tracker_fts_config_get_max_word_length (config);
   p->enable_stemmer = tracker_fts_config_get_enable_stemmer (config);
   p->enable_unaccent = tracker_fts_config_get_enable_unaccent (config);
   p->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
@@ -121,7 +119,7 @@ static int trackerOpen(
 
   parser = tracker_parser_new (p->language);
   tracker_parser_reset (parser, zInput, nInput,
-                       p->max_word_length,
+                       G_MAXUINT,
                        p->enable_stemmer,
                        p->enable_unaccent,
                        p->ignore_stop_words,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]