[tracker/fts4] fts: Obey min-word-length configuration on the tokenizer



commit 4f7a807341429836f140918ff4a82ecb5b274f7c
Author: Carlos Garnacho <carlos lanedo com>
Date:   Tue Feb 12 18:59:34 2013 +0100

    fts: Obey min-word-length configuration on the tokenizer

 src/libtracker-fts/tracker-fts-tokenizer.c |    4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)
---
diff --git a/src/libtracker-fts/tracker-fts-tokenizer.c b/src/libtracker-fts/tracker-fts-tokenizer.c
index 2e12326..2dfc997 100644
--- a/src/libtracker-fts/tracker-fts-tokenizer.c
+++ b/src/libtracker-fts/tracker-fts-tokenizer.c
@@ -34,6 +34,7 @@ typedef struct TrackerCursor TrackerCursor;
 struct TrackerTokenizer {
   sqlite3_tokenizer base;
   TrackerLanguage *language;
+  int min_word_length;
   int max_word_length;
   int max_words;
   gboolean enable_stemmer;
@@ -70,6 +71,7 @@ static int trackerCreate(
 
   config = tracker_fts_config_new ();
 
+  p->min_word_length = tracker_fts_config_get_min_word_length (config);
   p->max_word_length = tracker_fts_config_get_max_word_length (config);
   p->enable_stemmer = tracker_fts_config_get_enable_stemmer (config);
   p->enable_unaccent = tracker_fts_config_get_enable_unaccent (config);
@@ -181,7 +183,7 @@ static int trackerNext(
     if (!pToken){
       return SQLITE_DONE;
     }
-  } while (stop_word && p->ignore_stop_words);
+  } while (stop_word && p->ignore_stop_words && len < p->min_word_length);
 
   *ppToken = pToken;
   *piStartOffset = start;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]