[tracker/parser-unicode-libs-review] Fixes GB#491850 and GB#503366: new FTS configuration parameters



commit 9e3638d9440d385bc83ddca048259ee96d99f4c1
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Tue May 18 17:53:40 2010 +0200

    Fixes GB#491850 and GB#503366: new FTS configuration parameters
    
    	* IgnoreNumbers   (default: TRUE)
    	* IgnoreStopWords (default: TRUE)

 docs/manpages/tracker-fts.cfg.5                  |    8 ++
 src/libtracker-fts/tracker-fts-config.c          |  116 +++++++++++++++++++++-
 src/libtracker-fts/tracker-fts-config.h          |    6 +
 src/libtracker-fts/tracker-fts.c                 |   41 ++++++--
 src/libtracker-fts/tracker-parser-glib.c         |   28 +++---
 src/libtracker-fts/tracker-parser-libicu.c       |   50 +++++-----
 src/libtracker-fts/tracker-parser-libunistring.c |   40 ++++----
 src/libtracker-fts/tracker-parser.h              |    6 +-
 8 files changed, 218 insertions(+), 77 deletions(-)
---
diff --git a/docs/manpages/tracker-fts.cfg.5 b/docs/manpages/tracker-fts.cfg.5
index 96dc11a..176fe54 100644
--- a/docs/manpages/tracker-fts.cfg.5
+++ b/docs/manpages/tracker-fts.cfg.5
@@ -23,6 +23,14 @@ Set the minimum length of words to index (0->30).
 Set the maximum length of words to index (0->200).
 
 .TP
+.B IgnoreNumbers=true
+Set to true if words starting with numbers should be ignored.
+
+.TP
+.B IgnoreStopWords=true
+Set to true if stop words should be ignored.
+
+.TP
 .B MaxWordsToIndex=10000
 Maximum unique words to index from a file's content.
 
diff --git a/src/libtracker-fts/tracker-fts-config.c b/src/libtracker-fts/tracker-fts-config.c
index 4c1a7d7..736d044 100644
--- a/src/libtracker-fts/tracker-fts-config.c
+++ b/src/libtracker-fts/tracker-fts-config.c
@@ -37,12 +37,16 @@
 /* Default values */
 #define DEFAULT_MIN_WORD_LENGTH            3      /* 0->30 */
 #define DEFAULT_MAX_WORD_LENGTH            30     /* 0->200 */
-#define DEFAULT_MAX_WORDS_TO_INDEX 10000
+#define DEFAULT_MAX_WORDS_TO_INDEX      10000
+#define DEFAULT_IGNORE_NUMBERS           TRUE
+#define DEFAULT_IGNORE_STOP_WORDS        TRUE
 
 typedef struct {
 	/* Indexing */
 	gint min_word_length;
 	gint max_word_length;
+	gboolean ignore_numbers;
+	gboolean ignore_stop_words;
 	gint max_words_to_index;
 }  TrackerFTSConfigPrivate;
 
@@ -74,6 +78,8 @@ enum {
 	/* Indexing */
 	PROP_MIN_WORD_LENGTH,
 	PROP_MAX_WORD_LENGTH,
+	PROP_IGNORE_NUMBERS,
+	PROP_IGNORE_STOP_WORDS,
 
 	/* Performance */
 	PROP_MAX_WORDS_TO_INDEX,
@@ -82,6 +88,8 @@ enum {
 static ObjectToKeyFile conversions[] = {
 	{ G_TYPE_INT,     "min-word-length",    GROUP_INDEXING, "MinWordLength"   },
 	{ G_TYPE_INT,     "max-word-length",    GROUP_INDEXING, "MaxWordLength"   },
+	{ G_TYPE_BOOLEAN, "ignore-numbers",     GROUP_INDEXING, "IgnoreNumbers"   },
+	{ G_TYPE_BOOLEAN, "ignore-stop-words",  GROUP_INDEXING, "IgnoreStopWords" },
 	{ G_TYPE_INT,     "max-words-to-index", GROUP_INDEXING, "MaxWordsToIndex" },
 };
 
@@ -117,6 +125,20 @@ tracker_fts_config_class_init (TrackerFTSConfigClass *klass)
 	                                                   DEFAULT_MAX_WORD_LENGTH,
 	                                                   G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
 	g_object_class_install_property (object_class,
+	                                 PROP_IGNORE_NUMBERS,
+	                                 g_param_spec_boolean ("ignore-numbers",
+	                                                       "Ignore numbers",
+	                                                       " Flag to ignore numbers in FTS (default: TRUE)",
+	                                                       DEFAULT_IGNORE_NUMBERS,
+	                                                       G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+	g_object_class_install_property (object_class,
+	                                 PROP_IGNORE_STOP_WORDS,
+	                                 g_param_spec_boolean ("ignore-stop-words",
+	                                                       "Ignore stop words",
+	                                                       " Flag to ignore stop words in FTS (default: TRUE)",
+	                                                       DEFAULT_IGNORE_STOP_WORDS,
+	                                                       G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+	g_object_class_install_property (object_class,
 	                                 PROP_MAX_WORDS_TO_INDEX,
 	                                 g_param_spec_int ("max-words-to-index",
 	                                                   "Maximum words to index",
@@ -138,7 +160,7 @@ static void
 config_set_property (GObject      *object,
                      guint         param_id,
                      const GValue *value,
-                     GParamSpec           *pspec)
+                     GParamSpec   *pspec)
 {
 	switch (param_id) {
 		/* Indexing */
@@ -150,6 +172,14 @@ config_set_property (GObject      *object,
 		tracker_fts_config_set_max_word_length (TRACKER_FTS_CONFIG (object),
 		                                        g_value_get_int (value));
 		break;
+	case PROP_IGNORE_NUMBERS:
+		tracker_fts_config_set_ignore_numbers (TRACKER_FTS_CONFIG (object),
+		                                       g_value_get_boolean (value));
+		break;
+	case PROP_IGNORE_STOP_WORDS:
+		tracker_fts_config_set_ignore_stop_words (TRACKER_FTS_CONFIG (object),
+		                                          g_value_get_boolean (value));
+		break;
 	case PROP_MAX_WORDS_TO_INDEX:
 		tracker_fts_config_set_max_words_to_index (TRACKER_FTS_CONFIG (object),
 		                                           g_value_get_int (value));
@@ -179,6 +209,12 @@ config_get_property (GObject    *object,
 	case PROP_MAX_WORD_LENGTH:
 		g_value_set_int (value, priv->max_word_length);
 		break;
+	case PROP_IGNORE_NUMBERS:
+		g_value_set_boolean (value, priv->ignore_numbers);
+		break;
+	case PROP_IGNORE_STOP_WORDS:
+		g_value_set_boolean (value, priv->ignore_stop_words);
+		break;
 	case PROP_MAX_WORDS_TO_INDEX:
 		g_value_set_int (value, priv->max_words_to_index);
 		break;
@@ -235,7 +271,13 @@ config_create_with_defaults (TrackerFTSConfig *config,
 			                        tracker_keyfile_object_default_int (config,
 			                                                            conversions[i].property));
 			break;
-
+		case G_TYPE_BOOLEAN:
+			g_key_file_set_boolean (key_file,
+			                        conversions[i].group,
+			                        conversions[i].key,
+			                        tracker_keyfile_object_default_boolean (config,
+			                                                                conversions[i].property));
+			break;
 		default:
 			g_assert_not_reached ();
 			break;
@@ -279,7 +321,13 @@ config_load (TrackerFTSConfig *config)
 			                                 conversions[i].group,
 			                                 conversions[i].key);
 			break;
-
+		case G_TYPE_BOOLEAN:
+			tracker_keyfile_object_load_boolean (G_OBJECT (file),
+			                                     conversions[i].property,
+			                                     file->key_file,
+			                                     conversions[i].group,
+			                                     conversions[i].key);
+			break;
 		default:
 			g_assert_not_reached ();
 			break;
@@ -313,6 +361,14 @@ config_save (TrackerFTSConfig *config)
 			                                 conversions[i].key);
 			break;
 
+		case G_TYPE_BOOLEAN:
+			tracker_keyfile_object_save_boolean (file,
+			                                     conversions[i].property,
+			                                     file->key_file,
+			                                     conversions[i].group,
+			                                     conversions[i].key);
+			break;
+
 		default:
 			g_assert_not_reached ();
 			break;
@@ -362,6 +418,30 @@ tracker_fts_config_get_max_word_length (TrackerFTSConfig *config)
 	return priv->max_word_length;
 }
 
+gboolean
+tracker_fts_config_get_ignore_numbers (TrackerFTSConfig *config)
+{
+	TrackerFTSConfigPrivate *priv;
+
+	g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_IGNORE_NUMBERS);
+
+	priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+	return priv->ignore_numbers;
+}
+
+gboolean
+tracker_fts_config_get_ignore_stop_words (TrackerFTSConfig *config)
+{
+	TrackerFTSConfigPrivate *priv;
+
+	g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_IGNORE_STOP_WORDS);
+
+	priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+	return priv->ignore_stop_words;
+}
+
 gint
 tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config)
 {
@@ -411,6 +491,34 @@ tracker_fts_config_set_max_word_length (TrackerFTSConfig *config,
 }
 
 void
+tracker_fts_config_set_ignore_numbers (TrackerFTSConfig *config,
+                                       gboolean          value)
+{
+	TrackerFTSConfigPrivate *priv;
+
+	g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));
+
+	priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+	priv->ignore_numbers = value;
+	g_object_notify (G_OBJECT (config), "ignore-numbers");
+}
+
+void
+tracker_fts_config_set_ignore_stop_words (TrackerFTSConfig *config,
+                                          gboolean          value)
+{
+	TrackerFTSConfigPrivate *priv;
+
+	g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));
+
+	priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+	priv->ignore_stop_words = value;
+	g_object_notify (G_OBJECT (config), "ignore-stop-words");
+}
+
+void
 tracker_fts_config_set_max_words_to_index (TrackerFTSConfig *config,
                                            gint              value)
 {
diff --git a/src/libtracker-fts/tracker-fts-config.h b/src/libtracker-fts/tracker-fts-config.h
index f366eeb..9c83e35 100644
--- a/src/libtracker-fts/tracker-fts-config.h
+++ b/src/libtracker-fts/tracker-fts-config.h
@@ -50,11 +50,17 @@ TrackerFTSConfig *tracker_fts_config_new                    (void);
 gboolean          tracker_fts_config_save                   (TrackerFTSConfig *config);
 gint              tracker_fts_config_get_min_word_length    (TrackerFTSConfig *config);
 gint              tracker_fts_config_get_max_word_length    (TrackerFTSConfig *config);
+gboolean          tracker_fts_config_get_ignore_numbers     (TrackerFTSConfig *config);
+gboolean          tracker_fts_config_get_ignore_stop_words  (TrackerFTSConfig *config);
 gint              tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config);
 void              tracker_fts_config_set_min_word_length    (TrackerFTSConfig *config,
                                                              gint              value);
 void              tracker_fts_config_set_max_word_length    (TrackerFTSConfig *config,
                                                              gint              value);
+void              tracker_fts_config_set_ignore_numbers     (TrackerFTSConfig *config,
+                                                             gboolean          value);
+void              tracker_fts_config_set_ignore_stop_words  (TrackerFTSConfig *config,
+                                                             gboolean          value);
 void              tracker_fts_config_set_max_words_to_index (TrackerFTSConfig *config,
                                                              gint              value);
 
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index ab2b329..7e2fabf 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -2330,7 +2330,8 @@ struct fulltext_vtab {
   const char *zName;		   /* virtual table name */
   int nColumn;			   /* number of columns in virtual table */
   TrackerParser *parser;	   /* tokenizer for inserts and queries */
-  gboolean stop_words;
+  gboolean ignore_numbers;
+  gboolean ignore_stop_words;
   int max_words;
   int min_word_length;
 
@@ -3369,14 +3370,13 @@ static int constructVtab(
 
   min_len = tracker_fts_config_get_min_word_length (config);
   max_len = tracker_fts_config_get_max_word_length (config);
+  v->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
+  v->ignore_stop_words = tracker_fts_config_get_ignore_stop_words (config);
 
   v->max_words = tracker_fts_config_get_max_words_to_index (config);
   v->min_word_length = min_len;
   v->parser = tracker_parser_new (language, max_len);
 
-  /* disable stop words if TRACKER_FTS_STOP_WORDS is set to 0 - used by tests */
-  v->stop_words = g_strcmp0 (g_getenv ("TRACKER_FTS_STOP_WORDS"), "0") != 0;
-
   g_object_unref (language);
 
 
@@ -3666,7 +3666,14 @@ static void snippetOffsetsOfColumn(
   pVtab = pQuery->pFts;
   nColumn = pVtab->nColumn;
 
-  tracker_parser_reset (pVtab->parser, zDoc, nDoc, FALSE, TRUE, pVtab->stop_words, TRUE, TRUE);
+  tracker_parser_reset (pVtab->parser,
+                        zDoc,
+                        nDoc,
+                        FALSE,
+                        TRUE,
+                        pVtab->ignore_stop_words,
+                        TRUE,
+                        pVtab->ignore_numbers);
 
   aTerm = pQuery->pTerms;
   nTerm = pQuery->nTerms;
@@ -3690,7 +3697,7 @@ static void snippetOffsetsOfColumn(
 
     if (!zToken) break;
 
-    if (stop_word) {
+    if (pVtab->ignore_stop_words && stop_word) {
       continue;
     }
 
@@ -4363,7 +4370,14 @@ static int tokenizeSegment(
   int firstIndex = pQuery->nTerms;
   int nTerm = 1;
 
-  tracker_parser_reset (parser, pSegment, nSegment, FALSE, TRUE, v->stop_words, FALSE, TRUE);
+  tracker_parser_reset (parser,
+                        pSegment,
+                        nSegment,
+                        FALSE,
+                        TRUE,
+                        v->ignore_stop_words,
+                        FALSE,
+                        v->ignore_numbers);
 
   while( 1 ){
     const char *pToken;
@@ -4442,7 +4456,7 @@ static int tokenizeSegment(
       if (nToken < v->min_word_length) {
         continue;
       }
-      if (stop_word != 0) {
+      if (v->ignore_stop_words && stop_word) {
         continue;
       }
     }
@@ -4816,7 +4830,14 @@ int Catid,
 
   if (!zText) return SQLITE_OK;
 
-  tracker_parser_reset (parser, zText, strlen (zText), FALSE, TRUE, v->stop_words, TRUE, TRUE);
+  tracker_parser_reset (parser,
+                        zText,
+                        strlen (zText),
+                        FALSE,
+                        TRUE,
+                        v->ignore_stop_words,
+                        TRUE,
+                        v->ignore_numbers);
 
   while( 1 ){
 
@@ -4835,7 +4856,7 @@ int Catid,
 
   // printf("token being indexed  is %s, begin is %d, end is %d and length is %d\n", pToken, iStartOffset, iEndOffset, nTokenBytes);
 
-   if (stop_word) {
+   if (v->ignore_stop_words && stop_word) {
 	continue;
    }
 
diff --git a/src/libtracker-fts/tracker-parser-glib.c b/src/libtracker-fts/tracker-parser-glib.c
index a2144af..89a89b9 100644
--- a/src/libtracker-fts/tracker-parser-glib.c
+++ b/src/libtracker-fts/tracker-parser-glib.c
@@ -72,12 +72,12 @@ struct TrackerParser {
 
 	TrackerLanguage       *language;
 	gboolean               enable_stemmer;
-	gboolean               enable_stop_words;
+	gboolean               ignore_stop_words;
 	guint                  max_words_to_index;
 	guint                  max_word_length;
 	gboolean               delimit_words;
-	gboolean               skip_reserved_words;
-	gboolean               skip_numbers;
+	gboolean               ignore_reserved_words;
+	gboolean               ignore_numbers;
 
 	/* Private members */
 	gchar                   *word;
@@ -278,14 +278,14 @@ parser_next (TrackerParser *parser,
 				/* word break */
 
 				/* check if word is reserved */
-				if (is_valid && parser->skip_reserved_words) {
+				if (is_valid && parser->ignore_reserved_words) {
 					if (length == 2 && word[0] == 'o' && word[1] == 'r') {
 						is_valid = FALSE;
 					}
 				}
 
 				if (!is_valid ||
-				    (parser->skip_numbers && word_type == TRACKER_PARSER_WORD_NUM)) {
+				    (parser->ignore_numbers && word_type == TRACKER_PARSER_WORD_NUM)) {
 					word_type = TRACKER_PARSER_WORD_IGNORE;
 					is_valid = TRUE;
 					length = 0;
@@ -312,12 +312,12 @@ parser_next (TrackerParser *parser,
 			 * underscore if we are filtering.
 			 */
 
-			if (parser->skip_numbers && type == TRACKER_PARSER_WORD_NUM) {
+			if (parser->ignore_numbers && type == TRACKER_PARSER_WORD_NUM) {
 				is_valid = FALSE;
 				continue;
 			} else {
 				if (type == TRACKER_PARSER_WORD_HYPHEN) {
-					is_valid = !parser->skip_reserved_words;
+					is_valid = !parser->ignore_reserved_words;
 					continue;
 				}
 			}
@@ -462,9 +462,9 @@ tracker_parser_reset (TrackerParser *parser,
                       gint           txt_size,
                       gboolean       delimit_words,
                       gboolean       enable_stemmer,
-                      gboolean       enable_stop_words,
-                      gboolean       skip_reserved_words,
-                      gboolean       skip_numbers)
+                      gboolean       ignore_stop_words,
+                      gboolean       ignore_reserved_words,
+                      gboolean       ignore_numbers)
 {
 	g_return_if_fail (parser != NULL);
 	g_return_if_fail (txt != NULL);
@@ -476,13 +476,13 @@ tracker_parser_reset (TrackerParser *parser,
 	parser->encoding = get_encoding (txt);
 
 	parser->enable_stemmer = enable_stemmer;
-	parser->enable_stop_words = enable_stop_words;
+	parser->ignore_stop_words = ignore_stop_words;
 	parser->delimit_words = delimit_words;
 
 	parser->txt_size = txt_size;
 	parser->txt = txt;
-	parser->skip_reserved_words = skip_reserved_words;
-	parser->skip_numbers = skip_numbers;
+	parser->ignore_reserved_words = ignore_reserved_words;
+	parser->ignore_numbers = ignore_numbers;
 
 	g_free (parser->word);
 	parser->word = NULL;
@@ -618,7 +618,7 @@ tracker_parser_next (TrackerParser *parser,
 		}
 
 		if (str &&
-		    parser->enable_stop_words &&
+		    parser->ignore_stop_words &&
 		    tracker_language_is_stop_word (parser->language, str)) {
 			*stop_word = TRUE;
 		} else {
diff --git a/src/libtracker-fts/tracker-parser-libicu.c b/src/libtracker-fts/tracker-parser-libicu.c
index 190931c..2579231 100644
--- a/src/libtracker-fts/tracker-parser-libicu.c
+++ b/src/libtracker-fts/tracker-parser-libicu.c
@@ -44,12 +44,10 @@ typedef enum {
 /* Max possible length of a UChar encoded string (just a safety limit) */
 #define WORD_BUFFER_LENGTH 512
 
-
-static gchar *process_word_uchar (TrackerParser *parser,
-                                  const UChar   *word,
-                                  gint           length,
-                                  TrackerParserWordType type);
-
+static gchar *process_word_uchar (TrackerParser         *parser,
+                                  const UChar           *word,
+                                  gint                   length,
+                                  TrackerParserWordType  type);
 
 struct TrackerParser {
 	const gchar           *txt;
@@ -57,12 +55,12 @@ struct TrackerParser {
 
 	TrackerLanguage       *language;
 	gboolean               enable_stemmer;
-	gboolean               enable_stop_words;
 	guint                  max_words_to_index;
 	guint                  max_word_length;
 	gboolean               delimit_words;
-	gboolean               skip_reserved_words;
-	gboolean               skip_numbers;
+	gboolean               ignore_stop_words;
+	gboolean               ignore_reserved_words;
+	gboolean               ignore_numbers;
 
 	/* Private members */
 	gchar                 *word;
@@ -86,7 +84,7 @@ struct TrackerParser {
 static gboolean
 get_word_info (const UChar           *word,
                gsize                  word_length,
-               gboolean               skip_numbers,
+               gboolean               ignore_numbers,
                gboolean              *p_is_allowed_word_start,
                TrackerParserWordType *p_word_type)
 {
@@ -117,7 +115,7 @@ get_word_info (const UChar           *word,
 	    unichar_gc == U_MODIFIER_LETTER ||
 	    unichar_gc == U_OTHER_LETTER ||
 	    IS_UNDERSCORE_UCS4 ((guint32)unichar) ||
-	    (!skip_numbers &&
+	    (!ignore_numbers &&
 	     (unichar_gc == U_DECIMAL_DIGIT_NUMBER ||
 	      unichar_gc == U_LETTER_NUMBER ||
 	      unichar_gc == U_OTHER_NUMBER))) {
@@ -197,9 +195,9 @@ parser_next (TrackerParser *parser,
 		/* g_debug ("next_word_offset_utf8: %" G_GSIZE_FORMAT, next_word_offset_utf8); */
 		/* g_debug ("current_word_offset_utf8: %" G_GSIZE_FORMAT, current_word_offset_utf8); */
 
-		/* Skip the word if longer than the maximum allowed */
+		/* Ignore the word if longer than the maximum allowed */
 		if (word_length_utf8 >= parser->max_word_length) {
-			/* Skip this word and keep on looping */
+			/* Ignore this word and keep on looping */
 			parser->cursor = next_word_offset_uchar;
 			continue;
 		}
@@ -207,7 +205,7 @@ parser_next (TrackerParser *parser,
 		/* Get word info... */
 		if (!get_word_info (&parser->utxt[parser->cursor],
 		                    word_length_uchar,
-		                    parser->skip_numbers,
+		                    parser->ignore_numbers,
 		                    &is_allowed,
 		                    &type)) {
 			/* Quit loop just in case */
@@ -215,18 +213,18 @@ parser_next (TrackerParser *parser,
 			break;
 		}
 
-		/* Skip the word if not an allowed word start */
+		/* Ignore the word if not an allowed word start */
 		if (!is_allowed) {
-			/* Skip this word and keep on looping */
+			/* Ignore this word and keep on looping */
 			parser->cursor = next_word_offset_uchar;
 			continue;
 		}
 
 		/* check if word is reserved (looking at ORIGINAL UTF-8 buffer here! */
-		if (parser->skip_reserved_words &&
+		if (parser->ignore_reserved_words &&
 		    tracker_parser_is_reserved_word_utf8 (&parser->txt[current_word_offset_utf8],
 		                                          word_length_utf8)) {
-			/* Skip this word and keep on looping */
+			/* Ignore this word and keep on looping */
 			parser->cursor = next_word_offset_uchar;
 			continue;
 		}
@@ -248,7 +246,7 @@ parser_next (TrackerParser *parser,
 		                                     truncated_length,
 		                                     type);
 		if (!processed_word) {
-			/* Skip this word and keep on looping */
+			/* Ignore this word and keep on looping */
 			parser->cursor = next_word_offset_uchar;
 			continue;
 		}
@@ -325,9 +323,9 @@ tracker_parser_reset (TrackerParser *parser,
                       gint           txt_size,
                       gboolean       delimit_words,
                       gboolean       enable_stemmer,
-                      gboolean       enable_stop_words,
-                      gboolean       skip_reserved_words,
-                      gboolean       skip_numbers)
+                      gboolean       ignore_stop_words,
+                      gboolean       ignore_reserved_words,
+                      gboolean       ignore_numbers)
 {
 	UErrorCode error = U_ZERO_ERROR;
 	UConverter *converter;
@@ -338,13 +336,13 @@ tracker_parser_reset (TrackerParser *parser,
 	g_return_if_fail (txt != NULL);
 
 	parser->enable_stemmer = enable_stemmer;
-	parser->enable_stop_words = enable_stop_words;
+	parser->ignore_stop_words = ignore_stop_words;
 	parser->delimit_words = delimit_words;
 
 	parser->txt_size = txt_size;
 	parser->txt = txt;
-	parser->skip_reserved_words = skip_reserved_words;
-	parser->skip_numbers = skip_numbers;
+	parser->ignore_reserved_words = ignore_reserved_words;
+	parser->ignore_numbers = ignore_numbers;
 
 	g_free (parser->word);
 	parser->word = NULL;
@@ -619,7 +617,7 @@ tracker_parser_next (TrackerParser *parser,
 	}
 
 	if (str &&
-	    parser->enable_stop_words &&
+	    parser->ignore_stop_words &&
 	    tracker_language_is_stop_word (parser->language, str)) {
 		*stop_word = TRUE;
 	} else {
diff --git a/src/libtracker-fts/tracker-parser-libunistring.c b/src/libtracker-fts/tracker-parser-libunistring.c
index 4a6ff35..de3f03d 100644
--- a/src/libtracker-fts/tracker-parser-libunistring.c
+++ b/src/libtracker-fts/tracker-parser-libunistring.c
@@ -54,12 +54,12 @@ struct TrackerParser {
 
 	TrackerLanguage       *language;
 	gboolean               enable_stemmer;
-	gboolean               enable_stop_words;
 	guint                  max_words_to_index;
 	guint                  max_word_length;
 	gboolean               delimit_words;
-	gboolean               skip_reserved_words;
-	gboolean               skip_numbers;
+	gboolean               ignore_stop_words;
+	gboolean               ignore_reserved_words;
+	gboolean               ignore_numbers;
 
 	/* Private members */
 	gchar                   *word;
@@ -175,25 +175,25 @@ parser_next (TrackerParser *parser,
 			break;
 		}
 
-		/* Skip the word if not an allowed word start */
+		/* Ignore the word if not an allowed word start */
 		if (!is_allowed) {
-			/* Skip this word and keep on looping */
+			/* Ignore this word and keep on looping */
 			parser->cursor += word_length;
 			continue;
 		}
 
-		/* Skip the word if longer than the maximum allowed */
+		/* Ignore the word if longer than the maximum allowed */
 		if (word_length >= parser->max_word_length) {
-			/* Skip this word and keep on looping */
+			/* Ignore this word and keep on looping */
 			parser->cursor += word_length;
 			continue;
 		}
 
-		/* check if word is reserved and skip it if so */
-		if (parser->skip_reserved_words &&
+		/* check if word is reserved and ignore it if so */
+		if (parser->ignore_reserved_words &&
 		    tracker_parser_is_reserved_word_utf8 (&parser->txt[parser->cursor],
 		                                          word_length)) {
-			/* Skip this word and keep on looping */
+			/* Ignore this word and keep on looping */
 			parser->cursor += word_length;
 			continue;
 		}
@@ -212,7 +212,7 @@ parser_next (TrackerParser *parser,
 		                                    truncated_length,
 		                                    type);
 		if (!processed_word) {
-			/* Skip this word and keep on looping */
+			/* Ignore this word and keep on looping */
 			parser->cursor += word_length;
 			continue;
 		}
@@ -280,21 +280,21 @@ tracker_parser_reset (TrackerParser *parser,
                       gint           txt_size,
                       gboolean       delimit_words,
                       gboolean       enable_stemmer,
-                      gboolean       enable_stop_words,
-                      gboolean       skip_reserved_words,
-                      gboolean       skip_numbers)
+                      gboolean       ignore_stop_words,
+                      gboolean       ignore_reserved_words,
+                      gboolean       ignore_numbers)
 {
 	g_return_if_fail (parser != NULL);
 	g_return_if_fail (txt != NULL);
 
 	parser->enable_stemmer = enable_stemmer;
-	parser->enable_stop_words = enable_stop_words;
+	parser->ignore_stop_words = ignore_stop_words;
 	parser->delimit_words = delimit_words;
 
 	parser->txt_size = txt_size;
 	parser->txt = txt;
-	parser->skip_reserved_words = skip_reserved_words;
-	parser->skip_numbers = skip_numbers;
+	parser->ignore_reserved_words = ignore_reserved_words;
+	parser->ignore_numbers = ignore_numbers;
 
 	g_free (parser->word);
 	parser->word = NULL;
@@ -316,7 +316,7 @@ tracker_parser_reset (TrackerParser *parser,
 	/* Prepare a custom category which is a combination of the
 	 * desired ones */
 	parser->allowed_start = UC_LETTER;
-	if (!parser->skip_numbers) {
+	if (!parser->ignore_numbers) {
 		parser->allowed_start = uc_general_category_or (parser->allowed_start, UC_NUMBER);
 	}
 }
@@ -376,7 +376,7 @@ process_word_utf8 (TrackerParser         *parser,
 		                          word_buffer,
 		                          &new_word_length);
 
-		/* Case folding + Normalization failed, skip this word */
+		/* Case folding + Normalization failed, ignore this word */
 		g_return_val_if_fail (normalized != NULL, NULL);
 
 		/* If output buffer is not the same as the one passed to
@@ -481,7 +481,7 @@ tracker_parser_next (TrackerParser *parser,
 	}
 
 	if (str &&
-	    parser->enable_stop_words &&
+	    parser->ignore_stop_words &&
 	    tracker_language_is_stop_word (parser->language, str)) {
 		*stop_word = TRUE;
 	} else {
diff --git a/src/libtracker-fts/tracker-parser.h b/src/libtracker-fts/tracker-parser.h
index cad4442..f4065b9 100644
--- a/src/libtracker-fts/tracker-parser.h
+++ b/src/libtracker-fts/tracker-parser.h
@@ -37,9 +37,9 @@ void           tracker_parser_reset           (TrackerParser   *parser,
                                                gint             txt_size,
                                                gboolean         delimit_words,
                                                gboolean         enable_stemmer,
-                                               gboolean         enable_stop_words,
-                                               gboolean         skip_reserved_words,
-                                               gboolean         skip_numbers);
+                                               gboolean         ignore_stop_words,
+                                               gboolean         ignore_reserved_words,
+                                               gboolean         ignore_numbers);
 
 const gchar *  tracker_parser_next            (TrackerParser   *parser,
                                                gint            *position,



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]