[tracker/parser-unicode-libs-review] Fixes GB#491850 and GB#503366: new FTS configuration parameters
- From: Aleksander Morgado <aleksm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/parser-unicode-libs-review] Fixes GB#491850 and GB#503366: new FTS configuration parameters
- Date: Tue, 18 May 2010 18:36:38 +0000 (UTC)
commit 9e3638d9440d385bc83ddca048259ee96d99f4c1
Author: Aleksander Morgado <aleksander lanedo com>
Date: Tue May 18 17:53:40 2010 +0200
Fixes GB#491850 and GB#503366: new FTS configuration parameters
* IgnoreNumbers (default: TRUE)
* IgnoreStopWords (default: TRUE)
docs/manpages/tracker-fts.cfg.5 | 8 ++
src/libtracker-fts/tracker-fts-config.c | 116 +++++++++++++++++++++-
src/libtracker-fts/tracker-fts-config.h | 6 +
src/libtracker-fts/tracker-fts.c | 41 ++++++--
src/libtracker-fts/tracker-parser-glib.c | 28 +++---
src/libtracker-fts/tracker-parser-libicu.c | 50 +++++-----
src/libtracker-fts/tracker-parser-libunistring.c | 40 ++++----
src/libtracker-fts/tracker-parser.h | 6 +-
8 files changed, 218 insertions(+), 77 deletions(-)
---
diff --git a/docs/manpages/tracker-fts.cfg.5 b/docs/manpages/tracker-fts.cfg.5
index 96dc11a..176fe54 100644
--- a/docs/manpages/tracker-fts.cfg.5
+++ b/docs/manpages/tracker-fts.cfg.5
@@ -23,6 +23,14 @@ Set the minimum length of words to index (0->30).
Set the maximum length of words to index (0->200).
.TP
+.B IgnoreNumbers=true
+Set to true if words starting with numbers should be ignored.
+
+.TP
+.B IgnoreStopWords=true
+Set to true if stop words should be ignored.
+
+.TP
.B MaxWordsToIndex=10000
Maximum unique words to index from a file's content.
diff --git a/src/libtracker-fts/tracker-fts-config.c b/src/libtracker-fts/tracker-fts-config.c
index 4c1a7d7..736d044 100644
--- a/src/libtracker-fts/tracker-fts-config.c
+++ b/src/libtracker-fts/tracker-fts-config.c
@@ -37,12 +37,16 @@
/* Default values */
#define DEFAULT_MIN_WORD_LENGTH 3 /* 0->30 */
#define DEFAULT_MAX_WORD_LENGTH 30 /* 0->200 */
-#define DEFAULT_MAX_WORDS_TO_INDEX 10000
+#define DEFAULT_MAX_WORDS_TO_INDEX 10000
+#define DEFAULT_IGNORE_NUMBERS TRUE
+#define DEFAULT_IGNORE_STOP_WORDS TRUE
typedef struct {
/* Indexing */
gint min_word_length;
gint max_word_length;
+ gboolean ignore_numbers;
+ gboolean ignore_stop_words;
gint max_words_to_index;
} TrackerFTSConfigPrivate;
@@ -74,6 +78,8 @@ enum {
/* Indexing */
PROP_MIN_WORD_LENGTH,
PROP_MAX_WORD_LENGTH,
+ PROP_IGNORE_NUMBERS,
+ PROP_IGNORE_STOP_WORDS,
/* Performance */
PROP_MAX_WORDS_TO_INDEX,
@@ -82,6 +88,8 @@ enum {
static ObjectToKeyFile conversions[] = {
{ G_TYPE_INT, "min-word-length", GROUP_INDEXING, "MinWordLength" },
{ G_TYPE_INT, "max-word-length", GROUP_INDEXING, "MaxWordLength" },
+ { G_TYPE_BOOLEAN, "ignore-numbers", GROUP_INDEXING, "IgnoreNumbers" },
+ { G_TYPE_BOOLEAN, "ignore-stop-words", GROUP_INDEXING, "IgnoreStopWords" },
{ G_TYPE_INT, "max-words-to-index", GROUP_INDEXING, "MaxWordsToIndex" },
};
@@ -117,6 +125,20 @@ tracker_fts_config_class_init (TrackerFTSConfigClass *klass)
DEFAULT_MAX_WORD_LENGTH,
G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
g_object_class_install_property (object_class,
+ PROP_IGNORE_NUMBERS,
+ g_param_spec_boolean ("ignore-numbers",
+ "Ignore numbers",
+ " Flag to ignore numbers in FTS (default: TRUE)",
+ DEFAULT_IGNORE_NUMBERS,
+ G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+ g_object_class_install_property (object_class,
+ PROP_IGNORE_STOP_WORDS,
+ g_param_spec_boolean ("ignore-stop-words",
+ "Ignore stop words",
+ " Flag to ignore stop words in FTS (default: TRUE)",
+ DEFAULT_IGNORE_STOP_WORDS,
+ G_PARAM_READWRITE | G_PARAM_CONSTRUCT));
+ g_object_class_install_property (object_class,
PROP_MAX_WORDS_TO_INDEX,
g_param_spec_int ("max-words-to-index",
"Maximum words to index",
@@ -138,7 +160,7 @@ static void
config_set_property (GObject *object,
guint param_id,
const GValue *value,
- GParamSpec *pspec)
+ GParamSpec *pspec)
{
switch (param_id) {
/* Indexing */
@@ -150,6 +172,14 @@ config_set_property (GObject *object,
tracker_fts_config_set_max_word_length (TRACKER_FTS_CONFIG (object),
g_value_get_int (value));
break;
+ case PROP_IGNORE_NUMBERS:
+ tracker_fts_config_set_ignore_numbers (TRACKER_FTS_CONFIG (object),
+ g_value_get_boolean (value));
+ break;
+ case PROP_IGNORE_STOP_WORDS:
+ tracker_fts_config_set_ignore_stop_words (TRACKER_FTS_CONFIG (object),
+ g_value_get_boolean (value));
+ break;
case PROP_MAX_WORDS_TO_INDEX:
tracker_fts_config_set_max_words_to_index (TRACKER_FTS_CONFIG (object),
g_value_get_int (value));
@@ -179,6 +209,12 @@ config_get_property (GObject *object,
case PROP_MAX_WORD_LENGTH:
g_value_set_int (value, priv->max_word_length);
break;
+ case PROP_IGNORE_NUMBERS:
+ g_value_set_boolean (value, priv->ignore_numbers);
+ break;
+ case PROP_IGNORE_STOP_WORDS:
+ g_value_set_boolean (value, priv->ignore_stop_words);
+ break;
case PROP_MAX_WORDS_TO_INDEX:
g_value_set_int (value, priv->max_words_to_index);
break;
@@ -235,7 +271,13 @@ config_create_with_defaults (TrackerFTSConfig *config,
tracker_keyfile_object_default_int (config,
conversions[i].property));
break;
-
+ case G_TYPE_BOOLEAN:
+ g_key_file_set_boolean (key_file,
+ conversions[i].group,
+ conversions[i].key,
+ tracker_keyfile_object_default_boolean (config,
+ conversions[i].property));
+ break;
default:
g_assert_not_reached ();
break;
@@ -279,7 +321,13 @@ config_load (TrackerFTSConfig *config)
conversions[i].group,
conversions[i].key);
break;
-
+ case G_TYPE_BOOLEAN:
+ tracker_keyfile_object_load_boolean (G_OBJECT (file),
+ conversions[i].property,
+ file->key_file,
+ conversions[i].group,
+ conversions[i].key);
+ break;
default:
g_assert_not_reached ();
break;
@@ -313,6 +361,14 @@ config_save (TrackerFTSConfig *config)
conversions[i].key);
break;
+ case G_TYPE_BOOLEAN:
+ tracker_keyfile_object_save_boolean (file,
+ conversions[i].property,
+ file->key_file,
+ conversions[i].group,
+ conversions[i].key);
+ break;
+
default:
g_assert_not_reached ();
break;
@@ -362,6 +418,30 @@ tracker_fts_config_get_max_word_length (TrackerFTSConfig *config)
return priv->max_word_length;
}
+gboolean
+tracker_fts_config_get_ignore_numbers (TrackerFTSConfig *config)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_IGNORE_NUMBERS);
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ return priv->ignore_numbers;
+}
+
+gboolean
+tracker_fts_config_get_ignore_stop_words (TrackerFTSConfig *config)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_val_if_fail (TRACKER_IS_FTS_CONFIG (config), DEFAULT_IGNORE_STOP_WORDS);
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ return priv->ignore_stop_words;
+}
+
gint
tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config)
{
@@ -411,6 +491,34 @@ tracker_fts_config_set_max_word_length (TrackerFTSConfig *config,
}
void
+tracker_fts_config_set_ignore_numbers (TrackerFTSConfig *config,
+ gboolean value)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ priv->ignore_numbers = value;
+ g_object_notify (G_OBJECT (config), "ignore-numbers");
+}
+
+void
+tracker_fts_config_set_ignore_stop_words (TrackerFTSConfig *config,
+ gboolean value)
+{
+ TrackerFTSConfigPrivate *priv;
+
+ g_return_if_fail (TRACKER_IS_FTS_CONFIG (config));
+
+ priv = TRACKER_FTS_CONFIG_GET_PRIVATE (config);
+
+ priv->ignore_stop_words = value;
+ g_object_notify (G_OBJECT (config), "ignore-stop-words");
+}
+
+void
tracker_fts_config_set_max_words_to_index (TrackerFTSConfig *config,
gint value)
{
diff --git a/src/libtracker-fts/tracker-fts-config.h b/src/libtracker-fts/tracker-fts-config.h
index f366eeb..9c83e35 100644
--- a/src/libtracker-fts/tracker-fts-config.h
+++ b/src/libtracker-fts/tracker-fts-config.h
@@ -50,11 +50,17 @@ TrackerFTSConfig *tracker_fts_config_new (void);
gboolean tracker_fts_config_save (TrackerFTSConfig *config);
gint tracker_fts_config_get_min_word_length (TrackerFTSConfig *config);
gint tracker_fts_config_get_max_word_length (TrackerFTSConfig *config);
+gboolean tracker_fts_config_get_ignore_numbers (TrackerFTSConfig *config);
+gboolean tracker_fts_config_get_ignore_stop_words (TrackerFTSConfig *config);
gint tracker_fts_config_get_max_words_to_index (TrackerFTSConfig *config);
void tracker_fts_config_set_min_word_length (TrackerFTSConfig *config,
gint value);
void tracker_fts_config_set_max_word_length (TrackerFTSConfig *config,
gint value);
+void tracker_fts_config_set_ignore_numbers (TrackerFTSConfig *config,
+ gboolean value);
+void tracker_fts_config_set_ignore_stop_words (TrackerFTSConfig *config,
+ gboolean value);
void tracker_fts_config_set_max_words_to_index (TrackerFTSConfig *config,
gint value);
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index ab2b329..7e2fabf 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -2330,7 +2330,8 @@ struct fulltext_vtab {
const char *zName; /* virtual table name */
int nColumn; /* number of columns in virtual table */
TrackerParser *parser; /* tokenizer for inserts and queries */
- gboolean stop_words;
+ gboolean ignore_numbers;
+ gboolean ignore_stop_words;
int max_words;
int min_word_length;
@@ -3369,14 +3370,13 @@ static int constructVtab(
min_len = tracker_fts_config_get_min_word_length (config);
max_len = tracker_fts_config_get_max_word_length (config);
+ v->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
+ v->ignore_stop_words = tracker_fts_config_get_ignore_stop_words (config);
v->max_words = tracker_fts_config_get_max_words_to_index (config);
v->min_word_length = min_len;
v->parser = tracker_parser_new (language, max_len);
- /* disable stop words if TRACKER_FTS_STOP_WORDS is set to 0 - used by tests */
- v->stop_words = g_strcmp0 (g_getenv ("TRACKER_FTS_STOP_WORDS"), "0") != 0;
-
g_object_unref (language);
@@ -3666,7 +3666,14 @@ static void snippetOffsetsOfColumn(
pVtab = pQuery->pFts;
nColumn = pVtab->nColumn;
- tracker_parser_reset (pVtab->parser, zDoc, nDoc, FALSE, TRUE, pVtab->stop_words, TRUE, TRUE);
+ tracker_parser_reset (pVtab->parser,
+ zDoc,
+ nDoc,
+ FALSE,
+ TRUE,
+ pVtab->ignore_stop_words,
+ TRUE,
+ pVtab->ignore_numbers);
aTerm = pQuery->pTerms;
nTerm = pQuery->nTerms;
@@ -3690,7 +3697,7 @@ static void snippetOffsetsOfColumn(
if (!zToken) break;
- if (stop_word) {
+ if (pVtab->ignore_stop_words && stop_word) {
continue;
}
@@ -4363,7 +4370,14 @@ static int tokenizeSegment(
int firstIndex = pQuery->nTerms;
int nTerm = 1;
- tracker_parser_reset (parser, pSegment, nSegment, FALSE, TRUE, v->stop_words, FALSE, TRUE);
+ tracker_parser_reset (parser,
+ pSegment,
+ nSegment,
+ FALSE,
+ TRUE,
+ v->ignore_stop_words,
+ FALSE,
+ v->ignore_numbers);
while( 1 ){
const char *pToken;
@@ -4442,7 +4456,7 @@ static int tokenizeSegment(
if (nToken < v->min_word_length) {
continue;
}
- if (stop_word != 0) {
+ if (v->ignore_stop_words && stop_word) {
continue;
}
}
@@ -4816,7 +4830,14 @@ int Catid,
if (!zText) return SQLITE_OK;
- tracker_parser_reset (parser, zText, strlen (zText), FALSE, TRUE, v->stop_words, TRUE, TRUE);
+ tracker_parser_reset (parser,
+ zText,
+ strlen (zText),
+ FALSE,
+ TRUE,
+ v->ignore_stop_words,
+ TRUE,
+ v->ignore_numbers);
while( 1 ){
@@ -4835,7 +4856,7 @@ int Catid,
// printf("token being indexed is %s, begin is %d, end is %d and length is %d\n", pToken, iStartOffset, iEndOffset, nTokenBytes);
- if (stop_word) {
+ if (v->ignore_stop_words && stop_word) {
continue;
}
diff --git a/src/libtracker-fts/tracker-parser-glib.c b/src/libtracker-fts/tracker-parser-glib.c
index a2144af..89a89b9 100644
--- a/src/libtracker-fts/tracker-parser-glib.c
+++ b/src/libtracker-fts/tracker-parser-glib.c
@@ -72,12 +72,12 @@ struct TrackerParser {
TrackerLanguage *language;
gboolean enable_stemmer;
- gboolean enable_stop_words;
+ gboolean ignore_stop_words;
guint max_words_to_index;
guint max_word_length;
gboolean delimit_words;
- gboolean skip_reserved_words;
- gboolean skip_numbers;
+ gboolean ignore_reserved_words;
+ gboolean ignore_numbers;
/* Private members */
gchar *word;
@@ -278,14 +278,14 @@ parser_next (TrackerParser *parser,
/* word break */
/* check if word is reserved */
- if (is_valid && parser->skip_reserved_words) {
+ if (is_valid && parser->ignore_reserved_words) {
if (length == 2 && word[0] == 'o' && word[1] == 'r') {
is_valid = FALSE;
}
}
if (!is_valid ||
- (parser->skip_numbers && word_type == TRACKER_PARSER_WORD_NUM)) {
+ (parser->ignore_numbers && word_type == TRACKER_PARSER_WORD_NUM)) {
word_type = TRACKER_PARSER_WORD_IGNORE;
is_valid = TRUE;
length = 0;
@@ -312,12 +312,12 @@ parser_next (TrackerParser *parser,
* underscore if we are filtering.
*/
- if (parser->skip_numbers && type == TRACKER_PARSER_WORD_NUM) {
+ if (parser->ignore_numbers && type == TRACKER_PARSER_WORD_NUM) {
is_valid = FALSE;
continue;
} else {
if (type == TRACKER_PARSER_WORD_HYPHEN) {
- is_valid = !parser->skip_reserved_words;
+ is_valid = !parser->ignore_reserved_words;
continue;
}
}
@@ -462,9 +462,9 @@ tracker_parser_reset (TrackerParser *parser,
gint txt_size,
gboolean delimit_words,
gboolean enable_stemmer,
- gboolean enable_stop_words,
- gboolean skip_reserved_words,
- gboolean skip_numbers)
+ gboolean ignore_stop_words,
+ gboolean ignore_reserved_words,
+ gboolean ignore_numbers)
{
g_return_if_fail (parser != NULL);
g_return_if_fail (txt != NULL);
@@ -476,13 +476,13 @@ tracker_parser_reset (TrackerParser *parser,
parser->encoding = get_encoding (txt);
parser->enable_stemmer = enable_stemmer;
- parser->enable_stop_words = enable_stop_words;
+ parser->ignore_stop_words = ignore_stop_words;
parser->delimit_words = delimit_words;
parser->txt_size = txt_size;
parser->txt = txt;
- parser->skip_reserved_words = skip_reserved_words;
- parser->skip_numbers = skip_numbers;
+ parser->ignore_reserved_words = ignore_reserved_words;
+ parser->ignore_numbers = ignore_numbers;
g_free (parser->word);
parser->word = NULL;
@@ -618,7 +618,7 @@ tracker_parser_next (TrackerParser *parser,
}
if (str &&
- parser->enable_stop_words &&
+ parser->ignore_stop_words &&
tracker_language_is_stop_word (parser->language, str)) {
*stop_word = TRUE;
} else {
diff --git a/src/libtracker-fts/tracker-parser-libicu.c b/src/libtracker-fts/tracker-parser-libicu.c
index 190931c..2579231 100644
--- a/src/libtracker-fts/tracker-parser-libicu.c
+++ b/src/libtracker-fts/tracker-parser-libicu.c
@@ -44,12 +44,10 @@ typedef enum {
/* Max possible length of a UChar encoded string (just a safety limit) */
#define WORD_BUFFER_LENGTH 512
-
-static gchar *process_word_uchar (TrackerParser *parser,
- const UChar *word,
- gint length,
- TrackerParserWordType type);
-
+static gchar *process_word_uchar (TrackerParser *parser,
+ const UChar *word,
+ gint length,
+ TrackerParserWordType type);
struct TrackerParser {
const gchar *txt;
@@ -57,12 +55,12 @@ struct TrackerParser {
TrackerLanguage *language;
gboolean enable_stemmer;
- gboolean enable_stop_words;
guint max_words_to_index;
guint max_word_length;
gboolean delimit_words;
- gboolean skip_reserved_words;
- gboolean skip_numbers;
+ gboolean ignore_stop_words;
+ gboolean ignore_reserved_words;
+ gboolean ignore_numbers;
/* Private members */
gchar *word;
@@ -86,7 +84,7 @@ struct TrackerParser {
static gboolean
get_word_info (const UChar *word,
gsize word_length,
- gboolean skip_numbers,
+ gboolean ignore_numbers,
gboolean *p_is_allowed_word_start,
TrackerParserWordType *p_word_type)
{
@@ -117,7 +115,7 @@ get_word_info (const UChar *word,
unichar_gc == U_MODIFIER_LETTER ||
unichar_gc == U_OTHER_LETTER ||
IS_UNDERSCORE_UCS4 ((guint32)unichar) ||
- (!skip_numbers &&
+ (!ignore_numbers &&
(unichar_gc == U_DECIMAL_DIGIT_NUMBER ||
unichar_gc == U_LETTER_NUMBER ||
unichar_gc == U_OTHER_NUMBER))) {
@@ -197,9 +195,9 @@ parser_next (TrackerParser *parser,
/* g_debug ("next_word_offset_utf8: %" G_GSIZE_FORMAT, next_word_offset_utf8); */
/* g_debug ("current_word_offset_utf8: %" G_GSIZE_FORMAT, current_word_offset_utf8); */
- /* Skip the word if longer than the maximum allowed */
+ /* Ignore the word if longer than the maximum allowed */
if (word_length_utf8 >= parser->max_word_length) {
- /* Skip this word and keep on looping */
+ /* Ignore this word and keep on looping */
parser->cursor = next_word_offset_uchar;
continue;
}
@@ -207,7 +205,7 @@ parser_next (TrackerParser *parser,
/* Get word info... */
if (!get_word_info (&parser->utxt[parser->cursor],
word_length_uchar,
- parser->skip_numbers,
+ parser->ignore_numbers,
&is_allowed,
&type)) {
/* Quit loop just in case */
@@ -215,18 +213,18 @@ parser_next (TrackerParser *parser,
break;
}
- /* Skip the word if not an allowed word start */
+ /* Ignore the word if not an allowed word start */
if (!is_allowed) {
- /* Skip this word and keep on looping */
+ /* Ignore this word and keep on looping */
parser->cursor = next_word_offset_uchar;
continue;
}
/* check if word is reserved (looking at ORIGINAL UTF-8 buffer here! */
- if (parser->skip_reserved_words &&
+ if (parser->ignore_reserved_words &&
tracker_parser_is_reserved_word_utf8 (&parser->txt[current_word_offset_utf8],
word_length_utf8)) {
- /* Skip this word and keep on looping */
+ /* Ignore this word and keep on looping */
parser->cursor = next_word_offset_uchar;
continue;
}
@@ -248,7 +246,7 @@ parser_next (TrackerParser *parser,
truncated_length,
type);
if (!processed_word) {
- /* Skip this word and keep on looping */
+ /* Ignore this word and keep on looping */
parser->cursor = next_word_offset_uchar;
continue;
}
@@ -325,9 +323,9 @@ tracker_parser_reset (TrackerParser *parser,
gint txt_size,
gboolean delimit_words,
gboolean enable_stemmer,
- gboolean enable_stop_words,
- gboolean skip_reserved_words,
- gboolean skip_numbers)
+ gboolean ignore_stop_words,
+ gboolean ignore_reserved_words,
+ gboolean ignore_numbers)
{
UErrorCode error = U_ZERO_ERROR;
UConverter *converter;
@@ -338,13 +336,13 @@ tracker_parser_reset (TrackerParser *parser,
g_return_if_fail (txt != NULL);
parser->enable_stemmer = enable_stemmer;
- parser->enable_stop_words = enable_stop_words;
+ parser->ignore_stop_words = ignore_stop_words;
parser->delimit_words = delimit_words;
parser->txt_size = txt_size;
parser->txt = txt;
- parser->skip_reserved_words = skip_reserved_words;
- parser->skip_numbers = skip_numbers;
+ parser->ignore_reserved_words = ignore_reserved_words;
+ parser->ignore_numbers = ignore_numbers;
g_free (parser->word);
parser->word = NULL;
@@ -619,7 +617,7 @@ tracker_parser_next (TrackerParser *parser,
}
if (str &&
- parser->enable_stop_words &&
+ parser->ignore_stop_words &&
tracker_language_is_stop_word (parser->language, str)) {
*stop_word = TRUE;
} else {
diff --git a/src/libtracker-fts/tracker-parser-libunistring.c b/src/libtracker-fts/tracker-parser-libunistring.c
index 4a6ff35..de3f03d 100644
--- a/src/libtracker-fts/tracker-parser-libunistring.c
+++ b/src/libtracker-fts/tracker-parser-libunistring.c
@@ -54,12 +54,12 @@ struct TrackerParser {
TrackerLanguage *language;
gboolean enable_stemmer;
- gboolean enable_stop_words;
guint max_words_to_index;
guint max_word_length;
gboolean delimit_words;
- gboolean skip_reserved_words;
- gboolean skip_numbers;
+ gboolean ignore_stop_words;
+ gboolean ignore_reserved_words;
+ gboolean ignore_numbers;
/* Private members */
gchar *word;
@@ -175,25 +175,25 @@ parser_next (TrackerParser *parser,
break;
}
- /* Skip the word if not an allowed word start */
+ /* Ignore the word if not an allowed word start */
if (!is_allowed) {
- /* Skip this word and keep on looping */
+ /* Ignore this word and keep on looping */
parser->cursor += word_length;
continue;
}
- /* Skip the word if longer than the maximum allowed */
+ /* Ignore the word if longer than the maximum allowed */
if (word_length >= parser->max_word_length) {
- /* Skip this word and keep on looping */
+ /* Ignore this word and keep on looping */
parser->cursor += word_length;
continue;
}
- /* check if word is reserved and skip it if so */
- if (parser->skip_reserved_words &&
+ /* check if word is reserved and ignore it if so */
+ if (parser->ignore_reserved_words &&
tracker_parser_is_reserved_word_utf8 (&parser->txt[parser->cursor],
word_length)) {
- /* Skip this word and keep on looping */
+ /* Ignore this word and keep on looping */
parser->cursor += word_length;
continue;
}
@@ -212,7 +212,7 @@ parser_next (TrackerParser *parser,
truncated_length,
type);
if (!processed_word) {
- /* Skip this word and keep on looping */
+ /* Ignore this word and keep on looping */
parser->cursor += word_length;
continue;
}
@@ -280,21 +280,21 @@ tracker_parser_reset (TrackerParser *parser,
gint txt_size,
gboolean delimit_words,
gboolean enable_stemmer,
- gboolean enable_stop_words,
- gboolean skip_reserved_words,
- gboolean skip_numbers)
+ gboolean ignore_stop_words,
+ gboolean ignore_reserved_words,
+ gboolean ignore_numbers)
{
g_return_if_fail (parser != NULL);
g_return_if_fail (txt != NULL);
parser->enable_stemmer = enable_stemmer;
- parser->enable_stop_words = enable_stop_words;
+ parser->ignore_stop_words = ignore_stop_words;
parser->delimit_words = delimit_words;
parser->txt_size = txt_size;
parser->txt = txt;
- parser->skip_reserved_words = skip_reserved_words;
- parser->skip_numbers = skip_numbers;
+ parser->ignore_reserved_words = ignore_reserved_words;
+ parser->ignore_numbers = ignore_numbers;
g_free (parser->word);
parser->word = NULL;
@@ -316,7 +316,7 @@ tracker_parser_reset (TrackerParser *parser,
/* Prepare a custom category which is a combination of the
* desired ones */
parser->allowed_start = UC_LETTER;
- if (!parser->skip_numbers) {
+ if (!parser->ignore_numbers) {
parser->allowed_start = uc_general_category_or (parser->allowed_start, UC_NUMBER);
}
}
@@ -376,7 +376,7 @@ process_word_utf8 (TrackerParser *parser,
word_buffer,
&new_word_length);
- /* Case folding + Normalization failed, skip this word */
+ /* Case folding + Normalization failed, ignore this word */
g_return_val_if_fail (normalized != NULL, NULL);
/* If output buffer is not the same as the one passed to
@@ -481,7 +481,7 @@ tracker_parser_next (TrackerParser *parser,
}
if (str &&
- parser->enable_stop_words &&
+ parser->ignore_stop_words &&
tracker_language_is_stop_word (parser->language, str)) {
*stop_word = TRUE;
} else {
diff --git a/src/libtracker-fts/tracker-parser.h b/src/libtracker-fts/tracker-parser.h
index cad4442..f4065b9 100644
--- a/src/libtracker-fts/tracker-parser.h
+++ b/src/libtracker-fts/tracker-parser.h
@@ -37,9 +37,9 @@ void tracker_parser_reset (TrackerParser *parser,
gint txt_size,
gboolean delimit_words,
gboolean enable_stemmer,
- gboolean enable_stop_words,
- gboolean skip_reserved_words,
- gboolean skip_numbers);
+ gboolean ignore_stop_words,
+ gboolean ignore_reserved_words,
+ gboolean ignore_numbers);
const gchar * tracker_parser_next (TrackerParser *parser,
gint *position,
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]