[tracker/rss-enclosures] FTS parsers: refactor & cleanup API
- From: Roberto Guido <rguido src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/rss-enclosures] FTS parsers: refactor & cleanup API
- Date: Sat, 26 Jun 2010 23:11:43 +0000 (UTC)
commit 2f8241c9be7916b109554653b3dd61d3601d7086
Author: Aleksander Morgado <aleksander lanedo com>
Date: Thu May 27 18:12:54 2010 +0200
FTS parsers: refactor & cleanup API
src/libtracker-fts/tracker-fts.c | 13 ++++++----
src/libtracker-fts/tracker-parser-glib.c | 27 +++++++++-------------
src/libtracker-fts/tracker-parser-libicu.c | 21 +++++------------
src/libtracker-fts/tracker-parser-libunistring.c | 23 +++++++-----------
src/libtracker-fts/tracker-parser.h | 4 +-
tests/libtracker-fts/tracker-parser-test.c | 5 ++-
tests/libtracker-fts/tracker-parser.c | 4 +-
7 files changed, 41 insertions(+), 56 deletions(-)
---
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index ee590b2..ddfb293 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -2336,6 +2336,7 @@ struct fulltext_vtab {
gboolean ignore_stop_words;
int max_words;
int min_word_length;
+ int max_word_length;
/* Precompiled statements which we keep as long as the table is
** open.
@@ -3318,7 +3319,6 @@ static int constructVtab(
fulltext_vtab *v = 0;
TrackerFTSConfig *config;
TrackerLanguage *language;
- int min_len, max_len;
if (G_UNLIKELY (quark_fulltext_vtab == 0)) {
quark_fulltext_vtab = g_quark_from_static_string ("quark_fulltext_vtab");
@@ -3374,8 +3374,8 @@ static int constructVtab(
language = tracker_language_new (NULL);
- min_len = tracker_fts_config_get_min_word_length (config);
- max_len = tracker_fts_config_get_max_word_length (config);
+ v->min_word_length = tracker_fts_config_get_min_word_length (config);
+ v->max_word_length = tracker_fts_config_get_max_word_length (config);
v->enable_stemmer = tracker_fts_config_get_enable_stemmer (config);
v->enable_unaccent = tracker_fts_config_get_enable_unaccent (config);
v->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
@@ -3386,8 +3386,8 @@ static int constructVtab(
FALSE : tracker_fts_config_get_ignore_stop_words (config));
v->max_words = tracker_fts_config_get_max_words_to_index (config);
- v->min_word_length = min_len;
- v->parser = tracker_parser_new (language, max_len);
+
+ v->parser = tracker_parser_new (language);
g_object_unref (language);
@@ -3684,6 +3684,7 @@ static void snippetOffsetsOfColumn(
tracker_parser_reset (pVtab->parser,
zDoc,
nDoc,
+ pVtab->max_word_length,
pVtab->enable_stemmer,
pVtab->enable_unaccent,
pVtab->ignore_stop_words,
@@ -4388,6 +4389,7 @@ static int tokenizeSegment(
tracker_parser_reset (parser,
pSegment,
nSegment,
+ v->max_word_length,
v->enable_stemmer,
v->enable_unaccent,
v->ignore_stop_words,
@@ -4848,6 +4850,7 @@ int Catid,
tracker_parser_reset (parser,
zText,
strlen (zText),
+ v->max_word_length,
v->enable_stemmer,
v->enable_unaccent,
v->ignore_stop_words,
diff --git a/src/libtracker-fts/tracker-parser-glib.c b/src/libtracker-fts/tracker-parser-glib.c
index 2c324bb..9892829 100644
--- a/src/libtracker-fts/tracker-parser-glib.c
+++ b/src/libtracker-fts/tracker-parser-glib.c
@@ -71,19 +71,19 @@ struct TrackerParser {
gint txt_size;
TrackerLanguage *language;
+ guint max_word_length;
gboolean enable_stemmer;
gboolean enable_unaccent;
gboolean ignore_stop_words;
- guint max_word_length;
gboolean ignore_reserved_words;
gboolean ignore_numbers;
/* Private members */
- gchar *word;
- gint word_length;
- guint word_position;
- TrackerParserEncoding encoding;
- const gchar *cursor;
+ gchar *word;
+ gint word_length;
+ guint word_position;
+ TrackerParserEncoding encoding;
+ const gchar *cursor;
/* Pango members for CJK text parsing */
PangoLogAttr *attrs;
@@ -413,26 +413,19 @@ parser_next (TrackerParser *parser,
}
return FALSE;
-
}
TrackerParser *
-tracker_parser_new (TrackerLanguage *language,
- gint max_word_length)
+tracker_parser_new (TrackerLanguage *language)
{
TrackerParser *parser;
g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
- g_return_val_if_fail (max_word_length > 0, NULL);
parser = g_new0 (TrackerParser, 1);
parser->language = g_object_ref (language);
- parser->max_word_length = max_word_length;
- parser->word_length = 0;
- parser->attrs = NULL;
-
return parser;
}
@@ -456,6 +449,7 @@ void
tracker_parser_reset (TrackerParser *parser,
const gchar *txt,
gint txt_size,
+ guint max_word_length,
gboolean enable_stemmer,
gboolean enable_unaccent,
gboolean ignore_stop_words,
@@ -471,14 +465,15 @@ tracker_parser_reset (TrackerParser *parser,
parser->cursor = txt;
parser->encoding = get_encoding (txt);
+ parser->max_word_length = max_word_length;
parser->enable_stemmer = enable_stemmer;
parser->enable_unaccent = enable_unaccent;
parser->ignore_stop_words = ignore_stop_words;
+ parser->ignore_reserved_words = ignore_reserved_words;
+ parser->ignore_numbers = ignore_numbers;
parser->txt_size = txt_size;
parser->txt = txt;
- parser->ignore_reserved_words = ignore_reserved_words;
- parser->ignore_numbers = ignore_numbers;
g_free (parser->word);
parser->word = NULL;
diff --git a/src/libtracker-fts/tracker-parser-libicu.c b/src/libtracker-fts/tracker-parser-libicu.c
index 4814281..42f523d 100644
--- a/src/libtracker-fts/tracker-parser-libicu.c
+++ b/src/libtracker-fts/tracker-parser-libicu.c
@@ -54,9 +54,9 @@ struct TrackerParser {
gint txt_size;
TrackerLanguage *language;
+ guint max_word_length;
gboolean enable_stemmer;
gboolean enable_unaccent;
- guint max_word_length;
gboolean ignore_stop_words;
gboolean ignore_reserved_words;
gboolean ignore_numbers;
@@ -269,27 +269,16 @@ parser_next (TrackerParser *parser,
}
TrackerParser *
-tracker_parser_new (TrackerLanguage *language,
- gint max_word_length)
+tracker_parser_new (TrackerLanguage *language)
{
TrackerParser *parser;
g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
- g_return_val_if_fail (max_word_length > 0, NULL);
parser = g_new0 (TrackerParser, 1);
parser->language = g_object_ref (language);
- parser->max_word_length = max_word_length;
- parser->word_length = 0;
-
- parser->utxt = NULL;
- parser->offsets = NULL;
- parser->utxt_size = 0;
- parser->bi = NULL;
- parser->cursor = 0;
-
return parser;
}
@@ -318,6 +307,7 @@ void
tracker_parser_reset (TrackerParser *parser,
const gchar *txt,
gint txt_size,
+ guint max_word_length,
gboolean enable_stemmer,
gboolean enable_unaccent,
gboolean ignore_stop_words,
@@ -332,14 +322,15 @@ tracker_parser_reset (TrackerParser *parser,
g_return_if_fail (parser != NULL);
g_return_if_fail (txt != NULL);
+ parser->max_word_length = max_word_length;
parser->enable_stemmer = enable_stemmer;
parser->enable_unaccent = enable_unaccent;
parser->ignore_stop_words = ignore_stop_words;
+ parser->ignore_reserved_words = ignore_reserved_words;
+ parser->ignore_numbers = ignore_numbers;
parser->txt_size = txt_size;
parser->txt = txt;
- parser->ignore_reserved_words = ignore_reserved_words;
- parser->ignore_numbers = ignore_numbers;
g_free (parser->word);
parser->word = NULL;
diff --git a/src/libtracker-fts/tracker-parser-libunistring.c b/src/libtracker-fts/tracker-parser-libunistring.c
index 02b89a9..db021f5 100644
--- a/src/libtracker-fts/tracker-parser-libunistring.c
+++ b/src/libtracker-fts/tracker-parser-libunistring.c
@@ -53,17 +53,17 @@ struct TrackerParser {
gint txt_size;
TrackerLanguage *language;
+ guint max_word_length;
gboolean enable_stemmer;
gboolean enable_unaccent;
- guint max_word_length;
gboolean ignore_stop_words;
gboolean ignore_reserved_words;
gboolean ignore_numbers;
/* Private members */
- gchar *word;
- gint word_length;
- guint word_position;
+ gchar *word;
+ gint word_length;
+ guint word_position;
/* Cursor, as index of the input array of bytes */
gsize cursor;
@@ -237,23 +237,16 @@ parser_next (TrackerParser *parser,
}
TrackerParser *
-tracker_parser_new (TrackerLanguage *language,
- gint max_word_length)
+tracker_parser_new (TrackerLanguage *language)
{
TrackerParser *parser;
g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
- g_return_val_if_fail (max_word_length > 0, NULL);
parser = g_new0 (TrackerParser, 1);
parser->language = g_object_ref (language);
- parser->max_word_length = max_word_length;
- parser->word_length = 0;
-
- parser->word_break_flags = NULL;
-
return parser;
}
@@ -277,6 +270,7 @@ void
tracker_parser_reset (TrackerParser *parser,
const gchar *txt,
gint txt_size,
+ guint max_word_length,
gboolean enable_stemmer,
gboolean enable_unaccent,
gboolean ignore_stop_words,
@@ -286,14 +280,15 @@ tracker_parser_reset (TrackerParser *parser,
g_return_if_fail (parser != NULL);
g_return_if_fail (txt != NULL);
+ parser->max_word_length = max_word_length;
parser->enable_stemmer = enable_stemmer;
parser->enable_unaccent = enable_unaccent;
parser->ignore_stop_words = ignore_stop_words;
+ parser->ignore_reserved_words = ignore_reserved_words;
+ parser->ignore_numbers = ignore_numbers;
parser->txt_size = txt_size;
parser->txt = txt;
- parser->ignore_reserved_words = ignore_reserved_words;
- parser->ignore_numbers = ignore_numbers;
g_free (parser->word);
parser->word = NULL;
diff --git a/src/libtracker-fts/tracker-parser.h b/src/libtracker-fts/tracker-parser.h
index b84d534..57426c3 100644
--- a/src/libtracker-fts/tracker-parser.h
+++ b/src/libtracker-fts/tracker-parser.h
@@ -29,12 +29,12 @@ G_BEGIN_DECLS
typedef struct TrackerParser TrackerParser;
-TrackerParser *tracker_parser_new (TrackerLanguage *language,
- gint max_word_length);
+TrackerParser *tracker_parser_new (TrackerLanguage *language);
void tracker_parser_reset (TrackerParser *parser,
const gchar *txt,
gint txt_size,
+ guint max_word_length,
gboolean enable_stemmer,
gboolean enable_unaccent,
gboolean ignore_stop_words,
diff --git a/tests/libtracker-fts/tracker-parser-test.c b/tests/libtracker-fts/tracker-parser-test.c
index 1f3da94..c34fc08 100644
--- a/tests/libtracker-fts/tracker-parser-test.c
+++ b/tests/libtracker-fts/tracker-parser-test.c
@@ -84,8 +84,7 @@ test_common_setup (TrackerParserTestFixture *fixture,
fixture->ignore_numbers = TRUE;
/* Create the parser */
- fixture->parser = tracker_parser_new (language,
- fixture->max_word_length);
+ fixture->parser = tracker_parser_new (language);
if (!fixture->parser) {
g_critical ("Parser creation failed!");
return;
@@ -132,6 +131,7 @@ expected_nwords_check (TrackerParserTestFixture *fixture,
tracker_parser_reset (fixture->parser,
testdata->str,
strlen (testdata->str),
+ fixture->max_word_length,
fixture->enable_stemmer,
fixture->enable_unaccent,
fixture->ignore_stop_words,
@@ -180,6 +180,7 @@ expected_word_check (TrackerParserTestFixture *fixture,
tracker_parser_reset (fixture->parser,
testdata->str,
strlen (testdata->str),
+ fixture->max_word_length,
testdata->enable_stemmer,
testdata->enable_unaccent,
fixture->ignore_stop_words,
diff --git a/tests/libtracker-fts/tracker-parser.c b/tests/libtracker-fts/tracker-parser.c
index 0aaf6c4..23527c5 100644
--- a/tests/libtracker-fts/tracker-parser.c
+++ b/tests/libtracker-fts/tracker-parser.c
@@ -128,8 +128,7 @@ run_parsing (void)
}
/* Create the parser */
- parser = tracker_parser_new (language,
- tracker_fts_config_get_max_word_length (config));
+ parser = tracker_parser_new (language);
if (!parser) {
g_printerr ("Parser creation failed!\n");
g_object_unref (language);
@@ -140,6 +139,7 @@ run_parsing (void)
tracker_parser_reset (parser,
text,
strlen (text),
+ tracker_fts_config_get_max_word_length (config),
tracker_fts_config_get_enable_stemmer (config),
tracker_fts_config_get_enable_unaccent (config),
tracker_fts_config_get_ignore_stop_words (config),
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]