[tracker/rss-enclosures] FTS parsers: refactor & cleanup API



commit 2f8241c9be7916b109554653b3dd61d3601d7086
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Thu May 27 18:12:54 2010 +0200

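    FTS parsers: refactor & cleanup API

    tracker_parser_new() no longer takes max_word_length; the limit is now
    passed to tracker_parser_reset() together with the other per-run
    options, and the FTS vtab keeps it in a new max_word_length field.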

 src/libtracker-fts/tracker-fts.c                 |   13 ++++++----
 src/libtracker-fts/tracker-parser-glib.c         |   27 +++++++++-------------
 src/libtracker-fts/tracker-parser-libicu.c       |   21 +++++------------
 src/libtracker-fts/tracker-parser-libunistring.c |   23 +++++++-----------
 src/libtracker-fts/tracker-parser.h              |    4 +-
 tests/libtracker-fts/tracker-parser-test.c       |    5 ++-
 tests/libtracker-fts/tracker-parser.c            |    4 +-
 7 files changed, 41 insertions(+), 56 deletions(-)
---
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index ee590b2..ddfb293 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -2336,6 +2336,7 @@ struct fulltext_vtab {
   gboolean ignore_stop_words;
   int max_words;
   int min_word_length;
+  int max_word_length;
 
   /* Precompiled statements which we keep as long as the table is
   ** open.
@@ -3318,7 +3319,6 @@ static int constructVtab(
   fulltext_vtab *v = 0;
   TrackerFTSConfig *config;
   TrackerLanguage *language;
-  int min_len, max_len;
 
   if (G_UNLIKELY (quark_fulltext_vtab == 0)) {
     quark_fulltext_vtab = g_quark_from_static_string ("quark_fulltext_vtab");
@@ -3374,8 +3374,8 @@ static int constructVtab(
 
   language = tracker_language_new (NULL);
 
-  min_len = tracker_fts_config_get_min_word_length (config);
-  max_len = tracker_fts_config_get_max_word_length (config);
+  v->min_word_length = tracker_fts_config_get_min_word_length (config);
+  v->max_word_length = tracker_fts_config_get_max_word_length (config);
   v->enable_stemmer = tracker_fts_config_get_enable_stemmer (config);
   v->enable_unaccent = tracker_fts_config_get_enable_unaccent (config);
   v->ignore_numbers = tracker_fts_config_get_ignore_numbers (config);
@@ -3386,8 +3386,8 @@ static int constructVtab(
 			  FALSE : tracker_fts_config_get_ignore_stop_words (config));
 
   v->max_words = tracker_fts_config_get_max_words_to_index (config);
-  v->min_word_length = min_len;
-  v->parser = tracker_parser_new (language, max_len);
+
+  v->parser = tracker_parser_new (language);
 
   g_object_unref (language);
 
@@ -3684,6 +3684,7 @@ static void snippetOffsetsOfColumn(
   tracker_parser_reset (pVtab->parser,
                         zDoc,
                         nDoc,
+                        pVtab->max_word_length,
                         pVtab->enable_stemmer,
                         pVtab->enable_unaccent,
                         pVtab->ignore_stop_words,
@@ -4388,6 +4389,7 @@ static int tokenizeSegment(
   tracker_parser_reset (parser,
                         pSegment,
                         nSegment,
+                        v->max_word_length,
                         v->enable_stemmer,
                         v->enable_unaccent,
                         v->ignore_stop_words,
@@ -4848,6 +4850,7 @@ int Catid,
   tracker_parser_reset (parser,
                         zText,
                         strlen (zText),
+                        v->max_word_length,
                         v->enable_stemmer,
                         v->enable_unaccent,
                         v->ignore_stop_words,
diff --git a/src/libtracker-fts/tracker-parser-glib.c b/src/libtracker-fts/tracker-parser-glib.c
index 2c324bb..9892829 100644
--- a/src/libtracker-fts/tracker-parser-glib.c
+++ b/src/libtracker-fts/tracker-parser-glib.c
@@ -71,19 +71,19 @@ struct TrackerParser {
 	gint                   txt_size;
 
 	TrackerLanguage       *language;
+	guint                  max_word_length;
 	gboolean               enable_stemmer;
 	gboolean               enable_unaccent;
 	gboolean               ignore_stop_words;
-	guint                  max_word_length;
 	gboolean               ignore_reserved_words;
 	gboolean               ignore_numbers;
 
 	/* Private members */
-	gchar                   *word;
-	gint                    word_length;
-	guint                   word_position;
-	TrackerParserEncoding   encoding;
-	const gchar             *cursor;
+	gchar                 *word;
+	gint                   word_length;
+	guint                  word_position;
+	TrackerParserEncoding  encoding;
+	const gchar           *cursor;
 
 	/* Pango members for CJK text parsing */
 	PangoLogAttr          *attrs;
@@ -413,26 +413,19 @@ parser_next (TrackerParser *parser,
 	}
 
 	return FALSE;
-
 }
 
 TrackerParser *
-tracker_parser_new (TrackerLanguage *language,
-                    gint             max_word_length)
+tracker_parser_new (TrackerLanguage *language)
 {
 	TrackerParser *parser;
 
 	g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
-	g_return_val_if_fail (max_word_length > 0, NULL);
 
 	parser = g_new0 (TrackerParser, 1);
 
 	parser->language = g_object_ref (language);
 
-	parser->max_word_length = max_word_length;
-	parser->word_length = 0;
-	parser->attrs = NULL;
-
 	return parser;
 }
 
@@ -456,6 +449,7 @@ void
 tracker_parser_reset (TrackerParser *parser,
                       const gchar   *txt,
                       gint           txt_size,
+                      guint          max_word_length,
                       gboolean       enable_stemmer,
                       gboolean       enable_unaccent,
                       gboolean       ignore_stop_words,
@@ -471,14 +465,15 @@ tracker_parser_reset (TrackerParser *parser,
 	parser->cursor = txt;
 	parser->encoding = get_encoding (txt);
 
+	parser->max_word_length = max_word_length;
 	parser->enable_stemmer = enable_stemmer;
 	parser->enable_unaccent = enable_unaccent;
 	parser->ignore_stop_words = ignore_stop_words;
+	parser->ignore_reserved_words = ignore_reserved_words;
+	parser->ignore_numbers = ignore_numbers;
 
 	parser->txt_size = txt_size;
 	parser->txt = txt;
-	parser->ignore_reserved_words = ignore_reserved_words;
-	parser->ignore_numbers = ignore_numbers;
 
 	g_free (parser->word);
 	parser->word = NULL;
diff --git a/src/libtracker-fts/tracker-parser-libicu.c b/src/libtracker-fts/tracker-parser-libicu.c
index 4814281..42f523d 100644
--- a/src/libtracker-fts/tracker-parser-libicu.c
+++ b/src/libtracker-fts/tracker-parser-libicu.c
@@ -54,9 +54,9 @@ struct TrackerParser {
 	gint                   txt_size;
 
 	TrackerLanguage       *language;
+	guint                  max_word_length;
 	gboolean               enable_stemmer;
 	gboolean               enable_unaccent;
-	guint                  max_word_length;
 	gboolean               ignore_stop_words;
 	gboolean               ignore_reserved_words;
 	gboolean               ignore_numbers;
@@ -269,27 +269,16 @@ parser_next (TrackerParser *parser,
 }
 
 TrackerParser *
-tracker_parser_new (TrackerLanguage *language,
-                    gint             max_word_length)
+tracker_parser_new (TrackerLanguage *language)
 {
 	TrackerParser *parser;
 
 	g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
-	g_return_val_if_fail (max_word_length > 0, NULL);
 
 	parser = g_new0 (TrackerParser, 1);
 
 	parser->language = g_object_ref (language);
 
-	parser->max_word_length = max_word_length;
-	parser->word_length = 0;
-
-	parser->utxt = NULL;
-	parser->offsets = NULL;
-	parser->utxt_size = 0;
-	parser->bi = NULL;
-	parser->cursor = 0;
-
 	return parser;
 }
 
@@ -318,6 +307,7 @@ void
 tracker_parser_reset (TrackerParser *parser,
                       const gchar   *txt,
                       gint           txt_size,
+                      guint          max_word_length,
                       gboolean       enable_stemmer,
                       gboolean       enable_unaccent,
                       gboolean       ignore_stop_words,
@@ -332,14 +322,15 @@ tracker_parser_reset (TrackerParser *parser,
 	g_return_if_fail (parser != NULL);
 	g_return_if_fail (txt != NULL);
 
+	parser->max_word_length = max_word_length;
 	parser->enable_stemmer = enable_stemmer;
 	parser->enable_unaccent = enable_unaccent;
 	parser->ignore_stop_words = ignore_stop_words;
+	parser->ignore_reserved_words = ignore_reserved_words;
+	parser->ignore_numbers = ignore_numbers;
 
 	parser->txt_size = txt_size;
 	parser->txt = txt;
-	parser->ignore_reserved_words = ignore_reserved_words;
-	parser->ignore_numbers = ignore_numbers;
 
 	g_free (parser->word);
 	parser->word = NULL;
diff --git a/src/libtracker-fts/tracker-parser-libunistring.c b/src/libtracker-fts/tracker-parser-libunistring.c
index 02b89a9..db021f5 100644
--- a/src/libtracker-fts/tracker-parser-libunistring.c
+++ b/src/libtracker-fts/tracker-parser-libunistring.c
@@ -53,17 +53,17 @@ struct TrackerParser {
 	gint                   txt_size;
 
 	TrackerLanguage       *language;
+	guint                  max_word_length;
 	gboolean               enable_stemmer;
 	gboolean               enable_unaccent;
-	guint                  max_word_length;
 	gboolean               ignore_stop_words;
 	gboolean               ignore_reserved_words;
 	gboolean               ignore_numbers;
 
 	/* Private members */
-	gchar                   *word;
-	gint                    word_length;
-	guint                   word_position;
+	gchar                 *word;
+	gint                   word_length;
+	guint                  word_position;
 
 	/* Cursor, as index of the input array of bytes */
 	gsize                  cursor;
@@ -237,23 +237,16 @@ parser_next (TrackerParser *parser,
 }
 
 TrackerParser *
-tracker_parser_new (TrackerLanguage *language,
-                    gint             max_word_length)
+tracker_parser_new (TrackerLanguage *language)
 {
 	TrackerParser *parser;
 
 	g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
-	g_return_val_if_fail (max_word_length > 0, NULL);
 
 	parser = g_new0 (TrackerParser, 1);
 
 	parser->language = g_object_ref (language);
 
-	parser->max_word_length = max_word_length;
-	parser->word_length = 0;
-
-	parser->word_break_flags = NULL;
-
 	return parser;
 }
 
@@ -277,6 +270,7 @@ void
 tracker_parser_reset (TrackerParser *parser,
                       const gchar   *txt,
                       gint           txt_size,
+                      guint          max_word_length,
                       gboolean       enable_stemmer,
                       gboolean       enable_unaccent,
                       gboolean       ignore_stop_words,
@@ -286,14 +280,15 @@ tracker_parser_reset (TrackerParser *parser,
 	g_return_if_fail (parser != NULL);
 	g_return_if_fail (txt != NULL);
 
+	parser->max_word_length = max_word_length;
 	parser->enable_stemmer = enable_stemmer;
 	parser->enable_unaccent = enable_unaccent;
 	parser->ignore_stop_words = ignore_stop_words;
+	parser->ignore_reserved_words = ignore_reserved_words;
+	parser->ignore_numbers = ignore_numbers;
 
 	parser->txt_size = txt_size;
 	parser->txt = txt;
-	parser->ignore_reserved_words = ignore_reserved_words;
-	parser->ignore_numbers = ignore_numbers;
 
 	g_free (parser->word);
 	parser->word = NULL;
diff --git a/src/libtracker-fts/tracker-parser.h b/src/libtracker-fts/tracker-parser.h
index b84d534..57426c3 100644
--- a/src/libtracker-fts/tracker-parser.h
+++ b/src/libtracker-fts/tracker-parser.h
@@ -29,12 +29,12 @@ G_BEGIN_DECLS
 
 typedef struct TrackerParser TrackerParser;
 
-TrackerParser *tracker_parser_new             (TrackerLanguage *language,
-                                               gint             max_word_length);
+TrackerParser *tracker_parser_new             (TrackerLanguage *language);
 
 void           tracker_parser_reset           (TrackerParser   *parser,
                                                const gchar     *txt,
                                                gint             txt_size,
+                                               guint            max_word_length,
                                                gboolean         enable_stemmer,
                                                gboolean         enable_unaccent,
                                                gboolean         ignore_stop_words,
diff --git a/tests/libtracker-fts/tracker-parser-test.c b/tests/libtracker-fts/tracker-parser-test.c
index 1f3da94..c34fc08 100644
--- a/tests/libtracker-fts/tracker-parser-test.c
+++ b/tests/libtracker-fts/tracker-parser-test.c
@@ -84,8 +84,7 @@ test_common_setup (TrackerParserTestFixture *fixture,
 	fixture->ignore_numbers = TRUE;
 
 	/* Create the parser */
-	fixture->parser = tracker_parser_new (language,
-	                                      fixture->max_word_length);
+	fixture->parser = tracker_parser_new (language);
 	if (!fixture->parser) {
 		g_critical ("Parser creation failed!");
 		return;
@@ -132,6 +131,7 @@ expected_nwords_check (TrackerParserTestFixture *fixture,
 	tracker_parser_reset (fixture->parser,
 	                      testdata->str,
 	                      strlen (testdata->str),
+	                      fixture->max_word_length,
 	                      fixture->enable_stemmer,
 	                      fixture->enable_unaccent,
 	                      fixture->ignore_stop_words,
@@ -180,6 +180,7 @@ expected_word_check (TrackerParserTestFixture *fixture,
 	tracker_parser_reset (fixture->parser,
 	                      testdata->str,
 	                      strlen (testdata->str),
+	                      fixture->max_word_length,
 	                      testdata->enable_stemmer,
 	                      testdata->enable_unaccent,
 	                      fixture->ignore_stop_words,
diff --git a/tests/libtracker-fts/tracker-parser.c b/tests/libtracker-fts/tracker-parser.c
index 0aaf6c4..23527c5 100644
--- a/tests/libtracker-fts/tracker-parser.c
+++ b/tests/libtracker-fts/tracker-parser.c
@@ -128,8 +128,7 @@ run_parsing (void)
 	}
 
 	/* Create the parser */
-	parser = tracker_parser_new (language,
-	                             tracker_fts_config_get_max_word_length (config));
+	parser = tracker_parser_new (language);
 	if (!parser) {
 		g_printerr ("Parser creation failed!\n");
 		g_object_unref (language);
@@ -140,6 +139,7 @@ run_parsing (void)
 	tracker_parser_reset (parser,
 	                      text,
 	                      strlen (text),
+	                      tracker_fts_config_get_max_word_length (config),
 	                      tracker_fts_config_get_enable_stemmer (config),
 	                      tracker_fts_config_get_enable_unaccent (config),
 	                      tracker_fts_config_get_ignore_stop_words (config),



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]