tracker r1953 - in branches/indexer-split: . src/libtracker-common tests/libtracker-common
- From: ifrade svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r1953 - in branches/indexer-split: . src/libtracker-common tests/libtracker-common
- Date: Mon, 28 Jul 2008 12:57:11 +0000 (UTC)
Author: ifrade
Date: Mon Jul 28 12:57:11 2008
New Revision: 1953
URL: http://svn.gnome.org/viewvc/tracker?rev=1953&view=rev
Log:
Added stop words in tracker_language on creation. Fixed minor things
Added:
branches/indexer-split/tests/libtracker-common/tracker-parser-test.c
Modified:
branches/indexer-split/ChangeLog
branches/indexer-split/src/libtracker-common/tracker-language.c
branches/indexer-split/tests/libtracker-common/Makefile.am
Modified: branches/indexer-split/src/libtracker-common/tracker-language.c
==============================================================================
--- branches/indexer-split/src/libtracker-common/tracker-language.c (original)
+++ branches/indexer-split/src/libtracker-common/tracker-language.c Mon Jul 28 12:57:11 2008
@@ -217,11 +217,10 @@
gchar *str;
gchar *filename;
- str = g_strconcat (".", language_code, NULL);
+ str = g_strconcat ("stopwords.", language_code, NULL);
filename = g_build_filename (SHAREDIR,
"tracker",
"languages",
- "stopwords",
str,
NULL);
g_free (str);
@@ -269,7 +268,7 @@
content = g_mapped_file_get_contents (mapped_file);
words = g_strsplit_set (content, "\n" , -1);
- g_free (content);
+
g_mapped_file_free (mapped_file);
/* FIXME: Shouldn't clear the hash table first? */
@@ -342,11 +341,18 @@
TrackerLanguage *
tracker_language_new (TrackerConfig *config)
{
+ TrackerLanguage *language;
g_return_val_if_fail (TRACKER_IS_CONFIG (config), NULL);
- return g_object_new (TRACKER_TYPE_LANGUAGE,
- "config", config,
- NULL);
+ language = g_object_new (TRACKER_TYPE_LANGUAGE,
+ "config", config,
+ NULL);
+
+ language_set_stopword_list (language,
+ tracker_config_get_language (config));
+
+ return language;
+
}
TrackerConfig *
Modified: branches/indexer-split/tests/libtracker-common/Makefile.am
==============================================================================
--- branches/indexer-split/tests/libtracker-common/Makefile.am (original)
+++ branches/indexer-split/tests/libtracker-common/Makefile.am Mon Jul 28 12:57:11 2008
@@ -2,7 +2,11 @@
noinst_PROGRAMS = $(TEST_PROGS)
-TEST_PROGS += tracker-ontology tracker-dbus tracker-type-utils tracker-file-utils
+TEST_PROGS += tracker-ontology \
+ tracker-dbus \
+ tracker-type-utils \
+ tracker-file-utils \
+ tracker-parser
INCLUDES = \
-g \
@@ -58,3 +62,16 @@
$(GMODULE_LIBS) \
$(GTHREAD_LIBS) \
$(GLIB2_LIBS)
+
+
+tracker_parser_SOURCES = \
+ tracker-parser-test.c
+
+tracker_parser_LDADD = \
+ $(top_builddir)/src/libtracker-common/libtracker-common.la \
+ $(top_builddir)/tests/common/libtracker-testcommon.la \
+ $(top_builddir)/src/xdgmime/libxdgmime.la \
+ $(top_builddir)/src/libstemmer/libstemmer-private.la \
+ $(GMODULE_LIBS) \
+ $(GTHREAD_LIBS) \
+ $(GLIB2_LIBS)
Added: branches/indexer-split/tests/libtracker-common/tracker-parser-test.c
==============================================================================
--- (empty file)
+++ branches/indexer-split/tests/libtracker-common/tracker-parser-test.c Mon Jul 28 12:57:11 2008
@@ -0,0 +1,171 @@
+#include <glib.h>
+#include <glib/gtestutils.h>
+#include <string.h>
+
+#include <libtracker-common/tracker-config.h>
+#include <libtracker-common/tracker-language.h>
+#include <libtracker-common/tracker-parser.h>
+
+/*
+ * Sample input shared by the tests: 10 space-separated tokens, the last one numeric.
+ * len(word) > 3 : 6 alphabetic words; longest word ("collection"): 10 chars
+ */
+#define SAMPLE_TEXT "Here a good collection of various words to parse 12345678"
+
+TrackerConfig *config; /* assigned in main () from tracker_config_new () */
+TrackerLanguage *language; /* assigned in main (); handed to tracker_parser_text () by the tests */
+
+const gchar *text = ""; /* not referenced anywhere else in this file */
+/* Prints KEY as a string; VALUE and USER_DATA are ignored. Not called anywhere in this file. */
+void
+print_key (gpointer key, gpointer value, gpointer user_data)
+{
+ g_print ("word: %s\n", (gchar *)key);
+}
+/* g_hash_table_foreach () callback: asserts strlen (key) <= the gint packed into USER_DATA. */
+void
+assert_key_length (gpointer key, gpointer value, gpointer user_data)
+{
+ gint max_length = GPOINTER_TO_INT (user_data); /* the caller packs it with GINT_TO_POINTER */
+
+ g_assert_cmpint (strlen (key), <=, max_length);
+}
+
+/*
+ * Parse SAMPLE_TEXT with max words to index = 5 and min word length = 3; expect exactly 5 words.
+ */
+static void
+test_parser_text_max_words_to_index (void)
+{
+ GHashTable *result = NULL;
+
+ result = tracker_parser_text (result,
+ SAMPLE_TEXT,
+ 1, /* presumably the word weight - TODO confirm against tracker-parser.h */
+ language,
+ 5, /* max words to index */
+ 18, /* max length of the word */
+ 3, /* min length of the word */
+ FALSE, FALSE); /* Filter / Delimit */
+
+ g_assert_cmpint (g_hash_table_size (result), ==, 5);
+
+ tracker_parser_text_free (result);
+}
+
+/*
+ * Parse SAMPLE_TEXT with max word length = 6: no key may exceed 6 bytes and 7 words are expected.
+ */
+static void
+test_parser_text_max_length (void)
+{
+ GHashTable *result = NULL;
+ gint max_length;
+
+ max_length = 6;
+ result = tracker_parser_text (result,
+ SAMPLE_TEXT,
+ 1, /* presumably the word weight - TODO confirm against tracker-parser.h */
+ language,
+ 10, /* max words to index */
+ max_length, /* max length of the word */
+ 3, /* min length of the word */
+ FALSE, FALSE); /* Filter / Delimit */
+ g_hash_table_foreach (result, assert_key_length, GINT_TO_POINTER (max_length));
+ g_assert_cmpint (g_hash_table_size (result), ==, 7);
+
+ tracker_parser_text_free (result);
+}
+
+/*
+ * Filter numbers: "12345678" must be absent with the filter on (9 words) and present with it off (10 words).
+ */
+static void
+test_parser_text_filter_numbers (void)
+{
+ GHashTable *result = NULL;
+
+ /* Filtering numbers */
+ result = tracker_parser_text (result,
+ SAMPLE_TEXT,
+ 1, /* presumably the word weight - TODO confirm against tracker-parser.h */
+ language,
+ 100, /* max words to index */
+ 100, /* max length of the word */
+ 1, /* min length of the word */
+ TRUE, FALSE); /* Filter / Delimit */
+
+ g_assert (!g_hash_table_lookup (result, "12345678"));
+
+ g_assert_cmpint (g_hash_table_size (result), ==, 9);
+
+ tracker_parser_text_free (result);
+ result = NULL; /* reset so the second parse does not receive the freed table */
+
+ /* No filter */
+ result = tracker_parser_text (result,
+ SAMPLE_TEXT,
+ 1, /* same value as in the filtered run above */
+ language,
+ 100, /* max words to index */
+ 100, /* max length of the word */
+ 1, /* min length of the word */
+ FALSE, FALSE); /* Filter / Delimit */
+
+ g_assert_cmpint (g_hash_table_size (result), ==, 10);
+
+ g_assert (g_hash_table_lookup (result, "12345678"));
+
+ tracker_parser_text_free (result);
+ result = NULL;
+}
+/* Checks that the language exposes more than one stop word and that "after" is found once the language is "en". */
+static void
+test_parser_stop_words (void)
+{
+ GHashTable *stop_words;
+
+ /* Check we have the default stop words */
+ stop_words = tracker_language_get_stop_words (language);
+ g_assert (stop_words);
+ g_assert_cmpint (g_hash_table_size (stop_words), >, 1);
+
+ /* Set specific stop words to test */
+ tracker_config_set_language (config, "en");
+ g_assert (g_hash_table_lookup (stop_words, "after")); /* NOTE(review): uses the table fetched before the language change - confirm it is refreshed in place */
+
+}
+/* Test entry point: creates the shared config/language, registers the four test cases and returns g_test_run ()'s result. */
+int
+main (int argc, char **argv) {
+
+ int result;
+ /* g_thread_init () goes first: GLib of this period requires it before any other GLib call in a threaded program. */
+ g_thread_init (NULL);
+ g_type_init ();
+ g_test_init (&argc, &argv, NULL);
+
+ /* Init */
+ config = tracker_config_new ();
+ language = tracker_language_new (config);
+
+ g_test_add_func ("/libtracker-common/tracker-parser/parser_text/max_words_to_index",
+ test_parser_text_max_words_to_index);
+
+ g_test_add_func ("/libtracker-common/tracker-parser/parser_text/max_length",
+ test_parser_text_max_length);
+
+ g_test_add_func ("/libtracker-common/tracker-parser/parser_text/filter_numbers",
+ test_parser_text_filter_numbers);
+
+ g_test_add_func ("/libtracker-common/tracker-parser/stop_words",
+ test_parser_stop_words);
+
+ result = g_test_run ();
+
+ /* End */
+ g_object_unref (config);
+ g_object_unref (language);
+
+ return result;
+}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]