tracker r1953 - in branches/indexer-split: . src/libtracker-common tests/libtracker-common



Author: ifrade
Date: Mon Jul 28 12:57:11 2008
New Revision: 1953
URL: http://svn.gnome.org/viewvc/tracker?rev=1953&view=rev

Log:
Added stop words in tracker_language on creation. Fixed minor things

Added:
   branches/indexer-split/tests/libtracker-common/tracker-parser-test.c
Modified:
   branches/indexer-split/ChangeLog
   branches/indexer-split/src/libtracker-common/tracker-language.c
   branches/indexer-split/tests/libtracker-common/Makefile.am

Modified: branches/indexer-split/src/libtracker-common/tracker-language.c
==============================================================================
--- branches/indexer-split/src/libtracker-common/tracker-language.c	(original)
+++ branches/indexer-split/src/libtracker-common/tracker-language.c	Mon Jul 28 12:57:11 2008
@@ -217,11 +217,10 @@
 	gchar *str;
 	gchar *filename;
 
-	str = g_strconcat (".", language_code, NULL);
+	str = g_strconcat ("stopwords.", language_code, NULL);
 	filename = g_build_filename (SHAREDIR,
 				     "tracker",
 				     "languages",
-				     "stopwords",
 				     str,
 				     NULL);
 	g_free (str);
@@ -269,7 +268,7 @@
 
 	content = g_mapped_file_get_contents (mapped_file);
 	words = g_strsplit_set (content, "\n" , -1);
-	g_free (content);
+
 	g_mapped_file_free (mapped_file);
 
 	/* FIXME: Shouldn't clear the hash table first? */
@@ -342,11 +341,18 @@
 TrackerLanguage *
 tracker_language_new (TrackerConfig *config)
 {
+	TrackerLanguage *language;
 	g_return_val_if_fail (TRACKER_IS_CONFIG (config), NULL);
 
-	return g_object_new (TRACKER_TYPE_LANGUAGE,
-			     "config", config,
-			     NULL);
+	language = g_object_new (TRACKER_TYPE_LANGUAGE,
+				 "config", config,
+				 NULL);
+	
+	language_set_stopword_list (language,
+				    tracker_config_get_language (config));
+	
+	return language;
+
 }
 
 TrackerConfig *

Modified: branches/indexer-split/tests/libtracker-common/Makefile.am
==============================================================================
--- branches/indexer-split/tests/libtracker-common/Makefile.am	(original)
+++ branches/indexer-split/tests/libtracker-common/Makefile.am	Mon Jul 28 12:57:11 2008
@@ -2,7 +2,11 @@
 
 noinst_PROGRAMS = $(TEST_PROGS)
 
-TEST_PROGS += tracker-ontology tracker-dbus tracker-type-utils tracker-file-utils
+TEST_PROGS += tracker-ontology \
+	tracker-dbus \
+	tracker-type-utils \
+	tracker-file-utils \
+	tracker-parser
 
 INCLUDES = 				\
 	-g 				\
@@ -58,3 +62,16 @@
 	$(GMODULE_LIBS)							\
 	$(GTHREAD_LIBS)							\
 	$(GLIB2_LIBS)							
+
+
+tracker_parser_SOURCES = \
+	tracker-parser-test.c 
+
+tracker_parser_LDADD =							\
+	$(top_builddir)/src/libtracker-common/libtracker-common.la 	\
+	$(top_builddir)/tests/common/libtracker-testcommon.la 		\
+	$(top_builddir)/src/xdgmime/libxdgmime.la 			\
+	$(top_builddir)/src/libstemmer/libstemmer-private.la 		\
+	$(GMODULE_LIBS)							\
+	$(GTHREAD_LIBS)							\
+	$(GLIB2_LIBS)							

Added: branches/indexer-split/tests/libtracker-common/tracker-parser-test.c
==============================================================================
--- (empty file)
+++ branches/indexer-split/tests/libtracker-common/tracker-parser-test.c	Mon Jul 28 12:57:11 2008
@@ -0,0 +1,171 @@
+#include <glib.h>
+#include <glib/gtestutils.h>
+#include <string.h>
+
+#include <libtracker-common/tracker-config.h>
+#include <libtracker-common/tracker-language.h>
+#include <libtracker-common/tracker-parser.h>
+
+/*
+ * 10 tokens in total: 9 words plus the number "12345678".
+ * len(word) > 3 : 6 words; longest word ("collection"): 10 chars.
+ */
+#define SAMPLE_TEXT "Here a good collection of various words to parse 12345678"
+
+/* Fixtures shared by every test case; main() creates and releases them. */
+static TrackerConfig *config;
+static TrackerLanguage *language;
+
+/* Hash-table foreach callback: prints the key as a string; not used by any test here. */
+void
+print_key (gpointer key, gpointer value, gpointer user_data)
+{
+        g_print ("word: %s\n", (gchar *)key);
+}
+
+void
+assert_key_length (gpointer key, gpointer value, gpointer user_data)
+{
+        /* Foreach callback: user_data carries the byte-length limit (GINT_TO_POINTER). */
+        gint limit = GPOINTER_TO_INT (user_data);
+        g_assert_cmpint (strlen (key), <=, limit);
+}
+
+/*
+ * Cap indexing at 5 words (min word length 3) and expect exactly 5 entries.
+ */
+static void
+test_parser_text_max_words_to_index (void)
+{
+        GHashTable *words;
+
+        words = tracker_parser_text (NULL,
+                                     SAMPLE_TEXT,
+                                     1,
+                                     language,
+                                     5, /* max words to index */
+                                     18, /* max length of the word */
+                                     3, /* min length of the word */
+                                     FALSE, FALSE); /* Filter / Delimit */
+
+        g_assert_cmpint (g_hash_table_size (words), ==, 5);
+
+        tracker_parser_text_free (words);
+}
+
+/*
+ * Cap word length at 6 and check that no key exceeds it (7 entries expected).
+ */
+static void
+test_parser_text_max_length (void)
+{
+        GHashTable *words;
+        gint max_length = 6;
+
+        words = tracker_parser_text (NULL,
+                                     SAMPLE_TEXT,
+                                     1,
+                                     language,
+                                     10, /* max words to index */
+                                     max_length, /* max length of the word */
+                                     3, /* min length of the word */
+                                     FALSE, FALSE); /* Filter / Delimit */
+
+        /* assert_key_length() receives the limit through user_data. */
+        g_hash_table_foreach (words, assert_key_length, GINT_TO_POINTER (max_length));
+        g_assert_cmpint (g_hash_table_size (words), ==, 7);
+        tracker_parser_text_free (words);
+}
+
+/*
+ * Parse SAMPLE_TEXT with and without the number filter: "12345678" must
+ * only be indexed when filtering is off (10 entries instead of 9).
+ */
+static void
+test_parser_text_filter_numbers (void)
+{
+        GHashTable *filtered;
+        GHashTable *unfiltered;
+
+        /* Filtering numbers: "12345678" must be dropped, leaving
+         * nine entries. */
+        filtered = tracker_parser_text (NULL,
+                                        SAMPLE_TEXT,
+                                        1,
+                                        language,
+                                        100, /* max words to index */
+                                        100, /* max length of the word */
+                                        1, /* min length of the word */
+                                        TRUE, FALSE); /* Filter / Delimit */
+
+        g_assert (!g_hash_table_lookup (filtered, "12345678"));
+        g_assert_cmpint (g_hash_table_size (filtered), ==, 9);
+
+        tracker_parser_text_free (filtered);
+
+        /* No filter: the number is indexed too, giving ten
+         * entries. */
+        unfiltered = tracker_parser_text (NULL,
+                                          SAMPLE_TEXT,
+                                          1,
+                                          language,
+                                          100, /* max words to index */
+                                          100, /* max length of the word */
+                                          1, /* min length of the word */
+                                          FALSE, FALSE); /* Filter / Delimit */
+
+        g_assert_cmpint (g_hash_table_size (unfiltered), ==, 10);
+        g_assert (g_hash_table_lookup (unfiltered, "12345678"));
+
+        tracker_parser_text_free (unfiltered);
+}
+
+static void
+test_parser_stop_words (void)
+{
+        GHashTable *stop_words;
+
+        /* A freshly created language object must already carry stop words */
+        stop_words = tracker_language_get_stop_words (language);
+        g_assert (stop_words);
+        g_assert_cmpint (g_hash_table_size (stop_words), >, 1);
+
+        /* Switch to "en"; re-fetch, as the old pointer may be stale (TODO confirm) */
+        tracker_config_set_language (config, "en");
+        stop_words = tracker_language_get_stop_words (language);
+        g_assert (g_hash_table_lookup (stop_words, "after"));
+}
+
+int
+main (int argc, char **argv)
+{
+        int result;
+
+        /* GLib requires g_thread_init() to precede any other GLib call. */
+        g_thread_init (NULL);
+        g_type_init ();
+        g_test_init (&argc, &argv, NULL);
+
+        /* Init: fixtures shared by all test cases */
+        config = tracker_config_new ();
+        language = tracker_language_new (config);
+
+        g_test_add_func ("/libtracker-common/tracker-parser/parser_text/max_words_to_index",
+                         test_parser_text_max_words_to_index);
+
+        g_test_add_func ("/libtracker-common/tracker-parser/parser_text/max_length",
+                         test_parser_text_max_length);
+
+        g_test_add_func ("/libtracker-common/tracker-parser/parser_text/filter_numbers",
+                         test_parser_text_filter_numbers);
+
+        g_test_add_func ("/libtracker-common/tracker-parser/stop_words",
+                         test_parser_stop_words);
+
+        result = g_test_run ();
+
+        /* End: drop the language first, it was built from the config */
+        g_object_unref (language);
+        g_object_unref (config);
+        return result;
+}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]