[tracker/parser-unicode-libs-review] FTS parser: added unit tests



commit 76325e8a9a4b32dcbc46ad69cab41053465f6068
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Mon May 17 13:24:18 2010 +0200

    FTS parser: added unit tests
    
     * Tests for normalization issues
     * Tests for unaccenting issues
     * Tests for casefolding issues
     * Tests for output number of words issues

 tests/libtracker-fts/Makefile.am           |    7 +-
 tests/libtracker-fts/tracker-parser-test.c |  290 ++++++++++++++++++++++++++++
 2 files changed, 296 insertions(+), 1 deletions(-)
---
diff --git a/tests/libtracker-fts/Makefile.am b/tests/libtracker-fts/Makefile.am
index 6367b4d..ea5c17e 100644
--- a/tests/libtracker-fts/Makefile.am
+++ b/tests/libtracker-fts/Makefile.am
@@ -10,7 +10,8 @@ noinst_PROGRAMS = 							\
 
 
 TEST_PROGS += 								\
-	tracker-fts-test
+	tracker-fts-test 						\
+	tracker-parser-test
 
 INCLUDES = 								\
 	-DTRACKER_COMPILATION						\
@@ -43,6 +44,10 @@ tracker_fts_test_SOURCES = tracker-fts-test.c
 
 tracker_fts_test_LDADD = $(common_ldadd)
 
+tracker_parser_test_SOURCES = tracker-parser-test.c
+
+tracker_parser_test_LDADD = $(common_ldadd)
+
 tracker_parser_SOURCES = tracker-parser.c
 
 tracker_parser_LDADD = $(common_ldadd)
diff --git a/tests/libtracker-fts/tracker-parser-test.c b/tests/libtracker-fts/tracker-parser-test.c
new file mode 100644
index 0000000..820173f
--- /dev/null
+++ b/tests/libtracker-fts/tracker-parser-test.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright (C) 2010, Nokia <ivan frade nokia com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include <glib.h>
+#include <gio/gio.h>
+
+#include <libtracker-fts/tracker-parser.h>
+
+/* -------------- COMMON FOR ALL TESTS ----------------- */
+
+/* Per-test fixture shared by every parser test case */
+typedef struct {
+	/* Parser under test; assigned by test_common_setup() */
+	TrackerParser *parser;
+
+	/* Default parser options, all given their values in test_common_setup() */
+	gint max_word_length;         /* handed to tracker_parser_new() */
+	gboolean delimit_words;       /* the flags below are forwarded to tracker_parser_reset() */
+	gboolean enable_stemmer;      /* expected_word_check() passes FALSE instead of this one */
+	gboolean enable_stop_words;
+	gboolean skip_reserved_words;
+	gboolean skip_numbers;        /* expected_nwords_check() uses the per-test value instead */
+} TrackerParserTestFixture;
+
+/* Common setup for all tests: stores the default parser options in the
+ * fixture and creates the parser; a critical is logged if either step fails */
+static void
+test_common_setup (TrackerParserTestFixture *fixture,
+                   gconstpointer data)
+{
+	TrackerLanguage  *language;
+
+	/* Setup language for parser */
+	language = tracker_language_new (NULL);
+	if (!language) {
+		g_critical ("Language setup failed!");
+		return;
+	}
+
+	/* Default conf parameters */
+	fixture->max_word_length = 50;
+	fixture->delimit_words = TRUE;
+	fixture->enable_stemmer = TRUE;
+	fixture->enable_stop_words = TRUE;
+	fixture->skip_reserved_words = TRUE;
+	fixture->skip_numbers = TRUE;
+
+	/* Create the parser. Our language reference is dropped before the result
+	 * is checked, so it no longer leaks when parser creation fails. */
+	fixture->parser = tracker_parser_new (language,
+	                                      fixture->max_word_length);
+	g_object_unref (language);
+	if (!fixture->parser) {
+		g_critical ("Parser creation failed!");
+	}
+}
+
+/* Common teardown for all tests: frees the fixture's parser when one is set */
+static void
+test_common_teardown (TrackerParserTestFixture *fixture, gconstpointer data)
+{
+	if (fixture->parser == NULL) {
+		return; /* nothing to free */
+	}
+	tracker_parser_free (fixture->parser);
+}
+
+/* -------------- EXPECTED NUMBER OF WORDS TESTS ----------------- */
+
+/* One expected-nwords case: input text, the skip_numbers flag to parse
+ * it with, and the number of words the parser must return for it */
+typedef struct TestDataExpectedNWords {
+	const gchar *str;
+	gboolean skip_numbers;
+	guint expected_nwords;
+} TestDataExpectedNWords;
+
+/* Shared body of the expected-nwords tests: runs the parser over the test
+ * string and asserts that it yields exactly the expected number of words */
+static void
+expected_nwords_check (TrackerParserTestFixture *fixture,
+                       gconstpointer data)
+{
+	const TestDataExpectedNWords *testdata = data;
+	const gchar *word;
+	gint pos;
+	gint start_offset;
+	gint end_offset;
+	gint length;
+	gboolean is_stop_word;
+	guint nwords = 0;
+
+	/* Feed the test string to the parser; skip_numbers comes from the
+	 * test case, every other option from the fixture defaults */
+	tracker_parser_reset (fixture->parser, testdata->str, strlen (testdata->str),
+	                      fixture->delimit_words, fixture->enable_stemmer,
+	                      fixture->enable_stop_words, fixture->skip_reserved_words,
+	                      testdata->skip_numbers);
+
+	/* Pull words until the parser returns NULL, counting each one; the
+	 * per-word details it reports are not examined by this test */
+	for (;;) {
+		word = tracker_parser_next (fixture->parser,
+		                            &pos, &start_offset, &end_offset,
+		                            &is_stop_word, &length);
+		if (word == NULL) {
+			break;
+		}
+		nwords++;
+	}
+
+	/* The number of words produced must match the expected count */
+	g_assert_cmpuint (nwords, == , testdata->expected_nwords);
+}
+
+/* -------------- EXPECTED WORD TESTS ----------------- */
+
+/* One expected-word case: the input text and the value that the first
+ * word returned by the parser is compared against */
+typedef struct TestDataExpectedWord {
+	const gchar *str;
+	const gchar *expected;
+} TestDataExpectedWord;
+
+/* Shared body of the expected-word tests: parses the test string with
+ * stemming disabled and compares the first word returned to the expected one */
+static void
+expected_word_check (TrackerParserTestFixture *fixture,
+                     gconstpointer data)
+{
+	const TestDataExpectedWord *testdata = data;
+	const gchar *word;
+	gboolean is_stop_word;
+	gint pos, length;
+	gint start_offset, end_offset;
+
+	/* Point the parser at the test string. Stemming is forced off (the
+	 * fifth argument) rather than taken from the fixture; the remaining
+	 * options are the fixture defaults. */
+	tracker_parser_reset (fixture->parser, testdata->str, strlen (testdata->str),
+	                      fixture->delimit_words,
+	                      FALSE,
+	                      fixture->enable_stop_words,
+	                      fixture->skip_reserved_words,
+	                      fixture->skip_numbers);
+
+	/* Only the first word is fetched; the position, offsets, stop-word
+	 * flag and length written by the parser are not checked */
+	word = tracker_parser_next (fixture->parser,
+	                            &pos,
+	                            &start_offset,
+	                            &end_offset,
+	                            &is_stop_word,
+	                            &length);
+
+	/* The word produced by the parser must equal the expected string */
+	g_assert_cmpstr (word, == , testdata->expected);
+}
+
+/* -------------- LIST OF TESTS ----------------- */
+
+/* Normalization tests: precomposed and decomposed (combining U+0301) UTF-8 spellings of one word */
+static const TestDataExpectedWord test_data_normalization[] = {
+	{ "école",                "ecole" },
+	{ "ÉCOLE",                "ecole" },
+	{ "École",                "ecole" },
+	{ "e" "\xCC\x81" "cole",  "ecole" },
+	{ "E" "\xCC\x81" "COLE",  "ecole" },
+	{ "E" "\xCC\x81" "cole",  "ecole" },
+	{ NULL,                   NULL    }
+};
+
+/* Unaccenting-related tests: accented input, expected unaccented lowercase word */
+static const TestDataExpectedWord test_data_unaccent[] = {
+	{ "Murciélago", "murcielago" },
+	{ "camión", "camion" },
+	{ "desagüe", "desague" },
+	{ NULL, NULL } /* sentinel: main() stops at the first NULL str */
+};
+
+/* Casefolding-related tests: every spelling, including the sharp s, must fold to "gross" */
+static const TestDataExpectedWord test_data_casefolding[] = {
+	{ "gross", "gross" },
+	{ "GROSS", "gross" },
+	{ "GrOsS", "gross" },
+	{ "groß",  "gross" },
+	{ NULL,    NULL    }
+};
+
+/* Number of expected words tests: { UTF-8 text, skip_numbers, expected word count } */
+static const TestDataExpectedNWords test_data_nwords[] = {
+	{ "The quick (\"brown\") fox can’t jump 32.3 feet, right?", TRUE,   8 },
+	{ "The quick (\"brown\") fox can’t jump 32.3 feet, right?", FALSE,  9 },
+	{ "ホモ・サピエンス",                                          TRUE,   2 }, /* katakana */
+	{ "本州最主流的风味",                                          TRUE,   8 }, /* chinese */
+	{ "Американские суда находятся в международных водах.",     TRUE,   6 }, /* russian */
+	{ "Bần chỉ là một anh nghèo xác",                           TRUE,   7 }, /* vietnamese */
+	{ "ホモ・サピエンス 本州最主流的风味 katakana, chinese, english",   TRUE,  13 }, /* mixed */
+	{ NULL,                                                     FALSE,  0 }
+};
+
+/* Test program entry point: initializes the GLib type, thread and test
+ * systems, registers one test case per row of each data table (up to the
+ * row whose str is NULL), then returns the result of g_test_run() */
+int
+main (int argc, char **argv)
+{
+	gchar *path;
+	gint n;
+
+	g_type_init ();
+	if (!g_thread_supported ()) {
+		g_thread_init (NULL);
+	}
+	g_test_init (&argc, &argv, NULL);
+
+	/* Normalization cases: each table row is registered as its own test,
+	 * with the row index appended to the test path */
+	n = 0;
+	while (test_data_normalization[n].str != NULL) {
+		path = g_strdup_printf ("/libtracker-fts/parser/normalization_%d", n);
+		g_test_add (path,
+		            TrackerParserTestFixture, &test_data_normalization[n],
+		            test_common_setup, expected_word_check,
+		            test_common_teardown);
+		g_free (path);
+		n++;
+	}
+
+	/* Unaccent cases: same registration scheme, checked with
+	 * expected_word_check() */
+	n = 0;
+	while (test_data_unaccent[n].str != NULL) {
+		path = g_strdup_printf ("/libtracker-fts/parser/unaccent_%d", n);
+		g_test_add (path,
+		            TrackerParserTestFixture, &test_data_unaccent[n],
+		            test_common_setup, expected_word_check,
+		            test_common_teardown);
+		g_free (path);
+		n++;
+	}
+
+	/* Casefolding cases: same registration scheme, checked with
+	 * expected_word_check() */
+	n = 0;
+	while (test_data_casefolding[n].str != NULL) {
+		path = g_strdup_printf ("/libtracker-fts/parser/casefolding_%d", n);
+		g_test_add (path,
+		            TrackerParserTestFixture, &test_data_casefolding[n],
+		            test_common_setup, expected_word_check,
+		            test_common_teardown);
+		g_free (path);
+		n++;
+	}
+
+	/* Expected-number-of-words cases: same registration scheme, but
+	 * checked with expected_nwords_check() */
+	n = 0;
+	while (test_data_nwords[n].str != NULL) {
+		path = g_strdup_printf ("/libtracker-fts/parser/nwords_%d", n);
+		g_test_add (path,
+		            TrackerParserTestFixture, &test_data_nwords[n],
+		            test_common_setup, expected_nwords_check,
+		            test_common_teardown);
+		g_free (path);
+		n++;
+	}
+
+	return g_test_run ();
+}



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]