[tracker/fts-limits] Limit the word length in exact match queries but not in prefix ones.



commit 5a06f7f131ffb8b28b432335242ea54298966c0a
Author: Mikael Ottela <mikael ottela ixonos com>
Date:   Tue Feb 23 23:46:01 2010 +0200

    Limit the word length in exact match queries but not in prefix ones.

 configure.ac                                   |    8 +++--
 src/libtracker-fts/tracker-fts.c               |   43 ++++++++++++-----------
 src/libtracker-fts/tracker-parser.c            |   17 ++-------
 src/libtracker-fts/tracker-parser.h            |    9 +++--
 tests/libtracker-fts/Makefile.am               |    4 ++
 tests/libtracker-fts/limits/Makefile.am        |    8 ++++
 tests/libtracker-fts/limits/fts3limits-1.out   |    5 +++
 tests/libtracker-fts/limits/fts3limits-1.rq    |    1 +
 tests/libtracker-fts/limits/fts3limits-2.rq    |    1 +
 tests/libtracker-fts/limits/fts3limits-3.out   |    7 ++++
 tests/libtracker-fts/limits/fts3limits-3.rq    |    1 +
 tests/libtracker-fts/limits/fts3limits-4.out   |    1 +
 tests/libtracker-fts/limits/fts3limits-4.rq    |    1 +
 tests/libtracker-fts/limits/fts3limits-data.rq |   13 +++++++
 tests/libtracker-fts/tracker-fts-test.c        |    1 +
 15 files changed, 79 insertions(+), 41 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 065a37a..79b2252 100644
--- a/configure.ac
+++ b/configure.ac
@@ -945,7 +945,7 @@ fi
 
 if test "x$enable_tracker_search_bar" = "xyes"; then
    if test "x$have_tracker_search_bar" != "xyes"; then
-      AC_MSG_ERROR([Couldn't find tracker-search-bar dependencies ($APP_REQUIREMENTS $APPLET_REQUIREMENTS).])   
+      AC_MSG_ERROR([Couldn't find tracker-search-bar dependencies ($APP_REQUIREMENTS $APPLET_REQUIREMENTS).])
    fi
 fi
 
@@ -973,7 +973,7 @@ fi
 
 if test "x$enable_tracker_search_tool" = "xyes"; then
    if test "x$have_tracker_search_tool" != "xyes"; then
-      AC_MSG_ERROR([Couldn't find tracker-search-tool dependencies ($APP_REQUIREMENTS $VALA_REQUIREMENTS).])   
+      AC_MSG_ERROR([Couldn't find tracker-search-tool dependencies ($APP_REQUIREMENTS $VALA_REQUIREMENTS).])
    fi
 fi
 
@@ -1546,7 +1546,7 @@ fi
 
 AM_CONDITIONAL(HAVE_GTK_DOC, test "$enable_gtk_doc" = "yes")
 AM_CONDITIONAL(HAVE_GRAPHVIZ_FDP, test -n "$GRAPHVIZ_FDP")
-  
+
 ##################################################################
 # Check for older tracker project files which can cause problems
 ##################################################################
@@ -1674,6 +1674,8 @@ AC_CONFIG_FILES([
 	tests/libtracker-data/turtle/Makefile
 	tests/libtracker-db/Makefile
 	tests/libtracker-fts/Makefile
+	tests/libtracker-fts/limits/Makefile
+	tests/libtracker-fts/prefix/Makefile
 	tests/functional-tests/Makefile
 	tests/functional-tests/data/Makefile
 	tests/functional-tests/data/Music/Makefile
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index d52fae7..76083be 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -2323,6 +2323,7 @@ struct fulltext_vtab {
   TrackerParser *parser;	   /* tokenizer for inserts and queries */
   gboolean stop_words;
   int max_words;
+  int min_word_length;
 
   /* Precompiled statements which we keep as long as the table is
   ** open.
@@ -3358,8 +3359,8 @@ static int constructVtab(
   max_len = tracker_fts_config_get_max_word_length (config);
 
   v->max_words = tracker_fts_config_get_max_words_to_index (config);
-
-  v->parser = tracker_parser_new (language, max_len, min_len);
+  v->min_word_length = min_len;
+  v->parser = tracker_parser_new (language, max_len);
 
   /* disable stop words if TRACKER_FTS_STOP_WORDS is set to 0 - used by tests */
   v->stop_words = g_strcmp0 (g_getenv ("TRACKER_FTS_STOP_WORDS"), "0") != 0;
@@ -3629,7 +3630,7 @@ static void snippetOffsetsOfColumn(
   pVtab = pQuery->pFts;
   nColumn = pVtab->nColumn;
 
-  tracker_parser_reset (pVtab->parser, zDoc, nDoc, FALSE, TRUE, pVtab->stop_words, FALSE, FALSE);
+  tracker_parser_reset (pVtab->parser, zDoc, nDoc, FALSE, TRUE, pVtab->stop_words, FALSE);
 
   aTerm = pQuery->pTerms;
   nTerm = pQuery->nTerms;
@@ -4324,7 +4325,7 @@ static int tokenizeSegment(
   int iCol;
   int nTerm = 1;
 
-  tracker_parser_reset (parser, pSegment, nSegment, FALSE, TRUE, v->stop_words, TRUE, FALSE);
+  tracker_parser_reset (parser, pSegment, nSegment, FALSE, TRUE, v->stop_words, TRUE);
 
   while( 1 ){
     const char *pToken;
@@ -4332,10 +4333,10 @@ static int tokenizeSegment(
 
 
     pToken = tracker_parser_next (parser, &iPos,
-				     &iBegin,
-				     &iEnd,
-				     &stop_word,
-				     &nToken);
+				  &iBegin,
+				  &iEnd,
+				  &stop_word,
+				  &nToken);
     if (!pToken) {
       break;
      }
@@ -4363,10 +4364,10 @@ static int tokenizeSegment(
       continue;
     }
     if( !inPhrase && pQuery->nTerms>0 && !pQuery->nextIsOr && nToken==4
-      && pToken[0]=='n'
-      && pToken[1]=='e'
-      && pToken[2]=='a'
-      && pToken[3]=='r'
+	&& pToken[0]=='n'
+	&& pToken[1]=='e'
+	&& pToken[2]=='a'
+	&& pToken[3]=='r'
     ){
       QueryTerm *pTerm = &pQuery->pTerms[pQuery->nTerms-1];
       if( (iBegin+6)<nSegment
@@ -4380,10 +4381,10 @@ static int tokenizeSegment(
 	  iEnd++;
 	}
 	pToken = tracker_parser_next (parser, &iPos,
-				     &iBegin,
-				     &iEnd,
-				     &stop_word,
-				     &nToken);
+				      &iBegin,
+				      &iEnd,
+				      &stop_word,
+				      &nToken);
 	if (!pToken) {
 	  break;
 	}
@@ -4769,7 +4770,7 @@ int Catid,
 
   if (!zText) return SQLITE_OK;
 
-  tracker_parser_reset (parser, zText, strlen (zText), FALSE, TRUE, v->stop_words, FALSE, limit_word_length);
+  tracker_parser_reset (parser, zText, strlen (zText), FALSE, TRUE, v->stop_words, FALSE);
 
   while( 1 ){
 
@@ -4782,16 +4783,16 @@ int Catid,
 	break;
    }
 
+   if (limit_word_length && nTokenBytes < v->min_word_length) {
+	continue;
+   }
+
   // printf("token being indexed  is %s, begin is %d, end is %d and length is %d\n", pToken, iStartOffset, iEndOffset, nTokenBytes);
 
    if (stop_word) {
 	continue;
    }
 
-
-
-
-
     /* Positions can't be negative; we use -1 as a terminator
      * internally.  Token can't be NULL or empty. */
     if( iPosition<0 || pToken == NULL || nTokenBytes == 0 ){
diff --git a/src/libtracker-fts/tracker-parser.c b/src/libtracker-fts/tracker-parser.c
index fb71ed9..84eda5a 100644
--- a/src/libtracker-fts/tracker-parser.c
+++ b/src/libtracker-fts/tracker-parser.c
@@ -79,10 +79,8 @@ struct TrackerParser {
 	gboolean               enable_stop_words;
 	guint                  max_words_to_index;
 	guint                  max_word_length;
-	guint                  min_word_length;
 	gboolean               delimit_words;
 	gboolean               parse_reserved_words;
-	gboolean               limit_word_length;
 
 	/* Private members */
 	gchar                   *word;
@@ -324,8 +322,6 @@ parser_next (TrackerParser *parser,
 				}
 
 				if (!is_valid ||
-				    (parser->limit_word_length &&
-				     length < parser->min_word_length) ||
 				    word_type == TRACKER_PARSER_WORD_NUM) {
 					word_type = TRACKER_PARSER_WORD_IGNORE;
 					is_valid = TRUE;
@@ -364,7 +360,7 @@ parser_next (TrackerParser *parser,
 			}
 		}
 
-		if (parser->limit_word_length && length >= parser->max_word_length) {
+		if (length >= parser->max_word_length) {
 			continue;
 		}
 
@@ -462,21 +458,18 @@ parser_next (TrackerParser *parser,
 
 TrackerParser *
 tracker_parser_new (TrackerLanguage *language,
-                    gint             max_word_length,
-                    gint             min_word_length)
+                    gint             max_word_length)
 {
 	TrackerParser *parser;
 
 	g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
-	g_return_val_if_fail (min_word_length > 0, NULL);
-	g_return_val_if_fail (min_word_length < max_word_length, NULL);
+	g_return_val_if_fail (max_word_length > 0, NULL);
 
 	parser = g_new0 (TrackerParser, 1);
 
 	parser->language = g_object_ref (language);
 
 	parser->max_word_length = max_word_length;
-	parser->min_word_length = min_word_length;
 	parser->word_length = 0;
 	parser->attrs = NULL;
 
@@ -506,8 +499,7 @@ tracker_parser_reset (TrackerParser *parser,
                       gboolean       delimit_words,
                       gboolean       enable_stemmer,
                       gboolean       enable_stop_words,
-                      gboolean       parse_reserved_words,
-		      gboolean       limit_word_length)
+                      gboolean       parse_reserved_words)
 {
 	g_return_if_fail (parser != NULL);
 	g_return_if_fail (txt != NULL);
@@ -522,7 +514,6 @@ tracker_parser_reset (TrackerParser *parser,
 	parser->txt_size = txt_size;
 	parser->txt = txt;
 	parser->parse_reserved_words = parse_reserved_words;
-	parser->limit_word_length = limit_word_length;
 
 	g_free (parser->word);
 	parser->word = NULL;
diff --git a/src/libtracker-fts/tracker-parser.h b/src/libtracker-fts/tracker-parser.h
index e16123d..f6503ac 100644
--- a/src/libtracker-fts/tracker-parser.h
+++ b/src/libtracker-fts/tracker-parser.h
@@ -30,22 +30,23 @@ G_BEGIN_DECLS
 typedef struct TrackerParser TrackerParser;
 
 TrackerParser *tracker_parser_new             (TrackerLanguage *language,
-                                               gint             max_word_length,
-                                               gint             min_word_length);
+                                               gint             max_word_length);
+
 void           tracker_parser_reset           (TrackerParser   *parser,
                                                const gchar     *txt,
                                                gint             txt_size,
                                                gboolean                 delimit_words,
                                                gboolean                 enable_stemmer,
                                                gboolean                 enable_stop_words,
-                                               gboolean                 parse_reserved_words,
-					       gboolean                 limit_word_length);
+                                               gboolean                 parse_reserved_words);
+
 const gchar *  tracker_parser_next            (TrackerParser   *parser,
                                                gint            *position,
                                                gint            *byte_offset_start,
                                                gint            *byte_offset_end,
                                                gboolean        *stop_word,
                                                gint            *word_length);
+
 gchar *        tracker_parser_process_word    (TrackerParser   *parser,
                                                const char      *word,
                                                gint             length,
diff --git a/tests/libtracker-fts/Makefile.am b/tests/libtracker-fts/Makefile.am
index 9fa6c92..ac8ec9c 100644
--- a/tests/libtracker-fts/Makefile.am
+++ b/tests/libtracker-fts/Makefile.am
@@ -1,5 +1,9 @@
 include $(top_srcdir)/Makefile.decl
 
+SUBDIRS = 			\
+	limits			\
+	prefix
+
 noinst_PROGRAMS = $(TEST_PROGS)
 
 TEST_PROGS += 								\
diff --git a/tests/libtracker-fts/limits/Makefile.am b/tests/libtracker-fts/limits/Makefile.am
new file mode 100644
index 0000000..0e6788b
--- /dev/null
+++ b/tests/libtracker-fts/limits/Makefile.am
@@ -0,0 +1,8 @@
+include $(top_srcdir)/Makefile.decl
+
+EXTRA_DIST = 				\
+	fts3limits-data.rq		\
+	fts3limits-1.rq			\
+	fts3limits-2.rq			\
+	fts3limits-3.rq			\
+	fts3limits-4.rq
diff --git a/tests/libtracker-fts/limits/fts3limits-1.out b/tests/libtracker-fts/limits/fts3limits-1.out
new file mode 100644
index 0000000..85d8792
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-1.out
@@ -0,0 +1,5 @@
+"http://www.example.org/test#3";
+"http://www.example.org/test#4";
+"http://www.example.org/test#5";
+"http://www.example.org/test#6";
+"http://www.example.org/test#8";
diff --git a/tests/libtracker-fts/limits/fts3limits-1.rq b/tests/libtracker-fts/limits/fts3limits-1.rq
new file mode 100644
index 0000000..7127fa2
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-1.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "tr*" }
diff --git a/tests/libtracker-fts/limits/fts3limits-2.out b/tests/libtracker-fts/limits/fts3limits-2.out
new file mode 100644
index 0000000..e69de29
diff --git a/tests/libtracker-fts/limits/fts3limits-2.rq b/tests/libtracker-fts/limits/fts3limits-2.rq
new file mode 100644
index 0000000..edd0348
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-2.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "tr" }
diff --git a/tests/libtracker-fts/limits/fts3limits-3.out b/tests/libtracker-fts/limits/fts3limits-3.out
new file mode 100644
index 0000000..f18d2cd
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-3.out
@@ -0,0 +1,7 @@
+"http://www.example.org/test#2";
+"http://www.example.org/test#3";
+"http://www.example.org/test#4";
+"http://www.example.org/test#5";
+"http://www.example.org/test#6";
+"http://www.example.org/test#8";
+"http://www.example.org/test#9";
diff --git a/tests/libtracker-fts/limits/fts3limits-3.rq b/tests/libtracker-fts/limits/fts3limits-3.rq
new file mode 100644
index 0000000..48f43fa
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-3.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "pr*" }
diff --git a/tests/libtracker-fts/limits/fts3limits-4.out b/tests/libtracker-fts/limits/fts3limits-4.out
new file mode 100644
index 0000000..cde9bca
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-4.out
@@ -0,0 +1 @@
+"http://www.example.org/test#2";
diff --git a/tests/libtracker-fts/limits/fts3limits-4.rq b/tests/libtracker-fts/limits/fts3limits-4.rq
new file mode 100644
index 0000000..31cbc50
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-4.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "pr" }
diff --git a/tests/libtracker-fts/limits/fts3limits-data.rq b/tests/libtracker-fts/limits/fts3limits-data.rq
new file mode 100644
index 0000000..2290ed5
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-data.rq
@@ -0,0 +1,13 @@
+INSERT {
+	test:1 a test:A ; test:p "t"                            ; test:o "p" .
+	test:2 a test:A ; test:p "tr"                           ; test:o "pr" .
+	test:3 a test:A ; test:p "tra"                          ; test:o "pra" .
+	test:4 a test:A ; test:p "tracker test"                 ; test:o "pracker pest" .
+	test:5 a test:A ; test:p "tracking tester"              ; test:o "pracking pester" .
+	test:6 a test:A ; test:p "trash trash more trash"       ; test:o "prash prash more prash" .
+	test:7 a test:A ; test:p "racker ester"                 ; test:o "racker ester" .
+	test:8 a test:A ; test:p "TeStiNg TraCkEr"              ; test:o "PeStiNg PraCkEr" .
+	test:9 a test:A ; test:p "Prefix search with content"   ; test:o "Search with content" .
+	test:10 a test:A ; test:p "...and a one bit more here"  ; test:o "...and a one bit more here" .
+}
+
diff --git a/tests/libtracker-fts/tracker-fts-test.c b/tests/libtracker-fts/tracker-fts-test.c
index c03c36d..dc1cc77 100644
--- a/tests/libtracker-fts/tracker-fts-test.c
+++ b/tests/libtracker-fts/tracker-fts-test.c
@@ -44,6 +44,7 @@ const TestInfo tests[] = {
 	{ "fts3aa", 2 },
 	{ "fts3ae", 1 },
 	{ "prefix/fts3prefix", 3 },
+	{ "limits/fts3limits", 4 },
 	{ NULL }
 };
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]