[tracker/fts-limits] Limit the word length in exact match queries but not in prefix ones.
- From: Mikael Ottela <mottela src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/fts-limits] Limit the word length in exact match queries but not in prefix ones.
- Date: Thu, 25 Feb 2010 09:00:20 +0000 (UTC)
commit 5a06f7f131ffb8b28b432335242ea54298966c0a
Author: Mikael Ottela <mikael ottela ixonos com>
Date: Tue Feb 23 23:46:01 2010 +0200
Limit the word length in exact match queries but not in prefix ones.
configure.ac | 8 +++--
src/libtracker-fts/tracker-fts.c | 43 ++++++++++++-----------
src/libtracker-fts/tracker-parser.c | 17 ++-------
src/libtracker-fts/tracker-parser.h | 9 +++--
tests/libtracker-fts/Makefile.am | 4 ++
tests/libtracker-fts/limits/Makefile.am | 8 ++++
tests/libtracker-fts/limits/fts3limits-1.out | 5 +++
tests/libtracker-fts/limits/fts3limits-1.rq | 1 +
tests/libtracker-fts/limits/fts3limits-2.rq | 1 +
tests/libtracker-fts/limits/fts3limits-3.out | 7 ++++
tests/libtracker-fts/limits/fts3limits-3.rq | 1 +
tests/libtracker-fts/limits/fts3limits-4.out | 1 +
tests/libtracker-fts/limits/fts3limits-4.rq | 1 +
tests/libtracker-fts/limits/fts3limits-data.rq | 13 +++++++
tests/libtracker-fts/tracker-fts-test.c | 1 +
15 files changed, 79 insertions(+), 41 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 065a37a..79b2252 100644
--- a/configure.ac
+++ b/configure.ac
@@ -945,7 +945,7 @@ fi
if test "x$enable_tracker_search_bar" = "xyes"; then
if test "x$have_tracker_search_bar" != "xyes"; then
- AC_MSG_ERROR([Couldn't find tracker-search-bar dependencies ($APP_REQUIREMENTS $APPLET_REQUIREMENTS).])
+ AC_MSG_ERROR([Couldn't find tracker-search-bar dependencies ($APP_REQUIREMENTS $APPLET_REQUIREMENTS).])
fi
fi
@@ -973,7 +973,7 @@ fi
if test "x$enable_tracker_search_tool" = "xyes"; then
if test "x$have_tracker_search_tool" != "xyes"; then
- AC_MSG_ERROR([Couldn't find tracker-search-tool dependencies ($APP_REQUIREMENTS $VALA_REQUIREMENTS).])
+ AC_MSG_ERROR([Couldn't find tracker-search-tool dependencies ($APP_REQUIREMENTS $VALA_REQUIREMENTS).])
fi
fi
@@ -1546,7 +1546,7 @@ fi
AM_CONDITIONAL(HAVE_GTK_DOC, test "$enable_gtk_doc" = "yes")
AM_CONDITIONAL(HAVE_GRAPHVIZ_FDP, test -n "$GRAPHVIZ_FDP")
-
+
##################################################################
# Check for older tracker project files which can cause problems
##################################################################
@@ -1674,6 +1674,8 @@ AC_CONFIG_FILES([
tests/libtracker-data/turtle/Makefile
tests/libtracker-db/Makefile
tests/libtracker-fts/Makefile
+ tests/libtracker-fts/limits/Makefile
+ tests/libtracker-fts/prefix/Makefile
tests/functional-tests/Makefile
tests/functional-tests/data/Makefile
tests/functional-tests/data/Music/Makefile
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index d52fae7..76083be 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -2323,6 +2323,7 @@ struct fulltext_vtab {
TrackerParser *parser; /* tokenizer for inserts and queries */
gboolean stop_words;
int max_words;
+ int min_word_length;
/* Precompiled statements which we keep as long as the table is
** open.
@@ -3358,8 +3359,8 @@ static int constructVtab(
max_len = tracker_fts_config_get_max_word_length (config);
v->max_words = tracker_fts_config_get_max_words_to_index (config);
-
- v->parser = tracker_parser_new (language, max_len, min_len);
+ v->min_word_length = min_len;
+ v->parser = tracker_parser_new (language, max_len);
/* disable stop words if TRACKER_FTS_STOP_WORDS is set to 0 - used by tests */
v->stop_words = g_strcmp0 (g_getenv ("TRACKER_FTS_STOP_WORDS"), "0") != 0;
@@ -3629,7 +3630,7 @@ static void snippetOffsetsOfColumn(
pVtab = pQuery->pFts;
nColumn = pVtab->nColumn;
- tracker_parser_reset (pVtab->parser, zDoc, nDoc, FALSE, TRUE, pVtab->stop_words, FALSE, FALSE);
+ tracker_parser_reset (pVtab->parser, zDoc, nDoc, FALSE, TRUE, pVtab->stop_words, FALSE);
aTerm = pQuery->pTerms;
nTerm = pQuery->nTerms;
@@ -4324,7 +4325,7 @@ static int tokenizeSegment(
int iCol;
int nTerm = 1;
- tracker_parser_reset (parser, pSegment, nSegment, FALSE, TRUE, v->stop_words, TRUE, FALSE);
+ tracker_parser_reset (parser, pSegment, nSegment, FALSE, TRUE, v->stop_words, TRUE);
while( 1 ){
const char *pToken;
@@ -4332,10 +4333,10 @@ static int tokenizeSegment(
pToken = tracker_parser_next (parser, &iPos,
- &iBegin,
- &iEnd,
- &stop_word,
- &nToken);
+ &iBegin,
+ &iEnd,
+ &stop_word,
+ &nToken);
if (!pToken) {
break;
}
@@ -4363,10 +4364,10 @@ static int tokenizeSegment(
continue;
}
if( !inPhrase && pQuery->nTerms>0 && !pQuery->nextIsOr && nToken==4
- && pToken[0]=='n'
- && pToken[1]=='e'
- && pToken[2]=='a'
- && pToken[3]=='r'
+ && pToken[0]=='n'
+ && pToken[1]=='e'
+ && pToken[2]=='a'
+ && pToken[3]=='r'
){
QueryTerm *pTerm = &pQuery->pTerms[pQuery->nTerms-1];
if( (iBegin+6)<nSegment
@@ -4380,10 +4381,10 @@ static int tokenizeSegment(
iEnd++;
}
pToken = tracker_parser_next (parser, &iPos,
- &iBegin,
- &iEnd,
- &stop_word,
- &nToken);
+ &iBegin,
+ &iEnd,
+ &stop_word,
+ &nToken);
if (!pToken) {
break;
}
@@ -4769,7 +4770,7 @@ int Catid,
if (!zText) return SQLITE_OK;
- tracker_parser_reset (parser, zText, strlen (zText), FALSE, TRUE, v->stop_words, FALSE, limit_word_length);
+ tracker_parser_reset (parser, zText, strlen (zText), FALSE, TRUE, v->stop_words, FALSE);
while( 1 ){
@@ -4782,16 +4783,16 @@ int Catid,
break;
}
+ if (limit_word_length && nTokenBytes < v->min_word_length) {
+ continue;
+ }
+
// printf("token being indexed is %s, begin is %d, end is %d and length is %d\n", pToken, iStartOffset, iEndOffset, nTokenBytes);
if (stop_word) {
continue;
}
-
-
-
-
/* Positions can't be negative; we use -1 as a terminator
* internally. Token can't be NULL or empty. */
if( iPosition<0 || pToken == NULL || nTokenBytes == 0 ){
diff --git a/src/libtracker-fts/tracker-parser.c b/src/libtracker-fts/tracker-parser.c
index fb71ed9..84eda5a 100644
--- a/src/libtracker-fts/tracker-parser.c
+++ b/src/libtracker-fts/tracker-parser.c
@@ -79,10 +79,8 @@ struct TrackerParser {
gboolean enable_stop_words;
guint max_words_to_index;
guint max_word_length;
- guint min_word_length;
gboolean delimit_words;
gboolean parse_reserved_words;
- gboolean limit_word_length;
/* Private members */
gchar *word;
@@ -324,8 +322,6 @@ parser_next (TrackerParser *parser,
}
if (!is_valid ||
- (parser->limit_word_length &&
- length < parser->min_word_length) ||
word_type == TRACKER_PARSER_WORD_NUM) {
word_type = TRACKER_PARSER_WORD_IGNORE;
is_valid = TRUE;
@@ -364,7 +360,7 @@ parser_next (TrackerParser *parser,
}
}
- if (parser->limit_word_length && length >= parser->max_word_length) {
+ if (length >= parser->max_word_length) {
continue;
}
@@ -462,21 +458,18 @@ parser_next (TrackerParser *parser,
TrackerParser *
tracker_parser_new (TrackerLanguage *language,
- gint max_word_length,
- gint min_word_length)
+ gint max_word_length)
{
TrackerParser *parser;
g_return_val_if_fail (TRACKER_IS_LANGUAGE (language), NULL);
- g_return_val_if_fail (min_word_length > 0, NULL);
- g_return_val_if_fail (min_word_length < max_word_length, NULL);
+ g_return_val_if_fail (max_word_length > 0, NULL);
parser = g_new0 (TrackerParser, 1);
parser->language = g_object_ref (language);
parser->max_word_length = max_word_length;
- parser->min_word_length = min_word_length;
parser->word_length = 0;
parser->attrs = NULL;
@@ -506,8 +499,7 @@ tracker_parser_reset (TrackerParser *parser,
gboolean delimit_words,
gboolean enable_stemmer,
gboolean enable_stop_words,
- gboolean parse_reserved_words,
- gboolean limit_word_length)
+ gboolean parse_reserved_words)
{
g_return_if_fail (parser != NULL);
g_return_if_fail (txt != NULL);
@@ -522,7 +514,6 @@ tracker_parser_reset (TrackerParser *parser,
parser->txt_size = txt_size;
parser->txt = txt;
parser->parse_reserved_words = parse_reserved_words;
- parser->limit_word_length = limit_word_length;
g_free (parser->word);
parser->word = NULL;
diff --git a/src/libtracker-fts/tracker-parser.h b/src/libtracker-fts/tracker-parser.h
index e16123d..f6503ac 100644
--- a/src/libtracker-fts/tracker-parser.h
+++ b/src/libtracker-fts/tracker-parser.h
@@ -30,22 +30,23 @@ G_BEGIN_DECLS
typedef struct TrackerParser TrackerParser;
TrackerParser *tracker_parser_new (TrackerLanguage *language,
- gint max_word_length,
- gint min_word_length);
+ gint max_word_length);
+
void tracker_parser_reset (TrackerParser *parser,
const gchar *txt,
gint txt_size,
gboolean delimit_words,
gboolean enable_stemmer,
gboolean enable_stop_words,
- gboolean parse_reserved_words,
- gboolean limit_word_length);
+ gboolean parse_reserved_words);
+
const gchar * tracker_parser_next (TrackerParser *parser,
gint *position,
gint *byte_offset_start,
gint *byte_offset_end,
gboolean *stop_word,
gint *word_length);
+
gchar * tracker_parser_process_word (TrackerParser *parser,
const char *word,
gint length,
diff --git a/tests/libtracker-fts/Makefile.am b/tests/libtracker-fts/Makefile.am
index 9fa6c92..ac8ec9c 100644
--- a/tests/libtracker-fts/Makefile.am
+++ b/tests/libtracker-fts/Makefile.am
@@ -1,5 +1,9 @@
include $(top_srcdir)/Makefile.decl
+SUBDIRS = \
+ limits \
+ prefix
+
noinst_PROGRAMS = $(TEST_PROGS)
TEST_PROGS += \
diff --git a/tests/libtracker-fts/limits/Makefile.am b/tests/libtracker-fts/limits/Makefile.am
new file mode 100644
index 0000000..0e6788b
--- /dev/null
+++ b/tests/libtracker-fts/limits/Makefile.am
@@ -0,0 +1,8 @@
+include $(top_srcdir)/Makefile.decl
+
+EXTRA_DIST = \
+ fts3limits-data.rq \
+ fts3limits-1.rq \
+ fts3limits-2.rq \
+ fts3limits-3.rq \
+ fts3limits-4.rq
diff --git a/tests/libtracker-fts/limits/fts3limits-1.out b/tests/libtracker-fts/limits/fts3limits-1.out
new file mode 100644
index 0000000..85d8792
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-1.out
@@ -0,0 +1,5 @@
+"http://www.example.org/test#3"
+"http://www.example.org/test#4"
+"http://www.example.org/test#5"
+"http://www.example.org/test#6"
+"http://www.example.org/test#8"
diff --git a/tests/libtracker-fts/limits/fts3limits-1.rq b/tests/libtracker-fts/limits/fts3limits-1.rq
new file mode 100644
index 0000000..7127fa2
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-1.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "tr*" }
diff --git a/tests/libtracker-fts/limits/fts3limits-2.out b/tests/libtracker-fts/limits/fts3limits-2.out
new file mode 100644
index 0000000..e69de29
diff --git a/tests/libtracker-fts/limits/fts3limits-2.rq b/tests/libtracker-fts/limits/fts3limits-2.rq
new file mode 100644
index 0000000..edd0348
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-2.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "tr" }
diff --git a/tests/libtracker-fts/limits/fts3limits-3.out b/tests/libtracker-fts/limits/fts3limits-3.out
new file mode 100644
index 0000000..f18d2cd
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-3.out
@@ -0,0 +1,7 @@
+"http://www.example.org/test#2"
+"http://www.example.org/test#3"
+"http://www.example.org/test#4"
+"http://www.example.org/test#5"
+"http://www.example.org/test#6"
+"http://www.example.org/test#8"
+"http://www.example.org/test#9"
diff --git a/tests/libtracker-fts/limits/fts3limits-3.rq b/tests/libtracker-fts/limits/fts3limits-3.rq
new file mode 100644
index 0000000..48f43fa
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-3.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "pr*" }
diff --git a/tests/libtracker-fts/limits/fts3limits-4.out b/tests/libtracker-fts/limits/fts3limits-4.out
new file mode 100644
index 0000000..cde9bca
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-4.out
@@ -0,0 +1 @@
+"http://www.example.org/test#2"
diff --git a/tests/libtracker-fts/limits/fts3limits-4.rq b/tests/libtracker-fts/limits/fts3limits-4.rq
new file mode 100644
index 0000000..31cbc50
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-4.rq
@@ -0,0 +1 @@
+SELECT ?o WHERE { ?o fts:match "pr" }
diff --git a/tests/libtracker-fts/limits/fts3limits-data.rq b/tests/libtracker-fts/limits/fts3limits-data.rq
new file mode 100644
index 0000000..2290ed5
--- /dev/null
+++ b/tests/libtracker-fts/limits/fts3limits-data.rq
@@ -0,0 +1,13 @@
+INSERT {
+ test:1 a test:A ; test:p "t" ; test:o "p" .
+ test:2 a test:A ; test:p "tr" ; test:o "pr" .
+ test:3 a test:A ; test:p "tra" ; test:o "pra" .
+ test:4 a test:A ; test:p "tracker test" ; test:o "pracker pest" .
+ test:5 a test:A ; test:p "tracking tester" ; test:o "pracking pester" .
+ test:6 a test:A ; test:p "trash trash more trash" ; test:o "prash prash more prash" .
+ test:7 a test:A ; test:p "racker ester" ; test:o "racker ester" .
+ test:8 a test:A ; test:p "TeStiNg TraCkEr" ; test:o "PeStiNg PraCkEr" .
+ test:9 a test:A ; test:p "Prefix search with content" ; test:o "Search with content" .
+ test:10 a test:A ; test:p "...and a one bit more here" ; test:o "...and a one bit more here" .
+}
+
diff --git a/tests/libtracker-fts/tracker-fts-test.c b/tests/libtracker-fts/tracker-fts-test.c
index c03c36d..dc1cc77 100644
--- a/tests/libtracker-fts/tracker-fts-test.c
+++ b/tests/libtracker-fts/tracker-fts-test.c
@@ -44,6 +44,7 @@ const TestInfo tests[] = {
{ "fts3aa", 2 },
{ "fts3ae", 1 },
{ "prefix/fts3prefix", 3 },
+ { "limits/fts3limits", 4 },
{ NULL }
};
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]