[tracker/rss-enclosures] Fixes GB#619535: MaxWords configuration in FTS config is not used
- From: Roberto Guido <rguido src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/rss-enclosures] Fixes GB#619535: MaxWords configuration in FTS config is not used
- Date: Sat, 26 Jun 2010 23:12:13 +0000 (UTC)
commit 2701c0ff02b30339c69b09dfb244be20f4285352
Author: Aleksander Morgado <aleksander lanedo com>
Date: Thu May 27 14:32:58 2010 +0200
Fixes GB#619535: MaxWords configuration in FTS config is not used
* Parsing loop now halts if configured max_words is reached.
src/libtracker-fts/tracker-fts.c | 35 +++++++++++++++++++++++++++++------
1 files changed, 29 insertions(+), 6 deletions(-)
---
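For context, the pattern applied below is the same in all three touched functions (the new FTSTRACE messages label them Snippets, Segments and Terms): the former unbounded while(1) tokenization loops gain a word counter and terminate once the configured max_words limit is reached. A minimal, self-contained sketch of that pattern follows; next_token() and every other name in it are hypothetical stand-ins for illustration, not the Tracker parser API used in the actual diff.

/*
 * Sketch only (NOT the Tracker parser API): count the tokens that are
 * accepted and stop once the configured maximum is reached, instead of
 * looping with while(1) until the tokenizer runs dry.
 */
#include <stdio.h>

/* hypothetical tokenizer: returns the next space-separated token,
 * or NULL when the text is exhausted */
static const char *
next_token (const char **cursor, int *len)
{
	const char *p = *cursor;

	while (*p == ' ')
		p++;
	if (*p == '\0')
		return NULL;

	const char *start = p;
	while (*p != '\0' && *p != ' ')
		p++;

	*len = (int) (p - start);
	*cursor = p;
	return start;
}

int
main (void)
{
	const char *text = "only the first few words get indexed here";
	const char *cursor = text;
	const char *token;
	int len;
	int max_words = 4;	/* plays the role of v->max_words from the FTS config */
	int n_words = 0;

	/* the parsing loop now halts when max_words is reached */
	while (n_words < max_words &&
	       (token = next_token (&cursor, &len)) != NULL) {
		n_words++;
		printf ("word %d: %.*s\n", n_words, len, token);
	}

	return 0;
}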
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index ddfb293..3df2437 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -3677,10 +3677,14 @@ static void snippetOffsetsOfColumn(
unsigned int iRotor = 0; /* Index of current token */
int iRotorBegin[FTS3_ROTOR_SZ]; /* Beginning offset of token */
int iRotorLen[FTS3_ROTOR_SZ]; /* Length of token */
+ int nWords;
pVtab = pQuery->pFts;
nColumn = pVtab->nColumn;
+ FTSTRACE (("FTS parsing started for Snippets, limiting '%d' bytes to '%d' words",
+ nDoc, pVtab->max_words));
+
tracker_parser_reset (pVtab->parser,
zDoc,
nDoc,
@@ -3699,8 +3703,8 @@ static void snippetOffsetsOfColumn(
}
prevMatch = 0;
-
- while(1){
+ nWords = 0;
+ while(nWords < pVtab->max_words){
// rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
@@ -3717,6 +3721,7 @@ static void snippetOffsetsOfColumn(
continue;
}
+ nWords++;
iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
@@ -4385,6 +4390,10 @@ static int tokenizeSegment(
TrackerParser *parser = v->parser;
int firstIndex = pQuery->nTerms;
int nTerm = 1;
+ int nWords;
+
+ FTSTRACE (("FTS parsing started for Segments, limiting '%d' bytes to '%d' words",
+ nSegment, v->max_words));
tracker_parser_reset (parser,
pSegment,
@@ -4396,7 +4405,8 @@ static int tokenizeSegment(
FALSE,
v->ignore_numbers);
- while( 1 ){
+ nWords = 0;
+ while(nWords < v->max_words){
const char *pToken;
int nToken, iBegin, iEnd, iPos, stop_word;
@@ -4410,6 +4420,8 @@ static int tokenizeSegment(
break;
}
+ nWords ++;
+
// printf("token being indexed is %s, pos is %d, begin is %d, end is %d and length is %d\n", pToken, iPos, iBegin, iEnd, nToken);
#if 0
@@ -4844,20 +4856,29 @@ int Catid,
TrackerParser *parser = v->parser;
DLCollector *p;
int nData; /* Size of doclist before our update. */
+ gint nText;
+ gint nWords;
if (!zText) return SQLITE_OK;
+ nText = strlen (zText);
+
+ if (!nText) return SQLITE_OK;
+
+ FTSTRACE (("FTS parsing started for Terms, limiting '%d' bytes to '%d' words",
+ nText, v->max_words));
+
tracker_parser_reset (parser,
zText,
- strlen (zText),
+ nText,
v->max_word_length,
v->enable_stemmer,
v->enable_unaccent,
v->ignore_stop_words,
TRUE,
v->ignore_numbers);
-
- while( 1 ){
+ nWords = 0;
+ while(nWords < v->max_words){
pToken = tracker_parser_next (parser, &iPosition,
&iStartOffset,
@@ -4872,6 +4893,8 @@ int Catid,
continue;
}
+ nWords++;
+
// printf("token being indexed is %s, begin is %d, end is %d and length is %d\n", pToken, iStartOffset, iEndOffset, nTokenBytes);
if (v->ignore_stop_words && stop_word) {
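As a side note on the last hunk: besides the word cap, the patch computes strlen (zText) once into nText, returns early when the text is empty, and reuses the cached length for both the new FTSTRACE message and tracker_parser_reset(). A small sketch of that idiom, with hypothetical names only:

#include <stdio.h>
#include <string.h>

#define SKETCH_OK 0			/* stands in for SQLITE_OK */

/* hypothetical indexer: compute the length once, bail out on empty
 * input, and reuse the cached value wherever it is needed */
static int
index_text (const char *text)
{
	int n_text;

	if (text == NULL)
		return SKETCH_OK;

	n_text = (int) strlen (text);
	if (n_text == 0)
		return SKETCH_OK;	/* nothing to parse */

	printf ("parsing %d bytes\n", n_text);	/* plays the role of FTSTRACE */
	/* parser_reset (parser, text, n_text, ...) would follow here */
	return SKETCH_OK;
}

int
main (void)
{
	index_text ("");		/* returns immediately */
	index_text ("some document text");
	return 0;
}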