[tracker] Fixes GB#619535: MaxWords configuration in FTS config is not used



commit c8247450504d770bb4bd83773375ba130070fbfd
Author: Aleksander Morgado <aleksander@lanedo.com>
Date:   Thu May 27 14:32:58 2010 +0200

    Fixes GB#619535: MaxWords configuration in FTS config is not used
    
     * Parsing loop now halts if configured max_words is reached.

 src/libtracker-fts/tracker-fts.c |   35 +++++++++++++++++++++++++++++------
 1 files changed, 29 insertions(+), 6 deletions(-)
---
diff --git a/src/libtracker-fts/tracker-fts.c b/src/libtracker-fts/tracker-fts.c
index ddfb293..3df2437 100644
--- a/src/libtracker-fts/tracker-fts.c
+++ b/src/libtracker-fts/tracker-fts.c
@@ -3677,10 +3677,14 @@ static void snippetOffsetsOfColumn(
   unsigned int iRotor = 0;             /* Index of current token */
   int iRotorBegin[FTS3_ROTOR_SZ];      /* Beginning offset of token */
   int iRotorLen[FTS3_ROTOR_SZ];        /* Length of token */
+  int nWords;
 
   pVtab = pQuery->pFts;
   nColumn = pVtab->nColumn;
 
+  FTSTRACE (("FTS parsing started for Snippets, limiting '%d' bytes to '%d' words",
+             nDoc, pVtab->max_words));
+
   tracker_parser_reset (pVtab->parser,
                         zDoc,
                         nDoc,
@@ -3699,8 +3703,8 @@ static void snippetOffsetsOfColumn(
   }
 
   prevMatch = 0;
-
-  while(1){
+  nWords = 0;
+  while(nWords < pVtab->max_words){
 //    rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
 
 
@@ -3717,6 +3721,7 @@ static void snippetOffsetsOfColumn(
       continue;
     }
 
+    nWords++;
 
     iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
     iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
@@ -4385,6 +4390,10 @@ static int tokenizeSegment(
   TrackerParser *parser = v->parser;
   int firstIndex = pQuery->nTerms;
   int nTerm = 1;
+  int nWords;
+
+  FTSTRACE (("FTS parsing started for Segments, limiting '%d' bytes to '%d' words",
+             nSegment, v->max_words));
 
   tracker_parser_reset (parser,
                         pSegment,
@@ -4396,7 +4405,8 @@ static int tokenizeSegment(
                         FALSE,
                         v->ignore_numbers);
 
-  while( 1 ){
+  nWords = 0;
+  while(nWords < v->max_words){
     const char *pToken;
     int nToken, iBegin, iEnd, iPos, stop_word;
 
@@ -4410,6 +4420,8 @@ static int tokenizeSegment(
       break;
      }
 
+    nWords ++;
+
 //   printf("token being indexed  is %s, pos is %d, begin is %d, end is %d and length is %d\n", pToken, iPos, iBegin, iEnd, nToken);
 
 #if 0
@@ -4844,20 +4856,29 @@ int Catid,
   TrackerParser *parser = v->parser;
   DLCollector *p;
   int nData;			 /* Size of doclist before our update. */
+  gint nText;
+  gint nWords;
 
   if (!zText) return SQLITE_OK;
 
+  nText = strlen (zText);
+
+  if (!nText) return SQLITE_OK;
+
+  FTSTRACE (("FTS parsing started for Terms, limiting '%d' bytes to '%d' words",
+             nText, v->max_words));
+
   tracker_parser_reset (parser,
                         zText,
-                        strlen (zText),
+                        nText,
                         v->max_word_length,
                         v->enable_stemmer,
                         v->enable_unaccent,
                         v->ignore_stop_words,
                         TRUE,
                         v->ignore_numbers);
-
-  while( 1 ){
+  nWords = 0;
+  while(nWords < v->max_words){
 
     pToken = tracker_parser_next (parser, &iPosition,
 				  &iStartOffset,
@@ -4872,6 +4893,8 @@ int Catid,
 	continue;
    }
 
+   nWords++;
+
   // printf("token being indexed  is %s, begin is %d, end is %d and length is %d\n", pToken, iStartOffset, iEndOffset, nTokenBytes);
 
    if (v->ignore_stop_words && stop_word) {



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]