tracker r2373 - in trunk: . src/libtracker-common src/tracker-fts tests/tracker-fts



Author: jamiemcc
Date: Wed Oct 15 03:56:12 2008
New Revision: 2373
URL: http://svn.gnome.org/viewvc/tracker?rev=2373&view=rev

Log:
2008-10-14 Jamie McCracken <jamiemcc at gnome org>

	* tests/tracker-fts/tracker-fts-test.c:
	* src/libtracker-common/tracker-ontology.c
        * src/libtracker-common/tracker-ontology.h
	* src/tracker-fts/tracker-fts.c
        * src/tracker-fts/Makefile.am

	Updated FTS code with metadata and ontology support functions
	Added category to be stored in index (similar to qdbm)
	Modified handling of position data so that all searches use it
	Improved speed of generating snippets+offsets using position data



Modified:
   trunk/ChangeLog
   trunk/src/libtracker-common/tracker-ontology.c
   trunk/src/libtracker-common/tracker-ontology.h
   trunk/src/tracker-fts/Makefile.am
   trunk/src/tracker-fts/tracker-fts.c
   trunk/tests/tracker-fts/tracker-fts-test.c

Modified: trunk/src/libtracker-common/tracker-ontology.c
==============================================================================
--- trunk/src/libtracker-common/tracker-ontology.c	(original)
+++ trunk/src/libtracker-common/tracker-ontology.c	Wed Oct 15 03:56:12 2008
@@ -798,3 +798,18 @@
 
 	return FALSE;
 }
+/* Return the name of the field that tracker_ontology_get_field_by_id() finds for id, or NULL if it finds none. */
+const gchar *
+tracker_ontology_get_field_name_by_id (gint id)
+{
+	TrackerField *field;
+
+	field = tracker_ontology_get_field_by_id (id);
+
+	if (field) {
+		return tracker_field_get_name (field);
+	}
+	
+	return NULL;
+}
+

Modified: trunk/src/libtracker-common/tracker-ontology.h
==============================================================================
--- trunk/src/libtracker-common/tracker-ontology.h	(original)
+++ trunk/src/libtracker-common/tracker-ontology.h	Wed Oct 15 03:56:12 2008
@@ -62,6 +62,7 @@
 								 const gchar	*meta_name);
 gboolean	tracker_ontology_service_get_show_directories	(const gchar	*service_str);
 gboolean	tracker_ontology_service_get_show_files		(const gchar	*service_str);
+const gchar *	tracker_ontology_get_field_name_by_id 	        (gint id);
 
 /* Field mechanics */
 void		tracker_ontology_field_add			(TrackerField	*field);

Modified: trunk/src/tracker-fts/Makefile.am
==============================================================================
--- trunk/src/tracker-fts/Makefile.am	(original)
+++ trunk/src/tracker-fts/Makefile.am	Wed Oct 15 03:56:12 2008
@@ -8,8 +8,10 @@
 	-I$(top_srcdir)/src						\
 	$(GLIB2_CFLAGS)							\
 	$(SQLITE3_CFLAGS)						\
-	$(PANGO_CFLAGS)
+	$(PANGO_CFLAGS)							\
+	-DSTORE_CATEGORY=1			
 
+	
 tracker_fts_modules_LTLIBRARIES = tracker-fts.la
 		
 
@@ -21,7 +23,7 @@
 tracker_fts_la_LDFLAGS = $(module_flags)
 
 tracker_fts_la_LIBADD =							\
-	$(top_builddir)/src/libtracker-common/libtracker-common.la 	\
+	$(top_builddir)/src/libtracker-db/libtracker-db.la              \
 	$(SQLITE3_LIBS)							\
 	$(GTHREAD_LIBS)							\
 	$(PANGO_LIBS)							\

Modified: trunk/src/tracker-fts/tracker-fts.c
==============================================================================
--- trunk/src/tracker-fts/tracker-fts.c	(original)
+++ trunk/src/tracker-fts/tracker-fts.c	Wed Oct 15 03:56:12 2008
@@ -285,11 +285,12 @@
 #include <ctype.h>
 #include <sqlite3ext.h>
 
-#include <libtracker-common/tracker-config.h>
-#include <libtracker-common/tracker-language.h>
-#include <libtracker-common/tracker-parser.h>
+#define TRACKER_ENABLE_INTERNALS
+#include <libtracker-common/tracker-common.h>
 
 
+#include <libtracker-db/tracker-db-manager.h>
+
 #include "tracker-fts.h"
 #include "tracker-fts-hash.h"
 
@@ -316,6 +317,95 @@
 # define FTSTRACE(A)
 #endif
 
+
+/*  functions needed from tracker */
+/* Look up the value stored for id under the field named key, using the stored procedure that matches the field's data type. */
+static TrackerDBResultSet *
+db_metadata_get (TrackerDBInterface *iface, 
+		 const gchar        *id, 
+		 const gchar        *key)
+{
+	TrackerField *def;
+	const gchar  *proc = NULL;
+	/* Returns NULL on invalid arguments, on a key unknown to the ontology, or on an unsupported field type. */
+	g_return_val_if_fail (TRACKER_IS_DB_INTERFACE (iface), NULL);
+	g_return_val_if_fail (id, NULL);
+	g_return_val_if_fail (key, NULL);
+	/* Resolve the key to its field definition; without one there is nothing to query. */
+	def = tracker_ontology_get_field_by_name (key);
+	
+	if (!def) {
+		g_warning ("Metadata not found for id:'%s' and type:'%s'", id, key);
+		return NULL;
+	}
+	/* Pick the stored procedure name according to the field's data type. */
+	switch (tracker_field_get_data_type (def)) {
+	case TRACKER_FIELD_TYPE_INDEX:
+	case TRACKER_FIELD_TYPE_STRING:
+	case TRACKER_FIELD_TYPE_DOUBLE:
+		proc = "GetMetadata";
+		break;
+
+	case TRACKER_FIELD_TYPE_INTEGER:
+	case TRACKER_FIELD_TYPE_DATE:
+		proc = "GetMetadataNumeric";
+		break;
+
+	case TRACKER_FIELD_TYPE_FULLTEXT:
+		proc = "GetContents";
+		break;
+
+	case TRACKER_FIELD_TYPE_KEYWORD:
+		proc = "GetMetadataKeyword";
+		break;
+		
+	default:
+		g_warning ("Metadata could not be retrieved as type:%d is not supported", 
+			   tracker_field_get_data_type (def)); 
+		return NULL;
+	}
+	/* Run the selected procedure, passing id and the field's own id as its arguments. */
+	return tracker_db_interface_execute_procedure (iface,
+						       NULL, 
+				     		       proc, 
+				     		       id, 
+				     		       tracker_field_get_id (def),
+				     		       NULL);
+}
+
+
+/* Return the text stored for id under key for the given service, or NULL when db_metadata_get() yields no result set. */
+static gchar *
+db_get_text (const char     *service,
+	     const char     *key,    
+	     const char     *id) 
+{
+	TrackerDBInterface *iface;
+	gchar              *contents = NULL;
+	TrackerDBResultSet *result_set;
+	/* "File:Contents" uses the CONTENTS-type interface; every other key uses the METADATA-type interface. */
+	if (strcmp (key, "File:Contents") == 0) {
+		iface = tracker_db_manager_get_db_interface_by_type (service,
+								     TRACKER_DB_CONTENT_TYPE_CONTENTS);
+	} else {
+		iface = tracker_db_manager_get_db_interface_by_type (service,
+								     TRACKER_DB_CONTENT_TYPE_METADATA);
+	}
+
+	result_set = db_metadata_get (iface, id, key);
+	/* Fetch column 0 into contents, then drop our reference to the result set. */
+	if (result_set) {
+		tracker_db_result_set_get (result_set, 0, &contents, -1);
+		g_object_unref (result_set);
+	}
+	/* NOTE(review): the returned string is presumably owned by the caller (g_free) - confirm. */
+	return contents;
+}
+
+
+
+
+
 /*
 ** Default span for NEAR operators.
 */
@@ -348,9 +438,11 @@
 
 
 typedef enum DocListType {
-  DL_DOCIDS,		  /* docids only */
-  DL_POSITIONS,		  /* docids + positions */
-  DL_POSITIONS_OFFSETS	  /* docids + positions + offsets */
+
+  DL_DOCIDS,              /* docids only */
+  DL_POSITIONS,           /* docids + positions */
+  DL_POSITIONS_RANK,      /* docids + catid + rank + positions */
+  DL_POSITIONS_OFFSETS    /* docids + positions + offsets */
 } DocListType;
 
 /*
@@ -603,6 +695,12 @@
   int nData;
 
   sqlite_int64 iDocid;
+  
+#ifdef STORE_CATEGORY
+  int Catid;
+#endif
+  
+  
   int nElement;
 } DLReader;
 
@@ -614,6 +712,14 @@
   assert( !dlrAtEnd(pReader) );
   return pReader->iDocid;
 }
+/* Return the category id currently held in the reader (pReader->Catid); the at-end assertion is disabled here. */
+#ifdef STORE_CATEGORY
+static int dlrCatid(DLReader *pReader){
+  //assert( !dlrAtEnd(pReader) );
+  return pReader->Catid;
+}
+#endif
+
 static const char *dlrDocData(DLReader *pReader){
   assert( !dlrAtEnd(pReader) );
   return pReader->pData;
@@ -633,12 +739,24 @@
 static const char *dlrPosData(DLReader *pReader){
   sqlite_int64 iDummy;
   int n = fts3GetVarint(pReader->pData, &iDummy);
+  
+#ifdef STORE_CATEGORY    
+  int Catid;
+  n += fts3GetVarint32(pReader->pData+n, &Catid);
+#endif
+  
   assert( !dlrAtEnd(pReader) );
   return pReader->pData+n;
 }
 static int dlrPosDataLen(DLReader *pReader){
   sqlite_int64 iDummy;
   int n = fts3GetVarint(pReader->pData, &iDummy);
+  
+#ifdef STORE_CATEGORY    
+  int Catid;
+  n += fts3GetVarint32(pReader->pData+n, &Catid);
+#endif
+  
   assert( !dlrAtEnd(pReader) );
   return pReader->nElement-n;
 }
@@ -653,8 +771,16 @@
   /* If there is more data, read the next doclist element. */
   if( pReader->nData!=0 ){
     sqlite_int64 iDocidDelta;
+    
     int iDummy, n = fts3GetVarint(pReader->pData, &iDocidDelta);
     pReader->iDocid += iDocidDelta;
+
+#ifdef STORE_CATEGORY    
+    int Catid;
+    n += fts3GetVarint32(pReader->pData+n, &Catid);
+    pReader->Catid = Catid;
+#endif
+    
     if( pReader->iType>=DL_POSITIONS ){
       assert( n<pReader->nData );
       while( 1 ){
@@ -684,6 +810,10 @@
   pReader->nElement = 0;
   pReader->iDocid = 0;
 
+#ifdef STORE_CATEGORY    
+  pReader->Catid = 0;
+#endif
+
   /* Load the first element's data.  There must be a first element. */
   dlrStep(pReader);
 }
@@ -706,6 +836,12 @@
     sqlite_int64 iDocidDelta;
     int n = fts3GetVarint(pData, &iDocidDelta);
     iPrevDocid += iDocidDelta;
+    
+#ifdef STORE_CATEGORY    
+    int Catid;
+    n += fts3GetVarint32(pData+n, &Catid);
+#endif
+
     if( iType>DL_DOCIDS ){
       int iDummy;
       while( 1 ){
@@ -812,6 +948,29 @@
   dlwAppend(pWriter, dlrDocData(pReader), dlrDocDataBytes(pReader),
 	    dlrDocid(pReader), dlrDocid(pReader));
 }
+
+/* STORE_CATEGORY variant: append one DL_DOCIDS element, i.e. the docid delta varint followed by the category id varint. */
+#ifdef STORE_CATEGORY    
+static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid, int Catid){
+  char c[VARINT_MAX];
+  int n = fts3PutVarint(c, iDocid-pWriter->iPrevDocid);
+
+  /* Docids must ascend. */
+  assert( !pWriter->has_iPrevDocid || iDocid>pWriter->iPrevDocid );
+  assert( pWriter->iType==DL_DOCIDS );
+
+  dataBufferAppend(pWriter->b, c, n);
+  pWriter->iPrevDocid = iDocid;
+  /* The category id is written right after the docid delta, reusing the scratch buffer c. */
+  n = fts3PutVarint(c, Catid);
+  dataBufferAppend(pWriter->b, c, n);
+  
+#ifndef NDEBUG
+  pWriter->has_iPrevDocid = 1;
+#endif
+}
+#else
+
 static void dlwAdd(DLWriter *pWriter, sqlite_int64 iDocid){
   char c[VARINT_MAX];
   int n = fts3PutVarint(c, iDocid-pWriter->iPrevDocid);
@@ -827,6 +986,8 @@
 #endif
 }
 
+#endif
+
 /*******************************************************************/
 /* PLReader is used to read data from a document's position list.  As
 ** the caller steps through the list, data is cached so that varints
@@ -923,6 +1084,7 @@
   SCRAMBLE(pReader);
 }
 
+
 /*******************************************************************/
 /* PLWriter is used in constructing a document's position list.  As a
 ** convenience, if iType is DL_DOCIDS, PLWriter becomes a no-op.
@@ -995,6 +1157,35 @@
   plwAdd(pWriter, plrColumn(pReader), plrPosition(pReader),
 	 plrStartOffset(pReader), plrEndOffset(pReader));
 }
+
+/* STORE_CATEGORY variant: start a position list for iDocid by writing its docid delta and category id to dlw's buffer. */
+#ifdef STORE_CATEGORY
+static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid, int Catid){
+  char c[VARINT_MAX];
+  int n;
+
+  pWriter->dlw = dlw;
+
+  /* Docids must ascend. */
+  assert( !pWriter->dlw->has_iPrevDocid || iDocid>pWriter->dlw->iPrevDocid );
+  n = fts3PutVarint(c, iDocid-pWriter->dlw->iPrevDocid);
+  dataBufferAppend(pWriter->dlw->b, c, n);
+  pWriter->dlw->iPrevDocid = iDocid;
+  /* The category id varint follows the docid delta in the output buffer. */
+  n = fts3PutVarint(c, Catid);
+  dataBufferAppend(pWriter->dlw->b, c, n);
+    
+#ifndef NDEBUG
+  pWriter->dlw->has_iPrevDocid = 1;
+#endif
+  /* Column, position and offset tracking restart from zero for the new document. */
+  pWriter->iColumn = 0;
+  pWriter->iPos = 0;
+  pWriter->iOffset = 0;
+}
+
+#else
+
 static void plwInit(PLWriter *pWriter, DLWriter *dlw, sqlite_int64 iDocid){
   char c[VARINT_MAX];
   int n;
@@ -1006,6 +1197,7 @@
   n = fts3PutVarint(c, iDocid-pWriter->dlw->iPrevDocid);
   dataBufferAppend(pWriter->dlw->b, c, n);
   pWriter->dlw->iPrevDocid = iDocid;
+    
 #ifndef NDEBUG
   pWriter->dlw->has_iPrevDocid = 1;
 #endif
@@ -1014,7 +1206,13 @@
   pWriter->iPos = 0;
   pWriter->iOffset = 0;
 }
-/* TODO(shess) Should plwDestroy() also terminate the doclist?	But
+
+#endif
+
+
+
+/* TODO(shess) Should plwDestroy() also terminate the doclist?  But
+
 ** then plwDestroy() would no longer be just a destructor, it would
 ** also be doing work, which isn't consistent with the overall idiom.
 ** Another option would be for plwAdd() to always append any necessary
@@ -1070,15 +1268,36 @@
     dataBufferAppend(b, pCollector->b.pData, pCollector->b.nData);
   }
 }
+/* Record one position entry in the collector by forwarding the arguments to plwAdd() on its position-list writer. */
+static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
+                      int iStartOffset, int iEndOffset){
+  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
+}
+
+/* STORE_CATEGORY variant: terminate the current position list and begin a new one for iDocid with category Catid. */
+#ifdef STORE_CATEGORY
+static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid, int Catid){
+  plwTerminate(&pCollector->plw);
+  plwDestroy(&pCollector->plw);
+  plwInit(&pCollector->plw, &pCollector->dlw, iDocid, Catid);
+}
+
+/* Allocate a collector and start its first position list. NOTE(review): sqlite3_malloc() result is not NULL-checked. */
+static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType, int Catid){
+  DLCollector *pCollector = sqlite3_malloc(sizeof(DLCollector));
+  dataBufferInit(&pCollector->b, 0);
+  dlwInit(&pCollector->dlw, iType, &pCollector->b);
+  plwInit(&pCollector->plw, &pCollector->dlw, iDocid, Catid);
+  return pCollector;
+}
+
+#else
+
 static void dlcNext(DLCollector *pCollector, sqlite_int64 iDocid){
   plwTerminate(&pCollector->plw);
   plwDestroy(&pCollector->plw);
   plwInit(&pCollector->plw, &pCollector->dlw, iDocid);
 }
-static void dlcAddPos(DLCollector *pCollector, int iColumn, int iPos,
-		      int iStartOffset, int iEndOffset){
-  plwAdd(&pCollector->plw, iColumn, iPos, iStartOffset, iEndOffset);
-}
 
 static DLCollector *dlcNew(sqlite_int64 iDocid, DocListType iType){
   DLCollector *pCollector = sqlite3_malloc(sizeof(DLCollector));
@@ -1087,6 +1306,9 @@
   plwInit(&pCollector->plw, &pCollector->dlw, iDocid);
   return pCollector;
 }
+
+#endif
+
 static void dlcDelete(DLCollector *pCollector){
   plwDestroy(&pCollector->plw);
   dlwDestroy(&pCollector->dlw);
@@ -1125,12 +1347,18 @@
 
     while( !plrAtEnd(&plReader) ){
       if( iColumn==-1 || plrColumn(&plReader)==iColumn ){
-	if( !match ){
-	  plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader));
-	  match = 1;
-	}
-	plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),
-	       plrStartOffset(&plReader), plrEndOffset(&plReader));
+
+        if( !match ){
+        
+#ifdef STORE_CATEGORY        
+          plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader), dlrCatid(&dlReader));
+#else
+          plwInit(&plWriter, &dlWriter, dlrDocid(&dlReader));
+#endif
+          match = 1;
+        }
+        plwAdd(&plWriter, plrColumn(&plReader), plrPosition(&plReader),
+               plrStartOffset(&plReader), plrEndOffset(&plReader));
       }
       plrStep(&plReader);
     }
@@ -1315,7 +1543,13 @@
 
   plrInit(&left, pLeft);
   plrInit(&right, pRight);
+  
+#ifdef STORE_CATEGORY        
+  plwInit(&writer, pOut, dlrDocid(pLeft), dlrCatid(pLeft));
+#else
   plwInit(&writer, pOut, dlrDocid(pLeft));
+#endif
+
 
   while( !plrAtEnd(&left) || !plrAtEnd(&right) ){
     int c = posListCmp(&left, &right);
@@ -1435,16 +1669,22 @@
       plrStep(&right);
     }else{
       if( (plrPosition(&right)-plrPosition(&left))<=(nNear+1) ){
-	if( !match ){
+
+        if( !match ){
+        
+#ifdef STORE_CATEGORY        
+	  plwInit(&writer, pOut, dlrDocid(pLeft), dlrCatid(pLeft));
+#else
 	  plwInit(&writer, pOut, dlrDocid(pLeft));
-	  match = 1;
-	}
-	if( !isSaveLeft ){
-	  plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
-	}else{
-	  plwAdd(&writer, plrColumn(&left), plrPosition(&left), 0, 0);
-	}
-	plrStep(&right);
+#endif
+          match = 1;
+        }
+        if( !isSaveLeft ){
+          plwAdd(&writer, plrColumn(&right), plrPosition(&right), 0, 0);
+        }else{
+          plwAdd(&writer, plrColumn(&left), plrPosition(&left), 0, 0);
+        }
+        plrStep(&right);
       }else{
 	plrStep(&left);
       }
@@ -1563,33 +1803,40 @@
 	  PLReader pr1 = {0};
 	  PLReader pr2 = {0};
 
-	  PLWriter plwriter;
-	  plwInit(&plwriter, &writer, dlrDocid(dlrAtEnd(&dr1)?&dr2:&dr1));
+          PLWriter plwriter;
+          
+            
+#ifdef STORE_CATEGORY        
+          plwInit(&plwriter, &writer, dlrDocid(dlrAtEnd(&dr1)?&dr2:&dr1), dlrCatid(dlrAtEnd(&dr1)?&dr2:&dr1));
+#else
+          plwInit(&plwriter, &writer, dlrDocid(dlrAtEnd(&dr1)?&dr2:&dr1));
+#endif
+
+          if( one.nData ) plrInit(&pr1, &dr1); 
+          if( two.nData ) plrInit(&pr2, &dr2);
+          while( !plrAtEnd(&pr1) || !plrAtEnd(&pr2) ){
+            int iCompare = plrCompare(&pr1, &pr2);
+            switch( iCompare ){
+              case -1:
+                plwCopy(&plwriter, &pr1);
+                plrStep(&pr1);
+                break;
+              case 1:
+                plwCopy(&plwriter, &pr2);
+                plrStep(&pr2);
+                break;
+              case 0:
+                plwCopy(&plwriter, &pr1);
+                plrStep(&pr1);
+                plrStep(&pr2);
+                break;
+            }
+          }
+          plwTerminate(&plwriter);
+        }
+        dataBufferDestroy(&one);
+        dataBufferDestroy(&two);
 
-	  if( one.nData ) plrInit(&pr1, &dr1);
-	  if( two.nData ) plrInit(&pr2, &dr2);
-	  while( !plrAtEnd(&pr1) || !plrAtEnd(&pr2) ){
-	    int iCompare = plrCompare(&pr1, &pr2);
-	    switch( iCompare ){
-	      case -1:
-		plwCopy(&plwriter, &pr1);
-		plrStep(&pr1);
-		break;
-	      case 1:
-		plwCopy(&plwriter, &pr2);
-		plrStep(&pr2);
-		break;
-	      case 0:
-		plwCopy(&plwriter, &pr1);
-		plrStep(&pr1);
-		plrStep(&pr2);
-		break;
-	    }
-	  }
-	  plwTerminate(&plwriter);
-	}
-	dataBufferDestroy(&one);
-	dataBufferDestroy(&two);
       }
       dlrStep(&left);
       dlrStep(&right);
@@ -1605,6 +1852,7 @@
 ** Write the intersection of these two doclists into pOut as a
 ** DL_DOCIDS doclist.
 */
+#ifdef STORE_CATEGORY
 static void docListAndMerge(
   const char *pLeft, int nLeft,
   const char *pRight, int nRight,
@@ -1615,6 +1863,58 @@
 
   if( nLeft==0 || nRight==0 ) return;
 
+
+  dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
+  dlrInit(&right, DL_POSITIONS, pRight, nRight);
+  dlwInit(&writer, DL_POSITIONS, pOut);
+  
+ 
+
+  while( !dlrAtEnd(&left) && !dlrAtEnd(&right) ){
+    if(dlrDocid(&left)<dlrDocid(&right) ){
+      dlrStep(&left);
+    }else if( dlrDocid(&right)<dlrDocid(&left) ){
+      dlrStep(&right);
+    }else{
+      /* treat col 0 (Contents) as prevalent when it comes to deciding which position data to use */
+      PLReader plReader;
+      gboolean copied = FALSE;
+      plrInit(&plReader, &right);
+      if (!plrAtEnd(&plReader)) {
+	if (plrColumn(&plReader) == 0) {
+	  dlwCopy(&writer, &right);
+	  copied = TRUE;
+	}
+      } 
+      
+      plrDestroy (&plReader);
+      
+      if (!copied) {
+        dlwCopy(&writer, &left);
+      }
+
+      dlrStep(&left);
+      dlrStep(&right);
+    }
+  }
+
+  dlrDestroy(&left);
+  dlrDestroy(&right);
+  dlwDestroy(&writer);
+}  
+  
+#else
+static void docListAndMerge(
+  const char *pLeft, int nLeft,
+  const char *pRight, int nRight,
+  DataBuffer *pOut      /* Write the combined doclist here */
+){
+  DLReader left, right;
+  DLWriter writer;
+
+  if( nLeft==0 || nRight==0 ) return;
+
+
   dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
   dlrInit(&right, DL_DOCIDS, pRight, nRight);
   dlwInit(&writer, DL_DOCIDS, pOut);
@@ -1625,7 +1925,9 @@
     }else if( dlrDocid(&right)<dlrDocid(&left) ){
       dlrStep(&right);
     }else{
+   
       dlwAdd(&writer, dlrDocid(&left));
+      
       dlrStep(&left);
       dlrStep(&right);
     }
@@ -1635,11 +1937,14 @@
   dlrDestroy(&right);
   dlwDestroy(&writer);
 }
+#endif
 
 /* We have two DL_DOCIDS doclists:  pLeft and pRight.
 ** Write the union of these two doclists into pOut as a
 ** DL_DOCIDS doclist.
 */
+
+#ifdef STORE_CATEGORY        
 static void docListOrMerge(
   const char *pLeft, int nLeft,
   const char *pRight, int nRight,
@@ -1657,6 +1962,76 @@
     return;
   }
 
+  dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
+  dlrInit(&right, DL_POSITIONS, pRight, nRight);
+  dlwInit(&writer, DL_POSITIONS, pOut);
+
+  while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
+    if( dlrAtEnd(&right) ){
+      dlwCopy (&writer, &left);
+      dlrStep(&left);
+    }else if( dlrAtEnd(&left) ){
+      
+      dlwCopy (&writer, &right);
+      dlrStep(&right);
+    }else if( dlrDocid(&left)<dlrDocid(&right) ){
+
+      dlwCopy (&writer, &left);
+
+      dlrStep(&left);
+    }else if( dlrDocid(&right)<dlrDocid(&left) ){
+
+      dlwCopy (&writer, &right);
+
+      dlrStep(&right);
+    }else{
+
+      /* treat col 0 (Contents) as prevalent when it comes to deciding which position data to use */
+      PLReader plReader;
+      gboolean copied = FALSE;
+      plrInit(&plReader, &right);
+      if (!plrAtEnd(&plReader)) {
+	if (plrColumn(&plReader) == 0) {
+	  dlwCopy(&writer, &right);
+	  copied = TRUE;
+	}
+      } 
+      
+      plrDestroy (&plReader);
+      
+      if (!copied) {
+        dlwCopy(&writer, &left);
+      }
+
+      dlrStep(&left);
+      dlrStep(&right);
+    }
+  }
+
+  dlrDestroy(&left);
+  dlrDestroy(&right);
+  dlwDestroy(&writer);
+}
+
+#else
+
+static void docListOrMerge(
+  const char *pLeft, int nLeft,
+  const char *pRight, int nRight,
+  DataBuffer *pOut      /* Write the combined doclist here */
+){
+  DLReader left, right;
+  DLWriter writer;
+
+  if( nLeft==0 ){
+    if( nRight!=0 ) dataBufferAppend(pOut, pRight, nRight);
+    return;
+  }
+  if( nRight==0 ){
+    dataBufferAppend(pOut, pLeft, nLeft);
+    return;
+  }
+
   dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
   dlrInit(&right, DL_DOCIDS, pRight, nRight);
   dlwInit(&writer, DL_DOCIDS, pOut);
@@ -1664,18 +2039,32 @@
   while( !dlrAtEnd(&left) || !dlrAtEnd(&right) ){
     if( dlrAtEnd(&right) ){
       dlwAdd(&writer, dlrDocid(&left));
+      
+
       dlrStep(&left);
     }else if( dlrAtEnd(&left) ){
+      
       dlwAdd(&writer, dlrDocid(&right));
+      
       dlrStep(&right);
     }else if( dlrDocid(&left)<dlrDocid(&right) ){
+
       dlwAdd(&writer, dlrDocid(&left));
+
+
       dlrStep(&left);
     }else if( dlrDocid(&right)<dlrDocid(&left) ){
+
       dlwAdd(&writer, dlrDocid(&right));
+
+      
+
       dlrStep(&right);
     }else{
+
       dlwAdd(&writer, dlrDocid(&left));
+
+
       dlrStep(&left);
       dlrStep(&right);
     }
@@ -1685,11 +2074,13 @@
   dlrDestroy(&right);
   dlwDestroy(&writer);
 }
+#endif
 
 /* We have two DL_DOCIDS doclists:  pLeft and pRight.
 ** Write into pOut as DL_DOCIDS doclist containing all documents that
 ** occur in pLeft but not in pRight.
 */
+#ifdef STORE_CATEGORY   
 static void docListExceptMerge(
   const char *pLeft, int nLeft,
   const char *pRight, int nRight,
@@ -1704,6 +2095,39 @@
     return;
   }
 
+  dlrInit(&left, DL_POSITIONS, pLeft, nLeft);
+  dlrInit(&right, DL_POSITIONS, pRight, nRight);
+  dlwInit(&writer, DL_POSITIONS, pOut);
+
+  while( !dlrAtEnd(&left) ){
+    while( !dlrAtEnd(&right) && dlrDocid(&right)<dlrDocid(&left) ){
+      dlrStep(&right);
+    }
+    if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
+      dlwCopy (&writer, &left);
+    }
+    dlrStep(&left);
+  }
+
+  dlrDestroy(&left);
+  dlrDestroy(&right);
+  dlwDestroy(&writer);
+}
+#else
+static void docListExceptMerge(
+  const char *pLeft, int nLeft,
+  const char *pRight, int nRight,
+  DataBuffer *pOut      /* Write the combined doclist here */
+){
+  DLReader left, right;
+  DLWriter writer;
+
+  if( nLeft==0 ) return;
+  if( nRight==0 ){
+    dataBufferAppend(pOut, pLeft, nLeft);
+    return;
+  }
+
   dlrInit(&left, DL_DOCIDS, pLeft, nLeft);
   dlrInit(&right, DL_DOCIDS, pRight, nRight);
   dlwInit(&writer, DL_DOCIDS, pOut);
@@ -1713,6 +2137,7 @@
       dlrStep(&right);
     }
     if( dlrAtEnd(&right) || dlrDocid(&left)<dlrDocid(&right) ){
+      
       dlwAdd(&writer, dlrDocid(&left));
     }
     dlrStep(&left);
@@ -1722,6 +2147,7 @@
   dlrDestroy(&right);
   dlwDestroy(&writer);
 }
+#endif
 
 static char *string_dup_n(const char *s, int n){
   char *str = sqlite3_malloc(n + 1);
@@ -1891,13 +2317,14 @@
   int nMatch;	  /* Total number of matches */
   int nAlloc;	  /* Space allocated for aMatch[] */
   struct snippetMatch { /* One entry for each matching term */
-    char snStatus;	 /* Status flag for use while constructing snippets */
-    short int iCol;	 /* The column that contains the match */
-    short int iTerm;	 /* The index in Query.pTerms[] of the matching term */
-    int iToken;		 /* The index of the matching document token */
-    short int nByte;	 /* Number of bytes in the term */
-    int iStart;		 /* The offset to the first character of the term */
-  } *aMatch;	  /* Points to space obtained from malloc */
+    char snStatus;       /* Status flag for use while constructing snippets */
+    short int iCol;      /* The column that contains the match */
+    short int iTerm;     /* The index in Query.pTerms[] of the matching term */
+    int iToken;          /* The index of the matching document token */
+    short int nByte;     /* Number of bytes in the term */
+    int iStart;          /* The offset to the first character of the term */
+    int rank;		 /* the rank of the snippet */
+  } *aMatch;      /* Points to space obtained from malloc */
   char *zOffset;  /* Text rendering of aMatch[] */
   int nOffset;	  /* strlen(zOffset) */
   char *zSnippet; /* Snippet text */
@@ -2039,6 +2466,8 @@
   int iColumn;			   /* Column being searched */
   DataBuffer result;		   /* Doclist results from fulltextQuery */
   DLReader reader;		   /* Result reader if result not empty */
+  sqlite_int64 currentDocid;
+  int currentCatid;
 } fulltext_cursor;
 
 static struct fulltext_vtab *cursor_vtab(fulltext_cursor *c){
@@ -3251,6 +3680,7 @@
   pMatch->nByte = nByte;
 }
 
+
 /*
 ** Sizing information for the circular buffer used in snippetOffsetsOfColumn()
 */
@@ -3267,6 +3697,9 @@
   int iColumn,
   const char *zDoc,
   int nDoc
+#ifdef STORE_CATEGORY
+  , int position
+#endif  
 ){
 
   fulltext_vtab *pVtab;		       /* The full text index */
@@ -3302,9 +3735,10 @@
 
   while(1){
 //    rc = pTModule->xNext(pTCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos);
-
-    zToken = tracker_parser_next (pVtab->parser,
-				  &iPos,
+   
+    
+    zToken = tracker_parser_next (pVtab->parser, 
+    				  &iPos,
 				  &iBegin,
 				  &iEnd,
 				  &new_paragraph,
@@ -3316,7 +3750,8 @@
     if (stop_word) {
       continue;
     }
-
+    
+ 
     iRotorBegin[iRotor&FTS3_ROTOR_MASK] = iBegin;
     iRotorLen[iRotor&FTS3_ROTOR_MASK] = iEnd-iBegin;
     match = 0;
@@ -3443,19 +3878,89 @@
   }
 }
 
+
+
 /*
 ** Compute all offsets for the current row of the query.
 ** If the offsets have already been computed, this routine is a no-op.
 */
 static void snippetAllOffsets(fulltext_cursor *p){
-  int nColumn;
   int iColumn, i;
-  int iFirst, iLast;
   fulltext_vtab *pFts;
 
   if( p->snippet.nMatch ) return;
   if( p->q.nTerms==0 ) return;
   pFts = p->q.pFts;
+  
+#ifdef STORE_CATEGORY  
+  PLReader plReader;
+  int col_array[255];
+  gpointer pos_array[255];
+  
+  for (i=0; i<255; i++) {
+    col_array[i] = 0;
+    pos_array[i] = NULL;
+  }
+  
+  
+  int iPos = 0;
+  
+  printf ("calc snippet\n");
+
+  
+  plrInit(&plReader, &p->reader);
+  
+  if (plrAtEnd(&plReader)) return;
+  
+  iColumn = -1;
+    
+  for ( ; !plrAtEnd(&plReader); plrStep(&plReader) ){
+        
+    if (plrColumn (&plReader) != iColumn) {
+    
+      iColumn = plrColumn(&plReader);
+      col_array[iColumn] += 1;
+    }
+        
+    iPos = plrPosition(&plReader);
+    GSList *l = pos_array[iColumn];
+    l = g_slist_prepend (l, GINT_TO_POINTER (iPos)); 
+  }
+
+  plrDestroy(&plReader);
+  
+  if (!dlrAtEnd (&p->reader)) {
+ //   dlrStep (&p->reader);
+  }
+
+  /* get the column with most hits */  
+  int hit_column = 0;
+  int hit_column_count = col_array[0];  
+  
+  /*bias field id 0 more as its the main content field */
+ // if (hit_column_count > 0) hit_column_count++;
+  
+  for (i=1; i<255; i++) {
+    if (col_array [i] > hit_column_count) {
+      hit_column = i;
+      hit_column_count =col_array[i];
+    }
+    
+    g_slist_free (pos_array[i]);
+  }
+
+
+  const char *zDoc;
+  int nDoc;
+  zDoc = (const char*)sqlite3_column_text(p->pStmt, hit_column+1);
+  nDoc = sqlite3_column_bytes(p->pStmt, hit_column+1);
+  snippetOffsetsOfColumn(&p->q, &p->snippet, hit_column, zDoc, nDoc, iPos);
+  
+  
+#else  
+  int iFirst, iLast;
+  int nColumn;
+      
   nColumn = pFts->nColumn;
   iColumn = (p->iCursorType - QUERY_FULLTEXT);
   if( iColumn<0 || iColumn>=nColumn ){
@@ -3465,6 +3970,8 @@
     iFirst = iColumn;
     iLast = iColumn;
   }
+  
+    
   for(i=iFirst; i<=iLast; i++){
     const char *zDoc;
     int nDoc;
@@ -3472,7 +3979,8 @@
     nDoc = sqlite3_column_bytes(p->pStmt, i+1);
     snippetOffsetsOfColumn(&p->q, &p->snippet, i, zDoc, nDoc);
   }
-
+#endif  
+  
   trimSnippetOffsetsForNear(&p->q, &p->snippet);
 }
 
@@ -3718,7 +4226,11 @@
       return SQLITE_OK;
     }
     rc = sqlite3_bind_int64(c->pStmt, 1, dlrDocid(&c->reader));
+    c->currentDocid = dlrDocid(&c->reader);
+    c->currentCatid = dlrCatid(&c->reader);
+
     dlrStep(&c->reader);
+    
     if( rc!=SQLITE_OK ) return rc;
     /* TODO(shess) Handle SQLITE_SCHEMA AND SQLITE_BUSY. */
     rc = sqlite3_step(c->pStmt);
@@ -3763,8 +4275,15 @@
   assert( v->nPendingData<0 );
 
   dataBufferInit(&left, 0);
+  
+  #ifdef STORE_CATEGORY
   rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
-		  (0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS), &left);
+                  DL_POSITIONS, &left);
+  #else
+  rc = termSelect(v, iColumn, pQTerm->pTerm, pQTerm->nTerm, pQTerm->isPrefix,
+                  (0<pQTerm->nPhrase ? DL_POSITIONS : DL_DOCIDS), &left);
+  #endif                
+                  
   if( rc ) return rc;
   for(i=1; i<=pQTerm->nPhrase && left.nData>0; i++){
     /* If this token is connected to the next by a NEAR operator, and
@@ -3786,10 +4305,20 @@
       return rc;
     }
     dataBufferInit(&new, 0);
+    
+    #ifdef STORE_CATEGORY
+    docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
+                       pQTerm[i-1].nNear, pQTerm[i-1].iPhrase + nPhraseRight,
+                       DL_POSITIONS,
+                       &new);
+    
+    #else
     docListPhraseMerge(left.pData, left.nData, right.pData, right.nData,
-		       pQTerm[i-1].nNear, pQTerm[i-1].iPhrase + nPhraseRight,
-		       ((i<pQTerm->nPhrase) ? DL_POSITIONS : DL_DOCIDS),
-		       &new);
+                       pQTerm[i-1].nNear, pQTerm[i-1].iPhrase + nPhraseRight,
+                       ((i<pQTerm->nPhrase) ? DL_POSITIONS : DL_DOCIDS),
+                       &new);
+
+    #endif                   
     dataBufferDestroy(&left);
     dataBufferDestroy(&right);
     left = new;
@@ -4210,7 +4739,14 @@
       rc = fulltextQuery(v, idxNum-QUERY_FULLTEXT, zQuery, -1, &c->result, &c->q);
       if( rc!=SQLITE_OK ) return rc;
       if( c->result.nData!=0 ){
-	dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
+
+#ifdef STORE_CATEGORY
+        dlrInit(&c->reader, DL_POSITIONS, c->result.pData, c->result.nData);
+#else      
+        dlrInit(&c->reader, DL_DOCIDS, c->result.pData, c->result.nData);
+
+#endif   
+        
       }
       break;
     }
@@ -4239,6 +4775,13 @@
   fulltext_cursor *c = (fulltext_cursor *) pCursor;
   fulltext_vtab *v = cursor_vtab(c);
 
+#ifdef STORE_CATEGORY 
+  if (idxCol == 0) {
+    sqlite3_result_int (pContext, c->currentCatid);
+    return SQLITE_OK;
+  }
+#endif
+    	
   if( idxCol<v->nColumn ){
     sqlite3_value *pVal = sqlite3_column_value(c->pStmt, idxCol+1);
     sqlite3_result_value(pContext, pVal);
@@ -4271,7 +4814,11 @@
 ** we also store positions and offsets in the hash table using that
 ** column number.
 */
-static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid,
+static int buildTerms(fulltext_vtab *v, sqlite_int64 iDocid, 
+
+#ifdef STORE_CATEGORY      
+int Catid,
+#endif
 		      const char *zText, int iColumn){
   const char *pToken;
   int nTokenBytes;
@@ -4317,14 +4864,28 @@
     p = fts3HashFind(&v->pendingTerms, pToken, nTokenBytes);
     if( p==NULL ){
       nData = 0;
+      
+#ifdef STORE_CATEGORY       
+      p = dlcNew(iDocid, DL_DEFAULT, Catid);
+#else
       p = dlcNew(iDocid, DL_DEFAULT);
+#endif
+
       fts3HashInsert(&v->pendingTerms, pToken, nTokenBytes, p);
 
       /* Overhead for our hash table entry, the key, and the value. */
       v->nPendingData += sizeof(struct fts3HashElem)+sizeof(*p)+nTokenBytes;
     }else{
       nData = p->b.nData;
-      if( p->dlw.iPrevDocid!=iDocid ) dlcNext(p, iDocid);
+      if( p->dlw.iPrevDocid!=iDocid ) {
+#ifdef STORE_CATEGORY       
+        dlcNext(p, iDocid, Catid);
+#else
+        dlcNext(p, iDocid);
+#endif
+
+      
+      }
     }
     if( iColumn>=0 ){
       dlcAddPos(p, iColumn, iPosition, iStartOffset, iEndOffset);
@@ -4348,11 +4909,25 @@
 static int insertTerms(fulltext_vtab *v, sqlite_int64 iDocid,
 		       sqlite3_value **pValues){
   int i;
+  
+#ifdef STORE_CATEGORY   
+  
+  for(i = 1; i < v->nColumn ; ++i){
+    char *zText = (char*)sqlite3_value_text(pValues[i]);
+    int rc = buildTerms(v, iDocid, sqlite3_value_int (pValues[0]), zText, i);
+    if( rc!=SQLITE_OK ) return rc;
+  }
+  
+#else
+
   for(i = 0; i < v->nColumn ; ++i){
     char *zText = (char*)sqlite3_value_text(pValues[i]);
     int rc = buildTerms(v, iDocid, zText, i);
     if( rc!=SQLITE_OK ) return rc;
   }
+  
+#endif  
+
   return SQLITE_OK;
 }
 
@@ -4369,10 +4944,19 @@
   rc = content_select(v, iDocid, &pValues);
   if( rc!=SQLITE_OK ) return rc;
 
+#ifdef STORE_CATEGORY   
+  
+  for(i = 1 ; i < v->nColumn; ++i) {
+    rc = buildTerms(v, iDocid, atoi(pValues[0]), pValues[i], -1);
+    if( rc!=SQLITE_OK ) break;
+  }
+
+#else
   for(i = 0 ; i < v->nColumn; ++i) {
     rc = buildTerms(v, iDocid, pValues[i], -1);
     if( rc!=SQLITE_OK ) break;
   }
+#endif
 
   freeStringArray(v->nColumn, pValues);
   return SQLITE_OK;
@@ -7176,13 +7760,6 @@
   /* xRename */       fulltextRename,
 };
 
-static void hashDestroy(void *p){
-  fts3Hash *pHash = (fts3Hash *)p;
-  sqlite3Fts3HashClear(pHash);
-  sqlite3_free(pHash);
-}
-
-
 int sqlite3Fts3InitHashTable(sqlite3 *, fts3Hash *, const char *);
 
 /*

Modified: trunk/tests/tracker-fts/tracker-fts-test.c
==============================================================================
--- trunk/tests/tracker-fts/tracker-fts-test.c	(original)
+++ trunk/tests/tracker-fts/tracker-fts-test.c	Wed Oct 15 03:56:12 2008
@@ -27,94 +27,88 @@
 #include <glib.h>
 #include <glib-object.h>
 
-static gint
-callback (void	 *NotUsed,
-	  gint	  argc,
-	  gchar **argv,
-	  gchar **azColName)
+static gint 
+callback (void   *NotUsed, 
+          gint    argc, 
+          gchar **argv, 
+          gchar **azColName)
 {
 	gint i;
 
-	for (i = 0; i < argc; i++) {
-		printf("%s = %s\n", azColName[i], argv[i] ? argv[i] : "NULL");
-	}
-
-	printf("\n");
+  	for (i = 0; i < argc; i++) {
+    		printf("%s = %s\n", azColName[i], argv[i] ? argv[i] : "NULL");
+  	}
+  
+  	printf("\n");
 
-	return 0;
+  	return 0;
 }
 
 static void
-exec_sql (sqlite3     *db,
-	  const gchar *sql)
+exec_sql (sqlite3     *db, 
+          const gchar *sql)
 {
 	gchar *zErrMsg;
 	gint   rc;
 
-	rc = sqlite3_exec (db, sql , callback, 0, &zErrMsg);
-
-	if (rc != SQLITE_OK) {
-		g_printerr ("SQL error: %s\n", zErrMsg);
-		sqlite3_free (zErrMsg);
-	}
+        rc = sqlite3_exec (db, sql , callback, 0, &zErrMsg);
+	
+  	if (rc != SQLITE_OK) {
+    		g_printerr ("SQL error: %s\n", zErrMsg);
+    		sqlite3_free (zErrMsg);
+  	}
 }
 
-int
+int 
 main (int argc, char **argv)
 {
 	sqlite3  *db;
-	gint	  rc;
+	gint      rc;
 	gboolean  db_exists = FALSE;
-	gchar	 *st = NULL;
-	gchar	 *sql;
+	gchar    *st = NULL;
+        gchar    *sql;
 
 	g_type_init ();
-	g_thread_init (NULL);
-
-	/* FOR NOW! Return EXIT_SUCCESS (martyn)
-	 *
-	 * This has to work with no parameters. These tests are for
-	 * the unit tests, for any tests which are not for the Glib
-	 * unit test frame work, we should be adding those binaries to
-	 * the utils/ directory.
-	 */
-	return EXIT_SUCCESS;
-
+        g_thread_init (NULL);
+        
 	if (argc != 2) {
 		g_printerr ("Usage: %s MATCH_TERM\n", argv[0]);
 		g_printerr ("EG: %s stew\n", argv[0]);
 		return EXIT_FAILURE;
 	}
-
+	
 	db_exists = g_file_test ("/tmp/test.db", G_FILE_TEST_EXISTS);
-
+	
 	rc = sqlite3_open ("/tmp/test.db", &db);
 	if (rc) {
 		g_printerr ("Can't open database: %s\n", sqlite3_errmsg(db));
 		sqlite3_close(db);
 		return EXIT_FAILURE;
 	}
-
+	
 	sqlite3_enable_load_extension (db, 1);
 	sqlite3_load_extension (db, "tracker-fts.so", NULL, &st);
-
+	
 	if (st) {
 		fprintf(stderr, "SQL error: %s\n", st);
 		sqlite3_free(st);
 	}
-
+	
 	if (!db_exists) {
-		exec_sql (db, "create virtual table recipe using trackerfts (name, ingredients)");
-		exec_sql (db, "insert into recipe (name, ingredients) values ('broccoli stew', 'broccoli,peppers,cheese and tomatoes')");
-		exec_sql (db, "insert into recipe (name, ingredients) values ('pumpkin stew', 'pumpkin,onions,garlic and celery')");
-		exec_sql (db, "insert into recipe (name, ingredients) values ('broccoli pie', 'broccoli,cheese,onions and flour.')");
-		exec_sql (db, "insert into recipe (name, ingredients) values ('pumpkin pie', 'pumpkin,sugar,flour and butter.')");
-	}
-
-	sql = g_strdup_printf ("select rowid, name, ingredients, snippet(recipe), offsets(recipe) from recipe where recipe match '%s'", argv[1]);
+		exec_sql (db, "create virtual table recipe using trackerfts (cat, name, ingredients)");
+		exec_sql (db, "insert into recipe (cat, name, ingredients) values (3, 'broccoli stew', 'broccoli,peppers,cheese and tomatoes')");
+		exec_sql (db, "insert into recipe (cat, name, ingredients) values (4, 'pumpkin stew', 'pumpkin,onions,garlic and celery')");
+		exec_sql (db, "insert into recipe (cat, name, ingredients) values (2, 'broccoli pie', 'broccoli,cheese,onions and flour.')");
+		exec_sql (db, "insert into recipe (cat, name, ingredients) values (7, 'pumpkin pie', 'pumpkin,sugar,flour and butter.')");
+	}
+//	sql = g_strdup_printf ("select cat, count (*) from recipe where recipe match '%s' group by Cat", argv[1]);
+//	exec_sql (db, sql);
+//	g_free (sql);
+	sql = g_strdup_printf ("select rowid, cat, name, ingredients, snippet(recipe) from recipe where recipe match '%s' and Cat<8", argv[1]);
 	exec_sql (db, sql);
 	g_free (sql);
-
+	
+		
 	sqlite3_close(db);
 
 	return EXIT_SUCCESS;



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]