tracker r1777 - in branches/xesam-support: . src/libtracker-db src/trackerd



Author: pvanhoof
Date: Thu Jun 26 12:35:12 2008
New Revision: 1777
URL: http://svn.gnome.org/viewvc/tracker?rev=1777&view=rev

Log:
Sync with indexer split

Modified:
   branches/xesam-support/ChangeLog
   branches/xesam-support/src/libtracker-db/tracker-db-manager.c
   branches/xesam-support/src/trackerd/tracker-daemon.c
   branches/xesam-support/src/trackerd/tracker-db.c
   branches/xesam-support/src/trackerd/tracker-indexer.c
   branches/xesam-support/src/trackerd/tracker-indexer.h
   branches/xesam-support/src/trackerd/tracker-main.c
   branches/xesam-support/src/trackerd/tracker-main.h
   branches/xesam-support/src/trackerd/tracker-monitor.c
   branches/xesam-support/src/trackerd/tracker-query-tree.c
   branches/xesam-support/src/trackerd/tracker-query-tree.h
   branches/xesam-support/src/trackerd/tracker-search.c
   branches/xesam-support/src/trackerd/tracker-search.h
   branches/xesam-support/src/trackerd/tracker-xesam-manager.c

Modified: branches/xesam-support/src/libtracker-db/tracker-db-manager.c
==============================================================================
--- branches/xesam-support/src/libtracker-db/tracker-db-manager.c	(original)
+++ branches/xesam-support/src/libtracker-db/tracker-db-manager.c	Thu Jun 26 12:35:12 2008
@@ -133,8 +133,6 @@
           FALSE },
 };
 
-static gboolean db_manager_had_init = FALSE;
-
 static gboolean            db_exec_no_reply    (TrackerDBInterface *iface,
 						const gchar        *query,
 						...);
@@ -2555,8 +2553,10 @@
 tracker_db_manager_get_db_interface_by_service (const gchar *service, 
 						gboolean     content)
 {
-	TrackerDBType type;
-	TrackerDB     db;
+	TrackerDBInterface        *iface;
+	TrackerDBType              type;
+	static TrackerDBInterface *file_iface = NULL;
+	static TrackerDBInterface *email_iface = NULL;
 
 	g_return_val_if_fail (initialized != FALSE, NULL);
 	g_return_val_if_fail (service != NULL, NULL);
@@ -2565,23 +2565,29 @@
 
 	switch (type) {
 	case TRACKER_DB_TYPE_EMAIL:
-		if (G_UNLIKELY (content)) {
-			db = TRACKER_DB_EMAIL_CONTENTS;
-		} else {
-			db = TRACKER_DB_EMAIL_METADATA;
+		if (!email_iface) {
+			email_iface = tracker_db_manager_get_db_interfaces (4,
+									    TRACKER_DB_COMMON,
+									    TRACKER_DB_EMAIL_CONTENTS,
+									    TRACKER_DB_EMAIL_METADATA,
+									    TRACKER_DB_CACHE);
 		}
+		iface = email_iface;
 		break;
 
 	default:
-		if (G_UNLIKELY (content)) {
-			db = TRACKER_DB_FILE_CONTENTS;
-		} else {
-			db = TRACKER_DB_FILE_METADATA;
+		if (!file_iface) {
+			file_iface = tracker_db_manager_get_db_interfaces (4,
+									   TRACKER_DB_COMMON,
+									   TRACKER_DB_FILE_CONTENTS,
+									   TRACKER_DB_FILE_METADATA,
+									   TRACKER_DB_CACHE);
 		}
+		iface = file_iface;
 		break;
 	}
 
-	return tracker_db_manager_get_db_interface (db);
+	return iface;
 }
 
 TrackerDBInterface *

Modified: branches/xesam-support/src/trackerd/tracker-daemon.c
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-daemon.c	(original)
+++ branches/xesam-support/src/trackerd/tracker-daemon.c	Thu Jun 26 12:35:12 2008
@@ -22,6 +22,7 @@
 #include "config.h"
 
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include <libtracker-common/tracker-log.h>

Modified: branches/xesam-support/src/trackerd/tracker-db.c
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-db.c	(original)
+++ branches/xesam-support/src/trackerd/tracker-db.c	Thu Jun 26 12:35:12 2008
@@ -24,6 +24,7 @@
 #include "config.h"
 
 #include <string.h>
+#include <stdlib.h>
 #include <fcntl.h>
 #include <zlib.h>
 
@@ -908,14 +909,16 @@
 
 	/* Delete duds */
 	if (duds) {
-		GSList  *words, *w;
-		Indexer *indexer;
+		TrackerIndexer *indexer;
+		GSList         *words, *w;
 
 		words = tracker_query_tree_get_words (tree);
 		indexer = tracker_query_tree_get_indexer (tree);
 
 		for (w = words; w; w = w->next) {
-			tracker_remove_dud_hits (indexer, (const gchar *) w->data, duds);
+			tracker_indexer_remove_dud_hits (indexer, 
+							 (const gchar *) w->data, 
+							 duds);
 		}
 
 		g_slist_free (words);

Modified: branches/xesam-support/src/trackerd/tracker-indexer.c
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-indexer.c	(original)
+++ branches/xesam-support/src/trackerd/tracker-indexer.c	Thu Jun 26 12:35:12 2008
@@ -1,6 +1,8 @@
-/* Tracker - indexer and metadata database engine
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
  * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
- *
+ * Copyright (C) 2008, Nokia
+ *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
@@ -58,461 +60,540 @@
 #include "tracker-status.h"
 
 /* Size of free block pool of inverted index */
-#define INDEXFBP            32     
-#define SCORE_MULTIPLIER    100000
 #define MAX_HIT_BUFFER      480000
-#define MAX_HITS_FOR_WORD   30000
 #define MAX_INDEX_FILE_SIZE 2000000000
 
-#define CREATE_INDEX                                                      \
-        "CREATE TABLE HitIndex (Word Text not null "                      \
-        "unique, HitCount Integer, HitArraySize Integer, HitArray Blob);"
-
-extern Tracker *tracker;
-
-static gint merge_count = 0;
-static gint merge_processed = 0;
-
-struct Indexer_ {
-	DEPOT  		*word_index;	/* file hashtable handle for the word -> {serviceID, ServiceTypeID, Score}  */
-	GMutex 		*word_mutex;
-	char   		*name;
-	gboolean	main_index;
-	gboolean	needs_merge; /* should new stuff be added directly or merged later on from a new index */
+#define TRACKER_INDEXER_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_INDEXER, TrackerIndexerPrivate))
+
+typedef struct TrackerIndexerPrivate TrackerIndexerPrivate;
+
+struct TrackerIndexerPrivate {
+        /* File hashtable handle for the word -> {serviceID,
+         * ServiceTypeID, Score}.
+         */
+        TrackerConfig *config;
+
+	DEPOT         *word_index;	
+	GMutex        *word_mutex;
+
+	gchar         *name;
 };
 
-static inline gint16
-get_score (WordDetails *details)
+static void tracker_indexer_class_init   (TrackerIndexerClass *class);
+static void tracker_indexer_init         (TrackerIndexer      *tree);
+static void tracker_indexer_finalize     (GObject             *object);
+static void tracker_indexer_set_property (GObject             *object,
+                                          guint                prop_id,
+                                          const GValue        *value,
+                                          GParamSpec          *pspec);
+static void tracker_indexer_get_property (GObject             *object,
+                                          guint                prop_id,
+                                          GValue              *value,
+                                          GParamSpec          *pspec);
+
+enum {
+	PROP_0,
+	PROP_NAME,
+        PROP_CONFIG
+};
+
+G_DEFINE_TYPE (TrackerIndexer, tracker_indexer, G_TYPE_OBJECT)
+
+static void
+tracker_indexer_class_init (TrackerIndexerClass *klass)
 {
-	unsigned char a[2];
+	GObjectClass *object_class = G_OBJECT_CLASS (klass);
 
-	a[0] = (details->amalgamated >> 16) & 0xFF;
-	a[1] = (details->amalgamated >> 8) & 0xFF;
+	object_class->finalize = tracker_indexer_finalize;
+	object_class->set_property = tracker_indexer_set_property;
+	object_class->get_property = tracker_indexer_get_property;
+
+	g_object_class_install_property (object_class,
+					 PROP_NAME,
+					 g_param_spec_string ("name",
+							      "Name",
+							      "Name",
+							      NULL,
+							      G_PARAM_READABLE));
+	g_object_class_install_property (object_class,
+					 PROP_CONFIG,
+					 g_param_spec_object ("config",
+							      "Config",
+							      "Config",
+							      tracker_config_get_type (),
+							      G_PARAM_READWRITE));
 
-	return (gint16) (a[0] << 8) | (a[1]);	
+	g_type_class_add_private (object_class, sizeof (TrackerIndexerPrivate));
 }
 
-
-static inline guint8
-get_service_type (WordDetails *details)
+static void
+tracker_indexer_init (TrackerIndexer *indexer)
 {
-	return (details->amalgamated >> 24) & 0xFF;
-}
+	TrackerIndexerPrivate *priv;
 
+	priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
+        
+	priv->word_mutex = g_mutex_new ();
+}
 
-guint32
-tracker_indexer_calc_amalgamated (gint service, gint score)
+static void
+tracker_indexer_finalize (GObject *object)
 {
-	unsigned char a[4];
-	gint16 score16;
-	guint8 service_type;
+	TrackerIndexerPrivate *priv;
 
-	if (score > 30000) {
-		score16 = 30000;
-	} else {
-		score16 = (gint16) score;
-	}
+	priv = TRACKER_INDEXER_GET_PRIVATE (object);
 
-	service_type = (guint8) service;
+        g_free (priv->name);
 
-	/* amalgamate and combine score and service_type into a single 32-bit int for compact storage */	
-	a[0] = service_type;
-	a[1] = (score16 >> 8 ) & 0xFF ;
-	a[2] = score16 & 0xFF ;
-	a[3] = 0;
+        g_mutex_lock (priv->word_mutex);
 
-	return (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3];	
-}
+	if (!dpclose (priv->word_index)) {
+		g_message ("Index closure has failed, %s", dperrmsg (dpecode));
+	}
 
+        g_mutex_unlock (priv->word_mutex);
 
-static int
-get_preferred_bucket_count (Indexer *indexer)
-{
-	gint result;
-        gint bucket_ratio;
+	g_mutex_free (priv->word_mutex);
 
-        bucket_ratio = tracker_config_get_bucket_ratio (tracker->config);
-        result = dprnum (indexer->word_index);
+        if (priv->config) {
+                g_object_unref (priv->config);
+        }
 
-	if (bucket_ratio < 1) {
-		result /= 2;
-	} else if (bucket_ratio > 3) {
-		result *= 4;
-	} else {
-		result *= bucket_ratio;
-	}
+	G_OBJECT_CLASS (tracker_indexer_parent_class)->finalize (object);
+}
 
-	g_message ("Preferred bucket count is %d", result);
+static void
+tracker_indexer_set_property (GObject      *object,
+                              guint         prop_id,
+                              const GValue *value,
+                              GParamSpec   *pspec)
+{
+	switch (prop_id) {
+	case PROP_CONFIG:
+		tracker_indexer_set_config (TRACKER_INDEXER (object),
+                                            g_value_get_object (value));
+		break;
+	default:
+		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+	}
+}
 
-	return  result;
+static void
+tracker_indexer_get_property (GObject      *object,
+                              guint         prop_id,
+                              GValue       *value,
+                              GParamSpec   *pspec)
+{
+	TrackerIndexerPrivate *priv;
+
+	priv = TRACKER_INDEXER_GET_PRIVATE (object);
+
+	switch (prop_id) {
+	case PROP_NAME:
+		g_value_set_string (value, priv->name);
+		break;
+	case PROP_CONFIG:
+		g_value_set_object (value, priv->config);
+		break;
+	default:
+		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+	}
 }
 
-gboolean
-tracker_indexer_repair (const char *name)
+static inline gint16
+get_score (TrackerIndexerWordDetails *details)
 {
-	gchar    *index_name;
-	gboolean  result = TRUE;
+	unsigned char a[2];
 
-        index_name = g_build_filename (tracker_get_data_dir (), 
-                                       name, 
-                                       NULL);
-	result = dprepair (index_name);
-	g_free (index_name);
+	a[0] = (details->amalgamated >> 16) & 0xFF;
+	a[1] = (details->amalgamated >> 8) & 0xFF;
 
-	return result;
+	return (gint16) (a[0] << 8) | (a[1]);	
 }
 
+static inline guint8
+get_service_type (TrackerIndexerWordDetails *details)
+{
+	return (details->amalgamated >> 24) & 0xFF;
+}
 
 static inline DEPOT *
-open_index (const gchar *name)
+open_index (const gchar *name,
+            gint         min_bucket_count,
+            gint         max_bucket_count)
 {
 	DEPOT *word_index = NULL;
 
-	if (!name) return NULL;
+        if (!name) {
+                return NULL;
+        }
 
-	g_message ("Opening index %s", name);
+	g_message ("Opening index:'%s'", name);
 
 	if (strstr (name, "tmp")) {
-		word_index = dpopen (name, DP_OWRITER | DP_OCREAT | DP_ONOLCK, 
-                                     tracker_config_get_min_bucket_count (tracker->config));
-	} else {
-		word_index = dpopen (name, DP_OWRITER | DP_OCREAT | DP_ONOLCK, 
-                                     tracker_config_get_max_bucket_count (tracker->config));
+		word_index = dpopen (name, 
+                                     DP_OWRITER | DP_OCREAT | DP_ONOLCK, 
+                                     min_bucket_count);
+	} else {
+		word_index = dpopen (name, 
+                                     DP_OWRITER | DP_OCREAT | DP_ONOLCK, 
+                                     max_bucket_count);
 	}
 
 	if (!word_index) {
-		g_critical ("%s index was not closed properly and caused error %s- attempting repair", name, dperrmsg (dpecode));
+		g_critical ("Index was not closed properly, index:'%s', %s", 
+                            name, 
+                            dperrmsg (dpecode));
+		g_message ("Attempting to repair...");
+
 		if (dprepair (name)) {
-			word_index = dpopen (name, DP_OWRITER | DP_OCREAT | DP_ONOLCK, 
-                                             tracker_config_get_min_bucket_count (tracker->config));
+			word_index = dpopen (name, 
+                                             DP_OWRITER | DP_OCREAT | DP_ONOLCK, 
+                                             min_bucket_count);
 		} else {
-			g_assert ("FATAL: index file is dead (suggest delete index file and restart trackerd)");
+			g_critical ("Index file is dead, it is suggested you remove "
+                                    "the indexe file:'%s' and restart trackerd",
+                                    name);
+                        return NULL;
 		}
 	}
 
 	return word_index;
-
 }
 
-
-static inline char *
-get_index_file (const char *name)
+static inline gchar *
+get_index_file (const gchar *name)
 {
 	return g_build_filename (tracker_get_data_dir (), name, NULL);
 }
 
-Indexer *
-tracker_indexer_open (const gchar *name, gboolean main_index)
+static inline gboolean 
+has_word (TrackerIndexer *indexer, 
+          const gchar    *word)
 {
-	char *word_dir;
-	DEPOT *word_index;
-	Indexer *result;
-
-	if (!name) return NULL;
-
-	word_dir = get_index_file (name);
+        TrackerIndexerPrivate *priv;
+	gchar                  buffer[32];
+	gint                   count;
 
-	word_index = open_index (word_dir);
-	
-	g_free (word_dir);
-
-	result = g_new0 (Indexer, 1);
-
-	result->main_index = main_index;
-	
-	result->needs_merge = FALSE;
-
-	result->name = g_strdup (name);
-
-	result->word_index = word_index;
-
-	result->word_mutex = g_mutex_new ();
-
-	dpsetalign (word_index , 8);
+        priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
 
-	/* re optimize database if bucket count < rec count */
+	g_mutex_lock (priv->word_mutex);	
+        count = dpgetwb (priv->word_index, word, -1, 0, 32, buffer);
+	g_mutex_unlock (priv->word_mutex);	
 
-	int bucket_count, rec_count;
+	return count > 7;
+}
 
-	bucket_count = dpbnum (result->word_index);
-	rec_count = dprnum (result->word_index);
+static inline gint
+count_hit_size_for_word (TrackerIndexer *indexer, 
+                         const gchar    *word)
+{
+        TrackerIndexerPrivate *priv;
+	gint                   tsiz;
 
-	g_message ("Bucket count (max is %d) is %d and Record Count is %d", 
-                     tracker_config_get_max_bucket_count (tracker->config),
-                     bucket_count, rec_count);
+        priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
+        
+	g_mutex_lock (priv->word_mutex);	
+	tsiz = dpvsiz (priv->word_index, word, -1);
+	g_mutex_unlock (priv->word_mutex);	
 
-	return result;
+	return tsiz;
 }
 
+/* int levenshtein ()
+ * Original license: GNU Lesser Public License
+ * from the Dixit project, (http://dixit.sourceforge.net/)
+ * Author: Octavian Procopiuc <oprocopiuc gmail com>
+ * Created: July 25, 2004
+ * Copied into tracker, by Edward Duffy
+ */
+static gint
+levenshtein (const gchar *source, 
+	     gchar       *target, 
+	     gint         maxdist)
+{
+	gchar n, m;
+	gint  l;
+	gchar mincolval;
+	gchar matrix[51][51];
+	gchar j;
+	gchar i;
+	gchar cell;
 
-void
-tracker_indexer_close (Indexer *indexer)
-{	
-	g_return_if_fail (indexer);
+	l = strlen (source);
+	if (l > 50)
+		return -1;
+	n = l;
 
-	g_mutex_lock (indexer->word_mutex);
+	l = strlen (target);
+	if (l > 50)
+		return -1;
+	m = l;
 
-	if (!dpclose (indexer->word_index)) {
-		g_message ("Index closure has failed due to %s", dperrmsg (dpecode));
-	}
+	if (maxdist == 0)
+		maxdist = MAX(m, n);
+	if (n == 0)
+		return MIN(m, maxdist);
+	if (m == 0)
+		return MIN(n, maxdist);
 
-	g_mutex_unlock (indexer->word_mutex);
-	g_mutex_free (indexer->word_mutex);
-	g_free (indexer->name);
-	g_free (indexer);
-}
+	/* Store the min. value on each column, so that, if it
+         * reaches maxdist, we break early.
+         */
+	for (j = 0; j <= m; j++)
+		matrix[0][(gint)j] = j;
+
+	for (i = 1; i <= n; i++) {
+                gchar s_i;
 
+		mincolval = MAX(m, i);
+		matrix[(gint)i][0] = i;
 
-void
-tracker_indexer_free (Indexer *indexer, gboolean remove_file)
-{
-	
+		s_i = source[i-1];
 
-	if (remove_file) {
-                gchar *dbname;
+		for (j = 1; j <= m; j++) {
+			gchar t_j = target[j-1];
+			gchar cost = (s_i == t_j ? 0 : 1);
+			gchar above = matrix[i-1][(gint)j];
+			gchar left = matrix[(gint)i][j-1];
+			gchar diag = matrix[i-1][j-1];
 
-                dbname = g_build_filename (tracker_get_data_dir (), 
-                                           indexer->name, 
-                                           NULL);
+			cell = MIN(above + 1, MIN(left + 1, diag + cost));
 
-		g_return_if_fail (indexer);
+			/* Cover transposition, in addition to deletion,
+                         * insertion and substitution. This step is taken from:
+                         * Berghel, Hal ; Roach, David : "An Extension of Ukkonen's 
+                         * Enhanced Dynamic Programming ASM Algorithm"
+                         * (http://www.acm.org/~hlb/publications/asm/asm.html)
+                         */
+			if (i > 2 && j > 2) {
+				gchar trans = matrix[i-2][j-2] + 1;
 
-		g_mutex_lock (indexer->word_mutex);
+				if (source[i-2] != t_j)
+					trans++;
+				if (s_i != target[j-2])
+					trans++;
+				if (cell > trans)
+					cell = trans;
+			}
 
-		dpremove (dbname);
+			mincolval = MIN(mincolval, cell);
+			matrix[(gint)i][(gint)j] = cell;
+		}
 
-		g_mutex_unlock (indexer->word_mutex);
+		if (mincolval >= maxdist)
+			break;
+	}
 
-		g_free (dbname);
+	if (i == n + 1) {
+		return (gint) matrix[(gint)n][(gint)m];
 	} else {
-		g_mutex_lock (indexer->word_mutex);
-		dpclose (indexer->word_index);
-		g_mutex_unlock (indexer->word_mutex);
-	}
+		return maxdist;
+        }
+}
 
-	g_mutex_free (indexer->word_mutex);
+static gint
+count_hits_for_word (TrackerIndexer *indexer, 
+                     const gchar    *str) {
+        
+        gint tsiz;
+        gint hits = 0;
 
-	g_free (indexer->name);
+        tsiz = count_hit_size_for_word (indexer, str);
 
-	g_free (indexer);
+        if (tsiz == -1 || 
+            tsiz % sizeof (TrackerIndexerWordDetails) != 0) {
+                return -1;
+        }
 
-	
-}
+        hits = tsiz / sizeof (TrackerIndexerWordDetails);
 
-const gchar *   
-tracker_indexer_get_name (Indexer *indexer) 
-{
-        g_return_val_if_fail (indexer != NULL, NULL);
-        
-        return dpname (indexer->word_index);
+        return hits;
 }
 
+TrackerIndexer *
+tracker_indexer_new (TrackerIndexerType  type,
+                     TrackerConfig      *config)
+{
+        TrackerIndexer        *indexer;
+        TrackerIndexerPrivate *priv;
+        const gchar           *name;
+        gchar                 *directory;
+	gint                   bucket_count;
+        gint                   rec_count;
+
+	g_return_val_if_fail (TRACKER_IS_CONFIG (config), NULL);
+
+	indexer = g_object_new (TRACKER_TYPE_INDEXER,
+                                "config", config,
+                                NULL);
+
+        switch (type) {
+        case TRACKER_INDEXER_TYPE_FILES:
+                name = TRACKER_INDEXER_FILE_INDEX_DB_FILENAME;
+                break;
+        case TRACKER_INDEXER_TYPE_EMAILS:
+                name = TRACKER_INDEXER_EMAIL_INDEX_DB_FILENAME;
+                break;
+        case TRACKER_INDEXER_TYPE_FILES_UPDATE:
+                name = TRACKER_INDEXER_FILE_UPDATE_INDEX_DB_FILENAME;
+                break;
+        }
 
-guint32
-tracker_indexer_size (Indexer *indexer)
-{
-	return dpfsiz (indexer->word_index);
-}
+        priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
 
+	priv->name = g_strdup (name);
 
-void
-tracker_indexer_sync (Indexer *indexer)
-{
-	g_mutex_lock (indexer->word_mutex);
-	dpsync (indexer->word_index);
-	g_mutex_unlock (indexer->word_mutex);
+	directory = get_index_file (name);
+	priv->word_index = open_index (directory,
+                                       tracker_config_get_min_bucket_count (priv->config),
+                                       tracker_config_get_max_bucket_count (priv->config));
+        g_free (directory);
+
+	dpsetalign (priv->word_index, 8);
+
+	/* Re optimize database if bucket count < rec count */
+	bucket_count = dpbnum (priv->word_index);
+	rec_count = dprnum (priv->word_index);
+
+	g_message ("Bucket count (max is %d) is %d and record count is %d", 
+                   tracker_config_get_max_bucket_count (priv->config),
+                   bucket_count, 
+                   rec_count);
+       
+        return indexer;
 }
 
-
-gboolean
-tracker_indexer_optimize (Indexer *indexer)
+void
+tracker_indexer_set_config (TrackerIndexer *object,
+			    TrackerConfig  *config)
 {
- 
-	int num, b_count;
+	TrackerIndexerPrivate *priv;
 
-	/* set bucket count to bucket_ratio times no. of recs divided by no. of divisions */
-        num = CLAMP (get_preferred_bucket_count (indexer), 
-                     tracker_config_get_min_bucket_count (tracker->config),
-                     tracker_config_get_max_bucket_count (tracker->config));
-
-	b_count = num / tracker_config_get_divisions (tracker->config);
-	g_message ("No. of buckets per division is %d", b_count);
-
-	g_message ("Please wait while optimization of indexes takes place...");
-	g_message ("Index has file size %d and bucket count of %d of which %d are used...", 
-                   tracker_indexer_size (indexer), 
-                   dpbnum (indexer->word_index), 
-                   dpbusenum (indexer->word_index));
-	
-	g_mutex_lock (indexer->word_mutex);
+	g_return_if_fail (TRACKER_IS_INDEXER (object));
+	g_return_if_fail (TRACKER_IS_CONFIG (config));
+
+	priv = TRACKER_INDEXER_GET_PRIVATE (object);
 
-	if (!dpoptimize (indexer->word_index, b_count)) {
+	if (config) {
+		g_object_ref (config);
+	}
 
-		g_mutex_unlock (indexer->word_mutex);
-		g_message ("Optimization has failed due to %s", dperrmsg (dpecode));
-		return FALSE;
+	if (priv->config) {
+		g_object_unref (priv->config);
 	}
 
-	g_mutex_unlock (indexer->word_mutex);
+	priv->config = config;
 
-	g_message ("Index has been successfully optimized to file size %d and with bucket count of %d of which %d are used...", 
-                   tracker_indexer_size (indexer), 
-                   dpbnum (indexer->word_index), 
-                   dpbusenum (indexer->word_index));
-	
-	
-	return TRUE;
+	g_object_notify (G_OBJECT (object), "config");
 }
 
-static inline gboolean 
-has_word (Indexer *index, const char *word)
+guint32
+tracker_indexer_get_size (TrackerIndexer *indexer)
 {
-	char buffer [32];
+        TrackerIndexerPrivate *priv;
+        guint32                size;
+
+        g_return_val_if_fail (TRACKER_IS_INDEXER (indexer), 0);
 
-	int count = dpgetwb (index->word_index, word, -1, 0, 32, buffer);
+        priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
 
-	return (count > 7);
+        g_mutex_lock (priv->word_mutex);
+        size = dpfsiz (priv->word_index); /* index file size, read under the lock */
+        g_mutex_unlock (priv->word_mutex);
 
+	return size;
 }
 
+gboolean
+tracker_indexer_are_databases_too_big (void)
+{
+	gchar       *filename;
+        const gchar *filename_const;
+        const gchar *data_dir;
+        gboolean     too_big;
 
+        data_dir = tracker_get_data_dir ();
 
-void
-tracker_indexer_apply_changes (Indexer *dest, Indexer *src,  gboolean update)
-{
-        GObject *object;
-	char 	*str;
-	char 	buffer[MAX_HIT_BUFFER];
-	int 	bytes;
-	int 	sz = sizeof (WordDetails);
-	int 	i = 0, interval;
-	int 	buff_size = MAX_HITS_FOR_WORD * sz;
-
-	g_message ("applying incremental changes to indexes");
-
-	guint32 size = tracker_indexer_size (dest);
-
-	if (size < (10 * 1024 * 1024)) {
-		interval = 20000;
-	} else if (size < (20 * 1024 * 1024)) {
-		interval = 10000;
-	} else if (size < (30 * 1024 * 1024)) {
-		interval = 5000;
-	} else if (size < (100 * 1024 * 1024)) {
-		interval = 3000;
-	} else {
-		interval = 2000;
+	filename = g_build_filename (data_dir, TRACKER_INDEXER_FILE_INDEX_DB_FILENAME, NULL);
+	too_big = tracker_file_get_size (filename) > MAX_INDEX_FILE_SIZE;
+        g_free (filename);
+        
+        if (too_big) {
+		g_critical ("File index database is too big, discontinuing indexing");
+		return TRUE;	
 	}
 
-#ifdef HAVE_HAL 
-	/* halve the interval value as notebook hard drives are smaller */
-	if (tracker_hal_get_battery_exists (tracker->hal)) {
-                interval /= 2;
-        }
-#endif /* HAVE_HAL */
-
-	dpiterinit (src->word_index);
-	
-	tracker->in_merge = TRUE;
-	merge_count = 1;
-	merge_processed = 0;
-	
-        /* Signal progress */
-        object = tracker_dbus_get_object (TRACKER_TYPE_DAEMON);
-        g_signal_emit_by_name (object, 
-                               "index-progress", 
-                               "Merging",
-                               "",
-                               tracker->index_count,
-                               merge_processed,
-                               merge_count);
-	
-	while ((str = dpiternext (src->word_index, NULL))) {
-		
-		i++;
-
-		if (i > 1 && (i % 200 == 0)) {
-#if 0
-                        /* FIXME-indexer-split: This has been commented out as
-                         * a result of removing the tracker-cache.[ch] which
-                         * is no longer used. This code is in a transitional
-                         * period.  
-                         *
-                         * -Martyn
-                         */ 
-			if (!tracker_cache_process_events (NULL, FALSE)) {
-				return;	
-			}
-#endif
-		}
+	filename = g_build_filename (data_dir, TRACKER_INDEXER_EMAIL_INDEX_DB_FILENAME, NULL);
+	too_big = tracker_file_get_size (filename) > MAX_INDEX_FILE_SIZE;
+	g_free (filename);
+        
+        if (too_big) {
+		g_critical ("Email index database is too big, discontinuing indexing");
+		return TRUE;	
+	}
 
-		if (i > 1 && (i % interval == 0)) {
-			if (!tracker_config_get_fast_merges (tracker->config)) {
-                                dpsync (dest->word_index);
-                        }
-		}
-			
-		bytes = dpgetwb (src->word_index, str, -1, 0, buff_size, buffer);
+        filename_const = tracker_db_manager_get_file (TRACKER_DB_FILE_METADATA);
+	too_big = tracker_file_get_size (filename_const) > MAX_INDEX_FILE_SIZE;
+        
+        if (too_big) {
+                g_critical ("File metadata database is too big, discontinuing indexing");
+		return TRUE;	
+	}
 
-		if (bytes < 1) continue;
+        filename_const = tracker_db_manager_get_file (TRACKER_DB_EMAIL_METADATA);
+	too_big = tracker_file_get_size (filename_const) > MAX_INDEX_FILE_SIZE;
+        
+        if (too_big) {
+		g_critical ("Email metadata database is too big, discontinuing indexing");
+		return TRUE;	
+	}
 
-		if (bytes % sz != 0) {
-			g_critical ("possible corruption found during application of changes to index with word %s (ignoring update for this word)", str);
-			continue;
-		}
+	return FALSE;
+}
 
-		if (update) {
-			tracker_indexer_update_word_chunk (dest, str, (WordDetails *) buffer, bytes / sz);
-		} else {
-			tracker_indexer_append_word_chunk (dest, str, (WordDetails *) buffer, bytes / sz);
-		}
-		
-		dpout (src->word_index, str, -1);
+guint32
+tracker_indexer_calc_amalgamated (gint service, 
+                                  gint score)
+{
+	unsigned char a[4];
+	gint16        score16;
+	guint8        service_type;
 
-		g_free (str);
+	if (score > 30000) {
+		score16 = 30000;
+	} else {
+		score16 = (gint16) score;
 	}
-	
-	dpsync (dest->word_index);
 
-	/* delete src and recreate if file update index */
+	service_type = (guint8) service;
 
-	tracker_indexer_free (src, TRUE);
+	/* Amalgamate and combine score and service_type into a single
+         * 32-bit int for compact storage.
+         */
+	a[0] = service_type;
+	a[1] = (score16 >> 8) & 0xFF;
+	a[2] = score16 & 0xFF;
+	a[3] = 0;
 
-	if (update) {
-		tracker->file_update_index = tracker_indexer_open (TRACKER_INDEXER_FILE_UPDATE_INDEX_DB_FILENAME, FALSE);
-	}
-	
-	tracker->in_merge = FALSE;
-	merge_count = 1;
-	merge_processed = 1;
-
-        /* Signal progress */
-        object = tracker_dbus_get_object (TRACKER_TYPE_DAEMON);
-        g_signal_emit_by_name (object, 
-			       "index-progress", 
-                               "Merging",                     
-                               "",
-                               tracker->index_count,        
-                               merge_processed,  
-                               merge_count);     
+	return (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3];
 }
 
 gboolean
-tracker_indexer_has_tmp_merge_files (IndexType type)
+tracker_indexer_has_tmp_merge_files (TrackerIndexerType type)
 {
-	GSList *files = NULL;
+	GSList  *files = NULL;
 	gboolean result = FALSE;
 
-
-	if (type == INDEX_TYPE_FILES) {
-		files =  tracker_process_files_get_files_with_prefix (tracker_get_data_dir (),
-                                                                      "file-index.tmp.");
+	if (type == TRACKER_INDEXER_TYPE_FILES) {
+		files = tracker_process_files_get_files_with_prefix (tracker_get_data_dir (),
+                                                                     "file-index.tmp.");
 	} else {
-		files =  tracker_process_files_get_files_with_prefix (tracker_get_data_dir (), 
-                                                                      "email-index.tmp.");
+		files = tracker_process_files_get_files_with_prefix (tracker_get_data_dir (), 
+                                                                     "email-index.tmp.");
 	}
 
-	result = (files != NULL);
+	result = files != NULL;
 
 	if (result) {
 		g_slist_foreach (files, (GFunc) g_free, NULL);
@@ -520,925 +601,211 @@
 	}
 
 	return result;
-
 }
 
+guint8
+tracker_indexer_word_details_get_service_type (TrackerIndexerWordDetails *details)
+{
+        g_return_val_if_fail (details != NULL, 0);
 
+	return (details->amalgamated >> 24) & 0xFF;
+}
 
-gboolean
-tracker_indexer_has_merge_files (IndexType type)
+gint16
+tracker_indexer_word_details_get_score (TrackerIndexerWordDetails *details)
 {
-	GSList      *files = NULL;
-	gboolean     result = FALSE;
-	gchar       *final;
-        const gchar *data_dir;
+	unsigned char a[2];
 
-        data_dir = tracker_get_data_dir ();
+        g_return_val_if_fail (details != NULL, 0);
 
-	if (type == INDEX_TYPE_FILES) {
-		files =  tracker_process_files_get_files_with_prefix (data_dir, 
-                                                                      "file-index.tmp.");
-		final = g_build_filename (data_dir, "file-index-final", NULL);
-	} else {
-		files =  tracker_process_files_get_files_with_prefix (data_dir,
-                                                                      "email-index.tmp.");
-		final = g_build_filename (data_dir, "email-index-final", NULL);
-	}
+	a[0] = (details->amalgamated >> 16) & 0xFF;
+	a[1] = (details->amalgamated >> 8) & 0xFF;
 
-	result = files != NULL;
+	return (gint16) (a[0] << 8) | (a[1]);	
+}
 
-	if (!result) {
-		result = g_file_test (final, G_FILE_TEST_EXISTS);
-	} else {
-		g_slist_foreach (files, (GFunc) g_free, NULL);
-		g_slist_free (files);
-	}
+char *
+tracker_indexer_get_suggestion (TrackerIndexer *indexer, 
+                                const gchar    *term, 
+                                gint            maxdist)
+{
+        TrackerIndexerPrivate *priv;
+	gchar		      *str;
+	gint		       dist; 
+	gchar		      *winner_str;
+	gint                   winner_dist;
+	gint		       hits;
+	GTimeVal	       start, current;
 
-	g_free (final);
+        g_return_val_if_fail (TRACKER_IS_INDEXER (indexer), NULL);
+        g_return_val_if_fail (term != NULL, NULL);
 
-	return result;
+	priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
 
-}
+	winner_str = g_strdup (term);
+        winner_dist = G_MAXINT;  /* Initialize to the worst case */
 
-static void
-move_index (Indexer *src_index, Indexer *dest_index, const char *fname)
-{
+	g_mutex_lock (priv->word_mutex);
+        dpiterinit (priv->word_index);
 
-	if (!src_index || !dest_index) {
-		g_critical ("cannot move indexes");
-		return;
-	}
+	g_get_current_time (&start);
 
-	/* remove existing main index */
-	g_mutex_lock (dest_index->word_mutex);
+	str = dpiternext (priv->word_index, NULL);
+	g_mutex_unlock (priv->word_mutex);
 
-	dpclose (dest_index->word_index);
+	while (str != NULL) {
+		dist = levenshtein (term, str, 0);
 
-	dpremove (fname);
-
-	char *final_name = dpname (src_index->word_index);
-			
-	tracker_indexer_close (src_index);
-		
-	/* rename and reopen final index as main index */
-		
-	g_message ("renaming %s to %s", final_name, fname);
-	
-	rename (final_name, fname);
-
-	dest_index->word_index = open_index (fname);	
-
-	if (!dest_index->word_index) {
-		g_critical ("index creation failure for %s from %s", fname, final_name);
-	}
-
-	g_free (final_name);		
-
-	g_mutex_unlock (dest_index->word_mutex);
-
-}
-
-
-void
-tracker_indexer_merge_indexes (IndexType type)
-{
-        GObject     *object;
-	GSList      *lst;
-	Indexer     *final_index;
-	GSList      *file_list = NULL, *index_list = NULL;
-	const gchar *prefix;
-        const gchar *data_dir;
-	gint         i = 0, index_count, interval = 5000;
-	gboolean     final_exists;
-        gchar       *tmp;
-
-        data_dir = tracker_get_data_dir ();
-        object = tracker_dbus_get_object (TRACKER_TYPE_DAEMON);
-
-	if (type == INDEX_TYPE_FILES) {
-		g_return_if_fail (tracker->file_index);
-		
-		prefix = "file-index.tmp.";
-		index_list = g_slist_prepend (index_list, tracker->file_index);
-
-		tmp = g_build_filename (data_dir, "file-index-final", NULL);
-		final_exists = g_file_test (tmp, G_FILE_TEST_EXISTS);
-		g_free (tmp);
-	} else {
-		g_return_if_fail (tracker->email_index);
-
-		prefix = "email-index.tmp.";
-		index_list = g_slist_prepend (index_list, tracker->email_index);
-
-		tmp = g_build_filename (data_dir, "email-index-final", NULL);
-		final_exists = g_file_test (tmp, G_FILE_TEST_EXISTS);
-		g_free (tmp);
-	}
-	
-	file_list = tracker_process_files_get_files_with_prefix (data_dir, prefix);
-
-	if (!file_list || !file_list->data) {
-		g_slist_free (index_list);
-		return;
-	} else {
-                GSList *file;
+		if (dist != -1 && 
+                    dist < maxdist && 
+                    dist < winner_dist) {
+                        hits = count_hits_for_word (indexer, str);
 
-		for (file = file_list; file; file = file->next) {
-			if (file->data) {
-				gchar *name = g_path_get_basename (file->data);
-
-				if (name) {
-					if (g_file_test (file->data, G_FILE_TEST_EXISTS)) {
-                                                Indexer *tmp_index = tracker_indexer_open (name, FALSE);
-
-						if (tmp_index) {
-							index_list = g_slist_prepend (index_list, tmp_index);
-						}
-					}
+                        if (hits < 0) {
+                                g_free (winner_str);
+                                g_free (str);
 
-					g_free (name);
-				}
+                                return NULL;
+			} else if (hits > 0) {
+                                g_free (winner_str);
+                                winner_str = g_strdup (str);
+                                winner_dist = dist;
+                        } else {
+				g_message ("No hits for:'%s'!", str);
 			}
 		}
 
-		g_slist_foreach (file_list, (GFunc) g_free, NULL);
-		g_slist_free (file_list);
-	}
-
- 	index_count = g_slist_length (index_list);
-
-	if (index_count < 2) {
-		g_slist_free (index_list);
-		return;
-	}
-
-	g_message ("starting merge of %d indexes", index_count);
-	tracker->in_merge = TRUE;
-	merge_count = index_count;
-	merge_processed = 0;
-	
-        /* Signal progress */
-        g_signal_emit_by_name (object, 
-                               "index-progress", 
-                               "Merging",
-                               "",
-                               tracker->index_count,
-                               merge_processed,
-                               merge_count);
-
-	if (index_count == 2 && !final_exists) {
-                Indexer *index1 = index_list->data ;
-                Indexer *index2 = index_list->next->data ;
-
-		if (tracker_indexer_size (index1) * 3 < tracker_indexer_size (index2)) {
-			tracker_indexer_apply_changes (index2, index1, FALSE);
-			g_slist_free (index_list);
-			goto end_of_merging;
-		}
-	}
-
-	/* Signal state change */
-	g_signal_emit_by_name (object, 
-			       "index-state-change", 
-			       tracker_status_get_as_string (),
-			       tracker->first_time_index,
-			       tracker->in_merge,
-			       tracker->pause_manual,
-			       tracker_should_pause_on_battery (),
-			       tracker->pause_io,
-			       tracker_config_get_enable_indexing (tracker->config));
-
-	if (type == INDEX_TYPE_FILES) {
-		final_index = tracker_indexer_open ("file-index-final", TRUE);
-	} else {
-		final_index = tracker_indexer_open ("email-index-final", TRUE);
-	}
-
-	if (!final_index) {
-		g_slist_free (index_list);
-		g_critical ("could not open final index - abandoning index merge");
-		goto end_of_merging;
-	}
-
-	for (lst = index_list; lst && lst->data; lst = lst->next) {
-                gchar   *str;
-		Indexer *index = lst->data;
-
-		dpiterinit (index->word_index);
-
-		while ((str = dpiternext (index->word_index, NULL))) {
-			gchar buffer[MAX_HIT_BUFFER];
-			gint offset;
-			gint sz = sizeof (WordDetails);
-			gint buff_size = MAX_HITS_FOR_WORD * sz;
-
-			if (!has_word (final_index, str)) {
-
-				i++;
-
-				if (i > 101 && (i % 100 == 0)) {
-#if 0
-                                        /* FIXME-indexer-split: This has been commented out as
-                                         * a result of removing the tracker-cache.[ch] which
-                                         * is no longer used. This code is in a transitional
-                                         * period.  
-                                         *
-                                         * -Martyn
-                                         */ 
-					if (!tracker_cache_process_events (NULL, FALSE)) {
-                                                tracker_status_set_and_signal (TRACKER_STATUS_SHUTDOWN,
-                                                                               tracker->first_time_index,
-                                                                               tracker->in_merge,
-                                                                               tracker->pause_manual,
-                                                                               tracker_should_pause_on_battery (),
-                                                                               tracker->pause_io,
-                                                                               tracker_config_get_enable_indexing (tracker->config));
-						return;	
-					}
-#endif
-				}
-
-				if (i > interval && (i % interval == 0)) {
-
-                                        if (!tracker_config_get_fast_merges (tracker->config)) {
-
-						dpsync (final_index->word_index);
-
-						guint32 size = tracker_indexer_size (final_index);
-
-						if (size < (10 * 1024 * 1024)) {
-							interval = 10000;
-						} else if (size < (20 * 1024 * 1024)) {
-							interval = 6000;
-						} else if (size < (50 * 1024 * 1024)) {
-							interval = 6000;
-						} else if (size < (100 * 1024 * 1024)) {
-							interval = 4000;
-						} else {
-							interval = 3000;
-						}
-
-#ifdef HAVE_HAL 
-						/* halve the interval value as notebook hard drives are smaller */
-                                                if (tracker_hal_get_battery_exists (tracker->hal)) {
-                                                        interval /=  2;
-                                                }
-#endif /* HAVE_HAL */
-					}
-				}
-			
-				offset = dpgetwb (index->word_index, str, -1, 0, buff_size, buffer);
-
-				if (offset < 1) {
-                                        continue;
-                                }
-
-				if (offset % sz != 0) {
-					g_critical ("possible corruption found during merge of word %s - purging word from index (it will not be searchable)", str);
-					continue;
-				}
-
-				if (offset > 7 && offset < buff_size) {
-
-					GSList *list;
-
-					for (list = lst->next; list; list = list->next) {
-                                                gchar   tmp_buffer[MAX_HIT_BUFFER];
-						Indexer *tmp_index = list->data;
-
-						if (!tmp_index) {
-                                                        continue;
-                                                }
-
-						gint tmp_offset = dpgetwb (tmp_index->word_index, str, -1, 0, (buff_size - offset), tmp_buffer);	
-
-						if (tmp_offset > 0 && (tmp_offset % sz != 0)) {
-							g_critical ("possible corruption found during merge of word %s - purging word from index", str);
-							continue;
-						}
-
-						if (tmp_offset > 7 && (tmp_offset % sz == 0)) {
-							memcpy (buffer + offset, tmp_buffer, tmp_offset);
-							offset += tmp_offset;
-						}												
-					}
+		g_free (str);
 
-					dpput (final_index->word_index, str, -1, buffer, offset, DP_DOVER);
-				}
-			}
+		g_get_current_time (&current);
 
-			g_free (str);
+		if (current.tv_sec - start.tv_sec >= 2) { /* 2 second time out */
+			g_message ("Timeout in tracker_dbus_method_search_suggest");
+                        break;
 		}
 
-
-		
-
-		/* dont free last entry as that is the main index */
-		if (lst->next) {
-
-			if (index != tracker->file_index && index != tracker->email_index) {
-                                GObject *object;
-
-				tracker_indexer_free (index, TRUE);
-				merge_processed++;
-
-                                /* Signal progress */
-                                object = tracker_dbus_get_object (TRACKER_TYPE_DAEMON);
-                                g_signal_emit_by_name (object, 
-                                                       "index-progress", 
-                                                       "Merging",
-                                                       "",
-                                                       tracker->index_count,
-                                                       merge_processed,
-                                                       merge_count);
-			}
-
-
-		} else {
-			if (type == INDEX_TYPE_FILES) {
-
-				char *fname = get_index_file (TRACKER_INDEXER_FILE_INDEX_DB_FILENAME);
-				move_index (final_index, tracker->file_index, fname);	
-				g_free (fname);
-
-			} else {
-				char *fname = get_index_file (TRACKER_INDEXER_EMAIL_INDEX_DB_FILENAME);
-				move_index (final_index, tracker->email_index, fname);
-				g_free (fname);
-			}
-		}		
+                g_mutex_lock (priv->word_mutex);
+		str = dpiternext (priv->word_index, NULL);
+                g_mutex_unlock (priv->word_mutex);
 	}
-	
-	g_slist_free (index_list);
-
-	
 
- end_of_merging:
-	tracker->in_merge = FALSE;
-	
-        /* Signal state change */
-        g_signal_emit_by_name (object, 
-                               "index-state-change", 
-			       tracker_status_get_as_string (),
-                               tracker->first_time_index,
-                               tracker->in_merge,
-                               tracker->pause_manual,
-                               tracker_should_pause_on_battery (),
-                               tracker->pause_io,
-                               tracker_config_get_enable_indexing (tracker->config));
+        return winner_str;
 }
 
+TrackerIndexerWordDetails *
+tracker_indexer_get_word_hits (TrackerIndexer *indexer,
+			       const gchar    *word,
+			       guint          *count)
+{
+        TrackerIndexerPrivate     *priv;
+	TrackerIndexerWordDetails *details;
+	gint                       tsiz;
+	gchar                     *tmp;
 
+        g_return_val_if_fail (TRACKER_IS_INDEXER (indexer), NULL);
+        g_return_val_if_fail (word != NULL, NULL);
 
-/* indexing api */
-
-/* use for fast insertion of a word for multiple documents at a time */
-
-gboolean
-tracker_indexer_append_word_chunk (Indexer *indexer, const gchar *word, WordDetails *details, gint word_detail_count)
-{
-	g_return_val_if_fail (indexer, FALSE);
-	g_return_val_if_fail (indexer->word_index, FALSE);
-	g_return_val_if_fail (word, FALSE);
-	g_return_val_if_fail (details, FALSE);
-	g_return_val_if_fail (word_detail_count > 0, FALSE);
-
-	g_mutex_lock (indexer->word_mutex);
-	if (!dpput (indexer->word_index, word, -1, (char *) details, (word_detail_count * sizeof (WordDetails)), DP_DCAT)) {
-		g_mutex_unlock (indexer->word_mutex);
-		return FALSE;
-	}
-	g_mutex_unlock (indexer->word_mutex);
-
-	return TRUE;	
-}
+	priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
 
+	g_mutex_lock (priv->word_mutex);
 
-/* append individual word for a document */
+	details = NULL;
 
-gboolean
-tracker_indexer_append_word (Indexer *indexer, const gchar *word, guint32 id, gint service, gint score)
-{
-        if (score < 1) {
-                return FALSE;
+        if (count) {
+                *count = 0;
         }
 
-	g_return_val_if_fail (indexer, FALSE);
-	g_return_val_if_fail (indexer->word_index, FALSE);
-        g_return_val_if_fail (word, FALSE);
+	if ((tmp = dpget (priv->word_index, word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
+		if (tsiz >= (gint) sizeof (TrackerIndexerWordDetails)) {
+			details = (TrackerIndexerWordDetails *) tmp;
 
-	WordDetails pair;
-
-	pair.id = id;
-	pair.amalgamated = tracker_indexer_calc_amalgamated (service, score);
-
-	return tracker_indexer_append_word_chunk (indexer, word, &pair, 1);
-}
-
-
-/* append lists of words for a document - returns no. of hits added */
-gint
-tracker_indexer_append_word_list (Indexer *indexer, const gchar *word, GSList *list)
-{
-	WordDetails word_details[MAX_HITS_FOR_WORD], *wd;
-	gint i;
-	GSList *lst;
-
-	g_return_val_if_fail (indexer, 0);
-	g_return_val_if_fail (indexer->word_index, 0);
-	g_return_val_if_fail (word, 0);
-
-	i = 0;
-
-	if (list) {
-		for (lst = list; (lst && i < MAX_HITS_FOR_WORD); lst = lst->next) {
-
-			if (lst->data) {
-				wd = lst->data;
-				word_details[i].id = wd->id;
-				word_details[i].amalgamated = wd->amalgamated;
-				i++;
-			}
-		}
-	}
-
-	if (i > 0) {
-		tracker_indexer_append_word_chunk (indexer, word, word_details, i);
-	}
-
-	return i;
-}
-
-
-
-/* use for deletes or updates of multiple entities when they are not new */
-gboolean
-tracker_indexer_update_word_chunk (Indexer *indexer, const gchar *word, WordDetails *detail_chunk, gint word_detail_count)
-{	
-	int  tsiz, j, i, score;
-	char *tmp;
-	WordDetails *word_details;
-	gboolean write_back = FALSE;
-	GSList *list = NULL;
-
-	g_return_val_if_fail (indexer, FALSE);
-	g_return_val_if_fail (indexer->word_index, FALSE);
-	g_return_val_if_fail (word, FALSE);
-	g_return_val_if_fail (detail_chunk, FALSE);
-	g_return_val_if_fail (word_detail_count > 0, FALSE);
-
-	/* check if existing record is there */
-	gint hit_count = 0;
-
-	g_mutex_lock (indexer->word_mutex);
-
-	if ((tmp = dpget (indexer->word_index, word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-
-
-		WordDetails *details = (WordDetails *) tmp;
-		hit_count = tsiz / sizeof (WordDetails);
-
-		details = (WordDetails *) tmp;
-
-		for (j = 0; j < word_detail_count; j++) {
-
-			word_details = &detail_chunk[j];
-
-			gboolean edited = FALSE;
-
-			for (i = 0; i < hit_count; i++) {
-
-				if (details[i].id == word_details->id) {
-
-					write_back = TRUE;
-
-					/* NB the paramter score can be negative */
-					score = get_score (&details[i]) + get_score (word_details);
-					//g_print ("current score for %s is %d and new is %d and final is %d\n", word, get_score (&details[i]), get_score (word_details), score); 
-
-							
-					/* check for deletion */		
-					if (score < 1) {
-
-						//g_print ("deleting word hit %s\n", word);
-						
-						gint k;
-					
-						/* shift all subsequent records in array down one place */
-						for (k = i + 1; k < hit_count; k++) {
-							details[k - 1] = details[k];
-						}
-
-						hit_count--;
-	
-					} else {
-						details[i].amalgamated = tracker_indexer_calc_amalgamated (get_service_type (&details[i]), score);
-					}
-
-					edited = TRUE;
-					break;
-				}
-			}
-
-			/* add hits that could not be updated directly here so they can be appended later */
-			if (!edited) {
-				list = g_slist_prepend (list, &detail_chunk[j]);
-				g_debug ("could not update word hit %s - appending", word);
-			}
-		}
-	
-		/* write back if we have modded anything */
-		if (write_back) {
-			dpput (indexer->word_index, word, -1, (char *) details, (hit_count * sizeof (WordDetails)), DP_DOVER);
-		}
-
-		g_mutex_unlock (indexer->word_mutex);	
-
-		if (list) {
-			tracker_indexer_append_word_list (indexer, word, list);
-			g_slist_free (list);
-		}
-	
-		return TRUE;
-	}
-
-	g_mutex_unlock (indexer->word_mutex);	
-
-	/* none of the updates can be applied if word does not exist so return them all to be appended later */
-	return tracker_indexer_append_word_chunk (indexer, word, detail_chunk, word_detail_count);
-
-}
-
-
-/* use for deletes or updates of multiple entities when they are not new */
-gboolean
-tracker_indexer_update_word_list (Indexer *indexer, const gchar *word, GSList *update_list)
-{
-	WordDetails word_details[MAX_HITS_FOR_WORD], *wd;
-	gint i;
-	GSList *lst;
-
-	g_return_val_if_fail (indexer, 0);
-	g_return_val_if_fail (indexer->word_index, 0);
-	g_return_val_if_fail (word, 0);
-
-	i = 0;
-
-	if (update_list) {
-		for (lst = update_list; (lst && i < MAX_HITS_FOR_WORD); lst = lst->next) {
-
-			if (lst->data) {
-				wd = lst->data;
-				word_details[i].id = wd->id;
-				word_details[i].amalgamated = wd->amalgamated;
-				i++;
-			}
-		}
-	}
-
-	if (i > 0) {
-		tracker_indexer_update_word_chunk (indexer, word, word_details, i);
-	}
-
-	return i;
-}
-
-WordDetails *
-tracker_indexer_get_word_hits (Indexer     *indexer,
-			       const gchar *word,
-			       guint       *count)
-{
-	WordDetails *details;
-	gint tsiz;
-	gchar *tmp;
-
-	g_mutex_lock (indexer->word_mutex);
-
-	details = NULL;
-	*count = 0;
-
-	if ((tmp = dpget (indexer->word_index, word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-		if (tsiz >= (int) sizeof (WordDetails)) {
-			details = (WordDetails *) tmp;
-			*count = tsiz / sizeof (WordDetails);
+                        if (count) {
+                                *count = tsiz / sizeof (TrackerIndexerWordDetails);
+                        }
 		}
 	}
 
-	g_mutex_unlock (indexer->word_mutex);
+	g_mutex_unlock (priv->word_mutex);
 
 	return details;
 }
 
-/* use to delete dud hits for a word - dud_list is a list of TrackerSearchHit structs */
+/* Use to delete dud hits for a word - dud_list is a list of
+ * TrackerSearchHit structs.
+ */
 gboolean
-tracker_remove_dud_hits (Indexer *indexer, const gchar *word, GSList *dud_list)
-{
-	gint tsiz;
-	char *tmp;
+tracker_indexer_remove_dud_hits (TrackerIndexer *indexer, 
+				 const gchar    *word, 
+				 GSList         *dud_list)
+{
+        TrackerIndexerPrivate *priv;
+	gchar                 *tmp;
+	gint                   tsiz;
 
 	g_return_val_if_fail (indexer, FALSE);
-	g_return_val_if_fail (indexer->word_index, FALSE);
+	g_return_val_if_fail (TRACKER_INDEXER_GET_PRIVATE (indexer)->word_index, FALSE);
 	g_return_val_if_fail (word, FALSE);
 	g_return_val_if_fail (dud_list, FALSE);
-	
-	g_mutex_lock (indexer->word_mutex);
-
-	/* check if existing record is there  */
-	if ((tmp = dpget (indexer->word_index, word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-
-		if (tsiz >= (int) sizeof (WordDetails)) {
-
-			WordDetails *details;
-			int wi, i, pnum;
-
-			details = (WordDetails *) tmp;
-			pnum = tsiz / sizeof (WordDetails);
-			wi = 0;	
-
-			for (i = 0; i < pnum; i++) {
-
-				GSList *lst;
-
-				for (lst = dud_list; lst; lst = lst->next) {
 
-					TrackerSearchHit *hit = lst->data;
-
-					if (hit) {
-						if (details[i].id == hit->service_id) {
-							int k;
-
-							/* shift all subsequent records in array down one place */
-							for (k = i + 1; k < pnum; k++) {
-								details[k - 1] = details[k];
-							}
-
-							/* make size of array one size smaller */
-							tsiz -= sizeof (WordDetails); 
-							pnum--;
-
-							break;
-						}
-					}
-				}
-			}
-
-			dpput (indexer->word_index, word, -1, (char *) details, tsiz, DP_DOVER);
-			
-			g_mutex_unlock (indexer->word_mutex);	
+	priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
 	
-			g_free (tmp);
-
-			return TRUE;
-		}
-
-		g_free (tmp);
-	}
-
-	g_mutex_unlock (indexer->word_mutex);
+	/* Check if existing record is there  */
+	g_mutex_lock (priv->word_mutex);
+	tmp = dpget (priv->word_index, 
+                     word, 
+                     -1,
+                     0,
+                     MAX_HIT_BUFFER,
+                     &tsiz);
+	g_mutex_unlock (priv->word_mutex);
 
-	return FALSE;
-}
-
-static inline gint
-count_hit_size_for_word (Indexer *indexer, const gchar *word)
-{
-	int  tsiz;
-
-	g_mutex_lock (indexer->word_mutex);	
-	tsiz = dpvsiz (indexer->word_index, word, -1);
-	g_mutex_unlock (indexer->word_mutex);	
-
-	return tsiz;
-}
-
-guint8
-tracker_word_details_get_service_type (WordDetails *details)
-{
-	return (details->amalgamated >> 24) & 0xFF;
-}
-
-gint16
-tracker_word_details_get_score (WordDetails *details)
-{
-	unsigned char a[2];
-
-	a[0] = (details->amalgamated >> 16) & 0xFF;
-	a[1] = (details->amalgamated >> 8) & 0xFF;
-
-	return (gint16) (a[0] << 8) | (a[1]);	
-}
-
-/* int levenshtein ()
- * Original license: GNU Lesser Public License
- * from the Dixit project, (http://dixit.sourceforge.net/)
- * Author: Octavian Procopiuc <oprocopiuc gmail com>
- * Created: July 25, 2004
- * Copied into tracker, by Edward Duffy
- */
-
-static int
-levenshtein(const char *source, char *target, int maxdist)
-{
-	char n, m;
-	int l;
-	l = strlen (source);
-	if (l > 50)
-		return -1;
-	n = l;
-
-	l = strlen (target);
-	if (l > 50)
-		return -1;
-	m = l;
-
-	if (maxdist == 0)
-		maxdist = MAX(m, n);
-	if (n == 0)
-		return MIN(m, maxdist);
-	if (m == 0)
-		return MIN(n, maxdist);
-
-	// Store the min. value on each column, so that, if it reaches
-	// maxdist, we break early.
-	char mincolval;
-
-	char matrix[51][51];
-
-	char j;
-	char i;
-	char cell;
-
-	for (j = 0; j <= m; j++)
-		matrix[0][(int)j] = j;
-
-	for (i = 1; i <= n; i++) {
-
-		mincolval = MAX(m, i);
-		matrix[(int)i][0] = i;
-
-		char s_i = source[i-1];
-
-		for (j = 1; j <= m; j++) {
-
-			char t_j = target[j-1];
-
-			char cost = (s_i == t_j ? 0 : 1);
-
-			char above = matrix[i-1][(int)j];
-			char left = matrix[(int)i][j-1];
-			char diag = matrix[i-1][j-1];
-			cell = MIN(above + 1, MIN(left + 1, diag + cost));
-
-			// Cover transposition, in addition to deletion,
-			// insertion and substitution. This step is taken from:
-			// Berghel, Hal ; Roach, David : "An Extension of Ukkonen's 
-			// Enhanced Dynamic Programming ASM Algorithm"
-			// (http://www.acm.org/~hlb/publications/asm/asm.html)
-
-			if (i > 2 && j > 2) {
-				char trans = matrix[i-2][j-2] + 1;
-				if (source[i-2] != t_j)
-					trans++;
-				if (s_i != target[j-2])
-					trans++;
-				if (cell > trans)
-					cell = trans;
-			}
-
-			mincolval = MIN(mincolval, cell);
-			matrix[(int)i][(int)j] = cell;
-		}
-
-		if (mincolval >= maxdist)
-			break;
-
-	}
-
-	if (i == n + 1)
-		return (int) matrix[(int)n][(int)m];
-	else
-		return maxdist;
-}
-
-static int
-count_hits_for_word (Indexer *indexer, const gchar *str) {
-        
-        gint tsiz, hits = 0;
-
-        tsiz = count_hit_size_for_word (indexer, str);
-
-        if (tsiz == -1 || tsiz % sizeof (WordDetails) != 0) {
-                return -1;
+        if (!tmp) {
+                return FALSE;
         }
 
-        hits = tsiz / sizeof (WordDetails);
-
-        return hits;
-}
-
-char *
-tracker_indexer_get_suggestion (Indexer *indexer, const gchar *term, gint maxdist)
-{
-
-	gchar		*str;
-	gint		dist; 
-	gchar		*winner_str;
-	gint		winner_dist;
-	gint		hits;
-	GTimeVal	start, current;
-
-	winner_str = g_strdup (term);
-        winner_dist = G_MAXINT;  /* Initialize to the worst case */
-
-        dpiterinit (indexer->word_index);
-
-	g_get_current_time (&start);
-
-	str = dpiternext (indexer->word_index, NULL);
-
-	while (str != NULL) {
-
-		dist = levenshtein (term, str, 0);
-
-		if (dist != -1 && dist < maxdist && dist < winner_dist) {
-
-                        hits = count_hits_for_word (indexer, str);
-
-                        if (hits < 0) {
-
-                                g_free (winner_str);
-                                g_free (str);
-                                return NULL;
-
-			} else if (hits > 0) {
-
-                                g_free (winner_str);
-                                winner_str = g_strdup (str);
-                                winner_dist = dist;
-
-                        } else {
-				g_message ("No hits for %s!", str);
-			}
-		}
-
-		g_free (str);
-
-		g_get_current_time (&current);
-
-		if (current.tv_sec - start.tv_sec >= 2) { /* 2 second time out */
-			g_message ("Timeout in tracker_dbus_method_search_suggest");
-                        break;
-		}
-
-		str = dpiternext (indexer->word_index, NULL);
-	}
-
-        return winner_str;
-}
-
-gboolean
-tracker_indexer_are_databases_too_big (void)
-{
-	gchar       *filename;
-        const gchar *filename_const;
-        const gchar *data_dir;
-        gboolean     too_big;
-
-        data_dir = tracker_get_data_dir ();
-
-	filename = g_build_filename (data_dir, TRACKER_INDEXER_FILE_INDEX_DB_FILENAME, NULL);
-	too_big = tracker_file_get_size (filename) > MAX_INDEX_FILE_SIZE;
-        g_free (filename);
-        
-        if (too_big) {
-		g_critical ("File index database is too big, discontinuing indexing");
-		return TRUE;	
-	}
-
-	filename = g_build_filename (data_dir, TRACKER_INDEXER_EMAIL_INDEX_DB_FILENAME, NULL);
-	too_big = tracker_file_get_size (filename) > MAX_INDEX_FILE_SIZE;
-	g_free (filename);
-        
-        if (too_big) {
-		g_critical ("Email index database is too big, discontinuing indexing");
-		return TRUE;	
-	}
-
-        filename_const = tracker_db_manager_get_file (TRACKER_DB_FILE_METADATA);
-	too_big = tracker_file_get_size (filename_const) > MAX_INDEX_FILE_SIZE;
-        
-        if (too_big) {
-                g_critical ("File metadata database is too big, discontinuing indexing");
-		return TRUE;	
-	}
-
-        filename_const = tracker_db_manager_get_file (TRACKER_DB_EMAIL_METADATA);
-	too_big = tracker_file_get_size (filename_const) > MAX_INDEX_FILE_SIZE;
+        if (tsiz >= (int) sizeof (TrackerIndexerWordDetails)) {
+                TrackerIndexerWordDetails *details;
+                gint                       wi, i, pnum;
+                
+                details = (TrackerIndexerWordDetails *) tmp;
+                pnum = tsiz / sizeof (TrackerIndexerWordDetails);
+                wi = 0;	
+                
+                for (i = 0; i < pnum; i++) {
+                        GSList *lst;
+                        
+                        for (lst = dud_list; lst; lst = lst->next) {
+                                TrackerSearchHit *hit = lst->data;
+                                
+                                if (hit) {
+                                        if (details[i].id == hit->service_id) {
+                                                gint k;
+                                                
+                                                /* Shift all subsequent records in array down one place */
+                                                for (k = i + 1; k < pnum; k++) {
+                                                        details[k - 1] = details[k];
+                                                }
+                                                
+                                                /* Shrink the array, then step back so the record shifted into slot i is tested too */
+                                                tsiz -= sizeof (TrackerIndexerWordDetails);
+                                                pnum--;
+                                                i--;
+                                                break;
+                                        }
+                                }
+                        }
+                }
+                
+                g_mutex_lock (priv->word_mutex);	
+                dpput (priv->word_index, word, -1, (gchar *) details, tsiz, DP_DOVER);
+                g_mutex_unlock (priv->word_mutex);	
+                
+                g_free (tmp);
+                
+                return TRUE;
+        }
         
-        if (too_big) {
-		g_critical ("Email metadata database is too big, discontinuing indexing");
-		return TRUE;	
-	}
+        g_free (tmp);
 
 	return FALSE;
 }

Modified: branches/xesam-support/src/trackerd/tracker-indexer.h
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-indexer.h	(original)
+++ branches/xesam-support/src/trackerd/tracker-indexer.h	Thu Jun 26 12:35:12 2008
@@ -1,7 +1,8 @@
 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
-/* 
+/*
  * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
- *
+ * Copyright (C) 2008, Nokia
+
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License as published by the Free Software Foundation; either
@@ -21,11 +22,9 @@
 #ifndef __TRACKERD_INDEXER_H__
 #define __TRACKERD_INDEXER_H__
 
-#include <stdlib.h>
-
 #include <glib.h>
 
-#include <libtracker-db/tracker-db-interface.h>
+#include <libtracker-common/tracker-config.h>
 
 #define TRACKER_INDEXER_FILE_INDEX_DB_FILENAME         "file-index.db"
 #define TRACKER_INDEXER_EMAIL_INDEX_DB_FILENAME        "email-index.db"
@@ -33,74 +32,69 @@
 
 G_BEGIN_DECLS
 
-typedef struct {                         
-	guint32 	id;              /* Service ID number of the
-                                          * document */
-	int 		amalgamated;     /* amalgamation of
-                                          * service_type and score of
-                                          * the word in the document's
-                                          * metadata */
-} WordDetails;
+#define TRACKER_TYPE_INDEXER         (tracker_indexer_get_type())
+#define TRACKER_INDEXER(o)           (G_TYPE_CHECK_INSTANCE_CAST ((o), TRACKER_TYPE_INDEXER, TrackerIndexer))
+#define TRACKER_INDEXER_CLASS(c)     (G_TYPE_CHECK_CLASS_CAST ((c),    TRACKER_TYPE_INDEXER, TrackerIndexerClass))
+#define TRACKER_IS_INDEXER(o)        (G_TYPE_CHECK_INSTANCE_TYPE ((o), TRACKER_TYPE_INDEXER))
+#define TRACKER_IS_INDEXER_CLASS(c)  (G_TYPE_CHECK_CLASS_TYPE ((c),    TRACKER_TYPE_INDEXER))
+#define TRACKER_INDEXER_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o),  TRACKER_TYPE_INDEXER, TrackerIndexerClass))
+
+typedef struct TrackerIndexer TrackerIndexer;
+typedef struct TrackerIndexerClass TrackerIndexerClass;
+typedef struct TrackerIndexerWordDetails TrackerIndexerWordDetails;
+
+struct TrackerIndexer {
+	GObject parent;
+};
+
+struct TrackerIndexerClass {
+	GObjectClass parent_class;
+};
+
+struct TrackerIndexerWordDetails {                         
+	/* Service ID number of the document */
+	guint32 id;              
+
+	/* Amalgamation of service_type and score of the word in the
+	 * document's metadata.
+	 */
+	gint    amalgamated;     
+};
 
 typedef enum {
-	WordNormal,
-	WordWildCard,
-	WordExactPhrase
-} WordType;
-
-typedef struct {                        
-	gchar	 	*word;    
-	gint		hit_count;
-	gfloat		idf;
-	WordType	word_type;
-} SearchWord;
-
-typedef struct Indexer_ Indexer;
-
-typedef enum {
-	INDEX_TYPE_FILES,
-	INDEX_TYPE_EMAILS,
-	INDEX_TYPE_FILE_UPDATE
-} IndexType;
-
-guint32		tracker_indexer_calc_amalgamated 	(gint service, gint score);
-
-Indexer * 	tracker_indexer_open 			(const gchar *name, gboolean main_index);
-void		tracker_indexer_close 			(Indexer *indexer);
-gboolean	tracker_indexer_repair 			(const char *name);
-void		tracker_indexer_free 			(Indexer *indexer, gboolean remove_file);
-gboolean	tracker_indexer_has_merge_index 	(Indexer *indexer, gboolean update);
-
-const gchar *   tracker_indexer_get_name                (Indexer *indexer);
-guint32		tracker_indexer_size 			(Indexer *indexer);
-gboolean	tracker_indexer_optimize		(Indexer *indexer);
-void		tracker_indexer_sync 			(Indexer *indexer);
-
-void		tracker_indexer_apply_changes 		(Indexer *dest, Indexer *src,  gboolean update);
-void		tracker_indexer_merge_indexes 		(IndexType type);
-gboolean	tracker_indexer_has_merge_files 	(IndexType type);
-gboolean	tracker_indexer_has_tmp_merge_files 	(IndexType type);
-
-/* Indexing api */
-gboolean	tracker_indexer_append_word 		(Indexer *indexer, const gchar *word, guint32 id, gint service, gint score);
-gboolean	tracker_indexer_append_word_chunk 	(Indexer *indexer, const gchar *word, WordDetails *details, gint word_detail_count);
-gint		tracker_indexer_append_word_list 	(Indexer *indexer, const gchar *word, GSList *list);
-
-gboolean	tracker_indexer_update_word 		(Indexer *indexer, const gchar *word, guint32 id, gint service, gint score, gboolean remove_word);
-gboolean	tracker_indexer_update_word_chunk	(Indexer *indexer, const gchar *word, WordDetails *details, gint word_detail_count);
-gboolean	tracker_indexer_update_word_list 	(Indexer *indexer, const gchar *word, GSList *update_list);
-
-WordDetails *   tracker_indexer_get_word_hits           (Indexer *indexer, const gchar *word, guint *count);
-
-gboolean	tracker_remove_dud_hits 		(Indexer *indexer, const gchar *word, GSList *dud_list);
-
-char *          tracker_indexer_get_suggestion          (Indexer *indexer, const gchar *term, gint maxdist);
-
-gboolean        tracker_indexer_are_databases_too_big   (void);
-
-/* Word API */
-guint8          tracker_word_details_get_service_type   (WordDetails *details);
-gint16          tracker_word_details_get_score          (WordDetails *details);
+	TRACKER_INDEXER_TYPE_FILES,
+	TRACKER_INDEXER_TYPE_EMAILS,
+	TRACKER_INDEXER_TYPE_FILES_UPDATE
+} TrackerIndexerType;
+
+
+GType           tracker_indexer_get_type                      (void);
+
+TrackerIndexer *tracker_indexer_new                           (TrackerIndexerType         type,
+							       TrackerConfig             *config);
+void            tracker_indexer_set_config                    (TrackerIndexer            *object,
+							       TrackerConfig             *config);
+guint32         tracker_indexer_get_size                      (TrackerIndexer            *indexer);
+
+gboolean        tracker_indexer_are_databases_too_big         (void);
+gboolean        tracker_indexer_has_tmp_merge_files           (TrackerIndexerType         type);
+guint32         tracker_indexer_calc_amalgamated              (gint                       service,
+							       gint                       score);
+
+guint8          tracker_indexer_word_details_get_service_type (TrackerIndexerWordDetails *details);
+gint16          tracker_indexer_word_details_get_score        (TrackerIndexerWordDetails *details);
+
+char *          tracker_indexer_get_suggestion                (TrackerIndexer            *indexer,
+							       const gchar               *term,
+							       gint                       maxdist);
+TrackerIndexerWordDetails *
+                tracker_indexer_get_word_hits                 (TrackerIndexer            *indexer,
+							       const gchar               *word,
+							       guint                     *count);
+
+gboolean        tracker_indexer_remove_dud_hits               (TrackerIndexer            *indexer,
+							       const gchar               *word,
+							       GSList                    *dud_list);
 
 G_END_DECLS
 

Modified: branches/xesam-support/src/trackerd/tracker-main.c
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-main.c	(original)
+++ branches/xesam-support/src/trackerd/tracker-main.c	Thu Jun 26 12:35:12 2008
@@ -25,9 +25,10 @@
 
 #include "config.h"
 
+#include <stdlib.h>
+#include <string.h>
 #include <signal.h>
 #include <locale.h>
-#include <string.h>
 #include <unistd.h> 
 #include <fcntl.h>
 
@@ -483,9 +484,6 @@
 static gboolean
 initialize_databases (void)
 {
-	Indexer *index;
-	gchar   *final_index_name;
-
 	/*
 	 * Create SQLite databases 
 	 */
@@ -519,13 +517,22 @@
 		tracker_db_set_option_int ("InitialIndex", 1);
 	}
 
+	return TRUE;
+}
+
+static gboolean
+initialize_indexers (TrackerConfig *config)
+{
+	TrackerIndexer *indexer;
+	gchar          *final_index_name;
+
 	/*
 	 * Create index files
 	 */
 	final_index_name = g_build_filename (data_dir, "file-index-final", NULL);
 	
 	if (g_file_test (final_index_name, G_FILE_TEST_EXISTS) && 
-	    !tracker_indexer_has_tmp_merge_files (INDEX_TYPE_FILES)) {
+	    !tracker_indexer_has_tmp_merge_files (TRACKER_INDEXER_TYPE_FILES)) {
 		gchar *file_index_name;
 
 		file_index_name = g_build_filename (data_dir, 
@@ -546,7 +553,7 @@
 					     NULL);
 	
 	if (g_file_test (final_index_name, G_FILE_TEST_EXISTS) && 
-	    !tracker_indexer_has_tmp_merge_files (INDEX_TYPE_EMAILS)) {
+	    !tracker_indexer_has_tmp_merge_files (TRACKER_INDEXER_TYPE_EMAILS)) {
 		gchar *file_index_name;
 
 		file_index_name = g_build_filename (data_dir, 
@@ -563,16 +570,26 @@
 	g_free (final_index_name);
 
 	/* Create indexers */
-	index = tracker_indexer_open (TRACKER_INDEXER_FILE_INDEX_DB_FILENAME, TRUE);
-	tracker->file_index = index;
+	indexer = tracker_indexer_new (TRACKER_INDEXER_TYPE_FILES, config);
+	if (!indexer) {
+		return FALSE;
+	}
 
-	index = tracker_indexer_open (TRACKER_INDEXER_FILE_UPDATE_INDEX_DB_FILENAME, FALSE);
-	tracker->file_update_index = index;
+	tracker->file_index = indexer;
 
-	index = tracker_indexer_open (TRACKER_INDEXER_EMAIL_INDEX_DB_FILENAME, TRUE);
-	tracker->email_index = index;
+	indexer = tracker_indexer_new (TRACKER_INDEXER_TYPE_FILES_UPDATE, config);
+	if (!indexer) {
+		return FALSE;
+	}
 
-	/* db_con->word_index = tracker->file_index; */
+	tracker->file_update_index = indexer;
+
+	indexer = tracker_indexer_new (TRACKER_INDEXER_TYPE_EMAILS, config);
+	if (!indexer) {
+		return FALSE;
+	}
+
+	tracker->email_index = indexer;
 
 	return TRUE;
 }
@@ -608,9 +625,17 @@
 static void
 shutdown_indexer (void)
 {
-	tracker_indexer_close (tracker->file_index);
-	tracker_indexer_close (tracker->file_update_index);
-	tracker_indexer_close (tracker->email_index);
+	if (tracker->file_index) {
+		g_object_unref (tracker->file_index);
+	}
+
+	if (tracker->file_update_index) {
+		g_object_unref (tracker->file_update_index);
+	}
+
+	if (tracker->email_index) {
+		g_object_unref (tracker->email_index);
+	}
 }
 
 static void
@@ -827,6 +852,10 @@
 		return EXIT_FAILURE;
 	}
 
+	if (!initialize_indexers (tracker->config)) {
+		return EXIT_FAILURE;
+	}
+
 	/* Set our status as running, if this is FALSE, threads stop
 	 * doing what they do and shutdown.
 	 */

Modified: branches/xesam-support/src/trackerd/tracker-main.h
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-main.h	(original)
+++ branches/xesam-support/src/trackerd/tracker-main.h	Thu Jun 26 12:35:12 2008
@@ -50,6 +50,9 @@
         TrackerLanguage  *language;
 
 	TrackerCrawler   *crawler;
+        TrackerIndexer   *file_index;
+        TrackerIndexer   *file_update_index;
+        TrackerIndexer   *email_index;
 
  	gboolean          is_running; 
 	gboolean          readonly;
@@ -64,9 +67,6 @@
 	gboolean          pause_io;
 
 	/* Indexing options */
-        Indexer          *file_index;
-        Indexer          *file_update_index;
-        Indexer          *email_index;
 
 	/* Table of stop words that are to be ignored by the parser */
 	gboolean          first_time_index; 

Modified: branches/xesam-support/src/trackerd/tracker-monitor.c
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-monitor.c	(original)
+++ branches/xesam-support/src/trackerd/tracker-monitor.c	Thu Jun 26 12:35:12 2008
@@ -19,6 +19,7 @@
  */
 
 #include <string.h>
+#include <stdlib.h>
 
 #include <libtracker-common/tracker-dbus.h>
 #include <libtracker-common/tracker-file-utils.h>

Modified: branches/xesam-support/src/trackerd/tracker-query-tree.c
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-query-tree.c	(original)
+++ branches/xesam-support/src/trackerd/tracker-query-tree.c	Thu Jun 26 12:35:12 2008
@@ -67,7 +67,7 @@
 struct TrackerQueryTreePrivate {
 	gchar           *query_str;
 	TreeNode        *tree;
-	Indexer         *indexer;
+	TrackerIndexer  *indexer;
         TrackerConfig   *config;
         TrackerLanguage *language;
 	GArray          *services;
@@ -125,10 +125,11 @@
 							      G_PARAM_READWRITE));
 	g_object_class_install_property (object_class,
 					 PROP_INDEXER,
-					 g_param_spec_pointer ("indexer",
-							       "Indexer",
-							       "Indexer",
-							       G_PARAM_READWRITE));
+					 g_param_spec_object ("indexer",
+                                                              "Indexer",
+                                                              "Indexer",
+                                                              tracker_indexer_get_type (),
+                                                              G_PARAM_READWRITE));
 	g_object_class_install_property (object_class,
 					 PROP_CONFIG,
 					 g_param_spec_object ("config",
@@ -223,7 +224,7 @@
 		break;
 	case PROP_INDEXER:
 		tracker_query_tree_set_indexer (TRACKER_QUERY_TREE (object),
-						g_value_get_pointer (value));
+						g_value_get_object (value));
 		break;
 	case PROP_CONFIG:
 		tracker_query_tree_set_config (TRACKER_QUERY_TREE (object),
@@ -257,7 +258,7 @@
 		g_value_set_string (value, priv->query_str);
 		break;
 	case PROP_INDEXER:
-		g_value_set_pointer (value, priv->indexer);
+		g_value_set_object (value, priv->indexer);
 		break;
 	case PROP_CONFIG:
 		g_value_set_object (value, priv->config);
@@ -275,13 +276,13 @@
 
 TrackerQueryTree *
 tracker_query_tree_new (const gchar     *query_str,
-			Indexer         *indexer,
+			TrackerIndexer  *indexer,
                         TrackerConfig   *config,
                         TrackerLanguage *language,
 			GArray          *services)
 {
 	g_return_val_if_fail (query_str != NULL, NULL);
-	g_return_val_if_fail (indexer != NULL, NULL);
+	g_return_val_if_fail (TRACKER_IS_INDEXER (indexer), NULL);
 	g_return_val_if_fail (TRACKER_IS_CONFIG (config), NULL);
 	g_return_val_if_fail (language != NULL, NULL);
 
@@ -482,20 +483,29 @@
 
 void
 tracker_query_tree_set_indexer (TrackerQueryTree *tree,
-				Indexer          *indexer)
+				TrackerIndexer   *indexer)
 {
 	TrackerQueryTreePrivate *priv;
 
 	g_return_if_fail (TRACKER_IS_QUERY_TREE (tree));
-	g_return_if_fail (indexer != NULL);
+	g_return_if_fail (TRACKER_IS_INDEXER (indexer));
 
 	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+	if (indexer) {
+		g_object_ref (indexer);
+	}
+
+	if (priv->indexer) {
+		g_object_unref (priv->indexer);
+	}
+
 	priv->indexer = indexer;
 
 	g_object_notify (G_OBJECT (tree), "indexer");
 }
 
-Indexer *
+TrackerIndexer *
 tracker_query_tree_get_indexer (TrackerQueryTree *tree)
 {
 	TrackerQueryTreePrivate *priv;
@@ -641,10 +651,14 @@
 }
 
 static gint
-get_idf_score (WordDetails *details, float idf)
+get_idf_score (TrackerIndexerWordDetails *details, 
+               gfloat                     idf)
 {
-	guint32 score = tracker_word_details_get_score (details);
-	float f = idf * score * SCORE_MULTIPLIER;
+	guint32 score;
+	gfloat  f;
+
+        score = tracker_indexer_word_details_get_score (details);
+        f = idf * score * SCORE_MULTIPLIER;
 
         return (f > 1.0) ? lrintf (f) : 1;
 }
@@ -676,8 +690,8 @@
 		      const gchar      *term)
 {
 	TrackerQueryTreePrivate *priv;
+	TrackerIndexerWordDetails *details;
 	GHashTable *result;
-	WordDetails *details;
 	guint count, i;
 
 	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
@@ -693,7 +707,7 @@
 		SearchHitData *data;
 		gint service;
 
-		service = tracker_word_details_get_service_type (&details[i]);
+		service = tracker_indexer_word_details_get_service_type (&details[i]);
 
 		if (in_array (priv->services, service)) {
 			data = g_slice_new (SearchHitData);

Modified: branches/xesam-support/src/trackerd/tracker-query-tree.h
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-query-tree.h	(original)
+++ branches/xesam-support/src/trackerd/tracker-query-tree.h	Thu Jun 26 12:35:12 2008
@@ -65,16 +65,16 @@
 
 GType                 tracker_query_tree_get_type       (void);
 TrackerQueryTree *    tracker_query_tree_new            (const gchar      *query_str,
-                                                         Indexer          *indexer,
+                                                         TrackerIndexer   *indexer,
 							 TrackerConfig    *config,
 							 TrackerLanguage  *language,
                                                          GArray           *services);
 G_CONST_RETURN gchar *tracker_query_tree_get_query      (TrackerQueryTree *tree);
 void                  tracker_query_tree_set_query      (TrackerQueryTree *tree,
                                                          const gchar      *query_str);
-Indexer *             tracker_query_tree_get_indexer    (TrackerQueryTree *tree);
+TrackerIndexer *      tracker_query_tree_get_indexer    (TrackerQueryTree *tree);
 void                  tracker_query_tree_set_indexer    (TrackerQueryTree *tree,
-							 Indexer          *indexer);
+							 TrackerIndexer   *indexer);
 TrackerConfig *       tracker_query_tree_get_config     (TrackerQueryTree *tree);
 void                  tracker_query_tree_set_config     (TrackerQueryTree *tree,
                                                          TrackerConfig    *config);

Modified: branches/xesam-support/src/trackerd/tracker-search.c
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-search.c	(original)
+++ branches/xesam-support/src/trackerd/tracker-search.c	Thu Jun 26 12:35:12 2008
@@ -50,8 +50,8 @@
 	DBusGProxy      *fd_proxy;
 	TrackerConfig   *config;
 	TrackerLanguage *language;
-        Indexer         *file_index;
-        Indexer         *email_index;
+        TrackerIndexer  *file_index;
+        TrackerIndexer  *email_index;
 } TrackerSearchPriv;
 
 enum {
@@ -217,8 +217,8 @@
 }
 
 void
-tracker_search_set_file_index (TrackerSearch *object,
-			       Indexer        *file_index)
+tracker_search_set_file_index (TrackerSearch  *object,
+			       TrackerIndexer *file_index)
 {
 	TrackerSearchPriv *priv;
 
@@ -233,8 +233,8 @@
 }
 
 void
-tracker_search_set_email_index (TrackerSearch *object,
-				Indexer       *email_index)
+tracker_search_set_email_index (TrackerSearch  *object,
+				TrackerIndexer *email_index)
 {
 	TrackerSearchPriv *priv;
 

Modified: branches/xesam-support/src/trackerd/tracker-search.h
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-search.h	(original)
+++ branches/xesam-support/src/trackerd/tracker-search.h	Thu Jun 26 12:35:12 2008
@@ -59,9 +59,9 @@
 void           tracker_search_set_language      (TrackerSearch     *object,
 						 TrackerLanguage   *language);
 void           tracker_search_set_file_index    (TrackerSearch     *object,
-						 Indexer           *file_index);
+						 TrackerIndexer    *file_index);
 void           tracker_search_set_email_index   (TrackerSearch     *object,
-						 Indexer           *email_index);
+						 TrackerIndexer    *email_index);
 gboolean       tracker_search_get_hit_count     (TrackerSearch     *object,
 						 const gchar       *service,
 						 const gchar       *search_text,

Modified: branches/xesam-support/src/trackerd/tracker-xesam-manager.c
==============================================================================
--- branches/xesam-support/src/trackerd/tracker-xesam-manager.c	(original)
+++ branches/xesam-support/src/trackerd/tracker-xesam-manager.c	Thu Jun 26 12:35:12 2008
@@ -391,7 +391,7 @@
 	u = 0;
 #endif
 
-	r = rand ();
+	r = g_random_int ();
 	key = g_strdup_printf ("%ut%uut%uu%up%ur%uk%u",
 			       serial, t, ut, u, p, r,
 			       GPOINTER_TO_UINT (&key));



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]