tracker r1257 - in trunk: . src/trackerd
- From: ifrade svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r1257 - in trunk: . src/trackerd
- Date: Tue, 8 Apr 2008 20:45:03 +0100 (BST)
Author: ifrade
Date: Tue Apr 8 20:45:03 2008
New Revision: 1257
URL: http://svn.gnome.org/viewvc/tracker?rev=1257&view=rev
Log:
Patch Bug #525403. Isolating indexer
Modified:
trunk/ChangeLog
trunk/src/trackerd/tracker-cache.c
trunk/src/trackerd/tracker-dbus-search.c
trunk/src/trackerd/tracker-indexer.c
trunk/src/trackerd/tracker-indexer.h
trunk/src/trackerd/tracker-utils.h
trunk/src/trackerd/trackerd.c
Modified: trunk/src/trackerd/tracker-cache.c
==============================================================================
--- trunk/src/trackerd/tracker-cache.c (original)
+++ trunk/src/trackerd/tracker-cache.c Tue Apr 8 20:45:03 2008
@@ -65,7 +65,7 @@
break;
}
- indexer = tracker_indexer_open (temp_file_name);
+ indexer = tracker_indexer_open (temp_file_name, FALSE);
g_free (temp_file_name);
@@ -162,7 +162,7 @@
/* determine is index has been written to significantly before and create new ones if so */
if (tracker_indexer_size (tracker->file_index) > 4000000) {
index_con.file_index = create_merge_index ("file-index.tmp.");
- tracker_log ("flushing to %s", dpname (index_con.file_index->word_index));
+ tracker_log ("flushing to %s", tracker_indexer_get_name (index_con.file_index));
using_file_tmp = TRUE;
} else {
index_con.file_index = tracker->file_index;
@@ -170,7 +170,7 @@
if (tracker_indexer_size (tracker->email_index) > 4000000) {
index_con.email_index = create_merge_index ("email-index.tmp.");
- tracker_log ("flushing to %s", dpname (index_con.email_index->word_index));
+ tracker_log ("flushing to %s", tracker_indexer_get_name (index_con.email_index));
using_email_tmp = TRUE;
} else {
index_con.email_index = tracker->email_index;
Modified: trunk/src/trackerd/tracker-dbus-search.c
==============================================================================
--- trunk/src/trackerd/tracker-dbus-search.c (original)
+++ trunk/src/trackerd/tracker-dbus-search.c Tue Apr 8 20:45:03 2008
@@ -858,97 +858,6 @@
}
-/* int levenshtein ()
- * Original license: GNU Lesser Public License
- * from the Dixit project, (http://dixit.sourceforge.net/)
- * Author: Octavian Procopiuc <oprocopiuc gmail com>
- * Created: July 25, 2004
- * Copied into tracker, by Edward Duffy
- */
-
-static int
-levenshtein(char *source, char *target, int maxdist)
-{
- char n, m;
- int l;
- l = strlen (source);
- if (l > 50)
- return -1;
- n = l;
-
- l = strlen (target);
- if (l > 50)
- return -1;
- m = l;
-
- if (maxdist == 0)
- maxdist = MAX(m, n);
- if (n == 0)
- return MIN(m, maxdist);
- if (m == 0)
- return MIN(n, maxdist);
-
- // Store the min. value on each column, so that, if it reaches
- // maxdist, we break early.
- char mincolval;
-
- char matrix[51][51];
-
- char j;
- char i;
- char cell;
-
- for (j = 0; j <= m; j++)
- matrix[0][(int)j] = j;
-
- for (i = 1; i <= n; i++) {
-
- mincolval = MAX(m, i);
- matrix[(int)i][0] = i;
-
- char s_i = source[i-1];
-
- for (j = 1; j <= m; j++) {
-
- char t_j = target[j-1];
-
- char cost = (s_i == t_j ? 0 : 1);
-
- char above = matrix[i-1][(int)j];
- char left = matrix[(int)i][j-1];
- char diag = matrix[i-1][j-1];
- cell = MIN(above + 1, MIN(left + 1, diag + cost));
-
- // Cover transposition, in addition to deletion,
- // insertion and substitution. This step is taken from:
- // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
- // Enhanced Dynamic Programming ASM Algorithm"
- // (http://www.acm.org/~hlb/publications/asm/asm.html)
-
- if (i > 2 && j > 2) {
- char trans = matrix[i-2][j-2] + 1;
- if (source[i-2] != t_j)
- trans++;
- if (s_i != target[j-2])
- trans++;
- if (cell > trans)
- cell = trans;
- }
-
- mincolval = MIN(mincolval, cell);
- matrix[(int)i][(int)j] = cell;
- }
-
- if (mincolval >= maxdist)
- break;
-
- }
-
- if (i == n + 1)
- return (int) matrix[(int)n][(int)m];
- else
- return maxdist;
-}
void
@@ -956,14 +865,9 @@
{
DBusError dbus_error;
DBusMessage *reply;
- gchar *term, *str;
+ gchar *term;
gint maxdist;
- gint dist, tsiz;
gchar *winner_str;
- gint winner_dist;
- char *tmp;
- int hits;
- GTimeVal start, current;
/*
<method name="Suggest">
@@ -983,60 +887,14 @@
return;
}
- winner_str = NULL;
- winner_dist = -1; /* to initialize winner_dist with something */
-
-
Indexer *index = tracker->file_index;
- dpiterinit (index->word_index);
-
- g_get_current_time (&start);
+ winner_str = tracker_indexer_get_suggestion (index, term, maxdist);
- str = dpiternext (index->word_index, NULL);
- while (str != NULL) {
- dist = levenshtein (term, str, 0);
- if (dist != -1 && dist < maxdist) {
- hits = 0;
- if ((tmp = dpget (index->word_index, str, -1, 0, -1, &tsiz)) != NULL) {
- hits = tsiz / sizeof (WordDetails);
- free (tmp);
- if (tsiz % sizeof (WordDetails) != 0) {
- tracker_set_error (rec, "Possible data error from dpget Aborting tracker_dbus_method_search_suggest.");
- g_free (str);
- if (winner_str) {
- g_free (winner_str);
- }
- return;
- }
- }
- if (hits > 0) {
- if (winner_str == NULL) {
- winner_str = strdup (str);
- winner_dist = dist;
- }
- else if (dist < winner_dist) {
- free (winner_str);
- winner_str = strdup (str);
- winner_dist = dist;
- }
- }
- else {
- tracker_log ("No hits for %s!", str);
- }
- }
- free (str);
- g_get_current_time (&current);
- if (current.tv_sec - start.tv_sec >= 2) { /* 2 second time out */
- tracker_log ("Timeout in tracker_dbus_method_search_suggest");
- break;
- }
- str = dpiternext (index->word_index, NULL);
- }
-
- if (winner_str == NULL) {
- winner_str = strdup (term);
- }
+ if (!winner_str) {
+ tracker_set_error (rec, "Possible data error in index. Aborting tracker_dbus_method_search_suggest.");
+ return;
+ }
tracker_log ("Suggested spelling for %s is %s.", term, winner_str);
Modified: trunk/src/trackerd/tracker-indexer.c
==============================================================================
--- trunk/src/trackerd/tracker-indexer.c (original)
+++ trunk/src/trackerd/tracker-indexer.c Tue Apr 8 20:45:03 2008
@@ -41,7 +41,8 @@
#include <unistd.h>
#include <string.h>
#include <math.h>
-#include <sqlite3.h>
+
+#include <depot.h>
#include <glib.h>
#include <glib/gstdio.h>
@@ -56,6 +57,16 @@
extern Tracker *tracker;
+struct Indexer_ {
+ DEPOT *word_index; /* file hashtable handle for the word -> {serviceID, ServiceTypeID, Score} */
+ GMutex *word_mutex;
+ char *name;
+ gpointer emails; /* pointer to email indexer */
+ gpointer data; /* pointer to file indexer */
+ gboolean main_index;
+ gboolean needs_merge; /* should new stuff be added directly or merged later on from a new index */
+};
+
static inline gint16
get_score (WordDetails *details)
{
@@ -257,7 +268,7 @@
}
Indexer *
-tracker_indexer_open (const gchar *name)
+tracker_indexer_open (const gchar *name, gboolean main_index)
{
char *word_dir;
DEPOT *word_index;
@@ -273,7 +284,7 @@
result = g_new0 (Indexer, 1);
- result->main_index = FALSE;
+ result->main_index = main_index;
result->needs_merge = FALSE;
@@ -351,6 +362,14 @@
}
+const gchar *
+tracker_indexer_get_name (Indexer *indexer)
+{
+ g_return_val_if_fail (indexer != NULL, NULL);
+
+ return dpname (indexer->word_index);
+}
+
guint32
tracker_indexer_size (Indexer *indexer)
@@ -499,7 +518,7 @@
tracker_indexer_free (src, TRUE);
if (update) {
- tracker->file_update_index = tracker_indexer_open ("file-update-index.db");
+ tracker->file_update_index = tracker_indexer_open ("file-update-index.db", FALSE);
}
tracker->in_merge = FALSE;
@@ -663,7 +682,7 @@
if (g_file_test (file->data, G_FILE_TEST_EXISTS)) {
- Indexer *tmp_index = tracker_indexer_open (name);
+ Indexer *tmp_index = tracker_indexer_open (name, FALSE);
if (tmp_index) {
index_list = g_slist_prepend (index_list, tmp_index);
}
@@ -712,9 +731,9 @@
tracker_dbus_send_index_status_change_signal ();
if (type == INDEX_TYPE_FILES) {
- final_index = tracker_indexer_open ("file-index-final");
+ final_index = tracker_indexer_open ("file-index-final", TRUE);
} else {
- final_index = tracker_indexer_open ("email-index-final");
+ final_index = tracker_indexer_open ("email-index-final", TRUE);
}
if (!final_index) {
@@ -1770,3 +1789,173 @@
return query->hit_count;
}
}
+
+/* int levenshtein ()
+ * Original license: GNU Lesser Public License
+ * from the Dixit project, (http://dixit.sourceforge.net/)
+ * Author: Octavian Procopiuc <oprocopiuc gmail com>
+ * Created: July 25, 2004
+ * Copied into tracker, by Edward Duffy
+ */
+
+static int
+levenshtein(const char *source, char *target, int maxdist)
+{
+ char n, m;
+ int l;
+ l = strlen (source);
+ if (l > 50)
+ return -1;
+ n = l;
+
+ l = strlen (target);
+ if (l > 50)
+ return -1;
+ m = l;
+
+ if (maxdist == 0)
+ maxdist = MAX(m, n);
+ if (n == 0)
+ return MIN(m, maxdist);
+ if (m == 0)
+ return MIN(n, maxdist);
+
+ // Store the min. value on each column, so that, if it reaches
+ // maxdist, we break early.
+ char mincolval;
+
+ char matrix[51][51];
+
+ char j;
+ char i;
+ char cell;
+
+ for (j = 0; j <= m; j++)
+ matrix[0][(int)j] = j;
+
+ for (i = 1; i <= n; i++) {
+
+ mincolval = MAX(m, i);
+ matrix[(int)i][0] = i;
+
+ char s_i = source[i-1];
+
+ for (j = 1; j <= m; j++) {
+
+ char t_j = target[j-1];
+
+ char cost = (s_i == t_j ? 0 : 1);
+
+ char above = matrix[i-1][(int)j];
+ char left = matrix[(int)i][j-1];
+ char diag = matrix[i-1][j-1];
+ cell = MIN(above + 1, MIN(left + 1, diag + cost));
+
+ // Cover transposition, in addition to deletion,
+ // insertion and substitution. This step is taken from:
+ // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
+ // Enhanced Dynamic Programming ASM Algorithm"
+ // (http://www.acm.org/~hlb/publications/asm/asm.html)
+
+ if (i > 2 && j > 2) {
+ char trans = matrix[i-2][j-2] + 1;
+ if (source[i-2] != t_j)
+ trans++;
+ if (s_i != target[j-2])
+ trans++;
+ if (cell > trans)
+ cell = trans;
+ }
+
+ mincolval = MIN(mincolval, cell);
+ matrix[(int)i][(int)j] = cell;
+ }
+
+ if (mincolval >= maxdist)
+ break;
+
+ }
+
+ if (i == n + 1)
+ return (int) matrix[(int)n][(int)m];
+ else
+ return maxdist;
+}
+
+static int
+count_hits_for_word (Indexer *indexer, const gchar *str) {
+
+ gint tsiz, hits = 0;
+
+ tsiz = count_hit_size_for_word (indexer, str);
+
+ if (tsiz == -1 || tsiz % sizeof (WordDetails) != 0) {
+ return -1;
+ }
+
+ hits = tsiz / sizeof (WordDetails);
+
+ return hits;
+}
+
+
+char *
+tracker_indexer_get_suggestion (Indexer *indexer, const gchar *term, gint maxdist)
+{
+
+ gchar *str;
+ gint dist;
+ gchar *winner_str;
+ gint winner_dist;
+ gint hits;
+ GTimeVal start, current;
+
+ winner_str = g_strdup (term);
+ winner_dist = G_MAXINT; /* Initialize to the worst case */
+
+ dpiterinit (indexer->word_index);
+
+ g_get_current_time (&start);
+
+ str = dpiternext (indexer->word_index, NULL);
+
+ while (str != NULL) {
+
+ dist = levenshtein (term, str, 0);
+
+ if (dist != -1 && dist < maxdist && dist < winner_dist) {
+
+ hits = count_hits_for_word (indexer, str);
+
+ if (hits < 0) {
+
+ g_free (winner_str);
+ g_free (str);
+ return NULL;
+
+ } else if (hits > 0) {
+
+ g_free (winner_str);
+ winner_str = g_strdup (str);
+ winner_dist = dist;
+
+ } else {
+ tracker_log ("No hits for %s!", str);
+ }
+ }
+
+ g_free (str);
+
+ g_get_current_time (&current);
+
+ if (current.tv_sec - start.tv_sec >= 2) { /* 2 second time out */
+ tracker_log ("Timeout in tracker_dbus_method_search_suggest");
+ break;
+ }
+
+ str = dpiternext (indexer->word_index, NULL);
+ }
+
+ return winner_str;
+}
+
Modified: trunk/src/trackerd/tracker-indexer.h
==============================================================================
--- trunk/src/trackerd/tracker-indexer.h (original)
+++ trunk/src/trackerd/tracker-indexer.h Tue Apr 8 20:45:03 2008
@@ -25,9 +25,12 @@
#include <stdlib.h>
#include <glib.h>
-#include <depot.h>
-#include "tracker-utils.h"
+typedef struct { /* type of structure for an element of search result */
+ guint32 id; /* Service ID number of the document */
+ int amalgamated; /* amalgamation of service_type and score of the word in the document's metadata */
+} WordDetails;
+
typedef struct {
guint32 service_id; /* Service ID of the document */
@@ -51,17 +54,7 @@
} SearchWord;
-typedef struct {
- DEPOT *word_index; /* file hashtable handle for the word -> {serviceID, ServiceTypeID, Score} */
- GMutex *word_mutex;
- char *name;
- gpointer emails; /* pointer to email indexer */
- gpointer data; /* pointer to file indexer */
- gboolean main_index;
- gboolean needs_merge; /* should new stuff be added directly or merged later on from a new index */
-} Indexer;
-
-
+typedef struct Indexer_ Indexer;
typedef struct {
Indexer *indexer;
@@ -99,13 +92,13 @@
guint32 tracker_indexer_calc_amalgamated (gint service, gint score);
void tracker_index_free_hit_list (GSList *hit_list);
-Indexer * tracker_indexer_open (const gchar *name);
+Indexer * tracker_indexer_open (const gchar *name, gboolean main_index);
void tracker_indexer_close (Indexer *indexer);
gboolean tracker_indexer_repair (const char *name);
void tracker_indexer_free (Indexer *indexer, gboolean remove_file);
gboolean tracker_indexer_has_merge_index (Indexer *indexer, gboolean update);
-
+const gchar * tracker_indexer_get_name (Indexer *indexer);
guint32 tracker_indexer_size (Indexer *indexer);
gboolean tracker_indexer_optimize (Indexer *indexer);
void tracker_indexer_sync (Indexer *indexer);
@@ -131,5 +124,6 @@
gboolean tracker_remove_dud_hits (Indexer *indexer, const gchar *word, GSList *dud_list);
+char * tracker_indexer_get_suggestion (Indexer *indexer, const gchar *term, gint maxdist);
#endif
Modified: trunk/src/trackerd/tracker-utils.h
==============================================================================
--- trunk/src/trackerd/tracker-utils.h (original)
+++ trunk/src/trackerd/tracker-utils.h Tue Apr 8 20:45:03 2008
@@ -56,26 +56,11 @@
#define OPTIMIZATION_COUNT 10000
#define MAX_WORDS_TO_INDEX 10000
-/* default indexer options */
-#define MIN_INDEX_BUCKET_COUNT 131072 /* minimum bucket number of word index per division (total buckets = INDEXBNUM * INDEXDIV) */
-#define INDEX_DIVISIONS 4 /* no. of divisions of file */
-#define MAX_INDEX_BUCKET_COUNT 262144 /* max no of buckets to use */
-#define INDEX_BUCKET_RATIO 1 /* desired ratio of unused buckets to have (range 0 to 4)*/
-#define INDEX_PADDING 2
-
-
-typedef struct { /* type of structure for an element of search result */
- guint32 id; /* Service ID number of the document */
- int amalgamated; /* amalgamation of service_type and score of the word in the document's metadata */
-} WordDetails;
-
-
typedef struct {
int id; /* word ID of the cached word */
int count; /* cummulative count of the cached word */
} CacheWord;
-
typedef enum {
DATA_KEYWORD,
DATA_INDEX,
Modified: trunk/src/trackerd/trackerd.c
==============================================================================
--- trunk/src/trackerd/trackerd.c (original)
+++ trunk/src/trackerd/trackerd.c Tue Apr 8 20:45:03 2008
@@ -1170,16 +1170,13 @@
- Indexer *index = tracker_indexer_open ("file-index.db");
- index->main_index = TRUE;
+ Indexer *index = tracker_indexer_open ("file-index.db", TRUE);
tracker->file_index = index;
- index = tracker_indexer_open ("file-update-index.db");
- index->main_index = FALSE;
+ index = tracker_indexer_open ("file-update-index.db", FALSE);
tracker->file_update_index = index;
- index = tracker_indexer_open ("email-index.db");
- index->main_index = TRUE;
+ index = tracker_indexer_open ("email-index.db", TRUE);
tracker->email_index = index;
db_con->word_index = tracker->file_index;
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]