tracker r1549 - in branches/indexer-split: . src/tracker-indexer



Author: carlosg
Date: Fri May 30 13:18:05 2008
New Revision: 1549
URL: http://svn.gnome.org/viewvc/tracker?rev=1549&view=rev

Log:
2008-05-30  Carlos Garnacho  <carlos imendio com>

        * src/tracker-indexer/tracker-index.[ch]: New files. Provide methods
        to store data in the QDBM index.
        * src/tracker-indexer/Makefile.am: Added these files to build.
        * src/tracker-indexer/tracker-indexer.c: Pass all metadata contents
        through the parser and store all the results with TrackerIndex.


Added:
   branches/indexer-split/src/tracker-indexer/tracker-index.c
   branches/indexer-split/src/tracker-indexer/tracker-index.h
Modified:
   branches/indexer-split/ChangeLog
   branches/indexer-split/src/tracker-indexer/Makefile.am
   branches/indexer-split/src/tracker-indexer/tracker-indexer.c

Modified: branches/indexer-split/src/tracker-indexer/Makefile.am
==============================================================================
--- branches/indexer-split/src/tracker-indexer/Makefile.am	(original)
+++ branches/indexer-split/src/tracker-indexer/Makefile.am	Fri May 30 13:18:05 2008
@@ -18,6 +18,8 @@
 	tracker-dbus.h							\
 	tracker-ioprio.c						\
 	tracker-ioprio.h						\
+	tracker-index.c							\
+	tracker-index.h							\
 	tracker-indexer.c						\
 	tracker-indexer.h						\
 	tracker-indexer-db.c						\

Added: branches/indexer-split/src/tracker-indexer/tracker-index.c
==============================================================================
--- (empty file)
+++ branches/indexer-split/src/tracker-indexer/tracker-index.c	Fri May 30 13:18:05 2008
@@ -0,0 +1,161 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
+
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include <glib.h>
+#include <qdbm/depot.h>
+
+#include "tracker-index.h"
+
+#define MAX_HIT_BUFFER 480000
+
+typedef struct TrackerIndexElement TrackerIndexElement;
+/*
+ * An index: an in-memory word cache plus the QDBM depot the cache is
+ * written to by tracker_index_flush().
+ */
+struct TrackerIndex {
+	GHashTable *cache; /* copied word (gchar *) -> GArray of
+			    * TrackerIndexElement not yet written out */
+	DEPOT *index; /* handle returned by dpopen() */
+};
+/* One hit for a word; tracker_index_add_word() appends one per call. */
+struct TrackerIndexElement {
+	guint32 id;          /* Service ID number of the
+			      * document */
+	guint32 amalgamated; /* amalgamation of
+			      * service_type and score of
+			      * the word in the document's
+			      * metadata */
+};
+
+/*
+ * Packs a service type and a word score into one 32-bit value for
+ * compact storage:
+ *
+ *   bits 31-24: low 8 bits of @service
+ *   bits 23-8:  @weight as a 16-bit two's complement score
+ *   bits  7-0:  always zero
+ *
+ * @weight is capped at 30000. It is also bounded below by G_MININT16 so
+ * that the narrowing to 16 bits never wraps a very negative weight into
+ * a positive score.
+ */
+static guint32
+tracker_index_calc_amalgamated (gint service,
+				gint weight)
+{
+	guint8 service_type;
+	guint16 score_bits;
+
+	service_type = (guint8) service;
+
+	/* Go through gint16 first so negative scores keep their two's
+	 * complement bit pattern, then handle the bits as unsigned.
+	 */
+	score_bits = (guint16) (gint16) CLAMP (weight, G_MININT16, 30000);
+
+	/* Widen before shifting: shifting a promoted (signed) int into the
+	 * sign bit is undefined behaviour for service types >= 128.
+	 */
+	return ((guint32) service_type << 24) | ((guint32) score_bits << 8);
+}
+
+/* Value destructor for the word cache: releases a hit array together
+ * with the element storage it owns.
+ */
+static void
+free_cache_values (GArray *hits)
+{
+	(void) g_array_free (hits, TRUE);
+}
+
+/*
+ * Creates a new index backed by the QDBM depot at @file, creating the
+ * file if needed. @bucket_count is handed to dpopen() unchanged.
+ *
+ * Returns a newly allocated TrackerIndex, to be released with
+ * tracker_index_free(), or NULL if @file is NULL. If the depot cannot be
+ * opened a warning with the QDBM error is logged and the object is still
+ * returned, with a NULL depot handle.
+ */
+TrackerIndex *
+tracker_index_new (const gchar *file,
+		   gint         bucket_count)
+{
+	TrackerIndex *index;
+
+	g_return_val_if_fail (file != NULL, NULL);
+
+	index = g_new0 (TrackerIndex, 1);
+
+	/* The cache owns both its keys (copied words) and its values
+	 * (hit arrays), so give it a destructor for each.
+	 */
+	index->cache = g_hash_table_new_full (g_str_hash,
+					      g_str_equal,
+					      (GDestroyNotify) g_free,
+					      (GDestroyNotify) free_cache_values);
+
+	index->index = dpopen (file, DP_OWRITER | DP_OCREAT | DP_ONOLCK, bucket_count);
+
+	if (!index->index) {
+		/* Do not fail silently: report why the depot is unavailable */
+		g_warning ("Could not open index '%s': %s", file, dperrmsg (dpecode));
+	}
+
+	return index;
+}
+
+/*
+ * Releases @index: destroys the word cache, closes the QDBM depot and
+ * frees the structure. Words still in the cache are discarded, not
+ * written out. A NULL @index is ignored.
+ */
+void
+tracker_index_free (TrackerIndex *index)
+{
+	if (!index) {
+		return;
+	}
+
+	g_hash_table_destroy (index->cache);
+
+	/* The handle is NULL when dpopen() failed; there is nothing to
+	 * close in that case.
+	 */
+	if (index->index && !dpclose (index->index)) {
+		g_warning ("Could not close index: %s", dperrmsg (dpecode));
+	}
+
+	g_free (index);
+}
+
+/*
+ * Queues one hit of @word in the in-memory cache of @index.
+ *
+ * @service_id identifies the document; @service_type and @weight are
+ * packed into a single 32-bit value. The word is copied the first time
+ * it is seen, so the caller keeps ownership of @word. Nothing is written
+ * to the depot here.
+ */
+void
+tracker_index_add_word (TrackerIndex *index,
+			const gchar  *word,
+			guint32       service_id,
+			gint          service_type,
+			gint          weight)
+{
+	TrackerIndexElement elem;
+	GArray *array;
+
+	g_return_if_fail (index != NULL);
+	g_return_if_fail (word != NULL);
+
+	elem.id = service_id;
+	elem.amalgamated = tracker_index_calc_amalgamated (service_type, weight);
+
+	array = g_hash_table_lookup (index->cache, word);
+
+	if (!array) {
+		/* create the array if it didn't exist */
+		array = g_array_new (FALSE, TRUE, sizeof (TrackerIndexElement));
+		g_hash_table_insert (index->cache, g_strdup (word), array);
+	}
+
+	g_array_append_val (array, elem);
+}
+
+/*
+ * g_hash_table_foreach_remove() callback that writes one cached word to
+ * the depot.
+ *
+ * @key is the word, @value its GArray of TrackerIndexElement and
+ * @user_data the DEPOT handle. Returns TRUE to have the entry removed
+ * from the cache once stored, FALSE to keep it when the write failed.
+ */
+static gboolean
+cache_flush_foreach (gpointer key,
+		     gpointer value,
+		     gpointer user_data)
+{
+	GArray *array;
+	DEPOT *index;
+	gchar *word;
+
+	word = (gchar *) key;
+	array = (GArray *) value;
+	index = (DEPOT *) user_data;
+
+	/* FIXME: missing merge with previous values. DP_DCAT only appends
+	 * the new hits to whatever is already stored for this word.
+	 */
+	if (!dpput (index, word, -1, (char *) array->data, (array->len * sizeof (TrackerIndexElement)), DP_DCAT)) {
+		g_warning ("Could not store word '%s': %s", word, dperrmsg (dpecode));
+		return FALSE;
+	}
+
+	/* Mark element for removal */
+	return TRUE;
+}
+
+/*
+ * Writes every cached word of @index to the QDBM depot. Entries that
+ * were stored are removed from the cache; entries that failed stay
+ * cached. If no depot is open, a warning is logged and the cache is
+ * left untouched.
+ */
+void
+tracker_index_flush (TrackerIndex *index)
+{
+	g_return_if_fail (index != NULL);
+
+	if (!index->index) {
+		/* dpopen() failed earlier; dpput() must not see a NULL depot */
+		g_warning ("Could not flush index: no index file is open");
+		return;
+	}
+
+	g_message ("Flushing index");
+
+	g_hash_table_foreach_remove (index->cache, cache_flush_foreach, index->index);
+}

Added: branches/indexer-split/src/tracker-indexer/tracker-index.h
==============================================================================
--- (empty file)
+++ branches/indexer-split/src/tracker-indexer/tracker-index.h	Fri May 30 13:18:05 2008
@@ -0,0 +1,46 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
+
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __TRACKER_INDEX_H__
+#define __TRACKER_INDEX_H__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct TrackerIndex TrackerIndex; /* opaque pointer */
+/*
+ * tracker_index_new() allocates an index backed by the QDBM file @file;
+ * @bucket_count is passed through to dpopen().
+ * tracker_index_free() destroys the word cache, closes the QDBM file and
+ * frees the index.
+ */
+TrackerIndex * tracker_index_new      (const gchar *file,
+				       gint         bucket_count);
+void           tracker_index_free     (TrackerIndex *index);
+/*
+ * Queues one hit of @word for document @service_id in the in-memory
+ * cache. @service_type and @weight are packed into one 32-bit value.
+ * Nothing is written to the QDBM file by this call.
+ */
+void           tracker_index_add_word (TrackerIndex *index,
+				       const gchar  *word,
+				       guint32       service_id,
+				       gint          service_type,
+				       gint          weight);
+/*
+ * Writes the cached words to the QDBM file. Words that were stored are
+ * dropped from the cache; words that failed to store remain cached.
+ */
+void           tracker_index_flush    (TrackerIndex *index);
+
+
+G_END_DECLS
+
+#endif /* __TRACKER_INDEX_H__ */

Modified: branches/indexer-split/src/tracker-indexer/tracker-indexer.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-indexer.c	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-indexer.c	Fri May 30 13:18:05 2008
@@ -48,19 +48,21 @@
 
 #include <libtracker-common/tracker-config.h>
 #include <libtracker-common/tracker-file-utils.h>
+#include <libtracker-common/tracker-language.h>
+#include <libtracker-common/tracker-parser.h>
 #include <libtracker-common/tracker-ontology.h>
 #include <libtracker-db/tracker-db-interface-sqlite.h>
 
-#include <qdbm/depot.h>
-
 #include "tracker-indexer.h"
 #include "tracker-indexer-module.h"
 #include "tracker-indexer-db.h"
+#include "tracker-index.h"
 
 #define TRACKER_INDEXER_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_INDEXER, TrackerIndexerPrivate))
 
 typedef struct TrackerIndexerPrivate TrackerIndexerPrivate;
 typedef struct PathInfo PathInfo;
+typedef struct MetadataForeachData MetadataForeachData;
 
 struct TrackerIndexerPrivate {
 	GQueue *dir_queue;
@@ -72,12 +74,13 @@
 
 	gchar *db_dir;
 
-	DEPOT *index;
+	TrackerIndex *index;
 	TrackerDBInterface *metadata;
 	TrackerDBInterface *contents;
 	TrackerDBInterface *common;
 
 	TrackerConfig *config;
+	TrackerLanguage *language;
 
 	guint idle_id;
 
@@ -89,6 +92,14 @@
 	gchar *path;
 };
 
+struct MetadataForeachData { /* user_data handed to index_metadata_foreach() */
+	TrackerIndex *index;       /* index the parsed words are added to */
+	TrackerLanguage *language; /* passed to tracker_parser_text_into_array() */
+	TrackerConfig *config;     /* source of the min/max word length */
+	TrackerService *service;   /* its id is stored as the service type */
+	guint32 id;                /* stored as the service id of each word */
+};
+
 enum {
 	PROP_0,
 	PROP_RUNNING,
@@ -142,6 +153,11 @@
 	g_hash_table_destroy (priv->indexer_modules);
 
 	g_object_unref (priv->config);
+	g_object_unref (priv->language);
+
+	if (priv->index) {
+		tracker_index_free (priv->index);
+	}
 
 	if (priv->common) {
 		g_object_unref (priv->common);
@@ -237,6 +253,7 @@
 init_indexer (TrackerIndexer *indexer)
 {
 	TrackerIndexerPrivate *priv;
+	gchar *index_file;
 
 	priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
 
@@ -244,10 +261,17 @@
 		tracker_dir_remove (priv->db_dir);
 	}
 
+	index_file = g_build_filename (priv->db_dir, "file-index.db", NULL);
+
+	priv->index = tracker_index_new (index_file,
+					 tracker_config_get_max_bucket_count (priv->config));
 	priv->common = tracker_indexer_db_get_common ();
 	priv->metadata = tracker_indexer_db_get_file_metadata ();
 
 	tracker_indexer_set_running (indexer, TRUE);
+
+	g_free (index_file);
+
 	return FALSE;
 }
 
@@ -263,6 +287,7 @@
 	priv->dir_queue = g_queue_new ();
 	priv->file_process_queue = g_queue_new ();
 	priv->config = tracker_config_new ();
+	priv->language = tracker_language_new (priv->config);
 
 	priv->db_dir = g_build_filename (g_get_user_cache_dir (),
 					 "tracker", NULL);
@@ -337,6 +362,62 @@
 }
 
 static void
+index_metadata_foreach (gpointer key,
+			gpointer value,
+			gpointer user_data)
+{
+	TrackerField *field;
+	MetadataForeachData *data;
+	gchar **arr;
+	gint i;
+
+	if (!value) {
+		return;
+	}
+
+	field = tracker_ontology_get_field_def ((gchar *) key);
+
+	data = (MetadataForeachData *) user_data;
+	arr = tracker_parser_text_into_array ((gchar *) value,
+					      data->language,
+					      tracker_config_get_max_word_length (data->config),
+					      tracker_config_get_min_word_length (data->config));
+
+	for (i = 0; arr[i]; i++) {
+		tracker_index_add_word (data->index,
+					arr[i],
+					data->id,
+					tracker_service_get_id (data->service),
+					tracker_field_get_weight (field));
+	}
+
+	g_strfreev (arr);
+}
+
+/*
+ * Indexes every entry of @metadata for the document @id of type
+ * @service, then flushes the index.
+ */
+static void
+index_metadata (TrackerIndexer *indexer,
+		guint32         id,
+		TrackerService *service,
+		GHashTable     *metadata)
+{
+	MetadataForeachData foreach_data;
+	TrackerIndexerPrivate *priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
+
+	/* Per-document values first, then the indexer-wide ones */
+	foreach_data.id = id;
+	foreach_data.service = service;
+	foreach_data.config = priv->config;
+	foreach_data.language = priv->language;
+	foreach_data.index = priv->index;
+
+	g_hash_table_foreach (metadata, index_metadata_foreach, &foreach_data);
+
+	/* FIXME: flushing after adding each metadata set, not ideal */
+	tracker_index_flush (priv->index);
+}
+
+static void
 process_file (TrackerIndexer *indexer,
 	      PathInfo       *info)
 {
@@ -365,6 +446,8 @@
 			if (tracker_config_get_enable_xesam (tracker->config))
 				tracker_db_create_event (db_con, id, "Create");
 			*/
+
+			index_metadata (indexer, id, service, metadata);
 		}
 
 		g_hash_table_destroy (metadata);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]