tracker r1549 - in branches/indexer-split: . src/tracker-indexer
- From: carlosg svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r1549 - in branches/indexer-split: . src/tracker-indexer
- Date: Fri, 30 May 2008 13:18:05 +0000 (UTC)
Author: carlosg
Date: Fri May 30 13:18:05 2008
New Revision: 1549
URL: http://svn.gnome.org/viewvc/tracker?rev=1549&view=rev
Log:
2008-05-30 Carlos Garnacho <carlos imendio com>
* src/tracker-indexer/tracker-index.[ch]: New files. Provide methods
to store data in the QDBM index.
* src/tracker-indexer/Makefile.am: Added these files to build.
* src/tracker-indexer/tracker-indexer.c: Pass all metadata contents
through the parser and store all the results with TrackerIndex.
Added:
branches/indexer-split/src/tracker-indexer/tracker-index.c
branches/indexer-split/src/tracker-indexer/tracker-index.h
Modified:
branches/indexer-split/ChangeLog
branches/indexer-split/src/tracker-indexer/Makefile.am
branches/indexer-split/src/tracker-indexer/tracker-indexer.c
Modified: branches/indexer-split/src/tracker-indexer/Makefile.am
==============================================================================
--- branches/indexer-split/src/tracker-indexer/Makefile.am (original)
+++ branches/indexer-split/src/tracker-indexer/Makefile.am Fri May 30 13:18:05 2008
@@ -18,6 +18,8 @@
tracker-dbus.h \
tracker-ioprio.c \
tracker-ioprio.h \
+ tracker-index.c \
+ tracker-index.h \
tracker-indexer.c \
tracker-indexer.h \
tracker-indexer-db.c \
Added: branches/indexer-split/src/tracker-indexer/tracker-index.c
==============================================================================
--- (empty file)
+++ branches/indexer-split/src/tracker-indexer/tracker-index.c Fri May 30 13:18:05 2008
@@ -0,0 +1,161 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
+
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <glib.h>
+#include <qdbm/depot.h>
+
+#include "tracker-index.h"
+
+#define MAX_HIT_BUFFER 480000
+
+typedef struct TrackerIndexElement TrackerIndexElement;
+
+/* State behind the opaque TrackerIndex handle: an in-memory word cache
+ * that tracker_index_flush() writes into an on-disk QDBM depot. */
+struct TrackerIndex {
+ /* word (gchar *, owned by the table) -> GArray of TrackerIndexElement */
+ GHashTable *cache;
+ /* depot handle as returned by dpopen() in tracker_index_new() */
+ DEPOT *index;
+};
+
+/* One hit of a word in a document. Arrays of these elements are handed
+ * to dpput() as raw bytes, so the struct layout is the stored format. */
+struct TrackerIndexElement {
+ guint32 id; /* Service ID number of the
+ * document */
+ guint32 amalgamated; /* Value built by
+ * tracker_index_calc_amalgamated():
+ * bits 31-24 service type,
+ * bits 23-8 score of the word in
+ * the document's metadata,
+ * bits 7-0 always zero */
+};
+
+/*
+ * Packs a service type and a word score into one 32-bit value for
+ * compact storage in the index.
+ *
+ * @service: service type; only its low 8 bits are kept
+ * @weight:  score of the word; capped at 30000 and stored as 16 bits
+ *
+ * Returns: service type in bits 31-24, score in bits 23-8, bits 7-0 zero.
+ */
+static guint32
+tracker_index_calc_amalgamated (gint service,
+                                gint weight)
+{
+	guint32 service_bits;
+	guint32 score_bits;
+	gint16  score16;
+
+	score16 = (gint16) MIN (weight, 30000);
+
+	/* Widen to unsigned 32-bit *before* shifting: shifting a promoted
+	 * (signed) int left by 24 is undefined for service types >= 128.
+	 * The guint16 conversion keeps the 16-bit two's complement pattern
+	 * of the score.
+	 */
+	service_bits = (guint8) service;
+	score_bits = (guint16) score16;
+
+	return (service_bits << 24) | (score_bits << 8);
+}
+
+/* Value destructor of the word cache (installed through a
+ * GDestroyNotify cast in tracker_index_new()): releases the GArray
+ * together with its element storage. */
+static void
+free_cache_values (GArray *array)
+{
+	(void) g_array_free (array, TRUE);
+}
+
+/**
+ * tracker_index_new:
+ * @file: path of the QDBM depot file; must not be %NULL
+ * @bucket_count: bucket count passed on to dpopen()
+ *
+ * Allocates a TrackerIndex with an empty word cache and opens the depot
+ * with DP_OWRITER | DP_OCREAT | DP_ONOLCK.
+ *
+ * If the depot cannot be opened a warning with the QDBM error text is
+ * logged. The object is still returned, as before, so existing callers
+ * keep working, but its depot handle is then %NULL.
+ *
+ * Returns: a newly allocated TrackerIndex (release it with
+ * tracker_index_free()), or %NULL if @file is %NULL.
+ **/
+TrackerIndex *
+tracker_index_new (const gchar *file,
+                   gint         bucket_count)
+{
+	TrackerIndex *index;
+
+	g_return_val_if_fail (file != NULL, NULL);
+
+	index = g_new0 (TrackerIndex, 1);
+
+	/* The table owns both the duplicated word keys and the hit arrays */
+	index->cache = g_hash_table_new_full (g_str_hash,
+	                                      g_str_equal,
+	                                      (GDestroyNotify) g_free,
+	                                      (GDestroyNotify) free_cache_values);
+
+	index->index = dpopen (file, DP_OWRITER | DP_OCREAT | DP_ONOLCK, bucket_count);
+
+	if (!index->index) {
+		/* Previously a failed open went unnoticed until a later crash */
+		g_warning ("Could not open index '%s': %s", file, dperrmsg (dpecode));
+	}
+
+	return index;
+}
+
+/**
+ * tracker_index_free:
+ * @index: a TrackerIndex, or %NULL (in which case nothing happens)
+ *
+ * Destroys the word cache, closes the depot and frees @index.
+ * Words still sitting in the cache are NOT written out; call
+ * tracker_index_flush() first if they must be kept.
+ **/
+void
+tracker_index_free (TrackerIndex *index)
+{
+	if (!index) {
+		return;
+	}
+
+	g_hash_table_destroy (index->cache);
+
+	/* The handle is NULL when dpopen() failed; never hand that to dpclose() */
+	if (index->index && !dpclose (index->index)) {
+		g_warning ("Could not close index: %s", dperrmsg (dpecode));
+	}
+
+	g_free (index);
+}
+
+/**
+ * tracker_index_add_word:
+ * @index: a TrackerIndex
+ * @word: the word to index; copied, the caller keeps ownership
+ * @service_id: service ID of the document containing @word
+ * @service_type: service type, packed into the stored element
+ * @weight: score of @word, packed into the stored element
+ *
+ * Queues one hit for @word in the in-memory cache. Nothing is written
+ * to the depot until tracker_index_flush() is called.
+ **/
+void
+tracker_index_add_word (TrackerIndex *index,
+                        const gchar  *word,
+                        guint32       service_id,
+                        gint          service_type,
+                        gint          weight)
+{
+	TrackerIndexElement elem;
+	GArray *hits;
+
+	/* A NULL word would crash inside g_str_hash() */
+	g_return_if_fail (index != NULL);
+	g_return_if_fail (word != NULL);
+
+	elem.id = service_id;
+	elem.amalgamated = tracker_index_calc_amalgamated (service_type, weight);
+
+	hits = g_hash_table_lookup (index->cache, word);
+
+	if (!hits) {
+		/* First hit for this word since the last flush */
+		hits = g_array_new (FALSE, TRUE, sizeof (TrackerIndexElement));
+		g_hash_table_insert (index->cache, g_strdup (word), hits);
+	}
+
+	g_array_append_val (hits, elem);
+}
+
+/*
+ * GHRFunc used by tracker_index_flush(): writes the cached hits of one
+ * word to the depot.
+ *
+ * @key:       the word (gchar *), used as the depot key
+ * @value:     GArray of TrackerIndexElement collected for that word
+ * @user_data: the DEPOT to write into
+ *
+ * Returns: TRUE when the hits were stored, so that
+ * g_hash_table_foreach_remove() drops the entry; FALSE on failure, which
+ * leaves the entry in the cache.
+ */
+static gboolean
+cache_flush_foreach (gpointer key,
+                     gpointer value,
+                     gpointer user_data)
+{
+	const gchar *word = key;
+	GArray *array = value;
+	DEPOT *depot = user_data;
+	gint size;
+
+	size = (gint) (array->len * sizeof (TrackerIndexElement));
+
+	/* FIXME: missing merge with previous values. DP_DCAT only appends
+	 * the new hits to whatever is already stored for the word.
+	 */
+	if (!dpput (depot, word, -1, (const char *) array->data, size, DP_DCAT)) {
+		g_warning ("Could not store word '%s': %s", word, dperrmsg (dpecode));
+		return FALSE;
+	}
+
+	/* Mark element for removal */
+	return TRUE;
+}
+
+/**
+ * tracker_index_flush:
+ * @index: a TrackerIndex
+ *
+ * Writes every cached word to the depot and removes the words that
+ * were stored successfully from the cache. Words that could not be
+ * stored stay cached. Does nothing (beyond a warning) when the depot
+ * is not open.
+ **/
+void
+tracker_index_flush (TrackerIndex *index)
+{
+	g_return_if_fail (index != NULL);
+
+	if (!index->index) {
+		/* dpopen() failed earlier; dpput() must not see a NULL depot */
+		g_warning ("Could not flush index: depot is not open");
+		return;
+	}
+
+	g_message ("Flushing index");
+
+	g_hash_table_foreach_remove (index->cache, cache_flush_foreach, index->index);
+}
Added: branches/indexer-split/src/tracker-indexer/tracker-index.h
==============================================================================
--- (empty file)
+++ branches/indexer-split/src/tracker-indexer/tracker-index.h Fri May 30 13:18:05 2008
@@ -0,0 +1,46 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc gnome org)
+ * Copyright (C) 2008, Nokia
+
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __TRACKER_INDEX_H__
+#define __TRACKER_INDEX_H__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef struct TrackerIndex TrackerIndex; /* opaque pointer */
+
+/* Creates an index object with an empty word cache and opens the QDBM
+ * depot at `file` (DP_OWRITER | DP_OCREAT | DP_ONOLCK), passing
+ * `bucket_count` on to dpopen(). */
+TrackerIndex * tracker_index_new (const gchar *file,
+ gint bucket_count);
+/* Destroys the word cache, closes the depot and frees the object.
+ * Cached words are not flushed by this call. */
+void tracker_index_free (TrackerIndex *index);
+
+/* Queues one hit of `word` for the document `service_id` in the
+ * in-memory cache; `service_type` and `weight` are packed into the
+ * stored element. Nothing reaches the depot until
+ * tracker_index_flush(). */
+void tracker_index_add_word (TrackerIndex *index,
+ const gchar *word,
+ guint32 service_id,
+ gint service_type,
+ gint weight);
+
+/* Appends the cached hits of every word to the depot and drops the
+ * words that were stored successfully from the cache. */
+void tracker_index_flush (TrackerIndex *index);
+
+
+G_END_DECLS
+
+#endif /* __TRACKER_INDEX_H__ */
Modified: branches/indexer-split/src/tracker-indexer/tracker-indexer.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-indexer.c (original)
+++ branches/indexer-split/src/tracker-indexer/tracker-indexer.c Fri May 30 13:18:05 2008
@@ -48,19 +48,21 @@
#include <libtracker-common/tracker-config.h>
#include <libtracker-common/tracker-file-utils.h>
+#include <libtracker-common/tracker-language.h>
+#include <libtracker-common/tracker-parser.h>
#include <libtracker-common/tracker-ontology.h>
#include <libtracker-db/tracker-db-interface-sqlite.h>
-#include <qdbm/depot.h>
-
#include "tracker-indexer.h"
#include "tracker-indexer-module.h"
#include "tracker-indexer-db.h"
+#include "tracker-index.h"
#define TRACKER_INDEXER_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_INDEXER, TrackerIndexerPrivate))
typedef struct TrackerIndexerPrivate TrackerIndexerPrivate;
typedef struct PathInfo PathInfo;
+typedef struct MetadataForeachData MetadataForeachData;
struct TrackerIndexerPrivate {
GQueue *dir_queue;
@@ -72,12 +74,13 @@
gchar *db_dir;
- DEPOT *index;
+ TrackerIndex *index;
TrackerDBInterface *metadata;
TrackerDBInterface *contents;
TrackerDBInterface *common;
TrackerConfig *config;
+ TrackerLanguage *language;
guint idle_id;
@@ -89,6 +92,14 @@
gchar *path;
};
+/* Context handed to index_metadata_foreach() through the user_data
+ * pointer of g_hash_table_foreach(). */
+struct MetadataForeachData {
+ /* index the parsed words are added to */
+ TrackerIndex *index;
+ /* language passed to tracker_parser_text_into_array() */
+ TrackerLanguage *language;
+ /* source of the max/min word length parser settings */
+ TrackerConfig *config;
+ /* service whose id is recorded as the service type of each word */
+ TrackerService *service;
+ /* service ID recorded with each word */
+ guint32 id;
+};
+
enum {
PROP_0,
PROP_RUNNING,
@@ -142,6 +153,11 @@
g_hash_table_destroy (priv->indexer_modules);
g_object_unref (priv->config);
+ g_object_unref (priv->language);
+
+ if (priv->index) {
+ tracker_index_free (priv->index);
+ }
if (priv->common) {
g_object_unref (priv->common);
@@ -237,6 +253,7 @@
init_indexer (TrackerIndexer *indexer)
{
TrackerIndexerPrivate *priv;
+ gchar *index_file;
priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
@@ -244,10 +261,17 @@
tracker_dir_remove (priv->db_dir);
}
+ index_file = g_build_filename (priv->db_dir, "file-index.db", NULL);
+
+ priv->index = tracker_index_new (index_file,
+ tracker_config_get_max_bucket_count (priv->config));
priv->common = tracker_indexer_db_get_common ();
priv->metadata = tracker_indexer_db_get_file_metadata ();
tracker_indexer_set_running (indexer, TRUE);
+
+ g_free (index_file);
+
return FALSE;
}
@@ -263,6 +287,7 @@
priv->dir_queue = g_queue_new ();
priv->file_process_queue = g_queue_new ();
priv->config = tracker_config_new ();
+ priv->language = tracker_language_new (priv->config);
priv->db_dir = g_build_filename (g_get_user_cache_dir (),
"tracker", NULL);
@@ -337,6 +362,62 @@
}
static void
+index_metadata_foreach (gpointer key,
+                        gpointer value,
+                        gpointer user_data)
+{
+	/* GHFunc: splits one metadata value into words and adds each word
+	 * to the index, weighted by the field definition looked up for the
+	 * metadata key. Entries without a value are skipped.
+	 */
+	MetadataForeachData *ctx;
+	TrackerField *field_def;
+	gchar **words;
+	gchar **cursor;
+
+	if (value == NULL) {
+		return;
+	}
+
+	field_def = tracker_ontology_get_field_def ((gchar *) key);
+	ctx = (MetadataForeachData *) user_data;
+
+	/* NOTE(review): neither field_def nor words is checked for NULL;
+	 * confirm that both helpers always return a usable value.
+	 */
+	words = tracker_parser_text_into_array ((gchar *) value,
+	                                        ctx->language,
+	                                        tracker_config_get_max_word_length (ctx->config),
+	                                        tracker_config_get_min_word_length (ctx->config));
+
+	/* The array is NULL-terminated; walk it until the sentinel */
+	for (cursor = words; *cursor != NULL; cursor++) {
+		tracker_index_add_word (ctx->index,
+		                        *cursor,
+		                        ctx->id,
+		                        tracker_service_get_id (ctx->service),
+		                        tracker_field_get_weight (field_def));
+	}
+
+	g_strfreev (words);
+}
+
+/* Runs every key/value pair of `metadata` through
+ * index_metadata_foreach() for the item `id` of type `service`, then
+ * flushes the indexer's word cache to disk. */
+static void
+index_metadata (TrackerIndexer *indexer,
+                guint32         id,
+                TrackerService *service,
+                GHashTable     *metadata)
+{
+	TrackerIndexerPrivate *priv = TRACKER_INDEXER_GET_PRIVATE (indexer);
+	MetadataForeachData foreach_data;
+
+	foreach_data.id = id;
+	foreach_data.service = service;
+	foreach_data.config = priv->config;
+	foreach_data.language = priv->language;
+	foreach_data.index = priv->index;
+
+	g_hash_table_foreach (metadata, index_metadata_foreach, &foreach_data);
+
+	/* FIXME: flushing after adding each metadata set, not ideal */
+	tracker_index_flush (foreach_data.index);
+}
+
+static void
process_file (TrackerIndexer *indexer,
PathInfo *info)
{
@@ -365,6 +446,8 @@
if (tracker_config_get_enable_xesam (tracker->config))
tracker_db_create_event (db_con, id, "Create");
*/
+
+ index_metadata (indexer, id, service, metadata);
}
g_hash_table_destroy (metadata);
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Thread Index] [Date Index] [Author Index]