tracker r1971 - in branches/indexer-split: . data src/tracker-indexer



Author: ifrade
Date: Thu Jul 31 13:17:51 2008
New Revision: 1971
URL: http://svn.gnome.org/viewvc/tracker?rev=1971&view=rev

Log:
Added deletion powers to the indexer

Modified:
   branches/indexer-split/ChangeLog
   branches/indexer-split/data/sqlite-stored-procs.sql
   branches/indexer-split/src/tracker-indexer/tracker-indexer-db.c
   branches/indexer-split/src/tracker-indexer/tracker-indexer-db.h
   branches/indexer-split/src/tracker-indexer/tracker-indexer.c

Modified: branches/indexer-split/data/sqlite-stored-procs.sql
==============================================================================
--- branches/indexer-split/data/sqlite-stored-procs.sql	(original)
+++ branches/indexer-split/data/sqlite-stored-procs.sql	Thu Jul 31 13:17:51 2008
@@ -76,6 +76,11 @@
 DeleteService10 Delete FROM ServiceLinks Where (DestPath = ?) or (DestPath glob ?); 
 DeleteService11 DELETE FROM ServiceContents where ServiceID = ?;
 
+
+DeleteServiceMetadata DELETE FROM ServiceMetaData WHERE ServiceID = ?;
+DeleteServiceKeywordMetadata DELETE FROM ServiceKeywordMetaData WHERE ServiceID = ?;
+DeleteServiceNumericMetadata DELETE FROM ServiceNumericMetaData WHERE ServiceID = ?;
+
 DeleteEmbeddedServiceMetadata1 DELETE FROM ServiceMetaData WHERE ServiceID = ? and MetaDataID in (select ID from MetaDataTypes where Embedded = 1);
 DeleteEmbeddedServiceMetadata2 DELETE FROM ServiceKeywordMetaData WHERE ServiceID = ? and MetaDataID in (select ID from MetaDataTypes where Embedded = 1);
 DeleteEmbeddedServiceMetadata3 DELETE FROM ServiceNumericMetaData WHERE ServiceID = ? and MetaDataID in (select ID from MetaDataTypes where Embedded = 1);

Modified: branches/indexer-split/src/tracker-indexer/tracker-indexer-db.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-indexer-db.c	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-indexer-db.c	Thu Jul 31 13:17:51 2008
@@ -93,6 +93,22 @@
 }
 
 void
+tracker_db_decrement_stats (TrackerDBInterface *iface,
+			    TrackerService     *service)
+{
+	const gchar *service_type, *parent;
+
+	service_type = tracker_service_get_name (service);
+	parent = tracker_service_get_parent (service);
+
+	tracker_db_interface_execute_procedure (iface, NULL, "DecStat", service_type, NULL);
+
+	if (parent) {
+		tracker_db_interface_execute_procedure (iface, NULL, "DecStat", parent, NULL);
+	}
+}
+
+void
 tracker_db_create_event (TrackerDBInterface *iface,
 			   guint32 service_id, 
 			   const gchar *type)
@@ -115,10 +131,12 @@
 			  gchar           **out_dirname,
 			  gchar           **out_basename)
 {
-	const gchar *dirname, *basename;
+	const gchar *dirname = NULL, *basename = NULL;
 
-	dirname = tracker_metadata_lookup (metadata, "File:Path");
-	basename = tracker_metadata_lookup (metadata, "File:Name");
+	if (metadata) {
+		dirname = tracker_metadata_lookup (metadata, "File:Path");
+		basename = tracker_metadata_lookup (metadata, "File:Name");
+	}
 
 	if (dirname && basename) {
 		*out_dirname = g_strdup (dirname);
@@ -161,6 +179,38 @@
 	return id;
 }
 
+/* Returns column 3 of the "GetServiceID" result for path in the "Files" metadata DB, or 0 without a result set. */
+guint
+tracker_db_get_service_type (const gchar *path)
+{
+	TrackerDBInterface *iface;
+	TrackerDBResultSet *result_set;
+	gchar *dirname, *basename;
+	guint service_type_id;
+
+	get_dirname_and_basename (path, NULL, &dirname, &basename);
+
+	/* We are asking this because the module cannot assign service_type -> probably it is files */
+	iface = tracker_db_manager_get_db_interface_by_type ("Files",
+							     TRACKER_DB_CONTENT_TYPE_METADATA);
+
+	result_set = tracker_db_interface_execute_procedure (iface, NULL,
+							     "GetServiceID",
+							     dirname,
+							     basename,
+							     NULL);
+	g_free (dirname);
+	g_free (basename);
+
+	if (!result_set) {
+		return 0;
+	}
+
+	tracker_db_result_set_get (result_set, 3, &service_type_id, -1);
+	g_object_unref (result_set);
+	return service_type_id;
+}
+
 gboolean
 tracker_db_create_service (TrackerService  *service,
 			   guint32          id,
@@ -219,6 +269,108 @@
 	return TRUE;
 }
 
+/* Returns a newly allocated string holding every MetadataValue of service_id, each followed by a space,
+ * read from ServiceKeywordMetadata if keywords is TRUE, else ServiceMetadata; "" when service_id is 0. */
+static gchar *
+db_get_metadata (TrackerService *service, guint service_id, gboolean keywords)
+{
+	TrackerDBInterface *iface;
+	TrackerDBResultSet *result_set;
+	gchar              *query;
+	GString            *result;
+	gchar              *str = NULL;
+
+	iface = tracker_db_manager_get_db_interface_by_type (tracker_service_get_name (service),
+							     TRACKER_DB_CONTENT_TYPE_METADATA);
+	result = g_string_new ("");
+
+	if (service_id < 1) {
+		return g_string_free (result, FALSE);
+	}
+
+	/* service_id is unsigned, so it is formatted with %u rather than %d */
+	if (keywords) {
+		query = g_strdup_printf ("Select MetadataValue From ServiceKeywordMetadata WHERE serviceID = %u",
+					 service_id);
+	} else {
+		query = g_strdup_printf ("Select MetadataValue From ServiceMetadata WHERE serviceID = %u",
+					 service_id);
+	}
+	result_set = tracker_db_interface_execute_query (iface, NULL, query);
+	g_free (query);
+
+	if (result_set) {
+		gboolean valid = TRUE;
+
+		while (valid) {
+			tracker_db_result_set_get (result_set, 0, &str, -1);
+			result = g_string_append (result, str);
+			result = g_string_append (result, " ");
+			valid = tracker_db_result_set_iter_next (result_set);
+			g_free (str);
+		}
+		g_object_unref (result_set);
+	}
+
+	return g_string_free (result, FALSE);
+}
+
+
+void
+tracker_db_delete_service (TrackerService  *service,
+			   guint32          service_id)
+{
+
+	TrackerDBInterface *iface;
+	gchar *service_id_str;
+
+	if (service_id < 1) {
+		return;
+	}
+
+	iface = tracker_db_manager_get_db_interface_by_type (tracker_service_get_name (service),
+							     TRACKER_DB_CONTENT_TYPE_METADATA);
+
+	service_id_str = tracker_guint32_to_string (service_id);
+
+	/* Delete from services table */
+	tracker_db_interface_execute_procedure (iface, NULL, "DeleteService1", service_id_str, NULL);
+
+	g_free (service_id_str);
+}
+
+/* Runs the three DeleteService*Metadata procedures for service_id, then frees the temporary ID string. */
+void
+tracker_db_delete_metadata (TrackerService *service,
+			    guint32         service_id)
+{
+	TrackerDBInterface *iface;
+	gchar *service_id_str;
+
+	iface = tracker_db_manager_get_db_interface_by_type (tracker_service_get_name (service),
+							     TRACKER_DB_CONTENT_TYPE_METADATA);
+	service_id_str = tracker_guint32_to_string (service_id);
+
+	tracker_db_interface_execute_procedure (iface, NULL, "DeleteServiceMetadata", service_id_str, NULL);
+	tracker_db_interface_execute_procedure (iface, NULL, "DeleteServiceKeywordMetadata", service_id_str, NULL);
+	tracker_db_interface_execute_procedure (iface, NULL, "DeleteServiceNumericMetadata", service_id_str, NULL);
+	g_free (service_id_str);
+}
+
+gchar *
+tracker_db_get_unparsed_metadata (TrackerService *service,
+				  guint           service_id) {
+
+	return db_get_metadata (service, service_id, TRUE);
+}
+
+gchar *
+tracker_db_get_parsed_metadata (TrackerService *service,
+				guint           service_id) {
+	return db_get_metadata (service, service_id, FALSE);
+}
+
+
 void
 tracker_db_set_metadata (TrackerService *service,
 			 guint32         id,
@@ -307,3 +459,56 @@
 						NULL);
 	g_free (id_str);
 }
+
+/* Returns column 0 of the "GetContents" result for this service ID and the File:Contents field, or NULL
+ * when there is no result set. The caller frees the string (handle_file_delete uses g_free). */
+gchar *
+tracker_db_get_text (TrackerService *service, guint32 id) {
+	TrackerDBInterface *iface;
+	TrackerField       *field;
+	gchar              *service_id_str, *contents = NULL;
+	TrackerDBResultSet *result_set;
+
+	service_id_str = tracker_guint32_to_string (id);
+	field = tracker_ontology_get_field_def ("File:Contents");
+	iface = tracker_db_manager_get_db_interface_by_type (tracker_service_get_name (service),
+							     TRACKER_DB_CONTENT_TYPE_CONTENTS);
+
+	/* Fetch the stored contents, if any */
+	result_set = tracker_db_interface_execute_procedure (iface, NULL, 
+							     "GetContents", 
+							     service_id_str, tracker_field_get_id (field),
+							     NULL);
+
+	if (result_set) {
+		
+		tracker_db_result_set_get (result_set, 0, &contents, -1);
+		g_object_unref (result_set);
+	}
+
+	g_free (service_id_str);
+
+	return contents;
+}
+
+void
+tracker_db_delete_text (TrackerService *service, guint32 id) {
+
+	TrackerDBInterface *iface;
+	TrackerField *field;
+	gchar *service_id_str;
+
+	service_id_str = tracker_guint32_to_string (id);
+	field = tracker_ontology_get_field_def ("File:Contents");
+	iface = tracker_db_manager_get_db_interface_by_type (tracker_service_get_name (service),
+							     TRACKER_DB_CONTENT_TYPE_CONTENTS);
+
+	/* Delete contents if it has! */
+	tracker_db_interface_execute_procedure (iface, NULL, 
+						"DeleteContent", 
+						service_id_str, tracker_field_get_id (field),
+						NULL);
+
+	g_free (service_id_str);
+
+}

Modified: branches/indexer-split/src/tracker-indexer/tracker-indexer-db.h
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-indexer-db.h	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-indexer-db.h	Thu Jul 31 13:17:51 2008
@@ -27,29 +27,56 @@
 #include "tracker-metadata.h"
 
 G_BEGIN_DECLS
+guint32  tracker_db_get_new_service_id    (TrackerDBInterface *iface);
+void     tracker_db_increment_stats       (TrackerDBInterface *iface,
+					   TrackerService     *service);
+void     tracker_db_decrement_stats       (TrackerDBInterface *iface,
+					   TrackerService     *service);
+/* Using path */
+guint    tracker_db_check_service         (TrackerService     *service,
+					   const gchar        *path,
+					   TrackerMetadata    *metadata);
+guint    tracker_db_get_service_type      (const gchar        *path);
+
+
+/* Services  */
+gboolean tracker_db_create_service        (TrackerService     *service,
+					   guint32             id,
+					   const gchar        *path,
+					   TrackerMetadata    *metadata);
+void     tracker_db_delete_service        (TrackerService     *service,
+					   guint32             id);
+
+
+/* Metadata */
+void     tracker_db_set_metadata          (TrackerService     *service,
+					   guint32             id,
+					   TrackerField       *field,
+					   const gchar        *value,
+					   const gchar        *parsed_value);
+gchar   *tracker_db_get_parsed_metadata   (TrackerService     *service,
+					   guint32             id);
+gchar   *tracker_db_get_unparsed_metadata (TrackerService     *service,
+					   guint32             id);
+void     tracker_db_delete_metadata       (TrackerService     *service,
+					   guint32             id);
+
+
+/* Contents */
+void     tracker_db_set_text              (TrackerService     *service,
+					   guint32             id,
+					   const gchar        *text);
+gchar   *tracker_db_get_text              (TrackerService     *service,
+					   guint32             id);
+void     tracker_db_delete_text           (TrackerService     *service,
+					   guint32             id);
+
+
+/* Events */
+void     tracker_db_create_event          (TrackerDBInterface *iface,
+					   guint32             service_id,
+					   const gchar        *type);
 
-guint32  tracker_db_get_new_service_id (TrackerDBInterface *iface);
-void     tracker_db_increment_stats    (TrackerDBInterface *iface,
-                                        TrackerService     *service);
-
-guint    tracker_db_check_service      (TrackerService     *service,
-					const gchar        *path,
-					TrackerMetadata    *metadata);
-gboolean tracker_db_create_service     (TrackerService     *service,
-                                        guint32             id,
-                                        const gchar        *path,
-					TrackerMetadata    *metadata);
-void     tracker_db_set_metadata       (TrackerService     *service,
-                                        guint32             id,
-                                        TrackerField       *field,
-                                        const gchar        *value,
-					const gchar        *parsed_value);
-void     tracker_db_set_text           (TrackerService     *service,
-					guint32             id,
-					const gchar        *text);
-void     tracker_db_create_event       (TrackerDBInterface *iface,
-                                        guint32             service_id,
-                                        const gchar        *type);
 
 
 G_END_DECLS

Modified: branches/indexer-split/src/tracker-indexer/tracker-indexer.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-indexer.c	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-indexer.c	Thu Jul 31 13:17:51 2008
@@ -128,9 +128,16 @@
 	gboolean is_paused;
 };
 
+typedef enum {
+	TRACKER_ITEM_ACTION_CREATE,
+	TRACKER_ITEM_ACTION_DELETE,
+	TRACKER_ITEM_ACTION_UPDATE
+} ItemAction;
+
 struct PathInfo {
 	GModule *module;
 	TrackerFile *file;
+	ItemAction action;
 };
 
 struct MetadataForeachData {
@@ -167,13 +174,15 @@
 static PathInfo *
 path_info_new (GModule *module,
 	       const gchar *module_name,
-	       const gchar *path)
+	       const gchar *path,
+	       ItemAction action)
 {
 	PathInfo *info;
 
 	info = g_slice_new (PathInfo);
 	info->module = module;
 	info->file = tracker_indexer_module_file_new (module, module_name, path);
+	info->action = action;
 
 	return info;
 }
@@ -780,10 +789,11 @@
 }
 
 static void
-index_text_contents (TrackerIndexer *indexer,
-		     gint service_id,
-		     gint service_type,
-		     const gchar *text)
+send_text_to_index (TrackerIndexer *indexer,
+		    gint service_id,
+		    gint service_type,
+		    const gchar *text,
+		    gint weight_factor)
 {
 	GHashTable *parsed = NULL;
 	GList      *words = NULL, *iter;
@@ -808,7 +818,7 @@
 					(gchar *)iter->data,
 					service_id,
 					service_type,
-					weight); 
+					weight*weight_factor); 
 	}
 
 	tracker_parser_text_free (parsed);
@@ -816,41 +826,76 @@
 }
 
 
-static gboolean
-process_file (TrackerIndexer *indexer,
-	      PathInfo       *info)
+static void
+index_text_with_parsing (TrackerIndexer *indexer, gint service_id, gint service_type_id, const gchar *content) 
 {
-	TrackerMetadata *metadata;
+	send_text_to_index (indexer, service_id, service_type_id, content, 1);
+}
 
-	g_debug ("Processing file:'%s'", info->file->path);
+static void
+unindex_text_with_parsing (TrackerIndexer *indexer, gint service_id, gint service_type_id, const gchar *content, gint weight_factor) 
+{
+	send_text_to_index (indexer, service_id, service_type_id, content, weight_factor);
+}
 
-	/* Set the current module */
-	g_free (indexer->private->current_module_name);
-	indexer->private->current_module_name = g_strdup (info->file->module_name);
+/* Re-adds every word that tracker_parser_text_fast finds in text with its weight negated. */
+static void
+unindex_text_no_parsing (TrackerIndexer *indexer,
+			 gint service_id,
+			 gint service_type_id,
+			 const gchar *text)
+{
+	GHashTable *parsed = NULL;
+	GList      *words = NULL, *iter;
+	gint        weight;
+
+	parsed = tracker_parser_text_fast (parsed,
+					   text,
+					   50); /* We dont know the exact property weight. Big value works */
 	
-	/* Sleep to throttle back indexing */
-	indexer_throttle (indexer->private->config, 100);
+	words = g_hash_table_get_keys (parsed);
+	for (iter = words; iter != NULL; iter = iter->next) {
+		weight = GPOINTER_TO_INT (g_hash_table_lookup (parsed, (gchar *)iter->data));
+
+		tracker_index_add_word (indexer->private->index, 
+					(gchar *)iter->data,
+					service_id,
+					service_type_id,
+					weight * -1); 
+	}
+
+	/* g_hash_table_get_keys returns a list the caller must free; the keys stay owned by the table */
+	g_list_free (words);
+	tracker_parser_text_free (parsed);
+}
+
+
+static gboolean
+handle_file_create (TrackerIndexer *indexer,
+		    PathInfo       *info) 
+{
+	TrackerMetadata *metadata;
 
 	/* Process file */
 	metadata = tracker_indexer_module_file_get_metadata (info->module, info->file);
 
 	if (metadata) {
+
 		TrackerService *service_def;
-		gchar *service_type, *text;
+		gchar *service_type;
+		gchar *text;
 		guint32 id;
 
-		/* FIXME: We clearly need a better way to define the service type for a given item */
 		service_type = g_strdup (tracker_module_config_get_index_service (info->file->module_name));
-
+	
 		if (!service_type || !service_type[0]) {
 			gchar *mimetype;
-
 			g_free (service_type);
 			mimetype = tracker_file_get_mime_type (info->file->path);
 			service_type = tracker_ontology_get_service_type_for_mime (mimetype);
 			g_free (mimetype);
 		}
-
+		
 		service_def = tracker_ontology_get_service_type_by_name (service_type);
 		g_free (service_type);
 
@@ -877,10 +922,10 @@
 
 				if (text) {
 					/* Save in the index */
-					index_text_contents (indexer, 
-							     id, 
-							     tracker_service_get_id (service_def),
-							     text);
+					index_text_with_parsing (indexer, 
+								 id, 
+								 tracker_service_get_id (service_def), 
+								 text);
 
 					/* Save in the DB */
 					tracker_db_set_text (service_def, id, text);
@@ -898,6 +943,105 @@
 	return !tracker_indexer_module_file_iter_contents (info->module, info->file);
 }
 
+/* Unindexes the stored contents and metadata of an item, then deletes its DB rows and decrements the
+ * stats. Returns TRUE if the item is not in the DB, else !tracker_indexer_module_file_iter_contents(). */
+static gboolean
+handle_file_delete (TrackerIndexer *indexer,
+		    PathInfo       *info)
+{
+	TrackerService *service_def;
+	gchar          *content;
+	gchar          *service_type = NULL;
+	guint           service_id;
+	guint           service_type_id;
+	gchar          *metadata;
+
+	service_type = g_strdup (tracker_module_config_get_index_service (info->file->module_name));
+
+	if (!service_type || !service_type[0]) {
+		gchar *name;
+
+		/* The file is no longer in the filesystem. Obtain the service type from the DB */
+		service_type_id = tracker_db_get_service_type (info->file->path);
+		name = tracker_ontology_get_service_type_by_id (service_type_id);
+		service_def = tracker_ontology_get_service_type_by_name (name);
+
+		g_free (name);
+	} else {
+		service_def = tracker_ontology_get_service_type_by_name (service_type);
+		service_type_id = tracker_service_get_id (service_def);
+	}
+
+	/* The duplicated name was only needed for the lookup above */
+	g_free (service_type);
+
+	/* NOTE(review): a NULL definition (unresolved type) is treated like an item missing from the DB */
+	service_id = service_def ? tracker_db_check_service (service_def, info->file->path, NULL) : 0;
+
+	if (service_id < 1) {
+		g_message ("Cannot delete file: it doesnt exist in DB");
+		return TRUE;
+	}
+
+	/* Get content, unindex the words and delete the contents */
+	content = tracker_db_get_text (service_def, service_id);
+	if (content) {
+		unindex_text_with_parsing (indexer, service_id, service_type_id, content, -1);
+		g_free (content);
+		tracker_db_delete_text (service_def, service_id);
+	}
+
+	/* Get metadata from DB to remove it from the index */
+	metadata = tracker_db_get_parsed_metadata (service_def, service_id);
+	unindex_text_no_parsing (indexer, service_id, service_type_id, metadata);
+	g_free (metadata);
+
+	/* The weight depends on the metadata, but a factor high enough forces deletion */
+	metadata = tracker_db_get_unparsed_metadata (service_def, service_id);
+	unindex_text_with_parsing (indexer, service_id, service_type_id, metadata, -1000);
+	g_free (metadata);
+
+	/* delete service */
+	tracker_db_delete_service (service_def, service_id);
+	tracker_db_delete_metadata (service_def, service_id);
+
+	tracker_db_decrement_stats (indexer->private->common, service_def);
+
+	indexer->private->items_processed++;
+
+	return !tracker_indexer_module_file_iter_contents (info->module, info->file);
+}
+
+/* Records the current module name, throttles, then dispatches the item according to info->action. */
+static gboolean
+process_file (TrackerIndexer *indexer,
+	      PathInfo       *info)
+{
+	g_debug ("Processing file:'%s'", info->file->path);
+
+	/* Set the current module */
+	g_free (indexer->private->current_module_name);
+	indexer->private->current_module_name = g_strdup (info->file->module_name);
+	
+	/* Sleep to throttle back indexing */
+	indexer_throttle (indexer->private->config, 100);
+
+	switch (info->action) {
+
+	/* Updates go through the same handler as creations */
+	case TRACKER_ITEM_ACTION_CREATE:
+	case TRACKER_ITEM_ACTION_UPDATE:
+		return handle_file_create (indexer, info);
+
+	case TRACKER_ITEM_ACTION_DELETE:
+		return handle_file_delete (indexer, info);
+
+	default:
+		g_critical ("Action not support in indexer");
+		return FALSE;
+	}
+}
+
 static void
 process_directory (TrackerIndexer *indexer,
 		   PathInfo *info,
@@ -920,11 +1064,11 @@
 
 		path = g_build_filename (info->file->path, name, NULL);
 
-		new_info = path_info_new (info->module, info->file->module_name, path);
+		new_info = path_info_new (info->module, info->file->module_name, path, TRACKER_ITEM_ACTION_CREATE);
 		add_file (indexer, new_info);
 
 		if (recurse && g_file_test (path, G_FILE_TEST_IS_DIR)) {
-			new_info = path_info_new (info->module, info->file->module_name, path);
+			new_info = path_info_new (info->module, info->file->module_name, path, TRACKER_ITEM_ACTION_CREATE);
 			add_directory (indexer, new_info);
 		}
 
@@ -981,7 +1125,7 @@
 	for (d = dirs; d; d = d->next) {
 		PathInfo *info;
 
-		info = path_info_new (module, module_name, d->data);
+		info = path_info_new (module, module_name, d->data, TRACKER_ITEM_ACTION_CREATE);
 		add_directory (indexer, info);
 	}
 
@@ -1225,7 +1369,7 @@
 		for (i = 0; files[i]; i++) {
 			PathInfo *info;
 
-			info = path_info_new (module, module_name, files[i]);
+			info = path_info_new (module, module_name, files[i], TRACKER_ITEM_ACTION_CREATE);
 			add_file (indexer, info);
 		}
 	} else {
@@ -1272,7 +1416,7 @@
 		for (i = 0; files[i]; i++) {
 			PathInfo *info;
 
-			info = path_info_new (module, module_name, files[i]);
+			info = path_info_new (module, module_name, files[i], TRACKER_ITEM_ACTION_UPDATE);
 			add_file (indexer, info);
 		}
 	} else {
@@ -1319,7 +1463,7 @@
 		for (i = 0; files[i]; i++) {
 			PathInfo *info;
 
-			info = path_info_new (module, module_name, files[i]);
+			info = path_info_new (module, module_name, files[i], TRACKER_ITEM_ACTION_DELETE);
 			add_file (indexer, info);
 		}
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]