tracker r1263 - in trunk: . src/trackerd



Author: carlosg
Date: Thu Apr 10 13:56:52 2008
New Revision: 1263
URL: http://svn.gnome.org/viewvc/tracker?rev=1263&view=rev

Log:
2008-04-10  Carlos Garnacho  <carlos imendio com>

        Add a query extension tree, at the moment supports And and Or
        operations. Fixes #509607.

        * src/trackerd/tracker-query-tree.[ch]: New files, implement the
        expression tree, and return a hit list based on the evaluation of the
        search terms with the logical expressions.
        * src/trackerd/Makefile.am: Added these files.
        * src/trackerd/tracker-indexer.[ch]: Remove SearchQuery related code,
        implemented now by TrackerQueryTree.
        * src/trackerd/tracker-db-sqlite.c:
        * src/trackerd/tracker-dbus-search.c: Use new TrackerQueryTree API.


Added:
   trunk/src/trackerd/tracker-query-tree.c
   trunk/src/trackerd/tracker-query-tree.h
Modified:
   trunk/ChangeLog
   trunk/src/trackerd/Makefile.am
   trunk/src/trackerd/tracker-db-sqlite.c
   trunk/src/trackerd/tracker-dbus-search.c
   trunk/src/trackerd/tracker-indexer.c
   trunk/src/trackerd/tracker-indexer.h

Modified: trunk/src/trackerd/Makefile.am
==============================================================================
--- trunk/src/trackerd/Makefile.am	(original)
+++ trunk/src/trackerd/Makefile.am	Thu Apr 10 13:56:52 2008
@@ -87,6 +87,8 @@
 	tracker-process-files.h						\
 	tracker-process-requests.c					\
 	tracker-process-requests.h					\
+	tracker-query-tree.c						\
+	tracker-query-tree.h						\
 	tracker-rdf-query.c						\
 	tracker-rdf-query.h						\
 	tracker-utils.c							\

Modified: trunk/src/trackerd/tracker-db-sqlite.c
==============================================================================
--- trunk/src/trackerd/tracker-db-sqlite.c	(original)
+++ trunk/src/trackerd/tracker-db-sqlite.c	Thu Apr 10 13:56:52 2008
@@ -53,6 +53,7 @@
 #include "tracker-utils.h"
 #include "tracker-watch.h"
 #include "tracker-service-manager.h"
+#include "tracker-query-tree.h"
 
 #define MAX_TEXT_BUFFER 65567
 #define MAX_COMPRESS_BUFFER 65565
@@ -2997,35 +2998,27 @@
 {
 }
 
-
-static void
-delete_dud (SearchWord *search_word, SearchQuery *query)
-{
-	tracker_remove_dud_hits (query->indexer, search_word->word, query->duds);
-}
-
-
-
 char ***
 tracker_db_search_text (DBConnection *db_con, const char *service, const char *search_string, int offset, int limit, gboolean save_results, gboolean detailed)
 {
+	TrackerQueryTree *tree;
 	char 		***result, **array;
-	GSList 		*hit_list;
+	GArray          *hits;
 	int 		count;
-	const GSList	*tmp;
 	gboolean	detailed_emails = FALSE, detailed_apps = FALSE;
 	int		service_array[255];
 	const gchar     *procedure;
+	GArray          *services = NULL;
+	guint           i, j;
+	GSList          *duds = NULL;
 
 	array = tracker_parse_text_into_array (search_string);
 
 	char ***res = tracker_exec_proc (db_con->common, "GetRelatedServiceIDs", 2, service, service);
-	
-	int i = 0,j =0;
+	i = j = 0;
 	char **row;
 	
 	if (res) {	
-	
 		while ((row = tracker_db_get_row (res, i))) {
 
 			if (row[0]) {
@@ -3035,34 +3028,20 @@
 
 			i++;
 		}
-			
-		tracker_db_free_result (res);
-	}
-
-	SearchQuery *query = tracker_create_query (db_con->word_index, service_array, j, offset, limit);
-
-	char **pstr;
-
-	for (pstr = array; *pstr; pstr++) {
-		tracker_add_query_word (query, *pstr, WordNormal);	
-		
-	}
 
-	g_strfreev (array);
-
-	if (!tracker_indexer_get_hits (query)) {
-
-		tracker_free_query (query);
-		return NULL;
+		service_array[j] = 0;
+		services = g_array_new (TRUE, TRUE, sizeof (gint));
+		g_array_append_vals (services, service_array, j);
 
+		tracker_db_free_result (res);
 	}
 
-	hit_list = query->hits;
-
+	tree = tracker_query_tree_new (search_string, db_con->word_index, services);
+	hits = tracker_query_tree_get_hits (tree, offset, limit);
 	result = NULL;
 
 	if (!save_results) {
-		count = g_slist_length (hit_list);
+		count = hits->len;
 
 		if (count > limit) count = limit;
 
@@ -3074,23 +3053,22 @@
 
 	count = 0;
 
-	for (tmp = hit_list; tmp; tmp = tmp->next) {
-
-		SearchHit *hit;
+	for (i = 0; i < hits->len; i++) {
+		TrackerSearchHit hit;
 		char	  *str_id;
 		char	  ***res;
 
 		if (count >= limit) break;
 
-		hit = tmp->data;
+		hit = g_array_index (hits, TrackerSearchHit, i);
 
-		str_id = tracker_uint_to_str (hit->service_id);
+		str_id = tracker_uint_to_str (hit.service_id);
 
 		/* we save results into SearchResults table instead of returing an array of array of strings */
 		if (save_results) {
 			char *str_score;
 
-			str_score = tracker_int_to_str (hit->score);
+			str_score = tracker_int_to_str (hit.score);
 
 			tracker_exec_proc (db_con, "InsertSearchResult1", 2, str_id, str_score);
 
@@ -3141,7 +3119,7 @@
 		} else {
 			tracker_log ("dud hit for search detected");
 			/* add to dud list */
-			query->duds = g_slist_prepend (query->duds, hit);
+			duds = g_slist_prepend (duds, &hit);
 		}
 
 	}
@@ -3153,11 +3131,23 @@
 	}
 
 	/* delete duds */
-	if (query->duds) {
-		g_slist_foreach (query->words, (GFunc) delete_dud, query);
+	if (duds) {
+		GSList *words, *w;
+		Indexer *indexer;
+
+		words = tracker_query_tree_get_words (tree);
+		indexer = tracker_query_tree_get_indexer (tree);
+
+		for (w = words; w; w = w->next) {
+			tracker_remove_dud_hits (indexer, (const gchar *) w->data, duds);
+		}
+
+		g_slist_free (words);
 	}
 
-	tracker_free_query (query);
+	g_object_unref (tree);
+	g_array_free (hits, TRUE);
+	g_array_free (services, TRUE);
 
 	return (char ***)result;
 }
@@ -4930,15 +4920,19 @@
 char ***
 tracker_db_search_text_mime (DBConnection *db_con, const char *text, char **mime_array, int n)
 {
-	char 	     **result, **array;
-	GSList 	     *hit_list, *result_list;
+	TrackerQueryTree *tree;
+	GArray       *hits;
+	char 	     **result;
+	GSList 	     *result_list;
 	const GSList *tmp;
+	guint        i;
 	int 	     count;
 	gint         service_array[8];
+	GArray       *services;
 
 	result = NULL;
 	result_list = NULL;
-	 
+
 	service_array[0] = tracker_service_manager_get_id_for_service ("Files");
 	service_array[1] = tracker_service_manager_get_id_for_service ("Folders");
 	service_array[2] = tracker_service_manager_get_id_for_service ("Documents");
@@ -4948,37 +4942,22 @@
 	service_array[6] = tracker_service_manager_get_id_for_service ("Text");
 	service_array[7] = tracker_service_manager_get_id_for_service ("Other");
 
-	SearchQuery *query = tracker_create_query (db_con->word_index, service_array, 8, 0, 999999);
-
-	array = tracker_parse_text_into_array (text);
+	services = g_array_new (TRUE, TRUE, sizeof (gint));
+	g_array_append_vals (services, service_array, 8);
 
-	char **pstr;
-
-	for (pstr = array; *pstr; pstr++) {
-		tracker_add_query_word (query, *pstr, WordNormal);	
-	}
-
-	g_strfreev (array);
-
-	if (!tracker_indexer_get_hits (query)) {
-
-		tracker_free_query (query);
-		return NULL;
-
-	}
-
-	hit_list = query->hits;
+	tree = tracker_query_tree_new (text, db_con->word_index, services);
+	hits = tracker_query_tree_get_hits (tree, 0, 0);
 
 	count = 0;
 
-	for (tmp = hit_list; tmp; tmp = tmp->next) {
-		SearchHit *hit;
+	for (i = 0; i < hits->len; i++) {
+		TrackerSearchHit hit;
 		char	  *str_id;
 		char	  ***res;
 
-		hit = tmp->data;
+		hit = g_array_index (hits, TrackerSearchHit, i);
 
-		str_id = tracker_uint_to_str (hit->service_id);
+		str_id = tracker_uint_to_str (hit.service_id);
 
 		res = tracker_exec_proc (db_con, "GetFileByID", 1, str_id);
 
@@ -5018,8 +4997,6 @@
 		}
 	}
 
-	tracker_free_query (query);
-
 	if (!result_list) {
 		return NULL;
 	}
@@ -5038,6 +5015,9 @@
 	}
 
 	g_slist_free (result_list);
+	g_object_unref (tree);
+	g_array_free (hits, TRUE);
+	g_array_free (services, TRUE);
 
 	return (char ***) result;
 }
@@ -5046,16 +5026,19 @@
 char ***
 tracker_db_search_text_location (DBConnection *db_con, const char *text, const char *location)
 {
+	TrackerQueryTree *tree;
+	GArray       *hits;
 	char	     *location_prefix;
-	char 	     **result, **array;
-	GSList 	     *hit_list, *result_list;
+	char 	     **result;
+	GSList 	     *result_list;
 	const GSList *tmp;
 	int 	     count;
-	gint          service_array[8];
+	gint         service_array[8];
+	guint        i;
+	GArray       *services;
 
 	location_prefix = g_strconcat (location, G_DIR_SEPARATOR_S, NULL);
 
-	 
 	service_array[0] = tracker_service_manager_get_id_for_service ("Files");
 	service_array[1] = tracker_service_manager_get_id_for_service ("Folders");
 	service_array[2] = tracker_service_manager_get_id_for_service ("Documents");
@@ -5065,39 +5048,24 @@
 	service_array[6] = tracker_service_manager_get_id_for_service ("Text");
 	service_array[7] = tracker_service_manager_get_id_for_service ("Other");
 
-	SearchQuery *query = tracker_create_query (db_con->word_index, service_array, 8, 0, 999999);
-
-	array = tracker_parse_text_into_array (text);
+	services = g_array_new (TRUE, TRUE, sizeof (gint));
+	g_array_append_vals (services, service_array, 8);
 
-	char **pstr;
-
-	for (pstr = array; *pstr; pstr++) {
-		tracker_add_query_word (query, *pstr, WordNormal);	
-	}
-
-	g_strfreev (array);
-
-	if (!tracker_indexer_get_hits (query)) {
-
-		tracker_free_query (query);
-		return NULL;
-
-	}
-
-	hit_list = query->hits;
+	tree = tracker_query_tree_new (text, db_con->word_index, services);
+	hits = tracker_query_tree_get_hits (tree, 0, 0);
 
 	result_list = NULL;
 
 	count = 0;
 
-	for (tmp = hit_list; tmp; tmp = tmp->next) {
-		SearchHit *hit;
+	for (i = 0; i < hits->len; i++) {
+		TrackerSearchHit hit;
 		char	  *str_id;
 		char	  ***res;
 
-		hit = tmp->data;
+		hit = g_array_index (hits, TrackerSearchHit, i);
 
-		str_id = tracker_uint_to_str (hit->service_id);
+		str_id = tracker_uint_to_str (hit.service_id);
 
 		res = tracker_exec_proc (db_con, "GetFileByID", 1, str_id);
 
@@ -5133,8 +5101,6 @@
 
 	g_free (location_prefix);
 
-	tracker_free_query (query);
-
 	if (!result_list) {
 		return NULL;
 	}
@@ -5153,6 +5119,9 @@
 	}
 
 	g_slist_free (result_list);
+	g_object_unref (tree);
+	g_array_free (hits, TRUE);
+	g_array_free (services, TRUE);
 
 	return (char ***) result;
 }
@@ -5161,16 +5130,19 @@
 char ***
 tracker_db_search_text_mime_location (DBConnection *db_con, const char *text, char **mime_array, int n, const char *location)
 {
+	TrackerQueryTree *tree;
+	GArray       *hits;
 	char	     *location_prefix;
-	char 	     **result, **array;
-	GSList 	     *hit_list, *result_list;
+	char 	     **result;
+	GSList 	     *result_list;
 	const GSList *tmp;
 	int	     count;
-	gint          service_array[8];
+	gint         service_array[8];
+	guint        i;
+	GArray       *services;
 
 	location_prefix = g_strconcat (location, G_DIR_SEPARATOR_S, NULL);
 
-		 
 	service_array[0] = tracker_service_manager_get_id_for_service ("Files");
 	service_array[1] = tracker_service_manager_get_id_for_service ("Folders");
 	service_array[2] = tracker_service_manager_get_id_for_service ("Documents");
@@ -5180,39 +5152,24 @@
 	service_array[6] = tracker_service_manager_get_id_for_service ("Text");
 	service_array[7] = tracker_service_manager_get_id_for_service ("Other");
 
-	SearchQuery *query = tracker_create_query (db_con->word_index, service_array, 8, 0, 999999);
-
-	array = tracker_parse_text_into_array (text);
-
-	char **pstr;
-
-	for (pstr = array; *pstr; pstr++) {
-		tracker_add_query_word (query, *pstr, WordNormal);	
-	}
-
-	g_strfreev (array);
-
-	if (!tracker_indexer_get_hits (query)) {
-
-		tracker_free_query (query);
-		return NULL;
-
-	}
+	services = g_array_new (TRUE, TRUE, sizeof (gint));
+	g_array_append_vals (services, service_array, 8);
 
-	hit_list = query->hits;
+	tree = tracker_query_tree_new (text, db_con->word_index, services);
+	hits = tracker_query_tree_get_hits (tree, 0, 0);
 
 	result_list = NULL;
 
 	count = 0;
 
-	for (tmp = hit_list; tmp; tmp = tmp->next) {
-		SearchHit *hit;
+	for (i = 0; i < hits->len; i++) {
+		TrackerSearchHit hit;
 		char	  *str_id;
 		char	  ***res;
 
-		hit = tmp->data;
+		hit = g_array_index (hits, TrackerSearchHit, i);
 
-		str_id = tracker_uint_to_str (hit->service_id);
+		str_id = tracker_uint_to_str (hit.service_id);
 
 		res = tracker_exec_proc (db_con, "GetFileByID", 1, str_id);
 
@@ -5257,8 +5214,6 @@
 
 	g_free (location_prefix);
 
-	tracker_free_query (query);
-
 	if (!result_list) {
 		return NULL;
 	}
@@ -5276,6 +5231,9 @@
 	}
 
 	g_slist_free (result_list);
+	g_object_unref (tree);
+	g_array_free (hits, TRUE);
+	g_array_free (services, TRUE);
 
 	return (char ***) result;
 }

Modified: trunk/src/trackerd/tracker-dbus-search.c
==============================================================================
--- trunk/src/trackerd/tracker-dbus-search.c	(original)
+++ trunk/src/trackerd/tracker-dbus-search.c	Thu Apr 10 13:56:52 2008
@@ -24,6 +24,7 @@
 
 #include "tracker-dbus-methods.h"
 #include "tracker-rdf-query.h"
+#include "tracker-query-tree.h"
 #include "tracker-indexer.h"
 #include "tracker-service-manager.h"
 
@@ -32,6 +33,7 @@
 void
 tracker_dbus_method_search_get_hit_count (DBusRec *rec)
 {
+	TrackerQueryTree *tree;
 	DBConnection *db_con;
 	DBusError    dbus_error;
 	gchar	     *service;
@@ -80,11 +82,10 @@
 
 	//tracker_log ("Executing GetHitCount with params %s, %s", service, str);
 
-	gchar **array;
 	gint service_array[12];
+	GArray *services;
 	gint result;
 	DBusMessage *reply;
-	int service_count = 1;
 
 	service_array[0] = tracker_service_manager_get_id_for_service (service);
 
@@ -97,38 +98,24 @@
 		service_array[6] = tracker_service_manager_get_id_for_service ("Text");
 		service_array[7] = tracker_service_manager_get_id_for_service ("Development");
 		service_array[8] = tracker_service_manager_get_id_for_service ("Other");
-		service_count = 9;
-
+		service_array[9] = 0;
 	} else if (strcmp (service, "Emails") == 0) {
 		service_array[1] = tracker_service_manager_get_id_for_service ("EvolutionEmails");
 		service_array[2] = tracker_service_manager_get_id_for_service ("KMailEmails");
 		service_array[3] = tracker_service_manager_get_id_for_service ("ThunderbirdEmails");
 		service_array[4] = tracker_service_manager_get_id_for_service ("ModestEmails");
-		service_count = 5;
-
+		service_array[5] = 0;
  	} else if (strcmp (service, "Conversations") == 0) {
 		service_array[1] = tracker_service_manager_get_id_for_service ("GaimConversations");
-		service_count = 2;
+		service_array[2] = 0;
 	}
 
+	services = g_array_new (TRUE, TRUE, sizeof (gint));
+	g_array_append_vals (services, service_array, G_N_ELEMENTS (service_array));
 
 	db_con = tracker_db_get_service_connection (db_con, service);
-
-	SearchQuery *query = tracker_create_query (db_con->word_index, service_array, service_count, 0, 999999);
-
-	array = tracker_parse_text_into_array (str);
-
-	gchar **pstr;
-
-	for (pstr = array; *pstr; pstr++) {
-		tracker_add_query_word (query, *pstr, WordNormal);
-	}
-
-	g_strfreev (array);
-
-	result = tracker_get_hit_count (query);
-
-	tracker_free_query (query);
+	tree = tracker_query_tree_new (str, db_con->word_index, services);
+	result = tracker_query_tree_get_hit_count (tree);
 
 	reply = dbus_message_new_method_return (rec->message);
 
@@ -140,15 +127,20 @@
 	dbus_connection_send (rec->connection, reply, NULL);
 
 	dbus_message_unref (reply);
+	g_object_unref (tree);
+	g_array_free (services, TRUE);
 }
 
 
 void
 tracker_dbus_method_search_get_hit_count_all (DBusRec *rec)
 {
+	TrackerQueryTree *tree;
+	GArray       *hit_counts, *mail_hit_counts;
 	DBConnection *db_con;
 	DBusError    dbus_error;
-	gchar	     *str;
+	gchar	     ***res, *str;
+	guint        i;
 
 	g_return_if_fail (rec);
 	g_return_if_fail (rec->user_data);
@@ -180,27 +172,35 @@
 
 	//tracker_log ("Executing detailed search with params %s, %s, %d, %d", service, str, offset, limit);
 
-	gchar ***res, **array;
+	tree = tracker_query_tree_new (str, db_con->word_index, NULL);
+	hit_counts = tracker_query_tree_get_hit_counts (tree);
 
-	SearchQuery *query = tracker_create_query (db_con->word_index, NULL, 0, 0, 999999);
+	tracker_query_tree_set_indexer (tree, tracker->email_index);
+	mail_hit_counts = tracker_query_tree_get_hit_counts (tree);
+	g_array_append_vals (hit_counts, mail_hit_counts->data, mail_hit_counts->len);
+	g_array_free (mail_hit_counts, TRUE);
 
-	array = tracker_parse_text_into_array (str);
+	res = g_new0 (gchar **, hit_counts->len + 1);
 
-	gchar **pstr;
+	for (i = 0; i < hit_counts->len; i++) {
+		TrackerHitCount count;
+		gchar **elem;
 
-	for (pstr = array; *pstr; pstr++) {
-		tracker_add_query_word (query, *pstr, WordNormal);	
-	}
-
-	g_strfreev (array);
+		count = g_array_index (hit_counts, TrackerHitCount, i);
+		elem = g_new (char *, 3);
 
-	res = tracker_get_hit_counts (query);
+		elem[0] = tracker_service_manager_get_service_by_id (count.service_type_id);
+		elem[1] = tracker_uint_to_str (count.count);
+		elem[2] = NULL;
 
-	tracker_free_query (query);	
+		res[i] = elem;
+	}
 
 	tracker_dbus_reply_with_query_result (rec, res);
 
 	tracker_db_free_result (res);
+	g_array_free (hit_counts, TRUE);
+	g_object_unref (tree);
 }
 
 

Modified: trunk/src/trackerd/tracker-indexer.c
==============================================================================
--- trunk/src/trackerd/tracker-indexer.c	(original)
+++ trunk/src/trackerd/tracker-indexer.c	Thu Apr 10 13:56:52 2008
@@ -50,10 +50,12 @@
 #include <libtracker-common/tracker-log.h>
 #include <libtracker-common/tracker-config.h>
 
+#include "tracker-query-tree.h"
 #include "tracker-indexer.h"
 #include "tracker-cache.h"
 #include "tracker-dbus.h"
 #include "tracker-service-manager.h"
+#include "tracker-query-tree.h"
 
 extern Tracker *tracker;
 
@@ -111,85 +113,6 @@
 }
 
 
-static inline SearchHit *
-word_details_to_search_hit (WordDetails *details)
-{
-	SearchHit *hit = g_slice_new (SearchHit);
-
-	hit->service_id = details->id;
-	hit->service_type_id = get_service_type (details);
-	hit->score = get_score (details);
-
-	return hit;
-}
-
-
-static inline SearchHit *
-copy_search_hit (SearchHit *src)
-{
-	SearchHit *hit = g_slice_new (SearchHit);
-
-	hit->service_id = src->service_id;
-	hit->service_type_id = src->service_type_id;
-	hit->score = src->score;
-
-	return hit;
-}
-
-
-static gint 
-compare_words (const void *a, const void *b)
-{
-	WordDetails *ap, *bp;
-
-	ap = (WordDetails *)a;
-	bp = (WordDetails *)b;
-
-	return (get_score(bp) - get_score(ap));
-}
-
-
-static gint 
-compare_search_hits (const void *a, const void *b)
-{
-	SearchHit *ap, *bp;
-
-	ap = (SearchHit *)a;
-	bp = (SearchHit *)b;
-
-	return (bp->score - ap->score);
-}
-
-
-static gint 
-compare_search_words (const void *a, const void *b)
-{
-	SearchWord *ap, *bp;
-
-	ap = (SearchWord *)a;
-	bp = (SearchWord *)b;
-
-	return (ap->hit_count - bp->hit_count);	
-}
-
-
-void		
-tracker_index_free_hit_list (GSList *hit_list)
-{
-	GSList *lst;
-
-	if (!hit_list) {
-                return;
-        }
-
-	for (lst = hit_list; lst; lst = lst->next) {
-                SearchHit *hit = lst->data;
-		g_slice_free (SearchHit, hit);
-	}
-
-	g_slist_free (hit_list);
-}
-
 static int
 get_preferred_bucket_count (Indexer *indexer)
 {
@@ -1092,9 +1015,33 @@
 	return i;
 }
 
+WordDetails *
+tracker_indexer_get_word_hits (Indexer     *indexer,
+			       const gchar *word,
+			       guint       *count)
+{
+	WordDetails *details;
+	gint tsiz;
+	gchar *tmp;
+
+	g_mutex_lock (indexer->word_mutex);
+
+	details = NULL;
+	*count = 0;
 
+	if ((tmp = dpget (indexer->word_index, word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
+		if (tsiz >= (int) sizeof (WordDetails)) {
+			details = (WordDetails *) tmp;
+			*count = tsiz / sizeof (WordDetails);
+		}
+	}
 
-/* use to delete dud hits for a word - dud_list is a list of SearchHit structs */
+	g_mutex_unlock (indexer->word_mutex);
+
+	return details;
+}
+
+/* use to delete dud hits for a word - dud_list is a list of TrackerSearchHit structs */
 gboolean
 tracker_remove_dud_hits (Indexer *indexer, const gchar *word, GSList *dud_list)
 {
@@ -1126,7 +1073,7 @@
 
 				for (lst = dud_list; lst; lst = lst->next) {
 
-					SearchHit *hit = lst->data;
+					TrackerSearchHit *hit = lst->data;
 
 					if (hit) {
 						if (details[i].id == hit->service_id) {
@@ -1164,17 +1111,6 @@
 	return FALSE;
 }
 
-
-static gint
-get_idf_score (WordDetails *details, float idf)
-{
-	guint32 score = get_score (details);
-	float f = idf * score * SCORE_MULTIPLIER;
-
-        return (f > 1.0) ? lrintf (f) : 1;
-}
-
-
 static inline gint
 count_hit_size_for_word (Indexer *indexer, const gchar *word)
 {
@@ -1187,607 +1123,21 @@
 	return tsiz;
 }
 
-
-static inline gboolean
-in_array (gint *array, gint count, gint value)
-{
-	gint i;
-
-	for (i = 0; i < count; i++) {
-		if (array[i] == value) {
-			return TRUE;
-		}
-	}
-
-	return FALSE;
-}
-
-
-SearchQuery *
-tracker_create_query (Indexer *indexer, gint *service_array, gint service_array_count, gint offset, gint limit)
-{
-	SearchQuery *result = g_slice_new0 (SearchQuery);
-
-	result->indexer = indexer;
-	result->service_array = service_array;
-	result->service_array_count = service_array_count;
-	result->offset = offset;
-	result->limit = limit;	
-
-	return result;
-}
-
-
-void
-tracker_add_query_word (SearchQuery *query, const gchar *word, WordType word_type)
-{
-	if (!word || word[0] == 0 || (word[0] == ' ' && word[1] == 0)) {
-		return;
-	}
-
-	SearchWord *result = g_slice_new0 (SearchWord);
-
-	result->word = g_strdup (word);
-	result->hit_count = 0;
-	result->idf = 0;
-	result->word_type = word_type;
-
-	query->words = g_slist_prepend (query->words, result);
-}
-
-
-static void
-free_word (SearchWord *result)
-{
-	g_free (result->word);
-	g_slice_free (SearchWord, result);
-}
-
-
-void
-tracker_free_query (SearchQuery *query)
-{
-	tracker_index_free_hit_list (query->hits);
-
-	/* Do not free individual dud hits - dud SearchHit structs are always part
-           of the hit list so will already be freed when hit list is freed above */
-	g_slist_free (query->duds);
-
-	g_slist_foreach (query->words, (GFunc) free_word, NULL);
-	g_slist_free (query->words);
-
-	g_slice_free (SearchQuery, query);
-}
-
-
-static GSList *
-get_hits_for_single_word (SearchQuery *query, SearchWord *search_word, gint *return_hit_count)
-{
-	int  tsiz, total_count = 0;
-	char *tmp = NULL;
-	GSList *result = NULL;
-
-	int offset = query->offset;
-
-	/* some results might be dud so get an extra 50 to compensate */
-	int limit = query->limit + 50;
-	
-	if (tracker->shutdown) {
-                return NULL;
-        }
-
-	g_mutex_lock (query->indexer->word_mutex);
-
-	if ((tmp = dpget (query->indexer->word_index, search_word->word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-
-		g_mutex_unlock (query->indexer->word_mutex);
-
-		if (tsiz >= (int) sizeof (WordDetails)) {
-			WordDetails *details;
-			int pnum;
-
-			details = (WordDetails *) tmp;
-			
-			pnum = tsiz / sizeof (WordDetails);
-
-			tracker_debug ("total hit count (excluding service divisions) is %d", pnum);
-		
-			qsort (details, pnum, sizeof (WordDetails), compare_words);
-
-			int i;
-			for (i = 0; i < pnum; i++) {
-				int service;
-
-				service = get_service_type (&details[i]);
-
-				if (!query->service_array || in_array (query->service_array, query->service_array_count, service)) {
-
-					total_count++;
-
-					if (offset != 0) {
-						offset--;
-						continue;
-					}
-
-					if (limit > 0) {
-						SearchHit *hit;
-
-						hit = word_details_to_search_hit (&details[i]);
-
-						result = g_slist_prepend (result, hit);
-		
-						limit--;
-
-					} else {
-						continue;
-					}
-				}
-			}
-
-                        tracker_debug ("total hit count for service is %d", total_count);
-		}
-
-	} else {
-		g_mutex_unlock (query->indexer->word_mutex);
-	}
-
-	*return_hit_count = total_count;
-
-	g_free (tmp);
-
-	return g_slist_reverse (result);
-}
-
-
-static GHashTable *
-get_intermediate_hits (SearchQuery *query, GHashTable *match_table, SearchWord *search_word, BoolOp bool_op)
-{
-	int  tsiz;
-	char *tmp;
-	GHashTable *result;
-
-	if (tracker->shutdown) {
-                return NULL;
-        }
-
-	result = g_hash_table_new (NULL, NULL);
-
-	g_mutex_lock (query->indexer->word_mutex);
-
-	if ((tmp = dpget (query->indexer->word_index, search_word->word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-
-		g_mutex_unlock (query->indexer->word_mutex);
-
-		if (tsiz >= (int) sizeof (WordDetails)) {
-
-			WordDetails *details;
-			int count;
-
-			details = (WordDetails *) tmp;
-			
-			count = tsiz / sizeof (WordDetails);
-				
-			int i;
-			for (i = 0; i < count; i++) {
-
-				int service;
-				service = get_service_type (&details[i]);
-
-				if (!query->service_array || in_array (query->service_array, query->service_array_count, service)) {
-
-					if (match_table) {
-						gpointer pscore;
-						guint32 score;
-					
-						pscore = g_hash_table_lookup (match_table, GUINT_TO_POINTER (details[i].id));
-
-						if (bool_op == BoolAnd && !pscore) {
-							continue;
-						}
-
-						score = GPOINTER_TO_UINT (pscore) +  get_idf_score (&details[i], search_word->idf);
-
-						g_hash_table_insert (result, GUINT_TO_POINTER (details[i].id), GUINT_TO_POINTER (score));   	
-
-					} else {
-						int idf_score = get_idf_score (&details[i], search_word->idf);
-
-						g_hash_table_insert (result, GUINT_TO_POINTER (details[i].id), GUINT_TO_POINTER (idf_score));
-					}
-				}
-			}
-		}
-
-		g_free (tmp);
-
-	} else {
-		g_mutex_unlock (query->indexer->word_mutex);
-	}
-
-	if (match_table) {
-		g_hash_table_destroy (match_table);
-	}
-
-	return result;
-}
-
-
-static GSList *
-get_final_hits (SearchQuery *query, GHashTable *match_table, SearchWord *search_word, BoolOp bool_op, gint *return_hit_count)
-{
-	int  tsiz, rnum;
-	char *tmp;
-	SearchHit *result;
-	GSList *list;
-
-	int offset = query->offset;
-
-	/* some results might be dud so get an extra 50 to compensate */
-	int limit = query->limit + 50;
-
-	*return_hit_count = 0;
-
-	rnum = 0;
-	list = NULL;
-
-	if (tracker->shutdown) {
-                return NULL;
-        }
-
-	if (!match_table || g_hash_table_size (match_table) < 1) {
-		return NULL;
-	}
-				
-	g_mutex_lock (query->indexer->word_mutex);
-
-	if ((tmp = dpget (query->indexer->word_index, search_word->word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-
-		g_mutex_unlock (query->indexer->word_mutex);
-
-		if (tsiz >= (int) sizeof (WordDetails)) {
-
-			WordDetails *details;
-			int size, count;
-
-			details = (WordDetails *) tmp;
-			
-			count = tsiz / sizeof (WordDetails);
-	
-			size = g_hash_table_size (match_table);
-
-			if (bool_op == BoolAnd) {
-				result = g_malloc0 (sizeof (SearchHit) * size);
-			} else {
-				result = g_malloc0 (sizeof (SearchHit) * (size + count));
-			}
-				
-			int i;
-			for (i = 0; i < count; i++) {
-
-				int service;
-				service = get_service_type (&details[i]);
-
-				if (!query->service_array || in_array (query->service_array, query->service_array_count, service)) {
-
-					gpointer pscore;
-					int score;
-				
-					pscore = g_hash_table_lookup (match_table, GUINT_TO_POINTER (details[i].id));
-
-					if (bool_op == BoolAnd && !pscore) {
-						continue;
-					}
-
-					/* implements IDF * TF here  */
-					score = GPOINTER_TO_INT (pscore) +  get_idf_score (&details[i], search_word->idf);
-
-					result[rnum].service_id = details[i].id;
-					result[rnum].service_type_id = service;
-    					result[rnum].score = score;
-				    
-					rnum++;
-				}
-			}
-
-			qsort (result, rnum, sizeof (SearchHit), compare_search_hits);
-
-			*return_hit_count = rnum;
-
-			if (offset > rnum) {
-				tracker_log ("WARNING: offset is too big - no results will be returned for search!");
-				g_free (tmp);
-				g_free (result);
-				return NULL;
-			}
-
-			if ((limit + offset) < rnum) { 
-				count = limit + offset;
-			} else {
-				count = rnum;
-			}
-
-			for (i = offset; i < count; i++) {
-				list = g_slist_prepend (list, copy_search_hit (&result[i]));
-			}
-
-			g_free (result);
-		}
-
-		g_free (tmp);
-
-	} else {
-		g_mutex_unlock (query->indexer->word_mutex);
-	}
-
-	return g_slist_reverse (list);
-}
-
-
-gboolean
-tracker_indexer_get_hits (SearchQuery *query)
+guint8
+tracker_word_details_get_service_type (WordDetails *details)
 {
-	GHashTable *table;
-	GSList *lst;
-
-	if (tracker->shutdown) {
-                return FALSE;
-	}
-
-	g_return_val_if_fail (query->indexer, FALSE);
-	g_return_val_if_fail (query->limit > 0, FALSE);
-
-	if (!query->words) {
-		return TRUE;
-	}
-
-	/* do simple case of only one search word fast */
-	if (!query->words->next) {
-		SearchWord *word = query->words->data;
-
-		if (!word) {
-                        return FALSE;
-                }
-
-		query->hits = get_hits_for_single_word (query, word, &query->hit_count);
-
-		return TRUE;
-	}
-
-	/* calc stats for each word */
-	for (lst = query->words; lst; lst = lst->next) {
-		SearchWord *word = lst->data;
-
-		if (!word) {
-                        return FALSE;
-                }
-
-		word->hit_count = count_hit_size_for_word (query->indexer, word->word);
-		word->idf = 1.0/word->hit_count;
-
-		if (word->hit_count < 1) {
-			return FALSE;
-		}
-	}
-
-	/* do multiple word searches - start with words with fewest hits first */
-
-	query->words = g_slist_sort (query->words, (GCompareFunc) compare_search_words);
-
-	table = NULL;
-
-	for (lst = query->words; lst; lst = lst->next) {
-		SearchWord *word = lst->data;
-
-		if (!word) {
-                        return FALSE;
-                }
-
-		if (lst->next) {
-			table = get_intermediate_hits (query, table, word, BoolAnd);
-
-			if (g_hash_table_size (table) == 0) {
-				query->hit_count = 0;
-				g_hash_table_destroy (table);
-				return FALSE;
-			}
-
-		} else {
-			query->hits = get_final_hits (query, table, word, BoolAnd, &query->hit_count);
-
-			if (table) {
-				g_hash_table_destroy (table);
-			}
-				
-			return TRUE;
-		}
-        }
-
-	return FALSE;
-}
-
-
-static gint
-prepend_key_pointer (gpointer key, gpointer value, gpointer data)
-{
-  	GSList **plist = data;
-  	*plist = g_slist_prepend (*plist, key);
-  	return 1;
-}
-
-
-static GSList * 
-g_hash_table_key_slist (GHashTable *table)
-{
-  	GSList *rv = NULL;
-  	g_hash_table_foreach (table, (GHFunc) prepend_key_pointer, &rv);
-  	return rv;
-}
-
-
-static gint
-sort_func (gpointer a, gpointer b)
-{
-	return (GPOINTER_TO_UINT (a) - GPOINTER_TO_UINT (b));
-}
-
-
-gchar ***
-tracker_get_hit_counts (SearchQuery *query)
-{
-	GHashTable *table = g_hash_table_new (NULL, NULL);
-
-	if (tracker->shutdown) {
-                return NULL;
-	}
-
-	g_return_val_if_fail (query, NULL);
-        g_return_val_if_fail (query->words, NULL);
-
-	query->service_array = NULL;
-	query->service_array_count = 0;
-
-	query->hits = NULL;
-
-	if (tracker_indexer_get_hits (query)) {
-		GSList *tmp;
-
-		for (tmp = query->hits; tmp; tmp=tmp->next) {
-			SearchHit *hit;
-                        gpointer   data;
-			guint32    count;
-			gint       parent_id;
-
-                        hit = tmp->data;
-                        data = g_hash_table_lookup (table, GUINT_TO_POINTER (hit->service_type_id));
-                        count = GPOINTER_TO_UINT (data) + 1;
-
-			g_hash_table_insert (table, 
-                                             GUINT_TO_POINTER (hit->service_type_id), 
-                                             GUINT_TO_POINTER (count));
-
-
-			/* Update service's parent count too (if it has a parent) */
-			parent_id = tracker_service_manager_get_parent_id_for_service_id (hit->service_type_id);
-
-			if (parent_id != -1) {
-                                data = g_hash_table_lookup (table, GUINT_TO_POINTER (parent_id));
-                                count = GPOINTER_TO_UINT (data) + 1;
-	
-				g_hash_table_insert (table, 
-                                                     GUINT_TO_POINTER (parent_id), 
-                                                     GUINT_TO_POINTER (count));
-			}
-        	}
-		tracker_index_free_hit_list (query->hits);
-		query->hits = NULL;
-	}
-
-
-	/* search emails */
-
-	query->indexer = tracker->email_index;
-
-	if (tracker_indexer_get_hits (query)) {
-		GSList *tmp;
-	
-		for (tmp = query->hits; tmp; tmp=tmp->next) {
-			SearchHit *hit;
-                        gpointer   data;
-			guint32    count;
-			gint       parent_id;
-
-                        hit = tmp->data;
-                        data = g_hash_table_lookup (table, GUINT_TO_POINTER (hit->service_type_id));
-                        count = GPOINTER_TO_UINT (data) + 1;
-
-			g_hash_table_insert (table, 
-                                             GUINT_TO_POINTER (hit->service_type_id), 
-                                             GUINT_TO_POINTER (count));
-
-			/* update service's parent count too (if it has a parent) */
-			parent_id = tracker_service_manager_get_parent_id_for_service_id (hit->service_type_id);
-
-			if (parent_id != -1) {
-                                data = g_hash_table_lookup (table, GUINT_TO_POINTER (parent_id));
-				count = GPOINTER_TO_UINT (data) + 1;
-
-				g_hash_table_insert (table, 
-                                                     GUINT_TO_POINTER (parent_id), 
-                                                     GUINT_TO_POINTER (count));
-			}
-	        }
-
-		tracker_index_free_hit_list (query->hits);
-		query->hits = NULL;
-	}
-
-	query->indexer = tracker->file_index;
-
-	GSList *list, *lst;
-
-	list = g_hash_table_key_slist (table);
-
-	list = g_slist_sort (list, (GCompareFunc) sort_func);
-
-	gint len, i;
-	len = g_slist_length (list);
-	
-	gchar **res = g_new0 (gchar *, len + 1);
-
-	res[len] = NULL;
-
-	i = 0;
-
-	for (lst = list; i < len && lst; lst = lst->next) {
-                gpointer   data;
-		guint32    service;
-		guint32    count;
-		gchar    **row;
-
-		if (!lst || !lst->data) {
-			tracker_error ("ERROR: in get hit counts");
-			res[i] = NULL;
-			continue;
-		}
-
-                service = GPOINTER_TO_UINT (lst->data);
-                data = g_hash_table_lookup (table, GUINT_TO_POINTER (service));
-                count = GPOINTER_TO_UINT (data);
-
-                row = g_new0 (gchar *, 3);
-
-		row[0] = tracker_service_manager_get_service_by_id ((gint) service);
-		row[1] = tracker_uint_to_str (count);
-		row[2] = NULL;
-
-		res[i] = (gchar *)row;
-
-		i++;
-	}
-
-	g_slist_free (list);
-
-	g_hash_table_destroy (table);
-
-	return (gchar ***) res;
+	return (details->amalgamated >> 24) & 0xFF;
 }
 
-
-gint
-tracker_get_hit_count (SearchQuery *query)
+gint16
+tracker_word_details_get_score (WordDetails *details)
 {
-	if (tracker->shutdown) {
-                return 0;
-	}
+	unsigned char a[2];
 
-	g_return_val_if_fail (query, 0);
-        g_return_val_if_fail (query->words, 0);
+	a[0] = (details->amalgamated >> 16) & 0xFF;
+	a[1] = (details->amalgamated >> 8) & 0xFF;
 
-	if (!tracker_indexer_get_hits (query)) {
-		return 0;
-	} else {
-		return query->hit_count;
-	}
+	return (gint16) (a[0] << 8) | (a[1]);	
 }
 
 /* int levenshtein ()
@@ -1898,7 +1248,6 @@
         return hits;
 }
 
-
 char *
 tracker_indexer_get_suggestion (Indexer *indexer, const gchar *term, gint maxdist)
 {
@@ -1958,4 +1307,3 @@
 
         return winner_str;
 }
-

Modified: trunk/src/trackerd/tracker-indexer.h
==============================================================================
--- trunk/src/trackerd/tracker-indexer.h	(original)
+++ trunk/src/trackerd/tracker-indexer.h	Thu Apr 10 13:56:52 2008
@@ -32,13 +32,6 @@
 } WordDetails;
 
 
-typedef struct {                         	 
-	guint32 	service_id;              /* Service ID of the document */
-	guint32 	service_type_id;         /* Service type ID of the document */
-	guint32 	score;            	 /* Ranking score */
-} SearchHit;
-
-
 typedef enum {
 	WordNormal,
 	WordWildCard,
@@ -56,26 +49,6 @@
 
 typedef struct Indexer_ Indexer;
 
-typedef struct {                        
-	Indexer 	*indexer;
-	gint 		*service_array;    
-	gint 		service_array_count;
-	gint 		hit_count;
-	GSList	        *hits;
-	GSList		*words;
-	GSList		*duds;
-	gint		offset;
-	gint		limit;
-} SearchQuery;
-
-
-typedef enum {
-	BoolAnd,
-	BoolOr,
-	BoolNot
-} BoolOp;
-
-
 typedef enum 
 {
 	INDEX_TYPE_FILES,
@@ -84,13 +57,7 @@
 } IndexType;
 
 
-SearchQuery * 	tracker_create_query 			(Indexer *indexer, gint *service_array, gint service_array_count, gint offset, gint limit);
-void		tracker_free_query 			(SearchQuery *query);
-
-void		tracker_add_query_word 			(SearchQuery *query, const gchar *word, WordType word_type);
-
 guint32		tracker_indexer_calc_amalgamated 	(gint service, gint score);
-void		tracker_index_free_hit_list		(GSList *hit_list);
 
 Indexer * 	tracker_indexer_open 			(const gchar *name, gboolean main_index);
 void		tracker_indexer_close 			(Indexer *indexer);
@@ -117,13 +84,13 @@
 gboolean	tracker_indexer_update_word_chunk	(Indexer *indexer, const gchar *word, WordDetails *details, gint word_detail_count);
 gboolean	tracker_indexer_update_word_list 	(Indexer *indexer, const gchar *word, GSList *update_list);
 
-
-gboolean	tracker_indexer_get_hits 		(SearchQuery *query);
-gchar ***	tracker_get_hit_counts 			(SearchQuery *query);
-gint		tracker_get_hit_count 			(SearchQuery *query);
+WordDetails *   tracker_indexer_get_word_hits           (Indexer *indexer, const gchar *word, guint *count);
 
 gboolean	tracker_remove_dud_hits 		(Indexer *indexer, const gchar *word, GSList *dud_list);
 
 char *          tracker_indexer_get_suggestion          (Indexer *indexer, const gchar *term, gint maxdist);
 
+guint8          tracker_word_details_get_service_type   (WordDetails *details);
+gint16          tracker_word_details_get_score          (WordDetails *details);
+
 #endif

Added: trunk/src/trackerd/tracker-query-tree.c
==============================================================================
--- (empty file)
+++ trunk/src/trackerd/tracker-query-tree.c	Thu Apr 10 13:56:52 2008
@@ -0,0 +1,810 @@
+/* Tracker - indexer and metadata database engine
+ * Copyright (C) 2007-2008 Nokia
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#include <glib-object.h>
+#include <string.h>
+#include <math.h>
+#include <depot.h>
+#include "tracker-query-tree.h"
+#include "tracker-parser.h"
+#include "tracker-utils.h"
+#include "tracker-service-manager.h"
+
+#define TRACKER_QUERY_TREE_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_QUERY_TREE, TrackerQueryTreePrivate))
+#define MAX_HIT_BUFFER 480000
+#define SCORE_MULTIPLIER 100000
+
+typedef enum OperationType OperationType;
+typedef enum TreeNodeType TreeNodeType;
+typedef struct TreeNode TreeNode;
+typedef struct TrackerQueryTreePrivate TrackerQueryTreePrivate;
+typedef struct ComposeHitsData ComposeHitsData;
+typedef struct SearchHitData SearchHitData;
+
+enum OperationType {
+	OP_NONE,
+	OP_AND,
+	OP_OR
+};
+
+struct TreeNode {
+	TreeNode *left;
+	TreeNode *right;
+	OperationType op;
+	gchar *term;
+};
+
+struct TrackerQueryTreePrivate {
+	gchar *query_str;
+	TreeNode *tree;
+	Indexer *indexer;
+	GArray *services;
+};
+
+struct ComposeHitsData {
+	OperationType op;
+	GHashTable *other_table;
+	GHashTable *dest_table;
+};
+
+struct SearchHitData {
+	guint32 service_type_id;
+	guint32 score;
+};
+
+enum {
+	PROP_0,
+	PROP_QUERY,
+	PROP_INDEXER,
+	PROP_SERVICES
+};
+
+static void tracker_query_tree_class_init (TrackerQueryTreeClass *class);
+static void tracker_query_tree_init       (TrackerQueryTree      *tree);
+static void tracker_query_tree_finalize   (GObject               *object);
+
+static void tracker_query_tree_set_property (GObject      *object,
+					     guint         prop_id,
+					     const GValue *value,
+					     GParamSpec   *pspec);
+static void tracker_query_tree_get_property (GObject      *object,
+					     guint         prop_id,
+					     GValue       *value,
+					     GParamSpec   *pspec);
+
+
+G_DEFINE_TYPE (TrackerQueryTree, tracker_query_tree, G_TYPE_OBJECT)
+
+static void
+tracker_query_tree_class_init (TrackerQueryTreeClass *klass)
+{
+	GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+	object_class->finalize = tracker_query_tree_finalize;
+	object_class->set_property = tracker_query_tree_set_property;
+	object_class->get_property = tracker_query_tree_get_property;
+
+	g_object_class_install_property (object_class,
+					 PROP_QUERY,
+					 g_param_spec_string ("query",
+							      "Query",
+							      "Query",
+							      NULL,
+							      G_PARAM_READWRITE));
+	g_object_class_install_property (object_class,
+					 PROP_INDEXER,
+					 g_param_spec_pointer ("indexer",
+							       "Indexer",
+							       "Indexer",
+							       G_PARAM_READWRITE));
+	g_object_class_install_property (object_class,
+					 PROP_SERVICES,
+					 g_param_spec_pointer ("services",
+							       "Services",
+							       "GArray of services",
+							       G_PARAM_READWRITE));
+	g_type_class_add_private (object_class,
+				  sizeof (TrackerQueryTreePrivate));
+}
+
+static void
+tracker_query_tree_init (TrackerQueryTree *tree)
+{
+}
+
+static TreeNode *
+tree_node_leaf_new (const gchar *term)
+{
+	TreeNode *node;
+
+	node = g_slice_new0 (TreeNode);
+	node->term = g_strdup (term);
+	node->op = OP_NONE;
+
+	return node;
+}
+
+static TreeNode *
+tree_node_operator_new (OperationType op)
+{
+	TreeNode *node;
+
+	node = g_slice_new0 (TreeNode);
+	node->op = op;
+
+	return node;
+}
+
+static void
+tree_node_free (TreeNode *node)
+{
+	if (!node)
+		return;
+
+	/* Free string if any */
+	g_free (node->term);
+
+	/* free subnodes */
+	tree_node_free (node->left);
+	tree_node_free (node->right);
+
+	g_slice_free (TreeNode, node);
+}
+
+static void
+tracker_query_tree_finalize (GObject *object)
+{
+	TrackerQueryTreePrivate *priv;
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (object);
+
+	tree_node_free (priv->tree);
+	g_free (priv->query_str);
+
+	G_OBJECT_CLASS (tracker_query_tree_parent_class)->finalize (object);
+}
+
+static void
+tracker_query_tree_set_property (GObject      *object,
+				 guint         prop_id,
+				 const GValue *value,
+				 GParamSpec   *pspec)
+{
+	switch (prop_id) {
+	case PROP_QUERY:
+		tracker_query_tree_set_query (TRACKER_QUERY_TREE (object),
+					      g_value_get_string (value));
+		break;
+	case PROP_INDEXER:
+		tracker_query_tree_set_indexer (TRACKER_QUERY_TREE (object),
+						g_value_get_pointer (value));
+		break;
+	case PROP_SERVICES:
+		tracker_query_tree_set_services (TRACKER_QUERY_TREE (object),
+						 g_value_get_pointer (value));
+		break;
+	default:
+		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+	}
+}
+
+static void
+tracker_query_tree_get_property (GObject      *object,
+				 guint         prop_id,
+				 GValue       *value,
+				 GParamSpec   *pspec)
+{
+	TrackerQueryTreePrivate *priv;
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (object);
+
+	switch (prop_id) {
+	case PROP_QUERY:
+		g_value_set_string (value, priv->query_str);
+		break;
+	case PROP_INDEXER:
+		g_value_set_pointer (value, priv->indexer);
+		break;
+	case PROP_SERVICES:
+		g_value_set_pointer (value, priv->services);
+		break;
+	default:
+		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+	}
+}
+
+TrackerQueryTree *
+tracker_query_tree_new (const gchar *query_str,
+			Indexer     *indexer,
+			GArray      *services)
+{
+	g_return_val_if_fail (query_str != NULL, NULL);
+	g_return_val_if_fail (indexer != NULL, NULL);
+
+	return g_object_new (TRACKER_TYPE_QUERY_TREE,
+			     "query", query_str,
+			     "indexer", indexer,
+			     "services", services,
+			     NULL);
+}
+
+# if 0
+static void
+print_tree (TreeNode *node)
+{
+	if (!node) {
+		g_print ("NULL ");
+		return;
+	}
+
+	switch (node->op) {
+	case OP_AND:
+		g_print ("AND ");
+		print_tree (node->left);
+		print_tree (node->right);
+		break;
+	case OP_OR:
+		g_print ("OR ");
+		print_tree (node->left);
+		print_tree (node->right);
+		break;
+	default:
+		g_print ("%s ", node->term);
+	}
+}
+#endif
+
+static void
+create_query_tree (TrackerQueryTree *tree)
+{
+	TrackerQueryTreePrivate *priv;
+	TreeNode *node, *stack_node;
+	GQueue *queue, *stack;
+	gboolean last_element_is_term = FALSE;
+	gchar *parsed_str, **strings;
+	gint i;
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+	strings = g_strsplit (priv->query_str, " ", -1);
+	queue = g_queue_new ();
+	stack = g_queue_new ();
+
+	/* Create a parse tree that recognizes queries such as:
+	 * "foo"
+	 * "foo and bar"
+	 * "foo or bar"
+	 * "foo and bar or baz"
+	 * "foo or bar and baz"
+	 *
+	 * The operator "and" will have higher priority than the operator "or",
+	 * and will also be assumed to exist if there is no operator between two
+	 * search terms.
+	 */
+
+	/* Step 1. Create polish notation for the search term, the
+	 * stack will be used to store operators temporarily
+	 */
+	for (i = 0; strings[i]; i++) {
+		OperationType op;
+
+		if (!strings[i] || !*strings[i])
+			continue;
+
+		/* get operator type */
+		if (strcmp (strings[i], "and") == 0) {
+			op = OP_AND;
+		} else if (strcmp (strings [i], "or") == 0) {
+			op = OP_OR;
+		} else {
+			if (last_element_is_term) {
+				/* last element was a search term, assume the "and"
+				 * operator between these two elements, and wait
+				 * for actually parsing the second term */
+				op = OP_AND;
+				i--;
+			} else {
+				op = OP_NONE;
+			}
+		}
+
+		/* create node for the operation type */
+		switch (op) {
+		case OP_AND:
+			node = tree_node_operator_new (OP_AND);
+			stack_node = g_queue_peek_head (stack);
+
+			/* push in the queue operators with fewer priority, "or" in this case */
+			while (stack_node && stack_node->op == OP_OR) {
+				stack_node = g_queue_pop_head (stack);
+				g_queue_push_head (queue, stack_node);
+
+				stack_node = g_queue_peek_head (stack);
+			}
+
+			g_queue_push_head (stack, node);
+			last_element_is_term = FALSE;
+			break;
+		case OP_OR:
+			node = tree_node_operator_new (OP_OR);
+			g_queue_push_head (stack, node);
+			last_element_is_term = FALSE;
+			break;
+		default:
+			/* search term */
+			parsed_str = tracker_parse_text_to_string (strings[i], TRUE, FALSE, FALSE);
+			node = tree_node_leaf_new (g_strstrip (parsed_str));
+			g_queue_push_head (queue, node);
+			last_element_is_term = TRUE;
+
+			g_free (parsed_str);
+		}
+	}
+
+	/* we've finished parsing, queue all remaining operators in the stack */
+	while ((stack_node = g_queue_pop_head (stack)) != NULL) {
+		g_queue_push_head (queue, stack_node);
+	}
+
+	/* step 2: run through the reverse polish notation and connect nodes to
+	 * create a tree, the stack will be used to store temporarily nodes
+	 * until they're connected to a parent node */
+	while ((node = g_queue_pop_tail (queue)) != NULL) {
+		switch (node->op) {
+		case OP_AND:
+		case OP_OR:
+			node->left = g_queue_pop_head (stack);
+			node->right = g_queue_pop_head (stack);
+			g_queue_push_head (stack, node);
+			break;
+		default:
+			g_queue_push_head (stack, node);
+			break;
+		}
+
+		priv->tree = node;
+	}
+
+	g_strfreev (strings);
+	g_queue_free (stack);
+	g_queue_free (queue);
+}
+
+void
+tracker_query_tree_set_query (TrackerQueryTree *tree,
+			      const gchar      *query_str)
+{
+	TrackerQueryTreePrivate *priv;
+	gchar *str;
+
+	g_return_if_fail (TRACKER_IS_QUERY_TREE (tree));
+	g_return_if_fail (query_str != NULL);
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+	str = g_strdup (query_str);
+	g_free (priv->query_str);
+	priv->query_str = str;
+
+	/* construct the parse tree */
+	create_query_tree (tree);
+
+	g_object_notify (G_OBJECT (tree), "query");
+}
+
+G_CONST_RETURN gchar *
+tracker_query_tree_get_query (TrackerQueryTree *tree)
+{
+	TrackerQueryTreePrivate *priv;
+
+	g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+	return priv->query_str;
+}
+
+void
+tracker_query_tree_set_indexer (TrackerQueryTree *tree,
+				Indexer          *indexer)
+{
+	TrackerQueryTreePrivate *priv;
+
+	g_return_if_fail (TRACKER_IS_QUERY_TREE (tree));
+	g_return_if_fail (indexer != NULL);
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+	priv->indexer = indexer;
+
+	g_object_notify (G_OBJECT (tree), "indexer");
+}
+
+Indexer *
+tracker_query_tree_get_indexer (TrackerQueryTree *tree)
+{
+	TrackerQueryTreePrivate *priv;
+
+	g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+	return priv->indexer;
+}
+
+void
+tracker_query_tree_set_services (TrackerQueryTree *tree,
+				 GArray           *services)
+{
+	TrackerQueryTreePrivate *priv;
+	GArray *copy = NULL;
+
+	g_return_if_fail (TRACKER_IS_QUERY_TREE (tree));
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+	if (priv->services != services) {
+		if (services) {
+			copy = g_array_new (TRUE, TRUE, sizeof (gint));
+			g_array_append_vals (copy, services->data, services->len);
+		}
+
+		if (priv->services)
+			g_array_free (priv->services, TRUE);
+
+		priv->services = copy;
+		g_object_notify (G_OBJECT (tree), "services");
+	}
+}
+
+GArray *
+tracker_query_tree_get_services (TrackerQueryTree *tree)
+{
+	TrackerQueryTreePrivate *priv;
+
+	g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+	return priv->services;
+}
+
+static void
+get_tree_words (TreeNode *node, GSList **list)
+{
+	if (!node)
+		return;
+
+	if (node->op == OP_NONE)
+		*list = g_slist_prepend (*list, node->term);
+	else {
+		get_tree_words (node->left, list);
+		get_tree_words (node->right, list);
+	}
+}
+
+GSList *
+tracker_query_tree_get_words (TrackerQueryTree *tree)
+{
+	TrackerQueryTreePrivate *priv;
+	GSList *list = NULL;
+
+	g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+	get_tree_words (priv->tree, &list);
+
+	return list;
+}
+
+static gint
+get_idf_score (WordDetails *details, float idf)
+{
+	guint32 score = tracker_word_details_get_score (details);
+	float f = idf * score * SCORE_MULTIPLIER;
+
+        return (f > 1.0) ? lrintf (f) : 1;
+}
+
+static gboolean
+in_array (GArray *array, gint element)
+{
+	guint i;
+
+	if (!array)
+		return TRUE;
+
+	for (i = 0; i < array->len; i++) {
+		if (g_array_index (array, gint, i) == element)
+			return TRUE;
+	}
+
+	return FALSE;
+}
+
+static void
+search_hit_data_free (SearchHitData *search_hit)
+{
+	g_slice_free (SearchHitData, search_hit);
+}
+
+static GHashTable *
+get_search_term_hits (TrackerQueryTree *tree,
+		      const gchar      *term)
+{
+	TrackerQueryTreePrivate *priv;
+	GHashTable *result;
+	WordDetails *details;
+	guint count, i;
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+	result = g_hash_table_new_full (NULL, NULL, NULL,
+					(GDestroyNotify) search_hit_data_free);
+
+	details = tracker_indexer_get_word_hits (priv->indexer, term, &count);
+
+	if (!details)
+		return result;
+
+	for (i = 0; i < count; i++) {
+		SearchHitData *data;
+		gint service;
+
+		service = tracker_word_details_get_service_type (&details[i]);
+
+		if (in_array (priv->services, service)) {
+			data = g_slice_new (SearchHitData);
+			data->service_type_id = service;
+			data->score = get_idf_score (&details[i], (float) 1 / count);
+
+			g_hash_table_insert (result, GINT_TO_POINTER (details[i].id), data);
+		}
+	}
+
+	g_free (details);
+
+	return result;
+}
+
+static void
+compose_hits_foreach (gpointer key,
+		      gpointer value,
+		      gpointer user_data)
+{
+	SearchHitData *hit1, *hit2, *hit;
+	ComposeHitsData *data;
+
+	data = (ComposeHitsData *) user_data;
+	hit1 = (SearchHitData *) value;
+	hit2 = g_hash_table_lookup (data->other_table, key);
+
+	if (data->op == OP_OR) {
+		/* compose both scores in the same entry */
+		hit = g_slice_dup (SearchHitData, hit1);
+
+		if (hit2)
+			hit->score += hit2->score;
+
+		g_hash_table_insert (data->dest_table, key, hit);
+	} else if (data->op == OP_AND) {
+		/* only compose if the key is in both tables */
+		if (hit2) {
+			hit = g_slice_dup (SearchHitData, hit1);
+			hit->score += hit2->score;
+			g_hash_table_insert (data->dest_table, key, hit);
+		}
+	} else {
+		g_assert_not_reached ();
+	}
+}
+
+static GHashTable *
+compose_hits (OperationType  op,
+	      GHashTable    *left_table,
+	      GHashTable    *right_table)
+{
+	ComposeHitsData data;
+	GHashTable *foreach_table;
+
+	data.op = op;
+
+	/* try to run the foreach in the table with less hits */
+	if (g_hash_table_size (left_table) < g_hash_table_size (right_table)) {
+		foreach_table = left_table;
+		data.other_table = right_table;
+	} else {
+		foreach_table = right_table;
+		data.other_table = left_table;
+	}
+
+	if (op == OP_OR) {
+		data.dest_table = g_hash_table_ref (data.other_table);
+	} else {
+		data.dest_table = g_hash_table_new_full (NULL, NULL, NULL,
+							 (GDestroyNotify) search_hit_data_free);
+	}
+
+	g_hash_table_foreach (foreach_table, (GHFunc) compose_hits_foreach, &data);
+
+	return data.dest_table;
+}
+
+static GHashTable *
+get_node_hits (TrackerQueryTree *tree,
+	       TreeNode         *node)
+{
+	GHashTable *left_table, *right_table, *result;
+
+	if (!node)
+		return NULL;
+
+	switch (node->op) {
+	case OP_NONE:
+		result = get_search_term_hits (tree, node->term);
+		break;
+	case OP_AND:
+	case OP_OR:
+		left_table = get_node_hits (tree, node->left);
+		right_table = get_node_hits (tree, node->right);
+		result = compose_hits (node->op, left_table, right_table);
+
+		g_hash_table_unref (left_table);
+		g_hash_table_unref (right_table);
+		break;
+	default:
+		g_assert_not_reached ();
+	}
+
+	return result;
+}
+
+static void
+get_hits_foreach (gpointer key,
+		  gpointer value,
+		  gpointer user_data)
+{
+	GArray *array;
+	TrackerSearchHit hit;
+	SearchHitData *hit_data;
+
+	array = (GArray *) user_data;
+	hit_data = (SearchHitData *) value;
+
+	hit.service_id = GPOINTER_TO_UINT (key);
+	hit.service_type_id = hit_data->service_type_id;
+	hit.score = hit_data->score;
+
+	g_array_append_val (array, hit);
+}
+
+static gint
+compare_search_hits (gconstpointer a,
+		     gconstpointer b)
+{
+	TrackerSearchHit *ap, *bp;
+
+	ap = (TrackerSearchHit *) a;
+	bp = (TrackerSearchHit *) b;
+
+	return (bp->score - ap->score);
+}
+
+GArray *
+tracker_query_tree_get_hits (TrackerQueryTree *tree,
+			     guint             offset,
+			     guint             limit)
+{
+	TrackerQueryTreePrivate *priv;
+	GHashTable *table;
+	GArray *array;
+
+	g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+	g_return_val_if_fail (priv->tree != NULL, NULL);
+
+	table = get_node_hits (tree, priv->tree);
+	array = g_array_sized_new (TRUE, TRUE, sizeof (TrackerSearchHit),
+				   g_hash_table_size (table));
+
+	g_hash_table_foreach (table, (GHFunc) get_hits_foreach, array);
+	g_array_sort (array, compare_search_hits);
+
+	if (offset > 0) {
+		g_array_remove_range (array, 0, CLAMP (offset, 0, array->len));
+	}
+
+	if (limit > 0 && limit < array->len) {
+		g_array_remove_range (array, limit, array->len - limit);
+	}
+
+	return array;
+}
+
+gint
+tracker_query_tree_get_hit_count (TrackerQueryTree *tree)
+{
+	TrackerQueryTreePrivate *priv;
+	GHashTable *table;
+	gint count;
+
+	g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), 0);
+
+	priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+	table = get_node_hits (tree, priv->tree);
+
+	if (!table)
+		return 0;
+
+	count = g_hash_table_size (table);
+	g_hash_table_destroy (table);
+
+	return count;
+}
+
+static void
+get_hit_count_foreach (gpointer key,
+		       gpointer value,
+		       gpointer user_data)
+{
+	GArray *array = (GArray *) user_data;
+	TrackerHitCount count;
+
+	count.service_type_id = GPOINTER_TO_INT (key);
+	count.count = GPOINTER_TO_INT (value);
+
+	g_array_append_val (array, count);
+}
+
+GArray *
+tracker_query_tree_get_hit_counts (TrackerQueryTree *tree)
+{
+	GHashTable *table;
+	GArray *hits, *counts;
+	guint i;
+
+	g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+	table = g_hash_table_new (NULL, NULL);
+	hits = tracker_query_tree_get_hits (tree, 0, 0);
+	counts = g_array_sized_new (TRUE, TRUE, sizeof (TrackerHitCount), 10);
+
+	for (i = 0; i < hits->len; i++) {
+		TrackerSearchHit hit;
+		gint count, parent_id;
+
+		hit = g_array_index (hits, TrackerSearchHit, i);
+		count = GPOINTER_TO_INT (g_hash_table_lookup (table, GINT_TO_POINTER (hit.service_type_id)));
+		count++;
+
+		g_hash_table_insert (table, GINT_TO_POINTER (hit.service_type_id), GINT_TO_POINTER (count));
+
+		/* update service's parent count too (if it has a parent) */
+		parent_id = tracker_service_manager_get_parent_id_for_service_id (hit.service_type_id);
+
+		if (parent_id != -1) {
+			count = GPOINTER_TO_INT (g_hash_table_lookup (table, GINT_TO_POINTER (parent_id)));
+			count++;
+
+			g_hash_table_insert (table, GINT_TO_POINTER (parent_id), GINT_TO_POINTER (count));
+		}
+	}
+
+	g_hash_table_foreach (table, (GHFunc) get_hit_count_foreach, counts);
+	g_array_free (hits, TRUE);
+
+	return counts;
+}

Added: trunk/src/trackerd/tracker-query-tree.h
==============================================================================
--- (empty file)
+++ trunk/src/trackerd/tracker-query-tree.h	Thu Apr 10 13:56:52 2008
@@ -0,0 +1,90 @@
+/* Tracker - indexer and metadata database engine
+ * Copyright (C) 2007 Nokia
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA  02110-1301, USA.
+ */
+
+#ifndef __TRACKER_QUERY_TREE_H__
+#define __TRACKER_QUERY_TREE_H__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+#include <glib-object.h>
+#include "tracker-indexer.h"
+
+#define TRACKER_TYPE_QUERY_TREE         (tracker_query_tree_get_type())
+#define TRACKER_QUERY_TREE(o)           (G_TYPE_CHECK_INSTANCE_CAST ((o), TRACKER_TYPE_QUERY_TREE, TrackerQueryTree))
+#define TRACKER_QUERY_TREE_CLASS(c)     (G_TYPE_CHECK_CLASS_CAST ((c),    TRACKER_TYPE_QUERY_TREE, TrackerQueryTreeClass))
+#define TRACKER_IS_QUERY_TREE(o)        (G_TYPE_CHECK_INSTANCE_TYPE ((o), TRACKER_TYPE_QUERY_TREE))
+#define TRACKER_IS_QUERY_TREE_CLASS(c)  (G_TYPE_CHECK_CLASS_TYPE ((c),    TRACKER_TYPE_QUERY_TREE))
+#define TRACKER_QUERY_TREE_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o),  TRACKER_TYPE_QUERY_TREE, TrackerQueryTreeClass))
+
+typedef struct TrackerQueryTree TrackerQueryTree;
+typedef struct TrackerQueryTreeClass TrackerQueryTreeClass;
+typedef struct TrackerSearchHit TrackerSearchHit;
+typedef struct TrackerHitCount TrackerHitCount;
+
+struct TrackerQueryTree {
+	GObject parent;
+};
+
+struct TrackerQueryTreeClass {
+	GObjectClass parent_class;
+};
+
+struct TrackerSearchHit {
+	guint32 service_id;      /* Service ID of the document */
+	guint32 service_type_id; /* Service type ID of the document */
+	guint32 score;           /* Ranking score */
+};
+
+struct TrackerHitCount {
+	guint service_type_id;
+	guint count;
+};
+
+GType tracker_query_tree_get_type (void);
+
+TrackerQueryTree *     tracker_query_tree_new            (const gchar      *query_str,
+							  Indexer          *indexer,
+							  GArray           *services);
+
+G_CONST_RETURN gchar * tracker_query_tree_get_query      (TrackerQueryTree *tree);
+void                   tracker_query_tree_set_query      (TrackerQueryTree *tree,
+							  const gchar      *query_str);
+
+Indexer *              tracker_query_tree_get_indexer    (TrackerQueryTree *tree);
+void                   tracker_query_tree_set_indexer    (TrackerQueryTree *tree,
+							  Indexer          *indexer);
+
+GArray *               tracker_query_tree_get_services   (TrackerQueryTree *tree);
+void                   tracker_query_tree_set_services   (TrackerQueryTree *tree,
+							  GArray           *services);
+
+GSList *               tracker_query_tree_get_words      (TrackerQueryTree *tree);
+
+GArray *               tracker_query_tree_get_hits       (TrackerQueryTree *tree,
+							  guint             offset,
+							  guint             limit);
+gint                   tracker_query_tree_get_hit_count  (TrackerQueryTree *tree);
+GArray *               tracker_query_tree_get_hit_counts (TrackerQueryTree *tree);
+
+
+G_END_DECLS
+
+#endif /* __TRACKER_QUERY_TREE_H__ */



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]