tracker r1263 - in trunk: . src/trackerd
- From: carlosg svn gnome org
- To: svn-commits-list gnome org
- Subject: tracker r1263 - in trunk: . src/trackerd
- Date: Thu, 10 Apr 2008 13:56:53 +0100 (BST)
Author: carlosg
Date: Thu Apr 10 13:56:52 2008
New Revision: 1263
URL: http://svn.gnome.org/viewvc/tracker?rev=1263&view=rev
Log:
2008-04-10 Carlos Garnacho <carlos imendio com>
Add a query extension tree, at the moment supports And and Or
operations. Fixes #509607.
* src/trackerd/tracker-query-tree.[ch]: New files, implement the
expression tree, and return a hit list based on the evaluation of the
search terms with the logical expressions.
* src/trackerd/Makefile.am: Added these files.
* src/trackerd/tracker-indexer.[ch]: Remove SearchQuery related code,
implemented now by TrackerQueryTree.
* src/trackerd/tracker-db-sqlite.c:
* src/trackerd/tracker-dbus-search.c: Use new TrackerQueryTree API.
Added:
trunk/src/trackerd/tracker-query-tree.c
trunk/src/trackerd/tracker-query-tree.h
Modified:
trunk/ChangeLog
trunk/src/trackerd/Makefile.am
trunk/src/trackerd/tracker-db-sqlite.c
trunk/src/trackerd/tracker-dbus-search.c
trunk/src/trackerd/tracker-indexer.c
trunk/src/trackerd/tracker-indexer.h
Modified: trunk/src/trackerd/Makefile.am
==============================================================================
--- trunk/src/trackerd/Makefile.am (original)
+++ trunk/src/trackerd/Makefile.am Thu Apr 10 13:56:52 2008
@@ -87,6 +87,8 @@
tracker-process-files.h \
tracker-process-requests.c \
tracker-process-requests.h \
+ tracker-query-tree.c \
+ tracker-query-tree.h \
tracker-rdf-query.c \
tracker-rdf-query.h \
tracker-utils.c \
Modified: trunk/src/trackerd/tracker-db-sqlite.c
==============================================================================
--- trunk/src/trackerd/tracker-db-sqlite.c (original)
+++ trunk/src/trackerd/tracker-db-sqlite.c Thu Apr 10 13:56:52 2008
@@ -53,6 +53,7 @@
#include "tracker-utils.h"
#include "tracker-watch.h"
#include "tracker-service-manager.h"
+#include "tracker-query-tree.h"
#define MAX_TEXT_BUFFER 65567
#define MAX_COMPRESS_BUFFER 65565
@@ -2997,35 +2998,27 @@
{
}
-
-static void
-delete_dud (SearchWord *search_word, SearchQuery *query)
-{
- tracker_remove_dud_hits (query->indexer, search_word->word, query->duds);
-}
-
-
-
char ***
tracker_db_search_text (DBConnection *db_con, const char *service, const char *search_string, int offset, int limit, gboolean save_results, gboolean detailed)
{
+ TrackerQueryTree *tree;
char ***result, **array;
- GSList *hit_list;
+ GArray *hits;
int count;
- const GSList *tmp;
gboolean detailed_emails = FALSE, detailed_apps = FALSE;
int service_array[255];
const gchar *procedure;
+ GArray *services = NULL;
+ guint i, j;
+ GSList *duds = NULL;
array = tracker_parse_text_into_array (search_string);
char ***res = tracker_exec_proc (db_con->common, "GetRelatedServiceIDs", 2, service, service);
-
- int i = 0,j =0;
+ i = j = 0;
char **row;
if (res) {
-
while ((row = tracker_db_get_row (res, i))) {
if (row[0]) {
@@ -3035,34 +3028,20 @@
i++;
}
-
- tracker_db_free_result (res);
- }
-
- SearchQuery *query = tracker_create_query (db_con->word_index, service_array, j, offset, limit);
-
- char **pstr;
-
- for (pstr = array; *pstr; pstr++) {
- tracker_add_query_word (query, *pstr, WordNormal);
-
- }
- g_strfreev (array);
-
- if (!tracker_indexer_get_hits (query)) {
-
- tracker_free_query (query);
- return NULL;
+ service_array[j] = 0;
+ services = g_array_new (TRUE, TRUE, sizeof (gint));
+ g_array_append_vals (services, service_array, j);
+ tracker_db_free_result (res);
}
- hit_list = query->hits;
-
+ tree = tracker_query_tree_new (search_string, db_con->word_index, services);
+ hits = tracker_query_tree_get_hits (tree, offset, limit);
result = NULL;
if (!save_results) {
- count = g_slist_length (hit_list);
+ count = hits->len;
if (count > limit) count = limit;
@@ -3074,23 +3053,22 @@
count = 0;
- for (tmp = hit_list; tmp; tmp = tmp->next) {
-
- SearchHit *hit;
+ for (i = 0; i < hits->len; i++) {
+ TrackerSearchHit hit;
char *str_id;
char ***res;
if (count >= limit) break;
- hit = tmp->data;
+ hit = g_array_index (hits, TrackerSearchHit, i);
- str_id = tracker_uint_to_str (hit->service_id);
+ str_id = tracker_uint_to_str (hit.service_id);
/* we save results into SearchResults table instead of returing an array of array of strings */
if (save_results) {
char *str_score;
- str_score = tracker_int_to_str (hit->score);
+ str_score = tracker_int_to_str (hit.score);
tracker_exec_proc (db_con, "InsertSearchResult1", 2, str_id, str_score);
@@ -3141,7 +3119,7 @@
} else {
tracker_log ("dud hit for search detected");
/* add to dud list */
- query->duds = g_slist_prepend (query->duds, hit);
+ duds = g_slist_prepend (duds, &hit);
}
}
@@ -3153,11 +3131,23 @@
}
/* delete duds */
- if (query->duds) {
- g_slist_foreach (query->words, (GFunc) delete_dud, query);
+ if (duds) {
+ GSList *words, *w;
+ Indexer *indexer;
+
+ words = tracker_query_tree_get_words (tree);
+ indexer = tracker_query_tree_get_indexer (tree);
+
+ for (w = words; w; w = w->next) {
+ tracker_remove_dud_hits (indexer, (const gchar *) w->data, duds);
+ }
+
+ g_slist_free (words);
}
- tracker_free_query (query);
+ g_object_unref (tree);
+ g_array_free (hits, TRUE);
+ g_array_free (services, TRUE);
return (char ***)result;
}
@@ -4930,15 +4920,19 @@
char ***
tracker_db_search_text_mime (DBConnection *db_con, const char *text, char **mime_array, int n)
{
- char **result, **array;
- GSList *hit_list, *result_list;
+ TrackerQueryTree *tree;
+ GArray *hits;
+ char **result;
+ GSList *result_list;
const GSList *tmp;
+ guint i;
int count;
gint service_array[8];
+ GArray *services;
result = NULL;
result_list = NULL;
-
+
service_array[0] = tracker_service_manager_get_id_for_service ("Files");
service_array[1] = tracker_service_manager_get_id_for_service ("Folders");
service_array[2] = tracker_service_manager_get_id_for_service ("Documents");
@@ -4948,37 +4942,22 @@
service_array[6] = tracker_service_manager_get_id_for_service ("Text");
service_array[7] = tracker_service_manager_get_id_for_service ("Other");
- SearchQuery *query = tracker_create_query (db_con->word_index, service_array, 8, 0, 999999);
-
- array = tracker_parse_text_into_array (text);
+ services = g_array_new (TRUE, TRUE, sizeof (gint));
+ g_array_append_vals (services, service_array, 8);
- char **pstr;
-
- for (pstr = array; *pstr; pstr++) {
- tracker_add_query_word (query, *pstr, WordNormal);
- }
-
- g_strfreev (array);
-
- if (!tracker_indexer_get_hits (query)) {
-
- tracker_free_query (query);
- return NULL;
-
- }
-
- hit_list = query->hits;
+ tree = tracker_query_tree_new (text, db_con->word_index, services);
+ hits = tracker_query_tree_get_hits (tree, 0, 0);
count = 0;
- for (tmp = hit_list; tmp; tmp = tmp->next) {
- SearchHit *hit;
+ for (i = 0; i < hits->len; i++) {
+ TrackerSearchHit hit;
char *str_id;
char ***res;
- hit = tmp->data;
+ hit = g_array_index (hits, TrackerSearchHit, i);
- str_id = tracker_uint_to_str (hit->service_id);
+ str_id = tracker_uint_to_str (hit.service_id);
res = tracker_exec_proc (db_con, "GetFileByID", 1, str_id);
@@ -5018,8 +4997,6 @@
}
}
- tracker_free_query (query);
-
if (!result_list) {
return NULL;
}
@@ -5038,6 +5015,9 @@
}
g_slist_free (result_list);
+ g_object_unref (tree);
+ g_array_free (hits, TRUE);
+ g_array_free (services, TRUE);
return (char ***) result;
}
@@ -5046,16 +5026,19 @@
char ***
tracker_db_search_text_location (DBConnection *db_con, const char *text, const char *location)
{
+ TrackerQueryTree *tree;
+ GArray *hits;
char *location_prefix;
- char **result, **array;
- GSList *hit_list, *result_list;
+ char **result;
+ GSList *result_list;
const GSList *tmp;
int count;
- gint service_array[8];
+ gint service_array[8];
+ guint i;
+ GArray *services;
location_prefix = g_strconcat (location, G_DIR_SEPARATOR_S, NULL);
-
service_array[0] = tracker_service_manager_get_id_for_service ("Files");
service_array[1] = tracker_service_manager_get_id_for_service ("Folders");
service_array[2] = tracker_service_manager_get_id_for_service ("Documents");
@@ -5065,39 +5048,24 @@
service_array[6] = tracker_service_manager_get_id_for_service ("Text");
service_array[7] = tracker_service_manager_get_id_for_service ("Other");
- SearchQuery *query = tracker_create_query (db_con->word_index, service_array, 8, 0, 999999);
-
- array = tracker_parse_text_into_array (text);
+ services = g_array_new (TRUE, TRUE, sizeof (gint));
+ g_array_append_vals (services, service_array, 8);
- char **pstr;
-
- for (pstr = array; *pstr; pstr++) {
- tracker_add_query_word (query, *pstr, WordNormal);
- }
-
- g_strfreev (array);
-
- if (!tracker_indexer_get_hits (query)) {
-
- tracker_free_query (query);
- return NULL;
-
- }
-
- hit_list = query->hits;
+ tree = tracker_query_tree_new (text, db_con->word_index, services);
+ hits = tracker_query_tree_get_hits (tree, 0, 0);
result_list = NULL;
count = 0;
- for (tmp = hit_list; tmp; tmp = tmp->next) {
- SearchHit *hit;
+ for (i = 0; i < hits->len; i++) {
+ TrackerSearchHit hit;
char *str_id;
char ***res;
- hit = tmp->data;
+ hit = g_array_index (hits, TrackerSearchHit, i);
- str_id = tracker_uint_to_str (hit->service_id);
+ str_id = tracker_uint_to_str (hit.service_id);
res = tracker_exec_proc (db_con, "GetFileByID", 1, str_id);
@@ -5133,8 +5101,6 @@
g_free (location_prefix);
- tracker_free_query (query);
-
if (!result_list) {
return NULL;
}
@@ -5153,6 +5119,9 @@
}
g_slist_free (result_list);
+ g_object_unref (tree);
+ g_array_free (hits, TRUE);
+ g_array_free (services, TRUE);
return (char ***) result;
}
@@ -5161,16 +5130,19 @@
char ***
tracker_db_search_text_mime_location (DBConnection *db_con, const char *text, char **mime_array, int n, const char *location)
{
+ TrackerQueryTree *tree;
+ GArray *hits;
char *location_prefix;
- char **result, **array;
- GSList *hit_list, *result_list;
+ char **result;
+ GSList *result_list;
const GSList *tmp;
int count;
- gint service_array[8];
+ gint service_array[8];
+ guint i;
+ GArray *services;
location_prefix = g_strconcat (location, G_DIR_SEPARATOR_S, NULL);
-
service_array[0] = tracker_service_manager_get_id_for_service ("Files");
service_array[1] = tracker_service_manager_get_id_for_service ("Folders");
service_array[2] = tracker_service_manager_get_id_for_service ("Documents");
@@ -5180,39 +5152,24 @@
service_array[6] = tracker_service_manager_get_id_for_service ("Text");
service_array[7] = tracker_service_manager_get_id_for_service ("Other");
- SearchQuery *query = tracker_create_query (db_con->word_index, service_array, 8, 0, 999999);
-
- array = tracker_parse_text_into_array (text);
-
- char **pstr;
-
- for (pstr = array; *pstr; pstr++) {
- tracker_add_query_word (query, *pstr, WordNormal);
- }
-
- g_strfreev (array);
-
- if (!tracker_indexer_get_hits (query)) {
-
- tracker_free_query (query);
- return NULL;
-
- }
+ services = g_array_new (TRUE, TRUE, sizeof (gint));
+ g_array_append_vals (services, service_array, 8);
- hit_list = query->hits;
+ tree = tracker_query_tree_new (text, db_con->word_index, services);
+ hits = tracker_query_tree_get_hits (tree, 0, 0);
result_list = NULL;
count = 0;
- for (tmp = hit_list; tmp; tmp = tmp->next) {
- SearchHit *hit;
+ for (i = 0; i < hits->len; i++) {
+ TrackerSearchHit hit;
char *str_id;
char ***res;
- hit = tmp->data;
+ hit = g_array_index (hits, TrackerSearchHit, i);
- str_id = tracker_uint_to_str (hit->service_id);
+ str_id = tracker_uint_to_str (hit.service_id);
res = tracker_exec_proc (db_con, "GetFileByID", 1, str_id);
@@ -5257,8 +5214,6 @@
g_free (location_prefix);
- tracker_free_query (query);
-
if (!result_list) {
return NULL;
}
@@ -5276,6 +5231,9 @@
}
g_slist_free (result_list);
+ g_object_unref (tree);
+ g_array_free (hits, TRUE);
+ g_array_free (services, TRUE);
return (char ***) result;
}
Modified: trunk/src/trackerd/tracker-dbus-search.c
==============================================================================
--- trunk/src/trackerd/tracker-dbus-search.c (original)
+++ trunk/src/trackerd/tracker-dbus-search.c Thu Apr 10 13:56:52 2008
@@ -24,6 +24,7 @@
#include "tracker-dbus-methods.h"
#include "tracker-rdf-query.h"
+#include "tracker-query-tree.h"
#include "tracker-indexer.h"
#include "tracker-service-manager.h"
@@ -32,6 +33,7 @@
void
tracker_dbus_method_search_get_hit_count (DBusRec *rec)
{
+ TrackerQueryTree *tree;
DBConnection *db_con;
DBusError dbus_error;
gchar *service;
@@ -80,11 +82,10 @@
//tracker_log ("Executing GetHitCount with params %s, %s", service, str);
- gchar **array;
gint service_array[12];
+ GArray *services;
gint result;
DBusMessage *reply;
- int service_count = 1;
service_array[0] = tracker_service_manager_get_id_for_service (service);
@@ -97,38 +98,24 @@
service_array[6] = tracker_service_manager_get_id_for_service ("Text");
service_array[7] = tracker_service_manager_get_id_for_service ("Development");
service_array[8] = tracker_service_manager_get_id_for_service ("Other");
- service_count = 9;
-
+ service_array[9] = 0;
} else if (strcmp (service, "Emails") == 0) {
service_array[1] = tracker_service_manager_get_id_for_service ("EvolutionEmails");
service_array[2] = tracker_service_manager_get_id_for_service ("KMailEmails");
service_array[3] = tracker_service_manager_get_id_for_service ("ThunderbirdEmails");
service_array[4] = tracker_service_manager_get_id_for_service ("ModestEmails");
- service_count = 5;
-
+ service_array[5] = 0;
} else if (strcmp (service, "Conversations") == 0) {
service_array[1] = tracker_service_manager_get_id_for_service ("GaimConversations");
- service_count = 2;
+ service_array[2] = 0;
}
+ services = g_array_new (TRUE, TRUE, sizeof (gint));
+ g_array_append_vals (services, service_array, G_N_ELEMENTS (service_array));
db_con = tracker_db_get_service_connection (db_con, service);
-
- SearchQuery *query = tracker_create_query (db_con->word_index, service_array, service_count, 0, 999999);
-
- array = tracker_parse_text_into_array (str);
-
- gchar **pstr;
-
- for (pstr = array; *pstr; pstr++) {
- tracker_add_query_word (query, *pstr, WordNormal);
- }
-
- g_strfreev (array);
-
- result = tracker_get_hit_count (query);
-
- tracker_free_query (query);
+ tree = tracker_query_tree_new (str, db_con->word_index, services);
+ result = tracker_query_tree_get_hit_count (tree);
reply = dbus_message_new_method_return (rec->message);
@@ -140,15 +127,20 @@
dbus_connection_send (rec->connection, reply, NULL);
dbus_message_unref (reply);
+ g_object_unref (tree);
+ g_array_free (services, TRUE);
}
void
tracker_dbus_method_search_get_hit_count_all (DBusRec *rec)
{
+ TrackerQueryTree *tree;
+ GArray *hit_counts, *mail_hit_counts;
DBConnection *db_con;
DBusError dbus_error;
- gchar *str;
+ gchar ***res, *str;
+ guint i;
g_return_if_fail (rec);
g_return_if_fail (rec->user_data);
@@ -180,27 +172,35 @@
//tracker_log ("Executing detailed search with params %s, %s, %d, %d", service, str, offset, limit);
- gchar ***res, **array;
+ tree = tracker_query_tree_new (str, db_con->word_index, NULL);
+ hit_counts = tracker_query_tree_get_hit_counts (tree);
- SearchQuery *query = tracker_create_query (db_con->word_index, NULL, 0, 0, 999999);
+ tracker_query_tree_set_indexer (tree, tracker->email_index);
+ mail_hit_counts = tracker_query_tree_get_hit_counts (tree);
+ g_array_append_vals (hit_counts, mail_hit_counts->data, mail_hit_counts->len);
+ g_array_free (mail_hit_counts, TRUE);
- array = tracker_parse_text_into_array (str);
+ res = g_new0 (gchar **, hit_counts->len + 1);
- gchar **pstr;
+ for (i = 0; i < hit_counts->len; i++) {
+ TrackerHitCount count;
+ gchar **elem;
- for (pstr = array; *pstr; pstr++) {
- tracker_add_query_word (query, *pstr, WordNormal);
- }
-
- g_strfreev (array);
+ count = g_array_index (hit_counts, TrackerHitCount, i);
+ elem = g_new (char *, 3);
- res = tracker_get_hit_counts (query);
+ elem[0] = tracker_service_manager_get_service_by_id (count.service_type_id);
+ elem[1] = tracker_uint_to_str (count.count);
+ elem[2] = NULL;
- tracker_free_query (query);
+ res[i] = elem;
+ }
tracker_dbus_reply_with_query_result (rec, res);
tracker_db_free_result (res);
+ g_array_free (hit_counts, TRUE);
+ g_object_unref (tree);
}
Modified: trunk/src/trackerd/tracker-indexer.c
==============================================================================
--- trunk/src/trackerd/tracker-indexer.c (original)
+++ trunk/src/trackerd/tracker-indexer.c Thu Apr 10 13:56:52 2008
@@ -50,10 +50,12 @@
#include <libtracker-common/tracker-log.h>
#include <libtracker-common/tracker-config.h>
+#include "tracker-query-tree.h"
#include "tracker-indexer.h"
#include "tracker-cache.h"
#include "tracker-dbus.h"
#include "tracker-service-manager.h"
+#include "tracker-query-tree.h"
extern Tracker *tracker;
@@ -111,85 +113,6 @@
}
-static inline SearchHit *
-word_details_to_search_hit (WordDetails *details)
-{
- SearchHit *hit = g_slice_new (SearchHit);
-
- hit->service_id = details->id;
- hit->service_type_id = get_service_type (details);
- hit->score = get_score (details);
-
- return hit;
-}
-
-
-static inline SearchHit *
-copy_search_hit (SearchHit *src)
-{
- SearchHit *hit = g_slice_new (SearchHit);
-
- hit->service_id = src->service_id;
- hit->service_type_id = src->service_type_id;
- hit->score = src->score;
-
- return hit;
-}
-
-
-static gint
-compare_words (const void *a, const void *b)
-{
- WordDetails *ap, *bp;
-
- ap = (WordDetails *)a;
- bp = (WordDetails *)b;
-
- return (get_score(bp) - get_score(ap));
-}
-
-
-static gint
-compare_search_hits (const void *a, const void *b)
-{
- SearchHit *ap, *bp;
-
- ap = (SearchHit *)a;
- bp = (SearchHit *)b;
-
- return (bp->score - ap->score);
-}
-
-
-static gint
-compare_search_words (const void *a, const void *b)
-{
- SearchWord *ap, *bp;
-
- ap = (SearchWord *)a;
- bp = (SearchWord *)b;
-
- return (ap->hit_count - bp->hit_count);
-}
-
-
-void
-tracker_index_free_hit_list (GSList *hit_list)
-{
- GSList *lst;
-
- if (!hit_list) {
- return;
- }
-
- for (lst = hit_list; lst; lst = lst->next) {
- SearchHit *hit = lst->data;
- g_slice_free (SearchHit, hit);
- }
-
- g_slist_free (hit_list);
-}
-
static int
get_preferred_bucket_count (Indexer *indexer)
{
@@ -1092,9 +1015,33 @@
return i;
}
+WordDetails *
+tracker_indexer_get_word_hits (Indexer *indexer,
+ const gchar *word,
+ guint *count)
+{
+ WordDetails *details;
+ gint tsiz;
+ gchar *tmp;
+
+ g_mutex_lock (indexer->word_mutex);
+
+ details = NULL;
+ *count = 0;
+ if ((tmp = dpget (indexer->word_index, word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
+ if (tsiz >= (int) sizeof (WordDetails)) {
+ details = (WordDetails *) tmp;
+ *count = tsiz / sizeof (WordDetails);
+ }
+ }
-/* use to delete dud hits for a word - dud_list is a list of SearchHit structs */
+ g_mutex_unlock (indexer->word_mutex);
+
+ return details;
+}
+
+/* use to delete dud hits for a word - dud_list is a list of TrackerSearchHit structs */
gboolean
tracker_remove_dud_hits (Indexer *indexer, const gchar *word, GSList *dud_list)
{
@@ -1126,7 +1073,7 @@
for (lst = dud_list; lst; lst = lst->next) {
- SearchHit *hit = lst->data;
+ TrackerSearchHit *hit = lst->data;
if (hit) {
if (details[i].id == hit->service_id) {
@@ -1164,17 +1111,6 @@
return FALSE;
}
-
-static gint
-get_idf_score (WordDetails *details, float idf)
-{
- guint32 score = get_score (details);
- float f = idf * score * SCORE_MULTIPLIER;
-
- return (f > 1.0) ? lrintf (f) : 1;
-}
-
-
static inline gint
count_hit_size_for_word (Indexer *indexer, const gchar *word)
{
@@ -1187,607 +1123,21 @@
return tsiz;
}
-
-static inline gboolean
-in_array (gint *array, gint count, gint value)
-{
- gint i;
-
- for (i = 0; i < count; i++) {
- if (array[i] == value) {
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-
-SearchQuery *
-tracker_create_query (Indexer *indexer, gint *service_array, gint service_array_count, gint offset, gint limit)
-{
- SearchQuery *result = g_slice_new0 (SearchQuery);
-
- result->indexer = indexer;
- result->service_array = service_array;
- result->service_array_count = service_array_count;
- result->offset = offset;
- result->limit = limit;
-
- return result;
-}
-
-
-void
-tracker_add_query_word (SearchQuery *query, const gchar *word, WordType word_type)
-{
- if (!word || word[0] == 0 || (word[0] == ' ' && word[1] == 0)) {
- return;
- }
-
- SearchWord *result = g_slice_new0 (SearchWord);
-
- result->word = g_strdup (word);
- result->hit_count = 0;
- result->idf = 0;
- result->word_type = word_type;
-
- query->words = g_slist_prepend (query->words, result);
-}
-
-
-static void
-free_word (SearchWord *result)
-{
- g_free (result->word);
- g_slice_free (SearchWord, result);
-}
-
-
-void
-tracker_free_query (SearchQuery *query)
-{
- tracker_index_free_hit_list (query->hits);
-
- /* Do not free individual dud hits - dud SearchHit structs are always part
- of the hit list so will already be freed when hit list is freed above */
- g_slist_free (query->duds);
-
- g_slist_foreach (query->words, (GFunc) free_word, NULL);
- g_slist_free (query->words);
-
- g_slice_free (SearchQuery, query);
-}
-
-
-static GSList *
-get_hits_for_single_word (SearchQuery *query, SearchWord *search_word, gint *return_hit_count)
-{
- int tsiz, total_count = 0;
- char *tmp = NULL;
- GSList *result = NULL;
-
- int offset = query->offset;
-
- /* some results might be dud so get an extra 50 to compensate */
- int limit = query->limit + 50;
-
- if (tracker->shutdown) {
- return NULL;
- }
-
- g_mutex_lock (query->indexer->word_mutex);
-
- if ((tmp = dpget (query->indexer->word_index, search_word->word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-
- g_mutex_unlock (query->indexer->word_mutex);
-
- if (tsiz >= (int) sizeof (WordDetails)) {
- WordDetails *details;
- int pnum;
-
- details = (WordDetails *) tmp;
-
- pnum = tsiz / sizeof (WordDetails);
-
- tracker_debug ("total hit count (excluding service divisions) is %d", pnum);
-
- qsort (details, pnum, sizeof (WordDetails), compare_words);
-
- int i;
- for (i = 0; i < pnum; i++) {
- int service;
-
- service = get_service_type (&details[i]);
-
- if (!query->service_array || in_array (query->service_array, query->service_array_count, service)) {
-
- total_count++;
-
- if (offset != 0) {
- offset--;
- continue;
- }
-
- if (limit > 0) {
- SearchHit *hit;
-
- hit = word_details_to_search_hit (&details[i]);
-
- result = g_slist_prepend (result, hit);
-
- limit--;
-
- } else {
- continue;
- }
- }
- }
-
- tracker_debug ("total hit count for service is %d", total_count);
- }
-
- } else {
- g_mutex_unlock (query->indexer->word_mutex);
- }
-
- *return_hit_count = total_count;
-
- g_free (tmp);
-
- return g_slist_reverse (result);
-}
-
-
-static GHashTable *
-get_intermediate_hits (SearchQuery *query, GHashTable *match_table, SearchWord *search_word, BoolOp bool_op)
-{
- int tsiz;
- char *tmp;
- GHashTable *result;
-
- if (tracker->shutdown) {
- return NULL;
- }
-
- result = g_hash_table_new (NULL, NULL);
-
- g_mutex_lock (query->indexer->word_mutex);
-
- if ((tmp = dpget (query->indexer->word_index, search_word->word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-
- g_mutex_unlock (query->indexer->word_mutex);
-
- if (tsiz >= (int) sizeof (WordDetails)) {
-
- WordDetails *details;
- int count;
-
- details = (WordDetails *) tmp;
-
- count = tsiz / sizeof (WordDetails);
-
- int i;
- for (i = 0; i < count; i++) {
-
- int service;
- service = get_service_type (&details[i]);
-
- if (!query->service_array || in_array (query->service_array, query->service_array_count, service)) {
-
- if (match_table) {
- gpointer pscore;
- guint32 score;
-
- pscore = g_hash_table_lookup (match_table, GUINT_TO_POINTER (details[i].id));
-
- if (bool_op == BoolAnd && !pscore) {
- continue;
- }
-
- score = GPOINTER_TO_UINT (pscore) + get_idf_score (&details[i], search_word->idf);
-
- g_hash_table_insert (result, GUINT_TO_POINTER (details[i].id), GUINT_TO_POINTER (score));
-
- } else {
- int idf_score = get_idf_score (&details[i], search_word->idf);
-
- g_hash_table_insert (result, GUINT_TO_POINTER (details[i].id), GUINT_TO_POINTER (idf_score));
- }
- }
- }
- }
-
- g_free (tmp);
-
- } else {
- g_mutex_unlock (query->indexer->word_mutex);
- }
-
- if (match_table) {
- g_hash_table_destroy (match_table);
- }
-
- return result;
-}
-
-
-static GSList *
-get_final_hits (SearchQuery *query, GHashTable *match_table, SearchWord *search_word, BoolOp bool_op, gint *return_hit_count)
-{
- int tsiz, rnum;
- char *tmp;
- SearchHit *result;
- GSList *list;
-
- int offset = query->offset;
-
- /* some results might be dud so get an extra 50 to compensate */
- int limit = query->limit + 50;
-
- *return_hit_count = 0;
-
- rnum = 0;
- list = NULL;
-
- if (tracker->shutdown) {
- return NULL;
- }
-
- if (!match_table || g_hash_table_size (match_table) < 1) {
- return NULL;
- }
-
- g_mutex_lock (query->indexer->word_mutex);
-
- if ((tmp = dpget (query->indexer->word_index, search_word->word, -1, 0, MAX_HIT_BUFFER, &tsiz)) != NULL) {
-
- g_mutex_unlock (query->indexer->word_mutex);
-
- if (tsiz >= (int) sizeof (WordDetails)) {
-
- WordDetails *details;
- int size, count;
-
- details = (WordDetails *) tmp;
-
- count = tsiz / sizeof (WordDetails);
-
- size = g_hash_table_size (match_table);
-
- if (bool_op == BoolAnd) {
- result = g_malloc0 (sizeof (SearchHit) * size);
- } else {
- result = g_malloc0 (sizeof (SearchHit) * (size + count));
- }
-
- int i;
- for (i = 0; i < count; i++) {
-
- int service;
- service = get_service_type (&details[i]);
-
- if (!query->service_array || in_array (query->service_array, query->service_array_count, service)) {
-
- gpointer pscore;
- int score;
-
- pscore = g_hash_table_lookup (match_table, GUINT_TO_POINTER (details[i].id));
-
- if (bool_op == BoolAnd && !pscore) {
- continue;
- }
-
- /* implements IDF * TF here */
- score = GPOINTER_TO_INT (pscore) + get_idf_score (&details[i], search_word->idf);
-
- result[rnum].service_id = details[i].id;
- result[rnum].service_type_id = service;
- result[rnum].score = score;
-
- rnum++;
- }
- }
-
- qsort (result, rnum, sizeof (SearchHit), compare_search_hits);
-
- *return_hit_count = rnum;
-
- if (offset > rnum) {
- tracker_log ("WARNING: offset is too big - no results will be returned for search!");
- g_free (tmp);
- g_free (result);
- return NULL;
- }
-
- if ((limit + offset) < rnum) {
- count = limit + offset;
- } else {
- count = rnum;
- }
-
- for (i = offset; i < count; i++) {
- list = g_slist_prepend (list, copy_search_hit (&result[i]));
- }
-
- g_free (result);
- }
-
- g_free (tmp);
-
- } else {
- g_mutex_unlock (query->indexer->word_mutex);
- }
-
- return g_slist_reverse (list);
-}
-
-
-gboolean
-tracker_indexer_get_hits (SearchQuery *query)
+guint8
+tracker_word_details_get_service_type (WordDetails *details)
{
- GHashTable *table;
- GSList *lst;
-
- if (tracker->shutdown) {
- return FALSE;
- }
-
- g_return_val_if_fail (query->indexer, FALSE);
- g_return_val_if_fail (query->limit > 0, FALSE);
-
- if (!query->words) {
- return TRUE;
- }
-
- /* do simple case of only one search word fast */
- if (!query->words->next) {
- SearchWord *word = query->words->data;
-
- if (!word) {
- return FALSE;
- }
-
- query->hits = get_hits_for_single_word (query, word, &query->hit_count);
-
- return TRUE;
- }
-
- /* calc stats for each word */
- for (lst = query->words; lst; lst = lst->next) {
- SearchWord *word = lst->data;
-
- if (!word) {
- return FALSE;
- }
-
- word->hit_count = count_hit_size_for_word (query->indexer, word->word);
- word->idf = 1.0/word->hit_count;
-
- if (word->hit_count < 1) {
- return FALSE;
- }
- }
-
- /* do multiple word searches - start with words with fewest hits first */
-
- query->words = g_slist_sort (query->words, (GCompareFunc) compare_search_words);
-
- table = NULL;
-
- for (lst = query->words; lst; lst = lst->next) {
- SearchWord *word = lst->data;
-
- if (!word) {
- return FALSE;
- }
-
- if (lst->next) {
- table = get_intermediate_hits (query, table, word, BoolAnd);
-
- if (g_hash_table_size (table) == 0) {
- query->hit_count = 0;
- g_hash_table_destroy (table);
- return FALSE;
- }
-
- } else {
- query->hits = get_final_hits (query, table, word, BoolAnd, &query->hit_count);
-
- if (table) {
- g_hash_table_destroy (table);
- }
-
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-
-static gint
-prepend_key_pointer (gpointer key, gpointer value, gpointer data)
-{
- GSList **plist = data;
- *plist = g_slist_prepend (*plist, key);
- return 1;
-}
-
-
-static GSList *
-g_hash_table_key_slist (GHashTable *table)
-{
- GSList *rv = NULL;
- g_hash_table_foreach (table, (GHFunc) prepend_key_pointer, &rv);
- return rv;
-}
-
-
-static gint
-sort_func (gpointer a, gpointer b)
-{
- return (GPOINTER_TO_UINT (a) - GPOINTER_TO_UINT (b));
-}
-
-
-gchar ***
-tracker_get_hit_counts (SearchQuery *query)
-{
- GHashTable *table = g_hash_table_new (NULL, NULL);
-
- if (tracker->shutdown) {
- return NULL;
- }
-
- g_return_val_if_fail (query, NULL);
- g_return_val_if_fail (query->words, NULL);
-
- query->service_array = NULL;
- query->service_array_count = 0;
-
- query->hits = NULL;
-
- if (tracker_indexer_get_hits (query)) {
- GSList *tmp;
-
- for (tmp = query->hits; tmp; tmp=tmp->next) {
- SearchHit *hit;
- gpointer data;
- guint32 count;
- gint parent_id;
-
- hit = tmp->data;
- data = g_hash_table_lookup (table, GUINT_TO_POINTER (hit->service_type_id));
- count = GPOINTER_TO_UINT (data) + 1;
-
- g_hash_table_insert (table,
- GUINT_TO_POINTER (hit->service_type_id),
- GUINT_TO_POINTER (count));
-
-
- /* Update service's parent count too (if it has a parent) */
- parent_id = tracker_service_manager_get_parent_id_for_service_id (hit->service_type_id);
-
- if (parent_id != -1) {
- data = g_hash_table_lookup (table, GUINT_TO_POINTER (parent_id));
- count = GPOINTER_TO_UINT (data) + 1;
-
- g_hash_table_insert (table,
- GUINT_TO_POINTER (parent_id),
- GUINT_TO_POINTER (count));
- }
- }
- tracker_index_free_hit_list (query->hits);
- query->hits = NULL;
- }
-
-
- /* search emails */
-
- query->indexer = tracker->email_index;
-
- if (tracker_indexer_get_hits (query)) {
- GSList *tmp;
-
- for (tmp = query->hits; tmp; tmp=tmp->next) {
- SearchHit *hit;
- gpointer data;
- guint32 count;
- gint parent_id;
-
- hit = tmp->data;
- data = g_hash_table_lookup (table, GUINT_TO_POINTER (hit->service_type_id));
- count = GPOINTER_TO_UINT (data) + 1;
-
- g_hash_table_insert (table,
- GUINT_TO_POINTER (hit->service_type_id),
- GUINT_TO_POINTER (count));
-
- /* update service's parent count too (if it has a parent) */
- parent_id = tracker_service_manager_get_parent_id_for_service_id (hit->service_type_id);
-
- if (parent_id != -1) {
- data = g_hash_table_lookup (table, GUINT_TO_POINTER (parent_id));
- count = GPOINTER_TO_UINT (data) + 1;
-
- g_hash_table_insert (table,
- GUINT_TO_POINTER (parent_id),
- GUINT_TO_POINTER (count));
- }
- }
-
- tracker_index_free_hit_list (query->hits);
- query->hits = NULL;
- }
-
- query->indexer = tracker->file_index;
-
- GSList *list, *lst;
-
- list = g_hash_table_key_slist (table);
-
- list = g_slist_sort (list, (GCompareFunc) sort_func);
-
- gint len, i;
- len = g_slist_length (list);
-
- gchar **res = g_new0 (gchar *, len + 1);
-
- res[len] = NULL;
-
- i = 0;
-
- for (lst = list; i < len && lst; lst = lst->next) {
- gpointer data;
- guint32 service;
- guint32 count;
- gchar **row;
-
- if (!lst || !lst->data) {
- tracker_error ("ERROR: in get hit counts");
- res[i] = NULL;
- continue;
- }
-
- service = GPOINTER_TO_UINT (lst->data);
- data = g_hash_table_lookup (table, GUINT_TO_POINTER (service));
- count = GPOINTER_TO_UINT (data);
-
- row = g_new0 (gchar *, 3);
-
- row[0] = tracker_service_manager_get_service_by_id ((gint) service);
- row[1] = tracker_uint_to_str (count);
- row[2] = NULL;
-
- res[i] = (gchar *)row;
-
- i++;
- }
-
- g_slist_free (list);
-
- g_hash_table_destroy (table);
-
- return (gchar ***) res;
+ return (details->amalgamated >> 24) & 0xFF;
}
-
-gint
-tracker_get_hit_count (SearchQuery *query)
+gint16
+tracker_word_details_get_score (WordDetails *details)
{
- if (tracker->shutdown) {
- return 0;
- }
+ unsigned char a[2];
- g_return_val_if_fail (query, 0);
- g_return_val_if_fail (query->words, 0);
+ a[0] = (details->amalgamated >> 16) & 0xFF;
+ a[1] = (details->amalgamated >> 8) & 0xFF;
- if (!tracker_indexer_get_hits (query)) {
- return 0;
- } else {
- return query->hit_count;
- }
+ return (gint16) (a[0] << 8) | (a[1]);
}
/* int levenshtein ()
@@ -1898,7 +1248,6 @@
return hits;
}
-
char *
tracker_indexer_get_suggestion (Indexer *indexer, const gchar *term, gint maxdist)
{
@@ -1958,4 +1307,3 @@
return winner_str;
}
-
Modified: trunk/src/trackerd/tracker-indexer.h
==============================================================================
--- trunk/src/trackerd/tracker-indexer.h (original)
+++ trunk/src/trackerd/tracker-indexer.h Thu Apr 10 13:56:52 2008
@@ -32,13 +32,6 @@
} WordDetails;
-typedef struct {
- guint32 service_id; /* Service ID of the document */
- guint32 service_type_id; /* Service type ID of the document */
- guint32 score; /* Ranking score */
-} SearchHit;
-
-
typedef enum {
WordNormal,
WordWildCard,
@@ -56,26 +49,6 @@
typedef struct Indexer_ Indexer;
-typedef struct {
- Indexer *indexer;
- gint *service_array;
- gint service_array_count;
- gint hit_count;
- GSList *hits;
- GSList *words;
- GSList *duds;
- gint offset;
- gint limit;
-} SearchQuery;
-
-
-typedef enum {
- BoolAnd,
- BoolOr,
- BoolNot
-} BoolOp;
-
-
typedef enum
{
INDEX_TYPE_FILES,
@@ -84,13 +57,7 @@
} IndexType;
-SearchQuery * tracker_create_query (Indexer *indexer, gint *service_array, gint service_array_count, gint offset, gint limit);
-void tracker_free_query (SearchQuery *query);
-
-void tracker_add_query_word (SearchQuery *query, const gchar *word, WordType word_type);
-
guint32 tracker_indexer_calc_amalgamated (gint service, gint score);
-void tracker_index_free_hit_list (GSList *hit_list);
Indexer * tracker_indexer_open (const gchar *name, gboolean main_index);
void tracker_indexer_close (Indexer *indexer);
@@ -117,13 +84,13 @@
gboolean tracker_indexer_update_word_chunk (Indexer *indexer, const gchar *word, WordDetails *details, gint word_detail_count);
gboolean tracker_indexer_update_word_list (Indexer *indexer, const gchar *word, GSList *update_list);
-
-gboolean tracker_indexer_get_hits (SearchQuery *query);
-gchar *** tracker_get_hit_counts (SearchQuery *query);
-gint tracker_get_hit_count (SearchQuery *query);
+WordDetails * tracker_indexer_get_word_hits (Indexer *indexer, const gchar *word, guint *count);
gboolean tracker_remove_dud_hits (Indexer *indexer, const gchar *word, GSList *dud_list);
char * tracker_indexer_get_suggestion (Indexer *indexer, const gchar *term, gint maxdist);
+guint8 tracker_word_details_get_service_type (WordDetails *details);
+gint16 tracker_word_details_get_score (WordDetails *details);
+
#endif
Added: trunk/src/trackerd/tracker-query-tree.c
==============================================================================
--- (empty file)
+++ trunk/src/trackerd/tracker-query-tree.c Thu Apr 10 13:56:52 2008
@@ -0,0 +1,810 @@
+/* Tracker - indexer and metadata database engine
+ * Copyright (C) 2007-2008 Nokia
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <glib-object.h>
+#include <string.h>
+#include <math.h>
+#include <depot.h>
+#include "tracker-query-tree.h"
+#include "tracker-parser.h"
+#include "tracker-utils.h"
+#include "tracker-service-manager.h"
+
+#define TRACKER_QUERY_TREE_GET_PRIVATE(o) (G_TYPE_INSTANCE_GET_PRIVATE ((o), TRACKER_TYPE_QUERY_TREE, TrackerQueryTreePrivate))
+#define MAX_HIT_BUFFER 480000
+#define SCORE_MULTIPLIER 100000
+
+typedef enum OperationType OperationType;
+typedef enum TreeNodeType TreeNodeType;
+typedef struct TreeNode TreeNode;
+typedef struct TrackerQueryTreePrivate TrackerQueryTreePrivate;
+typedef struct ComposeHitsData ComposeHitsData;
+typedef struct SearchHitData SearchHitData;
+
+enum OperationType {
+ OP_NONE,
+ OP_AND,
+ OP_OR
+};
+
+struct TreeNode {
+ TreeNode *left;
+ TreeNode *right;
+ OperationType op;
+ gchar *term;
+};
+
+struct TrackerQueryTreePrivate {
+ gchar *query_str;
+ TreeNode *tree;
+ Indexer *indexer;
+ GArray *services;
+};
+
+struct ComposeHitsData {
+ OperationType op;
+ GHashTable *other_table;
+ GHashTable *dest_table;
+};
+
+struct SearchHitData {
+ guint32 service_type_id;
+ guint32 score;
+};
+
+enum {
+ PROP_0,
+ PROP_QUERY,
+ PROP_INDEXER,
+ PROP_SERVICES
+};
+
+static void tracker_query_tree_class_init (TrackerQueryTreeClass *class);
+static void tracker_query_tree_init (TrackerQueryTree *tree);
+static void tracker_query_tree_finalize (GObject *object);
+
+static void tracker_query_tree_set_property (GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec);
+static void tracker_query_tree_get_property (GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec);
+
+
+G_DEFINE_TYPE (TrackerQueryTree, tracker_query_tree, G_TYPE_OBJECT)
+
+static void
+tracker_query_tree_class_init (TrackerQueryTreeClass *klass)
+{
+ GObjectClass *object_class = G_OBJECT_CLASS (klass);
+
+ object_class->finalize = tracker_query_tree_finalize;
+ object_class->set_property = tracker_query_tree_set_property;
+ object_class->get_property = tracker_query_tree_get_property;
+
+ g_object_class_install_property (object_class,
+ PROP_QUERY,
+ g_param_spec_string ("query",
+ "Query",
+ "Query",
+ NULL,
+ G_PARAM_READWRITE));
+ g_object_class_install_property (object_class,
+ PROP_INDEXER,
+ g_param_spec_pointer ("indexer",
+ "Indexer",
+ "Indexer",
+ G_PARAM_READWRITE));
+ g_object_class_install_property (object_class,
+ PROP_SERVICES,
+ g_param_spec_pointer ("services",
+ "Services",
+ "GArray of services",
+ G_PARAM_READWRITE));
+ g_type_class_add_private (object_class,
+ sizeof (TrackerQueryTreePrivate));
+}
+
+static void
+tracker_query_tree_init (TrackerQueryTree *tree)
+{
+}
+
+static TreeNode *
+tree_node_leaf_new (const gchar *term)
+{
+ TreeNode *node;
+
+ node = g_slice_new0 (TreeNode);
+ node->term = g_strdup (term);
+ node->op = OP_NONE;
+
+ return node;
+}
+
+static TreeNode *
+tree_node_operator_new (OperationType op)
+{
+ TreeNode *node;
+
+ node = g_slice_new0 (TreeNode);
+ node->op = op;
+
+ return node;
+}
+
+static void
+tree_node_free (TreeNode *node)
+{
+ if (!node)
+ return;
+
+ /* Free string if any */
+ g_free (node->term);
+
+ /* free subnodes */
+ tree_node_free (node->left);
+ tree_node_free (node->right);
+
+ g_slice_free (TreeNode, node);
+}
+
+static void
+tracker_query_tree_finalize (GObject *object)
+{
+ TrackerQueryTreePrivate *priv;
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (object);
+
+ tree_node_free (priv->tree);
+ g_free (priv->query_str);
+
+ G_OBJECT_CLASS (tracker_query_tree_parent_class)->finalize (object);
+}
+
+static void
+tracker_query_tree_set_property (GObject *object,
+ guint prop_id,
+ const GValue *value,
+ GParamSpec *pspec)
+{
+ switch (prop_id) {
+ case PROP_QUERY:
+ tracker_query_tree_set_query (TRACKER_QUERY_TREE (object),
+ g_value_get_string (value));
+ break;
+ case PROP_INDEXER:
+ tracker_query_tree_set_indexer (TRACKER_QUERY_TREE (object),
+ g_value_get_pointer (value));
+ break;
+ case PROP_SERVICES:
+ tracker_query_tree_set_services (TRACKER_QUERY_TREE (object),
+ g_value_get_pointer (value));
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+static void
+tracker_query_tree_get_property (GObject *object,
+ guint prop_id,
+ GValue *value,
+ GParamSpec *pspec)
+{
+ TrackerQueryTreePrivate *priv;
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (object);
+
+ switch (prop_id) {
+ case PROP_QUERY:
+ g_value_set_string (value, priv->query_str);
+ break;
+ case PROP_INDEXER:
+ g_value_set_pointer (value, priv->indexer);
+ break;
+ case PROP_SERVICES:
+ g_value_set_pointer (value, priv->services);
+ break;
+ default:
+ G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+ }
+}
+
+TrackerQueryTree *
+tracker_query_tree_new (const gchar *query_str,
+ Indexer *indexer,
+ GArray *services)
+{
+ g_return_val_if_fail (query_str != NULL, NULL);
+ g_return_val_if_fail (indexer != NULL, NULL);
+
+ return g_object_new (TRACKER_TYPE_QUERY_TREE,
+ "query", query_str,
+ "indexer", indexer,
+ "services", services,
+ NULL);
+}
+
+# if 0
+static void
+print_tree (TreeNode *node)
+{
+ if (!node) {
+ g_print ("NULL ");
+ return;
+ }
+
+ switch (node->op) {
+ case OP_AND:
+ g_print ("AND ");
+ print_tree (node->left);
+ print_tree (node->right);
+ break;
+ case OP_OR:
+ g_print ("OR ");
+ print_tree (node->left);
+ print_tree (node->right);
+ break;
+ default:
+ g_print ("%s ", node->term);
+ }
+}
+#endif
+
+static void
+create_query_tree (TrackerQueryTree *tree)
+{
+ TrackerQueryTreePrivate *priv;
+ TreeNode *node, *stack_node;
+ GQueue *queue, *stack;
+ gboolean last_element_is_term = FALSE;
+ gchar *parsed_str, **strings;
+ gint i;
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+ strings = g_strsplit (priv->query_str, " ", -1);
+ queue = g_queue_new ();
+ stack = g_queue_new ();
+
+ /* Create a parse tree that recognizes queries such as:
+ * "foo"
+ * "foo and bar"
+ * "foo or bar"
+ * "foo and bar or baz"
+ * "foo or bar and baz"
+ *
+ * The operator "and" will have higher priority than the operator "or",
+ * and will also be assumed to exist if there is no operator between two
+ * search terms.
+ */
+
+ /* Step 1. Create polish notation for the search term, the
+ * stack will be used to store operators temporarily
+ */
+ for (i = 0; strings[i]; i++) {
+ OperationType op;
+
+ if (!strings[i] || !*strings[i])
+ continue;
+
+ /* get operator type */
+ if (strcmp (strings[i], "and") == 0) {
+ op = OP_AND;
+ } else if (strcmp (strings [i], "or") == 0) {
+ op = OP_OR;
+ } else {
+ if (last_element_is_term) {
+ /* last element was a search term, assume the "and"
+ * operator between these two elements, and wait
+ * for actually parsing the second term */
+ op = OP_AND;
+ i--;
+ } else {
+ op = OP_NONE;
+ }
+ }
+
+ /* create node for the operation type */
+ switch (op) {
+ case OP_AND:
+ node = tree_node_operator_new (OP_AND);
+ stack_node = g_queue_peek_head (stack);
+
+ /* push in the queue operators with fewer priority, "or" in this case */
+ while (stack_node && stack_node->op == OP_OR) {
+ stack_node = g_queue_pop_head (stack);
+ g_queue_push_head (queue, stack_node);
+
+ stack_node = g_queue_peek_head (stack);
+ }
+
+ g_queue_push_head (stack, node);
+ last_element_is_term = FALSE;
+ break;
+ case OP_OR:
+ node = tree_node_operator_new (OP_OR);
+ g_queue_push_head (stack, node);
+ last_element_is_term = FALSE;
+ break;
+ default:
+ /* search term */
+ parsed_str = tracker_parse_text_to_string (strings[i], TRUE, FALSE, FALSE);
+ node = tree_node_leaf_new (g_strstrip (parsed_str));
+ g_queue_push_head (queue, node);
+ last_element_is_term = TRUE;
+
+ g_free (parsed_str);
+ }
+ }
+
+ /* we've finished parsing, queue all remaining operators in the stack */
+ while ((stack_node = g_queue_pop_head (stack)) != NULL) {
+ g_queue_push_head (queue, stack_node);
+ }
+
+ /* step 2: run through the reverse polish notation and connect nodes to
+ * create a tree, the stack will be used to store temporarily nodes
+ * until they're connected to a parent node */
+ while ((node = g_queue_pop_tail (queue)) != NULL) {
+ switch (node->op) {
+ case OP_AND:
+ case OP_OR:
+ node->left = g_queue_pop_head (stack);
+ node->right = g_queue_pop_head (stack);
+ g_queue_push_head (stack, node);
+ break;
+ default:
+ g_queue_push_head (stack, node);
+ break;
+ }
+
+ priv->tree = node;
+ }
+
+ g_strfreev (strings);
+ g_queue_free (stack);
+ g_queue_free (queue);
+}
+
+void
+tracker_query_tree_set_query (TrackerQueryTree *tree,
+ const gchar *query_str)
+{
+ TrackerQueryTreePrivate *priv;
+ gchar *str;
+
+ g_return_if_fail (TRACKER_IS_QUERY_TREE (tree));
+ g_return_if_fail (query_str != NULL);
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+ str = g_strdup (query_str);
+ g_free (priv->query_str);
+ priv->query_str = str;
+
+ /* construct the parse tree */
+ create_query_tree (tree);
+
+ g_object_notify (G_OBJECT (tree), "query");
+}
+
+G_CONST_RETURN gchar *
+tracker_query_tree_get_query (TrackerQueryTree *tree)
+{
+ TrackerQueryTreePrivate *priv;
+
+ g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+ return priv->query_str;
+}
+
+void
+tracker_query_tree_set_indexer (TrackerQueryTree *tree,
+ Indexer *indexer)
+{
+ TrackerQueryTreePrivate *priv;
+
+ g_return_if_fail (TRACKER_IS_QUERY_TREE (tree));
+ g_return_if_fail (indexer != NULL);
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+ priv->indexer = indexer;
+
+ g_object_notify (G_OBJECT (tree), "indexer");
+}
+
+Indexer *
+tracker_query_tree_get_indexer (TrackerQueryTree *tree)
+{
+ TrackerQueryTreePrivate *priv;
+
+ g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+ return priv->indexer;
+}
+
+void
+tracker_query_tree_set_services (TrackerQueryTree *tree,
+ GArray *services)
+{
+ TrackerQueryTreePrivate *priv;
+ GArray *copy = NULL;
+
+ g_return_if_fail (TRACKER_IS_QUERY_TREE (tree));
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+ if (priv->services != services) {
+ if (services) {
+ copy = g_array_new (TRUE, TRUE, sizeof (gint));
+ g_array_append_vals (copy, services->data, services->len);
+ }
+
+ if (priv->services)
+ g_array_free (priv->services, TRUE);
+
+ priv->services = copy;
+ g_object_notify (G_OBJECT (tree), "services");
+ }
+}
+
+GArray *
+tracker_query_tree_get_services (TrackerQueryTree *tree)
+{
+ TrackerQueryTreePrivate *priv;
+
+ g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+ return priv->services;
+}
+
+static void
+get_tree_words (TreeNode *node, GSList **list)
+{
+ if (!node)
+ return;
+
+ if (node->op == OP_NONE)
+ *list = g_slist_prepend (*list, node->term);
+ else {
+ get_tree_words (node->left, list);
+ get_tree_words (node->right, list);
+ }
+}
+
+GSList *
+tracker_query_tree_get_words (TrackerQueryTree *tree)
+{
+ TrackerQueryTreePrivate *priv;
+ GSList *list = NULL;
+
+ g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+ get_tree_words (priv->tree, &list);
+
+ return list;
+}
+
+static gint
+get_idf_score (WordDetails *details, float idf)
+{
+ guint32 score = tracker_word_details_get_score (details);
+ float f = idf * score * SCORE_MULTIPLIER;
+
+ return (f > 1.0) ? lrintf (f) : 1;
+}
+
+static gboolean
+in_array (GArray *array, gint element)
+{
+ guint i;
+
+ if (!array)
+ return TRUE;
+
+ for (i = 0; i < array->len; i++) {
+ if (g_array_index (array, gint, i) == element)
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static void
+search_hit_data_free (SearchHitData *search_hit)
+{
+ g_slice_free (SearchHitData, search_hit);
+}
+
+static GHashTable *
+get_search_term_hits (TrackerQueryTree *tree,
+ const gchar *term)
+{
+ TrackerQueryTreePrivate *priv;
+ GHashTable *result;
+ WordDetails *details;
+ guint count, i;
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+ result = g_hash_table_new_full (NULL, NULL, NULL,
+ (GDestroyNotify) search_hit_data_free);
+
+ details = tracker_indexer_get_word_hits (priv->indexer, term, &count);
+
+ if (!details)
+ return result;
+
+ for (i = 0; i < count; i++) {
+ SearchHitData *data;
+ gint service;
+
+ service = tracker_word_details_get_service_type (&details[i]);
+
+ if (in_array (priv->services, service)) {
+ data = g_slice_new (SearchHitData);
+ data->service_type_id = service;
+ data->score = get_idf_score (&details[i], (float) 1 / count);
+
+ g_hash_table_insert (result, GINT_TO_POINTER (details[i].id), data);
+ }
+ }
+
+ g_free (details);
+
+ return result;
+}
+
+static void
+compose_hits_foreach (gpointer key,
+ gpointer value,
+ gpointer user_data)
+{
+ SearchHitData *hit1, *hit2, *hit;
+ ComposeHitsData *data;
+
+ data = (ComposeHitsData *) user_data;
+ hit1 = (SearchHitData *) value;
+ hit2 = g_hash_table_lookup (data->other_table, key);
+
+ if (data->op == OP_OR) {
+ /* compose both scores in the same entry */
+ hit = g_slice_dup (SearchHitData, hit1);
+
+ if (hit2)
+ hit->score += hit2->score;
+
+ g_hash_table_insert (data->dest_table, key, hit);
+ } else if (data->op == OP_AND) {
+ /* only compose if the key is in both tables */
+ if (hit2) {
+ hit = g_slice_dup (SearchHitData, hit1);
+ hit->score += hit2->score;
+ g_hash_table_insert (data->dest_table, key, hit);
+ }
+ } else {
+ g_assert_not_reached ();
+ }
+}
+
+static GHashTable *
+compose_hits (OperationType op,
+ GHashTable *left_table,
+ GHashTable *right_table)
+{
+ ComposeHitsData data;
+ GHashTable *foreach_table;
+
+ data.op = op;
+
+ /* try to run the foreach in the table with less hits */
+ if (g_hash_table_size (left_table) < g_hash_table_size (right_table)) {
+ foreach_table = left_table;
+ data.other_table = right_table;
+ } else {
+ foreach_table = right_table;
+ data.other_table = left_table;
+ }
+
+ if (op == OP_OR) {
+ data.dest_table = g_hash_table_ref (data.other_table);
+ } else {
+ data.dest_table = g_hash_table_new_full (NULL, NULL, NULL,
+ (GDestroyNotify) search_hit_data_free);
+ }
+
+ g_hash_table_foreach (foreach_table, (GHFunc) compose_hits_foreach, &data);
+
+ return data.dest_table;
+}
+
+static GHashTable *
+get_node_hits (TrackerQueryTree *tree,
+ TreeNode *node)
+{
+ GHashTable *left_table, *right_table, *result;
+
+ if (!node)
+ return NULL;
+
+ switch (node->op) {
+ case OP_NONE:
+ result = get_search_term_hits (tree, node->term);
+ break;
+ case OP_AND:
+ case OP_OR:
+ left_table = get_node_hits (tree, node->left);
+ right_table = get_node_hits (tree, node->right);
+ result = compose_hits (node->op, left_table, right_table);
+
+ g_hash_table_unref (left_table);
+ g_hash_table_unref (right_table);
+ break;
+ default:
+ g_assert_not_reached ();
+ }
+
+ return result;
+}
+
+static void
+get_hits_foreach (gpointer key,
+ gpointer value,
+ gpointer user_data)
+{
+ GArray *array;
+ TrackerSearchHit hit;
+ SearchHitData *hit_data;
+
+ array = (GArray *) user_data;
+ hit_data = (SearchHitData *) value;
+
+ hit.service_id = GPOINTER_TO_UINT (key);
+ hit.service_type_id = hit_data->service_type_id;
+ hit.score = hit_data->score;
+
+ g_array_append_val (array, hit);
+}
+
+static gint
+compare_search_hits (gconstpointer a,
+ gconstpointer b)
+{
+ TrackerSearchHit *ap, *bp;
+
+ ap = (TrackerSearchHit *) a;
+ bp = (TrackerSearchHit *) b;
+
+ return (bp->score - ap->score);
+}
+
+GArray *
+tracker_query_tree_get_hits (TrackerQueryTree *tree,
+ guint offset,
+ guint limit)
+{
+ TrackerQueryTreePrivate *priv;
+ GHashTable *table;
+ GArray *array;
+
+ g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+
+ g_return_val_if_fail (priv->tree != NULL, NULL);
+
+ table = get_node_hits (tree, priv->tree);
+ array = g_array_sized_new (TRUE, TRUE, sizeof (TrackerSearchHit),
+ g_hash_table_size (table));
+
+ g_hash_table_foreach (table, (GHFunc) get_hits_foreach, array);
+ g_array_sort (array, compare_search_hits);
+
+ if (offset > 0) {
+ g_array_remove_range (array, 0, CLAMP (offset, 0, array->len));
+ }
+
+ if (limit > 0 && limit < array->len) {
+ g_array_remove_range (array, limit, array->len - limit);
+ }
+
+ return array;
+}
+
+gint
+tracker_query_tree_get_hit_count (TrackerQueryTree *tree)
+{
+ TrackerQueryTreePrivate *priv;
+ GHashTable *table;
+ gint count;
+
+ g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), 0);
+
+ priv = TRACKER_QUERY_TREE_GET_PRIVATE (tree);
+ table = get_node_hits (tree, priv->tree);
+
+ if (!table)
+ return 0;
+
+ count = g_hash_table_size (table);
+ g_hash_table_destroy (table);
+
+ return count;
+}
+
+static void
+get_hit_count_foreach (gpointer key,
+ gpointer value,
+ gpointer user_data)
+{
+ GArray *array = (GArray *) user_data;
+ TrackerHitCount count;
+
+ count.service_type_id = GPOINTER_TO_INT (key);
+ count.count = GPOINTER_TO_INT (value);
+
+ g_array_append_val (array, count);
+}
+
+GArray *
+tracker_query_tree_get_hit_counts (TrackerQueryTree *tree)
+{
+ GHashTable *table;
+ GArray *hits, *counts;
+ guint i;
+
+ g_return_val_if_fail (TRACKER_IS_QUERY_TREE (tree), NULL);
+
+ table = g_hash_table_new (NULL, NULL);
+ hits = tracker_query_tree_get_hits (tree, 0, 0);
+ counts = g_array_sized_new (TRUE, TRUE, sizeof (TrackerHitCount), 10);
+
+ for (i = 0; i < hits->len; i++) {
+ TrackerSearchHit hit;
+ gint count, parent_id;
+
+ hit = g_array_index (hits, TrackerSearchHit, i);
+ count = GPOINTER_TO_INT (g_hash_table_lookup (table, GINT_TO_POINTER (hit.service_type_id)));
+ count++;
+
+ g_hash_table_insert (table, GINT_TO_POINTER (hit.service_type_id), GINT_TO_POINTER (count));
+
+ /* update service's parent count too (if it has a parent) */
+ parent_id = tracker_service_manager_get_parent_id_for_service_id (hit.service_type_id);
+
+ if (parent_id != -1) {
+ count = GPOINTER_TO_INT (g_hash_table_lookup (table, GINT_TO_POINTER (parent_id)));
+ count++;
+
+ g_hash_table_insert (table, GINT_TO_POINTER (parent_id), GINT_TO_POINTER (count));
+ }
+ }
+
+ g_hash_table_foreach (table, (GHFunc) get_hit_count_foreach, counts);
+ g_array_free (hits, TRUE);
+
+ return counts;
+}
Added: trunk/src/trackerd/tracker-query-tree.h
==============================================================================
--- (empty file)
+++ trunk/src/trackerd/tracker-query-tree.h Thu Apr 10 13:56:52 2008
@@ -0,0 +1,90 @@
+/* Tracker - indexer and metadata database engine
+ * Copyright (C) 2007 Nokia
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __TRACKER_QUERY_TREE_H__
+#define __TRACKER_QUERY_TREE_H__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+#include <glib-object.h>
+#include "tracker-indexer.h"
+
+#define TRACKER_TYPE_QUERY_TREE (tracker_query_tree_get_type())
+#define TRACKER_QUERY_TREE(o) (G_TYPE_CHECK_INSTANCE_CAST ((o), TRACKER_TYPE_QUERY_TREE, TrackerQueryTree))
+#define TRACKER_QUERY_TREE_CLASS(c) (G_TYPE_CHECK_CLASS_CAST ((c), TRACKER_TYPE_QUERY_TREE, TrackerQueryTreeClass))
+#define TRACKER_IS_QUERY_TREE(o) (G_TYPE_CHECK_INSTANCE_TYPE ((o), TRACKER_TYPE_QUERY_TREE))
+#define TRACKER_IS_QUERY_TREE_CLASS(c) (G_TYPE_CHECK_CLASS_TYPE ((c), TRACKER_TYPE_QUERY_TREE))
+#define TRACKER_QUERY_TREE_GET_CLASS(o) (G_TYPE_INSTANCE_GET_CLASS ((o), TRACKER_TYPE_QUERY_TREE, TrackerQueryTreeClass))
+
+typedef struct TrackerQueryTree TrackerQueryTree;
+typedef struct TrackerQueryTreeClass TrackerQueryTreeClass;
+typedef struct TrackerSearchHit TrackerSearchHit;
+typedef struct TrackerHitCount TrackerHitCount;
+
+struct TrackerQueryTree {
+ GObject parent;
+};
+
+struct TrackerQueryTreeClass {
+ GObjectClass parent_class;
+};
+
+struct TrackerSearchHit {
+ guint32 service_id; /* Service ID of the document */
+ guint32 service_type_id; /* Service type ID of the document */
+ guint32 score; /* Ranking score */
+};
+
+struct TrackerHitCount {
+ guint service_type_id;
+ guint count;
+};
+
+GType tracker_query_tree_get_type (void);
+
+TrackerQueryTree * tracker_query_tree_new (const gchar *query_str,
+ Indexer *indexer,
+ GArray *services);
+
+G_CONST_RETURN gchar * tracker_query_tree_get_query (TrackerQueryTree *tree);
+void tracker_query_tree_set_query (TrackerQueryTree *tree,
+ const gchar *query_str);
+
+Indexer * tracker_query_tree_get_indexer (TrackerQueryTree *tree);
+void tracker_query_tree_set_indexer (TrackerQueryTree *tree,
+ Indexer *indexer);
+
+GArray * tracker_query_tree_get_services (TrackerQueryTree *tree);
+void tracker_query_tree_set_services (TrackerQueryTree *tree,
+ GArray *services);
+
+GSList * tracker_query_tree_get_words (TrackerQueryTree *tree);
+
+GArray * tracker_query_tree_get_hits (TrackerQueryTree *tree,
+ guint offset,
+ guint limit);
+gint tracker_query_tree_get_hit_count (TrackerQueryTree *tree);
+GArray * tracker_query_tree_get_hit_counts (TrackerQueryTree *tree);
+
+
+G_END_DECLS
+
+#endif /* __TRACKER_QUERY_TREE_H__ */
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]