tracker r2074 - in branches/indexer-split: . src/libtracker-common src/libtracker-db src/tracker-extract src/tracker-indexer src/tracker-utils src/trackerd tests/libtracker-common tests/libtracker-db



Author: mr
Date: Thu Aug 14 15:25:30 2008
New Revision: 2074
URL: http://svn.gnome.org/viewvc/tracker?rev=2074&view=rev

Log:
	* src/libtracker-db/tracker-db-index.c: (tracker_db_index_flush):
	If the index is not open when we try to flush, try to open it
	first.

	* src/tracker-extract/tracker-extract-tiff.c:
	(tracker_extract_tiff): 
	* tests/libtracker-db/tracker-db-dbus-test.c:
	(test_dbus_query_result_to_ptr_array):  Fixed compiler warnings.

	* src/tracker-indexer/tracker-indexer.c: Fixed a nasty crash I
	introduced in the last commit and cleaned up the merge function
	for word tables. Also make sure we free basename and dirname in
	delete_item(); this was quite a leak.

	* src/tracker-utils/tracker-files.c: (main): Fixed an integer
	(%d) value being printed with a %s format.

	* src/trackerd/tracker-db.c: (update_metadata_index): 
	* tests/libtracker-common/tracker-parser-test.c: Use
	g_hash_table_unref() instead of the custom function we had for the
	parser for freeing hash tables.

	* src/trackerd/tracker-processor.c: (tracker_processor_stop):
	Fixed warnings raised when trackerd was stopped before it started
	processing (due to Ctrl+C): private->timer was still NULL at that
	point, so using it caused warnings.


Modified:
   branches/indexer-split/ChangeLog
   branches/indexer-split/src/libtracker-common/tracker-parser.c
   branches/indexer-split/src/libtracker-common/tracker-parser.h
   branches/indexer-split/src/libtracker-db/tracker-db-index.c
   branches/indexer-split/src/tracker-extract/tracker-extract-tiff.c
   branches/indexer-split/src/tracker-indexer/tracker-indexer.c
   branches/indexer-split/src/tracker-utils/tracker-files.c
   branches/indexer-split/src/trackerd/tracker-db.c
   branches/indexer-split/src/trackerd/tracker-processor.c
   branches/indexer-split/tests/libtracker-common/tracker-parser-test.c
   branches/indexer-split/tests/libtracker-db/tracker-db-dbus-test.c

Modified: branches/indexer-split/src/libtracker-common/tracker-parser.c
==============================================================================
--- branches/indexer-split/src/libtracker-common/tracker-parser.c	(original)
+++ branches/indexer-split/src/libtracker-common/tracker-parser.c	Thu Aug 14 15:25:30 2008
@@ -166,7 +166,7 @@
               gboolean          filter_words, 
               gboolean          filter_numbers, 
               gboolean          delimit_hyphen,
-              const gchar     **index_word)
+              gchar           **index_word)
 {
         TrackerParserWordType word_type;
         gunichar              word[64];
@@ -286,60 +286,59 @@
                 word[length -1] = c;
         }
         
-        if (is_valid) {
-                if (word_type == TRACKER_PARSER_WORD_NUM) {
-                        if (!filter_numbers || length >= INDEX_NUMBER_MIN_LENGTH) {
-                                *index_word = g_ucs4_to_utf8 (word, length, NULL, NULL, NULL);
-                        } 
+        if (!is_valid) {
+                return p;
+        }
+
+        if (word_type == TRACKER_PARSER_WORD_NUM) {
+                if (!filter_numbers || length >= INDEX_NUMBER_MIN_LENGTH) {
+                        *index_word = g_ucs4_to_utf8 (word, length, NULL, NULL, NULL);
+                } 
+        } else if (length >= min_word_length) {
+                const gchar *stem_word;
+                gchar       *stripped_word;
+                gchar       *str;
+                gchar       *utf8;
+                guint32      len;
+                
+                utf8 = g_ucs4_to_utf8 (word, length, NULL, &bytes, NULL);
+                
+                if (!utf8) {
+                        return p;
+                }
+		
+                if (do_strip) {
+                        stripped_word = strip_word (utf8, bytes, &len);
                 } else {
-                        if (length >= min_word_length) {
-                                gchar 	*str = NULL;
-                                gchar   *tmp;
-                                guint32  len;
-                                gchar   *utf8;
-                                
-                                utf8 = g_ucs4_to_utf8 (word, length, NULL, &bytes, NULL);
-                                
-                                if (!utf8) {
-                                        return p;
-                                }
-				
-                                if (do_strip) {
-                                        str = strip_word (utf8, bytes, &len);
-                                }
-                                
-                                if (!str) {
-                                        tmp = g_utf8_normalize (utf8, bytes, G_NORMALIZE_NFC);
-                                } else {
-                                        tmp = g_utf8_normalize (str, len, G_NORMALIZE_NFC);
-                                        g_free (str);
-                                }
-                                
-                                g_free (utf8);
-                                
-                                *index_word = tracker_language_stem_word (language, 
-                                                                          tmp, 
-                                                                          strlen (tmp));
-                                g_free (tmp);
-                                
-                                if (filter_words && is_stop_word (language, *index_word)) {
-                                        *index_word = NULL;
-                                }
-                        }
+                        stripped_word = NULL;
                 }
-        } 
+                
+                if (!stripped_word) {
+                        str = g_utf8_normalize (utf8, 
+                                                bytes, 
+                                                G_NORMALIZE_NFC);
+                } else {
+                        str = g_utf8_normalize (stripped_word, 
+                                                len, 
+                                                G_NORMALIZE_NFC);
+                        g_free (stripped_word);
+                }
+                
+                g_free (utf8);
+                               
+                stem_word = tracker_language_stem_word (language, 
+                                                        str, 
+                                                        strlen (str));
+                g_free (str);
+               
+                if (!filter_words || !is_stop_word (language, stem_word)) {
+                        *index_word = g_strdup (stem_word);
+                }
+        }
         
         return p;	
 }
 
-static void
-delete_words (gpointer key,
-              gpointer value,
-              gpointer user_data)
-{
-	g_free (key);
-}
-
 gchar *
 tracker_parser_text_to_string (const gchar     *txt, 
                                TrackerLanguage *language,
@@ -425,8 +424,8 @@
 		parsed_text = g_string_free (strs, FALSE);
 		return g_strstrip (parsed_text);
         } else {
-                GString     *str;
-                const gchar *word;
+                GString *str;
+                gchar   *word;
 
                 str = g_string_new (" ");
                 
@@ -444,6 +443,7 @@
                         if (word) {
                                 g_string_append (str, word);
                                 g_string_append_c (str, ' ');
+                                g_free (word);
                         }
                         
                         if (!p || !*p) {
@@ -494,7 +494,10 @@
 
         /* Use this for already processed text only */
 	if (!word_table) {
-		word_table = g_hash_table_new (g_str_hash, g_str_equal);
+		word_table = g_hash_table_new_full (g_str_hash, 
+                                                    g_str_equal,
+                                                    g_free,
+                                                    NULL);
 	} 
 
 	if (!txt || weight == 0) {
@@ -525,11 +528,11 @@
 }
 
 static gboolean
-word_table_increment (GHashTable  *word_table,
-                      const gchar *index_word, 
-                      gint         weight,
-                      gint         total_words,
-                      gint         max_words_to_index) 
+word_table_increment (GHashTable *word_table,
+                      gchar      *index_word, 
+                      gint        weight,
+                      gint        total_words,
+                      gint        max_words_to_index) 
 {
         gboolean update_count;
 
@@ -542,10 +545,11 @@
                 p = g_hash_table_lookup (word_table, index_word);
                 count = GPOINTER_TO_INT (p);
 
-                /* Take a copy the first time */
-                g_hash_table_insert (word_table, 
-                                     count == 0 ? g_strdup (index_word) : (gchar*) index_word, 
-                                     GINT_TO_POINTER (count + weight));
+                g_hash_table_replace (word_table, 
+                                      index_word,
+                                      GINT_TO_POINTER (count + weight));
+        } else {
+                g_free (index_word);
         }
 
         return update_count;
@@ -571,7 +575,10 @@
         g_return_val_if_fail (language != NULL, NULL);
 
 	if (!word_table) {
-		word_table = g_hash_table_new (g_str_hash, g_str_equal);
+		word_table = g_hash_table_new_full (g_str_hash, 
+                                                    g_str_equal,
+                                                    g_free,
+                                                    NULL);
 		total_words = 0;
 	} else {
 		total_words = g_hash_table_size (word_table);
@@ -612,31 +619,29 @@
 				end_word = g_utf8_offset_to_pointer (txt, i);
 
 				if (start_word != end_word) {
-					gchar    *s;
+					gchar    *str;
 					gchar    *index_word;
                                         gboolean  was_updated;
 
 					/* Normalize word */
-                                        s = g_utf8_casefold (start_word, end_word - start_word);
-					if (!s) {
+                                        str = g_utf8_casefold (start_word, end_word - start_word);
+					if (!str) {
                                                 continue;
                                         }
 
-                                        index_word = g_utf8_normalize (s, -1, G_NORMALIZE_NFC);
-					g_free (s);
+                                        index_word = g_utf8_normalize (str, -1, G_NORMALIZE_NFC);
+					g_free (str);
 
 					if (!index_word) {
                                                 continue;
                                         }
-					
-					total_words++;
 
+					total_words++;
                                         was_updated = word_table_increment (word_table, 
                                                                             index_word,        
                                                                             weight, 
                                                                             total_words,
                                                                             max_words_to_index);
-                                        g_free (index_word);
 
                                         if (!was_updated) {
                                                 break;
@@ -653,7 +658,7 @@
 
 		g_free (attrs);		
 	} else {
-                const gchar *word;
+                gchar *word;
 
 		while (TRUE) {
 			i++;
@@ -686,12 +691,3 @@
 
 	return word_table;
 }
-
-void
-tracker_parser_text_free (GHashTable *table)
-{
-	if (table) {
-		g_hash_table_foreach (table, delete_words, NULL);		
-		g_hash_table_destroy (table);
-	}
-}

Modified: branches/indexer-split/src/libtracker-common/tracker-parser.h
==============================================================================
--- branches/indexer-split/src/libtracker-common/tracker-parser.h	(original)
+++ branches/indexer-split/src/libtracker-common/tracker-parser.h	Thu Aug 14 15:25:30 2008
@@ -66,7 +66,6 @@
 					    TrackerLanguage *language,
 					    gint             max_word_length,
 					    gint             min_word_length);
-void        tracker_parser_text_free       (GHashTable      *table);
 
 G_END_DECLS
 

Modified: branches/indexer-split/src/libtracker-db/tracker-db-index.c
==============================================================================
--- branches/indexer-split/src/libtracker-db/tracker-db-index.c	(original)
+++ branches/indexer-split/src/libtracker-db/tracker-db-index.c	Thu Aug 14 15:25:30 2008
@@ -821,6 +821,13 @@
 
         g_mutex_lock (priv->mutex);
 
+	if (!priv->index) {
+		g_debug ("Index was not open for flush, opening first...");
+		g_mutex_unlock (priv->mutex);
+		tracker_db_index_open (index);
+		g_mutex_lock (priv->mutex);
+	}
+
 	if (priv->index) {
 		size = g_hash_table_size (priv->cache);
 		g_debug ("Flushing index with %d items in cache", size);
@@ -829,8 +836,7 @@
 					     cache_flush_foreach, 
 					     priv->index);
 	} else {
-		g_warning ("Flushing index while closed, "
-			   "this indicates a problem in the software");
+		g_warning ("Could not open index, cache was not flushed");
 		size = 0;
 	}
 

Modified: branches/indexer-split/src/tracker-extract/tracker-extract-tiff.c
==============================================================================
--- branches/indexer-split/src/tracker-extract/tracker-extract-tiff.c	(original)
+++ branches/indexer-split/src/tracker-extract/tracker-extract-tiff.c	Thu Aug 14 15:25:30 2008
@@ -99,8 +99,6 @@
 {
 	TIFF     *image;
 	long      exifOffset;
-	gchar    *xmpOffset;
-	uint32    size;
 
 	TiffTag  *tag;
 

Modified: branches/indexer-split/src/tracker-indexer/tracker-indexer.c
==============================================================================
--- branches/indexer-split/src/tracker-indexer/tracker-indexer.c	(original)
+++ branches/indexer-split/src/tracker-indexer/tracker-indexer.c	Thu Aug 14 15:25:30 2008
@@ -881,7 +881,7 @@
 	}
 
 	if (full_parsing) {
-		parsed = tracker_parser_text (parsed,
+		parsed = tracker_parser_text (NULL,
 					      text,
 					      weight_factor,
 					      indexer->private->language,
@@ -894,13 +894,13 @@
 		/* We dont know the exact property weight. 
 		   Big value works.
 		 */
-		parsed = tracker_parser_text_fast (parsed,
+		parsed = tracker_parser_text_fast (NULL,
 						   text,
 						   weight_factor); 
 	}
 
 	g_hash_table_iter_init (&iter, parsed);
-
+	
 	while (g_hash_table_iter_next (&iter, &key, &value)) {
 		tracker_db_index_add_word (indexer->private->index,
 					   key,
@@ -908,8 +908,8 @@
 					   service_type,
 					   GPOINTER_TO_INT (value));
 	}
-
-	tracker_parser_text_free (parsed);
+	
+	g_hash_table_unref (parsed);
 }
 
 static void
@@ -1014,28 +1014,34 @@
 		  gpointer user_data)
 {
 	GHashTable *new_table;
-	gpointer    k = NULL;
-	gpointer    v = NULL;
+	gpointer    k;
+	gpointer    v;
 	gchar      *word;
-	gint	    score;
+	gint	    new_score;
 
 	word = key;
-	score = GPOINTER_TO_INT (value);
+	new_score = GPOINTER_TO_INT (value);
 	new_table = user_data;
 
-	if (!g_hash_table_lookup_extended (new_table, word, &k, &v)) {
-		g_hash_table_insert (new_table, 
-				     g_strdup (word), 
-				     GINT_TO_POINTER (0 - score));
-	} else {
-                if ((GPOINTER_TO_INT (v) - score) != 0) {
+	if (g_hash_table_lookup_extended (new_table, word, &k, &v)) {
+		gint old_score;
+		gint calculated_score;
+		
+		old_score = GPOINTER_TO_INT (v);
+		calculated_score = old_score - new_score;
+
+		if (calculated_score != 0) {
                         g_hash_table_insert (new_table, 
-                                             (gchar *) word, 
-                                             GINT_TO_POINTER (GPOINTER_TO_INT (v) - score));
+                                             g_strdup (word), 
+                                             GINT_TO_POINTER (calculated_score));
                 } else {
                         /* The word is the same in old and new text */
                         g_hash_table_remove (new_table, word);
-                }
+		}
+	} else {
+		g_hash_table_insert (new_table, 
+				     g_strdup (word), 
+				     GINT_TO_POINTER (0 - new_score));
 	}
 }
 
@@ -1152,8 +1158,8 @@
 						 tracker_service_get_id (service_def), 
 						 new_words);
 
-			tracker_parser_text_free (old_words);
-			tracker_parser_text_free (new_words);
+			g_hash_table_unref (old_words);
+			g_hash_table_unref (new_words);
 		}
 
 		g_free (old_text);
@@ -1198,6 +1204,8 @@
 
 		if (service_type_id == 0) {
 			/* File didn't exist, nothing to delete */
+			g_free (dirname);
+			g_free (basename);
 			return;
 		}
 

Modified: branches/indexer-split/src/tracker-utils/tracker-files.c
==============================================================================
--- branches/indexer-split/src/tracker-utils/tracker-files.c	(original)
+++ branches/indexer-split/src/tracker-utils/tracker-files.c	Thu Aug 14 15:25:30 2008
@@ -111,7 +111,7 @@
 		if (error) {
 			g_printerr ("%s:'%s', %s\n",
 				    _("Could not get files by service type"), 
-				    type,
+				    service,
 				    error->message);
 			g_error_free (error);
 

Modified: branches/indexer-split/src/trackerd/tracker-db.c
==============================================================================
--- branches/indexer-split/src/trackerd/tracker-db.c	(original)
+++ branches/indexer-split/src/trackerd/tracker-db.c	Thu Aug 14 15:25:30 2008
@@ -261,8 +261,8 @@
 	tracker_db_update_differential_index (old_table, new_table, id, sid);
 #endif
 
-	tracker_parser_text_free (old_table);
-	tracker_parser_text_free (new_table);
+	g_hash_table_unref (old_table);
+	g_hash_table_unref (new_table);
 }
 
 static gchar *

Modified: branches/indexer-split/src/trackerd/tracker-processor.c
==============================================================================
--- branches/indexer-split/src/trackerd/tracker-processor.c	(original)
+++ branches/indexer-split/src/trackerd/tracker-processor.c	Thu Aug 14 15:25:30 2008
@@ -1305,6 +1305,8 @@
 void
 tracker_processor_stop (TrackerProcessor *processor)
 {
+	gdouble elapsed;
+
 	g_return_if_fail (TRACKER_IS_PROCESSOR (processor));
 
 	if (processor->private->interrupted) {
@@ -1323,10 +1325,15 @@
 	g_message ("Process %s\n",
 		   processor->private->finished ? "has finished" : "been stopped");
 
-	g_timer_stop (processor->private->timer);
+	if (processor->private->timer) {
+		g_timer_stop (processor->private->timer);
+		elapsed = g_timer_elapsed (processor->private->timer, NULL);
+	} else {
+		elapsed = 0;
+	}
 
 	g_message ("Total time taken : %4.4f seconds",
-		   g_timer_elapsed (processor->private->timer, NULL));
+		   elapsed);
 	g_message ("Total directories: %d (%d ignored)", 
 		   processor->private->directories_found,
 		   processor->private->directories_ignored);

Modified: branches/indexer-split/tests/libtracker-common/tracker-parser-test.c
==============================================================================
--- branches/indexer-split/tests/libtracker-common/tracker-parser-test.c	(original)
+++ branches/indexer-split/tests/libtracker-common/tracker-parser-test.c	Thu Aug 14 15:25:30 2008
@@ -51,7 +51,7 @@
         
         g_assert_cmpint (g_hash_table_size (result), ==, 5);
 
-        tracker_parser_text_free (result);
+        g_hash_table_unref (result);
 }
 
 /*
@@ -75,7 +75,7 @@
         g_hash_table_foreach (result, assert_key_length, GINT_TO_POINTER (max_length));
         g_assert_cmpint (g_hash_table_size (result), ==, 8);
 
-        tracker_parser_text_free (result);        
+        g_hash_table_unref (result);
 }
 
 /*
@@ -100,7 +100,7 @@
 
         g_assert_cmpint (g_hash_table_size (result), ==, 4);
 
-        tracker_parser_text_free (result);        
+        g_hash_table_unref (result);
         result = NULL;
 
         /* No filter */
@@ -117,7 +117,7 @@
 
         g_assert (g_hash_table_lookup (result, "12345678"));
 
-        tracker_parser_text_free (result);        
+        g_hash_table_unref (result);
         result = NULL;
 }
 

Modified: branches/indexer-split/tests/libtracker-db/tracker-db-dbus-test.c
==============================================================================
--- branches/indexer-split/tests/libtracker-db/tracker-db-dbus-test.c	(original)
+++ branches/indexer-split/tests/libtracker-db/tracker-db-dbus-test.c	Thu Aug 14 15:25:30 2008
@@ -120,7 +120,6 @@
 {
         TrackerDBResultSet *result_set = NULL;
         GPtrArray *result = NULL;
-        gint       count;
 
         /* NULL */
         result = tracker_dbus_query_result_to_ptr_array (result_set);



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]