[tracker/parser-unicode-libs-review: 70/85] Avoid unneeded stack buffer in parser_next



commit 47f6589bffcfdea46e6f052aad465b8915c114c7
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Thu Apr 29 10:45:58 2010 +0200

    Avoid unneeded stack buffer in parser_next

 src/libtracker-fts/tracker-parser.c |   47 ++++++++++++++++++++---------------
 1 files changed, 27 insertions(+), 20 deletions(-)
---
diff --git a/src/libtracker-fts/tracker-parser.c b/src/libtracker-fts/tracker-parser.c
index 1e2b8b7..5ffa73e 100644
--- a/src/libtracker-fts/tracker-parser.c
+++ b/src/libtracker-fts/tracker-parser.c
@@ -598,16 +598,15 @@ parser_next (TrackerParser *parser,
  * Just check byte per byte, and if any of the bytes is >127, then it's not
  *  ASCII-7 */
 static gboolean
-is_ascii_word (const gchar *word)
+is_ascii_word (const gchar *word,
+               gsize        length)
 {
-	guchar *i;
+	gsize i;
 
-	i = (guchar *)word;
-	while (*i != '\0') {
-		if (!IS_ASCII_BYTE (*i)) {
+	for (i = 0; i < length; i++) {
+		if (!IS_ASCII_BYTE ((guchar)word[i])) {
 			return FALSE;
 		}
-		i++;
 	}
 	return TRUE;
 }
@@ -634,8 +633,7 @@ parser_next (TrackerParser *parser,
 		ucs4_t first_unichar;
 		gint first_unichar_len;
 		gsize i;
-		gsize new_length;
-		gchar word_buffer [WORD_BUFFER_LENGTH];
+		gsize truncated_length;
 		gboolean do_strip;
 
 		/* Get first character of the word as UCS4 */
@@ -685,25 +683,23 @@ parser_next (TrackerParser *parser,
 			continue;
 		}
 
-		/* compute truncated word length if needed */
-		new_length = (word_length < WORD_BUFFER_LENGTH ?
-		              word_length :
-		              WORD_BUFFER_LENGTH - 1);
-
-		/* Word here needs always to be NIL-terminated */
-		memcpy (word_buffer, &(parser->txt[parser->cursor]), new_length);
-		word_buffer[new_length] = '\0';
+		/* Compute truncated word length if needed (to avoid extremely
+		 *  long words) */
+		truncated_length = (word_length < WORD_BUFFER_LENGTH ?
+		                    word_length :
+		                    WORD_BUFFER_LENGTH - 1);
 
 		/* Enable UNAC stripping only if no ASCII and no CJK */
-		do_strip = (!is_ascii_word (word_buffer) &&
+		do_strip = (!is_ascii_word (&(parser->txt[parser->cursor]),
+		                            truncated_length) &&
 		            !IS_CJK_UCS4 (first_unichar));
 
 		/* Process the word here. If it fails, we can still go
 		 *  to the next one. Returns newly allocated string
 		 *  always */
 		processed_word = tracker_parser_process_word (parser,
-		                                              word_buffer,
-		                                              new_length,
+		                                              &(parser->txt[parser->cursor]),
+		                                              truncated_length,
 		                                              do_strip);
 		if (!processed_word) {
 			/* Skip this word and keep on looping */
@@ -975,6 +971,9 @@ tracker_parser_process_word (TrackerParser *parser,
 	g_return_val_if_fail (parser != NULL, NULL);
 	g_return_val_if_fail (word != NULL, NULL);
 
+	/* If length is negative (e.g. -1), the input word MUST be NUL-terminated.
+	 * Otherwise, no terminator is required, as the length to process
+	 *  is given as an input argument */
 	if (length < 0) {
 		length = strlen (word);
 	}
@@ -983,10 +982,18 @@ tracker_parser_process_word (TrackerParser *parser,
 #if TRACKER_PARSER_DEBUG_HEX
 	{
 		gchar *aux;
+		gchar *word_aux;
+
+		/* Word may not be NUL-terminated */
+		word_aux = g_malloc (length + 1);
+		memcpy (word_aux, word, length);
+		word_aux[length] = '\0';
+
 		aux = tracker_strhex (word, length, ':');
 		g_message ("ORIGINAL word: '%s' (%s)",
-		           word, aux);
+		           word_aux, aux);
 		g_free (aux);
+		g_free (word_aux);
 	}
 #endif
 



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]