[tracker/parser-unicode-libs-review] Cleanup parsing reserved words in libicu/libunistring parsers



commit 6905b6bc52a94adaaf2e5ac3a28c0480f1dee954
Author: Aleksander Morgado <aleksander lanedo com>
Date:   Fri May 7 11:42:46 2010 +0200

    Cleanup parsing reserved words in libicu/libunistring parsers

 src/libtracker-fts/tracker-parser-libicu.c       |    8 ++---
 src/libtracker-fts/tracker-parser-libunistring.c |    7 ++--
 src/libtracker-fts/tracker-parser-utils.c        |   34 ++++++++++++++++++++++
 src/libtracker-fts/tracker-parser-utils.h        |    4 ++
 4 files changed, 44 insertions(+), 9 deletions(-)
---
diff --git a/src/libtracker-fts/tracker-parser-libicu.c b/src/libtracker-fts/tracker-parser-libicu.c
index cf15ffa..0a280f5 100644
--- a/src/libtracker-fts/tracker-parser-libicu.c
+++ b/src/libtracker-fts/tracker-parser-libicu.c
@@ -222,12 +222,10 @@ parser_next (TrackerParser *parser,
 			continue;
 		}
 
-		/* check if word is reserved (looking at ORIGINAL UTF-8 buffer
-		 *  here! */
+		/* check if word is reserved (looking at ORIGINAL UTF-8 buffer here!) */
 		if (parser->parse_reserved_words &&
-		    word_length_utf8 == 2 &&
-		    parser->txt[current_word_offset_utf8] == 'o' &&
-		    parser->txt[current_word_offset_utf8 + 1] == 'r') {
+		    tracker_parser_is_reserved_word_utf8 (&parser->txt[current_word_offset_utf8],
+		                                          word_length_utf8)) {
 			/* Skip this word and keep on looping */
 			parser->cursor = next_word_offset_uchar;
 			continue;
diff --git a/src/libtracker-fts/tracker-parser-libunistring.c b/src/libtracker-fts/tracker-parser-libunistring.c
index eb308b3..7f6fc6b 100644
--- a/src/libtracker-fts/tracker-parser-libunistring.c
+++ b/src/libtracker-fts/tracker-parser-libunistring.c
@@ -189,11 +189,10 @@ parser_next (TrackerParser *parser,
 			continue;
 		}
 
-		/* check if word is reserved */
+		/* check if word is reserved and skip it if so */
 		if (parser->parse_reserved_words &&
-		    word_length == 2 &&
-		    parser->txt[parser->cursor] == 'o' &&
-		    parser->txt[parser->cursor + 1] == 'r') {
+		    tracker_parser_is_reserved_word_utf8 (&parser->txt[parser->cursor],
+		                                          word_length)) {
 			/* Skip this word and keep on looping */
 			parser->cursor += word_length;
 			continue;
diff --git a/src/libtracker-fts/tracker-parser-utils.c b/src/libtracker-fts/tracker-parser-utils.c
index 222b4a1..e6c8521 100644
--- a/src/libtracker-fts/tracker-parser-utils.c
+++ b/src/libtracker-fts/tracker-parser-utils.c
@@ -169,6 +169,40 @@ tracker_parser_unaccent_utf8_word (const gchar *str,
 }
 
 
+/*
+ * Table of reserved words, closed by a { NULL, 0 } sentinel entry.
+ *  Each word's length is stored explicitly to avoid strlen() calls.
+ */
+typedef struct {
+	const gchar *word;
+	gsize        word_length;
+} TrackerParserReservedWord;
+
+static const TrackerParserReservedWord reserved_words[] = {
+	{ "or", 2 },
+	{ NULL, 0 }
+};
+
+gboolean
+tracker_parser_is_reserved_word_utf8 (const gchar *word,
+                                      gsize word_length)
+{
+	gint i = 0;
+
+	/* Walk the table up to its NULL sentinel; a hit needs equal length and equal bytes */
+	while (reserved_words[i].word != NULL) {
+		if (word_length == reserved_words[i].word_length &&
+		    strncmp (word,
+		             reserved_words[i].word,
+		             word_length) == 0) {
+			return TRUE;
+		}
+		i++;
+	}
+
+	return FALSE;
+}
+
 
 #if TRACKER_PARSER_DEBUG_HEX
 void
diff --git a/src/libtracker-fts/tracker-parser-utils.h b/src/libtracker-fts/tracker-parser-utils.h
index 2e7a2c6..9c007bd 100644
--- a/src/libtracker-fts/tracker-parser-utils.h
+++ b/src/libtracker-fts/tracker-parser-utils.h
@@ -45,6 +45,10 @@ gchar *tracker_parser_unaccent_UChar_word (const UChar *string,
 #endif
 
 
+gboolean tracker_parser_is_reserved_word_utf8 (const gchar *word,
+                                               gsize word_length);
+
+
 /* Define to 1 if you want to enable debugging logs showing HEX contents
  * of the words being parsed */
 #define TRACKER_PARSER_DEBUG_HEX 0



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]