[tracker/parser-unicode-libs-review] Cleanup parsing reserved words in libicu/libunistring parsers
- From: Aleksander Morgado <aleksm src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker/parser-unicode-libs-review] Cleanup parsing reserved words in libicu/libunistring parsers
- Date: Fri, 7 May 2010 12:01:23 +0000 (UTC)
commit 6905b6bc52a94adaaf2e5ac3a28c0480f1dee954
Author: Aleksander Morgado <aleksander lanedo com>
Date: Fri May 7 11:42:46 2010 +0200
Cleanup parsing reserved words in libicu/libunistring parsers
src/libtracker-fts/tracker-parser-libicu.c | 8 ++---
src/libtracker-fts/tracker-parser-libunistring.c | 7 ++--
src/libtracker-fts/tracker-parser-utils.c | 34 ++++++++++++++++++++++
src/libtracker-fts/tracker-parser-utils.h | 4 ++
4 files changed, 44 insertions(+), 9 deletions(-)
---
diff --git a/src/libtracker-fts/tracker-parser-libicu.c b/src/libtracker-fts/tracker-parser-libicu.c
index cf15ffa..0a280f5 100644
--- a/src/libtracker-fts/tracker-parser-libicu.c
+++ b/src/libtracker-fts/tracker-parser-libicu.c
@@ -222,12 +222,10 @@ parser_next (TrackerParser *parser,
continue;
}
- /* check if word is reserved (looking at ORIGINAL UTF-8 buffer
- * here! */
+ /* check if word is reserved (looking at ORIGINAL UTF-8 buffer here! */
if (parser->parse_reserved_words &&
- word_length_utf8 == 2 &&
- parser->txt[current_word_offset_utf8] == 'o' &&
- parser->txt[current_word_offset_utf8 + 1] == 'r') {
+ tracker_parser_is_reserved_word_utf8 (&parser->txt[current_word_offset_utf8],
+ word_length_utf8)) {
/* Skip this word and keep on looping */
parser->cursor = next_word_offset_uchar;
continue;
diff --git a/src/libtracker-fts/tracker-parser-libunistring.c b/src/libtracker-fts/tracker-parser-libunistring.c
index eb308b3..7f6fc6b 100644
--- a/src/libtracker-fts/tracker-parser-libunistring.c
+++ b/src/libtracker-fts/tracker-parser-libunistring.c
@@ -189,11 +189,10 @@ parser_next (TrackerParser *parser,
continue;
}
- /* check if word is reserved */
+ /* check if word is reserved and skip it if so */
if (parser->parse_reserved_words &&
- word_length == 2 &&
- parser->txt[parser->cursor] == 'o' &&
- parser->txt[parser->cursor + 1] == 'r') {
+ tracker_parser_is_reserved_word_utf8 (&parser->txt[parser->cursor],
+ word_length)) {
/* Skip this word and keep on looping */
parser->cursor += word_length;
continue;
diff --git a/src/libtracker-fts/tracker-parser-utils.c b/src/libtracker-fts/tracker-parser-utils.c
index 222b4a1..e6c8521 100644
--- a/src/libtracker-fts/tracker-parser-utils.c
+++ b/src/libtracker-fts/tracker-parser-utils.c
@@ -169,6 +169,40 @@ tracker_parser_unaccent_utf8_word (const gchar *str,
}
+/*
+ * Definition of the possible reserved words.
+ * Length of word is explicitly given to avoid strlen() calls
+ */
+typedef struct {
+ const gchar *word;
+ gsize word_length;
+} TrackerParserReservedWord;
+
+static const TrackerParserReservedWord reserved_words[] = {
+ { "or", 2 },
+ { NULL, 0 }
+};
+
+gboolean
+tracker_parser_is_reserved_word_utf8 (const gchar *word,
+ gsize word_length)
+{
+ gint i = 0;
+
+ /* Loop the array of predefined reserved words */
+ while (reserved_words[i].word != NULL) {
+ if (word_length == reserved_words[i].word_length &&
+ strncmp (word,
+ reserved_words[i].word,
+ word_length) == 0) {
+ return TRUE;
+ }
+ i++;
+ }
+
+ return FALSE;
+}
+
#if TRACKER_PARSER_DEBUG_HEX
void
diff --git a/src/libtracker-fts/tracker-parser-utils.h b/src/libtracker-fts/tracker-parser-utils.h
index 2e7a2c6..9c007bd 100644
--- a/src/libtracker-fts/tracker-parser-utils.h
+++ b/src/libtracker-fts/tracker-parser-utils.h
@@ -45,6 +45,10 @@ gchar *tracker_parser_unaccent_UChar_word (const UChar *string,
#endif
+gboolean tracker_parser_is_reserved_word_utf8 (const gchar *word,
+ gsize word_length);
+
+
/* Define to 1 if you want to enable debugging logs showing HEX contents
* of the words being parsed */
#define TRACKER_PARSER_DEBUG_HEX 0
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]