[tracker] Fix buffer overrun in libunistring builds



commit 1714a4c196374a661893944aa795cd0bb116799c
Author: Marius Gedminas <marius gedmin as>
Date:   Fri Oct 23 12:46:12 2015 +0300

    Fix buffer overrun in libunistring builds
    
    libunistring uses UTF-8 strings without a trailing NUL byte.  We're
    passing such strings to tracker_parser_unaccent_nfkd_string() from
    function_sparql_unaccent() in the sqlite interface.  If the string has
    no accented characters, writing a NUL byte at the end will step out of
    bounds.  This causes memory corruption and crashes.
    
    The other caller of tracker_parser_unaccent_nfkd_string() is
    process_word_utf8(), and it looks like it wants a trailing NUL, so let's
    add it there.
    
    There are no more callers of the libunistring version of
    tracker_parser_unaccent_nfkd_string().
    
    (For extra confusion, the libicu version of
    tracker_parser_unaccent_nfkd_string() deals with U+0000-terminated
    UTF-16 strings.)
    
    Should fix https://bugzilla.gnome.org/show_bug.cgi?id=746195

 .../tracker-parser-libunistring.c                  |   13 +++++--------
 1 files changed, 5 insertions(+), 8 deletions(-)
---
diff --git a/src/libtracker-common/tracker-parser-libunistring.c b/src/libtracker-common/tracker-parser-libunistring.c
index 9de6e46..d24c5f1 100644
--- a/src/libtracker-common/tracker-parser-libunistring.c
+++ b/src/libtracker-common/tracker-parser-libunistring.c
@@ -157,7 +157,8 @@ get_word_info (TrackerParser         *parser,
 }
 
 /* The input word in this method MUST be normalized in NFKD form,
- * and given in UTF-8, where str_length is the byte-length */
+ * and given in UTF-8, where str_length is the byte-length
+ * (note: there is no trailing NUL character!) */
 gboolean
 tracker_parser_unaccent_nfkd_string (gpointer  str,
                                      gsize    *str_length)
@@ -169,7 +170,6 @@ tracker_parser_unaccent_nfkd_string (gpointer  str,
 
        g_return_val_if_fail (str != NULL, FALSE);
        g_return_val_if_fail (str_length != NULL, FALSE);
-       g_return_val_if_fail (*str_length > 0, FALSE);
 
        word = (gchar *)str;
        word_length = *str_length;
@@ -209,9 +209,6 @@ tracker_parser_unaccent_nfkd_string (gpointer  str,
                j += utf8_len;
        }
 
-       /* Force proper string end */
-       word[j] = '\0';
-
        /* Set new output length */
        *str_length = j;
 
@@ -289,9 +286,6 @@ process_word_utf8 (TrackerParser         *parser,
                                            normalized, new_word_length);
        }
 
-       /* Set output NIL */
-       normalized[new_word_length] = '\0';
-
        /* UNAC stripping needed? (for non-CJK and non-ASCII) */
        if (parser->enable_unaccent &&
            type == TRACKER_PARSER_WORD_TYPE_OTHER_UNAC &&
@@ -301,6 +295,9 @@ process_word_utf8 (TrackerParser         *parser,
                                            normalized, new_word_length);
        }
 
+       /* Set output NIL */
+       normalized[new_word_length] = '\0';
+
        /* Check if stop word */
        if (parser->ignore_stop_words) {
                *stop_word = tracker_language_is_stop_word (parser->language,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]