[dasher: 43/43] Fix SymbolStream handling of bad UTF characters (in peekBack())
- From: Patrick Welche <pwelche src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [dasher: 43/43] Fix SymbolStream handling of bad UTF characters (in peekBack())
- Date: Thu, 23 Jun 2011 18:59:22 +0000 (UTC)
commit 971713d61e27cc7fc113e5bc912a69acccf79429
Author: Alan Lawrence <acl33 inf phy cam ac uk>
Date: Thu Jun 23 10:09:59 2011 +0100
Fix SymbolStream handling of bad UTF characters (in peekBack())
Src/DasherCore/Alphabet/AlphabetMap.cpp | 13 ++++++++++++-
1 files changed, 12 insertions(+), 1 deletions(-)
---
diff --git a/Src/DasherCore/Alphabet/AlphabetMap.cpp b/Src/DasherCore/Alphabet/AlphabetMap.cpp
index 9c3e82c..5245fb6 100644
--- a/Src/DasherCore/Alphabet/AlphabetMap.cpp
+++ b/Src/DasherCore/Alphabet/AlphabetMap.cpp
@@ -140,17 +140,28 @@ string CAlphabetMap::SymbolStream::peekAhead() {
}
string CAlphabetMap::SymbolStream::peekBack() {
+ bool bSeenHighBit=false;
for(int i=pos-1; i>=0; i--) {
if (buf[i] & 0x80) {
//multibyte character...
+ bSeenHighBit=true;
if (buf[i] & 0x40) {
//START of multibyte character
int numChars = m_utf8_count_array[buf[i]];
+ if (i+numChars>pos) {
+ //last (attempt to read a) symbol was an incomplete UTF8 character (!).
+ // We'll have reported an error already when we saw it the first time, so for now just:
+ return "";
+ }
DASHER_ASSERT(i+numChars==pos);
return string(&buf[i],numChars);
}
//in middle of multibyte, keep going back...
- } else return string(&buf[i],1); //high bit not set -> single-byte char
+ } else {
+ //high bit not set -> single-byte char
+ if (bSeenHighBit) return ""; //followed by a "continuation of multibyte char" without a "first byte of multibyte char" before it. (Malformed!)
+ return string(&buf[i],1);
+ }
}
//fail...relatively gracefully ;-)
return "";
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]