[glib] Optimized the overlong sequence check in g_utf8_get_char_extended()
- From: Behdad Esfahbod <behdad src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib] Optimized the overlong sequence check in g_utf8_get_char_extended()
- Date: Thu, 27 May 2010 18:00:27 +0000 (UTC)
commit c3bc0f4f8fc5125a732f383671ee5bff939423d2
Author: Mikhail Zabaluev <mikhail zabaluev gmail com>
Date: Thu May 27 14:00:12 2010 -0400
Optimized the overlong sequence check in g_utf8_get_char_extended()
Rather than branching a second time to compute the expected sequence
length for the resulting character code, we can just as well record the
minimum code value during the initial branching on the lead byte.
glib/gutf8.c | 26 +++++++++++++++++---------
1 files changed, 17 insertions(+), 9 deletions(-)
---
diff --git a/glib/gutf8.c b/glib/gutf8.c
index 27b1e4c..d0badbb 100644
--- a/glib/gutf8.c
+++ b/glib/gutf8.c
@@ -704,20 +704,23 @@ g_utf8_strrchr (const char *p,
/* Like g_utf8_get_char, but take a maximum length
- * and return (gunichar)-2 on incomplete trailing character
+ * and return (gunichar)-2 on incomplete trailing character;
+ * also check for malformed or overlong sequences
+ * and return (gunichar)-1 in this case.
*/
static inline gunichar
g_utf8_get_char_extended (const gchar *p,
- gssize max_len)
+ gssize max_len)
{
guint i, len;
+ gunichar min_code;
gunichar wc = (guchar) *p;
if (wc < 0x80)
{
return wc;
}
- else if (wc < 0xc0)
+ else if (G_UNLIKELY (wc < 0xc0))
{
return (gunichar)-1;
}
@@ -725,33 +728,38 @@ g_utf8_get_char_extended (const gchar *p,
{
len = 2;
wc &= 0x1f;
+ min_code = 1 << 7;
}
else if (wc < 0xf0)
{
len = 3;
wc &= 0x0f;
+ min_code = 1 << 11;
}
else if (wc < 0xf8)
{
len = 4;
wc &= 0x07;
+ min_code = 1 << 16;
}
else if (wc < 0xfc)
{
len = 5;
wc &= 0x03;
+ min_code = 1 << 21;
}
else if (wc < 0xfe)
{
len = 6;
wc &= 0x01;
+ min_code = 1 << 26;
}
else
{
return (gunichar)-1;
}
-
- if (max_len >= 0 && len > max_len)
+
+ if (G_UNLIKELY (max_len >= 0 && len > max_len))
{
for (i = 1; i < max_len; i++)
{
@@ -764,8 +772,8 @@ g_utf8_get_char_extended (const gchar *p,
for (i = 1; i < len; ++i)
{
gunichar ch = ((guchar *)p)[i];
-
- if ((ch & 0xc0) != 0x80)
+
+ if (G_UNLIKELY ((ch & 0xc0) != 0x80))
{
if (ch)
return (gunichar)-1;
@@ -777,9 +785,9 @@ g_utf8_get_char_extended (const gchar *p,
wc |= (ch & 0x3f);
}
- if (UTF8_LENGTH(wc) != len)
+ if (G_UNLIKELY (wc < min_code))
return (gunichar)-1;
-
+
return wc;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]