[glib] Made g_utf8_to_ucs4_fast() even faster
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib] Made g_utf8_to_ucs4_fast() even faster
- Date: Sat, 4 Jun 2011 02:43:32 +0000 (UTC)
commit 69efeee287f752befdcaae94f5ec981b33be6377
Author: Mikhail Zabaluev <mikhail zabaluev gmail com>
Date: Wed Mar 17 02:34:51 2010 +0200
Made g_utf8_to_ucs4_fast() even faster
https://bugzilla.gnome.org/show_bug.cgi?id=619435
glib/gutf8.c | 45 ++++++++++++++++-----------------------------
1 files changed, 16 insertions(+), 29 deletions(-)
---
diff --git a/glib/gutf8.c b/glib/gutf8.c
index ed245dc..46f71b1 100644
--- a/glib/gutf8.c
+++ b/glib/gutf8.c
@@ -851,7 +851,6 @@ g_utf8_to_ucs4_fast (const gchar *str,
glong len,
glong *items_written)
{
- gint j, charlen;
gunichar *result;
gint n_chars, i;
const gchar *p;
@@ -882,49 +881,37 @@ g_utf8_to_ucs4_fast (const gchar *str,
p = str;
for (i=0; i < n_chars; i++)
{
- gunichar wc = ((unsigned char *)p)[0];
+ gunichar wc = (guchar)*p++;
if (wc < 0x80)
{
result[i] = wc;
- p++;
}
else
{
- if (wc < 0xe0)
- {
- charlen = 2;
- wc &= 0x1f;
- }
- else if (wc < 0xf0)
- {
- charlen = 3;
- wc &= 0x0f;
- }
- else if (wc < 0xf8)
- {
- charlen = 4;
- wc &= 0x07;
- }
- else if (wc < 0xfc)
- {
- charlen = 5;
- wc &= 0x03;
- }
- else
+ gunichar mask = 0x40;
+
+ if (G_UNLIKELY ((wc & mask) == 0))
{
- charlen = 6;
- wc &= 0x01;
+ /* It's an out-of-sequence 10xxxxxxx byte.
+ * Rather than making an ugly hash of this and the next byte
+ * and overrunning the buffer, it's more useful to treat it
+ * with a replacement character */
+ result[i] = 0xfffd;
+ continue;
}
- for (j = 1; j < charlen; j++)
+ do
{
wc <<= 6;
- wc |= ((unsigned char *)p)[j] & 0x3f;
+ wc |= (guchar)(*p++) & 0x3f;
+ mask <<= 5;
}
+ while((wc & mask) != 0);
+
+ wc &= mask - 1;
result[i] = wc;
- p += charlen;
}
}
result[i] = 0;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]