[gimp] Last part of fixes for handling UTF-8 coded strings (Bugs 572865 & 628893)
- From: Kevin Cozens <kcozens src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gimp] Last part of fixes for handling UTF-8 coded strings (Bugs 572865 & 628893)
- Date: Sun, 19 Sep 2010 21:48:35 +0000 (UTC)
commit ae6670ba8b20f03a7d12d5eba57c5be3b9dcb1db
Author: Kevin Cozens <kcozens cvs gnome org>
Date: Sun Sep 19 17:46:41 2010 -0400
Last part of fixes for handling UTF-8 coded strings (Bugs 572865 & 628893)
At the end of a double quoted string, readstrexp() was passing byte count
instead of character count to mk_counted_string(). Cleaned up basic_inchar().
basic_inchar() now ignores invalid UTF-8 byte sequences when reading from a file or a buffer.
plug-ins/script-fu/tinyscheme/scheme.c | 100 ++++++++++++++++----------------
1 files changed, 50 insertions(+), 50 deletions(-)
---
diff --git a/plug-ins/script-fu/tinyscheme/scheme.c b/plug-ins/script-fu/tinyscheme/scheme.c
index 3cd8d40..cd6769c 100644
--- a/plug-ins/script-fu/tinyscheme/scheme.c
+++ b/plug-ins/script-fu/tinyscheme/scheme.c
@@ -1043,11 +1043,11 @@ static char *store_string(scheme *sc, int char_cnt,
else
len = q2 - str;
q=(gchar*)sc->malloc(len+1);
- }
- else {
+ } else {
len = g_unichar_to_utf8(fill, utf8);
q=(gchar*)sc->malloc(char_cnt*len+1);
}
+
if(q==0) {
sc->no_memory=1;
return sc->strbuff;
@@ -1565,82 +1565,81 @@ static void port_close(scheme *sc, pointer p, int flag) {
}
}
+/* This routine will ignore byte sequences that are not valid UTF-8 */
static gunichar basic_inchar(port *pt) {
- int len;
-
if(pt->kind & port_file) {
- unsigned char utf8[7];
int c;
- int i;
c = fgetc(pt->rep.stdio.file);
- if (c == EOF) return EOF;
- utf8[0] = c;
while (TRUE)
{
- if (utf8[0] <= 0x7f)
+ if (c == EOF) return EOF;
+
+ if (c <= 0x7f)
+ return (gunichar) c;
+
+ /* Is this byte an invalid lead per RFC-3629? */
+ if (c < 0xc2 || c > 0xf4)
{
- return (gunichar) utf8[0];
+ /* Ignore invalid lead byte and get the next character */
+ c = fgetc(pt->rep.stdio.file);
}
-
- /* Check for valid lead byte per RFC-3629 */
- if (utf8[0] >= 0xc2 && utf8[0] <= 0xf4)
+ else /* Byte is valid lead */
{
- len = utf8_length[utf8[0] & 0x3F];
+ unsigned char utf8[7];
+ int len;
+ int i;
+
+ utf8[0] = c; /* Save the lead byte */
+
+ len = utf8_length[c & 0x3F];
for (i = 1; i <= len; i++)
{
c = fgetc(pt->rep.stdio.file);
- if (c == EOF) return EOF;
- utf8[i] = c;
- if ((utf8[i] & 0xc0) != 0x80)
- {
+
+ /* Stop reading if this is not a continuation character */
+ if ((c & 0xc0) != 0x80)
break;
- }
+
+ utf8[i] = c;
}
- if (i > len)
+ if (i > len) /* Read the expected number of bytes? */
{
- return g_utf8_get_char ((char *) utf8);
+ return g_utf8_get_char_validated ((char *) utf8,
+ sizeof(utf8));
}
- /* we did not get enough continuation characters. */
- utf8[0] = utf8[i]; /* ignore and restart */
+ /* Not enough continuation characters so ignore and restart */
}
- else
- {
- /* Everything else is invalid and will be ignored */
- c = fgetc(pt->rep.stdio.file);
- if (c == EOF) return EOF;
- utf8[0] = c;
- }
- }
+ } /* end of while (TRUE) */
} else {
- if(*pt->rep.string.curr == 0 ||
- pt->rep.string.curr == pt->rep.string.past_the_end) {
- return EOF;
- } else {
- gunichar c;
+ gunichar c;
+ int len;
+
+ while (TRUE)
+ {
+ /* Found NUL or at end of input buffer? */
+ if (*pt->rep.string.curr == 0 ||
+ pt->rep.string.curr == pt->rep.string.past_the_end) {
+ return EOF;
+ }
len = pt->rep.string.past_the_end - pt->rep.string.curr;
c = g_utf8_get_char_validated(pt->rep.string.curr, len);
- if (c < 0)
+ if (c >= 0) /* Valid UTF-8 character? */
{
- pt->rep.string.curr = g_utf8_find_next_char(pt->rep.string.curr,
- pt->rep.string.past_the_end);
- if (pt->rep.string.curr == NULL)
- pt->rep.string.curr = pt->rep.string.past_the_end;
- c = ' ';
- }
- else
- {
- len = g_unichar_to_utf8(c, NULL);
+ len = g_unichar_to_utf8(c, NULL); /* Length of UTF-8 sequence */
pt->rep.string.curr += len;
+ return c;
}
- return c;
- }
+ /* Look for next valid UTF-8 character in buffer */
+ pt->rep.string.curr = g_utf8_find_next_char(pt->rep.string.curr,
+ pt->rep.string.past_the_end);
+ } /* end of while (TRUE) */
}
}
@@ -1813,7 +1812,8 @@ static pointer readstrexp(scheme *sc) {
break;
case '"':
*p=0;
- return mk_counted_string(sc,sc->strbuff,p-sc->strbuff);
+ return mk_counted_string(sc,sc->strbuff,
+ g_utf8_strlen(sc->strbuff, sizeof(sc->strbuff)));
default:
len = g_unichar_to_utf8(c, p);
p += len;
@@ -2855,7 +2855,7 @@ static pointer opexe_0(scheme *sc, enum scheme_opcodes op) {
s_goto(sc,OP_EVAL);
case OP_DEF1: /* define */
- x=find_slot_in_env(sc,sc->envir,sc->code,0);
+ x=find_slot_in_env(sc,sc->envir,sc->code,0);
if (x != sc->NIL) {
set_slot_in_env(sc, x, sc->value);
} else {
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]