g_utf8_validate patch



This patch makes g_utf8_validate reject overlong sequences, surrogates
(U+D800..U+DFFF), and the invalid code points U+FFFE and U+FFFF.

-- 
Robert
Index: ChangeLog
===================================================================
RCS file: /cvs/gnome/glib/ChangeLog,v
retrieving revision 1.523
diff -u -r1.523 ChangeLog
--- ChangeLog	2000/11/05 18:38:08	1.523
+++ ChangeLog	2000/11/07 18:29:59
@@ -1,3 +1,8 @@
+2000-11-07  Robert Brady <robert suse co uk>
+
+	* gutf8.c (g_utf8_validate): Reject overlong sequences,
+	surrogates, and U+FFFE and U+FFFF.
+
 2000-11-05  Havoc Pennington  <hp pobox com>
 
 	* gmarkup.h: rename G_MARKUP_FOO to
Index: gutf8.c
===================================================================
RCS file: /cvs/gnome/glib/gutf8.c,v
retrieving revision 1.7
diff -u -r1.7 gutf8.c
--- gutf8.c	2000/09/18 14:55:24	1.7
+++ gutf8.c	2000/11/07 18:30:00
@@ -86,6 +86,9 @@
   3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0
 };
 
+static int min_ucs_for_len[] = /* [n] = smallest code point that needs an n-byte UTF-8 sequence; smaller is overlong */
+{ 0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
+
 /**
  * g_utf8_find_prev_char:
  * @str: pointer to the beginning of a UTF-8 string
@@ -581,12 +584,22 @@
         
       UTF8_GET (result, p, i, mask, len);
 
-      if (result == (gunichar)-1)
+      if (result == (gunichar)-1 ||
+	  (result >= 0xd800 &&
+	   result <= 0xdfff) ||
+	  result == 0xfffe ||
+	  result == 0xffff)
         {
           retval = FALSE;
           break;
         }
       
+      if (result < min_ucs_for_len[len])
+	{
+	  retval = FALSE;
+	  break;
+	}
+
       p += len;
     }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]