r3954 - in trunk/birnet: . tests
- From: timj svn gnome org
- To: svn-commits-list gnome org
- Subject: r3954 - in trunk/birnet: . tests
- Date: Tue, 10 Oct 2006 17:25:49 -0400 (EDT)
Author: timj
Date: 2006-10-10 17:25:27 -0400 (Tue, 10 Oct 2006)
New Revision: 3954
Modified:
trunk/birnet/ChangeLog
trunk/birnet/birnetutf8.cc
trunk/birnet/birnetutf8.hh
trunk/birnet/tests/strings.cc
Log:
Tue Oct 10 23:23:11 2006 Tim Janik <timj gtk org>
* birnetutf8.hh, birnetutf8.cc: added Unichar::isvalid().
changed argument order of utf8_find_prev() to match that of
g_utf8_find_prev_char(), adapted test case. added utf8_validate(),
wrapping g_utf8_validate().
* tests/strings.cc: adapt to utf8_find_prev(). added Unichar::isvalid()
test and utf8_validate() test.
Modified: trunk/birnet/ChangeLog
===================================================================
--- trunk/birnet/ChangeLog 2006-10-09 23:35:14 UTC (rev 3953)
+++ trunk/birnet/ChangeLog 2006-10-10 21:25:27 UTC (rev 3954)
@@ -1,3 +1,13 @@
+Tue Oct 10 23:23:11 2006 Tim Janik <timj gtk org>
+
+ * birnetutf8.hh, birnetutf8.cc: added Unichar::isvalid().
+ changed argument order of utf8_find_prev() to match that of
+ g_utf8_find_prev_char(), adapted test case. added utf8_validate(),
+ wrapping g_utf8_validate().
+
+ * tests/strings.cc: adapt to utf8_find_prev(). added Unichar::isvalid()
+ test and utf8_validate() test.
+
Tue Oct 10 01:34:05 2006 Tim Janik <timj gtk org>
* birnetcdefs.h: provide extra prototype for extern inline functions
Modified: trunk/birnet/birnetutf8.cc
===================================================================
--- trunk/birnet/birnetutf8.cc 2006-10-09 23:35:14 UTC (rev 3953)
+++ trunk/birnet/birnetutf8.cc 2006-10-10 21:25:27 UTC (rev 3954)
@@ -311,4 +311,17 @@
return l;
}
+bool
+utf8_validate (const String &strng,
+ int *bound)
+{
+ const char *c = &strng[0];
+ size_t l = strng.size();
+ const gchar *end = NULL;
+ gboolean gb = g_utf8_validate (c, l, &end);
+ if (bound)
+ *bound = !gb ? end - c : -1;
+ return gb != false;
+}
+
} // Birnet
Modified: trunk/birnet/birnetutf8.hh
===================================================================
--- trunk/birnet/birnetutf8.hh 2006-10-09 23:35:14 UTC (rev 3953)
+++ trunk/birnet/birnetutf8.hh 2006-10-10 21:25:27 UTC (rev 3954)
@@ -24,26 +24,27 @@
namespace Birnet {
namespace Unichar {
-bool isalnum (unichar uc) BIRNET_CONST;
-bool isalpha (unichar uc) BIRNET_CONST;
-bool iscntrl (unichar uc) BIRNET_CONST;
-bool isdigit (unichar uc) BIRNET_CONST;
-int digit_value (unichar uc) BIRNET_CONST;
-bool isgraph (unichar uc) BIRNET_CONST;
-bool islower (unichar uc) BIRNET_CONST;
-unichar tolower (unichar uc) BIRNET_CONST;
-bool isprint (unichar uc) BIRNET_CONST;
-bool ispunct (unichar uc) BIRNET_CONST;
-bool isspace (unichar uc) BIRNET_CONST;
-bool isupper (unichar uc) BIRNET_CONST;
-unichar toupper (unichar uc) BIRNET_CONST;
-bool isxdigit (unichar uc) BIRNET_CONST;
-int xdigit_value (unichar uc) BIRNET_CONST;
-bool istitle (unichar uc) BIRNET_CONST;
-unichar totitle (unichar uc) BIRNET_CONST;
-bool isdefined (unichar uc) BIRNET_CONST;
-bool iswide (unichar uc) BIRNET_CONST;
-bool iswide_cjk (unichar uc) BIRNET_CONST;
+inline bool isvalid (unichar uc) BIRNET_CONST;
+bool isalnum (unichar uc) BIRNET_CONST;
+bool isalpha (unichar uc) BIRNET_CONST;
+bool iscntrl (unichar uc) BIRNET_CONST;
+bool isdigit (unichar uc) BIRNET_CONST;
+int digit_value (unichar uc) BIRNET_CONST;
+bool isgraph (unichar uc) BIRNET_CONST;
+bool islower (unichar uc) BIRNET_CONST;
+unichar tolower (unichar uc) BIRNET_CONST;
+bool isprint (unichar uc) BIRNET_CONST;
+bool ispunct (unichar uc) BIRNET_CONST;
+bool isspace (unichar uc) BIRNET_CONST;
+bool isupper (unichar uc) BIRNET_CONST;
+unichar toupper (unichar uc) BIRNET_CONST;
+bool isxdigit (unichar uc) BIRNET_CONST;
+int xdigit_value (unichar uc) BIRNET_CONST;
+bool istitle (unichar uc) BIRNET_CONST;
+unichar totitle (unichar uc) BIRNET_CONST;
+bool isdefined (unichar uc) BIRNET_CONST;
+bool iswide (unichar uc) BIRNET_CONST;
+bool iswide_cjk (unichar uc) BIRNET_CONST;
typedef enum {
CONTROL, FORMAT, UNASSIGNED,
PRIVATE_USE, SURROGATE, LOWERCASE_LETTER,
@@ -82,17 +83,35 @@
inline char* utf8_prev (char *c);
inline const char* utf8_find_next (const char *c,
const char *bound = NULL);
-inline char* utf8_find_next (char *c,
+inline char* utf8_find_next (char *current,
const char *bound = NULL);
-inline const char* utf8_find_prev (const char *c,
- const char *start = NULL);
-inline char* utf8_find_prev (char *c,
- const char *start = NULL);
+inline const char* utf8_find_prev (const char *start,
+ const char *current);
+inline char* utf8_find_prev (const char *start,
+ char *currrent);
unichar utf8_to_unichar (const char *str);
int utf8_from_unichar (unichar uc,
char str[8]);
+bool utf8_validate (const String &string,
+ int *bound = NULL);
/* --- implementation bits --- */
+namespace Unichar {
+inline bool
+isvalid (unichar uc)
+{
+ if (BIRNET_UNLIKELY (uc > 0xfdcf && uc < 0xfdf0))
+ return false;
+ if (BIRNET_UNLIKELY ((uc & 0xfffe) == 0xfffe))
+ return false;
+ if (BIRNET_UNLIKELY (uc > 0x10ffff))
+ return false;
+ if (BIRNET_UNLIKELY ((uc & 0xfffff800) == 0xd800))
+ return false;
+ return true;
+}
+} // Unichar
+
extern const int8 utf8_skip_table[256];
inline const char*
@@ -144,20 +163,20 @@
}
inline const char*
-utf8_find_prev (const char *c,
- const char *start)
+utf8_find_prev (const char *start,
+ const char *current)
{
do
- c--;
- while (c >= start && (*c & 0xc0) == 0x80);
- return !start || c >= start ? c : NULL;
+ current--;
+ while (current >= start && (*current & 0xc0) == 0x80);
+ return current >= start ? current : NULL;
}
inline char*
-utf8_find_prev (char *c,
- const char *start)
+utf8_find_prev (const char *start,
+ char *current)
{
- return const_cast<char*> (utf8_find_prev (const_cast<const char*> (c), start));
+ return const_cast<char*> (utf8_find_prev (start, const_cast<const char*> (current)));
}
Modified: trunk/birnet/tests/strings.cc
===================================================================
--- trunk/birnet/tests/strings.cc 2006-10-09 23:35:14 UTC (rev 3953)
+++ trunk/birnet/tests/strings.cc 2006-10-10 21:25:27 UTC (rev 3954)
@@ -61,7 +61,7 @@
TCHECK (pn == cur + 1);
gn = g_utf8_find_next_char (cur, NULL);
TCHECK (pn == gn);
- pp = utf8_find_prev (pn, cbuffer);
+ pp = utf8_find_prev (cbuffer, pn);
TCHECK (pp == cur);
/* random unichar */
cur = pn;
@@ -69,7 +69,7 @@
TCHECK (pn == cur + l);
gn = g_utf8_find_next_char (cur, NULL);
TCHECK (pn == gn);
- pp = utf8_find_prev (pn, cbuffer);
+ pp = utf8_find_prev (cbuffer, pn);
TCHECK (pp == cur);
/* y */
cur = pn;
@@ -77,7 +77,7 @@
TCHECK (pn == cur + 1);
gn = g_utf8_find_next_char (cur, NULL);
TCHECK (pn == gn);
- pp = utf8_find_prev (pn, cbuffer);
+ pp = utf8_find_prev (cbuffer, pn);
TCHECK (pp == cur);
/* 7 (last) */
cur = pn;
@@ -85,7 +85,7 @@
TCHECK (pn == cur + 1);
gn = g_utf8_find_next_char (cur, NULL);
TCHECK (pn == gn);
- pp = utf8_find_prev (pn, cbuffer);
+ pp = utf8_find_prev (cbuffer, pn);
TCHECK (pp == cur);
/* last with bounds */
pn = utf8_find_next (cur, cur + strlen (cur));
@@ -95,6 +95,20 @@
/* first with bounds */
pp = utf8_find_prev (cbuffer, cbuffer);
TCHECK (pp == NULL);
+
+ /* validate valid UTF-8 */
+ bool bb = utf8_validate (cbuffer);
+ bool gb = g_utf8_validate (cbuffer, -1, NULL);
+ TCHECK (bb == gb);
+ /* validate invalid UTF-8 */
+ cbuffer[rand() % (l + 3)] = rand();
+ const char *gp;
+ int indx;
+ bb = utf8_validate (cbuffer, &indx);
+ gb = g_utf8_validate (cbuffer, -1, &gp);
+ TCHECK (bb == gb);
+ if (!bb)
+ TCHECK (cbuffer + indx == gp);
}
TDONE();
}
@@ -114,6 +128,9 @@
if (i % 20000 == 0)
TOK();
+ bb = Unichar::isvalid (uc);
+ gb = g_unichar_validate (uc);
+ TCHECK (bb == gb);
bb = Unichar::isalnum (uc);
gb = g_unichar_isalnum (uc);
TCHECK (bb == gb);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]