g_utf8_strlcpy
- From: "PHILIP PAGE, BLOOMBERG/ 731 LEXIN" <ppage bloomberg net>
- To: GTK-DEVEL-LIST GNOME ORG
- Subject: g_utf8_strlcpy
- Date: 29 Oct 2008 10:24:09 -0400
I see that http://bugzilla.gnome.org/show_bug.cgi?id=520116 has been filed to request a UTF-8 version of strlcpy. Has anyone implemented this yet?
   Below is a proposed implementation and some test cases.
Philip Page
Bloomberg LP
/**
 * g_utf8_strlcpy:
 * @dest: buffer to fill with characters from @src; may be %NULL only
 *        when @n is 0
 * @src: nul-terminated, UTF-8 encoded string
 * @n: size of @dest in bytes, including room for the terminating nul
 *
 * Like the BSD-standard strlcpy() function, but is careful not to
 * truncate in the middle of a multi-byte character.  At most @n - 1
 * bytes are copied and, whenever @n is non-zero, @dest is always
 * nul-terminated.  When @n is 0 nothing is written to @dest.
 *
 * The @src string must be valid UTF-8 encoded text.
 * (Use g_utf8_validate() on all text before trying to use UTF-8
 * utility functions with it.)  @dest and @src must not overlap.
 *
 * Return value: strlen(src).  A result greater than or equal to @n
 * means the copy was truncated.
 **/
size_t
g_utf8_strlcpy (
    char *      dest,
    const char *src,
    size_t      n)
{
  const size_t src_len = strlen (src);
  size_t       copy_len;

  /* A zero-sized buffer cannot even hold the terminator: report the
   * length that would be needed and leave dest untouched. */
  if (n == 0)
    return src_len;

  if (src_len < n)
    {
      /* Plenty of room, copy everything. */
      copy_len = src_len;
    }
  else
    {
      /* We need to truncate.  Start from the largest payload that
       * fits and back up while the first byte that would be dropped is
       * a continuation byte (10xxxxxx), so that the cut lands on a
       * character boundary.  The index never goes below 0, so src is
       * never read before its start. */
      copy_len = n - 1;
      while (copy_len > 0
             && ((unsigned char) src[copy_len] & 0xC0) == 0x80)
        {
          copy_len--;
        }
    }

  memcpy (dest, src, copy_len);
  dest[copy_len] = '\0';

  return src_len;
}
/*
 * Test driver for g_utf8_strlcpy().
 *
 * Each case checks the return value (always the byte length of the
 * source) and the byte length of what ended up in dest.  The multi-byte
 * cases use U+20AC EURO SIGN, which is the three bytes E2 82 AC in UTF-8.
 */
int main (int argc, char * argv[])
{
    char dest[64];
    size_t ret;

    (void) argc;
    (void) argv;

    /* Empty source. */
    ret = g_utf8_strlcpy(dest, "", 64);
    assert(ret == 0);
    assert(strlen(dest) == 0);

    /* Pure ASCII: fits, then truncated. */
    ret = g_utf8_strlcpy(dest, "abcd", 64);
    assert(ret == 4);
    assert(strlen(dest) == 4);
    ret = g_utf8_strlcpy(dest, "abcd", 2);
    assert(ret == 4);
    assert(strlen(dest) == 1);

    /* A single three-byte character: fits, then too big to copy at all. */
    ret = g_utf8_strlcpy(dest, "\xE2\x82\xAC", 64);
    assert(ret == 3);
    assert(strlen(dest) == 3);
    ret = g_utf8_strlcpy(dest, "\xE2\x82\xAC", 2);
    assert(ret == 3);
    assert(strlen(dest) == 0);

    /* ASCII followed by a three-byte character. */
    ret = g_utf8_strlcpy(dest, "a\xE2\x82\xAC", 64);
    assert(ret == 4);
    assert(strlen(dest) == 4);
    ret = g_utf8_strlcpy(dest, "a\xE2\x82\xAC", 1);
    assert(ret == 4);
    assert(strlen(dest) == 0);
    ret = g_utf8_strlcpy(dest, "a\xE2\x82\xAC", 2);
    assert(ret == 4);
    assert(strlen(dest) == 1);
    /* Room for 3 payload bytes, but the euro sign would be split. */
    ret = g_utf8_strlcpy(dest, "a\xE2\x82\xAC", 4);
    assert(ret == 4);
    assert(strlen(dest) == 1);

    /* Three-byte character followed by ASCII. */
    ret = g_utf8_strlcpy(dest, "\xE2\x82\xACz", 64);
    assert(ret == 4);
    assert(strlen(dest) == 4);
    ret = g_utf8_strlcpy(dest, "\xE2\x82\xACz", 4);
    assert(ret == 4);
    assert(strlen(dest) == 3);
    /* Buffer ends inside the first character: nothing can be copied. */
    ret = g_utf8_strlcpy(dest, "\xE2\x82\xACz", 3);
    assert(ret == 4);
    assert(strlen(dest) == 0);

    return 0;
}
[
Date Prev][
Date Next]   [
Thread Prev][
Thread Next]   
[
Thread Index]
[
Date Index]
[
Author Index]