g_utf8_strlcpy

From: "PHILIP PAGE, BLOOMBERG/ 731 LEXIN" <ppage bloomberg net>
To: GTK-DEVEL-LIST GNOME ORG
Subject: g_utf8_strlcpy
Date: 29 Oct 2008 10:24:09 -0400

I see that http://bugzilla.gnome.org/show_bug.cgi?id=520116 has been filed to request a UTF-8 version of strlcpy. Has anyone implemented this yet?
   Below is a proposed implementation and some test cases.

Philip Page
Bloomberg LP

/**
 * g_utf8_strlcpy:
 * @dest: buffer to fill with characters from @src; it is not accessed
 *        at all when @n is 0
 * @src: nul-terminated UTF-8 encoded string
 * @n: size of @dest in bytes, including room for the terminating nul
 *
 * Like the BSD-standard strlcpy() function, but
 * is careful not to truncate in the middle of a character.
 * At most @n - 1 bytes of @src are copied and, whenever @n is
 * non-zero, @dest is always nul-terminated.
 * The @src string must be valid UTF-8 encoded text.
 * (Use g_utf8_validate() on all text before trying to use UTF-8
 * utility functions with it.)
 *
 * Return value: strlen(src); a result greater than or equal to @n
 * means the copy was truncated
 **/
size_t
g_utf8_strlcpy (
    char *      dest,
    const char *src,
    size_t      n)
{
  size_t src_len = strlen (src);
  size_t copy_len;

  /* A zero-sized buffer cannot even hold the terminator: report the
   * length that would be needed and leave dest untouched. */
  if (n == 0)
    return src_len;

  if (src_len < n)
    {
      /* Plenty of room, copy everything. */
      copy_len = src_len;
    }
  else
    {
      /* We need to truncate.  Start from the last byte position that
       * still leaves room for the nul and back up while it points at
       * a UTF-8 continuation byte (10xxxxxx), so that the cut falls
       * on a character boundary. */
      copy_len = n - 1;
      while (copy_len > 0
             && ((unsigned char) src[copy_len] & 0xC0) == 0x80)
        {
          copy_len--;
        }
    }

  memcpy (dest, src, copy_len);
  dest[copy_len] = '\0';

  return src_len;
}


/*
 * Test driver for g_utf8_strlcpy().
 *
 * Each case copies a literal into a 64-byte buffer while claiming a
 * particular buffer size, then checks both the returned source length
 * and what actually ended up in the buffer.  "\xE2\x82\xAC" is the
 * three-byte UTF-8 encoding of U+20AC (EURO SIGN), used to verify that
 * truncation never splits a multi-byte character.
 *
 * Returns 0 when every assertion holds; a failing assert aborts.
 */
int main (int argc, char * argv[])
{
    char dest[64];
    size_t ret;   /* same type as the function's return value */

    (void) argc;
    (void) argv;

    /* Empty source. */
    ret = g_utf8_strlcpy(dest, "", 64);
    assert(ret == 0);
    assert(strlen(dest) == 0);

    /* Pure ASCII: fits / truncated. */
    ret = g_utf8_strlcpy(dest, "abcd", 64);
    assert(ret == 4);
    assert(strcmp(dest, "abcd") == 0);

    ret = g_utf8_strlcpy(dest, "abcd", 2);
    assert(ret == 4);
    assert(strcmp(dest, "a") == 0);

    /* A single three-byte character: fits / does not fit at all. */
    ret = g_utf8_strlcpy(dest, "\xE2\x82\xAC", 64);
    assert(ret == 3);
    assert(strcmp(dest, "\xE2\x82\xAC") == 0);

    ret = g_utf8_strlcpy(dest, "\xE2\x82\xAC", 2);
    assert(ret == 3);
    assert(strlen(dest) == 0);

    /* ASCII followed by a multi-byte character. */
    ret = g_utf8_strlcpy(dest, "a\xE2\x82\xAC", 64);
    assert(ret == 4);
    assert(strcmp(dest, "a\xE2\x82\xAC") == 0);

    ret = g_utf8_strlcpy(dest, "a\xE2\x82\xAC", 1);
    assert(ret == 4);
    assert(strlen(dest) == 0);

    ret = g_utf8_strlcpy(dest, "a\xE2\x82\xAC", 2);
    assert(ret == 4);
    assert(strcmp(dest, "a") == 0);

    /* Four bytes of text need five bytes of buffer, so the euro sign
     * must be dropped as a whole. */
    ret = g_utf8_strlcpy(dest, "a\xE2\x82\xAC", 4);
    assert(ret == 4);
    assert(strcmp(dest, "a") == 0);

    /* Multi-byte character followed by ASCII. */
    ret = g_utf8_strlcpy(dest, "\xE2\x82\xACz", 64);
    assert(ret == 4);
    assert(strcmp(dest, "\xE2\x82\xACz") == 0);

    ret = g_utf8_strlcpy(dest, "\xE2\x82\xACz", 4);
    assert(ret == 4);
    assert(strcmp(dest, "\xE2\x82\xAC") == 0);

    /* Buffer exactly as large as the first character: no room left
     * for the terminator, so nothing may be copied. */
    ret = g_utf8_strlcpy(dest, "\xE2\x82\xACz", 3);
    assert(ret == 4);
    assert(strlen(dest) == 0);

    return 0;
}

Follow-Ups:
- Re: g_utf8_strlcpy
  - From: Christian Dywan

[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]