g_convert and g_utf8_validate



Hi,

Two additions for glib: g_utf8_validate() and g_convert().
g_utf8_validate() is supposed to be robust against a random stream of
bytes, so please check it over and find my broken assumptions.

Havoc

/**
 * g_utf8_validate:
 * @str: bytes to check
 * @max_len: maximum number of bytes to examine, or a negative value to
 *           examine everything up to the first nul byte
 * @end: if non-NULL, receives a pointer just past the last character
 *       that validated successfully (@str itself if none did)
 *
 * Walks @str one character at a time and checks that each one is a
 * well-formed UTF-8 sequence.  Validation stops at the first nul byte
 * or, when @max_len is non-negative, after @max_len bytes, whichever
 * comes first.  A sequence that is cut off by @max_len is treated as
 * invalid.
 *
 * Return value: TRUE if every character examined was valid, FALSE if
 *               @str is NULL or a malformed sequence was found.
 **/
gboolean
g_utf8_validate (const gchar  *str,
                 gint          max_len,
                 const gchar **end)
{
  const gchar *p;
  gboolean retval = TRUE;

  if (end)
    *end = str;

  /* A NULL string cannot be walked; report it as invalid rather than
   * dereferencing it in the loop condition below.
   */
  if (str == NULL)
    return FALSE;

  p = str;

  while ((max_len < 0 || (p - str) < max_len) && *p)
    {
      int i, mask = 0, len;
      gunichar result;
      unsigned char c = (unsigned char) *p;

      /* UTF8_COMPUTE is defined outside this excerpt; it is expected to
       * derive the sequence length (or -1) and payload mask from the
       * lead byte.
       */
      UTF8_COMPUTE (c, mask, len);

      if (len == -1)
        {
          retval = FALSE;
          break;
        }

      /* check that the expected number of bytes exists in str */
      if (max_len >= 0 &&
          ((max_len - (p - str)) < len))
        {
          retval = FALSE;
          break;
        }

      UTF8_GET (result, p, i, mask, len);

      if (result == (gunichar)-1)
        {
          retval = FALSE;
          break;
        }

      /* Reject overlong encodings: a value that would have fit in a
       * shorter sequence must not be accepted in a longer one, or
       * bytes such as 0xC0 0x80 could smuggle a nul (or '/', etc.)
       * past callers that trust validated input.  This presumes
       * UTF8_GET leaves the decoded code point in result.
       */
      if ((len == 2 && result < 0x80) ||
          (len == 3 && result < 0x800) ||
          (len == 4 && result < 0x10000) ||
          (len == 5 && result < 0x200000) ||
          (len == 6 && result < 0x4000000))
        {
          retval = FALSE;
          break;
        }

      p += len;

      /* Only advance *end once the whole character has been accepted. */
      if (end)
        *end = p;
    }

  return retval;
}

/* iconv_open() etc. are not thread safe, so g_convert() takes this lock
 * before calling iconv_open() and releases it only after iconv_close()
 * (or immediately if iconv_open() fails).
 */
G_LOCK_DEFINE_STATIC (iconv_lock);

/* Doubles the output buffer used by g_convert(), keeping *outp at the
 * same offset into the (possibly moved) buffer and always holding one
 * byte back for the terminating nul.
 */
static void
g_convert_grow_output (gchar  **dest,
                       gchar  **outp,
                       size_t  *outbuf_size,
                       size_t  *outbytes_remaining)
{
  size_t used = *outp - *dest;

  *outbuf_size *= 2;
  *dest = g_realloc (*dest, *outbuf_size);

  *outp = *dest + used;
  *outbytes_remaining = *outbuf_size - used - 1; /* -1 for nul */
}

/**
 * g_convert:
 * @str: the bytes to convert; must not be NULL
 * @len: number of bytes of @str to convert, or a negative value to use
 *       strlen (@str)
 * @to_codeset: name of the target character set, as passed to
 *              iconv_open()
 * @from_codeset: name of the character set @str is in, as passed to
 *                iconv_open()
 * @bytes_converted: if non-NULL, receives the number of input bytes
 *                   that were consumed
 *
 * Converts @str from one character set to another with iconv().  The
 * output buffer is grown as needed.  If the conversion stops part way
 * through (for example on an invalid input sequence) a warning is
 * logged and the output produced so far is returned; compare
 * @bytes_converted with the input length to detect this.
 *
 * Return value: a newly allocated, nul-terminated string that the
 *               caller must release with g_free(), or NULL if the
 *               conversion descriptor could not be opened or no input
 *               byte of a non-empty input could be converted.  An
 *               empty input yields an empty string, not NULL.
 **/
gchar*
g_convert (const gchar *str,
           gint         len,
           const gchar *to_codeset,
           const gchar *from_codeset,
           gint        *bytes_converted)
{
  gchar *dest;
  gchar *outp;
  const gchar *p;
  size_t inbytes_remaining;
  size_t outbytes_remaining;
  size_t err;
  iconv_t cd;
  size_t outbuf_size;

  g_return_val_if_fail (str != NULL, NULL);
  g_return_val_if_fail (to_codeset != NULL, NULL);
  g_return_val_if_fail (from_codeset != NULL, NULL);

  G_LOCK (iconv_lock);

  cd = iconv_open (to_codeset, from_codeset);

  if (cd == (iconv_t) -1)
    {
      /* Something went wrong.  */
      if (errno == EINVAL)
        ; /* don't warn; just return NULL with bytes_converted of 0 */
      else
        g_warning ("Failed to convert character set `%s' to `%s': %s",
                   from_codeset, to_codeset, strerror (errno));

      if (bytes_converted)
        *bytes_converted = 0;

      G_UNLOCK (iconv_lock);

      return NULL;
    }

  if (len < 0)
    len = strlen (str);

  p = str;
  inbytes_remaining = len;
  outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
  outbytes_remaining = outbuf_size - 1; /* -1 for nul */
  outp = dest = g_malloc (outbuf_size);

  /* Convert, enlarging the output buffer each time iconv() reports
   * that it ran out of room (E2BIG).  Any other error ends the
   * conversion and leaves err == (size_t) -1.
   */
  for (;;)
    {
      err = iconv (cd, &p, &inbytes_remaining, &outp, &outbytes_remaining);

      if (err != (size_t) -1)
        break;

      if (errno != E2BIG)
        {
          g_warning ("iconv() failed: %s", strerror (errno));
          break;
        }

      g_convert_grow_output (&dest, &outp, &outbuf_size, &outbytes_remaining);
    }

  /* For stateful target encodings the output is incomplete until the
   * conversion state has been reset: calling iconv() with a NULL input
   * writes whatever sequence returns the output to its initial shift
   * state.  Skip this when the conversion itself already failed.
   */
  if (err != (size_t) -1)
    {
      while (iconv (cd, NULL, NULL, &outp, &outbytes_remaining) == (size_t) -1)
        {
          if (errno != E2BIG)
            {
              g_warning ("iconv() failed: %s", strerror (errno));
              break;
            }

          g_convert_grow_output (&dest, &outp, &outbuf_size,
                                 &outbytes_remaining);
        }
    }

  /* One byte was always held back for this terminator. */
  *outp = '\0';

  if (iconv_close (cd) != 0)
    g_warning ("Failed to close iconv() conversion descriptor: %s",
               strerror (errno));

  if (bytes_converted)
    *bytes_converted = p - str;

  G_UNLOCK (iconv_lock);

  /* Consuming nothing is only a failure when there was something to
   * consume; an empty input legitimately converts to an empty string.
   */
  if (p == str && len > 0)
    {
      g_free (dest);
      return NULL;
    }
  else
    return dest;
}




[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]