Re: [gnet-devel] Efficient URI parsing (was: GNet HTTP server class)



On Thu, 2007-12-13 at 21:49 -0500, Jeff Garzik wrote:

> FWIW, I've attached the modified version of uri.c that I use in my 
> non-GNet HTTP server, in case somebody finds it interesting or useful. 
> (...) This URI code does all modifications in-place.

I looked into adding some kind of gnet_uri_parse_inplace() that parses
a URI in place and puts the result into a GURI structure on the stack,
with separators in the input string being munged into terminators.
However, I ran into the problem that you can't always turn the '/'
between the hostname and the path into a terminator for the hostname,
because it's also needed as the first character of the path.

I came up with two possible solutions: either just document that the
path might not/won't start with a '/' (eww), or require the caller to
pass in a buffer for the hostname that can be used instead, which would
involve a small memcpy if there's a hostname, but seemed like the
overall cleaner solution to me.  Attached is what I came up with
(#ifdef'ed out though; the code in gnet_uri_new is just there so I can
run the code through the unit tests).

Haven't decided yet if this is really useful enough to add (i.e. is it
good enough that someone would actually use it rather than write a
custom parser?).

> - [field_unescape] includes a micro-optimization to avoid a whole lot
>  of unnecessary in-place modifications, for the common cases (why
>  dirty cachelines needlessly?)

I've committed that, thanks.

 Cheers
  -Tim

Index: uri.c
===================================================================
--- uri.c	(revision 483)
+++ uri.c	(working copy)
@@ -231,6 +231,53 @@ path:
   return TRUE;
 }
 
+#if 0
+static gchar *
+parse_inplace_munge_string_part (StringPart * part)
+{
+  if (part->len == 0)
+    return NULL;
+  ((gchar*)part->str)[part->len] = '\0';
+  return (gchar *) part->str;
+}
+
+/* We require the caller to pass in a buffer where we can put the hostname,
+ * because if there's both a hostname and a path we can't munge the separating
+ * '/' into a terminator for the hostname because we need the '/' as the first
+ * character of the path.
+ */
+static gboolean
+gnet_uri_parse_inplace (GURI * guri, gchar * uri, gchar * hostname, gsize len)
+{
+  StringPart scheme, usr, host, path, query, frag;
+  guint port;
+
+  if (!gnet_uri_parse (uri, &scheme, &usr, &host, &port, &path, &query, &frag))
+    return FALSE;
+
+  if (host.len >= len)
+    return FALSE;
+
+  if (host.len > 0) {
+    if (hostname == NULL)
+      return FALSE;
+
+    strncpy (hostname, host.str, host.len);
+    hostname[host.len] = '\0';
+  }
+
+  guri->scheme = parse_inplace_munge_string_part (&scheme);
+  guri->userinfo = parse_inplace_munge_string_part (&usr);
+  guri->hostname = (host.len > 0) ? hostname : NULL;
+  guri->path = parse_inplace_munge_string_part (&path);
+  guri->query = parse_inplace_munge_string_part (&query);
+  guri->fragment = parse_inplace_munge_string_part (&frag);
+  guri->port = port;
+
+  return TRUE;
+}
+#endif
+
 /**
  *  gnet_uri_new
  *  @uri: URI string
@@ -250,6 +297,17 @@ path:
 GURI *
 gnet_uri_new (const gchar * uri)
 {
+#if 0
+  GURI guri = { NULL, }, *ret = NULL;
+  gchar *uricpy, hostname[1024];
+
+  uricpy = g_strdup (uri);
+  if (gnet_uri_parse_inplace (&guri, uricpy, hostname, sizeof (hostname))) {
+    ret = gnet_uri_clone (&guri);
+  }
+  g_free (uricpy);
+  return ret;
+#else
   StringPart scheme, usr, host, path, query, frag;
   guint port;
   GURI *guri;
@@ -269,6 +327,7 @@ gnet_uri_new (const gchar * uri)
   guri->port = port;
 
   return guri;
+#endif
 }
 
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]