[glib: 1/2] guri: Improve performance of remove_dot_segments() algorithm
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib: 1/2] guri: Improve performance of remove_dot_segments() algorithm
- Date: Wed, 17 Nov 2021 15:20:29 +0000 (UTC)
commit 21b45d6ac299a31f237983facbadf5fe79afc657
Author: Sebastian Wilhelmi <wilhelmi google com>
Date: Wed Nov 17 15:20:28 2021 +0000
guri: Improve performance of remove_dot_segments() algorithm
glib/guri.c | 117 +++++++++++++++++++++++++++++++++----------------------
glib/tests/uri.c | 13 +++++++
2 files changed, 84 insertions(+), 46 deletions(-)
---
diff --git a/glib/guri.c b/glib/guri.c
index 2c94efbc6..5c2b35b8a 100644
--- a/glib/guri.c
+++ b/glib/guri.c
@@ -1296,68 +1296,93 @@ g_uri_is_valid (const gchar *uri_string,
}
-/* This does the "Remove Dot Segments" algorithm from section 5.2.4 of
- * RFC 3986, except that @path is modified in place.
+/* Implements the "Remove Dot Segments" algorithm from section 5.2.4 of
+ * RFC 3986.
*
* See https://tools.ietf.org/html/rfc3986#section-5.2.4
*/
static void
remove_dot_segments (gchar *path)
{
- gchar *p, *q;
+ /* The output can be written to the same buffer that the input
+ * is read from, as the output pointer is only ever increased
+ * when the input pointer is increased as well, and the input
+ * pointer is never decreased. */
+ gchar *input = path;
+ gchar *output = path;
if (!*path)
return;
- /* Remove "./" where "." is a complete segment. */
- for (p = path + 1; *p; )
+ while (*input)
{
- if (*(p - 1) == '/' &&
- *p == '.' && *(p + 1) == '/')
- memmove (p, p + 2, strlen (p + 2) + 1);
- else
- p++;
- }
- /* Remove "." at end. */
- if (p > path + 2 &&
- *(p - 1) == '.' && *(p - 2) == '/')
- *(p - 1) = '\0';
-
- /* Remove "<segment>/../" where <segment> != ".." */
- for (p = path + 1; *p; )
- {
- if (!strncmp (p, "../", 3))
+ /* A. If the input buffer begins with a prefix of "../" or "./",
+ * then remove that prefix from the input buffer; otherwise,
+ */
+ if (strncmp (input, "../", 3) == 0)
+ input += 3;
+ else if (strncmp (input, "./", 2) == 0)
+ input += 2;
+
+ /* B. if the input buffer begins with a prefix of "/./" or "/.",
+ * where "." is a complete path segment, then replace that
+ * prefix with "/" in the input buffer; otherwise,
+ */
+ else if (strncmp (input, "/./", 3) == 0)
+ input += 2;
+ else if (strcmp (input, "/.") == 0)
+ input[1] = '\0';
+
+ /* C. if the input buffer begins with a prefix of "/../" or "/..",
+ * where ".." is a complete path segment, then replace that
+ * prefix with "/" in the input buffer and remove the last
+ * segment and its preceding "/" (if any) from the output
+ * buffer; otherwise,
+ */
+ else if (strncmp (input, "/../", 4) == 0)
{
- p += 3;
- continue;
+ input += 3;
+ if (output > path)
+ {
+ do
+ {
+ output--;
+ }
+ while (*output != '/' && output > path);
+ }
}
- q = strchr (p + 1, '/');
- if (!q)
- break;
- if (strncmp (q, "/../", 4) != 0)
+ else if (strcmp (input, "/..") == 0)
{
- p = q + 1;
- continue;
+ input[1] = '\0';
+ if (output > path)
+ {
+ do
+ {
+ output--;
+ }
+ while (*output != '/' && output > path);
+ }
}
- memmove (p, q + 4, strlen (q + 4) + 1);
- p = path + 1;
- }
- /* Remove "<segment>/.." at end where <segment> != ".." */
- q = strrchr (path, '/');
- if (q && q != path && !strcmp (q, "/.."))
- {
- p = q - 1;
- while (p > path && *p != '/')
- p--;
- if (strncmp (p, "/../", 4) != 0)
- *(p + 1) = 0;
- }
- /* Remove extraneous initial "/.."s */
- while (!strncmp (path, "/../", 4))
- memmove (path, path + 3, strlen (path) - 2);
- if (!strcmp (path, "/.."))
- path[1] = '\0';
+ /* D. if the input buffer consists only of "." or "..", then remove
+ * that from the input buffer; otherwise,
+ */
+ else if (strcmp (input, "..") == 0 || strcmp (input, ".") == 0)
+ input[0] = '\0';
+
+ /* E. move the first path segment in the input buffer to the end of
+ * the output buffer, including the initial "/" character (if
+ * any) and any subsequent characters up to, but not including,
+ * the next "/" character or the end of the input buffer.
+ */
+ else
+ {
+ *output++ = *input++;
+ while (*input && *input != '/')
+ *output++ = *input++;
+ }
+ }
+ *output = '\0';
}
/**
diff --git a/glib/tests/uri.c b/glib/tests/uri.c
index 7a251af64..e1ad7cfe1 100644
--- a/glib/tests/uri.c
+++ b/glib/tests/uri.c
@@ -898,6 +898,19 @@ static const UriRelativeTest relative_tests[] = {
{ "ScHeMe://User:P%61ss@HOST.%63om:1234/path/./from/../to%7d/item%2dobj?qu%65ry=something#fr%61gment",
"scheme://User:Pass HOST com:1234/path/to%7D/item-obj?query=something#fragment",
{ "scheme", "User:Pass", "HOST.com", 1234, "/path/to}/item-obj", "query=something", "fragment" } },
+ /* Test corner cases of remove_dot_segments */
+ { "http:..", "http:",
+ { "http", NULL, NULL, -1, "", NULL, NULL } },
+ { "http:../", "http:",
+ { "http", NULL, NULL, -1, "", NULL, NULL } },
+ { "http:.", "http:",
+ { "http", NULL, NULL, -1, "", NULL, NULL } },
+ { "http:./", "http:",
+ { "http", NULL, NULL, -1, "", NULL, NULL } },
+ { "http:a/..", "http:/",
+ { "http", NULL, NULL, -1, "/", NULL, NULL } },
+ { "http:a/../", "http:/",
+ { "http", NULL, NULL, -1, "/", NULL, NULL } },
};
static int num_relative_tests = G_N_ELEMENTS (relative_tests);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]