[glib/glib-2-70: 1/2] gfileutils: Improve performance of g_canonicalize_filename()
- From: Philip Withnall <pwithnall src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [glib/glib-2-70: 1/2] gfileutils: Improve performance of g_canonicalize_filename()
- Date: Tue, 30 Nov 2021 10:08:12 +0000 (UTC)
commit 28a15f95c4c6bf62ceab652f24e6c93f1f10e4ff
Author: Sebastian Wilhelmi <wilhelmi google com>
Date: Fri Nov 26 13:43:56 2021 +0000
gfileutils: Improve performance of g_canonicalize_filename()
Improve the performance of canonicalising filenames with many `..` or
`.` components, by modifying the path inline rather than calling
`memmove()`.
Signed-off-by: Philip Withnall <pwithnall endlessos org>
Fixes: #2541
glib/gfileutils.c | 115 +++++++++++++++++++++++++++---------------------------
tests/testglib.c | 36 +++++++++++++++++
2 files changed, 94 insertions(+), 57 deletions(-)
---
diff --git a/glib/gfileutils.c b/glib/gfileutils.c
index 2a0db4f97..5a161cb75 100644
--- a/glib/gfileutils.c
+++ b/glib/gfileutils.c
@@ -2736,8 +2736,7 @@ gchar *
g_canonicalize_filename (const gchar *filename,
const gchar *relative_to)
{
- gchar *canon, *start, *p, *q;
- guint i;
+ gchar *canon, *input, *output, *start;
g_return_val_if_fail (relative_to == NULL || g_path_is_absolute (relative_to), NULL);
@@ -2770,74 +2769,76 @@ g_canonicalize_filename (const gchar *filename,
return g_build_filename (G_DIR_SEPARATOR_S, filename, NULL);
}
- /* POSIX allows double slashes at the start to
- * mean something special (as does windows too).
- * So, "//" != "/", but more than two slashes
+ /* Find the first dir separator and use the canonical dir separator. */
+ for (output = start - 1;
+ (output >= canon) && G_IS_DIR_SEPARATOR (*output);
+ output--)
+ *output = G_DIR_SEPARATOR;
+
+ output += 2;
+
+ /* POSIX allows double slashes at the start to mean something special
+ * (as does windows too). So, "//" != "/", but more than two slashes
* is treated as "/".
*/
- i = 0;
- for (p = start - 1;
- (p >= canon) &&
- G_IS_DIR_SEPARATOR (*p);
- p--)
- i++;
- if (i > 2)
- {
- i -= 1;
- start -= i;
- memmove (start, start+i, strlen (start+i) + 1);
- }
+ if (start - output == 1)
+ output++;
- /* Make sure we're using the canonical dir separator */
- p++;
- while (p < start && G_IS_DIR_SEPARATOR (*p))
- *p++ = G_DIR_SEPARATOR;
-
- p = start;
- while (*p != 0)
+ input = start;
+ while (*input)
{
- if (p[0] == '.' && (p[1] == 0 || G_IS_DIR_SEPARATOR (p[1])))
+ /* input points to the next non-separator to be processed. */
+ /* output points to the next location to write to. */
+ g_assert (input > canon && G_IS_DIR_SEPARATOR (input[-1]));
+ g_assert (output > canon && G_IS_DIR_SEPARATOR (output[-1]));
+ g_assert (input >= output);
+
+ /* Ignore repeated dir separators. */
+ while (G_IS_DIR_SEPARATOR (input[0]))
+ input++;
+
+ /* Ignore single dot directory components. */
+ if (input[0] == '.' && (input[1] == 0 || G_IS_DIR_SEPARATOR (input[1])))
{
- memmove (p, p+1, strlen (p+1)+1);
+ if (input[1] == 0)
+ break;
+ input += 2;
}
- else if (p[0] == '.' && p[1] == '.' && (p[2] == 0 || G_IS_DIR_SEPARATOR (p[2])))
+ /* Remove double-dot directory components along with the preceding
+ * path component. */
+ else if (input[0] == '.' && input[1] == '.' &&
+ (input[2] == 0 || G_IS_DIR_SEPARATOR (input[2])))
{
- q = p + 2;
- /* Skip previous separator */
- p = p - 2;
- if (p < start)
- p = start;
- while (p > start && !G_IS_DIR_SEPARATOR (*p))
- p--;
- if (G_IS_DIR_SEPARATOR (*p))
- *p++ = G_DIR_SEPARATOR;
- memmove (p, q, strlen (q)+1);
+ if (output > start)
+ {
+ do
+ {
+ output--;
+ }
+ while (!G_IS_DIR_SEPARATOR (output[-1]) && output > start);
+ }
+ if (input[2] == 0)
+ break;
+ input += 3;
}
+ /* Copy the input to the output until the next separator,
+ * while converting it to canonical separator */
else
{
- /* Skip until next separator */
- while (*p != 0 && !G_IS_DIR_SEPARATOR (*p))
- p++;
-
- if (*p != 0)
- {
- /* Canonicalize one separator */
- *p++ = G_DIR_SEPARATOR;
- }
+ while (*input && !G_IS_DIR_SEPARATOR (*input))
+ *output++ = *input++;
+ if (input[0] == 0)
+ break;
+ input++;
+ *output++ = G_DIR_SEPARATOR;
}
-
- /* Remove additional separators */
- q = p;
- while (*q && G_IS_DIR_SEPARATOR (*q))
- q++;
-
- if (p != q)
- memmove (p, q, strlen (q) + 1);
}
- /* Remove trailing slashes */
- if (p > start && G_IS_DIR_SEPARATOR (*(p-1)))
- *(p-1) = 0;
+ /* Remove a potentially trailing dir separator */
+ if (output > start && G_IS_DIR_SEPARATOR (output[-1]))
+ output--;
+
+ *output = '\0';
return canon;
}
diff --git a/tests/testglib.c b/tests/testglib.c
index 071afdc1d..a3546fae6 100644
--- a/tests/testglib.c
+++ b/tests/testglib.c
@@ -1051,6 +1051,18 @@ test_paths (void)
{ "///triple/slash", ".", "/triple/slash" },
{ "//double/slash", ".", "//double/slash" },
{ "/cwd/../with/./complexities/", "./hello", "/with/complexities/hello" },
+ { "/", ".dot-dir", "/.dot-dir" },
+ { "/cwd", "..", "/" },
+ { "/etc", "hello/..", "/etc" },
+ { "/etc", "hello/../", "/etc" },
+ { "/", "..", "/" },
+ { "/", "../", "/" },
+ { "/", "/..", "/" },
+ { "/", "/../", "/" },
+ { "/", ".", "/" },
+ { "/", "./", "/" },
+ { "/", "/.", "/" },
+ { "/", "/./", "/" },
#else
{ "/etc", "../usr/share", "\\usr\\share" },
{ "/", "/foo/bar", "\\foo\\bar" },
@@ -1066,6 +1078,18 @@ test_paths (void)
{ "///triple/slash", ".", "\\triple\\slash" },
{ "//double/slash", ".", "//double/slash\\" },
{ "/cwd/../with/./complexities/", "./hello", "\\with\\complexities\\hello" },
+ { "/", ".dot-dir", "/.dot-dir" },
+ { "/cwd", "..", "/" },
+ { "/etc", "hello/..", "/etc" },
+ { "/etc", "hello/../", "/etc" },
+ { "/", "..", "/" },
+ { "/", "../", "/" },
+ { "/", "/..", "/" },
+ { "/", "/../", "/" },
+ { "/", ".", "/" },
+ { "/", "./", "/" },
+ { "/", "/.", "/" },
+ { "/", "/./", "/" },
{ "\\etc", "..\\usr\\share", "\\usr\\share" },
{ "\\", "\\foo\\bar", "\\foo\\bar" },
@@ -1081,6 +1105,18 @@ test_paths (void)
{ "\\\\\\triple\\slash", ".", "\\triple\\slash" },
{ "\\\\double\\slash", ".", "\\\\double\\slash\\" },
{ "\\cwd\\..\\with\\.\\complexities\\", ".\\hello", "\\with\\complexities\\hello" },
+ { "\\", ".dot-dir", "\\.dot-dir" },
+ { "\\cwd", "..", "\\" },
+ { "\\etc", "hello\\..", "\\etc" },
+ { "\\etc", "hello\\..\\", "\\etc" },
+ { "\\", "..", "\\" },
+ { "\\", "..\\", "\\" },
+ { "\\", "\\..", "\\" },
+ { "\\", "\\..\\", "\\" },
+ { "\\", ".", "\\" },
+ { "\\", ".\\", "\\" },
+ { "\\", "\\.", "\\" },
+ { "\\", "\\.\\", "\\" },
#endif
};
const guint n_canonicalize_filename_checks = G_N_ELEMENTS (canonicalize_filename_checks);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]