[glib] fileutils: Add g_canonicalize_filename



commit b9b642de06e714584e89aa7b8d878a98599538ed
Author: Georges Basile Stavracas Neto <georges stavracas gmail com>
Date:   Fri Apr 27 12:44:30 2018 -0300

    fileutils: Add g_canonicalize_filename
    
    Getting the canonical filename is a relatively common
    operation when dealing with symbolic links.
    
    This commit exposes GLocalFile's implementation of a
    filename canonicalizer function, with a few additions
    to make it more useful for consumers of it.
    
    Instead of always resolving against g_get_current_dir(),
    the exposed function accepts the base directory as an
    additional parameter.
    
    This will be used to fix the GTimeZone code to retrieve
    the local timezone from a zoneinfo symlink.
    
    (Tweaked by Philip Withnall <withnall endlessm com> to drop g_autofree
    usage and add some additional tests.)
    
    https://bugzilla.gnome.org/show_bug.cgi?id=111848

 docs/reference/glib/glib-sections.txt |   1 +
 glib/gfileutils.c                     | 134 ++++++++++++++++++++++++++++++++++
 glib/gfileutils.h                     |   4 +
 tests/testglib.c                      |  74 +++++++++++++++++++
 4 files changed, 213 insertions(+)
---
diff --git a/docs/reference/glib/glib-sections.txt b/docs/reference/glib/glib-sections.txt
index f6322f052..47baf5eb8 100644
--- a/docs/reference/glib/glib-sections.txt
+++ b/docs/reference/glib/glib-sections.txt
@@ -1852,6 +1852,7 @@ g_get_tmp_dir
 g_get_current_dir
 g_basename
 g_dirname
+g_canonicalize_filename
 g_path_is_absolute
 g_path_skip_root
 g_path_get_basename
diff --git a/glib/gfileutils.c b/glib/gfileutils.c
index a084dfcc2..18e0a9fec 100644
--- a/glib/gfileutils.c
+++ b/glib/gfileutils.c
@@ -2477,6 +2477,140 @@ g_path_get_dirname (const gchar *file_name)
   return base;
 }
 
+/**
+ * g_canonicalize_filename:
+ * @filename: (type filename): the name of the file
+ * @relative_to: (type filename) (nullable): the relative directory, or %NULL
+ * to use the current working directory
+ *
+ * Gets the canonical file name from @filename. Repeated slashes are collapsed (except
+ * a leading `//`, which is kept), and `..` and `.` are resolved against @relative_to.
+ *
+ * Symlinks are not followed, and the returned path is guaranteed to be absolute.
+ *
+ * If @filename is an absolute path, @relative_to is ignored. Otherwise,
+ * @relative_to will be prepended to @filename to make it absolute. @relative_to
+ * must be an absolute path, or %NULL. If @relative_to is %NULL, it will fall
+ * back to g_get_current_dir().
+ *
+ * This function never fails, and will canonicalize file paths even if they don't
+ * exist. No file system I/O is done.
+ *
+ * Returns: (type filename) (transfer full): a newly allocated string with the
+ * canonical file path
+ *
+ * Since: 2.58
+ */
+gchar *
+g_canonicalize_filename (const gchar *filename,
+                         const gchar *relative_to)
+{
+  gchar *canon, *start, *p, *q;
+  guint i;
+
+  g_return_val_if_fail (relative_to == NULL || g_path_is_absolute (relative_to), NULL);
+
+  if (!g_path_is_absolute (filename))
+    {
+      gchar *cwd_allocated = NULL; /* only set when we own the base directory string */
+      const gchar  *cwd;
+
+      if (relative_to != NULL)
+        cwd = relative_to;
+      else
+        cwd = cwd_allocated = g_get_current_dir ();
+
+      canon = g_build_filename (cwd, filename, NULL);
+      g_free (cwd_allocated);
+    }
+  else
+    {
+      canon = g_strdup (filename);
+    }
+
+  start = (char *)g_path_skip_root (canon);
+
+  if (start == NULL)
+    {
+      /* g_path_skip_root() found no root. This shouldn't really happen, as
+         g_get_current_dir() should return an absolute pathname, but bug 573843
+         shows it does not always do so. Return @filename rooted, uncanonicalized. */
+      g_free (canon);
+      return g_build_filename (G_DIR_SEPARATOR_S, filename, NULL);
+    }
+
+  /* POSIX allows double slashes at the start to
+   * mean something special (as does Windows too).
+   * So, "//" != "/", but more than two slashes
+   * are treated as "/". Count the separators that
+   * immediately precede start, scanning backwards. */
+  i = 0;
+  for (p = start - 1;
+       (p >= canon) &&
+         G_IS_DIR_SEPARATOR (*p);
+       p--)
+    i++;
+  if (i > 2)
+    {
+      i -= 1; /* keep exactly one of the leading separators */
+      start -= i;
+      memmove (start, start+i, strlen (start+i) + 1); /* shift the tail left, NUL included */
+    }
+
+  /* Make sure we're using the canonical dir separator */
+  p++; /* the counting loop left p one before the first leading separator */
+  while (p < start && G_IS_DIR_SEPARATOR (*p))
+    *p++ = G_DIR_SEPARATOR;
+
+  p = start;
+  while (*p != 0)
+    {
+      if (p[0] == '.' && (p[1] == 0 || G_IS_DIR_SEPARATOR (p[1])))
+        {
+          memmove (p, p+1, strlen (p+1)+1); /* drop the "."; a following separator is removed below */
+        }
+      else if (p[0] == '.' && p[1] == '.' && (p[2] == 0 || G_IS_DIR_SEPARATOR (p[2])))
+        {
+          q = p + 2; /* first character after the ".." */
+          /* Skip previous separator, landing on the end of the previous component */
+          p = p - 2;
+          if (p < start)
+            p = start; /* never climb above the root */
+          while (p > start && !G_IS_DIR_SEPARATOR (*p))
+            p--;
+          if (G_IS_DIR_SEPARATOR (*p))
+            *p++ = G_DIR_SEPARATOR;
+          memmove (p, q, strlen (q)+1); /* overwrite the previous component and ".." with the tail */
+        }
+      else
+        {
+          /* Skip until next separator */
+          while (*p != 0 && !G_IS_DIR_SEPARATOR (*p))
+            p++;
+
+          if (*p != 0)
+            {
+              /* Canonicalize one separator */
+              *p++ = G_DIR_SEPARATOR;
+            }
+        }
+
+      /* Remove additional separators, i.e. any run of them now sitting at p */
+      q = p;
+      while (*q && G_IS_DIR_SEPARATOR (*q))
+        q++;
+
+      if (p != q)
+        memmove (p, q, strlen (q) + 1);
+    }
+
+  /* Remove a trailing separator if one is left; the root part before start is never stripped */
+  if (p > start && G_IS_DIR_SEPARATOR (*(p-1)))
+    *(p-1) = 0;
+
+  return canon;
+}
+
 #if defined(MAXPATHLEN)
 #define G_PATH_LENGTH MAXPATHLEN
 #elif defined(PATH_MAX)
diff --git a/glib/gfileutils.h b/glib/gfileutils.h
index b24651e3e..bcaaa40ff 100644
--- a/glib/gfileutils.h
+++ b/glib/gfileutils.h
@@ -172,6 +172,10 @@ gchar *g_path_get_basename (const gchar *file_name) G_GNUC_MALLOC;
 GLIB_AVAILABLE_IN_ALL
 gchar *g_path_get_dirname  (const gchar *file_name) G_GNUC_MALLOC;
 
+GLIB_AVAILABLE_IN_2_58
+gchar *g_canonicalize_filename (const gchar *filename,
+                                const gchar *relative_to) G_GNUC_MALLOC;
+
 G_END_DECLS
 
 #endif /* __G_FILEUTILS_H__ */
diff --git a/tests/testglib.c b/tests/testglib.c
index 041e336e7..ca9153a13 100644
--- a/tests/testglib.c
+++ b/tests/testglib.c
@@ -838,6 +838,43 @@ test_paths (void)
     { "", NULL },
   };
   const guint n_skip_root_checks = G_N_ELEMENTS (skip_root_checks);
+  struct {
+    gchar *cwd;            /* base directory, passed as relative_to */
+    gchar *relative_path;  /* input passed as filename (may itself be absolute) */
+    gchar *canonical_path; /* expected return value */
+  } canonicalize_filename_checks[] = {
+    { "/etc", "../usr/share", "/usr/share" },
+    { "/", "/foo/bar", "/foo/bar" },
+    { "/usr/bin", "../../foo/bar", "/foo/bar" },
+    { "/", "../../foo/bar", "/foo/bar" },
+    { "/double//dash", "../../foo/bar", "/foo/bar" },
+    { "/usr/share/foo", ".././././bar", "/usr/share/bar" },
+    { "/foo/bar", "../bar/./.././bar", "/foo/bar" },
+    { "/test///dir", "../../././foo/bar", "/foo/bar" },
+    { "/test///dir", "../../././/foo///bar", "/foo/bar" },
+    { "/etc", "///triple/slash", "/triple/slash" },
+    { "/etc", "//double/slash", "//double/slash" },
+    { "///triple/slash", ".", "/triple/slash" },
+    { "//double/slash", ".", "//double/slash" },
+    { "/cwd/../with/./complexities/", "./hello", "/with/complexities/hello" },
+#ifdef G_OS_WIN32
+    { "\\etc", "..\\usr\\share", "\\usr\\share" },
+    { "\\", "\\foo\\bar", "\\foo\\bar" },
+    { "\\usr\\bin", "..\\..\\foo\\bar", "\\foo\\bar" },
+    { "\\", "..\\..\\foo\\bar", "\\foo\\bar" },
+    { "\\double\\\\dash", "..\\..\\foo\\bar", "\\foo\\bar" },
+    { "\\usr\\share\\foo", "..\\.\\.\\.\\bar", "\\usr\\share\\bar" },
+    { "\\foo\\bar", "..\\bar\\.\\..\\.\\bar", "\\foo\\bar" },
+    { "\\test\\\\\\dir", "..\\..\\.\\.\\foo\\bar", "\\foo\\bar" },
+    { "\\test\\\\\\dir", "..\\..\\.\\.\\\\foo\\\\\\bar", "\\foo\\bar" },
+    { "\\etc", "\\\\\\triple\\slash", "\\triple\\slash" },
+    { "\\etc", "\\\\double\\slash", "\\\\double\\slash" },
+    { "\\\\\\triple\\slash", ".", "\\triple\\slash" },
+    { "\\\\double\\slash", ".", "\\\\double\\slash" },
+    { "\\cwd\\..\\with\\.\\complexities\\", ".\\hello", "\\with\\complexities\\hello" }, /* ".." removes "cwd", as in the "/" case */
+#endif
+  };
+  const guint n_canonicalize_filename_checks = G_N_ELEMENTS (canonicalize_filename_checks);
   gchar *string;
   guint i;
   if (g_test_verbose())
@@ -896,6 +933,43 @@ test_paths (void)
     }
   if (g_test_verbose())
     g_printerr ("ok\n");
+
+  if (g_test_verbose ())
+    g_printerr ("checking g_canonicalize_filename()...");
+  for (i = 0; i < n_canonicalize_filename_checks; i++)
+    {
+      gchar *canonical_path = g_canonicalize_filename (canonicalize_filename_checks[i].relative_path,
+                                                       canonicalize_filename_checks[i].cwd);
+      if (g_strcmp0 (canonical_path, canonicalize_filename_checks[i].canonical_path) != 0)
+        {
+          g_error ("\nfailed for \"%s\"==\"%s\" (returned: \"%s\")\n",
+                   canonicalize_filename_checks[i].relative_path,
+                   canonicalize_filename_checks[i].canonical_path,
+                   canonical_path);
+        }
+      g_free (canonical_path);
+    }
+  if (g_test_verbose ())
+    g_printerr ("ok\n");
+
+  if (g_test_verbose ())
+    g_printerr ("checking g_canonicalize_filename() supports NULL...");
+
+    {
+      const gchar *relative_path = "./";
+      gchar *canonical_path = g_canonicalize_filename (relative_path, NULL);
+      gchar *cwd = g_get_current_dir ();
+      if (g_strcmp0 (canonical_path, cwd) != 0)
+        {
+          g_error ("\nfailed for \"%s\"==\"%s\" (returned: \"%s\")\n",
+                   relative_path, cwd, canonical_path);
+        }
+      g_free (cwd);
+      g_free (canonical_path);
+    }
+
+  if (g_test_verbose ())
+    g_printerr ("ok\n");
 }
 
 static void


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]