[gnome-autoar/wip/razvan/extract-refactor: 1/3] AutoarExtract: refactor pathname building process



commit 179aea0352d10d1b15f65bbb7f0d608ef911ed99
Author: Razvan Chitu <razvan ch95 gmail com>
Date:   Sat May 28 00:01:04 2016 +0300

    AutoarExtract: refactor pathname building process
    
    Previously, operations with pathnames obtained from libarchive were performed
    directly on strings, without the use of the GFile API. This lead to an
    increasing complexity of the code and also to oversights. Also, the process of
    turning a path in the archive to a path on the file system had a complex logic.
    It relied on multiple parameters that could have different meanings according to
    some context elements, such as the number of files in an archive. One example is
    the `top_level_dir` member, which could be a file if the archive had just one
    element inside, or a directory in other situations.
    
    In order to fix these issues, use the GFile API for path operations, like
    finding a common prefix, path concatenation etc. For the logic part, make sure
    that the top level directory is always a directory so the number of checks can
    be reduced.

 gnome-autoar/autoar-extract.c |  233 ++++++++++++++++++-----------------------
 1 files changed, 102 insertions(+), 131 deletions(-)
---
diff --git a/gnome-autoar/autoar-extract.c b/gnome-autoar/autoar-extract.c
index c5886e6..9d5b5c3 100644
--- a/gnome-autoar/autoar-extract.c
+++ b/gnome-autoar/autoar-extract.c
@@ -125,6 +125,8 @@ struct _AutoarExtractPrivate
   guint64 size;
   guint64 completed_size;
 
+  GList *files_list;
+
   guint files;
   guint completed_files;
 
@@ -143,14 +145,12 @@ struct _AutoarExtractPrivate
   GArray     *extracted_dir_list;
   GFile      *top_level_dir;
 
-  int   pathname_prefix_len;
-  char *pathname_basename;
+  GFile *prefix;
+
   char *suggested_destname;
 
   int in_thread         : 1;
   int use_raw_format    : 1;
-  int has_top_level_dir : 1;
-  int has_only_one_file : 1;
 };
 
 struct _GFileAndInfo
@@ -530,6 +530,12 @@ autoar_extract_dispose (GObject *object)
   g_clear_object (&(priv->arpref));
   g_clear_object (&(priv->top_level_dir));
   g_clear_object (&(priv->cancellable));
+  g_clear_object (&(priv->prefix));
+
+  if (priv->files_list != NULL) {
+    g_list_free_full (priv->files_list, g_object_unref);
+    priv->files_list = NULL;
+  }
 
   if (priv->userhash != NULL) {
     g_hash_table_unref (priv->userhash);
@@ -584,9 +590,6 @@ autoar_extract_finalize (GObject *object)
     priv->error = NULL;
   }
 
-  g_free (priv->pathname_basename);
-  priv->pathname_basename = NULL;
-
   g_free (priv->suggested_destname);
   priv->suggested_destname = NULL;
 
@@ -857,42 +860,50 @@ autoar_extract_signal_error (AutoarExtract *arextract)
 }
 
 static GFile*
+autoar_extract_get_common_prefix (GList *files,
+                                  GFile *root)
+{
+  GFile *prefix;
+  GFile *file;
+  GList *l;
+
+  prefix = g_object_ref (files->data);
+
+  while (!g_file_has_parent (prefix, root)) {
+    file = g_file_get_parent (prefix);
+    g_object_unref (prefix);
+    prefix = file;
+  }
+
+  for (l = files->next; l; l = l->next) {
+    file = l->data;
+
+    if (!g_file_has_prefix (file, prefix) && !g_file_equal (file, prefix)) {
+      g_object_unref (prefix);
+      return NULL;
+    }
+  }
+
+  return prefix;
+}
+
+static GFile*
 autoar_extract_do_sanitize_pathname (const char *pathname,
-                                     const char *skip_chars,
                                      GFile *top_level_dir) {
-  const char *pathname_skip_prefix;
-  char **pathname_chunks;
   GFile *extracted_filename;
-  int i;
-
-  if (skip_chars != NULL)
-    pathname_skip_prefix = pathname + strspn (pathname, "./");
-  else
-    pathname_skip_prefix = pathname;
 
-  for (; *pathname_skip_prefix == '/'; pathname_skip_prefix++);
-  extracted_filename = g_file_get_child (top_level_dir, pathname_skip_prefix);
+  extracted_filename = g_file_get_child (top_level_dir, pathname);
 
-  /* Extracted file should not be located outside the top level directory. */
   if (!g_file_has_prefix (extracted_filename, top_level_dir)) {
-    pathname_chunks = g_strsplit (pathname_skip_prefix, "/", G_MAXINT);
-    for (i = 0; pathname_chunks[i] != NULL; i++) {
-      if (strcmp (pathname_chunks[i], "..") == 0) {
-        char *pathname_sanitized;
+    char *basename;
 
-        *pathname_chunks[i] = '\0';
-        pathname_sanitized = g_strjoinv ("/", pathname_chunks);
+    basename = g_file_get_basename (extracted_filename);
 
-        g_object_unref (extracted_filename);
-        extracted_filename = g_file_get_child (top_level_dir, pathname_sanitized);
+    g_object_unref (extracted_filename);
 
-        g_free (pathname_sanitized);
+    extracted_filename = g_file_get_child (top_level_dir, basename);
 
-        if (g_file_has_prefix (extracted_filename, top_level_dir))
-          break;
-      }
-    }
-    g_strfreev (pathname_chunks);
+    g_free (basename);
   }
 
   return extracted_filename;
@@ -1479,6 +1490,8 @@ autoar_extract_init (AutoarExtract *arextract)
   priv->size = 0;
   priv->completed_size = 0;
 
+  priv->files_list = NULL;
+
   priv->files = 0;
   priv->completed_files = 0;
 
@@ -1497,14 +1510,10 @@ autoar_extract_init (AutoarExtract *arextract)
   g_array_set_clear_func (priv->extracted_dir_list, g_file_and_info_free);
   priv->top_level_dir = NULL;
 
-  priv->pathname_prefix_len = 0;
-  priv->pathname_basename = NULL;
   priv->suggested_destname = NULL;
 
   priv->in_thread = FALSE;
   priv->use_raw_format = FALSE;
-  priv->has_top_level_dir = TRUE;
-  priv->has_only_one_file = TRUE;
 }
 
 static AutoarExtract*
@@ -1721,8 +1730,6 @@ autoar_extract_step_scan_toplevel (AutoarExtract *arextract)
   struct archive *a;
   struct archive_entry *entry;
 
-  char *pathname_prefix;
-
   AutoarExtractPrivate *priv;
   int r;
 
@@ -1750,13 +1757,10 @@ autoar_extract_step_scan_toplevel (AutoarExtract *arextract)
     priv->use_raw_format = TRUE;
   }
 
-  pathname_prefix = NULL;
-
   while ((r = archive_read_next_header (a, &entry)) == ARCHIVE_OK) {
     const char *pathname;
 
     if (g_cancellable_is_cancelled (priv->cancellable)) {
-      g_free (pathname_prefix);
       archive_read_free (a);
       return;
     }
@@ -1764,6 +1768,9 @@ autoar_extract_step_scan_toplevel (AutoarExtract *arextract)
     pathname = archive_entry_pathname (entry);
     g_debug ("autoar_extract_step_scan_toplevel: %d: pathname = %s", priv->files, pathname);
 
+    priv->files_list = g_list_prepend (priv->files_list,
+                                       g_file_get_child (priv->output_file, pathname));
+
     if (!priv->use_raw_format && !autoar_extract_do_pattern_check (pathname, priv->pattern_compiled)) {
       g_hash_table_insert (priv->bad_filename, g_strdup (pathname), GUINT_TO_POINTER (TRUE));
       continue;
@@ -1771,34 +1778,12 @@ autoar_extract_step_scan_toplevel (AutoarExtract *arextract)
 
     g_debug ("autoar_extract_step_scan_toplevel: %d: pattern check passed", priv->files);
 
-    if (pathname_prefix == NULL) {
-      char *dir_sep_location;
-      size_t skip_len, prefix_len;
-
-      skip_len = strspn (pathname, "./");
-      dir_sep_location = strchr (pathname + skip_len, '/');
-      if (dir_sep_location == NULL) {
-        prefix_len = strlen (pathname);
-      } else {
-        prefix_len = dir_sep_location - pathname;
-      }
-      pathname_prefix = g_strndup (pathname, prefix_len);
-      g_debug ("autoar_extract_step_scan_toplevel: pathname_prefix = %s", pathname_prefix);
-
-      priv->pathname_prefix_len = prefix_len;
-      priv->pathname_basename = g_path_get_basename (pathname);
-    } else {
-      priv->has_only_one_file = FALSE;
-      if (!g_str_has_prefix (pathname, pathname_prefix)) {
-        priv->has_top_level_dir = FALSE;
-      }
-    }
     priv->files++;
     priv->size += archive_entry_size (entry);
     archive_read_data_skip (a);
   }
 
-  if (pathname_prefix == NULL) {
+  if (priv->files_list == NULL) {
     if (priv->error == NULL) {
       priv->error = g_error_new (AUTOAR_EXTRACT_ERROR, EMPTY_ARCHIVE_ERRNO,
                                  "\'%s\': %s", priv->source, "empty archive");
@@ -1811,7 +1796,6 @@ autoar_extract_step_scan_toplevel (AutoarExtract *arextract)
     if (priv->error == NULL) {
       priv->error = autoar_common_g_error_new_a (a, priv->source);
     }
-    g_free (pathname_prefix);
     archive_read_free (a);
     return;
   }
@@ -1821,13 +1805,23 @@ autoar_extract_step_scan_toplevel (AutoarExtract *arextract)
   if (priv->size <= 0)
     priv->size = G_MAXUINT64;
 
-  g_free (pathname_prefix);
   archive_read_free (a);
 
-  g_debug ("autoar_extract_step_scan_toplevel: has_top_level_dir = %s",
-           priv->has_top_level_dir ? "TRUE" : "FALSE");
-  g_debug ("autoar_extract_step_scan_toplevel: has_only_one_file = %s",
-           priv->has_only_one_file ? "TRUE" : "FALSE");
+  g_debug ("autoar_extract_step_scan_toplevel: files = %d", priv->files);
+
+  priv->files_list = g_list_reverse (priv->files_list);
+
+  priv->prefix = autoar_extract_get_common_prefix (priv->files_list, priv->output_file);
+
+  if (priv->prefix != NULL) {
+    char *path_prefix;
+
+    path_prefix = g_file_get_path (priv->prefix);
+    g_debug ("autoar_extract_step_scan_toplevel: pathname_prefix = %s", path_prefix);
+
+    g_free (path_prefix);
+  }
+
   autoar_extract_signal_scanned (arextract);
 }
 
@@ -1836,60 +1830,49 @@ autoar_extract_step_decide_dest (AutoarExtract *arextract) {
   /* Step 2: Create necessary directories
    * If the archive contains only one file, we don't create the directory */
 
-  const char *pathname_extension;
-
   AutoarExtractPrivate *priv;
-  int i;
 
   priv = arextract->priv;
 
   g_debug ("autoar_extract_step_decide_dest: called");
 
-  {
-    pathname_extension = autoar_common_get_filename_extension (priv->pathname_basename);
-    if (priv->has_only_one_file && (pathname_extension != priv->pathname_basename)) {
-      /* If we only have one file, we have to add the file extension.
-       * Although we use the variable `top_level_dir', it may be a regular
-       * file, so the extension is important. */
-      char *new_filename = g_strconcat (priv->suggested_destname, pathname_extension, NULL);
-      priv->top_level_dir = g_file_get_child (priv->output_file, new_filename);
-      g_free (new_filename);
-    } else {
-      priv->top_level_dir = g_file_get_child (priv->output_file, priv->suggested_destname);
-      pathname_extension = "";
-    }
-  }
+  if (priv->prefix != NULL) {
+    /* We must check if the archive and the prefix have the same name (minus
+     * the extension). If they do, then the destination should be the output
+     * directory itself.
+     */
+    char *prefix_name;
+    char *prefix_name_no_ext;
 
-  {
-    char *top_level_dir_basename_modified = NULL;
-    for (i = 1; g_file_query_exists (priv->top_level_dir, priv->cancellable); i++) {
-      g_free (top_level_dir_basename_modified);
-      g_object_unref (priv->top_level_dir);
-
-      if (g_cancellable_is_cancelled (priv->cancellable))
-        return;
-
-      if (priv->has_only_one_file) {
-        top_level_dir_basename_modified = g_strdup_printf ("%s(%d)%s",
-                                                           priv->suggested_destname,
-                                                           i,
-                                                           pathname_extension);
-      } else {
-        top_level_dir_basename_modified = g_strdup_printf ("%s(%d)",
-                                                           priv->suggested_destname,
-                                                           i);
-      }
-      priv->top_level_dir = g_file_get_child (priv->output_file,
-                                              top_level_dir_basename_modified);
+    prefix_name = g_file_get_basename (priv->prefix);
+    prefix_name_no_ext = autoar_common_get_basename_remove_extension (prefix_name);
+
+    if (g_strcmp0 (prefix_name_no_ext, priv->suggested_destname) == 0) {
+      priv->top_level_dir = g_object_ref (priv->output_file);
     }
-    g_free (top_level_dir_basename_modified);
+
+    g_free (prefix_name);
+    g_free (prefix_name_no_ext);
+  }
+  /* If none of the above situations apply, the top level directory has the
+   * name of the source archive (minus the extension)
+   */
+  if (priv->top_level_dir == NULL) {
+    priv->top_level_dir = g_file_get_child (priv->output_file,
+                                            priv->suggested_destname);
   }
 
-  if (!(priv->has_only_one_file))
-    g_file_make_directory_with_parents (priv->top_level_dir, priv->cancellable, &(priv->error));
+  g_file_make_directory_with_parents (priv->top_level_dir, priv->cancellable,
+                                      &(priv->error));
 
-  if (priv->error != NULL)
+  if (priv->error != NULL && priv->error->code == G_IO_ERROR_EXISTS) {
+    g_error_free (priv->error);
+    priv->error = NULL;
+  }
+
+  if (priv->error != NULL) {
     return;
+  }
 
   autoar_extract_signal_decide_dest (arextract);
 }
@@ -1942,24 +1925,12 @@ autoar_extract_step_extract (AutoarExtract *arextract) {
     if (GPOINTER_TO_UINT (g_hash_table_lookup (priv->bad_filename, pathname)))
       continue;
 
-    if (!(priv->has_only_one_file)) {
-      if (priv->has_top_level_dir) {
-        extracted_filename =
-          autoar_extract_do_sanitize_pathname (pathname + priv->pathname_prefix_len,
-                                               NULL, priv->top_level_dir);
-        if (hardlink != NULL)
-          hardlink_filename =
-            autoar_extract_do_sanitize_pathname (hardlink + priv->pathname_prefix_len,
-                                                 NULL, priv->top_level_dir);
-      } else {
-        extracted_filename =
-          autoar_extract_do_sanitize_pathname (pathname, "./", priv->top_level_dir);
-        if (hardlink != NULL)
-          hardlink_filename =
-            autoar_extract_do_sanitize_pathname (pathname, "./", priv->top_level_dir);
-      }
-    } else {
-      extracted_filename = g_object_ref (priv->top_level_dir);
+    extracted_filename =
+      autoar_extract_do_sanitize_pathname (pathname, priv->top_level_dir);
+
+    if (hardlink != NULL) {
+      hardlink_filename =
+        autoar_extract_do_sanitize_pathname (hardlink, priv->top_level_dir);
     }
 
     autoar_extract_do_write_entry (arextract, a, entry,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]