[libgsf] Zip: Properly read archives with 64k-1 or more members.
- From: Morten Welinder <mortenw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libgsf] Zip: Properly read archives with 64k-1 or more members.
- Date: Fri, 21 Nov 2014 01:03:54 +0000 (UTC)
commit 38173a818efe19265f54f24c63bd23899ca7d961
Author: Morten Welinder <terra gnome org>
Date: Thu Nov 20 20:02:53 2014 -0500
Zip: Properly read archives with 64k-1 or more members.
Also speed up things for the lots-of-members case.
ChangeLog | 8 +++
NEWS | 2 +
gsf/gsf-infile-zip.c | 129 ++++++++++++++++++++++++++++++++-----------------
gsf/gsf-zip-impl.h | 21 ++++++++-
gsf/gsf-zip-utils.c | 28 +++++-----
5 files changed, 128 insertions(+), 60 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index fef3150..dc77c43 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
2014-11-20 Morten Welinder <terra gnome org>
+ * gsf/gsf-infile-zip.c (zip_read_dirents): Handle zip64 format for
+ archives with 65535 or more members.
+ (ZipInfo): Larger fields; user GPtrArray for speed. All users
+ changed.
+
+ * gsf/gsf-zip-impl.h (GsfZipVDir): Use a GPtrArray for speed. All
+ users changed.
+
* gsf/gsf-input.c: Update to new introspection syntax.
2014-11-15 Morten Welinder <terra gnome org>
diff --git a/NEWS b/NEWS
index b3e8408..234dcad 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,8 @@ Morten:
* Plug leaks.
* Dead kittens.
* Reduce number of CRITICALs for corrupted files.
+ * Read zip files with 64k+ members. [Part of #732209]
+ * Speed up zip file reading for lots of members.
--------------------------------------------------------------------------
libgsf 1.14.30
diff --git a/gsf/gsf-infile-zip.c b/gsf/gsf-infile-zip.c
index 01b7aa6..6813a7a 100644
--- a/gsf/gsf-infile-zip.c
+++ b/gsf/gsf-infile-zip.c
@@ -42,19 +42,20 @@ enum {
static GObjectClass *parent_class;
typedef struct {
- guint16 entries;
- guint32 dir_pos;
- GList *dirent_list;
+ guint32 entries;
+ gsf_off_t dir_pos;
+ GPtrArray *dirents;
GsfZipVDir *vdir;
int ref_count;
} ZipInfo;
struct _GsfInfileZip {
- GsfInfile parent;
+ GsfInfile base;
GsfInput *source;
ZipInfo *info;
+ gboolean zip64;
GsfZipVDir *vdir;
@@ -100,10 +101,10 @@ zip_make_modtime (guint32 dostime)
static GsfZipVDir *
vdir_child_by_name (GsfZipVDir *vdir, char const *name)
{
- GSList *l;
+ unsigned ui;
- for (l = vdir->children; l; l = l->next) {
- GsfZipVDir *child = (GsfZipVDir *) l->data;
+ for (ui = 0; ui < vdir->children->len; ui++) {
+ GsfZipVDir *child = g_ptr_array_index (vdir->children, ui);
if (strcmp (child->name, name) == 0)
return child;
}
@@ -113,7 +114,9 @@ vdir_child_by_name (GsfZipVDir *vdir, char const *name)
static GsfZipVDir *
vdir_child_by_index (GsfZipVDir *vdir, int target)
{
- return g_slist_nth_data (vdir->children, target);
+ return (unsigned)target < vdir->children->len
+ ? g_ptr_array_index (vdir->children, target)
+ : NULL;
}
static void
@@ -143,16 +146,15 @@ vdir_insert (GsfZipVDir *vdir, char const * name, GsfZipDirent *dirent)
}
static gsf_off_t
-zip_find_trailer (GsfInfileZip *zip)
+zip_find_trailer (GsfInfileZip *zip, guint32 trailer_signature, guint size)
{
- static guint8 const trailer_signature[] =
- { 'P', 'K', 0x05, 0x06 };
gsf_off_t offset, trailer_offset, filesize;
gsf_off_t maplen;
guint8 const *data;
+ guchar sig1 = trailer_signature & 0xff;
filesize = gsf_input_size (zip->source);
- if (filesize < ZIP_TRAILER_SIZE)
+ if (filesize < size)
return -1;
trailer_offset = filesize;
@@ -173,9 +175,9 @@ zip_find_trailer (GsfInfileZip *zip)
p = (guchar *) data;
for (s = p + maplen - 1; (s >= p); s--, trailer_offset--) {
- if ((*s == 'P') &&
- (p + maplen - 1 - s > ZIP_TRAILER_SIZE - 2) &&
- !memcmp (s, trailer_signature, sizeof (trailer_signature))) {
+ if (*s == sig1 &&
+ p + maplen - 1 - s > size - 2 &&
+ !memcmp (s, &trailer_signature, sizeof (trailer_signature))) {
return --trailer_offset;
}
}
@@ -269,16 +271,17 @@ zip_info_ref (ZipInfo *info)
static void
zip_info_unref (ZipInfo *info)
{
- GList *p;
+ unsigned ui;
if (info->ref_count-- != 1)
return;
gsf_zip_vdir_free (info->vdir, FALSE);
- for (p = info->dirent_list; p != NULL; p = p->next)
- gsf_zip_dirent_free ((GsfZipDirent *) p->data);
-
- g_list_free (info->dirent_list);
+ for (ui = 0; ui < info->dirents->len; ui++) {
+ GsfZipDirent *e = g_ptr_array_index (info->dirents, ui);
+ gsf_zip_dirent_free (e);
+ }
+ g_ptr_array_free (info->dirents, TRUE);
g_free (info);
}
@@ -317,33 +320,64 @@ zip_dup (GsfInfileZip const *src, GError **err)
static gboolean
zip_read_dirents (GsfInfileZip *zip)
{
- guint8 const *trailer;
- guint16 entries, i;
- guint32 dir_pos;
+ guint8 const *data;
+ guint32 entries, i;
ZipInfo *info;
- gsf_off_t offset;
+ gsf_off_t dir_pos, offset;
+ gboolean need_zip64 = FALSE;
/* Find and check the trailing header */
- offset = zip_find_trailer (zip);
- if (offset < 0) {
- zip->err = g_error_new (gsf_input_error_id (), 0,
- _("No Zip trailer"));
- return TRUE;
+ offset = zip_find_trailer (zip, ZIP_TRAILER_SIGNATURE, ZIP_TRAILER_SIZE);
+ if (offset < 0 ||
+ gsf_input_seek (zip->source, offset, G_SEEK_SET))
+ goto bad;
+
+ data = gsf_input_read (zip->source, ZIP_TRAILER_SIZE, NULL);
+ if (!data)
+ goto bad;
+
+ entries = GSF_LE_GET_GUINT16 (data + ZIP_TRAILER_ENTRIES);
+ need_zip64 |= (entries == 0xffffu);
+ dir_pos = GSF_LE_GET_GUINT32 (data + ZIP_TRAILER_DIR_POS);
+ need_zip64 |= (dir_pos == 0xffffffffu);
+
+ if (need_zip64) {
+ guint32 disk, disks;
+ gsf_off_t zip64_eod_offset;
+
+ zip->zip64 = TRUE;
+
+ /* Find the zip64 locator */
+ offset = zip_find_trailer (zip, ZIP_ZIP64_LOCATOR_SIGNATURE, ZIP_ZIP64_LOCATOR_SIZE);
+ if (offset < 0 ||
+ gsf_input_seek (zip->source, offset, G_SEEK_SET))
+ goto bad;
+
+ data = gsf_input_read (zip->source, ZIP_ZIP64_LOCATOR_SIZE, NULL);
+ if (!data)
+ goto bad;
+
+ disk = GSF_LE_GET_GUINT32 (data + ZIP_ZIP64_LOCATOR_DISK);
+ zip64_eod_offset = GSF_LE_GET_GUINT64 (data + ZIP_ZIP64_LOCATOR_OFFSET);
+ disks = GSF_LE_GET_GUINT32 (data + ZIP_ZIP64_LOCATOR_DISKS);
+
+ if (disk != 0 || disks != 1)
+ goto bad;
+
+ if (gsf_input_seek (zip->source, zip64_eod_offset, G_SEEK_SET))
+ goto bad;
+ data = gsf_input_read (zip->source, ZIP_TRAILER64_SIZE, NULL);
+ if (!data)
+ goto bad;
+
+ entries = GSF_LE_GET_GUINT64 (data + ZIP_TRAILER64_ENTRIES);
+ dir_pos = GSF_LE_GET_GUINT64 (data + ZIP_TRAILER64_DIR_POS);
}
- if (gsf_input_seek (zip->source, offset, G_SEEK_SET) ||
- NULL == (trailer = gsf_input_read (zip->source, ZIP_TRAILER_SIZE, NULL))) {
- zip->err = g_error_new (gsf_input_error_id (), 0,
- _("Error reading Zip signature"));
- return TRUE;
- }
-
- entries = GSF_LE_GET_GUINT32 (trailer + ZIP_TRAILER_ENTRIES);
- dir_pos = GSF_LE_GET_GUINT32 (trailer + ZIP_TRAILER_DIR_POS);
-
info = g_new0 (ZipInfo, 1);
zip->info = info;
+ info->dirents = g_ptr_array_new ();
info->ref_count = 1;
info->entries = entries;
info->dir_pos = dir_pos;
@@ -359,22 +393,26 @@ zip_read_dirents (GsfInfileZip *zip)
return TRUE;
}
- info->dirent_list = g_list_append (info->dirent_list, d);
+ g_ptr_array_add (info->dirents, d);
}
return FALSE;
+
+bad:
+ zip->err = g_error_new (gsf_input_error_id (), 0,
+ _("Broken zip file structure"));
+ return TRUE;
}
static void
zip_build_vdirs (GsfInfileZip *zip)
{
- GList *l;
- GsfZipDirent *dirent;
+ unsigned ui;
ZipInfo *info = zip->info;
info->vdir = gsf_zip_vdir_new ("", TRUE, NULL);
- for (l = info->dirent_list; l; l = l->next) {
- dirent = (GsfZipDirent *) l->data;
+ for (ui = 0; ui < info->dirents->len; ui++) {
+ GsfZipDirent *dirent = g_ptr_array_index (info->dirents, ui);
vdir_insert (info->vdir, dirent->name, dirent);
}
}
@@ -695,7 +733,7 @@ gsf_infile_zip_num_children (GsfInfile *infile)
if (!zip->vdir->is_directory)
return -1;
- return g_slist_length (zip->vdir->children);
+ return zip->vdir->children->len;
}
static void
@@ -763,6 +801,7 @@ gsf_infile_zip_init (GObject *obj)
GsfInfileZip *zip = (GsfInfileZip *)obj;
zip->source = NULL;
zip->info = NULL;
+ zip->zip64 = FALSE;
zip->vdir = NULL;
zip->stream = NULL;
zip->restlen = 0;
diff --git a/gsf/gsf-zip-impl.h b/gsf/gsf-zip-impl.h
index 84d455e..78efc79 100644
--- a/gsf/gsf-zip-impl.h
+++ b/gsf/gsf-zip-impl.h
@@ -39,6 +39,7 @@ G_BEGIN_DECLS
#define ZIP_HEADER_NAME_LEN 26
#define ZIP_HEADER_EXTRA_LEN 28
+#define ZIP_TRAILER_SIGNATURE 0x06054b50
#define ZIP_TRAILER_SIZE 22
#define ZIP_TRAILER_DISK 4
#define ZIP_TRAILER_DIR_DISK 6
@@ -48,6 +49,23 @@ G_BEGIN_DECLS
#define ZIP_TRAILER_DIR_POS 16
#define ZIP_TRAILER_COMMENT_SIZE 20
+#define ZIP_ZIP64_LOCATOR_SIGNATURE 0x07064b50
+#define ZIP_ZIP64_LOCATOR_SIZE 20
+#define ZIP_ZIP64_LOCATOR_DISK 4
+#define ZIP_ZIP64_LOCATOR_OFFSET 8
+#define ZIP_ZIP64_LOCATOR_DISKS 16
+
+#define ZIP_TRAILER64_SIZE 56 /* or more */
+#define ZIP_TRAILER64_RECSIZE 4
+#define ZIP_TRAILER64_VERSION_MADE 12
+#define ZIP_TRAILER64_VERSION_NEEDED 14
+#define ZIP_TRAILER64_DISK 16
+#define ZIP_TRAILER64_DIR_DISK 20
+#define ZIP_TRAILER64_ENTRIES 24
+#define ZIP_TRAILER64_TOTAL_ENTRIES 32
+#define ZIP_TRAILER64_DIR_SIZE 40
+#define ZIP_TRAILER64_DIR_POS 48
+
#define ZIP_DIRENT_SIZE 46
#define ZIP_DIRENT_ENCODER 4
#define ZIP_DIRENT_EXTRACT 6
@@ -103,7 +121,8 @@ typedef struct {
char *name;
gboolean is_directory;
GsfZipDirent *dirent;
- GSList *children, *last_child;
+ GPtrArray *children;
+ GSList *last_child; /* Unused */
} GsfZipVDir;
GType gsf_zip_dirent_get_type (void);
diff --git a/gsf/gsf-zip-utils.c b/gsf/gsf-zip-utils.c
index 64dca98..6588841 100644
--- a/gsf/gsf-zip-utils.c
+++ b/gsf/gsf-zip-utils.c
@@ -94,7 +94,7 @@ gsf_zip_vdir_new (char const *name, gboolean is_directory, GsfZipDirent *dirent)
vdir->name = g_strdup (name);
vdir->is_directory = is_directory;
vdir->dirent = dirent;
- vdir->children = NULL;
+ vdir->children = g_ptr_array_new ();
return vdir;
}
@@ -122,15 +122,17 @@ gsf_vdir_free (GsfZipVDir *vdir, gboolean free_dirent)
void
gsf_zip_vdir_free (GsfZipVDir *vdir, gboolean free_dirent)
{
- GSList *l;
+ unsigned ui;
if (!vdir)
return;
- for (l = vdir->children; l; l = l->next)
- gsf_zip_vdir_free ((GsfZipVDir *)l->data, free_dirent);
+ for (ui = 0; ui < vdir->children->len; ui++) {
+ GsfZipVDir *c = g_ptr_array_index (vdir->children, ui);
+ gsf_zip_vdir_free (c, free_dirent);
+ }
+ g_ptr_array_free (vdir->children, TRUE);
- g_slist_free (vdir->children);
g_free (vdir->name);
if (free_dirent && vdir->dirent)
gsf_zip_dirent_free (vdir->dirent);
@@ -141,7 +143,8 @@ static GsfZipVDir *
gsf_zip_vdir_copy (GsfZipVDir *vdir)
{
GsfZipVDir *res = g_new0 (GsfZipVDir, 1);
- GSList *l;
+ unsigned ui;
+
/* it is not possible to add a ref_count without breaking the API,
* so we need to really copy everything */
if (vdir->name)
@@ -149,8 +152,10 @@ gsf_zip_vdir_copy (GsfZipVDir *vdir)
res->is_directory = vdir->is_directory;
if (vdir->dirent)
res->dirent = gsf_zip_dirent_copy (vdir->dirent);
- for (l = vdir->children; l; l = l->next)
- gsf_zip_vdir_add_child (res, gsf_zip_vdir_copy ((GsfZipVDir *)l->data));
+ for (ui = 0; ui < vdir->children->len; ui++) {
+ GsfZipVDir *c = g_ptr_array_index (vdir->children, ui);
+ gsf_zip_vdir_add_child (res, gsf_zip_vdir_copy (c));
+ }
return res;
}
@@ -183,10 +188,5 @@ gsf_vdir_add_child (GsfZipVDir *vdir, GsfZipVDir *child)
void
gsf_zip_vdir_add_child (GsfZipVDir *vdir, GsfZipVDir *child)
{
- GSList *tail = g_slist_append (NULL, child);
- if (vdir->children)
- vdir->last_child->next = tail;
- else
- vdir->children = tail;
- vdir->last_child = tail;
+ g_ptr_array_add (vdir->children, child);
}
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]