[libgsf] Zip: handle files larger than 4G.
- From: Morten Welinder <mortenw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libgsf] Zip: handle files larger than 4G.
- Date: Fri, 21 Nov 2014 04:01:34 +0000 (UTC)
commit 5bc7e809671dc6246f9b173435c803ce4c1b1003
Author: Morten Welinder <terra gnome org>
Date: Thu Nov 20 23:00:02 2014 -0500
Zip: handle files larger than 4G.
Also fix various 64-bit issues.
ChangeLog | 8 +++
NEWS | 2 +
gsf/gsf-infile-zip.c | 140 +++++++++++++++++++++++++++++++++++--------------
gsf/gsf-input.c | 2 +-
gsf/gsf-utils.c | 11 ++---
gsf/gsf-zip-impl.h | 13 +++--
6 files changed, 124 insertions(+), 52 deletions(-)
---
diff --git a/ChangeLog b/ChangeLog
index dc77c43..214ed5c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,17 @@
2014-11-20 Morten Welinder <terra gnome org>
+ * gsf/gsf-input.c (gsf_input_read0): Fix 64-bit issue.
+
+ * gsf/gsf-utils.c (gsf_input_dump): Fix 64-bit issue. Read larger
+ blocks.
+
* gsf/gsf-infile-zip.c (zip_read_dirents): Handle zip64 format for
archives with 65535 or more members.
(ZipInfo): Larger fields; user GPtrArray for speed. All users
changed.
+ (zip_find_trailer): Fix endianness issue.
+ (gsf_infile_zip_read): Fix 64-bit issue.
+ (zip_dirent_new_in): Handle zip64 extra fields.
* gsf/gsf-zip-impl.h (GsfZipVDir): Use a GPtrArray for speed. All
users changed.
diff --git a/NEWS b/NEWS
index 234dcad..26b765f 100644
--- a/NEWS
+++ b/NEWS
@@ -8,7 +8,9 @@ Morten:
* Dead kittens.
* Reduce number of CRITICALs for corrupted files.
* Read zip files with 64k+ members. [Part of #732209]
+ * Read zip file members larger than 4G. [Part of #732209]
* Speed up zip file reading for lots of members.
+ * Fix various issues with files larger than 4G.
--------------------------------------------------------------------------
libgsf 1.14.30
diff --git a/gsf/gsf-infile-zip.c b/gsf/gsf-infile-zip.c
index 6813a7a..a177e67 100644
--- a/gsf/gsf-infile-zip.c
+++ b/gsf/gsf-infile-zip.c
@@ -60,8 +60,8 @@ struct _GsfInfileZip {
GsfZipVDir *vdir;
z_stream *stream;
- guint32 restlen;
- guint32 crestlen;
+ gsf_off_t restlen;
+ gsf_off_t crestlen;
guint8 *buf;
size_t buf_size;
@@ -146,17 +146,20 @@ vdir_insert (GsfZipVDir *vdir, char const * name, GsfZipDirent *dirent)
}
static gsf_off_t
-zip_find_trailer (GsfInfileZip *zip, guint32 trailer_signature, guint size)
+zip_find_trailer (GsfInfileZip *zip, guint32 sig, guint size)
{
+ char trailer_signature[4];	/* filled from sig by GSF_LE_SET_GUINT32 below; this is what memcmp compares against */
gsf_off_t offset, trailer_offset, filesize;
gsf_off_t maplen;
guint8 const *data;
- guchar sig1 = trailer_signature & 0xff;
+ guchar sig1 = sig & 0xff;	/* low byte of sig; tested against *s before the full memcmp */
filesize = gsf_input_size (zip->source);
if (filesize < size)
return -1;
+ GSF_LE_SET_GUINT32 (trailer_signature, sig);	/* store sig as a byte array instead of comparing the guint32's in-memory bytes (ChangeLog: endianness fix) */
+ /* Scanning starts from the end of the source; maplen equals filesize modulo ZIP_BUF_SIZE only if ZIP_BUF_SIZE is a power of two -- TODO confirm. */
trailer_offset = filesize;
maplen = filesize & (ZIP_BUF_SIZE - 1);
if (maplen == 0)
@@ -177,7 +180,7 @@ zip_find_trailer (GsfInfileZip *zip, guint32 trailer_signature, guint size)
for (s = p + maplen - 1; (s >= p); s--, trailer_offset--) {
if (*s == sig1 &&
p + maplen - 1 - s > size - 2 &&
- !memcmp (s, &trailer_signature, sizeof (trailer_signature))) {
+ !memcmp (s, trailer_signature, sizeof (trailer_signature))) {	/* the array decays to a pointer, so no '&'; sizeof is still 4 */
return --trailer_offset;
}
}
@@ -204,41 +207,108 @@ zip_find_trailer (GsfInfileZip *zip, guint32 trailer_signature, guint size)
return -1;
}
+static guint8 const *	/* returns a pointer to the payload of the first field whose id equals typ, or NULL */
+zip_dirent_extra_field (guint8 const *extra, size_t elen,
+ guint16 typ, guint32 *pflen)	/* *pflen receives the payload length, or 0 whenever NULL is returned */
+{
+ while (TRUE) {
+ guint16 ftyp, flen;
+ /* Each record in extra[0..elen) is a 4-byte header (16-bit id, then 16-bit payload length) followed by flen payload bytes. */
+ if (elen == 0) {
+ *pflen = 0;
+ return NULL;
+ }
+ /* A non-empty remainder too short to hold a record header is treated as malformed. */
+ if (elen < 4)
+ goto bad;
+ /* elen >= 4 from here on, so the elen - 4 below cannot wrap around. */
+ ftyp = GSF_LE_GET_GUINT16 (extra);
+ flen = GSF_LE_GET_GUINT16 (extra + 2);
+ if (flen > elen - 4)
+ goto bad;
+ /* Step over the header; extra now points at this record's payload. */
+ extra += 4;
+ elen -= 4;
+ if (ftyp == typ) {
+ /* Found the extended data. */
+ *pflen = flen;
+ return extra;
+ }
+ extra += flen;
+ elen -= flen;
+ }
+ /* Malformed extra data is reported to the caller exactly like "not found". */
+bad:
+ *pflen = 0;
+ return NULL;
+}
+
+
static GsfZipDirent *
zip_dirent_new_in (GsfInfileZip *zip, gsf_off_t *offset)
{
- static guint8 const dirent_signature[] =
- { 'P', 'K', 0x01, 0x02 };
GsfZipDirent *dirent;
- guint8 const *data;
- guint16 name_len, extras_len, comment_len, compr_method, flags;
- guint32 dostime, crc32, csize, usize, off;
+ guint8 const *data, *variable, *extra;
+ guint16 compr_method, flags;
+ guint32 dostime, crc32, disk_start, name_len, extras_len, comment_len, vlen, elen;
+ gsf_off_t off, csize, usize;
gchar *name;
+ guint8 header[ZIP_DIRENT_SIZE];
- /* Read data and check the header */
+ /* Read fixed-length part of data and check the header */
+ data = header;
if (gsf_input_seek (zip->source, *offset, G_SEEK_SET) ||
- NULL == (data = gsf_input_read (zip->source, ZIP_DIRENT_SIZE, NULL)) ||
- 0 != memcmp (data, dirent_signature, sizeof (dirent_signature))) {
+ !gsf_input_read (zip->source, ZIP_DIRENT_SIZE, header) ||
+ GSF_LE_GET_GUINT32 (data) != ZIP_DIRENT_SIGNATURE) {
return NULL;
}
name_len = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_NAME_SIZE);
extras_len = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_EXTRAS_SIZE);
comment_len = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMMENT_SIZE);
+ vlen = name_len + extras_len + comment_len;
+
+ /* Read variable part: name, extra fields and comment, vlen bytes in all (not the fixed header size) */
+ variable = gsf_input_read (zip->source, vlen, NULL);
+ if (!variable && vlen > 0)
+ return NULL;
+ if (FALSE && variable) gsf_mem_dump (variable, vlen);
+ /* Locate the zip64 extra field; elen is 0 when it is absent, which disables every 64-bit override below */
+ extra = zip_dirent_extra_field (variable + name_len, extras_len,
+ ZIP_DIRENT_EXTRA_FIELD_ZIP64, &elen);
flags = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_FLAGS);
compr_method = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMPR_METHOD);
dostime = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_DOSTIME);
crc32 = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CRC32);
csize = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CSIZE);
usize = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_USIZE);
+ if (usize == 0xffffffffu && elen >= 8) {	/* zip64 field order is fixed: usize, csize, offset, disk; only saturated fields are present */
+ usize = GSF_LE_GET_GUINT64 (extra);
+ extra += 8;
+ elen -= 8;
+ }
+ if (csize == 0xffffffffu && elen >= 8) {
+ csize = GSF_LE_GET_GUINT64 (extra);
+ extra += 8;
+ elen -= 8;
+ }
off = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_OFFSET);
+ if (off == 0xffffffffu && elen >= 8) {
+ off = GSF_LE_GET_GUINT64 (extra);
+ extra += 8;
+ elen -= 8;
+ }
- if ((data = gsf_input_read (zip->source, name_len, NULL)) == NULL)
- return NULL;
+ disk_start = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_DISKSTART);
+ if (disk_start == 0xffffu && elen >= 4) {
+ disk_start = GSF_LE_GET_GUINT32 (extra);
+ extra += 4;
+ elen -= 4;
+ }
- name = g_new (gchar, (gulong) (name_len + 1));
- memcpy (name, data, name_len);
+ name = g_new (gchar, name_len + 1);
+ memcpy (name, variable, name_len);
name[name_len] = '\0';
dirent = gsf_zip_dirent_new ();
@@ -255,7 +325,7 @@ zip_dirent_new_in (GsfInfileZip *zip, gsf_off_t *offset)
g_print ("%s = 0x%x @ %" GSF_OFF_T_FORMAT "\n", name, off, *offset);
#endif
- *offset += ZIP_DIRENT_SIZE + name_len + extras_len + comment_len;
+ *offset += ZIP_DIRENT_SIZE + vlen;
return dirent;
}
@@ -320,43 +390,32 @@ zip_dup (GsfInfileZip const *src, GError **err)
static gboolean
zip_read_dirents (GsfInfileZip *zip)
{
- guint8 const *data;
+ guint8 const *data, *locator;
guint32 entries, i;
ZipInfo *info;
gsf_off_t dir_pos, offset;
- gboolean need_zip64 = FALSE;
/* Find and check the trailing header */
offset = zip_find_trailer (zip, ZIP_TRAILER_SIGNATURE, ZIP_TRAILER_SIZE);
- if (offset < 0 ||
- gsf_input_seek (zip->source, offset, G_SEEK_SET))
+ if (offset < ZIP_ZIP64_LOCATOR_SIZE ||
+ gsf_input_seek (zip->source, offset - ZIP_ZIP64_LOCATOR_SIZE, G_SEEK_SET))
goto bad;
- data = gsf_input_read (zip->source, ZIP_TRAILER_SIZE, NULL);
- if (!data)
+ locator = gsf_input_read (zip->source, ZIP_TRAILER_SIZE + ZIP_ZIP64_LOCATOR_SIZE, NULL);
+ if (!locator)
goto bad;
+ data = locator + ZIP_ZIP64_LOCATOR_SIZE;
entries = GSF_LE_GET_GUINT16 (data + ZIP_TRAILER_ENTRIES);
- need_zip64 |= (entries == 0xffffu);
dir_pos = GSF_LE_GET_GUINT32 (data + ZIP_TRAILER_DIR_POS);
- need_zip64 |= (dir_pos == 0xffffffffu);
- if (need_zip64) {
+ if (GSF_LE_GET_GUINT32 (locator) == ZIP_ZIP64_LOCATOR_SIGNATURE) {
guint32 disk, disks;
gsf_off_t zip64_eod_offset;
zip->zip64 = TRUE;
- /* Find the zip64 locator */
- offset = zip_find_trailer (zip, ZIP_ZIP64_LOCATOR_SIGNATURE, ZIP_ZIP64_LOCATOR_SIZE);
- if (offset < 0 ||
- gsf_input_seek (zip->source, offset, G_SEEK_SET))
- goto bad;
-
- data = gsf_input_read (zip->source, ZIP_ZIP64_LOCATOR_SIZE, NULL);
- if (!data)
- goto bad;
-
+ data = locator;
disk = GSF_LE_GET_GUINT32 (data + ZIP_ZIP64_LOCATOR_DISK);
zip64_eod_offset = GSF_LE_GET_GUINT64 (data + ZIP_ZIP64_LOCATOR_OFFSET);
disks = GSF_LE_GET_GUINT32 (data + ZIP_ZIP64_LOCATOR_DISKS);
@@ -551,10 +610,10 @@ static guint8 const *
gsf_infile_zip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
{
GsfInfileZip *zip = GSF_INFILE_ZIP (input);
- GsfZipVDir *vdir = zip->vdir;
+ GsfZipVDir *vdir = zip->vdir;
gsf_off_t pos;
- if (zip->restlen < num_bytes)
+ if (zip->restlen < (gsf_off_t)num_bytes)
return NULL;
switch (vdir->dirent->compr_method) {
@@ -580,7 +639,7 @@ gsf_infile_zip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
do {
int err;
- int startlen;
+ gsf_off_t startlen;
if (zip->crestlen > 0 && zip->stream->avail_in == 0)
if (!zip_update_stream_in (zip))
@@ -785,6 +844,7 @@ gsf_infile_zip_constructor (GType type,
/* Special call from zip_dup. */
zip->source = gsf_input_dup (zip->dup_parent->source, &zip->err);
zip->info = zip_info_ref (zip->dup_parent->info);
+ zip->zip64 = zip->dup_parent->zip64;
zip->dup_parent = NULL;
} else {
if (!zip_init_info (zip))
diff --git a/gsf/gsf-input.c b/gsf/gsf-input.c
index 5154c15..0590033 100644
--- a/gsf/gsf-input.c
+++ b/gsf/gsf-input.c
@@ -403,7 +403,7 @@ gsf_input_read0 (GsfInput *input, size_t num_bytes, size_t *bytes_read)
*bytes_read = num_bytes;
- if (num_bytes > (size_t)gsf_input_remaining (input))
+ if (num_bytes < 0 || (gsf_off_t)num_bytes > gsf_input_remaining (input))
return NULL;
res = g_new (guint8, num_bytes);
diff --git a/gsf/gsf-utils.c b/gsf/gsf-utils.c
index f9c53fa..69ee7af 100644
--- a/gsf/gsf-utils.c
+++ b/gsf/gsf-utils.c
@@ -296,17 +296,14 @@ gsf_mem_dump (guint8 const *ptr, size_t len)
void
gsf_input_dump (GsfInput *input, gboolean dump_as_hex)
{
- gsf_off_t offset = 0;
- size_t size, count;
- guint8 const *data;
+ gsf_off_t offset = 0, size;
/* read in small blocks to excercise things */
size = gsf_input_size (GSF_INPUT (input));
while (size > 0) {
- count = size;
- if (count > 0x100)
- count = 0x100;
- data = gsf_input_read (GSF_INPUT (input), count, NULL);
+ size_t count = (size > 0x1000) ? 0x1000 : size;
+ guint8 const *data =
+ gsf_input_read (GSF_INPUT (input), count, NULL);
g_return_if_fail (data != NULL);
if (dump_as_hex)
gsf_mem_dump_full (data, count, offset);
diff --git a/gsf/gsf-zip-impl.h b/gsf/gsf-zip-impl.h
index 78efc79..ab2d71a 100644
--- a/gsf/gsf-zip-impl.h
+++ b/gsf/gsf-zip-impl.h
@@ -57,8 +57,8 @@ G_BEGIN_DECLS
#define ZIP_TRAILER64_SIZE 56 /* or more */
#define ZIP_TRAILER64_RECSIZE 4
-#define ZIP_TRAILER64_VERSION_MADE 12
-#define ZIP_TRAILER64_VERSION_NEEDED 14
+#define ZIP_TRAILER64_ENCODER 12
+#define ZIP_TRAILER64_EXTRACT 14
#define ZIP_TRAILER64_DISK 16
#define ZIP_TRAILER64_DIR_DISK 20
#define ZIP_TRAILER64_ENTRIES 24
@@ -66,6 +66,7 @@ G_BEGIN_DECLS
#define ZIP_TRAILER64_DIR_SIZE 40
#define ZIP_TRAILER64_DIR_POS 48
+#define ZIP_DIRENT_SIGNATURE 0x02014b50
#define ZIP_DIRENT_SIZE 46
#define ZIP_DIRENT_ENCODER 4
#define ZIP_DIRENT_EXTRACT 6
@@ -94,6 +95,10 @@ G_BEGIN_DECLS
#define ZIP_FILE_HEADER_NAME_SIZE 26
#define ZIP_FILE_HEADER_EXTRAS_SIZE 28
+#define ZIP_DIRENT_EXTRA_FIELD_ZIP64 0x0001
+#define ZIP_DIRENT_EXTRA_FIELD_UNIXTIME 0x5455 /* "UT" */
+#define ZIP_DIRENT_EXTRA_FIELD_UIDGID 0x7875 /* "ux" */
+
#define ZIP_NAME_SEPARATOR '/'
#define ZIP_BLOCK_SIZE 32768
@@ -110,8 +115,8 @@ typedef struct {
guint16 flags;
GsfZipCompressionMethod compr_method;
guint32 crc32;
- size_t csize;
- size_t usize;
+ gsf_off_t csize;
+ gsf_off_t usize;
gsf_off_t offset;
gsf_off_t data_offset;
guint32 dostime;
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]