[libgsf] zip: first cut at writing zip64.



commit d627943a1b46d9f60b0016734430476d0fd198aa
Author: Morten Welinder <terra gnome org>
Date:   Sun Nov 23 18:57:32 2014 -0500

    zip: first cut at writing zip64.
    
    This automatically handles the more-than-64k-files cases.
    
    The large file case is trickier and not fully handled right now.  The
    problem is that we either need to know in advance whether the length
    will be larger than 4G or, alternatively, must unconditionally use the
    zip64 format.  The trouble with the latter is that we don't know if our
    consumers can handle zip64.  No released version of libgsf to date can
    handle it, for example.

 NEWS                  |    1 +
 gsf/gsf-infile-zip.c  |   36 +++----
 gsf/gsf-outfile-zip.c |  294 ++++++++++++++++++++++++++++++++++++-------------
 gsf/gsf-zip-impl.h    |   71 ++++++++----
 4 files changed, 283 insertions(+), 119 deletions(-)
---
diff --git a/NEWS b/NEWS
index eb1f6e0..01f577d 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@ Morten:
        * Speed up stdio directory handling with large number of files.
        * Fix various issues with files larger than 4G.
        * Fix minor zip file issues.
+       * Write zip archives with more than 64k members.
 
 --------------------------------------------------------------------------
 libgsf 1.14.30
diff --git a/gsf/gsf-infile-zip.c b/gsf/gsf-infile-zip.c
index 64b94d9..e1d638f 100644
--- a/gsf/gsf-infile-zip.c
+++ b/gsf/gsf-infile-zip.c
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2002-2006 Jody Goldberg (jody gnome org)
  *                        Tambet Ingo   (tambet ximian com)
+ * Copyright (C) 2014 Morten Welinder (terra gnome org)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2.1 of the GNU Lesser General Public
@@ -146,9 +147,8 @@ vdir_insert (GsfZipVDir *vdir, char const * name, GsfZipDirent *dirent)
 }
 
 static gsf_off_t
-zip_find_trailer (GsfInfileZip *zip, guint32 sig, guint size)
+zip_find_trailer (GsfInfileZip *zip, guint32 sig, gssize size)
 {
-       char trailer_signature[4];
        gsf_off_t offset, trailer_offset, filesize;
        gsf_off_t maplen;
        guint8 const *data;
@@ -158,8 +158,6 @@ zip_find_trailer (GsfInfileZip *zip, guint32 sig, guint size)
        if (filesize < size)
                return -1;
 
-       GSF_LE_SET_GUINT32 (trailer_signature, sig);
-
        trailer_offset = filesize;
        maplen = filesize & (ZIP_BUF_SIZE - 1);
        if (maplen == 0)
@@ -180,7 +178,7 @@ zip_find_trailer (GsfInfileZip *zip, guint32 sig, guint size)
                for (s = p + maplen - 1; (s >= p); s--, trailer_offset--) {
                        if (*s == sig1 &&
                            p + maplen - 1 - s > size - 2 &&
-                           !memcmp (s, trailer_signature, sizeof (trailer_signature))) {
+                           GSF_LE_GET_GUINT32 (s) == sig) {
                                return --trailer_offset;
                        }
                }
@@ -254,6 +252,7 @@ zip_dirent_new_in (GsfInfileZip *zip, gsf_off_t *offset)
        gsf_off_t off, csize, usize;
        gchar *name;
        guint8 header[ZIP_DIRENT_SIZE];
+       gboolean zip64;
 
        /* Read fixed-length part of data and check the header */
        data = header;
@@ -276,6 +275,7 @@ zip_dirent_new_in (GsfInfileZip *zip, gsf_off_t *offset)
 
        extra = zip_dirent_extra_field (variable + name_len, extras_len,
                                        ZIP_DIRENT_EXTRA_FIELD_ZIP64, &elen);
+       zip64 = (extra != NULL);
 
        flags =         GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_FLAGS);
        compr_method =  GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMPR_METHOD);
@@ -321,6 +321,7 @@ zip_dirent_new_in (GsfInfileZip *zip, gsf_off_t *offset)
        dirent->usize =         usize;
        dirent->offset =        off;
        dirent->dostime =       dostime;
+       dirent->zip64 =         zip64;
 #if 0
        g_print ("%s = 0x%x @ %" GSF_OFF_T_FORMAT "\n", name, off, *offset);
 #endif
@@ -503,8 +504,6 @@ zip_init_info (GsfInfileZip *zip)
 static gboolean
 zip_child_init (GsfInfileZip *child, GError **errmsg)
 {
-       static guint8 const header_signature[] =
-               { 'P', 'K', 0x03, 0x04 };
        guint8 const *data = NULL;
        guint16 name_len, extras_len;
        const char *err = NULL;
@@ -513,18 +512,16 @@ zip_child_init (GsfInfileZip *child, GError **errmsg)
 
        /* skip local header
         * should test tons of other info, but trust that those are correct
-        **/
+        */
 
-       if (gsf_input_seek (child->source, (gsf_off_t) dirent->offset, G_SEEK_SET))
+       if (gsf_input_seek (child->source, dirent->offset, G_SEEK_SET)) {
                err = _("Error seeking to zip header");
-       else if (NULL == (data = gsf_input_read (child->source, ZIP_FILE_HEADER_SIZE, NULL)))
+       } else if (NULL == (data = gsf_input_read (child->source, ZIP_HEADER_SIZE, NULL)))
                err = _("Error reading zip header");
-       else if (0 != memcmp (data, header_signature, sizeof (header_signature))) {
+       else if (GSF_LE_GET_GUINT32 (data) != ZIP_HEADER_SIGNATURE) {
                err = _("Error incorrect zip header");
-               g_printerr ("Header is :\n");
-               gsf_mem_dump (data, sizeof (header_signature));
-               g_printerr ("Header should be :\n");
-               gsf_mem_dump (header_signature, sizeof (header_signature));
+               g_printerr ("Header is 0x%x\n", GSF_LE_GET_GUINT32 (data));
+               g_printerr ("Expected 0x%x\n", ZIP_HEADER_SIGNATURE);
        }
 
        if (NULL != err) {
@@ -536,10 +533,10 @@ zip_child_init (GsfInfileZip *child, GError **errmsg)
        /* Throw clang a bone.  */
        g_assert (data != NULL);
 
-       name_len =   GSF_LE_GET_GUINT16 (data + ZIP_FILE_HEADER_NAME_SIZE);
-       extras_len = GSF_LE_GET_GUINT16 (data + ZIP_FILE_HEADER_EXTRAS_SIZE);
+       name_len =   GSF_LE_GET_GUINT16 (data + ZIP_HEADER_NAME_SIZE);
+       extras_len = GSF_LE_GET_GUINT16 (data + ZIP_HEADER_EXTRAS_SIZE);
 
-       dirent->data_offset = dirent->offset + ZIP_FILE_HEADER_SIZE + name_len + extras_len;
+       dirent->data_offset = dirent->offset + ZIP_HEADER_SIZE + name_len + extras_len;
        child->restlen  = dirent->usize;
        child->crestlen = dirent->csize;
 
@@ -729,8 +726,7 @@ gsf_infile_zip_new_child (GsfInfileZip *parent, GsfZipVDir *vdir, GError **err)
        child->vdir = vdir;
 
        if (dirent) {
-               gsf_input_set_size (GSF_INPUT (child),
-                                   (gsf_off_t) dirent->usize);
+               gsf_input_set_size (GSF_INPUT (child), dirent->usize);
 
                if (dirent->dostime) {
                        GDateTime *modtime = zip_make_modtime (dirent->dostime);
diff --git a/gsf/gsf-outfile-zip.c b/gsf/gsf-outfile-zip.c
index 044b174..29dd63f 100644
--- a/gsf/gsf-outfile-zip.c
+++ b/gsf/gsf-outfile-zip.c
@@ -3,6 +3,7 @@
  * gsf-outfile-zip.c: zip archive output.
  *
  * Copyright (C) 2002-2006 Jon K Hellan (hellan acm org)
+ * Copyright (C) 2014 Morten Welinder (terra gnome org)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2.1 of the GNU Lesser General Public
@@ -48,6 +49,7 @@ struct _GsfOutfileZip {
        GsfOutput     *sink;
        GsfOutfileZip *root;
 
+       gboolean zip64;
        char *entry_name;
 
        GsfZipVDir    *vdir;
@@ -154,53 +156,126 @@ gsf_outfile_zip_seek (G_GNUC_UNUSED GsfOutput *output,
 static gboolean
 zip_dirent_write (GsfOutput *sink, GsfZipDirent *dirent)
 {
-       static guint8 const dirent_signature[] =
-               { 'P', 'K', 0x01, 0x02 };
        guint8 buf[ZIP_DIRENT_SIZE];
        int nlen = strlen (dirent->name);
        gboolean ret;
+       const guint8 extract = dirent->zip64 ? 45 : 23;
+       GString *extras = g_string_sized_new (ZIP_DIRENT_SIZE + nlen + 100);
+
+       if (dirent->zip64) {
+               char tmp[8];
+
+               /*
+                * We could unconditionally store the offset here, but
+                * zipinfo has a known bug in which it fails to account
+                * for differences in extra fields between the global
+                * and the local headers.  So we try to make them the
+                * same.
+                */
+               gboolean do_offset = dirent->offset >= 0xffffffffu;
+               GSF_LE_SET_GUINT16 (tmp, ZIP_DIRENT_EXTRA_FIELD_ZIP64);
+               GSF_LE_SET_GUINT16 (tmp + 2, (2 + do_offset) * 8);
+               g_string_append_len (extras, tmp, 4);
+               GSF_LE_SET_GUINT64 (tmp, dirent->usize);
+               g_string_append_len (extras, tmp, 8);
+               GSF_LE_SET_GUINT64 (tmp, dirent->csize);
+               g_string_append_len (extras, tmp, 8);
+               if (do_offset) {
+                       GSF_LE_SET_GUINT64 (tmp, dirent->offset);
+                       g_string_append_len (extras, tmp, 8);
+               }
+       }
 
        memset (buf, 0, sizeof buf);
-       memcpy (buf, dirent_signature, sizeof dirent_signature);
-       GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_ENCODER, 0x317); /* Unix */
-       GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_EXTRACT, 0x14);
+       GSF_LE_SET_GUINT32 (buf, ZIP_DIRENT_SIGNATURE);
+       GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_ENCODER,
+                           (ZIP_OS_UNIX << 8) + extract);
+       GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_EXTRACT,
+                           (ZIP_OS_MSDOS << 8) + extract);
        GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_FLAGS, dirent->flags);
        GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_COMPR_METHOD,
                            dirent->compr_method);
        GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_DOSTIME, dirent->dostime);
        GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_CRC32, dirent->crc32);
-       GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_CSIZE, dirent->csize);
-       GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_USIZE, dirent->usize);
+       GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_CSIZE,
+                           dirent->zip64 ? 0xffffffffu : dirent->csize);
+       GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_USIZE,
+                           dirent->zip64 ? 0xffffffffu : dirent->usize);
        GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_NAME_SIZE, nlen);
-       GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_EXTRAS_SIZE, 0);
+       GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_EXTRAS_SIZE, extras->len);
        GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_COMMENT_SIZE, 0);
        GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_DISKSTART, 0);
        GSF_LE_SET_GUINT16 (buf + ZIP_DIRENT_FILE_TYPE, 0);
        /* Hardcode file mode 644 */
        GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_FILE_MODE, 0100644 << 16);
-       GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_OFFSET, dirent->offset);
+       GSF_LE_SET_GUINT32 (buf + ZIP_DIRENT_OFFSET,
+                           MIN (dirent->offset, 0xffffffffu));
+
+       /* Stuff everything into extras so we can do just one write.  */
+       g_string_insert_len (extras,          0, buf, sizeof buf);
+       g_string_insert_len (extras, sizeof buf, dirent->name, nlen);
+
+       ret = gsf_output_write (sink, extras->len, extras->str);
 
-       ret = gsf_output_write (sink, sizeof buf, buf);
-       if (ret)
-               ret = gsf_output_write (sink, nlen, dirent->name);
+       g_string_free (extras, TRUE);
 
        return ret;
 }
 
 static gboolean
-zip_trailer_write (GsfOutfileZip *zip, unsigned entries, gsf_off_t dirpos)
+zip_trailer_write (GsfOutfileZip *zip, unsigned entries,
+                  gsf_off_t dirpos, gsf_off_t dirsize)
 {
-       static guint8 const trailer_signature[] =
-               { 'P', 'K', 0x05, 0x06 };
        guint8 buf[ZIP_TRAILER_SIZE];
-       gsf_off_t pos = gsf_output_tell (zip->sink);
 
        memset (buf, 0, sizeof buf);
-       memcpy (buf, trailer_signature, sizeof trailer_signature);
-       GSF_LE_SET_GUINT16 (buf + ZIP_TRAILER_ENTRIES, entries);
-       GSF_LE_SET_GUINT16 (buf + ZIP_TRAILER_TOTAL_ENTRIES, entries);
-       GSF_LE_SET_GUINT32 (buf + ZIP_TRAILER_DIR_SIZE, pos - dirpos);
-       GSF_LE_SET_GUINT32 (buf + ZIP_TRAILER_DIR_POS, dirpos);
+       GSF_LE_SET_GUINT32 (buf, ZIP_TRAILER_SIGNATURE);
+       GSF_LE_SET_GUINT16 (buf + ZIP_TRAILER_ENTRIES,
+                           MIN (entries, 0xffffu));
+       GSF_LE_SET_GUINT16 (buf + ZIP_TRAILER_TOTAL_ENTRIES,
+                           MIN (entries, 0xffffu));
+       GSF_LE_SET_GUINT32 (buf + ZIP_TRAILER_DIR_SIZE,
+                           MIN (dirsize, 0xffffffffu));
+       GSF_LE_SET_GUINT32 (buf + ZIP_TRAILER_DIR_POS,
+                           MIN (dirpos, 0xffffffffu));
+
+       return gsf_output_write (zip->sink, sizeof buf, buf);
+}
+
+static gboolean
+zip_trailer64_write (GsfOutfileZip *zip, unsigned entries,
+                    gsf_off_t dirpos, gsf_off_t dirsize)
+{
+       guint8 buf[ZIP_TRAILER64_SIZE];
+       guint8 extract = 45;
+
+       memset (buf, 0, sizeof buf);
+       GSF_LE_SET_GUINT32 (buf, ZIP_TRAILER64_SIGNATURE);
+       GSF_LE_SET_GUINT64 (buf + ZIP_TRAILER64_RECSIZE, sizeof buf);
+       GSF_LE_SET_GUINT16 (buf + ZIP_TRAILER64_ENCODER,
+                           (ZIP_OS_UNIX << 8) + extract);
+       GSF_LE_SET_GUINT16 (buf + ZIP_TRAILER64_EXTRACT,
+                           (ZIP_OS_MSDOS << 8) + extract);
+       GSF_LE_SET_GUINT32 (buf + ZIP_TRAILER64_DISK, 0);
+       GSF_LE_SET_GUINT32 (buf + ZIP_TRAILER64_DIR_DISK, 0);
+       GSF_LE_SET_GUINT32 (buf + ZIP_TRAILER64_ENTRIES, entries);
+       GSF_LE_SET_GUINT32 (buf + ZIP_TRAILER64_TOTAL_ENTRIES, entries);
+       GSF_LE_SET_GUINT64 (buf + ZIP_TRAILER64_DIR_SIZE, dirsize);
+       GSF_LE_SET_GUINT64 (buf + ZIP_TRAILER64_DIR_POS, dirpos);
+
+       return gsf_output_write (zip->sink, sizeof buf, buf);
+}
+
+static gboolean
+zip_zip64_locator_write (GsfOutfileZip *zip, gsf_off_t trailerpos)
+{
+       guint8 buf[ZIP_ZIP64_LOCATOR_SIZE];
+
+       memset (buf, 0, sizeof buf);
+       GSF_LE_SET_GUINT32 (buf, ZIP_ZIP64_LOCATOR_SIGNATURE);
+       GSF_LE_SET_GUINT32 (buf + ZIP_ZIP64_LOCATOR_DISK, 0);
+       GSF_LE_SET_GUINT64 (buf + ZIP_ZIP64_LOCATOR_OFFSET, trailerpos);
+       GSF_LE_SET_GUINT32 (buf + ZIP_ZIP64_LOCATOR_DISKS, 1);
 
        return gsf_output_write (zip->sink, sizeof buf, buf);
 }
@@ -209,15 +284,18 @@ static gboolean
 zip_close_root (GsfOutput *output)
 {
        GsfOutfileZip *zip = GSF_OUTFILE_ZIP (output);
-       GsfOutfileZip *child;
-       gsf_off_t dirpos = gsf_output_tell (zip->sink);
+       gsf_off_t dirpos = gsf_output_tell (zip->sink), dirend;
        GPtrArray *elem = zip->root_order;
        unsigned entries = elem->len;
        unsigned i;
 
        /* Check that children are closed */
        for (i = 0 ; i < elem->len ; i++) {
-               child = g_ptr_array_index (elem, i);
+               GsfOutfileZip *child = g_ptr_array_index (elem, i);
+               GsfZipDirent *dirent = child->vdir->dirent;
+               if (dirent->csize >= 0xffffffffu ||
+                   dirent->usize >= 0xffffffffu)
+                       zip->zip64 = TRUE;  /* No choice.  */
                if (!gsf_output_is_closed (GSF_OUTPUT (child))) {
                        g_warning ("Child still open");
                        return FALSE;
@@ -225,15 +303,30 @@ zip_close_root (GsfOutput *output)
        }
 
        /* Write directory */
+       dirpos = gsf_output_tell (zip->sink);
        for (i = 0 ; i < entries ; i++) {
-               child = g_ptr_array_index (elem, i);
-               if (!zip_dirent_write (zip->sink, child->vdir->dirent))
+               GsfOutfileZip *child = g_ptr_array_index (elem, i);
+               GsfZipDirent *dirent = child->vdir->dirent;
+               if (!zip_dirent_write (zip->sink, dirent))
                        return FALSE;
        }
+       dirend = gsf_output_tell (zip->sink);
+
+       if (entries >= 0xffffu || dirend >= 0xfffff000u) {
+               /* We don't have a choice; force zip64.  */
+               zip->zip64 = TRUE;
+       }
 
        disconnect_children (zip);
 
-       return zip_trailer_write (zip, entries, dirpos);
+       if (zip->zip64) {
+               if (!zip_trailer64_write (zip, entries, dirpos, dirend - dirpos))
+                       return FALSE;
+               if (!zip_zip64_locator_write (zip, dirend))
+                       return FALSE;
+       }
+
+       return zip_trailer_write (zip, entries, dirpos, dirend - dirpos);
 }
 
 static void
@@ -250,7 +343,7 @@ stream_name_write_to_buf (GsfOutfileZip *zip, GString *res)
                stream_name_write_to_buf (GSF_OUTFILE_ZIP (container), res);
                if (res->len) {
                        /* Forward slash is specified by the format.  */
-                       g_string_append_c (res, '/');
+                       g_string_append_c (res, ZIP_NAME_SEPARATOR);
                }
        }
 
@@ -301,45 +394,89 @@ zip_time_make (GDateTime *modtime)
 static void
 zip_dirent_update_flags (GsfZipDirent *dirent)
 {
-       if (dirent->compr_method == GSF_ZIP_STORED)
-               dirent->flags &= ~8;
-       else
-               dirent->flags |= 8;
+       /* Since we can seek, do not use a ddesc.  */
+       dirent->flags &= ~ZIP_DIRENT_FLAGS_HAS_DDESC;
 }
 
 static GsfZipDirent*
 zip_dirent_new_out (GsfOutfileZip *zip)
 {
-       GsfZipDirent *dirent = gsf_zip_dirent_new ();
-       dirent->name = stream_name_build (zip);
-       dirent->compr_method = zip->compression_method;
-       dirent->dostime = zip_time_make (gsf_output_get_modtime (GSF_OUTPUT (zip)));
-       zip_dirent_update_flags (dirent);
-       return dirent;
+       char *name = stream_name_build (zip);
+       /*
+        * The spec is a bit vague about the length limit for file names, but
+        * clearly we should not go beyond 0xffff.
+        */
+       if (strlen (name) < 0xffffu) {
+               GsfZipDirent *dirent = gsf_zip_dirent_new ();
+               dirent->name = name;
+               dirent->compr_method = zip->compression_method;
+               dirent->dostime = zip_time_make (gsf_output_get_modtime (GSF_OUTPUT (zip)));
+               dirent->zip64 = zip->zip64;
+               zip_dirent_update_flags (dirent);
+               return dirent;
+       } else
+               return NULL;
 }
 
 static gboolean
 zip_header_write (GsfOutfileZip *zip)
 {
-       static guint8 const header_signature[] =
-               { 'P', 'K', 0x03, 0x04 };
        guint8 hbuf[ZIP_HEADER_SIZE];
-       GsfZipDirent *dirent = zip->vdir->dirent;
+       GsfZipDirent const *dirent = zip->vdir->dirent;
        char *name = dirent->name;
        int   nlen = strlen (name);
        gboolean ret;
+       gboolean has_ddesc = (dirent->flags & ZIP_DIRENT_FLAGS_HAS_DDESC) != 0;
+       guint32 crc32 = has_ddesc ? 0 : dirent->crc32;
+       gsf_off_t csize = has_ddesc ? 0 : dirent->csize;
+       gsf_off_t usize = has_ddesc ? 0 : dirent->usize;
+       GString *extras = g_string_sized_new (ZIP_HEADER_SIZE + nlen + 100);
+       const guint8 extract = dirent->zip64 ? 45 : 23;
+
+       /*
+        * In the has_ddesc case, we write crc32/csize/usize as zero and store
+        * the right values in the DDESC record that follows the data.
+        *
+        * In the !has_ddesc case, we return to the same spot and write the
+        * header a second time correcting crc32/csize/usize; see
+        * zip_header_patch_sizes.  For this reason, we must ensure that
+        * the record's length does not depend on the sizes.
+        */
+
+       if (dirent->zip64) {
+               char tmp[8];
+               GSF_LE_SET_GUINT16 (tmp, ZIP_DIRENT_EXTRA_FIELD_ZIP64);
+               GSF_LE_SET_GUINT16 (tmp + 2, 2 * 8);
+               g_string_append_len (extras, tmp, 4);
+               GSF_LE_SET_GUINT64 (tmp, usize);
+               g_string_append_len (extras, tmp, 8);
+               GSF_LE_SET_GUINT64 (tmp, csize);
+               g_string_append_len (extras, tmp, 8);
+       }
 
        memset (hbuf, 0, sizeof hbuf);
-       memcpy (hbuf, header_signature, sizeof header_signature);
-       GSF_LE_SET_GUINT16 (hbuf + ZIP_HEADER_VERSION, 0x14);
+       GSF_LE_SET_GUINT32 (hbuf, ZIP_HEADER_SIGNATURE);
+       GSF_LE_SET_GUINT16 (hbuf + ZIP_HEADER_EXTRACT,
+                           (ZIP_OS_MSDOS << 8) + extract);
        GSF_LE_SET_GUINT16 (hbuf + ZIP_HEADER_FLAGS, dirent->flags);
        GSF_LE_SET_GUINT16 (hbuf + ZIP_HEADER_COMP_METHOD,
                            dirent->compr_method);
-       GSF_LE_SET_GUINT32 (hbuf + ZIP_HEADER_TIME, dirent->dostime);
-       GSF_LE_SET_GUINT16 (hbuf + ZIP_HEADER_NAME_LEN, nlen);
-       ret = gsf_output_write (zip->sink, sizeof hbuf, hbuf);
-       if (ret)
-               ret = gsf_output_write (zip->sink, nlen, name);
+       GSF_LE_SET_GUINT32 (hbuf + ZIP_HEADER_DOSTIME, dirent->dostime);
+       GSF_LE_SET_GUINT32 (hbuf + ZIP_HEADER_CRC32, crc32);
+       GSF_LE_SET_GUINT32 (hbuf + ZIP_HEADER_CSIZE,
+                           dirent->zip64 ? 0xffffffffu : csize);
+       GSF_LE_SET_GUINT32 (hbuf + ZIP_HEADER_USIZE,
+                           dirent->zip64 ? 0xffffffffu : usize);
+       GSF_LE_SET_GUINT16 (hbuf + ZIP_HEADER_NAME_SIZE, nlen);
+       GSF_LE_SET_GUINT16 (hbuf + ZIP_HEADER_EXTRAS_SIZE, extras->len);
+
+       /* Stuff everything into extras so we can do just one write.  */
+       g_string_insert_len (extras,           0, hbuf, sizeof hbuf);
+       g_string_insert_len (extras, sizeof hbuf, name, nlen);
+
+       ret = gsf_output_write (zip->sink, extras->len, extras->str);
+
+       g_string_free (extras, TRUE);
 
        return ret;
 }
@@ -360,10 +497,12 @@ zip_init_write (GsfOutput *output)
                return FALSE;
 
        dirent = zip_dirent_new_out (zip);
-       dirent->offset = gsf_output_tell (zip->sink);
-       if (zip->vdir->dirent)
-               g_warning ("Leak.");
+       if (!dirent) {
+               gsf_output_unwrap (G_OBJECT (output), zip->sink);
+               return FALSE;
+       }
 
+       dirent->offset = gsf_output_tell (zip->sink);
        zip->vdir->dirent = dirent;
        zip_header_write (zip);
        zip->writing = TRUE;
@@ -431,16 +570,27 @@ zip_flush (GsfOutfileZip *zip)
 static gboolean
 zip_ddesc_write (GsfOutfileZip *zip)
 {
-       static guint8 const ddesc_signature[] =
-               { 'P', 'K', 0x07, 0x08 };
-       guint8 buf[16];
+       guint8 buf[MAX (ZIP_DDESC_SIZE, ZIP_DDESC64_SIZE)];
        GsfZipDirent *dirent = zip->vdir->dirent;
+       size_t size;
 
-       memcpy (buf, ddesc_signature, sizeof ddesc_signature);
-       GSF_LE_SET_GUINT32 (buf + 4, dirent->crc32);
-       GSF_LE_SET_GUINT32 (buf + 8, dirent->csize);
-       GSF_LE_SET_GUINT32 (buf + 12, dirent->usize);
-       if (!gsf_output_write (zip->sink, sizeof buf, buf)) {
+       /* Documentation says signature is not official.  */
+
+       if (dirent->zip64) {
+               GSF_LE_SET_GUINT32 (buf, ZIP_DDESC64_SIGNATURE);
+               GSF_LE_SET_GUINT32 (buf + ZIP_DDESC64_CRC32, dirent->crc32);
+               GSF_LE_SET_GUINT64 (buf + ZIP_DDESC64_CSIZE, dirent->csize);
+               GSF_LE_SET_GUINT64 (buf + ZIP_DDESC64_USIZE, dirent->usize);
+               size = ZIP_DDESC64_SIZE;
+       } else {
+               GSF_LE_SET_GUINT32 (buf, ZIP_DDESC_SIGNATURE);
+               GSF_LE_SET_GUINT32 (buf + ZIP_DDESC_CRC32, dirent->crc32);
+               GSF_LE_SET_GUINT32 (buf + ZIP_DDESC_CSIZE, dirent->csize);
+               GSF_LE_SET_GUINT32 (buf + ZIP_DDESC_USIZE, dirent->usize);
+               size = ZIP_DDESC_SIZE;
+       }
+
+       if (!gsf_output_write (zip->sink, size - 4, buf + 4)) {
                return FALSE;
        }
 
@@ -448,25 +598,15 @@ zip_ddesc_write (GsfOutfileZip *zip)
 }
 
 static gboolean
-zip_header_write_sizes (GsfOutfileZip *zip)
+zip_header_patch_sizes (GsfOutfileZip *zip)
 {
-       guint8 hbuf[ZIP_HEADER_SIZE];
        GsfZipDirent *dirent = zip->vdir->dirent;
        gsf_off_t pos = gsf_output_tell (zip->sink);
 
-       if (!gsf_output_seek (zip->sink, dirent->offset + ZIP_HEADER_CRC,
-                             G_SEEK_SET))
-               return FALSE;
-
-       GSF_LE_SET_GUINT32 (hbuf + ZIP_HEADER_CRC, dirent->crc32);
-       GSF_LE_SET_GUINT32 (hbuf + ZIP_HEADER_COMP_SIZE, dirent->csize);
-       GSF_LE_SET_GUINT32 (hbuf + ZIP_HEADER_UNCOMP_SIZE, dirent->usize);
-       if (!gsf_output_write (zip->sink, 12, hbuf + ZIP_HEADER_CRC))
-               return FALSE;
-       if (!gsf_output_seek (zip->sink, pos, G_SEEK_SET))
-               return FALSE;
-
-       return TRUE;
+       /* Rewrite the header in the same location again.  */
+       return (gsf_output_seek (zip->sink, dirent->offset, G_SEEK_SET) &&
+               zip_header_write (zip) &&
+               gsf_output_seek (zip->sink, pos, G_SEEK_SET));
 }
 
 static gboolean
@@ -482,11 +622,13 @@ zip_close_stream (GsfOutput *output)
        if (zip->compression_method == GSF_ZIP_DEFLATED) {
                if (!zip_flush (zip))
                        return FALSE;
+       }
 
+       if (zip->vdir->dirent->flags & ZIP_DIRENT_FLAGS_HAS_DDESC) {
                if (!zip_ddesc_write (zip)) /* Write data descriptor */
                        return FALSE;
        } else {
-               if (!zip_header_write_sizes (zip)) /* Write crc, sizes */
+               if (!zip_header_patch_sizes (zip)) /* Write crc, sizes */
                        return FALSE;
        }
        zip->root->writing = FALSE;
@@ -621,6 +763,7 @@ gsf_outfile_zip_new_child (GsfOutfile *parent,
                                                params);
        gsf_property_settings_free (params, n_params);
 
+       child->zip64 = zip_parent->zip64;
        child->vdir = gsf_zip_vdir_new (name, is_dir, NULL);
 
        /* FIXME: It isn't clear what encoding name is in.  */
@@ -643,6 +786,7 @@ gsf_outfile_zip_init (GObject *obj)
        zip->sink = NULL;
        zip->root = NULL;
        zip->entry_name = NULL;
+       zip->zip64 = FALSE;
        zip->vdir = NULL;
        zip->root_order = NULL;
        zip->stream = NULL;
diff --git a/gsf/gsf-zip-impl.h b/gsf/gsf-zip-impl.h
index 29a01d3..e4afaed 100644
--- a/gsf/gsf-zip-impl.h
+++ b/gsf/gsf-zip-impl.h
@@ -4,6 +4,7 @@
  * gsf-zip-impl.h:
  *
  * Copyright (C) 2002-2006 Tambet Ingo (tambet ximian com)
+ * Copyright (C) 2014 Morten Welinder (terra gnome org)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2.1 of the GNU Lesser General Public
@@ -27,18 +28,37 @@
 
 G_BEGIN_DECLS
 
+/* Every member file is preceded by a header with this format.  */
+#define ZIP_HEADER_SIGNATURE            0x04034b50
 #define ZIP_HEADER_SIZE                30
-#define ZIP_HEADER_VERSION              4
-#define ZIP_HEADER_OS                   5
+#define ZIP_HEADER_EXTRACT              4
 #define ZIP_HEADER_FLAGS                6
 #define ZIP_HEADER_COMP_METHOD           8
-#define ZIP_HEADER_TIME                 10
-#define ZIP_HEADER_CRC                         14
-#define ZIP_HEADER_COMP_SIZE           18
-#define ZIP_HEADER_UNCOMP_SIZE          22
-#define ZIP_HEADER_NAME_LEN            26
-#define ZIP_HEADER_EXTRA_LEN           28
-
+#define ZIP_HEADER_DOSTIME              10
+#define ZIP_HEADER_CRC32               14
+#define ZIP_HEADER_CSIZE               18
+#define ZIP_HEADER_USIZE                22
+#define ZIP_HEADER_NAME_SIZE           26
+#define ZIP_HEADER_EXTRAS_SIZE         28
+
+/* Members may have this record after the compressed data.  It is meant
+   to be used only when it is not possible to seek back and patch the
+   right values into the header.  */
+#define ZIP_DDESC_SIGNATURE             0x08074b50
+#define ZIP_DDESC_SIZE                  16
+#define ZIP_DDESC_CRC32                 4
+#define ZIP_DDESC_CSIZE                 8
+#define ZIP_DDESC_USIZE                 12
+
+/* 64-bit version of above.  Used when the ZIP64 extra field is present
+   in the header.  */
+#define ZIP_DDESC64_SIGNATURE           ZIP_DDESC_SIGNATURE
+#define ZIP_DDESC64_SIZE                24
+#define ZIP_DDESC64_CRC32               4
+#define ZIP_DDESC64_CSIZE               8
+#define ZIP_DDESC64_USIZE               16
+
+/* The whole archive ends with a trailer.  */
 #define ZIP_TRAILER_SIGNATURE           0x06054b50
 #define ZIP_TRAILER_SIZE               22
 #define ZIP_TRAILER_DISK               4
@@ -49,12 +69,14 @@ G_BEGIN_DECLS
 #define ZIP_TRAILER_DIR_POS            16
 #define ZIP_TRAILER_COMMENT_SIZE       20
 
+/* A zip64 locator comes immediately before the trailer, if it is present.  */
 #define ZIP_ZIP64_LOCATOR_SIGNATURE     0x07064b50
 #define ZIP_ZIP64_LOCATOR_SIZE                 20
 #define ZIP_ZIP64_LOCATOR_DISK         4
 #define ZIP_ZIP64_LOCATOR_OFFSET       8
 #define ZIP_ZIP64_LOCATOR_DISKS                16
 
+/* A zip64 archive has this record somewhere to extend the field sizes.  */
 #define ZIP_TRAILER64_SIGNATURE         0x06064b50
 #define ZIP_TRAILER64_SIZE             56  /* or more */
 #define ZIP_TRAILER64_RECSIZE            4
@@ -67,6 +89,7 @@ G_BEGIN_DECLS
 #define ZIP_TRAILER64_DIR_SIZE                 40
 #define ZIP_TRAILER64_DIR_POS          48
 
+/* This defines the entries in the central directory.  */
 #define ZIP_DIRENT_SIGNATURE            0x02014b50
 #define ZIP_DIRENT_SIZE                 46
 #define ZIP_DIRENT_ENCODER              4
@@ -85,27 +108,26 @@ G_BEGIN_DECLS
 #define ZIP_DIRENT_FILE_MODE            38
 #define ZIP_DIRENT_OFFSET               42
 
-#define ZIP_FILE_HEADER_SIZE            30
-#define ZIP_FILE_HEADER_EXTRACT          4
-#define ZIP_FILE_HEADER_FLAGS            6
-#define ZIP_FILE_HEADER_COMPR_METHOD     8
-#define ZIP_FILE_HEADER_DOSTIME         10
-#define ZIP_FILE_HEADER_CRC32           14
-#define ZIP_FILE_HEADER_CSIZE           18
-#define ZIP_FILE_HEADER_USIZE           22
-#define ZIP_FILE_HEADER_NAME_SIZE       26
-#define ZIP_FILE_HEADER_EXTRAS_SIZE     28
-
-#define ZIP_DIRENT_EXTRA_FIELD_ZIP64 0x0001
-#define ZIP_DIRENT_EXTRA_FIELD_UNIXTIME 0x5455  /* "UT" */
-#define ZIP_DIRENT_EXTRA_FIELD_UIDGID 0x7875    /* "ux" */
+/* A few well-defined extra-field tags.  */
+enum {
+       ZIP_DIRENT_EXTRA_FIELD_ZIP64 = 0x0001,
+       ZIP_DIRENT_EXTRA_FIELD_UNIXTIME = 0x5455,  /* "UT" */
+       ZIP_DIRENT_EXTRA_FIELD_UIDGID = 0x7875    /* "ux" */
+};
+
+#define ZIP_DIRENT_FLAGS_HAS_DDESC 8
+
+/* OS codes.  There are plenty, but this is all we need.  */
+enum {
+       ZIP_OS_MSDOS = 0,
+       ZIP_OS_UNIX = 3
+};
 
 #define ZIP_NAME_SEPARATOR    '/'
 
 #define ZIP_BLOCK_SIZE 32768
 #define ZIP_BUF_SIZE 512
 
-
 /* z_flags */
 #define ZZIP_IS_ENCRYPTED(p)    ((*(unsigned char*)p)&1)
 #define ZZIP_IS_COMPRLEVEL(p)  (((*(unsigned char*)p)>>1)&3)
@@ -121,6 +143,7 @@ typedef struct {
        gsf_off_t                offset;
        gsf_off_t                data_offset;
        guint32                  dostime;
+       gboolean                 zip64;
 } GsfZipDirent;
 
 typedef struct {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]