? gsf/gsf-zip-impl.h ? tests/test-cat-zip.c ? tests/test-ls-zip.c Index: gsf/gsf-infile-zip.c =================================================================== RCS file: /cvs/gnome/libgsf/gsf/gsf-infile-zip.c,v retrieving revision 1.11 diff -u -r1.11 gsf-infile-zip.c --- gsf/gsf-infile-zip.c 12 Jun 2002 14:57:27 -0000 1.11 +++ gsf/gsf-infile-zip.c 25 Jul 2002 10:54:06 -0000 @@ -3,6 +3,7 @@ * gsf-infile-zip.c : * * Copyright (C) 2002 Jody Goldberg (jody gnome org) + * Tambet Ingo (tambet ximian com) * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -20,12 +21,49 @@ */ #include -#include #include +#include #include +#include +#include + +#include +#include + +#undef G_LOG_DOMAIN +#define G_LOG_DOMAIN "libgsf:zip" + +typedef enum { + ZIP_STORED = 0, + ZIP_SHRUNK = 1, + ZIP_REDUCEDx1 = 2, + ZIP_REDUCEDx2 = 3, + ZIP_REDUCEDx3 = 4, + ZIP_REDUCEDx4 = 5, + ZIP_IMPLODED = 6, + ZIP_TOKENIZED = 7, + ZIP_DEFLATED = 8, + ZIP_DEFLATED_BETTER = 9, + ZIP_IMPLODED_BETTER = 10 +} ZipCompressionMethod; + +typedef struct { + char *name; + ZipCompressionMethod compr_method; + guint32 crc32; + size_t csize; + size_t usize; + off_t offset; + off_t data_offset; + + guint32 restlen; + guint32 crestlen; + z_stream stream; +} ZipDirent; typedef struct { - GsfInput *sb_file; + guint16 entries; + guint32 dir_pos; int ref_count; } ZipInfo; @@ -33,8 +71,11 @@ struct _GsfInfileZip { GsfInfile parent; - GsfInput *input; - ZipInfo *info; + GsfInput *input; + ZipInfo *info; + GList *dirent_list; + + ZipDirent *dirent; }; typedef struct { @@ -44,69 +85,136 @@ #define GSF_INFILE_ZIP_CLASS(k) (G_TYPE_CHECK_CLASS_CAST ((k), GSF_INFILE_ZIP_TYPE, GsfInfileZipClass)) #define GSF_IS_INFILE_ZIP_CLASS(k) (G_TYPE_CHECK_CLASS_TYPE ((k), GSF_INFILE_ZIP_TYPE)) -/** - * Based on information in zziplib - */ -#define ZIP_HEADER_SIZE 30 -#define ZIP_HEADER_VERSION 4 -#define ZIP_HEADER_OS 5 -#define ZIP_HEADER_FLAGS 6 -#define ZIP_HEADER_COMP_METHOD 8 -#define ZIP_HEADER_TIME 10 -#define ZIP_HEADER_CRC 14 -#define ZIP_HEADER_COMP_SIZE 18 -#define ZIP_HEADER_UNCOMP_SIZE 22 -#define ZIP_HEADER_NAME_LEN 26 -#define ZIP_HEADER_EXTRA_LEN 28 - -#define ZIP_TRAILER_SIZE 22 -#define ZIP_TRAILER_DISK 2 -#define ZIP_TRAILER_DIR_DISK 4 -#define ZIP_TRAILER_ENTRIES 6 -#define ZIP_TRAILER_TOTAL_ENTRIES 8 -#define ZIP_TRAILER_DIR_SIZE 10 -#define ZIP_TRAILER_DIR_POS 14 -#define ZIP_TRAILER_COMMENT_SIZE 18 - -#if 0 - char z_magic[4]; /* central file header signature (0x02014b50) */ - struct zzip_version z_encoder; /* version made by */ - struct zzip_version z_extract; /* version need to extract */ - char z_flags[2]; /* general purpose bit flag */ - char z_compr[2]; /* compression method */ - struct zzip_dostime z_dostime; /* last mod file time&date (dos format) */ - char z_crc32[4]; /* crc-32 */ - char z_csize[4]; /* compressed size */ - char z_usize[4]; /* uncompressed size */ - char z_namlen[2]; /* filename length (null if stdin) */ - char z_extras[2]; /* extra field length */ - char z_comment[2]; /* file comment length */ - char z_diskstart[2]; /* disk number of start (if spanning zip over multiple disks) */ - char z_filetype[2]; /* internal file attributes, bit0 = ascii */ - char z_filemode[4]; /* extrnal file attributes, eg. msdos attrib byte */ - char z_off[4]; /* relative offset of local file header, seekval if singledisk */ - /* followed by filename (of variable size) */ - /* followed by extra field (of variable size) */ - /* followed by file comment (of variable size) */ -/* z_flags */ -#define ZZIP_IS_ENCRYPTED(p) ((*(unsigned char*)p)&1) -#define ZZIP_IS_COMPRLEVEL(p) (((*(unsigned char*)p)>>1)&3) -#define ZZIP_IS_STREAMED(p) (((*(unsigned char*)p)>>3)&1) - -/* z_compr */ -#define ZZIP_IS_STORED 0 -#define ZZIP_IS_SHRUNK 1 -#define ZZIP_IS_REDUCEDx1 2 -#define ZZIP_IS_REDUCEDx2 3 -#define ZZIP_IS_REDUCEDx3 4 -#define ZZIP_IS_REDUCEDx4 5 -#define ZZIP_IS_IMPLODED 6 -#define ZZIP_IS_TOKENIZED 7 -#define ZZIP_IS_DEFLATED 8 -#define ZZIP_IS_DEFLATED_BETTER 9 -#define ZZIP_IS_IMPLODED_BETTER 10 +static off_t +zip_find_trailer (GsfInfileZip *zip) +{ + static guint8 const trailer_signature[] = + { 'P', 'K', 0x05, 0x06 }; + off_t offset, trailer_offset; + size_t filesize, maplen; + guint8 const *data; + + filesize = gsf_input_size (zip->input); + if (filesize < ZIP_TRAILER_SIZE) + return -1; + + trailer_offset = filesize; + maplen = filesize & (ZIP_BUF_SIZE - 1); + if (maplen == 0) + maplen = ZIP_BUF_SIZE; + offset = filesize - maplen; /* offset is now BUFSIZ aligned */ + + while (TRUE) { + guchar *p, *s; + + if ((gsf_input_seek (zip->input, offset, GSF_SEEK_SET)) < 0) + return -1; + + if ((data = gsf_input_read (zip->input, maplen, NULL)) == NULL) + return -1; + + p = (guchar *) data; + + for (s = p + maplen - 1; (s >= p); s--, trailer_offset--) { + if ((*s == 'P') && + (p + maplen - 1 - s > ZIP_TRAILER_SIZE - 2) && + !memcmp (s, trailer_signature, sizeof (trailer_signature))) { + return --trailer_offset; + } + } + + /* not found in currently mapped block, so update it if + * there is some room in before. The requirements are.. + * (a) mappings should overlap so that trailer can cross BUFSIZ-boundary + * (b) trailer cannot be farther away than 64K from fileend + */ + + /* outer loop cond */ + if (offset <= 0) + return -1; + + /* outer loop step */ + offset -= ZIP_BUF_SIZE / 2; + maplen = ZIP_BUF_SIZE; + + if (filesize - offset > 64 * 1024) + return -1; + } /*outer loop*/ + + return -1; +} + +static ZipDirent * +zip_dirent_new (GsfInfileZip *zip, off_t *offset) +{ + static guint8 const dirent_signature[] = + { 'P', 'K', 0x01, 0x02 }; + ZipDirent *dirent; + guint8 const *data; + guint16 name_len, extras_len, comment_len, compr_method; + guint32 crc32, csize, usize, off; + gchar *name; + + /* Read data and check the header */ + if (gsf_input_seek (zip->input, *offset, GSF_SEEK_SET) || + NULL == (data = gsf_input_read (zip->input, ZIP_DIRENT_SIZE, NULL)) || + 0 != memcmp (data, dirent_signature, sizeof (dirent_signature))) { + return NULL; + } + + name_len = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_NAME_SIZE); + extras_len = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_EXTRAS_SIZE); + comment_len = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMMENT_SIZE); + + compr_method = GSF_LE_GET_GUINT16 (data + ZIP_DIRENT_COMPR_METHOD); + crc32 = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CRC32); + csize = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_CSIZE); + usize = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_USIZE); + off = GSF_LE_GET_GUINT32 (data + ZIP_DIRENT_OFFSET); + + name = (gchar *) g_malloc ((gulong) (name_len + 1)); + if ((data = gsf_input_read (zip->input, name_len, NULL)) == NULL) { + g_free (name); + return NULL; + } -#endif + memcpy (name, data, name_len); + name[name_len + 1] = '\0'; + + dirent = g_new0 (ZipDirent, 1); + dirent->name = name; + + dirent->compr_method = compr_method; + dirent->crc32 = crc32; + dirent->csize = csize; + dirent->usize = usize; + dirent->offset = off; + dirent->restlen = usize; + dirent->crestlen = csize; + + *offset += ZIP_DIRENT_SIZE + name_len + extras_len + comment_len; + + return dirent; +} + +static void +zip_dirent_free (ZipDirent *dirent) +{ + g_return_if_fail (dirent != NULL); + + (void) inflateEnd (&dirent->stream); + if (dirent->name != NULL) + g_free (dirent->name); + g_free (dirent); +} + +/*****************************************************************************/ +static ZipInfo * +zip_info_ref (ZipInfo *info) +{ + info->ref_count++; + return info; +} static void zip_info_unref (ZipInfo *info) @@ -118,6 +226,28 @@ } /** + * zip_dup : + * @src : + * + * Utility routine to _partially_ replicate a file. It does NOT init the dirent_list. + * + * Return value: the partial duplicate. + **/ +static GsfInfileZip * +zip_dup (GsfInfileZip const *src) +{ + GsfInfileZip *dst; + + g_return_val_if_fail (src != NULL, NULL); + + dst = g_object_new (GSF_INFILE_ZIP_TYPE, NULL); + dst->input = gsf_input_dup (src->input, NULL); + dst->info = zip_info_ref (src->info); + + return dst; +} + +/** * zip_init_info : * @zip : * @err : optionally NULL @@ -130,89 +260,157 @@ static gboolean zip_init_info (GsfInfileZip *zip, GError **err) { - static guint8 const dirent_signature[] = - { 'P', 'K', 0x01, 0x02 }; static guint8 const header_signature[] = { 'P', 'K', 0x03, 0x04 }; - static guint8 const trailer_signature[] = - { 'P', 'K', 0x05, 0x06 }; - guint8 const *header; + guint8 const *header, *trailer; + guint16 entries, i; + guint32 dir_pos; ZipInfo *info; + off_t offset; /* Check the header */ if (gsf_input_seek (zip->input, 0, GSF_SEEK_SET) || NULL == (header = gsf_input_read (zip->input, ZIP_HEADER_SIZE, NULL)) || 0 != memcmp (header, header_signature, sizeof (header_signature))) { - if (err != NULL) - *err = g_error_new (gsf_input_error (), 0, - "No Zip signature"); - return TRUE; + g_set_error (err, gsf_input_error (), 0, "No Zip signature"); } /* Find and check the trailing header */ -#if 0 - for (s = p + maplen-1; (s >= p); s--) - { - if (*s == 'P' - && p+maplen-1-s > sizeof(*trailer)-2 - && ZZIP_DISK_TRAILER_CHECKMAGIC(s)) - { - /* if the file-comment is not present, it happens - that the z_comment field often isn't either */ - if (p+maplen-1-s > sizeof(*trailer)) - { memcpy (trailer, s, sizeof(*trailer)); } - else - { - memcpy (trailer, s, sizeof(*trailer)-2); - trailer->z_comment[0] = 0; trailer->z_comment[1] = 0; - } + offset = zip_find_trailer (zip); + if (offset < 0) { + g_set_error (err, gsf_input_error (), 0, "No Zip trailer"); + return TRUE; + } - { return(0); } - } - } + if (gsf_input_seek (zip->input, offset, GSF_SEEK_SET) || + NULL == (trailer = gsf_input_read (zip->input, ZIP_TRAILER_SIZE, NULL))) { + g_set_error (err, gsf_input_error (), 0, "Error reading Zip signature"); + return TRUE; + } - DBG5("directory = { entries= %d/%d, size= %ld, seek= %ld } ", - ZZIP_GET16(trailer.z_entries), ZZIP_GET16(trailer.z_finalentries), - ZZIP_GET32(trailer.z_rootsize), ZZIP_GET32(trailer.z_rootseek)); - if ((rv = zzip_parse_root_directory(dir->fd, &trailer, &dir->hdr0)) != 0) -#endif + entries = GSF_LE_GET_GUINT16 (trailer + ZIP_TRAILER_ENTRIES); + dir_pos = GSF_LE_GET_GUINT32 (trailer + ZIP_TRAILER_DIR_POS); info = g_new0 (ZipInfo, 1); zip->info = info; -#if 0 - zip->dirent = info->root_dir = zip_dirent_new (zip, 0, NULL); - if (zip->dirent == NULL) - return TRUE; -#endif + info->ref_count = 1; + info->entries = entries; + info->dir_pos = dir_pos; + + /* Read the directory */ + for (i = 0, offset = dir_pos; i < entries; i++) { + ZipDirent *d; + + d = zip_dirent_new (zip, &offset); + if (d == NULL) { + g_set_error (err, gsf_input_error (), 0, "Error reading zip dirent"); + return TRUE; + } + + zip->dirent_list = g_list_append (zip->dirent_list, d); + } return FALSE; } +/* GsfInput class functions */ + static GsfInput * gsf_infile_zip_dup (GsfInput *src_input, GError **err) { GsfInfileZip const *src = GSF_INFILE_ZIP (src_input); - GsfInfileZip *dst = NULL; + GsfInfileZip *dst = zip_dup (src); + + if (dst == NULL) { + g_set_error (err, gsf_input_error (), 0, "Something went wrong in zip_dup."); + return NULL; + } + + dst->dirent = src->dirent; return GSF_INPUT (dst); } +static gboolean +zip_update_stream_in (GsfInfileZip const *zip) +{ + ZipDirent *dirent; + guint32 read_now; + guint8 const *data; + + dirent = zip->dirent; + + if (dirent->crestlen == 0) + return FALSE; + + read_now = (dirent->crestlen > ZIP_BLOCK_SIZE) ? ZIP_BLOCK_SIZE : dirent->crestlen; + + gsf_input_seek (zip->input, (off_t) (dirent->data_offset + dirent->stream.total_in), GSF_SEEK_SET); + if ((data = gsf_input_read (zip->input, read_now, NULL)) == NULL) + return FALSE; + + dirent->crestlen -= read_now; + dirent->stream.next_in = (unsigned char *) data; /* next input byte */ + dirent->stream.avail_in = read_now; /* number of bytes available at next_in */ + + return TRUE; +} + static guint8 const * gsf_infile_zip_read (GsfInput *input, size_t num_bytes, guint8 *buffer) { GsfInfileZip *zip = GSF_INFILE_ZIP (input); + ZipDirent *dirent = zip->dirent; + + static guint8 *buf; + + if (buf == NULL) + buf = g_malloc (256); -#if 0 - if (zip->stream.buf_size < num_bytes) { - if (zip->stream.buf != NULL) - g_free (zip->stream.buf); - zip->stream.buf_size = num_bytes; - zip->stream.buf = g_malloc (num_bytes); + if (dirent->restlen < num_bytes) + return NULL; + + switch (dirent->compr_method) { + case ZIP_STORED: + dirent->restlen -= num_bytes; + return gsf_input_read (input, num_bytes, buffer); + break; + case ZIP_DEFLATED: + + if (buffer == NULL) + buffer = buf; + + dirent->stream.avail_out = num_bytes; + dirent->stream.next_out = (unsigned char *)buffer; + + do { + int err; + int startlen; + + if (dirent->crestlen > 0 && dirent->stream.avail_in == 0) + if (!zip_update_stream_in (zip)) + break; + + startlen = dirent->stream.total_out; + err = inflate(&dirent->stream, Z_NO_FLUSH); + + if (err == Z_STREAM_END) + dirent->restlen = 0; + else if (err == Z_OK) + dirent->restlen -= (dirent->stream.total_out - startlen); + else + break; + + } while (dirent->restlen && dirent->stream.avail_out); + + return buffer; + break; + + default: + break; } - return zip->stream.buf; -#endif return NULL; } @@ -220,13 +418,79 @@ gsf_infile_zip_seek (GsfInput *input, off_t offset, GsfOff_t whence) { GsfInfileZip *zip = GSF_INFILE_ZIP (input); + + (void) zip; + (void) offset; + (void) whence; + return FALSE; } +/* GsfInfile class functions */ + +/*****************************************************************************/ + + +static GsfInput * +gsf_infile_zip_new_child (GsfInfileZip *parent, ZipDirent *dirent) +{ + GsfInfileZip *child; + static guint8 const header_signature[] = + { 'P', 'K', 0x03, 0x04 }; + guint8 const *data; + guint16 name_len, extras_len; + + child = zip_dup (parent); + + gsf_input_set_size (GSF_INPUT (child), dirent->usize); + gsf_input_set_name (GSF_INPUT (child), dirent->name); + gsf_input_set_container (GSF_INPUT (child), GSF_INFILE (parent)); + + child->dirent = dirent; + + /* skip local header + * should test tons of other info, but trust that those are correct + **/ + + if (gsf_input_seek (child->input, dirent->offset, GSF_SEEK_SET) || + NULL == (data = gsf_input_read (child->input, ZIP_FILE_HEADER_SIZE, NULL)) || + 0 != memcmp (data, header_signature, sizeof (header_signature))) { + g_object_unref (child); + return NULL; + } + + name_len = GSF_LE_GET_GUINT16 (data + ZIP_FILE_HEADER_NAME_SIZE); + extras_len = GSF_LE_GET_GUINT16 (data + ZIP_FILE_HEADER_EXTRAS_SIZE); + + dirent->data_offset = dirent->offset + ZIP_FILE_HEADER_SIZE + name_len + extras_len; + + if (dirent->compr_method != ZIP_STORED) { + int err; + + if (!zip_update_stream_in (child)) { + g_object_unref (child); + return NULL; + } + + err = inflateInit2 (&child->dirent->stream, -MAX_WBITS); + if (err != Z_OK) { + g_object_unref (child); + return NULL; + } + } + + return GSF_INPUT (child); +} + static GsfInput * gsf_infile_zip_child_by_index (GsfInfile *infile, int target) { GsfInfileZip *zip = GSF_INFILE_ZIP (infile); + GList *p; + + for (p = zip->dirent_list; p != NULL; p = p->next) + if (target-- <= 0) + return gsf_infile_zip_new_child (zip, p->data); return NULL; } @@ -234,6 +498,11 @@ gsf_infile_zip_name_by_index (GsfInfile *infile, int target) { GsfInfileZip *zip = GSF_INFILE_ZIP (infile); + GList *p; + + for (p = zip->dirent_list; p != NULL; p = p->next) + if (target-- <= 0) + return ((ZipDirent *)p->data)->name; return NULL; } @@ -241,6 +510,13 @@ gsf_infile_zip_child_by_name (GsfInfile *infile, char const *name) { GsfInfileZip *zip = GSF_INFILE_ZIP (infile); + GList *p; + + for (p = zip->dirent_list; p != NULL; p = p->next) { + ZipDirent *dirent = p->data; + if (dirent->name != NULL && !strcmp (name, dirent->name)) + return gsf_infile_zip_new_child (zip, p->data); + } return NULL; } @@ -248,18 +524,30 @@ gsf_infile_zip_num_children (GsfInfile *infile) { GsfInfileZip *zip = GSF_INFILE_ZIP (infile); - return 0; + + return g_list_length (zip->dirent_list); } + static void gsf_infile_zip_finalize (GObject *obj) { GObjectClass *parent_class; GsfInfileZip *zip = GSF_INFILE_ZIP (obj); + GList *p; + if (zip->input != NULL) { + g_object_unref (G_OBJECT (zip->input)); + zip->input = NULL; + } if (zip->info != NULL) { zip_info_unref (zip->info); zip->info = NULL; } + for (p = zip->dirent_list; p != NULL; p = p->next) + zip_dirent_free ((ZipDirent *) p->data); + + g_list_free (zip->dirent_list); + parent_class = g_type_class_peek (GSF_INFILE_TYPE); if (parent_class && parent_class->finalize) parent_class->finalize (obj); @@ -269,8 +557,9 @@ gsf_infile_zip_init (GObject *obj) { GsfInfileZip *zip = (GsfInfileZip *)obj; - zip->input = NULL; - zip->info = NULL; + zip->input = NULL; + zip->info = NULL; + zip->dirent = NULL; } static void @@ -321,14 +610,4 @@ } return GSF_INFILE (zip); -} - -/** - * gsf_infile_zip_get_ooname : - * @input : - **/ -char const * -gsf_infile_zip_get_ooname (GsfInput *input) -{ - return NULL; } Index: gsf/gsf-infile-zip.h =================================================================== RCS file: /cvs/gnome/libgsf/gsf/gsf-infile-zip.h,v retrieving revision 1.3 diff -u -r1.3 gsf-infile-zip.h --- gsf/gsf-infile-zip.h 12 Jun 2002 14:57:27 -0000 1.3 +++ gsf/gsf-infile-zip.h 25 Jul 2002 10:54:06 -0000 @@ -35,7 +35,7 @@ GType gsf_infile_zip_get_type (void); GsfInfile *gsf_infile_zip_new (GsfInput *source, GError **err); -char const *gsf_infile_zip_get_ooname (GsfInput *input); + G_END_DECLS Index: tests/Makefile.am =================================================================== RCS file: /cvs/gnome/libgsf/tests/Makefile.am,v retrieving revision 1.3 diff -u -r1.3 Makefile.am --- tests/Makefile.am 6 Jul 2002 06:20:40 -0000 1.3 +++ tests/Makefile.am 25 Jul 2002 10:54:06 -0000 @@ -2,7 +2,7 @@ check_PROGRAMS = test-msole1 test-msole2 test-cp-msole test-msvba \ test-zip1 test-input1 test-gzip1 \ - test-textline + test-textline test-ls-zip test-cat-zip ldadd = $(top_builddir)/gsf/libgsf-1.la $(LIBGSF_LIBS) @@ -10,7 +10,9 @@ test_msole2_LDADD = $(ldadd) test_cp_msole_LDADD = $(ldadd) test_msvba_LDADD = $(ldadd) -test_zip1_LDADD = $(ldadd) +test_zip1_LDADD = $(ldadd) test_input1_LDADD = $(ldadd) test_gzip1_LDADD = $(ldadd) test_textline_LDADD = $(ldadd) +test_ls_zip_LDADD = $(ldadd) +test_cat_zip_LDADD = $(ldadd)