1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
35 #include "zip-reader.h"
36 #include "zip-private.h"
39 #define _(msgid) gettext (msgid)
40 #define N_(msgid) (msgid)
/* Fields describing one member (file) of a zip archive; this is the object
   that the zip_member_* functions receive as ZM. */
44 FILE *fp; /* The stream from which the member's data is read. */
45 uint32_t offset; /* Starting offset of the member in the file (zip_member_open seeks here). */
46 uint32_t comp_size; /* Compressed length of member file data, in bytes. */
47 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
48 uint32_t expected_crc; /* CRC-32 of member file data. */
49 char *name; /* Name of member file. */
/* Callbacks used to read this member's data, chosen by get_decompressor. */
51 const struct decompressor *decompressor;
53 size_t bytes_unread; /* Number of bytes left in the member available for reading. */
55 struct string *errmsgs; /* A string to hold error messages.
56 This string is NOT owned by this object. */
/* Callback table for one compression method.  The initializers elsewhere in
   this file list the callbacks in the order init, read, finish. */
/* Prepares a member for reading; zip_member_open tests the result. */
62 bool (*init) (struct zip_member *);
/* Reads member data; zip_member_read passes the member, the destination
   buffer and the number of bytes wanted, and uses the returned count. */
63 int (*read) (struct zip_member *, void *, size_t);
/* Tears down the method's per-member state; called from zip_member_unref
   and zip_member_open. */
64 void (*finish) (struct zip_member *);
66 static const struct decompressor stored_decompressor;
67 static const struct decompressor inflate_decompressor;
69 static bool find_eocd (FILE *fp, off_t *off);
/* Chooses the decompressor for compression-method code C.  Only the stored
   and the inflate decompressors are ever returned; for any other code an
   error message naming C is written to ZM->errmsgs.
   NOTE(review): the case labels and the value returned on the unsupported
   path are not visible in this listing; callers compare the result with
   NULL, so presumably NULL is returned -- confirm. */
71 static const struct decompressor *
72 get_decompressor (struct zip_member *zm, uint16_t c)
78 return &stored_decompressor;
81 return &inflate_decompressor;
84 ds_put_format (zm->errmsgs, _("Unsupported compression type (%d)"), c);
/* State kept for one open zip archive. */
92 char *filename; /* The name of the file from which the data is read. */
93 FILE *fr; /* The stream from which the metadata is read. */
94 uint16_t n_members; /* The number of members in this archive. */
95 struct zip_member **members; /* The members (entries may be null pointers until the headers have been read). */
/* Ends the caller's use of ZM: clears the error string ZM points at and
   then gives up one reference through zip_member_unref.  The CRC comparison
   in the body is disabled (it sits inside a comment), so a CRC mismatch is
   not reported here. */
101 zip_member_finish (struct zip_member *zm)
103 ds_clear (zm->errmsgs);
104 /* Probably not useful, because we would have to read right to the end of the member
105 if (zm->expected_crc != zm->crc)
107 ds_put_cstr (zm->errs, _("CRC error reading zip"));
110 zip_member_unref (zm);
115 /* Destroy the zip reader ZR.  Each of the ZR->n_members slots of
    ZR->members is handed to zip_member_unref.  (Any further cleanup is not
    visible in this listing.) */
117 zip_reader_destroy (struct zip_reader *zr)
126 for (i = 0; i < zr->n_members; ++i)
128 zip_member_unref (zr->members[i]);
/* Prints one line describing ZM to stdout: its uncompressed size, its
   expected CRC as eight hex digits, and its name.
   NOTE(review): ucomp_size and expected_crc are uint32_t but are printed
   with %d and %08x; PRIu32 / PRIx32 would match the types exactly. */
136 zm_dump (const struct zip_member *zm)
138 printf ("%d\t%08x\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name);
142 /* Skip N bytes in F by seeking forward from the current position.
    The result of fseeko is not checked, so a failed seek goes unreported. */
144 skip_bytes (FILE *f, size_t n)
146 fseeko (f, n, SEEK_CUR);
149 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
152 /* Read N bytes from F, storing the result in X.
    Returns true only if all N bytes were read; a short read yields false. */
154 get_bytes (FILE *f, void *x, size_t n)
156 return (n == fread (x, 1, n, f));
159 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
162 /* Read a 32-bit value from F.  The raw bytes are first fetched into a
    local with get_bytes, and the result of that read is tested.
    NOTE(review): the WORDS_BIGENDIAN branch presumably byte-swaps the value
    before it is stored in *V -- that code is not visible here, confirm. */
164 get_u32 (FILE *f, uint32_t *v)
167 if (!get_bytes (f, &x, sizeof x))
169 #ifdef WORDS_BIGENDIAN
177 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
180 /* Read a 16-bit value from F.  The raw bytes are first fetched into a
    local with get_bytes, and the result of that read is tested.
    NOTE(review): the WORDS_BIGENDIAN branch presumably byte-swaps the value
    before it is stored in *V -- that code is not visible here, confirm. */
182 get_u16 (FILE *f, uint16_t *v)
185 if (!get_bytes (f, &x, sizeof x))
187 #ifdef WORDS_BIGENDIAN
196 /* Reads a 32-bit integer from F and compares it with EXPECTED.
197 Places an error string in ERR if they differ.  If the integer cannot be
    read at all, returns false without writing anything to ERR. */
199 check_magic (FILE *f, uint32_t expected, struct string *err)
203 if (! get_u32 (f, &magic)) return false;
205 if ((expected != magic))
/* The offset reported is where the 4-byte magic began: the stream position
   after the read, less the size of the value just consumed.  The cast
   applies to the ftello result only, before the subtraction. */
208 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
209 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
217 /* Reads up to BYTES bytes from ZM and puts them in BUF.
218 Returns the number of bytes read, or -1 on error. */
220 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
/* Start every read with an empty error string. */
224 ds_clear (zm->errmsgs);
/* Never ask the decompressor for more than what remains of the member. */
226 if ( bytes > zm->bytes_unread)
227 bytes = zm->bytes_unread;
229 bytes_read = zm->decompressor->read (zm, buf, bytes);
/* Fold the bytes just delivered into the member's running CRC. */
233 zm->crc = crc32_update (zm->crc, buf, bytes_read);
235 zm->bytes_unread -= bytes_read;
242 Read a central directory file header from ZR and add it to ZR's internal array.
243 Returns a pointer to the member read. This pointer belongs to ZR.
244 If the caller wishes to control it, she should ref it with
/* Reads the next member header from ZR->fr: checks the MAGIC_SOCD
   signature, reads the fixed-size fields and then the member name, skips
   the extra field, appends the new member to ZR->members and opens a
   separate stream on the archive file for the member's data.
   Returns NULL as soon as any field cannot be read or the compression
   method is unsupported.
   NOTE(review): ZM is allocated before the first check, and every
   early return below happens before ZM is stored anywhere visible in this
   listing -- confirm whether those paths leak ZM (and, later, ZM->name). */
247 static struct zip_member *
248 zip_header_read_next (struct zip_reader *zr)
250 struct zip_member *zm = xzalloc (sizeof *zm);
252 uint16_t v, nlen, extralen;
253 uint16_t gp, time, date;
255 uint16_t clen, diskstart, iattr;
/* The member reports errors into the reader's error string. */
260 zm->errmsgs = zr->errs;
262 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
/* Two 16-bit fields are read into V in turn; neither value is used in the
   lines shown here. */
265 if (! get_u16 (zr->fr, &v)) return NULL;
267 if (! get_u16 (zr->fr, &v)) return NULL;
268 if (! get_u16 (zr->fr, &gp)) return NULL;
269 if (! get_u16 (zr->fr, &comp_type)) return NULL;
/* The compression method decides which callbacks will read the data. */
271 zm->decompressor = get_decompressor (zm, comp_type);
272 if (! zm->decompressor) return NULL;
274 if (! get_u16 (zr->fr, &time)) return NULL;
275 if (! get_u16 (zr->fr, &date)) return NULL;
276 if (! get_u32 (zr->fr, &zm->expected_crc)) return NULL;
277 if (! get_u32 (zr->fr, &zm->comp_size)) return NULL;
278 if (! get_u32 (zr->fr, &zm->ucomp_size)) return NULL;
279 if (! get_u16 (zr->fr, &nlen)) return NULL;
280 if (! get_u16 (zr->fr, &extralen)) return NULL;
281 if (! get_u16 (zr->fr, &clen)) return NULL;
282 if (! get_u16 (zr->fr, &diskstart)) return NULL;
283 if (! get_u16 (zr->fr, &iattr)) return NULL;
284 if (! get_u32 (zr->fr, &eattr)) return NULL;
285 if (! get_u32 (zr->fr, &zm->offset)) return NULL;
/* NLEN + 1 bytes leave room for a terminator after the NLEN name bytes
   (presumably xzalloc zero-fills, which supplies it -- confirm). */
287 zm->name = xzalloc (nlen + 1);
288 if (! get_bytes (zr->fr, zm->name, nlen)) return NULL;
/* The extra field is not interpreted, only stepped over. */
290 skip_bytes (zr->fr, extralen);
292 zr->members[zr->nm++] = zm;
/* Each member gets its own stream on the archive file.
   NOTE(review): confirm that the fopen result is checked in the lines not
   shown here. */
294 zm->fp = fopen (zr->filename, "rb");
302 /* Create a reader from the zip called FILENAME.
    The steps visible here are: open the file, check that it begins with
    MAGIC_LHDR, locate the End Of Central Directory record with find_eocd,
    seek to it and check MAGIC_EOCD, read the member count and the central
    directory's length and start offset, seek to the central directory, and
    allocate one (initially null) slot per member.  Failures are described
    in ZR->errs. */
304 zip_reader_create (const char *filename, struct string *errs)
306 uint16_t disknum, total_members;
308 uint32_t central_dir_start, central_dir_length;
310 struct zip_reader *zr = xzalloc (sizeof *zr);
313 ds_init_empty (zr->errs);
317 zr->fr = fopen (filename, "rb");
320 ds_put_cstr (zr->errs, strerror (errno));
325 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
332 if ( ! find_eocd (zr->fr, &offset))
334 ds_put_format (zr->errs, _("Cannot find central directory"));
340 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
342 const char *mm = strerror (errno);
343 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
350 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
/* Two consecutive 16-bit fields are both read into DISKNUM; only the
   second value survives, and neither is used in the lines shown.  The
   last two reads must take the ADDRESS of the locals declared above (the
   ampersand had been mis-decoded in this listing). */
357 if (! get_u16 (zr->fr, &disknum)
358 || ! get_u16 (zr->fr, &disknum)
360 || ! get_u16 (zr->fr, &zr->n_members)
361 || ! get_u16 (zr->fr, &total_members)
363 || ! get_u32 (zr->fr, &central_dir_length)
364 || ! get_u32 (zr->fr, &central_dir_start))
371 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
373 const char *mm = strerror (errno);
374 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
/* NOTE(review): xcalloc presumably returns zero-filled memory already, which
   would make the memset below redundant -- confirm before removing it. */
380 zr->members = xcalloc (zr->n_members, sizeof (*zr->members));
381 memset (zr->members, 0, zr->n_members * sizeof (*zr->members));
/* Keep a private copy of the name; member streams are opened from it. */
383 zr->filename = xstrdup (filename);
390 /* Return the member called MEMBER from the reader ZR.
    Walks ZR's member slots, reading further headers with
    zip_header_read_next, and looks for a member whose name equals MEMBER.
    For that member it seeks the member's own stream to the recorded offset,
    checks for MAGIC_LHDR there, re-reads the header fields and verifies that
    the name stored at that position agrees with the name already held.
    Returns NULL when a field cannot be read or the compression method is
    unsupported.
    NOTE(review): NAME is allocated below and the read that follows can
    return NULL; confirm whether NAME is freed on that path and after the
    comparison (not visible in this listing). */
392 zip_member_open (struct zip_reader *zr, const char *member)
394 uint16_t v, nlen, extra_len;
395 uint16_t gp, comp_type, time, date;
396 uint32_t ucomp_size, comp_size;
399 bool new_member = false;
403 struct zip_member *zm = NULL;
408 for (i = 0; i < zr->n_members; ++i)
/* zip_header_read_next also records the member in ZR->members itself. */
414 zm = zr->members[i] = zip_header_read_next (zr);
417 if (zm && 0 == strcmp (zm->name, member))
/* Position the member's private stream at the start of its header. */
426 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
428 const char *mm = strerror (errno);
429 ds_put_format (zm->errmsgs, _("Failed to seek to start of member `%s': %s"), zm->name, mm);
433 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
438 if (! get_u16 (zm->fp, &v)) return NULL;
439 if (! get_u16 (zm->fp, &gp)) return NULL;
440 if (! get_u16 (zm->fp, &comp_type)) return NULL;
/* The method recorded at this position (re)selects the decompressor. */
441 zm->decompressor = get_decompressor (zm, comp_type);
442 if (! zm->decompressor) return NULL;
443 if (! get_u16 (zm->fp, &time)) return NULL;
444 if (! get_u16 (zm->fp, &date)) return NULL;
445 if (! get_u32 (zm->fp, &crc)) return NULL;
446 if (! get_u32 (zm->fp, &comp_size)) return NULL;
448 if (! get_u32 (zm->fp, &ucomp_size)) return NULL;
449 if (! get_u16 (zm->fp, &nlen)) return NULL;
450 if (! get_u16 (zm->fp, &extra_len)) return NULL;
/* NLEN + 1 bytes leave room for a terminator after the NLEN name bytes. */
452 name = xzalloc (nlen + 1);
454 if (! get_bytes (zm->fp, name, nlen)) return NULL;
456 skip_bytes (zm->fp, extra_len);
/* The two copies of the name must agree, otherwise the archive is
   reported as inconsistent. */
458 if (strcmp (name, zm->name) != 0)
460 ds_put_format (zm->errmsgs,
461 _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"),
/* Nothing has been read yet: the whole uncompressed payload remains. */
470 zm->bytes_unread = zm->ucomp_size;
/* NOTE(review): the condition guarding this finish call is not visible
   here; presumably it only runs for a member that was opened before
   (see new_member above) -- confirm. */
473 zm->decompressor->finish (zm);
475 if (!zm->decompressor->init (zm) )
482 zip_member_ref (struct zip_member *zm)
/* Drops one reference to ZM.  When the count reaches zero the decompressor's
   finish callback is run.  (Any further teardown is not visible in this
   listing.) */
491 zip_member_unref (struct zip_member *zm)
496 if (--zm->ref_cnt == 0)
498 zm->decompressor->finish (zm);
509 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
512 /* Search for something that looks like the End Of Central Directory in FP.
513 If found, the offset of the record will be placed in OFF.
514 Returns true if found, false otherwise.
517 find_eocd (FILE *fp, off_t *off)
520 const uint32_t magic = MAGIC_EOCD;
523 /* The magic must be at least 22 bytes from the end of the file,
524 because that is the minimum length of the EndOfCentralDirectory
527 if ( 0 > fseeko (fp, -22, SEEK_END))
532 stop = start + sizeof (magic);
535 found = probe_magic (fp, magic, start, stop, off);
536 /* FIXME: For extra confidence, look up the directory start record here. */
539 stop = start + sizeof (magic);
549 Search FP for MAGIC starting at START and reaching until STOP.
550 Returns true iff MAGIC is found. False otherwise.
551 OFF receives the location of the magic.
/* Scans FP from START for the four bytes of MAGIC, reading one byte at a
   time and comparing it with the next expected byte.  On a match *OFF is
   set to the position at which the four bytes began. */
554 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
558 unsigned char seq[4];
561 if ( 0 > fseeko (fp, start, SEEK_SET))
/* Split MAGIC into its four bytes, least significant byte first; this is
   the order in which they are compared with bytes read from FP. */
566 for (i = 0; i < 4 ; ++i)
568 seq[i] = (magic >> i * 8) & 0xFF;
573 if (1 != fread (&byte, 1, 1, fp))
576 if ( byte == seq[state])
/* The stream now sits just past the matched bytes, so the match began
   four bytes earlier. */
583 *off = ftello (fp) - 4;
595 /* Null decompressor: passes the member's bytes through unchanged. */
/* Read callback: copies up to N bytes from the member's stream into BUF
   and returns the number fread delivered.
   NOTE(review): fread yields a size_t, while the read slot of struct
   decompressor is declared as returning int -- confirm the conversion is
   intended. */
598 stored_read (struct zip_member *zm, void *buf, size_t n)
600 return fread (buf, 1, n, zm->fp);
/* Init callback; ZM is unused. */
604 stored_init (struct zip_member *zm UNUSED)
/* Finish callback; ZM is unused and there is nothing to release. */
610 stored_finish (struct zip_member *zm UNUSED)
612 /* Nothing required */
/* Callback table, in the order init, read, finish. */
615 static const struct decompressor stored_decompressor =
616 {stored_init, stored_read, stored_finish};
618 /* Inflate decompressor. */
/* Size, in bytes, of the buffer of compressed input kept per member. */
623 #define UCOMPSIZE 4096
/* Compressed bytes read from the member's stream, handed to zlib as its
   input buffer by inflate_read. */
629 unsigned char ucomp[UCOMPSIZE];
/* Running total of compressed bytes read from the member so far;
   inflate_read subtracts it from comp_size to see how much is left. */
631 size_t ucomp_bytes_read;
633 /* Two bitfields as defined by RFC1950 */
/* Finish callback of the inflate decompressor: ends the zlib stream held in
   the inflator that ZM->aux points at.  (Anything done after inflateEnd is
   not visible in this listing.) */
638 inflate_finish (struct zip_member *zm)
640 struct inflator *inf = zm->aux;
642 inflateEnd (&inf->zss);
/* Init callback of the inflate decompressor.  Allocates the per-member
   inflator, precomputes a 16-bit header word (cmf_flg) made of the CMF and
   FLG bytes with its check bits set, and initialises the zlib stream with
   no input pending.  inflate_read later places the two bytes of that word
   at the front of the input buffer.  If inflateInit fails, a message built
   from zError is written to ZM->errmsgs. */
648 inflate_init (struct zip_member *zm)
651 struct inflator *inf = xzalloc (sizeof *inf);
654 uint16_t cmf = 0x8; /* Always 8 for inflate */
656 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
658 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
660 /* Combine CMF (high byte) and FLG (low byte) into a 16-bit word. */
661 inf->cmf_flg = (cmf << 8 ) | flg;
663 /* Set the check bits: bump the word up to a multiple of 31. */
664 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
665 assert (inf->cmf_flg % 31 == 0);
/* No input yet, and no custom allocator hooks are supplied. */
667 inf->zss.next_in = Z_NULL;
668 inf->zss.avail_in = 0;
669 inf->zss.zalloc = Z_NULL;
670 inf->zss.zfree = Z_NULL;
671 inf->zss.opaque = Z_NULL;
672 r = inflateInit (&inf->zss);
676 ds_put_format (zm->errmsgs, _("Cannot initialize inflator: %s"), zError (r));
/* Read callback of the inflate decompressor.  When zlib has consumed all of
   its pending input, refills the input buffer from the member's stream (at
   most UCOMPSIZE bytes, and no more than the compressed data still unread),
   then asks zlib to inflate into BUF.  Returns the number of bytes produced
   (N less the output space zlib left unused); on a zlib error a message
   built from zError is written to ZM->errmsgs. */
686 inflate_read (struct zip_member *zm, void *buf, size_t n)
689 struct inflator *inf = zm->aux;
/* Only refill when zlib has no input left over from the previous call. */
691 if (inf->zss.avail_in == 0)
/* On the first refill the precomputed header word goes in front of the
   member's own data, high byte first. */
697 if ( inf->state == 0)
699 inf->ucomp[1] = inf->cmf_flg ;
700 inf->ucomp[0] = inf->cmf_flg >> 8 ;
/* Compressed bytes of this member that have not been read yet. */
706 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
708 if (bytes_to_read == 0)
711 if (bytes_to_read > UCOMPSIZE)
712 bytes_to_read = UCOMPSIZE;
/* PAD bytes at the front of the buffer are already occupied, so the file
   data goes after them.
   NOTE(review): where PAD is set is not visible here; confirm that
   bytes_to_read can never be smaller than PAD, since the subtraction is
   unsigned. */
714 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
716 inf->ucomp_bytes_read += bytes_read;
718 inf->zss.avail_in = bytes_read + pad;
719 inf->zss.next_in = inf->ucomp;
/* Point zlib at the caller's buffer for output. */
721 inf->zss.avail_out = n;
722 inf->zss.next_out = buf;
724 r = inflate (&inf->zss, Z_NO_FLUSH);
727 return n - inf->zss.avail_out;
730 ds_put_format (zm->errmsgs, _("Error inflating: %s"), zError (r));
/* Callback table of the inflate decompressor, in the order init, read,
   finish. */
735 static const struct decompressor inflate_decompressor =
736 {inflate_init, inflate_read, inflate_finish};