1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
35 #include "zip-reader.h"
36 #include "zip-private.h"
39 #define _(msgid) gettext (msgid)
40 #define N_(msgid) (msgid)
44 FILE *fp; /* The stream from which the member's data is read. */
45 uint32_t offset; /* Starting offset of the member in the file. */
46 uint32_t comp_size; /* Compressed (on-disk) length of member file data, in bytes. */
47 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
48 uint32_t expected_crc; /* CRC-32 of member file data, as recorded in the archive. */
49 char *name; /* Name of member file. */
51 const struct decompressor *decompressor; /* Routines used to read this member's data. */
53 size_t bytes_unread; /* Number of bytes left in the member available for reading. */
55 struct string *errmsgs; /* A string to hold error messages.
56 This string is NOT owned by this object. */
62 bool (*init) (struct zip_member *);
63 int (*read) (struct zip_member *, void *, size_t);
64 void (*finish) (struct zip_member *);
66 static const struct decompressor stored_decompressor;
67 static const struct decompressor inflate_decompressor;
69 static bool find_eocd (FILE *fp, off_t *off);
71 static const struct decompressor *
72 get_decompressor (struct zip_member *zm, uint16_t c)
78 return &stored_decompressor;
81 return &inflate_decompressor;
84 ds_put_format (zm->errmsgs, _("Unsupported compression type (%d)"), c);
92 char *filename; /* The name of the file from which the data is read. */
93 FILE *fr; /* The stream from which the meta data is read. */
94 uint16_t n_members; /* The number of members in this archive. */
95 struct zip_member **members; /* The members (may be null pointers until the headers have been read). */
101 zip_member_finish (struct zip_member *zm)
103 ds_clear (zm->errmsgs);
104 /* Probably not useful, because we would have to read right to the end of the member
105 if (zm->expected_crc != zm->crc)
107 ds_put_cstr (zm->errs, _("CRC error reading zip"));
110 zip_member_unref (zm);
115 /* Destroy the zip reader */
117 zip_reader_destroy (struct zip_reader *zr)
126 for (i = 0; i < zr->n_members; ++i)
128 zip_member_unref (zr->members[i]);
136 zm_dump (const struct zip_member *zm)
138 printf ("%d\t%08x\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name);
142 /* Skip N bytes in F */
144 skip_bytes (FILE *f, size_t n)
146 fseeko (f, n, SEEK_CUR);
149 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
152 /* Read N bytes from F, storing the result in X */
154 get_bytes (FILE *f, void *x, size_t n)
156 return (n == fread (x, 1, n, f));
159 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
162 /* Read a 32 bit value from F */
164 get_u32 (FILE *f, uint32_t *v)
167 if (!get_bytes (f, &x, sizeof x))
169 #ifdef WORDS_BIGENDIAN
177 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
180 /* Read a 16 bit value from F */
182 get_u16 (FILE *f, uint16_t *v)
185 if (!get_bytes (f, &x, sizeof x))
187 #ifdef WORDS_BIGENDIAN
196 /* Reads a 32-bit integer from F and compares it with EXPECTED.
197 Places an error string in ERR if the two values differ. */
199 check_magic (FILE *f, uint32_t expected, struct string *err)
203 if (! get_u32 (f, &magic)) return false;
205 if ((expected != magic))
208 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
209 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
217 /* Reads up to BYTES bytes from ZM and puts them in BUF.
218 Returns the number of bytes read, or -1 on error. */
220 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
224 ds_clear (zm->errmsgs);
226 if ( bytes > zm->bytes_unread)
227 bytes = zm->bytes_unread;
229 bytes_read = zm->decompressor->read (zm, buf, bytes);
233 zm->crc = crc32_update (zm->crc, buf, bytes_read);
235 zm->bytes_unread -= bytes_read;
242 Read a local file header from ZR and add it to ZR's internal array.
243 Returns a pointer to the member read. This pointer belongs to ZR.
244 If the caller wishes to control it, she should ref it with
247 static struct zip_member *
248 zip_header_read_next (struct zip_reader *zr)
250 struct zip_member *zm = xzalloc (sizeof *zm);
252 uint16_t v, nlen, extralen;
253 uint16_t gp, time, date;
255 uint16_t clen, diskstart, iattr;
260 zm->errmsgs = zr->errs;
262 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
265 if (! get_u16 (zr->fr, &v)) return NULL;
267 if (! get_u16 (zr->fr, &v)) return NULL;
268 if (! get_u16 (zr->fr, &gp)) return NULL;
269 if (! get_u16 (zr->fr, &comp_type)) return NULL;
271 zm->decompressor = get_decompressor (zm, comp_type);
272 if (! zm->decompressor) return NULL;
274 if (! get_u16 (zr->fr, &time)) return NULL;
275 if (! get_u16 (zr->fr, &date)) return NULL;
276 if (! get_u32 (zr->fr, &zm->expected_crc)) return NULL;
277 if (! get_u32 (zr->fr, &zm->comp_size)) return NULL;
278 if (! get_u32 (zr->fr, &zm->ucomp_size)) return NULL;
279 if (! get_u16 (zr->fr, &nlen)) return NULL;
280 if (! get_u16 (zr->fr, &extralen)) return NULL;
281 if (! get_u16 (zr->fr, &clen)) return NULL;
282 if (! get_u16 (zr->fr, &diskstart)) return NULL;
283 if (! get_u16 (zr->fr, &iattr)) return NULL;
284 if (! get_u32 (zr->fr, &eattr)) return NULL;
285 if (! get_u32 (zr->fr, &zm->offset)) return NULL;
287 zm->name = xzalloc (nlen + 1);
288 if (! get_bytes (zr->fr, zm->name, nlen)) return NULL;
290 skip_bytes (zr->fr, extralen);
292 zr->members[zr->nm++] = zm;
294 zm->fp = fopen (zr->filename, "rb");
302 /* Create a reader from the zip called FILENAME */
304 zip_reader_create (const char *filename, struct string *errs)
306 uint16_t disknum, total_members;
308 uint32_t central_dir_start, central_dir_length;
310 struct zip_reader *zr = xzalloc (sizeof *zr);
313 ds_init_empty (zr->errs);
317 zr->fr = fopen (filename, "rb");
320 ds_put_cstr (zr->errs, strerror (errno));
325 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
332 if ( ! find_eocd (zr->fr, &offset))
334 ds_put_format (zr->errs, _("Cannot find central directory"));
340 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
342 const char *mm = strerror (errno);
343 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
350 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
357 if (! get_u16 (zr->fr, &disknum)) return NULL;
358 if (! get_u16 (zr->fr, &disknum)) return NULL;
360 if (! get_u16 (zr->fr, &zr->n_members)) return NULL;
361 if (! get_u16 (zr->fr, &total_members)) return NULL;
363 if (! get_u32 (zr->fr, &central_dir_length)) return NULL;
364 if (! get_u32 (zr->fr, &central_dir_start)) return NULL;
366 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
368 const char *mm = strerror (errno);
369 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
375 zr->members = xcalloc (zr->n_members, sizeof (*zr->members));
376 memset (zr->members, 0, zr->n_members * sizeof (*zr->members));
378 zr->filename = xstrdup (filename);
385 /* Return the member called MEMBER from the reader ZR */
387 zip_member_open (struct zip_reader *zr, const char *member)
389 uint16_t v, nlen, extra_len;
390 uint16_t gp, comp_type, time, date;
391 uint32_t ucomp_size, comp_size;
394 bool new_member = false;
398 struct zip_member *zm = NULL;
403 for (i = 0; i < zr->n_members; ++i)
409 zm = zr->members[i] = zip_header_read_next (zr);
412 if (zm && 0 == strcmp (zm->name, member))
421 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
423 const char *mm = strerror (errno);
424 ds_put_format (zm->errmsgs, _("Failed to seek to start of member `%s': %s"), zm->name, mm);
428 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
433 if (! get_u16 (zm->fp, &v)) return NULL;
434 if (! get_u16 (zm->fp, &gp)) return NULL;
435 if (! get_u16 (zm->fp, &comp_type)) return NULL;
436 zm->decompressor = get_decompressor (zm, comp_type);
437 if (! zm->decompressor) return NULL;
438 if (! get_u16 (zm->fp, &time)) return NULL;
439 if (! get_u16 (zm->fp, &date)) return NULL;
440 if (! get_u32 (zm->fp, &crc)) return NULL;
441 if (! get_u32 (zm->fp, &comp_size)) return NULL;
443 if (! get_u32 (zm->fp, &ucomp_size)) return NULL;
444 if (! get_u16 (zm->fp, &nlen)) return NULL;
445 if (! get_u16 (zm->fp, &extra_len)) return NULL;
447 name = xzalloc (nlen + 1);
449 if (! get_bytes (zm->fp, name, nlen)) return NULL;
451 skip_bytes (zm->fp, extra_len);
453 if (strcmp (name, zm->name) != 0)
455 ds_put_format (zm->errmsgs,
456 _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"),
465 zm->bytes_unread = zm->ucomp_size;
468 zm->decompressor->finish (zm);
470 if (!zm->decompressor->init (zm) )
477 zip_member_ref (struct zip_member *zm)
486 zip_member_unref (struct zip_member *zm)
491 if (--zm->ref_cnt == 0)
493 zm->decompressor->finish (zm);
504 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
507 /* Search for something that looks like the End Of Central Directory in FP.
508 If found, the offset of the record will be placed in OFF.
509 Returns true if found, false otherwise.
512 find_eocd (FILE *fp, off_t *off)
515 const uint32_t magic = MAGIC_EOCD;
518 /* The magic cannot be fewer than 22 bytes from the end of the file,
519 because that is the minimum length of the EndOfCentralDirectory
522 if ( 0 > fseeko (fp, -22, SEEK_END))
527 stop = start + sizeof (magic);
530 found = probe_magic (fp, magic, start, stop, off);
531 /* FIXME: For extra confidence, look up the directory start record here. */
534 stop = start + sizeof (magic);
544 Search FP for MAGIC starting at START and reaching until STOP.
545 Returns true iff MAGIC is found. False otherwise.
546 OFF receives the location of the magic.
549 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
553 unsigned char seq[4];
556 if ( 0 > fseeko (fp, start, SEEK_SET))
561 for (i = 0; i < 4 ; ++i)
563 seq[i] = (magic >> i * 8) & 0xFF;
568 if (1 != fread (&byte, 1, 1, fp))
571 if ( byte == seq[state])
578 *off = ftello (fp) - 4;
590 /* Null decompressor. */
593 stored_read (struct zip_member *zm, void *buf, size_t n)
595 return fread (buf, 1, n, zm->fp);
599 stored_init (struct zip_member *zm UNUSED)
605 stored_finish (struct zip_member *zm UNUSED)
607 /* Nothing required */
/* Function table for the null decompressor; the entries are its init, read
   and finish routines, in that order. */
610 static const struct decompressor stored_decompressor =
611 {stored_init, stored_read, stored_finish};
613 /* Inflate decompressor. */
/* Size in bytes of the inflator's `ucomp' input buffer; inflate_read caps
   each refill from the file at this many bytes. */
618 #define UCOMPSIZE 4096
624 unsigned char ucomp[UCOMPSIZE];
626 size_t ucomp_bytes_read;
628 /* Two bitfields as defined by RFC1950 */
633 inflate_finish (struct zip_member *zm)
635 struct inflator *inf = zm->aux;
637 inflateEnd (&inf->zss);
643 inflate_init (struct zip_member *zm)
646 struct inflator *inf = xzalloc (sizeof *inf);
649 uint16_t cmf = 0x8; /* Always 8 for inflate */
651 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
653 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
655 /* make these into a 16 bit word */
656 inf->cmf_flg = (cmf << 8 ) | flg;
658 /* Set the check bits */
659 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
660 assert (inf->cmf_flg % 31 == 0);
662 inf->zss.next_in = Z_NULL;
663 inf->zss.avail_in = 0;
664 inf->zss.zalloc = Z_NULL;
665 inf->zss.zfree = Z_NULL;
666 inf->zss.opaque = Z_NULL;
667 r = inflateInit (&inf->zss);
671 ds_put_format (zm->errmsgs, _("Cannot initialize inflator: %s"), zError (r));
681 inflate_read (struct zip_member *zm, void *buf, size_t n)
684 struct inflator *inf = zm->aux;
686 if (inf->zss.avail_in == 0)
692 if ( inf->state == 0)
694 inf->ucomp[1] = inf->cmf_flg ;
695 inf->ucomp[0] = inf->cmf_flg >> 8 ;
701 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
703 if (bytes_to_read == 0)
706 if (bytes_to_read > UCOMPSIZE)
707 bytes_to_read = UCOMPSIZE;
709 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
711 inf->ucomp_bytes_read += bytes_read;
713 inf->zss.avail_in = bytes_read + pad;
714 inf->zss.next_in = inf->ucomp;
716 inf->zss.avail_out = n;
717 inf->zss.next_out = buf;
719 r = inflate (&inf->zss, Z_NO_FLUSH);
722 return n - inf->zss.avail_out;
725 ds_put_format (zm->errmsgs, _("Error inflating: %s"), zError (r));
/* Function table for the inflate decompressor; the entries are its init,
   read and finish routines, in that order. */
730 static const struct decompressor inflate_decompressor =
731 {inflate_init, inflate_read, inflate_finish};