1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
34 #include "zip-reader.h"
35 #include "zip-private.h"
/* Message helpers: _() hands MSGID to gettext(); N_() expands to MSGID
   itself without calling anything. */
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) (msgid)
/* Per-member read state.  The functions in this file reach these fields
   through a `struct zip_member *' (usually named ZM). */
43 FILE *fp; /* The stream from which the data is read */
44 uint32_t offset; /* Starting offset in file.  zip_member_open seeks FP here and then checks for the local header magic. */
45 uint32_t comp_size; /* Length of member file data, in bytes. */
46 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
47 const struct decompressor *decompressor; /* Set from get_decompressor in zip_member_open. */
49 size_t bytes_unread; /* Number of bytes left in the member available for reading.
   Starts at the uncompressed size and is reduced by zip_member_read. */
50 struct string *errmsgs; /* A string to hold error messages.
51 This string is NOT owned by this object. */
/* Hooks supplied by each decompression method.  The two tables defined in
   this file list them in the order init, read, finish. */
57 bool (*init) (struct zip_member *); /* Called from zip_member_open, which tests for a false result. */
58 int (*read) (struct zip_member *, void *, size_t); /* Called from zip_member_read with the buffer and byte count. */
59 void (*finish) (struct zip_member *); /* Called from zip_member_finish. */
/* Forward declarations: the two decompressor tables and find_eocd are
   defined further down in this file but are referenced before that. */
61 static const struct decompressor stored_decompressor;
62 static const struct decompressor inflate_decompressor;
64 static bool find_eocd (FILE *fp, off_t *off);
/* Selects a decompressor table from the code C.  The visible return
   statements yield the stored and the inflate tables.  zip_member_open
   passes the compression type from the local header and treats a null
   result as an error. */
66 static const struct decompressor *
67 get_decompressor (uint16_t c)
72 return &stored_decompressor;
75 return &inflate_decompressor;
/* State for one opened archive, reached through a `struct zip_reader *'
   (usually named ZR). */
84 char *filename; /* The name of the file from which the data is read.
   Copied with xstrdup in zip_reader_create and reopened by zip_member_open. */
85 FILE *fr; /* The stream from which the meta data is read */
86 uint16_t n_entries; /* Number of directory entries. */
87 struct zip_entry *entries; /* Directory entries.  Allocated in zip_reader_create, one slot per member. */
/* One directory entry.  zip_header_read_next fills in all four fields. */
93 uint32_t offset; /* Starting offset in file. */
94 uint32_t comp_size; /* Length of member file data, in bytes. */
95 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
96 char *name; /* Name of member file.  Allocated with xzalloc, one byte longer than the stored name. */
/* Finishes with ZM: empties the shared error string, then calls the
   finish hook of ZM's decompressor.
   NOTE(review): any closing of ZM's stream or freeing of ZM itself is not
   visible in these lines -- confirm against the full file. */
100 zip_member_finish (struct zip_member *zm)
104 ds_clear (zm->errmsgs);
105 zm->decompressor->finish (zm);
111 /* Destroy the zip reader ZR.
   Visits each of the ZR->n_entries directory entries in turn
   (presumably to release per-entry storage such as the name -- confirm). */
113 zip_reader_destroy (struct zip_reader *zr)
122 for (i = 0; i < zr->n_entries; ++i)
124 struct zip_entry *ze = &zr->entries[i];
132 /* Skip N bytes in F by seeking forward from the current position.
   The result of fseeko is not examined here. */
134 skip_bytes (FILE *f, size_t n)
136 fseeko (f, n, SEEK_CUR);
/* The prototype carries WARN_UNUSED_RESULT (presumably so that ignoring the
   result draws a compiler warning -- see libpspp/compiler.h). */
139 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
142 /* Read N bytes from F, storing the result in X.
   Returns true only when fread delivered all N bytes. */
144 get_bytes (FILE *f, void *x, size_t n)
146 return (n == fread (x, 1, n, f));
149 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
152 /* Read a 32 bit value from F.
   The raw bytes come from get_bytes, whose failure is tested first.
   What follows depends on whether the host is big-endian. */
154 get_u32 (FILE *f, uint32_t *v)
157 if (!get_bytes (f, &x, sizeof x))
159 #ifdef WORDS_BIGENDIAN
167 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
170 /* Read a 16 bit value from F.
   The raw bytes come from get_bytes, whose failure is tested first.
   What follows depends on whether the host is big-endian. */
172 get_u16 (FILE *f, uint16_t *v)
175 if (!get_bytes (f, &x, sizeof x))
177 #ifdef WORDS_BIGENDIAN
186 /* Read a 32 bit integer from F and compare it with EXPECTED.
187 Place an error string in ERR if necessary.
   Returns false when the integer cannot be read.  On a mismatch the message
   reports a file position, the expected value and the value found. */
189 check_magic (FILE *f, uint32_t expected, struct string *err)
193 if (! get_u32 (f, &magic)) return false;
195 if ((expected != magic))
198 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
/* The stream is now just past the magic, so step back by its width to
   report where it started. */
199 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
207 /* Reads up to BYTES bytes from ZM and puts them in BUF.
208 Returns the number of bytes read, or -1 on error */
210 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
/* Start each read with an empty error string. */
214 ds_clear (zm->errmsgs);
/* Never ask the decompressor for more than the member still holds. */
216 if ( bytes > zm->bytes_unread)
217 bytes = zm->bytes_unread;
219 bytes_read = zm->decompressor->read (zm, buf, bytes);
/* Account for what was delivered. */
223 zm->bytes_unread -= bytes_read;
229 /* Read a central directory header from ZR and initializes ZE with it.
230 Returns true if successful, false otherwise.
   The header is consumed field by field from ZR->fr; any short read returns
   false at once.  Only the compressed size, uncompressed size, offset and
   name are kept in ZE; the other fields are read into locals.
   (The field order presumably follows the ZIP central directory file header
   layout -- confirm against the ZIP specification.) */
232 zip_header_read_next (struct zip_reader *zr, struct zip_entry *ze)
234 uint16_t v, nlen, extralen;
235 uint16_t gp, time, date;
236 uint32_t expected_crc;
238 uint16_t clen, diskstart, iattr;
/* The header must begin with the MAGIC_SOCD signature. */
242 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
/* Two 16-bit fields are read into V one after the other; the first value
   is overwritten by the second. */
245 if (! get_u16 (zr->fr, &v)) return false;
246 if (! get_u16 (zr->fr, &v)) return false;
247 if (! get_u16 (zr->fr, &gp)) return false;
248 if (! get_u16 (zr->fr, &comp_type)) return false;
249 if (! get_u16 (zr->fr, &time)) return false;
250 if (! get_u16 (zr->fr, &date)) return false;
251 if (! get_u32 (zr->fr, &expected_crc)) return false;
/* These two sizes go straight into the entry. */
252 if (! get_u32 (zr->fr, &ze->comp_size)) return false;
253 if (! get_u32 (zr->fr, &ze->ucomp_size)) return false;
/* Lengths of the variable-size parts that follow the fixed fields. */
254 if (! get_u16 (zr->fr, &nlen)) return false;
255 if (! get_u16 (zr->fr, &extralen)) return false;
256 if (! get_u16 (zr->fr, &clen)) return false;
257 if (! get_u16 (zr->fr, &diskstart)) return false;
258 if (! get_u16 (zr->fr, &iattr)) return false;
259 if (! get_u32 (zr->fr, &eattr)) return false;
260 if (! get_u32 (zr->fr, &ze->offset)) return false;
/* One extra zeroed byte keeps the name NUL-terminated. */
262 ze->name = xzalloc (nlen + 1);
263 if (! get_bytes (zr->fr, ze->name, nlen)) return false;
/* NOTE(review): EXTRALEN bytes are skipped here, but no visible line skips
   the CLEN bytes read above -- check whether an entry with a non-empty
   comment leaves the stream misaligned for the next header. */
265 skip_bytes (zr->fr, extralen);
271 /* Create a reader from the zip called FILENAME.
   Steps visible here: open FILENAME, check for the MAGIC_LHDR signature at
   the very start of the file, locate the End Of Central Directory record
   with find_eocd and seek to it, check its MAGIC_EOCD signature, read the
   member counts and the central directory length and start, seek to the
   central directory, and read one header per member into ZR->entries.
   Failure messages are written to the reader's error string; how that
   string relates to ERRS is decided on lines not shown here. */
273 zip_reader_create (const char *filename, struct string *errs)
275 uint16_t disknum, n_members, total_members;
277 uint32_t central_dir_start, central_dir_length;
279 struct zip_reader *zr = xzalloc (sizeof *zr);
282 ds_init_empty (zr->errs);
284 zr->fr = fopen (filename, "rb");
287 ds_put_cstr (zr->errs, strerror (errno));
/* The stream is still at offset 0, so this checks the first four bytes. */
292 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
299 if ( ! find_eocd (zr->fr, &offset))
301 ds_put_format (zr->errs, _("Cannot find central directory"));
307 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
/* Capture the message for errno straight after the failing call. */
309 const char *mm = strerror (errno);
310 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
317 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
/* Two consecutive 16-bit fields are both read into DISKNUM; only the
   second value survives. */
324 if (! get_u16 (zr->fr, &disknum)
325 || ! get_u16 (zr->fr, &disknum)
327 || ! get_u16 (zr->fr, &n_members)
328 || ! get_u16 (zr->fr, &total_members)
/* get_u32 needs the address of each variable; these two arguments had been
   corrupted to a cent sign followed by "ral_dir_..." by an HTML-entity
   decoding accident. */
330 || ! get_u32 (zr->fr, &central_dir_length)
331 || ! get_u32 (zr->fr, &central_dir_start))
338 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
340 const char *mm = strerror (errno);
341 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
/* Keep a private copy of the name; zip_member_open reopens the file by it. */
347 zr->filename = xstrdup (filename);
349 zr->entries = xcalloc (n_members, sizeof *zr->entries);
/* Each header is read into the slot at index ZR->n_entries; a failed read
   tears the reader down. */
350 for (int i = 0; i < n_members; i++)
352 if (!zip_header_read_next (zr, &zr->entries[zr->n_entries]))
354 zip_reader_destroy (zr);
/* Looks through ZR's directory entries, in order, for one whose name
   compares equal to MEMBER under strcmp (an exact, case-sensitive match). */
363 static struct zip_entry *
364 zip_entry_find (struct zip_reader *zr, const char *member)
366 for (int i = 0; i < zr->n_entries; ++i)
368 struct zip_entry *ze = &zr->entries[i];
369 if (0 == strcmp (ze->name, member))
375 /* Return the member called MEMBER from the reader ZR.
   Steps visible here: look MEMBER up in ZR's directory, open a fresh stream
   on the archive file, copy the entry's offset and sizes into a new
   zip_member, seek to the member, check the MAGIC_LHDR signature, read the
   local header fields, compare the local name with the directory name,
   skip the extra field and initialise the chosen decompressor.
   Error messages are written to ZR->errs, which the new member shares. */
377 zip_member_open (struct zip_reader *zr, const char *member)
379 struct zip_entry *ze = zip_entry_find (zr, member);
382 ds_put_format (zr->errs, _("%s: unknown member"), member);
/* The member gets its own stream, separate from ZR->fr. */
386 FILE *fp = fopen (zr->filename, "rb");
389 ds_put_cstr (zr->errs, strerror (errno));
393 struct zip_member *zm = xmalloc (sizeof *zm);
/* Offset and sizes come from the directory entry. */
395 zm->offset = ze->offset;
396 zm->comp_size = ze->comp_size;
397 zm->ucomp_size = ze->ucomp_size;
/* Stays null until the compression type has been read below. */
398 zm->decompressor = NULL;
399 zm->bytes_unread = ze->ucomp_size;
/* Shared with the reader; the member does not own this string. */
400 zm->errmsgs = zr->errs;
403 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
405 ds_put_format (zr->errs, _("Failed to seek to start of member `%s': %s"),
406 ze->name, strerror (errno));
410 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
413 uint16_t v, nlen, extra_len;
414 uint16_t gp, comp_type, time, date;
/* The sizes stored in the local header are read into these locals, not
   into ZM. */
415 uint32_t ucomp_size, comp_size;
417 if (! get_u16 (zm->fp, &v)) goto error;
418 if (! get_u16 (zm->fp, &gp)) goto error;
419 if (! get_u16 (zm->fp, &comp_type)) goto error;
/* A compression type with no matching decompressor is an error. */
420 zm->decompressor = get_decompressor (comp_type);
421 if (! zm->decompressor) goto error;
422 if (! get_u16 (zm->fp, &time)) goto error;
423 if (! get_u16 (zm->fp, &date)) goto error;
424 if (! get_u32 (zm->fp, &crc)) goto error;
425 if (! get_u32 (zm->fp, &comp_size)) goto error;
427 if (! get_u32 (zm->fp, &ucomp_size)) goto error;
428 if (! get_u16 (zm->fp, &nlen)) goto error;
429 if (! get_u16 (zm->fp, &extra_len)) goto error;
/* One extra zeroed byte keeps the local name NUL-terminated. */
431 char *name = xzalloc (nlen + 1);
432 if (! get_bytes (zm->fp, name, nlen))
/* The local header must name the same member as the directory entry. */
437 if (strcmp (name, ze->name) != 0)
439 ds_put_format (zm->errmsgs,
440 _("Name mismatch in zip archive. Central directory "
441 "says `%s'; local file header says `%s'"),
/* After the extra field the stream is at the member's data. */
448 skip_bytes (zm->fp, extra_len);
450 if (!zm->decompressor->init (zm) )
/* Forward declaration: find_eocd calls probe_magic, which is defined after it. */
463 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
466 /* Search for something that looks like the End Of Central Directory in FP.
467 If found, the offset of the record will be placed in OFF.
468 Returns true if found, false otherwise. */
471 find_eocd (FILE *fp, off_t *off)
474 const uint32_t magic = MAGIC_EOCD;
477 /* The magic cannot be closer than 22 bytes to the end of the file,
478 because that is the minimum length of the EndOfCentralDirectory
   record, so the search begins 22 bytes before the end. */
481 if ( 0 > fseeko (fp, -22, SEEK_END))
/* Each probe covers just the width of the magic number, beginning at START. */
486 stop = start + sizeof (magic);
489 found = probe_magic (fp, magic, start, stop, off);
490 /* FIXME: For extra confidence lookup the directory start record here*/
/* Recomputed for a further probe (START is presumably moved on lines not
   shown here -- confirm against the full file). */
493 stop = start + sizeof (magic);
/*
503 Search FP for MAGIC starting at START and reaching until STOP.
504 Returns true iff MAGIC is found. False otherwise.
505 OFF receives the location of the magic.
*/
508 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
512 unsigned char seq[4];
515 if ( 0 > fseeko (fp, start, SEEK_SET))
/* Split MAGIC into its four bytes, least significant byte first. */
520 for (i = 0; i < 4 ; ++i)
522 seq[i] = (magic >> i * 8) & 0xFF;
/* The stream is examined one byte at a time. */
527 if (1 != fread (&byte, 1, 1, fp))
/* STATE indexes the byte of SEQ that is expected next. */
530 if ( byte == seq[state])
/* The stream is now just past the magic, so it began four bytes earlier. */
537 *off = ftello (fp) - 4;
549 /* Null decompressor. */
/* Read hook: copies up to N bytes straight from ZM's stream into BUF and
   returns the count reported by fread. */
552 stored_read (struct zip_member *zm, void *buf, size_t n)
554 return fread (buf, 1, n, zm->fp);
/* Init hook for the null decompressor; ZM is marked UNUSED. */
558 stored_init (struct zip_member *zm UNUSED)
/* Finish hook for the null decompressor; there is no state to release. */
564 stored_finish (struct zip_member *zm UNUSED)
566 /* Nothing required */
/* Hook table for the null decompressor, in the order init, read, finish. */
569 static const struct decompressor stored_decompressor =
570 {stored_init, stored_read, stored_finish};
572 /* Inflate decompressor. */
/* Size in bytes of the UCOMP buffer below; inflate_read never reads more
   than this many bytes at a time. */
577 #define UCOMPSIZE 4096
/* Buffer that inflate_read fills and then hands to zlib as its input. */
583 unsigned char ucomp[UCOMPSIZE];
/* Running total of bytes read from the file into UCOMP; inflate_read
   subtracts it from the member's compressed size. */
585 size_t ucomp_bytes_read;
587 /* Two bitfields as defined by RFC1950 */
/* Finish hook for the inflate decompressor: ends the zlib stream held in
   the inflator that ZM->aux points to. */
592 inflate_finish (struct zip_member *zm)
594 struct inflator *inf = zm->aux;
596 inflateEnd (&inf->zss);
/* Init hook for the inflate decompressor.  Allocates a zeroed inflator,
   builds a 16-bit CMF/FLG word whose value is a multiple of 31, and
   initialises the zlib stream with no input pending.  A zlib failure is
   reported through ZM->errmsgs. */
602 inflate_init (struct zip_member *zm)
605 struct inflator *inf = xzalloc (sizeof *inf);
608 uint16_t cmf = 0x8; /* Always 8 for inflate */
610 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
612 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
614 /* make these into a 16 bit word */
615 inf->cmf_flg = (cmf << 8 ) | flg;
617 /* Set the check bits */
/* Raise the word to the next multiple of 31; the assertion confirms it. */
618 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
619 assert (inf->cmf_flg % 31 == 0);
/* No input yet, and Z_NULL for the three allocator fields. */
621 inf->zss.next_in = Z_NULL;
622 inf->zss.avail_in = 0;
623 inf->zss.zalloc = Z_NULL;
624 inf->zss.zfree = Z_NULL;
625 inf->zss.opaque = Z_NULL;
626 r = inflateInit (&inf->zss);
630 ds_put_format (zm->errmsgs, _("Cannot initialize inflator: %s"), zError (r));
/* Read hook for the inflate decompressor.  When zlib has consumed all of
   its input, the buffer is refilled from ZM's stream; zlib then inflates
   into BUF, which has room for N bytes.  On the success path the result is
   the number of bytes placed in BUF; otherwise a message is written to
   ZM->errmsgs. */
640 inflate_read (struct zip_member *zm, void *buf, size_t n)
643 struct inflator *inf = zm->aux;
/* Refill only when zlib has no input left. */
645 if (inf->zss.avail_in == 0)
/* In state 0 the two bytes of the CMF/FLG word built by inflate_init are
   placed at the front of the buffer, high byte first. */
651 if ( inf->state == 0)
653 inf->ucomp[1] = inf->cmf_flg ;
654 inf->ucomp[0] = inf->cmf_flg >> 8 ;
/* Compressed bytes of this member not yet read from the file. */
660 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
662 if (bytes_to_read == 0)
/* Never read more than the buffer can hold. */
665 if (bytes_to_read > UCOMPSIZE)
666 bytes_to_read = UCOMPSIZE;
/* File data lands after the PAD bytes already in the buffer.
   NOTE(review): if fewer than PAD bytes remain, `bytes_to_read - pad' would
   go below zero; the variable types are not visible here, so check for
   wrap-around and an oversized fread. */
668 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
670 inf->ucomp_bytes_read += bytes_read;
/* zlib sees the PAD bytes plus whatever was just read. */
672 inf->zss.avail_in = bytes_read + pad;
673 inf->zss.next_in = inf->ucomp;
675 inf->zss.avail_out = n;
676 inf->zss.next_out = buf;
678 r = inflate (&inf->zss, Z_NO_FLUSH);
/* Bytes produced = space offered minus space left over. */
681 return n - inf->zss.avail_out;
684 ds_put_format (zm->errmsgs, _("Error inflating: %s"), zError (r));
/* Hook table for the inflate decompressor, in the order init, read, finish. */
689 static const struct decompressor inflate_decompressor =
690 {inflate_init, inflate_read, inflate_finish};