1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
34 #include "zip-reader.h"
35 #include "zip-private.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) (msgid)
/* Per-member read state; zip_member_open fills these fields in from the
   matching directory entry and the owning reader. */
43 FILE *fp; /* The stream from which the data is read */
44 uint32_t offset; /* Starting offset in file. */
45 uint32_t comp_size; /* Length of member file data, in bytes. */
46 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
47 const struct decompressor *decompressor; /* init/read/finish callbacks used for this member */
49 size_t bytes_unread; /* Number of bytes left in the member available for reading */
50 struct string *errmsgs; /* A string to hold error messages.
51 This string is NOT owned by this object. */
/* Callbacks that implement one way of reading member data.  Two tables of
   this shape are defined in this file: stored_decompressor and
   inflate_decompressor. */
57 bool (*init) (struct zip_member *); /* called by zip_member_open, which tests the result */
58 int (*read) (struct zip_member *, void *, size_t); /* called by zip_member_read with (member, buffer, byte count) */
59 void (*finish) (struct zip_member *); /* called by zip_member_finish */
61 static const struct decompressor stored_decompressor;
62 static const struct decompressor inflate_decompressor;
64 static bool find_eocd (FILE *fp, off_t *off);
/* Selects the decompressor for compression method code C, returning the
   address of either stored_decompressor or inflate_decompressor.
   NOTE(review): zip_member_open tests the result against NULL, so
   presumably an unsupported code yields NULL -- confirm. */
66 static const struct decompressor *
67 get_decompressor (uint16_t c)
72 return &stored_decompressor;
75 return &inflate_decompressor;
/* Reader-wide state: the archive's file name (copied by zip_reader_create)
   and the directory entries loaded from its central directory. */
84 char *filename; /* The name of the file from which the data is read */
85 uint16_t n_entries; /* Number of directory entries. */
86 struct zip_entry *entries; /* Directory entries. */
/* One directory entry; zip_header_read_next fills in every field below. */
92 uint32_t offset; /* Starting offset in file. */
93 uint32_t comp_size; /* Length of member file data, in bytes. */
94 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
95 char *name; /* Name of member file. */
/* Finishes with ZM: empties the shared error-message string, then invokes
   the member's decompressor finish callback. */
99 zip_member_finish (struct zip_member *zm)
103 ds_clear (zm->errmsgs);
104 zm->decompressor->finish (zm);
110 /* Destroy the zip reader ZR.  Each of its N_ENTRIES directory entries is
   visited in turn. */
112 zip_reader_destroy (struct zip_reader *zr)
120 for (i = 0; i < zr->n_entries; ++i)
122 struct zip_entry *ze = &zr->entries[i];
130 /* Skip N bytes in F by seeking forward from the current position.
   NOTE(review): the result of fseeko is not examined, so a failed seek
   goes unnoticed by the caller. */
132 skip_bytes (FILE *f, size_t n)
134 fseeko (f, n, SEEK_CUR);
137 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
140 /* Read N bytes from F, storing the result in X.
   Returns true only when fread delivered all N bytes. */
142 get_bytes (FILE *f, void *x, size_t n)
144 return (n == fread (x, 1, n, f));
147 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
150 /* Read a 32-bit value from F, fetching the raw bytes with get_bytes.
   NOTE(review): presumably a short read yields false, and big-endian
   hosts (WORDS_BIGENDIAN) byte-swap the value before storing it in *V
   -- confirm. */
152 get_u32 (FILE *f, uint32_t *v)
155 if (!get_bytes (f, &x, sizeof x))
157 #ifdef WORDS_BIGENDIAN
165 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
168 /* Read a 16-bit value from F, fetching the raw bytes with get_bytes.
   NOTE(review): presumably a short read yields false, and big-endian
   hosts (WORDS_BIGENDIAN) byte-swap the value before storing it in *V
   -- confirm. */
170 get_u16 (FILE *f, uint16_t *v)
173 if (!get_bytes (f, &x, sizeof x))
175 #ifdef WORDS_BIGENDIAN
184 /* Read a 32-bit integer from F and compare it with EXPECTED.
185 Place an error string in ERR if necessary.
   Returns false when the integer cannot be read at all. */
187 check_magic (FILE *f, uint32_t expected, struct string *err)
191 if (! get_u32 (f, &magic)) return false;
193 if ((expected != magic))
196 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
197 (long long int) ftello (f) - sizeof (uint32_t), expected, magic); /* offset at which the 4-byte magic began */
205 /* Reads up to BYTES bytes from ZM and puts them in BUF.
206 Returns the number of bytes read, or -1 on error.
   Any previous error text is cleared first. */
208 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
212 ds_clear (zm->errmsgs);
214 if ( bytes > zm->bytes_unread) /* never ask for more than the member has left */
215 bytes = zm->bytes_unread;
217 bytes_read = zm->decompressor->read (zm, buf, bytes);
221 zm->bytes_unread -= bytes_read;
227 /* Reads a central directory header from FILE and initializes ZE with it.
228 Returns true if successful, false otherwise. On error, appends error
231 zip_header_read_next (FILE *file, struct zip_entry *ze, struct string *errs)
233 uint16_t v, nlen, extralen;
234 uint16_t gp, time, date;
235 uint32_t expected_crc;
237 uint16_t clen, diskstart, iattr;
241 if ( ! check_magic (file, MAGIC_SOCD, errs))
244 if (! get_u16 (file, &v)) return false; /* version made by */
245 if (! get_u16 (file, &v)) return false; /* version needed to extract */
246 if (! get_u16 (file, &gp)) return false; /* general purpose bit flag */
247 if (! get_u16 (file, &comp_type)) return false; /* compression method */
248 if (! get_u16 (file, &time)) return false; /* last modification time */
249 if (! get_u16 (file, &date)) return false; /* last modification date */
250 if (! get_u32 (file, &expected_crc)) return false; /* CRC-32 */
251 if (! get_u32 (file, &ze->comp_size)) return false; /* compressed size */
252 if (! get_u32 (file, &ze->ucomp_size)) return false; /* uncompressed size */
253 if (! get_u16 (file, &nlen)) return false; /* file name length */
254 if (! get_u16 (file, &extralen)) return false; /* extra field length */
255 if (! get_u16 (file, &clen)) return false; /* file comment length */
256 if (! get_u16 (file, &diskstart)) return false; /* disk number start */
257 if (! get_u16 (file, &iattr)) return false; /* internal file attributes */
258 if (! get_u32 (file, &eattr)) return false; /* external file attributes */
259 if (! get_u32 (file, &ze->offset)) return false; /* offset of the member's local header */
261 ze->name = xzalloc (nlen + 1); /* one extra zeroed byte keeps the name NUL-terminated */
262 if (! get_bytes (file, ze->name, nlen)) return false;
264 skip_bytes (file, extralen); /* the extra field is not used */
270 /* Create a reader from the zip called FILENAME.
   The file must begin with a local file header magic.  The end of central
   directory record is then located with find_eocd and parsed, and one
   directory entry is read for each member it announces.  Error text is
   written to ZR->errs. */
272 zip_reader_create (const char *filename, struct string *errs)
274 uint16_t disknum, n_members, total_members;
276 uint32_t central_dir_start, central_dir_length;
278 struct zip_reader *zr = xzalloc (sizeof *zr);
281 ds_init_empty (zr->errs);
283 FILE *file = fopen (filename, "rb");
286 ds_put_cstr (zr->errs, strerror (errno));
291 if ( ! check_magic (file, MAGIC_LHDR, zr->errs))
298 if ( ! find_eocd (file, &offset))
300 ds_put_format (zr->errs, _("Cannot find central directory"));
306 if ( 0 != fseeko (file, offset, SEEK_SET))
308 const char *mm = strerror (errno);
309 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
316 if ( ! check_magic (file, MAGIC_EOCD, zr->errs))
323 if (! get_u16 (file, &disknum) /* number of this disk */
324 || ! get_u16 (file, &disknum) /* disk holding the start of the central directory */
326 || ! get_u16 (file, &n_members) /* directory entries on this disk */
327 || ! get_u16 (file, &total_members) /* directory entries in total */
329 || ! get_u32 (file, &central_dir_length) /* size of the central directory */
330 || ! get_u32 (file, &central_dir_start)) /* file offset of the central directory */
337 if ( 0 != fseeko (file, central_dir_start, SEEK_SET))
339 const char *mm = strerror (errno);
340 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
346 zr->filename = xstrdup (filename);
348 zr->entries = xcalloc (n_members, sizeof *zr->entries);
349 for (int i = 0; i < n_members; i++)
351 if (!zip_header_read_next (file, &zr->entries[zr->n_entries], errs))
354 zip_reader_destroy (zr);
/* Scans ZR's directory entries in order for one whose name is exactly
   equal to MEMBER (compared with strcmp).
   NOTE(review): presumably returns the matching entry, or NULL when none
   matches, since zip_member_open reports "unknown member" -- confirm. */
363 static struct zip_entry *
364 zip_entry_find (struct zip_reader *zr, const char *member)
366 for (int i = 0; i < zr->n_entries; ++i)
368 struct zip_entry *ze = &zr->entries[i];
369 if (0 == strcmp (ze->name, member))
375 /* Return the member called MEMBER from the reader ZR.
   The entry is looked up in ZR's directory, the archive is opened again
   by name, and the member's local file header is read and compared with
   the directory entry before the decompressor's init callback runs.
   Error text is written to ZR->errs, which the member shares. */
377 zip_member_open (struct zip_reader *zr, const char *member)
379 struct zip_entry *ze = zip_entry_find (zr, member);
382 ds_put_format (zr->errs, _("%s: unknown member"), member);
386 FILE *fp = fopen (zr->filename, "rb");
389 ds_put_cstr (zr->errs, strerror (errno));
393 struct zip_member *zm = xmalloc (sizeof *zm);
395 zm->offset = ze->offset;
396 zm->comp_size = ze->comp_size;
397 zm->ucomp_size = ze->ucomp_size;
398 zm->decompressor = NULL; /* chosen below, once the compression method has been read */
399 zm->bytes_unread = ze->ucomp_size;
400 zm->errmsgs = zr->errs; /* shared with the reader, not owned by the member */
403 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
405 ds_put_format (zr->errs, _("Failed to seek to start of member `%s': %s"),
406 ze->name, strerror (errno));
410 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
413 uint16_t v, nlen, extra_len;
414 uint16_t gp, comp_type, time, date;
415 uint32_t ucomp_size, comp_size;
417 if (! get_u16 (zm->fp, &v)) goto error; /* version needed to extract */
418 if (! get_u16 (zm->fp, &gp)) goto error; /* general purpose bit flag */
419 if (! get_u16 (zm->fp, &comp_type)) goto error; /* compression method */
420 zm->decompressor = get_decompressor (comp_type);
421 if (! zm->decompressor) goto error; /* no decompressor for this method */
422 if (! get_u16 (zm->fp, &time)) goto error; /* last modification time */
423 if (! get_u16 (zm->fp, &date)) goto error; /* last modification date */
424 if (! get_u32 (zm->fp, &crc)) goto error; /* CRC-32 */
425 if (! get_u32 (zm->fp, &comp_size)) goto error; /* compressed size */
427 if (! get_u32 (zm->fp, &ucomp_size)) goto error; /* uncompressed size */
428 if (! get_u16 (zm->fp, &nlen)) goto error; /* file name length */
429 if (! get_u16 (zm->fp, &extra_len)) goto error; /* extra field length */
431 char *name = xzalloc (nlen + 1); /* one extra zeroed byte keeps the name NUL-terminated */
432 if (! get_bytes (zm->fp, name, nlen))
437 if (strcmp (name, ze->name) != 0) /* local header must agree with the directory entry */
439 ds_put_format (zm->errmsgs,
440 _("Name mismatch in zip archive. Central directory "
441 "says `%s'; local file header says `%s'"),
448 skip_bytes (zm->fp, extra_len); /* leaves the stream at the start of the member data */
450 if (!zm->decompressor->init (zm) )
463 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
466 /* Search for something that looks like the End Of Central Directory in FP.
467 If found, the offset of the record will be placed in OFF.
468 Returns true if found, false otherwise.
471 find_eocd (FILE *fp, off_t *off)
474 const uint32_t magic = MAGIC_EOCD;
477 /* The magic cannot be fewer than 22 bytes from the end of the file,
478 because that is the minimum length of the EndOfCentralDirectory
481 if ( 0 > fseeko (fp, -22, SEEK_END)) /* begin at the latest position the record could start */
486 stop = start + sizeof (magic); /* probe a window exactly one magic wide */
489 found = probe_magic (fp, magic, start, stop, off);
490 /* FIXME: For extra confidence lookup the directory start record here*/
493 stop = start + sizeof (magic);
503 Search FP for MAGIC starting at START and reaching until STOP.
504 Returns true iff MAGIC is found. False otherwise.
505 OFF receives the location of the magic.
508 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
512 unsigned char seq[4];
515 if ( 0 > fseeko (fp, start, SEEK_SET))
520 for (i = 0; i < 4 ; ++i)
522 seq[i] = (magic >> i * 8) & 0xFF; /* split MAGIC into bytes, least significant first */
527 if (1 != fread (&byte, 1, 1, fp)) /* the file is examined one byte at a time */
530 if ( byte == seq[state])
537 *off = ftello (fp) - 4; /* step back over the 4 magic bytes just consumed */
549 /* Null decompressor.  Reading is a plain fread from the member's stream,
   and finishing requires no work. */
552 stored_read (struct zip_member *zm, void *buf, size_t n)
554 return fread (buf, 1, n, zm->fp); /* bytes are passed through unchanged */
558 stored_init (struct zip_member *zm UNUSED)
564 stored_finish (struct zip_member *zm UNUSED)
566 /* Nothing required */
569 static const struct decompressor stored_decompressor =
570 {stored_init, stored_read, stored_finish}; /* order follows the init, read, finish members */
572 /* Inflate decompressor. */
577 #define UCOMPSIZE 4096 /* capacity, in bytes, of the ucomp input buffer */
583 unsigned char ucomp[UCOMPSIZE]; /* compressed bytes read from the file, handed to zlib as input */
585 size_t ucomp_bytes_read; /* running total of compressed bytes read from the file */
587 /* Two bitfields as defined by RFC1950 */
/* Finish callback of the inflate decompressor: shuts down the zlib stream
   kept in ZM's auxiliary inflator state. */
592 inflate_finish (struct zip_member *zm)
594 struct inflator *inf = zm->aux;
596 inflateEnd (&inf->zss);
/* Init callback of the inflate decompressor.  Allocates zeroed inflator
   state, precomputes a 16-bit RFC 1950 header word (CMF in the high byte,
   FLG in the low byte) and initializes the zlib stream.  inflate_read
   later places the two bytes of that word at the front of its input
   buffer.  A failure to initialize zlib is described in ZM->errmsgs. */
602 inflate_init (struct zip_member *zm)
605 struct inflator *inf = xzalloc (sizeof *inf);
608 uint16_t cmf = 0x8; /* Always 8 for inflate */
610 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
612 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
614 /* make these into a 16 bit word */
615 inf->cmf_flg = (cmf << 8 ) | flg;
617 /* Set the check bits */
618 inf->cmf_flg += 31 - (inf->cmf_flg % 31); /* round up so the word becomes a multiple of 31 */
619 assert (inf->cmf_flg % 31 == 0);
621 inf->zss.next_in = Z_NULL; /* no input is available yet */
622 inf->zss.avail_in = 0;
623 inf->zss.zalloc = Z_NULL; /* Z_NULL selects zlib's default allocation routines */
624 inf->zss.zfree = Z_NULL;
625 inf->zss.opaque = Z_NULL;
626 r = inflateInit (&inf->zss);
630 ds_put_format (zm->errmsgs, _("Cannot initialize inflator: %s"), zError (r));
/* Read callback of the inflate decompressor.  When zlib has consumed all
   of its input, the buffer is refilled with compressed bytes from ZM's
   stream.  On the first refill (state 0) the two bytes of the header word
   prepared by inflate_init are placed at the front of the buffer.  Up to N
   bytes are then inflated into BUF, and the count actually produced is
   returned.  A zlib failure is described in ZM->errmsgs.
   NOTE(review): the fread length is BYTES_TO_READ - PAD; if fewer than PAD
   compressed bytes remain this would drop below zero -- verify the types
   and the value of PAD. */
640 inflate_read (struct zip_member *zm, void *buf, size_t n)
643 struct inflator *inf = zm->aux;
645 if (inf->zss.avail_in == 0) /* zlib has used up the previous input */
651 if ( inf->state == 0)
653 inf->ucomp[1] = inf->cmf_flg ; /* low byte of the header word */
654 inf->ucomp[0] = inf->cmf_flg >> 8 ; /* high byte of the header word */
660 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read; /* compressed bytes not yet read */
662 if (bytes_to_read == 0)
665 if (bytes_to_read > UCOMPSIZE) /* limit to the buffer's capacity */
666 bytes_to_read = UCOMPSIZE;
668 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
670 inf->ucomp_bytes_read += bytes_read;
672 inf->zss.avail_in = bytes_read + pad;
673 inf->zss.next_in = inf->ucomp;
675 inf->zss.avail_out = n;
676 inf->zss.next_out = buf;
678 r = inflate (&inf->zss, Z_NO_FLUSH);
681 return n - inf->zss.avail_out; /* bytes zlib wrote into BUF */
684 ds_put_format (zm->errmsgs, _("Error inflating: %s"), zError (r));
/* Callback table for the inflate decompressor; get_decompressor hands out
   its address. */
689 static const struct decompressor inflate_decompressor =
690 {inflate_init, inflate_read, inflate_finish}; /* order follows the init, read, finish members */