1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
34 #include "zip-reader.h"
35 #include "zip-private.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) (msgid)
43 FILE *fp; /* The stream from which the data is read */
44 uint32_t offset; /* Starting offset in file. */
45 uint32_t comp_size; /* Length of member file data, in bytes. */
46 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
47 const struct decompressor *decompressor;
49 size_t bytes_unread; /* Number of bytes left in the member available for reading */
50 struct string *errmsgs; /* A string to hold error messages.
51 This string is NOT owned by this object. */
57 bool (*init) (struct zip_member *);
58 int (*read) (struct zip_member *, void *, size_t);
59 void (*finish) (struct zip_member *);
61 static const struct decompressor stored_decompressor;
62 static const struct decompressor inflate_decompressor;
64 static bool find_eocd (FILE *fp, off_t *off);
66 static const struct decompressor *
67 get_decompressor (uint16_t c)
72 return &stored_decompressor;
75 return &inflate_decompressor;
84 char *filename; /* The name of the file from which the data is read */
85 uint16_t n_entries; /* Number of directory entries. */
86 struct zip_entry *entries; /* Directory entries. */
92 uint32_t offset; /* Starting offset in file. */
93 uint32_t comp_size; /* Length of member file data, in bytes. */
94 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
95 char *name; /* Name of member file. */
99 zip_member_finish (struct zip_member *zm)
103 ds_clear (zm->errmsgs);
104 zm->decompressor->finish (zm);
110 /* Destroy the zip reader */
112 zip_reader_destroy (struct zip_reader *zr)
120 for (i = 0; i < zr->n_entries; ++i)
122 struct zip_entry *ze = &zr->entries[i];
130 /* Skip N bytes in F by seeking forward from the current position.
   NOTE(review): the value returned by fseeko is discarded here, so a
   failed seek is not reported to the caller. */
132 skip_bytes (FILE *f, size_t n)
134 fseeko (f, n, SEEK_CUR);
137 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
140 /* Read N bytes from F, storing the result in X */
142 get_bytes (FILE *f, void *x, size_t n)
144 return (n == fread (x, 1, n, f));
147 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
150 /* Read a 32 bit value from F */
152 get_u32 (FILE *f, uint32_t *v)
155 if (!get_bytes (f, &x, sizeof x))
157 #ifdef WORDS_BIGENDIAN
165 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
168 /* Read a 16 bit value from F */
170 get_u16 (FILE *f, uint16_t *v)
173 if (!get_bytes (f, &x, sizeof x))
175 #ifdef WORDS_BIGENDIAN
184 /* Read a 32 bit integer from F and compare it with EXPECTED.
185 Place an error string in ERR if necessary.
   If the 32 bit value cannot be read at all, false is returned
   immediately. */
187 check_magic (FILE *f, uint32_t expected, struct string *err)
191 if (! get_u32 (f, &magic)) return false;
/* Mismatch: the message reports a file position together with the
   expected and the actual value, all in hexadecimal.
   NOTE(review): in the position argument below the cast applies to the
   result of ftello only; the subtraction of sizeof (uint32_t) is then
   carried out under the usual arithmetic conversions, so the argument
   type is not necessarily long long -- confirm against the %llx
   conversion. */
193 if ((expected != magic))
196 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
197 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
205 /* Reads up to BYTES bytes from ZM and puts them in BUF.
206 Returns the number of bytes read, or -1 on error.
   The request is limited to the number of bytes still unread in the
   member, and the actual reading is delegated to the member's
   decompressor. */
208 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
/* Start each read with an empty error string. */
212 ds_clear (zm->errmsgs);
/* Never ask the decompressor for more than the member has left. */
214 if ( bytes > zm->bytes_unread)
215 bytes = zm->bytes_unread;
217 bytes_read = zm->decompressor->read (zm, buf, bytes);
/* Account for what was just delivered. */
221 zm->bytes_unread -= bytes_read;
227 /* Read a central directory header from FILE and initializes ZE with it.
228 Returns true if successful, false otherwise. On error, appends error
231 zip_header_read_next (FILE *file, struct zip_entry *ze, struct string *errs)
233 uint16_t v, nlen, extralen;
234 uint16_t gp, time, date;
235 uint32_t expected_crc;
237 uint16_t clen, diskstart, iattr;
241 if ( ! check_magic (file, MAGIC_SOCD, errs))
244 if (! get_u16 (file, &v)) return false;
245 if (! get_u16 (file, &v)) return false;
246 if (! get_u16 (file, &gp)) return false;
247 if (! get_u16 (file, &comp_type)) return false;
248 if (! get_u16 (file, &time)) return false;
249 if (! get_u16 (file, &date)) return false;
250 if (! get_u32 (file, &expected_crc)) return false;
251 if (! get_u32 (file, &ze->comp_size)) return false;
252 if (! get_u32 (file, &ze->ucomp_size)) return false;
253 if (! get_u16 (file, &nlen)) return false;
254 if (! get_u16 (file, &extralen)) return false;
255 if (! get_u16 (file, &clen)) return false;
256 if (! get_u16 (file, &diskstart)) return false;
257 if (! get_u16 (file, &iattr)) return false;
258 if (! get_u32 (file, &eattr)) return false;
259 if (! get_u32 (file, &ze->offset)) return false;
261 ze->name = xzalloc (nlen + 1);
262 if (! get_bytes (file, ze->name, nlen)) return false;
264 skip_bytes (file, extralen);
270 /* Create a reader from the zip called FILENAME */
272 zip_reader_create (const char *filename, struct string *errs)
274 uint16_t disknum, n_members, total_members;
276 uint32_t central_dir_start, central_dir_length;
278 struct zip_reader *zr = xzalloc (sizeof *zr);
281 ds_init_empty (zr->errs);
283 FILE *file = fopen (filename, "rb");
286 ds_put_cstr (zr->errs, strerror (errno));
291 if ( ! check_magic (file, MAGIC_LHDR, zr->errs))
298 if ( ! find_eocd (file, &offset))
300 ds_put_format (zr->errs, _("Cannot find central directory"));
306 if ( 0 != fseeko (file, offset, SEEK_SET))
308 const char *mm = strerror (errno);
309 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
316 if ( ! check_magic (file, MAGIC_EOCD, zr->errs))
323 if (! get_u16 (file, &disknum)
324 || ! get_u16 (file, &disknum)
326 || ! get_u16 (file, &n_members)
327 || ! get_u16 (file, &total_members)
329 || ! get_u32 (file, &central_dir_length)
330 || ! get_u32 (file, &central_dir_start))
337 if ( 0 != fseeko (file, central_dir_start, SEEK_SET))
339 const char *mm = strerror (errno);
340 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
346 zr->filename = xstrdup (filename);
348 zr->entries = xcalloc (n_members, sizeof *zr->entries);
349 for (int i = 0; i < n_members; i++)
351 if (!zip_header_read_next (file, &zr->entries[zr->n_entries], errs))
354 zip_reader_destroy (zr);
363 static struct zip_entry *
364 zip_entry_find (struct zip_reader *zr, const char *member)
366 for (int i = 0; i < zr->n_entries; ++i)
368 struct zip_entry *ze = &zr->entries[i];
369 if (0 == strcmp (ze->name, member))
376 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
378 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
381 /* Return the member called MEMBER from the reader ZR.
   The member is looked up by name among ZR's directory entries, the
   archive file is opened again, and the local file header found at the
   entry's offset is parsed and cross-checked against the entry before
   the decompressor is initialised.  Error text is written to ZR's
   error string. */
383 zip_member_open (struct zip_reader *zr, const char *member)
385 struct zip_entry *ze = zip_entry_find (zr, member);
388 ds_put_format (zr->errs, _("%s: unknown member"), member);
/* Open the archive again by name, so the member gets a stream of its
   own. */
392 FILE *fp = fopen (zr->filename, "rb");
395 ds_put_cstr (zr->errs, strerror (errno));
/* Seed the member from the directory entry.  The decompressor is not
   known until the local header has been read. */
399 struct zip_member *zm = xmalloc (sizeof *zm);
401 zm->offset = ze->offset;
402 zm->comp_size = ze->comp_size;
403 zm->ucomp_size = ze->ucomp_size;
404 zm->decompressor = NULL;
405 zm->bytes_unread = ze->ucomp_size;
/* The member borrows the reader's error string. */
406 zm->errmsgs = zr->errs;
/* Position the stream at the member's local file header. */
409 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
411 ds_put_format (zr->errs, _("Failed to seek to start of member `%s': %s"),
412 ze->name, strerror (errno));
/* The local header must start with its magic number. */
416 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
419 uint16_t v, nlen, extra_len;
420 uint16_t gp, comp_type, time, date;
421 uint32_t ucomp_size, comp_size;
/* Read the header fields in file order; any short read abandons the
   open. */
423 if (! get_u16 (zm->fp, &v)) goto error;
424 if (! get_u16 (zm->fp, &gp)) goto error;
425 if (! get_u16 (zm->fp, &comp_type)) goto error;
/* The compression type selects the decompressor; a type for which
   get_decompressor returns a null pointer is treated as an error. */
426 zm->decompressor = get_decompressor (comp_type);
427 if (! zm->decompressor) goto error;
428 if (! get_u16 (zm->fp, &time)) goto error;
429 if (! get_u16 (zm->fp, &date)) goto error;
430 if (! get_u32 (zm->fp, &crc)) goto error;
/* NOTE(review): these two sizes land in locals; the sizes stored in ZM
   above came from the directory entry.  Whether the locals are used
   afterwards is not visible here -- confirm. */
431 if (! get_u32 (zm->fp, &comp_size)) goto error;
433 if (! get_u32 (zm->fp, &ucomp_size)) goto error;
434 if (! get_u16 (zm->fp, &nlen)) goto error;
435 if (! get_u16 (zm->fp, &extra_len)) goto error;
/* One byte more than the name length, so that the bytes read can be
   compared with strcmp (assumes xzalloc zero-fills -- confirm). */
437 char *name = xzalloc (nlen + 1);
438 if (! get_bytes (zm->fp, name, nlen))
/* The name in the local header has to agree with the directory
   entry's name. */
443 if (strcmp (name, ze->name) != 0)
445 ds_put_format (zm->errmsgs,
446 _("Name mismatch in zip archive. Central directory "
447 "says `%s'; local file header says `%s'"),
/* Step over the extra field that follows the name. */
454 skip_bytes (zm->fp, extra_len);
456 if (!zm->decompressor->init (zm) )
469 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
472 /* Search for something that looks like the End Of Central Directory in FP.
473 If found, the offset of the record will be placed in OFF.
474 Returns true if found, false otherwise.
477 find_eocd (FILE *fp, off_t *off)
480 const uint32_t magic = MAGIC_EOCD;
483 /* The magic cannot be less than 22 bytes from the end of the file,
484 because that is the minimum length of the EndOfCentralDirectory
487 if ( 0 > fseeko (fp, -22, SEEK_END))
492 stop = start + sizeof (magic);
495 found = probe_magic (fp, magic, start, stop, off);
496 /* FIXME: For extra confidence, look up the directory start record here. */
499 stop = start + sizeof (magic);
509 Search FP for MAGIC, starting at START and continuing until STOP.
510 Returns true iff MAGIC is found, false otherwise.
511 OFF receives the location of the magic.
514 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
518 unsigned char seq[4];
521 if ( 0 > fseeko (fp, start, SEEK_SET))
526 for (i = 0; i < 4 ; ++i)
528 seq[i] = (magic >> i * 8) & 0xFF;
533 if (1 != fread (&byte, 1, 1, fp))
536 if ( byte == seq[state])
543 *off = ftello (fp) - 4;
555 /* Null decompressor. */
558 stored_read (struct zip_member *zm, void *buf, size_t n)
560 return fread (buf, 1, n, zm->fp);
564 stored_init (struct zip_member *zm UNUSED)
570 stored_finish (struct zip_member *zm UNUSED)
572 /* Nothing required */
575 static const struct decompressor stored_decompressor =
576 {stored_init, stored_read, stored_finish};
578 /* Inflate decompressor. */
583 #define UCOMPSIZE 4096
589 unsigned char ucomp[UCOMPSIZE];
591 size_t ucomp_bytes_read;
593 /* Two bitfields as defined by RFC1950 */
598 inflate_finish (struct zip_member *zm)
600 struct inflator *inf = zm->aux;
602 inflateEnd (&inf->zss);
/* Prepare ZM for reading through zlib: allocate the inflator state,
   compute the 16 bit header word that is later placed in front of the
   member's data, and initialise the zlib stream.  A failure of
   inflateInit is described in ZM's error string. */
608 inflate_init (struct zip_member *zm)
611 struct inflator *inf = xzalloc (sizeof *inf);
614 uint16_t cmf = 0x8; /* Always 8 for inflate */
616 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
618 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
620 /* make these into a 16 bit word */
621 inf->cmf_flg = (cmf << 8 ) | flg;
623 /* Set the check bits */
/* Raise the word to a multiple of 31.  Should the word already be a
   multiple of 31, a full 31 is added, which keeps it one. */
624 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
625 assert (inf->cmf_flg % 31 == 0);
/* No input is available yet, and no custom allocator callbacks are
   supplied. */
627 inf->zss.next_in = Z_NULL;
628 inf->zss.avail_in = 0;
629 inf->zss.zalloc = Z_NULL;
630 inf->zss.zfree = Z_NULL;
631 inf->zss.opaque = Z_NULL;
632 r = inflateInit (&inf->zss);
636 ds_put_format (zm->errmsgs, _("Cannot initialize inflator: %s"), zError (r));
/* Decompress up to N bytes of ZM into BUF.
   When zlib has no pending input, the staging buffer is refilled from
   the member's stream first; on the first refill the 16 bit header word
   is placed at the front of the buffer.  The count returned on
   line 687 is the number of bytes zlib wrote to BUF; an inflate
   failure is described in ZM's error string. */
646 inflate_read (struct zip_member *zm, void *buf, size_t n)
649 struct inflator *inf = zm->aux;
/* Refill only once zlib has consumed everything handed to it. */
651 if (inf->zss.avail_in == 0)
/* First refill: store the header word, high byte first.  (PAD is
   presumably set to the header's size in a line not visible here --
   confirm.) */
657 if ( inf->state == 0)
659 inf->ucomp[1] = inf->cmf_flg ;
660 inf->ucomp[0] = inf->cmf_flg >> 8 ;
/* Compressed bytes of the member that have not been fetched yet. */
666 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
668 if (bytes_to_read == 0)
/* At most one buffer's worth per refill. */
671 if (bytes_to_read > UCOMPSIZE)
672 bytes_to_read = UCOMPSIZE;
/* NOTE(review): the count passed to fread is BYTES_TO_READ - PAD.
   When PAD is non-zero and fewer than PAD compressed bytes remain,
   that difference drops below zero and, as fread's size_t count,
   becomes very large -- confirm against the declarations of PAD and
   BYTES_TO_READ, which are not visible here.  Note also that PAD is
   subtracted even when BYTES_TO_READ was not clamped. */
674 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
676 inf->ucomp_bytes_read += bytes_read;
/* zlib is shown the header bytes (if any) plus what was just read. */
678 inf->zss.avail_in = bytes_read + pad;
679 inf->zss.next_in = inf->ucomp;
/* Let zlib write straight into the caller's buffer. */
681 inf->zss.avail_out = n;
682 inf->zss.next_out = buf;
684 r = inflate (&inf->zss, Z_NO_FLUSH);
/* Bytes produced = space offered minus space left over. */
687 return n - inf->zss.avail_out;
690 ds_put_format (zm->errmsgs, _("Error inflating: %s"), zError (r));
695 static const struct decompressor inflate_decompressor =
696 {inflate_init, inflate_read, inflate_finish};