1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
32 #include "integer-format.h"
33 #include "zip-reader.h"
34 #include "zip-private.h"
/* Message translation helpers: _() passes MSGID to gettext(), while N_()
   expands to MSGID unchanged. */
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) (msgid)
/* State of one archive member that is open for reading.  zip_member_open()
   fills in these fields. */
42 char *file_name; /* Name of the archive file (a private copy). */
43 char *member_name; /* Name of the member (a private copy). */
44 FILE *fp; /* The stream from which the data is read. */
45 uint32_t offset; /* Starting offset of the member in the file. */
46 uint32_t comp_size; /* Length of member file data as stored in the file, in bytes. */
47 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
48 const struct decompressor *decompressor; /* Routines used to read this member's data. */
50 size_t bytes_unread; /* Number of bytes left in the member available for reading. */
51 struct string *errmsgs; /* A string to hold error messages.
52 This string is NOT owned by this object. */
/* Hooks that a decompression method supplies. */
58 bool (*init) (struct zip_member *); /* Invoked by zip_member_open(), which tests the result. */
59 int (*read) (struct zip_member *, void *, size_t); /* Invoked by zip_member_read() to fetch data. */
60 void (*finish) (struct zip_member *); /* Invoked by zip_member_finish(). */
/* Forward declarations; the definitions appear later in this file. */
62 static const struct decompressor stored_decompressor;
63 static const struct decompressor inflate_decompressor;
65 static bool find_eocd (FILE *fp, off_t *off);
/* Selects the decompressor for compression method code C, yielding either
   the stored or the inflate decompressor.  zip_member_open() treats a null
   result as an error.
   NOTE(review): the method codes that select each decompressor are not
   visible here -- confirm against the ZIP specification. */
67 static const struct decompressor *
68 get_decompressor (uint16_t c)
73 return &stored_decompressor;
76 return &inflate_decompressor;
/* State of a zip archive reader, as set up by zip_reader_create(). */
85 char *file_name; /* The name of the file from which the data is read. */
86 uint16_t n_entries; /* Number of directory entries. */
87 struct zip_entry *entries; /* Directory entries. */
88 struct string *errs; /* A string to hold error messages. This
89 string is NOT owned by this object. */
/* One directory entry, as filled in by zip_header_read_next(). */
94 uint32_t offset; /* Starting offset in file. */
95 uint32_t comp_size; /* Length of member file data as stored in the file, in bytes. */
96 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
97 char *name; /* Name of member file (allocated by zip_header_read_next()). */
/* Releases what ZM holds: frees its copies of the file and member names,
   clears the shared error string, and invokes the decompressor's finish
   hook.  Note that clearing ZM->errmsgs discards any message still pending
   in that shared string. */
101 zip_member_finish (struct zip_member *zm)
105 free (zm->file_name);
106 free (zm->member_name);
107 ds_clear (zm->errmsgs);
108 zm->decompressor->finish (zm);
114 /* Destroys the zip reader ZR: frees its file name and visits each of its
   N_ENTRIES directory entries in turn. */
116 zip_reader_destroy (struct zip_reader *zr)
122 free (zr->file_name);
124 for (i = 0; i < zr->n_entries; ++i)
126 struct zip_entry *ze = &zr->entries[i];
134 /* Skips N bytes in F by seeking forward from the current position.
   NOTE(review): the result of fseeko() is not checked, so a failed seek
   goes unnoticed by the caller. */
136 skip_bytes (FILE *f, size_t n)
138 fseeko (f, n, SEEK_CUR);
141 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
144 /* Reads N bytes from F, storing the result in X.  Returns true only if
   fread() delivered all N bytes; a short read or error yields false. */
146 get_bytes (FILE *f, void *x, size_t n)
148 return (n == fread (x, 1, n, f));
151 /* Reads a 32-bit value from F.  The raw bytes are fetched with
   get_bytes() and converted with le_to_native32() before being stored in
   *V.  The bool result must be checked by the caller
   (WARN_UNUSED_RESULT). */
152 static bool WARN_UNUSED_RESULT
153 get_u32 (FILE *f, uint32_t *v)
156 if (!get_bytes (f, &x, sizeof x))
158 *v = le_to_native32 (x);
162 /* Reads a 16-bit value from F.  The raw bytes are fetched with
   get_bytes() and converted with le_to_native16() before being stored in
   *V.  The bool result must be checked by the caller
   (WARN_UNUSED_RESULT). */
163 static bool WARN_UNUSED_RESULT
164 get_u16 (FILE *f, uint16_t *v)
167 if (!get_bytes (f, &x, sizeof x))
169 *v = le_to_native16 (x);
174 /* Reads a 32-bit integer from F and compares it with EXPECTED.
175 Places an error string in ERR if necessary.  The message names FILE_NAME,
   the file offset at which the value was read (the current position minus
   the four bytes just consumed), and both the expected and actual values.
   NOTE(review): when the value cannot be read at all, false is returned
   without putting any message in ERR. */
177 check_magic (FILE *f, const char *file_name,
178 uint32_t expected, struct string *err)
182 if (! get_u32 (f, &magic)) return false;
184 if ((expected != magic))
187 _("%s: corrupt archive at 0x%llx: "
188 "expected %#"PRIx32" but got %#"PRIx32),
190 (long long int) ftello (f) - sizeof (uint32_t),
199 /* Reads up to BYTES bytes from ZM and puts them in BUF.
200 Returns the number of bytes read, or -1 on error.
   Any earlier contents of ZM's error string are cleared first. */
202 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
206 ds_clear (zm->errmsgs);
/* Never ask the decompressor for more than the member has left. */
208 if (bytes > zm->bytes_unread)
209 bytes = zm->bytes_unread;
211 bytes_read = zm->decompressor->read (zm, buf, bytes);
215 zm->bytes_unread -= bytes_read;
220 /* Reads all of the member named MEMBER_NAME in ZR into memory, storing the
   data in *DATAP and its size in *NP.
221 Returns NULL if successful, otherwise an error string that the caller
222 must eventually free().  The error string is taken from ZR's error
   string with ds_steal_cstr(). */
223 char * WARN_UNUSED_RESULT
224 zip_member_read_all (struct zip_reader *zr, const char *member_name,
225 void **datap, size_t *np)
227 struct zip_member *zm = zip_member_open (zr, member_name);
232 return ds_steal_cstr (zr->errs);
/* The buffer is sized from the member's recorded uncompressed length. */
235 *datap = xmalloc (zm->ucomp_size);
236 *np = zm->ucomp_size;
238 uint8_t *data = *datap;
/* Each pass appends at the position already filled, i.e. the uncompressed
   size minus what is still unread.
   NOTE(review): the loop ends only when bytes_unread reaches zero or a read
   returns -1.  If zip_member_read() can return 0 without error (for example
   on a truncated file), this may not terminate -- confirm. */
239 while (zm->bytes_unread)
240 if (zip_member_read (zm, data + (zm->ucomp_size - zm->bytes_unread),
241 zm->bytes_unread) == -1)
243 zip_member_finish (zm);
247 return ds_steal_cstr (zr->errs);
250 zip_member_finish (zm);
254 /* Reads a central directory header from FILE and initializes ZE with it.
   The header must begin with MAGIC_SOCD.  Of the fields read, only the
   compressed size, uncompressed size, offset and name are stored in ZE; the
   remaining fields are read into locals, which advances the stream.  The
   name goes into a buffer of NLEN + 1 bytes obtained from xzalloc(), and the
   EXTRALEN bytes that follow it are skipped.
   NOTE(review): CLEN is read but no matching skip is visible here --
   confirm that a non-empty trailing field of that length cannot
   desynchronize the next header.
   NOTE(review): if reading the name fails, ZE->name has already been
   allocated -- confirm that the caller releases it.
255 Returns true if successful, false otherwise. On error, appends error
258 zip_header_read_next (FILE *file, const char *file_name,
259 struct zip_entry *ze, struct string *errs)
261 uint16_t v, nlen, extralen;
262 uint16_t gp, time, date;
263 uint32_t expected_crc;
265 uint16_t clen, diskstart, iattr;
269 if (! check_magic (file, file_name, MAGIC_SOCD, errs))
272 if (! get_u16 (file, &v)) return false;
273 if (! get_u16 (file, &v)) return false;
274 if (! get_u16 (file, &gp)) return false;
275 if (! get_u16 (file, &comp_type)) return false;
276 if (! get_u16 (file, &time)) return false;
277 if (! get_u16 (file, &date)) return false;
278 if (! get_u32 (file, &expected_crc)) return false;
279 if (! get_u32 (file, &ze->comp_size)) return false;
280 if (! get_u32 (file, &ze->ucomp_size)) return false;
281 if (! get_u16 (file, &nlen)) return false;
282 if (! get_u16 (file, &extralen)) return false;
283 if (! get_u16 (file, &clen)) return false;
284 if (! get_u16 (file, &diskstart)) return false;
285 if (! get_u16 (file, &iattr)) return false;
286 if (! get_u32 (file, &eattr)) return false;
287 if (! get_u32 (file, &ze->offset)) return false;
289 ze->name = xzalloc (nlen + 1);
290 if (! get_bytes (file, ze->name, nlen)) return false;
292 skip_bytes (file, extralen);
298 /* Creates a reader from the zip called FILE_NAME.
   The file is opened in binary mode and must begin with MAGIC_LHDR.  The
   end-of-central-directory record is located with find_eocd() and verified
   against MAGIC_EOCD, and its fields give the number of members and the
   position of the central directory.  The reader then seeks there and reads
   one directory header per member with zip_header_read_next().
   Failures before the directory is read are reported in ZR's error string;
   a failure while reading a header is reported in ERRS and destroys the
   reader. */
300 zip_reader_create (const char *file_name, struct string *errs)
302 uint16_t disknum, n_members, total_members;
304 uint32_t central_dir_start, central_dir_length;
306 struct zip_reader *zr = xzalloc (sizeof *zr);
309 ds_init_empty (zr->errs);
311 FILE *file = fopen (file_name, "rb");
314 ds_put_format (zr->errs, _("%s: open failed (%s)"),
315 file_name, strerror (errno));
320 if (! check_magic (file, file_name, MAGIC_LHDR, zr->errs))
327 if (! find_eocd (file, &offset))
329 ds_put_format (zr->errs, _("%s: cannot find central directory"),
336 if (0 != fseeko (file, offset, SEEK_SET))
338 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
339 file_name, strerror (errno));
346 if (! check_magic (file, file_name, MAGIC_EOCD, zr->errs))
/* Fixed fields of the end-of-central-directory record.  DISKNUM is read
   twice because two consecutive 16-bit fields are consumed into it; only
   the second value remains. */
353 if (! get_u16 (file, &disknum)
354 || ! get_u16 (file, &disknum)
356 || ! get_u16 (file, &n_members)
357 || ! get_u16 (file, &total_members)
359 || ! get_u32 (file, &central_dir_length)
360 || ! get_u32 (file, &central_dir_start))
367 if (0 != fseeko (file, central_dir_start, SEEK_SET))
369 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
370 file_name, strerror (errno));
376 zr->file_name = xstrdup (file_name);
/* Room for every member is reserved up front; each header is read into the
   slot at index ZR->n_entries. */
378 zr->entries = xcalloc (n_members, sizeof *zr->entries);
379 for (int i = 0; i < n_members; i++)
381 if (!zip_header_read_next (file, file_name,
382 &zr->entries[zr->n_entries], errs))
385 zip_reader_destroy (zr);
/* Searches ZR's directory entries, in order, for one whose name compares
   equal to MEMBER with strcmp().  Callers compare the result against
   NULL. */
395 static struct zip_entry *
396 zip_entry_find (const struct zip_reader *zr, const char *member)
398 for (int i = 0; i < zr->n_entries; ++i)
400 struct zip_entry *ze = &zr->entries[i];
401 if (0 == strcmp (ze->name, member))
/* Returns the name of the member at index IDX in ZR, or NULL if IDX is not
   less than the number of entries.  The returned string is the one stored in
   the directory entry, not a copy. */
408 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
410 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
413 /* Returns true if ZR contains a member named MEMBER, false otherwise.
   The lookup is delegated to zip_entry_find(). */
415 zip_reader_contains_member (const struct zip_reader *zr, const char *member)
417 return zip_entry_find (zr, member) != NULL;
420 /* Opens the member called MEMBER from the reader ZR.
   The directory entry is looked up with zip_entry_find(), the archive file
   is opened afresh in binary mode, and a new zip_member is filled in from
   the entry.  The stream is then positioned at the entry's offset, where a
   local file header beginning with MAGIC_LHDR is expected.  The header's
   compression method selects the decompressor, and its name must match the
   name recorded in the directory.  Error messages go to ZR's error string,
   which the new member shares. */
422 zip_member_open (struct zip_reader *zr, const char *member)
424 struct zip_entry *ze = zip_entry_find (zr, member);
427 ds_put_format (zr->errs, _("%s: unknown member \"%s\""),
428 zr->file_name, member);
432 FILE *fp = fopen (zr->file_name, "rb");
435 ds_put_format (zr->errs, _("%s: open failed (%s)"),
436 zr->file_name, strerror (errno));
440 struct zip_member *zm = xmalloc (sizeof *zm);
441 zm->file_name = xstrdup (zr->file_name);
442 zm->member_name = xstrdup (member);
/* Offsets and sizes come from the central directory entry. */
444 zm->offset = ze->offset;
445 zm->comp_size = ze->comp_size;
446 zm->ucomp_size = ze->ucomp_size;
447 zm->decompressor = NULL;
448 zm->bytes_unread = ze->ucomp_size;
449 zm->errmsgs = zr->errs;
/* NOTE(review): the message below names the member (ze->name), whereas the
   other messages in this function name the archive file -- confirm whether
   that is intended. */
452 if (0 != fseeko (zm->fp, zm->offset, SEEK_SET))
454 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
455 ze->name, strerror (errno));
459 if (! check_magic (zm->fp, zr->file_name, MAGIC_LHDR, zr->errs))
462 uint16_t v, nlen, extra_len;
463 uint16_t gp, comp_type, time, date;
/* The sizes in the local header are read into these locals only; ZM keeps
   the sizes taken from the directory entry above. */
464 uint32_t ucomp_size, comp_size;
466 if (! get_u16 (zm->fp, &v)) goto error;
467 if (! get_u16 (zm->fp, &gp)) goto error;
468 if (! get_u16 (zm->fp, &comp_type)) goto error;
469 zm->decompressor = get_decompressor (comp_type);
470 if (! zm->decompressor) goto error;
471 if (! get_u16 (zm->fp, &time)) goto error;
472 if (! get_u16 (zm->fp, &date)) goto error;
473 if (! get_u32 (zm->fp, &crc)) goto error;
474 if (! get_u32 (zm->fp, &comp_size)) goto error;
476 if (! get_u32 (zm->fp, &ucomp_size)) goto error;
477 if (! get_u16 (zm->fp, &nlen)) goto error;
478 if (! get_u16 (zm->fp, &extra_len)) goto error;
/* One extra byte is requested from xzalloc() beyond the NLEN name bytes. */
480 char *name = xzalloc (nlen + 1);
481 if (! get_bytes (zm->fp, name, nlen))
486 if (strcmp (name, ze->name) != 0)
488 ds_put_format (zm->errmsgs,
489 _("%s: name mismatch between central directory (%s) "
490 "and local file header (%s)"),
491 zm->file_name, ze->name, name);
/* After the extra field is skipped the stream is at the member's data. */
497 skip_bytes (zm->fp, extra_len);
499 if (!zm->decompressor->init (zm))
506 free (zm->file_name);
507 free (zm->member_name);
514 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
517 /* Search for something that looks like the End Of Central Directory in FP.
518 If found, the offset of the record will be placed in OFF.
   The search begins 22 bytes before the end of the file and is carried out
   by probe_magic() over the range START to STOP.
519 Returns true if found, false otherwise.
522 find_eocd (FILE *fp, off_t *off)
525 const uint32_t magic = MAGIC_EOCD;
528 /* The magic cannot be fewer than 22 bytes from the end of the file,
529 because that is the minimum length of the EndOfCentralDirectory
532 if (0 > fseeko (fp, -22, SEEK_END))
537 stop = start + sizeof (magic);
540 found = probe_magic (fp, magic, start, stop, off);
541 /* FIXME: For extra confidence, look up the directory start record here. */
544 stop = start + sizeof (magic);
554 Search FP for MAGIC starting at START and reaching until STOP.
555 Returns true iff MAGIC is found. False otherwise.
556 OFF receives the location of the magic.
559 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
/* SEQ holds the four bytes of MAGIC, least significant byte first, so the
   file can be matched one byte at a time. */
563 unsigned char seq[4];
566 if (0 > fseeko (fp, start, SEEK_SET))
571 for (i = 0; i < 4 ; ++i)
573 seq[i] = (magic >> i * 8) & 0xFF;
/* A failed single-byte read is tested for here. */
578 if (1 != fread (&byte, 1, 1, fp))
/* STATE indexes the byte of SEQ expected next. */
581 if (byte == seq[state])
/* The reported offset is four bytes before the current stream position.
   Presumably this is reached once all four bytes have matched -- confirm. */
588 *off = ftello (fp) - 4;
600 /* Null decompressor. */
/* Reads up to N bytes of ZM's data straight from its stream into BUF and
   returns the count reported by fread().
   NOTE(review): a read error shows up only as a short (possibly zero) count;
   no -1 is produced here. */
603 stored_read (struct zip_member *zm, void *buf, size_t n)
605 return fread (buf, 1, n, zm->fp);
/* Initialization hook of the null decompressor; ZM is not used. */
609 stored_init (struct zip_member *zm UNUSED)
/* Finish hook of the null decompressor; ZM is not used. */
615 stored_finish (struct zip_member *zm UNUSED)
617 /* Nothing required. */
/* Hook table for the null decompressor, in the order init, read, finish. */
620 static const struct decompressor stored_decompressor =
621 {stored_init, stored_read, stored_finish};
623 /* Inflate decompressor. */
/* Size, in bytes, of the buffer that holds data read from the file before it
   is handed to zlib. */
628 #define UCOMPSIZE 4096
634 unsigned char ucomp[UCOMPSIZE]; /* Bytes read from the file; used as zlib's input. */
636 size_t ucomp_bytes_read; /* Running total of bytes read from the file into UCOMP. */
638 /* Two bitfields as defined by RFC1950 */
/* Finish hook of the inflate decompressor: shuts down the zlib stream kept
   in ZM's auxiliary inflator state. */
643 inflate_finish (struct zip_member *zm)
645 struct inflator *inf = zm->aux;
647 inflateEnd (&inf->zss);
/* Initialization hook of the inflate decompressor.
   Allocates the inflator state, precomputes a 16-bit CMF/FLG header word for
   later use by inflate_read(), and initializes the zlib stream with no
   input pending and zlib's default allocators.  If zlib reports a failure, a
   message naming ZM's file is added to ZM's error string. */
653 inflate_init (struct zip_member *zm)
656 struct inflator *inf = xzalloc (sizeof *inf);
659 uint16_t cmf = 0x8; /* Always 8 for inflate */
661 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
663 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
665 /* Make these into a 16 bit word, CMF in the high byte and FLG in the low. */
666 inf->cmf_flg = (cmf << 8) | flg;
668 /* Set the check bits so that the word is a multiple of 31. */
669 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
670 assert (inf->cmf_flg % 31 == 0);
672 inf->zss.next_in = Z_NULL;
673 inf->zss.avail_in = 0;
674 inf->zss.zalloc = Z_NULL;
675 inf->zss.zfree = Z_NULL;
676 inf->zss.opaque = Z_NULL;
677 r = inflateInit (&inf->zss);
681 ds_put_format (zm->errmsgs,
682 _("%s: cannot initialize inflator (%s)"),
683 zm->file_name, zError (r));
/* Read hook of the inflate decompressor: inflates up to N bytes of ZM's data
   into BUF and, on the path visible here, returns the number of bytes
   produced (N minus the output space zlib left unused).  When zlib reports a
   failure, a message naming the file and member is added to ZM's error
   string. */
693 inflate_read (struct zip_member *zm, void *buf, size_t n)
696 struct inflator *inf = zm->aux;
/* Refill the input buffer only once zlib has consumed everything in it. */
698 if (inf->zss.avail_in == 0)
/* Place the header word built by inflate_init() at the front of the buffer,
   high byte first. */
706 inf->ucomp[1] = inf->cmf_flg ;
707 inf->ucomp[0] = inf->cmf_flg >> 8 ;
/* What remains to be fetched is the member's stored size minus what has
   already been read from the file. */
713 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
715 if (bytes_to_read == 0)
718 if (bytes_to_read > UCOMPSIZE)
719 bytes_to_read = UCOMPSIZE;
/* PAD bytes at the front of the buffer are already occupied, so the file
   data goes after them and the request shrinks by the same amount.
   NOTE(review): if bytes_to_read can be smaller than pad, the subtraction
   below goes out of range -- confirm. */
721 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
723 inf->ucomp_bytes_read += bytes_read;
725 inf->zss.avail_in = bytes_read + pad;
726 inf->zss.next_in = inf->ucomp;
/* Point zlib at the caller's buffer for this call. */
728 inf->zss.avail_out = n;
729 inf->zss.next_out = buf;
731 r = inflate (&inf->zss, Z_NO_FLUSH);
734 return n - inf->zss.avail_out;
737 ds_put_format (zm->errmsgs, _("%s: error inflating \"%s\" (%s)"),
738 zm->file_name, zm->member_name, zError (r));
/* Hook table for the inflate decompressor, in the order init, read,
   finish. */
743 static const struct decompressor inflate_decompressor =
744 {inflate_init, inflate_read, inflate_finish};