1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
32 #include "integer-format.h"
33 #include "zip-reader.h"
34 #include "zip-private.h"
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) (msgid)
42 char *file_name; /* File name. */
43 char *member_name; /* Member name. */
44 FILE *fp; /* The stream from which the data is read */
45 uint32_t offset; /* Starting offset in file. */
46 uint32_t comp_size; /* Length of member file data, in bytes. */
47 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
48 const struct decompressor *decompressor;
50 size_t bytes_unread; /* Number of bytes left in the member available for reading */
51 struct string *errmsgs; /* A string to hold error messages.
52 This string is NOT owned by this object. */
58 bool (*init) (struct zip_member *);
59 int (*read) (struct zip_member *, void *, size_t);
60 void (*finish) (struct zip_member *);
62 static const struct decompressor stored_decompressor;
63 static const struct decompressor inflate_decompressor;
65 static bool find_eocd (FILE *fp, off_t *off);
67 static const struct decompressor *
68 get_decompressor (uint16_t c)
73 return &stored_decompressor;
76 return &inflate_decompressor;
85 char *file_name; /* The name of the file from which the data is read */
86 uint16_t n_entries; /* Number of directory entries. */
87 struct zip_entry *entries; /* Directory entries. */
88 struct string *errs; /* A string to hold error messages. This
89 string is NOT owned by this object. */
94 uint32_t offset; /* Starting offset in file. */
95 uint32_t comp_size; /* Length of member file data, in bytes. */
96 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
97 char *name; /* Name of member file. */
101 zip_member_finish (struct zip_member *zm)
105 free (zm->file_name);
106 free (zm->member_name);
107 ds_clear (zm->errmsgs);
108 zm->decompressor->finish (zm);
114 /* Destroy the zip reader */
116 zip_reader_destroy (struct zip_reader *zr)
122 free (zr->file_name);
124 for (i = 0; i < zr->n_entries; ++i)
126 struct zip_entry *ze = &zr->entries[i];
/* Moves the read position of F forward by N bytes.  The outcome of the seek
   is not checked. */
static void
skip_bytes (FILE *f, size_t n)
{
  off_t distance = n;

  fseeko (f, distance, SEEK_CUR);
}
141 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
144 /* Read N bytes from F, storing the result in X */
146 get_bytes (FILE *f, void *x, size_t n)
148 return (n == fread (x, 1, n, f));
151 /* Read a 32 bit value from F */
152 static bool WARN_UNUSED_RESULT
153 get_u32 (FILE *f, uint32_t *v)
156 if (!get_bytes (f, &x, sizeof x))
158 *v = le_to_native32 (x);
162 /* Read a 16 bit value from F */
163 static bool WARN_UNUSED_RESULT
164 get_u16 (FILE *f, uint16_t *v)
167 if (!get_bytes (f, &x, sizeof x))
169 *v = le_to_native16 (x);
/* Reads a 32-bit integer from F and compares it with EXPECTED.  Returns true
   if they are equal.  Otherwise appends a message that names FILE_NAME to
   ERR and returns false; this covers both a value that differs and a read
   that could not obtain all four bytes. */
static bool
check_magic (FILE *f, const char *file_name,
             uint32_t expected, struct string *err)
{
  uint32_t magic;

  if (!get_u32 (f, &magic))
    {
      /* Without this the caller would fail with an empty error string. */
      ds_put_format (err, _("%s: unexpected end of file or read error"),
                     file_name);
      return false;
    }

  if (expected != magic)
    {
      /* The magic has already been consumed, so step back over it to report
         where it began.  The subtraction is done in a signed type: mixing
         the signed offset with the unsigned result of sizeof would convert
         the whole expression to unsigned. */
      long long int pos = ((long long int) ftello (f)
                           - (long long int) sizeof magic);

      ds_put_format (err,
                     _("%s: corrupt archive at 0x%llx: "
                       "expected %#"PRIx32" but got %#"PRIx32),
                     file_name,
                     (unsigned long long int) pos,
                     expected, magic);
      return false;
    }

  return true;
}
199 /* Reads upto BYTES bytes from ZM and puts them in BUF.
200 Returns the number of bytes read, or -1 on error */
202 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
206 ds_clear (zm->errmsgs);
208 if (bytes > zm->bytes_unread)
209 bytes = zm->bytes_unread;
211 bytes_read = zm->decompressor->read (zm, buf, bytes);
215 zm->bytes_unread -= bytes_read;
220 /* Read all of ZM into memory, storing the data in *DATAP and its size in *NP.
221 Returns NULL if successful, otherwise an error string that the caller
222 must eventually free(). */
223 char * WARN_UNUSED_RESULT
224 zip_member_read_all (struct zip_reader *zr, const char *member_name,
225 void **datap, size_t *np)
227 struct zip_member *zm = zip_member_open (zr, member_name);
232 return ds_steal_cstr (zr->errs);
235 *datap = xmalloc (zm->ucomp_size);
236 *np = zm->ucomp_size;
238 uint8_t *data = *datap;
239 while (zm->bytes_unread)
240 if (zip_member_read (zm, data + (zm->ucomp_size - zm->bytes_unread),
241 zm->bytes_unread) == -1)
243 zip_member_finish (zm);
247 return ds_steal_cstr (zr->errs);
250 zip_member_finish (zm);
254 /* Read a central directory header from FILE and initializes ZE with it.
255 Returns true if successful, false otherwise. On error, appends error
258 zip_header_read_next (FILE *file, const char *file_name,
259 struct zip_entry *ze, struct string *errs)
261 uint16_t v, nlen, extralen;
262 uint16_t gp, time, date;
263 uint32_t expected_crc;
265 uint16_t clen, diskstart, iattr;
269 if (! check_magic (file, file_name, MAGIC_SOCD, errs))
272 if (! get_u16 (file, &v)) return false;
273 if (! get_u16 (file, &v)) return false;
274 if (! get_u16 (file, &gp)) return false;
275 if (! get_u16 (file, &comp_type)) return false;
276 if (! get_u16 (file, &time)) return false;
277 if (! get_u16 (file, &date)) return false;
278 if (! get_u32 (file, &expected_crc)) return false;
279 if (! get_u32 (file, &ze->comp_size)) return false;
280 if (! get_u32 (file, &ze->ucomp_size)) return false;
281 if (! get_u16 (file, &nlen)) return false;
282 if (! get_u16 (file, &extralen)) return false;
283 if (! get_u16 (file, &clen)) return false;
284 if (! get_u16 (file, &diskstart)) return false;
285 if (! get_u16 (file, &iattr)) return false;
286 if (! get_u32 (file, &eattr)) return false;
287 if (! get_u32 (file, &ze->offset)) return false;
289 ze->name = xzalloc (nlen + 1);
290 if (! get_bytes (file, ze->name, nlen)) return false;
292 skip_bytes (file, extralen);
298 /* Create a reader from the zip called FILE_NAME */
300 zip_reader_create (const char *file_name, struct string *errs)
302 uint16_t disknum, n_members, total_members;
304 uint32_t central_dir_start, central_dir_length;
306 struct zip_reader *zr = xzalloc (sizeof *zr);
309 ds_init_empty (zr->errs);
311 FILE *file = fopen (file_name, "rb");
314 ds_put_format (zr->errs, _("%s: open failed (%s)"),
315 file_name, strerror (errno));
320 if (! check_magic (file, file_name, MAGIC_LHDR, zr->errs))
327 if (! find_eocd (file, &offset))
329 ds_put_format (zr->errs, _("%s: cannot find central directory"),
336 if (0 != fseeko (file, offset, SEEK_SET))
338 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
339 file_name, strerror (errno));
346 if (! check_magic (file, file_name, MAGIC_EOCD, zr->errs))
353 if (! get_u16 (file, &disknum)
354 || ! get_u16 (file, &disknum)
356 || ! get_u16 (file, &n_members)
357 || ! get_u16 (file, &total_members)
359 || ! get_u32 (file, ¢ral_dir_length)
360 || ! get_u32 (file, ¢ral_dir_start))
367 if (0 != fseeko (file, central_dir_start, SEEK_SET))
369 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
370 file_name, strerror (errno));
376 zr->file_name = xstrdup (file_name);
378 zr->entries = xcalloc (n_members, sizeof *zr->entries);
379 for (int i = 0; i < n_members; i++)
381 if (!zip_header_read_next (file, file_name,
382 &zr->entries[zr->n_entries], errs))
385 zip_reader_destroy (zr);
394 static struct zip_entry *
395 zip_entry_find (const struct zip_reader *zr, const char *member)
397 for (int i = 0; i < zr->n_entries; ++i)
399 struct zip_entry *ze = &zr->entries[i];
400 if (0 == strcmp (ze->name, member))
407 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
409 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
/* Reports whether ZR has a directory entry named exactly MEMBER. */
bool
zip_reader_contains_member (const struct zip_reader *zr, const char *member)
{
  const struct zip_entry *found = zip_entry_find (zr, member);

  return found != NULL;
}
419 /* Return the member called MEMBER from the reader ZR */
421 zip_member_open (struct zip_reader *zr, const char *member)
423 struct zip_entry *ze = zip_entry_find (zr, member);
426 ds_put_format (zr->errs, _("%s: unknown member \"%s\""),
427 zr->file_name, member);
431 FILE *fp = fopen (zr->file_name, "rb");
434 ds_put_format (zr->errs, _("%s: open failed (%s)"),
435 zr->file_name, strerror (errno));
439 struct zip_member *zm = xmalloc (sizeof *zm);
440 zm->file_name = xstrdup (zr->file_name);
441 zm->member_name = xstrdup (member);
443 zm->offset = ze->offset;
444 zm->comp_size = ze->comp_size;
445 zm->ucomp_size = ze->ucomp_size;
446 zm->decompressor = NULL;
447 zm->bytes_unread = ze->ucomp_size;
448 zm->errmsgs = zr->errs;
451 if (0 != fseeko (zm->fp, zm->offset, SEEK_SET))
453 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
454 ze->name, strerror (errno));
458 if (! check_magic (zm->fp, zr->file_name, MAGIC_LHDR, zr->errs))
461 uint16_t v, nlen, extra_len;
462 uint16_t gp, comp_type, time, date;
463 uint32_t ucomp_size, comp_size;
465 if (! get_u16 (zm->fp, &v)) goto error;
466 if (! get_u16 (zm->fp, &gp)) goto error;
467 if (! get_u16 (zm->fp, &comp_type)) goto error;
468 zm->decompressor = get_decompressor (comp_type);
469 if (! zm->decompressor) goto error;
470 if (! get_u16 (zm->fp, &time)) goto error;
471 if (! get_u16 (zm->fp, &date)) goto error;
472 if (! get_u32 (zm->fp, &crc)) goto error;
473 if (! get_u32 (zm->fp, &comp_size)) goto error;
475 if (! get_u32 (zm->fp, &ucomp_size)) goto error;
476 if (! get_u16 (zm->fp, &nlen)) goto error;
477 if (! get_u16 (zm->fp, &extra_len)) goto error;
479 char *name = xzalloc (nlen + 1);
480 if (! get_bytes (zm->fp, name, nlen))
485 if (strcmp (name, ze->name) != 0)
487 ds_put_format (zm->errmsgs,
488 _("%s: name mismatch between central directory (%s) "
489 "and local file header (%s)"),
490 zm->file_name, ze->name, name);
496 skip_bytes (zm->fp, extra_len);
498 if (!zm->decompressor->init (zm))
505 free (zm->file_name);
506 free (zm->member_name);
513 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
516 /* Search for something that looks like the End Of Central Directory in FP.
517 If found, the offset of the record will be placed in OFF.
518 Returns true if found, false otherwise.
521 find_eocd (FILE *fp, off_t *off)
524 const uint32_t magic = MAGIC_EOCD;
527 /* The magic cannot be fewer than 22 bytes from the end of the file,
528 because that is the minimum length of the EndOfCentralDirectory
531 if (0 > fseeko (fp, -22, SEEK_END))
536 stop = start + sizeof (magic);
539 found = probe_magic (fp, magic, start, stop, off);
540 /* FIXME: For extra confidence lookup the directory start record here*/
543 stop = start + sizeof (magic);
553 Search FP for MAGIC starting at START and reaching until STOP.
554 Returns true iff MAGIC is found. False otherwise.
555 OFF receives the location of the magic.
558 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
562 unsigned char seq[4];
565 if (0 > fseeko (fp, start, SEEK_SET))
570 for (i = 0; i < 4 ; ++i)
572 seq[i] = (magic >> i * 8) & 0xFF;
577 if (1 != fread (&byte, 1, 1, fp))
580 if (byte == seq[state])
587 *off = ftello (fp) - 4;
599 /* Null decompressor. */
602 stored_read (struct zip_member *zm, void *buf, size_t n)
604 return fread (buf, 1, n, zm->fp);
608 stored_init (struct zip_member *zm UNUSED)
614 stored_finish (struct zip_member *zm UNUSED)
616 /* Nothing required */
619 static const struct decompressor stored_decompressor =
620 {stored_init, stored_read, stored_finish};
622 /* Inflate decompressor. */
627 #define UCOMPSIZE 4096
633 unsigned char ucomp[UCOMPSIZE];
635 size_t ucomp_bytes_read;
637 /* Two bitfields as defined by RFC1950 */
642 inflate_finish (struct zip_member *zm)
644 struct inflator *inf = zm->aux;
646 inflateEnd (&inf->zss);
652 inflate_init (struct zip_member *zm)
655 struct inflator *inf = xzalloc (sizeof *inf);
658 uint16_t cmf = 0x8; /* Always 8 for inflate */
660 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
662 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
664 /* make these into a 16 bit word */
665 inf->cmf_flg = (cmf << 8) | flg;
667 /* Set the check bits */
668 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
669 assert (inf->cmf_flg % 31 == 0);
671 inf->zss.next_in = Z_NULL;
672 inf->zss.avail_in = 0;
673 inf->zss.zalloc = Z_NULL;
674 inf->zss.zfree = Z_NULL;
675 inf->zss.opaque = Z_NULL;
676 r = inflateInit (&inf->zss);
680 ds_put_format (zm->errmsgs,
681 _("%s: cannot initialize inflator (%s)"),
682 zm->file_name, zError (r));
692 inflate_read (struct zip_member *zm, void *buf, size_t n)
695 struct inflator *inf = zm->aux;
697 if (inf->zss.avail_in == 0)
705 inf->ucomp[1] = inf->cmf_flg ;
706 inf->ucomp[0] = inf->cmf_flg >> 8 ;
712 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
714 if (bytes_to_read == 0)
717 if (bytes_to_read > UCOMPSIZE)
718 bytes_to_read = UCOMPSIZE;
720 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
722 inf->ucomp_bytes_read += bytes_read;
724 inf->zss.avail_in = bytes_read + pad;
725 inf->zss.next_in = inf->ucomp;
727 inf->zss.avail_out = n;
728 inf->zss.next_out = buf;
730 r = inflate (&inf->zss, Z_NO_FLUSH);
733 return n - inf->zss.avail_out;
736 ds_put_format (zm->errmsgs, _("%s: error inflating \"%s\" (%s)"),
737 zm->file_name, zm->member_name, zError (r));
742 static const struct decompressor inflate_decompressor =
743 {inflate_init, inflate_read, inflate_finish};