1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
34 #include "zip-reader.h"
35 #include "zip-private.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) (msgid)
43 char *file_name; /* File name. */
44 char *member_name; /* Member name. */
45 FILE *fp; /* The stream from which the data is read */
46 uint32_t offset; /* Starting offset in file. */
47 uint32_t comp_size; /* Length of member file data, in bytes. */
48 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
/* Operations used to read this member's data.  zip_member_open() sets this
   from get_decompressor(), using the compression type it reads from the
   member's local file header. */
49 const struct decompressor *decompressor;
51 size_t bytes_unread; /* Number of bytes left in the member available for reading */
52 struct string *errmsgs; /* A string to hold error messages.
53 This string is NOT owned by this object. */
/* Called by zip_member_open() after the member's local file header has been
   read; the boolean result is tested there. */
59 bool (*init) (struct zip_member *);
/* Called by zip_member_read() with the destination buffer and the number of
   bytes wanted; the result is treated as a byte count. */
60 int (*read) (struct zip_member *, void *, size_t);
/* Called by zip_member_finish(). */
61 void (*finish) (struct zip_member *);
63 static const struct decompressor stored_decompressor;
64 static const struct decompressor inflate_decompressor;
66 static bool find_eocd (FILE *fp, off_t *off);
/* Maps the compression-type code C to one of the decompressor tables defined
   in this file.  zip_member_open() tests the result for null before using
   it. */
68 static const struct decompressor *
69 get_decompressor (uint16_t c)
74 return &stored_decompressor;
77 return &inflate_decompressor;
86 char *file_name; /* The name of the file from which the data is read */
/* zip_reader_create() also uses n_entries as the index of the next element
   of ENTRIES to fill while it reads the directory. */
87 uint16_t n_entries; /* Number of directory entries. */
88 struct zip_entry *entries; /* Directory entries. */
89 struct string *errs; /* A string to hold error messages. This
90 string is NOT owned by this object. */
/* All four fields are filled in by zip_header_read_next(). */
/* zip_member_open() seeks to OFFSET and expects a local file header there. */
95 uint32_t offset; /* Starting offset in file. */
96 uint32_t comp_size; /* Length of member file data, in bytes. */
97 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
98 char *name; /* Name of member file. */
/* Releases what ZM holds: its two name strings and its decompressor state. */
102 zip_member_finish (struct zip_member *zm)
106 free (zm->file_name);
107 free (zm->member_name);
/* The error string belongs to someone else (zip_member_open() points it at
   the reader's), so it is cleared here, not destroyed. */
108 ds_clear (zm->errmsgs);
/* Give the decompressor the chance to release its own state. */
109 zm->decompressor->finish (zm);
115 /* Destroy the zip reader */
117 zip_reader_destroy (struct zip_reader *zr)
123 free (zr->file_name);
/* Walk the N_ENTRIES directory entries that were read. */
125 for (i = 0; i < zr->n_entries; ++i)
127 struct zip_entry *ze = &zr->entries[i];
135 /* Skip N bytes in F */
137 skip_bytes (FILE *f, size_t n)
/* Moves the file position forward relative to where it is now.
   NOTE(review): the result of fseeko() is discarded, so a failed seek goes
   unnoticed by callers. */
139 fseeko (f, n, SEEK_CUR);
142 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
145 /* Read N bytes from F, storing the result in X */
147 get_bytes (FILE *f, void *x, size_t n)
/* True only when fread() delivered all N bytes; a short read counts as
   failure. */
149 return (n == fread (x, 1, n, f));
152 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
155 /* Read a 32 bit value from F */
157 get_u32 (FILE *f, uint32_t *v)
/* Fetch sizeof x raw bytes; the test below fires when they could not all
   be read. */
160 if (!get_bytes (f, &x, sizeof x))
/* NOTE(review): presumably converts the little-endian value stored in the
   archive to host byte order on big-endian machines -- confirm. */
162 #ifdef WORDS_BIGENDIAN
170 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
173 /* Read a 16 bit value from F */
175 get_u16 (FILE *f, uint16_t *v)
/* Fetch sizeof x raw bytes; the test below fires when they could not all
   be read. */
178 if (!get_bytes (f, &x, sizeof x))
/* NOTE(review): presumably converts the little-endian value stored in the
   archive to host byte order on big-endian machines -- confirm. */
180 #ifdef WORDS_BIGENDIAN
189 /* Reads a 32 bit integer from F and compares it with EXPECTED.
190 Places an error string in ERR if they differ. */
192 check_magic (FILE *f, const char *file_name,
193 uint32_t expected, struct string *err)
/* A failed read returns false straight away, without touching ERR. */
197 if (! get_u32 (f, &magic)) return false;
199 if ((expected != magic))
202 _("%s: corrupt archive at 0x%llx: "
203 "expected %#"PRIx32" but got %#"PRIx32),
/* The stream has already moved past the four bytes just read, so step back
   by that much to report where the magic number starts. */
205 (long long int) ftello (f) - sizeof (uint32_t),
214 /* Reads up to BYTES bytes from ZM and puts them in BUF.
215 Returns the number of bytes read, or -1 on error. */
217 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
/* Each read starts with an empty error string. */
221 ds_clear (zm->errmsgs);
/* Never ask for more than what is left of the member. */
223 if ( bytes > zm->bytes_unread)
224 bytes = zm->bytes_unread;
/* The member's decompressor does the actual reading. */
226 bytes_read = zm->decompressor->read (zm, buf, bytes);
/* Account for what was delivered. */
230 zm->bytes_unread -= bytes_read;
236 /* Reads a central directory header from FILE and initializes ZE with it.
237 Returns true if successful, false otherwise. On error, an error message may be appended to ERRS. */
240 zip_header_read_next (FILE *file, const char *file_name,
241 struct zip_entry *ze, struct string *errs)
243 uint16_t v, nlen, extralen;
244 uint16_t gp, time, date;
245 uint32_t expected_crc;
247 uint16_t clen, diskstart, iattr;
/* The header has to begin with the MAGIC_SOCD signature; check_magic()
   writes the message to ERRS when it does not. */
251 if ( ! check_magic (file, file_name, MAGIC_SOCD, errs))
/* The fixed-size fields follow, read in file order.  The names appear to
   track the central directory file header layout of the ZIP specification.
   Any short read fails the call without adding a message. */
/* Two consecutive 16-bit fields both land in V; the second read overwrites
   the first. */
254 if (! get_u16 (file, &v)) return false;
255 if (! get_u16 (file, &v)) return false;
256 if (! get_u16 (file, &gp)) return false;
257 if (! get_u16 (file, &comp_type)) return false;
258 if (! get_u16 (file, &time)) return false;
259 if (! get_u16 (file, &date)) return false;
260 if (! get_u32 (file, &expected_crc)) return false;
/* The two sizes are stored directly in the entry. */
261 if (! get_u32 (file, &ze->comp_size)) return false;
262 if (! get_u32 (file, &ze->ucomp_size)) return false;
263 if (! get_u16 (file, &nlen)) return false;
264 if (! get_u16 (file, &extralen)) return false;
265 if (! get_u16 (file, &clen)) return false;
266 if (! get_u16 (file, &diskstart)) return false;
267 if (! get_u16 (file, &iattr)) return false;
268 if (! get_u32 (file, &eattr)) return false;
/* So is the offset that zip_member_open() later seeks to. */
269 if (! get_u32 (file, &ze->offset)) return false;
/* NLEN name bytes follow; the buffer has one byte more than that. */
271 ze->name = xzalloc (nlen + 1);
/* NOTE(review): on this failure ze->name stays allocated; whether the caller
   frees it is not visible here. */
272 if (! get_bytes (file, ze->name, nlen)) return false;
/* Step over the EXTRALEN bytes that follow the name.
   NOTE(review): CLEN is read above, but no line shown here skips that many
   bytes -- confirm. */
274 skip_bytes (file, extralen);
280 /* Create a reader from the zip called FILE_NAME */
282 zip_reader_create (const char *file_name, struct string *errs)
284 uint16_t disknum, n_members, total_members;
286 uint32_t central_dir_start, central_dir_length;
288 struct zip_reader *zr = xzalloc (sizeof *zr);
291 ds_init_empty (zr->errs);
293 FILE *file = fopen (file_name, "rb");
296 ds_put_format (zr->errs, _("%s: open failed (%s)"),
297 file_name, strerror (errno));
/* The file has to start with a local file header signature. */
302 if ( ! check_magic (file, file_name, MAGIC_LHDR, zr->errs))
/* Locate the End Of Central Directory record; OFFSET receives its position. */
309 if ( ! find_eocd (file, &offset))
311 ds_put_format (zr->errs, _("%s: cannot find central directory"),
318 if ( 0 != fseeko (file, offset, SEEK_SET))
320 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
321 file_name, strerror (errno));
/* Confirm that what find_eocd() located really carries the EOCD signature. */
328 if ( ! check_magic (file, file_name, MAGIC_EOCD, zr->errs))
/* Read the fixed fields that follow the signature, in file order.  Two
   consecutive 16-bit fields both land in DISKNUM; the second read overwrites
   the first. */
335 if (! get_u16 (file, &disknum)
336 || ! get_u16 (file, &disknum)
338 || ! get_u16 (file, &n_members)
339 || ! get_u16 (file, &total_members)
/* These two arguments are the addresses of the variables declared above;
   the text had been corrupted by a character-encoding artefact. */
341 || ! get_u32 (file, &central_dir_length)
342 || ! get_u32 (file, &central_dir_start))
/* Move to the first central directory header. */
349 if ( 0 != fseeko (file, central_dir_start, SEEK_SET))
351 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
352 file_name, strerror (errno));
358 zr->file_name = xstrdup (file_name);
/* One entry per member announced by the EOCD record. */
360 zr->entries = xcalloc (n_members, sizeof *zr->entries);
361 for (int i = 0; i < n_members; i++)
/* Headers are read into the slot indexed by zr->n_entries, not by I. */
363 if (!zip_header_read_next (file, file_name,
364 &zr->entries[zr->n_entries], errs))
/* A header that cannot be read abandons the whole reader. */
367 zip_reader_destroy (zr);
/* Looks through ZR's directory entries for one whose name equals MEMBER.
   NOTE(review): the return statements are not among the lines here;
   zip_member_open() reports an "unknown member" error after calling this,
   so presumably the result is null when nothing matches -- confirm. */
376 static struct zip_entry *
377 zip_entry_find (struct zip_reader *zr, const char *member)
379 for (int i = 0; i < zr->n_entries; ++i)
381 struct zip_entry *ze = &zr->entries[i];
/* Names must match exactly. */
382 if (0 == strcmp (ze->name, member))
/* Returns the name of the directory entry at index IDX in ZR, or NULL when
   IDX is not less than the number of entries.  The pointer returned is the
   entry's own string, not a copy. */
389 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
391 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
394 /* Return the member called MEMBER from the reader ZR */
396 zip_member_open (struct zip_reader *zr, const char *member)
/* Look MEMBER up in the directory read by zip_reader_create(). */
398 struct zip_entry *ze = zip_entry_find (zr, member);
401 ds_put_format (zr->errs, _("%s: unknown member \"%s\""),
402 zr->file_name, member);
/* Every member opened gets a stream of its own on the archive file. */
406 FILE *fp = fopen (zr->file_name, "rb");
409 ds_put_format (zr->errs, _("%s: open failed (%s)"),
410 zr->file_name, strerror (errno));
414 struct zip_member *zm = xmalloc (sizeof *zm);
/* The member keeps its own copies of both names. */
415 zm->file_name = xstrdup (zr->file_name);
416 zm->member_name = xstrdup (member);
/* Position and sizes come from the central directory entry. */
418 zm->offset = ze->offset;
419 zm->comp_size = ze->comp_size;
420 zm->ucomp_size = ze->ucomp_size;
/* Not known until the compression type has been read below. */
421 zm->decompressor = NULL;
/* Nothing has been read yet, so the whole uncompressed size is outstanding. */
422 zm->bytes_unread = ze->ucomp_size;
/* Errors go to the reader's string; the member does not own it. */
423 zm->errmsgs = zr->errs;
/* Go to where the directory says this member's local file header is. */
426 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
428 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
429 ze->name, strerror (errno));
/* A local file header signature is expected there. */
433 if ( ! check_magic (zm->fp, zr->file_name, MAGIC_LHDR, zr->errs))
436 uint16_t v, nlen, extra_len;
437 uint16_t gp, comp_type, time, date;
438 uint32_t ucomp_size, comp_size;
/* Read the fixed-size header fields in file order; any short read abandons
   the open. */
440 if (! get_u16 (zm->fp, &v)) goto error;
441 if (! get_u16 (zm->fp, &gp)) goto error;
442 if (! get_u16 (zm->fp, &comp_type)) goto error;
/* Choose the decompressor as soon as the compression type is known; a type
   that get_decompressor() has no table for abandons the open. */
443 zm->decompressor = get_decompressor (comp_type);
444 if (! zm->decompressor) goto error;
445 if (! get_u16 (zm->fp, &time)) goto error;
446 if (! get_u16 (zm->fp, &date)) goto error;
447 if (! get_u32 (zm->fp, &crc)) goto error;
/* The sizes in the local header are read into locals; in the lines shown
   here ZM keeps the values copied from the directory entry above. */
448 if (! get_u32 (zm->fp, &comp_size)) goto error;
450 if (! get_u32 (zm->fp, &ucomp_size)) goto error;
451 if (! get_u16 (zm->fp, &nlen)) goto error;
452 if (! get_u16 (zm->fp, &extra_len)) goto error;
/* NLEN name bytes follow; the buffer has one byte more than that. */
454 char *name = xzalloc (nlen + 1);
455 if (! get_bytes (zm->fp, name, nlen))
/* The name in the local header has to agree with the directory's. */
460 if (strcmp (name, ze->name) != 0)
462 ds_put_format (zm->errmsgs,
463 _("%s: name mismatch betwen central directory (%s) "
464 "and local file header (%s)"),
465 zm->file_name, ze->name, name);
/* Step over the EXTRA_LEN bytes that follow the name. */
471 skip_bytes (zm->fp, extra_len);
/* Let the chosen decompressor set itself up. */
473 if (!zm->decompressor->init (zm) )
/* Release the name copies made above. */
480 free (zm->file_name);
481 free (zm->member_name);
488 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
491 /* Search for something that looks like the End Of Central Directory in FP.
492 If found, the offset of the record will be placed in OFF.
493 Returns true if found, false otherwise. */
496 find_eocd (FILE *fp, off_t *off)
499 const uint32_t magic = MAGIC_EOCD;
502 /* The magic cannot start fewer than 22 bytes before the end of the file,
503 because that is the minimum length of the EndOfCentralDirectory record. */
506 if ( 0 > fseeko (fp, -22, SEEK_END))
511 stop = start + sizeof (magic);
514 found = probe_magic (fp, magic, start, stop, off);
515 /* FIXME: For extra confidence lookup the directory start record here*/
518 stop = start + sizeof (magic);
528 /* Search FP for MAGIC starting at START and reaching until STOP.
529 Returns true iff MAGIC is found, false otherwise.
530 OFF receives the location of the magic. */
533 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
537 unsigned char seq[4];
/* Begin the scan at START. */
540 if ( 0 > fseeko (fp, start, SEEK_SET))
/* Split MAGIC into its four bytes, least significant byte first, so that
   SEQ can be compared with the stream byte by byte. */
545 for (i = 0; i < 4 ; ++i)
547 seq[i] = (magic >> i * 8) & 0xFF;
/* The stream is examined one byte at a time. */
552 if (1 != fread (&byte, 1, 1, fp))
/* STATE is the index of the byte of SEQ expected next. */
555 if ( byte == seq[state])
/* The position is now just past the four matching bytes, so the magic began
   four bytes earlier. */
562 *off = ftello (fp) - 4;
574 /* Null decompressor. */
/* Read operation of the null decompressor: copies up to N bytes from ZM's
   stream into BUF unchanged and returns the count reported by fread(). */
577 stored_read (struct zip_member *zm, void *buf, size_t n)
579 return fread (buf, 1, n, zm->fp);
/* Init operation of the null decompressor.  ZM is marked UNUSED. */
583 stored_init (struct zip_member *zm UNUSED)
/* Finish operation of the null decompressor.  ZM is marked UNUSED. */
589 stored_finish (struct zip_member *zm UNUSED)
591 /* Nothing required */
/* Operation table for the null decompressor.  The initializers are
   positional, in the order init, read, finish. */
594 static const struct decompressor stored_decompressor =
595 {stored_init, stored_read, stored_finish};
597 /* Inflate decompressor. */
602 #define UCOMPSIZE 4096
/* Buffer of compressed input: inflate_read() fills it from the file and
   points the zlib stream's next_in at it. */
608 unsigned char ucomp[UCOMPSIZE];
/* Running total of compressed bytes read from the file; inflate_read()
   subtracts it from the member's comp_size to see how much is left. */
610 size_t ucomp_bytes_read;
612 /* Two bitfields as defined by RFC1950 */
/* Finish operation of the inflate decompressor: shuts down ZM's zlib
   stream. */
617 inflate_finish (struct zip_member *zm)
/* The inflator state is reached through ZM->aux. */
619 struct inflator *inf = zm->aux;
621 inflateEnd (&inf->zss);
/* Init operation of the inflate decompressor.  Allocates the inflator
   state, precomputes a two-byte RFC 1950 header word (inflate_read() puts it
   in front of the first compressed data it hands to zlib), and initializes
   the zlib stream.  A failure is reported in ZM's error string. */
627 inflate_init (struct zip_member *zm)
630 struct inflator *inf = xzalloc (sizeof *inf);
633 uint16_t cmf = 0x8; /* Always 8 for inflate */
/* With cinfo = 7 the window size is 2^(7 + 8) bytes. */
635 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
637 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
639 /* make these into a 16 bit word */
640 inf->cmf_flg = (cmf << 8 ) | flg;
/* Round up so that the 16-bit word is a multiple of 31, which the assertion
   then confirms. */
642 /* Set the check bits */
643 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
644 assert (inf->cmf_flg % 31 == 0);
/* No input is available yet, and zlib is asked to use its default
   allocator. */
646 inf->zss.next_in = Z_NULL;
647 inf->zss.avail_in = 0;
648 inf->zss.zalloc = Z_NULL;
649 inf->zss.zfree = Z_NULL;
650 inf->zss.opaque = Z_NULL;
651 r = inflateInit (&inf->zss);
655 ds_put_format (zm->errmsgs,
656 _("%s: cannot initialize inflator (%s)"),
657 zm->file_name, zError (r));
/* Read operation of the inflate decompressor: inflates into BUF, which has
   room for N bytes, refilling the compressed-input buffer from ZM's stream
   when zlib has used it up. */
667 inflate_read (struct zip_member *zm, void *buf, size_t n)
670 struct inflator *inf = zm->aux;
/* Refill only once zlib has consumed all the input it was given. */
672 if (inf->zss.avail_in == 0)
/* On the first refill, put the header word computed by inflate_init() at
   the front of the buffer, most significant byte first. */
678 if ( inf->state == 0)
680 inf->ucomp[1] = inf->cmf_flg ;
681 inf->ucomp[0] = inf->cmf_flg >> 8 ;
/* Compressed bytes of this member that have not been read from the file. */
687 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
689 if (bytes_to_read == 0)
/* Never read more than the buffer holds. */
692 if (bytes_to_read > UCOMPSIZE)
693 bytes_to_read = UCOMPSIZE;
/* NOTE(review): PAD presumably counts the header bytes stored at the front
   of the buffer -- its assignment is not among the lines here.  If fewer
   than PAD compressed bytes remain, bytes_to_read - pad goes below zero --
   confirm that this cannot happen. */
695 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
697 inf->ucomp_bytes_read += bytes_read;
/* Hand zlib everything now in the buffer, starting at its beginning. */
699 inf->zss.avail_in = bytes_read + pad;
700 inf->zss.next_in = inf->ucomp;
/* Output goes straight into the caller's buffer. */
702 inf->zss.avail_out = n;
703 inf->zss.next_out = buf;
705 r = inflate (&inf->zss, Z_NO_FLUSH);
/* Bytes produced = space offered minus space zlib left unused. */
708 return n - inf->zss.avail_out;
711 ds_put_format (zm->errmsgs, _("%s: error inflating \"%s\" (%s)"),
712 zm->file_name, zm->member_name, zError (r));
/* Operation table for the inflate decompressor.  The initializers are
   positional, in the order init, read, finish. */
717 static const struct decompressor inflate_decompressor =
718 {inflate_init, inflate_read, inflate_finish};