1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
34 #include "zip-reader.h"
35 #include "zip-private.h"
/* Pass MSGID through gettext(). */
38 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged. */
39 #define N_(msgid) (msgid)
/* Read state for one archive member.  These fields are reached through
   `struct zip_member *' elsewhere in this file; the line that opens the
   declaration is not visible in this excerpt. */
43 char *file_name; /* File name. */
44 char *member_name; /* Member name. */
45 FILE *fp; /* The stream from which the data is read */
46 uint32_t offset; /* Starting offset in file. */
47 uint32_t comp_size; /* Length of member file data as stored in the archive, in bytes. */
48 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
49 const struct decompressor *decompressor; /* init/read/finish callbacks used for this member. */
51 size_t bytes_unread; /* Number of bytes left in the member available for reading */
52 struct string *errmsgs; /* A string to hold error messages.
53 This string is NOT owned by this object. */
/* Callbacks of a decompressor (the enclosing declaration's opening line is
   not visible in this excerpt):
   - init is called by zip_member_open() once the local file header has
     been read;
   - read is called by zip_member_read() with a byte count already limited
     to the member's unread bytes;
   - finish is called by zip_member_finish(). */
59 bool (*init) (struct zip_member *);
60 int (*read) (struct zip_member *, void *, size_t);
61 void (*finish) (struct zip_member *);
/* Forward declarations for objects and functions defined later in this
   file. */
63 static const struct decompressor stored_decompressor;
64 static const struct decompressor inflate_decompressor;
66 static bool find_eocd (FILE *fp, off_t *off);
/* Returns the decompressor to use for compression-method code C.
   Only the two return statements are visible in this excerpt; the tests on
   C that select between them, and whatever is returned for any other code,
   are on lines not shown.  zip_member_open() treats a null result as an
   error. */
68 static const struct decompressor *
69 get_decompressor (uint16_t c)
74 return &stored_decompressor;
77 return &inflate_decompressor;
/* State of an open archive.  These fields are reached through
   `struct zip_reader *' elsewhere in this file; the line that opens the
   declaration is not visible in this excerpt. */
86 char *file_name; /* The name of the file from which the data is read */
87 uint16_t n_entries; /* Number of directory entries. */
88 struct zip_entry *entries; /* Directory entries. */
89 struct string *errs; /* A string to hold error messages. This
90 string is NOT owned by this object. */
/* One central directory entry, as filled in by zip_header_read_next().
   The line that opens the declaration is not visible in this excerpt. */
95 uint32_t offset; /* Starting offset in file. */
96 uint32_t comp_size; /* Length of member file data, in bytes. */
97 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
98 char *name; /* Name of member file. */
/* Releases the resources held by ZM.
   The visible statements free ZM's two name strings, call ds_clear() on the
   error string ZM points at (which ZM does not own), and give the
   decompressor a chance to release its own state.  Any guard around these
   statements and any further cleanup are on lines not visible here. */
102 zip_member_finish (struct zip_member *zm)
106 free (zm->file_name);
107 free (zm->member_name);
108 ds_clear (zm->errmsgs);
109 zm->decompressor->finish (zm);
115 /* Destroy the zip reader ZR.
   Frees ZR's copy of the archive file name and then visits each of the
   ZR->n_entries directory entries; what is released per entry is on lines
   not visible in this excerpt. */
117 zip_reader_destroy (struct zip_reader *zr)
123 free (zr->file_name);
125 for (i = 0; i < zr->n_entries; ++i)
127 struct zip_entry *ze = &zr->entries[i];
135 /* Skip N bytes in F by seeking forward from the current position.
   The result of fseeko() is not checked here, so a failed seek is not
   reported to the caller. */
137 skip_bytes (FILE *f, size_t n)
139 fseeko (f, n, SEEK_CUR);
142 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
145 /* Read N bytes from F, storing the result in X.
   Returns true only if fread() delivered all N bytes; a short read (end of
   file or error) yields false. */
147 get_bytes (FILE *f, void *x, size_t n)
149 return (n == fread (x, 1, n, f));
152 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
155 /* Read a 32 bit value from F.
   The raw bytes are fetched with get_bytes(); a short read is detected by
   the test below.  The code that follows depends on WORDS_BIGENDIAN and is
   not visible in this excerpt -- presumably the value is byte-swapped on
   big-endian hosts before being stored in *V; confirm. */
157 get_u32 (FILE *f, uint32_t *v)
160 if (!get_bytes (f, &x, sizeof x))
162 #ifdef WORDS_BIGENDIAN
170 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
173 /* Read a 16 bit value from F.
   The raw bytes are fetched with get_bytes(); a short read is detected by
   the test below.  The code that follows depends on WORDS_BIGENDIAN and is
   not visible in this excerpt -- presumably the value is byte-swapped on
   big-endian hosts before being stored in *V; confirm. */
175 get_u16 (FILE *f, uint16_t *v)
178 if (!get_bytes (f, &x, sizeof x))
180 #ifdef WORDS_BIGENDIAN
/* Note: when the 32-bit value cannot be read at all, false is returned
   straight away and the visible code adds nothing to ERR. */
189 /* Read 32 bit integer and compare it with EXPECTED.
190 place an error string in ERR if necessary. */
192 check_magic (FILE *f, const char *file_name,
193 uint32_t expected, struct string *err)
197 if (! get_u32 (f, &magic)) return false;
199 if ((expected != magic))
202 _("%s: corrupt archive at 0x%llx: "
203 "expected %#"PRIx32" but got %#"PRIx32),
/* The stream is now just past the value that was read, so its size is
   subtracted to report the offset at which that value began. */
205 (long long int) ftello (f) - sizeof (uint32_t),
214 /* Reads up to BYTES bytes from ZM and puts them in BUF.
215 Returns the number of bytes read, or -1 on error.
   Calls ds_clear() on ZM's error string before reading. */
217 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
221 ds_clear (zm->errmsgs);
/* Never ask the decompressor for more than the member still has. */
223 if ( bytes > zm->bytes_unread)
224 bytes = zm->bytes_unread;
226 bytes_read = zm->decompressor->read (zm, buf, bytes);
/* NOTE(review): the lines between the read above and this update are not
   visible; presumably a negative result returns before reaching here --
   confirm. */
230 zm->bytes_unread -= bytes_read;
235 /* Read all of the member named MEMBER_NAME in ZR into memory, storing the
   data in *DATAP and its size in *NP.
236 Returns NULL if successful, otherwise an error string that the caller
237 must eventually free().
   *DATAP is obtained from xmalloc() with the member's uncompressed size.
   Error strings are taken from ZR->errs with ds_steal_cstr(). */
238 char * WARN_UNUSED_RESULT
239 zip_member_read_all (struct zip_reader *zr, const char *member_name,
240 void **datap, size_t *np)
242 struct zip_member *zm = zip_member_open (zr, member_name);
247 return ds_steal_cstr (zr->errs);
250 *datap = xmalloc (zm->ucomp_size);
251 *np = zm->ucomp_size;
253 uint8_t *data = *datap;
/* Each pass writes at the offset already filled (total size minus what is
   still unread) and requests everything that remains.
   NOTE(review): the loop ends only when bytes_unread reaches zero or a read
   returns -1; a read that returns 0 without error would not make progress.
   Confirm against the lines not visible here. */
254 while (zm->bytes_unread)
255 if (zip_member_read (zm, data + (zm->ucomp_size - zm->bytes_unread),
256 zm->bytes_unread) == -1)
258 zip_member_finish (zm);
262 return ds_steal_cstr (zr->errs);
265 zip_member_finish (zm);
/* Review notes for zip_header_read_next (the function's own comment below
   is cut short in this excerpt and is left untouched):
   - The record must begin with MAGIC_SOCD, verified through check_magic().
   - The fixed-size fields are then read strictly in the order shown; any
     short read returns false immediately without adding to ERRS.
   - Of the values read, only comp_size, ucomp_size, offset and the name
     are stored in ZE; the rest go to locals.  comp_type and eattr are
     declared on lines not visible here.
   - V is read twice in a row, so the first value is overwritten.
   - The name buffer is NLEN + 1 bytes from xzalloc() (presumably
     zero-filled, which would keep the name NUL-terminated -- confirm).
   - EXTRALEN bytes after the name are skipped; what is done with CLEN is
     on lines not visible here. */
269 /* Read a central directory header from FILE and initializes ZE with it.
270 Returns true if successful, false otherwise. On error, appends error
273 zip_header_read_next (FILE *file, const char *file_name,
274 struct zip_entry *ze, struct string *errs)
276 uint16_t v, nlen, extralen;
277 uint16_t gp, time, date;
278 uint32_t expected_crc;
280 uint16_t clen, diskstart, iattr;
284 if ( ! check_magic (file, file_name, MAGIC_SOCD, errs))
287 if (! get_u16 (file, &v)) return false;
288 if (! get_u16 (file, &v)) return false;
289 if (! get_u16 (file, &gp)) return false;
290 if (! get_u16 (file, &comp_type)) return false;
291 if (! get_u16 (file, &time)) return false;
292 if (! get_u16 (file, &date)) return false;
293 if (! get_u32 (file, &expected_crc)) return false;
294 if (! get_u32 (file, &ze->comp_size)) return false;
295 if (! get_u32 (file, &ze->ucomp_size)) return false;
296 if (! get_u16 (file, &nlen)) return false;
297 if (! get_u16 (file, &extralen)) return false;
298 if (! get_u16 (file, &clen)) return false;
299 if (! get_u16 (file, &diskstart)) return false;
300 if (! get_u16 (file, &iattr)) return false;
301 if (! get_u32 (file, &eattr)) return false;
302 if (! get_u32 (file, &ze->offset)) return false;
304 ze->name = xzalloc (nlen + 1);
305 if (! get_bytes (file, ze->name, nlen)) return false;
307 skip_bytes (file, extralen);
313 /* Create a reader from the zip called FILE_NAME.
   The visible steps are: open the file; require a local-header magic
   (MAGIC_LHDR) at the current position; locate the end-of-central-directory
   record with find_eocd() and seek to it; require MAGIC_EOCD there; read
   the record's counts plus the central directory length and start offset;
   seek to the central directory; and read N_MEMBERS headers into a freshly
   allocated entry array.  Error text is written with ds_put_format() to the
   reader's error string.  If a header cannot be read the reader is
   destroyed.  The values returned on success and failure are on lines not
   visible in this excerpt. */
315 zip_reader_create (const char *file_name, struct string *errs)
317 uint16_t disknum, n_members, total_members;
319 uint32_t central_dir_start, central_dir_length;
321 struct zip_reader *zr = xzalloc (sizeof *zr);
324 ds_init_empty (zr->errs);
326 FILE *file = fopen (file_name, "rb");
329 ds_put_format (zr->errs, _("%s: open failed (%s)"),
330 file_name, strerror (errno));
335 if ( ! check_magic (file, file_name, MAGIC_LHDR, zr->errs))
342 if ( ! find_eocd (file, &offset))
344 ds_put_format (zr->errs, _("%s: cannot find central directory"),
351 if ( 0 != fseeko (file, offset, SEEK_SET))
353 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
354 file_name, strerror (errno));
361 if ( ! check_magic (file, file_name, MAGIC_EOCD, zr->errs))
/* DISKNUM is read twice in a row; the first value is overwritten. */
368 if (! get_u16 (file, &disknum)
369 || ! get_u16 (file, &disknum)
371 || ! get_u16 (file, &n_members)
372 || ! get_u16 (file, &total_members)
374 || ! get_u32 (file, &central_dir_length)
375 || ! get_u32 (file, &central_dir_start))
382 if ( 0 != fseeko (file, central_dir_start, SEEK_SET))
384 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
385 file_name, strerror (errno));
391 zr->file_name = xstrdup (file_name);
393 zr->entries = xcalloc (n_members, sizeof *zr->entries);
394 for (int i = 0; i < n_members; i++)
/* Each header is read into the next unused slot, indexed by the reader's
   own entry count rather than by I. */
396 if (!zip_header_read_next (file, file_name,
397 &zr->entries[zr->n_entries], errs))
400 zip_reader_destroy (zr);
/* Searches ZR's directory entries in order for one whose name is exactly
   equal to MEMBER (compared with strcmp()).  The statements that return the
   match, and the result when nothing matches, are on lines not visible in
   this excerpt; zip_reader_contains_member() relies on a null result
   meaning "not found". */
409 static struct zip_entry *
410 zip_entry_find (const struct zip_reader *zr, const char *member)
412 for (int i = 0; i < zr->n_entries; ++i)
414 struct zip_entry *ze = &zr->entries[i];
415 if (0 == strcmp (ze->name, member))
/* Returns the name of the directory entry at index IDX in ZR, or NULL if
   IDX is not less than the number of entries.  The returned pointer is the
   entry's own name field, not a copy. */
422 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
424 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
427 /* Returns true if ZR contains a member named MEMBER, false otherwise.
   The lookup is delegated to zip_entry_find(). */
429 zip_reader_contains_member (const struct zip_reader *zr, const char *member)
431 return zip_entry_find (zr, member) != NULL;
434 /* Return the member called MEMBER from the reader ZR.
   The visible steps are: look MEMBER up in the central directory; open the
   archive file again with fopen(); build a zip_member whose offset and
   sizes are copied from the directory entry and whose error string is
   shared with ZR; seek to the member's offset; require MAGIC_LHDR there;
   read the local file header; check that its name matches the directory
   entry; skip the extra field; and initialise the decompressor.  Braces,
   the error label and the return statements are on lines not visible in
   this excerpt. */
436 zip_member_open (struct zip_reader *zr, const char *member)
438 struct zip_entry *ze = zip_entry_find (zr, member);
441 ds_put_format (zr->errs, _("%s: unknown member \"%s\""),
442 zr->file_name, member);
446 FILE *fp = fopen (zr->file_name, "rb");
449 ds_put_format (zr->errs, _("%s: open failed (%s)"),
450 zr->file_name, strerror (errno));
454 struct zip_member *zm = xmalloc (sizeof *zm);
455 zm->file_name = xstrdup (zr->file_name);
456 zm->member_name = xstrdup (member);
/* NOTE(review): the assignment of FP to zm->fp is presumably on the line
   missing just below -- confirm. */
458 zm->offset = ze->offset;
459 zm->comp_size = ze->comp_size;
460 zm->ucomp_size = ze->ucomp_size;
461 zm->decompressor = NULL;
462 zm->bytes_unread = ze->ucomp_size;
463 zm->errmsgs = zr->errs;
466 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
/* This message is prefixed with the member name, whereas the other
   messages in this function use the archive file name. */
468 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
469 ze->name, strerror (errno));
473 if ( ! check_magic (zm->fp, zr->file_name, MAGIC_LHDR, zr->errs))
476 uint16_t v, nlen, extra_len;
477 uint16_t gp, comp_type, time, date;
478 uint32_t ucomp_size, comp_size;
/* Local file header fields, read in order.  CRC is declared on a line not
   visible here. */
480 if (! get_u16 (zm->fp, &v)) goto error;
481 if (! get_u16 (zm->fp, &gp)) goto error;
482 if (! get_u16 (zm->fp, &comp_type)) goto error;
/* The decompressor is selected from the local header's compression
   method; a null result takes the error path. */
483 zm->decompressor = get_decompressor (comp_type);
484 if (! zm->decompressor) goto error;
485 if (! get_u16 (zm->fp, &time)) goto error;
486 if (! get_u16 (zm->fp, &date)) goto error;
487 if (! get_u32 (zm->fp, &crc)) goto error;
/* The two sizes below are read into locals; the sizes stored in ZM were
   already taken from the directory entry above. */
488 if (! get_u32 (zm->fp, &comp_size)) goto error;
490 if (! get_u32 (zm->fp, &ucomp_size)) goto error;
491 if (! get_u16 (zm->fp, &nlen)) goto error;
492 if (! get_u16 (zm->fp, &extra_len)) goto error;
494 char *name = xzalloc (nlen + 1);
495 if (! get_bytes (zm->fp, name, nlen))
500 if (strcmp (name, ze->name) != 0)
502 ds_put_format (zm->errmsgs,
503 _("%s: name mismatch betwen central directory (%s) "
504 "and local file header (%s)"),
505 zm->file_name, ze->name, name);
511 skip_bytes (zm->fp, extra_len);
513 if (!zm->decompressor->init (zm) )
/* Cleanup of the strings duplicated above; the surrounding label and the
   remaining cleanup are not visible here. */
520 free (zm->file_name);
521 free (zm->member_name);
528 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
/* Review notes for find_eocd (the function's own comments below are cut
   short in this excerpt and are left untouched):
   - The search begins by seeking to 22 bytes before the end of FP; the
     in-function comment gives 22 as the minimum length of the record.
   - Each probe covers START up to START + sizeof magic and is delegated to
     probe_magic(), which stores the offset in *OFF.
   - How START is obtained and moved between probes, the loop structure and
     the return statements are on lines not visible here. */
531 /* Search for something that looks like the End Of Central Directory in FP.
532 If found, the offset of the record will be placed in OFF.
533 Returns true if found false otherwise.
536 find_eocd (FILE *fp, off_t *off)
539 const uint32_t magic = MAGIC_EOCD;
542 /* The magic cannot be more than 22 bytes from the end of the file,
543 because that is the minimum length of the EndOfCentralDirectory
546 if ( 0 > fseeko (fp, -22, SEEK_END))
551 stop = start + sizeof (magic);
554 found = probe_magic (fp, magic, start, stop, off);
555 /* FIXME: For extra confidence lookup the directory start record here*/
558 stop = start + sizeof (magic);
568 Search FP for MAGIC starting at START and reaching until STOP.
569 Returns true iff MAGIC is found. False otherwise.
570 OFF receives the location of the magic.
/* Scans FP byte by byte, beginning at offset START, for the four bytes of
   MAGIC.  The matching state machine, the use of STOP and the return
   statements are on lines not visible in this excerpt. */
573 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
577 unsigned char seq[4];
580 if ( 0 > fseeko (fp, start, SEEK_SET))
/* Split MAGIC into bytes, least significant byte first, which is the order
   in which they are compared against the file. */
585 for (i = 0; i < 4 ; ++i)
587 seq[i] = (magic >> i * 8) & 0xFF;
592 if (1 != fread (&byte, 1, 1, fp))
595 if ( byte == seq[state])
/* The stream position is taken after a byte has been consumed; subtracting
   4 gives the offset four bytes earlier, presumably where the matched
   sequence began -- confirm against the missing lines. */
602 *off = ftello (fp) - 4;
614 /* Null decompressor. */
/* Copies up to N bytes from ZM's stream directly into BUF and returns the
   count reported by fread().  A short count is returned as is; the visible
   code does not turn a stream error into -1. */
617 stored_read (struct zip_member *zm, void *buf, size_t n)
619 return fread (buf, 1, n, zm->fp);
/* Init callback of the null decompressor.  ZM is marked UNUSED; the body is
   on lines not visible in this excerpt. */
623 stored_init (struct zip_member *zm UNUSED)
/* Finish callback of the null decompressor. */
629 stored_finish (struct zip_member *zm UNUSED)
631 /* Nothing required */
/* Callback table for the null decompressor, in the order init, read,
   finish. */
634 static const struct decompressor stored_decompressor =
635 {stored_init, stored_read, stored_finish};
637 /* Inflate decompressor. */
/* Size in bytes of the buffer that holds data read from the archive before
   it is handed to inflate(). */
642 #define UCOMPSIZE 4096
/* The two members below are reached through `struct inflator *' in the
   functions that follow; the rest of the declaration is not visible in
   this excerpt. */
648 unsigned char ucomp[UCOMPSIZE];
/* Running total of bytes fetched from the archive with fread(); compared
   against the member's comp_size in inflate_read(). */
650 size_t ucomp_bytes_read;
652 /* Two bitfields as defined by RFC1950 */
/* Finish callback of the inflate decompressor: ends the zlib stream kept in
   ZM's inflator state.  Anything else that is released is on lines not
   visible in this excerpt. */
657 inflate_finish (struct zip_member *zm)
659 struct inflator *inf = zm->aux;
661 inflateEnd (&inf->zss);
/* Init callback of the inflate decompressor.
   Allocates the inflator state, precomputes a two-byte CMF/FLG header word
   and initialises the zlib stream with inflateInit().  inflate_read()
   later places that word in the first two bytes of the input buffer --
   presumably because the member data carries no such header itself;
   confirm.  On failure a message is written to ZM's error string.  FLG is
   declared on a line not visible here, as are the return statements. */
667 inflate_init (struct zip_member *zm)
670 struct inflator *inf = xzalloc (sizeof *inf);
673 uint16_t cmf = 0x8; /* Always 8 for inflate */
675 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
677 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
679 /* make these into a 16 bit word */
680 inf->cmf_flg = (cmf << 8 ) | flg;
682 /* Set the check bits */
/* Rounds the word up so that it is an exact multiple of 31, which the
   assertion below verifies. */
683 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
684 assert (inf->cmf_flg % 31 == 0);
/* No input is available yet, and Z_NULL is passed for zlib's allocator
   hooks. */
686 inf->zss.next_in = Z_NULL;
687 inf->zss.avail_in = 0;
688 inf->zss.zalloc = Z_NULL;
689 inf->zss.zfree = Z_NULL;
690 inf->zss.opaque = Z_NULL;
691 r = inflateInit (&inf->zss);
695 ds_put_format (zm->errmsgs,
696 _("%s: cannot initialize inflator (%s)"),
697 zm->file_name, zError (r));
/* Read callback of the inflate decompressor: produces up to N bytes of
   decompressed data in BUF.
   When zlib has consumed all of its input, the input buffer is refilled
   from ZM's stream (at most UCOMPSIZE bytes, and no more than the member
   has left).  On the first refill the precomputed CMF/FLG word is placed
   in the first two bytes of the buffer.  The visible success path returns
   the number of bytes zlib wrote to BUF; on failure a message is written
   to ZM's error string.  Declarations of the locals, the conditions around
   the returns and the failure return value are on lines not visible in
   this excerpt. */
707 inflate_read (struct zip_member *zm, void *buf, size_t n)
710 struct inflator *inf = zm->aux;
/* Refill only when zlib has no pending input. */
712 if (inf->zss.avail_in == 0)
718 if ( inf->state == 0)
/* High byte of the header word first, then the low byte. */
720 inf->ucomp[1] = inf->cmf_flg ;
721 inf->ucomp[0] = inf->cmf_flg >> 8 ;
/* What remains of the member's stored data. */
727 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
729 if (bytes_to_read == 0)
732 if (bytes_to_read > UCOMPSIZE)
733 bytes_to_read = UCOMPSIZE;
/* PAD is the number of header bytes already placed at the front of the
   buffer (assigned on lines not visible here).
   NOTE(review): if fewer than PAD bytes remain, `bytes_to_read - pad'
   becomes negative and is converted to a very large size_t request for
   fread() -- confirm whether that case can occur. */
735 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
737 inf->ucomp_bytes_read += bytes_read;
739 inf->zss.avail_in = bytes_read + pad;
740 inf->zss.next_in = inf->ucomp;
742 inf->zss.avail_out = n;
743 inf->zss.next_out = buf;
745 r = inflate (&inf->zss, Z_NO_FLUSH);
/* NOTE(review): the test on R that guards this return is not visible;
   confirm how Z_STREAM_END is treated. */
748 return n - inf->zss.avail_out;
751 ds_put_format (zm->errmsgs, _("%s: error inflating \"%s\" (%s)"),
752 zm->file_name, zm->member_name, zError (r));
/* Callback table for the inflate decompressor, in the order init, read,
   finish. */
757 static const struct decompressor inflate_decompressor =
758 {inflate_init, inflate_read, inflate_finish};