1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
34 #include "zip-reader.h"
35 #include "zip-private.h"
/* Translate MSGID at run time by passing it to gettext(). */
38 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged; no run-time lookup is performed. */
39 #define N_(msgid) (msgid)
/* Per-member read state, accessed through `zm->' below.  (The enclosing
   struct header is not visible in this listing.)  The two name strings are
   xstrdup() copies made by zip_member_open() and freed by
   zip_member_finish(). */
43 char *file_name; /* File name. */
44 char *member_name; /* Member name. */
45 FILE *fp; /* The stream from which the data is read */
46 uint32_t offset; /* Starting offset in file. */
47 uint32_t comp_size; /* Length of member file data, in bytes. */
48 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
/* Hooks used to read this member's data; chosen by get_decompressor() from
   the compression type found in the member's local file header. */
49 const struct decompressor *decompressor;
/* Starts at the uncompressed size and is reduced by zip_member_read() by the
   number of bytes each call delivers. */
51 size_t bytes_unread; /* Number of bytes left in the member available for reading */
52 struct string *errmsgs; /* A string to hold error messages.
53 This string is NOT owned by this object. */
/* Hooks supplied by one decompression method.  (The enclosing struct header
   is not visible in this listing.) */
/* Prepares a member for reading; zip_member_open() tests the boolean
   result. */
59 bool (*init) (struct zip_member *);
/* Reads data into the buffer on behalf of zip_member_read(), which subtracts
   the returned count from the member's bytes_unread. */
60 int (*read) (struct zip_member *, void *, size_t);
/* Invoked from zip_member_finish(). */
61 void (*finish) (struct zip_member *);
63 static const struct decompressor stored_decompressor;
64 static const struct decompressor inflate_decompressor;
66 static bool find_eocd (FILE *fp, off_t *off);
/* Maps the compression-type code C to a decompressor.  The two visible
   results are the "stored" and "inflate" tables; the selection logic itself
   is not shown in this listing.  zip_member_open() checks the result for
   NULL, so an unrecognized code presumably yields NULL -- TODO confirm. */
68 static const struct decompressor *
69 get_decompressor (uint16_t c)
74 return &stored_decompressor;
77 return &inflate_decompressor;
/* Reader state for one archive, accessed through `zr->' below.  (The
   enclosing struct header is not visible in this listing.)  FILE_NAME is an
   xstrdup() copy made by zip_reader_create() and freed by
   zip_reader_destroy(). */
86 char *file_name; /* The name of the file from which the data is read */
87 uint16_t n_entries; /* Number of directory entries. */
88 struct zip_entry *entries; /* Directory entries. */
89 struct string *errs; /* A string to hold error messages. This
90 string is NOT owned by this object. */
/* One directory entry, filled in by zip_header_read_next().  (The enclosing
   struct header is not visible in this listing.) */
/* zip_member_open() seeks to OFFSET and expects to find the local file
   header magic (MAGIC_LHDR) there. */
95 uint32_t offset; /* Starting offset in file. */
96 uint32_t comp_size; /* Length of member file data, in bytes. */
97 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
/* Allocated by zip_header_read_next() with one extra zeroed byte, so it is
   always NUL-terminated. */
98 char *name; /* Name of member file. */
/* Tears down ZM once the caller is done with it.  The steps visible here
   free ZM's two name strings, clear the error string (which ZM shares with
   its reader and does not own), and run the decompressor's finish hook.
   Other lines of the body are not shown in this listing. */
102 zip_member_finish (struct zip_member *zm)
106 free (zm->file_name);
107 free (zm->member_name);
108 ds_clear (zm->errmsgs);
109 zm->decompressor->finish (zm);
115 /* Destroy the zip reader ZR.  Frees ZR's copy of the file name and then
   visits each of its N_ENTRIES directory entries; the per-entry cleanup is
   not visible in this listing. */
117 zip_reader_destroy (struct zip_reader *zr)
123 free (zr->file_name);
125 for (i = 0; i < zr->n_entries; ++i)
127 struct zip_entry *ze = &zr->entries[i];
135 /* Skip N bytes in F by seeking forward from the current position.
   NOTE(review): the result of fseeko() is discarded, so a failed seek goes
   unnoticed by callers. */
137 skip_bytes (FILE *f, size_t n)
139 fseeko (f, n, SEEK_CUR);
142 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
145 /* Read N bytes from F, storing the result in X.
   Returns true only if fread() delivered all N bytes; a short read (end of
   file or I/O error) yields false. */
147 get_bytes (FILE *f, void *x, size_t n)
149 return (n == fread (x, 1, n, f));
152 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
155 /* Read a 32 bit value from F.
   The raw bytes are fetched with get_bytes(), whose failure is tested here.
   The code selected by WORDS_BIGENDIAN and the store into *V are not visible
   in this listing; presumably the value is byte-swapped on big-endian hosts
   -- TODO confirm. */
157 get_u32 (FILE *f, uint32_t *v)
160 if (!get_bytes (f, &x, sizeof x))
162 #ifdef WORDS_BIGENDIAN
170 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
173 /* Read a 16 bit value from F.
   The raw bytes are fetched with get_bytes(), whose failure is tested here.
   The code selected by WORDS_BIGENDIAN and the store into *V are not visible
   in this listing; presumably the value is byte-swapped on big-endian hosts
   -- TODO confirm. */
175 get_u16 (FILE *f, uint16_t *v)
178 if (!get_bytes (f, &x, sizeof x))
180 #ifdef WORDS_BIGENDIAN
189 /* Read 32 bit integer and compare it with EXPECTED.
190 Place an error string in ERR if necessary. */
/* If the integer cannot be read at all, false is returned without adding a
   message.  On a mismatch the message names FILE_NAME and reports the offset
   at which the four magic bytes began (the current position minus their
   size), followed by the expected and actual values. */
192 check_magic (FILE *f, const char *file_name,
193 uint32_t expected, struct string *err)
197 if (! get_u32 (f, &magic)) return false;
199 if ((expected != magic))
202 _("%s: corrupt archive at 0x%llx: "
203 "expected %#"PRIx32" but got %#"PRIx32),
205 (long long int) ftello (f) - sizeof (uint32_t),
214 /* Reads up to BYTES bytes from ZM and puts them in BUF.
215 Returns the number of bytes read, or -1 on error */
/* The shared error string is cleared first.  BYTES is capped at the number
   of bytes still unread in the member, and the actual reading is delegated
   to the member's decompressor, whose count is then subtracted from
   bytes_unread. */
217 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
221 ds_clear (zm->errmsgs);
223 if ( bytes > zm->bytes_unread)
224 bytes = zm->bytes_unread;
226 bytes_read = zm->decompressor->read (zm, buf, bytes);
230 zm->bytes_unread -= bytes_read;
235 /* Read all of the member of ZR named MEMBER_NAME into memory, storing the data in *DATAP and its size in *NP.
236 Returns NULL if successful, otherwise an error string that the caller
237 must eventually free(). */
/* The error string, when returned, is taken from ZR's error buffer with
   ds_steal_cstr(). */
238 char * WARN_UNUSED_RESULT
239 zip_member_read_all (struct zip_reader *zr, const char *member_name,
240 void **datap, size_t *np)
242 struct zip_member *zm = zip_member_open (zr, member_name);
247 return ds_steal_cstr (zr->errs);
/* The buffer is sized from the member's recorded uncompressed length. */
250 *datap = xmalloc (zm->ucomp_size);
251 *np = zm->ucomp_size;
253 uint8_t *data = *datap;
/* Each pass asks for everything still unread and writes it at the offset
   already consumed (ucomp_size - bytes_unread). */
254 while (zm->bytes_unread)
255 if (zip_member_read (zm, data + (zm->ucomp_size - zm->bytes_unread),
256 zm->bytes_unread) == -1)
/* Read failure: release the member before handing back the error text. */
258 zip_member_finish (zm);
262 return ds_steal_cstr (zr->errs);
265 zip_member_finish (zm);
269 /* Read a central directory header from FILE and initialize ZE with it.
270 Returns true if successful, false otherwise. On error, appends an error
message to ERRS when the header magic is wrong; a short read returns false
without adding a message.

ZE receives the compressed size, uncompressed size, local header offset and
a freshly allocated, NUL-terminated copy of the member name.  The remaining
header fields are read only to advance the stream. */
273 zip_header_read_next (FILE *file, const char *file_name,
274 struct zip_entry *ze, struct string *errs)
276 uint16_t v, nlen, extralen;
277 uint16_t gp, time, date;
278 uint32_t expected_crc;
280 uint16_t clen, diskstart, iattr;
284 if ( ! check_magic (file, file_name, MAGIC_SOCD, errs))
/* The two version fields are both read into V; neither value is used. */
287 if (! get_u16 (file, &v)) return false;
288 if (! get_u16 (file, &v)) return false;
289 if (! get_u16 (file, &gp)) return false;
290 if (! get_u16 (file, &comp_type)) return false;
291 if (! get_u16 (file, &time)) return false;
292 if (! get_u16 (file, &date)) return false;
293 if (! get_u32 (file, &expected_crc)) return false;
294 if (! get_u32 (file, &ze->comp_size)) return false;
295 if (! get_u32 (file, &ze->ucomp_size)) return false;
296 if (! get_u16 (file, &nlen)) return false;
297 if (! get_u16 (file, &extralen)) return false;
298 if (! get_u16 (file, &clen)) return false;
299 if (! get_u16 (file, &diskstart)) return false;
300 if (! get_u16 (file, &iattr)) return false;
301 if (! get_u32 (file, &eattr)) return false;
302 if (! get_u32 (file, &ze->offset)) return false;
/* One extra zeroed byte keeps the name NUL-terminated. */
304 ze->name = xzalloc (nlen + 1);
305 if (! get_bytes (file, ze->name, nlen)) return false;
307 skip_bytes (file, extralen);
/* The variable-length part of a central directory header is name, extra
   field, then file comment.  The comment must be skipped as well, otherwise
   the next header's magic check lands inside the comment bytes. */
skip_bytes (file, clen);
313 /* Create a reader from the zip called FILE_NAME */
/* Error text is written to the string reached through the reader's errs
   field.  The visible steps are: open the file, check that it starts with a
   local file header magic, locate and validate the End Of Central Directory
   record, read its counts and central directory position, seek there, and
   read one directory entry per member.  Cleanup and return statements on
   the failure paths are mostly not visible in this listing. */
315 zip_reader_create (const char *file_name, struct string *errs)
317 uint16_t disknum, n_members, total_members;
319 uint32_t central_dir_start, central_dir_length;
321 struct zip_reader *zr = xzalloc (sizeof *zr);
324 ds_init_empty (zr->errs);
326 FILE *file = fopen (file_name, "rb");
329 ds_put_format (zr->errs, _("%s: open failed (%s)"),
330 file_name, strerror (errno));
335 if ( ! check_magic (file, file_name, MAGIC_LHDR, zr->errs))
342 if ( ! find_eocd (file, &offset))
344 ds_put_format (zr->errs, _("%s: cannot find central directory"),
351 if ( 0 != fseeko (file, offset, SEEK_SET))
353 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
354 file_name, strerror (errno));
361 if ( ! check_magic (file, file_name, MAGIC_EOCD, zr->errs))
/* Both disk-number fields are read into DISKNUM; only the later value
   survives.  The address-of operators on the last two arguments had been
   corrupted into a cent sign and are restored here. */
368 if (! get_u16 (file, &disknum)
369 || ! get_u16 (file, &disknum)
371 || ! get_u16 (file, &n_members)
372 || ! get_u16 (file, &total_members)
374 || ! get_u32 (file, &central_dir_length)
375 || ! get_u32 (file, &central_dir_start))
382 if ( 0 != fseeko (file, central_dir_start, SEEK_SET))
384 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
385 file_name, strerror (errno));
391 zr->file_name = xstrdup (file_name);
/* Entries are stored at index zr->n_entries, not at the loop index I. */
393 zr->entries = xcalloc (n_members, sizeof *zr->entries);
394 for (int i = 0; i < n_members; i++)
396 if (!zip_header_read_next (file, file_name,
397 &zr->entries[zr->n_entries], errs))
400 zip_reader_destroy (zr);
/* Searches ZR's directory entries in order for one whose name equals MEMBER
   exactly (strcmp).  The return statements are not visible in this listing;
   zip_member_open() reports "unknown member" after calling this, so a miss
   presumably yields NULL -- TODO confirm. */
409 static struct zip_entry *
410 zip_entry_find (struct zip_reader *zr, const char *member)
412 for (int i = 0; i < zr->n_entries; ++i)
414 struct zip_entry *ze = &zr->entries[i];
415 if (0 == strcmp (ze->name, member))
/* Returns the name of the directory entry at index IDX in ZR, or NULL if IDX
   is not below the number of entries.  The returned pointer refers to the
   entry's own name string, not a copy. */
422 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
424 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
427 /* Return the member called MEMBER from the reader ZR */
/* Error text is written to ZR's error string, which the new member shares
   through its errmsgs field.  The visible steps are: look MEMBER up in the
   directory, open the archive again on a stream of its own, seek to the
   member's recorded offset, validate the local file header there, pick a
   decompressor from the header's compression type, check that the header's
   name matches the directory's, skip the extra field and run the
   decompressor's init hook.  Return statements and part of the cleanup are
   not visible in this listing. */
429 zip_member_open (struct zip_reader *zr, const char *member)
431 struct zip_entry *ze = zip_entry_find (zr, member);
434 ds_put_format (zr->errs, _("%s: unknown member \"%s\""),
435 zr->file_name, member);
439 FILE *fp = fopen (zr->file_name, "rb");
442 ds_put_format (zr->errs, _("%s: open failed (%s)"),
443 zr->file_name, strerror (errno));
447 struct zip_member *zm = xmalloc (sizeof *zm);
448 zm->file_name = xstrdup (zr->file_name);
449 zm->member_name = xstrdup (member);
/* Offset and sizes are taken from the central directory entry, not from the
   local header read further down. */
451 zm->offset = ze->offset;
452 zm->comp_size = ze->comp_size;
453 zm->ucomp_size = ze->ucomp_size;
454 zm->decompressor = NULL;
455 zm->bytes_unread = ze->ucomp_size;
456 zm->errmsgs = zr->errs;
459 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
461 ds_put_format (zr->errs, _("%s: seek failed (%s)"),
462 ze->name, strerror (errno));
466 if ( ! check_magic (zm->fp, zr->file_name, MAGIC_LHDR, zr->errs))
469 uint16_t v, nlen, extra_len;
470 uint16_t gp, comp_type, time, date;
471 uint32_t ucomp_size, comp_size;
473 if (! get_u16 (zm->fp, &v)) goto error;
474 if (! get_u16 (zm->fp, &gp)) goto error;
475 if (! get_u16 (zm->fp, &comp_type)) goto error;
476 zm->decompressor = get_decompressor (comp_type);
477 if (! zm->decompressor) goto error;
478 if (! get_u16 (zm->fp, &time)) goto error;
479 if (! get_u16 (zm->fp, &date)) goto error;
480 if (! get_u32 (zm->fp, &crc)) goto error;
481 if (! get_u32 (zm->fp, &comp_size)) goto error;
483 if (! get_u32 (zm->fp, &ucomp_size)) goto error;
484 if (! get_u16 (zm->fp, &nlen)) goto error;
485 if (! get_u16 (zm->fp, &extra_len)) goto error;
/* One extra zeroed byte keeps the name NUL-terminated for strcmp(). */
487 char *name = xzalloc (nlen + 1);
488 if (! get_bytes (zm->fp, name, nlen))
493 if (strcmp (name, ze->name) != 0)
495 ds_put_format (zm->errmsgs,
496 _("%s: name mismatch between central directory (%s) "
497 "and local file header (%s)"),
498 zm->file_name, ze->name, name);
504 skip_bytes (zm->fp, extra_len);
506 if (!zm->decompressor->init (zm) )
/* Failure path: release the name copies made above. */
513 free (zm->file_name);
514 free (zm->member_name);
521 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
524 /* Search for something that looks like the End Of Central Directory in FP.
525 If found, the offset of the record will be placed in OFF.
526 Returns true if found, false otherwise. */
/* The search probes a window the size of the magic number (START to STOP)
   using probe_magic(), which stores the match position in OFF.  How START
   moves between probes is not visible in this listing. */
529 find_eocd (FILE *fp, off_t *off)
532 const uint32_t magic = MAGIC_EOCD;
535 /* The magic must start at least 22 bytes before the end of the file,
536 because that is the minimum length of the EndOfCentralDirectory
record, so the search begins at that position. */
539 if ( 0 > fseeko (fp, -22, SEEK_END))
544 stop = start + sizeof (magic);
547 found = probe_magic (fp, magic, start, stop, off);
548 /* FIXME: For extra confidence lookup the directory start record here*/
551 stop = start + sizeof (magic);
561 /* Search FP for MAGIC starting at START and reaching until STOP.
562 Returns true iff MAGIC is found, false otherwise.
563 OFF receives the location of the magic. */
/* Scans FP one byte at a time from START, matching the four bytes of MAGIC.
   Loop bounds, the handling of STOP and the return statements are not
   visible in this listing. */
566 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
570 unsigned char seq[4];
573 if ( 0 > fseeko (fp, start, SEEK_SET))
/* SEQ holds MAGIC split into bytes, least significant byte first. */
578 for (i = 0; i < 4 ; ++i)
580 seq[i] = (magic >> i * 8) & 0xFF;
585 if (1 != fread (&byte, 1, 1, fp))
/* STATE indexes the byte of SEQ that is expected next. */
588 if ( byte == seq[state])
/* The stream position is just past the 4-byte match; step back to where it
   began. */
595 *off = ftello (fp) - 4;
607 /* Null decompressor. */
/* Read hook of the "stored" decompressor: copies up to N bytes straight from
   ZM's stream into BUF and returns the count that fread() reports.
   NOTE(review): fread() returns a size_t and does not distinguish an error
   from end of file; the return type of this function is not visible here. */
610 stored_read (struct zip_member *zm, void *buf, size_t n)
612 return fread (buf, 1, n, zm->fp);
/* Init hook of the "stored" decompressor.  ZM is marked UNUSED; the body is
   not visible in this listing. */
616 stored_init (struct zip_member *zm UNUSED)
/* Finish hook of the "stored" decompressor.  ZM is marked UNUSED because
   this method has nothing to release. */
622 stored_finish (struct zip_member *zm UNUSED)
624 /* Nothing required */
/* Hook table for the stored_* functions, given in the order init, read,
   finish. */
627 static const struct decompressor stored_decompressor =
628 {stored_init, stored_read, stored_finish};
630 /* Inflate decompressor. */
/* Size in bytes of the buffer that holds compressed input for zlib. */
635 #define UCOMPSIZE 4096
/* Compressed bytes read from the file; inflate_read() hands this buffer to
   zlib as its next_in. */
641 unsigned char ucomp[UCOMPSIZE];
/* Running total of compressed bytes read from the file so far; inflate_read()
   subtracts it from the member's comp_size to find how much is left. */
643 size_t ucomp_bytes_read;
645 /* Two bitfields as defined by RFC1950 */
/* Finish hook of the inflate decompressor: shuts down the zlib stream held
   in the inflator state that ZM's aux field points to.  Any further cleanup
   is not visible in this listing. */
650 inflate_finish (struct zip_member *zm)
652 struct inflator *inf = zm->aux;
654 inflateEnd (&inf->zss);
/* Init hook of the inflate decompressor.  Allocates zeroed inflator state,
   precomputes a 16-bit RFC 1950 header word (CMF in the high byte, FLG in
   the low byte) and initializes a zlib inflate stream.  inflate_read()
   later places that word at the front of its first input buffer, presumably
   because the member data itself does not carry such a header -- TODO
   confirm.  On zlib failure an error message is appended to ZM's error
   string. */
660 inflate_init (struct zip_member *zm)
663 struct inflator *inf = xzalloc (sizeof *inf);
666 uint16_t cmf = 0x8; /* Always 8 for inflate */
668 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
670 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
672 /* make these into a 16 bit word */
673 inf->cmf_flg = (cmf << 8 ) | flg;
675 /* Set the check bits */
/* Raises the word to the next multiple of 31; a value that is already a
   multiple gains a further 31 and so stays one. */
676 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
677 assert (inf->cmf_flg % 31 == 0);
/* No input yet, and Z_NULL allocator hooks. */
679 inf->zss.next_in = Z_NULL;
680 inf->zss.avail_in = 0;
681 inf->zss.zalloc = Z_NULL;
682 inf->zss.zfree = Z_NULL;
683 inf->zss.opaque = Z_NULL;
684 r = inflateInit (&inf->zss);
688 ds_put_format (zm->errmsgs,
689 _("%s: cannot initialize inflator (%s)"),
690 zm->file_name, zError (r));
/* Read hook of the inflate decompressor: decompresses up to N bytes into BUF
   and returns the number of bytes produced (N minus zlib's remaining output
   space).  When zlib reports an error, a message naming the file and member
   is appended to ZM's error string; the value returned on that path is not
   visible in this listing. */
700 inflate_read (struct zip_member *zm, void *buf, size_t n)
703 struct inflator *inf = zm->aux;
/* Refill the input buffer only once zlib has consumed all of it. */
705 if (inf->zss.avail_in == 0)
/* First refill: put the precomputed RFC 1950 header word at the front of
   the buffer, high byte first. */
711 if ( inf->state == 0)
713 inf->ucomp[1] = inf->cmf_flg ;
714 inf->ucomp[0] = inf->cmf_flg >> 8 ;
/* Compressed bytes of the member that have not yet been read from file. */
720 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
722 if (bytes_to_read == 0)
725 if (bytes_to_read > UCOMPSIZE)
726 bytes_to_read = UCOMPSIZE;
/* PAD bytes at the front of the buffer are reserved, so that many fewer
   bytes are requested from the file.
   NOTE(review): PAD's declaration and assignment are not visible here.  If
   PAD can exceed BYTES_TO_READ, the subtraction goes negative or wraps and
   the read could overrun the buffer -- confirm. */
728 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
730 inf->ucomp_bytes_read += bytes_read;
732 inf->zss.avail_in = bytes_read + pad;
733 inf->zss.next_in = inf->ucomp;
735 inf->zss.avail_out = n;
736 inf->zss.next_out = buf;
738 r = inflate (&inf->zss, Z_NO_FLUSH);
741 return n - inf->zss.avail_out;
744 ds_put_format (zm->errmsgs, _("%s: error inflating \"%s\" (%s)"),
745 zm->file_name, zm->member_name, zError (r));
/* Hook table for the inflate_* functions, given in the order init, read,
   finish. */
750 static const struct decompressor inflate_decompressor =
751 {inflate_init, inflate_read, inflate_finish};