1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
32 #include "integer-format.h"
33 #include "zip-reader.h"
34 #include "zip-private.h"
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) (msgid)
/* A member of a Zip archive that has been opened for reading, together with
   the state needed to read and decompress it. */
struct zip_member
{
  char *file_name;             /* File name. */
  char *member_name;          /* Member name. */
  FILE *fp;                   /* The stream from which the data is read */
  uint32_t offset;            /* Starting offset in file. */
  uint32_t comp_size;         /* Length of member file data, in bytes. */
  uint32_t ucomp_size;        /* Uncompressed length of member file data, in bytes. */
  const struct decompressor *decompressor;

  size_t bytes_unread;       /* Number of bytes left in the member available for reading */
  char *error;               /* Error message, if any. */
  void *aux;                 /* Decompressor-private state (see inflate_*). */
};
/* Operations that each supported compression method implements. */
struct decompressor
{
  /* Prepares the member for reading.  Returns NULL on success, otherwise an
     error string. */
  char *(*init) (struct zip_member *);

  /* Reads data from the member into the given buffer of the given size and
     returns the number of bytes stored there. */
  int  (*read) (struct zip_member *, void *, size_t);

  /* Releases whatever init allocated. */
  void (*finish) (struct zip_member *);
};
61 static const struct decompressor stored_decompressor;
62 static const struct decompressor inflate_decompressor;
64 static bool find_eocd (FILE *fp, off_t *off);
66 static const struct decompressor *
67 get_decompressor (uint16_t c)
72 return &stored_decompressor;
75 return &inflate_decompressor;
/* An open Zip archive: its name and the entries of its central directory. */
struct zip_reader
{
  char *file_name;                  /* The name of the file from which the data is read */
  uint16_t n_entries;              /* Number of directory entries. */
  struct zip_entry *entries;       /* Directory entries. */
};
/* One central directory entry, describing a single archive member. */
struct zip_entry
{
  uint32_t offset;            /* Starting offset in file. */
  uint32_t comp_size;         /* Length of member file data, in bytes. */
  uint32_t ucomp_size;        /* Uncompressed length of member file data, in bytes. */
  char *name;                 /* Name of member file. */
};
97 char * WARN_UNUSED_RESULT
98 zip_member_steal_error (struct zip_member *zm)
100 char *retval = zm->error;
106 zip_member_finish (struct zip_member *zm)
110 free (zm->file_name);
111 free (zm->member_name);
112 zm->decompressor->finish (zm);
119 /* Destroy the zip reader */
121 zip_reader_destroy (struct zip_reader *zr)
127 free (zr->file_name);
129 for (i = 0; i < zr->n_entries; ++i)
131 struct zip_entry *ze = &zr->entries[i];
/* Skips N bytes in F by seeking forward from the current position.  The
   result of the seek is deliberately not reported. */
static void
skip_bytes (FILE *f, size_t n)
{
  fseeko (f, n, SEEK_CUR);
}
/* Reads N bytes from F into X.  If all N bytes cannot be read, X is filled
   with zeros instead, so that callers never see indeterminate data. */
static void
get_bytes (FILE *f, void *x, size_t n)
{
  if (!fread (x, n, 1, f))
    memset (x, 0, n);
}
/* Reads a 32-bit little-endian integer from F and returns it in native
   byte order. */
static uint32_t
get_u32 (FILE *f)
{
  uint32_t x;
  get_bytes (f, &x, sizeof x);
  return le_to_native32 (x);
}
/* Reads a 16-bit little-endian integer from F and returns it in native
   byte order. */
static uint16_t
get_u16 (FILE *f)
{
  uint16_t x;
  get_bytes (f, &x, sizeof x);
  return le_to_native16 (x);
}
169 static char * WARN_UNUSED_RESULT
170 get_stream_error (FILE *f, const char *file_name)
173 return xasprintf (_("%s: unexpected end of file"), file_name);
176 /* The particular error might not be in errno anymore. Try to find out
177 what the error was. */
180 return (!fread (&x, 1, sizeof x, f) && errno
181 ? xasprintf (_("%s: I/O error reading Zip archive (%s)"),
182 file_name, strerror (errno))
183 : xasprintf (_("%s: I/O error reading Zip archive"), file_name));
189 /* Read 32 bit integer and compare it with EXPECTED.
190 place an error string in ERR if necessary. */
191 static char * WARN_UNUSED_RESULT
192 check_magic (FILE *f, const char *file_name, uint32_t expected)
194 uint32_t magic = get_u32 (f);
195 char *error = get_stream_error (f, file_name);
198 else if (expected != magic)
199 return xasprintf (_("%s: corrupt archive at 0x%llx: "
200 "expected %#"PRIx32" but got %#"PRIx32),
202 (long long int) ftello (f) - sizeof (uint32_t),
209 /* Reads upto BYTES bytes from ZM and puts them in BUF.
210 Returns the number of bytes read, or -1 on error */
212 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
214 if (bytes > zm->bytes_unread)
215 bytes = zm->bytes_unread;
219 int bytes_read = zm->decompressor->read (zm, buf, bytes);
223 zm->bytes_unread -= bytes_read;
228 /* Read all of ZM into memory, storing the data in *DATAP and its size in *NP.
229 Returns NULL if successful, otherwise an error string that the caller
230 must eventually free(). */
231 char * WARN_UNUSED_RESULT
232 zip_member_read_all (struct zip_reader *zr, const char *member_name,
233 void **datap, size_t *np)
235 struct zip_member *zm;
236 char *error = zip_member_open (zr, member_name, &zm);
244 *datap = xmalloc (zm->ucomp_size);
245 *np = zm->ucomp_size;
247 uint8_t *data = *datap;
248 while (zm->bytes_unread)
249 if (zip_member_read (zm, data + (zm->ucomp_size - zm->bytes_unread),
250 zm->bytes_unread) == -1)
252 char *error = zip_member_steal_error (zm);
253 zip_member_finish (zm);
260 zip_member_finish (zm);
264 /* Read a central directory header from FILE and initializes ZE with it.
265 Returns true if successful, false otherwise. On error, appends error
267 static char * WARN_UNUSED_RESULT
268 zip_header_read_next (FILE *file, const char *file_name,
269 struct zip_entry *ze)
271 char *error = check_magic (file, file_name, MAGIC_SOCD);
275 get_u16 (file); /* v */
276 get_u16 (file); /* v */
277 get_u16 (file); /* gp */
278 get_u16 (file); /* comp_type */
279 get_u16 (file); /* time */
280 get_u16 (file); /* date */
281 get_u32 (file); /* expected_crc */
282 ze->comp_size = get_u32 (file);
283 ze->ucomp_size = get_u32 (file);
284 uint16_t nlen = get_u16 (file);
285 uint16_t extralen = get_u16 (file);
286 get_u16 (file); /* clen */
287 get_u16 (file); /* diskstart */
288 get_u16 (file); /* iattr */
289 get_u32 (file); /* eattr */
290 ze->offset = get_u32 (file);
292 error = get_stream_error (file, file_name);
296 ze->name = xzalloc (nlen + 1);
297 get_bytes (file, ze->name, nlen);
298 error = get_stream_error (file, file_name);
302 skip_bytes (file, extralen);
308 /* Create a reader from the zip called FILE_NAME */
309 char * WARN_UNUSED_RESULT
310 zip_reader_create (const char *file_name, struct zip_reader **zrp)
314 FILE *file = fopen (file_name, "rb");
316 return xasprintf (_("%s: open failed (%s)"), file_name, strerror (errno));
318 /* Check the Zip file magic. */
319 char *error = check_magic (file, file_name, MAGIC_LHDR);
326 /* Find end of central directory record and read it. */
328 if (! find_eocd (file, &offset))
331 return xasprintf (_("%s: cannot find central directory"), file_name);
333 if (0 != fseeko (file, offset, SEEK_SET))
335 error = xasprintf (_("%s: seek failed (%s)"),
336 file_name, strerror (errno));
340 error = check_magic (file, file_name, MAGIC_EOCD);
346 get_u16 (file); /* disknum */
347 get_u16 (file); /* disknum */
348 uint16_t n_members = get_u16 (file);
349 get_u16 (file); /* total_members */
350 get_u32 (file); /* central_dir_length */
351 uint32_t central_dir_start = get_u32 (file);
352 error = get_stream_error (file, file_name);
359 /* Read central directory. */
360 if (0 != fseeko (file, central_dir_start, SEEK_SET))
362 error = xasprintf (_("%s: seek failed (%s)"),
363 file_name, strerror (errno));
368 struct zip_reader *zr = xzalloc (sizeof *zr);
369 zr->file_name = xstrdup (file_name);
370 zr->entries = xcalloc (n_members, sizeof *zr->entries);
371 for (int i = 0; i < n_members; i++)
373 error = zip_header_read_next (file, file_name,
374 &zr->entries[zr->n_entries]);
378 zip_reader_destroy (zr);
390 static struct zip_entry *
391 zip_entry_find (const struct zip_reader *zr, const char *member)
393 for (int i = 0; i < zr->n_entries; ++i)
395 struct zip_entry *ze = &zr->entries[i];
396 if (0 == strcmp (ze->name, member))
403 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
405 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
/* Returns true if ZR contains a member named MEMBER, false otherwise. */
bool
zip_reader_contains_member (const struct zip_reader *zr, const char *member)
{
  return zip_entry_find (zr, member) != NULL;
}
415 /* Return the member called MEMBER from the reader ZR */
416 char * WARN_UNUSED_RESULT
417 zip_member_open (struct zip_reader *zr, const char *member,
418 struct zip_member **zmp)
422 struct zip_entry *ze = zip_entry_find (zr, member);
424 return xasprintf (_("%s: unknown member \"%s\""),
425 zr->file_name, member);
427 FILE *fp = fopen (zr->file_name, "rb");
429 return xasprintf ( _("%s: open failed (%s)"),
430 zr->file_name, strerror (errno));
432 struct zip_member *zm = xmalloc (sizeof *zm);
433 zm->file_name = xstrdup (zr->file_name);
434 zm->member_name = xstrdup (member);
436 zm->offset = ze->offset;
437 zm->comp_size = ze->comp_size;
438 zm->ucomp_size = ze->ucomp_size;
439 zm->decompressor = NULL;
440 zm->bytes_unread = ze->ucomp_size;
445 if (0 != fseeko (zm->fp, zm->offset, SEEK_SET))
447 error = xasprintf (_("%s: seek failed (%s)"),
448 ze->name, strerror (errno));
452 error = check_magic (zm->fp, zr->file_name, MAGIC_LHDR);
456 get_u16 (zm->fp); /* v */
457 get_u16 (zm->fp); /* gp */
458 uint16_t comp_type = get_u16 (zm->fp);
459 zm->decompressor = get_decompressor (comp_type);
460 if (!zm->decompressor)
462 error = xasprintf (_("%s: member \"%s\" has unknown compression "
464 zr->file_name, zm->member_name, comp_type);
467 get_u16 (zm->fp); /* time */
468 get_u16 (zm->fp); /* date */
469 get_u32 (zm->fp); /* crc */
470 get_u32 (zm->fp); /* comp_size */
471 get_u32 (zm->fp); /* ucomp_size */
472 uint16_t nlen = get_u16 (zm->fp);
473 uint16_t extra_len = get_u16 (zm->fp);
474 error = get_stream_error (zm->fp, zr->file_name);
478 char *name = xzalloc (nlen + 1);
479 get_bytes (zm->fp, name, nlen);
480 error = get_stream_error (zm->fp, zr->file_name);
483 if (strcmp (name, ze->name) != 0)
485 error = xasprintf (_("%s: name mismatch between central directory (%s) "
486 "and local file header (%s)"),
487 zm->file_name, ze->name, name);
493 skip_bytes (zm->fp, extra_len);
495 error = zm->decompressor->init (zm);
504 free (zm->file_name);
505 free (zm->member_name);
512 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
515 /* Search for something that looks like the End Of Central Directory in FP.
516 If found, the offset of the record will be placed in OFF.
517 Returns true if found false otherwise.
520 find_eocd (FILE *fp, off_t *off)
523 const uint32_t magic = MAGIC_EOCD;
526 /* The magic cannot be more than 22 bytes from the end of the file,
527 because that is the minimum length of the EndOfCentralDirectory
530 if (0 > fseeko (fp, -22, SEEK_END))
535 stop = start + sizeof (magic);
538 found = probe_magic (fp, magic, start, stop, off);
539 /* FIXME: For extra confidence lookup the directory start record here*/
542 stop = start + sizeof (magic);
/*
  Search FP for MAGIC, stored in little-endian byte order, starting at
  offset START and examining bytes up to, but not including, offset STOP.
  Returns true iff MAGIC is found.  False otherwise.
  OFF receives the location of the magic.
*/
static bool
probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
{
  int i;
  int state = 0;                /* Number of magic bytes matched so far. */
  unsigned char seq[4];
  unsigned char byte;

  if (0 > fseeko (fp, start, SEEK_SET))
    {
      return false;
    }

  for (i = 0; i < 4 ; ++i)
    {
      seq[i] = (magic >> i * 8) & 0xFF;
    }

  do
    {
      if (1 != fread (&byte, 1, 1, fp))
        break;

      if (byte == seq[state])
        state++;
      else
        {
          /* The byte that broke a partial match may itself begin a new
             match, so do not simply discard it. */
          state = byte == seq[0] ? 1 : 0;
        }

      if (state == 4)
        {
          *off = ftello (fp) - 4;
          return true;
        }
      start++;
      if (start >= stop)
        break;
    }
  while (!feof (fp));

  return false;
}
598 /* Null decompressor. */
601 stored_read (struct zip_member *zm, void *buf, size_t n)
603 size_t bytes_read = fread (buf, 1, n, zm->fp);
604 if (!bytes_read && !zm->error)
605 zm->error = get_stream_error (zm->fp, zm->file_name);
610 stored_init (struct zip_member *zm UNUSED)
616 stored_finish (struct zip_member *zm UNUSED)
618 /* Nothing required */
621 static const struct decompressor stored_decompressor =
622 {stored_init, stored_read, stored_finish};
624 /* Inflate decompressor. */
629 #define UCOMPSIZE 4096
635 unsigned char ucomp[UCOMPSIZE];
637 size_t ucomp_bytes_read;
639 /* Two bitfields as defined by RFC1950 */
644 inflate_finish (struct zip_member *zm)
646 struct inflator *inf = zm->aux;
648 inflateEnd (&inf->zss);
654 inflate_init (struct zip_member *zm)
657 struct inflator *inf = xzalloc (sizeof *inf);
660 uint16_t cmf = 0x8; /* Always 8 for inflate */
662 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
664 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
666 /* make these into a 16 bit word */
667 inf->cmf_flg = (cmf << 8) | flg;
669 /* Set the check bits */
670 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
671 assert (inf->cmf_flg % 31 == 0);
673 inf->zss.next_in = Z_NULL;
674 inf->zss.avail_in = 0;
675 inf->zss.zalloc = Z_NULL;
676 inf->zss.zfree = Z_NULL;
677 inf->zss.opaque = Z_NULL;
678 r = inflateInit (&inf->zss);
681 return xasprintf (_("%s: cannot initialize inflator (%s)"),
682 zm->file_name, zError (r));
690 inflate_read (struct zip_member *zm, void *buf, size_t n)
693 struct inflator *inf = zm->aux;
695 if (inf->zss.avail_in == 0)
703 inf->ucomp[1] = inf->cmf_flg ;
704 inf->ucomp[0] = inf->cmf_flg >> 8 ;
710 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
712 if (bytes_to_read == 0)
715 if (bytes_to_read > UCOMPSIZE)
716 bytes_to_read = UCOMPSIZE;
718 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
719 if (!bytes_read && !zm->error)
721 zm->error = get_stream_error (zm->fp, zm->file_name);
725 inf->ucomp_bytes_read += bytes_read;
727 inf->zss.avail_in = bytes_read + pad;
728 inf->zss.next_in = inf->ucomp;
730 inf->zss.avail_out = n;
731 inf->zss.next_out = buf;
733 r = inflate (&inf->zss, Z_NO_FLUSH);
736 return n - inf->zss.avail_out;
740 zm->error = xasprintf (_("%s: error inflating \"%s\" (%s)"),
741 zm->file_name, zm->member_name, zError (r));
746 static const struct decompressor inflate_decompressor =
747 {inflate_init, inflate_read, inflate_finish};