1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014, 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/zip-reader.h"
20 #include "libpspp/zip-private.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/compiler.h"
32 #include "libpspp/integer-format.h"
33 #include "libpspp/str.h"
35 #include "gl/xalloc.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) (msgid)
43 char *file_name; /* File name. */
44 char *member_name; /* Member name. */
45 FILE *fp; /* The stream from which the data is read */
46 uint32_t offset; /* Starting offset in file. */
47 uint32_t comp_size; /* Length of member file data, in bytes. */
48 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
49 const struct decompressor *decompressor;
51 size_t bytes_unread; /* Number of bytes left in the member available for reading */
52 char *error; /* Error message, if any. */
58 char *(*init) (struct zip_member *);
59 int (*read) (struct zip_member *, void *, size_t);
60 void (*finish) (struct zip_member *);
62 static const struct decompressor stored_decompressor;
63 static const struct decompressor inflate_decompressor;
65 static bool find_eocd (FILE *fp, off_t *off);
67 static const struct decompressor *
68 get_decompressor (uint16_t c)
73 return &stored_decompressor;
76 return &inflate_decompressor;
86 char *file_name; /* The name of the file from which the data is read */
87 uint16_t n_entries; /* Number of directory entries. */
88 struct zip_entry *entries; /* Directory entries. */
93 uint32_t offset; /* Starting offset in file. */
94 uint32_t comp_size; /* Length of member file data, in bytes. */
95 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
96 char *name; /* Name of member file. */
99 char * WARN_UNUSED_RESULT
100 zip_member_steal_error (struct zip_member *zm)
102 char *retval = zm->error;
108 zip_member_finish (struct zip_member *zm)
112 free (zm->file_name);
113 free (zm->member_name);
114 zm->decompressor->finish (zm);
122 zip_reader_ref (const struct zip_reader *zr_)
124 struct zip_reader *zr = CONST_CAST (struct zip_reader *, zr_);
125 assert (zr->ref_cnt > 0);
130 /* Destroy the zip reader */
132 zip_reader_unref (struct zip_reader *zr)
136 assert (zr->ref_cnt > 0);
140 free (zr->file_name);
142 for (int i = 0; i < zr->n_entries; ++i)
144 struct zip_entry *ze = &zr->entries[i];
152 /* Skip N bytes in F */
154 skip_bytes (FILE *f, size_t n)
156 fseeko (f, n, SEEK_CUR);
160 get_bytes (FILE *f, void *x, size_t n)
162 if (!fread (x, n, 1, f))
170 get_bytes (f, &x, sizeof x);
171 return le_to_native32 (x);
178 get_bytes (f, &x, sizeof x);
179 return le_to_native16 (x);
182 static char * WARN_UNUSED_RESULT
183 get_stream_error (FILE *f, const char *file_name)
186 return xasprintf (_("%s: unexpected end of file"), file_name);
189 /* The particular error might not be in errno anymore. Try to find out
190 what the error was. */
193 return (!fread (&x, 1, sizeof x, f) && errno
194 ? xasprintf (_("%s: I/O error reading Zip archive (%s)"),
195 file_name, strerror (errno))
196 : xasprintf (_("%s: I/O error reading Zip archive"), file_name));
202 /* Reads a 32-bit integer from F and compares it with EXPECTED.
203 Returns an error string if the read fails or the values differ. */
204 static char * WARN_UNUSED_RESULT
205 check_magic (FILE *f, const char *file_name, uint32_t expected)
207 uint32_t magic = get_u32 (f);
208 char *error = get_stream_error (f, file_name);
211 else if (expected != magic)
212 return xasprintf (_("%s: corrupt archive at 0x%llx: "
213 "expected %#"PRIx32" but got %#"PRIx32),
215 (long long int) ftello (f) - sizeof (uint32_t),
222 /* Reads up to BYTES bytes from ZM and puts them in BUF.
223 Returns the number of bytes read, or -1 on error. */
225 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
227 if (bytes > zm->bytes_unread)
228 bytes = zm->bytes_unread;
232 int bytes_read = zm->decompressor->read (zm, buf, bytes);
236 zm->bytes_unread -= bytes_read;
241 /* Read all of ZM into memory, storing the data in *DATAP and its size in *NP.
242 Returns NULL if successful, otherwise an error string that the caller
243 must eventually free(). */
244 char * WARN_UNUSED_RESULT
245 zip_member_read_all (struct zip_reader *zr, const char *member_name,
246 void **datap, size_t *np)
248 struct zip_member *zm;
249 char *error = zip_member_open (zr, member_name, &zm);
257 *datap = xmalloc (zm->ucomp_size);
258 *np = zm->ucomp_size;
260 uint8_t *data = *datap;
261 while (zm->bytes_unread)
262 if (zip_member_read (zm, data + (zm->ucomp_size - zm->bytes_unread),
263 zm->bytes_unread) == -1)
265 char *error = zip_member_steal_error (zm);
266 zip_member_finish (zm);
273 zip_member_finish (zm);
277 /* Reads a central directory header from FILE and initializes ZE with it.
278 Returns an error string if the header cannot be read. */
280 static char * WARN_UNUSED_RESULT
281 zip_header_read_next (FILE *file, const char *file_name,
282 struct zip_entry *ze)
284 char *error = check_magic (file, file_name, MAGIC_SOCD);
288 get_u16 (file); /* v */
289 get_u16 (file); /* v */
290 get_u16 (file); /* gp */
291 get_u16 (file); /* comp_type */
292 get_u16 (file); /* time */
293 get_u16 (file); /* date */
294 get_u32 (file); /* expected_crc */
295 ze->comp_size = get_u32 (file);
296 ze->ucomp_size = get_u32 (file);
297 uint16_t nlen = get_u16 (file);
298 uint16_t extralen = get_u16 (file);
299 get_u16 (file); /* clen */
300 get_u16 (file); /* diskstart */
301 get_u16 (file); /* iattr */
302 get_u32 (file); /* eattr */
303 ze->offset = get_u32 (file);
305 error = get_stream_error (file, file_name);
309 ze->name = xzalloc (nlen + 1);
310 get_bytes (file, ze->name, nlen);
311 error = get_stream_error (file, file_name);
315 skip_bytes (file, extralen);
321 /* Creates a reader for the Zip archive named FILE_NAME. On failure, returns an error string that the caller must free. */
322 char * WARN_UNUSED_RESULT
323 zip_reader_create (const char *file_name, struct zip_reader **zrp)
327 FILE *file = fopen (file_name, "rb");
329 return xasprintf (_("%s: open failed (%s)"), file_name, strerror (errno));
331 /* Check the Zip file magic. */
332 char *error = check_magic (file, file_name, MAGIC_LHDR);
339 /* Find end of central directory record and read it. */
341 if (! find_eocd (file, &offset))
344 return xasprintf (_("%s: cannot find central directory"), file_name);
346 if (0 != fseeko (file, offset, SEEK_SET))
348 error = xasprintf (_("%s: seek failed (%s)"),
349 file_name, strerror (errno));
353 error = check_magic (file, file_name, MAGIC_EOCD);
359 get_u16 (file); /* disknum */
360 get_u16 (file); /* disknum */
361 uint16_t n_members = get_u16 (file);
362 get_u16 (file); /* total_members */
363 get_u32 (file); /* central_dir_length */
364 uint32_t central_dir_start = get_u32 (file);
365 error = get_stream_error (file, file_name);
372 /* Read central directory. */
373 if (0 != fseeko (file, central_dir_start, SEEK_SET))
375 error = xasprintf (_("%s: seek failed (%s)"),
376 file_name, strerror (errno));
381 struct zip_reader *zr = xzalloc (sizeof *zr);
383 zr->file_name = xstrdup (file_name);
384 zr->entries = xcalloc (n_members, sizeof *zr->entries);
385 for (int i = 0; i < n_members; i++)
387 error = zip_header_read_next (file, file_name,
388 &zr->entries[zr->n_entries]);
392 zip_reader_unref (zr);
404 static struct zip_entry *
405 zip_entry_find (const struct zip_reader *zr, const char *member)
407 for (int i = 0; i < zr->n_entries; ++i)
409 struct zip_entry *ze = &zr->entries[i];
410 if (0 == strcmp (ze->name, member))
417 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
419 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
422 /* Returns true if ZR contains a member named MEMBER, false otherwise. */
424 zip_reader_contains_member (const struct zip_reader *zr, const char *member)
426 return zip_entry_find (zr, member) != NULL;
429 /* Opens the member called MEMBER in the reader ZR. On failure, returns an error string that the caller must free. */
430 char * WARN_UNUSED_RESULT
431 zip_member_open (struct zip_reader *zr, const char *member,
432 struct zip_member **zmp)
436 struct zip_entry *ze = zip_entry_find (zr, member);
438 return xasprintf (_("%s: unknown member \"%s\""),
439 zr->file_name, member);
441 FILE *fp = fopen (zr->file_name, "rb");
443 return xasprintf ( _("%s: open failed (%s)"),
444 zr->file_name, strerror (errno));
446 struct zip_member *zm = xmalloc (sizeof *zm);
447 zm->file_name = xstrdup (zr->file_name);
448 zm->member_name = xstrdup (member);
450 zm->offset = ze->offset;
451 zm->comp_size = ze->comp_size;
452 zm->ucomp_size = ze->ucomp_size;
453 zm->decompressor = NULL;
454 zm->bytes_unread = ze->ucomp_size;
459 if (0 != fseeko (zm->fp, zm->offset, SEEK_SET))
461 error = xasprintf (_("%s: seek failed (%s)"),
462 ze->name, strerror (errno));
466 error = check_magic (zm->fp, zr->file_name, MAGIC_LHDR);
470 get_u16 (zm->fp); /* v */
471 get_u16 (zm->fp); /* gp */
472 uint16_t comp_type = get_u16 (zm->fp);
473 zm->decompressor = get_decompressor (comp_type);
474 if (!zm->decompressor)
476 error = xasprintf (_("%s: member \"%s\" has unknown compression "
478 zr->file_name, zm->member_name, comp_type);
481 get_u16 (zm->fp); /* time */
482 get_u16 (zm->fp); /* date */
483 get_u32 (zm->fp); /* crc */
484 get_u32 (zm->fp); /* comp_size */
485 get_u32 (zm->fp); /* ucomp_size */
486 uint16_t nlen = get_u16 (zm->fp);
487 uint16_t extra_len = get_u16 (zm->fp);
488 error = get_stream_error (zm->fp, zr->file_name);
492 char *name = xzalloc (nlen + 1);
493 get_bytes (zm->fp, name, nlen);
494 error = get_stream_error (zm->fp, zr->file_name);
497 if (strcmp (name, ze->name) != 0)
499 error = xasprintf (_("%s: name mismatch between central directory (%s) "
500 "and local file header (%s)"),
501 zm->file_name, ze->name, name);
507 skip_bytes (zm->fp, extra_len);
509 error = zm->decompressor->init (zm);
518 free (zm->file_name);
519 free (zm->member_name);
526 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
529 /* Searches FP for something that looks like the End Of Central Directory record.
530 If found, the offset of the record is placed in *OFF.
531 Returns true if found, false otherwise. */
534 find_eocd (FILE *fp, off_t *off)
537 const uint32_t magic = MAGIC_EOCD;
540 /* The magic cannot start fewer than 22 bytes before the end of the file,
541 because that is the minimum length of the End Of Central Directory record. */
544 if (0 > fseeko (fp, -22, SEEK_END))
549 stop = start + sizeof (magic);
552 found = probe_magic (fp, magic, start, stop, off);
553 /* FIXME: For extra confidence, look up the directory start record here. */
556 stop = start + sizeof (magic);
566 /* Searches FP for MAGIC, starting at offset START and stopping at STOP.
567 Returns true if MAGIC is found, false otherwise.
568 On success, *OFF receives the file offset at which MAGIC begins. */
571 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
575 unsigned char seq[4];
578 if (0 > fseeko (fp, start, SEEK_SET))
583 for (i = 0; i < 4 ; ++i)
585 seq[i] = (magic >> i * 8) & 0xFF;
590 if (1 != fread (&byte, 1, 1, fp))
593 if (byte == seq[state])
600 *off = ftello (fp) - 4;
612 /* Null decompressor. */
615 stored_read (struct zip_member *zm, void *buf, size_t n)
617 size_t bytes_read = fread (buf, 1, n, zm->fp);
618 if (!bytes_read && !zm->error)
619 zm->error = get_stream_error (zm->fp, zm->file_name);
624 stored_init (struct zip_member *zm UNUSED)
630 stored_finish (struct zip_member *zm UNUSED)
632 /* Nothing required */
635 static const struct decompressor stored_decompressor =
636 {stored_init, stored_read, stored_finish};
638 /* Inflate decompressor. */
643 #define UCOMPSIZE 4096
649 unsigned char ucomp[UCOMPSIZE];
651 size_t ucomp_bytes_read;
653 /* Two bitfields as defined by RFC1950 */
658 inflate_finish (struct zip_member *zm)
660 struct inflator *inf = zm->aux;
662 inflateEnd (&inf->zss);
668 inflate_init (struct zip_member *zm)
671 struct inflator *inf = xzalloc (sizeof *inf);
674 uint16_t cmf = 0x8; /* Always 8 for inflate */
676 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
678 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
680 /* Combine CMF and FLG into a single 16-bit word */
681 inf->cmf_flg = (cmf << 8) | flg;
683 /* Set the check bits */
684 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
685 assert (inf->cmf_flg % 31 == 0);
687 inf->zss.next_in = Z_NULL;
688 inf->zss.avail_in = 0;
689 inf->zss.zalloc = Z_NULL;
690 inf->zss.zfree = Z_NULL;
691 inf->zss.opaque = Z_NULL;
692 r = inflateInit (&inf->zss);
695 return xasprintf (_("%s: cannot initialize inflator (%s)"),
696 zm->file_name, zError (r));
704 inflate_read (struct zip_member *zm, void *buf, size_t n)
707 struct inflator *inf = zm->aux;
709 if (inf->zss.avail_in == 0)
717 inf->ucomp[1] = inf->cmf_flg ;
718 inf->ucomp[0] = inf->cmf_flg >> 8 ;
724 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
726 if (bytes_to_read == 0)
729 if (bytes_to_read > UCOMPSIZE)
730 bytes_to_read = UCOMPSIZE;
732 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
733 if (!bytes_read && !zm->error)
735 zm->error = get_stream_error (zm->fp, zm->file_name);
739 inf->ucomp_bytes_read += bytes_read;
741 inf->zss.avail_in = bytes_read + pad;
742 inf->zss.next_in = inf->ucomp;
744 inf->zss.avail_out = n;
745 inf->zss.next_out = buf;
747 r = inflate (&inf->zss, Z_NO_FLUSH);
750 return n - inf->zss.avail_out;
754 zm->error = xasprintf (_("%s: error inflating \"%s\" (%s)"),
755 zm->file_name, zm->member_name, zError (r));
760 static const struct decompressor inflate_decompressor =
761 {inflate_init, inflate_read, inflate_finish};