1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014, 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "libpspp/zip-reader.h"
20 #include "libpspp/zip-private.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/compiler.h"
32 #include "libpspp/integer-format.h"
33 #include "libpspp/str.h"
36 #include "gl/xalloc.h"
/* Shorthand used around message strings in this file: expands to a
   gettext() call on MSGID. */
39 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged; no gettext() call is made at the point of
   use. */
40 #define N_(msgid) (msgid)
/* Per-member reader state, accessed throughout this file through
   `struct zip_member *zm'.
   NOTE(review): the enclosing struct declaration line is not visible in this
   excerpt, and neither is the `aux' field that the inflate code reads via
   zm->aux -- confirm against the full file. */
44 char *file_name; /* File name. */
45 char *member_name; /* Member name. */
46 FILE *fp; /* The stream from which the data is read */
47 uint32_t offset; /* Starting offset in file. */
48 uint32_t comp_size; /* Length of member file data, in bytes. */
49 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
51 uint32_t expected_crc; /* Value zip_member_read() compares the running CRC against. */
52 uint32_t accumulated_crc; /* Running CRC, updated by zip_member_read() via crc32_update(). */
54 const struct decompressor *decompressor; /* Set from get_decompressor() in zip_member_open(). */
56 size_t bytes_unread; /* Number of bytes left in the member available for reading */
57 char *error; /* Error message, if any. */
/* Hooks that a decompression method supplies.  The two instances defined in
   this file are initialized in the order {init, read, finish}. */
63 char *(*init) (struct zip_member *); /* Result is stored as an error string by zip_member_open(). */
64 int (*read) (struct zip_member *, void *, size_t); /* Called by zip_member_read() with the buffer and byte count. */
65 void (*finish) (struct zip_member *); /* Called by zip_member_finish(). */
/* Forward declarations; the definitions appear further down in this file. */
67 static const struct decompressor stored_decompressor;
68 static const struct decompressor inflate_decompressor;
70 static bool find_eocd (FILE *fp, off_t *off);
/* Selects the decompressor for compression method code C.  The visible
   results are &stored_decompressor and &inflate_decompressor;
   zip_member_open() treats a null result as an unknown compression type.
   NOTE(review): the conditions that choose between the results are not
   visible in this excerpt. */
72 static const struct decompressor *
73 get_decompressor (uint16_t c)
78 return &stored_decompressor;
81 return &inflate_decompressor;
/* State of a Zip archive reader.
   NOTE(review): zip_reader_ref() and zip_reader_unref() also use
   zr->ref_cnt, whose declaration is not visible in this excerpt. */
91 char *file_name; /* The name of the file from which the data is read */
92 uint16_t n_entries; /* Number of directory entries. */
93 struct zip_entry *entries; /* Directory entries. */
/* One directory entry.  zip_header_read_next() fills in every field below
   from the archive's central directory. */
98 uint32_t offset; /* Starting offset in file. */
99 uint32_t comp_size; /* Length of member file data, in bytes. */
100 uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */
101 uint32_t expected_crc; /* CRC32 of uncompressed data. */
102 char *name; /* Name of member file. */
/* Captures ZM's current error string (which may be null) so that it can be
   handed to the caller.
   NOTE(review): the rest of the body is not visible in this excerpt; the name
   suggests zm->error is then cleared so that ownership passes to the caller
   -- confirm. */
105 char * WARN_UNUSED_RESULT
106 zip_member_steal_error (struct zip_member *zm)
108 char *retval = zm->error;
/* Releases resources held by ZM: frees its copies of the archive file name
   and the member name, and lets the decompressor clean up through its
   finish() hook.
   NOTE(review): any further cleanup (for example of zm->fp or of ZM itself)
   is not visible in this excerpt. */
114 zip_member_finish (struct zip_member *zm)
118 free (zm->file_name);
119 free (zm->member_name);
120 zm->decompressor->finish (zm);
/* Presumably takes an additional reference to ZR_ (verify against the full
   body).  The parameter is const-qualified, so CONST_CAST is used to obtain
   a modifiable pointer, and the reader must already have a positive
   reference count.
   NOTE(review): the reference-count update and the return statement are not
   visible in this excerpt. */
128 zip_reader_ref (const struct zip_reader *zr_)
130 struct zip_reader *zr = CONST_CAST (struct zip_reader *, zr_);
131 assert (zr->ref_cnt > 0);
136 /* Destroy the zip reader */
/* Requires ZR's reference count to be positive.  The visible cleanup frees
   the reader's file name and visits each of its n_entries directory entries.
   NOTE(review): the reference-count handling, the per-entry cleanup and the
   release of ZR itself are not visible in this excerpt. */
138 zip_reader_unref (struct zip_reader *zr)
142 assert (zr->ref_cnt > 0);
146 free (zr->file_name);
148 for (int i = 0; i < zr->n_entries; ++i)
150 struct zip_entry *ze = &zr->entries[i];
158 /* Skips N bytes in F by seeking forward relative to the current position. */
160 skip_bytes (FILE *f, size_t n)
/* NOTE(review): the result of fseeko() is not used on this line, so a failed
   seek is not reported from here. */
162 fseeko (f, n, SEEK_CUR);
/* Reads N bytes from F into X.  The read is issued as a single item of N
   bytes, so fread() yields 0 unless all N bytes were obtained.
   NOTE(review): what is done when the read fails is not visible in this
   excerpt. */
166 get_bytes (FILE *f, void *x, size_t n)
168 if (!fread (x, n, 1, f))
/* Reads sizeof x bytes from F into X with get_bytes() and returns the result
   of le_to_native32() on it.
   NOTE(review): the function header and the declaration of X are not visible
   in this excerpt; this is presumably the body of get_u32(). */
176 get_bytes (f, &x, sizeof x);
177 return le_to_native32 (x);
/* Reads sizeof x bytes from F into X with get_bytes() and returns the result
   of le_to_native16() on it.
   NOTE(review): the function header and the declaration of X are not visible
   in this excerpt; this is presumably the body of get_u16(). */
184 get_bytes (f, &x, sizeof x);
185 return le_to_native16 (x);
/* Builds an error message about stream F, using FILE_NAME as the message
   prefix.  The visible outcomes are an "unexpected end of file" message and
   an "I/O error reading Zip archive" message.
   NOTE(review): the conditions guarding each outcome, and the result when the
   stream has no error, are not visible in this excerpt. */
188 static char * WARN_UNUSED_RESULT
189 get_stream_error (FILE *f, const char *file_name)
192 return xasprintf (_("%s: unexpected end of file"), file_name);
195 /* The particular error might not be in errno anymore. Try to find out
196 what the error was. */
/* Retry a read: if it obtains nothing and errno is nonzero, include
   strerror (errno) in the message; otherwise report the I/O error without
   a reason. */
199 return (!fread (&x, 1, sizeof x, f) && errno
200 ? xasprintf (_("%s: I/O error reading Zip archive (%s)"),
201 file_name, strerror (errno))
202 : xasprintf (_("%s: I/O error reading Zip archive"), file_name));
208 /* Reads a 32-bit integer from F and compares it with EXPECTED.
209 Returns an error string, prefixed with FILE_NAME, when the two differ. */
210 static char * WARN_UNUSED_RESULT
211 check_magic (FILE *f, const char *file_name, uint32_t expected)
213 uint32_t magic = get_u32 (f);
/* Obtain any end-of-file or I/O error message for the read above. */
214 char *error = get_stream_error (f, file_name);
217 else if (expected != magic)
218 return xasprintf (_("%s: corrupt archive at 0x%llx: "
219 "expected %#"PRIx32" but got %#"PRIx32),
/* The reported offset is the current position minus the four bytes just
   read.
   NOTE(review): the cast applies only to ftello()'s result, so the
   subtraction mixes long long with the size_t from sizeof -- confirm that
   the argument's type matches %llx on every target. */
221 (long long int) ftello (f) - sizeof (uint32_t),
228 /* Reads up to BYTES bytes from ZM and puts them in BUF.
229 Returns the number of bytes read, or -1 on error */
231 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
/* Never ask for more than the member still has left. */
233 if (bytes > zm->bytes_unread)
234 bytes = zm->bytes_unread;
/* The actual read is delegated to the member's decompressor. */
238 int bytes_read = zm->decompressor->read (zm, buf, bytes);
/* Account for the bytes obtained and fold them into the running CRC. */
242 zm->bytes_unread -= bytes_read;
243 zm->accumulated_crc = crc32_update (zm->accumulated_crc, buf, bytes_read);
/* Once the whole member has been consumed, the running CRC must equal the
   expected one; otherwise an error message is recorded on ZM.
   NOTE(review): the two CRC values are formatted differently ("%#" for the
   first, plain "%" for the second) -- confirm whether that is intended. */
244 if (!zm->bytes_unread && zm->accumulated_crc != zm->expected_crc)
246 zm->error = xasprintf (_("%s: corrupt archive reading member \"%s\": "
247 "bad CRC %#"PRIx32" (expected %"PRIx32")"),
248 zm->file_name, zm->member_name,
249 zm->accumulated_crc, zm->expected_crc);
256 /* Reads all of member MEMBER_NAME of ZR into memory, storing the data in
257 *DATAP and its size in *NP.  Returns NULL if successful, otherwise an error
258 string that the caller must eventually free(). */
259 char * WARN_UNUSED_RESULT
260 zip_member_read_all (struct zip_reader *zr, const char *member_name,
261 void **datap, size_t *np)
263 struct zip_member *zm;
264 char *error = zip_member_open (zr, member_name, &zm);
/* The buffer is sized from the member's recorded uncompressed length. */
272 *datap = xmalloc (zm->ucomp_size);
273 *np = zm->ucomp_size;
275 uint8_t *data = *datap;
/* Each pass writes at the offset already consumed (ucomp_size minus
   bytes_unread) and asks for everything that remains. */
276 while (zm->bytes_unread)
277 if (zip_member_read (zm, data + (zm->ucomp_size - zm->bytes_unread),
278 zm->bytes_unread) == -1)
/* The error is taken from ZM before ZM is finished.
   NOTE(review): this declaration reuses the name `error' declared earlier in
   the function, so it shadows that variable. */
280 char *error = zip_member_steal_error (zm);
281 zip_member_finish (zm);
288 zip_member_finish (zm);
292 /* Reads a central directory header from FILE and initializes ZE with it.
293 On failure, returns an error string prefixed with FILE_NAME. */
295 static char * WARN_UNUSED_RESULT
296 zip_header_read_next (FILE *file, const char *file_name,
297 struct zip_entry *ze)
/* The header must begin with the MAGIC_SOCD signature. */
299 char *error = check_magic (file, file_name, MAGIC_SOCD);
/* Fields whose values are not assigned to anything are read only to advance
   the stream past them. */
303 get_u16 (file); /* v */
304 get_u16 (file); /* v */
305 get_u16 (file); /* gp */
306 get_u16 (file); /* comp_type */
307 get_u16 (file); /* time */
308 get_u16 (file); /* date */
309 ze->expected_crc = get_u32 (file);
310 ze->comp_size = get_u32 (file);
311 ze->ucomp_size = get_u32 (file);
312 uint16_t nlen = get_u16 (file);
313 uint16_t extralen = get_u16 (file);
/* NOTE(review): the value labelled clen is discarded, and the only skip
   visible after the name covers EXTRALEN bytes.  If clen is the length of a
   per-entry comment that follows the extra field, those bytes would not be
   skipped before the next header is read -- confirm against the full body. */
314 get_u16 (file); /* clen */
315 get_u16 (file); /* diskstart */
316 get_u16 (file); /* iattr */
317 get_u32 (file); /* eattr */
318 ze->offset = get_u32 (file);
/* One check covers all of the fixed-size reads above. */
320 error = get_stream_error (file, file_name);
/* The name is NLEN bytes long; the buffer gets one extra byte beyond it. */
324 ze->name = xzalloc (nlen + 1);
325 get_bytes (file, ze->name, nlen);
326 error = get_stream_error (file, file_name);
330 skip_bytes (file, extralen);
336 /* Creates a reader for the Zip archive named FILE_NAME.  Returns an error
   string on failure; the visible failures are a failed open, a missing
   central directory, a failed seek, and errors from the header checks.
   NOTE(review): how *ZRP is assigned, and where FILE is closed, are not
   visible in this excerpt. */
337 char * WARN_UNUSED_RESULT
338 zip_reader_create (const char *file_name, struct zip_reader **zrp)
342 FILE *file = fopen (file_name, "rb");
344 return xasprintf (_("%s: open failed (%s)"), file_name, strerror (errno));
346 /* Check the Zip file magic. */
347 char *error = check_magic (file, file_name, MAGIC_LHDR);
354 /* Find end of central directory record and read it. */
356 if (! find_eocd (file, &offset))
359 return xasprintf (_("%s: cannot find central directory"), file_name);
361 if (0 != fseeko (file, offset, SEEK_SET))
363 error = xasprintf (_("%s: seek failed (%s)"),
364 file_name, strerror (errno));
368 error = check_magic (file, file_name, MAGIC_EOCD);
/* Of the record's fields, only the member count and the start offset of the
   central directory are kept; the others are read past. */
374 get_u16 (file); /* disknum */
375 get_u16 (file); /* disknum */
376 uint16_t n_members = get_u16 (file);
377 get_u16 (file); /* total_members */
378 get_u32 (file); /* central_dir_length */
379 uint32_t central_dir_start = get_u32 (file);
380 error = get_stream_error (file, file_name);
387 /* Read central directory. */
388 if (0 != fseeko (file, central_dir_start, SEEK_SET))
390 error = xasprintf (_("%s: seek failed (%s)"),
391 file_name, strerror (errno));
/* The reader keeps its own copy of the file name and an array with room for
   N_MEMBERS entries. */
396 struct zip_reader *zr = XZALLOC (struct zip_reader);
398 zr->file_name = xstrdup (file_name);
399 zr->entries = xcalloc (n_members, sizeof *zr->entries);
/* Each header is read into the slot indexed by zr->n_entries rather than by
   the loop counter.
   NOTE(review): the statement that advances zr->n_entries is not visible in
   this excerpt. */
400 for (int i = 0; i < n_members; i++)
402 error = zip_header_read_next (file, file_name,
403 &zr->entries[zr->n_entries]);
407 zip_reader_unref (zr);
/* Looks for the directory entry of ZR whose name equals MEMBER, comparing
   names with strcmp() in entry order.  Callers in this file treat a null
   result as "no such member".
   NOTE(review): the return statements are not visible in this excerpt. */
419 static struct zip_entry *
420 zip_entry_find (const struct zip_reader *zr, const char *member)
422 for (int i = 0; i < zr->n_entries; ++i)
424 struct zip_entry *ze = &zr->entries[i];
425 if (0 == strcmp (ze->name, member))
/* Returns the name of the directory entry at index IDX in ZR, or NULL if IDX
   is not less than the number of entries.  The returned pointer is the
   entry's own name field, not a copy. */
432 zip_reader_get_member_name(const struct zip_reader *zr, size_t idx)
434 return idx < zr->n_entries ? zr->entries[idx].name : NULL;
437 /* Returns true if ZR contains a member named MEMBER, false otherwise.
   Implemented as a zip_entry_find() lookup. */
439 zip_reader_contains_member (const struct zip_reader *zr, const char *member)
441 return zip_entry_find (zr, member) != NULL;
444 /* Opens the member called MEMBER from the reader ZR.  Returns an error
   string on failure.
   NOTE(review): the assignment to *ZMP and the success return are not
   visible in this excerpt. */
445 char * WARN_UNUSED_RESULT
446 zip_member_open (struct zip_reader *zr, const char *member,
447 struct zip_member **zmp)
451 struct zip_entry *ze = zip_entry_find (zr, member);
453 return xasprintf (_("%s: unknown member \"%s\""),
454 zr->file_name, member);
/* A separate stream is opened on the archive file for this member. */
456 FILE *fp = fopen (zr->file_name, "rb");
458 return xasprintf ( _("%s: open failed (%s)"),
459 zr->file_name, strerror (errno));
/* The member gets its own copies of the names; offset, sizes and expected
   CRC are taken from the directory entry.  Nothing has been read yet, so
   bytes_unread starts at the full uncompressed size. */
461 struct zip_member *zm = xmalloc (sizeof *zm);
462 *zm = (struct zip_member) {
463 .file_name = xstrdup (zr->file_name),
464 .member_name = xstrdup (member),
466 .offset = ze->offset,
467 .comp_size = ze->comp_size,
468 .ucomp_size = ze->ucomp_size,
469 .bytes_unread = ze->ucomp_size,
470 .expected_crc = ze->expected_crc,
474 if (0 != fseeko (zm->fp, zm->offset, SEEK_SET))
/* NOTE(review): this message is prefixed with the member name (ze->name),
   whereas the other messages in this function use the archive file name --
   confirm whether that is intended. */
476 error = xasprintf (_("%s: seek failed (%s)"),
477 ze->name, strerror (errno));
/* The data at the member's offset must begin with the MAGIC_LHDR signature. */
481 error = check_magic (zm->fp, zr->file_name, MAGIC_LHDR);
485 get_u16 (zm->fp); /* v */
486 get_u16 (zm->fp); /* gp */
/* The compression method read here selects the decompressor; a null result
   is reported as an unknown compression type. */
487 uint16_t comp_type = get_u16 (zm->fp);
488 zm->decompressor = get_decompressor (comp_type);
489 if (!zm->decompressor)
491 error = xasprintf (_("%s: member \"%s\" has unknown compression "
493 zr->file_name, zm->member_name, comp_type);
/* The CRC and sizes stored at this point are read past; the values used for
   the member are the ones copied from the directory entry above. */
496 get_u16 (zm->fp); /* time */
497 get_u16 (zm->fp); /* date */
498 get_u32 (zm->fp); /* crc */
499 get_u32 (zm->fp); /* comp_size */
500 get_u32 (zm->fp); /* ucomp_size */
501 uint16_t nlen = get_u16 (zm->fp);
502 uint16_t extra_len = get_u16 (zm->fp);
503 error = get_stream_error (zm->fp, zr->file_name);
/* Read the NLEN-byte name stored here and require it to match the name in
   the directory entry. */
507 char *name = xzalloc (nlen + 1);
508 get_bytes (zm->fp, name, nlen);
509 error = get_stream_error (zm->fp, zr->file_name);
512 if (strcmp (name, ze->name) != 0)
514 error = xasprintf (_("%s: name mismatch between central directory (%s) "
515 "and local file header (%s)"),
516 zm->file_name, ze->name, name);
/* Step over EXTRA_LEN bytes, then let the decompressor set itself up. */
522 skip_bytes (zm->fp, extra_len);
524 error = zm->decompressor->init (zm);
/* NOTE(review): these frees presumably belong to the error path; the
   surrounding control flow is not visible in this excerpt. */
533 free (zm->file_name);
534 free (zm->member_name);
/* Forward declaration; find_eocd() calls probe_magic() before its definition
   further down in this file. */
541 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
544 /* Search for something that looks like the End Of Central Directory in FP.
545 If found, the offset of the record will be placed in OFF.
546 Returns true if found, false otherwise. */
/* Locates the MAGIC_EOCD signature in FP by probing with probe_magic().  The
   first seek goes to 22 bytes before the end of the file, and each probe
   window ends sizeof (magic) bytes after START.
   NOTE(review): the statements that obtain and move START between probes are
   not visible in this excerpt, and the comment beginning "The magic cannot"
   is cut off here. */
549 find_eocd (FILE *fp, off_t *off)
552 const uint32_t magic = MAGIC_EOCD;
555 /* The magic cannot be more than 22 bytes from the end of the file,
556 because that is the minimum length of the EndOfCentralDirectory
559 if (0 > fseeko (fp, -22, SEEK_END))
564 stop = start + sizeof (magic);
567 found = probe_magic (fp, magic, start, stop, off);
568 /* FIXME: For extra confidence lookup the directory start record here*/
571 stop = start + sizeof (magic);
/* NOTE(review): the next three lines are the text of this function's header
   comment; its opening and closing delimiters are not visible in this
   excerpt. */
581 Search FP for MAGIC starting at START and reaching until STOP.
582 Returns true iff MAGIC is found. False otherwise.
583 OFF receives the location of the magic.
586 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
590 unsigned char seq[4];
/* A negative result from the seek to START is handled as a failure. */
593 if (0 > fseeko (fp, start, SEEK_SET))
/* Split MAGIC into four bytes, least significant byte first, so that it can
   be matched against the file one byte at a time. */
598 for (i = 0; i < 4 ; ++i)
600 seq[i] = (magic >> i * 8) & 0xFF;
/* Read a single byte and compare it with the byte expected at the current
   match position. */
605 if (1 != fread (&byte, 1, 1, fp))
608 if (byte == seq[state])
/* The reported offset is four bytes before the current file position. */
615 *off = ftello (fp) - 4;
627 /* Null decompressor. */
/* read hook: copies up to N bytes straight from the member's stream into
   BUF.  If nothing was read and ZM has no error recorded yet, the stream's
   error message is stored in zm->error.
   NOTE(review): the return statements are not visible in this excerpt. */
630 stored_read (struct zip_member *zm, void *buf, size_t n)
632 size_t bytes_read = fread (buf, 1, n, zm->fp);
633 if (!bytes_read && !zm->error)
634 zm->error = get_stream_error (zm->fp, zm->file_name);
/* init hook of the stored decompressor; ZM is marked UNUSED.
   NOTE(review): the body is not visible in this excerpt. */
639 stored_init (struct zip_member *zm UNUSED)
/* finish hook of the stored decompressor; ZM is marked UNUSED and there is
   nothing to release. */
645 stored_finish (struct zip_member *zm UNUSED)
647 /* Nothing required */
/* Hook table for the stored decompressor, listing its init, read and finish
   functions in that order. */
650 static const struct decompressor stored_decompressor =
651 {stored_init, stored_read, stored_finish};
653 /* Inflate decompressor. */
/* Size in bytes of the input buffer `ucomp' below. */
658 #define UCOMPSIZE 4096
/* State used by the inflate hooks through zm->aux.
   NOTE(review): the struct declaration line and some members used by the
   code (zss, cmf_flg) are not visible in this excerpt. */
664 unsigned char ucomp[UCOMPSIZE]; /* Bytes read from the file, handed to inflate() as input. */
666 size_t ucomp_bytes_read; /* Running total of bytes read from the file by inflate_read(). */
668 /* Two bitfields as defined by RFC1950 */
/* finish hook of the inflate decompressor: shuts down the zlib stream kept in
   the inflator state that zm->aux points to.
   NOTE(review): whether the inflator itself is freed is not visible in this
   excerpt. */
673 inflate_finish (struct zip_member *zm)
675 struct inflator *inf = zm->aux;
677 inflateEnd (&inf->zss);
/* init hook of the inflate decompressor: allocates the inflator state,
   precomputes a 16-bit CMF/FLG word, and initializes the zlib stream.
   Returns an error string if inflateInit() fails.
   NOTE(review): the declarations of FLG and R, the storing of INF into
   zm->aux and the success return are not visible in this excerpt. */
683 inflate_init (struct zip_member *zm)
686 struct inflator *inf = XZALLOC (struct inflator);
689 uint16_t cmf = 0x8; /* Always 8 for inflate */
691 const uint16_t cinfo = 7; /* log_2(Window size) - 8 */
693 cmf |= cinfo << 4; /* Put cinfo into the high nibble */
695 /* make these into a 16 bit word */
696 inf->cmf_flg = (cmf << 8) | flg;
698 /* Set the check bits */
/* After this adjustment the 16-bit word is a multiple of 31, which the
   assertion below verifies. */
699 inf->cmf_flg += 31 - (inf->cmf_flg % 31);
700 assert (inf->cmf_flg % 31 == 0);
/* No input is available yet, and Z_NULL is passed for the allocator
   hooks. */
702 inf->zss.next_in = Z_NULL;
703 inf->zss.avail_in = 0;
704 inf->zss.zalloc = Z_NULL;
705 inf->zss.zfree = Z_NULL;
706 inf->zss.opaque = Z_NULL;
707 r = inflateInit (&inf->zss);
710 return xasprintf (_("%s: cannot initialize inflator (%s)"),
711 zm->file_name, zError (r));
/* read hook of the inflate decompressor: refills the input buffer from the
   file when zlib has consumed all of its input, then inflates into BUF,
   which has room for N bytes.  The visible success path returns the number
   of bytes produced; failures record a message in zm->error.
   NOTE(review): the declarations of R, PAD, BYTES_TO_READ and BYTES_READ,
   the conditions around several statements, and the error returns are not
   visible in this excerpt. */
719 inflate_read (struct zip_member *zm, void *buf, size_t n)
722 struct inflator *inf = zm->aux;
/* Refill only when zlib has no input left. */
724 if (inf->zss.avail_in == 0)
/* Place the precomputed CMF/FLG word at the front of the buffer, high byte
   first. */
732 inf->ucomp[1] = inf->cmf_flg ;
733 inf->ucomp[0] = inf->cmf_flg >> 8 ;
/* What remains of the member's compressed data, capped at the buffer size. */
739 bytes_to_read = zm->comp_size - inf->ucomp_bytes_read;
741 if (bytes_to_read == 0)
744 if (bytes_to_read > UCOMPSIZE)
745 bytes_to_read = UCOMPSIZE;
/* File data goes after the PAD bytes at the start of the buffer.
   NOTE(review): if BYTES_TO_READ can be smaller than PAD, the subtraction
   below would go negative or wrap -- the types are not visible here,
   confirm. */
747 bytes_read = fread (inf->ucomp + pad, 1, bytes_to_read - pad, zm->fp);
748 if (!bytes_read && !zm->error)
750 zm->error = get_stream_error (zm->fp, zm->file_name);
754 inf->ucomp_bytes_read += bytes_read;
/* Hand zlib the PAD bytes plus the bytes just read. */
756 inf->zss.avail_in = bytes_read + pad;
757 inf->zss.next_in = inf->ucomp;
/* Output goes directly into the caller's buffer. */
759 inf->zss.avail_out = n;
760 inf->zss.next_out = buf;
762 r = inflate (&inf->zss, Z_NO_FLUSH);
/* Bytes produced equals the space offered minus the space left unused. */
765 return n - inf->zss.avail_out;
769 zm->error = xasprintf (_("%s: error inflating \"%s\" (%s)"),
770 zm->file_name, zm->member_name, zError (r));
/* Hook table for the inflate decompressor, listing its init, read and finish
   functions in that order. */
775 static const struct decompressor inflate_decompressor =
776 {inflate_init, inflate_read, inflate_finish};