1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
37 #include "zip-reader.h"
38 #include "zip-private.h"
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) (msgid)
45 static bool find_eocd (FILE *fp, off_t *off);
/* Callbacks used for the "stored" entry of the decompressors table. */
48 stored_read (struct zip_member *zm, void *buf, size_t n)
50 return fread (buf, 1, n, zm->fp); /* Reads up to N bytes from ZM's stream into BUF; the result is the number of bytes read. */
54 stored_init (struct zip_member *zm UNUSED)
60 stored_finish (struct zip_member *zm UNUSED)
62 /* Nothing required */
/* Table of decompressor callbacks.  Code elsewhere in this file looks an
   entry up as decompressors[zm->compression] and calls its .init, .read
   or .finish member; each row presumably lists them in that order
   (struct decompressor is declared outside this view). */
66 static struct decompressor decompressors[n_COMPRESSION] =
68 {stored_init, stored_read, stored_finish},
69 {inflate_init, inflate_read, inflate_finish}
/* Translates the compression code C into an enum compression value.
   For a code that is not supported, a message is appended to ZM's errmsgs
   and n_COMPRESSION is chosen instead.  (The tests that select each branch
   and the return statement are outside this view.) */
72 static enum compression
73 comp_code (struct zip_member *zm, uint16_t c)
75 enum compression which;
80 which = COMPRESSION_STORED;
83 which = COMPRESSION_INFLATE;
86 ds_put_format (zm->errmsgs, _("Unsupported compression type (%d)"), c);
/* NOTE(review): decompressors[] has exactly n_COMPRESSION entries, so
   n_COMPRESSION itself is not a valid index.  Callers that store this
   result in zm->compression and then index decompressors[] with it need
   to check for this value first -- confirm that they do. */
87 which = n_COMPRESSION;
96 char *filename; /* The name of the file from which the data is read */
97 FILE *fr; /* The stream from which the metadata is read */
98 uint16_t n_members; /* The number of members in this archive */
99 struct zip_member **members; /* The members (may be null pointers until the headers have been read) */
/* Finishes with ZM.  The visible statements clear ZM's error messages and
   drop a reference via zip_member_unref; the CRC comparison below is
   commented out in the original. */
105 zip_member_finish (struct zip_member *zm)
107 ds_clear (zm->errmsgs);
108 /* Probably not useful, because we would have to read right to the end of the member
109 if (zm->expected_crc != zm->crc)
111 ds_put_cstr (zm->errs, _("CRC error reading zip"));
114 zip_member_unref (zm);
119 /* Destroy the zip reader ZR, unreferencing each entry of its members array. */
121 zip_reader_destroy (struct zip_reader *zr)
130 for (i = 0; i < zr->n_members; ++i)
/* NOTE(review): the members array is documented as possibly holding null
   pointers until the headers have been read; presumably zip_member_unref
   tolerates NULL -- confirm. */
132 zip_member_unref (zr->members[i]);
/* Prints ZM's uncompressed size, expected CRC and name to stdout,
   separated by tabs. */
140 zm_dump (const struct zip_member *zm)
/* NOTE(review): ucomp_size and expected_crc are filled in through get_u32
   elsewhere in this file, so they appear to be uint32_t; "%d" and "%08x"
   would then be better written with PRIu32 / PRIx32 -- confirm against the
   struct declaration. */
142 printf ("%d\t%08x\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name);
146 /* Skip N bytes in F by seeking forward from the current position. */
148 skip_bytes (FILE *f, size_t n)
150 fseeko (f, n, SEEK_CUR); /* NOTE(review): the result of fseeko is discarded, so a failed seek goes unnoticed here. */
153 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
156 /* Read N bytes from F, storing the result in X.
    Returns true only if all N bytes were read. */
158 get_bytes (FILE *f, void *x, size_t n)
160 return (n == fread (x, 1, n, f));
163 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
166 /* Read a 32-bit value from F.  Callers pass the address of the variable
    to fill as V and treat a false result as a failed read.  The
    WORDS_BIGENDIAN branch presumably converts the byte order; its body is
    outside this view. */
168 get_u32 (FILE *f, uint32_t *v)
171 if (!get_bytes (f, &x, sizeof x))
173 #ifdef WORDS_BIGENDIAN
181 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
184 /* Read a 16-bit value from F.  Callers pass the address of the variable
    to fill as V and treat a false result as a failed read.  The
    WORDS_BIGENDIAN branch presumably converts the byte order; its body is
    outside this view. */
186 get_u16 (FILE *f, uint16_t *v)
189 if (!get_bytes (f, &x, sizeof x))
191 #ifdef WORDS_BIGENDIAN
200 /* Read a 32-bit integer from F and compare it with EXPECTED.
201 Returns false if the integer cannot be read.  On a mismatch an error
    string is formatted (presumably into ERR; the call itself is outside
    this view) giving the file offset at which the value started, the
    expected value and the value actually found. */
203 check_magic (FILE *f, uint32_t expected, struct string *err)
207 if (! get_u32 (f, &magic)) return false;
209 if ((expected != magic))
212 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
213 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
221 /* Reads up to BYTES bytes from ZM and puts them in BUF.
222 Returns the number of bytes read, or -1 on error */
224 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
228 ds_clear (zm->errmsgs);
/* Never ask for more than remains unread in this member. */
230 if ( bytes > zm->bytes_unread)
231 bytes = zm->bytes_unread;
/* Delegate to the read callback for this member's compression method. */
233 bytes_read = decompressors[zm->compression].read (zm, buf, bytes);
/* Fold the bytes just read into the member's running CRC. */
237 zm->crc = crc32_update (zm->crc, buf, bytes_read);
239 zm->bytes_unread -= bytes_read;
246 Read the next central directory header from ZR and add it to ZR's internal array.
247 Returns a pointer to the member read. This pointer belongs to ZR.
248 If the caller wishes to control it, she should ref it with
/* Reads one header, introduced by MAGIC_SOCD, from ZR's metadata stream
   and builds a zip_member from it.
   NOTE(review): ZM is allocated first, and none of the single-line
   `return NULL' exits below can release it, so a short read appears to
   leak ZM (and ZM->name once that has been allocated). */
251 static struct zip_member *
252 zip_header_read_next (struct zip_reader *zr)
254 struct zip_member *zm = xzalloc (sizeof *zm);
256 uint16_t v, nlen, extralen;
257 uint16_t gp, time, date;
259 uint16_t clen, diskstart, iattr;
264 zm->errmsgs = zr->errs; /* The member reports errors through the reader's string. */
266 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
269 if (! get_u16 (zr->fr, &v)) return NULL;
271 if (! get_u16 (zr->fr, &v)) return NULL;
272 if (! get_u16 (zr->fr, &gp)) return NULL;
273 if (! get_u16 (zr->fr, &comp_type)) return NULL;
275 zm->compression = comp_code (zm, comp_type);
277 if (! get_u16 (zr->fr, &time)) return NULL;
278 if (! get_u16 (zr->fr, &date)) return NULL;
279 if (! get_u32 (zr->fr, &zm->expected_crc)) return NULL;
280 if (! get_u32 (zr->fr, &zm->comp_size)) return NULL;
281 if (! get_u32 (zr->fr, &zm->ucomp_size)) return NULL;
282 if (! get_u16 (zr->fr, &nlen)) return NULL;
283 if (! get_u16 (zr->fr, &extralen)) return NULL;
284 if (! get_u16 (zr->fr, &clen)) return NULL;
285 if (! get_u16 (zr->fr, &diskstart)) return NULL;
286 if (! get_u16 (zr->fr, &iattr)) return NULL;
287 if (! get_u32 (zr->fr, &eattr)) return NULL;
288 if (! get_u32 (zr->fr, &zm->offset)) return NULL;
/* NLEN bytes of name are read into a buffer one byte larger; assuming
   xzalloc zero-fills, that extra byte terminates the string. */
290 zm->name = xzalloc (nlen + 1);
291 if (! get_bytes (zr->fr, zm->name, nlen)) return NULL;
/* NOTE(review): EXTRALEN bytes are skipped here, but no matching skip for
   CLEN is visible in this view -- confirm. */
293 skip_bytes (zr->fr, extralen);
295 zr->members[zr->nm++] = zm; /* Record the member in the reader's array. */
/* Each member gets its own stream on the archive file.
   NOTE(review): no check of fopen's result is visible in this view. */
297 zm->fp = fopen (zr->filename, "rb");
305 /* Create a reader from the zip called FILENAME.
    Problems found along the way are reported through the reader's errs
    string.  The visible steps are: open the file, check that it starts
    with MAGIC_LHDR, locate and verify the end-of-central-directory record,
    read the member count and the central directory position, seek there,
    and allocate the (initially empty) members array.
    NOTE(review): the single-line `return NULL' exits below cannot release
    ZR or close ZR->fr, so a short read appears to leak both. */
307 zip_reader_create (const char *filename, struct string *errs)
309 uint16_t disknum, total_members;
311 uint32_t central_dir_start, central_dir_length;
313 struct zip_reader *zr = xzalloc (sizeof *zr);
316 ds_init_empty (zr->errs);
320 zr->fr = fopen (filename, "rb");
323 ds_put_cstr (zr->errs, strerror (errno));
328 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
335 if ( ! find_eocd (zr->fr, &offset))
337 ds_put_format (zr->errs, _("Cannot find central directory"));
343 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
345 const char *mm = strerror (errno);
346 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
353 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
/* Two 16-bit fields are read into DISKNUM in turn; the second read
   overwrites the first. */
360 if (! get_u16 (zr->fr, &disknum)) return NULL;
361 if (! get_u16 (zr->fr, &disknum)) return NULL;
363 if (! get_u16 (zr->fr, &zr->n_members)) return NULL;
364 if (! get_u16 (zr->fr, &total_members)) return NULL;
/* The two reads below take the addresses of the locals declared above;
   the `&' had been garbled into a stray character in this listing. */
366 if (! get_u32 (zr->fr, &central_dir_length)) return NULL;
367 if (! get_u32 (zr->fr, &central_dir_start)) return NULL;
369 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
371 const char *mm = strerror (errno);
372 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
378 zr->members = xcalloc (zr->n_members, sizeof (*zr->members));
/* NOTE(review): presumably redundant if xcalloc zero-fills like calloc. */
379 memset (zr->members, 0, zr->n_members * sizeof (*zr->members));
/* NOTE(review): unlike the x* allocators used above, plain strdup can
   return NULL; no check is visible in this view. */
381 zr->filename = strdup (filename);
388 /* Return the member called MEMBER from the reader ZR.
    The visible steps are: walk ZR's members (reading headers via
    zip_header_read_next) until one whose name equals MEMBER is found, seek
    that member's own stream to its recorded offset, verify the MAGIC_LHDR
    header found there against the name already known, and set up the
    decompressor. */
390 zip_member_open (struct zip_reader *zr, const char *member)
392 uint16_t v, nlen, extra_len;
393 uint16_t gp, comp_type, time, date;
394 uint32_t ucomp_size, comp_size;
397 bool new_member = false;
401 struct zip_member *zm = NULL;
406 for (i = 0; i < zr->n_members; ++i)
412 zm = zr->members[i] = zip_header_read_next (zr);
415 if (zm && 0 == strcmp (zm->name, member))
424 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
426 const char *mm = strerror (errno);
427 ds_put_format (zm->errmsgs, _("Failed to seek to start of member `%s': %s"), zm->name, mm);
431 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
/* The header fields below are read into locals; of these, only the
   compression type is visibly stored back into ZM. */
436 if (! get_u16 (zm->fp, &v)) return NULL;
437 if (! get_u16 (zm->fp, &gp)) return NULL;
438 if (! get_u16 (zm->fp, &comp_type)) return NULL;
439 zm->compression = comp_code (zm, comp_type);
440 if (! get_u16 (zm->fp, &time)) return NULL;
441 if (! get_u16 (zm->fp, &date)) return NULL;
442 if (! get_u32 (zm->fp, &crc)) return NULL;
443 if (! get_u32 (zm->fp, &comp_size)) return NULL;
445 if (! get_u32 (zm->fp, &ucomp_size)) return NULL;
446 if (! get_u16 (zm->fp, &nlen)) return NULL;
447 if (! get_u16 (zm->fp, &extra_len)) return NULL;
449 name = xzalloc (nlen + 1);
/* NOTE(review): this single-line exit cannot free NAME, so a short read
   appears to leak it. */
451 if (! get_bytes (zm->fp, name, nlen)) return NULL;
453 skip_bytes (zm->fp, extra_len);
/* The name read here must agree with the one already stored in ZM. */
455 if (strcmp (name, zm->name) != 0)
457 ds_put_format (zm->errmsgs,
458 _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"),
467 zm->bytes_unread = zm->ucomp_size; /* Nothing has been read from the member yet. */
470 decompressors[zm->compression].finish (zm);
472 if (!decompressors[zm->compression].init (zm) )
/* Reference counting for zip members.  The body of zip_member_ref is
   outside this view. */
479 zip_member_ref (struct zip_member *zm)
488 zip_member_unref (struct zip_member *zm)
/* When the last reference is dropped, the member's decompressor is shut
   down through its finish callback. */
493 if (--zm->ref_cnt == 0)
495 decompressors[zm->compression].finish (zm);
506 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
509 /* Search for something that looks like the End Of Central Directory in FP.
510 If found, the offset of the record will be placed in OFF.
511 Returns true if found, false otherwise.
514 find_eocd (FILE *fp, off_t *off)
517 const uint32_t magic = MAGIC_EOCD;
520 /* The magic cannot be fewer than 22 bytes from the end of the file,
521 because that is the minimum length of the EndOfCentralDirectory
524 if ( 0 > fseeko (fp, -22, SEEK_END))
529 stop = start + sizeof (magic);
532 found = probe_magic (fp, magic, start, stop, off);
533 /* FIXME: For extra confidence lookup the directory start record here*/
536 stop = start + sizeof (magic);
546 Search FP for MAGIC starting at START and reaching until STOP.
547 Returns true iff MAGIC is found. False otherwise.
548 OFF receives the location of the magic.
551 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
555 unsigned char seq[4];
558 if ( 0 > fseeko (fp, start, SEEK_SET))
/* Split MAGIC into its four bytes, least significant byte first. */
563 for (i = 0; i < 4 ; ++i)
565 seq[i] = (magic >> i * 8) & 0xFF;
/* The stream is read one byte at a time and each byte is compared with
   the next byte of SEQ still to be matched. */
570 if (1 != fread (&byte, 1, 1, fp))
573 if ( byte == seq[state])
/* Four bytes before the current position, i.e. presumably where the
   matched sequence began. */
580 *off = ftello (fp) - 4;