1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
28 #include <libpspp/compiler.h>
37 #include "zip-reader.h"
38 #include "zip-private.h"
/* Translation helpers: _() passes MSGID to gettext at run time;
   N_() expands to MSGID unchanged. */
41 #define _(msgid) gettext (msgid)
42 #define N_(msgid) (msgid)
/* Forward declaration: find_eocd is called by zip_reader_create before its
   definition further down. */
45 static bool find_eocd (FILE *fp, off_t *off);
/* Read hook listed in the first decompressors[] row: copies up to N bytes
   from ZM->fp into BUF with no transformation and returns the item count
   reported by fread. */
48 stored_read (struct zip_member *zm, void *buf, size_t n)
50 return fread (buf, 1, n, zm->fp);
/* Init hook listed in the first decompressors[] row.  ZM is marked UNUSED. */
54 stored_init (struct zip_member *zm UNUSED)
/* Finish hook listed in the first decompressors[] row.  ZM is marked UNUSED
   and there is nothing to release. */
60 stored_finish (struct zip_member *zm UNUSED)
62 /* Nothing required */
/* Hook table with n_COMPRESSION entries.  Going by the function names, each
   row lists the init, read and finish hooks in that order.  The rest of this
   file picks a row with zm->compression and calls its .init, .read or
   .finish member. */
66 static struct decompressor decompressors[n_COMPRESSION] =
68 {stored_init, stored_read, stored_finish},
69 {inflate_init, inflate_read, inflate_finish}
/* Maps the compression-method value C, as read from an archive header, to an
   enum compression (COMPRESSION_STORED or COMPRESSION_INFLATE).  For a value
   it does not recognise, it appends an "Unsupported compression type"
   message to ZM->errs and selects n_COMPRESSION.
   NOTE(review): n_COMPRESSION is also the declared length of decompressors[],
   so that result must not be used as an index into the table -- confirm that
   callers check for it. */
72 static enum compression
73 comp_code (struct zip_member *zm, uint16_t c)
75 enum compression which;
79 which = COMPRESSION_STORED;
82 which = COMPRESSION_INFLATE;
85 ds_put_format (zm->errs, _("Unsupported compression type (%d)"), c);
86 which = n_COMPRESSION;
/* State kept for one open archive; these are the fields reached through
   `zr->' elsewhere in this file. */
95 char *filename; /* The name of the file from which the data is read */
96 FILE *fr; /* The stream from which the meta data is read */
97 uint16_t n_members; /* The number of members in this archive */
98 struct zip_member **members; /* The members (may be null pointers until the headers have been read) */
/* Finishes with member ZM by dropping a reference to it through
   zip_member_unref.  The comparison of ZM->expected_crc with ZM->crc that
   follows is commented out; its own note explains that it would only be
   meaningful once the member has been read right to its end. */
104 zip_member_finish (struct zip_member *zm)
107 /* Probably not useful, because we would have to read right to the end of the member
108 if (zm->expected_crc != zm->crc)
110 ds_put_cstr (zm->errs, _("CRC error reading zip"));
113 zip_member_unref (zm);
118 /* Destroys the zip reader ZR.  Drops ZR's reference on every one of the
   ZR->n_members slots of ZR->members. */
120 zip_reader_destroy (struct zip_reader *zr)
/* NOTE(review): the comment on the members field says slots may still be
   null pointers; confirm that zip_member_unref tolerates NULL. */
129 for (i = 0; i < zr->n_members; ++i)
131 zip_member_unref (zr->members[i]);
139 zm_dump (const struct zip_member *zm)
141 printf ("%d\t%08x\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name);
145 /* Skips N bytes in F by seeking forward from the current position. */
147 skip_bytes (FILE *f, size_t n)
/* NOTE(review): the result of fseeko is discarded, so a failed seek goes
   unnoticed by the caller; N is also converted from size_t to off_t here. */
149 fseeko (f, n, SEEK_CUR);
/* Prototype for get_bytes.  WARN_UNUSED_RESULT presumably makes the compiler
   flag callers that ignore the result -- TODO confirm in libpspp/compiler.h. */
152 static bool get_bytes (FILE *f, void *x, size_t n) WARN_UNUSED_RESULT;
/* Fills X with exactly N bytes taken from F.
   Returns true when all N bytes were obtained, false on a short read. */
static bool
get_bytes (FILE *f, void *x, size_t n)
{
  const size_t got = fread (x, 1, n, f);

  return got == n;
}
/* Prototype for get_u32, carrying WARN_UNUSED_RESULT (presumably a
   "result must be checked" attribute -- TODO confirm). */
162 static bool get_u32 (FILE *f, uint32_t *v) WARN_UNUSED_RESULT;
/* Reads a 32 bit value from F into *V.
   The four bytes are combined least-significant first, which is the order
   ZIP archives store multi-byte values in, so the result is the same on
   every host and does not depend on a build-time endianness macro.
   Returns true on success.  On a short read returns false and leaves *V
   untouched. */
static bool
get_u32 (FILE *f, uint32_t *v)
{
  unsigned char raw[4];

  if (!get_bytes (f, raw, sizeof raw))
    return false;

  *v = ((uint32_t) raw[0]
        | ((uint32_t) raw[1] << 8)
        | ((uint32_t) raw[2] << 16)
        | ((uint32_t) raw[3] << 24));
  return true;
}
/* Prototype for get_u16, carrying WARN_UNUSED_RESULT (presumably a
   "result must be checked" attribute -- TODO confirm). */
180 static bool get_u16 (FILE *f, uint16_t *v) WARN_UNUSED_RESULT;
/* Reads a 16 bit value from F into *V.
   The two bytes are combined least-significant first, which is the order
   ZIP archives store multi-byte values in, so the result is the same on
   every host and does not depend on a build-time endianness macro.
   Returns true on success.  On a short read returns false and leaves *V
   untouched. */
static bool
get_u16 (FILE *f, uint16_t *v)
{
  unsigned char raw[2];

  if (!get_bytes (f, raw, sizeof raw))
    return false;

  *v = (uint16_t) (raw[0] | ((unsigned int) raw[1] << 8));
  return true;
}
/* Reads a 32 bit integer from F and compares it with EXPECTED.
   Returns true if they are equal.  Otherwise returns false and appends a
   description of the problem to ERR: either that the value could not be
   read at all, or the file offset at which the unexpected value was
   found. */
static bool
check_magic (FILE *f, uint32_t expected, struct string *err)
{
  uint32_t magic;

  if (!get_u32 (f, &magic))
    {
      /* Report short reads too, so that a caller never sees a failure with
         nothing in ERR to explain it. */
      ds_put_format (err, _("Unexpected end of file while reading magic number"));
      return false;
    }

  if (expected != magic)
    {
      /* Offset of the first byte of the value just read.  The subtraction
         is carried out in a signed type and converted once, so that the
         argument has exactly the type %llx expects. */
      long long int where = (long long int) ftello (f)
                            - (long long int) sizeof magic;

      ds_put_format (err,
                     _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
                     (unsigned long long int) where, expected, magic);
      return false;
    }

  return true;
}
220 /* Reads up to BYTES bytes from ZM and puts them in BUF, never asking for
   more than ZM->bytes_unread.  The bytes obtained are folded into ZM->crc
   and subtracted from ZM->bytes_unread.
221 Returns the number of bytes read, or -1 on error */
223 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
/* Clamp the request to what the member still has to deliver. */
229 if ( bytes > zm->bytes_unread)
230 bytes = zm->bytes_unread;
/* Delegate to the read hook selected by the member's compression. */
232 bytes_read = decompressors[zm->compression].read (zm, buf, bytes);
/* Keep the running CRC and the remaining-byte count up to date. */
236 zm->crc = crc32_update (zm->crc, buf, bytes_read);
238 zm->bytes_unread -= bytes_read;
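/* Read the next file header from ZR's central directory (the record that
   starts with MAGIC_SOCD and carries the offset of the member's local file
   header) and add it to ZR's internal array.
   Returns a pointer to the member read.  This pointer belongs to ZR.
   If the caller wishes to control it, she should ref it with
   zip_member_ref. */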
250 static struct zip_member *
251 zip_header_read_next (struct zip_reader *zr)
/* xzalloc presumably zero-fills, so every field of ZM starts out 0 / NULL. */
253 struct zip_member *zm = xzalloc (sizeof *zm);
/* Most of these locals only receive header fields so that the stream
   advances; no later use of v, gp, time, date, clen, diskstart or iattr is
   visible in this function. */
255 uint16_t v, nlen, extralen;
256 uint16_t gp, time, date;
258 uint16_t clen, diskstart, iattr;
/* NOTE(review): every `return NULL' on the read lines below leaves without
   releasing ZM (and, from the name read onwards, ZM->name).  That looks
   like a leak on truncated input -- confirm and route the failures through
   a common cleanup path. */
264 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
/* V is read twice; the first value is overwritten by the second. */
267 if (! get_u16 (zr->fr, &v)) return NULL;
269 if (! get_u16 (zr->fr, &v)) return NULL;
270 if (! get_u16 (zr->fr, &gp)) return NULL;
271 if (! get_u16 (zr->fr, &comp_type)) return NULL;
/* NOTE(review): comp_code reports an unsupported method through ZM->errs.
   No assignment to ZM->errs is visible before this call, so unless it is
   set elsewhere it is still NULL here -- confirm. */
273 zm->compression = comp_code (zm, comp_type);
275 if (! get_u16 (zr->fr, &time)) return NULL;
276 if (! get_u16 (zr->fr, &date)) return NULL;
/* These four fields are the ones kept on ZM for later use. */
277 if (! get_u32 (zr->fr, &zm->expected_crc)) return NULL;
278 if (! get_u32 (zr->fr, &zm->comp_size)) return NULL;
279 if (! get_u32 (zr->fr, &zm->ucomp_size)) return NULL;
280 if (! get_u16 (zr->fr, &nlen)) return NULL;
281 if (! get_u16 (zr->fr, &extralen)) return NULL;
282 if (! get_u16 (zr->fr, &clen)) return NULL;
283 if (! get_u16 (zr->fr, &diskstart)) return NULL;
284 if (! get_u16 (zr->fr, &iattr)) return NULL;
285 if (! get_u32 (zr->fr, &eattr)) return NULL;
286 if (! get_u32 (zr->fr, &zm->offset)) return NULL;
/* One extra zeroed byte keeps the name NUL-terminated after NLEN bytes are
   read into it. */
288 zm->name = xzalloc (nlen + 1);
289 if (! get_bytes (zr->fr, zm->name, nlen)) return NULL;
/* Step over the extra field.
   NOTE(review): CLEN is read above but no matching skip is visible here;
   confirm whether a non-empty comment field would misalign the next
   header. */
291 skip_bytes (zr->fr, extralen);
/* Record the member in the next slot.
   NOTE(review): no check of zr->nm against zr->n_members is visible. */
293 zr->members[zr->nm++] = zm;
/* The member gets its own stream on the archive, opened by file name.
   NOTE(review): no check of the fopen result is visible. */
295 zm->fp = fopen (zr->filename, "rb");
303 /* Creates a reader for the zip archive called FILENAME.  Visible steps:
   open the file, check that it starts with MAGIC_LHDR, locate and read the
   end-of-central-directory record, seek to the central directory and
   allocate one member slot per entry.  Problems are described in
   ZR->errs. */
305 zip_reader_create (const char *filename, struct string *errs)
307 uint16_t disknum, total_members;
309 uint32_t central_dir_start, central_dir_length;
311 struct zip_reader *zr = xzalloc (sizeof *zr);
/* Presumably ZR->errs has been pointed at ERRS by this point -- confirm. */
314 ds_init_empty (zr->errs);
318 zr->fr = fopen (filename, "rb");
/* Failure path of the fopen above: report the errno text. */
321 ds_put_cstr (zr->errs, strerror (errno));
326 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
333 if ( ! find_eocd (zr->fr, &offset))
335 ds_put_format (zr->errs, _("Cannot find central directory"));
341 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
343 const char *mm = strerror (errno);
344 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
351 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
/* NOTE(review): each `return NULL' on the read lines below leaves without
   closing ZR->fr or releasing ZR, and adds nothing to ZR->errs.  That
   looks like a leak plus a silent failure on a truncated file -- confirm. */
/* DISKNUM is read twice; the first value is overwritten by the second. */
358 if (! get_u16 (zr->fr, &disknum)) return NULL;
359 if (! get_u16 (zr->fr, &disknum)) return NULL;
361 if (! get_u16 (zr->fr, &zr->n_members)) return NULL;
362 if (! get_u16 (zr->fr, &total_members)) return NULL;
/* NOTE(review): the argument on the next two lines reads `¢ral_dir_...'.
   That looks like `&central_dir_...' whose leading `&cent' was turned into
   a cent sign by an HTML-entity decoder; restore the `&' form. */
364 if (! get_u32 (zr->fr, ¢ral_dir_length)) return NULL;
365 if (! get_u32 (zr->fr, ¢ral_dir_start)) return NULL;
367 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
369 const char *mm = strerror (errno);
370 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
/* One slot per member, all null until a header is read into it.
   NOTE(review): xcalloc presumably already zero-fills, which would make the
   memset redundant. */
376 zr->members = xcalloc (zr->n_members, sizeof (*zr->members));
377 memset (zr->members, 0, zr->n_members * sizeof (*zr->members));
/* NOTE(review): the strdup result is not checked here, unlike the x*alloc
   calls used for the other allocations. */
379 zr->filename = strdup (filename);
386 /* Returns the member called MEMBER from the reader ZR, or NULL on failure.
   The member is looked up by name among ZR's entries; its local file header
   is then read through the member's own stream and cross-checked against
   the name already held, after which the decompression hooks are set up. */
388 zip_member_open (struct zip_reader *zr, const char *member)
/* Receivers for local-header fields.  No later use of v, gp, time, date,
   ucomp_size or comp_size is visible in this function. */
390 uint16_t v, nlen, extra_len;
391 uint16_t gp, comp_type, time, date;
392 uint32_t ucomp_size, comp_size;
395 bool new_member = false;
399 struct zip_member *zm = NULL;
/* Walk the member slots.  Headers are pulled in with zip_header_read_next,
   whose result is stored in slot I, and the walk looks for a name equal to
   MEMBER. */
404 for (i = 0; i < zr->n_members; ++i)
410 zm = zr->members[i] = zip_header_read_next (zr);
413 if (zm && 0 == strcmp (zm->name, member))
/* Position the member's private stream at its local file header. */
422 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
424 const char *mm = strerror (errno);
425 ds_put_format (zm->errs, _("Failed to seek to start of member `%s': %s"), zm->name, mm);
429 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
/* NOTE(review): the `return NULL' exits below add nothing to the error
   string, and the ones after NAME is allocated leave without freeing it. */
434 if (! get_u16 (zm->fp, &v)) return NULL;
435 if (! get_u16 (zm->fp, &gp)) return NULL;
436 if (! get_u16 (zm->fp, &comp_type)) return NULL;
/* NOTE(review): comp_code selects n_COMPRESSION for an unsupported method,
   and zm->compression is used below as an index into decompressors[], which
   has n_COMPRESSION entries.  Confirm that this case is rejected before the
   table is indexed. */
437 zm->compression = comp_code (zm, comp_type);
438 if (! get_u16 (zm->fp, &time)) return NULL;
439 if (! get_u16 (zm->fp, &date)) return NULL;
440 if (! get_u32 (zm->fp, &crc)) return NULL;
441 if (! get_u32 (zm->fp, &comp_size)) return NULL;
443 if (! get_u32 (zm->fp, &ucomp_size)) return NULL;
444 if (! get_u16 (zm->fp, &nlen)) return NULL;
445 if (! get_u16 (zm->fp, &extra_len)) return NULL;
/* One extra zeroed byte keeps NAME NUL-terminated for the strcmp below. */
447 name = xzalloc (nlen + 1);
449 if (! get_bytes (zm->fp, name, nlen)) return NULL;
/* Step over the extra field so the stream sits at the member's data. */
451 skip_bytes (zm->fp, extra_len);
/* The local header must name the same member as the entry held in ZM. */
453 if (strcmp (name, zm->name) != 0)
455 ds_put_format (zm->errs,
456 _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"),
/* Everything remains to be read.  The size used is the one already stored
   on ZM, not the UCOMP_SIZE local read above. */
465 zm->bytes_unread = zm->ucomp_size;
/* Release and then set up the decompression state for this member. */
468 decompressors[zm->compression].finish (zm);
470 if (!decompressors[zm->compression].init (zm) )
/* Takes an additional reference to ZM; counterpart of zip_member_unref.
   Presumably increments ZM->ref_cnt -- TODO confirm. */
477 zip_member_ref (struct zip_member *zm)
/* Drops one reference to ZM.  When the count reaches zero, the finish hook
   for ZM's compression is run. */
486 zip_member_unref (struct zip_member *zm)
491 if (--zm->ref_cnt == 0)
493 decompressors[zm->compression].finish (zm);
/* Forward declaration: find_eocd calls probe_magic ahead of its
   definition. */
504 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
/* Search for something that looks like the End Of Central Directory in FP.
   If found, the offset of the record will be placed in OFF.
   Returns true if found, false otherwise. */
/* Looks for the MAGIC_EOCD signature near the end of FP.  It first seeks to
   22 bytes before the end of the file, then hands probe_magic a window that
   ends sizeof (magic) bytes after START; OFF is filled in by probe_magic. */
512 find_eocd (FILE *fp, off_t *off)
515 const uint32_t magic = MAGIC_EOCD;
518 /* The magic cannot be fewer than 22 bytes from the end of the file,
519 because that is the minimum length of the EndOfCentralDirectory
522 if ( 0 > fseeko (fp, -22, SEEK_END))
527 stop = start + sizeof (magic);
530 found = probe_magic (fp, magic, start, stop, off);
531 /* FIXME: For extra confidence lookup the directory start record here*/
534 stop = start + sizeof (magic);
/* Search FP for MAGIC starting at START and reaching until STOP.
   Returns true iff MAGIC is found, false otherwise.
   OFF receives the location of the magic. */
549 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
/* MAGIC split into bytes, least significant first. */
553 unsigned char seq[4];
556 if ( 0 > fseeko (fp, start, SEEK_SET))
561 for (i = 0; i < 4 ; ++i)
563 seq[i] = (magic >> i * 8) & 0xFF;
/* The stream is examined one byte at a time; STATE indexes the byte of SEQ
   expected next. */
568 if (1 != fread (&byte, 1, 1, fp))
/* NOTE(review): confirm that a mismatch re-tests the current byte against
   seq[0] before continuing; otherwise a signature that directly follows a
   stray copy of its own first byte would be missed. */
571 if ( byte == seq[state])
/* After a full match the stream sits just past the 4 signature bytes, hence
   the - 4 to get the offset of its first byte. */
578 *off = ftello (fp) - 4;