1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
36 #include "zip-reader.h"
37 #include "zip-private.h"
40 #define _(msgid) gettext (msgid)
41 #define N_(msgid) (msgid)
44 static bool find_eocd (FILE *fp, off_t *off);
47 stored_read (struct zip_member *zm, void *buf, size_t n)
49 return fread (buf, 1, n, zm->fp);
53 stored_init (struct zip_member *zm UNUSED)
59 stored_finish (struct zip_member *zm UNUSED)
61 /* Nothing required */
65 static struct decompressor decompressors[n_COMPRESSION] =
67 {stored_init, stored_read, stored_finish},
68 {inflate_init, inflate_read, inflate_finish}
71 static enum compression
72 comp_code (struct zip_member *zm, uint16_t c)
74 enum compression which;
78 which = COMPRESSION_STORED;
81 which = COMPRESSION_INFLATE;
84 ds_put_format (zm->errs, _("Unsupported compression type (%d)"), c);
85 which = n_COMPRESSION;
94 char *filename; /* The name of the file from which the data is read */
95 FILE *fr; /* The stream from which the meta data is read */
96 uint16_t n_members; /* The number of members in this archive */
97 struct zip_member **members; /* The members (may be null pointers until the headers have been read) */
103 zip_member_finish (struct zip_member *zm)
106 /* Probably not useful, because we would have to read right to the end of the member
107 if (zm->expected_crc != zm->crc)
109 ds_put_cstr (zm->errs, _("CRC error reading zip"));
112 zip_member_unref (zm);
117 /* Destroy the zip reader */
119 zip_reader_destroy (struct zip_reader *zr)
128 for (i = 0; i < zr->n_members; ++i)
130 zip_member_unref (zr->members[i]);
138 zm_dump (const struct zip_member *zm)
140 printf ("%d\t%08x\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name);
144 /* Skip N bytes in F */
146 skip_bytes (FILE *f, size_t n)
148 fseeko (f, n, SEEK_CUR);
151 /* Read N bytes from F, storing the result in X */
153 get_bytes (FILE *f, void *x, size_t n)
158 /* Read a 32 bit value from F */
160 get_u32 (FILE *f, uint32_t *v)
163 get_bytes (f, &x, sizeof x);
164 #ifdef WORDS_BIGENDIAN
171 /* Read a 16 bit value from F */
173 get_u16 (FILE *f, uint16_t *v)
176 get_bytes (f, &x, sizeof x);
177 #ifdef WORDS_BIGENDIAN
185 /* Read a 32-bit integer from F and compare it with EXPECTED.
186 Place an error string in ERR if they do not match. */
188 check_magic (FILE *f, uint32_t expected, struct string *err)
194 if ((expected != magic))
197 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
198 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
206 /* Reads up to BYTES bytes from ZM and puts them in BUF.
207 Returns the number of bytes read, or -1 on error */
209 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
215 if ( bytes > zm->bytes_unread)
216 bytes = zm->bytes_unread;
218 bytes_read = decompressors[zm->compression].read (zm, buf, bytes);
222 zm->crc = crc32_update (zm->crc, buf, bytes_read);
224 zm->bytes_unread -= bytes_read;
231 Read a central directory file header from ZR and add it to ZR's internal array.
232 Returns a pointer to the member read. This pointer belongs to ZR.
233 If the caller wishes to control it, she should ref it with
236 static struct zip_member *
237 zip_header_read_next (struct zip_reader *zr)
239 struct zip_member *zm = xzalloc (sizeof *zm);
241 uint16_t v, nlen, extralen;
242 uint16_t gp, time, date;
244 uint16_t clen, diskstart, iattr;
250 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
253 get_u16 (zr->fr, &v);
255 get_u16 (zr->fr, &v);
256 get_u16 (zr->fr, &gp);
257 get_u16 (zr->fr, &comp_type);
259 zm->compression = comp_code (zm, comp_type);
261 get_u16 (zr->fr, &time);
262 get_u16 (zr->fr, &date);
263 get_u32 (zr->fr, &zm->expected_crc);
264 get_u32 (zr->fr, &zm->comp_size);
265 get_u32 (zr->fr, &zm->ucomp_size);
266 get_u16 (zr->fr, &nlen);
267 get_u16 (zr->fr, &extralen);
268 get_u16 (zr->fr, &clen);
269 get_u16 (zr->fr, &diskstart);
270 get_u16 (zr->fr, &iattr);
271 get_u32 (zr->fr, &eattr);
272 get_u32 (zr->fr, &zm->offset);
274 zm->name = xzalloc (nlen + 1);
275 get_bytes (zr->fr, zm->name, nlen);
277 skip_bytes (zr->fr, extralen);
279 zr->members[zr->nm++] = zm;
281 zm->fp = fopen (zr->filename, "r");
289 /* Create a reader from the zip called FILENAME */
291 zip_reader_create (const char *filename, struct string *errs)
293 uint16_t disknum, total_members;
295 uint32_t central_dir_start, central_dir_length;
297 struct zip_reader *zr = xzalloc (sizeof *zr);
300 ds_init_empty (zr->errs);
304 zr->fr = fopen (filename, "r");
307 ds_put_cstr (zr->errs, strerror (errno));
312 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
319 if ( ! find_eocd (zr->fr, &offset))
321 ds_put_format (zr->errs, _("Cannot find central directory"));
327 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
329 const char *mm = strerror (errno);
330 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
337 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
344 get_u16 (zr->fr, &disknum);
345 get_u16 (zr->fr, &disknum);
347 get_u16 (zr->fr, &zr->n_members);
348 get_u16 (zr->fr, &total_members);
350 get_u32 (zr->fr, &central_dir_length);
351 get_u32 (zr->fr, &central_dir_start);
353 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
355 const char *mm = strerror (errno);
356 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
362 zr->members = xcalloc (zr->n_members, sizeof (*zr->members));
363 memset (zr->members, 0, zr->n_members * sizeof (*zr->members));
365 zr->filename = strdup (filename);
372 /* Return the member called MEMBER from the reader ZR */
374 zip_member_open (struct zip_reader *zr, const char *member)
376 uint16_t v, nlen, extra_len;
377 uint16_t gp, comp_type, time, date;
378 uint32_t ucomp_size, comp_size;
381 bool new_member = false;
385 struct zip_member *zm = NULL;
390 for (i = 0; i < zr->n_members; ++i)
396 zm = zr->members[i] = zip_header_read_next (zr);
399 if (zm && 0 == strcmp (zm->name, member))
408 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
410 const char *mm = strerror (errno);
411 ds_put_format (zm->errs, _("Failed to seek to start of member `%s': %s"), zm->name, mm);
415 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
420 get_u16 (zm->fp, &v);
421 get_u16 (zm->fp, &gp);
422 get_u16 (zm->fp, &comp_type);
423 zm->compression = comp_code (zm, comp_type);
424 get_u16 (zm->fp, &time);
425 get_u16 (zm->fp, &date);
426 get_u32 (zm->fp, &crc);
427 get_u32 (zm->fp, &comp_size);
429 get_u32 (zm->fp, &ucomp_size);
430 get_u16 (zm->fp, &nlen);
431 get_u16 (zm->fp, &extra_len);
433 name = xzalloc (nlen + 1);
435 get_bytes (zm->fp, name, nlen);
437 skip_bytes (zm->fp, extra_len);
439 if (strcmp (name, zm->name) != 0)
441 ds_put_format (zm->errs,
442 _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"),
451 zm->bytes_unread = zm->ucomp_size;
454 decompressors[zm->compression].finish (zm);
456 if (!decompressors[zm->compression].init (zm) )
463 zip_member_ref (struct zip_member *zm)
472 zip_member_unref (struct zip_member *zm)
477 if (--zm->ref_cnt == 0)
479 decompressors[zm->compression].finish (zm);
490 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
493 /* Search for something that looks like the End Of Central Directory in FP.
494 If found, the offset of the record will be placed in OFF.
495 Returns true if found, false otherwise.
498 find_eocd (FILE *fp, off_t *off)
501 const uint32_t magic = MAGIC_EOCD;
504 /* The magic cannot be fewer than 22 bytes from the end of the file,
505 because that is the minimum length of the EndOfCentralDirectory
508 if ( 0 > fseeko (fp, -22, SEEK_END))
513 stop = start + sizeof (magic);
516 found = probe_magic (fp, magic, start, stop, off);
517 /* FIXME: For extra confidence, look up the directory start record here. */
520 stop = start + sizeof (magic);
530 Search FP for MAGIC starting at START and reaching until STOP.
531 Returns true if MAGIC is found, false otherwise.
532 OFF receives the location of the magic.
535 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
539 unsigned char seq[4];
542 if ( 0 > fseeko (fp, start, SEEK_SET))
547 for (i = 0; i < 4 ; ++i)
549 seq[i] = (magic >> i * 8) & 0xFF;
554 fread (&byte, 1, 1, fp);
556 if ( byte == seq[state])
563 *off = ftello (fp) - 4;