1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <stdlib.h>
27 #include <libpspp/assertion.h>
36 #include "zip-reader.h"
37 #include "zip-private.h"
/* Shorthand that passes MSGID to gettext. */
40 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged. */
41 #define N_(msgid) (msgid)
/* Forward declaration; find_eocd is defined later in this file. */
44 static bool find_eocd (FILE *fp, off_t *off);
/* Read hook of the "stored" decompressor: reads up to N bytes from ZM's
   stream into BUF and returns the number of bytes fread delivered. */
47 stored_read (struct zip_member *zm, void *buf, size_t n)
49 return fread (buf, 1, n, zm->fp);
/* Init hook of the "stored" decompressor.  ZM is not used. */
53 stored_init (struct zip_member *zm UNUSED)
/* Finish hook of the "stored" decompressor.  ZM is not used and there is
   nothing to release. */
59 stored_finish (struct zip_member *zm UNUSED)
61 /* Nothing required */
/* Table of decompressor hooks, declared with n_COMPRESSION slots and
   indexed elsewhere in this file by a member's compression value.
   Each entry presumably lists its init, read and finish hooks in that
   order -- confirm against struct decompressor. */
65 static struct decompressor decompressors[n_COMPRESSION] =
/* Hooks for members stored as-is. */
67 {stored_init, stored_read, stored_finish},
/* Hooks for inflate. */
69 {inflate_init, inflate_read, inflate_finish}
/* Translates the compression method code C into an enum compression
   value.  For a code that is not handled, a message is appended to ZM's
   error string and the result is set to n_COMPRESSION.

   NOTE(review): decompressors[] is declared with n_COMPRESSION slots, so
   n_COMPRESSION itself is not a valid index into it; confirm that callers
   check for it before dispatching through the table. */
73 static enum compression
74 comp_code (struct zip_member *zm, uint16_t c)
76 enum compression which;
80 which = COMPRESSION_STORED;
84 which = COMPRESSION_INFLATE;
/* No matching method: report the code that was found. */
88 ds_put_format (zm->errs, _("Unsupported compression type (%d)"), c);
89 which = n_COMPRESSION;
98 char *filename; /* The name of the file from which the data is read */
99 FILE *fr; /* The stream from which the metadata is read */
100 uint16_t n_members; /* The number of members in this archive */
101 struct zip_member **members; /* The members (may be null pointers until the headers have been read) */
/* Finishes with ZM by dropping one reference to it through
   zip_member_unref. */
107 zip_member_finish (struct zip_member *zm)
110 /* Probably not useful, because we would have to read right to the end of the member
111 if (zm->expected_crc != zm->crc)
113 ds_put_cstr (zm->errs, _("CRC error reading zip"));
116 zip_member_unref (zm);
121 /* Destroy the zip reader ZR, dropping its reference to each member */
123 zip_reader_destroy (struct zip_reader *zr)
/* Unreference every slot of the members array.
   NOTE(review): slots may still be null pointers if their headers were
   never read; confirm that zip_member_unref accepts a null pointer. */
134 for (i = 0; i < zr->n_members; ++i)
136 zip_member_unref (zr->members[i]);
144 zm_dump (const struct zip_member *zm)
146 printf ("%d\t%08x\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name);
150 /* Skip N bytes in F by seeking forward from the current position.  The result of fseeko is not checked. */
152 skip_bytes (FILE *f, size_t n)
154 fseeko (f, n, SEEK_CUR);
157 /* Read N bytes from F, storing the result in X.  X must have room for at least N bytes. */
159 get_bytes (FILE *f, void *x, size_t n)
/* Reads a 32-bit value from F and stores it in *V.

   The value is stored in the file least significant byte first.  The
   bytes are assembled explicitly, so the result is the same on any host
   byte order.  If fewer than four bytes can be read, the bytes that are
   missing count as zero, so *V is always well defined.  */
static void
get_u32 (FILE *f, uint32_t *v)
{
  unsigned char raw[4];
  size_t n_read = fread (raw, 1, sizeof raw, f);
  uint32_t value = 0;
  size_t i;

  /* Only the bytes actually delivered by fread contribute.  */
  for (i = 0; i < n_read; i++)
    value |= (uint32_t) raw[i] << (8 * i);

  *v = value;
}
/* Reads a 16-bit value from F and stores it in *V.

   The value is stored in the file least significant byte first.  The
   bytes are assembled explicitly, so the result is the same on any host
   byte order.  If fewer than two bytes can be read, the bytes that are
   missing count as zero, so *V is always well defined.  */
static void
get_u16 (FILE *f, uint16_t *v)
{
  unsigned char raw[2];
  size_t n_read = fread (raw, 1, sizeof raw, f);
  unsigned int value = 0;
  size_t i;

  /* Only the bytes actually delivered by fread contribute.  */
  for (i = 0; i < n_read; i++)
    value |= (unsigned int) raw[i] << (8 * i);

  *v = (uint16_t) value;
}
191 /* Read a 32 bit integer from F and compare it with EXPECTED.
192 Place an error string in ERR if necessary. */
194 check_magic (FILE *f, uint32_t expected, struct string *err)
/* On a mismatch the message reports where the value began, i.e. the
   current position less the size of the value just read. */
200 if ((expected != magic))
203 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
204 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
212 /* Reads up to BYTES bytes from ZM and puts them in BUF.
213 Returns the number of bytes read, or -1 on error */
215 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
/* Never ask for more than remains unread in the member. */
221 if ( bytes > zm->bytes_unread)
222 bytes = zm->bytes_unread;
/* Dispatch to the read hook selected by the member's compression. */
224 bytes_read = decompressors[zm->compression].read (zm, buf, bytes);
/* Fold the new data into the running CRC and account for it. */
228 zm->crc = crc32_update (zm->crc, buf, bytes_read);
230 zm->bytes_unread -= bytes_read;
237 Read a local file header from ZR and add it to ZR's internal array.
238 Returns a pointer to the member read. This pointer belongs to ZR.
239 If the caller wishes to control it, she should ref it with
242 static struct zip_member *
243 zip_header_read_next (struct zip_reader *zr)
/* NOTE(review): the record parsed below starts with MAGIC_SOCD and
   carries comment-length, disk-start, attribute and offset fields, so it
   looks like a central directory entry rather than a local file header
   (zip_member_open checks MAGIC_LHDR for the latter).  Confirm and
   reword the header comment accordingly. */
245 struct zip_member *zm = xzalloc (sizeof *zm);
247 uint16_t v, nlen, extralen;
248 uint16_t gp, time, date;
250 uint16_t clen, diskstart, iattr;
/* The record must start with the expected signature. */
256 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
/* Fixed-size fields, in file order.  Values read into V, GP, TIME, DATE,
   DISKSTART, IATTR and EATTR are not used by the statements below. */
259 get_u16 (zr->fr, &v);
261 get_u16 (zr->fr, &v);
262 get_u16 (zr->fr, &gp);
263 get_u16 (zr->fr, &comp_type);
/* NOTE(review): comp_code reports unsupported methods through zm->errs;
   confirm that zm->errs has been set by this point. */
265 zm->compression = comp_code (zm, comp_type);
267 get_u16 (zr->fr, &time);
268 get_u16 (zr->fr, &date);
269 get_u32 (zr->fr, &zm->expected_crc);
270 get_u32 (zr->fr, &zm->comp_size);
271 get_u32 (zr->fr, &zm->ucomp_size);
272 get_u16 (zr->fr, &nlen);
273 get_u16 (zr->fr, &extralen);
274 get_u16 (zr->fr, &clen);
275 get_u16 (zr->fr, &diskstart);
276 get_u16 (zr->fr, &iattr);
277 get_u32 (zr->fr, &eattr);
278 get_u32 (zr->fr, &zm->offset);
/* The name is not null-terminated in the file; the extra zeroed byte
   terminates it. */
280 zm->name = xzalloc (nlen + 1);
281 get_bytes (zr->fr, zm->name, nlen);
/* NOTE(review): only EXTRALEN bytes are skipped here.  If CLEN is the
   length of a per-entry comment that follows the extra field, those
   bytes are never skipped and the next header would be read from the
   wrong position -- confirm. */
283 skip_bytes (zr->fr, extralen);
/* Record the member in the next free slot of ZR's array. */
285 zr->members[zr->nm++] = zm;
/* Each member gets its own stream on the archive file.
   NOTE(review): the result of fopen is not checked on this line. */
287 zm->fp = fopen (zr->filename, "r");
295 /* Create a reader from the zip called FILENAME */
297 zip_reader_create (const char *filename, struct string *errs)
299 uint16_t disknum, total_members;
301 uint32_t central_dir_start, central_dir_length;
303 struct zip_reader *zr = xzalloc (sizeof *zr);
306 ds_init_empty (zr->errs);
310 zr->fr = fopen (filename, "r");
313 ds_put_cstr (zr->errs, strerror (errno));
318 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
325 if ( ! find_eocd (zr->fr, &offset))
327 ds_put_format (zr->errs, _("Cannot find central directory"));
333 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
335 const char *mm = strerror (errno);
336 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
343 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
350 get_u16 (zr->fr, &disknum);
351 get_u16 (zr->fr, &disknum);
353 get_u16 (zr->fr, &zr->n_members);
354 get_u16 (zr->fr, &total_members);
356 get_u32 (zr->fr, ¢ral_dir_length);
357 get_u32 (zr->fr, ¢ral_dir_start);
359 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
361 const char *mm = strerror (errno);
362 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
368 zr->members = xcalloc (zr->n_members, sizeof (*zr->members));
369 memset (zr->members, 0, zr->n_members * sizeof (*zr->members));
371 zr->filename = strdup (filename);
378 /* Return the member called MEMBER from the reader ZR */
380 zip_member_open (struct zip_reader *zr, const char *member)
382 uint16_t v, nlen, extra_len;
383 uint16_t gp, comp_type, time, date;
384 uint32_t ucomp_size, comp_size;
387 bool new_member = false;
391 struct zip_member *zm = NULL;
/* Walk the member slots, looking for one whose name equals MEMBER.
   Headers are obtained from zip_header_read_next, presumably only for
   slots that have not been read yet -- confirm. */
396 for (i = 0; i < zr->n_members; ++i)
402 zm = zr->members[i] = zip_header_read_next (zr);
405 if (zm && 0 == strcmp (zm->name, member))
/* Position the member's own stream at the offset recorded for it and
   make sure a local file header starts there. */
414 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
416 const char *mm = strerror (errno);
417 ds_put_format (zm->errs, _("Failed to seek to start of member `%s': %s"), zm->name, mm);
421 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
/* Parse the local file header.  Of these values only the compression
   method is stored into ZM here; the others land in local variables. */
426 get_u16 (zm->fp, &v);
427 get_u16 (zm->fp, &gp);
428 get_u16 (zm->fp, &comp_type);
/* NOTE(review): comp_code can produce n_COMPRESSION, which is not a
   valid index for the decompressors[] lookups below -- confirm that
   case is rejected. */
429 zm->compression = comp_code (zm, comp_type);
430 get_u16 (zm->fp, &time);
431 get_u16 (zm->fp, &date);
432 get_u32 (zm->fp, &crc);
433 get_u32 (zm->fp, &comp_size);
435 get_u32 (zm->fp, &ucomp_size);
436 get_u16 (zm->fp, &nlen);
437 get_u16 (zm->fp, &extra_len);
/* Read the name stored in the local header (the extra zeroed byte
   terminates it) and skip the extra field that follows it. */
439 name = xzalloc (nlen + 1);
441 get_bytes (zm->fp, name, nlen);
443 skip_bytes (zm->fp, extra_len);
/* The local header must name the same member as ZM's recorded name. */
445 if (strcmp (name, zm->name) != 0)
447 ds_put_format (zm->errs,
448 _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"),
/* All of the member's uncompressed data is still to be read. */
457 zm->bytes_unread = zm->ucomp_size;
/* NOTE(review): the finish hook appears before the init hook here;
   whether it runs only for a member that was opened before (see
   NEW_MEMBER) is to be confirmed. */
460 decompressors[zm->compression].finish (zm);
462 if (!decompressors[zm->compression].init (zm) )
/* Counterpart of zip_member_unref; presumably takes an additional
   reference to ZM -- confirm against the body. */
469 zip_member_ref (struct zip_member *zm)
/* Drops one reference to ZM.  When the reference count reaches zero,
   the finish hook of ZM's decompressor is run. */
478 zip_member_unref (struct zip_member *zm)
483 if (--zm->ref_cnt == 0)
485 decompressors[zm->compression].finish (zm);
496 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
499 /* Search for something that looks like the End Of Central Directory in FP.
500 If found, the offset of the record will be placed in OFF.
501 Returns true if found, false otherwise.
504 find_eocd (FILE *fp, off_t *off)
507 const uint32_t magic = MAGIC_EOCD;
510 /* The magic cannot be less than 22 bytes from the end of the file,
511 because that is the minimum length of the EndOfCentralDirectory
514 if ( 0 > fseeko (fp, -22, SEEK_END))
/* Probe a window exactly as wide as the magic itself. */
519 stop = start + sizeof (magic);
522 found = probe_magic (fp, magic, start, stop, off);
523 /* FIXME: For extra confidence look up the directory start record here */
526 stop = start + sizeof (magic);
/* Searches FP for MAGIC, examining the bytes at offsets START (inclusive)
   up to STOP (exclusive).  MAGIC is matched least significant byte
   first.

   Returns true iff MAGIC is found, in which case *OFF receives the offset
   of its first byte.  Returns false, leaving *OFF alone, if MAGIC is not
   found, if FP cannot be positioned at START, or if the file ends or a
   read fails before a match is complete.  */
static bool
probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
{
  unsigned char seq[4];
  int state = 0;
  int i;

  if (0 > fseeko (fp, start, SEEK_SET))
    return false;

  /* Lay MAGIC out in the byte order in which it appears in the file.  */
  for (i = 0; i < 4; ++i)
    seq[i] = (magic >> i * 8) & 0xFF;

  for (; start < stop; start++)
    {
      unsigned char byte;

      /* Stop at end of file or on a read error instead of comparing a
         byte that was never read.  */
      if (fread (&byte, 1, 1, fp) != 1)
        return false;

      if (byte == seq[state])
        state++;
      else
        /* A mismatching byte may itself start a new match.  This simple
           restart is exact whenever the first byte of MAGIC does not
           recur inside it.  */
        state = byte == seq[0] ? 1 : 0;

      if (state == 4)
        {
          *off = ftello (fp) - 4;
          return true;
        }
    }

  return false;
}