1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
35 #include "zip-reader.h"
36 #include "zip-private.h"
39 #define _(msgid) gettext (msgid)
40 #define N_(msgid) (msgid)
43 static bool find_eocd (FILE *fp, off_t *off);
/* Read callback for members that are stored without compression: copies up
   to N bytes from the stream of ZM into BUF with fread and returns the
   result of fread. */
46 stored_read (struct zip_member *zm, void *buf, size_t n)
48 return fread (buf, 1, n, zm->fp);
/* Init callback of the "stored" entry in the decompressor table.  ZM is
   marked UNUSED, so no per-member state is consulted here. */
52 stored_init (struct zip_member *zm UNUSED)
/* Finish callback of the "stored" entry in the decompressor table.  ZM is
   marked UNUSED and there is nothing to release. */
58 stored_finish (struct zip_member *zm UNUSED)
60 /* Nothing required */
/* Table of decompressor callbacks (init, read, finish), one entry per
   compression method.  It is indexed with the compression field of a
   member, so the entry order presumably has to match enum compression;
   confirm against the declaration of that enum. */
64 static struct decompressor decompressors[n_COMPRESSION] =
66 {stored_init, stored_read, stored_finish},
68 {inflate_init, inflate_read, inflate_finish}
/* Translate C, the compression method number read from a zip header, into
   an enum compression value.  For a method that is not handled, a message
   is appended to the error string of ZM and n_COMPRESSION is selected.
   NOTE(review): n_COMPRESSION is also the length of the decompressors
   table, so that value must never be used to index the table; confirm
   that callers check for it. */
72 static enum compression
73 comp_code (struct zip_member *zm, uint16_t c)
75 enum compression which;
79 which = COMPRESSION_STORED;
83 which = COMPRESSION_INFLATE;
87 ds_put_format (zm->errs, _("Unsupported compression type (%d)"), c);
88 which = n_COMPRESSION;
/* Fields of the reader handle that the functions in this file reach
   through a struct zip_reader pointer. */
97 char *filename; /* The name of the file from which the data is read */
98 FILE *fr; /* The stream from which the meta data is read */
99 uint16_t n_members; /* The number of members in this archive */
100 struct zip_member **members; /* The members (may be null pointers until the headers have been read) */
/* Finish with member ZM by giving up one reference to it.  The CRC
   comparison below is deliberately disabled and is kept only as a note. */
106 zip_member_finish (struct zip_member *zm)
109 /* Probably not useful, because we would have to read right to the end of the member
110 if (zm->expected_crc != zm->crc)
112 ds_put_cstr (zm->errs, _("CRC error reading zip")); */
115 zip_member_unref (zm);
120 /* Destroy the zip reader ZR.  Every one of its n_members member slots is
   handed to zip_member_unref.
   NOTE(review): slots can still be null when their headers were never
   read; confirm that zip_member_unref tolerates a null pointer. */
122 zip_reader_destroy (struct zip_reader *zr)
131 for (i = 0; i < zr->n_members; ++i)
133 zip_member_unref (zr->members[i]);
/* Print one line describing ZM on stdout: its uncompressed size, its
   expected CRC in hexadecimal, and its name.  Both numeric fields are
   filled through get_u32 and are therefore uint32_t, so they are printed
   with the matching inttypes macros; a plain %d would show sizes of 2 GiB
   or more as negative numbers. */
141 zm_dump (const struct zip_member *zm)
143 printf ("%"PRIu32"\t%08"PRIx32"\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name);
147 /* Skip N bytes in F by seeking forward from the current position.
   NOTE(review): the result of fseeko is not checked, so a failed seek
   goes unnoticed here. */
149 skip_bytes (FILE *f, size_t n)
151 fseeko (f, n, SEEK_CUR);
154 /* Read N bytes from F, storing the result in X.  X must therefore point
   to at least N writable bytes; the fixed-width readers in this file pass
   the size of their destination object. */
156 get_bytes (FILE *f, void *x, size_t n)
161 /* Read a 32 bit value from F into *X by fetching sizeof *X raw bytes.
   NOTE(review): no byte-order conversion is visible here; confirm how
   big-endian hosts are handled. */
163 get_u32 (FILE *f, uint32_t *x)
165 get_bytes (f, x, sizeof *x);
169 /* Read 32 bit integer and compare it with EXPECTED.
170 Place an error string in ERR if necessary.  The offset quoted in the
   message is the current stream position minus the size of the 32 bit
   value, i.e. the place where the mismatching value begins. */
172 check_magic (FILE *f, uint32_t expected, struct string *err)
178 if ((expected != magic))
181 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
182 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
190 /* Read a 16 bit value from F into *X by fetching sizeof *X raw bytes.
   NOTE(review): no byte-order conversion is visible here; confirm how
   big-endian hosts are handled. */
192 get_u16 (FILE *f, uint16_t *x)
194 get_bytes (f, x, sizeof *x);
197 /* Reads up to BYTES bytes from ZM and puts them in BUF.
198 Returns the number of bytes read, or -1 on error.
   The request is first clamped to the number of bytes still unread in the
   member.  The read callback of the decompressor selected by the member
   then supplies the data, which is folded into the running CRC and
   deducted from the unread count. */
200 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
206 if ( bytes > zm->bytes_unread)
207 bytes = zm->bytes_unread;
209 bytes_read = decompressors[zm->compression].read (zm, buf, bytes);
213 zm->crc = crc32_update (zm->crc, buf, bytes_read);
215 zm->bytes_unread -= bytes_read;
222 /* Read the next central directory file header from ZR and add it to ZR's internal array.
223 Returns a pointer to the member read. This pointer belongs to ZR.
224 If the caller wishes to control it, she should ref it with zip_member_ref. */
/* Parse the directory entry at the current position of the metadata stream
   of ZR into a freshly allocated member, append that member to the member
   array of ZR, and open a separate stream on the archive for the member.
   The entry must begin with MAGIC_SOCD.  Fields that are read into locals
   (versions, flags, time, date, attributes) are consumed only to advance
   the stream. */
227 static struct zip_member *
228 zip_header_read_next (struct zip_reader *zr)
230 struct zip_member *zm = xzalloc (sizeof *zm);
232 uint16_t v, nlen, extralen;
233 uint16_t gp, time, date;
235 uint16_t clen, diskstart, iattr;
241 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
/* Two consecutive 16 bit version fields; neither value is used. */
244 get_u16 (zr->fr, &v);
246 get_u16 (zr->fr, &v);
247 get_u16 (zr->fr, &gp);
248 get_u16 (zr->fr, &comp_type);
/* NOTE(review): comp_code reports unsupported methods through zm->errs, and
   no assignment to zm->errs is visible before this call; confirm that it
   cannot be reached with a null error string. */
250 zm->compression = comp_code (zm, comp_type);
252 get_u16 (zr->fr, &time);
253 get_u16 (zr->fr, &date);
254 get_u32 (zr->fr, &zm->expected_crc);
255 get_u32 (zr->fr, &zm->comp_size);
256 get_u32 (zr->fr, &zm->ucomp_size);
257 get_u16 (zr->fr, &nlen);
258 get_u16 (zr->fr, &extralen);
259 get_u16 (zr->fr, &clen);
260 get_u16 (zr->fr, &diskstart);
261 get_u16 (zr->fr, &iattr);
262 get_u32 (zr->fr, &eattr);
263 get_u32 (zr->fr, &zm->offset);
/* Zero-filled and one byte longer than the name, so the name stays
   NUL-terminated; xzalloc never returns a null pointer. */
265 zm->name = xzalloc (nlen + 1);
266 get_bytes (zr->fr, zm->name, nlen);
/* The name is followed by the extra field and then by the entry comment;
   both must be stepped over to land on the next entry. */
268 skip_bytes (zr->fr, (size_t) extralen + clen);
270 zr->members[zr->nm++] = zm;
/* The archive is binary data, so open it in binary mode. */
272 zm->fp = fopen (zr->filename, "rb");
280 /* Create a reader from the zip called FILENAME.
   The file is opened in binary mode, its leading magic is checked, the end
   of central directory record is located and parsed, and the metadata
   stream is left positioned at the start of the central directory.
   Problems are described in the error string of the reader.  Allocation
   uses the x-alloc family, which does not return null pointers. */
282 zip_reader_create (const char *filename, struct string *errs)
284 uint16_t disknum, total_members;
286 uint32_t central_dir_start, central_dir_length;
288 struct zip_reader *zr = xmalloc (sizeof *zr);
291 ds_init_empty (zr->errs);
295 zr->fr = fopen (filename, "rb");
298 ds_put_cstr (zr->errs, strerror (errno));
303 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
310 if ( ! find_eocd (zr->fr, &offset))
312 ds_put_format (zr->errs, _("Cannot find central directory"));
318 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
320 const char *mm = strerror (errno);
321 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
328 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
/* Two consecutive 16 bit disk number fields; only the stream position
   matters here. */
335 get_u16 (zr->fr, &disknum);
336 get_u16 (zr->fr, &disknum);
338 get_u16 (zr->fr, &zr->n_members);
339 get_u16 (zr->fr, &total_members);
341 get_u32 (zr->fr, &central_dir_length);
342 get_u32 (zr->fr, &central_dir_start);
344 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
346 const char *mm = strerror (errno);
347 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
/* One zero-initialised slot per member; slots are filled as the directory
   entries are read. */
353 zr->members = xcalloc (zr->n_members, sizeof (*zr->members));
355 zr->filename = xstrdup (filename);
362 /* Return the member called MEMBER from the reader ZR.
   Directory entries are read one at a time until one whose name equals
   MEMBER turns up.  The stream of that member is then moved to the
   recorded offset, where a header starting with MAGIC_LHDR is parsed and
   its name compared with the name from the directory.  Finally the unread
   count is set to the uncompressed size and the init callback of the
   decompressor for the member is run. */
364 zip_member_open (struct zip_reader *zr, const char *member)
366 uint16_t v, nlen, extra_len;
367 uint16_t gp, comp_type, time, date;
368 uint32_t ucomp_size, comp_size;
375 struct zip_member *zm = NULL;
380 for (i = 0 ; i < zr->n_members; ++i)
382 zm = zr->members[i] = zip_header_read_next (zr);
383 if (zm && 0 == strcmp (zm->name, member))
396 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
398 const char *mm = strerror (errno);
399 ds_put_format (zm->errs, _("Failed to seek to start of member `%s': %s"), zm->name, mm);
403 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
408 get_u16 (zm->fp, &v);
409 get_u16 (zm->fp, &gp);
410 get_u16 (zm->fp, &comp_type);
411 zm->compression = comp_code (zm, comp_type);
412 get_u16 (zm->fp, &time);
413 get_u16 (zm->fp, &date);
414 get_u32 (zm->fp, &crc);
415 get_u32 (zm->fp, &comp_size);
417 get_u32 (zm->fp, &ucomp_size);
418 get_u16 (zm->fp, &nlen);
419 get_u16 (zm->fp, &extra_len);
/* Zero-filled and one byte longer than the name, so the name stays
   NUL-terminated; xcalloc never returns a null pointer, which matters
   because get_bytes writes through it immediately. */
421 name = xcalloc (nlen + 1, sizeof (char));
423 get_bytes (zm->fp, name, nlen);
425 skip_bytes (zm->fp, extra_len);
427 if (strcmp (name, zm->name) != 0)
429 ds_put_format (zm->errs,
430 _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"),
439 zm->bytes_unread = zm->ucomp_size;
441 if ( ! decompressors[zm->compression].init (zm) )
/* Take a reference to ZM; the counterpart of zip_member_unref, which gives
   one up.  Presumably this increments the ref_cnt field that
   zip_member_unref decrements (TODO confirm). */
448 zip_member_ref (struct zip_member *zm)
/* Give up one reference to ZM.  When the reference count drops to zero,
   the finish callback of the decompressor selected by the member is run. */
457 zip_member_unref (struct zip_member *zm)
462 if (--zm->ref_cnt == 0)
464 decompressors[zm->compression].finish (zm);
475 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
478 /* Search for something that looks like the End Of Central Directory in FP.
479 If found, the offset of the record will be placed in OFF.
480 Returns true if found, false otherwise. */
483 find_eocd (FILE *fp, off_t *off)
486 const uint32_t magic = MAGIC_EOCD;
489 /* The magic cannot be fewer than 22 bytes from the end of the file,
490 because that is the minimum length of the EndOfCentralDirectory
record, so the search begins 22 bytes before the end. */
493 if ( 0 > fseeko (fp, -22, SEEK_END))
/* Each probe covers a window that is one magic wide, beginning at START. */
498 stop = start + sizeof (magic);
501 found = probe_magic (fp, magic, start, stop, off);
502 /* FIXME: For extra confidence lookup the directory start record here*/
505 stop = start + sizeof (magic);
515 /* Search FP for MAGIC, starting at START and continuing until STOP.
516 Returns true iff MAGIC is found, false otherwise.
517 OFF receives the location of the magic. */
520 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
524 unsigned char seq[4];
527 if ( 0 > fseeko (fp, start, SEEK_SET))
/* Split MAGIC into its four bytes, least significant byte first; the
   stream is compared against them in that order. */
532 for (i = 0; i < 4 ; ++i)
534 seq[i] = (magic >> i * 8) & 0xFF;
/* A failed or short read leaves BYTE without a fresh value, so report
   "not found" rather than comparing stale data. */
539 if (1 != fread (&byte, 1, 1, fp)) return false;
541 if ( byte == seq[state])
/* The stream now sits just past the fourth matching byte, so the magic
   began four bytes earlier. */
548 *off = ftello (fp) - 4;