1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include <libpspp/assertion.h>
36 #include "zip-reader.h"
37 #include "zip-private.h"
/* Translation helpers: _() passes MSGID through gettext; N_() expands to
   MSGID unchanged. */
40 #define _(msgid) gettext (msgid)
41 #define N_(msgid) (msgid)
44 static bool find_eocd (FILE *fp, off_t *off);
/* Read hook listed first in decompressors[] alongside stored_init and
   stored_finish.  Copies up to N bytes from ZM->fp into BUF with fread and
   returns fread's result, which is a byte count because the element size
   is 1. */
47 stored_read (struct zip_member *zm, void *buf, size_t n)
49 return fread (buf, 1, n, zm->fp);
/* Init hook paired with stored_read in decompressors[].  ZM is marked
   UNUSED; the body is not visible in this excerpt. */
53 stored_init (struct zip_member *zm UNUSED)
/* Finish hook paired with stored_read in decompressors[].  ZM is marked
   UNUSED. */
59 stored_finish (struct zip_member *zm UNUSED)
61 /* Nothing required */
/* Table of per-compression-method hooks, each entry in {init, read, finish}
   order.  It has n_COMPRESSION slots and is indexed elsewhere in this file
   by a member's compression field.
   NOTE(review): slot order presumably follows enum compression
   (COMPRESSION_STORED first, then COMPRESSION_INFLATE) -- confirm against
   the enum declaration. */
65 static struct decompressor decompressors[n_COMPRESSION] =
67 {stored_init, stored_read, stored_finish},
69 {inflate_init, inflate_read, inflate_finish}
/* Translates the compression-method code C, as read from a zip header, into
   an enum compression value.  The visible assignments produce
   COMPRESSION_STORED or COMPRESSION_INFLATE; for any other code an
   "Unsupported compression type" message is written to ZM->errs and
   n_COMPRESSION is used as the failure marker.  The case labels that select
   between these are not visible in this excerpt.
   NOTE(review): n_COMPRESSION is also the size of decompressors[], so a
   caller that indexes the table with this result must reject the failure
   marker first. */
73 static enum compression
74 comp_code (struct zip_member *zm, uint16_t c)
76 enum compression which;
80 which = COMPRESSION_STORED;
84 which = COMPRESSION_INFLATE;
88 ds_put_format (zm->errs, _("Unsupported compression type (%d)"), c);
89 which = n_COMPRESSION;
/* Fields of the archive reader (accessed as zr->... through
   struct zip_reader * elsewhere in this file; the struct header itself is
   outside this excerpt). */
98 char *filename; /* The name of the file from which the data is read */
99 FILE *fr; /* The stream from which the metadata is read */
100 uint16_t n_members; /* The number of members in this archive */
101 struct zip_member **members; /* The members (may be null pointers until the headers have been read) */
/* Called when the caller is done with ZM.  The visible code drops one
   reference with zip_member_unref.
   NOTE(review): the CRC comparison below sits in a comment that begins
   "Probably not useful", so it looks disabled -- confirm in the full file. */
107 zip_member_finish (struct zip_member *zm)
110 /* Probably not useful, because we would have to read right to the end of the member
111 if (zm->expected_crc != zm->crc)
113 ds_put_cstr (zm->errs, _("CRC error reading zip"));
116 zip_member_unref (zm);
121 /* Destroys the zip reader ZR.  The visible loop drops the reader's
   reference to each of its n_members members. */
123 zip_reader_destroy (struct zip_reader *zr)
/* NOTE(review): the field comment on members says entries may be null
   pointers until the headers have been read -- confirm that
   zip_member_unref accepts NULL. */
132 for (i = 0; i < zr->n_members; ++i)
134 zip_member_unref (zr->members[i]);
/* Debugging aid: prints ZM's uncompressed size, expected CRC (8 hex digits)
   and name to stdout as one tab-separated line.  Both numeric fields are
   filled in through get_u32 (), i.e. they are uint32_t, so the <inttypes.h>
   conversion macros are used instead of plain %d and %x, which expect
   int and unsigned int. */
142 zm_dump (const struct zip_member *zm)
144 printf ("%"PRIu32"\t%08"PRIx32"\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name);
148 /* Skips N bytes in F by seeking forward from the current position.
   NOTE(review): the result of fseeko is discarded on the visible line, so
   a failed seek goes unnoticed here. */
150 skip_bytes (FILE *f, size_t n)
152 fseeko (f, n, SEEK_CUR);
155 /* Reads N bytes from F, storing the result in the buffer X.  The body is
   not visible in this excerpt; the callers visible in this file do not use
   a return value. */
157 get_bytes (FILE *f, void *x, size_t n)
162 /* Reads a 32-bit value from F: sizeof x raw bytes are fetched with
   get_bytes.  The WORDS_BIGENDIAN branch presumably byte-swaps the raw
   value before it is stored through V -- that code is not visible here. */
164 get_u32 (FILE *f, uint32_t *v)
167 get_bytes (f, &x, sizeof x);
168 #ifdef WORDS_BIGENDIAN
175 /* Reads a 16-bit value from F: sizeof x raw bytes are fetched with
   get_bytes.  The WORDS_BIGENDIAN branch presumably byte-swaps the raw
   value before it is stored through V -- that code is not visible here. */
177 get_u16 (FILE *f, uint16_t *v)
180 get_bytes (f, &x, sizeof x);
181 #ifdef WORDS_BIGENDIAN
189 /* Reads a 32-bit integer from F and compares it with EXPECTED.
190 Places an error string in ERR if they differ.  The message reports the
   current file position minus sizeof (uint32_t), i.e. the offset just
   before the four bytes in question, followed by the expected and actual
   values in hexadecimal. */
192 check_magic (FILE *f, uint32_t expected, struct string *err)
198 if ((expected != magic))
201 _("Corrupt file at 0x%llx: Expected %"PRIx32"; got %"PRIx32),
202 (long long int) ftello (f) - sizeof (uint32_t), expected, magic);
210 /* Reads up to BYTES bytes from ZM and puts them in BUF.
211 Returns the number of bytes read, or -1 on error */
213 zip_member_read (struct zip_member *zm, void *buf, size_t bytes)
/* Never ask for more than the member still has left to deliver. */
219 if ( bytes > zm->bytes_unread)
220 bytes = zm->bytes_unread;
/* Dispatch to the read hook registered for this member's compression
   method. */
222 bytes_read = decompressors[zm->compression].read (zm, buf, bytes);
/* Fold the bytes just produced into the member's running CRC, then account
   for them. */
226 zm->crc = crc32_update (zm->crc, buf, bytes_read);
228 zm->bytes_unread -= bytes_read;
235 Read a local file header from ZR and add it to ZR's internal array.
236 Returns a pointer to the member read. This pointer belongs to ZR.
237 If the caller wishes to control it, she should ref it with
/* Reads the next directory entry from ZR->fr into a freshly allocated,
   zero-filled member, stores it in ZR->members and opens a private stream
   (ZM->fp) on the archive file.  The entry must begin with MAGIC_SOCD.
   The name read here is the one zip_member_open later reports as the
   central directory's name when it cross-checks the local file header.

   Fixes relative to the previous revision:
   - the name buffer comes from xzalloc (already used for ZM itself), so an
     allocation failure can no longer hand a null pointer to get_bytes on
     the following line;
   - the archive is opened in binary mode ("rb"); a zip file is binary data
     and text mode may alter it on platforms that distinguish the two. */
240 static struct zip_member *
241 zip_header_read_next (struct zip_reader *zr)
243 struct zip_member *zm = xzalloc (sizeof *zm);
245 uint16_t v, nlen, extralen;
246 uint16_t gp, time, date;
248 uint16_t clen, diskstart, iattr;
254 if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs))
/* Two 16-bit fields are read into V in turn; only the stream position
   matters for them here. */
257 get_u16 (zr->fr, &v);
259 get_u16 (zr->fr, &v);
260 get_u16 (zr->fr, &gp);
261 get_u16 (zr->fr, &comp_type);
263 zm->compression = comp_code (zm, comp_type);
265 get_u16 (zr->fr, &time);
266 get_u16 (zr->fr, &date);
267 get_u32 (zr->fr, &zm->expected_crc);
268 get_u32 (zr->fr, &zm->comp_size);
269 get_u32 (zr->fr, &zm->ucomp_size);
270 get_u16 (zr->fr, &nlen);
271 get_u16 (zr->fr, &extralen);
272 get_u16 (zr->fr, &clen);
273 get_u16 (zr->fr, &diskstart);
274 get_u16 (zr->fr, &iattr);
275 get_u32 (zr->fr, &eattr);
276 get_u32 (zr->fr, &zm->offset);
/* One extra zeroed byte keeps the name NUL-terminated after NLEN bytes are
   read into it. */
278 zm->name = xzalloc (nlen + 1);
279 get_bytes (zr->fr, zm->name, nlen);
281 skip_bytes (zr->fr, extralen);
/* NOTE(review): zip_member_open also assigns this function's result to
   zr->members[i]; confirm the two stores always target the same slot. */
283 zr->members[zr->nm++] = zm;
285 zm->fp = fopen (zr->filename, "rb");
293 /* Creates a reader for the zip archive called FILENAME.  Messages about
   failures are written to the reader's error string (zr->errs).

   Visible steps: open the file, check that it starts with MAGIC_LHDR,
   locate the end-of-central-directory record with find_eocd, seek to it,
   check MAGIC_EOCD, read the member counts and the central directory's
   length and start offset, seek to the central directory, then allocate
   the member array and keep a copy of FILENAME.

   Fixes relative to the previous revision:
   - the archive is opened in binary mode ("rb"); a zip file is binary data
     and text mode may alter it on platforms that distinguish the two;
   - the arguments of the last two get_u32 calls had lost the leading
     "&cent" of "&central_dir_..." to a character-encoding mix-up and now
     again take the address of the variables declared above. */
295 zip_reader_create (const char *filename, struct string *errs)
297 uint16_t disknum, total_members;
299 uint32_t central_dir_start, central_dir_length;
/* NOTE(review): no check of this malloc, or of the calloc and strdup at the
   end, is visible in this excerpt -- confirm in the full file. */
301 struct zip_reader *zr = malloc (sizeof *zr);
304 ds_init_empty (zr->errs);
308 zr->fr = fopen (filename, "rb");
311 ds_put_cstr (zr->errs, strerror (errno));
316 if ( ! check_magic (zr->fr, MAGIC_LHDR, zr->errs))
323 if ( ! find_eocd (zr->fr, &offset))
325 ds_put_format (zr->errs, _("Cannot find central directory"));
331 if ( 0 != fseeko (zr->fr, offset, SEEK_SET))
333 const char *mm = strerror (errno);
334 ds_put_format (zr->errs, _("Failed to seek to end of central directory record: %s"), mm);
341 if ( ! check_magic (zr->fr, MAGIC_EOCD, zr->errs))
/* Two consecutive 16-bit fields are read into DISKNUM; only the second
   value survives. */
348 get_u16 (zr->fr, &disknum);
349 get_u16 (zr->fr, &disknum);
351 get_u16 (zr->fr, &zr->n_members);
352 get_u16 (zr->fr, &total_members);
354 get_u32 (zr->fr, &central_dir_length);
355 get_u32 (zr->fr, &central_dir_start);
357 if ( 0 != fseeko (zr->fr, central_dir_start, SEEK_SET))
359 const char *mm = strerror (errno);
360 ds_put_format (zr->errs, _("Failed to seek to central directory: %s"), mm);
/* calloc leaves every member slot a null pointer. */
366 zr->members = calloc (zr->n_members, sizeof (*zr->members));
368 zr->filename = strdup (filename);
375 /* Returns the member called MEMBER from the reader ZR.
   Visible steps: read directory entries with zip_header_read_next until one
   has the requested name, seek the member's own stream to its recorded
   offset, check MAGIC_LHDR there, read the local file header, compare its
   name with the directory's, and initialise the decompressor. */
377 zip_member_open (struct zip_reader *zr, const char *member)
379 uint16_t v, nlen, extra_len;
380 uint16_t gp, comp_type, time, date;
381 uint32_t ucomp_size, comp_size;
388 struct zip_member *zm = NULL;
/* NOTE(review): zip_header_read_next stores each new member in zr->members
   itself, and the result is stored again here at index i -- confirm both
   stores hit the same slot, including on a second call for the same ZR. */
393 for (i = 0 ; i < zr->n_members; ++i)
395 zm = zr->members[i] = zip_header_read_next (zr);
396 if (zm && 0 == strcmp (zm->name, member))
409 if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET))
411 const char *mm = strerror (errno);
412 ds_put_format (zm->errs, _("Failed to seek to start of member `%s': %s"), zm->name, mm);
/* NOTE(review): this message goes to zr->errs while the neighbouring ones
   go to zm->errs -- presumably the same string; verify. */
416 if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs))
/* Local file header fields.  Except for the compression method they are
   read into locals; the sizes kept in ZM are left as they were. */
421 get_u16 (zm->fp, &v);
422 get_u16 (zm->fp, &gp);
423 get_u16 (zm->fp, &comp_type);
424 zm->compression = comp_code (zm, comp_type);
425 get_u16 (zm->fp, &time);
426 get_u16 (zm->fp, &date);
427 get_u32 (zm->fp, &crc);
428 get_u32 (zm->fp, &comp_size);
430 get_u32 (zm->fp, &ucomp_size);
431 get_u16 (zm->fp, &nlen);
432 get_u16 (zm->fp, &extra_len);
/* One extra zeroed byte keeps the name NUL-terminated.
   NOTE(review): no check of the calloc result is visible before it is
   used -- confirm in the full file. */
434 name = calloc (nlen + 1, sizeof (char));
436 get_bytes (zm->fp, name, nlen);
438 skip_bytes (zm->fp, extra_len);
/* The name stored in ZM must agree with the one in the local header. */
440 if (strcmp (name, zm->name) != 0)
442 ds_put_format (zm->errs,
443 _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"),
/* The amount left to read starts at the uncompressed size held in ZM, not
   the local header's value read above. */
452 zm->bytes_unread = zm->ucomp_size;
/* NOTE(review): comp_code yields n_COMPRESSION for an unsupported method,
   which is one past the last slot of decompressors[] -- confirm that value
   is rejected before this lookup. */
454 if ( ! decompressors[zm->compression].init (zm) )
/* Counterpart of zip_member_unref; presumably increments ZM's reference
   count.  The body is not visible in this excerpt. */
461 zip_member_ref (struct zip_member *zm)
/* Drops one reference to ZM.  When the count reaches zero, the finish hook
   for ZM's compression method is run; any further cleanup is not visible
   in this excerpt. */
470 zip_member_unref (struct zip_member *zm)
475 if (--zm->ref_cnt == 0)
477 decompressors[zm->compression].finish (zm);
488 static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off);
491 /* Searches for something that looks like the End Of Central Directory
   record in FP.
492 If found, the offset of the record will be placed in OFF.
493 Returns true if found, false otherwise.
496 find_eocd (FILE *fp, off_t *off)
499 const uint32_t magic = MAGIC_EOCD;
502 /* The magic cannot be more than 22 bytes from the end of the file,
503 because that is the minimum length of the EndOfCentralDirectory
506 if ( 0 > fseeko (fp, -22, SEEK_END))
511 stop = start + sizeof (magic);
514 found = probe_magic (fp, magic, start, stop, off);
515 /* FIXME: For extra confidence lookup the directory start record here*/
518 stop = start + sizeof (magic);
528 Search FP for MAGIC starting at START and reaching until STOP.
529 Returns true iff MAGIC is found. False otherwise.
530 OFF receives the location of the magic.
/* Implementation outline, from the visible lines: seek to START, then read
   FP one byte at a time, comparing each byte with the next expected byte
   of MAGIC. */
533 probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off)
537 unsigned char seq[4];
540 if ( 0 > fseeko (fp, start, SEEK_SET))
/* Split MAGIC into its four bytes, least significant first; presumably the
   order in which they are stored in the file. */
545 for (i = 0; i < 4 ; ++i)
547 seq[i] = (magic >> i * 8) & 0xFF;
/* NOTE(review): the result of fread is discarded on this line, so a short
   read leaves BYTE unchanged -- confirm that end of file is detected
   elsewhere in the loop. */
552 fread (&byte, 1, 1, fp);
554 if ( byte == seq[state])
/* Report the current position minus the 4-byte length of the magic. */
561 *off = ftello (fp) - 4;