From: Ben Pfaff Date: Sun, 16 Jul 2017 03:22:10 +0000 (-0700) Subject: zip-reader: Read the whole central directory at .zip open time. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=56c91a598fef068907bd8de975d5f49b9c02a045 zip-reader: Read the whole central directory at .zip open time. By reading the whole central directory when we open the .zip file, rather than one entry at a time, we make it easier to get a list of entries and make it available to the client. (This will be implemented in an upcoming commit.) This commit also fixes memory leaks in the error case in zip_member_open(). This commit also changes the usage model. Previously, if the caller opened a single member more than once, all the members shared a single copy of the member and read each other's data. Now, each caller that opens a member gets a separate copy and a separate file pointer. Callers now need to call zip_member_finish() when they're done with a member. With this commit, the code only keeps an open FILE for a member if the caller opened that member. Previously, it kept an open FILE for any member that had been encountered. --- diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c index 457edeb4fc..ae9ac8e041 100644 --- a/src/data/ods-reader.c +++ b/src/data/ods-reader.c @@ -85,6 +85,7 @@ enum reader_state struct state_data { xmlTextReaderPtr xtr; + struct zip_member *zm; int node_type; enum reader_state state; int row; @@ -103,6 +104,9 @@ state_data_destroy (struct state_data *sd) xmlFreeTextReader (sd->xtr); sd->xtr = NULL; + + zip_member_finish (sd->zm); + sd->zm = NULL; } struct ods_reader @@ -538,6 +542,7 @@ get_sheet_count (struct zip_reader *zreader) { int s = _xmlchar_to_int (attr); xmlFreeTextReader (mxtr); + zip_member_finish (meta); xmlFree (name); xmlFree (attr); return s; @@ -548,6 +553,7 @@ get_sheet_count (struct zip_reader *zreader) } xmlFreeTextReader (mxtr); + zip_member_finish (meta); return -1; } @@ -565,8 +571,9 @@ ods_error_handler (void *ctx, const char *mesg, } -static xmlTextReaderPtr -init_reader (struct ods_reader *r, bool report_errors) +static bool +init_reader (struct ods_reader *r, bool report_errors, + struct state_data *state) { struct zip_member *content = zip_member_open (r->zreader, "content.xml"); xmlTextReaderPtr xtr; @@ -578,15 +585,18 @@ init_reader (struct ods_reader *r, bool report_errors) report_errors ? 0 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING) ); if ( xtr == NULL) - return false; + return false; + *state = (struct state_data) { .xtr = xtr, + .zm = content, + .state = STATE_INIT }; r->spreadsheet.type = SPREADSHEET_ODS; if (report_errors) xmlTextReaderSetErrorHandler (xtr, ods_error_handler, r); - return xtr; + return true; } @@ -596,7 +606,6 @@ ods_probe (const char *filename, bool report_errors) { int sheet_count; struct ods_reader *r = xzalloc (sizeof *r); - xmlTextReaderPtr xtr; struct zip_reader *zr; ds_init_empty (&r->zip_errs); @@ -620,17 +629,8 @@ ods_probe (const char *filename, bool report_errors) r->zreader = zr; r->spreadsheet.ref_cnt = 1; - xtr = init_reader (r, report_errors); - if (xtr == NULL) - { - goto error; - } - r->msd.xtr = xtr; - r->msd.row = 0; - r->msd.col = 0; - r->msd.current_sheet = 0; - r->msd.state = STATE_INIT; - + if (!init_reader (r, report_errors, &r->msd)) + goto error; r->spreadsheet.n_sheets = sheet_count; r->n_allocated_sheets = 0; @@ -657,7 +657,6 @@ ods_make_reader (struct spreadsheet *spreadsheet, int i; struct var_spec *var_spec = NULL; int n_var_specs = 0; - xmlTextReaderPtr xtr; struct ods_reader *r = (struct ods_reader *) spreadsheet; xmlChar *val_string = NULL; @@ -667,16 +666,9 @@ ods_make_reader (struct spreadsheet *spreadsheet, ds_init_empty (&r->ods_errs); ++r->spreadsheet.ref_cnt; - xtr = init_reader (r, true); - if ( xtr == NULL) + if (!init_reader (r, true, &r->rsd)) goto error; - r->rsd.xtr = xtr; - r->rsd.row = 0; - r->rsd.col = 0; - r->rsd.current_sheet = 0; - r->rsd.state = STATE_INIT; - r->used_first_case = false; r->first_case = NULL; @@ -894,7 +886,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, &ods_file_casereader_class, r); error: - + for ( i = 0 ; i < n_var_specs ; ++i ) { free (var_spec[i].firstval.type); diff --git a/src/libpspp/zip-reader.c b/src/libpspp/zip-reader.c index 0f8c263328..ae8f5a57fb 100644 --- a/src/libpspp/zip-reader.c +++ b/src/libpspp/zip-reader.c @@ -28,7 +28,6 @@ #include #include -#include #include "str.h" @@ -45,13 +44,9 @@ struct zip_member uint32_t offset; /* Starting offset in file. */ uint32_t comp_size; /* Length of member file data, in bytes. */ uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */ - uint32_t expected_crc; /* CRC-32 of member file data.. */ - char *name; /* Name of member file. */ - uint32_t crc; const struct decompressor *decompressor; size_t bytes_unread; /* Number of bytes left in the member available for reading */ - int ref_cnt; struct string *errmsgs; /* A string to hold error messages. This string is NOT owned by this object. */ void *aux; @@ -69,9 +64,8 @@ static const struct decompressor inflate_decompressor; static bool find_eocd (FILE *fp, off_t *off); static const struct decompressor * -get_decompressor (struct zip_member *zm, uint16_t c) +get_decompressor (uint16_t c) { - assert (zm->errmsgs); switch (c) { case 0: @@ -81,37 +75,39 @@ get_decompressor (struct zip_member *zm, uint16_t c) return &inflate_decompressor; default: - ds_put_format (zm->errmsgs, _("Unsupported compression type (%d)"), c); return NULL; } } - struct zip_reader { char *filename; /* The name of the file from which the data is read */ FILE *fr; /* The stream from which the meta data is read */ - uint16_t n_members; /* The number of members in this archive */ - struct zip_member **members; /* The members (may be null pointers until the headers have been read */ - int nm; + uint16_t n_entries; /* Number of directory entries. */ + struct zip_entry *entries; /* Directory entries. */ struct string *errs; }; +struct zip_entry +{ + uint32_t offset; /* Starting offset in file. */ + uint32_t comp_size; /* Length of member file data, in bytes. */ + uint32_t ucomp_size; /* Uncompressed length of member file data, in bytes. */ + char *name; /* Name of member file. */ +}; + void zip_member_finish (struct zip_member *zm) { - ds_clear (zm->errmsgs); - /* Probably not useful, because we would have to read right to the end of the member - if (zm->expected_crc != zm->crc) + if (zm) { - ds_put_cstr (zm->errs, _("CRC error reading zip")); + ds_clear (zm->errmsgs); + zm->decompressor->finish (zm); + fclose (zm->fp); + free (zm); } - */ - zip_member_unref (zm); } - - /* Destroy the zip reader */ void zip_reader_destroy (struct zip_reader *zr) @@ -123,22 +119,16 @@ zip_reader_destroy (struct zip_reader *zr) fclose (zr->fr); free (zr->filename); - for (i = 0; i < zr->n_members; ++i) + for (i = 0; i < zr->n_entries; ++i) { - zip_member_unref (zr->members[i]); + struct zip_entry *ze = &zr->entries[i]; + free (ze->name); } - free (zr->members); + free (zr->entries); free (zr); } -void -zm_dump (const struct zip_member *zm) -{ - printf ("%d\t%08x\t %s\n", zm->ucomp_size, zm->expected_crc, zm->name); -} - - /* Skip N bytes in F */ static void skip_bytes (FILE *f, size_t n) @@ -230,72 +220,51 @@ zip_member_read (struct zip_member *zm, void *buf, size_t bytes) if ( bytes_read < 0) return bytes_read; - zm->crc = crc32_update (zm->crc, buf, bytes_read); - zm->bytes_unread -= bytes_read; return bytes_read; } -/* - Read a local file header from ZR and add it to ZR's internal array. - Returns a pointer to the member read. This pointer belongs to ZR. - If the caller wishes to control it, she should ref it with - zip_member_ref. -*/ -static struct zip_member * -zip_header_read_next (struct zip_reader *zr) +/* Read a central directory header from ZR and initializes ZE with it. + Returns true if successful, false otherwise. */ +static bool +zip_header_read_next (struct zip_reader *zr, struct zip_entry *ze) { - struct zip_member *zm = xzalloc (sizeof *zm); - uint16_t v, nlen, extralen; uint16_t gp, time, date; + uint32_t expected_crc; uint16_t clen, diskstart, iattr; uint32_t eattr; uint16_t comp_type; - ds_clear (zr->errs); - zm->errmsgs = zr->errs; - if ( ! check_magic (zr->fr, MAGIC_SOCD, zr->errs)) - return NULL; - - if (! get_u16 (zr->fr, &v)) return NULL; - - if (! get_u16 (zr->fr, &v)) return NULL; - if (! get_u16 (zr->fr, &gp)) return NULL; - if (! get_u16 (zr->fr, &comp_type)) return NULL; - - zm->decompressor = get_decompressor (zm, comp_type); - if (! zm->decompressor) return NULL; - - if (! get_u16 (zr->fr, &time)) return NULL; - if (! get_u16 (zr->fr, &date)) return NULL; - if (! get_u32 (zr->fr, &zm->expected_crc)) return NULL; - if (! get_u32 (zr->fr, &zm->comp_size)) return NULL; - if (! get_u32 (zr->fr, &zm->ucomp_size)) return NULL; - if (! get_u16 (zr->fr, &nlen)) return NULL; - if (! get_u16 (zr->fr, &extralen)) return NULL; - if (! get_u16 (zr->fr, &clen)) return NULL; - if (! get_u16 (zr->fr, &diskstart)) return NULL; - if (! get_u16 (zr->fr, &iattr)) return NULL; - if (! get_u32 (zr->fr, &eattr)) return NULL; - if (! get_u32 (zr->fr, &zm->offset)) return NULL; + return false; - zm->name = xzalloc (nlen + 1); - if (! get_bytes (zr->fr, zm->name, nlen)) return NULL; + if (! get_u16 (zr->fr, &v)) return false; + if (! get_u16 (zr->fr, &v)) return false; + if (! get_u16 (zr->fr, &gp)) return false; + if (! get_u16 (zr->fr, &comp_type)) return false; + if (! get_u16 (zr->fr, &time)) return false; + if (! get_u16 (zr->fr, &date)) return false; + if (! get_u32 (zr->fr, &expected_crc)) return false; + if (! get_u32 (zr->fr, &ze->comp_size)) return false; + if (! get_u32 (zr->fr, &ze->ucomp_size)) return false; + if (! get_u16 (zr->fr, &nlen)) return false; + if (! get_u16 (zr->fr, &extralen)) return false; + if (! get_u16 (zr->fr, &clen)) return false; + if (! get_u16 (zr->fr, &diskstart)) return false; + if (! get_u16 (zr->fr, &iattr)) return false; + if (! get_u32 (zr->fr, &eattr)) return false; + if (! get_u32 (zr->fr, &ze->offset)) return false; + + ze->name = xzalloc (nlen + 1); + if (! get_bytes (zr->fr, ze->name, nlen)) return false; skip_bytes (zr->fr, extralen); - zr->members[zr->nm++] = zm; - - zm->fp = fopen (zr->filename, "rb"); - zm->ref_cnt = 1; - - - return zm; + return true; } @@ -303,7 +272,7 @@ zip_header_read_next (struct zip_reader *zr) struct zip_reader * zip_reader_create (const char *filename, struct string *errs) { - uint16_t disknum, total_members; + uint16_t disknum, n_members, total_members; off_t offset = 0; uint32_t central_dir_start, central_dir_length; @@ -312,8 +281,6 @@ zip_reader_create (const char *filename, struct string *errs) if ( zr->errs) ds_init_empty (zr->errs); - zr->nm = 0; - zr->fr = fopen (filename, "rb"); if (NULL == zr->fr) { @@ -357,7 +324,7 @@ zip_reader_create (const char *filename, struct string *errs) if (! get_u16 (zr->fr, &disknum) || ! get_u16 (zr->fr, &disknum) - || ! get_u16 (zr->fr, &zr->n_members) + || ! get_u16 (zr->fr, &n_members) || ! get_u16 (zr->fr, &total_members) || ! get_u32 (zr->fr, ¢ral_dir_length) @@ -377,133 +344,120 @@ zip_reader_create (const char *filename, struct string *errs) return NULL; } - zr->members = xcalloc (zr->n_members, sizeof (*zr->members)); - memset (zr->members, 0, zr->n_members * sizeof (*zr->members)); - zr->filename = xstrdup (filename); + zr->entries = xcalloc (n_members, sizeof *zr->entries); + for (int i = 0; i < n_members; i++) + { + if (!zip_header_read_next (zr, &zr->entries[zr->n_entries])) + { + zip_reader_destroy (zr); + return NULL; + } + zr->n_entries++; + } + return zr; } - +static struct zip_entry * +zip_entry_find (struct zip_reader *zr, const char *member) +{ + for (int i = 0; i < zr->n_entries; ++i) + { + struct zip_entry *ze = &zr->entries[i]; + if (0 == strcmp (ze->name, member)) + return ze; + } + return NULL; +} /* Return the member called MEMBER from the reader ZR */ struct zip_member * zip_member_open (struct zip_reader *zr, const char *member) { - uint16_t v, nlen, extra_len; - uint16_t gp, comp_type, time, date; - uint32_t ucomp_size, comp_size; - - uint32_t crc; - bool new_member = false; - char *name = NULL; - - int i; - struct zip_member *zm = NULL; - - if ( zr == NULL) - return NULL; - - for (i = 0; i < zr->n_members; ++i) - { - zm = zr->members[i]; - - if (zm == NULL) - { - zm = zr->members[i] = zip_header_read_next (zr); - new_member = true; - } - if (zm && 0 == strcmp (zm->name, member)) - break; - else - zm = NULL; - } - - if ( zm == NULL) - return NULL; - - if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET)) + struct zip_entry *ze = zip_entry_find (zr, member); + if ( ze == NULL) { - const char *mm = strerror (errno); - ds_put_format (zm->errmsgs, _("Failed to seek to start of member `%s': %s"), zm->name, mm); + ds_put_format (zr->errs, _("%s: unknown member"), member); return NULL; } - if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs)) + FILE *fp = fopen (zr->filename, "rb"); + if (!fp) { + ds_put_cstr (zr->errs, strerror (errno)); return NULL; } - if (! get_u16 (zm->fp, &v)) return NULL; - if (! get_u16 (zm->fp, &gp)) return NULL; - if (! get_u16 (zm->fp, &comp_type)) return NULL; - zm->decompressor = get_decompressor (zm, comp_type); - if (! zm->decompressor) return NULL; - if (! get_u16 (zm->fp, &time)) return NULL; - if (! get_u16 (zm->fp, &date)) return NULL; - if (! get_u32 (zm->fp, &crc)) return NULL; - if (! get_u32 (zm->fp, &comp_size)) return NULL; - - if (! get_u32 (zm->fp, &ucomp_size)) return NULL; - if (! get_u16 (zm->fp, &nlen)) return NULL; - if (! get_u16 (zm->fp, &extra_len)) return NULL; - - name = xzalloc (nlen + 1); + struct zip_member *zm = xmalloc (sizeof *zm); + zm->fp = fp; + zm->offset = ze->offset; + zm->comp_size = ze->comp_size; + zm->ucomp_size = ze->ucomp_size; + zm->decompressor = NULL; + zm->bytes_unread = ze->ucomp_size; + zm->errmsgs = zr->errs; + zm->aux = NULL; - if (! get_bytes (zm->fp, name, nlen)) return NULL; + if ( 0 != fseeko (zm->fp, zm->offset, SEEK_SET)) + { + ds_put_format (zr->errs, _("Failed to seek to start of member `%s': %s"), + ze->name, strerror (errno)); + goto error; + } - skip_bytes (zm->fp, extra_len); + if ( ! check_magic (zm->fp, MAGIC_LHDR, zr->errs)) + goto error; - if (strcmp (name, zm->name) != 0) + uint16_t v, nlen, extra_len; + uint16_t gp, comp_type, time, date; + uint32_t ucomp_size, comp_size; + uint32_t crc; + if (! get_u16 (zm->fp, &v)) goto error; + if (! get_u16 (zm->fp, &gp)) goto error; + if (! get_u16 (zm->fp, &comp_type)) goto error; + zm->decompressor = get_decompressor (comp_type); + if (! zm->decompressor) goto error; + if (! get_u16 (zm->fp, &time)) goto error; + if (! get_u16 (zm->fp, &date)) goto error; + if (! get_u32 (zm->fp, &crc)) goto error; + if (! get_u32 (zm->fp, &comp_size)) goto error; + + if (! get_u32 (zm->fp, &ucomp_size)) goto error; + if (! get_u16 (zm->fp, &nlen)) goto error; + if (! get_u16 (zm->fp, &extra_len)) goto error; + + char *name = xzalloc (nlen + 1); + if (! get_bytes (zm->fp, name, nlen)) + { + free (name); + goto error; + } + if (strcmp (name, ze->name) != 0) { ds_put_format (zm->errmsgs, - _("Name mismatch in zip archive. Central directory says `%s'; local file header says `%s'"), - zm->name, name); + _("Name mismatch in zip archive. Central directory " + "says `%s'; local file header says `%s'"), + ze->name, name); free (name); - free (zm); - return NULL; + goto error; } - free (name); - zm->bytes_unread = zm->ucomp_size; - - if ( !new_member) - zm->decompressor->finish (zm); + skip_bytes (zm->fp, extra_len); if (!zm->decompressor->init (zm) ) - return NULL; + goto error; return zm; -} -void -zip_member_ref (struct zip_member *zm) -{ - zm->ref_cnt++; +error: + fclose (zm->fp); + free (zm); + return NULL; } - - - -void -zip_member_unref (struct zip_member *zm) -{ - if ( zm == NULL) - return; - - if (--zm->ref_cnt == 0) - { - zm->decompressor->finish (zm); - if (zm->fp) - fclose (zm->fp); - free (zm->name); - free (zm); - } -} - - static bool probe_magic (FILE *fp, uint32_t magic, off_t start, off_t stop, off_t *off); diff --git a/src/libpspp/zip-reader.h b/src/libpspp/zip-reader.h index 05642ffdc0..842864143d 100644 --- a/src/libpspp/zip-reader.h +++ b/src/libpspp/zip-reader.h @@ -22,8 +22,6 @@ struct zip_member; struct zip_reader; struct string; -void zm_dump (const struct zip_member *zm); - /* Create zip reader to read the file called FILENAME. If ERRS is non-null if will be used to contain any error messages which the reader wishes to report. @@ -40,12 +38,6 @@ struct zip_member *zip_member_open (struct zip_reader *zr, const char *member); Returns the number of bytes read, or -1 on error */ int zip_member_read (struct zip_member *zm, void *buf, size_t n); -/* Unref (and possibly destroy) the zip member ZM */ -void zip_member_unref (struct zip_member *zm); - -/* Ref the zip member */ -void zip_member_ref (struct zip_member *zm); - void zip_member_finish (struct zip_member *zm); diff --git a/tests/libpspp/zip-test.c b/tests/libpspp/zip-test.c index 011d123841..aea62824bb 100644 --- a/tests/libpspp/zip-test.c +++ b/tests/libpspp/zip-test.c @@ -96,6 +96,7 @@ main (int argc, char **argv) { fwrite (buf, x, 1, fp); } + zip_member_finish (zm); fclose (fp); if ( x < 0) {