X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=utilities%2Fpspp-dump-sav.c;h=36cb285380f564c93edbc19be87d4f6383ba43bb;hb=refs%2Fheads%2Fctables7;hp=a23ce6b42087d1cb5286d96024b5f8c790c6ab09;hpb=c6a0811bac0249e4f0d60ad979d37c2719c6a020;p=pspp diff --git a/utilities/pspp-dump-sav.c b/utilities/pspp-dump-sav.c index a23ce6b420..36cb285380 100644 --- a/utilities/pspp-dump-sav.c +++ b/utilities/pspp-dump-sav.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,11 +18,15 @@ #include #include +#include +#include #include +#include #include #include #include "data/val-type.h" +#include "libpspp/cast.h" #include "libpspp/compiler.h" #include "libpspp/float-format.h" #include "libpspp/integer-format.h" @@ -31,13 +35,19 @@ #include "gl/error.h" #include "gl/minmax.h" #include "gl/progname.h" +#include "gl/version-etc.h" #include "gl/xalloc.h" - -#include "gettext.h" -#define _(msgid) gettext (msgid) +#include "gl/xsize.h" #define ID_MAX_LEN 64 +enum compression + { + COMP_NONE, + COMP_SIMPLE, + COMP_ZLIB + }; + struct sfm_reader { const char *file_name; @@ -51,7 +61,7 @@ struct sfm_reader enum integer_format integer_format; enum float_format float_format; - bool compressed; + enum compression compression; double bias; }; @@ -64,6 +74,8 @@ static void read_machine_integer_info (struct sfm_reader *, size_t size, size_t count); static void read_machine_float_info (struct sfm_reader *, size_t size, size_t count); +static void read_extra_product_info (struct sfm_reader *, + size_t size, size_t count); static void read_mrsets (struct sfm_reader *, size_t size, size_t count); static void read_display_parameters (struct sfm_reader *, size_t size, size_t count); @@ -80,12 +92,15 @@ static void read_character_encoding (struct sfm_reader *r, size_t size, size_t count); static void read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count); +static void read_long_string_missing_values (struct sfm_reader *r, + size_t size, size_t count); static void read_unknown_extension (struct sfm_reader *, size_t size, size_t count); -static void read_compressed_data (struct sfm_reader *); +static void read_simple_compressed_data (struct sfm_reader *, int max_cases); +static void read_zlib_compressed_data (struct sfm_reader *); static struct text_record *open_text_record ( - struct sfm_reader *, size_t size); + struct sfm_reader *, size_t size, size_t count); static void close_text_record (struct text_record *); static bool read_variable_to_value_pair (struct text_record *, char **key, char **value); @@ -93,8 +108,9 @@ static char *text_tokenize (struct text_record *, int delimiter); static bool text_match (struct text_record *text, int c); static const char *text_parse_counted_string (struct text_record *); static size_t text_pos (const struct text_record *); +static const char *text_get_all (const struct text_record *); -static void usage (int exit_code); +static void usage (void); static void sys_warn (struct sfm_reader *, const char *, ...) PRINTF_FORMAT (2, 3); static void sys_error (struct sfm_reader *, const char *, ...) @@ -110,17 +126,58 @@ static void read_string (struct sfm_reader *, char *, size_t); static void skip_bytes (struct sfm_reader *, size_t); static void trim_spaces (char *); +static void print_string (const char *s, size_t len); + int main (int argc, char *argv[]) { + int max_cases = 0; struct sfm_reader r; int i; set_program_name (argv[0]); - if (argc < 2) - usage (EXIT_FAILURE); - for (i = 1; i < argc; i++) + for (;;) + { + static const struct option long_options[] = + { + { "data", optional_argument, NULL, 'd' }, + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'v' }, + { NULL, 0, NULL, 0 }, + }; + + int c; + + c = getopt_long (argc, argv, "d::hv", long_options, NULL); + if (c == -1) + break; + + switch (c) + { + case 'd': + max_cases = optarg ? atoi (optarg) : INT_MAX; + break; + + case 'v': + version_etc (stdout, "pspp-dump-sav", PACKAGE_NAME, PACKAGE_VERSION, + "Ben Pfaff", "John Darrington", NULL_SENTINEL); + exit (EXIT_SUCCESS); + + case 'h': + usage (); + exit (EXIT_SUCCESS); + + default: + exit (EXIT_FAILURE); + } + } + + if (optind == argc) + error (1, 0, "at least one non-option argument is required; " + "use --help for help"); + + for (i = optind; i < argc; i++) { int rec_type; @@ -133,11 +190,11 @@ main (int argc, char *argv[]) r.n_var_widths = 0; r.allocated_var_widths = 0; r.var_widths = 0; - r.compressed = false; + r.compression = COMP_NONE; - if (argc > 2) + if (argc - optind > 1) printf ("Reading \"%s\":\n", r.file_name); - + read_header (&r); while ((rec_type = read_int (&r)) != 999) { @@ -152,7 +209,7 @@ main (int argc, char *argv[]) break; case 4: - sys_error (&r, _("Misplaced type 4 record.")); + sys_error (&r, "Misplaced type 4 record."); case 6: read_document_record (&r); @@ -163,7 +220,7 @@ main (int argc, char *argv[]) break; default: - sys_error (&r, _("Unrecognized record type %d."), rec_type); + sys_error (&r, "Unrecognized record type %d.", rec_type); } } printf ("%08llx: end-of-dictionary record " @@ -171,12 +228,19 @@ main (int argc, char *argv[]) (long long int) ftello (r.file), (long long int) ftello (r.file) + 4); - if (r.compressed) - read_compressed_data (&r); + if (r.compression == COMP_SIMPLE) + { + if (max_cases > 0) + read_simple_compressed_data (&r, max_cases); + } + else if (r.compression == COMP_ZLIB) + read_zlib_compressed_data (&r); + + free (r.var_widths); fclose (r.file); } - + return 0; } @@ -187,7 +251,6 @@ read_header (struct sfm_reader *r) char eye_catcher[61]; uint8_t raw_layout_code[4]; int32_t layout_code; - int32_t nominal_case_size; int32_t compressed; int32_t weight_index; int32_t ncases; @@ -195,12 +258,17 @@ read_header (struct sfm_reader *r) char creation_date[10]; char creation_time[9]; char file_label[65]; + bool zmagic; read_string (r, rec_type, sizeof rec_type); read_string (r, eye_catcher, sizeof eye_catcher); - if (strcmp ("$FL2", rec_type) != 0) - sys_error (r, _("This is not an SPSS system file.")); + if (!strcmp ("$FL2", rec_type)) + zmagic = false; + else if (!strcmp ("$FL3", rec_type)) + zmagic = true; + else + sys_error (r, "This is not an SPSS system file."); /* Identify integer format. */ read_bytes (r, raw_layout_code, sizeof raw_layout_code); @@ -210,24 +278,40 @@ read_header (struct sfm_reader *r) &r->integer_format)) || (r->integer_format != INTEGER_MSB_FIRST && r->integer_format != INTEGER_LSB_FIRST)) - sys_error (r, _("This is not an SPSS system file.")); + sys_error (r, "This is not an SPSS system file."); layout_code = integer_get (r->integer_format, raw_layout_code, sizeof raw_layout_code); - nominal_case_size = read_int (r); + read_int (r); /* Nominal case size (not actually useful). */ compressed = read_int (r); weight_index = read_int (r); ncases = read_int (r); - r->compressed = compressed != 0; + if (!zmagic) + { + if (compressed == 0) + r->compression = COMP_NONE; + else if (compressed == 1) + r->compression = COMP_SIMPLE; + else if (compressed != 0) + sys_error (r, "SAV file header has invalid compression value " + "%"PRId32".", compressed); + } + else + { + if (compressed == 2) + r->compression = COMP_ZLIB; + else + sys_error (r, "ZSAV file header has invalid compression value " + "%"PRId32".", compressed); + } /* Identify floating-point format and obtain compression bias. */ read_bytes (r, raw_bias, sizeof raw_bias); if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0) { - sys_warn (r, _("Compression bias is not the usual " - "value of 100, or system file uses unrecognized " - "floating-point format.")); + sys_warn (r, "Compression bias is not the usual value of 100, or system " + "file uses unrecognized floating-point format."); if (r->integer_format == INTEGER_MSB_FIRST) r->float_format = FLOAT_IEEE_DOUBLE_BE; else @@ -244,10 +328,15 @@ read_header (struct sfm_reader *r) printf ("File header record:\n"); printf ("\t%17s: %s\n", "Product name", eye_catcher); printf ("\t%17s: %"PRId32"\n", "Layout code", layout_code); - printf ("\t%17s: %"PRId32"\n", "Compressed", compressed); + printf ("\t%17s: %"PRId32" (%s)\n", "Compressed", + compressed, + r->compression == COMP_NONE ? "no compression" + : r->compression == COMP_SIMPLE ? "simple compression" + : r->compression == COMP_ZLIB ? "ZLIB compression" + : ""); printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index); printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases); - printf ("\t%17s: %g\n", "Compression bias", r->bias); + printf ("\t%17s: %.*g\n", "Compression bias", DBL_DIG + 1, r->bias); printf ("\t%17s: %s\n", "Creation date", creation_date); printf ("\t%17s: %s\n", "Creation time", creation_time); printf ("\t%17s: \"%s\"\n", "File label", file_label); @@ -293,6 +382,8 @@ format_name (int format) case 37: return "CCE"; case 38: return "EDATE"; case 39: return "SDATE"; + case 40: return "MTIME"; + case 41: return "YMDHMS"; default: return "invalid"; } } @@ -312,7 +403,7 @@ read_variable_record (struct sfm_reader *r) char name[9]; printf ("%08llx: variable record #%d\n", - (long long int) ftello (r->file), r->n_variable_records++); + (long long int) ftello (r->file), ++r->n_variable_records); width = read_int (r); has_variable_label = read_int (r); @@ -354,22 +445,18 @@ read_variable_record (struct sfm_reader *r) /* Get variable label, if any. */ if (has_variable_label != 0 && has_variable_label != 1) - sys_error (r, _("Variable label indicator field is not 0 or 1.")); + sys_error (r, "Variable label indicator field is not 0 or 1."); if (has_variable_label == 1) { long long int offset = ftello (r->file); - size_t len, read_len; - char label[255 + 1]; + enum { MAX_LABEL_LEN = 65536 }; - len = read_int (r); - - /* Read up to 255 bytes of label. */ - read_len = MIN (sizeof label - 1, len); + size_t len = read_int (r); + size_t read_len = MIN (MAX_LABEL_LEN, len); + char *label = xmalloc (read_len + 1); read_string (r, label, read_len + 1); printf("\t%08llx Variable label: \"%s\"\n", offset, label); - - /* Skip unread label bytes. */ - skip_bytes (r, len - read_len); + free (label); /* Skip label padding up to multiple of 4 bytes. */ skip_bytes (r, ROUND_UP (len, 4) - len); @@ -385,23 +472,23 @@ read_variable_record (struct sfm_reader *r) { if (missing_value_code < -3 || missing_value_code > 3 || missing_value_code == -1) - sys_error (r, _("Numeric missing value indicator field is not " - "-3, -2, 0, 1, 2, or 3.")); + sys_error (r, "Numeric missing value indicator field is not " + "-3, -2, 0, 1, 2, or 3."); if (missing_value_code < 0) { double low = read_float (r); double high = read_float (r); - printf (" %g...%g", low, high); + printf (" %.*g...%.*g", DBL_DIG + 1, low, DBL_DIG + 1, high); missing_value_code = -missing_value_code - 2; } for (i = 0; i < missing_value_code; i++) - printf (" %g", read_float (r)); + printf (" %.*g", DBL_DIG + 1, read_float (r)); } else if (width > 0) { if (missing_value_code < 1 || missing_value_code > 3) - sys_error (r, _("String missing value indicator field is not " - "0, 1, 2, or 3.")); + sys_error (r, "String missing value indicator field is not " + "0, 1, 2, or 3."); for (i = 0; i < missing_value_code; i++) { char string[9]; @@ -420,11 +507,11 @@ print_untyped_value (struct sfm_reader *r, char raw_value[8]) double value; value = float_get_double (r->float_format, raw_value); - for (n_printable = 0; n_printable < sizeof raw_value; n_printable++) + for (n_printable = 0; n_printable < 8; n_printable++) if (!isprint (raw_value[n_printable])) break; - printf ("%g/\"%.*s\"", value, n_printable, raw_value); + printf ("%.*g/\"%.*s\"", DBL_DIG + 1, value, n_printable, raw_value); } /* Reads value labels from sysfile R and inserts them into the @@ -432,14 +519,14 @@ print_untyped_value (struct sfm_reader *r, char raw_value[8]) static void read_value_label_record (struct sfm_reader *r) { - int label_cnt, var_cnt; + int n_labels, n_vars; int i; printf ("%08llx: value labels record\n", (long long int) ftello (r->file)); /* Read number of labels. */ - label_cnt = read_int (r); - for (i = 0; i < label_cnt; i++) + n_labels = read_int (r); + for (i = 0; i < n_labels; i++) { char raw_value[8]; unsigned char label_len; @@ -466,14 +553,14 @@ read_value_label_record (struct sfm_reader *r) /* Read record type of type 4 record. */ if (read_int (r) != 4) - sys_error (r, _("Variable index record (type 4) does not immediately " - "follow value label record (type 3) as it should.")); + sys_error (r, "Variable index record (type 4) does not immediately " + "follow value label record (type 3) as it should."); /* Read number of variables associated with value label from type 4 record. */ printf ("\t%08llx: apply to variables", (long long int) ftello (r->file)); - var_cnt = read_int (r); - for (i = 0; i < var_cnt; i++) + n_vars = read_int (r); + for (i = 0; i < n_vars; i++) printf (" #%d", read_int (r)); putchar ('\n'); } @@ -520,12 +607,6 @@ read_extension_record (struct sfm_reader *r) read_machine_float_info (r, size, count); return; - case 5: - /* Variable sets information. We don't use these yet. - They only apply to GUIs; see VARSETS on the APPLY - DICTIONARY command in SPSS documentation. */ - break; - case 6: /* DATE variable information. We don't use it yet, but we should. */ @@ -536,6 +617,10 @@ read_extension_record (struct sfm_reader *r) read_mrsets (r, size, count); return; + case 10: + read_extra_product_info (r, size, count); + return; + case 11: read_display_parameters (r, size, count); return; @@ -568,8 +653,12 @@ read_extension_record (struct sfm_reader *r) read_long_string_value_labels (r, size, count); return; + case 22: + read_long_string_missing_values (r, size, count); + return; + default: - sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); + sys_warn (r, "Unrecognized record type 7, subtype %d.", subtype); read_unknown_extension (r, size, count); return; } @@ -592,9 +681,8 @@ read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count) printf ("%08llx: machine integer info\n", offset); if (size != 4 || count != 8) - sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, " - "subtype 3."), - size, count); + sys_error (r, "Bad size (%zu) or count (%zu) field on record type 7, " + "subtype 3.", size, count); printf ("\tVersion: %d.%d.%d\n", version_major, version_minor, version_revision); @@ -623,23 +711,37 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) printf ("%08llx: machine float info\n", offset); if (size != 8 || count != 3) - sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."), + sys_error (r, "Bad size (%zu) or count (%zu) on extension 4.", size, count); - printf ("\tsysmis: %g\n", sysmis); + printf ("\tsysmis: %.*g (%a)\n", DBL_DIG + 1, sysmis, sysmis); if (sysmis != SYSMIS) - sys_warn (r, _("File specifies unexpected value %g as %s."), - sysmis, "SYSMIS"); + sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.", + DBL_DIG + 1, sysmis, sysmis, "SYSMIS"); - printf ("\thighest: %g\n", highest); + printf ("\thighest: %.*g (%a)\n", DBL_DIG + 1, highest, highest); if (highest != HIGHEST) - sys_warn (r, _("File specifies unexpected value %g as %s."), - highest, "HIGHEST"); + sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.", + DBL_DIG + 1, highest, highest, "HIGHEST"); - printf ("\tlowest: %g\n", lowest); - if (lowest != LOWEST) - sys_warn (r, _("File specifies unexpected value %g as %s."), - lowest, "LOWEST"); + printf ("\tlowest: %.*g (%a)\n", DBL_DIG + 1, lowest, lowest); + if (lowest != LOWEST && lowest != SYSMIS) + sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.", + DBL_DIG + 1, lowest, lowest, "LOWEST"); +} + +static void +read_extra_product_info (struct sfm_reader *r, + size_t size, size_t count) +{ + struct text_record *text; + const char *s; + + printf ("%08llx: extra product info\n", (long long int) ftello (r->file)); + text = open_text_record (r, size, count); + s = text_get_all (text); + print_string (s, strlen (s)); + close_text_record (text); } /* Read record type 7, subtype 7. */ @@ -650,7 +752,7 @@ read_mrsets (struct sfm_reader *r, size_t size, size_t count) printf ("%08llx: multiple response sets\n", (long long int) ftello (r->file)); - text = open_text_record (r, size * count); + text = open_text_record (r, size, count); for (;;) { const char *name; @@ -661,6 +763,9 @@ read_mrsets (struct sfm_reader *r, size_t size, size_t count) const char *label; const char *variables; + while (text_match (text, '\n')) + continue; + name = text_tokenize (text, '='); if (name == NULL) break; @@ -689,17 +794,21 @@ read_mrsets (struct sfm_reader *r, size_t size, size_t count) if (!text_match (text, ' ')) { - sys_warn (r, _("Missing space following `%c' at offset %zu " - "in MRSETS record"), 'E', text_pos (text)); + sys_warn (r, "Missing space following `%c' at offset %zu " + "in MRSETS record", 'E', text_pos (text)); break; } number = text_tokenize (text, ' '); - if (!strcmp (number, "11")) + if (!number) + sys_warn (r, "Missing label source value " + "following `E' at offset %zu in MRSETS record", + text_pos (text)); + else if (!strcmp (number, "11")) label_from_var_label = true; else if (strcmp (number, "1")) - sys_warn (r, _("Unexpected label source value `%s' " - "following `E' at offset %zu in MRSETS record"), + sys_warn (r, "Unexpected label source value `%s' " + "following `E' at offset %zu in MRSETS record", number, text_pos (text)); } @@ -722,12 +831,6 @@ read_mrsets (struct sfm_reader *r, size_t size, size_t count) break; variables = text_tokenize (text, '\n'); - if (variables == NULL) - { - sys_warn (r, "missing variable names following label " - "at offset %zu in mrsets record", text_pos (text)); - break; - } printf ("\t\"%s\": multiple %s set", name, type == MRSET_MC ? "category" : "dichotomy"); @@ -739,7 +842,10 @@ read_mrsets (struct sfm_reader *r, size_t size, size_t count) printf (", label \"%s\"", label); if (label_from_var_label) printf (", label from variable label"); - printf(", variables \"%s\"\n", variables); + if (variables != NULL) + printf(", variables \"%s\"\n", variables); + else + printf(", no variables\n"); } close_text_record (text); } @@ -756,7 +862,7 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count) (long long int) ftello (r->file)); if (size != 4) { - sys_warn (r, _("Bad size %zu on extension 11."), size); + sys_warn (r, "Bad size %zu on extension 11.", size); skip_bytes (r, size * count); return; } @@ -768,7 +874,7 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count) includes_width = false; else { - sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."), + sys_warn (r, "Extension 11 has bad count %zu (for %zu variables.", count, n_vars); skip_bytes (r, size * count); return; @@ -806,7 +912,7 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count) printf ("%08llx: long variable names (short => long)\n", (long long int) ftello (r->file)); - text = open_text_record (r, size * count); + text = open_text_record (r, size, count); while (read_variable_to_value_pair (text, &var, &long_name)) printf ("\t%s => %s\n", var, long_name); close_text_record (text); @@ -823,7 +929,7 @@ read_long_string_map (struct sfm_reader *r, size_t size, size_t count) printf ("%08llx: very long strings (variable => length)\n", (long long int) ftello (r->file)); - text = open_text_record (r, size * count); + text = open_text_record (r, size, count); while (read_variable_to_value_pair (text, &var, &length_s)) printf ("\t%s => %d\n", var, atoi (length_s)); close_text_record (text); @@ -836,25 +942,25 @@ read_attributes (struct sfm_reader *r, struct text_record *text, const char *key; int index; - for (;;) + for (;;) { key = text_tokenize (text, '('); if (key == NULL) return true; - + for (index = 1; ; index++) { /* Parse the value. */ const char *value = text_tokenize (text, '\n'); - if (value == NULL) + if (value == NULL) { - sys_warn (r, _("%s: Error parsing attribute value %s[%d]"), + sys_warn (r, "%s: Error parsing attribute value %s[%d]", variable, key, index); return false; } if (strlen (value) < 2 || value[0] != '\'' || value[strlen (value) - 1] != '\'') - sys_warn (r, _("%s: Attribute value %s[%d] is not quoted: %s"), + sys_warn (r, "%s: Attribute value %s[%d] is not quoted: %s", variable, key, index, value); else printf ("\t%s: %s[%d] = \"%.*s\"\n", @@ -866,7 +972,7 @@ read_attributes (struct sfm_reader *r, struct text_record *text, } if (text_match (text, '/')) - return true; + return true; } } @@ -878,13 +984,13 @@ read_ncases64 (struct sfm_reader *r, size_t size, size_t count) if (size != 8) { - sys_warn (r, _("Bad size %zu for extended number of cases."), size); + sys_warn (r, "Bad size %zu for extended number of cases.", size); skip_bytes (r, size * count); return; } if (count != 2) { - sys_warn (r, _("Bad count %zu for extended number of cases."), size); + sys_warn (r, "Bad count %zu for extended number of cases.", size); skip_bytes (r, size * count); return; } @@ -896,12 +1002,12 @@ read_ncases64 (struct sfm_reader *r, size_t size, size_t count) } static void -read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count) +read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count) { struct text_record *text; - + printf ("%08llx: datafile attributes\n", (long long int) ftello (r->file)); - text = open_text_record (r, size * count); + text = open_text_record (r, size, count); read_attributes (r, text, "datafile"); close_text_record (text); } @@ -914,6 +1020,8 @@ read_character_encoding (struct sfm_reader *r, size_t size, size_t count) read_string (r, encoding, count + 1); printf ("%08llx: Character Encoding: %s\n", posn, encoding); + + free (encoding); } static void @@ -934,8 +1042,8 @@ read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count) /* Read variable name. */ var_name_len = read_int (r); if (var_name_len > ID_MAX_LEN) - sys_error (r, _("Variable name length in long string value label " - "record (%d) exceeds %d-byte limit."), + sys_error (r, "Variable name length in long string value label " + "record (%d) exceeds %d-byte limit.", var_name_len, ID_MAX_LEN); read_string (r, var_name, var_name_len + 1); @@ -976,6 +1084,56 @@ read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count) } } +static void +read_long_string_missing_values (struct sfm_reader *r, + size_t size, size_t count) +{ + long long int start = ftello (r->file); + + printf ("%08llx: long string missing values\n", start); + while (ftello (r->file) - start < size * count) + { + long long posn = ftello (r->file); + char var_name[ID_MAX_LEN + 1]; + uint8_t n_missing_values; + int var_name_len; + int i; + + /* Read variable name. */ + var_name_len = read_int (r); + if (var_name_len > ID_MAX_LEN) + sys_error (r, "Variable name length in long string value label " + "record (%d) exceeds %d-byte limit.", + var_name_len, ID_MAX_LEN); + read_string (r, var_name, var_name_len + 1); + + /* Read number of values. */ + read_bytes (r, &n_missing_values, 1); + + printf ("\t%08llx: %s, %d missing values:", + posn, var_name, n_missing_values); + + /* Read values. */ + for (i = 0; i < n_missing_values; i++) + { + char *value; + int value_length; + + posn = ftello (r->file); + + /* Read value. */ + value_length = read_int (r); + value = xmalloc (value_length + 1); + read_string (r, value, value_length + 1); + + printf (" \"%s\"", value); + + free (value); + } + printf ("\n"); + } +} + static void hex_dump (size_t offset, const void *buffer_, size_t buffer_size) { @@ -1032,42 +1190,29 @@ read_unknown_extension (struct sfm_reader *r, size_t size, size_t count) { buffer = xmalloc (count); read_bytes (r, buffer, count); - if (memchr (buffer, 0, count) == 0) - for (i = 0; i < count; i++) - { - unsigned char c = buffer[i]; - - if (c == '\\') - printf ("\\\\"); - else if (c == '\n' || isprint (c)) - putchar (c); - else - printf ("\\%02x", c); - } - else - hex_dump (0, buffer, count); + print_string (CHAR_CAST (char *, buffer), count); free (buffer); } } static void -read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) +read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) { struct text_record *text; - + printf ("%08llx: variable attributes\n", (long long int) ftello (r->file)); - text = open_text_record (r, size * count); - for (;;) + text = open_text_record (r, size, count); + for (;;) { const char *variable = text_tokenize (text, ':'); if (variable == NULL || !read_attributes (r, text, variable)) - break; + break; } close_text_record (text); } static void -read_compressed_data (struct sfm_reader *r) +read_simple_compressed_data (struct sfm_reader *r, int max_cases) { enum { N_OPCODES = 8 }; uint8_t opcodes[N_OPCODES]; @@ -1082,11 +1227,11 @@ read_compressed_data (struct sfm_reader *r) opcode_idx = N_OPCODES; opcode_ofs = 0; case_num = 0; - for (case_num = 0; ; case_num++) + for (case_num = 0; case_num < max_cases; case_num++) { printf ("%08llx: case %d's uncompressible data begins\n", (long long int) ftello (r->file), case_num); - for (i = 0; i < r->n_var_widths; ) + for (i = 0; i < r->n_var_widths;) { int width = r->var_widths[i]; char raw_value[8]; @@ -1111,7 +1256,7 @@ read_compressed_data (struct sfm_reader *r) switch (opcode) { default: - printf ("%g", opcode - r->bias); + printf ("%.*g", DBL_DIG + 1, opcode - r->bias); if (width != 0) printf (", but this is a string variable (width=%d)", width); printf ("\n"); @@ -1155,6 +1300,87 @@ read_compressed_data (struct sfm_reader *r) } } } + +static void +read_zlib_compressed_data (struct sfm_reader *r) +{ + long long int ofs; + long long int this_ofs, next_ofs, next_len; + long long int bias, zero; + long long int expected_uncmp_ofs, expected_cmp_ofs; + unsigned int block_size, n_blocks; + unsigned int i; + + read_int (r); + ofs = ftello (r->file); + printf ("\n%08llx: ZLIB compressed data header:\n", ofs); + + this_ofs = read_int64 (r); + next_ofs = read_int64 (r); + next_len = read_int64 (r); + + printf ("\tzheader_ofs: 0x%llx\n", this_ofs); + if (this_ofs != ofs) + printf ("\t\t(Expected 0x%llx.)\n", ofs); + printf ("\tztrailer_ofs: 0x%llx\n", next_ofs); + printf ("\tztrailer_len: %lld\n", next_len); + if (next_len < 24 || next_len % 24) + printf ("\t\t(Trailer length is not a positive multiple of 24.)\n"); + + printf ("\n%08llx: 0x%llx bytes of ZLIB compressed data\n", + ofs + 8 * 3, next_ofs - (ofs + 8 * 3)); + + skip_bytes (r, next_ofs - (ofs + 8 * 3)); + + printf ("\n%08llx: ZLIB trailer fixed header:\n", next_ofs); + bias = read_int64 (r); + zero = read_int64 (r); + block_size = read_int (r); + n_blocks = read_int (r); + printf ("\tbias: %lld\n", bias); + printf ("\tzero: 0x%llx\n", zero); + if (zero != 0) + printf ("\t\t(Expected 0.)\n"); + printf ("\tblock_size: 0x%x\n", block_size); + if (block_size != 0x3ff000) + printf ("\t\t(Expected 0x3ff000.)\n"); + printf ("\tn_blocks: %u\n", n_blocks); + if (n_blocks != next_len / 24 - 1) + printf ("\t\t(Expected %llu.)\n", next_len / 24 - 1); + + expected_uncmp_ofs = ofs; + expected_cmp_ofs = ofs + 24; + for (i = 0; i < n_blocks; i++) + { + long long int blockinfo_ofs = ftello (r->file); + unsigned long long int uncompressed_ofs = read_int64 (r); + unsigned long long int compressed_ofs = read_int64 (r); + unsigned int uncompressed_size = read_int (r); + unsigned int compressed_size = read_int (r); + + printf ("\n%08llx: ZLIB block descriptor %d\n", blockinfo_ofs, i + 1); + + printf ("\tuncompressed_ofs: 0x%llx\n", uncompressed_ofs); + if (uncompressed_ofs != expected_uncmp_ofs) + printf ("\t\t(Expected 0x%llx.)\n", ofs); + + printf ("\tcompressed_ofs: 0x%llx\n", compressed_ofs); + if (compressed_ofs != expected_cmp_ofs) + printf ("\t\t(Expected 0x%llx.)\n", ofs + 24); + + printf ("\tuncompressed_size: 0x%x\n", uncompressed_size); + if (i < n_blocks - 1 && uncompressed_size != block_size) + printf ("\t\t(Expected 0x%x.)\n", block_size); + + printf ("\tcompressed_size: 0x%x\n", compressed_size); + if (i == n_blocks - 1 && compressed_ofs + compressed_size != next_ofs) + printf ("\t\t(This was expected to be 0x%llx.)\n", + next_ofs - compressed_size); + + expected_uncmp_ofs += uncompressed_size; + expected_cmp_ofs += compressed_size; + } +} /* Helpers for reading records that consist of structured text strings. */ @@ -1168,18 +1394,23 @@ struct text_record size_t pos; /* Current position in buffer. */ }; -/* Reads SIZE bytes into a text record for R, +/* Reads SIZE * COUNT bytes into a text record for R, and returns the new text record. */ static struct text_record * -open_text_record (struct sfm_reader *r, size_t size) +open_text_record (struct sfm_reader *r, size_t size, size_t count) { struct text_record *text = xmalloc (sizeof *text); - char *buffer = xmalloc (size + 1); - read_bytes (r, buffer, size); - buffer[size] = '\0'; + + if (size_overflow_p (xsum (1, xtimes (size, count)))) + sys_error (r, "Extension record too large."); + + size_t n_bytes = size * count; + char *buffer = xmalloc (n_bytes + 1); + read_bytes (r, buffer, n_bytes); + buffer[n_bytes] = '\0'; text->reader = r; text->buffer = buffer; - text->size = size; + text->size = n_bytes; text->pos = 0; return text; } @@ -1209,9 +1440,9 @@ text_tokenize (struct text_record *text, int delimiter) } static bool -text_match (struct text_record *text, int c) +text_match (struct text_record *text, int c) { - if (text->pos < text->size && text->buffer[text->pos] == c) + if (text->pos < text->size && text->buffer[text->pos] == c) { text->pos++; return true; @@ -1293,14 +1524,26 @@ text_pos (const struct text_record *text) { return text->pos; } + +static const char * +text_get_all (const struct text_record *text) +{ + return text->buffer; +} static void -usage (int exit_code) +usage (void) { - printf ("usage: %s SYSFILE...\n" - "where each SYSFILE is the name of a system file\n", - program_name); - exit (exit_code); + printf ("\ +%s, a utility for dissecting system files.\n\ +Usage: %s [OPTION]... SYSFILE...\n\ +where each SYSFILE is the name of a system file.\n\ +\n\ +Options:\n\ + --data[=MAXCASES] print (up to MAXCASES cases of) compressed data\n\ + --help display this help and exit\n\ + --version output version information and exit\n", + program_name, program_name); } /* Displays a corruption message. */ @@ -1347,15 +1590,15 @@ sys_error (struct sfm_reader *r, const char *format, ...) too. */ static inline bool read_bytes_internal (struct sfm_reader *r, bool eof_is_ok, - void *buf, size_t byte_cnt) + void *buf, size_t n_bytes) { - size_t bytes_read = fread (buf, 1, byte_cnt, r->file); - if (bytes_read == byte_cnt) + size_t bytes_read = fread (buf, 1, n_bytes, r->file); + if (bytes_read == n_bytes) return true; else if (ferror (r->file)) - sys_error (r, _("System error: %s."), strerror (errno)); + sys_error (r, "System error: %s.", strerror (errno)); else if (!eof_is_ok || bytes_read != 0) - sys_error (r, _("Unexpected end of file.")); + sys_error (r, "Unexpected end of file."); else return false; } @@ -1363,9 +1606,9 @@ read_bytes_internal (struct sfm_reader *r, bool eof_is_ok, /* Reads BYTE_CNT into BUF. Aborts upon I/O error or if end-of-file is encountered. */ static void -read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { - read_bytes_internal (r, false, buf, byte_cnt); + read_bytes_internal (r, false, buf, n_bytes); } /* Reads BYTE_CNT bytes into BUF. @@ -1373,9 +1616,9 @@ read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) Returns false if an immediate end-of-file is encountered. Aborts if an I/O error or a partial read occurs. */ static bool -try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt) +try_read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes) { - return read_bytes_internal (r, true, buf, byte_cnt); + return read_bytes_internal (r, true, buf, n_bytes); } /* Reads a 32-bit signed integer from R and returns its value in @@ -1439,3 +1682,27 @@ trim_spaces (char *s) end--; *end = '\0'; } + +static void +print_string (const char *s, size_t len) +{ + if (memchr (s, 0, len) == 0) + { + size_t i; + + for (i = 0; i < len; i++) + { + unsigned char c = s[i]; + + if (c == '\\') + printf ("\\\\"); + else if (c == '\n' || isprint (c)) + putchar (c); + else + printf ("\\%02x", c); + } + putchar ('\n'); + } + else + hex_dump (0, s, len); +}