X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=utilities%2Fpspp-dump-sav.c;h=f207d8ecbf0f3362f2daf5e4f7d9245714555c86;hb=65481c834dca537af5851fa2b7efe209f09af613;hp=307add8db3b56b339afe79f3d0e7d732c6c21236;hpb=41f83a8f1c88489e4fc458fb7661430ff91d8a61;p=pspp diff --git a/utilities/pspp-dump-sav.c b/utilities/pspp-dump-sav.c index 307add8db3..f207d8ecbf 100644 --- a/utilities/pspp-dump-sav.c +++ b/utilities/pspp-dump-sav.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. + Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -39,6 +40,13 @@ #define ID_MAX_LEN 64 +enum compression + { + COMP_NONE, + COMP_SIMPLE, + COMP_ZLIB + }; + struct sfm_reader { const char *file_name; @@ -52,7 +60,7 @@ struct sfm_reader enum integer_format integer_format; enum float_format float_format; - bool compressed; + enum compression compression; double bias; }; @@ -83,9 +91,12 @@ static void read_character_encoding (struct sfm_reader *r, size_t size, size_t count); static void read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count); +static void read_long_string_missing_values (struct sfm_reader *r, + size_t size, size_t count); static void read_unknown_extension (struct sfm_reader *, size_t size, size_t count); -static void read_compressed_data (struct sfm_reader *, int max_cases); +static void read_simple_compressed_data (struct sfm_reader *, int max_cases); +static void read_zlib_compressed_data (struct sfm_reader *); static struct text_record *open_text_record ( struct sfm_reader *, size_t size); @@ -178,11 +189,11 @@ main (int argc, char *argv[]) r.n_var_widths = 0; r.allocated_var_widths = 0; r.var_widths = 0; - r.compressed = false; + r.compression = COMP_NONE; if (argc - optind > 1) printf ("Reading \"%s\":\n", r.file_name); - + read_header (&r); while ((rec_type = read_int (&r)) != 999) { @@ -216,12 +227,17 @@ main (int argc, char *argv[]) (long long int) ftello (r.file), (long long int) ftello (r.file) + 4); - if (r.compressed && max_cases > 0) - read_compressed_data (&r, max_cases); + if (r.compression == COMP_SIMPLE) + { + if (max_cases > 0) + read_simple_compressed_data (&r, max_cases); + } + else if (r.compression == COMP_ZLIB) + read_zlib_compressed_data (&r); fclose (r.file); } - + return 0; } @@ -239,11 +255,16 @@ read_header (struct sfm_reader *r) char creation_date[10]; char creation_time[9]; char file_label[65]; + bool zmagic; read_string (r, rec_type, sizeof rec_type); read_string (r, eye_catcher, sizeof eye_catcher); - if (strcmp ("$FL2", rec_type) != 0) + if (!strcmp ("$FL2", rec_type)) + zmagic = false; + else if (!strcmp ("$FL3", rec_type)) + zmagic = true; + else sys_error (r, "This is not an SPSS system file."); /* Identify integer format. */ @@ -263,7 +284,24 @@ read_header (struct sfm_reader *r) weight_index = read_int (r); ncases = read_int (r); - r->compressed = compressed != 0; + if (!zmagic) + { + if (compressed == 0) + r->compression = COMP_NONE; + else if (compressed == 1) + r->compression = COMP_SIMPLE; + else if (compressed != 0) + sys_error (r, "SAV file header has invalid compression value " + "%"PRId32".", compressed); + } + else + { + if (compressed == 2) + r->compression = COMP_ZLIB; + else + sys_error (r, "ZSAV file header has invalid compression value " + "%"PRId32".", compressed); + } /* Identify floating-point format and obtain compression bias. */ read_bytes (r, raw_bias, sizeof raw_bias); @@ -287,10 +325,15 @@ read_header (struct sfm_reader *r) printf ("File header record:\n"); printf ("\t%17s: %s\n", "Product name", eye_catcher); printf ("\t%17s: %"PRId32"\n", "Layout code", layout_code); - printf ("\t%17s: %"PRId32"\n", "Compressed", compressed); + printf ("\t%17s: %"PRId32" (%s)\n", "Compressed", + compressed, + r->compression == COMP_NONE ? "no compression" + : r->compression == COMP_SIMPLE ? "simple compression" + : r->compression == COMP_ZLIB ? "ZLIB compression" + : ""); printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index); printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases); - printf ("\t%17s: %g\n", "Compression bias", r->bias); + printf ("\t%17s: %.*g\n", "Compression bias", DBL_DIG + 1, r->bias); printf ("\t%17s: %s\n", "Creation date", creation_date); printf ("\t%17s: %s\n", "Creation time", creation_time); printf ("\t%17s: \"%s\"\n", "File label", file_label); @@ -401,18 +444,16 @@ read_variable_record (struct sfm_reader *r) if (has_variable_label == 1) { long long int offset = ftello (r->file); - size_t len, read_len; - char label[255 + 1]; + size_t len; + char *label; len = read_int (r); /* Read up to 255 bytes of label. */ - read_len = MIN (sizeof label - 1, len); - read_string (r, label, read_len + 1); + label = xmalloc (len + 1); + read_string (r, label, len + 1); printf("\t%08llx Variable label: \"%s\"\n", offset, label); - - /* Skip unread label bytes. */ - skip_bytes (r, len - read_len); + free (label); /* Skip label padding up to multiple of 4 bytes. */ skip_bytes (r, ROUND_UP (len, 4) - len); @@ -434,11 +475,11 @@ read_variable_record (struct sfm_reader *r) { double low = read_float (r); double high = read_float (r); - printf (" %g...%g", low, high); + printf (" %.*g...%.*g", DBL_DIG + 1, low, DBL_DIG + 1, high); missing_value_code = -missing_value_code - 2; } for (i = 0; i < missing_value_code; i++) - printf (" %g", read_float (r)); + printf (" %.*g", DBL_DIG + 1, read_float (r)); } else if (width > 0) { @@ -467,7 +508,7 @@ print_untyped_value (struct sfm_reader *r, char raw_value[8]) if (!isprint (raw_value[n_printable])) break; - printf ("%g/\"%.*s\"", value, n_printable, raw_value); + printf ("%.*g/\"%.*s\"", DBL_DIG + 1, value, n_printable, raw_value); } /* Reads value labels from sysfile R and inserts them into the @@ -563,12 +604,6 @@ read_extension_record (struct sfm_reader *r) read_machine_float_info (r, size, count); return; - case 5: - /* Variable sets information. We don't use these yet. - They only apply to GUIs; see VARSETS on the APPLY - DICTIONARY command in SPSS documentation. */ - break; - case 6: /* DATE variable information. We don't use it yet, but we should. */ @@ -615,6 +650,10 @@ read_extension_record (struct sfm_reader *r) read_long_string_value_labels (r, size, count); return; + case 22: + read_long_string_missing_values (r, size, count); + return; + default: sys_warn (r, "Unrecognized record type 7, subtype %d.", subtype); read_unknown_extension (r, size, count); @@ -672,20 +711,20 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) sys_error (r, "Bad size (%zu) or count (%zu) on extension 4.", size, count); - printf ("\tsysmis: %g (%a)\n", sysmis, sysmis); + printf ("\tsysmis: %.*g (%a)\n", DBL_DIG + 1, sysmis, sysmis); if (sysmis != SYSMIS) - sys_warn (r, "File specifies unexpected value %g (%a) as %s.", - sysmis, sysmis, "SYSMIS"); + sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.", + DBL_DIG + 1, sysmis, sysmis, "SYSMIS"); - printf ("\thighest: %g (%a)\n", highest, highest); + printf ("\thighest: %.*g (%a)\n", DBL_DIG + 1, highest, highest); if (highest != HIGHEST) - sys_warn (r, "File specifies unexpected value %g (%a) as %s.", - highest, highest, "HIGHEST"); + sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.", + DBL_DIG + 1, highest, highest, "HIGHEST"); - printf ("\tlowest: %g (%a)\n", lowest, lowest); + printf ("\tlowest: %.*g (%a)\n", DBL_DIG + 1, lowest, lowest); if (lowest != LOWEST && lowest != SYSMIS) - sys_warn (r, "File specifies unexpected value %g (%a) as %s.", - lowest, lowest, "LOWEST"); + sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.", + DBL_DIG + 1, lowest, lowest, "LOWEST"); } static void @@ -721,6 +760,9 @@ read_mrsets (struct sfm_reader *r, size_t size, size_t count) const char *label; const char *variables; + while (text_match (text, '\n')) + continue; + name = text_tokenize (text, '='); if (name == NULL) break; @@ -782,12 +824,6 @@ read_mrsets (struct sfm_reader *r, size_t size, size_t count) break; variables = text_tokenize (text, '\n'); - if (variables == NULL) - { - sys_warn (r, "missing variable names following label " - "at offset %zu in mrsets record", text_pos (text)); - break; - } printf ("\t\"%s\": multiple %s set", name, type == MRSET_MC ? "category" : "dichotomy"); @@ -799,7 +835,10 @@ read_mrsets (struct sfm_reader *r, size_t size, size_t count) printf (", label \"%s\"", label); if (label_from_var_label) printf (", label from variable label"); - printf(", variables \"%s\"\n", variables); + if (variables != NULL) + printf(", variables \"%s\"\n", variables); + else + printf(", no variables\n"); } close_text_record (text); } @@ -896,17 +935,17 @@ read_attributes (struct sfm_reader *r, struct text_record *text, const char *key; int index; - for (;;) + for (;;) { key = text_tokenize (text, '('); if (key == NULL) return true; - + for (index = 1; ; index++) { /* Parse the value. */ const char *value = text_tokenize (text, '\n'); - if (value == NULL) + if (value == NULL) { sys_warn (r, "%s: Error parsing attribute value %s[%d]", variable, key, index); @@ -926,7 +965,7 @@ read_attributes (struct sfm_reader *r, struct text_record *text, } if (text_match (text, '/')) - return true; + return true; } } @@ -956,10 +995,10 @@ read_ncases64 (struct sfm_reader *r, size_t size, size_t count) } static void -read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count) +read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count) { struct text_record *text; - + printf ("%08llx: datafile attributes\n", (long long int) ftello (r->file)); text = open_text_record (r, size * count); read_attributes (r, text, "datafile"); @@ -1036,6 +1075,56 @@ read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count) } } +static void +read_long_string_missing_values (struct sfm_reader *r, + size_t size, size_t count) +{ + long long int start = ftello (r->file); + + printf ("%08llx: long string missing values\n", start); + while (ftello (r->file) - start < size * count) + { + long long posn = ftello (r->file); + char var_name[ID_MAX_LEN + 1]; + uint8_t n_missing_values; + int var_name_len; + int i; + + /* Read variable name. */ + var_name_len = read_int (r); + if (var_name_len > ID_MAX_LEN) + sys_error (r, "Variable name length in long string value label " + "record (%d) exceeds %d-byte limit.", + var_name_len, ID_MAX_LEN); + read_string (r, var_name, var_name_len + 1); + + /* Read number of values. */ + read_bytes (r, &n_missing_values, 1); + + printf ("\t%08llx: %s, %d missing values:", + posn, var_name, n_missing_values); + + /* Read values. */ + for (i = 0; i < n_missing_values; i++) + { + char *value; + int value_length; + + posn = ftello (r->file); + + /* Read value. */ + value_length = read_int (r); + value = xmalloc (value_length + 1); + read_string (r, value, value_length + 1); + + printf (" \"%s\"", value); + + free (value); + } + printf ("\n"); + } +} + static void hex_dump (size_t offset, const void *buffer_, size_t buffer_size) { @@ -1098,23 +1187,23 @@ read_unknown_extension (struct sfm_reader *r, size_t size, size_t count) } static void -read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) +read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) { struct text_record *text; - + printf ("%08llx: variable attributes\n", (long long int) ftello (r->file)); text = open_text_record (r, size * count); - for (;;) + for (;;) { const char *variable = text_tokenize (text, ':'); if (variable == NULL || !read_attributes (r, text, variable)) - break; + break; } close_text_record (text); } static void -read_compressed_data (struct sfm_reader *r, int max_cases) +read_simple_compressed_data (struct sfm_reader *r, int max_cases) { enum { N_OPCODES = 8 }; uint8_t opcodes[N_OPCODES]; @@ -1158,7 +1247,7 @@ read_compressed_data (struct sfm_reader *r, int max_cases) switch (opcode) { default: - printf ("%g", opcode - r->bias); + printf ("%.*g", DBL_DIG + 1, opcode - r->bias); if (width != 0) printf (", but this is a string variable (width=%d)", width); printf ("\n"); @@ -1202,6 +1291,87 @@ read_compressed_data (struct sfm_reader *r, int max_cases) } } } + +static void +read_zlib_compressed_data (struct sfm_reader *r) +{ + long long int ofs; + long long int this_ofs, next_ofs, next_len; + long long int bias, zero; + long long int expected_uncmp_ofs, expected_cmp_ofs; + unsigned int block_size, n_blocks; + unsigned int i; + + read_int (r); + ofs = ftello (r->file); + printf ("\n%08llx: ZLIB compressed data header:\n", ofs); + + this_ofs = read_int64 (r); + next_ofs = read_int64 (r); + next_len = read_int64 (r); + + printf ("\tzheader_ofs: 0x%llx\n", this_ofs); + if (this_ofs != ofs) + printf ("\t\t(Expected 0x%llx.)\n", ofs); + printf ("\tztrailer_ofs: 0x%llx\n", next_ofs); + printf ("\tztrailer_len: %lld\n", next_len); + if (next_len < 24 || next_len % 24) + printf ("\t\t(Trailer length is not a positive multiple of 24.)\n"); + + printf ("\n%08llx: 0x%llx bytes of ZLIB compressed data\n", + ofs + 8 * 3, next_ofs - (ofs + 8 * 3)); + + skip_bytes (r, next_ofs - (ofs + 8 * 3)); + + printf ("\n%08llx: ZLIB trailer fixed header:\n", next_ofs); + bias = read_int64 (r); + zero = read_int64 (r); + block_size = read_int (r); + n_blocks = read_int (r); + printf ("\tbias: %lld\n", bias); + printf ("\tzero: 0x%llx\n", zero); + if (zero != 0) + printf ("\t\t(Expected 0.)\n"); + printf ("\tblock_size: 0x%x\n", block_size); + if (block_size != 0x3ff000) + printf ("\t\t(Expected 0x3ff000.)\n"); + printf ("\tn_blocks: %u\n", n_blocks); + if (n_blocks != next_len / 24 - 1) + printf ("\t\t(Expected %llu.)\n", next_len / 24 - 1); + + expected_uncmp_ofs = ofs; + expected_cmp_ofs = ofs + 24; + for (i = 0; i < n_blocks; i++) + { + long long int blockinfo_ofs = ftello (r->file); + unsigned long long int uncompressed_ofs = read_int64 (r); + unsigned long long int compressed_ofs = read_int64 (r); + unsigned int uncompressed_size = read_int (r); + unsigned int compressed_size = read_int (r); + + printf ("\n%08llx: ZLIB block descriptor %d\n", blockinfo_ofs, i + 1); + + printf ("\tuncompressed_ofs: 0x%llx\n", uncompressed_ofs); + if (uncompressed_ofs != expected_uncmp_ofs) + printf ("\t\t(Expected 0x%llx.)\n", ofs); + + printf ("\tcompressed_ofs: 0x%llx\n", compressed_ofs); + if (compressed_ofs != expected_cmp_ofs) + printf ("\t\t(Expected 0x%llx.)\n", ofs + 24); + + printf ("\tuncompressed_size: 0x%x\n", uncompressed_size); + if (i < n_blocks - 1 && uncompressed_size != block_size) + printf ("\t\t(Expected 0x%x.)\n", block_size); + + printf ("\tcompressed_size: 0x%x\n", compressed_size); + if (i == n_blocks - 1 && compressed_ofs + compressed_size != next_ofs) + printf ("\t\t(This was expected to be 0x%llx.)\n", + next_ofs - compressed_size); + + expected_uncmp_ofs += uncompressed_size; + expected_cmp_ofs += compressed_size; + } +} /* Helpers for reading records that consist of structured text strings. */ @@ -1256,9 +1426,9 @@ text_tokenize (struct text_record *text, int delimiter) } static bool -text_match (struct text_record *text, int c) +text_match (struct text_record *text, int c) { - if (text->pos < text->size && text->buffer[text->pos] == c) + if (text->pos < text->size && text->buffer[text->pos] == c) { text->pos++; return true;