X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=tests%2Fdissect-sysfile.c;h=ecb8d59b77ac11d9d5f415a2886adf23234e0fba;hb=c5ad65b0351ab1d897eb072eeaec06fb37802b01;hp=62161f9c540200b1bcc954d5cef829342e8d671f;hpb=9e0e4996fad6563f0a1ce628b80db5c23ef8279e;p=pspp diff --git a/tests/dissect-sysfile.c b/tests/dissect-sysfile.c index 62161f9c54..ecb8d59b77 100644 --- a/tests/dissect-sysfile.c +++ b/tests/dissect-sysfile.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,6 +36,8 @@ #include "gettext.h" #define _(msgid) gettext (msgid) +#define VAR_NAME_LEN 64 + struct sfm_reader { const char *file_name; @@ -43,8 +45,14 @@ struct sfm_reader int n_variable_records, n_variables; + int *var_widths; + size_t n_var_widths, allocated_var_widths; + enum integer_format integer_format; enum float_format float_format; + + bool compressed; + double bias; }; static void read_header (struct sfm_reader *); @@ -56,6 +64,7 @@ static void read_machine_integer_info (struct sfm_reader *, size_t size, size_t count); static void read_machine_float_info (struct sfm_reader *, size_t size, size_t count); +static void read_mrsets (struct sfm_reader *, size_t size, size_t count); static void read_display_parameters (struct sfm_reader *, size_t size, size_t count); static void read_long_var_name_map (struct sfm_reader *r, @@ -66,9 +75,14 @@ static void read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count); static void read_variable_attributes (struct sfm_reader *r, size_t size, size_t count); +static void read_ncases64 (struct sfm_reader *, size_t size, size_t count); static void read_character_encoding (struct sfm_reader *r, size_t size, size_t count); - +static void read_long_string_value_labels (struct sfm_reader *r, + size_t size, size_t count); +static void read_unknown_extension (struct sfm_reader *, + size_t size, size_t count); +static void read_compressed_data (struct sfm_reader *); static struct text_record *open_text_record ( struct sfm_reader *, size_t size); @@ -77,6 +91,8 @@ static bool read_variable_to_value_pair (struct text_record *, char **key, char **value); static char *text_tokenize (struct text_record *, int delimiter); static bool text_match (struct text_record *text, int c); +static const char *text_parse_counted_string (struct text_record *); +static size_t text_pos (const struct text_record *); static void usage (int exit_code); static void sys_warn (struct sfm_reader *, const char *, ...) @@ -87,6 +103,7 @@ static void sys_error (struct sfm_reader *, const char *, ...) static void read_bytes (struct sfm_reader *, void *, size_t); static int read_int (struct sfm_reader *); +static int64_t read_int64 (struct sfm_reader *); static double read_float (struct sfm_reader *); static void read_string (struct sfm_reader *, char *, size_t); static void skip_bytes (struct sfm_reader *, size_t); @@ -112,6 +129,10 @@ main (int argc, char *argv[]) error (EXIT_FAILURE, errno, "error opening \"%s\"", r.file_name); r.n_variable_records = 0; r.n_variables = 0; + r.n_var_widths = 0; + r.allocated_var_widths = 0; + r.var_widths = 0; + r.compressed = false; if (argc > 2) printf ("Reading \"%s\":\n", r.file_name); @@ -148,6 +169,9 @@ main (int argc, char *argv[]) "(first byte of data at %08lx)\n", ftell (r.file), ftell (r.file) + 4); + if (r.compressed) + read_compressed_data (&r); + fclose (r.file); } @@ -166,7 +190,6 @@ read_header (struct sfm_reader *r) int32_t weight_index; int32_t ncases; uint8_t raw_bias[8]; - double bias; char creation_date[10]; char creation_time[9]; char file_label[65]; @@ -190,10 +213,12 @@ read_header (struct sfm_reader *r) raw_layout_code, sizeof raw_layout_code); nominal_case_size = read_int (r); - compressed = read_int (r) != 0; + compressed = read_int (r); weight_index = read_int (r); ncases = read_int (r); + r->compressed = compressed != 0; + /* Identify floating-point format and obtain compression bias. */ read_bytes (r, raw_bias, sizeof raw_bias); if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0) @@ -206,7 +231,7 @@ read_header (struct sfm_reader *r) else r->float_format = FLOAT_IEEE_DOUBLE_LE; } - bias = float_get_double (r->float_format, raw_bias); + r->bias = float_get_double (r->float_format, raw_bias); read_string (r, creation_date, sizeof creation_date); read_string (r, creation_time, sizeof creation_time); @@ -220,7 +245,7 @@ read_header (struct sfm_reader *r) printf ("\t%17s: %"PRId32"\n", "Compressed", compressed); printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index); printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases); - printf ("\t%17s: %g\n", "Compression bias", bias); + printf ("\t%17s: %g\n", "Compression bias", r->bias); printf ("\t%17s: %s\n", "Creation date", creation_date); printf ("\t%17s: %s\n", "Creation time", creation_time); printf ("\t%17s: \"%s\"\n", "File label", file_label); @@ -298,6 +323,11 @@ read_variable_record (struct sfm_reader *r) if (width >= 0) r->n_variables++; + if (r->n_var_widths >= r->allocated_var_widths) + r->var_widths = x2nrealloc (r->var_widths, &r->allocated_var_widths, + sizeof *r->var_widths); + r->var_widths[r->n_var_widths++] = width; + printf ("\tWidth: %d (%s)\n", width, width > 0 ? "string" @@ -377,6 +407,20 @@ read_variable_record (struct sfm_reader *r) } } +static void +print_untyped_value (struct sfm_reader *r, char raw_value[8]) +{ + int n_printable; + double value; + + value = float_get_double (r->float_format, raw_value); + for (n_printable = 0; n_printable < sizeof raw_value; n_printable++) + if (!isprint (raw_value[n_printable])) + break; + + printf ("%g/\"%.*s\"", value, n_printable, raw_value); +} + /* Reads value labels from sysfile R and inserts them into the associated dictionary. */ static void @@ -392,17 +436,11 @@ read_value_label_record (struct sfm_reader *r) for (i = 0; i < label_cnt; i++) { char raw_value[8]; - double value; - int n_printable; unsigned char label_len; size_t padded_len; char label[256]; read_bytes (r, raw_value, sizeof raw_value); - value = float_get_double (r->float_format, raw_value); - for (n_printable = 0; n_printable < sizeof raw_value; n_printable++) - if (!isprint (raw_value[n_printable])) - break; /* Read label length. */ read_bytes (r, &label_len, sizeof label_len); @@ -412,7 +450,9 @@ read_value_label_record (struct sfm_reader *r) read_bytes (r, label, padded_len - 1); label[label_len] = 0; - printf ("\t%g/\"%.*s\": \"%s\"\n", value, n_printable, raw_value, label); + printf ("\t"); + print_untyped_value (r, raw_value); + printf (": \"%s\"\n", label); } /* Now, read the type 4 record that has the list of variables @@ -486,8 +526,9 @@ read_extension_record (struct sfm_reader *r) break; case 7: - /* Unknown purpose. */ - break; + case 19: + read_mrsets (r, size, count); + return; case 11: read_display_parameters (r, size, count); @@ -502,8 +543,8 @@ read_extension_record (struct sfm_reader *r) return; case 16: - /* New in SPSS v14? Unknown purpose. */ - break; + read_ncases64 (r, size, count); + return; case 17: read_datafile_attributes (r, size, count); @@ -517,9 +558,14 @@ read_extension_record (struct sfm_reader *r) read_character_encoding (r, size, count); return; + case 21: + read_long_string_value_labels (r, size, count); + return; + default: sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); - break; + read_unknown_extension (r, size, count); + return; } skip_bytes (r, bytes); @@ -576,13 +622,119 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) printf ("\tsysmis: %g\n", sysmis); if (sysmis != SYSMIS) - sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis); + sys_warn (r, _("File specifies unexpected value %g as %s."), + sysmis, "SYSMIS"); + printf ("\thighest: %g\n", highest); if (highest != HIGHEST) - sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest); + sys_warn (r, _("File specifies unexpected value %g as %s."), + highest, "HIGHEST"); + printf ("\tlowest: %g\n", lowest); if (lowest != LOWEST) - sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest); + sys_warn (r, _("File specifies unexpected value %g as %s."), + lowest, "LOWEST"); +} + +/* Read record type 7, subtype 7. */ +static void +read_mrsets (struct sfm_reader *r, size_t size, size_t count) +{ + struct text_record *text; + + printf ("%08lx: multiple response sets\n", ftell (r->file)); + text = open_text_record (r, size * count); + for (;;) + { + const char *name; + enum { MRSET_MC, MRSET_MD } type; + bool cat_label_from_counted_values = false; + bool label_from_var_label = false; + const char *counted; + const char *label; + const char *variables; + + name = text_tokenize (text, '='); + if (name == NULL) + break; + + if (text_match (text, 'C')) + { + type = MRSET_MC; + counted = NULL; + if (!text_match (text, ' ')) + { + sys_warn (r, "missing space following 'C' at offset %zu " + "in mrsets record", text_pos (text)); + break; + } + } + else if (text_match (text, 'D')) + { + type = MRSET_MD; + } + else if (text_match (text, 'E')) + { + char *number; + + type = MRSET_MD; + cat_label_from_counted_values = true; + + if (!text_match (text, ' ')) + { + sys_warn (r, _("Missing space following 'E' at offset %zu " + "in MRSETS record"), text_pos (text)); + break; + } + + number = text_tokenize (text, ' '); + if (!strcmp (number, "11")) + label_from_var_label = true; + else if (strcmp (number, "1")) + sys_warn (r, _("Unexpected label source value \"%s\" " + "following 'E' at offset %zu in MRSETS record"), + number, text_pos (text)); + + } + else + { + sys_warn (r, "missing 'C', 'D', or 'E' at offset %zu " + "in mrsets record", text_pos (text)); + break; + } + + if (type == MRSET_MD) + { + counted = text_parse_counted_string (text); + if (counted == NULL) + break; + } + + label = text_parse_counted_string (text); + if (label == NULL) + break; + + variables = text_tokenize (text, '\n'); + if (variables == NULL) + { + sys_warn (r, "missing variable names following label " + "at offset %zu in mrsets record", text_pos (text)); + break; + } + + printf ("\t\"%s\": multiple %s set", + name, type == MRSET_MC ? "category" : "dichotomy"); + if (counted != NULL) + printf (", counted value \"%s\"", counted); + if (cat_label_from_counted_values) + printf (", category labels from counted values"); + if (label[0] != '\0') + printf (", label \"%s\"", label); + if (label_from_var_label) + printf (", label from variable label"); + printf(", variables \"%s\"\n", variables); + } + close_text_record (text); } /* Read record type 7, subtype 11. */ @@ -708,6 +860,31 @@ read_attributes (struct sfm_reader *r, struct text_record *text, } } +/* Read extended number of cases record. */ +static void +read_ncases64 (struct sfm_reader *r, size_t size, size_t count) +{ + int64_t unknown, ncases64; + + if (size != 8) + { + sys_warn (r, _("Bad size %zu for extended number of cases."), size); + skip_bytes (r, size * count); + return; + } + if (count != 2) + { + sys_warn (r, _("Bad count %zu for extended number of cases."), size); + skip_bytes (r, size * count); + return; + } + unknown = read_int64 (r); + ncases64 = read_int64 (r); + printf ("%08lx: extended number of cases: " + "unknown=%"PRId64", ncases64=%"PRId64"\n", + ftell (r->file), unknown, ncases64); +} + static void read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count) { @@ -729,6 +906,139 @@ read_character_encoding (struct sfm_reader *r, size_t size, size_t count) printf ("%08lx: Character Encoding: %s\n", posn, encoding); } +static void +read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count) +{ + const long start = ftell (r->file); + + printf ("%08lx: long string value labels\n", start); + while (ftell (r->file) - start < size * count) + { + long posn = ftell (r->file); + char var_name[VAR_NAME_LEN + 1]; + int var_name_len; + int n_values; + int width; + int i; + + /* Read variable name. */ + var_name_len = read_int (r); + if (var_name_len > VAR_NAME_LEN) + sys_error (r, _("Variable name length in long string value label " + "record (%d) exceeds %d-byte limit."), + var_name_len, VAR_NAME_LEN); + read_string (r, var_name, var_name_len + 1); + + /* Read width, number of values. */ + width = read_int (r); + n_values = read_int (r); + + printf ("\t%08lx: %s, width %d, %d values\n", + posn, var_name, width, n_values); + + /* Read values. */ + for (i = 0; i < n_values; i++) + { + char *value; + int value_length; + + char *label; + int label_length; + + posn = ftell (r->file); + + /* Read value. */ + value_length = read_int (r); + value = xmalloc (value_length + 1); + read_string (r, value, value_length + 1); + + /* Read label. */ + label_length = read_int (r); + label = xmalloc (label_length + 1); + read_string (r, label, label_length + 1); + + printf ("\t\t%08lx: \"%s\" (%d bytes) => \"%s\" (%d bytes)\n", + posn, value, value_length, label, label_length); + + free (value); + free (label); + } + } +} + +static void +hex_dump (size_t offset, const void *buffer_, size_t buffer_size) +{ + const uint8_t *buffer = buffer_; + + while (buffer_size > 0) + { + size_t n = MIN (buffer_size, 16); + size_t i; + + printf ("%04zx", offset); + for (i = 0; i < 16; i++) + { + if (i < n) + printf ("%c%02x", i == 8 ? '-' : ' ', buffer[i]); + else + printf (" "); + } + + printf (" |"); + for (i = 0; i < 16; i++) + { + unsigned char c = i < n ? buffer[i] : ' '; + putchar (isprint (c) ? c : '.'); + } + printf ("|\n"); + + offset += n; + buffer += n; + buffer_size -= n; + } +} + +/* Reads and prints any type 7 record that we don't understand. */ +static void +read_unknown_extension (struct sfm_reader *r, size_t size, size_t count) +{ + unsigned char *buffer; + size_t i; + + if (size == 0 || count > 65536 / size) + skip_bytes (r, size * count); + else if (size != 1) + { + buffer = xmalloc (size); + for (i = 0; i < count; i++) + { + read_bytes (r, buffer, size); + hex_dump (i * size, buffer, size); + } + free (buffer); + } + else + { + buffer = xmalloc (count); + read_bytes (r, buffer, count); + if (memchr (buffer, 0, count) == 0) + for (i = 0; i < count; i++) + { + unsigned char c = buffer[i]; + + if (c == '\\') + printf ("\\\\"); + else if (c == '\n' || isprint (c)) + putchar (c); + else + printf ("\\%02x", c); + } + else + hex_dump (0, buffer, count); + free (buffer); + } +} static void read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) @@ -745,6 +1055,82 @@ read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) } close_text_record (text); } + +static void +read_compressed_data (struct sfm_reader *r) +{ + enum { N_OPCODES = 8 }; + uint8_t opcodes[N_OPCODES]; + long int opcode_ofs; + int opcode_idx; + int case_num; + int i; + + read_int (r); + printf ("\n%08lx: compressed data:\n", ftell (r->file)); + + opcode_idx = N_OPCODES; + case_num = 0; + for (case_num = 0; ; case_num++) + { + printf ("%08lx: case %d's uncompressible data begins\n", + ftell (r->file), case_num); + for (i = 0; i < r->n_var_widths; i++) + { + int width = r->var_widths[i]; + char raw_value[8]; + int opcode; + + if (opcode_idx >= N_OPCODES) + { + opcode_ofs = ftell (r->file); + read_bytes (r, opcodes, 8); + opcode_idx = 0; + } + opcode = opcodes[opcode_idx]; + printf ("%08lx: variable %d: opcode %d: ", + opcode_ofs + opcode_idx, i, opcode); + + switch (opcode) + { + default: + printf ("%g", opcode - r->bias); + if (width != 0) + printf (", but this is a string variable (width=%d)", width); + printf ("\n"); + break; + + case 252: + printf ("end of data\n"); + return; + + case 253: + read_bytes (r, raw_value, 8); + printf ("uncompressible data: "); + print_untyped_value (r, raw_value); + printf ("\n"); + break; + + case 254: + printf ("spaces"); + if (width == 0) + printf (", but this is a numeric variable"); + printf ("\n"); + break; + + case 255: + printf ("SYSMIS"); + if (width != 0) + printf (", but this is a string variable (width=%d)", width); + + printf ("\n"); + break; + } + + opcode_idx++; + } + } +} /* Helpers for reading records that consist of structured text strings. */ @@ -752,6 +1138,7 @@ read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) /* State. */ struct text_record { + struct sfm_reader *reader; /* Reader. */ char *buffer; /* Record contents. */ size_t size; /* Size of buffer. */ size_t pos; /* Current position in buffer. */ @@ -765,6 +1152,8 @@ open_text_record (struct sfm_reader *r, size_t size) struct text_record *text = xmalloc (sizeof *text); char *buffer = xmalloc (size + 1); read_bytes (r, buffer, size); + buffer[size] = '\0'; + text->reader = r; text->buffer = buffer; text->size = size; text->pos = 0; @@ -807,6 +1196,54 @@ text_match (struct text_record *text, int c) return false; } +/* Reads a integer value expressed in decimal, then a space, then a string that + consists of exactly as many bytes as specified by the integer, then a space, + from TEXT. Returns the string, null-terminated, as a subset of TEXT's + buffer (so the caller should not free the string). */ +static const char * +text_parse_counted_string (struct text_record *text) +{ + size_t start; + size_t n; + char *s; + + start = text->pos; + n = 0; + while (isdigit ((unsigned char) text->buffer[text->pos])) + n = (n * 10) + (text->buffer[text->pos++] - '0'); + if (start == text->pos) + { + sys_error (text->reader, "expecting digit at offset %zu in record", + text->pos); + return NULL; + } + + if (!text_match (text, ' ')) + { + sys_error (text->reader, "expecting space at offset %zu in record", + text->pos); + return NULL; + } + + if (text->pos + n > text->size) + { + sys_error (text->reader, "%zu-byte string starting at offset %zu " + "exceeds record length %zu", n, text->pos, text->size); + return NULL; + } + + s = &text->buffer[text->pos]; + if (s[n] != ' ') + { + sys_error (text->reader, "expecting space at offset %zu following " + "%zu-byte string", text->pos + n, n); + return NULL; + } + s[n] = '\0'; + text->pos += n + 1; + return s; +} + /* Reads a variable=value pair from TEXT. Looks up the variable in DICT and stores it into *VAR. Stores a null-terminated value into *VALUE. */ @@ -825,6 +1262,13 @@ read_variable_to_value_pair (struct text_record *text, text->pos++; return true; } + +/* Returns the current byte offset inside the TEXT's string. */ +static size_t +text_pos (const struct text_record *text) +{ + return text->pos; +} static void usage (int exit_code) @@ -910,6 +1354,16 @@ read_int (struct sfm_reader *r) return integer_get (r->integer_format, integer, sizeof integer); } +/* Reads a 64-bit signed integer from R and returns its value in + host format. */ +static int64_t +read_int64 (struct sfm_reader *r) +{ + uint8_t integer[8]; + read_bytes (r, integer, sizeof integer); + return integer_get (r->integer_format, integer, sizeof integer); +} + /* Reads a 64-bit floating-point number from R and returns its value in host format. */ static double