X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=tests%2Fdissect-sysfile.c;h=8bab745f3ab23a52ca566b4e75b2c8001d194e30;hb=74ddfcbaa07193bab2d6cbe218fc6c16dd3cd052;hp=532684237745e5eef82aafa0eefa8f977f160e4a;hpb=025c8c504b302d7ffce6800134df494a4a68a5f5;p=pspp-builds.git diff --git a/tests/dissect-sysfile.c b/tests/dissect-sysfile.c index 53268423..8bab745f 100644 --- a/tests/dissect-sysfile.c +++ b/tests/dissect-sysfile.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,6 +36,8 @@ #include "gettext.h" #define _(msgid) gettext (msgid) +#define VAR_NAME_LEN 64 + struct sfm_reader { const char *file_name; @@ -43,8 +45,14 @@ struct sfm_reader int n_variable_records, n_variables; + int *var_widths; + size_t n_var_widths, allocated_var_widths; + enum integer_format integer_format; enum float_format float_format; + + bool compressed; + double bias; }; static void read_header (struct sfm_reader *); @@ -62,12 +70,23 @@ static void read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count); static void read_long_string_map (struct sfm_reader *r, size_t size, size_t count); - -static struct variable_to_value_map *open_variable_to_value_map ( +static void read_datafile_attributes (struct sfm_reader *r, + size_t size, size_t count); +static void read_variable_attributes (struct sfm_reader *r, + size_t size, size_t count); +static void read_character_encoding (struct sfm_reader *r, + size_t size, size_t count); +static void read_long_string_value_labels (struct sfm_reader *r, + size_t size, size_t count); +static void read_compressed_data (struct sfm_reader *); + +static struct text_record *open_text_record ( struct sfm_reader *, size_t size); -static void close_variable_to_value_map (struct variable_to_value_map *); -static bool read_variable_to_value_map (struct variable_to_value_map *, - char **key, char **value); +static void close_text_record (struct text_record *); +static bool read_variable_to_value_pair (struct text_record *, + char **key, char **value); +static char *text_tokenize (struct text_record *, int delimiter); +static bool text_match (struct text_record *text, int c); static void usage (int exit_code); static void sys_warn (struct sfm_reader *, const char *, ...) @@ -87,50 +106,68 @@ int main (int argc, char *argv[]) { struct sfm_reader r; - int rec_type; + int i; set_program_name (argv[0]); - if (argc != 2) + if (argc < 2) usage (EXIT_FAILURE); - r.file_name = argv[1]; - r.file = fopen (r.file_name, "rb"); - if (r.file == NULL) - error (EXIT_FAILURE, errno, "error opening \"%s\"", r.file_name); - r.n_variable_records = 0; - r.n_variables = 0; - - read_header (&r); - while ((rec_type = read_int (&r)) != 999) + for (i = 1; i < argc; i++) { - switch (rec_type) + int rec_type; + + r.file_name = argv[i]; + r.file = fopen (r.file_name, "rb"); + if (r.file == NULL) + error (EXIT_FAILURE, errno, "error opening \"%s\"", r.file_name); + r.n_variable_records = 0; + r.n_variables = 0; + r.n_var_widths = 0; + r.allocated_var_widths = 0; + r.var_widths = 0; + r.compressed = false; + + if (argc > 2) + printf ("Reading \"%s\":\n", r.file_name); + + read_header (&r); + while ((rec_type = read_int (&r)) != 999) { - case 2: - read_variable_record (&r); - break; + switch (rec_type) + { + case 2: + read_variable_record (&r); + break; - case 3: - read_value_label_record (&r); - break; + case 3: + read_value_label_record (&r); + break; - case 4: - sys_error (&r, _("Misplaced type 4 record.")); + case 4: + sys_error (&r, _("Misplaced type 4 record.")); - case 6: - read_document_record (&r); - break; + case 6: + read_document_record (&r); + break; - case 7: - read_extension_record (&r); - break; + case 7: + read_extension_record (&r); + break; - default: - sys_error (&r, _("Unrecognized record type %d."), rec_type); + default: + sys_error (&r, _("Unrecognized record type %d."), rec_type); + } } - } - printf ("%08lx: end-of-dictionary record (first byte of data at %08lx)\n", - ftell (r.file), ftell (r.file) + 4); + printf ("%08lx: end-of-dictionary record " + "(first byte of data at %08lx)\n", + ftell (r.file), ftell (r.file) + 4); + + if (r.compressed) + read_compressed_data (&r); + fclose (r.file); + } + return 0; } @@ -146,7 +183,6 @@ read_header (struct sfm_reader *r) int32_t weight_index; int32_t ncases; uint8_t raw_bias[8]; - double bias; char creation_date[10]; char creation_time[9]; char file_label[65]; @@ -170,10 +206,12 @@ read_header (struct sfm_reader *r) raw_layout_code, sizeof raw_layout_code); nominal_case_size = read_int (r); - compressed = read_int (r) != 0; + compressed = read_int (r); weight_index = read_int (r); ncases = read_int (r); + r->compressed = compressed != 0; + /* Identify floating-point format and obtain compression bias. */ read_bytes (r, raw_bias, sizeof raw_bias); if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0) @@ -186,7 +224,7 @@ read_header (struct sfm_reader *r) else r->float_format = FLOAT_IEEE_DOUBLE_LE; } - bias = float_get_double (r->float_format, raw_bias); + r->bias = float_get_double (r->float_format, raw_bias); read_string (r, creation_date, sizeof creation_date); read_string (r, creation_time, sizeof creation_time); @@ -200,7 +238,7 @@ read_header (struct sfm_reader *r) printf ("\t%17s: %"PRId32"\n", "Compressed", compressed); printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index); printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases); - printf ("\t%17s: %g\n", "Compression bias", bias); + printf ("\t%17s: %g\n", "Compression bias", r->bias); printf ("\t%17s: %s\n", "Creation date", creation_date); printf ("\t%17s: %s\n", "Creation time", creation_time); printf ("\t%17s: \"%s\"\n", "File label", file_label); @@ -278,6 +316,11 @@ read_variable_record (struct sfm_reader *r) if (width >= 0) r->n_variables++; + if (r->n_var_widths >= r->allocated_var_widths) + r->var_widths = x2nrealloc (r->var_widths, &r->allocated_var_widths, + sizeof *r->var_widths); + r->var_widths[r->n_var_widths++] = width; + printf ("\tWidth: %d (%s)\n", width, width > 0 ? "string" @@ -357,6 +400,20 @@ read_variable_record (struct sfm_reader *r) } } +static void +print_untyped_value (struct sfm_reader *r, char raw_value[8]) +{ + int n_printable; + double value; + + value = float_get_double (r->float_format, raw_value); + for (n_printable = 0; n_printable < sizeof raw_value; n_printable++) + if (!isprint (raw_value[n_printable])) + break; + + printf ("%g/\"%.*s\"", value, n_printable, raw_value); +} + /* Reads value labels from sysfile R and inserts them into the associated dictionary. */ static void @@ -372,17 +429,11 @@ read_value_label_record (struct sfm_reader *r) for (i = 0; i < label_cnt; i++) { char raw_value[8]; - double value; - int n_printable; unsigned char label_len; size_t padded_len; char label[256]; read_bytes (r, raw_value, sizeof raw_value); - value = float_get_double (r->float_format, raw_value); - for (n_printable = 0; n_printable < sizeof raw_value; n_printable++) - if (!isprint (raw_value[n_printable])) - break; /* Read label length. */ read_bytes (r, &label_len, sizeof label_len); @@ -392,7 +443,9 @@ read_value_label_record (struct sfm_reader *r) read_bytes (r, label, padded_len - 1); label[label_len] = 0; - printf ("\t%g/\"%.*s\": \"%s\"\n", value, n_printable, raw_value, label); + printf ("\t"); + print_untyped_value (r, raw_value); + printf (": \"%s\"\n", label); } /* Now, read the type 4 record that has the list of variables @@ -486,9 +539,20 @@ read_extension_record (struct sfm_reader *r) break; case 17: - /* Text field that defines variable attributes. New in - SPSS 14. */ - break; + read_datafile_attributes (r, size, count); + return; + + case 18: + read_variable_attributes (r, size, count); + return; + + case 20: + read_character_encoding (r, size, count); + return; + + case 21: + read_long_string_value_labels (r, size, count); + return; default: sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); @@ -549,13 +613,18 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) printf ("\tsysmis: %g\n", sysmis); if (sysmis != SYSMIS) - sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis); + sys_warn (r, _("File specifies unexpected value %g as %s."), + sysmis, "SYSMIS"); + printf ("\thighest: %g\n", highest); if (highest != HIGHEST) - sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest); + sys_warn (r, _("File specifies unexpected value %g as %s."), + highest, "HIGHEST"); + printf ("\tlowest: %g\n", lowest); if (lowest != LOWEST) - sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest); + sys_warn (r, _("File specifies unexpected value %g as %s."), + lowest, "LOWEST"); } /* Read record type 7, subtype 11. */ @@ -613,15 +682,15 @@ read_display_parameters (struct sfm_reader *r, size_t size, size_t count) static void read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count) { - struct variable_to_value_map *map; + struct text_record *text; char *var; char *long_name; printf ("%08lx: long variable names (short => long)\n", ftell (r->file)); - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (map, &var, &long_name)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (text, &var, &long_name)) printf ("\t%s => %s\n", var, long_name); - close_variable_to_value_map (map); + close_text_record (text); } /* Reads record type 7, subtype 14, which gives the real length @@ -629,89 +698,316 @@ read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count) static void read_long_string_map (struct sfm_reader *r, size_t size, size_t count) { - struct variable_to_value_map *map; + struct text_record *text; char *var; char *length_s; printf ("%08lx: very long strings (variable => length)\n", ftell (r->file)); - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (map, &var, &length_s)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (text, &var, &length_s)) printf ("\t%s => %d\n", var, atoi (length_s)); - close_variable_to_value_map (map); + close_text_record (text); +} + +static bool +read_attributes (struct sfm_reader *r, struct text_record *text, + const char *variable) +{ + const char *key; + int index; + + for (;;) + { + key = text_tokenize (text, '('); + if (key == NULL) + return true; + + for (index = 1; ; index++) + { + /* Parse the value. */ + const char *value = text_tokenize (text, '\n'); + if (value == NULL) + { + sys_warn (r, _("%s: Error parsing attribute value %s[%d]"), + variable, key, index); + return false; + } + if (strlen (value) < 2 + || value[0] != '\'' || value[strlen (value) - 1] != '\'') + sys_warn (r, _("%s: Attribute value %s[%d] is not quoted: %s"), + variable, key, index, value); + else + printf ("\t%s: %s[%d] = \"%.*s\"\n", + variable, key, index, (int) strlen (value) - 2, value + 1); + + /* Was this the last value for this attribute? */ + if (text_match (text, ')')) + break; + } + + if (text_match (text, '/')) + return true; + } +} + +static void +read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count) +{ + struct text_record *text; + + printf ("%08lx: datafile attributes\n", ftell (r->file)); + text = open_text_record (r, size * count); + read_attributes (r, text, "datafile"); + close_text_record (text); +} + +static void +read_character_encoding (struct sfm_reader *r, size_t size, size_t count) +{ + const unsigned long int posn = ftell (r->file); + char *encoding = xcalloc (size, count + 1); + read_string (r, encoding, count + 1); + + printf ("%08lx: Character Encoding: %s\n", posn, encoding); +} + +static void +read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count) +{ + const long start = ftell (r->file); + + printf ("%08lx: long string value labels\n", start); + while (ftell (r->file) - start < size * count) + { + long posn = ftell (r->file); + char var_name[VAR_NAME_LEN + 1]; + int var_name_len; + int n_values; + int width; + int i; + + /* Read variable name. */ + var_name_len = read_int (r); + if (var_name_len > VAR_NAME_LEN) + sys_error (r, _("Variable name length in long string value label " + "record (%d) exceeds %d-byte limit."), + var_name_len, VAR_NAME_LEN); + read_string (r, var_name, var_name_len + 1); + + /* Read width, number of values. */ + width = read_int (r); + n_values = read_int (r); + + printf ("\t%08lx: %s, width %d, %d values\n", + posn, var_name, width, n_values); + + /* Read values. */ + for (i = 0; i < n_values; i++) + { + char *value; + int value_length; + + char *label; + int label_length; + + posn = ftell (r->file); + + /* Read value. */ + value_length = read_int (r); + value = xmalloc (value_length + 1); + read_string (r, value, value_length + 1); + + /* Read label. */ + label_length = read_int (r); + label = xmalloc (label_length + 1); + read_string (r, label, label_length + 1); + + printf ("\t\t%08lx: \"%s\" (%d bytes) => \"%s\" (%d bytes)\n", + posn, value, value_length, label, label_length); + + free (value); + free (label); + } + } +} + +static void +read_variable_attributes (struct sfm_reader *r, size_t size, size_t count) +{ + struct text_record *text; + + printf ("%08lx: variable attributes\n", ftell (r->file)); + text = open_text_record (r, size * count); + for (;;) + { + const char *variable = text_tokenize (text, ':'); + if (variable == NULL || !read_attributes (r, text, variable)) + break; + } + close_text_record (text); +} + +static void +read_compressed_data (struct sfm_reader *r) +{ + enum { N_OPCODES = 8 }; + uint8_t opcodes[N_OPCODES]; + long int opcode_ofs; + int opcode_idx; + int case_num; + int i; + + read_int (r); + printf ("\n%08lx: compressed data:\n", ftell (r->file)); + + opcode_idx = N_OPCODES; + case_num = 0; + for (case_num = 0; ; case_num++) + { + printf ("%08lx: case %d's uncompressible data begins\n", + ftell (r->file), case_num); + for (i = 0; i < r->n_var_widths; i++) + { + int width = r->var_widths[i]; + char raw_value[8]; + int opcode; + + if (opcode_idx >= N_OPCODES) + { + opcode_ofs = ftell (r->file); + read_bytes (r, opcodes, 8); + opcode_idx = 0; + } + opcode = opcodes[opcode_idx]; + printf ("%08lx: variable %d: opcode %d: ", + opcode_ofs + opcode_idx, i, opcode); + + switch (opcode) + { + default: + printf ("%g", opcode - r->bias); + if (width != 0) + printf (", but this is a string variable (width=%d)", width); + printf ("\n"); + break; + + case 252: + printf ("end of data\n"); + return; + + case 253: + read_bytes (r, raw_value, 8); + printf ("uncompressible data: "); + print_untyped_value (r, raw_value); + printf ("\n"); + break; + + case 254: + printf ("spaces"); + if (width == 0) + printf (", but this is a numeric variable"); + printf ("\n"); + break; + + case 255: + printf ("SYSMIS"); + if (width != 0) + printf (", but this is a string variable (width=%d)", width); + + printf ("\n"); + break; + } + + opcode_idx++; + } + } } -/* Helpers for reading records that contain "variable=value" - pairs. */ +/* Helpers for reading records that consist of structured text + strings. */ /* State. */ -struct variable_to_value_map +struct text_record { char *buffer; /* Record contents. */ size_t size; /* Size of buffer. */ size_t pos; /* Current position in buffer. */ }; -/* Reads SIZE bytes into a "variable=value" map for R, - and returns the map. */ -static struct variable_to_value_map * -open_variable_to_value_map (struct sfm_reader *r, size_t size) +/* Reads SIZE bytes into a text record for R, + and returns the new text record. */ +static struct text_record * +open_text_record (struct sfm_reader *r, size_t size) { - struct variable_to_value_map *map = xmalloc (sizeof *map); + struct text_record *text = xmalloc (sizeof *text); char *buffer = xmalloc (size + 1); read_bytes (r, buffer, size); - map->buffer = buffer; - map->size = size; - map->pos = 0; - return map; + text->buffer = buffer; + text->size = size; + text->pos = 0; + return text; } -/* Closes MAP and frees its storage. - Not really needed, because the pool will free the map anyway, +/* Closes TEXT and frees its storage. + Not really needed, because the pool will free the text record anyway, but can be used to free it earlier. */ static void -close_variable_to_value_map (struct variable_to_value_map *map) +close_text_record (struct text_record *text) { - free (map); - free (map->buffer); + free (text->buffer); + free (text); } static char * -tokenize (struct variable_to_value_map *map, int delimiter) -{ - size_t start = map->pos; - while (map->pos < map->size - && map->buffer[map->pos] != delimiter - && map->buffer[map->pos] != '\0') - map->pos++; - if (map->pos == map->size) +text_tokenize (struct text_record *text, int delimiter) +{ + size_t start = text->pos; + while (text->pos < text->size + && text->buffer[text->pos] != delimiter + && text->buffer[text->pos] != '\0') + text->pos++; + if (text->pos == text->size) return NULL; - map->buffer[map->pos++] = '\0'; - return &map->buffer[start]; + text->buffer[text->pos++] = '\0'; + return &text->buffer[start]; +} + +static bool +text_match (struct text_record *text, int c) +{ + if (text->pos < text->size && text->buffer[text->pos] == c) + { + text->pos++; + return true; + } + else + return false; } -/* Reads the next variable=value pair from MAP. +/* Reads a variable=value pair from TEXT. Looks up the variable in DICT and stores it into *VAR. Stores a null-terminated value into *VALUE. */ static bool -read_variable_to_value_map (struct variable_to_value_map *map, - char **key, char **value) +read_variable_to_value_pair (struct text_record *text, + char **key, char **value) { - *key = tokenize (map, '='); - *value = tokenize (map, '\t'); + *key = text_tokenize (text, '='); + *value = text_tokenize (text, '\t'); if (!*key || !*value) return false; - while (map->pos < map->size - && (map->buffer[map->pos] == '\t' - || map->buffer[map->pos] == '\0')) - map->pos++; + while (text->pos < text->size + && (text->buffer[text->pos] == '\t' + || text->buffer[text->pos] == '\0')) + text->pos++; return true; } static void usage (int exit_code) { - printf ("usage: %s SYSFILE, where SYSFILE is the name of a system file\n", + printf ("usage: %s SYSFILE...\n" + "where each SYSFILE is the name of a system file\n", program_name); exit (exit_code); }