/* PSPP - a program for statistical analysis.
- Copyright (C) 2007 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "gettext.h"
#define _(msgid) gettext (msgid)
+#define VAR_NAME_LEN 64
+
struct sfm_reader
{
const char *file_name;
int n_variable_records, n_variables;
+ int *var_widths;
+ size_t n_var_widths, allocated_var_widths;
+
enum integer_format integer_format;
enum float_format float_format;
+
+ bool compressed;
+ double bias;
};
static void read_header (struct sfm_reader *);
size_t size, size_t count);
static void read_long_string_map (struct sfm_reader *r,
size_t size, size_t count);
-
-static struct variable_to_value_map *open_variable_to_value_map (
+static void read_datafile_attributes (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_variable_attributes (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_character_encoding (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_long_string_value_labels (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_compressed_data (struct sfm_reader *);
+
+static struct text_record *open_text_record (
struct sfm_reader *, size_t size);
-static void close_variable_to_value_map (struct variable_to_value_map *);
-static bool read_variable_to_value_map (struct variable_to_value_map *,
- char **key, char **value);
+static void close_text_record (struct text_record *);
+static bool read_variable_to_value_pair (struct text_record *,
+ char **key, char **value);
+static char *text_tokenize (struct text_record *, int delimiter);
+static bool text_match (struct text_record *text, int c);
static void usage (int exit_code);
static void sys_warn (struct sfm_reader *, const char *, ...)
main (int argc, char *argv[])
{
struct sfm_reader r;
- int rec_type;
+ int i;
set_program_name (argv[0]);
- if (argc != 2)
+ if (argc < 2)
usage (EXIT_FAILURE);
- r.file_name = argv[1];
- r.file = fopen (r.file_name, "rb");
- if (r.file == NULL)
- error (EXIT_FAILURE, errno, "error opening \"%s\"", r.file_name);
- r.n_variable_records = 0;
- r.n_variables = 0;
-
- read_header (&r);
- while ((rec_type = read_int (&r)) != 999)
+ for (i = 1; i < argc; i++)
{
- switch (rec_type)
+ int rec_type;
+
+ r.file_name = argv[i];
+ r.file = fopen (r.file_name, "rb");
+ if (r.file == NULL)
+ error (EXIT_FAILURE, errno, "error opening \"%s\"", r.file_name);
+ r.n_variable_records = 0;
+ r.n_variables = 0;
+ r.n_var_widths = 0;
+ r.allocated_var_widths = 0;
+ r.var_widths = 0;
+ r.compressed = false;
+
+ if (argc > 2)
+ printf ("Reading \"%s\":\n", r.file_name);
+
+ read_header (&r);
+ while ((rec_type = read_int (&r)) != 999)
{
- case 2:
- read_variable_record (&r);
- break;
+ switch (rec_type)
+ {
+ case 2:
+ read_variable_record (&r);
+ break;
- case 3:
- read_value_label_record (&r);
- break;
+ case 3:
+ read_value_label_record (&r);
+ break;
- case 4:
- sys_error (&r, _("Misplaced type 4 record."));
+ case 4:
+ sys_error (&r, _("Misplaced type 4 record."));
- case 6:
- read_document_record (&r);
- break;
+ case 6:
+ read_document_record (&r);
+ break;
- case 7:
- read_extension_record (&r);
- break;
+ case 7:
+ read_extension_record (&r);
+ break;
- default:
- sys_error (&r, _("Unrecognized record type %d."), rec_type);
+ default:
+ sys_error (&r, _("Unrecognized record type %d."), rec_type);
+ }
}
- }
- printf ("%08lx: end-of-dictionary record (first byte of data at %08lx)\n",
- ftell (r.file), ftell (r.file) + 4);
+ printf ("%08lx: end-of-dictionary record "
+ "(first byte of data at %08lx)\n",
+ ftell (r.file), ftell (r.file) + 4);
+
+ if (r.compressed)
+ read_compressed_data (&r);
+ fclose (r.file);
+ }
+
return 0;
}
int32_t weight_index;
int32_t ncases;
uint8_t raw_bias[8];
- double bias;
char creation_date[10];
char creation_time[9];
char file_label[65];
raw_layout_code, sizeof raw_layout_code);
nominal_case_size = read_int (r);
- compressed = read_int (r) != 0;
+ compressed = read_int (r);
weight_index = read_int (r);
ncases = read_int (r);
+ r->compressed = compressed != 0;
+
/* Identify floating-point format and obtain compression bias. */
read_bytes (r, raw_bias, sizeof raw_bias);
if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
else
r->float_format = FLOAT_IEEE_DOUBLE_LE;
}
- bias = float_get_double (r->float_format, raw_bias);
+ r->bias = float_get_double (r->float_format, raw_bias);
read_string (r, creation_date, sizeof creation_date);
read_string (r, creation_time, sizeof creation_time);
printf ("\t%17s: %"PRId32"\n", "Compressed", compressed);
printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index);
printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases);
- printf ("\t%17s: %g\n", "Compression bias", bias);
+ printf ("\t%17s: %g\n", "Compression bias", r->bias);
printf ("\t%17s: %s\n", "Creation date", creation_date);
printf ("\t%17s: %s\n", "Creation time", creation_time);
printf ("\t%17s: \"%s\"\n", "File label", file_label);
if (width >= 0)
r->n_variables++;
+ if (r->n_var_widths >= r->allocated_var_widths)
+ r->var_widths = x2nrealloc (r->var_widths, &r->allocated_var_widths,
+ sizeof *r->var_widths);
+ r->var_widths[r->n_var_widths++] = width;
+
printf ("\tWidth: %d (%s)\n",
width,
width > 0 ? "string"
}
}
+/* Prints the 8 bytes at RAW_VALUE both ways they might be meant: as a
+   number decoded with R's floating-point format, and as text limited to
+   the leading run of printable characters.  The output has the form
+   NUMBER/"TEXT" with no trailing new-line. */
+static void
+print_untyped_value (struct sfm_reader *r, char raw_value[8])
+{
+  /* RAW_VALUE is really a pointer (array parameters decay), so
+     "sizeof raw_value" would yield the pointer size rather than 8. */
+  enum { RAW_VALUE_LEN = 8 };
+  int n_printable;
+  double value;
+
+  value = float_get_double (r->float_format, raw_value);
+
+  /* isprint requires a value representable as unsigned char (or EOF);
+     plain char may be negative for bytes >= 0x80. */
+  for (n_printable = 0; n_printable < RAW_VALUE_LEN; n_printable++)
+    if (!isprint ((unsigned char) raw_value[n_printable]))
+      break;
+
+  printf ("%g/\"%.*s\"", value, n_printable, raw_value);
+}
+
/* Reads value labels from sysfile R and inserts them into the
associated dictionary. */
static void
for (i = 0; i < label_cnt; i++)
{
char raw_value[8];
- double value;
- int n_printable;
unsigned char label_len;
size_t padded_len;
char label[256];
read_bytes (r, raw_value, sizeof raw_value);
- value = float_get_double (r->float_format, raw_value);
- for (n_printable = 0; n_printable < sizeof raw_value; n_printable++)
- if (!isprint (raw_value[n_printable]))
- break;
/* Read label length. */
read_bytes (r, &label_len, sizeof label_len);
read_bytes (r, label, padded_len - 1);
label[label_len] = 0;
- printf ("\t%g/\"%.*s\": \"%s\"\n", value, n_printable, raw_value, label);
+ printf ("\t");
+ print_untyped_value (r, raw_value);
+ printf (": \"%s\"\n", label);
}
/* Now, read the type 4 record that has the list of variables
break;
case 17:
- /* Text field that defines variable attributes. New in
- SPSS 14. */
- break;
+ read_datafile_attributes (r, size, count);
+ return;
+
+ case 18:
+ read_variable_attributes (r, size, count);
+ return;
+
+ case 20:
+ read_character_encoding (r, size, count);
+ return;
+
+ case 21:
+ read_long_string_value_labels (r, size, count);
+ return;
default:
sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
printf ("\tsysmis: %g\n", sysmis);
if (sysmis != SYSMIS)
- sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ sysmis, "SYSMIS");
+
printf ("\thighest: %g\n", highest);
if (highest != HIGHEST)
- sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ highest, "HIGHEST");
+
printf ("\tlowest: %g\n", lowest);
if (lowest != LOWEST)
- sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ lowest, "LOWEST");
}
/* Read record type 7, subtype 11. */
static void
read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count)
{
- struct variable_to_value_map *map;
+ struct text_record *text;
char *var;
char *long_name;
printf ("%08lx: long variable names (short => long)\n", ftell (r->file));
- map = open_variable_to_value_map (r, size * count);
- while (read_variable_to_value_map (map, &var, &long_name))
+ text = open_text_record (r, size * count);
+ while (read_variable_to_value_pair (text, &var, &long_name))
printf ("\t%s => %s\n", var, long_name);
- close_variable_to_value_map (map);
+ close_text_record (text);
}
/* Reads record type 7, subtype 14, which gives the real length
static void
read_long_string_map (struct sfm_reader *r, size_t size, size_t count)
{
- struct variable_to_value_map *map;
+ struct text_record *text;
char *var;
char *length_s;
printf ("%08lx: very long strings (variable => length)\n", ftell (r->file));
- map = open_variable_to_value_map (r, size * count);
- while (read_variable_to_value_map (map, &var, &length_s))
+ text = open_text_record (r, size * count);
+ while (read_variable_to_value_pair (text, &var, &length_s))
printf ("\t%s => %d\n", var, atoi (length_s));
- close_variable_to_value_map (map);
+ close_text_record (text);
+}
+
+/* Parses a run of attributes from TEXT and prints every value, labeling
+   each line of output with VARIABLE.
+
+   An attribute is a name terminated by '(', then one or more values each
+   terminated by a new-line, then ')'.  Parsing ends successfully when no
+   further attribute name can be tokenized or when a '/' follows an
+   attribute.  A value that is not wrapped in single quotes draws a
+   warning but does not stop parsing.
+
+   Returns true on success, false (after a warning) if a value could not
+   be tokenized. */
+static bool
+read_attributes (struct sfm_reader *r, struct text_record *text,
+                 const char *variable)
+{
+  for (;;)
+    {
+      const char *name = text_tokenize (text, '(');
+      int idx = 1;
+
+      if (name == NULL)
+        return true;
+
+      /* Values are numbered from 1 within each attribute. */
+      do
+        {
+          const char *value = text_tokenize (text, '\n');
+          size_t len;
+
+          if (value == NULL)
+            {
+              sys_warn (r, _("%s: Error parsing attribute value %s[%d]"),
+                        variable, name, idx);
+              return false;
+            }
+
+          len = strlen (value);
+          if (len >= 2 && value[0] == '\'' && value[len - 1] == '\'')
+            printf ("\t%s: %s[%d] = \"%.*s\"\n",
+                    variable, name, idx, (int) len - 2, value + 1);
+          else
+            sys_warn (r, _("%s: Attribute value %s[%d] is not quoted: %s"),
+                      variable, name, idx, value);
+
+          idx++;
+        }
+      while (!text_match (text, ')'));
+
+      /* A slash after the closing parenthesis ends this attribute set. */
+      if (text_match (text, '/'))
+        return true;
+    }
+}
+
+/* Handles record type 7, subtype 17: prints the record's file offset,
+   then parses its SIZE * COUNT bytes as attributes and prints them under
+   the label "datafile". */
+static void
+read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count)
+{
+  const long offset = ftell (r->file);
+  struct text_record *attrs;
+
+  printf ("%08lx: datafile attributes\n", offset);
+
+  attrs = open_text_record (r, size * count);
+  read_attributes (r, attrs, "datafile");
+  close_text_record (attrs);
+}
+
+/* Handles record type 7, subtype 20: reads the record's SIZE * COUNT
+   bytes as a string and prints it as the character encoding, preceded by
+   the file offset at which the record data began. */
+static void
+read_character_encoding (struct sfm_reader *r, size_t size, size_t count)
+{
+  const unsigned long int posn = ftell (r->file);
+  const size_t n_bytes = size * count;
+  char *encoding = xmalloc (n_bytes + 1);
+
+  /* Consume the whole record payload, not just COUNT bytes, so the
+     reader stays in sync even if SIZE is not 1.  This follows the other
+     read_string callers in this file, which pass one more than the
+     number of bytes to consume to leave room for the terminator. */
+  read_string (r, encoding, n_bytes + 1);
+
+  printf ("%08lx: Character Encoding: %s\n", posn, encoding);
+
+  free (encoding);
+}
+
+/* Handles record type 7, subtype 21: dumps value labels for long string
+   variables.
+
+   The record's SIZE * COUNT bytes are read as a sequence of entries.
+   Each entry holds a length-prefixed variable name, the variable's width,
+   a count of values, and then that many (value, label) pairs, each of
+   which is itself length-prefixed.  Every entry and every pair is printed
+   with the file offset at which it starts.
+
+   Lengths come straight from the file, so negative ones are rejected
+   before being used as read or allocation sizes.  sys_error is relied on
+   not to return, as the existing name-length check already does. */
+static void
+read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count)
+{
+  const long start = ftell (r->file);
+  const size_t n_bytes = size * count;
+
+  printf ("%08lx: long string value labels\n", start);
+  while ((size_t) (ftell (r->file) - start) < n_bytes)
+    {
+      long posn = ftell (r->file);
+      char var_name[VAR_NAME_LEN + 1];
+      int var_name_len;
+      int n_values;
+      int width;
+      int i;
+
+      /* Read variable name. */
+      var_name_len = read_int (r);
+      if (var_name_len < 0)
+        sys_error (r, _("Variable name length in long string value label "
+                        "record (%d) is negative."),
+                   var_name_len);
+      if (var_name_len > VAR_NAME_LEN)
+        sys_error (r, _("Variable name length in long string value label "
+                        "record (%d) exceeds %d-byte limit."),
+                   var_name_len, VAR_NAME_LEN);
+      read_string (r, var_name, var_name_len + 1);
+
+      /* Read width, number of values. */
+      width = read_int (r);
+      n_values = read_int (r);
+
+      printf ("\t%08lx: %s, width %d, %d values\n",
+              posn, var_name, width, n_values);
+
+      /* Read values. */
+      for (i = 0; i < n_values; i++)
+        {
+          char *value;
+          int value_length;
+
+          char *label;
+          int label_length;
+
+          posn = ftell (r->file);
+
+          /* Read value. */
+          value_length = read_int (r);
+          if (value_length < 0)
+            sys_error (r, _("Value length in long string value label "
+                            "record (%d) is negative."),
+                       value_length);
+          value = xmalloc ((size_t) value_length + 1);
+          read_string (r, value, (size_t) value_length + 1);
+
+          /* Read label. */
+          label_length = read_int (r);
+          if (label_length < 0)
+            sys_error (r, _("Label length in long string value label "
+                            "record (%d) is negative."),
+                       label_length);
+          label = xmalloc ((size_t) label_length + 1);
+          read_string (r, label, (size_t) label_length + 1);
+
+          printf ("\t\t%08lx: \"%s\" (%d bytes) => \"%s\" (%d bytes)\n",
+                  posn, value, value_length, label, label_length);
+
+          free (value);
+          free (label);
+        }
+    }
+}
+
+/* Handles record type 7, subtype 18: prints the record's file offset,
+   then parses its SIZE * COUNT bytes as a series of entries, each a
+   variable name terminated by ':' followed by that variable's
+   attributes.  Stops at the first name that cannot be tokenized or the
+   first attribute set that fails to parse. */
+static void
+read_variable_attributes (struct sfm_reader *r, size_t size, size_t count)
+{
+  struct text_record *record;
+  const char *var_name;
+
+  printf ("%08lx: variable attributes\n", ftell (r->file));
+
+  record = open_text_record (r, size * count);
+  while ((var_name = text_tokenize (record, ':')) != NULL
+         && read_attributes (r, record, var_name))
+    continue;
+  close_text_record (record);
+}
+
+/* Dumps the compressed case data that follows the dictionary in R.
+
+   Data is read as groups of 8 one-byte opcodes, one opcode per variable
+   record, cycling through the recorded variable widths once per case.
+   Each opcode is printed with its file offset and meaning:
+
+     252    end of data; the function returns.
+     253    8 bytes of uncompressible data follow and are printed.
+     254    spaces (flagged if the variable is numeric).
+     255    SYSMIS (flagged if the variable is a string).
+     other  the number OPCODE - bias (flagged if the variable is a
+            string).
+
+   If no variable widths were recorded, nothing can be decoded, so a
+   warning is issued and the function returns instead of looping. */
+static void
+read_compressed_data (struct sfm_reader *r)
+{
+  enum { N_OPCODES = 8 };
+  uint8_t opcodes[N_OPCODES];
+  long int opcode_ofs = 0;
+  int opcode_idx;
+  int case_num;
+  int i;
+
+  /* Skip the 4 bytes between the end-of-dictionary record code and the
+     first byte of data (main reports data as starting 4 bytes on). */
+  read_int (r);
+  printf ("\n%08lx: compressed data:\n", ftell (r->file));
+
+  /* With no variables the per-case loop below would never read an
+     opcode, so it could never reach "end of data" and would spin
+     forever. */
+  if (r->n_var_widths == 0)
+    {
+      sys_warn (r, _("Cannot dump compressed data because the dictionary "
+                     "has no variable records."));
+      return;
+    }
+
+  /* Force a fresh group of opcodes to be read on the first variable. */
+  opcode_idx = N_OPCODES;
+  for (case_num = 0; ; case_num++)
+    {
+      printf ("%08lx: case %d's uncompressible data begins\n",
+              ftell (r->file), case_num);
+      for (i = 0; (size_t) i < r->n_var_widths; i++)
+        {
+          int width = r->var_widths[i];
+          char raw_value[8];
+          int opcode;
+
+          if (opcode_idx >= N_OPCODES)
+            {
+              opcode_ofs = ftell (r->file);
+              read_bytes (r, opcodes, sizeof opcodes);
+              opcode_idx = 0;
+            }
+          opcode = opcodes[opcode_idx];
+          printf ("%08lx: variable %d: opcode %d: ",
+                  opcode_ofs + opcode_idx, i, opcode);
+
+          switch (opcode)
+            {
+            default:
+              printf ("%g", opcode - r->bias);
+              if (width != 0)
+                printf (", but this is a string variable (width=%d)", width);
+              printf ("\n");
+              break;
+
+            case 252:
+              printf ("end of data\n");
+              return;
+
+            case 253:
+              read_bytes (r, raw_value, 8);
+              printf ("uncompressible data: ");
+              print_untyped_value (r, raw_value);
+              printf ("\n");
+              break;
+
+            case 254:
+              printf ("spaces");
+              if (width == 0)
+                printf (", but this is a numeric variable");
+              printf ("\n");
+              break;
+
+            case 255:
+              printf ("SYSMIS");
+              if (width != 0)
+                printf (", but this is a string variable (width=%d)", width);
+
+              printf ("\n");
+              break;
+            }
+
+          opcode_idx++;
+        }
+    }
}
\f
-/* Helpers for reading records that contain "variable=value"
- pairs. */
+/* Helpers for reading records that consist of structured text
+ strings. */
/* State. */
-struct variable_to_value_map
+struct text_record
{
char *buffer; /* Record contents. */
size_t size; /* Size of buffer. */
size_t pos; /* Current position in buffer. */
};
-/* Reads SIZE bytes into a "variable=value" map for R,
- and returns the map. */
-static struct variable_to_value_map *
-open_variable_to_value_map (struct sfm_reader *r, size_t size)
+/* Reads SIZE bytes into a text record for R,
+ and returns the new text record. */
+static struct text_record *
+open_text_record (struct sfm_reader *r, size_t size)
{
- struct variable_to_value_map *map = xmalloc (sizeof *map);
+ struct text_record *text = xmalloc (sizeof *text);
char *buffer = xmalloc (size + 1);
read_bytes (r, buffer, size);
- map->buffer = buffer;
- map->size = size;
- map->pos = 0;
- return map;
+ text->buffer = buffer;
+ text->size = size;
+ text->pos = 0;
+ return text;
}
-/* Closes MAP and frees its storage.
- Not really needed, because the pool will free the map anyway,
+/* Closes TEXT and frees its storage.
+   Not strictly needed, because process exit reclaims the memory anyway,
but can be used to free it earlier. */
static void
-close_variable_to_value_map (struct variable_to_value_map *map)
+close_text_record (struct text_record *text)
{
- free (map);
- free (map->buffer);
+ free (text->buffer);
+ free (text);
}
static char *
-tokenize (struct variable_to_value_map *map, int delimiter)
-{
- size_t start = map->pos;
- while (map->pos < map->size
- && map->buffer[map->pos] != delimiter
- && map->buffer[map->pos] != '\0')
- map->pos++;
- if (map->pos == map->size)
+text_tokenize (struct text_record *text, int delimiter)
+{
+ size_t start = text->pos;
+ while (text->pos < text->size
+ && text->buffer[text->pos] != delimiter
+ && text->buffer[text->pos] != '\0')
+ text->pos++;
+ if (text->pos == text->size)
return NULL;
- map->buffer[map->pos++] = '\0';
- return &map->buffer[start];
+ text->buffer[text->pos++] = '\0';
+ return &text->buffer[start];
+}
+
+/* If the next unread byte of TEXT equals C, consumes it and returns
+   true.  Otherwise, including when TEXT is exhausted, leaves the
+   position unchanged and returns false. */
+static bool
+text_match (struct text_record *text, int c)
+{
+  bool matched = text->pos < text->size && text->buffer[text->pos] == c;
+
+  if (matched)
+    text->pos++;
+  return matched;
+}
-/* Reads the next variable=value pair from MAP.
+/* Reads a variable=value pair from TEXT.
Looks up the variable in DICT and stores it into *VAR.
Stores a null-terminated value into *VALUE. */
static bool
-read_variable_to_value_map (struct variable_to_value_map *map,
- char **key, char **value)
+read_variable_to_value_pair (struct text_record *text,
+ char **key, char **value)
{
- *key = tokenize (map, '=');
- *value = tokenize (map, '\t');
+ *key = text_tokenize (text, '=');
+ *value = text_tokenize (text, '\t');
if (!*key || !*value)
return false;
- while (map->pos < map->size
- && (map->buffer[map->pos] == '\t'
- || map->buffer[map->pos] == '\0'))
- map->pos++;
+ while (text->pos < text->size
+ && (text->buffer[text->pos] == '\t'
+ || text->buffer[text->pos] == '\0'))
+ text->pos++;
return true;
}
\f
static void
usage (int exit_code)
{
- printf ("usage: %s SYSFILE, where SYSFILE is the name of a system file\n",
+ printf ("usage: %s SYSFILE...\n"
+ "where each SYSFILE is the name of a system file\n",
program_name);
exit (exit_code);
}