#include "gettext.h"
#define _(msgid) gettext (msgid)
+#define VAR_NAME_LEN 64
+
struct sfm_reader
{
const char *file_name;
int n_variable_records, n_variables;
+ int *var_widths;
+ size_t n_var_widths, allocated_var_widths;
+
enum integer_format integer_format;
enum float_format float_format;
+
+ bool compressed;
+ double bias;
};
static void read_header (struct sfm_reader *);
size_t size, size_t count);
static void read_character_encoding (struct sfm_reader *r,
size_t size, size_t count);
-
+static void read_long_string_value_labels (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_compressed_data (struct sfm_reader *);
static struct text_record *open_text_record (
struct sfm_reader *, size_t size);
error (EXIT_FAILURE, errno, "error opening \"%s\"", r.file_name);
r.n_variable_records = 0;
r.n_variables = 0;
+ r.n_var_widths = 0;
+ r.allocated_var_widths = 0;
+ r.var_widths = 0;
+ r.compressed = false;
if (argc > 2)
printf ("Reading \"%s\":\n", r.file_name);
"(first byte of data at %08lx)\n",
ftell (r.file), ftell (r.file) + 4);
+ if (r.compressed)
+ read_compressed_data (&r);
+
fclose (r.file);
}
int32_t weight_index;
int32_t ncases;
uint8_t raw_bias[8];
- double bias;
char creation_date[10];
char creation_time[9];
char file_label[65];
raw_layout_code, sizeof raw_layout_code);
nominal_case_size = read_int (r);
- compressed = read_int (r) != 0;
+ compressed = read_int (r);
weight_index = read_int (r);
ncases = read_int (r);
+ r->compressed = compressed != 0;
+
/* Identify floating-point format and obtain compression bias. */
read_bytes (r, raw_bias, sizeof raw_bias);
if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
else
r->float_format = FLOAT_IEEE_DOUBLE_LE;
}
- bias = float_get_double (r->float_format, raw_bias);
+ r->bias = float_get_double (r->float_format, raw_bias);
read_string (r, creation_date, sizeof creation_date);
read_string (r, creation_time, sizeof creation_time);
printf ("\t%17s: %"PRId32"\n", "Compressed", compressed);
printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index);
printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases);
- printf ("\t%17s: %g\n", "Compression bias", bias);
+ printf ("\t%17s: %g\n", "Compression bias", r->bias);
printf ("\t%17s: %s\n", "Creation date", creation_date);
printf ("\t%17s: %s\n", "Creation time", creation_time);
printf ("\t%17s: \"%s\"\n", "File label", file_label);
if (width >= 0)
r->n_variables++;
+ if (r->n_var_widths >= r->allocated_var_widths)
+ r->var_widths = x2nrealloc (r->var_widths, &r->allocated_var_widths,
+ sizeof *r->var_widths);
+ r->var_widths[r->n_var_widths++] = width;
+
printf ("\tWidth: %d (%s)\n",
width,
width > 0 ? "string"
}
}
+/* Prints the 8-byte RAW_VALUE in both of its possible interpretations,
+   in the form NUMBER/"STRING": first as the number obtained by decoding
+   it with R's floating-point format, then as its leading run of
+   printable bytes.  Does not print a trailing new-line. */
+static void
+print_untyped_value (struct sfm_reader *r, char raw_value[8])
+{
+  /* RAW_VALUE is declared as an array parameter, which makes it a pointer,
+     so `sizeof raw_value' would yield the size of a pointer (4 on many
+     32-bit systems) instead of the size of the value.  Spell the length
+     out explicitly. */
+  enum { VALUE_SIZE = 8 };
+  int n_printable;
+  double value;
+
+  value = float_get_double (r->float_format, raw_value);
+
+  /* Count the printable bytes at the start of the value.  The cast keeps
+     bytes with the high bit set from being passed to isprint() as
+     negative values on systems where plain char is signed. */
+  for (n_printable = 0; n_printable < VALUE_SIZE; n_printable++)
+    if (!isprint ((unsigned char) raw_value[n_printable]))
+      break;
+
+  printf ("%g/\"%.*s\"", value, n_printable, raw_value);
+}
+
/* Reads value labels from sysfile R and inserts them into the
associated dictionary. */
static void
for (i = 0; i < label_cnt; i++)
{
char raw_value[8];
- double value;
- int n_printable;
unsigned char label_len;
size_t padded_len;
char label[256];
read_bytes (r, raw_value, sizeof raw_value);
- value = float_get_double (r->float_format, raw_value);
- for (n_printable = 0; n_printable < sizeof raw_value; n_printable++)
- if (!isprint (raw_value[n_printable]))
- break;
/* Read label length. */
read_bytes (r, &label_len, sizeof label_len);
read_bytes (r, label, padded_len - 1);
label[label_len] = 0;
- printf ("\t%g/\"%.*s\": \"%s\"\n", value, n_printable, raw_value, label);
+ printf ("\t");
+ print_untyped_value (r, raw_value);
+ printf (": \"%s\"\n", label);
}
/* Now, read the type 4 record that has the list of variables
read_character_encoding (r, size, count);
return;
+ case 21:
+ read_long_string_value_labels (r, size, count);
+ return;
+
default:
sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
break;
printf ("\tsysmis: %g\n", sysmis);
if (sysmis != SYSMIS)
- sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ sysmis, "SYSMIS");
+
printf ("\thighest: %g\n", highest);
if (highest != HIGHEST)
- sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ highest, "HIGHEST");
+
printf ("\tlowest: %g\n", lowest);
if (lowest != LOWEST)
- sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ lowest, "LOWEST");
}
/* Read record type 7, subtype 11. */
printf ("%08lx: Character Encoding: %s\n", posn, encoding);
}
+/* Reads and dumps a record of type 7, subtype 21 (value labels for long
+   string variables).  SIZE * COUNT is the number of bytes of data in the
+   record.  The data is a sequence of entries, each made up of a counted
+   variable name, a width, a number of values, and then that many pairs
+   of counted value and counted label.
+
+   Every length is taken from the file, so each one is checked for a
+   negative value before it is used to size a buffer or a read.
+   NOTE(review): this relies on sys_error() not returning, as the
+   pre-existing upper-bound check on the variable name already does --
+   confirm. */
+static void
+read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count)
+{
+  const long start = ftell (r->file);
+
+  printf ("%08lx: long string value labels\n", start);
+  while (ftell (r->file) - start < size * count)
+    {
+      long posn = ftell (r->file);
+      char var_name[VAR_NAME_LEN + 1];
+      int var_name_len;
+      int n_values;
+      int width;
+      int i;
+
+      /* Read variable name. */
+      var_name_len = read_int (r);
+      if (var_name_len < 0)
+        sys_error (r, _("Variable name length in long string value label "
+                        "record (%d) is negative."),
+                   var_name_len);
+      if (var_name_len > VAR_NAME_LEN)
+        sys_error (r, _("Variable name length in long string value label "
+                        "record (%d) exceeds %d-byte limit."),
+                   var_name_len, VAR_NAME_LEN);
+      read_string (r, var_name, var_name_len + 1);
+
+      /* Read width, number of values. */
+      width = read_int (r);
+      n_values = read_int (r);
+
+      printf ("\t%08lx: %s, width %d, %d values\n",
+              posn, var_name, width, n_values);
+
+      /* Read values.  A negative N_VALUES simply yields no iterations. */
+      for (i = 0; i < n_values; i++)
+        {
+          char *value;
+          int value_length;
+
+          char *label;
+          int label_length;
+
+          posn = ftell (r->file);
+
+          /* Read value. */
+          value_length = read_int (r);
+          if (value_length < 0)
+            sys_error (r, _("Value length in long string value label "
+                            "record (%d) is negative."),
+                       value_length);
+          value = xmalloc (value_length + 1);
+          read_string (r, value, value_length + 1);
+
+          /* Read label. */
+          label_length = read_int (r);
+          if (label_length < 0)
+            sys_error (r, _("Label length in long string value label "
+                            "record (%d) is negative."),
+                       label_length);
+          label = xmalloc (label_length + 1);
+          read_string (r, label, label_length + 1);
+
+          printf ("\t\t%08lx: \"%s\" (%d bytes) => \"%s\" (%d bytes)\n",
+                  posn, value, value_length, label, label_length);
+
+          free (value);
+          free (label);
+        }
+    }
+}
static void
read_variable_attributes (struct sfm_reader *r, size_t size, size_t count)
}
close_text_record (text);
}
+
+/* Dumps the compressed case data that follows the dictionary in R.
+
+   The data is read as groups of N_OPCODES one-byte opcodes.  One opcode
+   is consumed for each entry in R's var_widths array, case after case,
+   and a new group is fetched whenever the current one is used up.
+   Opcodes are interpreted as follows:
+
+     252      end of data; dumping stops.
+     253      an 8-byte uncompressible value follows and is printed.
+     254      spaces (expected for string variables only).
+     255      SYSMIS (expected for numeric variables only).
+     others   the number OPCODE - R->bias (expected for numeric
+              variables only).
+
+   A note is printed when an opcode does not suit the width recorded for
+   the variable it applies to. */
+static void
+read_compressed_data (struct sfm_reader *r)
+{
+  enum { N_OPCODES = 8 };
+  uint8_t opcodes[N_OPCODES];
+  long int opcode_ofs = 0;
+  int opcode_idx;
+  int case_num;
+  int i;
+
+  /* Skip one integer ahead of the data; the caller reports the first
+     byte of data as lying 4 bytes past its own position. */
+  read_int (r);
+  printf ("\n%08lx: compressed data:\n", ftell (r->file));
+
+  if (r->n_var_widths == 0)
+    {
+      /* Without any variables the per-case loop below would never consume
+         an opcode, so it could never see the end-of-data opcode and
+         would run forever. */
+      printf ("no variables, so no cases can be read\n");
+      return;
+    }
+
+  /* Force a group of opcodes to be read before the first variable. */
+  opcode_idx = N_OPCODES;
+  for (case_num = 0; ; case_num++)
+    {
+      printf ("%08lx: case %d's uncompressible data begins\n",
+              ftell (r->file), case_num);
+      for (i = 0; i < r->n_var_widths; i++)
+        {
+          int width = r->var_widths[i];
+          char raw_value[8];
+          int opcode;
+
+          if (opcode_idx >= N_OPCODES)
+            {
+              /* Remember where the group starts so that each opcode's
+                 own file offset can be printed. */
+              opcode_ofs = ftell (r->file);
+              read_bytes (r, opcodes, sizeof opcodes);
+              opcode_idx = 0;
+            }
+          opcode = opcodes[opcode_idx];
+          printf ("%08lx: variable %d: opcode %d: ",
+                  opcode_ofs + opcode_idx, i, opcode);
+
+          switch (opcode)
+            {
+            default:
+              printf ("%g", opcode - r->bias);
+              if (width != 0)
+                printf (", but this is a string variable (width=%d)", width);
+              printf ("\n");
+              break;
+
+            case 252:
+              printf ("end of data\n");
+              return;
+
+            case 253:
+              read_bytes (r, raw_value, sizeof raw_value);
+              printf ("uncompressible data: ");
+              print_untyped_value (r, raw_value);
+              printf ("\n");
+              break;
+
+            case 254:
+              printf ("spaces");
+              if (width == 0)
+                printf (", but this is a numeric variable");
+              printf ("\n");
+              break;
+
+            case 255:
+              printf ("SYSMIS");
+              if (width != 0)
+                printf (", but this is a string variable (width=%d)", width);
+
+              printf ("\n");
+              break;
+            }
+
+          opcode_idx++;
+        }
+    }
+}
\f
/* Helpers for reading records that consist of structured text
strings. */