include $(top_srcdir)/doc/automake.mk
include $(top_srcdir)/examples/automake.mk
include $(top_srcdir)/src/automake.mk
+include $(top_srcdir)/utilities/automake.mk
include $(top_srcdir)/tests/automake.mk
tests_data_inexactify_SOURCES = tests/data/inexactify.c
-noinst_PROGRAMS += tests/dissect-sysfile
-tests_dissect_sysfile_SOURCES = \
- src/libpspp/integer-format.c \
- src/libpspp/float-format.c \
- tests/dissect-sysfile.c
-tests_dissect_sysfile_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\"
-
check_PROGRAMS += tests/language/lexer/command-name-test
tests_language_lexer_command_name_test_SOURCES = \
src/data/identifier.c \
+++ /dev/null
-/* PSPP - a program for statistical analysis.
- Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-#include <ctype.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <data/val-type.h>
-#include <libpspp/compiler.h>
-#include <libpspp/float-format.h>
-#include <libpspp/integer-format.h>
-#include <libpspp/misc.h>
-
-#include "error.h"
-#include "minmax.h"
-#include "progname.h"
-#include "xalloc.h"
-
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
-
-#define ID_MAX_LEN 64
-
-struct sfm_reader
- {
- const char *file_name;
- FILE *file;
-
- int n_variable_records, n_variables;
-
- int *var_widths;
- size_t n_var_widths, allocated_var_widths;
-
- enum integer_format integer_format;
- enum float_format float_format;
-
- bool compressed;
- double bias;
- };
-
-static void read_header (struct sfm_reader *);
-static void read_variable_record (struct sfm_reader *);
-static void read_value_label_record (struct sfm_reader *);
-static void read_document_record (struct sfm_reader *);
-static void read_extension_record (struct sfm_reader *);
-static void read_machine_integer_info (struct sfm_reader *,
- size_t size, size_t count);
-static void read_machine_float_info (struct sfm_reader *,
- size_t size, size_t count);
-static void read_mrsets (struct sfm_reader *, size_t size, size_t count);
-static void read_display_parameters (struct sfm_reader *,
- size_t size, size_t count);
-static void read_long_var_name_map (struct sfm_reader *r,
- size_t size, size_t count);
-static void read_long_string_map (struct sfm_reader *r,
- size_t size, size_t count);
-static void read_datafile_attributes (struct sfm_reader *r,
- size_t size, size_t count);
-static void read_variable_attributes (struct sfm_reader *r,
- size_t size, size_t count);
-static void read_ncases64 (struct sfm_reader *, size_t size, size_t count);
-static void read_character_encoding (struct sfm_reader *r,
- size_t size, size_t count);
-static void read_long_string_value_labels (struct sfm_reader *r,
- size_t size, size_t count);
-static void read_unknown_extension (struct sfm_reader *,
- size_t size, size_t count);
-static void read_compressed_data (struct sfm_reader *);
-
-static struct text_record *open_text_record (
- struct sfm_reader *, size_t size);
-static void close_text_record (struct text_record *);
-static bool read_variable_to_value_pair (struct text_record *,
- char **key, char **value);
-static char *text_tokenize (struct text_record *, int delimiter);
-static bool text_match (struct text_record *text, int c);
-static const char *text_parse_counted_string (struct text_record *);
-static size_t text_pos (const struct text_record *);
-
-static void usage (int exit_code);
-static void sys_warn (struct sfm_reader *, const char *, ...)
- PRINTF_FORMAT (2, 3);
-static void sys_error (struct sfm_reader *, const char *, ...)
- PRINTF_FORMAT (2, 3)
- NO_RETURN;
-
-static void read_bytes (struct sfm_reader *, void *, size_t);
-static bool try_read_bytes (struct sfm_reader *, void *, size_t);
-static int read_int (struct sfm_reader *);
-static int64_t read_int64 (struct sfm_reader *);
-static double read_float (struct sfm_reader *);
-static void read_string (struct sfm_reader *, char *, size_t);
-static void skip_bytes (struct sfm_reader *, size_t);
-static void trim_spaces (char *);
-
-int
-main (int argc, char *argv[])
-{
- struct sfm_reader r;
- int i;
-
- set_program_name (argv[0]);
- if (argc < 2)
- usage (EXIT_FAILURE);
-
- for (i = 1; i < argc; i++)
- {
- int rec_type;
-
- r.file_name = argv[i];
- r.file = fopen (r.file_name, "rb");
- if (r.file == NULL)
- error (EXIT_FAILURE, errno, "error opening `%s'", r.file_name);
- r.n_variable_records = 0;
- r.n_variables = 0;
- r.n_var_widths = 0;
- r.allocated_var_widths = 0;
- r.var_widths = 0;
- r.compressed = false;
-
- if (argc > 2)
- printf ("Reading \"%s\":\n", r.file_name);
-
- read_header (&r);
- while ((rec_type = read_int (&r)) != 999)
- {
- switch (rec_type)
- {
- case 2:
- read_variable_record (&r);
- break;
-
- case 3:
- read_value_label_record (&r);
- break;
-
- case 4:
- sys_error (&r, _("Misplaced type 4 record."));
-
- case 6:
- read_document_record (&r);
- break;
-
- case 7:
- read_extension_record (&r);
- break;
-
- default:
- sys_error (&r, _("Unrecognized record type %d."), rec_type);
- }
- }
- printf ("%08llx: end-of-dictionary record "
- "(first byte of data at %08llx)\n",
- (long long int) ftello (r.file),
- (long long int) ftello (r.file) + 4);
-
- if (r.compressed)
- read_compressed_data (&r);
-
- fclose (r.file);
- }
-
- return 0;
-}
-
-static void
-read_header (struct sfm_reader *r)
-{
- char rec_type[5];
- char eye_catcher[61];
- uint8_t raw_layout_code[4];
- int32_t layout_code;
- int32_t nominal_case_size;
- int32_t compressed;
- int32_t weight_index;
- int32_t ncases;
- uint8_t raw_bias[8];
- char creation_date[10];
- char creation_time[9];
- char file_label[65];
-
- read_string (r, rec_type, sizeof rec_type);
- read_string (r, eye_catcher, sizeof eye_catcher);
-
- if (strcmp ("$FL2", rec_type) != 0)
- sys_error (r, _("This is not an SPSS system file."));
-
- /* Identify integer format. */
- read_bytes (r, raw_layout_code, sizeof raw_layout_code);
- if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
- &r->integer_format)
- && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
- &r->integer_format))
- || (r->integer_format != INTEGER_MSB_FIRST
- && r->integer_format != INTEGER_LSB_FIRST))
- sys_error (r, _("This is not an SPSS system file."));
- layout_code = integer_get (r->integer_format,
- raw_layout_code, sizeof raw_layout_code);
-
- nominal_case_size = read_int (r);
- compressed = read_int (r);
- weight_index = read_int (r);
- ncases = read_int (r);
-
- r->compressed = compressed != 0;
-
- /* Identify floating-point format and obtain compression bias. */
- read_bytes (r, raw_bias, sizeof raw_bias);
- if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
- {
- sys_warn (r, _("Compression bias is not the usual "
- "value of 100, or system file uses unrecognized "
- "floating-point format."));
- if (r->integer_format == INTEGER_MSB_FIRST)
- r->float_format = FLOAT_IEEE_DOUBLE_BE;
- else
- r->float_format = FLOAT_IEEE_DOUBLE_LE;
- }
- r->bias = float_get_double (r->float_format, raw_bias);
-
- read_string (r, creation_date, sizeof creation_date);
- read_string (r, creation_time, sizeof creation_time);
- read_string (r, file_label, sizeof file_label);
- trim_spaces (file_label);
- skip_bytes (r, 3);
-
- printf ("File header record:\n");
- printf ("\t%17s: %s\n", "Product name", eye_catcher);
- printf ("\t%17s: %"PRId32"\n", "Layout code", layout_code);
- printf ("\t%17s: %"PRId32"\n", "Compressed", compressed);
- printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index);
- printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases);
- printf ("\t%17s: %g\n", "Compression bias", r->bias);
- printf ("\t%17s: %s\n", "Creation date", creation_date);
- printf ("\t%17s: %s\n", "Creation time", creation_time);
- printf ("\t%17s: \"%s\"\n", "File label", file_label);
-}
-
-static const char *
-format_name (int format)
-{
- switch ((format >> 16) & 0xff)
- {
- case 1: return "A";
- case 2: return "AHEX";
- case 3: return "COMMA";
- case 4: return "DOLLAR";
- case 5: return "F";
- case 6: return "IB";
- case 7: return "PIBHEX";
- case 8: return "P";
- case 9: return "PIB";
- case 10: return "PK";
- case 11: return "RB";
- case 12: return "RBHEX";
- case 15: return "Z";
- case 16: return "N";
- case 17: return "E";
- case 20: return "DATE";
- case 21: return "TIME";
- case 22: return "DATETIME";
- case 23: return "ADATE";
- case 24: return "JDATE";
- case 25: return "DTIME";
- case 26: return "WKDAY";
- case 27: return "MONTH";
- case 28: return "MOYR";
- case 29: return "QYR";
- case 30: return "WKYR";
- case 31: return "PCT";
- case 32: return "DOT";
- case 33: return "CCA";
- case 34: return "CCB";
- case 35: return "CCC";
- case 36: return "CCD";
- case 37: return "CCE";
- case 38: return "EDATE";
- case 39: return "SDATE";
- default: return "invalid";
- }
-}
-
-/* Reads a variable (type 2) record from R and adds the
- corresponding variable to DICT.
- Also skips past additional variable records for long string
- variables. */
-static void
-read_variable_record (struct sfm_reader *r)
-{
- int width;
- int has_variable_label;
- int missing_value_code;
- int print_format;
- int write_format;
- char name[9];
-
- printf ("%08llx: variable record #%d\n",
- (long long int) ftello (r->file), r->n_variable_records++);
-
- width = read_int (r);
- has_variable_label = read_int (r);
- missing_value_code = read_int (r);
- print_format = read_int (r);
- write_format = read_int (r);
- read_string (r, name, sizeof name);
- name[strcspn (name, " ")] = '\0';
-
- if (width >= 0)
- r->n_variables++;
-
- if (r->n_var_widths >= r->allocated_var_widths)
- r->var_widths = x2nrealloc (r->var_widths, &r->allocated_var_widths,
- sizeof *r->var_widths);
- r->var_widths[r->n_var_widths++] = width;
-
- printf ("\tWidth: %d (%s)\n",
- width,
- width > 0 ? "string"
- : width == 0 ? "numeric"
- : "long string continuation record");
- printf ("\tVariable label: %d\n", has_variable_label);
- printf ("\tMissing values code: %d (%s)\n", missing_value_code,
- (missing_value_code == 0 ? "no missing values"
- : missing_value_code == 1 ? "one missing value"
- : missing_value_code == 2 ? "two missing values"
- : missing_value_code == 3 ? "three missing values"
- : missing_value_code == -2 ? "one missing value range"
- : missing_value_code == -3 ? "one missing value, one range"
- : "bad value"));
- printf ("\tPrint format: %06x (%s%d.%d)\n",
- print_format, format_name (print_format),
- (print_format >> 8) & 0xff, print_format & 0xff);
- printf ("\tWrite format: %06x (%s%d.%d)\n",
- write_format, format_name (write_format),
- (write_format >> 8) & 0xff, write_format & 0xff);
- printf ("\tName: %s\n", name);
-
- /* Get variable label, if any. */
- if (has_variable_label != 0 && has_variable_label != 1)
- sys_error (r, _("Variable label indicator field is not 0 or 1."));
- if (has_variable_label == 1)
- {
- long long int offset = ftello (r->file);
- size_t len, read_len;
- char label[255 + 1];
-
- len = read_int (r);
-
- /* Read up to 255 bytes of label. */
- read_len = MIN (sizeof label - 1, len);
- read_string (r, label, read_len + 1);
- printf("\t%08llx Variable label: \"%s\"\n", offset, label);
-
- /* Skip unread label bytes. */
- skip_bytes (r, len - read_len);
-
- /* Skip label padding up to multiple of 4 bytes. */
- skip_bytes (r, ROUND_UP (len, 4) - len);
- }
-
- /* Set missing values. */
- if (missing_value_code != 0)
- {
- int i;
-
- printf ("\t%08llx Missing values:", (long long int) ftello (r->file));
- if (!width)
- {
- if (missing_value_code < -3 || missing_value_code > 3
- || missing_value_code == -1)
- sys_error (r, _("Numeric missing value indicator field is not "
- "-3, -2, 0, 1, 2, or 3."));
- if (missing_value_code < 0)
- {
- double low = read_float (r);
- double high = read_float (r);
- printf (" %g...%g", low, high);
- missing_value_code = -missing_value_code - 2;
- }
- for (i = 0; i < missing_value_code; i++)
- printf (" %g", read_float (r));
- }
- else if (width > 0)
- {
- if (missing_value_code < 1 || missing_value_code > 3)
- sys_error (r, _("String missing value indicator field is not "
- "0, 1, 2, or 3."));
- for (i = 0; i < missing_value_code; i++)
- {
- char string[9];
- read_string (r, string, sizeof string);
- printf (" \"%s\"", string);
- }
- }
- putchar ('\n');
- }
-}
-
-static void
-print_untyped_value (struct sfm_reader *r, char raw_value[8])
-{
- int n_printable;
- double value;
-
- value = float_get_double (r->float_format, raw_value);
- for (n_printable = 0; n_printable < sizeof raw_value; n_printable++)
- if (!isprint (raw_value[n_printable]))
- break;
-
- printf ("%g/\"%.*s\"", value, n_printable, raw_value);
-}
-
-/* Reads value labels from sysfile R and inserts them into the
- associated dictionary. */
-static void
-read_value_label_record (struct sfm_reader *r)
-{
- int label_cnt, var_cnt;
- int i;
-
- printf ("%08llx: value labels record\n", (long long int) ftello (r->file));
-
- /* Read number of labels. */
- label_cnt = read_int (r);
- for (i = 0; i < label_cnt; i++)
- {
- char raw_value[8];
- unsigned char label_len;
- size_t padded_len;
- char label[256];
-
- read_bytes (r, raw_value, sizeof raw_value);
-
- /* Read label length. */
- read_bytes (r, &label_len, sizeof label_len);
- padded_len = ROUND_UP (label_len + 1, 8);
-
- /* Read label, padding. */
- read_bytes (r, label, padded_len - 1);
- label[label_len] = 0;
-
- printf ("\t");
- print_untyped_value (r, raw_value);
- printf (": \"%s\"\n", label);
- }
-
- /* Now, read the type 4 record that has the list of variables
- to which the value labels are to be applied. */
-
- /* Read record type of type 4 record. */
- if (read_int (r) != 4)
- sys_error (r, _("Variable index record (type 4) does not immediately "
- "follow value label record (type 3) as it should."));
-
- /* Read number of variables associated with value label from type 4
- record. */
- printf ("\t%08llx: apply to variables", (long long int) ftello (r->file));
- var_cnt = read_int (r);
- for (i = 0; i < var_cnt; i++)
- printf (" #%d", read_int (r));
- putchar ('\n');
-}
-
-static void
-read_document_record (struct sfm_reader *r)
-{
- int n_lines;
- int i;
-
- printf ("%08llx: document record\n", (long long int) ftello (r->file));
- n_lines = read_int (r);
- printf ("\t%d lines of documents\n", n_lines);
-
- for (i = 0; i < n_lines; i++)
- {
- char line[81];
- printf ("\t%08llx: ", (long long int) ftello (r->file));
- read_string (r, line, sizeof line);
- trim_spaces (line);
- printf ("line %d: \"%s\"\n", i, line);
- }
-}
-
-static void
-read_extension_record (struct sfm_reader *r)
-{
- long long int offset = ftello (r->file);
- int subtype = read_int (r);
- size_t size = read_int (r);
- size_t count = read_int (r);
- size_t bytes = size * count;
-
- printf ("%08llx: Record 7, subtype %d, size=%zu, count=%zu\n",
- offset, subtype, size, count);
-
- switch (subtype)
- {
- case 3:
- read_machine_integer_info (r, size, count);
- return;
-
- case 4:
- read_machine_float_info (r, size, count);
- return;
-
- case 5:
- /* Variable sets information. We don't use these yet.
- They only apply to GUIs; see VARSETS on the APPLY
- DICTIONARY command in SPSS documentation. */
- break;
-
- case 6:
- /* DATE variable information. We don't use it yet, but we
- should. */
- break;
-
- case 7:
- case 19:
- read_mrsets (r, size, count);
- return;
-
- case 11:
- read_display_parameters (r, size, count);
- return;
-
- case 13:
- read_long_var_name_map (r, size, count);
- return;
-
- case 14:
- read_long_string_map (r, size, count);
- return;
-
- case 16:
- read_ncases64 (r, size, count);
- return;
-
- case 17:
- read_datafile_attributes (r, size, count);
- return;
-
- case 18:
- read_variable_attributes (r, size, count);
- return;
-
- case 20:
- read_character_encoding (r, size, count);
- return;
-
- case 21:
- read_long_string_value_labels (r, size, count);
- return;
-
- default:
- sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
- read_unknown_extension (r, size, count);
- return;
- }
-
- skip_bytes (r, bytes);
-}
-
-static void
-read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count)
-{
- long long int offset = ftello (r->file);
- int version_major = read_int (r);
- int version_minor = read_int (r);
- int version_revision = read_int (r);
- int machine_code = read_int (r);
- int float_representation = read_int (r);
- int compression_code = read_int (r);
- int integer_representation = read_int (r);
- int character_code = read_int (r);
-
- printf ("%08llx: machine integer info\n", offset);
- if (size != 4 || count != 8)
- sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
- "subtype 3."),
- size, count);
-
- printf ("\tVersion: %d.%d.%d\n",
- version_major, version_minor, version_revision);
- printf ("\tMachine code: %d\n", machine_code);
- printf ("\tFloating point representation: %d (%s)\n",
- float_representation,
- float_representation == 1 ? "IEEE 754"
- : float_representation == 2 ? "IBM 370"
- : float_representation == 3 ? "DEC VAX"
- : "unknown");
- printf ("\tCompression code: %d\n", compression_code);
- printf ("\tEndianness: %d (%s)\n", integer_representation,
- integer_representation == 1 ? "big"
- : integer_representation == 2 ? "little" : "unknown");
- printf ("\tCharacter code: %d\n", character_code);
-}
-
-/* Read record type 7, subtype 4. */
-static void
-read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
-{
- long long int offset = ftello (r->file);
- double sysmis = read_float (r);
- double highest = read_float (r);
- double lowest = read_float (r);
-
- printf ("%08llx: machine float info\n", offset);
- if (size != 8 || count != 3)
- sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
- size, count);
-
- printf ("\tsysmis: %g\n", sysmis);
- if (sysmis != SYSMIS)
- sys_warn (r, _("File specifies unexpected value %g as %s."),
- sysmis, "SYSMIS");
-
- printf ("\thighest: %g\n", highest);
- if (highest != HIGHEST)
- sys_warn (r, _("File specifies unexpected value %g as %s."),
- highest, "HIGHEST");
-
- printf ("\tlowest: %g\n", lowest);
- if (lowest != LOWEST)
- sys_warn (r, _("File specifies unexpected value %g as %s."),
- lowest, "LOWEST");
-}
-
-/* Read record type 7, subtype 7. */
-static void
-read_mrsets (struct sfm_reader *r, size_t size, size_t count)
-{
- struct text_record *text;
-
- printf ("%08llx: multiple response sets\n",
- (long long int) ftello (r->file));
- text = open_text_record (r, size * count);
- for (;;)
- {
- const char *name;
- enum { MRSET_MC, MRSET_MD } type;
- bool cat_label_from_counted_values = false;
- bool label_from_var_label = false;
- const char *counted;
- const char *label;
- const char *variables;
-
- name = text_tokenize (text, '=');
- if (name == NULL)
- break;
-
- if (text_match (text, 'C'))
- {
- type = MRSET_MC;
- counted = NULL;
- if (!text_match (text, ' '))
- {
- sys_warn (r, "missing space following 'C' at offset %zu "
- "in mrsets record", text_pos (text));
- break;
- }
- }
- else if (text_match (text, 'D'))
- {
- type = MRSET_MD;
- }
- else if (text_match (text, 'E'))
- {
- char *number;
-
- type = MRSET_MD;
- cat_label_from_counted_values = true;
-
- if (!text_match (text, ' '))
- {
- sys_warn (r, _("Missing space following `%c' at offset %zu "
- "in MRSETS record"), 'E', text_pos (text));
- break;
- }
-
- number = text_tokenize (text, ' ');
- if (!strcmp (number, "11"))
- label_from_var_label = true;
- else if (strcmp (number, "1"))
- sys_warn (r, _("Unexpected label source value `%s' "
- "following `E' at offset %zu in MRSETS record"),
- number, text_pos (text));
-
- }
- else
- {
- sys_warn (r, "missing `C', `D', or `E' at offset %zu "
- "in mrsets record", text_pos (text));
- break;
- }
-
- if (type == MRSET_MD)
- {
- counted = text_parse_counted_string (text);
- if (counted == NULL)
- break;
- }
-
- label = text_parse_counted_string (text);
- if (label == NULL)
- break;
-
- variables = text_tokenize (text, '\n');
- if (variables == NULL)
- {
- sys_warn (r, "missing variable names following label "
- "at offset %zu in mrsets record", text_pos (text));
- break;
- }
-
- printf ("\t\"%s\": multiple %s set",
- name, type == MRSET_MC ? "category" : "dichotomy");
- if (counted != NULL)
- printf (", counted value \"%s\"", counted);
- if (cat_label_from_counted_values)
- printf (", category labels from counted values");
- if (label[0] != '\0')
- printf (", label \"%s\"", label);
- if (label_from_var_label)
- printf (", label from variable label");
- printf(", variables \"%s\"\n", variables);
- }
- close_text_record (text);
-}
-
-/* Read record type 7, subtype 11. */
-static void
-read_display_parameters (struct sfm_reader *r, size_t size, size_t count)
-{
- size_t n_vars;
- bool includes_width;
- size_t i;
-
- printf ("%08llx: variable display parameters\n",
- (long long int) ftello (r->file));
- if (size != 4)
- {
- sys_warn (r, _("Bad size %zu on extension 11."), size);
- skip_bytes (r, size * count);
- return;
- }
-
- n_vars = r->n_variables;
- if (count == 3 * n_vars)
- includes_width = true;
- else if (count == 2 * n_vars)
- includes_width = false;
- else
- {
- sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
- count, n_vars);
- skip_bytes (r, size * count);
- return;
- }
-
- for (i = 0; i < n_vars; ++i)
- {
- int measure = read_int (r);
- int width = includes_width ? read_int (r) : 0;
- int align = read_int (r);
-
- printf ("\tVar #%zu: measure=%d (%s)", i, measure,
- (measure == 1 ? "nominal"
- : measure == 2 ? "ordinal"
- : measure == 3 ? "scale"
- : "invalid"));
- if (includes_width)
- printf (", width=%d", width);
- printf (", align=%d (%s)\n", align,
- (align == 0 ? "left"
- : align == 1 ? "right"
- : align == 2 ? "centre"
- : "invalid"));
- }
-}
-
-/* Reads record type 7, subtype 13, which gives the long name
- that corresponds to each short name. */
-static void
-read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count)
-{
- struct text_record *text;
- char *var;
- char *long_name;
-
- printf ("%08llx: long variable names (short => long)\n",
- (long long int) ftello (r->file));
- text = open_text_record (r, size * count);
- while (read_variable_to_value_pair (text, &var, &long_name))
- printf ("\t%s => %s\n", var, long_name);
- close_text_record (text);
-}
-
-/* Reads record type 7, subtype 14, which gives the real length
- of each very long string. Rearranges DICT accordingly. */
-static void
-read_long_string_map (struct sfm_reader *r, size_t size, size_t count)
-{
- struct text_record *text;
- char *var;
- char *length_s;
-
- printf ("%08llx: very long strings (variable => length)\n",
- (long long int) ftello (r->file));
- text = open_text_record (r, size * count);
- while (read_variable_to_value_pair (text, &var, &length_s))
- printf ("\t%s => %d\n", var, atoi (length_s));
- close_text_record (text);
-}
-
-static bool
-read_attributes (struct sfm_reader *r, struct text_record *text,
- const char *variable)
-{
- const char *key;
- int index;
-
- for (;;)
- {
- key = text_tokenize (text, '(');
- if (key == NULL)
- return true;
-
- for (index = 1; ; index++)
- {
- /* Parse the value. */
- const char *value = text_tokenize (text, '\n');
- if (value == NULL)
- {
- sys_warn (r, _("%s: Error parsing attribute value %s[%d]"),
- variable, key, index);
- return false;
- }
- if (strlen (value) < 2
- || value[0] != '\'' || value[strlen (value) - 1] != '\'')
- sys_warn (r, _("%s: Attribute value %s[%d] is not quoted: %s"),
- variable, key, index, value);
- else
- printf ("\t%s: %s[%d] = \"%.*s\"\n",
- variable, key, index, (int) strlen (value) - 2, value + 1);
-
- /* Was this the last value for this attribute? */
- if (text_match (text, ')'))
- break;
- }
-
- if (text_match (text, '/'))
- return true;
- }
-}
-
-/* Read extended number of cases record. */
-static void
-read_ncases64 (struct sfm_reader *r, size_t size, size_t count)
-{
- int64_t unknown, ncases64;
-
- if (size != 8)
- {
- sys_warn (r, _("Bad size %zu for extended number of cases."), size);
- skip_bytes (r, size * count);
- return;
- }
- if (count != 2)
- {
- sys_warn (r, _("Bad count %zu for extended number of cases."), size);
- skip_bytes (r, size * count);
- return;
- }
- unknown = read_int64 (r);
- ncases64 = read_int64 (r);
- printf ("%08llx: extended number of cases: "
- "unknown=%"PRId64", ncases64=%"PRId64"\n",
- (long long int) ftello (r->file), unknown, ncases64);
-}
-
-static void
-read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count)
-{
- struct text_record *text;
-
- printf ("%08llx: datafile attributes\n", (long long int) ftello (r->file));
- text = open_text_record (r, size * count);
- read_attributes (r, text, "datafile");
- close_text_record (text);
-}
-
-static void
-read_character_encoding (struct sfm_reader *r, size_t size, size_t count)
-{
- long long int posn = ftello (r->file);
- char *encoding = xcalloc (size, count + 1);
- read_string (r, encoding, count + 1);
-
- printf ("%08llx: Character Encoding: %s\n", posn, encoding);
-}
-
-static void
-read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count)
-{
- long long int start = ftello (r->file);
-
- printf ("%08llx: long string value labels\n", start);
- while (ftello (r->file) - start < size * count)
- {
- long long posn = ftello (r->file);
- char var_name[ID_MAX_LEN + 1];
- int var_name_len;
- int n_values;
- int width;
- int i;
-
- /* Read variable name. */
- var_name_len = read_int (r);
- if (var_name_len > ID_MAX_LEN)
- sys_error (r, _("Variable name length in long string value label "
- "record (%d) exceeds %d-byte limit."),
- var_name_len, ID_MAX_LEN);
- read_string (r, var_name, var_name_len + 1);
-
- /* Read width, number of values. */
- width = read_int (r);
- n_values = read_int (r);
-
- printf ("\t%08llx: %s, width %d, %d values\n",
- posn, var_name, width, n_values);
-
- /* Read values. */
- for (i = 0; i < n_values; i++)
- {
- char *value;
- int value_length;
-
- char *label;
- int label_length;
-
- posn = ftello (r->file);
-
- /* Read value. */
- value_length = read_int (r);
- value = xmalloc (value_length + 1);
- read_string (r, value, value_length + 1);
-
- /* Read label. */
- label_length = read_int (r);
- label = xmalloc (label_length + 1);
- read_string (r, label, label_length + 1);
-
- printf ("\t\t%08llx: \"%s\" (%d bytes) => \"%s\" (%d bytes)\n",
- posn, value, value_length, label, label_length);
-
- free (value);
- free (label);
- }
- }
-}
-
-static void
-hex_dump (size_t offset, const void *buffer_, size_t buffer_size)
-{
- const uint8_t *buffer = buffer_;
-
- while (buffer_size > 0)
- {
- size_t n = MIN (buffer_size, 16);
- size_t i;
-
- printf ("%04zx", offset);
- for (i = 0; i < 16; i++)
- {
- if (i < n)
- printf ("%c%02x", i == 8 ? '-' : ' ', buffer[i]);
- else
- printf (" ");
- }
-
- printf (" |");
- for (i = 0; i < 16; i++)
- {
- unsigned char c = i < n ? buffer[i] : ' ';
- putchar (isprint (c) ? c : '.');
- }
- printf ("|\n");
-
- offset += n;
- buffer += n;
- buffer_size -= n;
- }
-}
-
-/* Reads and prints any type 7 record that we don't understand. */
-static void
-read_unknown_extension (struct sfm_reader *r, size_t size, size_t count)
-{
- unsigned char *buffer;
- size_t i;
-
- if (size == 0 || count > 65536 / size)
- skip_bytes (r, size * count);
- else if (size != 1)
- {
- buffer = xmalloc (size);
- for (i = 0; i < count; i++)
- {
- read_bytes (r, buffer, size);
- hex_dump (i * size, buffer, size);
- }
- free (buffer);
- }
- else
- {
- buffer = xmalloc (count);
- read_bytes (r, buffer, count);
- if (memchr (buffer, 0, count) == 0)
- for (i = 0; i < count; i++)
- {
- unsigned char c = buffer[i];
-
- if (c == '\\')
- printf ("\\\\");
- else if (c == '\n' || isprint (c))
- putchar (c);
- else
- printf ("\\%02x", c);
- }
- else
- hex_dump (0, buffer, count);
- free (buffer);
- }
-}
-
-static void
-read_variable_attributes (struct sfm_reader *r, size_t size, size_t count)
-{
- struct text_record *text;
-
- printf ("%08llx: variable attributes\n", (long long int) ftello (r->file));
- text = open_text_record (r, size * count);
- for (;;)
- {
- const char *variable = text_tokenize (text, ':');
- if (variable == NULL || !read_attributes (r, text, variable))
- break;
- }
- close_text_record (text);
-}
-
-static void
-read_compressed_data (struct sfm_reader *r)
-{
- enum { N_OPCODES = 8 };
- uint8_t opcodes[N_OPCODES];
- long long int opcode_ofs;
- int opcode_idx;
- int case_num;
- int i;
-
- read_int (r);
- printf ("\n%08llx: compressed data:\n", (long long int) ftello (r->file));
-
- opcode_idx = N_OPCODES;
- opcode_ofs = 0;
- case_num = 0;
- for (case_num = 0; ; case_num++)
- {
- printf ("%08llx: case %d's uncompressible data begins\n",
- (long long int) ftello (r->file), case_num);
- for (i = 0; i < r->n_var_widths; )
- {
- int width = r->var_widths[i];
- char raw_value[8];
- int opcode;
-
- if (opcode_idx >= N_OPCODES)
- {
- opcode_ofs = ftello (r->file);
- if (i == 0)
- {
- if (!try_read_bytes (r, opcodes, 8))
- return;
- }
- else
- read_bytes (r, opcodes, 8);
- opcode_idx = 0;
- }
- opcode = opcodes[opcode_idx];
- printf ("%08llx: variable %d: opcode %d: ",
- opcode_ofs + opcode_idx, i, opcode);
-
- switch (opcode)
- {
- default:
- printf ("%g", opcode - r->bias);
- if (width != 0)
- printf (", but this is a string variable (width=%d)", width);
- printf ("\n");
- i++;
- break;
-
- case 0:
- printf ("ignored padding\n");
- break;
-
- case 252:
- printf ("end of data\n");
- return;
-
- case 253:
- read_bytes (r, raw_value, 8);
- printf ("uncompressible data: ");
- print_untyped_value (r, raw_value);
- printf ("\n");
- i++;
- break;
-
- case 254:
- printf ("spaces");
- if (width == 0)
- printf (", but this is a numeric variable");
- printf ("\n");
- i++;
- break;
-
- case 255:
- printf ("SYSMIS");
- if (width != 0)
- printf (", but this is a string variable (width=%d)", width);
- printf ("\n");
- i++;
- break;
- }
-
- opcode_idx++;
- }
- }
-}
-\f
-/* Helpers for reading records that consist of structured text
- strings. */
-
-/* State. */
-struct text_record
- {
- struct sfm_reader *reader; /* Reader. */
- char *buffer; /* Record contents. */
- size_t size; /* Size of buffer. */
- size_t pos; /* Current position in buffer. */
- };
-
-/* Reads SIZE bytes into a text record for R,
- and returns the new text record. */
-static struct text_record *
-open_text_record (struct sfm_reader *r, size_t size)
-{
- struct text_record *text = xmalloc (sizeof *text);
- char *buffer = xmalloc (size + 1);
- read_bytes (r, buffer, size);
- buffer[size] = '\0';
- text->reader = r;
- text->buffer = buffer;
- text->size = size;
- text->pos = 0;
- return text;
-}
-
-/* Closes TEXT and frees its storage.
- Not really needed, because the pool will free the text record anyway,
- but can be used to free it earlier. */
-static void
-close_text_record (struct text_record *text)
-{
- free (text->buffer);
- free (text);
-}
-
-static char *
-text_tokenize (struct text_record *text, int delimiter)
-{
- size_t start = text->pos;
- while (text->pos < text->size
- && text->buffer[text->pos] != delimiter
- && text->buffer[text->pos] != '\0')
- text->pos++;
- if (start == text->pos)
- return NULL;
- text->buffer[text->pos++] = '\0';
- return &text->buffer[start];
-}
-
-static bool
-text_match (struct text_record *text, int c)
-{
- if (text->pos < text->size && text->buffer[text->pos] == c)
- {
- text->pos++;
- return true;
- }
- else
- return false;
-}
-
-/* Reads a integer value expressed in decimal, then a space, then a string that
- consists of exactly as many bytes as specified by the integer, then a space,
- from TEXT. Returns the string, null-terminated, as a subset of TEXT's
- buffer (so the caller should not free the string). */
-static const char *
-text_parse_counted_string (struct text_record *text)
-{
- size_t start;
- size_t n;
- char *s;
-
- start = text->pos;
- n = 0;
- while (isdigit ((unsigned char) text->buffer[text->pos]))
- n = (n * 10) + (text->buffer[text->pos++] - '0');
- if (start == text->pos)
- {
- sys_error (text->reader, "expecting digit at offset %zu in record",
- text->pos);
- return NULL;
- }
-
- if (!text_match (text, ' '))
- {
- sys_error (text->reader, "expecting space at offset %zu in record",
- text->pos);
- return NULL;
- }
-
- if (text->pos + n > text->size)
- {
- sys_error (text->reader, "%zu-byte string starting at offset %zu "
- "exceeds record length %zu", n, text->pos, text->size);
- return NULL;
- }
-
- s = &text->buffer[text->pos];
- if (s[n] != ' ')
- {
- sys_error (text->reader, "expecting space at offset %zu following "
- "%zu-byte string", text->pos + n, n);
- return NULL;
- }
- s[n] = '\0';
- text->pos += n + 1;
- return s;
-}
-
-/* Reads a variable=value pair from TEXT.
- Looks up the variable in DICT and stores it into *VAR.
- Stores a null-terminated value into *VALUE. */
-static bool
-read_variable_to_value_pair (struct text_record *text,
- char **key, char **value)
-{
- *key = text_tokenize (text, '=');
- *value = text_tokenize (text, '\t');
- if (!*key || !*value)
- return false;
-
- while (text->pos < text->size
- && (text->buffer[text->pos] == '\t'
- || text->buffer[text->pos] == '\0'))
- text->pos++;
- return true;
-}
-
-/* Returns the current byte offset inside the TEXT's string. */
-static size_t
-text_pos (const struct text_record *text)
-{
- return text->pos;
-}
-\f
-static void
-usage (int exit_code)
-{
- printf ("usage: %s SYSFILE...\n"
- "where each SYSFILE is the name of a system file\n",
- program_name);
- exit (exit_code);
-}
-
-/* Displays a corruption message. */
-static void
-sys_msg (struct sfm_reader *r, const char *format, va_list args)
-{
- printf ("\"%s\" near offset 0x%llx: ",
- r->file_name, (long long int) ftello (r->file));
- vprintf (format, args);
- putchar ('\n');
-}
-
-/* Displays a warning for the current file position. */
-static void
-sys_warn (struct sfm_reader *r, const char *format, ...)
-{
- va_list args;
-
- va_start (args, format);
- sys_msg (r, format, args);
- va_end (args);
-}
-
-/* Displays an error for the current file position,
- marks it as in an error state,
- and aborts reading it using longjmp. */
-static void
-sys_error (struct sfm_reader *r, const char *format, ...)
-{
- va_list args;
-
- va_start (args, format);
- sys_msg (r, format, args);
- va_end (args);
-
- exit (EXIT_FAILURE);
-}
-\f
-/* Reads BYTE_CNT bytes into BUF.
- Returns true if exactly BYTE_CNT bytes are successfully read.
- Aborts if an I/O error or a partial read occurs.
- If EOF_IS_OK, then an immediate end-of-file causes false to be
- returned; otherwise, immediate end-of-file causes an abort
- too. */
-static inline bool
-read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
- void *buf, size_t byte_cnt)
-{
- size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
- if (bytes_read == byte_cnt)
- return true;
- else if (ferror (r->file))
- sys_error (r, _("System error: %s."), strerror (errno));
- else if (!eof_is_ok || bytes_read != 0)
- sys_error (r, _("Unexpected end of file."));
- else
- return false;
-}
-
-/* Reads BYTE_CNT into BUF.
- Aborts upon I/O error or if end-of-file is encountered. */
-static void
-read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
-{
- read_bytes_internal (r, false, buf, byte_cnt);
-}
-
-/* Reads BYTE_CNT bytes into BUF.
- Returns true if exactly BYTE_CNT bytes are successfully read.
- Returns false if an immediate end-of-file is encountered.
- Aborts if an I/O error or a partial read occurs. */
-static bool
-try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
-{
- return read_bytes_internal (r, true, buf, byte_cnt);
-}
-
-/* Reads a 32-bit signed integer from R and returns its value in
- host format. */
-static int
-read_int (struct sfm_reader *r)
-{
- uint8_t integer[4];
- read_bytes (r, integer, sizeof integer);
- return integer_get (r->integer_format, integer, sizeof integer);
-}
-
-/* Reads a 64-bit signed integer from R and returns its value in
- host format. */
-static int64_t
-read_int64 (struct sfm_reader *r)
-{
- uint8_t integer[8];
- read_bytes (r, integer, sizeof integer);
- return integer_get (r->integer_format, integer, sizeof integer);
-}
-
-/* Reads a 64-bit floating-point number from R and returns its
- value in host format. */
-static double
-read_float (struct sfm_reader *r)
-{
- uint8_t number[8];
- read_bytes (r, number, sizeof number);
- return float_get_double (r->float_format, number);
-}
-
-/* Reads exactly SIZE - 1 bytes into BUFFER
- and stores a null byte into BUFFER[SIZE - 1]. */
-static void
-read_string (struct sfm_reader *r, char *buffer, size_t size)
-{
- assert (size > 0);
- read_bytes (r, buffer, size - 1);
- buffer[size - 1] = '\0';
-}
-
-/* Skips BYTES bytes forward in R. */
-static void
-skip_bytes (struct sfm_reader *r, size_t bytes)
-{
- while (bytes > 0)
- {
- char buffer[1024];
- size_t chunk = MIN (sizeof buffer, bytes);
- read_bytes (r, buffer, chunk);
- bytes -= chunk;
- }
-}
-
-static void
-trim_spaces (char *s)
-{
- char *end = strchr (s, '\0');
- while (end > s && end[-1] == ' ')
- end--;
- *end = '\0';
-}
--- /dev/null
+bin_PROGRAMS += utilities/pspp-dump-sav
+utilities_pspp_dump_sav_SOURCES = \
+ src/libpspp/integer-format.c \
+ src/libpspp/float-format.c \
+ utilities/pspp-dump-sav.c
+utilities_pspp_dump_sav_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\"
+
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <data/val-type.h>
+#include <libpspp/compiler.h>
+#include <libpspp/float-format.h>
+#include <libpspp/integer-format.h>
+#include <libpspp/misc.h>
+
+#include "error.h"
+#include "minmax.h"
+#include "progname.h"
+#include "xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+#define ID_MAX_LEN 64
+
+struct sfm_reader
+ {
+ const char *file_name;
+ FILE *file;
+
+ int n_variable_records, n_variables;
+
+ int *var_widths;
+ size_t n_var_widths, allocated_var_widths;
+
+ enum integer_format integer_format;
+ enum float_format float_format;
+
+ bool compressed;
+ double bias;
+ };
+
+static void read_header (struct sfm_reader *);
+static void read_variable_record (struct sfm_reader *);
+static void read_value_label_record (struct sfm_reader *);
+static void read_document_record (struct sfm_reader *);
+static void read_extension_record (struct sfm_reader *);
+static void read_machine_integer_info (struct sfm_reader *,
+ size_t size, size_t count);
+static void read_machine_float_info (struct sfm_reader *,
+ size_t size, size_t count);
+static void read_mrsets (struct sfm_reader *, size_t size, size_t count);
+static void read_display_parameters (struct sfm_reader *,
+ size_t size, size_t count);
+static void read_long_var_name_map (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_long_string_map (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_datafile_attributes (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_variable_attributes (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_ncases64 (struct sfm_reader *, size_t size, size_t count);
+static void read_character_encoding (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_long_string_value_labels (struct sfm_reader *r,
+ size_t size, size_t count);
+static void read_unknown_extension (struct sfm_reader *,
+ size_t size, size_t count);
+static void read_compressed_data (struct sfm_reader *);
+
+static struct text_record *open_text_record (
+ struct sfm_reader *, size_t size);
+static void close_text_record (struct text_record *);
+static bool read_variable_to_value_pair (struct text_record *,
+ char **key, char **value);
+static char *text_tokenize (struct text_record *, int delimiter);
+static bool text_match (struct text_record *text, int c);
+static const char *text_parse_counted_string (struct text_record *);
+static size_t text_pos (const struct text_record *);
+
+static void usage (int exit_code);
+static void sys_warn (struct sfm_reader *, const char *, ...)
+ PRINTF_FORMAT (2, 3);
+static void sys_error (struct sfm_reader *, const char *, ...)
+ PRINTF_FORMAT (2, 3)
+ NO_RETURN;
+
+static void read_bytes (struct sfm_reader *, void *, size_t);
+static bool try_read_bytes (struct sfm_reader *, void *, size_t);
+static int read_int (struct sfm_reader *);
+static int64_t read_int64 (struct sfm_reader *);
+static double read_float (struct sfm_reader *);
+static void read_string (struct sfm_reader *, char *, size_t);
+static void skip_bytes (struct sfm_reader *, size_t);
+static void trim_spaces (char *);
+
+int
+main (int argc, char *argv[])
+{
+ struct sfm_reader r;
+ int i;
+
+ set_program_name (argv[0]);
+ if (argc < 2)
+ usage (EXIT_FAILURE);
+
+ for (i = 1; i < argc; i++)
+ {
+ int rec_type;
+
+ r.file_name = argv[i];
+ r.file = fopen (r.file_name, "rb");
+ if (r.file == NULL)
+ error (EXIT_FAILURE, errno, "error opening `%s'", r.file_name);
+ r.n_variable_records = 0;
+ r.n_variables = 0;
+ r.n_var_widths = 0;
+ r.allocated_var_widths = 0;
+ r.var_widths = 0;
+ r.compressed = false;
+
+ if (argc > 2)
+ printf ("Reading \"%s\":\n", r.file_name);
+
+ read_header (&r);
+ while ((rec_type = read_int (&r)) != 999)
+ {
+ switch (rec_type)
+ {
+ case 2:
+ read_variable_record (&r);
+ break;
+
+ case 3:
+ read_value_label_record (&r);
+ break;
+
+ case 4:
+ sys_error (&r, _("Misplaced type 4 record."));
+
+ case 6:
+ read_document_record (&r);
+ break;
+
+ case 7:
+ read_extension_record (&r);
+ break;
+
+ default:
+ sys_error (&r, _("Unrecognized record type %d."), rec_type);
+ }
+ }
+ printf ("%08llx: end-of-dictionary record "
+ "(first byte of data at %08llx)\n",
+ (long long int) ftello (r.file),
+ (long long int) ftello (r.file) + 4);
+
+ if (r.compressed)
+ read_compressed_data (&r);
+
+ fclose (r.file);
+ }
+
+ return 0;
+}
+
+static void
+read_header (struct sfm_reader *r)
+{
+ char rec_type[5];
+ char eye_catcher[61];
+ uint8_t raw_layout_code[4];
+ int32_t layout_code;
+ int32_t nominal_case_size;
+ int32_t compressed;
+ int32_t weight_index;
+ int32_t ncases;
+ uint8_t raw_bias[8];
+ char creation_date[10];
+ char creation_time[9];
+ char file_label[65];
+
+ read_string (r, rec_type, sizeof rec_type);
+ read_string (r, eye_catcher, sizeof eye_catcher);
+
+ if (strcmp ("$FL2", rec_type) != 0)
+ sys_error (r, _("This is not an SPSS system file."));
+
+ /* Identify integer format. */
+ read_bytes (r, raw_layout_code, sizeof raw_layout_code);
+ if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
+ &r->integer_format)
+ && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
+ &r->integer_format))
+ || (r->integer_format != INTEGER_MSB_FIRST
+ && r->integer_format != INTEGER_LSB_FIRST))
+ sys_error (r, _("This is not an SPSS system file."));
+ layout_code = integer_get (r->integer_format,
+ raw_layout_code, sizeof raw_layout_code);
+
+ nominal_case_size = read_int (r);
+ compressed = read_int (r);
+ weight_index = read_int (r);
+ ncases = read_int (r);
+
+ r->compressed = compressed != 0;
+
+ /* Identify floating-point format and obtain compression bias. */
+ read_bytes (r, raw_bias, sizeof raw_bias);
+ if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
+ {
+ sys_warn (r, _("Compression bias is not the usual "
+ "value of 100, or system file uses unrecognized "
+ "floating-point format."));
+ if (r->integer_format == INTEGER_MSB_FIRST)
+ r->float_format = FLOAT_IEEE_DOUBLE_BE;
+ else
+ r->float_format = FLOAT_IEEE_DOUBLE_LE;
+ }
+ r->bias = float_get_double (r->float_format, raw_bias);
+
+ read_string (r, creation_date, sizeof creation_date);
+ read_string (r, creation_time, sizeof creation_time);
+ read_string (r, file_label, sizeof file_label);
+ trim_spaces (file_label);
+ skip_bytes (r, 3);
+
+ printf ("File header record:\n");
+ printf ("\t%17s: %s\n", "Product name", eye_catcher);
+ printf ("\t%17s: %"PRId32"\n", "Layout code", layout_code);
+ printf ("\t%17s: %"PRId32"\n", "Compressed", compressed);
+ printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index);
+ printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases);
+ printf ("\t%17s: %g\n", "Compression bias", r->bias);
+ printf ("\t%17s: %s\n", "Creation date", creation_date);
+ printf ("\t%17s: %s\n", "Creation time", creation_time);
+ printf ("\t%17s: \"%s\"\n", "File label", file_label);
+}
+
+static const char *
+format_name (int format)
+{
+ switch ((format >> 16) & 0xff)
+ {
+ case 1: return "A";
+ case 2: return "AHEX";
+ case 3: return "COMMA";
+ case 4: return "DOLLAR";
+ case 5: return "F";
+ case 6: return "IB";
+ case 7: return "PIBHEX";
+ case 8: return "P";
+ case 9: return "PIB";
+ case 10: return "PK";
+ case 11: return "RB";
+ case 12: return "RBHEX";
+ case 15: return "Z";
+ case 16: return "N";
+ case 17: return "E";
+ case 20: return "DATE";
+ case 21: return "TIME";
+ case 22: return "DATETIME";
+ case 23: return "ADATE";
+ case 24: return "JDATE";
+ case 25: return "DTIME";
+ case 26: return "WKDAY";
+ case 27: return "MONTH";
+ case 28: return "MOYR";
+ case 29: return "QYR";
+ case 30: return "WKYR";
+ case 31: return "PCT";
+ case 32: return "DOT";
+ case 33: return "CCA";
+ case 34: return "CCB";
+ case 35: return "CCC";
+ case 36: return "CCD";
+ case 37: return "CCE";
+ case 38: return "EDATE";
+ case 39: return "SDATE";
+ default: return "invalid";
+ }
+}
+
+/* Reads a variable (type 2) record from R and adds the
+ corresponding variable to DICT.
+ Also skips past additional variable records for long string
+ variables. */
+static void
+read_variable_record (struct sfm_reader *r)
+{
+ int width;
+ int has_variable_label;
+ int missing_value_code;
+ int print_format;
+ int write_format;
+ char name[9];
+
+ printf ("%08llx: variable record #%d\n",
+ (long long int) ftello (r->file), r->n_variable_records++);
+
+ width = read_int (r);
+ has_variable_label = read_int (r);
+ missing_value_code = read_int (r);
+ print_format = read_int (r);
+ write_format = read_int (r);
+ read_string (r, name, sizeof name);
+ name[strcspn (name, " ")] = '\0';
+
+ if (width >= 0)
+ r->n_variables++;
+
+ if (r->n_var_widths >= r->allocated_var_widths)
+ r->var_widths = x2nrealloc (r->var_widths, &r->allocated_var_widths,
+ sizeof *r->var_widths);
+ r->var_widths[r->n_var_widths++] = width;
+
+ printf ("\tWidth: %d (%s)\n",
+ width,
+ width > 0 ? "string"
+ : width == 0 ? "numeric"
+ : "long string continuation record");
+ printf ("\tVariable label: %d\n", has_variable_label);
+ printf ("\tMissing values code: %d (%s)\n", missing_value_code,
+ (missing_value_code == 0 ? "no missing values"
+ : missing_value_code == 1 ? "one missing value"
+ : missing_value_code == 2 ? "two missing values"
+ : missing_value_code == 3 ? "three missing values"
+ : missing_value_code == -2 ? "one missing value range"
+ : missing_value_code == -3 ? "one missing value, one range"
+ : "bad value"));
+ printf ("\tPrint format: %06x (%s%d.%d)\n",
+ print_format, format_name (print_format),
+ (print_format >> 8) & 0xff, print_format & 0xff);
+ printf ("\tWrite format: %06x (%s%d.%d)\n",
+ write_format, format_name (write_format),
+ (write_format >> 8) & 0xff, write_format & 0xff);
+ printf ("\tName: %s\n", name);
+
+ /* Get variable label, if any. */
+ if (has_variable_label != 0 && has_variable_label != 1)
+ sys_error (r, _("Variable label indicator field is not 0 or 1."));
+ if (has_variable_label == 1)
+ {
+ long long int offset = ftello (r->file);
+ size_t len, read_len;
+ char label[255 + 1];
+
+ len = read_int (r);
+
+ /* Read up to 255 bytes of label. */
+ read_len = MIN (sizeof label - 1, len);
+ read_string (r, label, read_len + 1);
+ printf("\t%08llx Variable label: \"%s\"\n", offset, label);
+
+ /* Skip unread label bytes. */
+ skip_bytes (r, len - read_len);
+
+ /* Skip label padding up to multiple of 4 bytes. */
+ skip_bytes (r, ROUND_UP (len, 4) - len);
+ }
+
+ /* Set missing values. */
+ if (missing_value_code != 0)
+ {
+ int i;
+
+ printf ("\t%08llx Missing values:", (long long int) ftello (r->file));
+ if (!width)
+ {
+ if (missing_value_code < -3 || missing_value_code > 3
+ || missing_value_code == -1)
+ sys_error (r, _("Numeric missing value indicator field is not "
+ "-3, -2, 0, 1, 2, or 3."));
+ if (missing_value_code < 0)
+ {
+ double low = read_float (r);
+ double high = read_float (r);
+ printf (" %g...%g", low, high);
+ missing_value_code = -missing_value_code - 2;
+ }
+ for (i = 0; i < missing_value_code; i++)
+ printf (" %g", read_float (r));
+ }
+ else if (width > 0)
+ {
+ if (missing_value_code < 1 || missing_value_code > 3)
+ sys_error (r, _("String missing value indicator field is not "
+ "0, 1, 2, or 3."));
+ for (i = 0; i < missing_value_code; i++)
+ {
+ char string[9];
+ read_string (r, string, sizeof string);
+ printf (" \"%s\"", string);
+ }
+ }
+ putchar ('\n');
+ }
+}
+
+static void
+print_untyped_value (struct sfm_reader *r, char raw_value[8])
+{
+ int n_printable;
+ double value;
+
+ value = float_get_double (r->float_format, raw_value);
+ for (n_printable = 0; n_printable < sizeof raw_value; n_printable++)
+ if (!isprint (raw_value[n_printable]))
+ break;
+
+ printf ("%g/\"%.*s\"", value, n_printable, raw_value);
+}
+
+/* Reads value labels from sysfile R and inserts them into the
+ associated dictionary. */
+static void
+read_value_label_record (struct sfm_reader *r)
+{
+ int label_cnt, var_cnt;
+ int i;
+
+ printf ("%08llx: value labels record\n", (long long int) ftello (r->file));
+
+ /* Read number of labels. */
+ label_cnt = read_int (r);
+ for (i = 0; i < label_cnt; i++)
+ {
+ char raw_value[8];
+ unsigned char label_len;
+ size_t padded_len;
+ char label[256];
+
+ read_bytes (r, raw_value, sizeof raw_value);
+
+ /* Read label length. */
+ read_bytes (r, &label_len, sizeof label_len);
+ padded_len = ROUND_UP (label_len + 1, 8);
+
+ /* Read label, padding. */
+ read_bytes (r, label, padded_len - 1);
+ label[label_len] = 0;
+
+ printf ("\t");
+ print_untyped_value (r, raw_value);
+ printf (": \"%s\"\n", label);
+ }
+
+ /* Now, read the type 4 record that has the list of variables
+ to which the value labels are to be applied. */
+
+ /* Read record type of type 4 record. */
+ if (read_int (r) != 4)
+ sys_error (r, _("Variable index record (type 4) does not immediately "
+ "follow value label record (type 3) as it should."));
+
+ /* Read number of variables associated with value label from type 4
+ record. */
+ printf ("\t%08llx: apply to variables", (long long int) ftello (r->file));
+ var_cnt = read_int (r);
+ for (i = 0; i < var_cnt; i++)
+ printf (" #%d", read_int (r));
+ putchar ('\n');
+}
+
+static void
+read_document_record (struct sfm_reader *r)
+{
+ int n_lines;
+ int i;
+
+ printf ("%08llx: document record\n", (long long int) ftello (r->file));
+ n_lines = read_int (r);
+ printf ("\t%d lines of documents\n", n_lines);
+
+ for (i = 0; i < n_lines; i++)
+ {
+ char line[81];
+ printf ("\t%08llx: ", (long long int) ftello (r->file));
+ read_string (r, line, sizeof line);
+ trim_spaces (line);
+ printf ("line %d: \"%s\"\n", i, line);
+ }
+}
+
+static void
+read_extension_record (struct sfm_reader *r)
+{
+ long long int offset = ftello (r->file);
+ int subtype = read_int (r);
+ size_t size = read_int (r);
+ size_t count = read_int (r);
+ size_t bytes = size * count;
+
+ printf ("%08llx: Record 7, subtype %d, size=%zu, count=%zu\n",
+ offset, subtype, size, count);
+
+ switch (subtype)
+ {
+ case 3:
+ read_machine_integer_info (r, size, count);
+ return;
+
+ case 4:
+ read_machine_float_info (r, size, count);
+ return;
+
+ case 5:
+ /* Variable sets information. We don't use these yet.
+ They only apply to GUIs; see VARSETS on the APPLY
+ DICTIONARY command in SPSS documentation. */
+ break;
+
+ case 6:
+ /* DATE variable information. We don't use it yet, but we
+ should. */
+ break;
+
+ case 7:
+ case 19:
+ read_mrsets (r, size, count);
+ return;
+
+ case 11:
+ read_display_parameters (r, size, count);
+ return;
+
+ case 13:
+ read_long_var_name_map (r, size, count);
+ return;
+
+ case 14:
+ read_long_string_map (r, size, count);
+ return;
+
+ case 16:
+ read_ncases64 (r, size, count);
+ return;
+
+ case 17:
+ read_datafile_attributes (r, size, count);
+ return;
+
+ case 18:
+ read_variable_attributes (r, size, count);
+ return;
+
+ case 20:
+ read_character_encoding (r, size, count);
+ return;
+
+ case 21:
+ read_long_string_value_labels (r, size, count);
+ return;
+
+ default:
+ sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
+ read_unknown_extension (r, size, count);
+ return;
+ }
+
+ skip_bytes (r, bytes);
+}
+
+static void
+read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count)
+{
+ long long int offset = ftello (r->file);
+ int version_major = read_int (r);
+ int version_minor = read_int (r);
+ int version_revision = read_int (r);
+ int machine_code = read_int (r);
+ int float_representation = read_int (r);
+ int compression_code = read_int (r);
+ int integer_representation = read_int (r);
+ int character_code = read_int (r);
+
+ printf ("%08llx: machine integer info\n", offset);
+ if (size != 4 || count != 8)
+ sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
+ "subtype 3."),
+ size, count);
+
+ printf ("\tVersion: %d.%d.%d\n",
+ version_major, version_minor, version_revision);
+ printf ("\tMachine code: %d\n", machine_code);
+ printf ("\tFloating point representation: %d (%s)\n",
+ float_representation,
+ float_representation == 1 ? "IEEE 754"
+ : float_representation == 2 ? "IBM 370"
+ : float_representation == 3 ? "DEC VAX"
+ : "unknown");
+ printf ("\tCompression code: %d\n", compression_code);
+ printf ("\tEndianness: %d (%s)\n", integer_representation,
+ integer_representation == 1 ? "big"
+ : integer_representation == 2 ? "little" : "unknown");
+ printf ("\tCharacter code: %d\n", character_code);
+}
+
+/* Read record type 7, subtype 4. */
+static void
+read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
+{
+ long long int offset = ftello (r->file);
+ double sysmis = read_float (r);
+ double highest = read_float (r);
+ double lowest = read_float (r);
+
+ printf ("%08llx: machine float info\n", offset);
+ if (size != 8 || count != 3)
+ sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
+ size, count);
+
+ printf ("\tsysmis: %g\n", sysmis);
+ if (sysmis != SYSMIS)
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ sysmis, "SYSMIS");
+
+ printf ("\thighest: %g\n", highest);
+ if (highest != HIGHEST)
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ highest, "HIGHEST");
+
+ printf ("\tlowest: %g\n", lowest);
+ if (lowest != LOWEST)
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ lowest, "LOWEST");
+}
+
+/* Read record type 7, subtype 7. */
+static void
+read_mrsets (struct sfm_reader *r, size_t size, size_t count)
+{
+ struct text_record *text;
+
+ printf ("%08llx: multiple response sets\n",
+ (long long int) ftello (r->file));
+ text = open_text_record (r, size * count);
+ for (;;)
+ {
+ const char *name;
+ enum { MRSET_MC, MRSET_MD } type;
+ bool cat_label_from_counted_values = false;
+ bool label_from_var_label = false;
+ const char *counted;
+ const char *label;
+ const char *variables;
+
+ name = text_tokenize (text, '=');
+ if (name == NULL)
+ break;
+
+ if (text_match (text, 'C'))
+ {
+ type = MRSET_MC;
+ counted = NULL;
+ if (!text_match (text, ' '))
+ {
+ sys_warn (r, "missing space following 'C' at offset %zu "
+ "in mrsets record", text_pos (text));
+ break;
+ }
+ }
+ else if (text_match (text, 'D'))
+ {
+ type = MRSET_MD;
+ }
+ else if (text_match (text, 'E'))
+ {
+ char *number;
+
+ type = MRSET_MD;
+ cat_label_from_counted_values = true;
+
+ if (!text_match (text, ' '))
+ {
+ sys_warn (r, _("Missing space following `%c' at offset %zu "
+ "in MRSETS record"), 'E', text_pos (text));
+ break;
+ }
+
+ number = text_tokenize (text, ' ');
+ if (!strcmp (number, "11"))
+ label_from_var_label = true;
+ else if (strcmp (number, "1"))
+ sys_warn (r, _("Unexpected label source value `%s' "
+ "following `E' at offset %zu in MRSETS record"),
+ number, text_pos (text));
+
+ }
+ else
+ {
+ sys_warn (r, "missing `C', `D', or `E' at offset %zu "
+ "in mrsets record", text_pos (text));
+ break;
+ }
+
+ if (type == MRSET_MD)
+ {
+ counted = text_parse_counted_string (text);
+ if (counted == NULL)
+ break;
+ }
+
+ label = text_parse_counted_string (text);
+ if (label == NULL)
+ break;
+
+ variables = text_tokenize (text, '\n');
+ if (variables == NULL)
+ {
+ sys_warn (r, "missing variable names following label "
+ "at offset %zu in mrsets record", text_pos (text));
+ break;
+ }
+
+ printf ("\t\"%s\": multiple %s set",
+ name, type == MRSET_MC ? "category" : "dichotomy");
+ if (counted != NULL)
+ printf (", counted value \"%s\"", counted);
+ if (cat_label_from_counted_values)
+ printf (", category labels from counted values");
+ if (label[0] != '\0')
+ printf (", label \"%s\"", label);
+ if (label_from_var_label)
+ printf (", label from variable label");
+ printf(", variables \"%s\"\n", variables);
+ }
+ close_text_record (text);
+}
+
+/* Read record type 7, subtype 11. */
+static void
+read_display_parameters (struct sfm_reader *r, size_t size, size_t count)
+{
+ size_t n_vars;
+ bool includes_width;
+ size_t i;
+
+ printf ("%08llx: variable display parameters\n",
+ (long long int) ftello (r->file));
+ if (size != 4)
+ {
+ sys_warn (r, _("Bad size %zu on extension 11."), size);
+ skip_bytes (r, size * count);
+ return;
+ }
+
+ n_vars = r->n_variables;
+ if (count == 3 * n_vars)
+ includes_width = true;
+ else if (count == 2 * n_vars)
+ includes_width = false;
+ else
+ {
+ sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
+ count, n_vars);
+ skip_bytes (r, size * count);
+ return;
+ }
+
+ for (i = 0; i < n_vars; ++i)
+ {
+ int measure = read_int (r);
+ int width = includes_width ? read_int (r) : 0;
+ int align = read_int (r);
+
+ printf ("\tVar #%zu: measure=%d (%s)", i, measure,
+ (measure == 1 ? "nominal"
+ : measure == 2 ? "ordinal"
+ : measure == 3 ? "scale"
+ : "invalid"));
+ if (includes_width)
+ printf (", width=%d", width);
+ printf (", align=%d (%s)\n", align,
+ (align == 0 ? "left"
+ : align == 1 ? "right"
+ : align == 2 ? "centre"
+ : "invalid"));
+ }
+}
+
+/* Reads record type 7, subtype 13, which gives the long name
+ that corresponds to each short name. */
+static void
+read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count)
+{
+ struct text_record *text;
+ char *var;
+ char *long_name;
+
+ printf ("%08llx: long variable names (short => long)\n",
+ (long long int) ftello (r->file));
+ text = open_text_record (r, size * count);
+ while (read_variable_to_value_pair (text, &var, &long_name))
+ printf ("\t%s => %s\n", var, long_name);
+ close_text_record (text);
+}
+
+/* Reads record type 7, subtype 14, which gives the real length
+ of each very long string. Rearranges DICT accordingly. */
+static void
+read_long_string_map (struct sfm_reader *r, size_t size, size_t count)
+{
+ struct text_record *text;
+ char *var;
+ char *length_s;
+
+ printf ("%08llx: very long strings (variable => length)\n",
+ (long long int) ftello (r->file));
+ text = open_text_record (r, size * count);
+ while (read_variable_to_value_pair (text, &var, &length_s))
+ printf ("\t%s => %d\n", var, atoi (length_s));
+ close_text_record (text);
+}
+
+static bool
+read_attributes (struct sfm_reader *r, struct text_record *text,
+ const char *variable)
+{
+ const char *key;
+ int index;
+
+ for (;;)
+ {
+ key = text_tokenize (text, '(');
+ if (key == NULL)
+ return true;
+
+ for (index = 1; ; index++)
+ {
+ /* Parse the value. */
+ const char *value = text_tokenize (text, '\n');
+ if (value == NULL)
+ {
+ sys_warn (r, _("%s: Error parsing attribute value %s[%d]"),
+ variable, key, index);
+ return false;
+ }
+ if (strlen (value) < 2
+ || value[0] != '\'' || value[strlen (value) - 1] != '\'')
+ sys_warn (r, _("%s: Attribute value %s[%d] is not quoted: %s"),
+ variable, key, index, value);
+ else
+ printf ("\t%s: %s[%d] = \"%.*s\"\n",
+ variable, key, index, (int) strlen (value) - 2, value + 1);
+
+ /* Was this the last value for this attribute? */
+ if (text_match (text, ')'))
+ break;
+ }
+
+ if (text_match (text, '/'))
+ return true;
+ }
+}
+
+/* Read extended number of cases record. */
+static void
+read_ncases64 (struct sfm_reader *r, size_t size, size_t count)
+{
+ int64_t unknown, ncases64;
+
+ if (size != 8)
+ {
+ sys_warn (r, _("Bad size %zu for extended number of cases."), size);
+ skip_bytes (r, size * count);
+ return;
+ }
+ if (count != 2)
+ {
+ sys_warn (r, _("Bad count %zu for extended number of cases."), size);
+ skip_bytes (r, size * count);
+ return;
+ }
+ unknown = read_int64 (r);
+ ncases64 = read_int64 (r);
+ printf ("%08llx: extended number of cases: "
+ "unknown=%"PRId64", ncases64=%"PRId64"\n",
+ (long long int) ftello (r->file), unknown, ncases64);
+}
+
+static void
+read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count)
+{
+ struct text_record *text;
+
+ printf ("%08llx: datafile attributes\n", (long long int) ftello (r->file));
+ text = open_text_record (r, size * count);
+ read_attributes (r, text, "datafile");
+ close_text_record (text);
+}
+
+static void
+read_character_encoding (struct sfm_reader *r, size_t size, size_t count)
+{
+ long long int posn = ftello (r->file);
+ char *encoding = xcalloc (size, count + 1);
+ read_string (r, encoding, count + 1);
+
+ printf ("%08llx: Character Encoding: %s\n", posn, encoding);
+}
+
+static void
+read_long_string_value_labels (struct sfm_reader *r, size_t size, size_t count)
+{
+ long long int start = ftello (r->file);
+
+ printf ("%08llx: long string value labels\n", start);
+ while (ftello (r->file) - start < size * count)
+ {
+ long long posn = ftello (r->file);
+ char var_name[ID_MAX_LEN + 1];
+ int var_name_len;
+ int n_values;
+ int width;
+ int i;
+
+ /* Read variable name. */
+ var_name_len = read_int (r);
+ if (var_name_len > ID_MAX_LEN)
+ sys_error (r, _("Variable name length in long string value label "
+ "record (%d) exceeds %d-byte limit."),
+ var_name_len, ID_MAX_LEN);
+ read_string (r, var_name, var_name_len + 1);
+
+ /* Read width, number of values. */
+ width = read_int (r);
+ n_values = read_int (r);
+
+ printf ("\t%08llx: %s, width %d, %d values\n",
+ posn, var_name, width, n_values);
+
+ /* Read values. */
+ for (i = 0; i < n_values; i++)
+ {
+ char *value;
+ int value_length;
+
+ char *label;
+ int label_length;
+
+ posn = ftello (r->file);
+
+ /* Read value. */
+ value_length = read_int (r);
+ value = xmalloc (value_length + 1);
+ read_string (r, value, value_length + 1);
+
+ /* Read label. */
+ label_length = read_int (r);
+ label = xmalloc (label_length + 1);
+ read_string (r, label, label_length + 1);
+
+ printf ("\t\t%08llx: \"%s\" (%d bytes) => \"%s\" (%d bytes)\n",
+ posn, value, value_length, label, label_length);
+
+ free (value);
+ free (label);
+ }
+ }
+}
+
+static void
+hex_dump (size_t offset, const void *buffer_, size_t buffer_size)
+{
+ const uint8_t *buffer = buffer_;
+
+ while (buffer_size > 0)
+ {
+ size_t n = MIN (buffer_size, 16);
+ size_t i;
+
+ printf ("%04zx", offset);
+ for (i = 0; i < 16; i++)
+ {
+ if (i < n)
+ printf ("%c%02x", i == 8 ? '-' : ' ', buffer[i]);
+ else
+ printf (" ");
+ }
+
+ printf (" |");
+ for (i = 0; i < 16; i++)
+ {
+ unsigned char c = i < n ? buffer[i] : ' ';
+ putchar (isprint (c) ? c : '.');
+ }
+ printf ("|\n");
+
+ offset += n;
+ buffer += n;
+ buffer_size -= n;
+ }
+}
+
+/* Reads and prints any type 7 record that we don't understand. */
+static void
+read_unknown_extension (struct sfm_reader *r, size_t size, size_t count)
+{
+ unsigned char *buffer;
+ size_t i;
+
+ if (size == 0 || count > 65536 / size)
+ skip_bytes (r, size * count);
+ else if (size != 1)
+ {
+ buffer = xmalloc (size);
+ for (i = 0; i < count; i++)
+ {
+ read_bytes (r, buffer, size);
+ hex_dump (i * size, buffer, size);
+ }
+ free (buffer);
+ }
+ else
+ {
+ buffer = xmalloc (count);
+ read_bytes (r, buffer, count);
+ if (memchr (buffer, 0, count) == 0)
+ for (i = 0; i < count; i++)
+ {
+ unsigned char c = buffer[i];
+
+ if (c == '\\')
+ printf ("\\\\");
+ else if (c == '\n' || isprint (c))
+ putchar (c);
+ else
+ printf ("\\%02x", c);
+ }
+ else
+ hex_dump (0, buffer, count);
+ free (buffer);
+ }
+}
+
+static void
+read_variable_attributes (struct sfm_reader *r, size_t size, size_t count)
+{
+ struct text_record *text;
+
+ printf ("%08llx: variable attributes\n", (long long int) ftello (r->file));
+ text = open_text_record (r, size * count);
+ for (;;)
+ {
+ const char *variable = text_tokenize (text, ':');
+ if (variable == NULL || !read_attributes (r, text, variable))
+ break;
+ }
+ close_text_record (text);
+}
+
+static void
+read_compressed_data (struct sfm_reader *r)
+{
+ enum { N_OPCODES = 8 };
+ uint8_t opcodes[N_OPCODES];
+ long long int opcode_ofs;
+ int opcode_idx;
+ int case_num;
+ int i;
+
+ read_int (r);
+ printf ("\n%08llx: compressed data:\n", (long long int) ftello (r->file));
+
+ opcode_idx = N_OPCODES;
+ opcode_ofs = 0;
+ case_num = 0;
+ for (case_num = 0; ; case_num++)
+ {
+ printf ("%08llx: case %d's uncompressible data begins\n",
+ (long long int) ftello (r->file), case_num);
+ for (i = 0; i < r->n_var_widths; )
+ {
+ int width = r->var_widths[i];
+ char raw_value[8];
+ int opcode;
+
+ if (opcode_idx >= N_OPCODES)
+ {
+ opcode_ofs = ftello (r->file);
+ if (i == 0)
+ {
+ if (!try_read_bytes (r, opcodes, 8))
+ return;
+ }
+ else
+ read_bytes (r, opcodes, 8);
+ opcode_idx = 0;
+ }
+ opcode = opcodes[opcode_idx];
+ printf ("%08llx: variable %d: opcode %d: ",
+ opcode_ofs + opcode_idx, i, opcode);
+
+ switch (opcode)
+ {
+ default:
+ printf ("%g", opcode - r->bias);
+ if (width != 0)
+ printf (", but this is a string variable (width=%d)", width);
+ printf ("\n");
+ i++;
+ break;
+
+ case 0:
+ printf ("ignored padding\n");
+ break;
+
+ case 252:
+ printf ("end of data\n");
+ return;
+
+ case 253:
+ read_bytes (r, raw_value, 8);
+ printf ("uncompressible data: ");
+ print_untyped_value (r, raw_value);
+ printf ("\n");
+ i++;
+ break;
+
+ case 254:
+ printf ("spaces");
+ if (width == 0)
+ printf (", but this is a numeric variable");
+ printf ("\n");
+ i++;
+ break;
+
+ case 255:
+ printf ("SYSMIS");
+ if (width != 0)
+ printf (", but this is a string variable (width=%d)", width);
+ printf ("\n");
+ i++;
+ break;
+ }
+
+ opcode_idx++;
+ }
+ }
+}
+\f
+/* Helpers for reading records that consist of structured text
+ strings. */
+
+/* State. */
+struct text_record
+ {
+ struct sfm_reader *reader; /* Reader. */
+ char *buffer; /* Record contents. */
+ size_t size; /* Size of buffer. */
+ size_t pos; /* Current position in buffer. */
+ };
+
+/* Reads SIZE bytes into a text record for R,
+ and returns the new text record. */
+static struct text_record *
+open_text_record (struct sfm_reader *r, size_t size)
+{
+ struct text_record *text = xmalloc (sizeof *text);
+ char *buffer = xmalloc (size + 1);
+ read_bytes (r, buffer, size);
+ buffer[size] = '\0';
+ text->reader = r;
+ text->buffer = buffer;
+ text->size = size;
+ text->pos = 0;
+ return text;
+}
+
+/* Closes TEXT and frees its storage.
+ Not really needed, because the pool will free the text record anyway,
+ but can be used to free it earlier. */
+static void
+close_text_record (struct text_record *text)
+{
+ free (text->buffer);
+ free (text);
+}
+
+static char *
+text_tokenize (struct text_record *text, int delimiter)
+{
+ size_t start = text->pos;
+ while (text->pos < text->size
+ && text->buffer[text->pos] != delimiter
+ && text->buffer[text->pos] != '\0')
+ text->pos++;
+ if (start == text->pos)
+ return NULL;
+ text->buffer[text->pos++] = '\0';
+ return &text->buffer[start];
+}
+
+static bool
+text_match (struct text_record *text, int c)
+{
+ if (text->pos < text->size && text->buffer[text->pos] == c)
+ {
+ text->pos++;
+ return true;
+ }
+ else
+ return false;
+}
+
+/* Reads a integer value expressed in decimal, then a space, then a string that
+ consists of exactly as many bytes as specified by the integer, then a space,
+ from TEXT. Returns the string, null-terminated, as a subset of TEXT's
+ buffer (so the caller should not free the string). */
+static const char *
+text_parse_counted_string (struct text_record *text)
+{
+ size_t start;
+ size_t n;
+ char *s;
+
+ start = text->pos;
+ n = 0;
+ while (isdigit ((unsigned char) text->buffer[text->pos]))
+ n = (n * 10) + (text->buffer[text->pos++] - '0');
+ if (start == text->pos)
+ {
+ sys_error (text->reader, "expecting digit at offset %zu in record",
+ text->pos);
+ return NULL;
+ }
+
+ if (!text_match (text, ' '))
+ {
+ sys_error (text->reader, "expecting space at offset %zu in record",
+ text->pos);
+ return NULL;
+ }
+
+ if (text->pos + n > text->size)
+ {
+ sys_error (text->reader, "%zu-byte string starting at offset %zu "
+ "exceeds record length %zu", n, text->pos, text->size);
+ return NULL;
+ }
+
+ s = &text->buffer[text->pos];
+ if (s[n] != ' ')
+ {
+ sys_error (text->reader, "expecting space at offset %zu following "
+ "%zu-byte string", text->pos + n, n);
+ return NULL;
+ }
+ s[n] = '\0';
+ text->pos += n + 1;
+ return s;
+}
+
+/* Reads a variable=value pair from TEXT.
+ Looks up the variable in DICT and stores it into *VAR.
+ Stores a null-terminated value into *VALUE. */
+static bool
+read_variable_to_value_pair (struct text_record *text,
+ char **key, char **value)
+{
+ *key = text_tokenize (text, '=');
+ *value = text_tokenize (text, '\t');
+ if (!*key || !*value)
+ return false;
+
+ while (text->pos < text->size
+ && (text->buffer[text->pos] == '\t'
+ || text->buffer[text->pos] == '\0'))
+ text->pos++;
+ return true;
+}
+
+/* Returns the current byte offset inside the TEXT's string. */
+static size_t
+text_pos (const struct text_record *text)
+{
+ return text->pos;
+}
+\f
+static void
+usage (int exit_code)
+{
+ printf ("usage: %s SYSFILE...\n"
+ "where each SYSFILE is the name of a system file\n",
+ program_name);
+ exit (exit_code);
+}
+
+/* Displays a corruption message. */
+static void
+sys_msg (struct sfm_reader *r, const char *format, va_list args)
+{
+ printf ("\"%s\" near offset 0x%llx: ",
+ r->file_name, (long long int) ftello (r->file));
+ vprintf (format, args);
+ putchar ('\n');
+}
+
+/* Displays a warning for the current file position. */
+static void
+sys_warn (struct sfm_reader *r, const char *format, ...)
+{
+ va_list args;
+
+ va_start (args, format);
+ sys_msg (r, format, args);
+ va_end (args);
+}
+
+/* Displays an error for the current file position,
+ marks it as in an error state,
+ and aborts reading it using longjmp. */
+static void
+sys_error (struct sfm_reader *r, const char *format, ...)
+{
+ va_list args;
+
+ va_start (args, format);
+ sys_msg (r, format, args);
+ va_end (args);
+
+ exit (EXIT_FAILURE);
+}
+\f
+/* Reads BYTE_CNT bytes into BUF.
+ Returns true if exactly BYTE_CNT bytes are successfully read.
+ Aborts if an I/O error or a partial read occurs.
+ If EOF_IS_OK, then an immediate end-of-file causes false to be
+ returned; otherwise, immediate end-of-file causes an abort
+ too. */
+static inline bool
+read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
+ void *buf, size_t byte_cnt)
+{
+ size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
+ if (bytes_read == byte_cnt)
+ return true;
+ else if (ferror (r->file))
+ sys_error (r, _("System error: %s."), strerror (errno));
+ else if (!eof_is_ok || bytes_read != 0)
+ sys_error (r, _("Unexpected end of file."));
+ else
+ return false;
+}
+
+/* Reads BYTE_CNT into BUF.
+ Aborts upon I/O error or if end-of-file is encountered. */
+static void
+read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+{
+ read_bytes_internal (r, false, buf, byte_cnt);
+}
+
+/* Reads BYTE_CNT bytes into BUF.
+ Returns true if exactly BYTE_CNT bytes are successfully read.
+ Returns false if an immediate end-of-file is encountered.
+ Aborts if an I/O error or a partial read occurs. */
+static bool
+try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+{
+ return read_bytes_internal (r, true, buf, byte_cnt);
+}
+
+/* Reads a 32-bit signed integer from R and returns its value in
+ host format. */
+static int
+read_int (struct sfm_reader *r)
+{
+ uint8_t integer[4];
+ read_bytes (r, integer, sizeof integer);
+ return integer_get (r->integer_format, integer, sizeof integer);
+}
+
+/* Reads a 64-bit signed integer from R and returns its value in
+ host format. */
+static int64_t
+read_int64 (struct sfm_reader *r)
+{
+ uint8_t integer[8];
+ read_bytes (r, integer, sizeof integer);
+ return integer_get (r->integer_format, integer, sizeof integer);
+}
+
+/* Reads a 64-bit floating-point number from R and returns its
+ value in host format. */
+static double
+read_float (struct sfm_reader *r)
+{
+ uint8_t number[8];
+ read_bytes (r, number, sizeof number);
+ return float_get_double (r->float_format, number);
+}
+
+/* Reads exactly SIZE - 1 bytes into BUFFER
+ and stores a null byte into BUFFER[SIZE - 1]. */
+static void
+read_string (struct sfm_reader *r, char *buffer, size_t size)
+{
+ assert (size > 0);
+ read_bytes (r, buffer, size - 1);
+ buffer[size - 1] = '\0';
+}
+
+/* Skips BYTES bytes forward in R. */
+static void
+skip_bytes (struct sfm_reader *r, size_t bytes)
+{
+ while (bytes > 0)
+ {
+ char buffer[1024];
+ size_t chunk = MIN (sizeof buffer, bytes);
+ read_bytes (r, buffer, chunk);
+ bytes -= chunk;
+ }
+}
+
+static void
+trim_spaces (char *s)
+{
+ char *end = strchr (s, '\0');
+ while (end > s && end[-1] == ' ')
+ end--;
+ *end = '\0';
+}