/* PSPP - a program for statistical analysis.
- Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <ctype.h>
#include <errno.h>
+#include <float.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include "gl/progname.h"
#include "gl/version-etc.h"
#include "gl/xalloc.h"
+#include "gl/xsize.h"
#define ID_MAX_LEN 64
+enum compression /* Compression scheme selected by the file header's compression value. */
+ {
+ COMP_NONE, /* Header value 0: no compression. */
+ COMP_SIMPLE, /* Header value 1: simple compression. */
+ COMP_ZLIB /* Header value 2 (only with "$FL3" magic): ZLIB compression. */
+ };
+
struct sfm_reader
{
const char *file_name;
enum integer_format integer_format;
enum float_format float_format;
- bool compressed;
+ enum compression compression;
double bias;
};
size_t size, size_t count);
static void read_unknown_extension (struct sfm_reader *,
size_t size, size_t count);
-static void read_compressed_data (struct sfm_reader *, int max_cases);
+static void read_simple_compressed_data (struct sfm_reader *, int max_cases);
+static void read_zlib_compressed_data (struct sfm_reader *);
static struct text_record *open_text_record (
- struct sfm_reader *, size_t size);
+ struct sfm_reader *, size_t size, size_t count);
static void close_text_record (struct text_record *);
static bool read_variable_to_value_pair (struct text_record *,
char **key, char **value);
r.n_var_widths = 0;
r.allocated_var_widths = 0;
r.var_widths = 0;
- r.compressed = false;
+ r.compression = COMP_NONE;
if (argc - optind > 1)
printf ("Reading \"%s\":\n", r.file_name);
-
+
read_header (&r);
while ((rec_type = read_int (&r)) != 999)
{
(long long int) ftello (r.file),
(long long int) ftello (r.file) + 4);
- if (r.compressed && max_cases > 0)
- read_compressed_data (&r, max_cases);
+ if (r.compression == COMP_SIMPLE)
+ {
+ if (max_cases > 0)
+ read_simple_compressed_data (&r, max_cases);
+ }
+ else if (r.compression == COMP_ZLIB)
+ read_zlib_compressed_data (&r);
+
+ free (r.var_widths);
fclose (r.file);
}
-
+
return 0;
}
char creation_date[10];
char creation_time[9];
char file_label[65];
+ bool zmagic;
read_string (r, rec_type, sizeof rec_type);
read_string (r, eye_catcher, sizeof eye_catcher);
- if (strcmp ("$FL2", rec_type) != 0)
+ if (!strcmp ("$FL2", rec_type))
+ zmagic = false;
+ else if (!strcmp ("$FL3", rec_type))
+ zmagic = true;
+ else
sys_error (r, "This is not an SPSS system file.");
/* Identify integer format. */
weight_index = read_int (r);
ncases = read_int (r);
- r->compressed = compressed != 0;
+ if (!zmagic)
+ {
+ if (compressed == 0)
+ r->compression = COMP_NONE;
+ else if (compressed == 1)
+ r->compression = COMP_SIMPLE;
+ else if (compressed != 0)
+ sys_error (r, "SAV file header has invalid compression value "
+ "%"PRId32".", compressed);
+ }
+ else
+ {
+ if (compressed == 2)
+ r->compression = COMP_ZLIB;
+ else
+ sys_error (r, "ZSAV file header has invalid compression value "
+ "%"PRId32".", compressed);
+ }
/* Identify floating-point format and obtain compression bias. */
read_bytes (r, raw_bias, sizeof raw_bias);
printf ("File header record:\n");
printf ("\t%17s: %s\n", "Product name", eye_catcher);
printf ("\t%17s: %"PRId32"\n", "Layout code", layout_code);
- printf ("\t%17s: %"PRId32"\n", "Compressed", compressed);
+ printf ("\t%17s: %"PRId32" (%s)\n", "Compressed",
+ compressed,
+ r->compression == COMP_NONE ? "no compression"
+ : r->compression == COMP_SIMPLE ? "simple compression"
+ : r->compression == COMP_ZLIB ? "ZLIB compression"
+ : "<error>");
printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index);
printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases);
- printf ("\t%17s: %g\n", "Compression bias", r->bias);
+ printf ("\t%17s: %.*g\n", "Compression bias", DBL_DIG + 1, r->bias);
printf ("\t%17s: %s\n", "Creation date", creation_date);
printf ("\t%17s: %s\n", "Creation time", creation_time);
printf ("\t%17s: \"%s\"\n", "File label", file_label);
case 37: return "CCE";
case 38: return "EDATE";
case 39: return "SDATE";
+ case 40: return "MTIME";
+ case 41: return "YMDHMS";
default: return "invalid";
}
}
char name[9];
printf ("%08llx: variable record #%d\n",
- (long long int) ftello (r->file), r->n_variable_records++);
+ (long long int) ftello (r->file), ++r->n_variable_records);
width = read_int (r);
has_variable_label = read_int (r);
if (has_variable_label == 1)
{
long long int offset = ftello (r->file);
- size_t len, read_len;
- char label[255 + 1];
-
- len = read_int (r);
+ enum { MAX_LABEL_LEN = 65536 };
- /* Read up to 255 bytes of label. */
- read_len = MIN (sizeof label - 1, len);
+ size_t len = read_int (r);
+ size_t read_len = MIN (MAX_LABEL_LEN, len);
+ char *label = xmalloc (read_len + 1);
read_string (r, label, read_len + 1);
printf("\t%08llx Variable label: \"%s\"\n", offset, label);
-
- /* Skip unread label bytes. */
- skip_bytes (r, len - read_len);
+ free (label);
/* Skip label padding up to multiple of 4 bytes. */
skip_bytes (r, ROUND_UP (len, 4) - len);
{
double low = read_float (r);
double high = read_float (r);
- printf (" %g...%g", low, high);
+ printf (" %.*g...%.*g", DBL_DIG + 1, low, DBL_DIG + 1, high);
missing_value_code = -missing_value_code - 2;
}
for (i = 0; i < missing_value_code; i++)
- printf (" %g", read_float (r));
+ printf (" %.*g", DBL_DIG + 1, read_float (r));
}
else if (width > 0)
{
if (!isprint (raw_value[n_printable]))
break;
- printf ("%g/\"%.*s\"", value, n_printable, raw_value);
+ printf ("%.*g/\"%.*s\"", DBL_DIG + 1, value, n_printable, raw_value);
}
/* Reads value labels from sysfile R and inserts them into the
static void
read_value_label_record (struct sfm_reader *r)
{
- int label_cnt, var_cnt;
+ int n_labels, n_vars;
int i;
printf ("%08llx: value labels record\n", (long long int) ftello (r->file));
/* Read number of labels. */
- label_cnt = read_int (r);
- for (i = 0; i < label_cnt; i++)
+ n_labels = read_int (r);
+ for (i = 0; i < n_labels; i++)
{
char raw_value[8];
unsigned char label_len;
/* Read number of variables associated with value label from type 4
record. */
printf ("\t%08llx: apply to variables", (long long int) ftello (r->file));
- var_cnt = read_int (r);
- for (i = 0; i < var_cnt; i++)
+ n_vars = read_int (r);
+ for (i = 0; i < n_vars; i++)
printf (" #%d", read_int (r));
putchar ('\n');
}
read_machine_float_info (r, size, count);
return;
- case 5:
- /* Variable sets information. We don't use these yet.
- They only apply to GUIs; see VARSETS on the APPLY
- DICTIONARY command in SPSS documentation. */
- break;
-
case 6:
/* DATE variable information. We don't use it yet, but we
should. */
sys_error (r, "Bad size (%zu) or count (%zu) on extension 4.",
size, count);
- printf ("\tsysmis: %g (%a)\n", sysmis, sysmis);
+ printf ("\tsysmis: %.*g (%a)\n", DBL_DIG + 1, sysmis, sysmis);
if (sysmis != SYSMIS)
- sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
- sysmis, sysmis, "SYSMIS");
+ sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.",
+ DBL_DIG + 1, sysmis, sysmis, "SYSMIS");
- printf ("\thighest: %g (%a)\n", highest, highest);
+ printf ("\thighest: %.*g (%a)\n", DBL_DIG + 1, highest, highest);
if (highest != HIGHEST)
- sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
- highest, highest, "HIGHEST");
+ sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.",
+ DBL_DIG + 1, highest, highest, "HIGHEST");
- printf ("\tlowest: %g (%a)\n", lowest, lowest);
+ printf ("\tlowest: %.*g (%a)\n", DBL_DIG + 1, lowest, lowest);
if (lowest != LOWEST && lowest != SYSMIS)
- sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
- lowest, lowest, "LOWEST");
+ sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.",
+ DBL_DIG + 1, lowest, lowest, "LOWEST");
}
static void
const char *s;
printf ("%08llx: extra product info\n", (long long int) ftello (r->file));
- text = open_text_record (r, size * count);
+ text = open_text_record (r, size, count);
s = text_get_all (text);
print_string (s, strlen (s));
close_text_record (text);
printf ("%08llx: multiple response sets\n",
(long long int) ftello (r->file));
- text = open_text_record (r, size * count);
+ text = open_text_record (r, size, count);
for (;;)
{
const char *name;
const char *label;
const char *variables;
+ while (text_match (text, '\n'))
+ continue;
+
name = text_tokenize (text, '=');
if (name == NULL)
break;
}
number = text_tokenize (text, ' ');
- if (!strcmp (number, "11"))
+ if (!number)
+ sys_warn (r, "Missing label source value "
+ "following `E' at offset %zu in MRSETS record",
+ text_pos (text));
+ else if (!strcmp (number, "11"))
label_from_var_label = true;
else if (strcmp (number, "1"))
sys_warn (r, "Unexpected label source value `%s' "
break;
variables = text_tokenize (text, '\n');
- if (variables == NULL)
- {
- sys_warn (r, "missing variable names following label "
- "at offset %zu in mrsets record", text_pos (text));
- break;
- }
printf ("\t\"%s\": multiple %s set",
name, type == MRSET_MC ? "category" : "dichotomy");
printf (", label \"%s\"", label);
if (label_from_var_label)
printf (", label from variable label");
- printf(", variables \"%s\"\n", variables);
+ if (variables != NULL)
+ printf(", variables \"%s\"\n", variables);
+ else
+ printf(", no variables\n");
}
close_text_record (text);
}
printf ("%08llx: long variable names (short => long)\n",
(long long int) ftello (r->file));
- text = open_text_record (r, size * count);
+ text = open_text_record (r, size, count);
while (read_variable_to_value_pair (text, &var, &long_name))
printf ("\t%s => %s\n", var, long_name);
close_text_record (text);
printf ("%08llx: very long strings (variable => length)\n",
(long long int) ftello (r->file));
- text = open_text_record (r, size * count);
+ text = open_text_record (r, size, count);
while (read_variable_to_value_pair (text, &var, &length_s))
printf ("\t%s => %d\n", var, atoi (length_s));
close_text_record (text);
const char *key;
int index;
- for (;;)
+ for (;;)
{
key = text_tokenize (text, '(');
if (key == NULL)
return true;
-
+
for (index = 1; ; index++)
{
/* Parse the value. */
const char *value = text_tokenize (text, '\n');
- if (value == NULL)
+ if (value == NULL)
{
sys_warn (r, "%s: Error parsing attribute value %s[%d]",
variable, key, index);
}
if (text_match (text, '/'))
- return true;
+ return true;
}
}
}
static void
-read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count)
+read_datafile_attributes (struct sfm_reader *r, size_t size, size_t count)
{
struct text_record *text;
-
+
printf ("%08llx: datafile attributes\n", (long long int) ftello (r->file));
- text = open_text_record (r, size * count);
+ text = open_text_record (r, size, count);
read_attributes (r, text, "datafile");
close_text_record (text);
}
read_string (r, encoding, count + 1);
printf ("%08llx: Character Encoding: %s\n", posn, encoding);
+
+ free (encoding);
}
static void
}
static void
-read_variable_attributes (struct sfm_reader *r, size_t size, size_t count)
+read_variable_attributes (struct sfm_reader *r, size_t size, size_t count)
{
struct text_record *text;
-
+
printf ("%08llx: variable attributes\n", (long long int) ftello (r->file));
- text = open_text_record (r, size * count);
- for (;;)
+ text = open_text_record (r, size, count);
+ for (;;)
{
const char *variable = text_tokenize (text, ':');
if (variable == NULL || !read_attributes (r, text, variable))
- break;
+ break;
}
close_text_record (text);
}
static void
-read_compressed_data (struct sfm_reader *r, int max_cases)
+read_simple_compressed_data (struct sfm_reader *r, int max_cases)
{
enum { N_OPCODES = 8 };
uint8_t opcodes[N_OPCODES];
{
printf ("%08llx: case %d's uncompressible data begins\n",
(long long int) ftello (r->file), case_num);
- for (i = 0; i < r->n_var_widths; )
+ for (i = 0; i < r->n_var_widths;)
{
int width = r->var_widths[i];
char raw_value[8];
switch (opcode)
{
default:
- printf ("%g", opcode - r->bias);
+ printf ("%.*g", DBL_DIG + 1, opcode - r->bias);
if (width != 0)
printf (", but this is a string variable (width=%d)", width);
printf ("\n");
}
}
}
+
+/* Dumps the ZLIB-compressed data section of system file R.
+
+   In order, this prints the ZLIB data header (three 64-bit fields), skips
+   over the compressed data itself without inflating it, then prints the
+   fixed part of the ZLIB trailer followed by every block descriptor in the
+   trailer.
+
+   Values that disagree with what the rest of the file implies are not
+   treated as errors; they are followed by an "(Expected ...)" hint.  The
+   one exception is a trailer offset that lies before the start of the
+   compressed data: the data could not be skipped in that case, so it is
+   reported through sys_error(). */
+static void
+read_zlib_compressed_data (struct sfm_reader *r)
+{
+  long long int ofs;
+  long long int data_ofs;
+  long long int this_ofs, next_ofs, next_len;
+  long long int bias, zero;
+  long long int expected_uncmp_ofs, expected_cmp_ofs;
+  unsigned int block_size, n_blocks;
+  unsigned int i;
+
+  /* Consume and discard the 32-bit word that precedes the ZLIB data
+     header, so that OFS is the file offset of the header itself. */
+  read_int (r);
+  ofs = ftello (r->file);
+  printf ("\n%08llx: ZLIB compressed data header:\n", ofs);
+
+  this_ofs = read_int64 (r);
+  next_ofs = read_int64 (r);
+  next_len = read_int64 (r);
+
+  printf ("\tzheader_ofs: 0x%llx\n", this_ofs);
+  if (this_ofs != ofs)
+    printf ("\t\t(Expected 0x%llx.)\n", ofs);
+  printf ("\tztrailer_ofs: 0x%llx\n", next_ofs);
+  printf ("\tztrailer_len: %lld\n", next_len);
+  if (next_len < 24 || next_len % 24)
+    printf ("\t\t(Trailer length is not a positive multiple of 24.)\n");
+
+  /* The compressed data begins immediately after the three 64-bit header
+     fields and extends up to the trailer.  A trailer offset before that
+     point would make the number of bytes to skip negative, which would
+     turn into an enormous count if converted to an unsigned size, so
+     report it explicitly instead. */
+  data_ofs = ofs + 8 * 3;
+  if (next_ofs < data_ofs)
+    sys_error (r, "ZLIB trailer offset 0x%llx precedes the start of the "
+               "compressed data at 0x%llx.", next_ofs, data_ofs);
+
+  printf ("\n%08llx: 0x%llx bytes of ZLIB compressed data\n",
+          data_ofs, next_ofs - data_ofs);
+
+  skip_bytes (r, next_ofs - data_ofs);
+
+  /* Fixed part of the trailer: two 64-bit fields and two 32-bit fields,
+     24 bytes in all. */
+  printf ("\n%08llx: ZLIB trailer fixed header:\n", next_ofs);
+  bias = read_int64 (r);
+  zero = read_int64 (r);
+  block_size = read_int (r);
+  n_blocks = read_int (r);
+  printf ("\tbias: %lld\n", bias);
+  printf ("\tzero: 0x%llx\n", zero);
+  if (zero != 0)
+    printf ("\t\t(Expected 0.)\n");
+  printf ("\tblock_size: 0x%x\n", block_size);
+  if (block_size != 0x3ff000)
+    printf ("\t\t(Expected 0x3ff000.)\n");
+  printf ("\tn_blocks: %u\n", n_blocks);
+  /* Each block descriptor is also 24 bytes, so the trailer length implies
+     the block count.  The implied count is signed: it is -1 when the
+     trailer is shorter than its own fixed part. */
+  if (n_blocks != next_len / 24 - 1)
+    printf ("\t\t(Expected %lld.)\n", next_len / 24 - 1);
+
+  /* Running offsets at which the next block is expected to start, in the
+     uncompressed and the compressed stream respectively. */
+  expected_uncmp_ofs = ofs;
+  expected_cmp_ofs = ofs + 24;
+  for (i = 0; i < n_blocks; i++)
+    {
+      long long int blockinfo_ofs = ftello (r->file);
+      unsigned long long int uncompressed_ofs = read_int64 (r);
+      unsigned long long int compressed_ofs = read_int64 (r);
+      unsigned int uncompressed_size = read_int (r);
+      unsigned int compressed_size = read_int (r);
+
+      printf ("\n%08llx: ZLIB block descriptor %u\n", blockinfo_ofs, i + 1);
+
+      /* Report the running expectation that was actually compared
+         against, not the offset of the first block. */
+      printf ("\tuncompressed_ofs: 0x%llx\n", uncompressed_ofs);
+      if (uncompressed_ofs != expected_uncmp_ofs)
+        printf ("\t\t(Expected 0x%llx.)\n", expected_uncmp_ofs);
+
+      printf ("\tcompressed_ofs: 0x%llx\n", compressed_ofs);
+      if (compressed_ofs != expected_cmp_ofs)
+        printf ("\t\t(Expected 0x%llx.)\n", expected_cmp_ofs);
+
+      /* Every block but the last is expected to be exactly BLOCK_SIZE
+         bytes when uncompressed. */
+      printf ("\tuncompressed_size: 0x%x\n", uncompressed_size);
+      if (i < n_blocks - 1 && uncompressed_size != block_size)
+        printf ("\t\t(Expected 0x%x.)\n", block_size);
+
+      /* The last block's compressed data is expected to end exactly where
+         the trailer begins. */
+      printf ("\tcompressed_size: 0x%x\n", compressed_size);
+      if (i == n_blocks - 1 && compressed_ofs + compressed_size != next_ofs)
+        printf ("\t\t(This was expected to be 0x%llx.)\n",
+                next_ofs - compressed_size);
+
+      expected_uncmp_ofs += uncompressed_size;
+      expected_cmp_ofs += compressed_size;
+    }
+}
\f
/* Helpers for reading records that consist of structured text
strings. */
size_t pos; /* Current position in buffer. */
};
-/* Reads SIZE bytes into a text record for R,
+/* Reads SIZE * COUNT bytes into a text record for R,
and returns the new text record. */
static struct text_record *
-open_text_record (struct sfm_reader *r, size_t size)
+open_text_record (struct sfm_reader *r, size_t size, size_t count)
{
struct text_record *text = xmalloc (sizeof *text);
- char *buffer = xmalloc (size + 1);
- read_bytes (r, buffer, size);
- buffer[size] = '\0';
+
+ if (size_overflow_p (xsum (1, xtimes (size, count)))) /* Reject records whose SIZE * COUNT + 1 would not fit in size_t. */
+ sys_error (r, "Extension record too large.");
+
+ size_t n_bytes = size * count; /* Cannot wrap: checked just above. */
+ char *buffer = xmalloc (n_bytes + 1); /* One extra byte for the terminator stored below. */
+ read_bytes (r, buffer, n_bytes);
+ buffer[n_bytes] = '\0'; /* Lets the record contents be handled as a C string. */
text->reader = r;
text->buffer = buffer;
- text->size = size;
+ text->size = n_bytes;
text->pos = 0;
return text;
}
}
static bool
-text_match (struct text_record *text, int c)
+text_match (struct text_record *text, int c)
{
- if (text->pos < text->size && text->buffer[text->pos] == c)
+ if (text->pos < text->size && text->buffer[text->pos] == c)
{
text->pos++;
return true;
too. */
static inline bool
read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
- void *buf, size_t byte_cnt)
+ void *buf, size_t n_bytes)
{
- size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
- if (bytes_read == byte_cnt)
+ size_t bytes_read = fread (buf, 1, n_bytes, r->file);
+ if (bytes_read == n_bytes)
return true;
else if (ferror (r->file))
sys_error (r, "System error: %s.", strerror (errno));
-/* Reads BYTE_CNT into BUF.
+/* Reads N_BYTES bytes into BUF.
Aborts upon I/O error or if end-of-file is encountered. */
static void
-read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes)
{
- read_bytes_internal (r, false, buf, byte_cnt);
+ read_bytes_internal (r, false, buf, n_bytes);
}
-/* Reads BYTE_CNT bytes into BUF.
+/* Reads N_BYTES bytes into BUF.
Returns false if an immediate end-of-file is encountered.
Aborts if an I/O error or a partial read occurs. */
static bool
-try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+try_read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes)
{
- return read_bytes_internal (r, true, buf, byte_cnt);
+ return read_bytes_internal (r, true, buf, n_bytes);
}
/* Reads a 32-bit signed integer from R and returns its value in