/* PSPP - a program for statistical analysis.
- Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <ctype.h>
#include <errno.h>
+#include <float.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#define ID_MAX_LEN 64
+enum compression
+ {
+ COMP_NONE, /* Header compression value 0 in a "$FL2" file: no compression. */
+ COMP_SIMPLE, /* Header compression value 1 in a "$FL2" file: simple compression. */
+ COMP_ZLIB /* Header compression value 2 in a "$FL3" file: ZLIB compression. */
+ };
+
struct sfm_reader
{
const char *file_name;
enum integer_format integer_format;
enum float_format float_format;
- bool compressed;
+ enum compression compression;
double bias;
};
size_t size, size_t count);
static void read_unknown_extension (struct sfm_reader *,
size_t size, size_t count);
-static void read_compressed_data (struct sfm_reader *, int max_cases);
+static void read_simple_compressed_data (struct sfm_reader *, int max_cases);
+static void read_zlib_compressed_data (struct sfm_reader *);
static struct text_record *open_text_record (
struct sfm_reader *, size_t size);
r.n_var_widths = 0;
r.allocated_var_widths = 0;
r.var_widths = 0;
- r.compressed = false;
+ r.compression = COMP_NONE;
if (argc - optind > 1)
printf ("Reading \"%s\":\n", r.file_name);
(long long int) ftello (r.file),
(long long int) ftello (r.file) + 4);
- if (r.compressed && max_cases > 0)
- read_compressed_data (&r, max_cases);
+ if (r.compression == COMP_SIMPLE)
+ {
+ if (max_cases > 0)
+ read_simple_compressed_data (&r, max_cases);
+ }
+ else if (r.compression == COMP_ZLIB)
+ read_zlib_compressed_data (&r);
fclose (r.file);
}
char creation_date[10];
char creation_time[9];
char file_label[65];
+ bool zmagic;
read_string (r, rec_type, sizeof rec_type);
read_string (r, eye_catcher, sizeof eye_catcher);
- if (strcmp ("$FL2", rec_type) != 0)
+ if (!strcmp ("$FL2", rec_type))
+ zmagic = false;
+ else if (!strcmp ("$FL3", rec_type))
+ zmagic = true;
+ else
sys_error (r, "This is not an SPSS system file.");
/* Identify integer format. */
weight_index = read_int (r);
ncases = read_int (r);
- r->compressed = compressed != 0;
+ if (!zmagic)
+ {
+ if (compressed == 0)
+ r->compression = COMP_NONE;
+ else if (compressed == 1)
+ r->compression = COMP_SIMPLE;
+ else if (compressed != 0)
+ sys_error (r, "SAV file header has invalid compression value "
+ "%"PRId32".", compressed);
+ }
+ else
+ {
+ if (compressed == 2)
+ r->compression = COMP_ZLIB;
+ else
+ sys_error (r, "ZSAV file header has invalid compression value "
+ "%"PRId32".", compressed);
+ }
/* Identify floating-point format and obtain compression bias. */
read_bytes (r, raw_bias, sizeof raw_bias);
printf ("File header record:\n");
printf ("\t%17s: %s\n", "Product name", eye_catcher);
printf ("\t%17s: %"PRId32"\n", "Layout code", layout_code);
- printf ("\t%17s: %"PRId32"\n", "Compressed", compressed);
+ printf ("\t%17s: %"PRId32" (%s)\n", "Compressed",
+ compressed,
+ r->compression == COMP_NONE ? "no compression"
+ : r->compression == COMP_SIMPLE ? "simple compression"
+ : r->compression == COMP_ZLIB ? "ZLIB compression"
+ : "<error>");
printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index);
printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases);
- printf ("\t%17s: %g\n", "Compression bias", r->bias);
+ printf ("\t%17s: %.*g\n", "Compression bias", DBL_DIG + 1, r->bias);
printf ("\t%17s: %s\n", "Creation date", creation_date);
printf ("\t%17s: %s\n", "Creation time", creation_time);
printf ("\t%17s: \"%s\"\n", "File label", file_label);
if (has_variable_label == 1)
{
long long int offset = ftello (r->file);
- size_t len, read_len;
- char label[255 + 1];
+ size_t len;
+ char *label;
len = read_int (r);
/* Read up to 255 bytes of label. */
- read_len = MIN (sizeof label - 1, len);
- read_string (r, label, read_len + 1);
+ label = xmalloc (len + 1);
+ read_string (r, label, len + 1);
printf("\t%08llx Variable label: \"%s\"\n", offset, label);
-
- /* Skip unread label bytes. */
- skip_bytes (r, len - read_len);
+ free (label);
/* Skip label padding up to multiple of 4 bytes. */
skip_bytes (r, ROUND_UP (len, 4) - len);
{
double low = read_float (r);
double high = read_float (r);
- printf (" %g...%g", low, high);
+ printf (" %.*g...%.*g", DBL_DIG + 1, low, DBL_DIG + 1, high);
missing_value_code = -missing_value_code - 2;
}
for (i = 0; i < missing_value_code; i++)
- printf (" %g", read_float (r));
+ printf (" %.*g", DBL_DIG + 1, read_float (r));
}
else if (width > 0)
{
if (!isprint (raw_value[n_printable]))
break;
- printf ("%g/\"%.*s\"", value, n_printable, raw_value);
+ printf ("%.*g/\"%.*s\"", DBL_DIG + 1, value, n_printable, raw_value);
}
/* Reads value labels from sysfile R and inserts them into the
read_machine_float_info (r, size, count);
return;
- case 5:
- /* Variable sets information. We don't use these yet.
- They only apply to GUIs; see VARSETS on the APPLY
- DICTIONARY command in SPSS documentation. */
- break;
-
case 6:
/* DATE variable information. We don't use it yet, but we
should. */
sys_error (r, "Bad size (%zu) or count (%zu) on extension 4.",
size, count);
- printf ("\tsysmis: %g (%a)\n", sysmis, sysmis);
+ printf ("\tsysmis: %.*g (%a)\n", DBL_DIG + 1, sysmis, sysmis);
if (sysmis != SYSMIS)
- sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
- sysmis, sysmis, "SYSMIS");
+ sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.",
+ DBL_DIG + 1, sysmis, sysmis, "SYSMIS");
- printf ("\thighest: %g (%a)\n", highest, highest);
+ printf ("\thighest: %.*g (%a)\n", DBL_DIG + 1, highest, highest);
if (highest != HIGHEST)
- sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
- highest, highest, "HIGHEST");
+ sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.",
+ DBL_DIG + 1, highest, highest, "HIGHEST");
- printf ("\tlowest: %g (%a)\n", lowest, lowest);
+ printf ("\tlowest: %.*g (%a)\n", DBL_DIG + 1, lowest, lowest);
if (lowest != LOWEST && lowest != SYSMIS)
- sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
- lowest, lowest, "LOWEST");
+ sys_warn (r, "File specifies unexpected value %.*g (%a) as %s.",
+ DBL_DIG + 1, lowest, lowest, "LOWEST");
}
static void
const char *label;
const char *variables;
+ while (text_match (text, '\n'))
+ continue;
+
name = text_tokenize (text, '=');
if (name == NULL)
break;
break;
variables = text_tokenize (text, '\n');
- if (variables == NULL)
- {
- sys_warn (r, "missing variable names following label "
- "at offset %zu in mrsets record", text_pos (text));
- break;
- }
printf ("\t\"%s\": multiple %s set",
name, type == MRSET_MC ? "category" : "dichotomy");
printf (", label \"%s\"", label);
if (label_from_var_label)
printf (", label from variable label");
- printf(", variables \"%s\"\n", variables);
+ if (variables != NULL)
+ printf(", variables \"%s\"\n", variables);
+ else
+ printf(", no variables\n");
}
close_text_record (text);
}
}
static void
-read_compressed_data (struct sfm_reader *r, int max_cases)
+read_simple_compressed_data (struct sfm_reader *r, int max_cases)
{
enum { N_OPCODES = 8 };
uint8_t opcodes[N_OPCODES];
switch (opcode)
{
default:
- printf ("%g", opcode - r->bias);
+ printf ("%.*g", DBL_DIG + 1, opcode - r->bias);
if (width != 0)
printf (", but this is a string variable (width=%d)", width);
printf ("\n");
}
}
}
+
+/* Dumps the ZLIB compressed data section of the file open in R: the ZLIB
+   data header, the extent of the compressed data itself (which is skipped
+   over, not decompressed), the fixed part of the ZLIB trailer, and every
+   block descriptor in the trailer.  Wherever a field disagrees with the
+   value implied by the rest of the file, a parenthesized note reporting
+   the expected value is printed after it. */
+static void
+read_zlib_compressed_data (struct sfm_reader *r)
+{
+  long long int ofs;
+  long long int this_ofs, next_ofs, next_len;
+  long long int bias, zero;
+  unsigned long long int expected_uncmp_ofs, expected_cmp_ofs;
+  unsigned int block_size, n_blocks;
+  unsigned int i;
+
+  /* Discard the 32-bit word in front of the ZLIB data header.
+     NOTE(review): presumably the filler word of the dictionary
+     termination record; confirm against the caller. */
+  read_int (r);
+  ofs = ftello (r->file);
+  printf ("\n%08llx: ZLIB compressed data header:\n", ofs);
+
+  /* The header is three 64-bit integers (8 * 3 bytes). */
+  this_ofs = read_int64 (r);
+  next_ofs = read_int64 (r);
+  next_len = read_int64 (r);
+
+  printf ("\tzheader_ofs: 0x%llx\n", this_ofs);
+  if (this_ofs != ofs)
+    printf ("\t\t(Expected 0x%llx.)\n", ofs);
+  printf ("\tztrailer_ofs: 0x%llx\n", next_ofs);
+  printf ("\tztrailer_len: %lld\n", next_len);
+  if (next_len < 24 || next_len % 24)
+    printf ("\t\t(Trailer length is not a positive multiple of 24.)\n");
+
+  /* Everything between the end of the header and the start of the trailer
+     is compressed data, which is skipped rather than decoded. */
+  printf ("\n%08llx: 0x%llx bytes of ZLIB compressed data\n",
+          ofs + 8 * 3, next_ofs - (ofs + 8 * 3));
+
+  skip_bytes (r, next_ofs - (ofs + 8 * 3));
+
+  printf ("\n%08llx: ZLIB trailer fixed header:\n", next_ofs);
+  bias = read_int64 (r);
+  zero = read_int64 (r);
+  block_size = read_int (r);
+  n_blocks = read_int (r);
+  printf ("\tbias: %lld\n", bias);
+  printf ("\tzero: 0x%llx\n", zero);
+  if (zero != 0)
+    printf ("\t\t(Expected 0.)\n");
+  printf ("\tblock_size: 0x%x\n", block_size);
+  if (block_size != 0x3ff000)
+    printf ("\t\t(Expected 0x3ff000.)\n");
+  printf ("\tn_blocks: %u\n", n_blocks);
+  /* The fixed header read above and each block descriptor read below are
+     24 bytes apiece, so the trailer length fixes the descriptor count.
+     The expected count is a signed value (it is -1 when the trailer
+     length is under 24), so it is printed as one. */
+  if (n_blocks != next_len / 24 - 1)
+    printf ("\t\t(Expected %lld.)\n", next_len / 24 - 1);
+
+  /* The first block is expected to start at OFS in the uncompressed
+     stream and immediately after the 24-byte header in the file. */
+  expected_uncmp_ofs = ofs;
+  expected_cmp_ofs = ofs + 24;
+  for (i = 0; i < n_blocks; i++)
+    {
+      long long int blockinfo_ofs = ftello (r->file);
+      unsigned long long int uncompressed_ofs = read_int64 (r);
+      unsigned long long int compressed_ofs = read_int64 (r);
+      unsigned int uncompressed_size = read_int (r);
+      unsigned int compressed_size = read_int (r);
+
+      printf ("\n%08llx: ZLIB block descriptor %u\n", blockinfo_ofs, i + 1);
+
+      /* Each offset should equal the running total accumulated from the
+         sizes of the preceding blocks, so that running total (not the
+         initial offset) is what gets reported as the expected value. */
+      printf ("\tuncompressed_ofs: 0x%llx\n", uncompressed_ofs);
+      if (uncompressed_ofs != expected_uncmp_ofs)
+        printf ("\t\t(Expected 0x%llx.)\n", expected_uncmp_ofs);
+
+      printf ("\tcompressed_ofs: 0x%llx\n", compressed_ofs);
+      if (compressed_ofs != expected_cmp_ofs)
+        printf ("\t\t(Expected 0x%llx.)\n", expected_cmp_ofs);
+
+      /* Every block but the last is expected to be exactly one full
+         block in size. */
+      printf ("\tuncompressed_size: 0x%x\n", uncompressed_size);
+      if (i < n_blocks - 1 && uncompressed_size != block_size)
+        printf ("\t\t(Expected 0x%x.)\n", block_size);
+
+      /* The last block's compressed data should end exactly where the
+         trailer begins, which determines its expected compressed size. */
+      printf ("\tcompressed_size: 0x%x\n", compressed_size);
+      if (i == n_blocks - 1 && compressed_ofs + compressed_size != next_ofs)
+        printf ("\t\t(This was expected to be 0x%llx.)\n",
+                next_ofs - compressed_ofs);
+
+      expected_uncmp_ofs += uncompressed_size;
+      expected_cmp_ofs += compressed_size;
+    }
+}
\f
/* Helpers for reading records that consist of structured text
strings. */