/* PSPP - a program for statistical analysis.
- Copyright (C) 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#define ID_MAX_LEN 64
+enum compression
+ {
+ COMP_NONE,
+ COMP_SIMPLE,
+ COMP_ZLIB
+ };
+
struct sfm_reader
{
const char *file_name;
enum integer_format integer_format;
enum float_format float_format;
- bool compressed;
+ enum compression compression;
double bias;
};
size_t size, size_t count);
static void read_machine_float_info (struct sfm_reader *,
size_t size, size_t count);
+static void read_extra_product_info (struct sfm_reader *,
+ size_t size, size_t count);
static void read_mrsets (struct sfm_reader *, size_t size, size_t count);
static void read_display_parameters (struct sfm_reader *,
size_t size, size_t count);
size_t size, size_t count);
static void read_long_string_value_labels (struct sfm_reader *r,
size_t size, size_t count);
+static void read_long_string_missing_values (struct sfm_reader *r,
+ size_t size, size_t count);
static void read_unknown_extension (struct sfm_reader *,
size_t size, size_t count);
-static void read_compressed_data (struct sfm_reader *, int max_cases);
+static void read_simple_compressed_data (struct sfm_reader *, int max_cases);
+static void read_zlib_compressed_data (struct sfm_reader *);
static struct text_record *open_text_record (
struct sfm_reader *, size_t size);
static bool text_match (struct text_record *text, int c);
static const char *text_parse_counted_string (struct text_record *);
static size_t text_pos (const struct text_record *);
+static const char *text_get_all (const struct text_record *);
static void usage (void);
static void sys_warn (struct sfm_reader *, const char *, ...)
static void skip_bytes (struct sfm_reader *, size_t);
static void trim_spaces (char *);
+static void print_string (const char *s, size_t len);
+
int
main (int argc, char *argv[])
{
r.n_var_widths = 0;
r.allocated_var_widths = 0;
r.var_widths = 0;
- r.compressed = false;
+ r.compression = COMP_NONE;
if (argc - optind > 1)
printf ("Reading \"%s\":\n", r.file_name);
(long long int) ftello (r.file),
(long long int) ftello (r.file) + 4);
- if (r.compressed && max_cases > 0)
- read_compressed_data (&r, max_cases);
+ if (r.compression == COMP_SIMPLE)
+ {
+ if (max_cases > 0)
+ read_simple_compressed_data (&r, max_cases);
+ }
+ else if (r.compression == COMP_ZLIB)
+ read_zlib_compressed_data (&r);
fclose (r.file);
}
char eye_catcher[61];
uint8_t raw_layout_code[4];
int32_t layout_code;
- int32_t nominal_case_size;
int32_t compressed;
int32_t weight_index;
int32_t ncases;
char creation_date[10];
char creation_time[9];
char file_label[65];
+ bool zmagic;
read_string (r, rec_type, sizeof rec_type);
read_string (r, eye_catcher, sizeof eye_catcher);
- if (strcmp ("$FL2", rec_type) != 0)
+ if (!strcmp ("$FL2", rec_type))
+ zmagic = false;
+ else if (!strcmp ("$FL3", rec_type))
+ zmagic = true;
+ else
sys_error (r, "This is not an SPSS system file.");
/* Identify integer format. */
layout_code = integer_get (r->integer_format,
raw_layout_code, sizeof raw_layout_code);
- nominal_case_size = read_int (r);
+ read_int (r); /* Nominal case size (not actually useful). */
compressed = read_int (r);
weight_index = read_int (r);
ncases = read_int (r);
- r->compressed = compressed != 0;
+ if (!zmagic)
+ {
+ if (compressed == 0)
+ r->compression = COMP_NONE;
+ else if (compressed == 1)
+ r->compression = COMP_SIMPLE;
+ else if (compressed != 0)
+ sys_error (r, "SAV file header has invalid compression value "
+ "%"PRId32".", compressed);
+ }
+ else
+ {
+ if (compressed == 2)
+ r->compression = COMP_ZLIB;
+ else
+ sys_error (r, "ZSAV file header has invalid compression value "
+ "%"PRId32".", compressed);
+ }
/* Identify floating-point format and obtain compression bias. */
read_bytes (r, raw_bias, sizeof raw_bias);
printf ("File header record:\n");
printf ("\t%17s: %s\n", "Product name", eye_catcher);
printf ("\t%17s: %"PRId32"\n", "Layout code", layout_code);
- printf ("\t%17s: %"PRId32"\n", "Compressed", compressed);
+ printf ("\t%17s: %"PRId32" (%s)\n", "Compressed",
+ compressed,
+ r->compression == COMP_NONE ? "no compression"
+ : r->compression == COMP_SIMPLE ? "simple compression"
+ : r->compression == COMP_ZLIB ? "ZLIB compression"
+ : "<error>");
printf ("\t%17s: %"PRId32"\n", "Weight index", weight_index);
printf ("\t%17s: %"PRId32"\n", "Number of cases", ncases);
printf ("\t%17s: %g\n", "Compression bias", r->bias);
double value;
value = float_get_double (r->float_format, raw_value);
- for (n_printable = 0; n_printable < sizeof raw_value; n_printable++)
+ for (n_printable = 0; n_printable < 8; n_printable++)
if (!isprint (raw_value[n_printable]))
break;
read_mrsets (r, size, count);
return;
+ case 10:
+ read_extra_product_info (r, size, count);
+ return;
+
case 11:
read_display_parameters (r, size, count);
return;
read_long_string_value_labels (r, size, count);
return;
+ case 22:
+ read_long_string_missing_values (r, size, count);
+ return;
+
default:
sys_warn (r, "Unrecognized record type 7, subtype %d.", subtype);
read_unknown_extension (r, size, count);
sys_error (r, "Bad size (%zu) or count (%zu) on extension 4.",
size, count);
- printf ("\tsysmis: %g\n", sysmis);
+ printf ("\tsysmis: %g (%a)\n", sysmis, sysmis);
if (sysmis != SYSMIS)
- sys_warn (r, "File specifies unexpected value %g as %s.",
- sysmis, "SYSMIS");
+ sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
+ sysmis, sysmis, "SYSMIS");
- printf ("\thighest: %g\n", highest);
+ printf ("\thighest: %g (%a)\n", highest, highest);
if (highest != HIGHEST)
- sys_warn (r, "File specifies unexpected value %g as %s.",
- highest, "HIGHEST");
+ sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
+ highest, highest, "HIGHEST");
- printf ("\tlowest: %g\n", lowest);
- if (lowest != LOWEST)
- sys_warn (r, "File specifies unexpected value %g as %s.",
- lowest, "LOWEST");
+ printf ("\tlowest: %g (%a)\n", lowest, lowest);
+ if (lowest != LOWEST && lowest != SYSMIS)
+ sys_warn (r, "File specifies unexpected value %g (%a) as %s.",
+ lowest, lowest, "LOWEST");
+}
+
+static void
+read_extra_product_info (struct sfm_reader *r,
+ size_t size, size_t count)
+{
+ struct text_record *text;
+ const char *s;
+
+ printf ("%08llx: extra product info\n", (long long int) ftello (r->file));
+ text = open_text_record (r, size * count);
+ s = text_get_all (text);
+ print_string (s, strlen (s));
+ close_text_record (text);
}
/* Read record type 7, subtype 7. */
}
}
+static void
+read_long_string_missing_values (struct sfm_reader *r,
+ size_t size, size_t count)
+{
+ long long int start = ftello (r->file);
+
+ printf ("%08llx: long string missing values\n", start);
+ while (ftello (r->file) - start < size * count)
+ {
+ long long posn = ftello (r->file);
+ char var_name[ID_MAX_LEN + 1];
+ uint8_t n_missing_values;
+ int var_name_len;
+ int i;
+
+ /* Read variable name. */
+ var_name_len = read_int (r);
+ if (var_name_len > ID_MAX_LEN)
+ sys_error (r, "Variable name length in long string value label "
+ "record (%d) exceeds %d-byte limit.",
+ var_name_len, ID_MAX_LEN);
+ read_string (r, var_name, var_name_len + 1);
+
+ /* Read number of values. */
+ read_bytes (r, &n_missing_values, 1);
+
+ printf ("\t%08llx: %s, %d missing values:",
+ posn, var_name, n_missing_values);
+
+ /* Read values. */
+ for (i = 0; i < n_missing_values; i++)
+ {
+ char *value;
+ int value_length;
+
+ posn = ftello (r->file);
+
+ /* Read value. */
+ value_length = read_int (r);
+ value = xmalloc (value_length + 1);
+ read_string (r, value, value_length + 1);
+
+ printf (" \"%s\"", value);
+
+ free (value);
+ }
+ printf ("\n");
+ }
+}
+
static void
hex_dump (size_t offset, const void *buffer_, size_t buffer_size)
{
{
buffer = xmalloc (count);
read_bytes (r, buffer, count);
- if (memchr (buffer, 0, count) == 0)
- for (i = 0; i < count; i++)
- {
- unsigned char c = buffer[i];
-
- if (c == '\\')
- printf ("\\\\");
- else if (c == '\n' || isprint (c))
- putchar (c);
- else
- printf ("\\%02x", c);
- }
- else
- hex_dump (0, buffer, count);
+ print_string (CHAR_CAST (char *, buffer), count);
free (buffer);
}
}
}
static void
-read_compressed_data (struct sfm_reader *r, int max_cases)
+read_simple_compressed_data (struct sfm_reader *r, int max_cases)
{
enum { N_OPCODES = 8 };
uint8_t opcodes[N_OPCODES];
}
}
}
+
+static void
+read_zlib_compressed_data (struct sfm_reader *r)
+{
+ long long int ofs;
+ long long int this_ofs, next_ofs, next_len;
+ long long int bias, zero;
+ long long int expected_uncmp_ofs, expected_cmp_ofs;
+ unsigned int block_size, n_blocks;
+ unsigned int i;
+
+ read_int (r);
+ ofs = ftello (r->file);
+ printf ("\n%08llx: ZLIB compressed data header:\n", ofs);
+
+ this_ofs = read_int64 (r);
+ next_ofs = read_int64 (r);
+ next_len = read_int64 (r);
+
+ printf ("\tzheader_ofs: 0x%llx\n", this_ofs);
+ if (this_ofs != ofs)
+ printf ("\t\t(Expected 0x%llx.)\n", ofs);
+ printf ("\tztrailer_ofs: 0x%llx\n", next_ofs);
+ printf ("\tztrailer_len: %lld\n", next_len);
+ if (next_len < 24 || next_len % 24)
+ printf ("\t\t(Trailer length is not a positive multiple of 24.)\n");
+
+ printf ("\n%08llx: 0x%llx bytes of ZLIB compressed data\n",
+ ofs + 8 * 3, next_ofs - (ofs + 8 * 3));
+
+ skip_bytes (r, next_ofs - (ofs + 8 * 3));
+
+ printf ("\n%08llx: ZLIB trailer fixed header:\n", next_ofs);
+ bias = read_int64 (r);
+ zero = read_int64 (r);
+ block_size = read_int (r);
+ n_blocks = read_int (r);
+ printf ("\tbias: %lld\n", bias);
+ printf ("\tzero: 0x%llx\n", zero);
+ if (zero != 0)
+ printf ("\t\t(Expected 0.)\n");
+ printf ("\tblock_size: 0x%x\n", block_size);
+ if (block_size != 0x3ff000)
+ printf ("\t\t(Expected 0x3ff000.)\n");
+ printf ("\tn_blocks: %u\n", n_blocks);
+ if (n_blocks != next_len / 24 - 1)
+ printf ("\t\t(Expected %llu.)\n", next_len / 24 - 1);
+
+ expected_uncmp_ofs = ofs;
+ expected_cmp_ofs = ofs + 24;
+ for (i = 0; i < n_blocks; i++)
+ {
+ long long int blockinfo_ofs = ftello (r->file);
+ unsigned long long int uncompressed_ofs = read_int64 (r);
+ unsigned long long int compressed_ofs = read_int64 (r);
+ unsigned int uncompressed_size = read_int (r);
+ unsigned int compressed_size = read_int (r);
+
+ printf ("\n%08llx: ZLIB block descriptor %d\n", blockinfo_ofs, i + 1);
+
+ printf ("\tuncompressed_ofs: 0x%llx\n", uncompressed_ofs);
+ if (uncompressed_ofs != expected_uncmp_ofs)
+ printf ("\t\t(Expected 0x%llx.)\n", ofs);
+
+ printf ("\tcompressed_ofs: 0x%llx\n", compressed_ofs);
+ if (compressed_ofs != expected_cmp_ofs)
+ printf ("\t\t(Expected 0x%llx.)\n", ofs + 24);
+
+ printf ("\tuncompressed_size: 0x%x\n", uncompressed_size);
+ if (i < n_blocks - 1 && uncompressed_size != block_size)
+ printf ("\t\t(Expected 0x%x.)\n", block_size);
+
+ printf ("\tcompressed_size: 0x%x\n", compressed_size);
+ if (i == n_blocks - 1 && compressed_ofs + compressed_size != next_ofs)
+ printf ("\t\t(This was expected to be 0x%llx.)\n",
+ next_ofs - compressed_size);
+
+ expected_uncmp_ofs += uncompressed_size;
+ expected_cmp_ofs += compressed_size;
+ }
+}
\f
/* Helpers for reading records that consist of structured text
strings. */
{
return text->pos;
}
+
+static const char *
+text_get_all (const struct text_record *text)
+{
+ return text->buffer;
+}
\f
static void
usage (void)
end--;
*end = '\0';
}
+
+static void
+print_string (const char *s, size_t len)
+{
+ if (memchr (s, 0, len) == 0)
+ {
+ size_t i;
+
+ for (i = 0; i < len; i++)
+ {
+ unsigned char c = s[i];
+
+ if (c == '\\')
+ printf ("\\\\");
+ else if (c == '\n' || isprint (c))
+ putchar (c);
+ else
+ printf ("\\%02x", c);
+ }
+ putchar ('\n');
+ }
+ else
+ hex_dump (0, s, len);
+}