/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2000, 2006-2007, 2009-2014 Free Software Foundation, Inc.
+ Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <config.h>
-#include "data/sys-file-reader.h"
#include "data/sys-file-private.h"
#include <errno.h>
#include <sys/stat.h>
#include <zlib.h>
+#include "data/any-reader.h"
#include "data/attributes.h"
#include "data/case.h"
#include "data/casereader-provider.h"
#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
#include "libpspp/i18n.h"
+#include "libpspp/ll.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
int weight_idx; /* 0 if unweighted, otherwise a var index. */
int nominal_case_size; /* Number of var positions. */
- /* These correspond to the members of struct sfm_file_info or a dictionary
+ /* These correspond to the members of struct any_file_info or a dictionary
but in the system file's encoding rather than ASCII. */
char creation_date[10]; /* "dd mmm yy". */
char creation_time[9]; /* "hh:mm:ss". */
struct sfm_extension_record
{
+ struct ll ll; /* In struct sfm_reader 'var_attrs' list. */
int subtype; /* Record subtype. */
off_t pos; /* Starting offset in file. */
unsigned int size; /* Size of data elements. */
/* System file reader. */
struct sfm_reader
{
+ struct any_reader any_reader;
+
/* Resource tracking. */
struct pool *pool; /* All system file state. */
/* File data. */
- struct sfm_read_info info;
+ struct any_read_info info;
struct sfm_header_record header;
struct sfm_var_record *vars;
size_t n_vars;
struct sfm_mrset *mrsets;
size_t n_mrsets;
struct sfm_extension_record *extensions[32];
+ struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */
/* File state. */
struct file_handle *fh; /* File handle. */
const char *encoding; /* String encoding. */
/* Decompression. */
- enum sfm_compression compression;
+ enum any_compression compression;
double bias; /* Compression bias, usually 100.0. */
uint8_t opcodes[8]; /* Current block of opcodes. */
size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
static const struct casereader_class sys_file_casereader_class;
+static struct sfm_reader * /* Returns the sfm_reader that corresponds to generic reader R_. */
+sfm_reader_cast (const struct any_reader *r_)
+{
+ assert (r_->klass == &sys_file_reader_class); /* R_ must be a system file reader. */
+ return UP_CAST (r_, struct sfm_reader, any_reader); /* Presumably maps the embedded 'any_reader' member back to its containing struct sfm_reader -- confirm against UP_CAST's definition. */
+}
+
+static bool sfm_close (struct any_reader *);
+
static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
const struct sfm_var_record *,
size_t n, int idx);
struct text_record *,
struct variable **var, char **value);
static void text_warn (struct sfm_reader *r, struct text_record *text,
- const char *format, ...)
- PRINTF_FORMAT (3, 4);
+ const char *format, ...) PRINTF_FORMAT (3, 4);
static char *text_get_token (struct text_record *,
struct substring delimiters, char *delimiter);
static bool text_match (struct text_record *, char c);
static bool read_dictionary (struct sfm_reader *);
static bool read_record (struct sfm_reader *, int type,
size_t *allocated_vars, size_t *allocated_labels);
-static bool read_header (struct sfm_reader *, struct sfm_read_info *,
+static bool read_header (struct sfm_reader *, struct any_read_info *,
struct sfm_header_record *);
static void parse_header (struct sfm_reader *,
const struct sfm_header_record *,
- struct sfm_read_info *, struct dictionary *);
+ struct any_read_info *, struct dictionary *);
static bool parse_variable_records (struct sfm_reader *, struct dictionary *,
struct sfm_var_record *, size_t n);
static void parse_format_spec (struct sfm_reader *, off_t pos,
struct dictionary *);
static bool parse_machine_integer_info (struct sfm_reader *,
const struct sfm_extension_record *,
- struct sfm_read_info *);
+ struct any_read_info *);
static void parse_machine_float_info (struct sfm_reader *,
const struct sfm_extension_record *);
static void parse_extra_product_info (struct sfm_reader *,
const struct sfm_extension_record *,
- struct sfm_read_info *);
+ struct any_read_info *);
static void parse_mrsets (struct sfm_reader *,
const struct sfm_extension_record *,
size_t *allocated_mrsets);
const struct sfm_extension_record *,
struct dictionary *);
static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
-static bool parse_long_string_value_labels (struct sfm_reader *,
+static void parse_long_string_value_labels (struct sfm_reader *,
const struct sfm_extension_record *,
struct dictionary *);
-static bool parse_long_string_missing_values (
+static void parse_long_string_missing_values (
struct sfm_reader *, const struct sfm_extension_record *,
struct dictionary *);
/* Frees the strings inside INFO. */
void
-sfm_read_info_destroy (struct sfm_read_info *info)
+any_read_info_destroy (struct any_read_info *info)
{
if (info)
{
/* Tries to open FH for reading as a system file. Returns the any_reader
embedded in a new sfm_reader if successful, otherwise NULL. */
-struct sfm_reader *
+static struct any_reader *
sfm_open (struct file_handle *fh)
{
size_t allocated_mrsets = 0;
/* Create and initialize reader. */
r = xzalloc (sizeof *r);
+ r->any_reader.klass = &sys_file_reader_class;
r->pool = pool_create ();
pool_register (r->pool, free, r);
r->fh = fh_ref (fh);
r->opcode_idx = sizeof r->opcodes;
+ ll_init (&r->var_attrs);
/* TRANSLATORS: this fragment will be interpolated into
messages in fh_lock() that identify types of files. */
if (r->lock == NULL)
goto error;
- r->file = fn_open (fh_get_file_name (fh), "rb");
+ r->file = fn_open (fh, "rb");
if (r->file == NULL)
{
msg (ME, _("Error opening `%s' for reading as a system file: %s."),
if (r->extensions[EXT_MRSETS2] != NULL)
parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
- return r;
+ return &r->any_reader;
+
error:
- sfm_close (r);
+ if (r)
+ sfm_close (&r->any_reader);
return NULL;
}
if (!skip_bytes (r, 4))
return false;
- if (r->compression == SFM_COMP_ZLIB && !read_zheader (r))
+ if (r->compression == ANY_COMP_ZLIB && !read_zheader (r))
return false;
return true;
|| subtype >= sizeof r->extensions / sizeof *r->extensions)
{
sys_warn (r, r->pos,
- _("Unrecognized record type 7, subtype %d. Please "
- "send a copy of this file, and the syntax which "
- "created it to %s."),
- subtype, PACKAGE_BUGREPORT);
+ _("Unrecognized record type 7, subtype %d. For help, "
+ "please send this file to %s and mention that you were "
+ "using %s."),
+ subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
return skip_extension_record (r, subtype);
}
+ else if (subtype == 18)
+ {
+ /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
+ put each variable attribute into a separate record with subtype
+ 18. I'm surprised that SPSS puts up with this. */
+ struct sfm_extension_record *ext;
+ bool ok = read_extension_record (r, subtype, &ext);
+ if (ok)
+ ll_push_tail (&r->var_attrs, &ext->ll);
+ return ok;
+ }
else if (r->extensions[subtype] != NULL)
{
sys_warn (r, r->pos,
_("Record type 7, subtype %d found here has the same "
- "type as the record found near offset 0x%llx. "
- "Please send a copy of this file, and the syntax "
- "which created it to %s."),
+ "type as the record found near offset 0x%llx. For "
+ "help, please send this file to %s and mention that "
+ "you were using %s."),
subtype, (long long int) r->extensions[subtype]->pos,
- PACKAGE_BUGREPORT);
+ PACKAGE_BUGREPORT, PACKAGE_STRING);
return skip_extension_record (r, subtype);
}
else
/* Returns the character encoding obtained from R, or a null pointer if R
doesn't have an indication of its character encoding. */
-const char *
+static const char *
sfm_get_encoding (const struct sfm_reader *r)
{
/* The EXT_ENCODING record is the best way to determine dictionary
whatever encoding system file R uses. *IDS[I] is true if *STRINGSP[I] must
be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
text. */
-size_t
-sfm_get_strings (const struct sfm_reader *r, struct pool *pool,
+static size_t
+sfm_get_strings (const struct any_reader *r_, struct pool *pool,
char ***titlesp, bool **idsp, char ***stringsp)
{
+ struct sfm_reader *r = sfm_reader_cast (r_);
const struct sfm_mrset *mrset;
struct get_strings_aux aux;
size_t var_idx;
mrset_idx);
}
- /* */
/* data file attributes */
/* variable attributes */
/* long var map */
ENCODING is null, or the locale encoding if R specifies no encoding.
If INFOP is non-null, then it receives additional info about the system
- file, which the caller must eventually free with sfm_read_info_destroy()
+ file, which the caller must eventually free with any_read_info_destroy()
when it is no longer needed.
This function consumes R. The caller must not use it again later, even to
destroy it with sfm_close(). */
-struct casereader *
-sfm_decode (struct sfm_reader *r, const char *encoding,
- struct dictionary **dictp, struct sfm_read_info *infop)
+static struct casereader *
+sfm_decode (struct any_reader *r_, const char *encoding,
+ struct dictionary **dictp, struct any_read_info *infop)
{
+ struct sfm_reader *r = sfm_reader_cast (r_);
struct dictionary *dict;
size_t i;
parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict);
/* The following records use long names, so they need to follow renaming. */
- if (r->extensions[EXT_VAR_ATTRS] != NULL)
+ if (!ll_is_empty (&r->var_attrs))
{
- parse_variable_attributes (r, r->extensions[EXT_VAR_ATTRS], dict);
+ struct sfm_extension_record *ext;
+ ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs)
+ parse_variable_attributes (r, ext, dict);
/* Roles use the $@Role attribute. */
assign_variable_roles (r, dict);
}
-
- if (r->extensions[EXT_LONG_LABELS] != NULL
- && !parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS],
- dict))
- goto error;
- if (r->extensions[EXT_LONG_MISSING] != NULL
- && !parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
- dict))
- goto error;
+ if (r->extensions[EXT_LONG_LABELS] != NULL)
+ parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict);
+ if (r->extensions[EXT_LONG_MISSING] != NULL)
+ parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
+ dict);
/* Warn if the actual amount of data per case differs from the
amount that the header claims. SPSS version 13 gets this
&sys_file_casereader_class, r);
error:
- sfm_close (r);
+ sfm_close (r_);
dict_destroy (dict);
*dictp = NULL;
return NULL;
closed with sfm_decode() or this function.
Returns true if an I/O error has occurred on READER, false
otherwise. */
-bool
-sfm_close (struct sfm_reader *r)
+static bool
+sfm_close (struct any_reader *r_)
{
+ struct sfm_reader *r = sfm_reader_cast (r_);
bool error;
- if (r == NULL)
- return true;
-
if (r->file)
{
- if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
+ if (fn_close (r->fh, r->file) == EOF)
{
msg (ME, _("Error closing system file `%s': %s."),
fh_get_file_name (r->fh), strerror (errno));
r->file = NULL;
}
- sfm_read_info_destroy (&r->info);
+ any_read_info_destroy (&r->info);
fh_unlock (r->lock);
fh_unref (r->fh);
sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
{
struct sfm_reader *r = r_;
- sfm_close (r);
+ sfm_close (&r->any_reader);
}
-/* Returns true if FILE is an SPSS system file,
- false otherwise. */
-bool
+/* Detects whether FILE is an SPSS system file. Returns 1 if so, 0 if not, and
+ a negative errno value if there is an error reading FILE. */
+static int
sfm_detect (FILE *file)
{
char magic[5];
+ if (fseek (file, 0, SEEK_SET) != 0)
+ return -errno;
if (fread (magic, 4, 1, file) != 1)
- return false;
+ return ferror (file) ? -errno : 0;
magic[4] = '\0';
return (!strcmp (ASCII_MAGIC, magic)
except for the string fields in *INFO, which parse_header() will initialize
later once the file's encoding is known. */
static bool
-read_header (struct sfm_reader *r, struct sfm_read_info *info,
+read_header (struct sfm_reader *r, struct any_read_info *info,
struct sfm_header_record *header)
{
uint8_t raw_layout_code[4];
if (!zmagic)
{
if (compressed == 0)
- r->compression = SFM_COMP_NONE;
+ r->compression = ANY_COMP_NONE;
else if (compressed == 1)
- r->compression = SFM_COMP_SIMPLE;
+ r->compression = ANY_COMP_SIMPLE;
else if (compressed != 0)
{
sys_error (r, 0, "System file header has invalid compression "
else
{
if (compressed == 2)
- r->compression = SFM_COMP_ZLIB;
+ r->compression = ANY_COMP_ZLIB;
else
{
sys_error (r, 0, "ZLIB-compressed system file header has invalid "
if (has_variable_label == 1)
{
- enum { MAX_LABEL_LEN = 255 };
+ enum { MAX_LABEL_LEN = 65536 };
unsigned int len, read_len;
if (!read_uint (r, &len))
}
sys_warn (r, record->pos,
- _("Unrecognized record type 7, subtype %d. Please send a "
- "copy of this file, and the syntax which created it to %s."),
- subtype, PACKAGE_BUGREPORT);
+ _("Unrecognized record type 7, subtype %d. For help, please "
+ "send this file to %s and mention that you were using %s."),
+ subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
skip:
return skip_bytes (r, n_bytes);
static void
parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
- struct sfm_read_info *info, struct dictionary *dict)
+ struct any_read_info *info, struct dictionary *dict)
{
const char *dict_encoding = dict_get_encoding (dict);
struct substring product;
utf8_label = recode_string_pool ("UTF-8", dict_encoding,
rec->label, -1, r->pool);
- var_set_label (var, utf8_label, false);
+ var_set_label (var, utf8_label);
}
/* Set missing values. */
}
}
else
- {
- union value value;
-
- value_init_pool (r->pool, &value, width);
- value_set_missing (&value, width);
- for (i = 0; i < rec->missing_value_code; i++)
- mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
- }
+ for (i = 0; i < rec->missing_value_code; i++)
+ mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
var_set_missing_values (var, &mv);
}
static bool
parse_machine_integer_info (struct sfm_reader *r,
const struct sfm_extension_record *record,
- struct sfm_read_info *info)
+ struct any_read_info *info)
{
int float_representation, expected_float_format;
int integer_representation, expected_integer_format;
static void
parse_extra_product_info (struct sfm_reader *r,
const struct sfm_extension_record *record,
- struct sfm_read_info *info)
+ struct any_read_info *info)
{
struct text_record *text;
while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
{
/* Validate long name. */
- if (!dict_id_is_valid (dict, long_name, false))
+ if (!dict_id_is_valid (dict, long_name, false)
+ || long_name[0] == '$' || long_name[0] == '#')
{
sys_warn (r, record->pos,
_("Long variable mapping from %s to invalid "
text_warn (r, text, _("Error parsing attribute value %s[%d]."),
key, index);
break;
- }
+ }
length = strlen (value);
- if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
+ if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
{
value[length - 1] = '\0';
- attribute_add_value (attr, value + 1);
+ attribute_add_value (attr, value + 1);
}
- else
+ else
{
text_warn (r, text,
_("Attribute value %s[%d] is not quoted: %s."),
key, index, value);
- attribute_add_value (attr, value);
+ attribute_add_value (attr, value);
}
/* Was this the last value for this attribute? */
size_t end = record->size * record->count;
if (length >= end || ofs + length > end)
{
- sys_error (r, record->pos + end,
- _("Extension record subtype %d ends unexpectedly."),
- record->subtype);
+ sys_warn (r, record->pos + end,
+ _("Extension record subtype %d ends unexpectedly."),
+ record->subtype);
return false;
}
return true;
}
-static bool
+static void
parse_long_string_value_labels (struct sfm_reader *r,
const struct sfm_extension_record *record,
struct dictionary *dict)
/* Parse variable name length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
var_name_len = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse variable name, width, and number of labels. */
if (!check_overflow (r, record, ofs, var_name_len + 8))
- return false;
+ return;
var_name = recode_string_pool ("UTF-8", dict_encoding,
(const char *) record->data + ofs,
var_name_len, r->pool);
/* Parse value length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
value_length = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse value. */
if (!check_overflow (r, record, ofs, value_length))
- return false;
+ return;
if (!skip)
{
if (value_length == width)
/* Parse label length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
label_length = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse label. */
if (!check_overflow (r, record, ofs, label_length))
- return false;
+ return;
if (!skip)
{
char *label;
ofs += label_length;
}
}
-
- return true;
}
-static bool
+static void
parse_long_string_missing_values (struct sfm_reader *r,
const struct sfm_extension_record *record,
struct dictionary *dict)
/* Parse variable name length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
var_name_len = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse variable name. */
if (!check_overflow (r, record, ofs, var_name_len + 1))
- return false;
+ return;
var_name = recode_string_pool ("UTF-8", dict_encoding,
(const char *) record->data + ofs,
var_name_len, r->pool);
/* Parse value length. */
if (!check_overflow (r, record, ofs, 4))
- return false;
+ return;
value_length = parse_int (r, record->data, ofs);
ofs += 4;
/* Parse value. */
if (!check_overflow (r, record, ofs, value_length))
- return false;
+ return;
if (var != NULL
&& i < 3
&& !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
if (var != NULL)
var_set_missing_values (var, &mv);
}
-
- return true;
}
\f
/* Case reader. */
int retval;
int i;
- if (r->error)
+ if (r->error || !r->sfm_var_cnt)
return NULL;
c = case_create (r->proto);
static bool
read_case_number (struct sfm_reader *r, double *d)
{
- if (r->compression == SFM_COMP_NONE)
+ if (r->compression == ANY_COMP_NONE)
{
uint8_t number[8];
if (!try_read_bytes (r, number, sizeof number))
static int
read_opcode (struct sfm_reader *r)
{
- assert (r->compression != SFM_COMP_NONE);
+ assert (r->compression != ANY_COMP_NONE);
for (;;)
{
int opcode;
read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
{
assert (length % 8 == 0);
- if (r->compression == SFM_COMP_NONE)
+ if (r->compression == ANY_COMP_NONE)
return try_read_bytes (r, s, length);
else
{
{
if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
return false;
-
+
*value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
if (*value == NULL)
return false;
text_warn (struct sfm_reader *r, struct text_record *text,
const char *format, ...)
{
- if (text->n_warnings++ < MAX_TEXT_WARNINGS)
+ if (text->n_warnings++ < MAX_TEXT_WARNINGS)
{
va_list args;
static bool
text_match (struct text_record *text, char c)
{
- if (text->buffer.string[text->pos] == c)
+ if (text->pos >= text->buffer.length)
+ return false;
+
+ if (text->buffer.string[text->pos] == c)
{
text->pos++;
return true;
va_end (args);
}
-/* Displays an error for the current file position,
- marks it as in an error state,
- and aborts reading it using longjmp. */
+/* Displays an error for the current file position and marks it as in an error
+ state. */
static void
sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
{
static int
read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
- if (r->compression == SFM_COMP_SIMPLE)
+ if (r->compression == ANY_COMP_SIMPLE)
return read_bytes (r, buf, byte_cnt);
else
{
static int
try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
- if (r->compression == SFM_COMP_SIMPLE)
+ if (r->compression == ANY_COMP_SIMPLE)
return try_read_bytes (r, buf, byte_cnt);
else
return read_bytes_zlib (r, buf, byte_cnt);
NULL,
NULL,
};
+
+const struct any_reader_class sys_file_reader_class = /* Reader class that sfm_open() installs in each sfm_reader's 'any_reader.klass'. */
+ {
+ N_("SPSS System File"), /* NOTE(review): positional initializers; order must match struct any_reader_class -- confirm against data/any-reader.h. */
+ sfm_detect, /* Detects whether a FILE is an SPSS system file (1, 0, or negative errno). */
+ sfm_open, /* Tries to open a file handle for reading as a system file. */
+ sfm_close, /* Closes a reader obtained from sfm_open(). */
+ sfm_decode, /* Consumes the reader; yields a dictionary and a casereader. */
+ sfm_get_strings, /* Fills in the TITLESP, IDSP, and STRINGSP arrays from the reader. */
+ };