/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 1997-2000, 2006-2007, 2009-2016, 2021 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "data/value-labels.h"
#include "data/value.h"
#include "data/variable.h"
+#include "data/varset.h"
#include "libpspp/array.h"
#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
enum integer_format integer_format; /* On-disk integer format. */
enum float_format float_format; /* On-disk floating point format. */
struct sfm_var *sfm_vars; /* Variables. */
- size_t sfm_var_cnt; /* Number of variables. */
- int case_cnt; /* Number of cases */
+ size_t sfm_n_vars; /* Number of variables. */
+ int n_cases; /* Number of cases */
const char *encoding; /* String encoding. */
bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */
struct sfm_var_record *, size_t n);
static void parse_format_spec (struct sfm_reader *, off_t pos,
unsigned int format, enum which_format,
- struct variable *, int *format_warning_cnt);
+ struct variable *, int *format_n_warnings);
static void parse_document (struct dictionary *, struct sfm_document_record *);
static void parse_display_parameters (struct sfm_reader *,
const struct sfm_extension_record *,
static void parse_long_string_missing_values (
struct sfm_reader *, const struct sfm_extension_record *,
struct dictionary *);
+static void parse_var_sets (struct sfm_reader *,
+ const struct sfm_extension_record *,
+ struct dictionary *);
/* Frees the strings inside INFO. */
void
sfm_open (struct file_handle *fh)
{
size_t allocated_mrsets = 0;
- struct sfm_reader *r;
/* Create and initialize reader. */
- r = xzalloc (sizeof *r);
+ struct sfm_reader *r = XZALLOC (struct sfm_reader);
r->any_reader.klass = &sys_file_reader_class;
r->pool = pool_create ();
pool_register (r->pool, free, r);
return aux.n;
}
-/* Decodes the dictionary read from R, saving it into into *DICT. Character
+/* Decodes the dictionary read from R, saving it into *DICT. Character
strings in R are decoded using ENCODING, or an encoding obtained from R if
ENCODING is null, or the locale encoding if R specifies no encoding.
if (r->extensions[EXT_LONG_MISSING] != NULL)
parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
dict);
+ if (r->extensions[EXT_VAR_SETS])
+ parse_var_sets (r, r->extensions[EXT_VAR_SETS], dict);
/* Warn if the actual amount of data per case differs from the
amount that the header claims. SPSS version 13 gets this
sfm_read_case to use. We cannot use the `struct variable's
from the dictionary we created, because the caller owns the
dictionary and may destroy or modify its variables. */
- sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
+ sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_n_vars);
pool_register (r->pool, free, r->sfm_vars);
r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
}
return casereader_create_sequential
- (NULL, r->proto,
- r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
- &sys_file_casereader_class, r);
+ (NULL, r->proto, r->n_cases == -1 ? CASENUMBER_MAX : r->n_cases,
+ &sys_file_casereader_class, r);
error:
sfm_close (r_);
r->compression = ANY_COMP_NONE;
else if (compressed == 1)
r->compression = ANY_COMP_SIMPLE;
- else if (compressed != 0)
+ else
{
sys_error (r, 0, "System file header has invalid compression "
"value %d.", compressed);
if (!read_int (r, &header->weight_idx))
return false;
- if (!read_int (r, &r->case_cnt))
+ if (!read_int (r, &r->n_cases))
return false;
- if (r->case_cnt > INT_MAX / 2)
- r->case_cnt = -1;
+ if (r->n_cases > INT_MAX / 2)
+ r->n_cases = -1;
/* Identify floating-point format and obtain compression bias. */
if (!read_bytes (r, raw_bias, sizeof raw_bias))
info->integer_format = r->integer_format;
info->float_format = r->float_format;
info->compression = r->compression;
- info->case_cnt = r->case_cnt;
+ info->n_cases = r->n_cases;
return true;
}
/* Implemented record types. */
{ EXT_INTEGER, 4, 8 },
{ EXT_FLOAT, 8, 3 },
+ { EXT_VAR_SETS, 1, 0 },
{ EXT_MRSETS, 1, 0 },
{ EXT_PRODUCT_INFO, 1, 0 },
{ EXT_DISPLAY, 4, 0 },
{ EXT_LONG_MISSING, 1, 0 },
/* Ignored record types. */
- { EXT_VAR_SETS, 0, 0 },
{ EXT_DATE, 0, 0 },
{ EXT_DATA_ENTRY, 0, 0 },
{ EXT_DATAVIEW, 0, 0 },
}
struct variable *var;
- if (!dict_id_is_valid (dict, name, false)
- || name[0] == '$' || name[0] == '#')
+ if (!dict_id_is_valid (dict, name) || name[0] == '$' || name[0] == '#')
{
var = add_var_with_generated_name (dict, rec->width);
sys_warn (r, rec->pos, _("Renaming variable with invalid name "
if (fmt_from_u32 (format, var_get_width (v), false, &f))
{
if (which == PRINT_FORMAT)
- var_set_print_format (v, &f);
+ var_set_print_format (v, f);
else
- var_set_write_format (v, &f);
+ var_set_write_format (v, f);
}
else if (format == 0)
{
size_t i;
name = recode_string ("UTF-8", r->encoding, s->name, -1);
- if (!mrset_is_valid_name (name, dict_get_encoding (dict), false))
+ if (!mrset_is_valid_name (name, dict_get_encoding (dict)))
{
sys_warn (r, -1, _("Invalid multiple response set name `%s'."),
name);
size_t ofs;
size_t i;
- n_vars = dict_get_var_cnt (dict);
+ n_vars = dict_get_n_vars (dict);
if (record->count == 3 * n_vars)
includes_width = true;
else if (record->count == 2 * n_vars)
/* Renaming a variable may clear its short names, but we
want to retain them, so we save them and re-set them
afterward. */
- n_short_names = var_get_short_name_cnt (var);
+ n_short_names = var_get_n_short_names (var);
short_names = xnmalloc (n_short_names, sizeof *short_names);
for (i = 0; i < n_short_names; i++)
{
converted to lowercase, as the long variable names. */
size_t i;
- for (i = 0; i < dict_get_var_cnt (dict); i++)
+ for (i = 0; i < dict_get_n_vars (dict); i++)
{
struct variable *var = dict_get_var (dict, i);
char *new_name;
while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
{
/* Validate long name. */
- if (!dict_id_is_valid (dict, long_name, false)
+ if (!dict_id_is_valid (dict, long_name)
|| long_name[0] == '$' || long_name[0] == '#')
{
sys_warn (r, record->pos,
{
size_t idx = var_get_dict_index (var);
long int length;
- int segment_cnt;
int i;
/* Get length. */
}
/* Check segments. */
- segment_cnt = sfm_width_to_segments (length);
- if (segment_cnt == 1)
+ int n_segments = sfm_width_to_segments (length);
+ if (n_segments == 1)
{
sys_warn (r, record->pos,
_("%s listed in very long string record with width %s, "
var_get_name (var), length_s);
continue;
}
- if (idx + segment_cnt > dict_get_var_cnt (dict))
+ if (idx + n_segments > dict_get_n_vars (dict))
{
sys_error (r, record->pos,
_("Very long string %s overflows dictionary."),
/* Get the short names from the segments and check their
lengths. */
- for (i = 0; i < segment_cnt; i++)
+ for (i = 0; i < n_segments; i++)
{
struct variable *seg = dict_get_var (dict, idx + i);
int alloc_width = sfm_segment_alloc_width (length, i);
return false;
}
}
- dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
+ dict_delete_consecutive_vars (dict, idx + 1, n_segments - 1);
var_set_width (var, length);
}
close_text_record (r, text);
- dict_compact_values (dict);
return true;
}
size_t n_warnings = 0;
size_t i;
- for (i = 0; i < dict_get_var_cnt (dict); i++)
+ for (i = 0; i < dict_get_n_vars (dict); i++)
{
struct variable *var = dict_get_var (dict, i);
struct attrset *attrs = var_get_attributes (var);
}
+/* Bounds check for extension record RECORD, whose data is taken to be
+   RECORD->size * RECORD->count bytes long.  Returns false if LENGTH is at
+   least that long or if OFS + LENGTH extends past the end of the data,
+   otherwise true.  Emits no diagnostic itself; check_overflow () wraps this
+   function and adds the warning. */
static bool
-check_overflow (struct sfm_reader *r,
- const struct sfm_extension_record *record,
- size_t ofs, size_t length)
+check_overflow__ (const struct sfm_extension_record *record,
+ size_t ofs, size_t length)
{
size_t end = record->size * record->count;
if (length >= end || ofs + length > end)
- {
- sys_warn (r, record->pos + end,
- _("Extension record subtype %d ends unexpectedly."),
- record->subtype);
- return false;
- }
+ return false;
return true;
}
+/* Performs the same test as check_overflow__ () for LENGTH bytes at offset
+   OFS in RECORD and returns its result.  When the test fails, also issues a
+   warning through R that the extension record ends unexpectedly, located at
+   RECORD->pos plus the record's data size (RECORD->size * RECORD->count). */
+static bool
+check_overflow (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ size_t ofs, size_t length)
+{
+ bool ok = check_overflow__ (record, ofs, length);
+ if (!ok)
+ sys_warn (r, record->pos + record->size * record->count,
+ _("Extension record subtype %d ends unexpectedly."),
+ record->subtype);
+ return ok;
+}
+
static void
parse_long_string_value_labels (struct sfm_reader *r,
const struct sfm_extension_record *record,
size_t end = record->size * record->count;
size_t ofs = 0;
+ bool warned = false;
while (ofs < end)
{
struct missing_values mv;
var = NULL;
}
+ /* Parse value length. */
+ if (!check_overflow (r, record, ofs, 4))
+ return;
+ size_t value_length = parse_int (r, record->data, ofs);
+ ofs += 4;
+
/* Parse values. */
mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
for (i = 0; i < n_missing_values; i++)
{
- size_t value_length;
-
- /* Parse value length. */
- if (!check_overflow (r, record, ofs, 4))
- return;
- value_length = parse_int (r, record->data, ofs);
- ofs += 4;
+ /* Tolerate files written by old, buggy versions of PSPP where we
+ believed that the value_length was repeated before each missing
+ value. */
+ if (check_overflow__ (record, ofs, value_length)
+ && parse_int (r, record->data, ofs) == 8)
+ {
+ if (!warned)
+ {
+ sys_warn (r, record->pos + ofs,
+ _("This file has corrupted metadata written by a "
+ "buggy version of PSPP. To fix it, save a new "
+ "copy of the file."));
+ warned = true;
+ }
+ ofs += 4;
+ }
/* Parse value. */
if (!check_overflow (r, record, ofs, value_length))
var_set_missing_values (var, &mv);
}
}
+
+/* Parses the variable sets in extension record RECORD and adds each of them
+   to DICT with dict_add_varset ().  As read here, the record's text is a
+   series of entries: a set name terminated by `=', then variable names
+   separated by spaces.  An entry ends at the first name whose delimiter is
+   not a space (a newline, given the delimiter set used) or when the text runs
+   out.  Names that dict_lookup_var () cannot find in DICT are reported with a
+   warning through R and left out of the set. */
+static void
+parse_var_sets (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ struct dictionary *dict)
+{
+ struct text_record *text = open_text_record (r, record, true);
+ for (;;)
+ {
+ /* The set name is everything up to the next `='; no more tokens means
+ there are no more sets. */
+ char *varset_name = text_get_token (text, ss_cstr ("="), NULL);
+ if (!varset_name)
+ break;
+
+ /* Only .name is given, so the compound literal zero-initializes the
+ other members: the set starts with vars == NULL and n_vars == 0. */
+ struct varset *varset = xmalloc (sizeof *varset);
+ *varset = (struct varset) {
+ .name = xstrdup (varset_name),
+ };
+
+ /* NOTE(review): presumably consumes a single space after `=' if one is
+ present; the result is not checked -- confirm against text_match (). */
+ text_match (text, ' ');
+
+ size_t allocated_vars = 0;
+ char delimiter;
+ do
+ {
+ char *var_name = text_get_token (text, ss_cstr (" \n"), &delimiter);
+ if (!var_name)
+ break;
+
+ /* Drop a trailing carriage return (presumably left over from a CRLF
+ line ending) so that it does not become part of the name. */
+ size_t len = strlen (var_name);
+ if (len > 0 && var_name[len - 1] == '\r')
+ var_name[len - 1] = '\0';
+
+ struct variable *var = dict_lookup_var (dict, var_name);
+ if (var)
+ {
+ /* Grow the member array when it is full. */
+ if (varset->n_vars >= allocated_vars)
+ varset->vars = x2nrealloc (varset->vars, &allocated_vars,
+ sizeof *varset->vars);
+ varset->vars[varset->n_vars++] = var;
+ }
+ else
+ sys_warn (r, record->pos,
+ _("Variable set %s contains unknown variable %s."),
+ varset_name, var_name);
+ }
+ while (delimiter == ' ');
+
+ /* NOTE(review): the varset is not freed here, so DICT presumably takes
+ ownership of it -- confirm against dict_add_varset (). */
+ dict_add_varset (dict, varset);
+ }
+ close_text_record (r, text);
+}
\f
/* Case reader. */
int retval;
int i;
- if (r->error || !r->sfm_var_cnt)
+ if (r->error || !r->sfm_n_vars)
return NULL;
c = case_create (r->proto);
- for (i = 0; i < r->sfm_var_cnt; i++)
+ for (i = 0; i < r->sfm_n_vars; i++)
{
struct sfm_var *sv = &r->sfm_vars[i];
union value *v = case_data_rw_idx (c, sv->case_index);
eof:
if (i != 0)
partial_record (r);
- if (r->case_cnt != -1)
+ if (r->n_cases != -1)
read_error (reader, r);
case_unref (c);
return NULL;
ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
ds_put_vformat (&text, format, args);
- struct msg m = {
+ struct msg *m = xmalloc (sizeof *m);
+ *m = (struct msg) {
.category = msg_class_to_category (class),
.severity = msg_class_to_severity (class),
- .text = ds_cstr (&text),
+ .text = ds_steal_cstr (&text),
};
- msg_emit (&m);
+ msg_emit (m);
}
/* Displays a warning for offset OFFSET in the file. */
an error. */
static inline int
read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
- void *buf, size_t byte_cnt)
+ void *buf, size_t n_bytes)
{
- size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
+ size_t bytes_read = fread (buf, 1, n_bytes, r->file);
r->pos += bytes_read;
- if (bytes_read == byte_cnt)
+ if (bytes_read == n_bytes)
return 1;
else if (ferror (r->file))
{
Returns true if successful.
Returns false upon I/O error or if end-of-file is encountered. */
static bool
-read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes)
{
- return read_bytes_internal (r, false, buf, byte_cnt) == 1;
+ /* `false' is read_bytes_internal's EOF_IS_OK argument.  Only a result of 1,
+ which it returns when all N_BYTES bytes were read, counts as success. */
+ return read_bytes_internal (r, false, buf, n_bytes) == 1;
}
-/* Reads BYTE_CNT bytes into BUF.
+/* Reads N_BYTES bytes into BUF.
Returns 0 if an immediate end-of-file is encountered.
Returns -1 if an I/O error or a partial read occurs. */
static int
-try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+try_read_bytes (struct sfm_reader *r, void *buf, size_t n_bytes)
{
- return read_bytes_internal (r, true, buf, byte_cnt);
+ /* `true' is read_bytes_internal's EOF_IS_OK argument; its result is passed
+ back to the caller unchanged. */
+ return read_bytes_internal (r, true, buf, n_bytes);
}
/* Reads a 32-bit signed integer from R and stores its value in host format in
}
static int
-read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt)
+read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t n_bytes)
{
uint8_t *buf = buf_;
- if (byte_cnt == 0)
+ if (n_bytes == 0)
return 1;
for (;;)
/* Use already inflated data if there is any. */
if (r->zout_pos < r->zout_end)
{
- unsigned int n = MIN (byte_cnt, r->zout_end - r->zout_pos);
+ unsigned int n = MIN (n_bytes, r->zout_end - r->zout_pos);
memcpy (buf, &r->zout_buf[r->zout_pos], n);
r->zout_pos += n;
- byte_cnt -= n;
+ n_bytes -= n;
buf += n;
- if (byte_cnt == 0)
+ if (n_bytes == 0)
return 1;
}
}
static int
-read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+read_compressed_bytes (struct sfm_reader *r, void *buf, size_t n_bytes)
{
if (r->compression == ANY_COMP_SIMPLE)
- return read_bytes (r, buf, byte_cnt);
+ return read_bytes (r, buf, n_bytes);
else
{
- int retval = read_bytes_zlib (r, buf, byte_cnt);
+ int retval = read_bytes_zlib (r, buf, n_bytes);
if (retval == 0)
sys_error (r, r->pos, _("Unexpected end of ZLIB compressed data."));
return retval;
}
+/* Reads N_BYTES bytes into BUF, using try_read_bytes () when R's compression
+   is ANY_COMP_SIMPLE and read_bytes_zlib () otherwise.  Returns the chosen
+   reader's result unchanged. */
static int
-try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
+try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t n_bytes)
{
if (r->compression == ANY_COMP_SIMPLE)
- return try_read_bytes (r, buf, byte_cnt);
+ return try_read_bytes (r, buf, n_bytes);
else
- return read_bytes_zlib (r, buf, byte_cnt);
+ return read_bytes_zlib (r, buf, n_bytes);
}
/* Reads a 64-bit floating-point number from R and returns its