/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2000, 2006-2007, 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
#include "libpspp/i18n.h"
+#include "libpspp/ll.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
struct sfm_extension_record
{
+ struct ll ll; /* In struct sfm_reader 'var_attrs' list. */
int subtype; /* Record subtype. */
off_t pos; /* Starting offset in file. */
unsigned int size; /* Size of data elements. */
struct sfm_mrset *mrsets;
size_t n_mrsets;
struct sfm_extension_record *extensions[32];
+ struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */
/* File state. */
struct file_handle *fh; /* File handle. */
size_t sfm_var_cnt; /* Number of variables. */
int case_cnt; /* Number of cases */
const char *encoding; /* String encoding. */
+ bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */
/* Decompression. */
enum any_compression compression;
struct sfm_var_record *);
static bool read_value_label_record (struct sfm_reader *,
struct sfm_value_label_record *);
-static struct sfm_document_record *read_document_record (struct sfm_reader *);
+static bool read_document_record (struct sfm_reader *);
static bool read_extension_record (struct sfm_reader *, int subtype,
struct sfm_extension_record **);
static bool skip_extension_record (struct sfm_reader *, int subtype);
struct text_record *,
struct variable **var, char **value);
static void text_warn (struct sfm_reader *r, struct text_record *text,
- const char *format, ...)
- PRINTF_FORMAT (3, 4);
+ const char *format, ...) PRINTF_FORMAT (3, 4);
static char *text_get_token (struct text_record *,
struct substring delimiters, char *delimiter);
static bool text_match (struct text_record *, char c);
pool_register (r->pool, free, r);
r->fh = fh_ref (fh);
r->opcode_idx = sizeof r->opcodes;
+ ll_init (&r->var_attrs);
/* TRANSLATORS: this fragment will be interpolated into
messages in fh_lock() that identify types of files. */
sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
return false;
}
- r->document = read_document_record (r);
- return r->document != NULL;
+ return read_document_record (r);
case 7:
if (!read_int (r, &subtype))
subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
return skip_extension_record (r, subtype);
}
+ else if (subtype == 18)
+ {
+ /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
+ put each variable attribute into a separate record with subtype
+ 18. I'm surprised that SPSS puts up with this. */
+ struct sfm_extension_record *ext;
+ bool ok = read_extension_record (r, subtype, &ext);
+ if (ok && ext)
+ ll_push_tail (&r->var_attrs, &ext->ll);
+ return ok;
+ }
else if (r->extensions[subtype] != NULL)
{
sys_warn (r, r->pos,
mrset_idx);
}
- /* */
/* data file attributes */
/* variable attributes */
/* long var map */
parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict);
/* The following records use long names, so they need to follow renaming. */
- if (r->extensions[EXT_VAR_ATTRS] != NULL)
+ if (!ll_is_empty (&r->var_attrs))
{
- parse_variable_attributes (r, r->extensions[EXT_VAR_ATTRS], dict);
+ struct sfm_extension_record *ext;
+ ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs)
+ parse_variable_attributes (r, ext, dict);
/* Roles use the $@Role attribute. */
assign_variable_roles (r, dict);
}
-
if (r->extensions[EXT_LONG_LABELS] != NULL)
parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict);
if (r->extensions[EXT_LONG_MISSING] != NULL)
amount that the header claims. SPSS version 13 gets this
wrong when very long strings are involved, so don't warn in
that case. */
- if (r->header.nominal_case_size != -1
+ if (r->header.nominal_case_size > 0
&& r->header.nominal_case_size != r->n_vars
&& r->info.version_major != 13)
sys_warn (r, -1, _("File header claims %d variable positions but "
if (!read_string (r, header->magic, sizeof header->magic)
|| !read_string (r, header->eye_catcher, sizeof header->eye_catcher))
return false;
+ r->written_by_readstat = strstr (header->eye_catcher,
+ "https://github.com/WizardMac/ReadStat");
if (!strcmp (ASCII_MAGIC, header->magic)
|| !strcmp (EBCDIC_MAGIC, header->magic))
return true;
}
-/* Reads a document record from R and returns it. */
-static struct sfm_document_record *
+/* Reads a document record from R. Returns true if successful, false on
+ error.
+
+ A record that declares zero lines is accepted: the function returns true
+ without allocating anything or setting R's 'document' member. A negative
+ line count, or one of INT_MAX / DOC_LINE_LENGTH or more, is reported
+ with sys_error() and yields false, as does a failed read.
+
+ On success with at least one line, the new record is allocated from R's
+ pool, filled with DOC_LINE_LENGTH * n_lines bytes read from the file, and
+ stored in R's 'document' member.
+
+ NOTE(review): the error message text still says "greater than 0" although
+ a count of 0 is now accepted -- confirm whether the wording should change. */
+static bool
read_document_record (struct sfm_reader *r)
{
- struct sfm_document_record *record;
int n_lines;
-
- record = pool_malloc (r->pool, sizeof *record);
- record->pos = r->pos;
-
if (!read_int (r, &n_lines))
- return NULL;
- if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
+ return false;
+ else if (n_lines == 0)
+ return true;
+ else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
{
- sys_error (r, record->pos,
+ sys_error (r, r->pos,
_("Number of document lines (%d) "
"must be greater than 0 and less than %d."),
n_lines, INT_MAX / DOC_LINE_LENGTH);
- return NULL;
+ return false;
}
+ struct sfm_document_record *record;
+ record = pool_malloc (r->pool, sizeof *record);
+ record->pos = r->pos;
record->n_lines = n_lines;
record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines))
- return NULL;
+ return false;
- return record;
+ r->document = record;
+ return true;
}
static bool
"`%s' to `%s'."),
name, new_name);
var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
+ var_set_short_name (var, 0, new_name);
free (new_name);
}
- /* Set the short name the same as the long name. */
- var_set_short_name (var, 0, name);
+ /* Set the short name the same as the long name (even if we renamed
+ it). */
+ var_set_short_name (var, 0, var_get_name (var));
/* Get variable label, if any. */
if (rec->label)
size_t i;
name = recode_string ("UTF-8", r->encoding, s->name, -1);
- if (name[0] != '$')
+ if (!mrset_is_valid_name (name, dict_get_encoding (dict), false))
{
- sys_warn (r, -1, _("Multiple response set name `%s' does not begin "
- "with `$'."),
+ sys_warn (r, -1, _("Invalid multiple response set name `%s'."),
name);
free (name);
continue;
}
static void
-rename_var_and_save_short_names (struct dictionary *dict, struct variable *var,
- const char *new_name)
+rename_var_and_save_short_names (struct sfm_reader *r, off_t pos,
+ struct dictionary *dict,
+ struct variable *var, const char *new_name)
{
size_t n_short_names;
char **short_names;
}
/* Set long name. */
- dict_rename_var (dict, var, new_name);
+ if (!dict_try_rename_var (dict, var, new_name))
+ sys_warn (r, pos, _("Duplicate long variable name `%s'."), new_name);
/* Restore short names. */
for (i = 0; i < n_short_names; i++)
char *new_name;
new_name = utf8_to_lower (var_get_name (var));
- rename_var_and_save_short_names (dict, var, new_name);
+ rename_var_and_save_short_names (r, -1, dict, var, new_name);
free (new_name);
}
while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
{
/* Validate long name. */
- if (!dict_id_is_valid (dict, long_name, false))
+ if (!dict_id_is_valid (dict, long_name, false)
+ || long_name[0] == '$' || long_name[0] == '#')
{
sys_warn (r, record->pos,
_("Long variable mapping from %s to invalid "
continue;
}
- /* Identify any duplicates. */
- if (utf8_strcasecmp (var_get_short_name (var, 0), long_name)
- && dict_lookup_var (dict, long_name) != NULL)
- {
- sys_warn (r, record->pos,
- _("Duplicate long variable name `%s'."), long_name);
- continue;
- }
-
- rename_var_and_save_short_names (dict, var, long_name);
+ rename_var_and_save_short_names (r, record->pos, dict, var, long_name);
}
close_text_record (r, text);
}
if (!var_add_value_label (var, &value, utf8_labels[j]))
{
- if (var_is_numeric (var))
+ if (r->written_by_readstat)
+ {
+ /* Ignore the problem. ReadStat is buggy and emits value
+ labels whose values are longer than the string variable's
+ width and that are identical within the variable's actual
+ width, e.g. both values "ABC123" and "ABC456" for a
+ string variable with width 3. */
+ }
+ else if (var_is_numeric (var))
sys_warn (r, record->pos,
_("Duplicate value label for %g on %s."),
value.f, var_get_name (var));
text_warn (r, text, _("Error parsing attribute value %s[%d]."),
key, index);
break;
- }
+ }
length = strlen (value);
- if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
+ if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
{
value[length - 1] = '\0';
- attribute_add_value (attr, value + 1);
+ attribute_add_value (attr, value + 1);
}
- else
+ else
{
text_warn (r, text,
_("Attribute value %s[%d] is not quoted: %s."),
key, index, value);
- attribute_add_value (attr, value);
+ attribute_add_value (attr, value);
}
/* Was this the last value for this attribute? */
break;
}
if (attrs != NULL)
- attrset_add (attrs, attr);
+ {
+ if (!attrset_try_add (attrs, attr))
+ {
+ text_warn (r, text, _("Duplicate attribute %s."),
+ attribute_get_name (attr));
+ attribute_destroy (attr);
+ }
+ }
else
attribute_destroy (attr);
}
ofs += 4;
/* Parse variable name, width, and number of labels. */
- if (!check_overflow (r, record, ofs, var_name_len + 8))
+ if (!check_overflow (r, record, ofs, var_name_len)
+ || !check_overflow (r, record, ofs, var_name_len + 8))
return;
var_name = recode_string_pool ("UTF-8", dict_encoding,
(const char *) record->data + ofs,
ofs += 4;
/* Parse variable name. */
- if (!check_overflow (r, record, ofs, var_name_len + 1))
+ if (!check_overflow (r, record, ofs, var_name_len)
+ || !check_overflow (r, record, ofs, var_name_len + 1))
return;
var_name = recode_string_pool ("UTF-8", dict_encoding,
(const char *) record->data + ofs,
}
/* Closes TEXT, frees its storage, and issues a final warning
- about suppressed warnings if necesary. */
+ about suppressed warnings if necessary. */
static void
close_text_record (struct sfm_reader *r, struct text_record *text)
{
{
if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
return false;
-
+
*value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
if (*value == NULL)
return false;
text_warn (struct sfm_reader *r, struct text_record *text,
const char *format, ...)
{
- if (text->n_warnings++ < MAX_TEXT_WARNINGS)
+ if (text->n_warnings++ < MAX_TEXT_WARNINGS)
{
va_list args;
static bool
text_match (struct text_record *text, char c)
{
- if (text->buffer.string[text->pos] == c)
+ if (text->pos >= text->buffer.length)
+ return false;
+
+ if (text->buffer.string[text->pos] == c)
{
text->pos++;
return true;
if (fstat (fileno (r->file), &s))
{
- sys_error (ME, 0, _("%s: stat failed (%s)."),
+ sys_error (r, 0, _("%s: stat failed (%s)."),
fh_get_file_name (r->fh), strerror (errno));
return false;
}