/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2000, 2006-2007, 2009-2012 Free Software Foundation, Inc.
+ Copyright (C) 1997-2000, 2006-2007, 2009-2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "libpspp/str.h"
#include "libpspp/stringi-set.h"
+#include "gl/c-strtod.h"
#include "gl/c-ctype.h"
#include "gl/inttostr.h"
#include "gl/localcharset.h"
EXT_DATE = 6, /* DATE. */
EXT_MRSETS = 7, /* Multiple response sets. */
EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */
- /* subtypes 9-10 unknown */
+ /* subtype 9 unknown */
+ EXT_PRODUCT_INFO = 10, /* Extra product info text. */
EXT_DISPLAY = 11, /* Variable display parameters. */
/* subtype 12 unknown */
EXT_LONG_NAMES = 13, /* Long variable names. */
EXT_VAR_ATTRS = 18, /* Variable attributes. */
EXT_MRSETS2 = 19, /* Multiple response sets (extended). */
EXT_ENCODING = 20, /* Character encoding. */
- EXT_LONG_LABELS = 21 /* Value labels for long strings. */
+ EXT_LONG_LABELS = 21, /* Value labels for long strings. */
+ EXT_LONG_MISSING = 22, /* Missing values for long strings. */
+ EXT_DATAVIEW = 24 /* "Format properties in dataview table". */
};
/* Fields from the top-level header record. */
struct sfm_extension_record
{
+ int subtype; /* Record subtype. */
off_t pos; /* Starting offset in file. */
size_t size; /* Size of data elements. */
size_t count; /* Number of data elements. */
static void read_string (struct sfm_reader *, char *, size_t);
static void skip_bytes (struct sfm_reader *, size_t);
+static char *fix_line_ends (const char *);
+
static int parse_int (struct sfm_reader *, const void *data, size_t ofs);
static double parse_float (struct sfm_reader *, const void *data, size_t ofs);
static const char *text_parse_counted_string (struct sfm_reader *,
struct text_record *);
static size_t text_pos (const struct text_record *);
+static const char *text_get_all (const struct text_record *);
static bool close_reader (struct sfm_reader *r);
\f
struct sfm_read_info *);
static void parse_machine_float_info (struct sfm_reader *,
const struct sfm_extension_record *);
+static void parse_extra_product_info (struct sfm_reader *,
+ const struct sfm_extension_record *,
+ struct sfm_read_info *);
static void parse_mrsets (struct sfm_reader *,
const struct sfm_extension_record *,
struct dictionary *);
static void parse_variable_attributes (struct sfm_reader *,
const struct sfm_extension_record *,
struct dictionary *);
+static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
static void parse_long_string_value_labels (struct sfm_reader *,
const struct sfm_extension_record *,
struct dictionary *);
+static void parse_long_string_missing_values (struct sfm_reader *,
+ const struct sfm_extension_record *,
+ struct dictionary *);
/* Frees the strings inside INFO. */
void
free (info->creation_date);
free (info->creation_time);
free (info->product);
+ free (info->product_ext);
}
}
if (extensions[EXT_FLOAT] != NULL)
parse_machine_float_info (r, extensions[EXT_FLOAT]);
+ if (extensions[EXT_PRODUCT_INFO] != NULL)
+ parse_extra_product_info (r, extensions[EXT_PRODUCT_INFO], info);
+
if (extensions[EXT_FILE_ATTRS] != NULL)
parse_data_file_attributes (r, extensions[EXT_FILE_ATTRS], dict);
/* The following records use long names, so they need to follow renaming. */
if (extensions[EXT_VAR_ATTRS] != NULL)
- parse_variable_attributes (r, extensions[EXT_VAR_ATTRS], dict);
+ {
+ parse_variable_attributes (r, extensions[EXT_VAR_ATTRS], dict);
+
+ /* Roles use the $@Role attribute. */
+ assign_variable_roles (r, dict);
+ }
if (extensions[EXT_LONG_LABELS] != NULL)
parse_long_string_value_labels (r, extensions[EXT_LONG_LABELS], dict);
+ if (extensions[EXT_LONG_MISSING] != NULL)
+ parse_long_string_missing_values (r, extensions[EXT_LONG_MISSING], dict);
/* Warn if the actual amount of data per case differs from the
amount that the header claims. SPSS version 13 gets this
read_extension_record_header (struct sfm_reader *r, int subtype,
struct sfm_extension_record *record)
{
+ record->subtype = subtype;
record->pos = r->pos;
record->size = read_int (r);
record->count = read_int (r);
{ EXT_INTEGER, 4, 8 },
{ EXT_FLOAT, 8, 3 },
{ EXT_MRSETS, 1, 0 },
+ { EXT_PRODUCT_INFO, 1, 0 },
{ EXT_DISPLAY, 4, 0 },
{ EXT_LONG_NAMES, 1, 0 },
{ EXT_LONG_STRINGS, 1, 0 },
{ EXT_MRSETS2, 1, 0 },
{ EXT_ENCODING, 1, 0 },
{ EXT_LONG_LABELS, 1, 0 },
+ { EXT_LONG_MISSING, 1, 0 },
/* Ignored record types. */
{ EXT_VAR_SETS, 0, 0 },
{ EXT_DATE, 0, 0 },
{ EXT_DATA_ENTRY, 0, 0 },
+ { EXT_DATAVIEW, 0, 0 },
};
const struct extension_record_type *type;
const char *dict_encoding = dict_get_encoding (dict);
struct substring product;
struct substring label;
+ char *fixed_label;
/* Convert file label to UTF-8 and put it into DICT. */
label = recode_substring_pool ("UTF-8", dict_encoding,
ss_cstr (header->file_label), r->pool);
ss_trim (&label, ss_cstr (" "));
label.string[label.length] = '\0';
- dict_set_label (dict, label.string);
+ fixed_label = fix_line_ends (label.string);
+ dict_set_label (dict, fixed_label);
+ free (fixed_label);
/* Put creation date and time in UTF-8 into INFO. */
info->creation_date = recode_string ("UTF-8", dict_encoding,
{
double low = parse_float (r, rec->missing, 0);
double high = parse_float (r, rec->missing, 8);
+
+ /* Deal with SPSS 21 change in representation. */
+ if (low == SYSMIS)
+ low = LOWEST;
+
mv_add_range (&mv, low, high);
ofs += 16;
}
value_init_pool (r->pool, &value, width);
value_set_missing (&value, width);
for (i = 0; i < rec->missing_value_code; i++)
- {
- uint8_t *s = value_str_rw (&value, width);
- memcpy (s, rec->missing + 8 * i, MIN (width, 8));
- mv_add_str (&mv, s);
- }
+ mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
}
var_set_missing_values (var, &mv);
}
double lowest = parse_float (r, record->data, 16);
if (sysmis != SYSMIS)
- sys_warn (r, record->pos, _("File specifies unexpected value %g as %s."),
- sysmis, "SYSMIS");
+ sys_warn (r, record->pos,
+ _("File specifies unexpected value %g (%a) as %s, "
+ "instead of %g (%a)."),
+ sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
if (highest != HIGHEST)
- sys_warn (r, record->pos, _("File specifies unexpected value %g as %s."),
- highest, "HIGHEST");
+ sys_warn (r, record->pos,
+ _("File specifies unexpected value %g (%a) as %s, "
+ "instead of %g (%a)."),
+ highest, highest, "HIGHEST", HIGHEST, HIGHEST);
+
+ /* SPSS before version 21 used a unique value just bigger than SYSMIS as
+ LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
+ appears in a context (missing values) where SYSMIS cannot. */
+ if (lowest != LOWEST && lowest != SYSMIS)
+ sys_warn (r, record->pos,
+ _("File specifies unexpected value %g (%a) as %s, "
+ "instead of %g (%a) or %g (%a)."),
+ lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
+}
+
+/* Parses record type 7, subtype 10 (EXT_PRODUCT_INFO, the extra product
+   info text).  Opens RECORD as a text record, makes a malloc()'d copy of
+   its entire text in which lone CRs and CR LF pairs are replaced by LFs,
+   stores that copy in INFO->product_ext, and closes the text record. */
+static void
+parse_extra_product_info (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ struct sfm_read_info *info)
+{
+ struct text_record *text;
- if (lowest != LOWEST)
- sys_warn (r, record->pos, _("File specifies unexpected value %g as %s."),
- lowest, "LOWEST");
+ text = open_text_record (r, record, true);
+ info->product_ext = fix_line_ends (text_get_all (text));
+ close_text_record (r, text);
}
/* Parses record type 7, subtype 7 or 19. */
mrset->width = width;
value_init (&mrset->counted, width);
if (width == 0)
- mrset->counted.f = strtod (counted, NULL);
+ mrset->counted.f = c_strtod (counted, NULL);
else
value_copy_str_rpad (&mrset->counted, width,
(const uint8_t *) counted, ' ');
align = parse_int (r, record->data, ofs);
ofs += 4;
- /* SPSS 14 sometimes seems to set string variables' measure
- to zero. */
- if (0 == measure && var_is_alpha (v))
+ /* SPSS sometimes seems to set variables' measure to zero. */
+ if (0 == measure)
measure = 1;
if (measure < 1 || measure > 3 || align < 0 || align > 2)
if (record == NULL)
{
- /* Convert variable names to lowercase. */
+ /* There are no long variable names. Use the short variable names,
+ converted to lowercase, as the long variable names. */
size_t i;
for (i = 0; i < dict_get_var_cnt (dict); i++)
struct variable *var = dict_get_var (dict, i);
char *new_name;
- new_name = xstrdup (var_get_name (var));
- str_lowercase (new_name);
-
+ new_name = utf8_to_lower (var_get_name (var));
rename_var_and_save_short_names (dict, var, new_name);
-
free (new_name);
}
}
/* Identify any duplicates. */
- if (strcasecmp (var_get_short_name (var, 0), long_name)
+ if (utf8_strcasecmp (var_get_short_name (var, 0), long_name)
&& dict_lookup_var (dict, long_name) != NULL)
{
sys_warn (r, record->pos,
char **utf8_labels;
size_t i;
- utf8_labels = pool_nmalloc (r->pool, sizeof *utf8_labels, record->n_labels);
+ utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
for (i = 0; i < record->n_labels; i++)
utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
record->labels[i].label, -1,
close_text_record (r, text);
}
+/* Assigns a role to every variable in DICT that has a "$@Role" attribute,
+   based on the integer in that attribute's first value:
+
+     0 = ROLE_INPUT, 1 = ROLE_TARGET, 2 = ROLE_BOTH,
+     3 = ROLE_NONE, 4 = ROLE_PARTITION, 5 = ROLE_SPLIT.
+
+   Any other value, including a missing value or text that does not start
+   with a number, is invalid: the variable gets ROLE_INPUT, the first such
+   variable is named in a warning, and one further warning reports how many
+   other variables were affected.
+
+   Variables without a "$@Role" attribute are left untouched. */
+static void
+assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
+{
+  size_t n_warnings = 0;
+  size_t i;
+
+  for (i = 0; i < dict_get_var_cnt (dict); i++)
+    {
+      struct variable *var = dict_get_var (dict, i);
+      struct attrset *attrs = var_get_attributes (var);
+      const struct attribute *attr = attrset_lookup (attrs, "$@Role");
+      const char *text;
+      enum var_role role;
+      long int value;
+
+      if (attr == NULL)
+        continue;
+
+      /* Use strtol() instead of atoi() so that text without any digits is
+         detected instead of silently read as 0, and so that out-of-range
+         text yields LONG_MIN or LONG_MAX (both invalid roles) rather than
+         undefined behavior.  Guard against a null value defensively, in
+         case the attribute has no values at all. */
+      text = attribute_get_value (attr, 0);
+      value = -1;
+      if (text != NULL)
+        {
+          char *tail;
+
+          value = strtol (text, &tail, 10);
+          if (tail == text)
+            value = -1;         /* No digits: treat as invalid. */
+        }
+
+      switch (value)
+        {
+        case 0:
+          role = ROLE_INPUT;
+          break;
+
+        case 1:
+          role = ROLE_TARGET;
+          break;
+
+        case 2:
+          role = ROLE_BOTH;
+          break;
+
+        case 3:
+          role = ROLE_NONE;
+          break;
+
+        case 4:
+          role = ROLE_PARTITION;
+          break;
+
+        case 5:
+          role = ROLE_SPLIT;
+          break;
+
+        default:
+          /* Fall back to the input role and warn by name only once. */
+          role = ROLE_INPUT;
+          if (n_warnings++ == 0)
+            sys_warn (r, -1, _("Invalid role for variable %s."),
+                      var_get_name (var));
+          break;
+        }
+
+      var_set_role (var, role);
+    }
+
+  if (n_warnings > 1)
+    sys_warn (r, -1, _("%zu other variables had invalid roles."),
+              n_warnings - 1);
+}
+
static void
check_overflow (struct sfm_reader *r,
const struct sfm_extension_record *record,
size_t end = record->size * record->count;
if (length >= end || ofs + length > end)
sys_error (r, record->pos + end,
- _("Long string value label record ends unexpectedly."));
+ _("Extension record subtype %d ends unexpectedly."),
+ record->subtype);
}
static void
var = dict_lookup_var (dict, var_name);
if (var == NULL)
sys_warn (r, record->pos + ofs,
- _("Ignoring long string value record for "
+ _("Ignoring long string value label record for "
"unknown variable %s."), var_name);
else if (var_is_numeric (var))
{
sys_warn (r, record->pos + ofs,
- _("Ignoring long string value record for "
+ _("Ignoring long string value label record for "
"numeric variable %s."), var_name);
var = NULL;
}
else if (width != var_get_width (var))
{
sys_warn (r, record->pos + ofs,
- _("Ignoring long string value record for variable %s "
- "because the record's width (%d) does not match the "
+ _("Ignoring long string value label record for variable "
+ "%s because the record's width (%d) does not match the "
"variable's width (%d)."),
var_name, width, var_get_width (var));
var = NULL;
else
{
sys_warn (r, record->pos + ofs,
- _("Ignoring long string value %zu for variable "
- "%s, with width %d, that has bad value "
+ _("Ignoring long string value label %zu for "
+ "variable %s, with width %d, that has bad value "
"width %zu."),
i, var_get_name (var), width, value_length);
skip = true;
}
}
}
+
+/* Parses RECORD as a sequence of long string missing value entries and
+   applies them to the variables in DICT.
+
+   Each entry is read as: a 4-byte variable name length, the variable name
+   (recoded from DICT's encoding to UTF-8), a 1-byte count of missing
+   values, and then, for each missing value, a 4-byte length followed by
+   that many bytes of value data.
+
+   Entries that name an unknown or numeric variable are still parsed, so
+   that the following entry is found, but are otherwise ignored with a
+   warning.  No more than the first 3 values of an entry are added. */
+static void
+parse_long_string_missing_values (struct sfm_reader *r,
+                                  const struct sfm_extension_record *record,
+                                  struct dictionary *dict)
+{
+  const uint8_t *bytes = (const uint8_t *) record->data;
+  const char *encoding = dict_get_encoding (dict);
+  size_t record_len = record->size * record->count;
+  size_t pos;
+
+  for (pos = 0; pos < record_len; )
+    {
+      struct missing_values missing;
+      struct variable *var;
+      char *name;
+      int name_len;
+      int n_values;
+      int width;
+      size_t idx;
+
+      /* Variable name length. */
+      check_overflow (r, record, pos, 4);
+      name_len = parse_int (r, record->data, pos);
+      pos += 4;
+
+      /* Variable name.  The extra byte checked here is the count of
+         missing values that immediately follows the name. */
+      check_overflow (r, record, pos, name_len + 1);
+      name = recode_string_pool ("UTF-8", encoding,
+                                 (const char *) bytes + pos,
+                                 name_len, r->pool);
+      pos += name_len;
+
+      /* Number of missing values. */
+      n_values = bytes[pos];
+      if (!(n_values >= 1 && n_values <= 3))
+        sys_warn (r, record->pos + pos,
+                  _("Long string missing values record says variable %s "
+                    "has %d missing values, but only 1 to 3 missing values "
+                    "are allowed."),
+                  name, n_values);
+      pos++;
+
+      /* Find the variable; a null VAR below means "parse but ignore". */
+      var = dict_lookup_var (dict, name);
+      if (var == NULL)
+        sys_warn (r, record->pos + pos,
+                  _("Ignoring long string missing value record for "
+                    "unknown variable %s."), name);
+      else if (var_is_numeric (var))
+        {
+          sys_warn (r, record->pos + pos,
+                    _("Ignoring long string missing value record for "
+                      "numeric variable %s."), name);
+          var = NULL;
+        }
+
+      /* Collect the values. */
+      width = var != NULL ? var_get_width (var) : 8;
+      mv_init_pool (r->pool, &missing, width);
+      for (idx = 0; idx < n_values; idx++)
+        {
+          size_t len;
+
+          /* Value length. */
+          check_overflow (r, record, pos, 4);
+          len = parse_int (r, record->data, pos);
+          pos += 4;
+
+          /* Value data. */
+          check_overflow (r, record, pos, len);
+          if (var != NULL && idx < 3)
+            {
+              if (!mv_add_str (&missing, bytes + pos, len))
+                sys_warn (r, record->pos + pos,
+                          _("Ignoring long string missing value %zu for variable "
+                            "%s, with width %d, that has bad value width %zu."),
+                          idx, var_get_name (var), var_get_width (var),
+                          len);
+            }
+          pos += len;
+        }
+
+      if (var != NULL)
+        var_set_missing_values (var, &missing);
+    }
+}
\f
/* Case reader. */
{
return text->pos;
}
+
+/* Returns a pointer to the start of TEXT's buffer string.  TEXT's current
+   position is neither consulted nor changed. */
+static const char *
+text_get_all (const struct text_record *text)
+{
+  const char *all = text->buffer.string;
+
+  return all;
+}
\f
/* Messages. */
bytes -= chunk;
}
}
+
+/* Returns a newly allocated copy of S, which the caller must free, in which
+   every CR LF pair and every CR that stands alone has been turned into a
+   single LF.  All other bytes are copied unchanged.
+
+   (System files written by a product that identifies itself as VOXCO
+   INTERVIEWER 4.3 use CR-only line ends in the file label and extra product
+   info.) */
+static char *
+fix_line_ends (const char *s)
+{
+  /* The result is never longer than the input, so one buffer of the
+     input's size always suffices. */
+  char *copy = xmalloc (strlen (s) + 1);
+  char *out = copy;
+  const char *in = s;
+
+  for (;;)
+    {
+      char c = *in;
+
+      if (c == '\0')
+        break;
+      in++;
+
+      if (c == '\r')
+        {
+          /* Swallow the LF of a CR LF pair so that it yields one LF. */
+          if (*in == '\n')
+            in++;
+          c = '\n';
+        }
+      *out++ = c;
+    }
+  *out = '\0';
+
+  return copy;
+}
\f
static const struct casereader_class sys_file_casereader_class =
{