#include <setjmp.h>
#include <stdlib.h>
+#include <libpspp/i18n.h>
#include <libpspp/assertion.h>
#include <libpspp/message.h>
#include <libpspp/compiler.h>
static void read_variable_attributes (struct sfm_reader *,
size_t size, size_t count,
struct dictionary *);
+static void read_long_string_value_labels (struct sfm_reader *,
+ size_t size, size_t count,
+ struct dictionary *);
+
+/* Convert all the strings in DICT from the dict encoding to UTF8 */
+static void
+recode_strings (struct dictionary *dict)
+{
+ int i;
+
+ const char *enc = dict_get_encoding (dict);
+
+ if ( NULL == enc)
+ enc = get_default_encoding ();
+
+ for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
+ {
+ /* Convert the long variable name */
+ struct variable *var = dict_get_var (dict, i);
+ const char *native_name = var_get_name (var);
+ char *utf8_name = recode_string (UTF8, enc, native_name, -1);
+ if ( 0 != strcmp (utf8_name, native_name))
+ {
+ if ( NULL == dict_lookup_var (dict, utf8_name))
+ dict_rename_var (dict, var, utf8_name);
+ else
+ msg (MW,
+ _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name);
+ }
+
+ free (utf8_name);
+
+ /* Convert the variable label */
+ if (var_has_label (var))
+ {
+ char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
+ var_set_label (var, utf8_label);
+ free (utf8_label);
+ }
+
+ if (var_has_value_labels (var))
+ {
+ const struct val_lab *vl = NULL;
+ const struct val_labs *vlabs = var_get_value_labels (var);
+
+ for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
+ {
+ const union value *val = val_lab_get_value (vl);
+ const char *label = val_lab_get_label (vl);
+ char *new_label = NULL;
+
+ new_label = recode_string (UTF8, enc, label, -1);
+
+ var_replace_value_label (var, val, new_label);
+ free (new_label);
+ }
+ }
+ }
+}
/* Opens the system file designated by file handle FH for
reading. Reads the system file's dictionary into *DICT.
r->has_long_var_names = true;
}
+ recode_strings (*dict);
+
/* Read record 999 data, which is just filler. */
read_int (r);
read_bytes (r, raw_bias, sizeof raw_bias);
if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
{
- sys_warn (r, _("Compression bias is not the usual "
- "value of 100, or system file uses unrecognized "
- "floating-point format."));
+ uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ if (memcmp (raw_bias, zero_bias, 8))
+ sys_warn (r, _("Compression bias is not the usual "
+ "value of 100, or system file uses unrecognized "
+ "floating-point format."));
+ else
+ {
+ /* Some software is known to write all-zeros to this
+ field. Such software also writes floating-point
+ numbers in the format that we expect by default
+ (it seems that all software most likely does, in
+ reality), so don't warn in this case. */
+ }
+
if (r->integer_format == INTEGER_MSB_FIRST)
r->float_format = FLOAT_IEEE_DOUBLE_BE;
else
/* Create variable. */
if (width < 0 || width > 255)
- sys_error (r, _("Bad variable width %d."), width);
+ sys_error (r, _("Bad width %d for variable %s."), width, name);
var = dict_create_var (dict, name, width);
if (var == NULL)
sys_error (r,
struct missing_values mv;
int i;
- mv_init (&mv, var_get_width (var));
+ mv_init_pool (r->pool, &mv, var_get_width (var));
if (var_is_numeric (var))
{
if (missing_value_code < -3 || missing_value_code > 3
}
else
{
+ int mv_width = MAX (width, 8);
+ union value value;
+
if (missing_value_code < 1 || missing_value_code > 3)
sys_error (r, _("String missing value indicator field is not "
"0, 1, 2, or 3."));
- if (var_is_long_string (var))
- sys_warn (r, _("Ignoring missing values on long string variable "
- "%s, which PSPP does not yet support."), name);
+
+ value_init (&value, mv_width);
+ value_set_missing (&value, mv_width);
for (i = 0; i < missing_value_code; i++)
{
- char string[9];
- read_string (r, string, sizeof string);
- mv_add_str (&mv, string);
+ uint8_t *s = value_str_rw (&value, mv_width);
+ read_bytes (r, s, 8);
+ mv_add_str (&mv, s);
}
+ value_destroy (&value, mv_width);
}
- if (!var_is_long_string (var))
- var_set_missing_values (var, &mv);
+ var_set_missing_values (var, &mv);
}
/* Set formats. */
/* New in SPSS 16. Contains a single string that describes
the character encoding, e.g. "windows-1252". */
{
- char *encoding = xcalloc (size, count + 1);
+ char *encoding = pool_calloc (r->pool, size, count + 1);
read_string (r, encoding, count + 1);
dict_set_encoding (dict, encoding);
return;
case 21:
/* New in SPSS 16. Encodes value labels for long string
variables. */
- sys_warn (r, _("Ignoring value labels for long string variables, "
- "which PSPP does not yet support."));
- break;
+ read_long_string_value_labels (r, size, count, dict);
+ return;
default:
sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
NOT_REACHED ();
if (integer_representation != expected_integer_format)
{
- static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
+ static const char *const endian[] = {N_("Little Endian"), N_("Big Endian")};
sys_warn (r, _("Integer format indicated by system file (%s) "
"differs from expected (%s)."),
gettext (endian[integer_representation == 1]),
size, count);
if (sysmis != SYSMIS)
- sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ sysmis, "SYSMIS");
+
if (highest != HIGHEST)
- sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ highest, "HIGHEST");
+
if (lowest != LOWEST)
- sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
+ sys_warn (r, _("File specifies unexpected value %g as %s."),
+ lowest, "LOWEST");
}
/* Read record type 7, subtype 11, which specifies how variables
struct label
{
- char raw_value[8]; /* Value as uninterpreted bytes. */
+ uint8_t raw_value[8]; /* Value as uninterpreted bytes. */
union value value; /* Value. */
char *label; /* Null-terminated label string. */
};
for (i = 0; i < var_cnt; i++)
{
var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
- if (var_is_long_string (var[i]))
- sys_error (r, _("Value labels are not allowed on long string "
- "variables (%s)."), var_get_name (var[i]));
+ if (var_get_width (var[i]) > 8)
+ sys_error (r, _("Value labels may not be added to long string "
+ "variables (e.g. %s) using records types 3 and 4."),
+ var_get_name (var[i]));
max_width = MAX (max_width, var_get_width (var[i]));
}
value_init_pool (subpool, &label->value, max_width);
if (var_is_alpha (var[0]))
- buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
+ u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
label->raw_value, sizeof label->raw_value, ' ');
else
label->value.f = float_get_double (r->float_format, label->raw_value);
close_text_record (r, text);
}
+static void
+skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
+{
+ size_t i;
+
+ for (i = 0; i < n_labels; i++)
+ {
+ size_t value_length, label_length;
+
+ value_length = read_int (r);
+ skip_bytes (r, value_length);
+ label_length = read_int (r);
+ skip_bytes (r, label_length);
+ }
+}
+
+static void
+read_long_string_value_labels (struct sfm_reader *r,
+ size_t size, size_t count,
+ struct dictionary *d)
+{
+ const off_t start = ftello (r->file);
+ while (ftello (r->file) - start < size * count)
+ {
+ char var_name[VAR_NAME_LEN + 1];
+ size_t n_labels, i;
+ struct variable *v;
+ union value value;
+ int var_name_len;
+ int width;
+
+ /* Read header. */
+ var_name_len = read_int (r);
+ if (var_name_len > VAR_NAME_LEN)
+ sys_error (r, _("Variable name length in long string value label "
+ "record (%d) exceeds %d-byte limit."),
+ var_name_len, VAR_NAME_LEN);
+ read_string (r, var_name, var_name_len + 1);
+ width = read_int (r);
+ n_labels = read_int (r);
+
+ v = dict_lookup_var (d, var_name);
+ if (v == NULL)
+ {
+ sys_warn (r, _("Ignoring long string value record for "
+ "unknown variable %s."), var_name);
+ skip_long_string_value_labels (r, n_labels);
+ continue;
+ }
+ if (var_is_numeric (v))
+ {
+ sys_warn (r, _("Ignoring long string value record for "
+ "numeric variable %s."), var_name);
+ skip_long_string_value_labels (r, n_labels);
+ continue;
+ }
+ if (width != var_get_width (v))
+ {
+ sys_warn (r, _("Ignoring long string value record for variable %s "
+ "because the record's width (%d) does not match the "
+ "variable's width (%d)"),
+ var_name, width, var_get_width (v));
+ skip_long_string_value_labels (r, n_labels);
+ continue;
+ }
+
+ /* Read values. */
+ value_init_pool (r->pool, &value, width);
+ for (i = 0; i < n_labels; i++)
+ {
+ size_t value_length, label_length;
+ char label[256];
+ bool skip = false;
+
+ /* Read value. */
+ value_length = read_int (r);
+ if (value_length == width)
+ read_bytes (r, value_str_rw (&value, width), width);
+ else
+ {
+ sys_warn (r, _("Ignoring long string value %zu for variable %s, "
+ "with width %d, that has bad value width %zu."),
+ i, var_get_name (v), width, value_length);
+ skip_bytes (r, value_length);
+ skip = true;
+ }
+
+ /* Read label. */
+ label_length = read_int (r);
+ read_string (r, label, MIN (sizeof label, label_length + 1));
+ if (label_length >= sizeof label)
+ {
+ /* Skip and silently ignore label text after the
+ first 255 bytes. The maximum documented length
+ of a label is 120 bytes so this is more than
+ generous. */
+ skip_bytes (r, sizeof label - (label_length + 1));
+ }
+
+ if (!skip && !var_add_value_label (v, &value, label))
+ sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
+ width, value_str (&value, width), var_get_name (v));
+ }
+ }
+}
+
+
/* Reads record type 7, subtype 18, which lists custom
attributes on individual variables. */
static void
static void read_error (struct casereader *, const struct sfm_reader *);
static bool read_case_number (struct sfm_reader *, double *);
-static bool read_case_string (struct sfm_reader *, char *, size_t);
+static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
static int read_opcode (struct sfm_reader *);
static bool read_compressed_number (struct sfm_reader *, double *);
-static bool read_compressed_string (struct sfm_reader *, char *);
-static bool read_whole_strings (struct sfm_reader *, char *, size_t);
+static bool read_compressed_string (struct sfm_reader *, uint8_t *);
+static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
static bool skip_whole_strings (struct sfm_reader *, size_t);
/* Reads and returns one case from READER's file. Returns a null
}
else
{
- char *s = value_str_rw (v, sv->var_width);
+ uint8_t *s = value_str_rw (v, sv->var_width);
if (!read_case_string (r, s + sv->offset, sv->segment_width))
goto eof;
if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
Returns true if successful, false if end of file is
reached immediately. */
static bool
-read_case_string (struct sfm_reader *r, char *s, size_t length)
+read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
{
size_t whole = ROUND_DOWN (length, 8);
size_t partial = length % 8;
if (partial)
{
- char bounce[8];
+ uint8_t bounce[8];
if (!read_whole_strings (r, bounce, sizeof bounce))
{
if (whole)
Returns true if successful, false if end of file is
reached immediately. */
static bool
-read_compressed_string (struct sfm_reader *r, char *dst)
+read_compressed_string (struct sfm_reader *r, uint8_t *dst)
{
switch (read_opcode (r))
{
Returns true if successful, false if end of file is
reached immediately. */
static bool
-read_whole_strings (struct sfm_reader *r, char *s, size_t length)
+read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
{
assert (length % 8 == 0);
if (!r->compressed)
static bool
skip_whole_strings (struct sfm_reader *r, size_t length)
{
- char buffer[1024];
+ uint8_t buffer[1024];
assert (length < sizeof buffer);
return read_whole_strings (r, buffer, length);
}