/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
double bias; /* Compression bias, usually 100.0. */
uint8_t opcodes[8]; /* Current block of opcodes. */
size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
+ bool corruption_warning; /* Warned about possible corruption? */
};
static const struct casereader_class sys_file_casereader_class;
r->oct_cnt = 0;
r->has_long_var_names = false;
r->opcode_idx = sizeof r->opcodes;
+ r->corruption_warning = false;
/* TRANSLATORS: this fragment will be interpolated into
messages in fh_lock() that identify types of files. */
read_bytes (r, raw_bias, sizeof raw_bias);
if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
{
- sys_warn (r, _("Compression bias (%g) is not the usual "
- "value of 100, or system file uses unrecognized "
- "floating-point format."),
- r->bias);
+ uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ if (memcmp (raw_bias, zero_bias, 8))
+ sys_warn (r, _("Compression bias is not the usual "
+ "value of 100, or system file uses unrecognized "
+ "floating-point format."));
+ else
+ {
+ /* Some software is known to write all-zeros to this
+ field. Such software also writes floating-point
+ numbers in the format that we expect by default
+ (it seems that all software most likely does, in
+ reality), so don't warn in this case. */
+ }
+
if (r->integer_format == INTEGER_MSB_FIRST)
r->float_format = FLOAT_IEEE_DOUBLE_BE;
else
for (i = 0; i < missing_value_code; i++)
mv_add_num (&mv, read_float (r));
}
- else if (var_get_width (var) <= MAX_SHORT_STRING)
+ else
{
if (missing_value_code < 1 || missing_value_code > 3)
sys_error (r, _("String missing value indicator field is not "
"0, 1, 2, or 3."));
+ if (var_is_long_string (var))
+ sys_warn (r, _("Ignoring missing values on long string variable "
+ "%s, which PSPP does not yet support."), name);
for (i = 0; i < missing_value_code; i++)
{
char string[9];
mv_add_str (&mv, string);
}
}
- else
- sys_error (r, _("Long string variable %s may not have missing "
- "values."), name);
- var_set_missing_values (var, &mv);
+ if (!var_is_long_string (var))
+ var_set_missing_values (var, &mv);
}
/* Set formats. */
break;
case 7:
- /* Unknown purpose. */
+ /* Used by the MRSETS command. */
+ break;
+
+ case 8:
+ /* Used by the SPSS Data Entry software. */
break;
case 11:
SPSS 14. */
break;
+ case 20:
+ /* New in SPSS 16. Contains a single string that describes
+ the character encoding, e.g. "windows-1252". */
+ break;
+
+ case 21:
+ /* New in SPSS 16. Encodes value labels for long string
+ variables. */
+ sys_warn (r, _("Ignoring value labels for long string variables, "
+ "which PSPP does not yet support."));
+ break;
+
default:
- sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
+ sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
+ subtype, PACKAGE_BUGREPORT);
break;
}
NOT_REACHED ();
if (integer_representation != expected_integer_format)
{
- static const char *endian[] = {N_("little-endian"), N_("big-endian")};
+ static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
sys_warn (r, _("Integer format indicated by system file (%s) "
"differs from expected (%s)."),
gettext (endian[integer_representation == 1]),
break;
case 254:
- sys_error (r, _("Compressed data is corrupt."));
+ float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
+ if (!r->corruption_warning)
+ {
+ r->corruption_warning = true;
+ sys_warn (r, _("Possible compressed data corruption: "
+ "compressed spaces appear in numeric field."));
+ }
+ break;
case 255:
*d = SYSMIS;
static bool
read_compressed_string (struct sfm_reader *r, char *dst)
{
- switch (read_opcode (r))
+ int opcode = read_opcode (r);
+ switch (opcode)
{
case -1:
case 252:
break;
default:
- sys_error (r, _("Compressed data is corrupt."));
+ {
+ double value = opcode - r->bias;
+ float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
+ if (value == 0.0)
+ {
+ /* This has actually been seen "in the wild". The submitter of the
+ file that showed this reported that the contents decoded as spaces,
+ but they were at the end of the field so it's possible that the null
+ bytes just acted as null terminators. */
+ }
+ else if (!r->corruption_warning)
+ {
+ r->corruption_warning = true;
+ sys_warn (r, _("Possible compressed data corruption: "
+ "string contains compressed integer (opcode %d)"),
+ opcode);
+ }
+ }
+ break;
}
return true;