int32 var_name_len;
char var_name[];
char n_missing_values;
-long_string_missing_value values[];
+int32 value_len;
+char values[values_len * n_missing_values];
@end example
@table @code
The number of missing values, either 1, 2, or 3. (This is, unusually,
a single byte instead of a 32-bit number.)
-@item long_string_missing_value values[];
-The missing values themselves. This array contains exactly
-@code{n_missing_values} elements, each of which has the following
-substructure:
-
-@example
-int32 value_len;
-char value[];
-@end example
-
-@table @code
@item int32 value_len;
-The length of the missing value string, in bytes. This value should
+The length of each missing value string, in bytes. This value should
be 8, because long string variables are at least 8 bytes wide (by
definition), only the first 8 bytes of a long string variable's
missing values are allowed to be non-spaces, and any spaces within the
first 8 bytes are included in the missing value here.
-@item char value[];
-The missing value string, exactly @code{value_len} bytes, without
-any padding or null terminator.
-@end table
+@item char values[values_len * n_missing_values]
+The missing values themselves, without any padding or null
+terminators.
@end table
+An earlier version of this document stated that @code{value_len} was
+repeated before each of the missing values, so that there was an extra
+@code{int32} value of 8 before each missing value after the first.
+Old versions of PSPP wrote data files in this format. Readers can
+tolerate this mistake, if they wish, by noticing and skipping the
+extra @code{int32} values, which wouldn't ordinarily occur in strings.
+
@node Data File and Variable Attributes Records
@section Data File and Variable Attributes Records
}
static bool
-check_overflow (struct sfm_reader *r,
- const struct sfm_extension_record *record,
- size_t ofs, size_t length)
+check_overflow__ (const struct sfm_extension_record *record,
+ size_t ofs, size_t length)
{
size_t end = record->size * record->count;
if (length >= end || ofs + length > end)
- {
- sys_warn (r, record->pos + end,
- _("Extension record subtype %d ends unexpectedly."),
- record->subtype);
- return false;
- }
+ return false;
return true;
}
+static bool
+check_overflow (struct sfm_reader *r,
+ const struct sfm_extension_record *record,
+ size_t ofs, size_t length)
+{
+ bool ok = check_overflow__ (record, ofs, length);
+ if (!ok)
+ sys_warn (r, record->pos + record->size * record->count,
+ _("Extension record subtype %d ends unexpectedly."),
+ record->subtype);
+ return ok;
+}
+
static void
parse_long_string_value_labels (struct sfm_reader *r,
const struct sfm_extension_record *record,
size_t end = record->size * record->count;
size_t ofs = 0;
+ bool warned = false;
while (ofs < end)
{
struct missing_values mv;
var = NULL;
}
+ /* Parse value length. */
+ if (!check_overflow (r, record, ofs, 4))
+ return;
+ size_t value_length = parse_int (r, record->data, ofs);
+ ofs += 4;
+
/* Parse values. */
mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
for (i = 0; i < n_missing_values; i++)
{
- size_t value_length;
-
- /* Parse value length. */
- if (!check_overflow (r, record, ofs, 4))
- return;
- value_length = parse_int (r, record->data, ofs);
- ofs += 4;
+ /* Tolerate files written by old, buggy versions of PSPP where we
+ believed that the value_length was repeated before each missing
+ value. */
+ if (check_overflow__ (record, ofs, value_length)
+ && parse_int (r, record->data, ofs) == 8)
+ {
+ if (!warned)
+ {
+ sys_warn (r, record->pos + ofs,
+ _("This file has corrupted metadata written by a "
+ "buggy version of PSPP. To fix it, save a new "
+ "copy of the file."));
+ warned = true;
+ }
+ ofs += 4;
+ }
/* Parse value. */
if (!check_overflow (r, record, ofs, value_length))
size += 4;
size += recode_string_len (encoding, "UTF-8", var_get_name (var), -1);
size += 1;
- size += mv_n_values (mv) * (4 + 8);
+ size += 4;
+ size += mv_n_values (mv) * 8;
}
if (size == 0)
return;
n_missing_values = mv_n_values (mv);
write_bytes (w, &n_missing_values, 1);
+ write_int (w, 8);
+
for (j = 0; j < n_missing_values; j++)
{
const union value *value = mv_get_value (mv, j);
-
- write_int (w, 8);
write_bytes (w, value->s, 8);
}
}
COUNT("STR8"); i8 1; 8; "abcdefgh";
dnl Two missing values for STR9.
-COUNT("STR9"); i8 2; 8; "abcdefgh"; 8; "01234567";
+COUNT("STR9"); i8 2; 8; "abcdefgh"; "01234567";
dnl Three missing values for STR9.
-COUNT("STR10"); i8 3; 8; "abcdefgh"; 8; "01234567"; 8; "0 ";
+COUNT("STR10"); i8 3; 8; "abcdefgh"; "01234567"; "0 ";
);
dnl Character encoding record.
dnl Long string variable missing values record.
7; 22; 1; COUNT (
dnl Zero missing values (not allowed) for STR1 .
-COUNT("STR1"); i8 >>0<<;
+COUNT("STR1"); i8 >>0<<; 8;
dnl Four missing values (not allowed) for STR2.
-COUNT("STR2"); i8 4;
-8; "abcdefgh"; 8; "ijklmnop"; 8; "qrstuvwx"; 8; "yz012345";
+COUNT("STR2"); i8 4; 8;
+"abcdefgh"; "ijklmnop"; "qrstuvwx"; "yz012345";
dnl Missing values for unknown variable
COUNT(>>"Nonexistent"<<); i8 1; 8; "abcdefgh";
dnl Too long missing value
COUNT("STR3"); i8 1; >>COUNT("abcdefghijkl")<<;
+
+dnl Buggy way that this was written in old PSPP, with a length
+dnl before each missing value instead of just once.
+COUNT("STR3"); i8 2; 8; "ABCDEFGH"; >>8<<; "IJKLMNOP";
);
dnl Character encoding record.
AT_CHECK([pspp -O format=csv sys-file.sps], [0],
["warning: `sys-file.sav' near offset 0x1f8: Long string missing values record says variable STR1 has 0 missing values, but only 1 to 3 missing values are allowed."
-"warning: `sys-file.sav' near offset 0x201: Long string missing values record says variable STR2 has 4 missing values, but only 1 to 3 missing values are allowed."
+"warning: `sys-file.sav' near offset 0x205: Long string missing values record says variable STR2 has 4 missing values, but only 1 to 3 missing values are allowed."
+
+warning: `sys-file.sav' near offset 0x23a: Ignoring long string missing value record for unknown variable Nonexistent.
-warning: `sys-file.sav' near offset 0x242: Ignoring long string missing value record for unknown variable Nonexistent.
+warning: `sys-file.sav' near offset 0x24f: Ignoring long string missing value record for numeric variable NUM1.
-warning: `sys-file.sav' near offset 0x257: Ignoring long string missing value record for numeric variable NUM1.
+"warning: `sys-file.sav' near offset 0x268: Ignoring long string missing value 0 for variable str3, with width 11, that has bad value width 12."
-"warning: `sys-file.sav' near offset 0x270: Ignoring long string missing value 0 for variable str3, with width 11, that has bad value width 12."
+"warning: `sys-file.sav' near offset 0x289: This file has corrupted metadata written by a buggy version of PSPP. To fix it, save a new copy of the file."
Table: Variables
Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format,Missing Values
num1,1,Unknown,Input,8,Right,F8.0,F8.0,
str1,2,Nominal,Input,9,Left,A9,A9,
str2,3,Nominal,Input,10,Left,A10,A10,"""abcdefgh""; ""ijklmnop""; ""qrstuvwx"""
-str3,4,Nominal,Input,11,Left,A11,A11,
+str3,4,Nominal,Input,11,Left,A11,A11,"""ABCDEFGH""; ""IJKLMNOP"""
])
done
AT_CLEANUP
while (ftello (r->file) - start < size * count)
{
long long posn = ftello (r->file);
- char var_name[ID_MAX_LEN + 1];
- uint8_t n_missing_values;
- int var_name_len;
- int i;
/* Read variable name. */
- var_name_len = read_int (r);
+ int var_name_len = read_int (r);
if (var_name_len > ID_MAX_LEN)
sys_error (r, "Variable name length in long string value label "
"record (%d) exceeds %d-byte limit.",
var_name_len, ID_MAX_LEN);
+ char var_name[ID_MAX_LEN + 1];
read_string (r, var_name, var_name_len + 1);
/* Read number of values. */
+ uint8_t n_missing_values;
read_bytes (r, &n_missing_values, 1);
- printf ("\t%08llx: %s, %d missing values:",
- posn, var_name, n_missing_values);
+ /* Read value length. */
+ int value_length = read_int (r);
+
+ printf ("\t%08llx: %s, %d missing values, each %d bytes:",
+ posn, var_name, n_missing_values, value_length);
/* Read values. */
- for (i = 0; i < n_missing_values; i++)
+ for (int i = 0; i < n_missing_values; i++)
{
- char *value;
- int value_length;
-
posn = ftello (r->file);
/* Read value. */
- value_length = read_int (r);
- value = xmalloc (value_length + 1);
+ char *value = xmalloc (value_length + 1);
read_string (r, value, value_length + 1);
printf (" \"%s\"", value);