From 553937316cf8ad2d290d0aff14f067aee8e139dc Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 3 Aug 2013 12:20:46 -0700 Subject: [PATCH] sys-file-reader: Handle SPSS 21 change in representation of LOWEST. Thanks to Mindaugus for reporting this issue. Bug #39502. --- doc/dev/system-file-format.texi | 28 ++++++++++++++++++++++------ src/data/sys-file-reader.c | 14 +++++++++++--- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 8315762cef..e98b718883 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -33,12 +33,28 @@ floating-point numbers, and translates as needed. However, only IEEE has actually been observed in system files, and it is likely that other formats are obsolete or were never used. -The PSPP system-missing value is represented by the largest possible -negative number in the floating point format (@code{-DBL_MAX}). Two -other values are important for use as missing values: @code{HIGHEST}, -represented by the largest possible positive number (@code{DBL_MAX}), -and @code{LOWEST}, represented by the second-largest negative number -(in IEEE 754 format, @code{0xffeffffffffffffe}). +System files use a few floating point values for special purposes: + +@table @asis +@item SYSMIS +The system-missing value is represented by the most negative finite +number in the floating point format (@code{-DBL_MAX}). + +@item HIGHEST +HIGHEST is used as the high end of a missing value range with an +unbounded maximum. It is represented by the largest finite positive +number (@code{DBL_MAX}). + +@item LOWEST +LOWEST is used as the low end of a missing value range with an +unbounded minimum. It was originally represented by the +second most negative finite number (in IEEE 754 format, +@code{0xffeffffffffffffe}). System files written by SPSS 21 and later +instead use the most negative finite number (@code{-DBL_MAX}), the same +value as SYSMIS.
This does not lead to ambiguity because LOWEST +appears in system files only in missing value ranges, which never +contain SYSMIS. +@end table System files are divided into records, each of which begins with a 4-byte record type, usually regarded as an @code{int32}. diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index c9d4912973..a03a317a45 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1040,6 +1040,11 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, { double low = parse_float (r, rec->missing, 0); double high = parse_float (r, rec->missing, 8); + + /* Deal with SPSS 21 change in representation. */ + if (low == SYSMIS) + low = LOWEST; + mv_add_range (&mv, low, high); ofs += 16; } @@ -1269,11 +1274,14 @@ parse_machine_float_info (struct sfm_reader *r, "instead of %g (%a)."), highest, highest, "HIGHEST", HIGHEST, HIGHEST); - if (lowest != LOWEST) + /* SPSS before version 21 used a unique value just bigger than SYSMIS as + LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only + appears in a context (missing values) where SYSMIS cannot. */ + if (lowest != LOWEST && lowest != SYSMIS) sys_warn (r, record->pos, _("File specifies unexpected value %g (%a) as %s, " - "instead of %g (%a)."), - lowest, lowest, "LOWEST", LOWEST, LOWEST); + "instead of %g (%a) or %g (%a)."), + lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS); } /* Parses record type 7, subtype 7 or 19. */ -- 2.30.2