From 8e27b1a0dba7f33b7acb0d8894efe2045b0bb98f Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 29 Jul 2017 16:06:24 -0700 Subject: [PATCH] sys-file-reader: Accept document records with no document lines. This allows PSPP to accept system files that appear to be produced by the ReadStat software at https://github.com/WizardMac/ReadStat. Thanks to ftr for reporting this bug via pspp-users at: http://lists.gnu.org/archive/html/pspp-users/2017-07/msg00002.html --- doc/dev/system-file-format.texi | 5 ++- src/data/sys-file-reader.c | 33 +++++++------- tests/data/sys-file-reader.at | 77 ++++++++++++++++++++------------- 3 files changed, 68 insertions(+), 47 deletions(-) diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 7cc0342c4c..fb131f5274 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -605,7 +605,10 @@ char lines[][80]; Record type. Always set to 6. @item int32 n_lines; -Number of lines of documents present. +Number of lines of documents present. This should be greater than +zero, but the system file writer that identifies itself as +@url{https://github.com/WizardMac/ReadStat} writes document records +with zero @code{n_lines}. @item char lines[][80]; Document lines. The number of elements is defined by @code{n_lines}. diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index d1676564d8..54788deb6e 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -282,7 +282,7 @@ static bool read_variable_record (struct sfm_reader *, struct sfm_var_record *); static bool read_value_label_record (struct sfm_reader *, struct sfm_value_label_record *); -static struct sfm_document_record *read_document_record (struct sfm_reader *); +static bool read_document_record (struct sfm_reader *); static bool read_extension_record (struct sfm_reader *, int subtype, struct sfm_extension_record **); static bool skip_extension_record (struct sfm_reader *, int subtype); @@ -500,8 +500,7 @@ read_record (struct sfm_reader *r, int type, sys_error (r, r->pos, _("Duplicate type 6 (document) record.")); return false; } - r->document = read_document_record (r); - return r->document != NULL; + return read_document_record (r); case 7: if (!read_int (r, &subtype)) @@ -1229,33 +1228,35 @@ read_value_label_record (struct sfm_reader *r, return true; } -/* Reads a document record from R and returns it. */ -static struct sfm_document_record * +/* Reads a document record from R. Returns true if successful, false on + error. */ +static bool read_document_record (struct sfm_reader *r) { - struct sfm_document_record *record; int n_lines; - - record = pool_malloc (r->pool, sizeof *record); - record->pos = r->pos; - if (!read_int (r, &n_lines)) - return NULL; - if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH) + return false; + else if (n_lines == 0) + return true; + else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH) { - sys_error (r, record->pos, + sys_error (r, r->pos, _("Number of document lines (%d) " "must be greater than 0 and less than %d."), n_lines, INT_MAX / DOC_LINE_LENGTH); - return NULL; + return false; } + struct sfm_document_record *record; + record = pool_malloc (r->pool, sizeof *record); + record->pos = r->pos; record->n_lines = n_lines; record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines); if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines)) - return NULL; + return false; - return record; + r->document = record; + return true; } static bool diff --git a/tests/data/sys-file-reader.at b/tests/data/sys-file-reader.at index 669ed5b969..be4782284c 100644 --- a/tests/data/sys-file-reader.at +++ b/tests/data/sys-file-reader.at @@ -584,6 +584,53 @@ num1 done AT_CLEANUP +AT_SETUP([empty document record]) +AT_KEYWORDS([sack synthetic system file positive]) +AT_DATA([sys-file.sack], [dnl +dnl File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; dnl Layout code +1; dnl Nominal case size +0; dnl Not compressed +0; dnl Not weighted +1; dnl 1 case. +100.0; dnl Bias. +"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +i8 0 *3; + +dnl Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; + +dnl Document record. +6; 0; + +dnl Character encoding record. +7; 20; 1; 12; "windows-1252"; + +dnl Dictionary termination record. +999; 0; + +dnl Data. +1.0; +]) +for variant in be le; do + AT_CHECK([sack --$variant sys-file.sack > sys-file.sav]) + AT_DATA([sys-file.sps], [dnl +GET FILE='sys-file.sav'. +LIST. +]) + AT_CHECK([pspp -o pspp.csv sys-file.sps]) + AT_CHECK([cat pspp.csv], [0], [dnl +Table: Data List +num1 +1 +]) +done +AT_CLEANUP + AT_SETUP([multiple response sets]) AT_KEYWORDS([sack synthetic system file positive]) AT_DATA([sys-file.sack], [dnl @@ -2213,36 +2260,6 @@ done AT_CLEANUP -AT_SETUP([empty document record]) -AT_KEYWORDS([sack synthetic system file negative]) -AT_DATA([sys-file.sack], [dnl -dnl File header. -"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; -2; 2; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3; - -dnl Numeric variable, no label or missing values. -2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; - -dnl Empty document record. -6; >>0<<; - -dnl Dictionary termination record. -999; 0; - -dnl Data. -1.0; -]) -for variant in be le; do - AT_CHECK([sack --$variant sys-file.sack > sys-file.sav]) - AT_DATA([sys-file.sps], [dnl -GET FILE='sys-file.sav'. -]) - AT_CHECK([pspp -O format=csv sys-file.sps], [1], [dnl -error: `sys-file.sav' near offset 0xd4: Number of document lines (0) must be greater than 0 and less than 26843545. -]) -done -AT_CLEANUP - AT_SETUP([extension record too large]) AT_KEYWORDS([sack synthetic system file negative]) AT_DATA([sys-file.sack], [dnl -- 2.30.2