From: Ben Pfaff Date: Sat, 20 Feb 2016 18:12:48 +0000 (-0800) Subject: sys-file-reader: Read system files with multiple subtype-18 extensions. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=e4f726ad61233531b155b62f0c99ff667b2c2e11 sys-file-reader: Read system files with multiple subtype-18 extensions. These are written by "Stata 14.1/-savespss- 1.77 by S.Radyakin". I think it's really a bug in that writer, but if SPSS accepts these files then I guess PSPP should too. Reported by news. Bug #47089. --- diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index ff4c603451..7cc0342c4c 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -161,6 +161,11 @@ Document record, if present. Extension (type 7) records, in ascending numerical order of their subtypes. +System files written by SPSS include at most one of each kind of +extension record. This is generally true of system files written by +other software as well, with known exceptions noted below in the +individual sections about each type of record. + @item Dictionary termination record. @@ -1360,7 +1365,7 @@ The total number of bytes in @code{attributes}. @item char attributes[]; The attributes, in a text-based format. -In record type 17, this field contains a single attribute set. An +In record subtype 17, this field contains a single attribute set. An attribute set is a sequence of one or more attributes concatenated together. Each attribute consists of a name, which has the same syntax as a variable name, followed by, inside parentheses, a sequence @@ -1372,13 +1377,17 @@ way to embed a line feed in a value. There is no distinction between an attribute with a single value and an attribute array with one element. -In record type 18, this field contains a sequence of one or more +In record subtype 18, this field contains a sequence of one or more variable attribute sets. 
If more than one variable attribute set is present, each one after the first is delimited from the previous by @code{/}. Each variable attribute set consists of a long variable name, followed by @code{:}, followed by an attribute set with the same -syntax as on record type 17. +syntax as on record subtype 17. + +System files written by @code{Stata 14.1/-savespss- 1.77 by +S.Radyakin} may include multiple records with subtype 18, one per +variable that has variable attributes. The total length is @code{count} bytes. @end table diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 8a747b7a57..57e1dc822c 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -45,6 +45,7 @@ #include "libpspp/assertion.h" #include "libpspp/compiler.h" #include "libpspp/i18n.h" +#include "libpspp/ll.h" #include "libpspp/message.h" #include "libpspp/misc.h" #include "libpspp/pool.h" @@ -158,6 +159,7 @@ struct sfm_mrset struct sfm_extension_record { + struct ll ll; /* In struct sfm_reader 'var_attrs' list. */ int subtype; /* Record subtype. */ off_t pos; /* Starting offset in file. */ unsigned int size; /* Size of data elements. */ @@ -184,6 +186,7 @@ struct sfm_reader struct sfm_mrset *mrsets; size_t n_mrsets; struct sfm_extension_record *extensions[32]; + struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */ /* File state. */ struct file_handle *fh; /* File handle. */ @@ -401,6 +404,7 @@ sfm_open (struct file_handle *fh) pool_register (r->pool, free, r); r->fh = fh_ref (fh); r->opcode_idx = sizeof r->opcodes; + ll_init (&r->var_attrs); /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock() that identify types of files. 
*/ @@ -513,6 +517,17 @@ read_record (struct sfm_reader *r, int type, subtype, PACKAGE_BUGREPORT, PACKAGE_STRING); return skip_extension_record (r, subtype); } + else if (subtype == 18) + { + /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin" + put each variable attribute into a separate record with subtype + 18. I'm surprised that SPSS puts up with this. */ + struct sfm_extension_record *ext; + bool ok = read_extension_record (r, subtype, &ext); + if (ok) + ll_push_tail (&r->var_attrs, &ext->ll); + return ok; + } else if (r->extensions[subtype] != NULL) { sys_warn (r, r->pos, @@ -719,7 +734,6 @@ sfm_get_strings (const struct any_reader *r_, struct pool *pool, mrset_idx); } - /* */ /* data file attributes */ /* variable attributes */ /* long var map */ @@ -830,14 +844,15 @@ sfm_decode (struct any_reader *r_, const char *encoding, parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict); /* The following records use long names, so they need to follow renaming. */ - if (r->extensions[EXT_VAR_ATTRS] != NULL) + if (!ll_is_empty (&r->var_attrs)) { - parse_variable_attributes (r, r->extensions[EXT_VAR_ATTRS], dict); + struct sfm_extension_record *ext; + ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs) + parse_variable_attributes (r, ext, dict); /* Roles use the $@Role attribute. */ assign_variable_roles (r, dict); } - if (r->extensions[EXT_LONG_LABELS] != NULL) parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict); if (r->extensions[EXT_LONG_MISSING] != NULL) diff --git a/tests/data/sys-file-reader.at b/tests/data/sys-file-reader.at index 7bd0ddf644..a00ef2b46c 100644 --- a/tests/data/sys-file-reader.at +++ b/tests/data/sys-file-reader.at @@ -1096,7 +1096,7 @@ AT_DATA([sys-file.sack], [dnl dnl File header. "$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; 2; dnl Layout code -2; dnl Nominal case size +3; dnl Nominal case size 0; dnl Not compressed 0; dnl Not weighted 0; dnl 1 case. 
@@ -1107,6 +1107,7 @@ i8 0 *3; dnl Variables. 2; 0; 0; 0; 0x050800 *2; s8 "FIRSTVAR"; 2; 0; 0; 0; 0x050800 *2; s8 "SECONDVA"; +2; 0; 0; 0; 0x050800 *2; s8 "THIRDVAR"; dnl Machine integer info record. 7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; @@ -1115,6 +1116,7 @@ dnl Long variable names. 7; 13; 1; COUNT ( "FIRSTVAR=FirstVariable"; i8 9; "SECONDVA=S"; i8 233; "condVariable"; i8 9; +"THIRDVAR=ThirdVariable"; i8 9 ); dnl Data file attributes record. @@ -1133,6 +1135,10 @@ dnl Variable attributes record. "xyzzy('quux'"; i8 10; ")"; ); +dnl Another variable attributes record. +dnl Only system files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin" +dnl include multiple variable attributes records. +7; 18; 1; COUNT ("ThirdVariable:fizz('buzz'"; i8 10; ")";); dnl Character encoding record. 7; 20; 1; 12; "windows-1252"; @@ -1156,6 +1162,8 @@ adèle[2],34 bert,123" SécondVariable,"Attribute,Value xyzzy,quux" +ThirdVariable,"Attribute,Value +fizz,buzz" Table: Custom data file attributes. Attribute,Value @@ -1183,6 +1191,11 @@ Role: Input Attribute,Value xyzzy,quux",2 +ThirdVariable,"Format: F8.0 +Role: Input + +Attribute,Value +fizz,buzz",3 Table: Custom data file attributes. Attribute,Value