From: Ben Pfaff Date: Fri, 20 Sep 2013 06:39:53 +0000 (-0700) Subject: sys-file-reader: Add support for record 7, subtype 10. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fbuilds%2F20130920030502%2Fpspp;p=pspp sys-file-reader: Add support for record 7, subtype 10. Reported by Christoph Ruip with .sav file attached here: http://lists.gnu.org/archive/html/bug-gnu-pspp/2013-09/msg00044.html --- diff --git a/NEWS b/NEWS index 9076779070..e37115b36d 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,9 @@ Changes after 0.8.0: - System files written by IBM SPSS 21 are now read without warnings. + - System files written by "VOXCO INTERVIEWER 4.3" are now read + without warnings. + - PSPPIRE should now more gracefully handle syntax files that contain errors. diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index 9bb6361440..52737fb943 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -96,6 +96,7 @@ Each type of record is described separately below. * Machine Integer Info Record:: * Machine Floating-Point Info Record:: * Multiple Response Sets Records:: +* Extra Product Info Record:: * Variable Display Parameter Record:: * Long Variable Names Record:: * Very Long String Record:: @@ -203,6 +204,10 @@ field is arbitrarily set to @samp{00:00:00}. File label declared by the user, if any (@pxref{FILE LABEL,,,pspp, PSPP Users Guide}). Padded on the right with spaces. +A product that identifies itself as @code{VOXCO INTERVIEWER 4.3} uses +CR-only line ends in this field, rather than the more usual LF-only or +CR LF line ends. + @item char padding[3]; Ignored padding bytes to make the structure a multiple of 32 bits in length. Set to zeros. @@ -780,6 +785,44 @@ $d=E 1 2 34 13 third mdgroup k l m $e=E 11 6 choice 0 n o p @end example +@node Extra Product Info Record +@section Extra Product Info Record + +This optional record appears to contain a text string that describes +the program that wrote the file and the source of the data. (This is +redundant with the file label and product info found in the file +header record.) + +@example +/* @r{Header.} */ +int32 rec_type; +int32 subtype; +int32 size; +int32 count; + +/* @r{Exactly @code{count} bytes of data.} */ +char info[]; +@end example + +@table @code +@item int32 rec_type; +Record type. Always set to 7. + +@item int32 subtype; +Record subtype. Always set to 10. + +@item int32 size; +The size of each element in the @code{info} member. Always set to 1. + +@item int32 count; +The total number of bytes in @code{info}. + +@item char info[]; +A text string. A product that identifies itself as @code{VOXCO +INTERVIEWER 4.3} uses CR-only line ends in this field, rather than the +more usual LF-only or CR LF line ends. +@end table + @node Variable Display Parameter Record @section Variable Display Parameter Record diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 4138fa9128..5b654a1c05 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -72,7 +72,8 @@ enum EXT_DATE = 6, /* DATE. */ EXT_MRSETS = 7, /* Multiple response sets. */ EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */ - /* subtypes 9-10 unknown */ + /* subtype 9 unknown */ + EXT_PRODUCT_INFO = 10, /* Extra product info text. */ EXT_DISPLAY = 11, /* Variable display parameters. */ /* subtype 12 unknown */ EXT_LONG_NAMES = 13, /* Long variable names. */ @@ -201,6 +202,8 @@ static double read_float (struct sfm_reader *); static void read_string (struct sfm_reader *, char *, size_t); static void skip_bytes (struct sfm_reader *, size_t); +static char *fix_line_ends (const char *); + static int parse_int (struct sfm_reader *, const void *data, size_t ofs); static double parse_float (struct sfm_reader *, const void *data, size_t ofs); @@ -246,6 +249,7 @@ static bool text_read_short_name (struct sfm_reader *, struct dictionary *, static const char *text_parse_counted_string (struct sfm_reader *, struct text_record *); static size_t text_pos (const struct text_record *); +static const char *text_get_all (const struct text_record *); static bool close_reader (struct sfm_reader *r); @@ -276,6 +280,9 @@ static void parse_machine_integer_info (struct sfm_reader *, struct sfm_read_info *); static void parse_machine_float_info (struct sfm_reader *, const struct sfm_extension_record *); +static void parse_extra_product_info (struct sfm_reader *, + const struct sfm_extension_record *, + struct sfm_read_info *); static void parse_mrsets (struct sfm_reader *, const struct sfm_extension_record *, struct dictionary *); @@ -309,6 +316,7 @@ sfm_read_info_destroy (struct sfm_read_info *info) free (info->creation_date); free (info->creation_time); free (info->product); + free (info->product_ext); } } @@ -479,6 +487,9 @@ sfm_open_reader (struct file_handle *fh, const char *volatile encoding, if (extensions[EXT_FLOAT] != NULL) parse_machine_float_info (r, extensions[EXT_FLOAT]); + if (extensions[EXT_PRODUCT_INFO] != NULL) + parse_extra_product_info (r, extensions[EXT_PRODUCT_INFO], info); + if (extensions[EXT_FILE_ATTRS] != NULL) parse_data_file_attributes (r, extensions[EXT_FILE_ATTRS], dict); @@ -880,6 +891,7 @@ read_extension_record (struct sfm_reader *r, int subtype) { EXT_INTEGER, 4, 8 }, { EXT_FLOAT, 8, 3 }, { EXT_MRSETS, 1, 0 }, + { EXT_PRODUCT_INFO, 1, 0 }, { EXT_DISPLAY, 4, 0 }, { EXT_LONG_NAMES, 1, 0 }, { EXT_LONG_STRINGS, 1, 0 }, @@ -959,13 +971,16 @@ parse_header (struct sfm_reader *r, const struct sfm_header_record *header, const char *dict_encoding = dict_get_encoding (dict); struct substring product; struct substring label; + char *fixed_label; /* Convert file label to UTF-8 and put it into DICT. */ label = recode_substring_pool ("UTF-8", dict_encoding, ss_cstr (header->file_label), r->pool); ss_trim (&label, ss_cstr (" ")); label.string[label.length] = '\0'; - dict_set_label (dict, label.string); + fixed_label = fix_line_ends (label.string); + dict_set_label (dict, fixed_label); + free (fixed_label); /* Put creation date and time in UTF-8 into INFO. */ info->creation_date = recode_string ("UTF-8", dict_encoding, @@ -1292,6 +1307,19 @@ parse_machine_float_info (struct sfm_reader *r, lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS); } +/* Parses record type 7, subtype 10. */ +static void +parse_extra_product_info (struct sfm_reader *r, + const struct sfm_extension_record *record, + struct sfm_read_info *info) +{ + struct text_record *text; + + text = open_text_record (r, record, true); + info->product_ext = fix_line_ends (text_get_all (text)); + close_text_record (r, text); +} + /* Parses record type 7, subtype 7 or 19. */ static void parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record, @@ -2589,6 +2617,12 @@ text_pos (const struct text_record *text) { return text->pos; } + +static const char * +text_get_all (const struct text_record *text) +{ + return text->buffer.string; +} /* Messages. */ @@ -2741,6 +2775,35 @@ skip_bytes (struct sfm_reader *r, size_t bytes) bytes -= chunk; } } + +/* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have + been replaced by LFs. + + (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system + files that use CR-only line ends in the file label and extra product + info.) */ +static char * +fix_line_ends (const char *s) +{ + char *dst, *d; + + d = dst = xmalloc (strlen (s) + 1); + while (*s != '\0') + { + if (*s == '\r') + { + s++; + if (*s == '\n') + s++; + *d++ = '\n'; + } + else + *d++ = *s++; + } + *d = '\0'; + + return dst; +} static const struct casereader_class sys_file_casereader_class = { diff --git a/src/data/sys-file-reader.h b/src/data/sys-file-reader.h index a8f16e10db..037d33a394 100644 --- a/src/data/sys-file-reader.h +++ b/src/data/sys-file-reader.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2009, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,6 +39,7 @@ struct sfm_read_info bool compressed; /* 0=no, 1=yes. */ casenumber case_cnt; /* -1 if unknown. */ char *product; /* Product name. */ + char *product_ext; /* Extra product info. */ /* Writer's version number in X.Y.Z format. The version number is not always present; if not, then diff --git a/src/language/dictionary/sys-file-info.c b/src/language/dictionary/sys-file-info.c index 60ce4d4686..3327a2c4ca 100644 --- a/src/language/dictionary/sys-file-info.c +++ b/src/language/dictionary/sys-file-info.c @@ -89,7 +89,7 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED) } casereader_destroy (reader); - t = tab_create (2, 11); + t = tab_create (2, 11 + (info.product_ext != NULL)); r = 0; tab_vline (t, TAL_GAP, 1, 0, 8); @@ -108,6 +108,12 @@ cmd_sysfile_info (struct lexer *lexer, struct dataset *ds UNUSED) tab_text_format (t, 1, r++, TAB_LEFT, "%s %s by %s", info.creation_date, info.creation_time, info.product); + if (info.product_ext) + { + tab_text (t, 0, r, TAB_LEFT, _("Product:")); + tab_text (t, 1, r++, TAB_LEFT, info.product_ext); + } + tab_text (t, 0, r, TAB_LEFT, _("Integer Format:")); tab_text (t, 1, r++, TAB_LEFT, info.integer_format == INTEGER_MSB_FIRST ? _("Big Endian") diff --git a/tests/data/sys-file-reader.at b/tests/data/sys-file-reader.at index 1a376bdc63..602ae76c1a 100644 --- a/tests/data/sys-file-reader.at +++ b/tests/data/sys-file-reader.at @@ -640,6 +640,57 @@ Category label source: Value labels of counted value done AT_CLEANUP +dnl Also checks for handling of CR-only line ends in file label and +dnl extra product info. +AT_SETUP([extra product info]) +AT_KEYWORDS([sack synthetic system file positive]) +AT_DATA([sys-file.sack], [dnl +dnl File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; dnl Layout code +4; dnl Nominal case size +0; dnl Not compressed +0; dnl Not weighted +0; dnl No cases. +100.0; dnl Bias. +"01 Jan 11"; "20:53:52"; "PSPP synthetic"; i8 13; s49 "test file"; +i8 0 *3; + +dnl Numeric variables. +2; 0; 0; 0; 0x050800 *2; s8 "A"; +2; 0; 0; 0; 0x050800 *2; s8 "B"; +2; 0; 0; 0; 0x050800 *2; s8 "C"; +2; 0; 0; 0; 0x050800 *2; s8 "D"; + +dnl Extra product info. +7; 10; 1; COUNT ("Extra product info"; i8 13; "another line"; i8 13; "blah"); + +dnl Dictionary termination record. +999; 0; +]) +for variant in \ + "be 0e1cac77501322b012637dcaeb3858ab" \ + "le ecffd25cae41bbc89c29487abe192016" +do + set $variant + AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2] +]) + AT_DATA([sys-file.sps], [dnl +SYSFILE INFO FILE='sys-file.sav'. +]) + AT_CHECK([pspp -o pspp.csv sys-file.sps]) + AT_CHECK([sed 7q pspp.csv], [0], [dnl +File:,sys-file.sav +Label:,"PSPP synthetic +test file" +Created:,01 Jan 11 20:53:52 by $(@%:@) SPSS DATA FILE PSPP synthetic test file +Product:,"Extra product info +another line +blah" +]) +done +AT_CLEANUP + AT_SETUP([variable display parameters, without width]) AT_KEYWORDS([sack synthetic system file positive]) AT_DATA([sys-file.sack], [dnl diff --git a/utilities/pspp-dump-sav.c b/utilities/pspp-dump-sav.c index a6ed8d1567..307add8db3 100644 --- a/utilities/pspp-dump-sav.c +++ b/utilities/pspp-dump-sav.c @@ -65,6 +65,8 @@ static void read_machine_integer_info (struct sfm_reader *, size_t size, size_t count); static void read_machine_float_info (struct sfm_reader *, size_t size, size_t count); +static void read_extra_product_info (struct sfm_reader *, + size_t size, size_t count); static void read_mrsets (struct sfm_reader *, size_t size, size_t count); static void read_display_parameters (struct sfm_reader *, size_t size, size_t count); @@ -94,6 +96,7 @@ static char *text_tokenize (struct text_record *, int delimiter); static bool text_match (struct text_record *text, int c); static const char *text_parse_counted_string (struct text_record *); static size_t text_pos (const struct text_record *); +static const char *text_get_all (const struct text_record *); static void usage (void); static void sys_warn (struct sfm_reader *, const char *, ...) @@ -111,6 +114,8 @@ static void read_string (struct sfm_reader *, char *, size_t); static void skip_bytes (struct sfm_reader *, size_t); static void trim_spaces (char *); +static void print_string (const char *s, size_t len); + int main (int argc, char *argv[]) { @@ -574,6 +579,10 @@ read_extension_record (struct sfm_reader *r) read_mrsets (r, size, count); return; + case 10: + read_extra_product_info (r, size, count); + return; + case 11: read_display_parameters (r, size, count); return; @@ -679,6 +688,20 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) lowest, lowest, "LOWEST"); } +static void +read_extra_product_info (struct sfm_reader *r, + size_t size, size_t count) +{ + struct text_record *text; + const char *s; + + printf ("%08llx: extra product info\n", (long long int) ftello (r->file)); + text = open_text_record (r, size * count); + s = text_get_all (text); + print_string (s, strlen (s)); + close_text_record (text); +} + /* Read record type 7, subtype 7. */ static void read_mrsets (struct sfm_reader *r, size_t size, size_t count) @@ -1069,23 +1092,7 @@ read_unknown_extension (struct sfm_reader *r, size_t size, size_t count) { buffer = xmalloc (count); read_bytes (r, buffer, count); - if (memchr (buffer, 0, count) == 0) - { - for (i = 0; i < count; i++) - { - unsigned char c = buffer[i]; - - if (c == '\\') - printf ("\\\\"); - else if (c == '\n' || isprint (c)) - putchar (c); - else - printf ("\\%02x", c); - } - putchar ('\n'); - } - else - hex_dump (0, buffer, count); + print_string (CHAR_CAST (char *, buffer), count); free (buffer); } } @@ -1333,6 +1340,12 @@ text_pos (const struct text_record *text) { return text->pos; } + +static const char * +text_get_all (const struct text_record *text) +{ + return text->buffer; +} static void usage (void) @@ -1485,3 +1498,27 @@ trim_spaces (char *s) end--; *end = '\0'; } + +static void +print_string (const char *s, size_t len) +{ + if (memchr (s, 0, len) == 0) + { + size_t i; + + for (i = 0; i < len; i++) + { + unsigned char c = s[i]; + + if (c == '\\') + printf ("\\\\"); + else if (c == '\n' || isprint (c)) + putchar (c); + else + printf ("\\%02x", c); + } + putchar ('\n'); + } + else + hex_dump (0, s, len); +}