1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/assertion.h>
29 #include <libpspp/message.h>
30 #include <libpspp/compiler.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/pool.h>
33 #include <libpspp/str.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/array.h>
37 #include <data/case.h>
38 #include <data/casereader-provider.h>
39 #include <data/casereader.h>
40 #include <data/dictionary.h>
41 #include <data/file-handle-def.h>
42 #include <data/file-name.h>
43 #include <data/format.h>
44 #include <data/missing-values.h>
45 #include <data/value-labels.h>
46 #include <data/variable.h>
47 #include <data/value.h>
52 #include "unlocked-io.h"
57 #define _(msgid) gettext (msgid)
58 #define N_(msgid) (msgid)
60 /* System file reader. */
63 /* Resource tracking. */
64 struct pool *pool; /* All system file state. */
65 jmp_buf bail_out; /* longjmp() target for error handling. */
68 struct file_handle *fh; /* File handle. */
69 FILE *file; /* File stream. */
70 bool error; /* I/O or corruption error? */
71 size_t value_cnt; /* Number of "union value"s in struct case. */
74 enum integer_format integer_format; /* On-disk integer format. */
75 enum float_format float_format; /* On-disk floating point format. */
76 int oct_cnt; /* Number of 8-byte units per case. */
77 struct sfm_var *sfm_vars; /* Variables. */
78 size_t sfm_var_cnt; /* Number of variables. */
79 casenumber case_cnt; /* Number of cases */
80 bool has_long_var_names; /* File has a long variable name map */
83 bool compressed; /* File is compressed? */
84 double bias; /* Compression bias, usually 100.0. */
85 uint8_t opcodes[8]; /* Current block of opcodes. */
86 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
89 static struct casereader_class sys_file_casereader_class;
91 static bool close_reader (struct sfm_reader *);
93 static struct variable **make_var_by_value_idx (struct sfm_reader *,
95 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
99 static void sys_warn (struct sfm_reader *, const char *, ...)
100 PRINTF_FORMAT (2, 3);
102 static void sys_error (struct sfm_reader *, const char *, ...)
106 static void read_bytes (struct sfm_reader *, void *, size_t);
107 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
108 static int read_int (struct sfm_reader *);
109 static double read_float (struct sfm_reader *);
110 static void read_string (struct sfm_reader *, char *, size_t);
111 static void skip_bytes (struct sfm_reader *, size_t);
113 static struct variable_to_value_map *open_variable_to_value_map (
114 struct sfm_reader *, size_t size);
115 static void close_variable_to_value_map (struct sfm_reader *r,
116 struct variable_to_value_map *);
117 static bool read_variable_to_value_map (struct sfm_reader *,
119 struct variable_to_value_map *,
120 struct variable **var, char **value,
123 static bool close_reader (struct sfm_reader *r);
125 /* Dictionary reader. */
133 static void read_header (struct sfm_reader *, struct dictionary *,
134 int *weight_idx, int *claimed_oct_cnt,
135 struct sfm_read_info *);
136 static void read_variable_record (struct sfm_reader *, struct dictionary *,
137 int *format_warning_cnt);
138 static void parse_format_spec (struct sfm_reader *, unsigned int,
139 enum which_format, struct variable *,
140 int *format_warning_cnt);
141 static void setup_weight (struct sfm_reader *, int weight_idx,
142 struct variable **var_by_value_idx,
143 struct dictionary *);
144 static void read_documents (struct sfm_reader *, struct dictionary *);
145 static void read_value_labels (struct sfm_reader *, struct dictionary *,
146 struct variable **var_by_value_idx);
148 static void read_extension_record (struct sfm_reader *, struct dictionary *,
149 struct sfm_read_info *);
150 static void read_machine_integer_info (struct sfm_reader *,
151 size_t size, size_t count,
152 struct sfm_read_info *);
153 static void read_machine_float_info (struct sfm_reader *,
154 size_t size, size_t count);
155 static void read_display_parameters (struct sfm_reader *,
156 size_t size, size_t count,
157 struct dictionary *);
158 static void read_long_var_name_map (struct sfm_reader *,
159 size_t size, size_t count,
160 struct dictionary *);
161 static void read_long_string_map (struct sfm_reader *,
162 size_t size, size_t count,
163 struct dictionary *);
166 /* Opens the system file designated by file handle FH for
167 reading. Reads the system file's dictionary into *DICT.
168 If INFO is non-null, then it receives additional info about the
171 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
172 struct sfm_read_info *volatile info)
174 struct sfm_reader *volatile r = NULL;
175 struct variable **var_by_value_idx;
176 struct sfm_read_info local_info;
177 int format_warning_cnt = 0;
182 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
185 *dict = dict_create ();
187 /* Create and initialize reader. */
188 r = pool_create_container (struct sfm_reader, pool);
190 r->file = fn_open (fh_get_file_name (fh), "rb");
193 r->has_long_var_names = false;
194 r->opcode_idx = sizeof r->opcodes;
196 /* Initialize info. */
199 memset (info, 0, sizeof *info);
201 if (setjmp (r->bail_out))
204 dict_destroy (*dict);
211 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
212 fh_get_file_name (r->fh), strerror (errno));
213 longjmp (r->bail_out, 1);
217 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
219 /* Read all the variable definition records. */
220 rec_type = read_int (r);
221 while (rec_type == 2)
223 read_variable_record (r, *dict, &format_warning_cnt);
224 rec_type = read_int (r);
227 /* Figure out the case format. */
228 var_by_value_idx = make_var_by_value_idx (r, *dict);
229 setup_weight (r, weight_idx, var_by_value_idx, *dict);
231 /* Read all the rest of the dictionary records. */
232 while (rec_type != 999)
237 read_value_labels (r, *dict, var_by_value_idx);
241 sys_error (r, _("Misplaced type 4 record."));
244 read_documents (r, *dict);
248 read_extension_record (r, *dict, info);
252 sys_error (r, _("Unrecognized record type %d."), rec_type);
254 rec_type = read_int (r);
258 if ( ! r->has_long_var_names )
261 for (i = 0; i < dict_get_var_cnt (*dict); i++)
263 struct variable *var = dict_get_var (*dict, i);
264 char short_name [SHORT_NAME_LEN + 1];
265 char long_name [SHORT_NAME_LEN + 1];
267 strcpy (short_name, var_get_name (var));
269 strcpy (long_name, short_name);
270 str_lowercase (long_name);
272 /* Set long name. Renaming a variable may clear the short
273 name, but we want to retain it, so re-set it
275 dict_rename_var (*dict, var, long_name);
276 var_set_short_name (var, 0, short_name);
279 r->has_long_var_names = true;
282 /* Read record 999 data, which is just filler. */
285 /* Warn if the actual amount of data per case differs from the
286 amount that the header claims. SPSS version 13 gets this
287 wrong when very long strings are involved, so don't warn in
289 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
290 && info->version_major != 13)
291 sys_warn (r, _("File header claims %d variable positions but "
292 "%d were read from file."),
293 claimed_oct_cnt, r->oct_cnt);
295 /* Create an index of dictionary variable widths for
296 sfm_read_case to use. We cannot use the `struct variable's
297 from the dictionary we created, because the caller owns the
298 dictionary and may destroy or modify its variables. */
299 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
300 pool_register (r->pool, free, r->sfm_vars);
302 pool_free (r->pool, var_by_value_idx);
303 r->value_cnt = dict_get_next_value_idx (*dict);
304 return casereader_create_sequential
306 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
307 &sys_file_casereader_class, r);
310 /* Closes a system file after we're done with it.
311 Returns true if an I/O error has occurred on READER, false
314 close_reader (struct sfm_reader *r)
323 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
325 msg (ME, _("Error closing system file \"%s\": %s."),
326 fh_get_file_name (r->fh), strerror (errno));
333 fh_close (r->fh, "system file", "rs");
336 pool_destroy (r->pool);
341 /* Destroys READER. */
343 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
345 struct sfm_reader *r = r_;
349 /* Returns true if FILE is an SPSS system file,
352 sfm_detect (FILE *file)
356 if (fread (rec_type, 4, 1, file) != 1)
360 return !strcmp ("$FL2", rec_type);
363 /* Reads the global header of the system file.
364 Sets DICT's file label to the system file's label.
365 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
366 or to the value index of the weight variable otherwise.
367 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
368 per case that the file claims to have (although it is not
370 Initializes INFO with header information. */
372 read_header (struct sfm_reader *r, struct dictionary *dict,
373 int *weight_idx, int *claimed_oct_cnt,
374 struct sfm_read_info *info)
377 char eye_catcher[61];
378 uint8_t raw_layout_code[4];
380 char creation_date[10];
381 char creation_time[9];
383 struct substring file_label_ss;
384 struct substring product;
386 read_string (r, rec_type, sizeof rec_type);
387 read_string (r, eye_catcher, sizeof eye_catcher);
389 if (strcmp ("$FL2", rec_type) != 0)
390 sys_error (r, _("This is not an SPSS system file."));
392 /* Identify integer format. */
393 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
394 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
396 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
398 || (r->integer_format != INTEGER_MSB_FIRST
399 && r->integer_format != INTEGER_LSB_FIRST))
400 sys_error (r, _("This is not an SPSS system file."));
402 *claimed_oct_cnt = read_int (r);
403 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
404 *claimed_oct_cnt = -1;
406 r->compressed = read_int (r) != 0;
408 *weight_idx = read_int (r);
410 r->case_cnt = read_int (r);
411 if ( r->case_cnt > INT_MAX / 2)
415 /* Identify floating-point format and obtain compression bias. */
416 read_bytes (r, raw_bias, sizeof raw_bias);
417 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
419 sys_warn (r, _("Compression bias (%g) is not the usual "
420 "value of 100, or system file uses unrecognized "
421 "floating-point format."),
423 if (r->integer_format == INTEGER_MSB_FIRST)
424 r->float_format = FLOAT_IEEE_DOUBLE_BE;
426 r->float_format = FLOAT_IEEE_DOUBLE_LE;
428 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
430 read_string (r, creation_date, sizeof creation_date);
431 read_string (r, creation_time, sizeof creation_time);
432 read_string (r, file_label, sizeof file_label);
435 file_label_ss = ss_cstr (file_label);
436 ss_trim (&file_label_ss, ss_cstr (" "));
437 if (!ss_is_empty (file_label_ss))
439 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
440 dict_set_label (dict, ss_data (file_label_ss));
443 strcpy (info->creation_date, creation_date);
444 strcpy (info->creation_time, creation_time);
445 info->integer_format = r->integer_format;
446 info->float_format = r->float_format;
447 info->compressed = r->compressed;
448 info->case_cnt = r->case_cnt;
450 product = ss_cstr (eye_catcher);
451 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
452 ss_trim (&product, ss_cstr (" "));
453 str_copy_buf_trunc (info->product, sizeof info->product,
454 ss_data (product), ss_length (product));
457 /* Reads a variable (type 2) record from R and adds the
458 corresponding variable to DICT.
459 Also skips past additional variable records for long string
462 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
463 int *format_warning_cnt)
466 int has_variable_label;
467 int missing_value_code;
472 struct variable *var;
475 width = read_int (r);
476 has_variable_label = read_int (r);
477 missing_value_code = read_int (r);
478 print_format = read_int (r);
479 write_format = read_int (r);
480 read_string (r, name, sizeof name);
481 name[strcspn (name, " ")] = '\0';
483 /* Check variable name. */
484 if (name[0] == '$' || name[0] == '#')
485 sys_error (r, "Variable name begins with invalid character `%c'.",
487 if (!var_is_plausible_name (name, false))
488 sys_error (r, _("Invalid variable name `%s'."), name);
490 /* Create variable. */
491 if (width < 0 || width > 255)
492 sys_error (r, _("Bad variable width %d."), width);
493 var = dict_create_var (dict, name, width);
496 _("Duplicate variable name `%s' within system file."),
499 /* Set the short name the same as the long name. */
500 var_set_short_name (var, 0, var_get_name (var));
502 /* Get variable label, if any. */
503 if (has_variable_label != 0 && has_variable_label != 1)
504 sys_error (r, _("Variable label indicator field is not 0 or 1."));
505 if (has_variable_label == 1)
511 if (len >= sizeof label)
512 sys_error (r, _("Variable %s has label of invalid length %u."),
513 name, (unsigned int) len);
514 read_string (r, label, len + 1);
515 var_set_label (var, label);
517 skip_bytes (r, ROUND_UP (len, 4) - len);
520 /* Set missing values. */
521 if (missing_value_code != 0)
523 struct missing_values mv;
526 mv_init (&mv, var_get_width (var));
527 if (var_is_numeric (var))
529 if (missing_value_code < -3 || missing_value_code > 3
530 || missing_value_code == -1)
531 sys_error (r, _("Numeric missing value indicator field is not "
532 "-3, -2, 0, 1, 2, or 3."));
533 if (missing_value_code < 0)
535 double low = read_float (r);
536 double high = read_float (r);
537 mv_add_num_range (&mv, low, high);
538 missing_value_code = -missing_value_code - 2;
540 for (i = 0; i < missing_value_code; i++)
541 mv_add_num (&mv, read_float (r));
543 else if (var_get_width (var) <= MAX_SHORT_STRING)
545 if (missing_value_code < 1 || missing_value_code > 3)
546 sys_error (r, _("String missing value indicator field is not "
548 for (i = 0; i < missing_value_code; i++)
551 read_string (r, string, sizeof string);
552 mv_add_str (&mv, string);
556 sys_error (r, _("Long string variable %s may not have missing "
558 var_set_missing_values (var, &mv);
562 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
563 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
565 /* Account for values.
566 Skip long string continuation records, if any. */
567 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
573 for (i = 1; i < nv; i++)
575 /* Check for record type 2 and width -1. */
576 if (read_int (r) != 2 || read_int (r) != -1)
577 sys_error (r, _("Missing string continuation record."));
579 /* Skip and ignore remaining continuation data. */
580 has_variable_label = read_int (r);
581 missing_value_code = read_int (r);
582 print_format = read_int (r);
583 write_format = read_int (r);
584 read_string (r, name, sizeof name);
586 /* Variable label fields on continuation records have
587 been spotted in system files created by "SPSS Power
588 Macintosh Release 6.1". */
589 if (has_variable_label)
590 skip_bytes (r, ROUND_UP (read_int (r), 4));
595 /* Translates the format spec from sysfile format to internal
598 parse_format_spec (struct sfm_reader *r, unsigned int s,
599 enum which_format which, struct variable *v,
600 int *format_warning_cnt)
602 const int max_format_warnings = 8;
604 uint8_t raw_type = s >> 16;
610 if (!fmt_from_io (raw_type, &f.type))
611 sys_error (r, _("Unknown variable format %d."), (int) raw_type);
616 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
621 if (which == PRINT_FORMAT)
622 var_set_print_format (v, &f);
624 var_set_write_format (v, &f);
626 else if (*++format_warning_cnt <= max_format_warnings)
628 char fmt_string[FMT_STRING_LEN_MAX + 1];
629 sys_warn (r, _("%s variable %s has invalid %s format %s."),
630 var_is_numeric (v) ? _("Numeric") : _("String"),
632 which == PRINT_FORMAT ? _("print") : _("write"),
633 fmt_to_string (&f, fmt_string));
635 if (*format_warning_cnt == max_format_warnings)
636 sys_warn (r, _("Suppressing further invalid format warnings."));
640 /* Sets the weighting variable in DICT to the variable
641 corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is
644 setup_weight (struct sfm_reader *r, int weight_idx,
645 struct variable **var_by_value_idx, struct dictionary *dict)
649 struct variable *weight_var
650 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
651 if (var_is_numeric (weight_var))
652 dict_set_weight (dict, weight_var);
654 sys_error (r, _("Weighting variable must be numeric."));
658 /* Reads a document record, type 6, from system file R, and sets up
659 the documents and n_documents fields in the associated
662 read_documents (struct sfm_reader *r, struct dictionary *dict)
667 if (dict_get_documents (dict) != NULL)
668 sys_error (r, _("Multiple type 6 (document) records."));
670 line_cnt = read_int (r);
672 sys_error (r, _("Number of document lines (%d) "
673 "must be greater than 0."), line_cnt);
675 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
676 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
677 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
678 dict_set_documents (dict, documents);
680 sys_error (r, _("Document line contains null byte."));
681 pool_free (r->pool, documents);
684 /* Read a type 7 extension record. */
686 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
687 struct sfm_read_info *info)
689 int subtype = read_int (r);
690 size_t size = read_int (r);
691 size_t count = read_int (r);
692 size_t bytes = size * count;
694 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
695 allows an extra byte for a null terminator, used by some
696 extension processing routines. */
697 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
698 sys_error (r, "Record type 7 subtype %d too large.", subtype);
703 read_machine_integer_info (r, size, count, info);
707 read_machine_float_info (r, size, count);
711 /* Variable sets information. We don't use these yet.
712 They only apply to GUIs; see VARSETS on the APPLY
713 DICTIONARY command in SPSS documentation. */
717 /* DATE variable information. We don't use it yet, but we
722 /* Unknown purpose. */
726 read_display_parameters (r, size, count, dict);
730 read_long_var_name_map (r, size, count, dict);
734 read_long_string_map (r, size, count, dict);
738 /* New in SPSS v14? Unknown purpose. */
742 /* Text field that defines variable attributes. New in
747 sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
751 skip_bytes (r, bytes);
754 /* Read record type 7, subtype 3. */
756 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
757 struct sfm_read_info *info)
759 int version_major = read_int (r);
760 int version_minor = read_int (r);
761 int version_revision = read_int (r);
762 int machine_code UNUSED = read_int (r);
763 int float_representation = read_int (r);
764 int compression_code UNUSED = read_int (r);
765 int integer_representation = read_int (r);
766 int character_code UNUSED = read_int (r);
768 int expected_float_format;
769 int expected_integer_format;
771 if (size != 4 || count != 8)
772 sys_error (r, _("Bad size (%u) or count (%u) field on record type 7, "
774 (unsigned int) size, (unsigned int) count);
776 /* Save version info. */
777 info->version_major = version_major;
778 info->version_minor = version_minor;
779 info->version_revision = version_revision;
781 /* Check floating point format. */
782 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
783 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
784 expected_float_format = 1;
785 else if (r->float_format == FLOAT_Z_LONG)
786 expected_float_format = 2;
787 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
788 expected_float_format = 3;
791 if (float_representation != expected_float_format)
792 sys_error (r, _("Floating-point representation indicated by "
793 "system file (%d) differs from expected (%d)."),
794 r->float_format, expected_float_format);
796 /* Check integer format. */
797 if (r->integer_format == INTEGER_MSB_FIRST)
798 expected_integer_format = 1;
799 else if (r->integer_format == INTEGER_LSB_FIRST)
800 expected_integer_format = 2;
803 if (integer_representation != expected_integer_format)
805 static const char *endian[] = {N_("little-endian"), N_("big-endian")};
806 sys_warn (r, _("Integer format indicated by system file (%s) "
807 "differs from expected (%s)."),
808 gettext (endian[integer_representation == 1]),
809 gettext (endian[expected_integer_format == 1]));
813 /* Read record type 7, subtype 4. */
815 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
817 double sysmis = read_float (r);
818 double highest = read_float (r);
819 double lowest = read_float (r);
821 if (size != 8 || count != 3)
822 sys_error (r, _("Bad size (%u) or count (%u) on extension 4."),
823 (unsigned int) size, (unsigned int) count);
825 if (sysmis != SYSMIS)
826 sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
827 if (highest != HIGHEST)
828 sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
829 if (lowest != LOWEST)
830 sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
833 /* Read record type 7, subtype 11, which specifies how variables
834 should be displayed in GUI environments. */
836 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
837 struct dictionary *dict)
839 const size_t n_vars = count / 3 ;
843 if (count % 3 || n_vars != dict_get_var_cnt (dict))
844 sys_error (r, _("Bad size (%u) or count (%u) on extension 11."),
845 (unsigned int) size, (unsigned int) count);
847 for (i = 0; i < n_vars; ++i)
849 struct variable *v = dict_get_var (dict, i);
850 int measure = read_int (r);
851 int width = read_int (r);
852 int align = read_int (r);
854 /* SPSS 14 sometimes seems to set string variables' measure
856 if (0 == measure && var_is_alpha (v))
859 /* Older versions (SPSS 9.0) sometimes set the display width
860 to zero. This causes confusion especially in the GUI */
864 if (measure < 1 || measure > 3 || align < 0 || align > 2)
867 sys_warn (r, _("Invalid variable display parameters. "
868 "Default parameters substituted."));
873 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
874 : measure == 2 ? MEASURE_ORDINAL
876 var_set_display_width (v, width);
877 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
878 : align == 1 ? ALIGN_RIGHT
883 /* Reads record type 7, subtype 13, which gives the long name
884 that corresponds to each short name. Modifies variable names
885 in DICT accordingly. */
887 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
888 struct dictionary *dict)
890 struct variable_to_value_map *map;
891 struct variable *var;
895 map = open_variable_to_value_map (r, size * count);
896 while (read_variable_to_value_map (r, dict, map, &var, &long_name,
900 size_t short_name_cnt;
903 /* Validate long name. */
904 if (!var_is_valid_name (long_name, false))
906 sys_warn (r, _("Long variable mapping from %s to invalid "
907 "variable name `%s'."),
908 var_get_name (var), long_name);
912 /* Identify any duplicates. */
913 if (strcasecmp (var_get_short_name (var, 0), long_name)
914 && dict_lookup_var (dict, long_name) != NULL)
916 sys_warn (r, _("Duplicate long variable name `%s' "
917 "within system file."), long_name);
921 /* Renaming a variable may clear its short names, but we
922 want to retain them, so we save them and re-set them
924 short_name_cnt = var_get_short_name_cnt (var);
925 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
926 for (i = 0; i < short_name_cnt; i++)
928 const char *s = var_get_short_name (var, i);
929 short_names[i] = s != NULL ? xstrdup (s) : NULL;
933 dict_rename_var (dict, var, long_name);
935 /* Restore short names. */
936 for (i = 0; i < short_name_cnt; i++)
938 var_set_short_name (var, i, short_names[i]);
939 free (short_names[i]);
943 close_variable_to_value_map (r, map);
944 r->has_long_var_names = true;
947 /* Reads record type 7, subtype 14, which gives the real length
948 of each very long string. Rearranges DICT accordingly. */
950 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
951 struct dictionary *dict)
953 struct variable_to_value_map *map;
954 struct variable *var;
958 map = open_variable_to_value_map (r, size * count);
959 while (read_variable_to_value_map (r, dict, map, &var, &length_s,
962 size_t idx = var_get_dict_index (var);
968 length = strtol (length_s, NULL, 10);
969 if (length < 1 || length > MAX_STRING)
971 sys_warn (r, _("%s listed as string of invalid length %s "
972 "in very length string record."),
973 var_get_name (var), length_s);
977 /* Check segments. */
978 segment_cnt = sfm_width_to_segments (length);
979 if (segment_cnt == 1)
981 sys_warn (r, _("%s listed in very long string record with width %s, "
982 "which requires only one segment."),
983 var_get_name (var), length_s);
986 if (idx + segment_cnt > dict_get_var_cnt (dict))
987 sys_error (r, _("Very long string %s overflows dictionary."),
990 /* Get the short names from the segments and check their
992 for (i = 0; i < segment_cnt; i++)
994 struct variable *seg = dict_get_var (dict, idx + i);
995 int alloc_width = sfm_segment_alloc_width (length, i);
996 int width = var_get_width (seg);
999 var_set_short_name (var, i, var_get_short_name (seg, 0));
1000 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1001 sys_error (r, _("Very long string with width %ld has segment %d "
1002 "of width %d (expected %d)"),
1003 length, i, width, alloc_width);
1005 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1006 var_set_width (var, length);
1008 close_variable_to_value_map (r, map);
1009 dict_compact_values (dict);
1012 /* Reads value labels from sysfile H and inserts them into the
1013 associated dictionary. */
1015 read_value_labels (struct sfm_reader *r,
1016 struct dictionary *dict, struct variable **var_by_value_idx)
1018 struct pool *subpool;
1022 char raw_value[8]; /* Value as uninterpreted bytes. */
1023 union value value; /* Value. */
1024 char *label; /* Null-terminated label string. */
1027 struct label *labels = NULL;
1028 int label_cnt; /* Number of labels. */
1030 struct variable **var = NULL; /* Associated variables. */
1031 int var_cnt; /* Number of associated variables. */
1035 subpool = pool_create_subpool (r->pool);
1037 /* Read the type 3 record and record its contents. We can't do
1038 much with the data yet because we don't know whether it is
1039 of numeric or string type. */
1041 /* Read number of labels. */
1042 label_cnt = read_int (r);
1044 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1046 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1051 /* Read each value/label tuple into labels[]. */
1052 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1053 for (i = 0; i < label_cnt; i++)
1055 struct label *label = labels + i;
1056 unsigned char label_len;
1060 read_bytes (r, label->raw_value, sizeof label->raw_value);
1062 /* Read label length. */
1063 read_bytes (r, &label_len, sizeof label_len);
1064 padded_len = ROUND_UP (label_len + 1, 8);
1066 /* Read label, padding. */
1067 label->label = pool_alloc (subpool, padded_len + 1);
1068 read_bytes (r, label->label, padded_len - 1);
1069 label->label[label_len] = 0;
1072 /* Now, read the type 4 record that has the list of variables
1073 to which the value labels are to be applied. */
1075 /* Read record type of type 4 record. */
1076 if (read_int (r) != 4)
1077 sys_error (r, _("Variable index record (type 4) does not immediately "
1078 "follow value label record (type 3) as it should."));
1080 /* Read number of variables associated with value label from type 4
1082 var_cnt = read_int (r);
1083 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1084 sys_error (r, _("Number of variables associated with a value label (%d) "
1085 "is not between 1 and the number of variables (%u)."),
1086 var_cnt, (unsigned int) dict_get_var_cnt (dict));
1088 /* Read the list of variables. */
1089 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1090 for (i = 0; i < var_cnt; i++)
1092 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1093 if (var_is_long_string (var[i]))
1094 sys_error (r, _("Value labels are not allowed on long string "
1095 "variables (%s)."), var_get_name (var[i]));
1098 /* Type check the variables. */
1099 for (i = 1; i < var_cnt; i++)
1100 if (var_get_type (var[i]) != var_get_type (var[0]))
1101 sys_error (r, _("Variables associated with value label are not all of "
1102 "identical type. Variable %s is %s, but variable "
1104 var_get_name (var[0]),
1105 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1106 var_get_name (var[i]),
1107 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1109 /* Fill in labels[].value, now that we know the desired type. */
1110 for (i = 0; i < label_cnt; i++)
1112 struct label *label = labels + i;
1114 if (var_is_alpha (var[0]))
1115 buf_copy_rpad (label->value.s, sizeof label->value.s,
1116 label->raw_value, sizeof label->raw_value);
1118 label->value.f = float_get_double (r->float_format, label->raw_value);
1121 /* Assign the `value_label's to each variable. */
1122 for (i = 0; i < var_cnt; i++)
1124 struct variable *v = var[i];
1127 /* Add each label to the variable. */
1128 for (j = 0; j < label_cnt; j++)
1130 struct label *label = &labels[j];
1131 if (!var_add_value_label (v, &label->value, label->label))
1133 if (var_is_numeric (var[0]))
1134 sys_warn (r, _("Duplicate value label for %g on %s."),
1135 label->value.f, var_get_name (v));
1137 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1138 var_get_width (v), label->value.s,
1144 pool_destroy (subpool);
1149 static void partial_record (struct sfm_reader *r)
1152 static void read_error (struct casereader *, const struct sfm_reader *);
1154 static bool read_case_number (struct sfm_reader *, double *);
1155 static bool read_case_string (struct sfm_reader *, char *, size_t);
1156 static int read_opcode (struct sfm_reader *);
1157 static bool read_compressed_number (struct sfm_reader *, double *);
1158 static bool read_compressed_string (struct sfm_reader *, char *);
1159 static bool read_whole_strings (struct sfm_reader *, char *, size_t);
1160 static bool skip_whole_strings (struct sfm_reader *, size_t);
1162 /* Reads one case from READER's file into C. Returns true only
1165 sys_file_casereader_read (struct casereader *reader, void *r_,
1168 struct sfm_reader *r = r_;
1174 case_create (c, r->value_cnt);
1175 if (setjmp (r->bail_out))
1177 casereader_force_error (reader);
1182 for (i = 0; i < r->sfm_var_cnt; i++)
1184 struct sfm_var *sv = &r->sfm_vars[i];
1185 union value *v = case_data_rw_idx (c, sv->case_index);
1189 if (!read_case_number (r, &v->f))
1194 if (!read_case_string (r, v->s + sv->offset, sv->width))
1196 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
1206 if (r->case_cnt != -1)
1207 read_error (reader, r);
1211 /* Issues an error that R ends in a partial record. */
1213 partial_record (struct sfm_reader *r)
1215 sys_error (r, _("File ends in partial case."));
1218 /* Issues an error that an unspecified error occurred SFM, and
1221 read_error (struct casereader *r, const struct sfm_reader *sfm)
1223 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1224 casereader_force_error (r);
1227 /* Reads a number from R and stores its value in *D.
1228 If R is compressed, reads a compressed number;
1229 otherwise, reads a number in the regular way.
1230 Returns true if successful, false if end of file is
1231 reached immediately. */
1233 read_case_number (struct sfm_reader *r, double *d)
1238 if (!try_read_bytes (r, number, sizeof number))
1240 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1244 return read_compressed_number (r, d);
1247 /* Reads LENGTH string bytes from R into S.
1248 Always reads a multiple of 8 bytes; if LENGTH is not a
1249 multiple of 8, then extra bytes are read and discarded without
1251 Reads compressed strings if S is compressed.
1252 Returns true if successful, false if end of file is
1253 reached immediately. */
1255 read_case_string (struct sfm_reader *r, char *s, size_t length)
1257 size_t whole = ROUND_DOWN (length, 8);
1258 size_t partial = length % 8;
1262 if (!read_whole_strings (r, s, whole))
1269 if (!read_whole_strings (r, bounce, sizeof bounce))
1275 memcpy (s + whole, bounce, partial);
1281 /* Reads and returns the next compression opcode from R. */
1283 read_opcode (struct sfm_reader *r)
1285 assert (r->compressed);
1289 if (r->opcode_idx >= sizeof r->opcodes)
1291 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1295 opcode = r->opcodes[r->opcode_idx++];
1302 /* Reads a compressed number from R and stores its value in D.
1303 Returns true if successful, false if end of file is
1304 reached immediately. */
1306 read_compressed_number (struct sfm_reader *r, double *d)
1308 int opcode = read_opcode (r);
1316 *d = read_float (r);
1320 sys_error (r, _("Compressed data is corrupt."));
1327 *d = opcode - r->bias;
1334 /* Reads a compressed 8-byte string segment from R and stores it
1336 Returns true if successful, false if end of file is
1337 reached immediately. */
1339 read_compressed_string (struct sfm_reader *r, char *dst)
1341 switch (read_opcode (r))
1348 read_bytes (r, dst, 8);
1352 memset (dst, ' ', 8);
1356 sys_error (r, _("Compressed data is corrupt."));
1362 /* Reads LENGTH string bytes from R into S.
1363 LENGTH must be a multiple of 8.
1364 Reads compressed strings if S is compressed.
1365 Returns true if successful, false if end of file is
1366 reached immediately. */
1368 read_whole_strings (struct sfm_reader *r, char *s, size_t length)
1370 assert (length % 8 == 0);
1372 return try_read_bytes (r, s, length);
1376 for (ofs = 0; ofs < length; ofs += 8)
1377 if (!read_compressed_string (r, s + ofs))
1387 /* Skips LENGTH string bytes from R.
1388 LENGTH must be a multiple of 8.
1389 (LENGTH is also limited to 1024, but that's only because the
1390 current caller never needs more than that many bytes.)
1391 Returns true if successful, false if end of file is
1392 reached immediately. */
1394 skip_whole_strings (struct sfm_reader *r, size_t length)
1397 assert (length < sizeof buffer);
1398 return read_whole_strings (r, buffer, length);
1401 /* Creates and returns a table that can be used for translating a value
1402 index into a case to a "struct variable *" for DICT. Multiple
1403 system file fields reference variables this way.
1405 This table must be created before processing the very long
1406 string extension record, because that record causes some
1407 values to be deleted from the case and the dictionary to be
1409 static struct variable **
1410 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1412 struct variable **var_by_value_idx;
1416 var_by_value_idx = pool_nmalloc (r->pool,
1417 r->oct_cnt, sizeof *var_by_value_idx);
1418 for (i = 0; i < dict_get_var_cnt (dict); i++)
1420 struct variable *v = dict_get_var (dict, i);
1421 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1424 var_by_value_idx[value_idx++] = v;
1425 for (j = 1; j < nv; j++)
1426 var_by_value_idx[value_idx++] = NULL;
1428 assert (value_idx == r->oct_cnt);
1430 return var_by_value_idx;
1433 /* Returns the "struct variable" corresponding to the given
1434 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index
1436 static struct variable *
1437 lookup_var_by_value_idx (struct sfm_reader *r,
1438 struct variable **var_by_value_idx, int value_idx)
1440 struct variable *var;
1442 if (value_idx < 1 || value_idx > r->oct_cnt)
1443 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1444 value_idx, r->oct_cnt);
1446 var = var_by_value_idx[value_idx - 1];
1448 sys_error (r, _("Variable index %d refers to long string "
1455 /* Returns the variable in D with the given SHORT_NAME,
1456 or a null pointer if there is none. */
1457 static struct variable *
1458 lookup_var_by_short_name (struct dictionary *d, const char *short_name)
1460 struct variable *var;
1464 /* First try looking up by full name. This often succeeds. */
1465 var = dict_lookup_var (d, short_name);
1466 if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
1469 /* Iterate through the whole dictionary as a fallback. */
1470 var_cnt = dict_get_var_cnt (d);
1471 for (i = 0; i < var_cnt; i++)
1473 var = dict_get_var (d, i);
1474 if (!strcasecmp (var_get_short_name (var, 0), short_name))
1481 /* Helpers for reading records that contain "variable=value"
1485 struct variable_to_value_map
1487 struct substring buffer; /* Record contents. */
1488 size_t pos; /* Current position in buffer. */
1491 /* Reads SIZE bytes into a "variable=value" map for R,
1492 and returns the map. */
1493 static struct variable_to_value_map *
1494 open_variable_to_value_map (struct sfm_reader *r, size_t size)
1496 struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map);
1497 char *buffer = pool_malloc (r->pool, size + 1);
1498 read_bytes (r, buffer, size);
1499 map->buffer = ss_buffer (buffer, size);
1504 /* Closes MAP and frees its storage.
1505 Not really needed, because the pool will free the map anyway,
1506 but can be used to free it earlier. */
1508 close_variable_to_value_map (struct sfm_reader *r,
1509 struct variable_to_value_map *map)
1511 pool_free (r->pool, ss_data (map->buffer));
1514 /* Reads the next variable=value pair from MAP.
1515 Looks up the variable in DICT and stores it into *VAR.
1516 Stores a null-terminated value into *VALUE. */
1518 read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict,
1519 struct variable_to_value_map *map,
1520 struct variable **var, char **value,
1523 int max_warnings = 5;
1527 struct substring short_name_ss, value_ss;
1529 if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss)
1530 || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos,
1533 if (*warning_cnt > max_warnings)
1534 sys_warn (r, _("Suppressed %d additional variable map warnings."),
1535 *warning_cnt - max_warnings);
1539 map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX),
1540 ss_buffer ("\t\0", 2));
1542 ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0';
1543 *var = lookup_var_by_short_name (dict, ss_data (short_name_ss));
1546 if (++*warning_cnt <= max_warnings)
1547 sys_warn (r, _("Variable map refers to unknown variable %s."),
1548 ss_data (short_name_ss));
1552 ss_data (value_ss)[ss_length (value_ss)] = '\0';
1553 *value = ss_data (value_ss);
1561 /* Displays a corruption message. */
1563 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
1568 ds_init_empty (&text);
1569 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
1570 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
1571 ds_put_vformat (&text, format, args);
1573 m.category = msg_class_to_category (class);
1574 m.severity = msg_class_to_severity (class);
1575 m.where.file_name = NULL;
1576 m.where.line_number = 0;
1577 m.text = ds_cstr (&text);
1582 /* Displays a warning for the current file position. */
1584 sys_warn (struct sfm_reader *r, const char *format, ...)
1588 va_start (args, format);
1589 sys_msg (r, MW, format, args);
1593 /* Displays an error for the current file position,
1594 marks it as in an error state,
1595 and aborts reading it using longjmp. */
1597 sys_error (struct sfm_reader *r, const char *format, ...)
1601 va_start (args, format);
1602 sys_msg (r, ME, format, args);
1606 longjmp (r->bail_out, 1);
1609 /* Reads BYTE_CNT bytes into BUF.
1610 Returns true if exactly BYTE_CNT bytes are successfully read.
1611 Aborts if an I/O error or a partial read occurs.
1612 If EOF_IS_OK, then an immediate end-of-file causes false to be
1613 returned; otherwise, immediate end-of-file causes an abort
1616 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
1617 void *buf, size_t byte_cnt)
1619 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
1620 if (bytes_read == byte_cnt)
1622 else if (ferror (r->file))
1623 sys_error (r, _("System error: %s."), strerror (errno));
1624 else if (!eof_is_ok || bytes_read != 0)
1625 sys_error (r, _("Unexpected end of file."));
1630 /* Reads BYTE_CNT into BUF.
1631 Aborts upon I/O error or if end-of-file is encountered. */
1633 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
1635 read_bytes_internal (r, false, buf, byte_cnt);
1638 /* Reads BYTE_CNT bytes into BUF.
1639 Returns true if exactly BYTE_CNT bytes are successfully read.
1640 Returns false if an immediate end-of-file is encountered.
1641 Aborts if an I/O error or a partial read occurs. */
1643 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
1645 return read_bytes_internal (r, true, buf, byte_cnt);
1648 /* Reads a 32-bit signed integer from R and returns its value in
1651 read_int (struct sfm_reader *r)
1654 read_bytes (r, integer, sizeof integer);
1655 return integer_get (r->integer_format, integer, sizeof integer);
1658 /* Reads a 64-bit floating-point number from R and returns its
1659 value in host format. */
1661 read_float (struct sfm_reader *r)
1664 read_bytes (r, number, sizeof number);
1665 return float_get_double (r->float_format, number);
1668 /* Reads exactly SIZE - 1 bytes into BUFFER
1669 and stores a null byte into BUFFER[SIZE - 1]. */
1671 read_string (struct sfm_reader *r, char *buffer, size_t size)
1674 read_bytes (r, buffer, size - 1);
1675 buffer[size - 1] = '\0';
1678 /* Skips BYTES bytes forward in R. */
1680 skip_bytes (struct sfm_reader *r, size_t bytes)
1685 size_t chunk = MIN (sizeof buffer, bytes);
1686 read_bytes (r, buffer, chunk);
1691 static struct casereader_class sys_file_casereader_class =
1693 sys_file_casereader_read,
1694 sys_file_casereader_destroy,