1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/assertion.h>
29 #include <libpspp/message.h>
30 #include <libpspp/compiler.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/pool.h>
33 #include <libpspp/str.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/array.h>
37 #include <data/case.h>
38 #include <data/casereader-provider.h>
39 #include <data/casereader.h>
40 #include <data/dictionary.h>
41 #include <data/file-handle-def.h>
42 #include <data/file-name.h>
43 #include <data/format.h>
44 #include <data/missing-values.h>
45 #include <data/value-labels.h>
46 #include <data/variable.h>
47 #include <data/value.h>
52 #include "unlocked-io.h"
57 #define _(msgid) gettext (msgid)
58 #define N_(msgid) (msgid)
60 /* System file reader. */
63 /* Resource tracking. */
64 struct pool *pool; /* All system file state. */
65 jmp_buf bail_out; /* longjmp() target for error handling. */
68 struct file_handle *fh; /* File handle. */
69 struct fh_lock *lock; /* Mutual exclusion for file handle. */
70 FILE *file; /* File stream. */
71 bool error; /* I/O or corruption error? */
72 size_t value_cnt; /* Number of "union value"s in struct case. */
75 enum integer_format integer_format; /* On-disk integer format. */
76 enum float_format float_format; /* On-disk floating point format. */
77 int oct_cnt; /* Number of 8-byte units per case. */
78 struct sfm_var *sfm_vars; /* Variables. */
79 size_t sfm_var_cnt; /* Number of variables. */
80 casenumber case_cnt; /* Number of cases */
81 bool has_long_var_names; /* File has a long variable name map */
84 bool compressed; /* File is compressed? */
85 double bias; /* Compression bias, usually 100.0. */
86 uint8_t opcodes[8]; /* Current block of opcodes. */
87 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
90 static struct casereader_class sys_file_casereader_class;
92 static bool close_reader (struct sfm_reader *);
94 static struct variable **make_var_by_value_idx (struct sfm_reader *,
96 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
100 static void sys_warn (struct sfm_reader *, const char *, ...)
101 PRINTF_FORMAT (2, 3);
103 static void sys_error (struct sfm_reader *, const char *, ...)
107 static void read_bytes (struct sfm_reader *, void *, size_t);
108 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
109 static int read_int (struct sfm_reader *);
110 static double read_float (struct sfm_reader *);
111 static void read_string (struct sfm_reader *, char *, size_t);
112 static void skip_bytes (struct sfm_reader *, size_t);
114 static struct variable_to_value_map *open_variable_to_value_map (
115 struct sfm_reader *, size_t size);
116 static void close_variable_to_value_map (struct sfm_reader *r,
117 struct variable_to_value_map *);
118 static bool read_variable_to_value_map (struct sfm_reader *,
120 struct variable_to_value_map *,
121 struct variable **var, char **value,
124 static bool close_reader (struct sfm_reader *r);
126 /* Dictionary reader. */
134 static void read_header (struct sfm_reader *, struct dictionary *,
135 int *weight_idx, int *claimed_oct_cnt,
136 struct sfm_read_info *);
137 static void read_variable_record (struct sfm_reader *, struct dictionary *,
138 int *format_warning_cnt);
139 static void parse_format_spec (struct sfm_reader *, unsigned int,
140 enum which_format, struct variable *,
141 int *format_warning_cnt);
142 static void setup_weight (struct sfm_reader *, int weight_idx,
143 struct variable **var_by_value_idx,
144 struct dictionary *);
145 static void read_documents (struct sfm_reader *, struct dictionary *);
146 static void read_value_labels (struct sfm_reader *, struct dictionary *,
147 struct variable **var_by_value_idx);
149 static void read_extension_record (struct sfm_reader *, struct dictionary *,
150 struct sfm_read_info *);
151 static void read_machine_integer_info (struct sfm_reader *,
152 size_t size, size_t count,
153 struct sfm_read_info *);
154 static void read_machine_float_info (struct sfm_reader *,
155 size_t size, size_t count);
156 static void read_display_parameters (struct sfm_reader *,
157 size_t size, size_t count,
158 struct dictionary *);
159 static void read_long_var_name_map (struct sfm_reader *,
160 size_t size, size_t count,
161 struct dictionary *);
162 static void read_long_string_map (struct sfm_reader *,
163 size_t size, size_t count,
164 struct dictionary *);
167 /* Opens the system file designated by file handle FH for
168 reading. Reads the system file's dictionary into *DICT.
169 If INFO is non-null, then it receives additional info about the
172 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
173 struct sfm_read_info *volatile info)
175 struct sfm_reader *volatile r = NULL;
176 struct variable **var_by_value_idx;
177 struct sfm_read_info local_info;
178 int format_warning_cnt = 0;
183 *dict = dict_create ();
185 /* Create and initialize reader. */
186 r = pool_create_container (struct sfm_reader, pool);
192 r->has_long_var_names = false;
193 r->opcode_idx = sizeof r->opcodes;
195 /* TRANSLATORS: this fragment will be interpolated into
196 messages in fh_lock() that identify types of files. */
197 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
201 r->file = fn_open (fh_get_file_name (fh), "rb");
204 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
205 fh_get_file_name (r->fh), strerror (errno));
209 /* Initialize info. */
212 memset (info, 0, sizeof *info);
214 if (setjmp (r->bail_out))
219 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
221 /* Read all the variable definition records. */
222 rec_type = read_int (r);
223 while (rec_type == 2)
225 read_variable_record (r, *dict, &format_warning_cnt);
226 rec_type = read_int (r);
229 /* Figure out the case format. */
230 var_by_value_idx = make_var_by_value_idx (r, *dict);
231 setup_weight (r, weight_idx, var_by_value_idx, *dict);
233 /* Read all the rest of the dictionary records. */
234 while (rec_type != 999)
239 read_value_labels (r, *dict, var_by_value_idx);
243 sys_error (r, _("Misplaced type 4 record."));
246 read_documents (r, *dict);
250 read_extension_record (r, *dict, info);
254 sys_error (r, _("Unrecognized record type %d."), rec_type);
256 rec_type = read_int (r);
260 if ( ! r->has_long_var_names )
263 for (i = 0; i < dict_get_var_cnt (*dict); i++)
265 struct variable *var = dict_get_var (*dict, i);
266 char short_name [SHORT_NAME_LEN + 1];
267 char long_name [SHORT_NAME_LEN + 1];
269 strcpy (short_name, var_get_name (var));
271 strcpy (long_name, short_name);
272 str_lowercase (long_name);
274 /* Set long name. Renaming a variable may clear the short
275 name, but we want to retain it, so re-set it
277 dict_rename_var (*dict, var, long_name);
278 var_set_short_name (var, 0, short_name);
281 r->has_long_var_names = true;
284 /* Read record 999 data, which is just filler. */
287 /* Warn if the actual amount of data per case differs from the
288 amount that the header claims. SPSS version 13 gets this
289 wrong when very long strings are involved, so don't warn in
291 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
292 && info->version_major != 13)
293 sys_warn (r, _("File header claims %d variable positions but "
294 "%d were read from file."),
295 claimed_oct_cnt, r->oct_cnt);
297 /* Create an index of dictionary variable widths for
298 sfm_read_case to use. We cannot use the `struct variable's
299 from the dictionary we created, because the caller owns the
300 dictionary and may destroy or modify its variables. */
301 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
302 pool_register (r->pool, free, r->sfm_vars);
304 pool_free (r->pool, var_by_value_idx);
305 r->value_cnt = dict_get_next_value_idx (*dict);
306 return casereader_create_sequential
308 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
309 &sys_file_casereader_class, r);
313 dict_destroy (*dict);
318 /* Closes a system file after we're done with it.
319 Returns true if an I/O error has occurred on READER, false
322 close_reader (struct sfm_reader *r)
331 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
333 msg (ME, _("Error closing system file \"%s\": %s."),
334 fh_get_file_name (r->fh), strerror (errno));
344 pool_destroy (r->pool);
349 /* Destroys READER. */
351 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
353 struct sfm_reader *r = r_;
357 /* Returns true if FILE is an SPSS system file,
360 sfm_detect (FILE *file)
364 if (fread (rec_type, 4, 1, file) != 1)
368 return !strcmp ("$FL2", rec_type);
371 /* Reads the global header of the system file.
372 Sets DICT's file label to the system file's label.
373 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
374 or to the value index of the weight variable otherwise.
375 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
376 per case that the file claims to have (although it is not
378 Initializes INFO with header information. */
380 read_header (struct sfm_reader *r, struct dictionary *dict,
381 int *weight_idx, int *claimed_oct_cnt,
382 struct sfm_read_info *info)
385 char eye_catcher[61];
386 uint8_t raw_layout_code[4];
388 char creation_date[10];
389 char creation_time[9];
391 struct substring file_label_ss;
392 struct substring product;
394 read_string (r, rec_type, sizeof rec_type);
395 read_string (r, eye_catcher, sizeof eye_catcher);
397 if (strcmp ("$FL2", rec_type) != 0)
398 sys_error (r, _("This is not an SPSS system file."));
400 /* Identify integer format. */
401 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
402 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
404 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
406 || (r->integer_format != INTEGER_MSB_FIRST
407 && r->integer_format != INTEGER_LSB_FIRST))
408 sys_error (r, _("This is not an SPSS system file."));
410 *claimed_oct_cnt = read_int (r);
411 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
412 *claimed_oct_cnt = -1;
414 r->compressed = read_int (r) != 0;
416 *weight_idx = read_int (r);
418 r->case_cnt = read_int (r);
419 if ( r->case_cnt > INT_MAX / 2)
423 /* Identify floating-point format and obtain compression bias. */
424 read_bytes (r, raw_bias, sizeof raw_bias);
425 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
427 sys_warn (r, _("Compression bias (%g) is not the usual "
428 "value of 100, or system file uses unrecognized "
429 "floating-point format."),
431 if (r->integer_format == INTEGER_MSB_FIRST)
432 r->float_format = FLOAT_IEEE_DOUBLE_BE;
434 r->float_format = FLOAT_IEEE_DOUBLE_LE;
436 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
438 read_string (r, creation_date, sizeof creation_date);
439 read_string (r, creation_time, sizeof creation_time);
440 read_string (r, file_label, sizeof file_label);
443 file_label_ss = ss_cstr (file_label);
444 ss_trim (&file_label_ss, ss_cstr (" "));
445 if (!ss_is_empty (file_label_ss))
447 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
448 dict_set_label (dict, ss_data (file_label_ss));
451 strcpy (info->creation_date, creation_date);
452 strcpy (info->creation_time, creation_time);
453 info->integer_format = r->integer_format;
454 info->float_format = r->float_format;
455 info->compressed = r->compressed;
456 info->case_cnt = r->case_cnt;
458 product = ss_cstr (eye_catcher);
459 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
460 ss_trim (&product, ss_cstr (" "));
461 str_copy_buf_trunc (info->product, sizeof info->product,
462 ss_data (product), ss_length (product));
465 /* Reads a variable (type 2) record from R and adds the
466 corresponding variable to DICT.
467 Also skips past additional variable records for long string
470 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
471 int *format_warning_cnt)
474 int has_variable_label;
475 int missing_value_code;
480 struct variable *var;
483 width = read_int (r);
484 has_variable_label = read_int (r);
485 missing_value_code = read_int (r);
486 print_format = read_int (r);
487 write_format = read_int (r);
488 read_string (r, name, sizeof name);
489 name[strcspn (name, " ")] = '\0';
491 /* Check variable name. */
492 if (name[0] == '$' || name[0] == '#')
493 sys_error (r, "Variable name begins with invalid character `%c'.",
495 if (!var_is_plausible_name (name, false))
496 sys_error (r, _("Invalid variable name `%s'."), name);
498 /* Create variable. */
499 if (width < 0 || width > 255)
500 sys_error (r, _("Bad variable width %d."), width);
501 var = dict_create_var (dict, name, width);
504 _("Duplicate variable name `%s' within system file."),
507 /* Set the short name the same as the long name. */
508 var_set_short_name (var, 0, var_get_name (var));
510 /* Get variable label, if any. */
511 if (has_variable_label != 0 && has_variable_label != 1)
512 sys_error (r, _("Variable label indicator field is not 0 or 1."));
513 if (has_variable_label == 1)
519 if (len >= sizeof label)
520 sys_error (r, _("Variable %s has label of invalid length %zu."),
522 read_string (r, label, len + 1);
523 var_set_label (var, label);
525 skip_bytes (r, ROUND_UP (len, 4) - len);
528 /* Set missing values. */
529 if (missing_value_code != 0)
531 struct missing_values mv;
534 mv_init (&mv, var_get_width (var));
535 if (var_is_numeric (var))
537 if (missing_value_code < -3 || missing_value_code > 3
538 || missing_value_code == -1)
539 sys_error (r, _("Numeric missing value indicator field is not "
540 "-3, -2, 0, 1, 2, or 3."));
541 if (missing_value_code < 0)
543 double low = read_float (r);
544 double high = read_float (r);
545 mv_add_num_range (&mv, low, high);
546 missing_value_code = -missing_value_code - 2;
548 for (i = 0; i < missing_value_code; i++)
549 mv_add_num (&mv, read_float (r));
551 else if (var_get_width (var) <= MAX_SHORT_STRING)
553 if (missing_value_code < 1 || missing_value_code > 3)
554 sys_error (r, _("String missing value indicator field is not "
556 for (i = 0; i < missing_value_code; i++)
559 read_string (r, string, sizeof string);
560 mv_add_str (&mv, string);
564 sys_error (r, _("Long string variable %s may not have missing "
566 var_set_missing_values (var, &mv);
570 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
571 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
573 /* Account for values.
574 Skip long string continuation records, if any. */
575 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
581 for (i = 1; i < nv; i++)
583 /* Check for record type 2 and width -1. */
584 if (read_int (r) != 2 || read_int (r) != -1)
585 sys_error (r, _("Missing string continuation record."));
587 /* Skip and ignore remaining continuation data. */
588 has_variable_label = read_int (r);
589 missing_value_code = read_int (r);
590 print_format = read_int (r);
591 write_format = read_int (r);
592 read_string (r, name, sizeof name);
594 /* Variable label fields on continuation records have
595 been spotted in system files created by "SPSS Power
596 Macintosh Release 6.1". */
597 if (has_variable_label)
598 skip_bytes (r, ROUND_UP (read_int (r), 4));
603 /* Translates the format spec from sysfile format to internal
606 parse_format_spec (struct sfm_reader *r, unsigned int s,
607 enum which_format which, struct variable *v,
608 int *format_warning_cnt)
610 const int max_format_warnings = 8;
612 uint8_t raw_type = s >> 16;
618 if (!fmt_from_io (raw_type, &f.type))
619 sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
624 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
629 if (which == PRINT_FORMAT)
630 var_set_print_format (v, &f);
632 var_set_write_format (v, &f);
634 else if (*++format_warning_cnt <= max_format_warnings)
636 char fmt_string[FMT_STRING_LEN_MAX + 1];
637 sys_warn (r, _("%s variable %s has invalid %s format %s."),
638 var_is_numeric (v) ? _("Numeric") : _("String"),
640 which == PRINT_FORMAT ? _("print") : _("write"),
641 fmt_to_string (&f, fmt_string));
643 if (*format_warning_cnt == max_format_warnings)
644 sys_warn (r, _("Suppressing further invalid format warnings."));
648 /* Sets the weighting variable in DICT to the variable
649 corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is
652 setup_weight (struct sfm_reader *r, int weight_idx,
653 struct variable **var_by_value_idx, struct dictionary *dict)
657 struct variable *weight_var
658 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
659 if (var_is_numeric (weight_var))
660 dict_set_weight (dict, weight_var);
662 sys_error (r, _("Weighting variable must be numeric."));
666 /* Reads a document record, type 6, from system file R, and sets up
667 the documents and n_documents fields in the associated
670 read_documents (struct sfm_reader *r, struct dictionary *dict)
675 if (dict_get_documents (dict) != NULL)
676 sys_error (r, _("Multiple type 6 (document) records."));
678 line_cnt = read_int (r);
680 sys_error (r, _("Number of document lines (%d) "
681 "must be greater than 0."), line_cnt);
683 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
684 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
685 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
686 dict_set_documents (dict, documents);
688 sys_error (r, _("Document line contains null byte."));
689 pool_free (r->pool, documents);
692 /* Read a type 7 extension record. */
694 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
695 struct sfm_read_info *info)
697 int subtype = read_int (r);
698 size_t size = read_int (r);
699 size_t count = read_int (r);
700 size_t bytes = size * count;
702 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
703 allows an extra byte for a null terminator, used by some
704 extension processing routines. */
705 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
706 sys_error (r, "Record type 7 subtype %d too large.", subtype);
711 read_machine_integer_info (r, size, count, info);
715 read_machine_float_info (r, size, count);
719 /* Variable sets information. We don't use these yet.
720 They only apply to GUIs; see VARSETS on the APPLY
721 DICTIONARY command in SPSS documentation. */
725 /* DATE variable information. We don't use it yet, but we
730 /* Unknown purpose. */
734 read_display_parameters (r, size, count, dict);
738 read_long_var_name_map (r, size, count, dict);
742 read_long_string_map (r, size, count, dict);
746 /* New in SPSS v14? Unknown purpose. */
750 /* Text field that defines variable attributes. New in
755 sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
759 skip_bytes (r, bytes);
762 /* Read record type 7, subtype 3. */
764 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
765 struct sfm_read_info *info)
767 int version_major = read_int (r);
768 int version_minor = read_int (r);
769 int version_revision = read_int (r);
770 int machine_code UNUSED = read_int (r);
771 int float_representation = read_int (r);
772 int compression_code UNUSED = read_int (r);
773 int integer_representation = read_int (r);
774 int character_code UNUSED = read_int (r);
776 int expected_float_format;
777 int expected_integer_format;
779 if (size != 4 || count != 8)
780 sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
784 /* Save version info. */
785 info->version_major = version_major;
786 info->version_minor = version_minor;
787 info->version_revision = version_revision;
789 /* Check floating point format. */
790 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
791 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
792 expected_float_format = 1;
793 else if (r->float_format == FLOAT_Z_LONG)
794 expected_float_format = 2;
795 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
796 expected_float_format = 3;
799 if (float_representation != expected_float_format)
800 sys_error (r, _("Floating-point representation indicated by "
801 "system file (%d) differs from expected (%d)."),
802 r->float_format, expected_float_format);
804 /* Check integer format. */
805 if (r->integer_format == INTEGER_MSB_FIRST)
806 expected_integer_format = 1;
807 else if (r->integer_format == INTEGER_LSB_FIRST)
808 expected_integer_format = 2;
811 if (integer_representation != expected_integer_format)
813 static const char *endian[] = {N_("little-endian"), N_("big-endian")};
814 sys_warn (r, _("Integer format indicated by system file (%s) "
815 "differs from expected (%s)."),
816 gettext (endian[integer_representation == 1]),
817 gettext (endian[expected_integer_format == 1]));
821 /* Read record type 7, subtype 4. */
823 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
825 double sysmis = read_float (r);
826 double highest = read_float (r);
827 double lowest = read_float (r);
829 if (size != 8 || count != 3)
830 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
833 if (sysmis != SYSMIS)
834 sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
835 if (highest != HIGHEST)
836 sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
837 if (lowest != LOWEST)
838 sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
841 /* Read record type 7, subtype 11, which specifies how variables
842 should be displayed in GUI environments. */
844 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
845 struct dictionary *dict)
847 const size_t n_vars = count / 3 ;
851 if (count % 3 || n_vars != dict_get_var_cnt (dict))
852 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 11."),
855 for (i = 0; i < n_vars; ++i)
857 struct variable *v = dict_get_var (dict, i);
858 int measure = read_int (r);
859 int width = read_int (r);
860 int align = read_int (r);
862 /* SPSS 14 sometimes seems to set string variables' measure
864 if (0 == measure && var_is_alpha (v))
867 /* Older versions (SPSS 9.0) sometimes set the display width
868 to zero. This causes confusion especially in the GUI */
872 if (measure < 1 || measure > 3 || align < 0 || align > 2)
875 sys_warn (r, _("Invalid variable display parameters. "
876 "Default parameters substituted."));
881 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
882 : measure == 2 ? MEASURE_ORDINAL
884 var_set_display_width (v, width);
885 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
886 : align == 1 ? ALIGN_RIGHT
891 /* Reads record type 7, subtype 13, which gives the long name
892 that corresponds to each short name. Modifies variable names
893 in DICT accordingly. */
895 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
896 struct dictionary *dict)
898 struct variable_to_value_map *map;
899 struct variable *var;
903 map = open_variable_to_value_map (r, size * count);
904 while (read_variable_to_value_map (r, dict, map, &var, &long_name,
908 size_t short_name_cnt;
911 /* Validate long name. */
912 if (!var_is_valid_name (long_name, false))
914 sys_warn (r, _("Long variable mapping from %s to invalid "
915 "variable name `%s'."),
916 var_get_name (var), long_name);
920 /* Identify any duplicates. */
921 if (strcasecmp (var_get_short_name (var, 0), long_name)
922 && dict_lookup_var (dict, long_name) != NULL)
924 sys_warn (r, _("Duplicate long variable name `%s' "
925 "within system file."), long_name);
929 /* Renaming a variable may clear its short names, but we
930 want to retain them, so we save them and re-set them
932 short_name_cnt = var_get_short_name_cnt (var);
933 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
934 for (i = 0; i < short_name_cnt; i++)
936 const char *s = var_get_short_name (var, i);
937 short_names[i] = s != NULL ? xstrdup (s) : NULL;
941 dict_rename_var (dict, var, long_name);
943 /* Restore short names. */
944 for (i = 0; i < short_name_cnt; i++)
946 var_set_short_name (var, i, short_names[i]);
947 free (short_names[i]);
951 close_variable_to_value_map (r, map);
952 r->has_long_var_names = true;
955 /* Reads record type 7, subtype 14, which gives the real length
956 of each very long string. Rearranges DICT accordingly. */
958 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
959 struct dictionary *dict)
961 struct variable_to_value_map *map;
962 struct variable *var;
966 map = open_variable_to_value_map (r, size * count);
967 while (read_variable_to_value_map (r, dict, map, &var, &length_s,
970 size_t idx = var_get_dict_index (var);
976 length = strtol (length_s, NULL, 10);
977 if (length < 1 || length > MAX_STRING)
979 sys_warn (r, _("%s listed as string of invalid length %s "
980 "in very length string record."),
981 var_get_name (var), length_s);
985 /* Check segments. */
986 segment_cnt = sfm_width_to_segments (length);
987 if (segment_cnt == 1)
989 sys_warn (r, _("%s listed in very long string record with width %s, "
990 "which requires only one segment."),
991 var_get_name (var), length_s);
994 if (idx + segment_cnt > dict_get_var_cnt (dict))
995 sys_error (r, _("Very long string %s overflows dictionary."),
998 /* Get the short names from the segments and check their
1000 for (i = 0; i < segment_cnt; i++)
1002 struct variable *seg = dict_get_var (dict, idx + i);
1003 int alloc_width = sfm_segment_alloc_width (length, i);
1004 int width = var_get_width (seg);
1007 var_set_short_name (var, i, var_get_short_name (seg, 0));
1008 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1009 sys_error (r, _("Very long string with width %ld has segment %d "
1010 "of width %d (expected %d)"),
1011 length, i, width, alloc_width);
1013 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1014 var_set_width (var, length);
1016 close_variable_to_value_map (r, map);
1017 dict_compact_values (dict);
1020 /* Reads value labels from sysfile H and inserts them into the
1021 associated dictionary. */
1023 read_value_labels (struct sfm_reader *r,
1024 struct dictionary *dict, struct variable **var_by_value_idx)
1026 struct pool *subpool;
1030 char raw_value[8]; /* Value as uninterpreted bytes. */
1031 union value value; /* Value. */
1032 char *label; /* Null-terminated label string. */
1035 struct label *labels = NULL;
1036 int label_cnt; /* Number of labels. */
1038 struct variable **var = NULL; /* Associated variables. */
1039 int var_cnt; /* Number of associated variables. */
1043 subpool = pool_create_subpool (r->pool);
1045 /* Read the type 3 record and record its contents. We can't do
1046 much with the data yet because we don't know whether it is
1047 of numeric or string type. */
1049 /* Read number of labels. */
1050 label_cnt = read_int (r);
1052 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1054 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1059 /* Read each value/label tuple into labels[]. */
1060 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1061 for (i = 0; i < label_cnt; i++)
1063 struct label *label = labels + i;
1064 unsigned char label_len;
1068 read_bytes (r, label->raw_value, sizeof label->raw_value);
1070 /* Read label length. */
1071 read_bytes (r, &label_len, sizeof label_len);
1072 padded_len = ROUND_UP (label_len + 1, 8);
1074 /* Read label, padding. */
1075 label->label = pool_alloc (subpool, padded_len + 1);
1076 read_bytes (r, label->label, padded_len - 1);
1077 label->label[label_len] = 0;
1080 /* Now, read the type 4 record that has the list of variables
1081 to which the value labels are to be applied. */
1083 /* Read record type of type 4 record. */
1084 if (read_int (r) != 4)
1085 sys_error (r, _("Variable index record (type 4) does not immediately "
1086 "follow value label record (type 3) as it should."));
1088 /* Read number of variables associated with value label from type 4
1090 var_cnt = read_int (r);
1091 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1092 sys_error (r, _("Number of variables associated with a value label (%d) "
1093 "is not between 1 and the number of variables (%zu)."),
1094 var_cnt, dict_get_var_cnt (dict));
1096 /* Read the list of variables. */
1097 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1098 for (i = 0; i < var_cnt; i++)
1100 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1101 if (var_is_long_string (var[i]))
1102 sys_error (r, _("Value labels are not allowed on long string "
1103 "variables (%s)."), var_get_name (var[i]));
1106 /* Type check the variables. */
1107 for (i = 1; i < var_cnt; i++)
1108 if (var_get_type (var[i]) != var_get_type (var[0]))
1109 sys_error (r, _("Variables associated with value label are not all of "
1110 "identical type. Variable %s is %s, but variable "
1112 var_get_name (var[0]),
1113 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1114 var_get_name (var[i]),
1115 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1117 /* Fill in labels[].value, now that we know the desired type. */
1118 for (i = 0; i < label_cnt; i++)
1120 struct label *label = labels + i;
1122 if (var_is_alpha (var[0]))
1123 buf_copy_rpad (label->value.s, sizeof label->value.s,
1124 label->raw_value, sizeof label->raw_value);
1126 label->value.f = float_get_double (r->float_format, label->raw_value);
1129 /* Assign the `value_label's to each variable. */
1130 for (i = 0; i < var_cnt; i++)
1132 struct variable *v = var[i];
1135 /* Add each label to the variable. */
1136 for (j = 0; j < label_cnt; j++)
1138 struct label *label = &labels[j];
1139 if (!var_add_value_label (v, &label->value, label->label))
1141 if (var_is_numeric (var[0]))
1142 sys_warn (r, _("Duplicate value label for %g on %s."),
1143 label->value.f, var_get_name (v));
1145 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1146 var_get_width (v), label->value.s,
1152 pool_destroy (subpool);
1157 static void partial_record (struct sfm_reader *r)
1160 static void read_error (struct casereader *, const struct sfm_reader *);
1162 static bool read_case_number (struct sfm_reader *, double *);
1163 static bool read_case_string (struct sfm_reader *, char *, size_t);
1164 static int read_opcode (struct sfm_reader *);
1165 static bool read_compressed_number (struct sfm_reader *, double *);
1166 static bool read_compressed_string (struct sfm_reader *, char *);
1167 static bool read_whole_strings (struct sfm_reader *, char *, size_t);
1168 static bool skip_whole_strings (struct sfm_reader *, size_t);
1170 /* Reads one case from READER's file into C. Returns true only
1173 sys_file_casereader_read (struct casereader *reader, void *r_,
1176 struct sfm_reader *r = r_;
1182 case_create (c, r->value_cnt);
1183 if (setjmp (r->bail_out))
1185 casereader_force_error (reader);
1190 for (i = 0; i < r->sfm_var_cnt; i++)
1192 struct sfm_var *sv = &r->sfm_vars[i];
1193 union value *v = case_data_rw_idx (c, sv->case_index);
1197 if (!read_case_number (r, &v->f))
1202 if (!read_case_string (r, v->s + sv->offset, sv->width))
1204 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
1214 if (r->case_cnt != -1)
1215 read_error (reader, r);
1219 /* Issues an error that R ends in a partial record. */
1221 partial_record (struct sfm_reader *r)
1223 sys_error (r, _("File ends in partial case."));
1226 /* Issues an error that an unspecified error occurred SFM, and
1229 read_error (struct casereader *r, const struct sfm_reader *sfm)
1231 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1232 casereader_force_error (r);
1235 /* Reads a number from R and stores its value in *D.
1236 If R is compressed, reads a compressed number;
1237 otherwise, reads a number in the regular way.
1238 Returns true if successful, false if end of file is
1239 reached immediately. */
1241 read_case_number (struct sfm_reader *r, double *d)
1246 if (!try_read_bytes (r, number, sizeof number))
1248 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1252 return read_compressed_number (r, d);
1255 /* Reads LENGTH string bytes from R into S.
1256 Always reads a multiple of 8 bytes; if LENGTH is not a
1257 multiple of 8, then extra bytes are read and discarded without
1259 Reads compressed strings if S is compressed.
1260 Returns true if successful, false if end of file is
1261 reached immediately. */
1263 read_case_string (struct sfm_reader *r, char *s, size_t length)
1265 size_t whole = ROUND_DOWN (length, 8);
1266 size_t partial = length % 8;
1270 if (!read_whole_strings (r, s, whole))
1277 if (!read_whole_strings (r, bounce, sizeof bounce))
1283 memcpy (s + whole, bounce, partial);
1289 /* Reads and returns the next compression opcode from R. */
1291 read_opcode (struct sfm_reader *r)
1293 assert (r->compressed);
1297 if (r->opcode_idx >= sizeof r->opcodes)
1299 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1303 opcode = r->opcodes[r->opcode_idx++];
1310 /* Reads a compressed number from R and stores its value in D.
1311 Returns true if successful, false if end of file is
1312 reached immediately. */
1314 read_compressed_number (struct sfm_reader *r, double *d)
1316 int opcode = read_opcode (r);
1324 *d = read_float (r);
1328 sys_error (r, _("Compressed data is corrupt."));
1335 *d = opcode - r->bias;
1342 /* Reads a compressed 8-byte string segment from R and stores it
1344 Returns true if successful, false if end of file is
1345 reached immediately. */
1347 read_compressed_string (struct sfm_reader *r, char *dst)
1349 switch (read_opcode (r))
1356 read_bytes (r, dst, 8);
1360 memset (dst, ' ', 8);
1364 sys_error (r, _("Compressed data is corrupt."));
1370 /* Reads LENGTH string bytes from R into S.
1371 LENGTH must be a multiple of 8.
1372 Reads compressed strings if S is compressed.
1373 Returns true if successful, false if end of file is
1374 reached immediately. */
1376 read_whole_strings (struct sfm_reader *r, char *s, size_t length)
1378 assert (length % 8 == 0);
1380 return try_read_bytes (r, s, length);
1384 for (ofs = 0; ofs < length; ofs += 8)
1385 if (!read_compressed_string (r, s + ofs))
1395 /* Skips LENGTH string bytes from R.
1396 LENGTH must be a multiple of 8.
1397 (LENGTH is also limited to 1024, but that's only because the
1398 current caller never needs more than that many bytes.)
1399 Returns true if successful, false if end of file is
1400 reached immediately. */
1402 skip_whole_strings (struct sfm_reader *r, size_t length)
1405 assert (length < sizeof buffer);
1406 return read_whole_strings (r, buffer, length);
1409 /* Creates and returns a table that can be used for translating a value
1410 index into a case to a "struct variable *" for DICT. Multiple
1411 system file fields reference variables this way.
1413 This table must be created before processing the very long
1414 string extension record, because that record causes some
1415 values to be deleted from the case and the dictionary to be
1417 static struct variable **
1418 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1420 struct variable **var_by_value_idx;
1424 var_by_value_idx = pool_nmalloc (r->pool,
1425 r->oct_cnt, sizeof *var_by_value_idx);
1426 for (i = 0; i < dict_get_var_cnt (dict); i++)
1428 struct variable *v = dict_get_var (dict, i);
1429 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1432 var_by_value_idx[value_idx++] = v;
1433 for (j = 1; j < nv; j++)
1434 var_by_value_idx[value_idx++] = NULL;
1436 assert (value_idx == r->oct_cnt);
1438 return var_by_value_idx;
1441 /* Returns the "struct variable" corresponding to the given
1442 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index
1444 static struct variable *
1445 lookup_var_by_value_idx (struct sfm_reader *r,
1446 struct variable **var_by_value_idx, int value_idx)
1448 struct variable *var;
1450 if (value_idx < 1 || value_idx > r->oct_cnt)
1451 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1452 value_idx, r->oct_cnt);
1454 var = var_by_value_idx[value_idx - 1];
1456 sys_error (r, _("Variable index %d refers to long string "
1463 /* Returns the variable in D with the given SHORT_NAME,
1464 or a null pointer if there is none. */
1465 static struct variable *
1466 lookup_var_by_short_name (struct dictionary *d, const char *short_name)
1468 struct variable *var;
1472 /* First try looking up by full name. This often succeeds. */
1473 var = dict_lookup_var (d, short_name);
1474 if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
1477 /* Iterate through the whole dictionary as a fallback. */
1478 var_cnt = dict_get_var_cnt (d);
1479 for (i = 0; i < var_cnt; i++)
1481 var = dict_get_var (d, i);
1482 if (!strcasecmp (var_get_short_name (var, 0), short_name))
1489 /* Helpers for reading records that contain "variable=value"
1493 struct variable_to_value_map
1495 struct substring buffer; /* Record contents. */
1496 size_t pos; /* Current position in buffer. */
1499 /* Reads SIZE bytes into a "variable=value" map for R,
1500 and returns the map. */
1501 static struct variable_to_value_map *
1502 open_variable_to_value_map (struct sfm_reader *r, size_t size)
1504 struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map);
1505 char *buffer = pool_malloc (r->pool, size + 1);
1506 read_bytes (r, buffer, size);
1507 map->buffer = ss_buffer (buffer, size);
1512 /* Closes MAP and frees its storage.
1513 Not really needed, because the pool will free the map anyway,
1514 but can be used to free it earlier. */
1516 close_variable_to_value_map (struct sfm_reader *r,
1517 struct variable_to_value_map *map)
1519 pool_free (r->pool, ss_data (map->buffer));
1522 /* Reads the next variable=value pair from MAP.
1523 Looks up the variable in DICT and stores it into *VAR.
1524 Stores a null-terminated value into *VALUE. */
1526 read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict,
1527 struct variable_to_value_map *map,
1528 struct variable **var, char **value,
1531 int max_warnings = 5;
1535 struct substring short_name_ss, value_ss;
1537 if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss)
1538 || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos,
1541 if (*warning_cnt > max_warnings)
1542 sys_warn (r, _("Suppressed %d additional variable map warnings."),
1543 *warning_cnt - max_warnings);
1547 map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX),
1548 ss_buffer ("\t\0", 2));
1550 ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0';
1551 *var = lookup_var_by_short_name (dict, ss_data (short_name_ss));
1554 if (++*warning_cnt <= max_warnings)
1555 sys_warn (r, _("Variable map refers to unknown variable %s."),
1556 ss_data (short_name_ss));
1560 ss_data (value_ss)[ss_length (value_ss)] = '\0';
1561 *value = ss_data (value_ss);
1569 /* Displays a corruption message. */
1571 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
1576 ds_init_empty (&text);
1577 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
1578 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
1579 ds_put_vformat (&text, format, args);
1581 m.category = msg_class_to_category (class);
1582 m.severity = msg_class_to_severity (class);
1583 m.where.file_name = NULL;
1584 m.where.line_number = 0;
1585 m.text = ds_cstr (&text);
1590 /* Displays a warning for the current file position. */
1592 sys_warn (struct sfm_reader *r, const char *format, ...)
1596 va_start (args, format);
1597 sys_msg (r, MW, format, args);
1601 /* Displays an error for the current file position,
1602 marks it as in an error state,
1603 and aborts reading it using longjmp. */
1605 sys_error (struct sfm_reader *r, const char *format, ...)
1609 va_start (args, format);
1610 sys_msg (r, ME, format, args);
1614 longjmp (r->bail_out, 1);
1617 /* Reads BYTE_CNT bytes into BUF.
1618 Returns true if exactly BYTE_CNT bytes are successfully read.
1619 Aborts if an I/O error or a partial read occurs.
1620 If EOF_IS_OK, then an immediate end-of-file causes false to be
1621 returned; otherwise, immediate end-of-file causes an abort
1624 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
1625 void *buf, size_t byte_cnt)
1627 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
1628 if (bytes_read == byte_cnt)
1630 else if (ferror (r->file))
1631 sys_error (r, _("System error: %s."), strerror (errno));
1632 else if (!eof_is_ok || bytes_read != 0)
1633 sys_error (r, _("Unexpected end of file."));
1638 /* Reads BYTE_CNT into BUF.
1639 Aborts upon I/O error or if end-of-file is encountered. */
1641 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
1643 read_bytes_internal (r, false, buf, byte_cnt);
1646 /* Reads BYTE_CNT bytes into BUF.
1647 Returns true if exactly BYTE_CNT bytes are successfully read.
1648 Returns false if an immediate end-of-file is encountered.
1649 Aborts if an I/O error or a partial read occurs. */
1651 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
1653 return read_bytes_internal (r, true, buf, byte_cnt);
1656 /* Reads a 32-bit signed integer from R and returns its value in
1659 read_int (struct sfm_reader *r)
1662 read_bytes (r, integer, sizeof integer);
1663 return integer_get (r->integer_format, integer, sizeof integer);
1666 /* Reads a 64-bit floating-point number from R and returns its
1667 value in host format. */
1669 read_float (struct sfm_reader *r)
1672 read_bytes (r, number, sizeof number);
1673 return float_get_double (r->float_format, number);
1676 /* Reads exactly SIZE - 1 bytes into BUFFER
1677 and stores a null byte into BUFFER[SIZE - 1]. */
1679 read_string (struct sfm_reader *r, char *buffer, size_t size)
1682 read_bytes (r, buffer, size - 1);
1683 buffer[size - 1] = '\0';
1686 /* Skips BYTES bytes forward in R. */
1688 skip_bytes (struct sfm_reader *r, size_t bytes)
1693 size_t chunk = MIN (sizeof buffer, bytes);
1694 read_bytes (r, buffer, chunk);
1699 static struct casereader_class sys_file_casereader_class =
1701 sys_file_casereader_read,
1702 sys_file_casereader_destroy,