1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/i18n.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/message.h>
31 #include <libpspp/compiler.h>
32 #include <libpspp/misc.h>
33 #include <libpspp/pool.h>
34 #include <libpspp/str.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/array.h>
38 #include <data/attributes.h>
39 #include <data/case.h>
40 #include <data/casereader-provider.h>
41 #include <data/casereader.h>
42 #include <data/dictionary.h>
43 #include <data/file-handle-def.h>
44 #include <data/file-name.h>
45 #include <data/format.h>
46 #include <data/missing-values.h>
47 #include <data/short-names.h>
48 #include <data/value-labels.h>
49 #include <data/variable.h>
50 #include <data/value.h>
55 #include "unlocked-io.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) (msgid)
63 /* System file reader. */
66 /* Resource tracking. */
67 struct pool *pool; /* All system file state. */
68 jmp_buf bail_out; /* longjmp() target for error handling. */
71 struct file_handle *fh; /* File handle. */
72 struct fh_lock *lock; /* Mutual exclusion for file handle. */
73 FILE *file; /* File stream. */
74 bool error; /* I/O or corruption error? */
75 struct caseproto *proto; /* Format of output cases. */
78 enum integer_format integer_format; /* On-disk integer format. */
79 enum float_format float_format; /* On-disk floating point format. */
80 int oct_cnt; /* Number of 8-byte units per case. */
81 struct sfm_var *sfm_vars; /* Variables. */
82 size_t sfm_var_cnt; /* Number of variables. */
83 casenumber case_cnt; /* Number of cases */
84 bool has_long_var_names; /* File has a long variable name map */
87 bool compressed; /* File is compressed? */
88 double bias; /* Compression bias, usually 100.0. */
89 uint8_t opcodes[8]; /* Current block of opcodes. */
90 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
91 bool corruption_warning; /* Warned about possible corruption? */
94 static const struct casereader_class sys_file_casereader_class;
96 static bool close_reader (struct sfm_reader *);
98 static struct variable **make_var_by_value_idx (struct sfm_reader *,
100 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
104 static void sys_msg (struct sfm_reader *r, int class,
105 const char *format, va_list args)
106 PRINTF_FORMAT (3, 0);
107 static void sys_warn (struct sfm_reader *, const char *, ...)
108 PRINTF_FORMAT (2, 3);
109 static void sys_error (struct sfm_reader *, const char *, ...)
113 static void read_bytes (struct sfm_reader *, void *, size_t);
114 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
115 static int read_int (struct sfm_reader *);
116 static double read_float (struct sfm_reader *);
117 static void read_string (struct sfm_reader *, char *, size_t);
118 static void skip_bytes (struct sfm_reader *, size_t);
120 static struct text_record *open_text_record (struct sfm_reader *, size_t size);
121 static void close_text_record (struct sfm_reader *r,
122 struct text_record *);
123 static bool read_variable_to_value_pair (struct sfm_reader *,
125 struct text_record *,
126 struct variable **var, char **value);
127 static void text_warn (struct sfm_reader *r, struct text_record *text,
128 const char *format, ...)
129 PRINTF_FORMAT (3, 4);
130 static char *text_get_token (struct text_record *,
131 struct substring delimiters);
132 static bool text_match (struct text_record *, char c);
133 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
134 struct text_record *,
135 struct substring delimiters,
138 static bool close_reader (struct sfm_reader *r);
140 /* Dictionary reader. */
148 static void read_header (struct sfm_reader *, struct dictionary *,
149 int *weight_idx, int *claimed_oct_cnt,
150 struct sfm_read_info *);
151 static void read_variable_record (struct sfm_reader *, struct dictionary *,
152 int *format_warning_cnt);
153 static void parse_format_spec (struct sfm_reader *, unsigned int,
154 enum which_format, struct variable *,
155 int *format_warning_cnt);
156 static void setup_weight (struct sfm_reader *, int weight_idx,
157 struct variable **var_by_value_idx,
158 struct dictionary *);
159 static void read_documents (struct sfm_reader *, struct dictionary *);
160 static void read_value_labels (struct sfm_reader *, struct dictionary *,
161 struct variable **var_by_value_idx);
163 static void read_extension_record (struct sfm_reader *, struct dictionary *,
164 struct sfm_read_info *);
165 static void read_machine_integer_info (struct sfm_reader *,
166 size_t size, size_t count,
167 struct sfm_read_info *,
170 static void read_machine_float_info (struct sfm_reader *,
171 size_t size, size_t count);
172 static void read_display_parameters (struct sfm_reader *,
173 size_t size, size_t count,
174 struct dictionary *);
175 static void read_long_var_name_map (struct sfm_reader *,
176 size_t size, size_t count,
177 struct dictionary *);
178 static void read_long_string_map (struct sfm_reader *,
179 size_t size, size_t count,
180 struct dictionary *);
181 static void read_data_file_attributes (struct sfm_reader *,
182 size_t size, size_t count,
183 struct dictionary *);
184 static void read_variable_attributes (struct sfm_reader *,
185 size_t size, size_t count,
186 struct dictionary *);
187 static void read_long_string_value_labels (struct sfm_reader *,
188 size_t size, size_t count,
189 struct dictionary *);
191 /* Convert all the strings in DICT from the dict encoding to UTF8 */
/* Recodes the text held in DICT to UTF-8: each variable's name, its
   label (if it has one) and its value labels (if it has any).  ENC is
   the source encoding, taken from the dictionary; the call to
   get_default_encoding() supplies a fallback (presumably when the
   dictionary has no encoding -- the guarding condition is not visible
   here).

   A variable is renamed only when the recoded name differs from the
   native one and no variable of that name already exists in DICT.
   Otherwise the duplicate is reported and dict_rename_var() is not
   called.

   NOTE(review): recode_string() results are assigned to plain char
   pointers (utf8_name, utf8_label, new_label); confirm each one is
   released on the lines not shown in this listing. */
193 recode_strings (struct dictionary *dict)
197 const char *enc = dict_get_encoding (dict);
200 enc = get_default_encoding ();
202 for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
204 /* Convert the long variable name */
205 struct variable *var = dict_get_var (dict, i);
206 const char *native_name = var_get_name (var);
207 char *utf8_name = recode_string (UTF8, enc, native_name, -1);
208 if ( 0 != strcmp (utf8_name, native_name))
210 if ( NULL == dict_lookup_var (dict, utf8_name))
211 dict_rename_var (dict, var, utf8_name);
214 _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name);
219 /* Convert the variable label */
220 if (var_has_label (var))
222 char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
223 var_set_label (var, utf8_label);
/* Convert the value labels, replacing each label in place for the
   value it is attached to. */
227 if (var_has_value_labels (var))
229 const struct val_lab *vl = NULL;
230 const struct val_labs *vlabs = var_get_value_labels (var);
232 for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
234 const union value *val = val_lab_get_value (vl);
235 const char *label = val_lab_get_label (vl);
236 char *new_label = NULL;
238 new_label = recode_string (UTF8, enc, label, -1);
240 var_replace_value_label (var, val, new_label);
247 /* Opens the system file designated by file handle FH for
248 reading. Reads the system file's dictionary into *DICT.
249 If INFO is non-null, then it receives additional info about the system file. */
/* Opens the system file behind FH and reads its dictionary into a
   newly created *DICT.

   INFO is cleared with memset() and then filled in by read_header()
   and read_extension_record().  The header and dictionary records are
   read after setjmp (r->bail_out) has been armed; the bail_out field
   is described as the longjmp() target for error handling.

   Processing order, as far as this listing shows:
     - the file header;
     - every type 2 (variable) record;
     - the weight variable, looked up through the value-index table;
     - the remaining dictionary records, until record type 999;
     - if no long variable name map was seen, each variable gets its
       lower-cased short name as long name and keeps the short name;
     - all dictionary strings are recoded by recode_strings().

   A warning is issued when the case size claimed by the header
   differs from what was actually read, except for files whose major
   version is 13.

   The reader keeps its own table of variable widths (sfm_vars)
   because the caller owns the dictionary.  The return value is a
   sequential casereader; its case count is CASENUMBER_MAX when
   R->case_cnt is -1.  One visible path also calls
   dict_destroy (*DICT); presumably that is the failure path -- the
   surrounding lines are not shown here. */
252 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
253 struct sfm_read_info *volatile info)
255 struct sfm_reader *volatile r = NULL;
256 struct variable **var_by_value_idx;
257 struct sfm_read_info local_info;
258 int format_warning_cnt = 0;
263 *dict = dict_create ();
265 /* Create and initialize reader. */
266 r = pool_create_container (struct sfm_reader, pool);
272 r->has_long_var_names = false;
273 r->opcode_idx = sizeof r->opcodes;
274 r->corruption_warning = false;
276 /* TRANSLATORS: this fragment will be interpolated into
277 messages in fh_lock() that identify types of files. */
278 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
282 r->file = fn_open (fh_get_file_name (fh), "rb");
285 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
286 fh_get_file_name (r->fh), strerror (errno));
290 /* Initialize info. */
293 memset (info, 0, sizeof *info);
295 if (setjmp (r->bail_out))
300 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
302 /* Read all the variable definition records. */
303 rec_type = read_int (r);
304 while (rec_type == 2)
306 read_variable_record (r, *dict, &format_warning_cnt);
307 rec_type = read_int (r);
310 /* Figure out the case format. */
311 var_by_value_idx = make_var_by_value_idx (r, *dict);
312 setup_weight (r, weight_idx, var_by_value_idx, *dict);
314 /* Read all the rest of the dictionary records. */
315 while (rec_type != 999)
320 read_value_labels (r, *dict, var_by_value_idx);
324 sys_error (r, _("Misplaced type 4 record."));
327 read_documents (r, *dict);
331 read_extension_record (r, *dict, info);
335 sys_error (r, _("Unrecognized record type %d."), rec_type);
337 rec_type = read_int (r);
341 if ( ! r->has_long_var_names )
344 for (i = 0; i < dict_get_var_cnt (*dict); i++)
346 struct variable *var = dict_get_var (*dict, i);
347 char short_name[SHORT_NAME_LEN + 1];
348 char long_name[SHORT_NAME_LEN + 1];
350 strcpy (short_name, var_get_name (var));
352 strcpy (long_name, short_name);
353 str_lowercase (long_name);
355 /* Set long name. Renaming a variable may clear the short
356 name, but we want to retain it, so re-set it
358 dict_rename_var (*dict, var, long_name);
359 var_set_short_name (var, 0, short_name);
362 r->has_long_var_names = true;
365 recode_strings (*dict);
367 /* Read record 999 data, which is just filler. */
370 /* Warn if the actual amount of data per case differs from the
371 amount that the header claims. SPSS version 13 gets this
372 wrong when very long strings are involved, so don't warn in
374 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
375 && info->version_major != 13)
376 sys_warn (r, _("File header claims %d variable positions but "
377 "%d were read from file."),
378 claimed_oct_cnt, r->oct_cnt);
380 /* Create an index of dictionary variable widths for
381 sfm_read_case to use. We cannot use the `struct variable's
382 from the dictionary we created, because the caller owns the
383 dictionary and may destroy or modify its variables. */
384 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
385 pool_register (r->pool, free, r->sfm_vars);
386 r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
388 pool_free (r->pool, var_by_value_idx);
389 return casereader_create_sequential
391 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
392 &sys_file_casereader_class, r);
396 dict_destroy (*dict);
401 /* Closes a system file after we're done with it.
402 Returns true if an I/O error has occurred on READER, false otherwise. */
/* Closes R's file stream with fn_close(); a result of EOF is reported
   as an error message that includes strerror (errno).  Finally
   destroys R's pool, which the struct describes as holding all system
   file state. */
405 close_reader (struct sfm_reader *r)
414 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
416 msg (ME, _("Error closing system file \"%s\": %s."),
417 fh_get_file_name (r->fh), strerror (errno));
427 pool_destroy (r->pool);
432 /* Destroys READER. */
/* R_ is the struct sfm_reader to dispose of; READER itself is not
   used.  Presumably installed as the destroy hook of
   sys_file_casereader_class -- the class definition is not visible
   here. */
434 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
436 struct sfm_reader *r = r_;
440 /* Returns true if FILE is an SPSS system file, false otherwise. */
/* Reads one 4-byte item from FILE into rec_type and compares it with
   the "$FL2" signature.  The strcmp() relies on rec_type being
   null-terminated, which presumably happens on a line not shown in
   this listing, as does the handling of a short read. */
443 sfm_detect (FILE *file)
447 if (fread (rec_type, 4, 1, file) != 1)
451 return !strcmp ("$FL2", rec_type);
454 /* Reads the global header of the system file.
455 Sets DICT's file label to the system file's label.
456 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
457 or to the value index of the weight variable otherwise.
458 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
459 per case that the file claims to have (although it is not necessarily correct).
461 Initializes INFO with header information. */
/* Parses the file header record.

   R receives integer_format, float_format, compressed, case_cnt and
   bias.
   DICT receives the file label with spaces trimmed, unless the label
   is empty.
   WEIGHT_IDX receives the weight index field exactly as read.
   CLAIMED_OCT_CNT receives the case-size field, or -1 when that field
   is negative or greater than INT_MAX / 16.
   INFO receives the creation date and time, both number formats, the
   compression flag, the case count and the product name (the
   eye-catcher with any leading "@(#) SPSS DATA FILE" and surrounding
   spaces removed, truncated to fit).

   sys_error() is called when the record type is not "$FL2" or when
   the layout code cannot be identified as a big- or little-endian
   encoding of 2 or 3.

   When float_identify() reports 0 for the expected bias of 100.0, the
   floating-point format is chosen from the integer byte order
   instead, and a warning is issued only if the raw bias bytes are not
   all zero. */
463 read_header (struct sfm_reader *r, struct dictionary *dict,
464 int *weight_idx, int *claimed_oct_cnt,
465 struct sfm_read_info *info)
468 char eye_catcher[61];
469 uint8_t raw_layout_code[4];
471 char creation_date[10];
472 char creation_time[9];
474 struct substring file_label_ss;
475 struct substring product;
477 read_string (r, rec_type, sizeof rec_type);
478 read_string (r, eye_catcher, sizeof eye_catcher);
480 if (strcmp ("$FL2", rec_type) != 0)
481 sys_error (r, _("This is not an SPSS system file."));
483 /* Identify integer format. */
484 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
485 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
487 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
489 || (r->integer_format != INTEGER_MSB_FIRST
490 && r->integer_format != INTEGER_LSB_FIRST))
491 sys_error (r, _("This is not an SPSS system file."));
493 *claimed_oct_cnt = read_int (r);
494 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
495 *claimed_oct_cnt = -1;
497 r->compressed = read_int (r) != 0;
499 *weight_idx = read_int (r);
501 r->case_cnt = read_int (r);
502 if ( r->case_cnt > INT_MAX / 2)
506 /* Identify floating-point format and obtain compression bias. */
507 read_bytes (r, raw_bias, sizeof raw_bias);
508 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
510 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
512 if (memcmp (raw_bias, zero_bias, 8))
513 sys_warn (r, _("Compression bias is not the usual "
514 "value of 100, or system file uses unrecognized "
515 "floating-point format."));
518 /* Some software is known to write all-zeros to this
519 field. Such software also writes floating-point
520 numbers in the format that we expect by default
521 (it seems that all software most likely does, in
522 reality), so don't warn in this case. */
525 if (r->integer_format == INTEGER_MSB_FIRST)
526 r->float_format = FLOAT_IEEE_DOUBLE_BE;
528 r->float_format = FLOAT_IEEE_DOUBLE_LE;
530 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
532 read_string (r, creation_date, sizeof creation_date);
533 read_string (r, creation_time, sizeof creation_time);
534 read_string (r, file_label, sizeof file_label);
537 file_label_ss = ss_cstr (file_label);
538 ss_trim (&file_label_ss, ss_cstr (" "));
539 if (!ss_is_empty (file_label_ss))
541 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
542 dict_set_label (dict, ss_data (file_label_ss));
545 strcpy (info->creation_date, creation_date);
546 strcpy (info->creation_time, creation_time);
547 info->integer_format = r->integer_format;
548 info->float_format = r->float_format;
549 info->compressed = r->compressed;
550 info->case_cnt = r->case_cnt;
552 product = ss_cstr (eye_catcher);
553 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
554 ss_trim (&product, ss_cstr (" "));
555 str_copy_buf_trunc (info->product, sizeof info->product,
556 ss_data (product), ss_length (product));
559 /* Reads a variable (type 2) record from R and adds the
560 corresponding variable to DICT.
561 Also skips past additional variable records for long string variables. */
/* Reads one variable record and creates the variable in DICT.

   Fields are read in this order: width, label indicator, missing
   value code, print format, write format and the name (cut at the
   first space).  The variable's short name is set to its name.

   Validation that ends in sys_error():
     - a name starting with `$' or `#', or otherwise implausible;
     - a width outside 0..255;
     - a duplicate name;
     - a label indicator other than 0 or 1, or a label too long for
       the local buffer;
     - a missing value code outside the set allowed for the
       variable's type.

   For numeric variables a negative missing value code introduces a
   low/high range, followed by (-code - 2) discrete values.  String
   missing values are read 8 bytes at a time.

   A string wider than 8 bytes is followed by DIV_RND_UP (width, 8) - 1
   continuation records (type 2, width -1); they are checked and
   skipped, including any variable label they carry.

   FORMAT_WARNING_CNT is handed on to parse_format_spec() for both
   the print and the write format. */
564 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
565 int *format_warning_cnt)
568 int has_variable_label;
569 int missing_value_code;
574 struct variable *var;
577 width = read_int (r);
578 has_variable_label = read_int (r);
579 missing_value_code = read_int (r);
580 print_format = read_int (r);
581 write_format = read_int (r);
582 read_string (r, name, sizeof name);
583 name[strcspn (name, " ")] = '\0';
585 /* Check variable name. */
586 if (name[0] == '$' || name[0] == '#')
/* Marked with _() like every other diagnostic here, so that it is
   picked up for translation. */
587 sys_error (r, _("Variable name begins with invalid character `%c'."),
589 if (!var_is_plausible_name (name, false))
590 sys_error (r, _("Invalid variable name `%s'."), name);
592 /* Create variable. */
593 if (width < 0 || width > 255)
594 sys_error (r, _("Bad width %d for variable %s."), width, name);
595 var = dict_create_var (dict, name, width);
598 _("Duplicate variable name `%s' within system file."),
601 /* Set the short name the same as the long name. */
602 var_set_short_name (var, 0, var_get_name (var));
604 /* Get variable label, if any. */
605 if (has_variable_label != 0 && has_variable_label != 1)
606 sys_error (r, _("Variable label indicator field is not 0 or 1."));
607 if (has_variable_label == 1)
613 if (len >= sizeof label)
614 sys_error (r, _("Variable %s has label of invalid length %zu."),
616 read_string (r, label, len + 1);
617 var_set_label (var, label);
619 skip_bytes (r, ROUND_UP (len, 4) - len);
622 /* Set missing values. */
623 if (missing_value_code != 0)
625 struct missing_values mv;
628 mv_init_pool (r->pool, &mv, var_get_width (var));
629 if (var_is_numeric (var))
631 if (missing_value_code < -3 || missing_value_code > 3
632 || missing_value_code == -1)
633 sys_error (r, _("Numeric missing value indicator field is not "
634 "-3, -2, 0, 1, 2, or 3."));
635 if (missing_value_code < 0)
637 double low = read_float (r);
638 double high = read_float (r);
639 mv_add_range (&mv, low, high);
640 missing_value_code = -missing_value_code - 2;
642 for (i = 0; i < missing_value_code; i++)
643 mv_add_num (&mv, read_float (r));
647 int mv_width = MAX (width, 8);
650 if (missing_value_code < 1 || missing_value_code > 3)
651 sys_error (r, _("String missing value indicator field is not "
654 value_init (&value, mv_width);
655 value_set_missing (&value, mv_width);
656 for (i = 0; i < missing_value_code; i++)
658 uint8_t *s = value_str_rw (&value, mv_width);
659 read_bytes (r, s, 8);
662 value_destroy (&value, mv_width);
664 var_set_missing_values (var, &mv);
668 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
669 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
671 /* Account for values.
672 Skip long string continuation records, if any. */
673 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
679 for (i = 1; i < nv; i++)
681 /* Check for record type 2 and width -1. */
682 if (read_int (r) != 2 || read_int (r) != -1)
683 sys_error (r, _("Missing string continuation record."));
685 /* Skip and ignore remaining continuation data. */
686 has_variable_label = read_int (r);
687 missing_value_code = read_int (r);
688 print_format = read_int (r);
689 write_format = read_int (r);
690 read_string (r, name, sizeof name);
692 /* Variable label fields on continuation records have
693 been spotted in system files created by "SPSS Power
694 Macintosh Release 6.1". */
695 if (has_variable_label)
696 skip_bytes (r, ROUND_UP (read_int (r), 4));
701 /* Translates the format spec from sysfile format to internal format. */
/* Decodes the packed format specification S and, if it is valid for
   V, installs it as V's print or write format according to WHICH.

   The format type is taken from bits 16 and up of S (narrowed to 8
   bits).  An unknown type is fatal via sys_error().  A known type
   that fails fmt_check_output() or is incompatible with V's width is
   only warned about.

   FORMAT_WARNING_CNT points to the caller's running count of
   invalid-format warnings.  At most MAX_FORMAT_WARNINGS of them are
   printed, and the last one is followed by a notice that further
   warnings are suppressed. */
704 parse_format_spec (struct sfm_reader *r, unsigned int s,
705 enum which_format which, struct variable *v,
706 int *format_warning_cnt)
708 const int max_format_warnings = 8;
710 uint8_t raw_type = s >> 16;
716 if (!fmt_from_io (raw_type, &f.type))
717 sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
722 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
727 if (which == PRINT_FORMAT)
728 var_set_print_format (v, &f);
730 var_set_write_format (v, &f);
/* Increment the counter that FORMAT_WARNING_CNT points to.  The
   former `*++format_warning_cnt' advanced the pointer itself, so the
   caller's count never changed and the reads below went past the
   caller's variable. */
732 else if (++*format_warning_cnt <= max_format_warnings)
734 char fmt_string[FMT_STRING_LEN_MAX + 1];
735 sys_warn (r, _("%s variable %s has invalid %s format %s."),
736 var_is_numeric (v) ? _("Numeric") : _("String"),
738 which == PRINT_FORMAT ? _("print") : _("write"),
739 fmt_to_string (&f, fmt_string));
741 if (*format_warning_cnt == max_format_warnings)
742 sys_warn (r, _("Suppressing further invalid format warnings."));
746 /* Sets the weighting variable in DICT to the variable
747 corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is nonzero. */
/* Resolves WEIGHT_IDX to a variable through VAR_BY_VALUE_IDX.  A
   numeric variable becomes DICT's weighting variable; any other
   variable is rejected through sys_error(). */
750 setup_weight (struct sfm_reader *r, int weight_idx,
751 struct variable **var_by_value_idx, struct dictionary *dict)
755 struct variable *weight_var
756 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
757 if (var_is_numeric (weight_var))
758 dict_set_weight (dict, weight_var);
760 sys_error (r, _("Weighting variable must be numeric."));
764 /* Reads a document record, type 6, from system file R, and sets up
765 the documents and n_documents fields in the associated dictionary. */
/* Reads the line count and then the document text into a temporary
   buffer taken from R's pool.  The buffer is sized for LINE_CNT + 1
   lines of DOC_LINE_LENGTH bytes.  The text is handed to DICT only
   if its string length is exactly DOC_LINE_LENGTH * LINE_CNT; the
   temporary buffer is released afterwards.

   sys_error() is called when DICT already has documents, when the
   line count fails the "must be greater than 0" check (the condition
   itself is not visible in this listing), or when the text is shorter
   than expected because it contains a null byte. */
768 read_documents (struct sfm_reader *r, struct dictionary *dict)
773 if (dict_get_documents (dict) != NULL)
774 sys_error (r, _("Multiple type 6 (document) records."));
776 line_cnt = read_int (r);
778 sys_error (r, _("Number of document lines (%d) "
779 "must be greater than 0."), line_cnt);
781 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
782 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
783 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
784 dict_set_documents (dict, documents);
786 sys_error (r, _("Document line contains null byte."));
787 pool_free (r->pool, documents);
790 /* Read a type 7 extension record. */
/* Reads the subtype, element SIZE and element COUNT of an extension
   record and passes the record on to the reader for that subtype.
   The case labels that select each reader are not visible in this
   listing.

   SIZE * COUNT + 1 must not overflow size_t; otherwise sys_error() is
   called.  An unrecognized subtype produces a warning asking for a
   bug report.  The final skip_bytes() discards SIZE * COUNT bytes;
   presumably it is reached by the subtypes that have no dedicated
   reader. */
792 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
793 struct sfm_read_info *info)
795 int subtype = read_int (r);
796 size_t size = read_int (r);
797 size_t count = read_int (r);
798 size_t bytes = size * count;
800 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
801 allows an extra byte for a null terminator, used by some
802 extension processing routines. */
803 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
/* Marked with _() like the other diagnostics so that it can be
   translated. */
804 sys_error (r, _("Record type 7 subtype %d too large."), subtype);
809 read_machine_integer_info (r, size, count, info, dict);
813 read_machine_float_info (r, size, count);
817 /* Variable sets information. We don't use these yet.
818 They only apply to GUIs; see VARSETS on the APPLY
819 DICTIONARY command in SPSS documentation. */
823 /* DATE variable information. We don't use it yet, but we
should. */
828 /* Used by the MRSETS command. */
832 /* Used by the SPSS Data Entry software. */
836 read_display_parameters (r, size, count, dict);
840 read_long_var_name_map (r, size, count, dict);
844 read_long_string_map (r, size, count, dict);
848 /* New in SPSS v14? Unknown purpose. */
852 read_data_file_attributes (r, size, count, dict);
856 read_variable_attributes (r, size, count, dict);
860 /* New in SPSS 16. Contains a single string that describes
861 the character encoding, e.g. "windows-1252". */
863 char *encoding = pool_calloc (r->pool, size, count + 1);
864 read_string (r, encoding, count + 1);
865 dict_set_encoding (dict, encoding);
870 /* New in SPSS 16. Encodes value labels for long string
variables. */
872 read_long_string_value_labels (r, size, count, dict);
876 sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
877 subtype, PACKAGE_BUGREPORT);
881 skip_bytes (r, bytes);
884 /* Read record type 7, subtype 3. */
/* Reads the eight integers of the machine integer info record.

   INFO receives the major, minor and revision version numbers.

   The floating-point representation code from the file must match
   the one expected for R->float_format (1 for IEEE double, 2 for
   Z long, 3 for VAX G or D); a mismatch is fatal.  A mismatch in the
   integer representation code (1 for MSB first, 2 for LSB first) is
   only warned about.

   If DICT has no encoding yet, the character code is used as a
   fallback to choose one.  Codes without a specific name become
   "CP<code>".

   SIZE must be 4 and COUNT 8, otherwise sys_error() is called.  Note
   that all eight integers are read before that check is made. */
886 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
887 struct sfm_read_info *info,
888 struct dictionary *dict)
890 int version_major = read_int (r);
891 int version_minor = read_int (r);
892 int version_revision = read_int (r);
893 int machine_code UNUSED = read_int (r);
894 int float_representation = read_int (r);
895 int compression_code UNUSED = read_int (r);
896 int integer_representation = read_int (r);
897 int character_code = read_int (r);
899 int expected_float_format;
900 int expected_integer_format;
902 if (size != 4 || count != 8)
903 sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
907 /* Save version info. */
908 info->version_major = version_major;
909 info->version_minor = version_minor;
910 info->version_revision = version_revision;
912 /* Check floating point format. */
913 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
914 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
915 expected_float_format = 1;
916 else if (r->float_format == FLOAT_Z_LONG)
917 expected_float_format = 2;
918 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
919 expected_float_format = 3;
922 if (float_representation != expected_float_format)
/* Report the code that came from the file, which is what the message
   describes, and not the internal enum value R->float_format. */
923 sys_error (r, _("Floating-point representation indicated by "
924 "system file (%d) differs from expected (%d)."),
925 float_representation, expected_float_format);
927 /* Check integer format. */
928 if (r->integer_format == INTEGER_MSB_FIRST)
929 expected_integer_format = 1;
930 else if (r->integer_format == INTEGER_LSB_FIRST)
931 expected_integer_format = 2;
934 if (integer_representation != expected_integer_format)
936 static const char *const endian[] = {N_("Little Endian"), N_("Big Endian")};
937 sys_warn (r, _("Integer format indicated by system file (%s) "
938 "differs from expected (%s)."),
939 gettext (endian[integer_representation == 1]),
940 gettext (endian[expected_integer_format == 1]));
/*
945 Record 7 (20) provides a much more reliable way of
946 setting the encoding.
947 The character_code is used as a fallback only.
*/
949 if ( NULL == dict_get_encoding (dict))
951 switch (character_code)
954 dict_set_encoding (dict, "EBCDIC-US");
958 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
959 respectively. However, there are known to be many files
960 in the wild with character code 2, yet have data which are
clearly not ASCII.
962 Therefore we ignore these values.
*/
966 dict_set_encoding (dict, "MS_KANJI");
969 dict_set_encoding (dict, "UTF-7");
972 dict_set_encoding (dict, "UTF-8");
977 snprintf (enc, 100, "CP%d", character_code);
978 dict_set_encoding (dict, enc);
985 /* Read record type 7, subtype 4. */
/* Reads three doubles, in the order SYSMIS, HIGHEST, LOWEST.  SIZE
   must be 8 and COUNT must be 3, otherwise sys_error() is called;
   the three values are read before that check.  Each value that
   differs from this program's constant of the same name produces a
   warning naming the value. */
987 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
989 double sysmis = read_float (r);
990 double highest = read_float (r);
991 double lowest = read_float (r);
993 if (size != 8 || count != 3)
994 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
997 if (sysmis != SYSMIS)
998 sys_warn (r, _("File specifies unexpected value %g as %s."),
1001 if (highest != HIGHEST)
1002 sys_warn (r, _("File specifies unexpected value %g as %s."),
1003 highest, "HIGHEST");
1005 if (lowest != LOWEST)
1006 sys_warn (r, _("File specifies unexpected value %g as %s."),
1010 /* Read record type 7, subtype 11, which specifies how variables
1011 should be displayed in GUI environments. */
/* Applies measurement level, alignment and, when present, display
   width to each variable of DICT, in dictionary order.

   COUNT must be 2 or 3 integers per dictionary variable; the
   3-integer form includes a display width between measure and
   alignment.  A bad SIZE or COUNT produces a warning and the record
   body (SIZE * COUNT bytes) is skipped.

   A measure of 1..3 and an alignment of 0..2 are accepted.  Other
   values lead to the "Default parameters substituted" warning; the
   WARNED flag presumably limits it to one occurrence (its use is not
   visible in this listing). */
1013 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
1014 struct dictionary *dict)
1017 bool includes_width;
1018 bool warned = false;
1023 sys_warn (r, _("Bad size %zu on extension 11."), size);
1024 skip_bytes (r, size * count);
1028 n_vars = dict_get_var_cnt (dict);
1029 if (count == 3 * n_vars)
1030 includes_width = true;
1031 else if (count == 2 * n_vars)
1032 includes_width = false;
1035 sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
1037 skip_bytes (r, size * count);
1041 for (i = 0; i < n_vars; ++i)
1043 struct variable *v = dict_get_var (dict, i);
1044 int measure = read_int (r);
1045 int width = includes_width ? read_int (r) : 0;
1046 int align = read_int (r);
1048 /* SPSS 14 sometimes seems to set string variables' measure
1050 if (0 == measure && var_is_alpha (v))
1053 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1056 sys_warn (r, _("Invalid variable display parameters "
1057 "for variable %zu (%s). "
1058 "Default parameters substituted."),
1059 i, var_get_name (v));
1064 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1065 : measure == 2 ? MEASURE_ORDINAL
1067 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1068 : align == 1 ? ALIGN_RIGHT
1071 /* Older versions (SPSS 9.0) sometimes set the display
1072 width to zero. This causes confusion in the GUI, so
1073 only set the width if it is nonzero. */
1075 var_set_display_width (v, width);
1079 /* Reads record type 7, subtype 13, which gives the long name
1080 that corresponds to each short name. Modifies variable names
1081 in DICT accordingly. */
/* Treats the record body (SIZE * COUNT bytes) as a text record of
   variable/long-name pairs and processes each pair in turn:

     - a long name that is not a valid variable name is warned about;
     - a long name that differs (ignoring case) from the variable's
       first short name and already names a variable in DICT is
       warned about as a duplicate;
     - otherwise the variable's short names are copied, the variable
       is renamed to the long name, and the short names are set
       again from the copies.

   Finally sets R->has_long_var_names.

   NOTE(review): each copied short name is freed in the restore loop;
   confirm that the short_names array itself is released on a line
   not shown in this listing. */
1083 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
1084 struct dictionary *dict)
1086 struct text_record *text;
1087 struct variable *var;
1090 text = open_text_record (r, size * count);
1091 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1094 size_t short_name_cnt;
1097 /* Validate long name. */
1098 if (!var_is_valid_name (long_name, false))
1100 sys_warn (r, _("Long variable mapping from %s to invalid "
1101 "variable name `%s'."),
1102 var_get_name (var), long_name);
1106 /* Identify any duplicates. */
1107 if (strcasecmp (var_get_short_name (var, 0), long_name)
1108 && dict_lookup_var (dict, long_name) != NULL)
1110 sys_warn (r, _("Duplicate long variable name `%s' "
1111 "within system file."), long_name);
1115 /* Renaming a variable may clear its short names, but we
1116 want to retain them, so we save them and re-set them
1118 short_name_cnt = var_get_short_name_cnt (var);
1119 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
1120 for (i = 0; i < short_name_cnt; i++)
1122 const char *s = var_get_short_name (var, i);
1123 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1126 /* Set long name. */
1127 dict_rename_var (dict, var, long_name);
1129 /* Restore short names. */
1130 for (i = 0; i < short_name_cnt; i++)
1132 var_set_short_name (var, i, short_names[i]);
1133 free (short_names[i]);
1137 close_text_record (r, text);
1138 r->has_long_var_names = true;
1141 /* Reads record type 7, subtype 14, which gives the real length
1142 of each very long string. Rearranges DICT accordingly. */
1144 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
1145 struct dictionary *dict)
1147 struct text_record *text;
1148 struct variable *var;
1151 text = open_text_record (r, size * count);
1152 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1154 size_t idx = var_get_dict_index (var);
1160 length = strtol (length_s, NULL, 10);
1161 if (length < 1 || length > MAX_STRING)
1163 sys_warn (r, _("%s listed as string of invalid length %s "
1164 "in very length string record."),
1165 var_get_name (var), length_s);
1169 /* Check segments. */
1170 segment_cnt = sfm_width_to_segments (length);
1171 if (segment_cnt == 1)
1173 sys_warn (r, _("%s listed in very long string record with width %s, "
1174 "which requires only one segment."),
1175 var_get_name (var), length_s);
1178 if (idx + segment_cnt > dict_get_var_cnt (dict))
1179 sys_error (r, _("Very long string %s overflows dictionary."),
1180 var_get_name (var));
1182 /* Get the short names from the segments and check their
1184 for (i = 0; i < segment_cnt; i++)
1186 struct variable *seg = dict_get_var (dict, idx + i);
1187 int alloc_width = sfm_segment_alloc_width (length, i);
1188 int width = var_get_width (seg);
1191 var_set_short_name (var, i, var_get_short_name (seg, 0));
1192 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1193 sys_error (r, _("Very long string with width %ld has segment %d "
1194 "of width %d (expected %d)"),
1195 length, i, width, alloc_width);
1197 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1198 var_set_width (var, length);
1200 close_text_record (r, text);
1201 dict_compact_values (dict);
1204 /* Reads value labels from sysfile R and inserts them into the
1205 associated dictionary. */
1207 read_value_labels (struct sfm_reader *r,
1208 struct dictionary *dict, struct variable **var_by_value_idx)
1210 struct pool *subpool;
1214 uint8_t raw_value[8]; /* Value as uninterpreted bytes. */
1215 union value value; /* Value. */
1216 char *label; /* Null-terminated label string. */
1219 struct label *labels = NULL;
1220 int label_cnt; /* Number of labels. */
1222 struct variable **var = NULL; /* Associated variables. */
1223 int var_cnt; /* Number of associated variables. */
1224 int max_width; /* Maximum width of string variables. */
1228 subpool = pool_create_subpool (r->pool);
1230 /* Read the type 3 record and record its contents. We can't do
1231 much with the data yet because we don't know whether it is
1232 of numeric or string type. */
1234 /* Read number of labels. */
1235 label_cnt = read_int (r);
1237 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1239 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1244 /* Read each value/label tuple into labels[]. */
1245 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1246 for (i = 0; i < label_cnt; i++)
1248 struct label *label = labels + i;
1249 unsigned char label_len;
1253 read_bytes (r, label->raw_value, sizeof label->raw_value);
1255 /* Read label length. */
1256 read_bytes (r, &label_len, sizeof label_len);
1257 padded_len = ROUND_UP (label_len + 1, 8);
1259 /* Read label, padding. */
1260 label->label = pool_alloc (subpool, padded_len + 1);
1261 read_bytes (r, label->label, padded_len - 1);
1262 label->label[label_len] = 0;
1265 /* Now, read the type 4 record that has the list of variables
1266 to which the value labels are to be applied. */
1268 /* Read record type of type 4 record. */
1269 if (read_int (r) != 4)
1270 sys_error (r, _("Variable index record (type 4) does not immediately "
1271 "follow value label record (type 3) as it should."));
1273 /* Read number of variables associated with value label from type 4
1275 var_cnt = read_int (r);
1276 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1277 sys_error (r, _("Number of variables associated with a value label (%d) "
1278 "is not between 1 and the number of variables (%zu)."),
1279 var_cnt, dict_get_var_cnt (dict));
1281 /* Read the list of variables. */
1282 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1284 for (i = 0; i < var_cnt; i++)
1286 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1287 if (var_get_width (var[i]) > 8)
1288 sys_error (r, _("Value labels may not be added to long string "
1289 "variables (e.g. %s) using records types 3 and 4."),
1290 var_get_name (var[i]));
1291 max_width = MAX (max_width, var_get_width (var[i]));
1294 /* Type check the variables. */
1295 for (i = 1; i < var_cnt; i++)
1296 if (var_get_type (var[i]) != var_get_type (var[0]))
1297 sys_error (r, _("Variables associated with value label are not all of "
1298 "identical type. Variable %s is %s, but variable "
1300 var_get_name (var[0]),
1301 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1302 var_get_name (var[i]),
1303 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1305 /* Fill in labels[].value, now that we know the desired type. */
1306 for (i = 0; i < label_cnt; i++)
1308 struct label *label = labels + i;
1310 value_init_pool (subpool, &label->value, max_width);
1311 if (var_is_alpha (var[0]))
1312 u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
1313 label->raw_value, sizeof label->raw_value, ' ');
1315 label->value.f = float_get_double (r->float_format, label->raw_value);
1318 /* Assign the `value_label's to each variable. */
1319 for (i = 0; i < var_cnt; i++)
1321 struct variable *v = var[i];
1324 /* Add each label to the variable. */
1325 for (j = 0; j < label_cnt; j++)
1327 struct label *label = &labels[j];
1328 if (!var_add_value_label (v, &label->value, label->label))
1330 if (var_is_numeric (var[0]))
1331 sys_warn (r, _("Duplicate value label for %g on %s."),
1332 label->value.f, var_get_name (v));
1334 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1335 max_width, value_str (&label->value, max_width),
1341 pool_destroy (subpool);
1344 /* Reads a set of custom attributes from TEXT into ATTRS.
   ATTRS may be a null pointer, in which case the attributes are
   read but discarded.

   Each attribute is a key terminated by "(" followed by one or
   more newline-terminated values.  A ")" after a value ends that
   attribute's value list, and a "/" ends the whole set. */
1348 read_attributes (struct sfm_reader *r, struct text_record *text,
1349 struct attrset *attrs)
1353 struct attribute *attr;
1357 /* Parse the key. */
1358 key = text_get_token (text, ss_cstr ("("));
1362 attr = attribute_create (key);
/* Value indexes start at 1; INDEX is used in the diagnostics. */
1363 for (index = 1; ; index++)
1365 /* Parse the value. */
1369 value = text_get_token (text, ss_cstr ("\n"));
1372 text_warn (r, text, _("Error parsing attribute value %s[%d]"),
1377 length = strlen (value);
/* A value wrapped in single quotes is stored without the quotes. */
1378 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
1380 value[length - 1] = '\0';
1381 attribute_add_value (attr, value + 1);
/* An unquoted value draws a warning and is stored unchanged. */
1386 _("Attribute value %s[%d] is not quoted: %s"),
1388 attribute_add_value (attr, value);
1391 /* Was this the last value for this attribute? */
1392 if (text_match (text, ')'))
/* NOTE(review): presumably the attribute is added when ATTRS is
   non-null and destroyed otherwise -- confirm against the
   selecting condition. */
1396 attrset_add (attrs, attr);
1398 attribute_destroy (attr);
1400 while (!text_match (text, '/'));
/* Handles record type 7, subtype 17: SIZE * COUNT bytes of text
   that describe custom attributes of the data file as a whole.
   The parsed attributes go into DICT's attribute set. */
static void
read_data_file_attributes (struct sfm_reader *r,
                           size_t size, size_t count,
                           struct dictionary *dict)
{
  struct text_record *record;

  record = open_text_record (r, size * count);
  read_attributes (r, record, dict_get_attributes (dict));
  close_text_record (r, record);
}
/* Skips N_LABELS value/label pairs in R.  Each pair consists of
   an integer length followed by that many bytes of value, then
   an integer length followed by that many bytes of label. */
static void
skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
{
  size_t remaining;

  for (remaining = n_labels; remaining > 0; remaining--)
    {
      /* Value, then label, each prefixed by its length. */
      skip_bytes (r, (size_t) read_int (r));
      skip_bytes (r, (size_t) read_int (r));
    }
}
1432 read_long_string_value_labels (struct sfm_reader *r,
1433 size_t size, size_t count,
1434 struct dictionary *d)
1436 const off_t start = ftello (r->file);
1437 while (ftello (r->file) - start < size * count)
1439 char var_name[VAR_NAME_LEN + 1];
1447 var_name_len = read_int (r);
1448 if (var_name_len > VAR_NAME_LEN)
1449 sys_error (r, _("Variable name length in long string value label "
1450 "record (%d) exceeds %d-byte limit."),
1451 var_name_len, VAR_NAME_LEN);
1452 read_string (r, var_name, var_name_len + 1);
1453 width = read_int (r);
1454 n_labels = read_int (r);
1456 v = dict_lookup_var (d, var_name);
1459 sys_warn (r, _("Ignoring long string value record for "
1460 "unknown variable %s."), var_name);
1461 skip_long_string_value_labels (r, n_labels);
1464 if (var_is_numeric (v))
1466 sys_warn (r, _("Ignoring long string value record for "
1467 "numeric variable %s."), var_name);
1468 skip_long_string_value_labels (r, n_labels);
1471 if (width != var_get_width (v))
1473 sys_warn (r, _("Ignoring long string value record for variable %s "
1474 "because the record's width (%d) does not match the "
1475 "variable's width (%d)"),
1476 var_name, width, var_get_width (v));
1477 skip_long_string_value_labels (r, n_labels);
1482 value_init_pool (r->pool, &value, width);
1483 for (i = 0; i < n_labels; i++)
1485 size_t value_length, label_length;
1490 value_length = read_int (r);
1491 if (value_length == width)
1492 read_bytes (r, value_str_rw (&value, width), width);
1495 sys_warn (r, _("Ignoring long string value %zu for variable %s, "
1496 "with width %d, that has bad value width %zu."),
1497 i, var_get_name (v), width, value_length);
1498 skip_bytes (r, value_length);
1503 label_length = read_int (r);
1504 read_string (r, label, MIN (sizeof label, label_length + 1));
1505 if (label_length >= sizeof label)
1507 /* Skip and silently ignore label text after the
1508 first 255 bytes. The maximum documented length
1509 of a label is 120 bytes so this is more than
1511 skip_bytes (r, sizeof label - (label_length + 1));
1514 if (!skip && !var_add_value_label (v, &value, label))
1515 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1516 width, value_str (&value, width), var_get_name (v));
/* Handles record type 7, subtype 18: SIZE * COUNT bytes of text
   holding custom attributes for individual variables.  Each
   entry is a short variable name terminated by ":" followed by
   that variable's attribute set.  Attributes naming a variable
   that cannot be found are parsed and thrown away. */
static void
read_variable_attributes (struct sfm_reader *r,
                          size_t size, size_t count,
                          struct dictionary *dict)
{
  struct text_record *record = open_text_record (r, size * count);
  struct variable *v;

  while (text_read_short_name (r, dict, record, ss_cstr (":"), &v))
    {
      struct attrset *dst = v != NULL ? var_get_attributes (v) : NULL;
      read_attributes (r, record, dst);
    }
  close_text_record (r, record);
}
/* Prototypes for the case-reading helpers defined later in this
   file. */
1543 static void partial_record (struct sfm_reader *r)
1546 static void read_error (struct casereader *, const struct sfm_reader *);
1548 static bool read_case_number (struct sfm_reader *, double *);
1549 static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
1550 static int read_opcode (struct sfm_reader *);
1551 static bool read_compressed_number (struct sfm_reader *, double *);
1552 static bool read_compressed_string (struct sfm_reader *, uint8_t *);
1553 static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
1554 static bool skip_whole_strings (struct sfm_reader *, size_t);
1556 /* Reads and returns one case from READER's file. Returns a null
1557 pointer if not successful. */
1558 static struct ccase *
1559 sys_file_casereader_read (struct casereader *reader, void *r_)
1561 struct sfm_reader *r = r_;
/* NOTE(review): C is volatile presumably so that it is still
   valid after sys_error() longjmp()s back to the setjmp() below
   -- confirm. */
1562 struct ccase *volatile c;
1568 c = case_create (r->proto);
/* sys_error() returns control here through r->bail_out. */
1569 if (setjmp (r->bail_out))
1571 casereader_force_error (reader);
/* Fill in the case one system-file field at a time. */
1576 for (i = 0; i < r->sfm_var_cnt; i++)
1578 struct sfm_var *sv = &r->sfm_vars[i];
1579 union value *v = case_data_rw_idx (c, sv->case_index);
/* A width of 0 marks a numeric field; anything else is a string
   segment stored at SV->offset within the value. */
1581 if (sv->var_width == 0)
1583 if (!read_case_number (r, &v->f))
1588 uint8_t *s = value_str_rw (v, sv->var_width);
1589 if (!read_case_string (r, s + sv->offset, sv->segment_width))
/* Discard the whole 8-byte units of padding after the segment. */
1591 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
/* A case count of -1 is treated differently from any other value
   here; only the latter leads to read_error(). */
1601 if (r->case_cnt != -1)
1602 read_error (reader, r);
/* Reports that the data in R stops in the middle of a case.
   Control does not come back to the caller, because sys_error()
   leaves through longjmp(). */
static void
partial_record (struct sfm_reader *r)
{
  sys_error (r, _("File ends in partial case."));
}
1613 /* Issues an error that an unspecified error occurred SFM, and
1616 read_error (struct casereader *r, const struct sfm_reader *sfm)
1618 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1619 casereader_force_error (r);
1622 /* Reads a number from R and stores its value in *D.
1623 If R is compressed, reads a compressed number;
1624 otherwise, reads a number in the regular way.
1625 Returns true if successful, false if end of file is
1626 reached immediately. */
1628 read_case_number (struct sfm_reader *r, double *d)
1633 if (!try_read_bytes (r, number, sizeof number))
1635 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1639 return read_compressed_number (r, d);
1642 /* Reads LENGTH string bytes from R into S.
   Always reads a multiple of 8 bytes; if LENGTH is not a
   multiple of 8, the final 8-byte unit is read into a bounce
   buffer and only the bytes that belong to S are copied out.
   Reads compressed strings if R is compressed.
   Returns true if successful, false if end of file is
   reached immediately. */
1650 read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
/* WHOLE is the part of LENGTH made of complete 8-byte units;
   PARTIAL is the number of bytes left over. */
1652 size_t whole = ROUND_DOWN (length, 8);
1653 size_t partial = length % 8;
1657 if (!read_whole_strings (r, s, whole))
1664 if (!read_whole_strings (r, bounce, sizeof bounce))
/* Only the first PARTIAL bytes of the bounce buffer belong to S. */
1670 memcpy (s + whole, bounce, partial);
1676 /* Reads and returns the next compression opcode from R.
   R must be a compressed file.  Opcodes are taken one at a time
   from R->opcodes, which is refilled from the file whenever it
   has been used up. */
1678 read_opcode (struct sfm_reader *r)
1680 assert (r->compressed);
/* All buffered opcodes consumed: fetch the next batch. */
1684 if (r->opcode_idx >= sizeof r->opcodes)
1686 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1690 opcode = r->opcodes[r->opcode_idx++];
1697 /* Reads a compressed number from R and stores its value in D.
1698 Returns true if successful, false if end of file is
1699 reached immediately. */
1701 read_compressed_number (struct sfm_reader *r, double *d)
1703 int opcode = read_opcode (r);
/* Depending on the opcode, the number is read in full with
   read_float(), produced from compressed spaces (reported once
   per reader as possible corruption), or derived from the opcode
   itself by subtracting the reader's bias. */
1711 *d = read_float (r);
/* NOTE(review): the bytes converted here come from the literal
   " ".  If float_convert() consumes a full 8-byte value in
   r->float_format, this literal is too short -- confirm that it
   is meant to be eight spaces. */
1715 float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
/* Warn only once per reader. */
1716 if (!r->corruption_warning)
1718 r->corruption_warning = true;
1719 sys_warn (r, _("Possible compressed data corruption: "
1720 "compressed spaces appear in numeric field."));
1729 *d = opcode - r->bias;
1736 /* Reads a compressed 8-byte string segment from R and stores it
   in DST.
   Returns true if successful, false if end of file is
   reached immediately. */
1741 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
1743 int opcode = read_opcode (r);
/* Segment stored literally in the file: copy its 8 bytes. */
1751 read_bytes (r, dst, 8);
/* Segment compressed as all spaces. */
1755 memset (dst, ' ', 8);
/* A compressed integer where a string was expected: store the
   number's 8-byte representation in the file's float format. */
1760 double value = opcode - r->bias;
1761 float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
1764 /* This has actually been seen "in the wild". The submitter of the
1765 file that showed that the contents decoded as spaces, but they
1766 were at the end of the field so it's possible that the null
1767 bytes just acted as null terminators. */
/* Warn only once per reader. */
1769 else if (!r->corruption_warning)
1771 r->corruption_warning = true;
1772 sys_warn (r, _("Possible compressed data corruption: "
1773 "string contains compressed integer (opcode %d)"),
1783 /* Reads LENGTH string bytes from R into S.
   LENGTH must be a multiple of 8.
   Reads compressed strings, one 8-byte segment at a time, if R
   is compressed.
   Returns true if successful, false if end of file is
   reached immediately. */
1789 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
1791 assert (length % 8 == 0);
/* Direct read of all LENGTH bytes. */
1793 return try_read_bytes (r, s, length);
/* Segment-by-segment read through read_compressed_string(). */
1797 for (ofs = 0; ofs < length; ofs += 8)
1798 if (!read_compressed_string (r, s + ofs))
/* Reads LENGTH string bytes from R and throws them away.
   LENGTH must be a multiple of 8 and smaller than the 1024-byte
   scratch buffer; the current caller never needs more than
   that.
   Returns true if successful, false if end of file is reached
   immediately. */
static bool
skip_whole_strings (struct sfm_reader *r, size_t length)
{
  uint8_t scratch[1024];

  assert (length < sizeof scratch);
  return read_whole_strings (r, scratch, length);
}
1822 /* Creates and returns a table that can be used for translating a value
1823 index into a case to a "struct variable *" for DICT. Multiple
1824 system file fields reference variables this way.
1826 This table must be created before processing the very long
1827 string extension record, because that record causes some
1828 values to be deleted from the case and the dictionary to be
1830 static struct variable **
1831 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1833 struct variable **var_by_value_idx;
1837 var_by_value_idx = pool_nmalloc (r->pool,
1838 r->oct_cnt, sizeof *var_by_value_idx);
1839 for (i = 0; i < dict_get_var_cnt (dict); i++)
1841 struct variable *v = dict_get_var (dict, i);
1842 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1845 var_by_value_idx[value_idx++] = v;
1846 for (j = 1; j < nv; j++)
1847 var_by_value_idx[value_idx++] = NULL;
1849 assert (value_idx == r->oct_cnt);
1851 return var_by_value_idx;
1854 /* Returns the "struct variable" corresponding to the given
   1-based VALUE_IDX in VAR_BY_VALUE_IDX.  An index outside the
   range 1...R->oct_cnt is reported through sys_error(). */
1857 static struct variable *
1858 lookup_var_by_value_idx (struct sfm_reader *r,
1859 struct variable **var_by_value_idx, int value_idx)
1861 struct variable *var;
1863 if (value_idx < 1 || value_idx > r->oct_cnt)
1864 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1865 value_idx, r->oct_cnt);
/* The table is 0-based; the file's indexes are 1-based. */
1867 var = var_by_value_idx[value_idx - 1];
1869 sys_error (r, _("Variable index %d refers to long string "
/* Searches D for a variable whose first short name matches
   SHORT_NAME, ignoring case.  Returns the variable, or a null
   pointer if no variable matches. */
static struct variable *
lookup_var_by_short_name (struct dictionary *d, const char *short_name)
{
  struct variable *candidate;
  size_t n_vars;
  size_t idx;

  /* Fast path: the variable's full name is often the same as its
     short name. */
  candidate = dict_lookup_var (d, short_name);
  if (candidate != NULL
      && strcasecmp (var_get_short_name (candidate, 0), short_name) == 0)
    return candidate;

  /* Slow path: check every variable in the dictionary. */
  n_vars = dict_get_var_cnt (d);
  for (idx = 0; idx < n_vars; idx++)
    {
      candidate = dict_get_var (d, idx);
      if (strcasecmp (var_get_short_name (candidate, 0), short_name) == 0)
        return candidate;
    }

  return NULL;
}
1902 /* Helpers for reading records that contain structured text. */
1905 /* Maximum number of warnings to issue for a single text record. */
1907 #define MAX_TEXT_WARNINGS 5
/* Parsing state for one text record: the record's bytes, the
   position of the next unread byte, and how many warnings have
   been raised for it so far. */
1912 struct substring buffer; /* Record contents. */
1913 size_t pos; /* Current position in buffer. */
1914 int n_warnings; /* Number of warnings issued or suppressed. */
1917 /* Reads SIZE bytes into a text record for R,
1918 and returns the new text record. */
1919 static struct text_record *
1920 open_text_record (struct sfm_reader *r, size_t size)
1922 struct text_record *text = pool_alloc (r->pool, sizeof *text);
1923 char *buffer = pool_malloc (r->pool, size + 1);
1924 read_bytes (r, buffer, size);
1925 text->buffer = ss_buffer (buffer, size);
1927 text->n_warnings = 0;
1931 /* Closes TEXT, frees its storage, and issues a final warning
1932 about suppressed warnings if necesary. */
1934 close_text_record (struct sfm_reader *r, struct text_record *text)
1936 if (text->n_warnings > MAX_TEXT_WARNINGS)
1937 sys_warn (r, _("Suppressed %d additional related warnings."),
1938 text->n_warnings - MAX_TEXT_WARNINGS);
1939 pool_free (r->pool, ss_data (text->buffer));
1942 /* Reads a variable=value pair from TEXT.
1943 Looks up the variable in DICT and stores it into *VAR.
1944 Stores a null-terminated value into *VALUE. */
1946 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
1947 struct text_record *text,
1948 struct variable **var, char **value)
/* The variable's short name runs up to "=". */
1952 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
/* The value runs up to a tab or a null byte. */
1955 *value = text_get_token (text, ss_buffer ("\t\0", 2));
/* NOTE(review): presumably advances past any further tab or null
   bytes that follow the value -- confirm ss_span() semantics. */
1959 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
1960 ss_buffer ("\t\0", 2));
/* Reads the next token from TEXT, ending at any of DELIMITERS,
   and treats it as a variable's short name.  The name is looked
   up in DICT with lookup_var_by_short_name() and the result is
   stored in *VAR.  A warning is issued through text_warn() for a
   name that refers to an unknown variable. */
1968 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
1969 struct text_record *text, struct substring delimiters,
1970 struct variable **var)
1972 char *short_name = text_get_token (text, delimiters);
/* No token could be read from TEXT. */
1973 if (short_name == NULL)
1976 *var = lookup_var_by_short_name (dict, short_name);
1978 text_warn (r, text, _("Variable map refers to unknown variable %s."),
1983 /* Displays a warning for the current file position, limiting the
1984 number to MAX_TEXT_WARNINGS for TEXT. */
1986 text_warn (struct sfm_reader *r, struct text_record *text,
1987 const char *format, ...)
1989 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
1993 va_start (args, format);
1994 sys_msg (r, MW, format, args);
2000 text_get_token (struct text_record *text, struct substring delimiters)
2002 struct substring token;
2004 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
2006 ss_data (token)[ss_length (token)] = '\0';
2007 return ss_data (token);
2011 text_match (struct text_record *text, char c)
2013 if (text->buffer.string[text->pos] == c)
2024 /* Displays a corruption message of class CLASS.  The message
   text is the name of R's file and the current offset in it,
   followed by FORMAT expanded with ARGS. */
2026 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
2031 ds_init_empty (&text);
/* Prefix: quoted file name and hexadecimal file offset. */
2032 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
2033 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
2034 ds_put_vformat (&text, format, args);
/* The category and severity are both derived from CLASS.  No
   source file name or line number is attached. */
2036 m.category = msg_class_to_category (class);
2037 m.severity = msg_class_to_severity (class);
2038 m.where.file_name = NULL;
2039 m.where.line_number = 0;
2040 m.text = ds_cstr (&text);
2045 /* Displays a warning for the current file position. */
2047 sys_warn (struct sfm_reader *r, const char *format, ...)
2051 va_start (args, format);
2052 sys_msg (r, MW, format, args);
2056 /* Displays an error for the current file position,
2057 marks it as in an error state,
2058 and aborts reading it using longjmp. */
2060 sys_error (struct sfm_reader *r, const char *format, ...)
2064 va_start (args, format);
2065 sys_msg (r, ME, format, args);
/* Does not return to the caller: control resumes at the
   setjmp() that filled in r->bail_out. */
2069 longjmp (r->bail_out, 1);
2072 /* Reads BYTE_CNT bytes into BUF.
2073 Returns true if exactly BYTE_CNT bytes are successfully read.
2074 Aborts if an I/O error or a partial read occurs.
2075 If EOF_IS_OK, then an immediate end-of-file causes false to be
2076 returned; otherwise, immediate end-of-file causes an abort
2079 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
2080 void *buf, size_t byte_cnt)
2082 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
2083 if (bytes_read == byte_cnt)
2085 else if (ferror (r->file))
2086 sys_error (r, _("System error: %s."), strerror (errno));
2087 else if (!eof_is_ok || bytes_read != 0)
2088 sys_error (r, _("Unexpected end of file."));
/* Reads BYTE_CNT bytes from R's file into BUF.
   Any failure, including end-of-file, is reported with
   sys_error(), so the data is complete on return. */
static void
read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
  (void) read_bytes_internal (r, false, buf, byte_cnt);
}
/* Tries to read BYTE_CNT bytes from R's file into BUF.
   Returns true if exactly BYTE_CNT bytes were read, false if
   end-of-file was hit before any byte was read.
   An I/O error or a partial read is reported with sys_error(). */
static bool
try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
  bool ok = read_bytes_internal (r, true, buf, byte_cnt);
  return ok;
}
2111 /* Reads a 32-bit signed integer from R and returns its value in
2114 read_int (struct sfm_reader *r)
2117 read_bytes (r, integer, sizeof integer);
2118 return integer_get (r->integer_format, integer, sizeof integer);
2121 /* Reads a 64-bit floating-point number from R and returns its
2122 value in host format. */
2124 read_float (struct sfm_reader *r)
2127 read_bytes (r, number, sizeof number);
2128 return float_get_double (r->float_format, number);
2131 /* Reads exactly SIZE - 1 bytes into BUFFER
2132 and stores a null byte into BUFFER[SIZE - 1]. */
2134 read_string (struct sfm_reader *r, char *buffer, size_t size)
/* SIZE counts the terminator, so SIZE - 1 bytes come from the
   file.  NOTE(review): SIZE is a size_t, so a SIZE of 0 would
   make SIZE - 1 wrap around -- confirm that callers never pass
   0. */
2137 read_bytes (r, buffer, size - 1);
2138 buffer[size - 1] = '\0';
2141 /* Skips BYTES bytes forward in R by reading them into a scratch
   buffer and discarding them. */
2143 skip_bytes (struct sfm_reader *r, size_t bytes)
/* Each read is limited to the size of the scratch buffer. */
2148 size_t chunk = MIN (sizeof buffer, bytes);
2149 read_bytes (r, buffer, chunk);
2154 static const struct casereader_class sys_file_casereader_class =
2156 sys_file_casereader_read,
2157 sys_file_casereader_destroy,