1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/assertion.h>
29 #include <libpspp/message.h>
30 #include <libpspp/compiler.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/pool.h>
33 #include <libpspp/str.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/array.h>
37 #include <data/attributes.h>
38 #include <data/case.h>
39 #include <data/casereader-provider.h>
40 #include <data/casereader.h>
41 #include <data/dictionary.h>
42 #include <data/file-handle-def.h>
43 #include <data/file-name.h>
44 #include <data/format.h>
45 #include <data/missing-values.h>
46 #include <data/short-names.h>
47 #include <data/value-labels.h>
48 #include <data/variable.h>
49 #include <data/value.h>
54 #include "unlocked-io.h"
59 #define _(msgid) gettext (msgid)
60 #define N_(msgid) (msgid)
62 /* System file reader. */
65 /* Resource tracking. */
66 struct pool *pool; /* All system file state. */
67 jmp_buf bail_out; /* longjmp() target for error handling. */
70 struct file_handle *fh; /* File handle. */
71 struct fh_lock *lock; /* Mutual exclusion for file handle. */
72 FILE *file; /* File stream. */
73 bool error; /* I/O or corruption error? */
74 struct caseproto *proto; /* Format of output cases. */
77 enum integer_format integer_format; /* On-disk integer format. */
78 enum float_format float_format; /* On-disk floating point format. */
79 int oct_cnt; /* Number of 8-byte units per case. */
80 struct sfm_var *sfm_vars; /* Variables. */
81 size_t sfm_var_cnt; /* Number of variables. */
82 casenumber case_cnt; /* Number of cases */
83 bool has_long_var_names; /* File has a long variable name map */
86 bool compressed; /* File is compressed? */
87 double bias; /* Compression bias, usually 100.0. */
88 uint8_t opcodes[8]; /* Current block of opcodes. */
89 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
92 static const struct casereader_class sys_file_casereader_class;
94 static bool close_reader (struct sfm_reader *);
96 static struct variable **make_var_by_value_idx (struct sfm_reader *,
98 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
102 static void sys_msg (struct sfm_reader *r, int class,
103 const char *format, va_list args)
104 PRINTF_FORMAT (3, 0);
105 static void sys_warn (struct sfm_reader *, const char *, ...)
106 PRINTF_FORMAT (2, 3);
107 static void sys_error (struct sfm_reader *, const char *, ...)
111 static void read_bytes (struct sfm_reader *, void *, size_t);
112 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
113 static int read_int (struct sfm_reader *);
114 static double read_float (struct sfm_reader *);
115 static void read_string (struct sfm_reader *, char *, size_t);
116 static void skip_bytes (struct sfm_reader *, size_t);
118 static struct text_record *open_text_record (struct sfm_reader *, size_t size);
119 static void close_text_record (struct sfm_reader *r,
120 struct text_record *);
121 static bool read_variable_to_value_pair (struct sfm_reader *,
123 struct text_record *,
124 struct variable **var, char **value);
125 static void text_warn (struct sfm_reader *r, struct text_record *text,
126 const char *format, ...)
127 PRINTF_FORMAT (3, 4);
128 static char *text_get_token (struct text_record *,
129 struct substring delimiters);
130 static bool text_match (struct text_record *, char c);
131 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
132 struct text_record *,
133 struct substring delimiters,
136 static bool close_reader (struct sfm_reader *r);
138 /* Dictionary reader. */
146 static void read_header (struct sfm_reader *, struct dictionary *,
147 int *weight_idx, int *claimed_oct_cnt,
148 struct sfm_read_info *);
149 static void read_variable_record (struct sfm_reader *, struct dictionary *,
150 int *format_warning_cnt);
151 static void parse_format_spec (struct sfm_reader *, unsigned int,
152 enum which_format, struct variable *,
153 int *format_warning_cnt);
154 static void setup_weight (struct sfm_reader *, int weight_idx,
155 struct variable **var_by_value_idx,
156 struct dictionary *);
157 static void read_documents (struct sfm_reader *, struct dictionary *);
158 static void read_value_labels (struct sfm_reader *, struct dictionary *,
159 struct variable **var_by_value_idx);
161 static void read_extension_record (struct sfm_reader *, struct dictionary *,
162 struct sfm_read_info *);
163 static void read_machine_integer_info (struct sfm_reader *,
164 size_t size, size_t count,
165 struct sfm_read_info *,
168 static void read_machine_float_info (struct sfm_reader *,
169 size_t size, size_t count);
170 static void read_display_parameters (struct sfm_reader *,
171 size_t size, size_t count,
172 struct dictionary *);
173 static void read_long_var_name_map (struct sfm_reader *,
174 size_t size, size_t count,
175 struct dictionary *);
176 static void read_long_string_map (struct sfm_reader *,
177 size_t size, size_t count,
178 struct dictionary *);
179 static void read_data_file_attributes (struct sfm_reader *,
180 size_t size, size_t count,
181 struct dictionary *);
182 static void read_variable_attributes (struct sfm_reader *,
183 size_t size, size_t count,
184 struct dictionary *);
185 static void read_long_string_value_labels (struct sfm_reader *,
186 size_t size, size_t count,
187 struct dictionary *);
189 /* Opens the system file designated by file handle FH for
190 reading. Reads the system file's dictionary into *DICT.
191 If INFO is non-null, then it receives additional info about the
194 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
195 struct sfm_read_info *volatile info)
197 struct sfm_reader *volatile r = NULL;
198 struct variable **var_by_value_idx;
199 struct sfm_read_info local_info;
200 int format_warning_cnt = 0;
205 *dict = dict_create ();
207 /* Create and initialize reader. */
208 r = pool_create_container (struct sfm_reader, pool);
214 r->has_long_var_names = false;
215 r->opcode_idx = sizeof r->opcodes;
217 /* TRANSLATORS: this fragment will be interpolated into
218 messages in fh_lock() that identify types of files. */
219 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
223 r->file = fn_open (fh_get_file_name (fh), "rb");
226 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
227 fh_get_file_name (r->fh), strerror (errno));
231 /* Initialize info. */
234 memset (info, 0, sizeof *info);
236 if (setjmp (r->bail_out))
241 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
243 /* Read all the variable definition records. */
244 rec_type = read_int (r);
245 while (rec_type == 2)
247 read_variable_record (r, *dict, &format_warning_cnt);
248 rec_type = read_int (r);
251 /* Figure out the case format. */
252 var_by_value_idx = make_var_by_value_idx (r, *dict);
253 setup_weight (r, weight_idx, var_by_value_idx, *dict);
255 /* Read all the rest of the dictionary records. */
256 while (rec_type != 999)
261 read_value_labels (r, *dict, var_by_value_idx);
265 sys_error (r, _("Misplaced type 4 record."));
268 read_documents (r, *dict);
272 read_extension_record (r, *dict, info);
276 sys_error (r, _("Unrecognized record type %d."), rec_type);
278 rec_type = read_int (r);
282 if ( ! r->has_long_var_names )
285 for (i = 0; i < dict_get_var_cnt (*dict); i++)
287 struct variable *var = dict_get_var (*dict, i);
288 char short_name[SHORT_NAME_LEN + 1];
289 char long_name[SHORT_NAME_LEN + 1];
291 strcpy (short_name, var_get_name (var));
293 strcpy (long_name, short_name);
294 str_lowercase (long_name);
296 /* Set long name. Renaming a variable may clear the short
297 name, but we want to retain it, so re-set it
299 dict_rename_var (*dict, var, long_name);
300 var_set_short_name (var, 0, short_name);
303 r->has_long_var_names = true;
306 /* Read record 999 data, which is just filler. */
309 /* Warn if the actual amount of data per case differs from the
310 amount that the header claims. SPSS version 13 gets this
311 wrong when very long strings are involved, so don't warn in
313 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
314 && info->version_major != 13)
315 sys_warn (r, _("File header claims %d variable positions but "
316 "%d were read from file."),
317 claimed_oct_cnt, r->oct_cnt);
319 /* Create an index of dictionary variable widths for
320 sfm_read_case to use. We cannot use the `struct variable's
321 from the dictionary we created, because the caller owns the
322 dictionary and may destroy or modify its variables. */
323 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
324 pool_register (r->pool, free, r->sfm_vars);
325 r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
327 pool_free (r->pool, var_by_value_idx);
328 return casereader_create_sequential
330 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
331 &sys_file_casereader_class, r);
335 dict_destroy (*dict);
340 /* Closes a system file after we're done with it.
341 Returns true if an I/O error has occurred on READER, false
344 close_reader (struct sfm_reader *r)
353 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
355 msg (ME, _("Error closing system file \"%s\": %s."),
356 fh_get_file_name (r->fh), strerror (errno));
366 pool_destroy (r->pool);
371 /* Destroys READER. */
373 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
375 struct sfm_reader *r = r_;
379 /* Returns true if FILE is an SPSS system file,
382 sfm_detect (FILE *file)
386 if (fread (rec_type, 4, 1, file) != 1)
390 return !strcmp ("$FL2", rec_type);
393 /* Reads the global header of the system file.
394 Sets DICT's file label to the system file's label.
395 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
396 or to the value index of the weight variable otherwise.
397 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
398 per case that the file claims to have (although it is not
400 Initializes INFO with header information. */
402 read_header (struct sfm_reader *r, struct dictionary *dict,
403 int *weight_idx, int *claimed_oct_cnt,
404 struct sfm_read_info *info)
407 char eye_catcher[61];
408 uint8_t raw_layout_code[4];
410 char creation_date[10];
411 char creation_time[9];
413 struct substring file_label_ss;
414 struct substring product;
416 read_string (r, rec_type, sizeof rec_type);
417 read_string (r, eye_catcher, sizeof eye_catcher);
419 if (strcmp ("$FL2", rec_type) != 0)
420 sys_error (r, _("This is not an SPSS system file."));
422 /* Identify integer format. */
423 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
424 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
426 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
428 || (r->integer_format != INTEGER_MSB_FIRST
429 && r->integer_format != INTEGER_LSB_FIRST))
430 sys_error (r, _("This is not an SPSS system file."));
432 *claimed_oct_cnt = read_int (r);
433 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
434 *claimed_oct_cnt = -1;
436 r->compressed = read_int (r) != 0;
438 *weight_idx = read_int (r);
440 r->case_cnt = read_int (r);
441 if ( r->case_cnt > INT_MAX / 2)
445 /* Identify floating-point format and obtain compression bias. */
446 read_bytes (r, raw_bias, sizeof raw_bias);
447 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
449 sys_warn (r, _("Compression bias is not the usual "
450 "value of 100, or system file uses unrecognized "
451 "floating-point format."));
452 if (r->integer_format == INTEGER_MSB_FIRST)
453 r->float_format = FLOAT_IEEE_DOUBLE_BE;
455 r->float_format = FLOAT_IEEE_DOUBLE_LE;
457 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
459 read_string (r, creation_date, sizeof creation_date);
460 read_string (r, creation_time, sizeof creation_time);
461 read_string (r, file_label, sizeof file_label);
464 file_label_ss = ss_cstr (file_label);
465 ss_trim (&file_label_ss, ss_cstr (" "));
466 if (!ss_is_empty (file_label_ss))
468 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
469 dict_set_label (dict, ss_data (file_label_ss));
472 strcpy (info->creation_date, creation_date);
473 strcpy (info->creation_time, creation_time);
474 info->integer_format = r->integer_format;
475 info->float_format = r->float_format;
476 info->compressed = r->compressed;
477 info->case_cnt = r->case_cnt;
479 product = ss_cstr (eye_catcher);
480 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
481 ss_trim (&product, ss_cstr (" "));
482 str_copy_buf_trunc (info->product, sizeof info->product,
483 ss_data (product), ss_length (product));
486 /* Reads a variable (type 2) record from R and adds the
487 corresponding variable to DICT.
488 Also skips past additional variable records for long string
491 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
492 int *format_warning_cnt)
495 int has_variable_label;
496 int missing_value_code;
501 struct variable *var;
504 width = read_int (r);
505 has_variable_label = read_int (r);
506 missing_value_code = read_int (r);
507 print_format = read_int (r);
508 write_format = read_int (r);
509 read_string (r, name, sizeof name);
510 name[strcspn (name, " ")] = '\0';
512 /* Check variable name. */
513 if (name[0] == '$' || name[0] == '#')
514 sys_error (r, "Variable name begins with invalid character `%c'.",
516 if (!var_is_plausible_name (name, false))
517 sys_error (r, _("Invalid variable name `%s'."), name);
519 /* Create variable. */
520 if (width < 0 || width > 255)
521 sys_error (r, _("Bad variable width %d."), width);
522 var = dict_create_var (dict, name, width);
525 _("Duplicate variable name `%s' within system file."),
528 /* Set the short name the same as the long name. */
529 var_set_short_name (var, 0, var_get_name (var));
531 /* Get variable label, if any. */
532 if (has_variable_label != 0 && has_variable_label != 1)
533 sys_error (r, _("Variable label indicator field is not 0 or 1."));
534 if (has_variable_label == 1)
540 if (len >= sizeof label)
541 sys_error (r, _("Variable %s has label of invalid length %zu."),
543 read_string (r, label, len + 1);
544 var_set_label (var, label);
546 skip_bytes (r, ROUND_UP (len, 4) - len);
549 /* Set missing values. */
550 if (missing_value_code != 0)
552 struct missing_values mv;
555 mv_init_pool (r->pool, &mv, var_get_width (var));
556 if (var_is_numeric (var))
558 if (missing_value_code < -3 || missing_value_code > 3
559 || missing_value_code == -1)
560 sys_error (r, _("Numeric missing value indicator field is not "
561 "-3, -2, 0, 1, 2, or 3."));
562 if (missing_value_code < 0)
564 double low = read_float (r);
565 double high = read_float (r);
566 mv_add_range (&mv, low, high);
567 missing_value_code = -missing_value_code - 2;
569 for (i = 0; i < missing_value_code; i++)
570 mv_add_num (&mv, read_float (r));
574 int mv_width = MAX (width, 8);
577 if (missing_value_code < 1 || missing_value_code > 3)
578 sys_error (r, _("String missing value indicator field is not "
581 value_init (&value, mv_width);
582 value_set_missing (&value, mv_width);
583 for (i = 0; i < missing_value_code; i++)
585 char *s = value_str_rw (&value, mv_width);
586 read_bytes (r, s, 8);
589 value_destroy (&value, mv_width);
591 var_set_missing_values (var, &mv);
595 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
596 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
598 /* Account for values.
599 Skip long string continuation records, if any. */
600 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
606 for (i = 1; i < nv; i++)
608 /* Check for record type 2 and width -1. */
609 if (read_int (r) != 2 || read_int (r) != -1)
610 sys_error (r, _("Missing string continuation record."));
612 /* Skip and ignore remaining continuation data. */
613 has_variable_label = read_int (r);
614 missing_value_code = read_int (r);
615 print_format = read_int (r);
616 write_format = read_int (r);
617 read_string (r, name, sizeof name);
619 /* Variable label fields on continuation records have
620 been spotted in system files created by "SPSS Power
621 Macintosh Release 6.1". */
622 if (has_variable_label)
623 skip_bytes (r, ROUND_UP (read_int (r), 4));
628 /* Translates the format spec from sysfile format to internal
631 parse_format_spec (struct sfm_reader *r, unsigned int s,
632 enum which_format which, struct variable *v,
633 int *format_warning_cnt)
635 const int max_format_warnings = 8;
637 uint8_t raw_type = s >> 16;
643 if (!fmt_from_io (raw_type, &f.type))
644 sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
649 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
654 if (which == PRINT_FORMAT)
655 var_set_print_format (v, &f);
657 var_set_write_format (v, &f);
659 else if (*++format_warning_cnt <= max_format_warnings)
661 char fmt_string[FMT_STRING_LEN_MAX + 1];
662 sys_warn (r, _("%s variable %s has invalid %s format %s."),
663 var_is_numeric (v) ? _("Numeric") : _("String"),
665 which == PRINT_FORMAT ? _("print") : _("write"),
666 fmt_to_string (&f, fmt_string));
668 if (*format_warning_cnt == max_format_warnings)
669 sys_warn (r, _("Suppressing further invalid format warnings."));
673 /* Sets the weighting variable in DICT to the variable
674 corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is
677 setup_weight (struct sfm_reader *r, int weight_idx,
678 struct variable **var_by_value_idx, struct dictionary *dict)
682 struct variable *weight_var
683 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
684 if (var_is_numeric (weight_var))
685 dict_set_weight (dict, weight_var);
687 sys_error (r, _("Weighting variable must be numeric."));
691 /* Reads a document record, type 6, from system file R, and sets up
692 the documents and n_documents fields in the associated
695 read_documents (struct sfm_reader *r, struct dictionary *dict)
700 if (dict_get_documents (dict) != NULL)
701 sys_error (r, _("Multiple type 6 (document) records."));
703 line_cnt = read_int (r);
705 sys_error (r, _("Number of document lines (%d) "
706 "must be greater than 0."), line_cnt);
708 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
709 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
710 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
711 dict_set_documents (dict, documents);
713 sys_error (r, _("Document line contains null byte."));
714 pool_free (r->pool, documents);
717 /* Read a type 7 extension record. */
719 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
720 struct sfm_read_info *info)
722 int subtype = read_int (r);
723 size_t size = read_int (r);
724 size_t count = read_int (r);
725 size_t bytes = size * count;
727 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
728 allows an extra byte for a null terminator, used by some
729 extension processing routines. */
730 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
731 sys_error (r, "Record type 7 subtype %d too large.", subtype);
736 read_machine_integer_info (r, size, count, info, dict);
740 read_machine_float_info (r, size, count);
744 /* Variable sets information. We don't use these yet.
745 They only apply to GUIs; see VARSETS on the APPLY
746 DICTIONARY command in SPSS documentation. */
750 /* DATE variable information. We don't use it yet, but we
755 /* Used by the MRSETS command. */
759 /* Used by the SPSS Data Entry software. */
763 read_display_parameters (r, size, count, dict);
767 read_long_var_name_map (r, size, count, dict);
771 read_long_string_map (r, size, count, dict);
775 /* New in SPSS v14? Unknown purpose. */
779 read_data_file_attributes (r, size, count, dict);
783 read_variable_attributes (r, size, count, dict);
787 /* New in SPSS 16. Contains a single string that describes
788 the character encoding, e.g. "windows-1252". */
790 char *encoding = pool_calloc (r->pool, size, count + 1);
791 read_string (r, encoding, count + 1);
792 dict_set_encoding (dict, encoding);
797 /* New in SPSS 16. Encodes value labels for long string
799 read_long_string_value_labels (r, size, count, dict);
803 sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
804 subtype, PACKAGE_BUGREPORT);
808 skip_bytes (r, bytes);
811 /* Read record type 7, subtype 3. */
813 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
814 struct sfm_read_info *info,
815 struct dictionary *dict)
817 int version_major = read_int (r);
818 int version_minor = read_int (r);
819 int version_revision = read_int (r);
820 int machine_code UNUSED = read_int (r);
821 int float_representation = read_int (r);
822 int compression_code UNUSED = read_int (r);
823 int integer_representation = read_int (r);
824 int character_code = read_int (r);
826 int expected_float_format;
827 int expected_integer_format;
829 if (size != 4 || count != 8)
830 sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
834 /* Save version info. */
835 info->version_major = version_major;
836 info->version_minor = version_minor;
837 info->version_revision = version_revision;
839 /* Check floating point format. */
840 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
841 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
842 expected_float_format = 1;
843 else if (r->float_format == FLOAT_Z_LONG)
844 expected_float_format = 2;
845 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
846 expected_float_format = 3;
849 if (float_representation != expected_float_format)
850 sys_error (r, _("Floating-point representation indicated by "
851 "system file (%d) differs from expected (%d)."),
852 r->float_format, expected_float_format);
854 /* Check integer format. */
855 if (r->integer_format == INTEGER_MSB_FIRST)
856 expected_integer_format = 1;
857 else if (r->integer_format == INTEGER_LSB_FIRST)
858 expected_integer_format = 2;
861 if (integer_representation != expected_integer_format)
863 static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
864 sys_warn (r, _("Integer format indicated by system file (%s) "
865 "differs from expected (%s)."),
866 gettext (endian[integer_representation == 1]),
867 gettext (endian[expected_integer_format == 1]));
872 Record 7 (20) provides a much more reliable way of
873 setting the encoding.
874 The character_code is used as a fallback only.
876 if ( NULL == dict_get_encoding (dict))
878 switch (character_code)
881 dict_set_encoding (dict, "EBCDIC-US");
885 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
886 respectively. However, there are known to be many files
887 in the wild with character code 2, yet have data which are
889 Therefore we ignore these values.
893 dict_set_encoding (dict, "MS_KANJI");
896 dict_set_encoding (dict, "UTF-7");
899 dict_set_encoding (dict, "UTF-8");
904 snprintf (enc, 100, "CP%d", character_code);
905 dict_set_encoding (dict, enc);
912 /* Read record type 7, subtype 4. */
914 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
916 double sysmis = read_float (r);
917 double highest = read_float (r);
918 double lowest = read_float (r);
920 if (size != 8 || count != 3)
921 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
924 if (sysmis != SYSMIS)
925 sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
926 if (highest != HIGHEST)
927 sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
928 if (lowest != LOWEST)
929 sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
932 /* Read record type 7, subtype 11, which specifies how variables
933 should be displayed in GUI environments. */
935 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
936 struct dictionary *dict)
945 sys_warn (r, _("Bad size %zu on extension 11."), size);
946 skip_bytes (r, size * count);
950 n_vars = dict_get_var_cnt (dict);
951 if (count == 3 * n_vars)
952 includes_width = true;
953 else if (count == 2 * n_vars)
954 includes_width = false;
957 sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
959 skip_bytes (r, size * count);
963 for (i = 0; i < n_vars; ++i)
965 struct variable *v = dict_get_var (dict, i);
966 int measure = read_int (r);
967 int width = includes_width ? read_int (r) : 0;
968 int align = read_int (r);
970 /* SPSS 14 sometimes seems to set string variables' measure
972 if (0 == measure && var_is_alpha (v))
975 if (measure < 1 || measure > 3 || align < 0 || align > 2)
978 sys_warn (r, _("Invalid variable display parameters "
979 "for variable %zu (%s). "
980 "Default parameters substituted."),
981 i, var_get_name (v));
986 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
987 : measure == 2 ? MEASURE_ORDINAL
989 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
990 : align == 1 ? ALIGN_RIGHT
993 /* Older versions (SPSS 9.0) sometimes set the display
994 width to zero. This causes confusion in the GUI, so
995 only set the width if it is nonzero. */
997 var_set_display_width (v, width);
1001 /* Reads record type 7, subtype 13, which gives the long name
1002 that corresponds to each short name. Modifies variable names
1003 in DICT accordingly. */
1005 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
1006 struct dictionary *dict)
1008 struct text_record *text;
1009 struct variable *var;
1012 text = open_text_record (r, size * count);
1013 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1016 size_t short_name_cnt;
1019 /* Validate long name. */
1020 if (!var_is_valid_name (long_name, false))
1022 sys_warn (r, _("Long variable mapping from %s to invalid "
1023 "variable name `%s'."),
1024 var_get_name (var), long_name);
1028 /* Identify any duplicates. */
1029 if (strcasecmp (var_get_short_name (var, 0), long_name)
1030 && dict_lookup_var (dict, long_name) != NULL)
1032 sys_warn (r, _("Duplicate long variable name `%s' "
1033 "within system file."), long_name);
1037 /* Renaming a variable may clear its short names, but we
1038 want to retain them, so we save them and re-set them
1040 short_name_cnt = var_get_short_name_cnt (var);
1041 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
1042 for (i = 0; i < short_name_cnt; i++)
1044 const char *s = var_get_short_name (var, i);
1045 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1048 /* Set long name. */
1049 dict_rename_var (dict, var, long_name);
1051 /* Restore short names. */
1052 for (i = 0; i < short_name_cnt; i++)
1054 var_set_short_name (var, i, short_names[i]);
1055 free (short_names[i]);
1059 close_text_record (r, text);
1060 r->has_long_var_names = true;
1063 /* Reads record type 7, subtype 14, which gives the real length
1064 of each very long string. Rearranges DICT accordingly. */
1066 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
1067 struct dictionary *dict)
1069 struct text_record *text;
1070 struct variable *var;
1073 text = open_text_record (r, size * count);
1074 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1076 size_t idx = var_get_dict_index (var);
1082 length = strtol (length_s, NULL, 10);
1083 if (length < 1 || length > MAX_STRING)
1085 sys_warn (r, _("%s listed as string of invalid length %s "
1086 "in very length string record."),
1087 var_get_name (var), length_s);
1091 /* Check segments. */
1092 segment_cnt = sfm_width_to_segments (length);
1093 if (segment_cnt == 1)
1095 sys_warn (r, _("%s listed in very long string record with width %s, "
1096 "which requires only one segment."),
1097 var_get_name (var), length_s);
1100 if (idx + segment_cnt > dict_get_var_cnt (dict))
1101 sys_error (r, _("Very long string %s overflows dictionary."),
1102 var_get_name (var));
1104 /* Get the short names from the segments and check their
1106 for (i = 0; i < segment_cnt; i++)
1108 struct variable *seg = dict_get_var (dict, idx + i);
1109 int alloc_width = sfm_segment_alloc_width (length, i);
1110 int width = var_get_width (seg);
1113 var_set_short_name (var, i, var_get_short_name (seg, 0));
1114 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1115 sys_error (r, _("Very long string with width %ld has segment %d "
1116 "of width %d (expected %d)"),
1117 length, i, width, alloc_width);
1119 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1120 var_set_width (var, length);
1122 close_text_record (r, text);
1123 dict_compact_values (dict);
1126 /* Reads value labels from sysfile H and inserts them into the
1127 associated dictionary. */
1129 read_value_labels (struct sfm_reader *r,
1130 struct dictionary *dict, struct variable **var_by_value_idx)
1132 struct pool *subpool;
1136 char raw_value[8]; /* Value as uninterpreted bytes. */
1137 union value value; /* Value. */
1138 char *label; /* Null-terminated label string. */
1141 struct label *labels = NULL;
1142 int label_cnt; /* Number of labels. */
1144 struct variable **var = NULL; /* Associated variables. */
1145 int var_cnt; /* Number of associated variables. */
1146 int max_width; /* Maximum width of string variables. */
1150 subpool = pool_create_subpool (r->pool);
1152 /* Read the type 3 record and record its contents. We can't do
1153 much with the data yet because we don't know whether it is
1154 of numeric or string type. */
1156 /* Read number of labels. */
1157 label_cnt = read_int (r);
1159 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1161 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1166 /* Read each value/label tuple into labels[]. */
1167 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1168 for (i = 0; i < label_cnt; i++)
1170 struct label *label = labels + i;
1171 unsigned char label_len;
1175 read_bytes (r, label->raw_value, sizeof label->raw_value);
1177 /* Read label length. */
1178 read_bytes (r, &label_len, sizeof label_len);
1179 padded_len = ROUND_UP (label_len + 1, 8);
1181 /* Read label, padding. */
1182 label->label = pool_alloc (subpool, padded_len + 1);
1183 read_bytes (r, label->label, padded_len - 1);
1184 label->label[label_len] = 0;
1187 /* Now, read the type 4 record that has the list of variables
1188 to which the value labels are to be applied. */
1190 /* Read record type of type 4 record. */
1191 if (read_int (r) != 4)
1192 sys_error (r, _("Variable index record (type 4) does not immediately "
1193 "follow value label record (type 3) as it should."));
1195 /* Read number of variables associated with value label from type 4
1197 var_cnt = read_int (r);
1198 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1199 sys_error (r, _("Number of variables associated with a value label (%d) "
1200 "is not between 1 and the number of variables (%zu)."),
1201 var_cnt, dict_get_var_cnt (dict));
1203 /* Read the list of variables. */
1204 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1206 for (i = 0; i < var_cnt; i++)
1208 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1209 if (var_get_width (var[i]) > 8)
1210 sys_error (r, _("Value labels may not be added to long string "
1211 "variables (e.g. %s) using records types 3 and 4."),
1212 var_get_name (var[i]));
1213 max_width = MAX (max_width, var_get_width (var[i]));
1216 /* Type check the variables. */
1217 for (i = 1; i < var_cnt; i++)
1218 if (var_get_type (var[i]) != var_get_type (var[0]))
1219 sys_error (r, _("Variables associated with value label are not all of "
1220 "identical type. Variable %s is %s, but variable "
1222 var_get_name (var[0]),
1223 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1224 var_get_name (var[i]),
1225 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1227 /* Fill in labels[].value, now that we know the desired type. */
1228 for (i = 0; i < label_cnt; i++)
1230 struct label *label = labels + i;
1232 value_init_pool (subpool, &label->value, max_width);
1233 if (var_is_alpha (var[0]))
1234 buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
1235 label->raw_value, sizeof label->raw_value, ' ');
1237 label->value.f = float_get_double (r->float_format, label->raw_value);
1240 /* Assign the `value_label's to each variable. */
1241 for (i = 0; i < var_cnt; i++)
1243 struct variable *v = var[i];
1246 /* Add each label to the variable. */
1247 for (j = 0; j < label_cnt; j++)
1249 struct label *label = &labels[j];
1250 if (!var_add_value_label (v, &label->value, label->label))
1252 if (var_is_numeric (var[0]))
1253 sys_warn (r, _("Duplicate value label for %g on %s."),
1254 label->value.f, var_get_name (v));
1256 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1257 max_width, value_str (&label->value, max_width),
1263 pool_destroy (subpool);
1266 /* Reads a set of custom attributes from TEXT into ATTRS.
1267 ATTRS may be a null pointer, in which case the attributes are
1268 read but discarded. */
1270 read_attributes (struct sfm_reader *r, struct text_record *text,
1271 struct attrset *attrs)
1275 struct attribute *attr;
1279 /* Parse the key. */
1280 key = text_get_token (text, ss_cstr ("("));
1284 attr = attribute_create (key);
1285 for (index = 1; ; index++)
1287 /* Parse the value. */
1291 value = text_get_token (text, ss_cstr ("\n"));
1294 text_warn (r, text, _("Error parsing attribute value %s[%d]"),
1299 length = strlen (value);
1300 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
1302 value[length - 1] = '\0';
1303 attribute_add_value (attr, value + 1);
1308 _("Attribute value %s[%d] is not quoted: %s"),
1310 attribute_add_value (attr, value);
1313 /* Was this the last value for this attribute? */
1314 if (text_match (text, ')'))
1318 attrset_add (attrs, attr);
1320 attribute_destroy (attr);
1322 while (!text_match (text, '/'));
1325 /* Reads record type 7, subtype 17, which lists custom
1326 attributes on the data file. */
1328 read_data_file_attributes (struct sfm_reader *r,
1329 size_t size, size_t count,
1330 struct dictionary *dict)
1332 struct text_record *text = open_text_record (r, size * count);
1333 read_attributes (r, text, dict_get_attributes (dict));
1334 close_text_record (r, text);
1338 skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
1342 for (i = 0; i < n_labels; i++)
1344 size_t value_length, label_length;
1346 value_length = read_int (r);
1347 skip_bytes (r, value_length);
1348 label_length = read_int (r);
1349 skip_bytes (r, label_length);
1354 read_long_string_value_labels (struct sfm_reader *r,
1355 size_t size, size_t count,
1356 struct dictionary *d)
1358 const off_t start = ftello (r->file);
1359 while (ftello (r->file) - start < size * count)
1361 char var_name[VAR_NAME_LEN + 1];
1369 var_name_len = read_int (r);
1370 if (var_name_len > VAR_NAME_LEN)
1371 sys_error (r, _("Variable name length in long string value label "
1372 "record (%d) exceeds %d-byte limit."),
1373 var_name_len, VAR_NAME_LEN);
1374 read_string (r, var_name, var_name_len + 1);
1375 width = read_int (r);
1376 n_labels = read_int (r);
1378 v = dict_lookup_var (d, var_name);
1381 sys_warn (r, _("Ignoring long string value record for "
1382 "unknown variable %s."), var_name);
1383 skip_long_string_value_labels (r, n_labels);
1386 if (var_is_numeric (v))
1388 sys_warn (r, _("Ignoring long string value record for "
1389 "numeric variable %s."), var_name);
1390 skip_long_string_value_labels (r, n_labels);
1393 if (width != var_get_width (v))
1395 sys_warn (r, _("Ignoring long string value record for variable %s "
1396 "because the record's width (%d) does not match the "
1397 "variable's width (%d)"),
1398 var_name, width, var_get_width (v));
1399 skip_long_string_value_labels (r, n_labels);
1404 value_init_pool (r->pool, &value, width);
1405 for (i = 0; i < n_labels; i++)
1407 size_t value_length, label_length;
1412 value_length = read_int (r);
1413 if (value_length == width)
1414 read_string (r, value_str_rw (&value, width), width + 1);
1417 sys_warn (r, _("Ignoring long string value %zu for variable %s, "
1418 "with width %d, that has bad value width %zu."),
1419 i, var_get_name (v), width, value_length);
1420 skip_bytes (r, value_length);
1425 label_length = read_int (r);
1426 read_string (r, label, MIN (sizeof label, label_length + 1));
1427 if (label_length >= sizeof label)
1429 /* Skip and silently ignore label text after the
1430 first 255 bytes. The maximum documented length
1431 of a label is 120 bytes so this is more than
1433 skip_bytes (r, sizeof label - (label_length + 1));
1436 if (!skip && !var_add_value_label (v, &value, label))
1437 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1438 width, value_str (&value, width), var_get_name (v));
1444 /* Reads record type 7, subtype 18, which lists custom
1445 attributes on individual variables. */
1447 read_variable_attributes (struct sfm_reader *r,
1448 size_t size, size_t count,
1449 struct dictionary *dict)
1451 struct text_record *text = open_text_record (r, size * count);
1454 struct variable *var;
1455 if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var))
1457 read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
1459 close_text_record (r, text);
1465 static void partial_record (struct sfm_reader *r)
1468 static void read_error (struct casereader *, const struct sfm_reader *);
1470 static bool read_case_number (struct sfm_reader *, double *);
1471 static bool read_case_string (struct sfm_reader *, char *, size_t);
1472 static int read_opcode (struct sfm_reader *);
1473 static bool read_compressed_number (struct sfm_reader *, double *);
1474 static bool read_compressed_string (struct sfm_reader *, char *);
1475 static bool read_whole_strings (struct sfm_reader *, char *, size_t);
1476 static bool skip_whole_strings (struct sfm_reader *, size_t);
1478 /* Reads and returns one case from READER's file. Returns a null
1479 pointer if not successful. */
1480 static struct ccase *
1481 sys_file_casereader_read (struct casereader *reader, void *r_)
1483 struct sfm_reader *r = r_;
1484 struct ccase *volatile c;
1490 c = case_create (r->proto);
1491 if (setjmp (r->bail_out))
1493 casereader_force_error (reader);
1498 for (i = 0; i < r->sfm_var_cnt; i++)
1500 struct sfm_var *sv = &r->sfm_vars[i];
1501 union value *v = case_data_rw_idx (c, sv->case_index);
1503 if (sv->var_width == 0)
1505 if (!read_case_number (r, &v->f))
1510 char *s = value_str_rw (v, sv->var_width);
1511 if (!read_case_string (r, s + sv->offset, sv->segment_width))
1513 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
1523 if (r->case_cnt != -1)
1524 read_error (reader, r);
1528 /* Issues an error that R ends in a partial record. */
1530 partial_record (struct sfm_reader *r)
1532 sys_error (r, _("File ends in partial case."));
1535 /* Issues an error that an unspecified error occurred SFM, and
1538 read_error (struct casereader *r, const struct sfm_reader *sfm)
1540 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1541 casereader_force_error (r);
1544 /* Reads a number from R and stores its value in *D.
1545 If R is compressed, reads a compressed number;
1546 otherwise, reads a number in the regular way.
1547 Returns true if successful, false if end of file is
1548 reached immediately. */
1550 read_case_number (struct sfm_reader *r, double *d)
1555 if (!try_read_bytes (r, number, sizeof number))
1557 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1561 return read_compressed_number (r, d);
1564 /* Reads LENGTH string bytes from R into S.
1565 Always reads a multiple of 8 bytes; if LENGTH is not a
1566 multiple of 8, then extra bytes are read and discarded without
1568 Reads compressed strings if S is compressed.
1569 Returns true if successful, false if end of file is
1570 reached immediately. */
1572 read_case_string (struct sfm_reader *r, char *s, size_t length)
1574 size_t whole = ROUND_DOWN (length, 8);
1575 size_t partial = length % 8;
1579 if (!read_whole_strings (r, s, whole))
1586 if (!read_whole_strings (r, bounce, sizeof bounce))
1592 memcpy (s + whole, bounce, partial);
1598 /* Reads and returns the next compression opcode from R. */
1600 read_opcode (struct sfm_reader *r)
1602 assert (r->compressed);
1606 if (r->opcode_idx >= sizeof r->opcodes)
1608 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1612 opcode = r->opcodes[r->opcode_idx++];
1619 /* Reads a compressed number from R and stores its value in D.
1620 Returns true if successful, false if end of file is
1621 reached immediately. */
1623 read_compressed_number (struct sfm_reader *r, double *d)
1625 int opcode = read_opcode (r);
1633 *d = read_float (r);
1637 sys_error (r, _("Compressed data is corrupt."));
1644 *d = opcode - r->bias;
1651 /* Reads a compressed 8-byte string segment from R and stores it
1653 Returns true if successful, false if end of file is
1654 reached immediately. */
1656 read_compressed_string (struct sfm_reader *r, char *dst)
1658 switch (read_opcode (r))
1665 read_bytes (r, dst, 8);
1669 memset (dst, ' ', 8);
1673 sys_error (r, _("Compressed data is corrupt."));
1679 /* Reads LENGTH string bytes from R into S.
1680 LENGTH must be a multiple of 8.
1681 Reads compressed strings if S is compressed.
1682 Returns true if successful, false if end of file is
1683 reached immediately. */
1685 read_whole_strings (struct sfm_reader *r, char *s, size_t length)
1687 assert (length % 8 == 0);
1689 return try_read_bytes (r, s, length);
1693 for (ofs = 0; ofs < length; ofs += 8)
1694 if (!read_compressed_string (r, s + ofs))
1704 /* Skips LENGTH string bytes from R.
1705 LENGTH must be a multiple of 8.
1706 (LENGTH is also limited to 1024, but that's only because the
1707 current caller never needs more than that many bytes.)
1708 Returns true if successful, false if end of file is
1709 reached immediately. */
1711 skip_whole_strings (struct sfm_reader *r, size_t length)
1714 assert (length < sizeof buffer);
1715 return read_whole_strings (r, buffer, length);
1718 /* Creates and returns a table that can be used for translating a value
1719 index into a case to a "struct variable *" for DICT. Multiple
1720 system file fields reference variables this way.
1722 This table must be created before processing the very long
1723 string extension record, because that record causes some
1724 values to be deleted from the case and the dictionary to be
1726 static struct variable **
1727 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1729 struct variable **var_by_value_idx;
1733 var_by_value_idx = pool_nmalloc (r->pool,
1734 r->oct_cnt, sizeof *var_by_value_idx);
1735 for (i = 0; i < dict_get_var_cnt (dict); i++)
1737 struct variable *v = dict_get_var (dict, i);
1738 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1741 var_by_value_idx[value_idx++] = v;
1742 for (j = 1; j < nv; j++)
1743 var_by_value_idx[value_idx++] = NULL;
1745 assert (value_idx == r->oct_cnt);
1747 return var_by_value_idx;
1750 /* Returns the "struct variable" corresponding to the given
1751 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index
1753 static struct variable *
1754 lookup_var_by_value_idx (struct sfm_reader *r,
1755 struct variable **var_by_value_idx, int value_idx)
1757 struct variable *var;
1759 if (value_idx < 1 || value_idx > r->oct_cnt)
1760 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1761 value_idx, r->oct_cnt);
1763 var = var_by_value_idx[value_idx - 1];
1765 sys_error (r, _("Variable index %d refers to long string "
1772 /* Returns the variable in D with the given SHORT_NAME,
1773 or a null pointer if there is none. */
1774 static struct variable *
1775 lookup_var_by_short_name (struct dictionary *d, const char *short_name)
1777 struct variable *var;
1781 /* First try looking up by full name. This often succeeds. */
1782 var = dict_lookup_var (d, short_name);
1783 if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
1786 /* Iterate through the whole dictionary as a fallback. */
1787 var_cnt = dict_get_var_cnt (d);
1788 for (i = 0; i < var_cnt; i++)
1790 var = dict_get_var (d, i);
1791 if (!strcasecmp (var_get_short_name (var, 0), short_name))
1798 /* Helpers for reading records that contain structured text
1801 /* Maximum number of warnings to issue for a single text
1803 #define MAX_TEXT_WARNINGS 5
1808 struct substring buffer; /* Record contents. */
1809 size_t pos; /* Current position in buffer. */
1810 int n_warnings; /* Number of warnings issued or suppressed. */
1813 /* Reads SIZE bytes into a text record for R,
1814 and returns the new text record. */
1815 static struct text_record *
1816 open_text_record (struct sfm_reader *r, size_t size)
1818 struct text_record *text = pool_alloc (r->pool, sizeof *text);
1819 char *buffer = pool_malloc (r->pool, size + 1);
1820 read_bytes (r, buffer, size);
1821 text->buffer = ss_buffer (buffer, size);
1823 text->n_warnings = 0;
1827 /* Closes TEXT, frees its storage, and issues a final warning
1828 about suppressed warnings if necesary. */
1830 close_text_record (struct sfm_reader *r, struct text_record *text)
1832 if (text->n_warnings > MAX_TEXT_WARNINGS)
1833 sys_warn (r, _("Suppressed %d additional related warnings."),
1834 text->n_warnings - MAX_TEXT_WARNINGS);
1835 pool_free (r->pool, ss_data (text->buffer));
1838 /* Reads a variable=value pair from TEXT.
1839 Looks up the variable in DICT and stores it into *VAR.
1840 Stores a null-terminated value into *VALUE. */
1842 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
1843 struct text_record *text,
1844 struct variable **var, char **value)
1848 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
1851 *value = text_get_token (text, ss_buffer ("\t\0", 2));
1855 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
1856 ss_buffer ("\t\0", 2));
1864 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
1865 struct text_record *text, struct substring delimiters,
1866 struct variable **var)
1868 char *short_name = text_get_token (text, delimiters);
1869 if (short_name == NULL)
1872 *var = lookup_var_by_short_name (dict, short_name);
1874 text_warn (r, text, _("Variable map refers to unknown variable %s."),
1879 /* Displays a warning for the current file position, limiting the
1880 number to MAX_TEXT_WARNINGS for TEXT. */
1882 text_warn (struct sfm_reader *r, struct text_record *text,
1883 const char *format, ...)
1885 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
1889 va_start (args, format);
1890 sys_msg (r, MW, format, args);
1896 text_get_token (struct text_record *text, struct substring delimiters)
1898 struct substring token;
1900 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
1902 ss_data (token)[ss_length (token)] = '\0';
1903 return ss_data (token);
1907 text_match (struct text_record *text, char c)
1909 if (text->buffer.string[text->pos] == c)
1920 /* Displays a corruption message. */
1922 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
1927 ds_init_empty (&text);
1928 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
1929 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
1930 ds_put_vformat (&text, format, args);
1932 m.category = msg_class_to_category (class);
1933 m.severity = msg_class_to_severity (class);
1934 m.where.file_name = NULL;
1935 m.where.line_number = 0;
1936 m.text = ds_cstr (&text);
1941 /* Displays a warning for the current file position. */
1943 sys_warn (struct sfm_reader *r, const char *format, ...)
1947 va_start (args, format);
1948 sys_msg (r, MW, format, args);
1952 /* Displays an error for the current file position,
1953 marks it as in an error state,
1954 and aborts reading it using longjmp. */
1956 sys_error (struct sfm_reader *r, const char *format, ...)
1960 va_start (args, format);
1961 sys_msg (r, ME, format, args);
1965 longjmp (r->bail_out, 1);
1968 /* Reads BYTE_CNT bytes into BUF.
1969 Returns true if exactly BYTE_CNT bytes are successfully read.
1970 Aborts if an I/O error or a partial read occurs.
1971 If EOF_IS_OK, then an immediate end-of-file causes false to be
1972 returned; otherwise, immediate end-of-file causes an abort
1975 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
1976 void *buf, size_t byte_cnt)
1978 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
1979 if (bytes_read == byte_cnt)
1981 else if (ferror (r->file))
1982 sys_error (r, _("System error: %s."), strerror (errno));
1983 else if (!eof_is_ok || bytes_read != 0)
1984 sys_error (r, _("Unexpected end of file."));
1989 /* Reads BYTE_CNT into BUF.
1990 Aborts upon I/O error or if end-of-file is encountered. */
1992 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
1994 read_bytes_internal (r, false, buf, byte_cnt);
1997 /* Reads BYTE_CNT bytes into BUF.
1998 Returns true if exactly BYTE_CNT bytes are successfully read.
1999 Returns false if an immediate end-of-file is encountered.
2000 Aborts if an I/O error or a partial read occurs. */
2002 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2004 return read_bytes_internal (r, true, buf, byte_cnt);
2007 /* Reads a 32-bit signed integer from R and returns its value in
2010 read_int (struct sfm_reader *r)
2013 read_bytes (r, integer, sizeof integer);
2014 return integer_get (r->integer_format, integer, sizeof integer);
2017 /* Reads a 64-bit floating-point number from R and returns its
2018 value in host format. */
2020 read_float (struct sfm_reader *r)
2023 read_bytes (r, number, sizeof number);
2024 return float_get_double (r->float_format, number);
2027 /* Reads exactly SIZE - 1 bytes into BUFFER
2028 and stores a null byte into BUFFER[SIZE - 1]. */
2030 read_string (struct sfm_reader *r, char *buffer, size_t size)
2033 read_bytes (r, buffer, size - 1);
2034 buffer[size - 1] = '\0';
2037 /* Skips BYTES bytes forward in R. */
2039 skip_bytes (struct sfm_reader *r, size_t bytes)
2044 size_t chunk = MIN (sizeof buffer, bytes);
2045 read_bytes (r, buffer, chunk);
2050 static const struct casereader_class sys_file_casereader_class =
2052 sys_file_casereader_read,
2053 sys_file_casereader_destroy,