1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/i18n.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/message.h>
31 #include <libpspp/compiler.h>
32 #include <libpspp/misc.h>
33 #include <libpspp/pool.h>
34 #include <libpspp/str.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/array.h>
38 #include <data/attributes.h>
39 #include <data/case.h>
40 #include <data/casereader-provider.h>
41 #include <data/casereader.h>
42 #include <data/dictionary.h>
43 #include <data/file-handle-def.h>
44 #include <data/file-name.h>
45 #include <data/format.h>
46 #include <data/missing-values.h>
47 #include <data/short-names.h>
48 #include <data/value-labels.h>
49 #include <data/variable.h>
50 #include <data/value.h>
55 #include "unlocked-io.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) (msgid)
63 /* System file reader. */
66 /* Resource tracking. */
67 struct pool *pool; /* All system file state. */
68 jmp_buf bail_out; /* longjmp() target for error handling. */
71 struct file_handle *fh; /* File handle. */
72 struct fh_lock *lock; /* Mutual exclusion for file handle. */
73 FILE *file; /* File stream. */
74 bool error; /* I/O or corruption error? */
75 struct caseproto *proto; /* Format of output cases. */
78 enum integer_format integer_format; /* On-disk integer format. */
79 enum float_format float_format; /* On-disk floating point format. */
80 int oct_cnt; /* Number of 8-byte units per case. */
81 struct sfm_var *sfm_vars; /* Variables. */
82 size_t sfm_var_cnt; /* Number of variables. */
83 casenumber case_cnt; /* Number of cases */
84 bool has_long_var_names; /* File has a long variable name map */
87 bool compressed; /* File is compressed? */
88 double bias; /* Compression bias, usually 100.0. */
89 uint8_t opcodes[8]; /* Current block of opcodes. */
90 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
93 static const struct casereader_class sys_file_casereader_class;
95 static bool close_reader (struct sfm_reader *);
97 static struct variable **make_var_by_value_idx (struct sfm_reader *,
99 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
103 static void sys_msg (struct sfm_reader *r, int class,
104 const char *format, va_list args)
105 PRINTF_FORMAT (3, 0);
106 static void sys_warn (struct sfm_reader *, const char *, ...)
107 PRINTF_FORMAT (2, 3);
108 static void sys_error (struct sfm_reader *, const char *, ...)
112 static void read_bytes (struct sfm_reader *, void *, size_t);
113 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
114 static int read_int (struct sfm_reader *);
115 static double read_float (struct sfm_reader *);
116 static void read_string (struct sfm_reader *, char *, size_t);
117 static void skip_bytes (struct sfm_reader *, size_t);
119 static struct text_record *open_text_record (struct sfm_reader *, size_t size);
120 static void close_text_record (struct sfm_reader *r,
121 struct text_record *);
122 static bool read_variable_to_value_pair (struct sfm_reader *,
124 struct text_record *,
125 struct variable **var, char **value);
126 static void text_warn (struct sfm_reader *r, struct text_record *text,
127 const char *format, ...)
128 PRINTF_FORMAT (3, 4);
129 static char *text_get_token (struct text_record *,
130 struct substring delimiters);
131 static bool text_match (struct text_record *, char c);
132 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
133 struct text_record *,
134 struct substring delimiters,
137 static bool close_reader (struct sfm_reader *r);
139 /* Dictionary reader. */
147 static void read_header (struct sfm_reader *, struct dictionary *,
148 int *weight_idx, int *claimed_oct_cnt,
149 struct sfm_read_info *);
150 static void read_variable_record (struct sfm_reader *, struct dictionary *,
151 int *format_warning_cnt);
152 static void parse_format_spec (struct sfm_reader *, unsigned int,
153 enum which_format, struct variable *,
154 int *format_warning_cnt);
155 static void setup_weight (struct sfm_reader *, int weight_idx,
156 struct variable **var_by_value_idx,
157 struct dictionary *);
158 static void read_documents (struct sfm_reader *, struct dictionary *);
159 static void read_value_labels (struct sfm_reader *, struct dictionary *,
160 struct variable **var_by_value_idx);
162 static void read_extension_record (struct sfm_reader *, struct dictionary *,
163 struct sfm_read_info *);
164 static void read_machine_integer_info (struct sfm_reader *,
165 size_t size, size_t count,
166 struct sfm_read_info *,
169 static void read_machine_float_info (struct sfm_reader *,
170 size_t size, size_t count);
171 static void read_display_parameters (struct sfm_reader *,
172 size_t size, size_t count,
173 struct dictionary *);
174 static void read_long_var_name_map (struct sfm_reader *,
175 size_t size, size_t count,
176 struct dictionary *);
177 static void read_long_string_map (struct sfm_reader *,
178 size_t size, size_t count,
179 struct dictionary *);
180 static void read_data_file_attributes (struct sfm_reader *,
181 size_t size, size_t count,
182 struct dictionary *);
183 static void read_variable_attributes (struct sfm_reader *,
184 size_t size, size_t count,
185 struct dictionary *);
186 static void read_long_string_value_labels (struct sfm_reader *,
187 size_t size, size_t count,
188 struct dictionary *);
190 /* Convert all the strings in DICT from the dict encoding to UTF8 */
192 recode_strings (struct dictionary *dict)
196 const char *enc = dict_get_encoding (dict);
199 enc = get_default_encoding ();
201 for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
203 /* Convert the long variable name */
204 struct variable *var = dict_get_var (dict, i);
205 char *utf8_name = recode_string (UTF8, enc, var_get_name (var), -1);
206 dict_rename_var (dict, var, utf8_name);
209 /* Convert the variable label */
210 if (var_has_label (var))
212 char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
213 var_set_label (var, utf8_label);
217 if (var_has_value_labels (var))
219 const struct val_lab *vl = NULL;
220 const struct val_labs *vlabs = var_get_value_labels (var);
222 for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
224 const union value *val = val_lab_get_value (vl);
225 const char *label = val_lab_get_label (vl);
226 char *new_label = NULL;
228 new_label = recode_string (UTF8, enc, label, -1);
230 var_replace_value_label (var, val, new_label);
237 /* Opens the system file designated by file handle FH for
238 reading. Reads the system file's dictionary into *DICT.
239 If INFO is non-null, then it receives additional info about the
242 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
243 struct sfm_read_info *volatile info)
245 struct sfm_reader *volatile r = NULL;
246 struct variable **var_by_value_idx;
247 struct sfm_read_info local_info;
248 int format_warning_cnt = 0;
253 *dict = dict_create ();
255 /* Create and initialize reader. */
256 r = pool_create_container (struct sfm_reader, pool);
262 r->has_long_var_names = false;
263 r->opcode_idx = sizeof r->opcodes;
265 /* TRANSLATORS: this fragment will be interpolated into
266 messages in fh_lock() that identify types of files. */
267 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
271 r->file = fn_open (fh_get_file_name (fh), "rb");
274 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
275 fh_get_file_name (r->fh), strerror (errno));
279 /* Initialize info. */
282 memset (info, 0, sizeof *info);
284 if (setjmp (r->bail_out))
289 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
291 /* Read all the variable definition records. */
292 rec_type = read_int (r);
293 while (rec_type == 2)
295 read_variable_record (r, *dict, &format_warning_cnt);
296 rec_type = read_int (r);
299 /* Figure out the case format. */
300 var_by_value_idx = make_var_by_value_idx (r, *dict);
301 setup_weight (r, weight_idx, var_by_value_idx, *dict);
303 /* Read all the rest of the dictionary records. */
304 while (rec_type != 999)
309 read_value_labels (r, *dict, var_by_value_idx);
313 sys_error (r, _("Misplaced type 4 record."));
316 read_documents (r, *dict);
320 read_extension_record (r, *dict, info);
324 sys_error (r, _("Unrecognized record type %d."), rec_type);
326 rec_type = read_int (r);
330 if ( ! r->has_long_var_names )
333 for (i = 0; i < dict_get_var_cnt (*dict); i++)
335 struct variable *var = dict_get_var (*dict, i);
336 char short_name[SHORT_NAME_LEN + 1];
337 char long_name[SHORT_NAME_LEN + 1];
339 strcpy (short_name, var_get_name (var));
341 strcpy (long_name, short_name);
342 str_lowercase (long_name);
344 /* Set long name. Renaming a variable may clear the short
345 name, but we want to retain it, so re-set it
347 dict_rename_var (*dict, var, long_name);
348 var_set_short_name (var, 0, short_name);
351 r->has_long_var_names = true;
354 recode_strings (*dict);
356 /* Read record 999 data, which is just filler. */
359 /* Warn if the actual amount of data per case differs from the
360 amount that the header claims. SPSS version 13 gets this
361 wrong when very long strings are involved, so don't warn in
363 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
364 && info->version_major != 13)
365 sys_warn (r, _("File header claims %d variable positions but "
366 "%d were read from file."),
367 claimed_oct_cnt, r->oct_cnt);
369 /* Create an index of dictionary variable widths for
370 sfm_read_case to use. We cannot use the `struct variable's
371 from the dictionary we created, because the caller owns the
372 dictionary and may destroy or modify its variables. */
373 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
374 pool_register (r->pool, free, r->sfm_vars);
375 r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
377 pool_free (r->pool, var_by_value_idx);
378 return casereader_create_sequential
380 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
381 &sys_file_casereader_class, r);
385 dict_destroy (*dict);
390 /* Closes a system file after we're done with it.
391 Returns true if an I/O error has occurred on READER, false
394 close_reader (struct sfm_reader *r)
403 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
405 msg (ME, _("Error closing system file \"%s\": %s."),
406 fh_get_file_name (r->fh), strerror (errno));
416 pool_destroy (r->pool);
421 /* Destroys READER. */
423 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
425 struct sfm_reader *r = r_;
429 /* Returns true if FILE is an SPSS system file,
432 sfm_detect (FILE *file)
436 if (fread (rec_type, 4, 1, file) != 1)
440 return !strcmp ("$FL2", rec_type);
443 /* Reads the global header of the system file.
444 Sets DICT's file label to the system file's label.
445 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
446 or to the value index of the weight variable otherwise.
447 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
448 per case that the file claims to have (although it is not
450 Initializes INFO with header information. */
452 read_header (struct sfm_reader *r, struct dictionary *dict,
453 int *weight_idx, int *claimed_oct_cnt,
454 struct sfm_read_info *info)
457 char eye_catcher[61];
458 uint8_t raw_layout_code[4];
460 char creation_date[10];
461 char creation_time[9];
463 struct substring file_label_ss;
464 struct substring product;
466 read_string (r, rec_type, sizeof rec_type);
467 read_string (r, eye_catcher, sizeof eye_catcher);
469 if (strcmp ("$FL2", rec_type) != 0)
470 sys_error (r, _("This is not an SPSS system file."));
472 /* Identify integer format. */
473 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
474 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
476 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
478 || (r->integer_format != INTEGER_MSB_FIRST
479 && r->integer_format != INTEGER_LSB_FIRST))
480 sys_error (r, _("This is not an SPSS system file."));
482 *claimed_oct_cnt = read_int (r);
483 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
484 *claimed_oct_cnt = -1;
486 r->compressed = read_int (r) != 0;
488 *weight_idx = read_int (r);
490 r->case_cnt = read_int (r);
491 if ( r->case_cnt > INT_MAX / 2)
495 /* Identify floating-point format and obtain compression bias. */
496 read_bytes (r, raw_bias, sizeof raw_bias);
497 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
499 sys_warn (r, _("Compression bias is not the usual "
500 "value of 100, or system file uses unrecognized "
501 "floating-point format."));
502 if (r->integer_format == INTEGER_MSB_FIRST)
503 r->float_format = FLOAT_IEEE_DOUBLE_BE;
505 r->float_format = FLOAT_IEEE_DOUBLE_LE;
507 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
509 read_string (r, creation_date, sizeof creation_date);
510 read_string (r, creation_time, sizeof creation_time);
511 read_string (r, file_label, sizeof file_label);
514 file_label_ss = ss_cstr (file_label);
515 ss_trim (&file_label_ss, ss_cstr (" "));
516 if (!ss_is_empty (file_label_ss))
518 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
519 dict_set_label (dict, ss_data (file_label_ss));
522 strcpy (info->creation_date, creation_date);
523 strcpy (info->creation_time, creation_time);
524 info->integer_format = r->integer_format;
525 info->float_format = r->float_format;
526 info->compressed = r->compressed;
527 info->case_cnt = r->case_cnt;
529 product = ss_cstr (eye_catcher);
530 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
531 ss_trim (&product, ss_cstr (" "));
532 str_copy_buf_trunc (info->product, sizeof info->product,
533 ss_data (product), ss_length (product));
536 /* Reads a variable (type 2) record from R and adds the
537 corresponding variable to DICT.
538 Also skips past additional variable records for long string
541 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
542 int *format_warning_cnt)
545 int has_variable_label;
546 int missing_value_code;
551 struct variable *var;
554 width = read_int (r);
555 has_variable_label = read_int (r);
556 missing_value_code = read_int (r);
557 print_format = read_int (r);
558 write_format = read_int (r);
559 read_string (r, name, sizeof name);
560 name[strcspn (name, " ")] = '\0';
562 /* Check variable name. */
563 if (name[0] == '$' || name[0] == '#')
564 sys_error (r, "Variable name begins with invalid character `%c'.",
566 if (!var_is_plausible_name (name, false))
567 sys_error (r, _("Invalid variable name `%s'."), name);
569 /* Create variable. */
570 if (width < 0 || width > 255)
571 sys_error (r, _("Bad variable width %d."), width);
572 var = dict_create_var (dict, name, width);
575 _("Duplicate variable name `%s' within system file."),
578 /* Set the short name the same as the long name. */
579 var_set_short_name (var, 0, var_get_name (var));
581 /* Get variable label, if any. */
582 if (has_variable_label != 0 && has_variable_label != 1)
583 sys_error (r, _("Variable label indicator field is not 0 or 1."));
584 if (has_variable_label == 1)
590 if (len >= sizeof label)
591 sys_error (r, _("Variable %s has label of invalid length %zu."),
593 read_string (r, label, len + 1);
594 var_set_label (var, label);
596 skip_bytes (r, ROUND_UP (len, 4) - len);
599 /* Set missing values. */
600 if (missing_value_code != 0)
602 struct missing_values mv;
605 mv_init_pool (r->pool, &mv, var_get_width (var));
606 if (var_is_numeric (var))
608 if (missing_value_code < -3 || missing_value_code > 3
609 || missing_value_code == -1)
610 sys_error (r, _("Numeric missing value indicator field is not "
611 "-3, -2, 0, 1, 2, or 3."));
612 if (missing_value_code < 0)
614 double low = read_float (r);
615 double high = read_float (r);
616 mv_add_range (&mv, low, high);
617 missing_value_code = -missing_value_code - 2;
619 for (i = 0; i < missing_value_code; i++)
620 mv_add_num (&mv, read_float (r));
624 int mv_width = MAX (width, 8);
627 if (missing_value_code < 1 || missing_value_code > 3)
628 sys_error (r, _("String missing value indicator field is not "
631 value_init (&value, mv_width);
632 value_set_missing (&value, mv_width);
633 for (i = 0; i < missing_value_code; i++)
635 char *s = value_str_rw (&value, mv_width);
636 read_bytes (r, s, 8);
639 value_destroy (&value, mv_width);
641 var_set_missing_values (var, &mv);
645 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
646 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
648 /* Account for values.
649 Skip long string continuation records, if any. */
650 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
656 for (i = 1; i < nv; i++)
658 /* Check for record type 2 and width -1. */
659 if (read_int (r) != 2 || read_int (r) != -1)
660 sys_error (r, _("Missing string continuation record."));
662 /* Skip and ignore remaining continuation data. */
663 has_variable_label = read_int (r);
664 missing_value_code = read_int (r);
665 print_format = read_int (r);
666 write_format = read_int (r);
667 read_string (r, name, sizeof name);
669 /* Variable label fields on continuation records have
670 been spotted in system files created by "SPSS Power
671 Macintosh Release 6.1". */
672 if (has_variable_label)
673 skip_bytes (r, ROUND_UP (read_int (r), 4));
678 /* Translates the format spec from sysfile format to internal
681 parse_format_spec (struct sfm_reader *r, unsigned int s,
682 enum which_format which, struct variable *v,
683 int *format_warning_cnt)
685 const int max_format_warnings = 8;
687 uint8_t raw_type = s >> 16;
693 if (!fmt_from_io (raw_type, &f.type))
694 sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
699 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
704 if (which == PRINT_FORMAT)
705 var_set_print_format (v, &f);
707 var_set_write_format (v, &f);
709 else if (*++format_warning_cnt <= max_format_warnings)
711 char fmt_string[FMT_STRING_LEN_MAX + 1];
712 sys_warn (r, _("%s variable %s has invalid %s format %s."),
713 var_is_numeric (v) ? _("Numeric") : _("String"),
715 which == PRINT_FORMAT ? _("print") : _("write"),
716 fmt_to_string (&f, fmt_string));
718 if (*format_warning_cnt == max_format_warnings)
719 sys_warn (r, _("Suppressing further invalid format warnings."));
723 /* Sets the weighting variable in DICT to the variable
724 corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is
727 setup_weight (struct sfm_reader *r, int weight_idx,
728 struct variable **var_by_value_idx, struct dictionary *dict)
732 struct variable *weight_var
733 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
734 if (var_is_numeric (weight_var))
735 dict_set_weight (dict, weight_var);
737 sys_error (r, _("Weighting variable must be numeric."));
741 /* Reads a document record, type 6, from system file R, and sets up
742 the documents and n_documents fields in the associated
745 read_documents (struct sfm_reader *r, struct dictionary *dict)
750 if (dict_get_documents (dict) != NULL)
751 sys_error (r, _("Multiple type 6 (document) records."));
753 line_cnt = read_int (r);
755 sys_error (r, _("Number of document lines (%d) "
756 "must be greater than 0."), line_cnt);
758 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
759 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
760 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
761 dict_set_documents (dict, documents);
763 sys_error (r, _("Document line contains null byte."));
764 pool_free (r->pool, documents);
767 /* Read a type 7 extension record. */
769 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
770 struct sfm_read_info *info)
772 int subtype = read_int (r);
773 size_t size = read_int (r);
774 size_t count = read_int (r);
775 size_t bytes = size * count;
777 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
778 allows an extra byte for a null terminator, used by some
779 extension processing routines. */
780 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
781 sys_error (r, "Record type 7 subtype %d too large.", subtype);
786 read_machine_integer_info (r, size, count, info, dict);
790 read_machine_float_info (r, size, count);
794 /* Variable sets information. We don't use these yet.
795 They only apply to GUIs; see VARSETS on the APPLY
796 DICTIONARY command in SPSS documentation. */
800 /* DATE variable information. We don't use it yet, but we
805 /* Used by the MRSETS command. */
809 /* Used by the SPSS Data Entry software. */
813 read_display_parameters (r, size, count, dict);
817 read_long_var_name_map (r, size, count, dict);
821 read_long_string_map (r, size, count, dict);
825 /* New in SPSS v14? Unknown purpose. */
829 read_data_file_attributes (r, size, count, dict);
833 read_variable_attributes (r, size, count, dict);
837 /* New in SPSS 16. Contains a single string that describes
838 the character encoding, e.g. "windows-1252". */
840 char *encoding = pool_calloc (r->pool, size, count + 1);
841 read_string (r, encoding, count + 1);
842 dict_set_encoding (dict, encoding);
847 /* New in SPSS 16. Encodes value labels for long string
849 read_long_string_value_labels (r, size, count, dict);
853 sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
854 subtype, PACKAGE_BUGREPORT);
858 skip_bytes (r, bytes);
861 /* Read record type 7, subtype 3. */
863 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
864 struct sfm_read_info *info,
865 struct dictionary *dict)
867 int version_major = read_int (r);
868 int version_minor = read_int (r);
869 int version_revision = read_int (r);
870 int machine_code UNUSED = read_int (r);
871 int float_representation = read_int (r);
872 int compression_code UNUSED = read_int (r);
873 int integer_representation = read_int (r);
874 int character_code = read_int (r);
876 int expected_float_format;
877 int expected_integer_format;
879 if (size != 4 || count != 8)
880 sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
884 /* Save version info. */
885 info->version_major = version_major;
886 info->version_minor = version_minor;
887 info->version_revision = version_revision;
889 /* Check floating point format. */
890 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
891 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
892 expected_float_format = 1;
893 else if (r->float_format == FLOAT_Z_LONG)
894 expected_float_format = 2;
895 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
896 expected_float_format = 3;
899 if (float_representation != expected_float_format)
900 sys_error (r, _("Floating-point representation indicated by "
901 "system file (%d) differs from expected (%d)."),
902 r->float_format, expected_float_format);
904 /* Check integer format. */
905 if (r->integer_format == INTEGER_MSB_FIRST)
906 expected_integer_format = 1;
907 else if (r->integer_format == INTEGER_LSB_FIRST)
908 expected_integer_format = 2;
911 if (integer_representation != expected_integer_format)
913 static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
914 sys_warn (r, _("Integer format indicated by system file (%s) "
915 "differs from expected (%s)."),
916 gettext (endian[integer_representation == 1]),
917 gettext (endian[expected_integer_format == 1]));
922 Record 7 (20) provides a much more reliable way of
923 setting the encoding.
924 The character_code is used as a fallback only.
926 if ( NULL == dict_get_encoding (dict))
928 switch (character_code)
931 dict_set_encoding (dict, "EBCDIC-US");
935 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
936 respectively. However, there are known to be many files
937 in the wild with character code 2, yet have data which are
939 Therefore we ignore these values.
943 dict_set_encoding (dict, "MS_KANJI");
946 dict_set_encoding (dict, "UTF-7");
949 dict_set_encoding (dict, "UTF-8");
954 snprintf (enc, 100, "CP%d", character_code);
955 dict_set_encoding (dict, enc);
962 /* Read record type 7, subtype 4. */
964 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
966 double sysmis = read_float (r);
967 double highest = read_float (r);
968 double lowest = read_float (r);
970 if (size != 8 || count != 3)
971 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
974 if (sysmis != SYSMIS)
975 sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
976 if (highest != HIGHEST)
977 sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
978 if (lowest != LOWEST)
979 sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
982 /* Read record type 7, subtype 11, which specifies how variables
983 should be displayed in GUI environments. */
985 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
986 struct dictionary *dict)
995 sys_warn (r, _("Bad size %zu on extension 11."), size);
996 skip_bytes (r, size * count);
1000 n_vars = dict_get_var_cnt (dict);
1001 if (count == 3 * n_vars)
1002 includes_width = true;
1003 else if (count == 2 * n_vars)
1004 includes_width = false;
1007 sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
1009 skip_bytes (r, size * count);
1013 for (i = 0; i < n_vars; ++i)
1015 struct variable *v = dict_get_var (dict, i);
1016 int measure = read_int (r);
1017 int width = includes_width ? read_int (r) : 0;
1018 int align = read_int (r);
1020 /* SPSS 14 sometimes seems to set string variables' measure
1022 if (0 == measure && var_is_alpha (v))
1025 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1028 sys_warn (r, _("Invalid variable display parameters "
1029 "for variable %zu (%s). "
1030 "Default parameters substituted."),
1031 i, var_get_name (v));
1036 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1037 : measure == 2 ? MEASURE_ORDINAL
1039 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1040 : align == 1 ? ALIGN_RIGHT
1043 /* Older versions (SPSS 9.0) sometimes set the display
1044 width to zero. This causes confusion in the GUI, so
1045 only set the width if it is nonzero. */
1047 var_set_display_width (v, width);
1051 /* Reads record type 7, subtype 13, which gives the long name
1052 that corresponds to each short name. Modifies variable names
1053 in DICT accordingly. */
1055 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
1056 struct dictionary *dict)
1058 struct text_record *text;
1059 struct variable *var;
1062 text = open_text_record (r, size * count);
1063 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1066 size_t short_name_cnt;
1069 /* Validate long name. */
1070 if (!var_is_valid_name (long_name, false))
1072 sys_warn (r, _("Long variable mapping from %s to invalid "
1073 "variable name `%s'."),
1074 var_get_name (var), long_name);
1078 /* Identify any duplicates. */
1079 if (strcasecmp (var_get_short_name (var, 0), long_name)
1080 && dict_lookup_var (dict, long_name) != NULL)
1082 sys_warn (r, _("Duplicate long variable name `%s' "
1083 "within system file."), long_name);
1087 /* Renaming a variable may clear its short names, but we
1088 want to retain them, so we save them and re-set them
1090 short_name_cnt = var_get_short_name_cnt (var);
1091 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
1092 for (i = 0; i < short_name_cnt; i++)
1094 const char *s = var_get_short_name (var, i);
1095 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1098 /* Set long name. */
1099 dict_rename_var (dict, var, long_name);
1101 /* Restore short names. */
1102 for (i = 0; i < short_name_cnt; i++)
1104 var_set_short_name (var, i, short_names[i]);
1105 free (short_names[i]);
1109 close_text_record (r, text);
1110 r->has_long_var_names = true;
1113 /* Reads record type 7, subtype 14, which gives the real length
1114 of each very long string. Rearranges DICT accordingly. */
1116 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
1117 struct dictionary *dict)
1119 struct text_record *text;
1120 struct variable *var;
1123 text = open_text_record (r, size * count);
1124 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1126 size_t idx = var_get_dict_index (var);
1132 length = strtol (length_s, NULL, 10);
1133 if (length < 1 || length > MAX_STRING)
1135 sys_warn (r, _("%s listed as string of invalid length %s "
1136 "in very length string record."),
1137 var_get_name (var), length_s);
1141 /* Check segments. */
1142 segment_cnt = sfm_width_to_segments (length);
1143 if (segment_cnt == 1)
1145 sys_warn (r, _("%s listed in very long string record with width %s, "
1146 "which requires only one segment."),
1147 var_get_name (var), length_s);
1150 if (idx + segment_cnt > dict_get_var_cnt (dict))
1151 sys_error (r, _("Very long string %s overflows dictionary."),
1152 var_get_name (var));
1154 /* Get the short names from the segments and check their
1156 for (i = 0; i < segment_cnt; i++)
1158 struct variable *seg = dict_get_var (dict, idx + i);
1159 int alloc_width = sfm_segment_alloc_width (length, i);
1160 int width = var_get_width (seg);
1163 var_set_short_name (var, i, var_get_short_name (seg, 0));
1164 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1165 sys_error (r, _("Very long string with width %ld has segment %d "
1166 "of width %d (expected %d)"),
1167 length, i, width, alloc_width);
1169 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1170 var_set_width (var, length);
1172 close_text_record (r, text);
1173 dict_compact_values (dict);
1176 /* Reads value labels from sysfile H and inserts them into the
1177 associated dictionary. */
1179 read_value_labels (struct sfm_reader *r,
1180 struct dictionary *dict, struct variable **var_by_value_idx)
1182 struct pool *subpool;
1186 char raw_value[8]; /* Value as uninterpreted bytes. */
1187 union value value; /* Value. */
1188 char *label; /* Null-terminated label string. */
1191 struct label *labels = NULL;
1192 int label_cnt; /* Number of labels. */
1194 struct variable **var = NULL; /* Associated variables. */
1195 int var_cnt; /* Number of associated variables. */
1196 int max_width; /* Maximum width of string variables. */
1200 subpool = pool_create_subpool (r->pool);
1202 /* Read the type 3 record and record its contents. We can't do
1203 much with the data yet because we don't know whether it is
1204 of numeric or string type. */
1206 /* Read number of labels. */
1207 label_cnt = read_int (r);
1209 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1211 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1216 /* Read each value/label tuple into labels[]. */
1217 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1218 for (i = 0; i < label_cnt; i++)
1220 struct label *label = labels + i;
1221 unsigned char label_len;
1225 read_bytes (r, label->raw_value, sizeof label->raw_value);
1227 /* Read label length. */
1228 read_bytes (r, &label_len, sizeof label_len);
1229 padded_len = ROUND_UP (label_len + 1, 8);
1231 /* Read label, padding. */
1232 label->label = pool_alloc (subpool, padded_len + 1);
1233 read_bytes (r, label->label, padded_len - 1);
1234 label->label[label_len] = 0;
1237 /* Now, read the type 4 record that has the list of variables
1238 to which the value labels are to be applied. */
1240 /* Read record type of type 4 record. */
1241 if (read_int (r) != 4)
1242 sys_error (r, _("Variable index record (type 4) does not immediately "
1243 "follow value label record (type 3) as it should."));
1245 /* Read number of variables associated with value label from type 4
1247 var_cnt = read_int (r);
1248 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1249 sys_error (r, _("Number of variables associated with a value label (%d) "
1250 "is not between 1 and the number of variables (%zu)."),
1251 var_cnt, dict_get_var_cnt (dict));
1253 /* Read the list of variables. */
1254 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1256 for (i = 0; i < var_cnt; i++)
1258 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1259 if (var_get_width (var[i]) > 8)
1260 sys_error (r, _("Value labels may not be added to long string "
1261 "variables (e.g. %s) using records types 3 and 4."),
1262 var_get_name (var[i]));
1263 max_width = MAX (max_width, var_get_width (var[i]));
1266 /* Type check the variables. */
1267 for (i = 1; i < var_cnt; i++)
1268 if (var_get_type (var[i]) != var_get_type (var[0]))
1269 sys_error (r, _("Variables associated with value label are not all of "
1270 "identical type. Variable %s is %s, but variable "
1272 var_get_name (var[0]),
1273 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1274 var_get_name (var[i]),
1275 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1277 /* Fill in labels[].value, now that we know the desired type. */
1278 for (i = 0; i < label_cnt; i++)
1280 struct label *label = labels + i;
1282 value_init_pool (subpool, &label->value, max_width);
1283 if (var_is_alpha (var[0]))
1284 buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
1285 label->raw_value, sizeof label->raw_value, ' ');
1287 label->value.f = float_get_double (r->float_format, label->raw_value);
1290 /* Assign the `value_label's to each variable. */
1291 for (i = 0; i < var_cnt; i++)
1293 struct variable *v = var[i];
1296 /* Add each label to the variable. */
1297 for (j = 0; j < label_cnt; j++)
1299 struct label *label = &labels[j];
1300 if (!var_add_value_label (v, &label->value, label->label))
1302 if (var_is_numeric (var[0]))
1303 sys_warn (r, _("Duplicate value label for %g on %s."),
1304 label->value.f, var_get_name (v));
1306 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1307 max_width, value_str (&label->value, max_width),
1313 pool_destroy (subpool);
1316 /* Reads a set of custom attributes from TEXT into ATTRS.
1317 ATTRS may be a null pointer, in which case the attributes are
1318 read but discarded. */
1320 read_attributes (struct sfm_reader *r, struct text_record *text,
1321 struct attrset *attrs)
1325 struct attribute *attr;
1329 /* Parse the key. */
1330 key = text_get_token (text, ss_cstr ("("));
1334 attr = attribute_create (key);
1335 for (index = 1; ; index++)
1337 /* Parse the value. */
1341 value = text_get_token (text, ss_cstr ("\n"));
1344 text_warn (r, text, _("Error parsing attribute value %s[%d]"),
1349 length = strlen (value);
1350 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
1352 value[length - 1] = '\0';
1353 attribute_add_value (attr, value + 1);
1358 _("Attribute value %s[%d] is not quoted: %s"),
1360 attribute_add_value (attr, value);
1363 /* Was this the last value for this attribute? */
1364 if (text_match (text, ')'))
1368 attrset_add (attrs, attr);
1370 attribute_destroy (attr);
1372 while (!text_match (text, '/'));
1375 /* Reads record type 7, subtype 17, which lists custom
1376 attributes on the data file. */
1378 read_data_file_attributes (struct sfm_reader *r,
1379 size_t size, size_t count,
1380 struct dictionary *dict)
1382 struct text_record *text = open_text_record (r, size * count);
1383 read_attributes (r, text, dict_get_attributes (dict));
1384 close_text_record (r, text);
1388 skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
1392 for (i = 0; i < n_labels; i++)
1394 size_t value_length, label_length;
1396 value_length = read_int (r);
1397 skip_bytes (r, value_length);
1398 label_length = read_int (r);
1399 skip_bytes (r, label_length);
1404 read_long_string_value_labels (struct sfm_reader *r,
1405 size_t size, size_t count,
1406 struct dictionary *d)
1408 const off_t start = ftello (r->file);
1409 while (ftello (r->file) - start < size * count)
1411 char var_name[VAR_NAME_LEN + 1];
1419 var_name_len = read_int (r);
1420 if (var_name_len > VAR_NAME_LEN)
1421 sys_error (r, _("Variable name length in long string value label "
1422 "record (%d) exceeds %d-byte limit."),
1423 var_name_len, VAR_NAME_LEN);
1424 read_string (r, var_name, var_name_len + 1);
1425 width = read_int (r);
1426 n_labels = read_int (r);
1428 v = dict_lookup_var (d, var_name);
1431 sys_warn (r, _("Ignoring long string value record for "
1432 "unknown variable %s."), var_name);
1433 skip_long_string_value_labels (r, n_labels);
1436 if (var_is_numeric (v))
1438 sys_warn (r, _("Ignoring long string value record for "
1439 "numeric variable %s."), var_name);
1440 skip_long_string_value_labels (r, n_labels);
1443 if (width != var_get_width (v))
1445 sys_warn (r, _("Ignoring long string value record for variable %s "
1446 "because the record's width (%d) does not match the "
1447 "variable's width (%d)"),
1448 var_name, width, var_get_width (v));
1449 skip_long_string_value_labels (r, n_labels);
1454 value_init_pool (r->pool, &value, width);
1455 for (i = 0; i < n_labels; i++)
1457 size_t value_length, label_length;
1462 value_length = read_int (r);
1463 if (value_length == width)
1464 read_string (r, value_str_rw (&value, width), width + 1);
1467 sys_warn (r, _("Ignoring long string value %zu for variable %s, "
1468 "with width %d, that has bad value width %zu."),
1469 i, var_get_name (v), width, value_length);
1470 skip_bytes (r, value_length);
1475 label_length = read_int (r);
1476 read_string (r, label, MIN (sizeof label, label_length + 1));
1477 if (label_length >= sizeof label)
1479 /* Skip and silently ignore label text after the
1480 first 255 bytes. The maximum documented length
1481 of a label is 120 bytes so this is more than
1483 skip_bytes (r, sizeof label - (label_length + 1));
1486 if (!skip && !var_add_value_label (v, &value, label))
1487 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1488 width, value_str (&value, width), var_get_name (v));
1494 /* Reads record type 7, subtype 18, which lists custom
1495 attributes on individual variables. */
1497 read_variable_attributes (struct sfm_reader *r,
1498 size_t size, size_t count,
1499 struct dictionary *dict)
1501 struct text_record *text = open_text_record (r, size * count);
1504 struct variable *var;
1505 if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var))
1507 read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
1509 close_text_record (r, text);
1515 static void partial_record (struct sfm_reader *r)
1518 static void read_error (struct casereader *, const struct sfm_reader *);
1520 static bool read_case_number (struct sfm_reader *, double *);
1521 static bool read_case_string (struct sfm_reader *, char *, size_t);
1522 static int read_opcode (struct sfm_reader *);
1523 static bool read_compressed_number (struct sfm_reader *, double *);
1524 static bool read_compressed_string (struct sfm_reader *, char *);
1525 static bool read_whole_strings (struct sfm_reader *, char *, size_t);
1526 static bool skip_whole_strings (struct sfm_reader *, size_t);
1528 /* Reads and returns one case from READER's file. Returns a null
1529 pointer if not successful. */
1530 static struct ccase *
1531 sys_file_casereader_read (struct casereader *reader, void *r_)
1533 struct sfm_reader *r = r_;
1534 struct ccase *volatile c;
1540 c = case_create (r->proto);
1541 if (setjmp (r->bail_out))
1543 casereader_force_error (reader);
1548 for (i = 0; i < r->sfm_var_cnt; i++)
1550 struct sfm_var *sv = &r->sfm_vars[i];
1551 union value *v = case_data_rw_idx (c, sv->case_index);
1553 if (sv->var_width == 0)
1555 if (!read_case_number (r, &v->f))
1560 char *s = value_str_rw (v, sv->var_width);
1561 if (!read_case_string (r, s + sv->offset, sv->segment_width))
1563 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
1573 if (r->case_cnt != -1)
1574 read_error (reader, r);
1578 /* Issues an error that R ends in a partial record. */
1580 partial_record (struct sfm_reader *r)
1582 sys_error (r, _("File ends in partial case."));
1585 /* Issues an error that an unspecified error occurred SFM, and
1588 read_error (struct casereader *r, const struct sfm_reader *sfm)
1590 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1591 casereader_force_error (r);
1594 /* Reads a number from R and stores its value in *D.
1595 If R is compressed, reads a compressed number;
1596 otherwise, reads a number in the regular way.
1597 Returns true if successful, false if end of file is
1598 reached immediately. */
1600 read_case_number (struct sfm_reader *r, double *d)
1605 if (!try_read_bytes (r, number, sizeof number))
1607 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1611 return read_compressed_number (r, d);
1614 /* Reads LENGTH string bytes from R into S.
1615 Always reads a multiple of 8 bytes; if LENGTH is not a
1616 multiple of 8, then extra bytes are read and discarded without
1618 Reads compressed strings if S is compressed.
1619 Returns true if successful, false if end of file is
1620 reached immediately. */
1622 read_case_string (struct sfm_reader *r, char *s, size_t length)
1624 size_t whole = ROUND_DOWN (length, 8);
1625 size_t partial = length % 8;
1629 if (!read_whole_strings (r, s, whole))
1636 if (!read_whole_strings (r, bounce, sizeof bounce))
1642 memcpy (s + whole, bounce, partial);
1648 /* Reads and returns the next compression opcode from R. */
1650 read_opcode (struct sfm_reader *r)
1652 assert (r->compressed);
1656 if (r->opcode_idx >= sizeof r->opcodes)
1658 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1662 opcode = r->opcodes[r->opcode_idx++];
1669 /* Reads a compressed number from R and stores its value in D.
1670 Returns true if successful, false if end of file is
1671 reached immediately. */
1673 read_compressed_number (struct sfm_reader *r, double *d)
1675 int opcode = read_opcode (r);
1683 *d = read_float (r);
1687 sys_error (r, _("Compressed data is corrupt."));
1694 *d = opcode - r->bias;
1701 /* Reads a compressed 8-byte string segment from R and stores it
1703 Returns true if successful, false if end of file is
1704 reached immediately. */
1706 read_compressed_string (struct sfm_reader *r, char *dst)
1708 switch (read_opcode (r))
1715 read_bytes (r, dst, 8);
1719 memset (dst, ' ', 8);
1723 sys_error (r, _("Compressed data is corrupt."));
1729 /* Reads LENGTH string bytes from R into S.
1730 LENGTH must be a multiple of 8.
1731 Reads compressed strings if S is compressed.
1732 Returns true if successful, false if end of file is
1733 reached immediately. */
1735 read_whole_strings (struct sfm_reader *r, char *s, size_t length)
1737 assert (length % 8 == 0);
1739 return try_read_bytes (r, s, length);
1743 for (ofs = 0; ofs < length; ofs += 8)
1744 if (!read_compressed_string (r, s + ofs))
1754 /* Skips LENGTH string bytes from R.
1755 LENGTH must be a multiple of 8.
1756 (LENGTH is also limited to 1024, but that's only because the
1757 current caller never needs more than that many bytes.)
1758 Returns true if successful, false if end of file is
1759 reached immediately. */
1761 skip_whole_strings (struct sfm_reader *r, size_t length)
1764 assert (length < sizeof buffer);
1765 return read_whole_strings (r, buffer, length);
1768 /* Creates and returns a table that can be used for translating a value
1769 index into a case to a "struct variable *" for DICT. Multiple
1770 system file fields reference variables this way.
1772 This table must be created before processing the very long
1773 string extension record, because that record causes some
1774 values to be deleted from the case and the dictionary to be
1776 static struct variable **
1777 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1779 struct variable **var_by_value_idx;
1783 var_by_value_idx = pool_nmalloc (r->pool,
1784 r->oct_cnt, sizeof *var_by_value_idx);
1785 for (i = 0; i < dict_get_var_cnt (dict); i++)
1787 struct variable *v = dict_get_var (dict, i);
1788 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1791 var_by_value_idx[value_idx++] = v;
1792 for (j = 1; j < nv; j++)
1793 var_by_value_idx[value_idx++] = NULL;
1795 assert (value_idx == r->oct_cnt);
1797 return var_by_value_idx;
1800 /* Returns the "struct variable" corresponding to the given
1801 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index
1803 static struct variable *
1804 lookup_var_by_value_idx (struct sfm_reader *r,
1805 struct variable **var_by_value_idx, int value_idx)
1807 struct variable *var;
1809 if (value_idx < 1 || value_idx > r->oct_cnt)
1810 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1811 value_idx, r->oct_cnt);
1813 var = var_by_value_idx[value_idx - 1];
1815 sys_error (r, _("Variable index %d refers to long string "
1822 /* Returns the variable in D with the given SHORT_NAME,
1823 or a null pointer if there is none. */
1824 static struct variable *
1825 lookup_var_by_short_name (struct dictionary *d, const char *short_name)
1827 struct variable *var;
1831 /* First try looking up by full name. This often succeeds. */
1832 var = dict_lookup_var (d, short_name);
1833 if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
1836 /* Iterate through the whole dictionary as a fallback. */
1837 var_cnt = dict_get_var_cnt (d);
1838 for (i = 0; i < var_cnt; i++)
1840 var = dict_get_var (d, i);
1841 if (!strcasecmp (var_get_short_name (var, 0), short_name))
1848 /* Helpers for reading records that contain structured text
1851 /* Maximum number of warnings to issue for a single text
1853 #define MAX_TEXT_WARNINGS 5
1858 struct substring buffer; /* Record contents. */
1859 size_t pos; /* Current position in buffer. */
1860 int n_warnings; /* Number of warnings issued or suppressed. */
1863 /* Reads SIZE bytes into a text record for R,
1864 and returns the new text record. */
1865 static struct text_record *
1866 open_text_record (struct sfm_reader *r, size_t size)
1868 struct text_record *text = pool_alloc (r->pool, sizeof *text);
1869 char *buffer = pool_malloc (r->pool, size + 1);
1870 read_bytes (r, buffer, size);
1871 text->buffer = ss_buffer (buffer, size);
1873 text->n_warnings = 0;
1877 /* Closes TEXT, frees its storage, and issues a final warning
1878 about suppressed warnings if necesary. */
1880 close_text_record (struct sfm_reader *r, struct text_record *text)
1882 if (text->n_warnings > MAX_TEXT_WARNINGS)
1883 sys_warn (r, _("Suppressed %d additional related warnings."),
1884 text->n_warnings - MAX_TEXT_WARNINGS);
1885 pool_free (r->pool, ss_data (text->buffer));
1888 /* Reads a variable=value pair from TEXT.
1889 Looks up the variable in DICT and stores it into *VAR.
1890 Stores a null-terminated value into *VALUE. */
1892 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
1893 struct text_record *text,
1894 struct variable **var, char **value)
1898 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
1901 *value = text_get_token (text, ss_buffer ("\t\0", 2));
1905 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
1906 ss_buffer ("\t\0", 2));
1914 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
1915 struct text_record *text, struct substring delimiters,
1916 struct variable **var)
1918 char *short_name = text_get_token (text, delimiters);
1919 if (short_name == NULL)
1922 *var = lookup_var_by_short_name (dict, short_name);
1924 text_warn (r, text, _("Variable map refers to unknown variable %s."),
1929 /* Displays a warning for the current file position, limiting the
1930 number to MAX_TEXT_WARNINGS for TEXT. */
1932 text_warn (struct sfm_reader *r, struct text_record *text,
1933 const char *format, ...)
1935 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
1939 va_start (args, format);
1940 sys_msg (r, MW, format, args);
1946 text_get_token (struct text_record *text, struct substring delimiters)
1948 struct substring token;
1950 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
1952 ss_data (token)[ss_length (token)] = '\0';
1953 return ss_data (token);
1957 text_match (struct text_record *text, char c)
1959 if (text->buffer.string[text->pos] == c)
1970 /* Displays a corruption message. */
1972 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
1977 ds_init_empty (&text);
1978 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
1979 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
1980 ds_put_vformat (&text, format, args);
1982 m.category = msg_class_to_category (class);
1983 m.severity = msg_class_to_severity (class);
1984 m.where.file_name = NULL;
1985 m.where.line_number = 0;
1986 m.text = ds_cstr (&text);
1991 /* Displays a warning for the current file position. */
1993 sys_warn (struct sfm_reader *r, const char *format, ...)
1997 va_start (args, format);
1998 sys_msg (r, MW, format, args);
2002 /* Displays an error for the current file position,
2003 marks it as in an error state,
2004 and aborts reading it using longjmp. */
2006 sys_error (struct sfm_reader *r, const char *format, ...)
2010 va_start (args, format);
2011 sys_msg (r, ME, format, args);
2015 longjmp (r->bail_out, 1);
2018 /* Reads BYTE_CNT bytes into BUF.
2019 Returns true if exactly BYTE_CNT bytes are successfully read.
2020 Aborts if an I/O error or a partial read occurs.
2021 If EOF_IS_OK, then an immediate end-of-file causes false to be
2022 returned; otherwise, immediate end-of-file causes an abort
2025 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
2026 void *buf, size_t byte_cnt)
2028 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
2029 if (bytes_read == byte_cnt)
2031 else if (ferror (r->file))
2032 sys_error (r, _("System error: %s."), strerror (errno));
2033 else if (!eof_is_ok || bytes_read != 0)
2034 sys_error (r, _("Unexpected end of file."));
2039 /* Reads BYTE_CNT into BUF.
2040 Aborts upon I/O error or if end-of-file is encountered. */
2042 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2044 read_bytes_internal (r, false, buf, byte_cnt);
2047 /* Reads BYTE_CNT bytes into BUF.
2048 Returns true if exactly BYTE_CNT bytes are successfully read.
2049 Returns false if an immediate end-of-file is encountered.
2050 Aborts if an I/O error or a partial read occurs. */
2052 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2054 return read_bytes_internal (r, true, buf, byte_cnt);
2057 /* Reads a 32-bit signed integer from R and returns its value in
2060 read_int (struct sfm_reader *r)
2063 read_bytes (r, integer, sizeof integer);
2064 return integer_get (r->integer_format, integer, sizeof integer);
2067 /* Reads a 64-bit floating-point number from R and returns its
2068 value in host format. */
2070 read_float (struct sfm_reader *r)
2073 read_bytes (r, number, sizeof number);
2074 return float_get_double (r->float_format, number);
2077 /* Reads exactly SIZE - 1 bytes into BUFFER
2078 and stores a null byte into BUFFER[SIZE - 1]. */
2080 read_string (struct sfm_reader *r, char *buffer, size_t size)
2083 read_bytes (r, buffer, size - 1);
2084 buffer[size - 1] = '\0';
2087 /* Skips BYTES bytes forward in R. */
2089 skip_bytes (struct sfm_reader *r, size_t bytes)
2094 size_t chunk = MIN (sizeof buffer, bytes);
2095 read_bytes (r, buffer, chunk);
2100 static const struct casereader_class sys_file_casereader_class =
2102 sys_file_casereader_read,
2103 sys_file_casereader_destroy,