1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/i18n.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/message.h>
31 #include <libpspp/compiler.h>
32 #include <libpspp/misc.h>
33 #include <libpspp/pool.h>
34 #include <libpspp/str.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/array.h>
38 #include <data/attributes.h>
39 #include <data/case.h>
40 #include <data/casereader-provider.h>
41 #include <data/casereader.h>
42 #include <data/dictionary.h>
43 #include <data/file-handle-def.h>
44 #include <data/file-name.h>
45 #include <data/format.h>
46 #include <data/missing-values.h>
47 #include <data/short-names.h>
48 #include <data/value-labels.h>
49 #include <data/variable.h>
50 #include <data/value.h>
55 #include "unlocked-io.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) (msgid)
63 /* System file reader. */
66 /* Resource tracking. */
67 struct pool *pool; /* All system file state. */
68 jmp_buf bail_out; /* longjmp() target for error handling. */
71 struct file_handle *fh; /* File handle. */
72 struct fh_lock *lock; /* Mutual exclusion for file handle. */
73 FILE *file; /* File stream. */
74 bool error; /* I/O or corruption error? */
75 struct caseproto *proto; /* Format of output cases. */
78 enum integer_format integer_format; /* On-disk integer format. */
79 enum float_format float_format; /* On-disk floating point format. */
80 int oct_cnt; /* Number of 8-byte units per case. */
81 struct sfm_var *sfm_vars; /* Variables. */
82 size_t sfm_var_cnt; /* Number of variables. */
83 casenumber case_cnt; /* Number of cases */
84 bool has_long_var_names; /* File has a long variable name map */
87 bool compressed; /* File is compressed? */
88 double bias; /* Compression bias, usually 100.0. */
89 uint8_t opcodes[8]; /* Current block of opcodes. */
90 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
93 static const struct casereader_class sys_file_casereader_class;
95 static bool close_reader (struct sfm_reader *);
97 static struct variable **make_var_by_value_idx (struct sfm_reader *,
99 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
103 static void sys_msg (struct sfm_reader *r, int class,
104 const char *format, va_list args)
105 PRINTF_FORMAT (3, 0);
106 static void sys_warn (struct sfm_reader *, const char *, ...)
107 PRINTF_FORMAT (2, 3);
108 static void sys_error (struct sfm_reader *, const char *, ...)
112 static void read_bytes (struct sfm_reader *, void *, size_t);
113 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
114 static int read_int (struct sfm_reader *);
115 static double read_float (struct sfm_reader *);
116 static void read_string (struct sfm_reader *, char *, size_t);
117 static void skip_bytes (struct sfm_reader *, size_t);
119 static struct text_record *open_text_record (struct sfm_reader *, size_t size);
120 static void close_text_record (struct sfm_reader *r,
121 struct text_record *);
122 static bool read_variable_to_value_pair (struct sfm_reader *,
124 struct text_record *,
125 struct variable **var, char **value);
126 static void text_warn (struct sfm_reader *r, struct text_record *text,
127 const char *format, ...)
128 PRINTF_FORMAT (3, 4);
129 static char *text_get_token (struct text_record *,
130 struct substring delimiters);
131 static bool text_match (struct text_record *, char c);
132 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
133 struct text_record *,
134 struct substring delimiters,
137 static bool close_reader (struct sfm_reader *r);
139 /* Dictionary reader. */
147 static void read_header (struct sfm_reader *, struct dictionary *,
148 int *weight_idx, int *claimed_oct_cnt,
149 struct sfm_read_info *);
150 static void read_variable_record (struct sfm_reader *, struct dictionary *,
151 int *format_warning_cnt);
152 static void parse_format_spec (struct sfm_reader *, unsigned int,
153 enum which_format, struct variable *,
154 int *format_warning_cnt);
155 static void setup_weight (struct sfm_reader *, int weight_idx,
156 struct variable **var_by_value_idx,
157 struct dictionary *);
158 static void read_documents (struct sfm_reader *, struct dictionary *);
159 static void read_value_labels (struct sfm_reader *, struct dictionary *,
160 struct variable **var_by_value_idx);
162 static void read_extension_record (struct sfm_reader *, struct dictionary *,
163 struct sfm_read_info *);
164 static void read_machine_integer_info (struct sfm_reader *,
165 size_t size, size_t count,
166 struct sfm_read_info *,
169 static void read_machine_float_info (struct sfm_reader *,
170 size_t size, size_t count);
171 static void read_display_parameters (struct sfm_reader *,
172 size_t size, size_t count,
173 struct dictionary *);
174 static void read_long_var_name_map (struct sfm_reader *,
175 size_t size, size_t count,
176 struct dictionary *);
177 static void read_long_string_map (struct sfm_reader *,
178 size_t size, size_t count,
179 struct dictionary *);
180 static void read_data_file_attributes (struct sfm_reader *,
181 size_t size, size_t count,
182 struct dictionary *);
183 static void read_variable_attributes (struct sfm_reader *,
184 size_t size, size_t count,
185 struct dictionary *);
186 static void read_long_string_value_labels (struct sfm_reader *,
187 size_t size, size_t count,
188 struct dictionary *);
190 /* Convert all the strings in DICT from the dict encoding to UTF8 */
192 recode_strings (struct dictionary *dict)
196 const char *enc = dict_get_encoding (dict);
200 for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
202 /* Convert the long variable name */
203 struct variable *var = dict_get_var (dict, i);
204 char *utf8_name = recode_string (UTF8, enc, var_get_name (var), -1);
205 dict_rename_var (dict, var, utf8_name);
208 /* Convert the variable label */
209 if (var_has_label (var))
211 char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
212 var_set_label (var, utf8_label);
216 if (var_has_value_labels (var))
218 const struct val_lab *vl = NULL;
219 const struct val_labs *vlabs = var_get_value_labels (var);
221 for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
223 const union value *val = val_lab_get_value (vl);
224 const char *label = val_lab_get_label (vl);
225 char *new_label = NULL;
227 new_label = recode_string (UTF8, enc, label, -1);
229 var_replace_value_label (var, val, new_label);
236 /* Opens the system file designated by file handle FH for
237 reading. Reads the system file's dictionary into *DICT.
238 If INFO is non-null, then it receives additional info about the
241 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
242 struct sfm_read_info *volatile info)
244 struct sfm_reader *volatile r = NULL;
245 struct variable **var_by_value_idx;
246 struct sfm_read_info local_info;
247 int format_warning_cnt = 0;
252 *dict = dict_create ();
254 /* Create and initialize reader. */
255 r = pool_create_container (struct sfm_reader, pool);
261 r->has_long_var_names = false;
262 r->opcode_idx = sizeof r->opcodes;
264 /* TRANSLATORS: this fragment will be interpolated into
265 messages in fh_lock() that identify types of files. */
266 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
270 r->file = fn_open (fh_get_file_name (fh), "rb");
273 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
274 fh_get_file_name (r->fh), strerror (errno));
278 /* Initialize info. */
281 memset (info, 0, sizeof *info);
283 if (setjmp (r->bail_out))
288 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
290 /* Read all the variable definition records. */
291 rec_type = read_int (r);
292 while (rec_type == 2)
294 read_variable_record (r, *dict, &format_warning_cnt);
295 rec_type = read_int (r);
298 /* Figure out the case format. */
299 var_by_value_idx = make_var_by_value_idx (r, *dict);
300 setup_weight (r, weight_idx, var_by_value_idx, *dict);
302 /* Read all the rest of the dictionary records. */
303 while (rec_type != 999)
308 read_value_labels (r, *dict, var_by_value_idx);
312 sys_error (r, _("Misplaced type 4 record."));
315 read_documents (r, *dict);
319 read_extension_record (r, *dict, info);
323 sys_error (r, _("Unrecognized record type %d."), rec_type);
325 rec_type = read_int (r);
329 if ( ! r->has_long_var_names )
332 for (i = 0; i < dict_get_var_cnt (*dict); i++)
334 struct variable *var = dict_get_var (*dict, i);
335 char short_name[SHORT_NAME_LEN + 1];
336 char long_name[SHORT_NAME_LEN + 1];
338 strcpy (short_name, var_get_name (var));
340 strcpy (long_name, short_name);
341 str_lowercase (long_name);
343 /* Set long name. Renaming a variable may clear the short
344 name, but we want to retain it, so re-set it
346 dict_rename_var (*dict, var, long_name);
347 var_set_short_name (var, 0, short_name);
350 r->has_long_var_names = true;
353 recode_strings (*dict);
355 /* Read record 999 data, which is just filler. */
358 /* Warn if the actual amount of data per case differs from the
359 amount that the header claims. SPSS version 13 gets this
360 wrong when very long strings are involved, so don't warn in
362 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
363 && info->version_major != 13)
364 sys_warn (r, _("File header claims %d variable positions but "
365 "%d were read from file."),
366 claimed_oct_cnt, r->oct_cnt);
368 /* Create an index of dictionary variable widths for
369 sfm_read_case to use. We cannot use the `struct variable's
370 from the dictionary we created, because the caller owns the
371 dictionary and may destroy or modify its variables. */
372 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
373 pool_register (r->pool, free, r->sfm_vars);
374 r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
376 pool_free (r->pool, var_by_value_idx);
377 return casereader_create_sequential
379 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
380 &sys_file_casereader_class, r);
384 dict_destroy (*dict);
389 /* Closes a system file after we're done with it.
390 Returns true if an I/O error has occurred on READER, false
393 close_reader (struct sfm_reader *r)
402 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
404 msg (ME, _("Error closing system file \"%s\": %s."),
405 fh_get_file_name (r->fh), strerror (errno));
415 pool_destroy (r->pool);
420 /* Destroys READER. */
422 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
424 struct sfm_reader *r = r_;
428 /* Returns true if FILE is an SPSS system file,
431 sfm_detect (FILE *file)
435 if (fread (rec_type, 4, 1, file) != 1)
439 return !strcmp ("$FL2", rec_type);
442 /* Reads the global header of the system file.
443 Sets DICT's file label to the system file's label.
444 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
445 or to the value index of the weight variable otherwise.
446 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
447 per case that the file claims to have (although it is not
449 Initializes INFO with header information. */
451 read_header (struct sfm_reader *r, struct dictionary *dict,
452 int *weight_idx, int *claimed_oct_cnt,
453 struct sfm_read_info *info)
456 char eye_catcher[61];
457 uint8_t raw_layout_code[4];
459 char creation_date[10];
460 char creation_time[9];
462 struct substring file_label_ss;
463 struct substring product;
465 read_string (r, rec_type, sizeof rec_type);
466 read_string (r, eye_catcher, sizeof eye_catcher);
468 if (strcmp ("$FL2", rec_type) != 0)
469 sys_error (r, _("This is not an SPSS system file."));
471 /* Identify integer format. */
472 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
473 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
475 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
477 || (r->integer_format != INTEGER_MSB_FIRST
478 && r->integer_format != INTEGER_LSB_FIRST))
479 sys_error (r, _("This is not an SPSS system file."));
481 *claimed_oct_cnt = read_int (r);
482 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
483 *claimed_oct_cnt = -1;
485 r->compressed = read_int (r) != 0;
487 *weight_idx = read_int (r);
489 r->case_cnt = read_int (r);
490 if ( r->case_cnt > INT_MAX / 2)
494 /* Identify floating-point format and obtain compression bias. */
495 read_bytes (r, raw_bias, sizeof raw_bias);
496 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
498 sys_warn (r, _("Compression bias is not the usual "
499 "value of 100, or system file uses unrecognized "
500 "floating-point format."));
501 if (r->integer_format == INTEGER_MSB_FIRST)
502 r->float_format = FLOAT_IEEE_DOUBLE_BE;
504 r->float_format = FLOAT_IEEE_DOUBLE_LE;
506 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
508 read_string (r, creation_date, sizeof creation_date);
509 read_string (r, creation_time, sizeof creation_time);
510 read_string (r, file_label, sizeof file_label);
513 file_label_ss = ss_cstr (file_label);
514 ss_trim (&file_label_ss, ss_cstr (" "));
515 if (!ss_is_empty (file_label_ss))
517 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
518 dict_set_label (dict, ss_data (file_label_ss));
521 strcpy (info->creation_date, creation_date);
522 strcpy (info->creation_time, creation_time);
523 info->integer_format = r->integer_format;
524 info->float_format = r->float_format;
525 info->compressed = r->compressed;
526 info->case_cnt = r->case_cnt;
528 product = ss_cstr (eye_catcher);
529 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
530 ss_trim (&product, ss_cstr (" "));
531 str_copy_buf_trunc (info->product, sizeof info->product,
532 ss_data (product), ss_length (product));
535 /* Reads a variable (type 2) record from R and adds the
536 corresponding variable to DICT.
537 Also skips past additional variable records for long string
540 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
541 int *format_warning_cnt)
544 int has_variable_label;
545 int missing_value_code;
550 struct variable *var;
553 width = read_int (r);
554 has_variable_label = read_int (r);
555 missing_value_code = read_int (r);
556 print_format = read_int (r);
557 write_format = read_int (r);
558 read_string (r, name, sizeof name);
559 name[strcspn (name, " ")] = '\0';
561 /* Check variable name. */
562 if (name[0] == '$' || name[0] == '#')
563 sys_error (r, "Variable name begins with invalid character `%c'.",
565 if (!var_is_plausible_name (name, false))
566 sys_error (r, _("Invalid variable name `%s'."), name);
568 /* Create variable. */
569 if (width < 0 || width > 255)
570 sys_error (r, _("Bad variable width %d."), width);
571 var = dict_create_var (dict, name, width);
574 _("Duplicate variable name `%s' within system file."),
577 /* Set the short name the same as the long name. */
578 var_set_short_name (var, 0, var_get_name (var));
580 /* Get variable label, if any. */
581 if (has_variable_label != 0 && has_variable_label != 1)
582 sys_error (r, _("Variable label indicator field is not 0 or 1."));
583 if (has_variable_label == 1)
589 if (len >= sizeof label)
590 sys_error (r, _("Variable %s has label of invalid length %zu."),
592 read_string (r, label, len + 1);
593 var_set_label (var, label);
595 skip_bytes (r, ROUND_UP (len, 4) - len);
598 /* Set missing values. */
599 if (missing_value_code != 0)
601 struct missing_values mv;
604 mv_init_pool (r->pool, &mv, var_get_width (var));
605 if (var_is_numeric (var))
607 if (missing_value_code < -3 || missing_value_code > 3
608 || missing_value_code == -1)
609 sys_error (r, _("Numeric missing value indicator field is not "
610 "-3, -2, 0, 1, 2, or 3."));
611 if (missing_value_code < 0)
613 double low = read_float (r);
614 double high = read_float (r);
615 mv_add_range (&mv, low, high);
616 missing_value_code = -missing_value_code - 2;
618 for (i = 0; i < missing_value_code; i++)
619 mv_add_num (&mv, read_float (r));
623 int mv_width = MAX (width, 8);
626 if (missing_value_code < 1 || missing_value_code > 3)
627 sys_error (r, _("String missing value indicator field is not "
630 value_init (&value, mv_width);
631 value_set_missing (&value, mv_width);
632 for (i = 0; i < missing_value_code; i++)
634 char *s = value_str_rw (&value, mv_width);
635 read_bytes (r, s, 8);
638 value_destroy (&value, mv_width);
640 var_set_missing_values (var, &mv);
644 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
645 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
647 /* Account for values.
648 Skip long string continuation records, if any. */
649 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
655 for (i = 1; i < nv; i++)
657 /* Check for record type 2 and width -1. */
658 if (read_int (r) != 2 || read_int (r) != -1)
659 sys_error (r, _("Missing string continuation record."));
661 /* Skip and ignore remaining continuation data. */
662 has_variable_label = read_int (r);
663 missing_value_code = read_int (r);
664 print_format = read_int (r);
665 write_format = read_int (r);
666 read_string (r, name, sizeof name);
668 /* Variable label fields on continuation records have
669 been spotted in system files created by "SPSS Power
670 Macintosh Release 6.1". */
671 if (has_variable_label)
672 skip_bytes (r, ROUND_UP (read_int (r), 4));
677 /* Translates the format spec from sysfile format to internal
680 parse_format_spec (struct sfm_reader *r, unsigned int s,
681 enum which_format which, struct variable *v,
682 int *format_warning_cnt)
684 const int max_format_warnings = 8;
686 uint8_t raw_type = s >> 16;
692 if (!fmt_from_io (raw_type, &f.type))
693 sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
698 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
703 if (which == PRINT_FORMAT)
704 var_set_print_format (v, &f);
706 var_set_write_format (v, &f);
708 else if (*++format_warning_cnt <= max_format_warnings)
710 char fmt_string[FMT_STRING_LEN_MAX + 1];
711 sys_warn (r, _("%s variable %s has invalid %s format %s."),
712 var_is_numeric (v) ? _("Numeric") : _("String"),
714 which == PRINT_FORMAT ? _("print") : _("write"),
715 fmt_to_string (&f, fmt_string));
717 if (*format_warning_cnt == max_format_warnings)
718 sys_warn (r, _("Suppressing further invalid format warnings."));
722 /* Sets the weighting variable in DICT to the variable
723 corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is
726 setup_weight (struct sfm_reader *r, int weight_idx,
727 struct variable **var_by_value_idx, struct dictionary *dict)
731 struct variable *weight_var
732 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
733 if (var_is_numeric (weight_var))
734 dict_set_weight (dict, weight_var);
736 sys_error (r, _("Weighting variable must be numeric."));
740 /* Reads a document record, type 6, from system file R, and sets up
741 the documents and n_documents fields in the associated
744 read_documents (struct sfm_reader *r, struct dictionary *dict)
749 if (dict_get_documents (dict) != NULL)
750 sys_error (r, _("Multiple type 6 (document) records."));
752 line_cnt = read_int (r);
754 sys_error (r, _("Number of document lines (%d) "
755 "must be greater than 0."), line_cnt);
757 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
758 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
759 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
760 dict_set_documents (dict, documents);
762 sys_error (r, _("Document line contains null byte."));
763 pool_free (r->pool, documents);
766 /* Read a type 7 extension record. */
768 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
769 struct sfm_read_info *info)
771 int subtype = read_int (r);
772 size_t size = read_int (r);
773 size_t count = read_int (r);
774 size_t bytes = size * count;
776 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
777 allows an extra byte for a null terminator, used by some
778 extension processing routines. */
779 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
780 sys_error (r, "Record type 7 subtype %d too large.", subtype);
785 read_machine_integer_info (r, size, count, info, dict);
789 read_machine_float_info (r, size, count);
793 /* Variable sets information. We don't use these yet.
794 They only apply to GUIs; see VARSETS on the APPLY
795 DICTIONARY command in SPSS documentation. */
799 /* DATE variable information. We don't use it yet, but we
804 /* Used by the MRSETS command. */
808 /* Used by the SPSS Data Entry software. */
812 read_display_parameters (r, size, count, dict);
816 read_long_var_name_map (r, size, count, dict);
820 read_long_string_map (r, size, count, dict);
824 /* New in SPSS v14? Unknown purpose. */
828 read_data_file_attributes (r, size, count, dict);
832 read_variable_attributes (r, size, count, dict);
836 /* New in SPSS 16. Contains a single string that describes
837 the character encoding, e.g. "windows-1252". */
839 char *encoding = pool_calloc (r->pool, size, count + 1);
840 read_string (r, encoding, count + 1);
841 dict_set_encoding (dict, encoding);
846 /* New in SPSS 16. Encodes value labels for long string
848 read_long_string_value_labels (r, size, count, dict);
852 sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
853 subtype, PACKAGE_BUGREPORT);
857 skip_bytes (r, bytes);
860 /* Read record type 7, subtype 3. */
862 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
863 struct sfm_read_info *info,
864 struct dictionary *dict)
866 int version_major = read_int (r);
867 int version_minor = read_int (r);
868 int version_revision = read_int (r);
869 int machine_code UNUSED = read_int (r);
870 int float_representation = read_int (r);
871 int compression_code UNUSED = read_int (r);
872 int integer_representation = read_int (r);
873 int character_code = read_int (r);
875 int expected_float_format;
876 int expected_integer_format;
878 if (size != 4 || count != 8)
879 sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
883 /* Save version info. */
884 info->version_major = version_major;
885 info->version_minor = version_minor;
886 info->version_revision = version_revision;
888 /* Check floating point format. */
889 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
890 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
891 expected_float_format = 1;
892 else if (r->float_format == FLOAT_Z_LONG)
893 expected_float_format = 2;
894 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
895 expected_float_format = 3;
898 if (float_representation != expected_float_format)
899 sys_error (r, _("Floating-point representation indicated by "
900 "system file (%d) differs from expected (%d)."),
901 r->float_format, expected_float_format);
903 /* Check integer format. */
904 if (r->integer_format == INTEGER_MSB_FIRST)
905 expected_integer_format = 1;
906 else if (r->integer_format == INTEGER_LSB_FIRST)
907 expected_integer_format = 2;
910 if (integer_representation != expected_integer_format)
912 static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
913 sys_warn (r, _("Integer format indicated by system file (%s) "
914 "differs from expected (%s)."),
915 gettext (endian[integer_representation == 1]),
916 gettext (endian[expected_integer_format == 1]));
921 Record 7 (20) provides a much more reliable way of
922 setting the encoding.
923 The character_code is used as a fallback only.
925 if ( NULL == dict_get_encoding (dict))
927 switch (character_code)
930 dict_set_encoding (dict, "EBCDIC-US");
934 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
935 respectively. However, there are known to be many files
936 in the wild with character code 2, yet have data which are
938 Therefore we ignore these values.
942 dict_set_encoding (dict, "MS_KANJI");
945 dict_set_encoding (dict, "UTF-7");
948 dict_set_encoding (dict, "UTF-8");
953 snprintf (enc, 100, "CP%d", character_code);
954 dict_set_encoding (dict, enc);
961 /* Read record type 7, subtype 4. */
963 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
965 double sysmis = read_float (r);
966 double highest = read_float (r);
967 double lowest = read_float (r);
969 if (size != 8 || count != 3)
970 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
973 if (sysmis != SYSMIS)
974 sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
975 if (highest != HIGHEST)
976 sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
977 if (lowest != LOWEST)
978 sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
981 /* Read record type 7, subtype 11, which specifies how variables
982 should be displayed in GUI environments. */
984 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
985 struct dictionary *dict)
994 sys_warn (r, _("Bad size %zu on extension 11."), size);
995 skip_bytes (r, size * count);
999 n_vars = dict_get_var_cnt (dict);
1000 if (count == 3 * n_vars)
1001 includes_width = true;
1002 else if (count == 2 * n_vars)
1003 includes_width = false;
1006 sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
1008 skip_bytes (r, size * count);
1012 for (i = 0; i < n_vars; ++i)
1014 struct variable *v = dict_get_var (dict, i);
1015 int measure = read_int (r);
1016 int width = includes_width ? read_int (r) : 0;
1017 int align = read_int (r);
1019 /* SPSS 14 sometimes seems to set string variables' measure
1021 if (0 == measure && var_is_alpha (v))
1024 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1027 sys_warn (r, _("Invalid variable display parameters "
1028 "for variable %zu (%s). "
1029 "Default parameters substituted."),
1030 i, var_get_name (v));
1035 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1036 : measure == 2 ? MEASURE_ORDINAL
1038 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1039 : align == 1 ? ALIGN_RIGHT
1042 /* Older versions (SPSS 9.0) sometimes set the display
1043 width to zero. This causes confusion in the GUI, so
1044 only set the width if it is nonzero. */
1046 var_set_display_width (v, width);
1050 /* Reads record type 7, subtype 13, which gives the long name
1051 that corresponds to each short name. Modifies variable names
1052 in DICT accordingly. */
1054 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
1055 struct dictionary *dict)
1057 struct text_record *text;
1058 struct variable *var;
1061 text = open_text_record (r, size * count);
1062 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1065 size_t short_name_cnt;
1068 /* Validate long name. */
1069 if (!var_is_valid_name (long_name, false))
1071 sys_warn (r, _("Long variable mapping from %s to invalid "
1072 "variable name `%s'."),
1073 var_get_name (var), long_name);
1077 /* Identify any duplicates. */
1078 if (strcasecmp (var_get_short_name (var, 0), long_name)
1079 && dict_lookup_var (dict, long_name) != NULL)
1081 sys_warn (r, _("Duplicate long variable name `%s' "
1082 "within system file."), long_name);
1086 /* Renaming a variable may clear its short names, but we
1087 want to retain them, so we save them and re-set them
1089 short_name_cnt = var_get_short_name_cnt (var);
1090 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
1091 for (i = 0; i < short_name_cnt; i++)
1093 const char *s = var_get_short_name (var, i);
1094 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1097 /* Set long name. */
1098 dict_rename_var (dict, var, long_name);
1100 /* Restore short names. */
1101 for (i = 0; i < short_name_cnt; i++)
1103 var_set_short_name (var, i, short_names[i]);
1104 free (short_names[i]);
1108 close_text_record (r, text);
1109 r->has_long_var_names = true;
1112 /* Reads record type 7, subtype 14, which gives the real length
1113 of each very long string. Rearranges DICT accordingly. */
1115 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
1116 struct dictionary *dict)
1118 struct text_record *text;
1119 struct variable *var;
1122 text = open_text_record (r, size * count);
1123 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1125 size_t idx = var_get_dict_index (var);
1131 length = strtol (length_s, NULL, 10);
1132 if (length < 1 || length > MAX_STRING)
1134 sys_warn (r, _("%s listed as string of invalid length %s "
1135 "in very length string record."),
1136 var_get_name (var), length_s);
1140 /* Check segments. */
1141 segment_cnt = sfm_width_to_segments (length);
1142 if (segment_cnt == 1)
1144 sys_warn (r, _("%s listed in very long string record with width %s, "
1145 "which requires only one segment."),
1146 var_get_name (var), length_s);
1149 if (idx + segment_cnt > dict_get_var_cnt (dict))
1150 sys_error (r, _("Very long string %s overflows dictionary."),
1151 var_get_name (var));
1153 /* Get the short names from the segments and check their
1155 for (i = 0; i < segment_cnt; i++)
1157 struct variable *seg = dict_get_var (dict, idx + i);
1158 int alloc_width = sfm_segment_alloc_width (length, i);
1159 int width = var_get_width (seg);
1162 var_set_short_name (var, i, var_get_short_name (seg, 0));
1163 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1164 sys_error (r, _("Very long string with width %ld has segment %d "
1165 "of width %d (expected %d)"),
1166 length, i, width, alloc_width);
1168 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1169 var_set_width (var, length);
1171 close_text_record (r, text);
1172 dict_compact_values (dict);
1175 /* Reads value labels from sysfile H and inserts them into the
1176 associated dictionary. */
1178 read_value_labels (struct sfm_reader *r,
1179 struct dictionary *dict, struct variable **var_by_value_idx)
1181 struct pool *subpool;
1185 char raw_value[8]; /* Value as uninterpreted bytes. */
1186 union value value; /* Value. */
1187 char *label; /* Null-terminated label string. */
1190 struct label *labels = NULL;
1191 int label_cnt; /* Number of labels. */
1193 struct variable **var = NULL; /* Associated variables. */
1194 int var_cnt; /* Number of associated variables. */
1195 int max_width; /* Maximum width of string variables. */
1199 subpool = pool_create_subpool (r->pool);
1201 /* Read the type 3 record and record its contents. We can't do
1202 much with the data yet because we don't know whether it is
1203 of numeric or string type. */
1205 /* Read number of labels. */
1206 label_cnt = read_int (r);
1208 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1210 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1215 /* Read each value/label tuple into labels[]. */
1216 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1217 for (i = 0; i < label_cnt; i++)
1219 struct label *label = labels + i;
1220 unsigned char label_len;
1224 read_bytes (r, label->raw_value, sizeof label->raw_value);
1226 /* Read label length. */
1227 read_bytes (r, &label_len, sizeof label_len);
1228 padded_len = ROUND_UP (label_len + 1, 8);
1230 /* Read label, padding. */
1231 label->label = pool_alloc (subpool, padded_len + 1);
1232 read_bytes (r, label->label, padded_len - 1);
1233 label->label[label_len] = 0;
1236 /* Now, read the type 4 record that has the list of variables
1237 to which the value labels are to be applied. */
1239 /* Read record type of type 4 record. */
1240 if (read_int (r) != 4)
1241 sys_error (r, _("Variable index record (type 4) does not immediately "
1242 "follow value label record (type 3) as it should."));
1244 /* Read number of variables associated with value label from type 4
1246 var_cnt = read_int (r);
1247 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1248 sys_error (r, _("Number of variables associated with a value label (%d) "
1249 "is not between 1 and the number of variables (%zu)."),
1250 var_cnt, dict_get_var_cnt (dict));
1252 /* Read the list of variables. */
1253 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1255 for (i = 0; i < var_cnt; i++)
1257 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1258 if (var_get_width (var[i]) > 8)
1259 sys_error (r, _("Value labels may not be added to long string "
1260 "variables (e.g. %s) using records types 3 and 4."),
1261 var_get_name (var[i]));
1262 max_width = MAX (max_width, var_get_width (var[i]));
1265 /* Type check the variables. */
1266 for (i = 1; i < var_cnt; i++)
1267 if (var_get_type (var[i]) != var_get_type (var[0]))
1268 sys_error (r, _("Variables associated with value label are not all of "
1269 "identical type. Variable %s is %s, but variable "
1271 var_get_name (var[0]),
1272 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1273 var_get_name (var[i]),
1274 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1276 /* Fill in labels[].value, now that we know the desired type. */
1277 for (i = 0; i < label_cnt; i++)
1279 struct label *label = labels + i;
1281 value_init_pool (subpool, &label->value, max_width);
1282 if (var_is_alpha (var[0]))
1283 buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
1284 label->raw_value, sizeof label->raw_value, ' ');
1286 label->value.f = float_get_double (r->float_format, label->raw_value);
1289 /* Assign the `value_label's to each variable. */
1290 for (i = 0; i < var_cnt; i++)
1292 struct variable *v = var[i];
1295 /* Add each label to the variable. */
1296 for (j = 0; j < label_cnt; j++)
1298 struct label *label = &labels[j];
1299 if (!var_add_value_label (v, &label->value, label->label))
1301 if (var_is_numeric (var[0]))
1302 sys_warn (r, _("Duplicate value label for %g on %s."),
1303 label->value.f, var_get_name (v));
1305 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1306 max_width, value_str (&label->value, max_width),
1312 pool_destroy (subpool);
1315 /* Reads a set of custom attributes from TEXT into ATTRS.
1316 ATTRS may be a null pointer, in which case the attributes are
1317 read but discarded. */
1319 read_attributes (struct sfm_reader *r, struct text_record *text,
1320 struct attrset *attrs)
1324 struct attribute *attr;
1328 /* Parse the key. */
1329 key = text_get_token (text, ss_cstr ("("));
1333 attr = attribute_create (key);
1334 for (index = 1; ; index++)
1336 /* Parse the value. */
1340 value = text_get_token (text, ss_cstr ("\n"));
1343 text_warn (r, text, _("Error parsing attribute value %s[%d]"),
1348 length = strlen (value);
1349 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
1351 value[length - 1] = '\0';
1352 attribute_add_value (attr, value + 1);
1357 _("Attribute value %s[%d] is not quoted: %s"),
1359 attribute_add_value (attr, value);
1362 /* Was this the last value for this attribute? */
1363 if (text_match (text, ')'))
1367 attrset_add (attrs, attr);
1369 attribute_destroy (attr);
1371 while (!text_match (text, '/'));
1374 /* Reads record type 7, subtype 17, which lists custom
1375 attributes on the data file. */
1377 read_data_file_attributes (struct sfm_reader *r,
1378 size_t size, size_t count,
1379 struct dictionary *dict)
1381 struct text_record *text = open_text_record (r, size * count);
1382 read_attributes (r, text, dict_get_attributes (dict));
1383 close_text_record (r, text);
1387 skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
1391 for (i = 0; i < n_labels; i++)
1393 size_t value_length, label_length;
1395 value_length = read_int (r);
1396 skip_bytes (r, value_length);
1397 label_length = read_int (r);
1398 skip_bytes (r, label_length);
1403 read_long_string_value_labels (struct sfm_reader *r,
1404 size_t size, size_t count,
1405 struct dictionary *d)
1407 const off_t start = ftello (r->file);
1408 while (ftello (r->file) - start < size * count)
1410 char var_name[VAR_NAME_LEN + 1];
1418 var_name_len = read_int (r);
1419 if (var_name_len > VAR_NAME_LEN)
1420 sys_error (r, _("Variable name length in long string value label "
1421 "record (%d) exceeds %d-byte limit."),
1422 var_name_len, VAR_NAME_LEN);
1423 read_string (r, var_name, var_name_len + 1);
1424 width = read_int (r);
1425 n_labels = read_int (r);
1427 v = dict_lookup_var (d, var_name);
1430 sys_warn (r, _("Ignoring long string value record for "
1431 "unknown variable %s."), var_name);
1432 skip_long_string_value_labels (r, n_labels);
1435 if (var_is_numeric (v))
1437 sys_warn (r, _("Ignoring long string value record for "
1438 "numeric variable %s."), var_name);
1439 skip_long_string_value_labels (r, n_labels);
1442 if (width != var_get_width (v))
1444 sys_warn (r, _("Ignoring long string value record for variable %s "
1445 "because the record's width (%d) does not match the "
1446 "variable's width (%d)"),
1447 var_name, width, var_get_width (v));
1448 skip_long_string_value_labels (r, n_labels);
1453 value_init_pool (r->pool, &value, width);
1454 for (i = 0; i < n_labels; i++)
1456 size_t value_length, label_length;
1461 value_length = read_int (r);
1462 if (value_length == width)
1463 read_string (r, value_str_rw (&value, width), width + 1);
1466 sys_warn (r, _("Ignoring long string value %zu for variable %s, "
1467 "with width %d, that has bad value width %zu."),
1468 i, var_get_name (v), width, value_length);
1469 skip_bytes (r, value_length);
1474 label_length = read_int (r);
1475 read_string (r, label, MIN (sizeof label, label_length + 1));
1476 if (label_length >= sizeof label)
1478 /* Skip and silently ignore label text after the
1479 first 255 bytes. The maximum documented length
1480 of a label is 120 bytes so this is more than
1482 skip_bytes (r, sizeof label - (label_length + 1));
1485 if (!skip && !var_add_value_label (v, &value, label))
1486 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1487 width, value_str (&value, width), var_get_name (v));
1493 /* Reads record type 7, subtype 18, which lists custom
1494 attributes on individual variables. */
1496 read_variable_attributes (struct sfm_reader *r,
1497 size_t size, size_t count,
1498 struct dictionary *dict)
1500 struct text_record *text = open_text_record (r, size * count);
1503 struct variable *var;
1504 if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var))
1506 read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
1508 close_text_record (r, text);
1514 static void partial_record (struct sfm_reader *r)
1517 static void read_error (struct casereader *, const struct sfm_reader *);
1519 static bool read_case_number (struct sfm_reader *, double *);
1520 static bool read_case_string (struct sfm_reader *, char *, size_t);
1521 static int read_opcode (struct sfm_reader *);
1522 static bool read_compressed_number (struct sfm_reader *, double *);
1523 static bool read_compressed_string (struct sfm_reader *, char *);
1524 static bool read_whole_strings (struct sfm_reader *, char *, size_t);
1525 static bool skip_whole_strings (struct sfm_reader *, size_t);
1527 /* Reads and returns one case from READER's file. Returns a null
1528 pointer if not successful. */
1529 static struct ccase *
1530 sys_file_casereader_read (struct casereader *reader, void *r_)
1532 struct sfm_reader *r = r_;
1533 struct ccase *volatile c;
1539 c = case_create (r->proto);
1540 if (setjmp (r->bail_out))
1542 casereader_force_error (reader);
1547 for (i = 0; i < r->sfm_var_cnt; i++)
1549 struct sfm_var *sv = &r->sfm_vars[i];
1550 union value *v = case_data_rw_idx (c, sv->case_index);
1552 if (sv->var_width == 0)
1554 if (!read_case_number (r, &v->f))
1559 char *s = value_str_rw (v, sv->var_width);
1560 if (!read_case_string (r, s + sv->offset, sv->segment_width))
1562 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
1572 if (r->case_cnt != -1)
1573 read_error (reader, r);
1577 /* Issues an error that R ends in a partial record. */
1579 partial_record (struct sfm_reader *r)
1581 sys_error (r, _("File ends in partial case."));
1584 /* Issues an error that an unspecified error occurred SFM, and
1587 read_error (struct casereader *r, const struct sfm_reader *sfm)
1589 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1590 casereader_force_error (r);
1593 /* Reads a number from R and stores its value in *D.
1594 If R is compressed, reads a compressed number;
1595 otherwise, reads a number in the regular way.
1596 Returns true if successful, false if end of file is
1597 reached immediately. */
1599 read_case_number (struct sfm_reader *r, double *d)
1604 if (!try_read_bytes (r, number, sizeof number))
1606 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1610 return read_compressed_number (r, d);
1613 /* Reads LENGTH string bytes from R into S.
1614 Always reads a multiple of 8 bytes; if LENGTH is not a
1615 multiple of 8, then extra bytes are read and discarded without
1617 Reads compressed strings if S is compressed.
1618 Returns true if successful, false if end of file is
1619 reached immediately. */
1621 read_case_string (struct sfm_reader *r, char *s, size_t length)
1623 size_t whole = ROUND_DOWN (length, 8);
1624 size_t partial = length % 8;
1628 if (!read_whole_strings (r, s, whole))
1635 if (!read_whole_strings (r, bounce, sizeof bounce))
1641 memcpy (s + whole, bounce, partial);
1647 /* Reads and returns the next compression opcode from R. */
1649 read_opcode (struct sfm_reader *r)
1651 assert (r->compressed);
1655 if (r->opcode_idx >= sizeof r->opcodes)
1657 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1661 opcode = r->opcodes[r->opcode_idx++];
1668 /* Reads a compressed number from R and stores its value in D.
1669 Returns true if successful, false if end of file is
1670 reached immediately. */
1672 read_compressed_number (struct sfm_reader *r, double *d)
1674 int opcode = read_opcode (r);
1682 *d = read_float (r);
1686 sys_error (r, _("Compressed data is corrupt."));
1693 *d = opcode - r->bias;
1700 /* Reads a compressed 8-byte string segment from R and stores it
1702 Returns true if successful, false if end of file is
1703 reached immediately. */
1705 read_compressed_string (struct sfm_reader *r, char *dst)
1707 switch (read_opcode (r))
1714 read_bytes (r, dst, 8);
1718 memset (dst, ' ', 8);
1722 sys_error (r, _("Compressed data is corrupt."));
1728 /* Reads LENGTH string bytes from R into S.
1729 LENGTH must be a multiple of 8.
1730 Reads compressed strings if S is compressed.
1731 Returns true if successful, false if end of file is
1732 reached immediately. */
1734 read_whole_strings (struct sfm_reader *r, char *s, size_t length)
1736 assert (length % 8 == 0);
1738 return try_read_bytes (r, s, length);
1742 for (ofs = 0; ofs < length; ofs += 8)
1743 if (!read_compressed_string (r, s + ofs))
1753 /* Skips LENGTH string bytes from R.
1754 LENGTH must be a multiple of 8.
1755 (LENGTH is also limited to 1024, but that's only because the
1756 current caller never needs more than that many bytes.)
1757 Returns true if successful, false if end of file is
1758 reached immediately. */
1760 skip_whole_strings (struct sfm_reader *r, size_t length)
1763 assert (length < sizeof buffer);
1764 return read_whole_strings (r, buffer, length);
1767 /* Creates and returns a table that can be used for translating a value
1768 index into a case to a "struct variable *" for DICT. Multiple
1769 system file fields reference variables this way.
1771 This table must be created before processing the very long
1772 string extension record, because that record causes some
1773 values to be deleted from the case and the dictionary to be
1775 static struct variable **
1776 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1778 struct variable **var_by_value_idx;
1782 var_by_value_idx = pool_nmalloc (r->pool,
1783 r->oct_cnt, sizeof *var_by_value_idx);
1784 for (i = 0; i < dict_get_var_cnt (dict); i++)
1786 struct variable *v = dict_get_var (dict, i);
1787 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1790 var_by_value_idx[value_idx++] = v;
1791 for (j = 1; j < nv; j++)
1792 var_by_value_idx[value_idx++] = NULL;
1794 assert (value_idx == r->oct_cnt);
1796 return var_by_value_idx;
1799 /* Returns the "struct variable" corresponding to the given
1800 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index
1802 static struct variable *
1803 lookup_var_by_value_idx (struct sfm_reader *r,
1804 struct variable **var_by_value_idx, int value_idx)
1806 struct variable *var;
1808 if (value_idx < 1 || value_idx > r->oct_cnt)
1809 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1810 value_idx, r->oct_cnt);
1812 var = var_by_value_idx[value_idx - 1];
1814 sys_error (r, _("Variable index %d refers to long string "
1821 /* Returns the variable in D with the given SHORT_NAME,
1822 or a null pointer if there is none. */
1823 static struct variable *
1824 lookup_var_by_short_name (struct dictionary *d, const char *short_name)
1826 struct variable *var;
1830 /* First try looking up by full name. This often succeeds. */
1831 var = dict_lookup_var (d, short_name);
1832 if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
1835 /* Iterate through the whole dictionary as a fallback. */
1836 var_cnt = dict_get_var_cnt (d);
1837 for (i = 0; i < var_cnt; i++)
1839 var = dict_get_var (d, i);
1840 if (!strcasecmp (var_get_short_name (var, 0), short_name))
1847 /* Helpers for reading records that contain structured text
1850 /* Maximum number of warnings to issue for a single text
1852 #define MAX_TEXT_WARNINGS 5
1857 struct substring buffer; /* Record contents. */
1858 size_t pos; /* Current position in buffer. */
1859 int n_warnings; /* Number of warnings issued or suppressed. */
1862 /* Reads SIZE bytes into a text record for R,
1863 and returns the new text record. */
1864 static struct text_record *
1865 open_text_record (struct sfm_reader *r, size_t size)
1867 struct text_record *text = pool_alloc (r->pool, sizeof *text);
1868 char *buffer = pool_malloc (r->pool, size + 1);
1869 read_bytes (r, buffer, size);
1870 text->buffer = ss_buffer (buffer, size);
1872 text->n_warnings = 0;
1876 /* Closes TEXT, frees its storage, and issues a final warning
1877 about suppressed warnings if necesary. */
1879 close_text_record (struct sfm_reader *r, struct text_record *text)
1881 if (text->n_warnings > MAX_TEXT_WARNINGS)
1882 sys_warn (r, _("Suppressed %d additional related warnings."),
1883 text->n_warnings - MAX_TEXT_WARNINGS);
1884 pool_free (r->pool, ss_data (text->buffer));
1887 /* Reads a variable=value pair from TEXT.
1888 Looks up the variable in DICT and stores it into *VAR.
1889 Stores a null-terminated value into *VALUE. */
1891 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
1892 struct text_record *text,
1893 struct variable **var, char **value)
1897 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
1900 *value = text_get_token (text, ss_buffer ("\t\0", 2));
1904 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
1905 ss_buffer ("\t\0", 2));
1913 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
1914 struct text_record *text, struct substring delimiters,
1915 struct variable **var)
1917 char *short_name = text_get_token (text, delimiters);
1918 if (short_name == NULL)
1921 *var = lookup_var_by_short_name (dict, short_name);
1923 text_warn (r, text, _("Variable map refers to unknown variable %s."),
1928 /* Displays a warning for the current file position, limiting the
1929 number to MAX_TEXT_WARNINGS for TEXT. */
1931 text_warn (struct sfm_reader *r, struct text_record *text,
1932 const char *format, ...)
1934 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
1938 va_start (args, format);
1939 sys_msg (r, MW, format, args);
1945 text_get_token (struct text_record *text, struct substring delimiters)
1947 struct substring token;
1949 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
1951 ss_data (token)[ss_length (token)] = '\0';
1952 return ss_data (token);
1956 text_match (struct text_record *text, char c)
1958 if (text->buffer.string[text->pos] == c)
1969 /* Displays a corruption message. */
1971 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
1976 ds_init_empty (&text);
1977 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
1978 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
1979 ds_put_vformat (&text, format, args);
1981 m.category = msg_class_to_category (class);
1982 m.severity = msg_class_to_severity (class);
1983 m.where.file_name = NULL;
1984 m.where.line_number = 0;
1985 m.text = ds_cstr (&text);
1990 /* Displays a warning for the current file position. */
1992 sys_warn (struct sfm_reader *r, const char *format, ...)
1996 va_start (args, format);
1997 sys_msg (r, MW, format, args);
2001 /* Displays an error for the current file position,
2002 marks it as in an error state,
2003 and aborts reading it using longjmp. */
2005 sys_error (struct sfm_reader *r, const char *format, ...)
2009 va_start (args, format);
2010 sys_msg (r, ME, format, args);
2014 longjmp (r->bail_out, 1);
2017 /* Reads BYTE_CNT bytes into BUF.
2018 Returns true if exactly BYTE_CNT bytes are successfully read.
2019 Aborts if an I/O error or a partial read occurs.
2020 If EOF_IS_OK, then an immediate end-of-file causes false to be
2021 returned; otherwise, immediate end-of-file causes an abort
2024 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
2025 void *buf, size_t byte_cnt)
2027 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
2028 if (bytes_read == byte_cnt)
2030 else if (ferror (r->file))
2031 sys_error (r, _("System error: %s."), strerror (errno));
2032 else if (!eof_is_ok || bytes_read != 0)
2033 sys_error (r, _("Unexpected end of file."));
2038 /* Reads BYTE_CNT into BUF.
2039 Aborts upon I/O error or if end-of-file is encountered. */
2041 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2043 read_bytes_internal (r, false, buf, byte_cnt);
2046 /* Reads BYTE_CNT bytes into BUF.
2047 Returns true if exactly BYTE_CNT bytes are successfully read.
2048 Returns false if an immediate end-of-file is encountered.
2049 Aborts if an I/O error or a partial read occurs. */
2051 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2053 return read_bytes_internal (r, true, buf, byte_cnt);
2056 /* Reads a 32-bit signed integer from R and returns its value in
2059 read_int (struct sfm_reader *r)
2062 read_bytes (r, integer, sizeof integer);
2063 return integer_get (r->integer_format, integer, sizeof integer);
2066 /* Reads a 64-bit floating-point number from R and returns its
2067 value in host format. */
2069 read_float (struct sfm_reader *r)
2072 read_bytes (r, number, sizeof number);
2073 return float_get_double (r->float_format, number);
2076 /* Reads exactly SIZE - 1 bytes into BUFFER
2077 and stores a null byte into BUFFER[SIZE - 1]. */
2079 read_string (struct sfm_reader *r, char *buffer, size_t size)
2082 read_bytes (r, buffer, size - 1);
2083 buffer[size - 1] = '\0';
2086 /* Skips BYTES bytes forward in R. */
2088 skip_bytes (struct sfm_reader *r, size_t bytes)
2093 size_t chunk = MIN (sizeof buffer, bytes);
2094 read_bytes (r, buffer, chunk);
2099 static const struct casereader_class sys_file_casereader_class =
2101 sys_file_casereader_read,
2102 sys_file_casereader_destroy,