1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/i18n.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/message.h>
31 #include <libpspp/compiler.h>
32 #include <libpspp/misc.h>
33 #include <libpspp/pool.h>
34 #include <libpspp/str.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/array.h>
38 #include <data/attributes.h>
39 #include <data/case.h>
40 #include <data/casereader-provider.h>
41 #include <data/casereader.h>
42 #include <data/dictionary.h>
43 #include <data/file-handle-def.h>
44 #include <data/file-name.h>
45 #include <data/format.h>
46 #include <data/missing-values.h>
47 #include <data/short-names.h>
48 #include <data/value-labels.h>
49 #include <data/variable.h>
50 #include <data/value.h>
55 #include "unlocked-io.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) (msgid)
63 /* System file reader. */
66 /* Resource tracking. */
67 struct pool *pool; /* All system file state. */
68 jmp_buf bail_out; /* longjmp() target for error handling. */
71 struct file_handle *fh; /* File handle. */
72 struct fh_lock *lock; /* Mutual exclusion for file handle. */
73 FILE *file; /* File stream. */
74 bool error; /* I/O or corruption error? */
75 struct caseproto *proto; /* Format of output cases. */
78 enum integer_format integer_format; /* On-disk integer format. */
79 enum float_format float_format; /* On-disk floating point format. */
80 int oct_cnt; /* Number of 8-byte units per case. */
81 struct sfm_var *sfm_vars; /* Variables. */
82 size_t sfm_var_cnt; /* Number of variables. */
83 casenumber case_cnt; /* Number of cases */
84 bool has_long_var_names; /* File has a long variable name map */
87 bool compressed; /* File is compressed? */
88 double bias; /* Compression bias, usually 100.0. */
89 uint8_t opcodes[8]; /* Current block of opcodes. */
90 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
93 static const struct casereader_class sys_file_casereader_class;
95 static bool close_reader (struct sfm_reader *);
97 static struct variable **make_var_by_value_idx (struct sfm_reader *,
99 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
103 static void sys_msg (struct sfm_reader *r, int class,
104 const char *format, va_list args)
105 PRINTF_FORMAT (3, 0);
106 static void sys_warn (struct sfm_reader *, const char *, ...)
107 PRINTF_FORMAT (2, 3);
108 static void sys_error (struct sfm_reader *, const char *, ...)
112 static void read_bytes (struct sfm_reader *, void *, size_t);
113 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
114 static int read_int (struct sfm_reader *);
115 static double read_float (struct sfm_reader *);
116 static void read_string (struct sfm_reader *, char *, size_t);
117 static void skip_bytes (struct sfm_reader *, size_t);
119 static struct text_record *open_text_record (struct sfm_reader *, size_t size);
120 static void close_text_record (struct sfm_reader *r,
121 struct text_record *);
122 static bool read_variable_to_value_pair (struct sfm_reader *,
124 struct text_record *,
125 struct variable **var, char **value);
126 static void text_warn (struct sfm_reader *r, struct text_record *text,
127 const char *format, ...)
128 PRINTF_FORMAT (3, 4);
129 static char *text_get_token (struct text_record *,
130 struct substring delimiters);
131 static bool text_match (struct text_record *, char c);
132 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
133 struct text_record *,
134 struct substring delimiters,
137 static bool close_reader (struct sfm_reader *r);
139 /* Dictionary reader. */
147 static void read_header (struct sfm_reader *, struct dictionary *,
148 int *weight_idx, int *claimed_oct_cnt,
149 struct sfm_read_info *);
150 static void read_variable_record (struct sfm_reader *, struct dictionary *,
151 int *format_warning_cnt);
152 static void parse_format_spec (struct sfm_reader *, unsigned int,
153 enum which_format, struct variable *,
154 int *format_warning_cnt);
155 static void setup_weight (struct sfm_reader *, int weight_idx,
156 struct variable **var_by_value_idx,
157 struct dictionary *);
158 static void read_documents (struct sfm_reader *, struct dictionary *);
159 static void read_value_labels (struct sfm_reader *, struct dictionary *,
160 struct variable **var_by_value_idx);
162 static void read_extension_record (struct sfm_reader *, struct dictionary *,
163 struct sfm_read_info *);
164 static void read_machine_integer_info (struct sfm_reader *,
165 size_t size, size_t count,
166 struct sfm_read_info *,
169 static void read_machine_float_info (struct sfm_reader *,
170 size_t size, size_t count);
171 static void read_display_parameters (struct sfm_reader *,
172 size_t size, size_t count,
173 struct dictionary *);
174 static void read_long_var_name_map (struct sfm_reader *,
175 size_t size, size_t count,
176 struct dictionary *);
177 static void read_long_string_map (struct sfm_reader *,
178 size_t size, size_t count,
179 struct dictionary *);
180 static void read_data_file_attributes (struct sfm_reader *,
181 size_t size, size_t count,
182 struct dictionary *);
183 static void read_variable_attributes (struct sfm_reader *,
184 size_t size, size_t count,
185 struct dictionary *);
186 static void read_long_string_value_labels (struct sfm_reader *,
187 size_t size, size_t count,
188 struct dictionary *);
190 /* Convert all the strings in DICT from the dict encoding to UTF8 */
192 recode_strings (struct dictionary *dict)
196 const char *enc = dict_get_encoding (dict);
199 enc = get_default_encoding ();
201 for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
203 /* Convert the long variable name */
204 struct variable *var = dict_get_var (dict, i);
205 const char *native_name = var_get_name (var);
206 char *utf8_name = recode_string (UTF8, enc, native_name, -1);
207 if ( 0 != strcmp (utf8_name, native_name))
209 if ( NULL == dict_lookup_var (dict, utf8_name))
210 dict_rename_var (dict, var, utf8_name);
213 _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name);
218 /* Convert the variable label */
219 if (var_has_label (var))
221 char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
222 var_set_label (var, utf8_label);
226 if (var_has_value_labels (var))
228 const struct val_lab *vl = NULL;
229 const struct val_labs *vlabs = var_get_value_labels (var);
231 for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
233 const union value *val = val_lab_get_value (vl);
234 const char *label = val_lab_get_label (vl);
235 char *new_label = NULL;
237 new_label = recode_string (UTF8, enc, label, -1);
239 var_replace_value_label (var, val, new_label);
246 /* Opens the system file designated by file handle FH for
247 reading. Reads the system file's dictionary into *DICT.
248 If INFO is non-null, then it receives additional info about the
251 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
252 struct sfm_read_info *volatile info)
254 struct sfm_reader *volatile r = NULL;
255 struct variable **var_by_value_idx;
256 struct sfm_read_info local_info;
257 int format_warning_cnt = 0;
262 *dict = dict_create ();
264 /* Create and initialize reader. */
265 r = pool_create_container (struct sfm_reader, pool);
271 r->has_long_var_names = false;
272 r->opcode_idx = sizeof r->opcodes;
274 /* TRANSLATORS: this fragment will be interpolated into
275 messages in fh_lock() that identify types of files. */
276 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
280 r->file = fn_open (fh_get_file_name (fh), "rb");
283 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
284 fh_get_file_name (r->fh), strerror (errno));
288 /* Initialize info. */
291 memset (info, 0, sizeof *info);
293 if (setjmp (r->bail_out))
298 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
300 /* Read all the variable definition records. */
301 rec_type = read_int (r);
302 while (rec_type == 2)
304 read_variable_record (r, *dict, &format_warning_cnt);
305 rec_type = read_int (r);
308 /* Figure out the case format. */
309 var_by_value_idx = make_var_by_value_idx (r, *dict);
310 setup_weight (r, weight_idx, var_by_value_idx, *dict);
312 /* Read all the rest of the dictionary records. */
313 while (rec_type != 999)
318 read_value_labels (r, *dict, var_by_value_idx);
322 sys_error (r, _("Misplaced type 4 record."));
325 read_documents (r, *dict);
329 read_extension_record (r, *dict, info);
333 sys_error (r, _("Unrecognized record type %d."), rec_type);
335 rec_type = read_int (r);
339 if ( ! r->has_long_var_names )
342 for (i = 0; i < dict_get_var_cnt (*dict); i++)
344 struct variable *var = dict_get_var (*dict, i);
345 char short_name[SHORT_NAME_LEN + 1];
346 char long_name[SHORT_NAME_LEN + 1];
348 strcpy (short_name, var_get_name (var));
350 strcpy (long_name, short_name);
351 str_lowercase (long_name);
353 /* Set long name. Renaming a variable may clear the short
354 name, but we want to retain it, so re-set it
356 dict_rename_var (*dict, var, long_name);
357 var_set_short_name (var, 0, short_name);
360 r->has_long_var_names = true;
363 recode_strings (*dict);
365 /* Read record 999 data, which is just filler. */
368 /* Warn if the actual amount of data per case differs from the
369 amount that the header claims. SPSS version 13 gets this
370 wrong when very long strings are involved, so don't warn in
372 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
373 && info->version_major != 13)
374 sys_warn (r, _("File header claims %d variable positions but "
375 "%d were read from file."),
376 claimed_oct_cnt, r->oct_cnt);
378 /* Create an index of dictionary variable widths for
379 sfm_read_case to use. We cannot use the `struct variable's
380 from the dictionary we created, because the caller owns the
381 dictionary and may destroy or modify its variables. */
382 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
383 pool_register (r->pool, free, r->sfm_vars);
384 r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
386 pool_free (r->pool, var_by_value_idx);
387 return casereader_create_sequential
389 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
390 &sys_file_casereader_class, r);
394 dict_destroy (*dict);
399 /* Closes a system file after we're done with it.
400 Returns true if an I/O error has occurred on READER, false
403 close_reader (struct sfm_reader *r)
412 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
414 msg (ME, _("Error closing system file \"%s\": %s."),
415 fh_get_file_name (r->fh), strerror (errno));
425 pool_destroy (r->pool);
430 /* Destroys READER. */
432 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
434 struct sfm_reader *r = r_;
438 /* Returns true if FILE is an SPSS system file,
441 sfm_detect (FILE *file)
445 if (fread (rec_type, 4, 1, file) != 1)
449 return !strcmp ("$FL2", rec_type);
452 /* Reads the global header of the system file.
453 Sets DICT's file label to the system file's label.
454 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
455 or to the value index of the weight variable otherwise.
456 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
457 per case that the file claims to have (although it is not
459 Initializes INFO with header information. */
461 read_header (struct sfm_reader *r, struct dictionary *dict,
462 int *weight_idx, int *claimed_oct_cnt,
463 struct sfm_read_info *info)
466 char eye_catcher[61];
467 uint8_t raw_layout_code[4];
469 char creation_date[10];
470 char creation_time[9];
472 struct substring file_label_ss;
473 struct substring product;
475 read_string (r, rec_type, sizeof rec_type);
476 read_string (r, eye_catcher, sizeof eye_catcher);
478 if (strcmp ("$FL2", rec_type) != 0)
479 sys_error (r, _("This is not an SPSS system file."));
481 /* Identify integer format. */
482 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
483 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
485 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
487 || (r->integer_format != INTEGER_MSB_FIRST
488 && r->integer_format != INTEGER_LSB_FIRST))
489 sys_error (r, _("This is not an SPSS system file."));
491 *claimed_oct_cnt = read_int (r);
492 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
493 *claimed_oct_cnt = -1;
495 r->compressed = read_int (r) != 0;
497 *weight_idx = read_int (r);
499 r->case_cnt = read_int (r);
500 if ( r->case_cnt > INT_MAX / 2)
504 /* Identify floating-point format and obtain compression bias. */
505 read_bytes (r, raw_bias, sizeof raw_bias);
506 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
508 sys_warn (r, _("Compression bias is not the usual "
509 "value of 100, or system file uses unrecognized "
510 "floating-point format."));
511 if (r->integer_format == INTEGER_MSB_FIRST)
512 r->float_format = FLOAT_IEEE_DOUBLE_BE;
514 r->float_format = FLOAT_IEEE_DOUBLE_LE;
516 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
518 read_string (r, creation_date, sizeof creation_date);
519 read_string (r, creation_time, sizeof creation_time);
520 read_string (r, file_label, sizeof file_label);
523 file_label_ss = ss_cstr (file_label);
524 ss_trim (&file_label_ss, ss_cstr (" "));
525 if (!ss_is_empty (file_label_ss))
527 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
528 dict_set_label (dict, ss_data (file_label_ss));
531 strcpy (info->creation_date, creation_date);
532 strcpy (info->creation_time, creation_time);
533 info->integer_format = r->integer_format;
534 info->float_format = r->float_format;
535 info->compressed = r->compressed;
536 info->case_cnt = r->case_cnt;
538 product = ss_cstr (eye_catcher);
539 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
540 ss_trim (&product, ss_cstr (" "));
541 str_copy_buf_trunc (info->product, sizeof info->product,
542 ss_data (product), ss_length (product));
545 /* Reads a variable (type 2) record from R and adds the
546 corresponding variable to DICT.
547 Also skips past additional variable records for long string
550 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
551 int *format_warning_cnt)
554 int has_variable_label;
555 int missing_value_code;
560 struct variable *var;
563 width = read_int (r);
564 has_variable_label = read_int (r);
565 missing_value_code = read_int (r);
566 print_format = read_int (r);
567 write_format = read_int (r);
568 read_string (r, name, sizeof name);
569 name[strcspn (name, " ")] = '\0';
571 /* Check variable name. */
572 if (name[0] == '$' || name[0] == '#')
573 sys_error (r, "Variable name begins with invalid character `%c'.",
575 if (!var_is_plausible_name (name, false))
576 sys_error (r, _("Invalid variable name `%s'."), name);
578 /* Create variable. */
579 if (width < 0 || width > 255)
580 sys_error (r, _("Bad width %d for variable %s."), width, name);
581 var = dict_create_var (dict, name, width);
584 _("Duplicate variable name `%s' within system file."),
587 /* Set the short name the same as the long name. */
588 var_set_short_name (var, 0, var_get_name (var));
590 /* Get variable label, if any. */
591 if (has_variable_label != 0 && has_variable_label != 1)
592 sys_error (r, _("Variable label indicator field is not 0 or 1."));
593 if (has_variable_label == 1)
599 if (len >= sizeof label)
600 sys_error (r, _("Variable %s has label of invalid length %zu."),
602 read_string (r, label, len + 1);
603 var_set_label (var, label);
605 skip_bytes (r, ROUND_UP (len, 4) - len);
608 /* Set missing values. */
609 if (missing_value_code != 0)
611 struct missing_values mv;
614 mv_init_pool (r->pool, &mv, var_get_width (var));
615 if (var_is_numeric (var))
617 if (missing_value_code < -3 || missing_value_code > 3
618 || missing_value_code == -1)
619 sys_error (r, _("Numeric missing value indicator field is not "
620 "-3, -2, 0, 1, 2, or 3."));
621 if (missing_value_code < 0)
623 double low = read_float (r);
624 double high = read_float (r);
625 mv_add_range (&mv, low, high);
626 missing_value_code = -missing_value_code - 2;
628 for (i = 0; i < missing_value_code; i++)
629 mv_add_num (&mv, read_float (r));
633 int mv_width = MAX (width, 8);
636 if (missing_value_code < 1 || missing_value_code > 3)
637 sys_error (r, _("String missing value indicator field is not "
640 value_init (&value, mv_width);
641 value_set_missing (&value, mv_width);
642 for (i = 0; i < missing_value_code; i++)
644 uint8_t *s = value_str_rw (&value, mv_width);
645 read_bytes (r, s, 8);
648 value_destroy (&value, mv_width);
650 var_set_missing_values (var, &mv);
654 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
655 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
657 /* Account for values.
658 Skip long string continuation records, if any. */
659 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
665 for (i = 1; i < nv; i++)
667 /* Check for record type 2 and width -1. */
668 if (read_int (r) != 2 || read_int (r) != -1)
669 sys_error (r, _("Missing string continuation record."));
671 /* Skip and ignore remaining continuation data. */
672 has_variable_label = read_int (r);
673 missing_value_code = read_int (r);
674 print_format = read_int (r);
675 write_format = read_int (r);
676 read_string (r, name, sizeof name);
678 /* Variable label fields on continuation records have
679 been spotted in system files created by "SPSS Power
680 Macintosh Release 6.1". */
681 if (has_variable_label)
682 skip_bytes (r, ROUND_UP (read_int (r), 4));
687 /* Translates the format spec from sysfile format to internal
690 parse_format_spec (struct sfm_reader *r, unsigned int s,
691 enum which_format which, struct variable *v,
692 int *format_warning_cnt)
694 const int max_format_warnings = 8;
696 uint8_t raw_type = s >> 16;
702 if (!fmt_from_io (raw_type, &f.type))
703 sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
708 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
713 if (which == PRINT_FORMAT)
714 var_set_print_format (v, &f);
716 var_set_write_format (v, &f);
718 else if (*++format_warning_cnt <= max_format_warnings)
720 char fmt_string[FMT_STRING_LEN_MAX + 1];
721 sys_warn (r, _("%s variable %s has invalid %s format %s."),
722 var_is_numeric (v) ? _("Numeric") : _("String"),
724 which == PRINT_FORMAT ? _("print") : _("write"),
725 fmt_to_string (&f, fmt_string));
727 if (*format_warning_cnt == max_format_warnings)
728 sys_warn (r, _("Suppressing further invalid format warnings."));
732 /* Sets the weighting variable in DICT to the variable
733 corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is
736 setup_weight (struct sfm_reader *r, int weight_idx,
737 struct variable **var_by_value_idx, struct dictionary *dict)
741 struct variable *weight_var
742 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
743 if (var_is_numeric (weight_var))
744 dict_set_weight (dict, weight_var);
746 sys_error (r, _("Weighting variable must be numeric."));
750 /* Reads a document record, type 6, from system file R, and sets up
751 the documents and n_documents fields in the associated
754 read_documents (struct sfm_reader *r, struct dictionary *dict)
759 if (dict_get_documents (dict) != NULL)
760 sys_error (r, _("Multiple type 6 (document) records."));
762 line_cnt = read_int (r);
764 sys_error (r, _("Number of document lines (%d) "
765 "must be greater than 0."), line_cnt);
767 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
768 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
769 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
770 dict_set_documents (dict, documents);
772 sys_error (r, _("Document line contains null byte."));
773 pool_free (r->pool, documents);
776 /* Read a type 7 extension record. */
778 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
779 struct sfm_read_info *info)
781 int subtype = read_int (r);
782 size_t size = read_int (r);
783 size_t count = read_int (r);
784 size_t bytes = size * count;
786 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
787 allows an extra byte for a null terminator, used by some
788 extension processing routines. */
789 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
790 sys_error (r, "Record type 7 subtype %d too large.", subtype);
795 read_machine_integer_info (r, size, count, info, dict);
799 read_machine_float_info (r, size, count);
803 /* Variable sets information. We don't use these yet.
804 They only apply to GUIs; see VARSETS on the APPLY
805 DICTIONARY command in SPSS documentation. */
809 /* DATE variable information. We don't use it yet, but we
814 /* Used by the MRSETS command. */
818 /* Used by the SPSS Data Entry software. */
822 read_display_parameters (r, size, count, dict);
826 read_long_var_name_map (r, size, count, dict);
830 read_long_string_map (r, size, count, dict);
834 /* New in SPSS v14? Unknown purpose. */
838 read_data_file_attributes (r, size, count, dict);
842 read_variable_attributes (r, size, count, dict);
846 /* New in SPSS 16. Contains a single string that describes
847 the character encoding, e.g. "windows-1252". */
849 char *encoding = pool_calloc (r->pool, size, count + 1);
850 read_string (r, encoding, count + 1);
851 dict_set_encoding (dict, encoding);
856 /* New in SPSS 16. Encodes value labels for long string
858 read_long_string_value_labels (r, size, count, dict);
862 sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
863 subtype, PACKAGE_BUGREPORT);
867 skip_bytes (r, bytes);
870 /* Read record type 7, subtype 3. */
872 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
873 struct sfm_read_info *info,
874 struct dictionary *dict)
876 int version_major = read_int (r);
877 int version_minor = read_int (r);
878 int version_revision = read_int (r);
879 int machine_code UNUSED = read_int (r);
880 int float_representation = read_int (r);
881 int compression_code UNUSED = read_int (r);
882 int integer_representation = read_int (r);
883 int character_code = read_int (r);
885 int expected_float_format;
886 int expected_integer_format;
888 if (size != 4 || count != 8)
889 sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
893 /* Save version info. */
894 info->version_major = version_major;
895 info->version_minor = version_minor;
896 info->version_revision = version_revision;
898 /* Check floating point format. */
899 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
900 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
901 expected_float_format = 1;
902 else if (r->float_format == FLOAT_Z_LONG)
903 expected_float_format = 2;
904 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
905 expected_float_format = 3;
908 if (float_representation != expected_float_format)
909 sys_error (r, _("Floating-point representation indicated by "
910 "system file (%d) differs from expected (%d)."),
911 r->float_format, expected_float_format);
913 /* Check integer format. */
914 if (r->integer_format == INTEGER_MSB_FIRST)
915 expected_integer_format = 1;
916 else if (r->integer_format == INTEGER_LSB_FIRST)
917 expected_integer_format = 2;
920 if (integer_representation != expected_integer_format)
922 static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
923 sys_warn (r, _("Integer format indicated by system file (%s) "
924 "differs from expected (%s)."),
925 gettext (endian[integer_representation == 1]),
926 gettext (endian[expected_integer_format == 1]));
931 Record 7 (20) provides a much more reliable way of
932 setting the encoding.
933 The character_code is used as a fallback only.
935 if ( NULL == dict_get_encoding (dict))
937 switch (character_code)
940 dict_set_encoding (dict, "EBCDIC-US");
944 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
945 respectively. However, there are known to be many files
946 in the wild with character code 2, yet have data which are
948 Therefore we ignore these values.
952 dict_set_encoding (dict, "MS_KANJI");
955 dict_set_encoding (dict, "UTF-7");
958 dict_set_encoding (dict, "UTF-8");
963 snprintf (enc, 100, "CP%d", character_code);
964 dict_set_encoding (dict, enc);
971 /* Read record type 7, subtype 4. */
973 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
975 double sysmis = read_float (r);
976 double highest = read_float (r);
977 double lowest = read_float (r);
979 if (size != 8 || count != 3)
980 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
983 if (sysmis != SYSMIS)
984 sys_warn (r, _("File specifies unexpected value %g as %s."),
987 if (highest != HIGHEST)
988 sys_warn (r, _("File specifies unexpected value %g as %s."),
991 if (lowest != LOWEST)
992 sys_warn (r, _("File specifies unexpected value %g as %s."),
996 /* Read record type 7, subtype 11, which specifies how variables
997 should be displayed in GUI environments. */
999 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
1000 struct dictionary *dict)
1003 bool includes_width;
1004 bool warned = false;
1009 sys_warn (r, _("Bad size %zu on extension 11."), size);
1010 skip_bytes (r, size * count);
1014 n_vars = dict_get_var_cnt (dict);
1015 if (count == 3 * n_vars)
1016 includes_width = true;
1017 else if (count == 2 * n_vars)
1018 includes_width = false;
1021 sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
1023 skip_bytes (r, size * count);
1027 for (i = 0; i < n_vars; ++i)
1029 struct variable *v = dict_get_var (dict, i);
1030 int measure = read_int (r);
1031 int width = includes_width ? read_int (r) : 0;
1032 int align = read_int (r);
1034 /* SPSS 14 sometimes seems to set string variables' measure
1036 if (0 == measure && var_is_alpha (v))
1039 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1042 sys_warn (r, _("Invalid variable display parameters "
1043 "for variable %zu (%s). "
1044 "Default parameters substituted."),
1045 i, var_get_name (v));
1050 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1051 : measure == 2 ? MEASURE_ORDINAL
1053 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1054 : align == 1 ? ALIGN_RIGHT
1057 /* Older versions (SPSS 9.0) sometimes set the display
1058 width to zero. This causes confusion in the GUI, so
1059 only set the width if it is nonzero. */
1061 var_set_display_width (v, width);
1065 /* Reads record type 7, subtype 13, which gives the long name
1066 that corresponds to each short name. Modifies variable names
1067 in DICT accordingly. */
1069 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
1070 struct dictionary *dict)
1072 struct text_record *text;
1073 struct variable *var;
1076 text = open_text_record (r, size * count);
1077 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1080 size_t short_name_cnt;
1083 /* Validate long name. */
1084 if (!var_is_valid_name (long_name, false))
1086 sys_warn (r, _("Long variable mapping from %s to invalid "
1087 "variable name `%s'."),
1088 var_get_name (var), long_name);
1092 /* Identify any duplicates. */
1093 if (strcasecmp (var_get_short_name (var, 0), long_name)
1094 && dict_lookup_var (dict, long_name) != NULL)
1096 sys_warn (r, _("Duplicate long variable name `%s' "
1097 "within system file."), long_name);
1101 /* Renaming a variable may clear its short names, but we
1102 want to retain them, so we save them and re-set them
1104 short_name_cnt = var_get_short_name_cnt (var);
1105 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
1106 for (i = 0; i < short_name_cnt; i++)
1108 const char *s = var_get_short_name (var, i);
1109 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1112 /* Set long name. */
1113 dict_rename_var (dict, var, long_name);
1115 /* Restore short names. */
1116 for (i = 0; i < short_name_cnt; i++)
1118 var_set_short_name (var, i, short_names[i]);
1119 free (short_names[i]);
1123 close_text_record (r, text);
1124 r->has_long_var_names = true;
1127 /* Reads record type 7, subtype 14, which gives the real length
1128 of each very long string. Rearranges DICT accordingly. */
1130 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
1131 struct dictionary *dict)
1133 struct text_record *text;
1134 struct variable *var;
1137 text = open_text_record (r, size * count);
1138 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1140 size_t idx = var_get_dict_index (var);
1146 length = strtol (length_s, NULL, 10);
1147 if (length < 1 || length > MAX_STRING)
1149 sys_warn (r, _("%s listed as string of invalid length %s "
1150 "in very length string record."),
1151 var_get_name (var), length_s);
1155 /* Check segments. */
1156 segment_cnt = sfm_width_to_segments (length);
1157 if (segment_cnt == 1)
1159 sys_warn (r, _("%s listed in very long string record with width %s, "
1160 "which requires only one segment."),
1161 var_get_name (var), length_s);
1164 if (idx + segment_cnt > dict_get_var_cnt (dict))
1165 sys_error (r, _("Very long string %s overflows dictionary."),
1166 var_get_name (var));
1168 /* Get the short names from the segments and check their
1170 for (i = 0; i < segment_cnt; i++)
1172 struct variable *seg = dict_get_var (dict, idx + i);
1173 int alloc_width = sfm_segment_alloc_width (length, i);
1174 int width = var_get_width (seg);
1177 var_set_short_name (var, i, var_get_short_name (seg, 0));
1178 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1179 sys_error (r, _("Very long string with width %ld has segment %d "
1180 "of width %d (expected %d)"),
1181 length, i, width, alloc_width);
1183 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1184 var_set_width (var, length);
1186 close_text_record (r, text);
1187 dict_compact_values (dict);
1190 /* Reads value labels from sysfile H and inserts them into the
1191 associated dictionary. */
1193 read_value_labels (struct sfm_reader *r,
1194 struct dictionary *dict, struct variable **var_by_value_idx)
1196 struct pool *subpool;
1200 uint8_t raw_value[8]; /* Value as uninterpreted bytes. */
1201 union value value; /* Value. */
1202 char *label; /* Null-terminated label string. */
1205 struct label *labels = NULL;
1206 int label_cnt; /* Number of labels. */
1208 struct variable **var = NULL; /* Associated variables. */
1209 int var_cnt; /* Number of associated variables. */
1210 int max_width; /* Maximum width of string variables. */
1214 subpool = pool_create_subpool (r->pool);
1216 /* Read the type 3 record and record its contents. We can't do
1217 much with the data yet because we don't know whether it is
1218 of numeric or string type. */
1220 /* Read number of labels. */
1221 label_cnt = read_int (r);
1223 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1225 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1230 /* Read each value/label tuple into labels[]. */
1231 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1232 for (i = 0; i < label_cnt; i++)
1234 struct label *label = labels + i;
1235 unsigned char label_len;
1239 read_bytes (r, label->raw_value, sizeof label->raw_value);
1241 /* Read label length. */
1242 read_bytes (r, &label_len, sizeof label_len);
1243 padded_len = ROUND_UP (label_len + 1, 8);
1245 /* Read label, padding. */
1246 label->label = pool_alloc (subpool, padded_len + 1);
1247 read_bytes (r, label->label, padded_len - 1);
1248 label->label[label_len] = 0;
1251 /* Now, read the type 4 record that has the list of variables
1252 to which the value labels are to be applied. */
1254 /* Read record type of type 4 record. */
1255 if (read_int (r) != 4)
1256 sys_error (r, _("Variable index record (type 4) does not immediately "
1257 "follow value label record (type 3) as it should."));
1259 /* Read number of variables associated with value label from type 4
1261 var_cnt = read_int (r);
1262 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1263 sys_error (r, _("Number of variables associated with a value label (%d) "
1264 "is not between 1 and the number of variables (%zu)."),
1265 var_cnt, dict_get_var_cnt (dict));
1267 /* Read the list of variables. */
1268 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1270 for (i = 0; i < var_cnt; i++)
1272 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1273 if (var_get_width (var[i]) > 8)
1274 sys_error (r, _("Value labels may not be added to long string "
1275 "variables (e.g. %s) using records types 3 and 4."),
1276 var_get_name (var[i]));
1277 max_width = MAX (max_width, var_get_width (var[i]));
1280 /* Type check the variables. */
1281 for (i = 1; i < var_cnt; i++)
1282 if (var_get_type (var[i]) != var_get_type (var[0]))
1283 sys_error (r, _("Variables associated with value label are not all of "
1284 "identical type. Variable %s is %s, but variable "
1286 var_get_name (var[0]),
1287 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1288 var_get_name (var[i]),
1289 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1291 /* Fill in labels[].value, now that we know the desired type. */
1292 for (i = 0; i < label_cnt; i++)
1294 struct label *label = labels + i;
1296 value_init_pool (subpool, &label->value, max_width);
1297 if (var_is_alpha (var[0]))
1298 u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
1299 label->raw_value, sizeof label->raw_value, ' ');
1301 label->value.f = float_get_double (r->float_format, label->raw_value);
1304 /* Assign the `value_label's to each variable. */
1305 for (i = 0; i < var_cnt; i++)
1307 struct variable *v = var[i];
1310 /* Add each label to the variable. */
1311 for (j = 0; j < label_cnt; j++)
1313 struct label *label = &labels[j];
1314 if (!var_add_value_label (v, &label->value, label->label))
1316 if (var_is_numeric (var[0]))
1317 sys_warn (r, _("Duplicate value label for %g on %s."),
1318 label->value.f, var_get_name (v));
1320 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1321 max_width, value_str (&label->value, max_width),
1327 pool_destroy (subpool);
1330 /* Reads a set of custom attributes from TEXT into ATTRS.
1331 ATTRS may be a null pointer, in which case the attributes are
1332 read but discarded. */
1334 read_attributes (struct sfm_reader *r, struct text_record *text,
1335 struct attrset *attrs)
1339 struct attribute *attr;
1343 /* Parse the key. */
1344 key = text_get_token (text, ss_cstr ("("));
1348 attr = attribute_create (key);
1349 for (index = 1; ; index++)
1351 /* Parse the value. */
1355 value = text_get_token (text, ss_cstr ("\n"));
1358 text_warn (r, text, _("Error parsing attribute value %s[%d]"),
1363 length = strlen (value);
1364 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
1366 value[length - 1] = '\0';
1367 attribute_add_value (attr, value + 1);
1372 _("Attribute value %s[%d] is not quoted: %s"),
1374 attribute_add_value (attr, value);
1377 /* Was this the last value for this attribute? */
1378 if (text_match (text, ')'))
1382 attrset_add (attrs, attr);
1384 attribute_destroy (attr);
1386 while (!text_match (text, '/'));
1389 /* Reads record type 7, subtype 17, which lists custom
1390 attributes on the data file. */
1392 read_data_file_attributes (struct sfm_reader *r,
1393 size_t size, size_t count,
1394 struct dictionary *dict)
1396 struct text_record *text = open_text_record (r, size * count);
1397 read_attributes (r, text, dict_get_attributes (dict));
1398 close_text_record (r, text);
1402 skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
1406 for (i = 0; i < n_labels; i++)
1408 size_t value_length, label_length;
1410 value_length = read_int (r);
1411 skip_bytes (r, value_length);
1412 label_length = read_int (r);
1413 skip_bytes (r, label_length);
1418 read_long_string_value_labels (struct sfm_reader *r,
1419 size_t size, size_t count,
1420 struct dictionary *d)
1422 const off_t start = ftello (r->file);
1423 while (ftello (r->file) - start < size * count)
1425 char var_name[VAR_NAME_LEN + 1];
1433 var_name_len = read_int (r);
1434 if (var_name_len > VAR_NAME_LEN)
1435 sys_error (r, _("Variable name length in long string value label "
1436 "record (%d) exceeds %d-byte limit."),
1437 var_name_len, VAR_NAME_LEN);
1438 read_string (r, var_name, var_name_len + 1);
1439 width = read_int (r);
1440 n_labels = read_int (r);
1442 v = dict_lookup_var (d, var_name);
1445 sys_warn (r, _("Ignoring long string value record for "
1446 "unknown variable %s."), var_name);
1447 skip_long_string_value_labels (r, n_labels);
1450 if (var_is_numeric (v))
1452 sys_warn (r, _("Ignoring long string value record for "
1453 "numeric variable %s."), var_name);
1454 skip_long_string_value_labels (r, n_labels);
1457 if (width != var_get_width (v))
1459 sys_warn (r, _("Ignoring long string value record for variable %s "
1460 "because the record's width (%d) does not match the "
1461 "variable's width (%d)"),
1462 var_name, width, var_get_width (v));
1463 skip_long_string_value_labels (r, n_labels);
1468 value_init_pool (r->pool, &value, width);
1469 for (i = 0; i < n_labels; i++)
1471 size_t value_length, label_length;
1476 value_length = read_int (r);
1477 if (value_length == width)
1478 read_bytes (r, value_str_rw (&value, width), width);
1481 sys_warn (r, _("Ignoring long string value %zu for variable %s, "
1482 "with width %d, that has bad value width %zu."),
1483 i, var_get_name (v), width, value_length);
1484 skip_bytes (r, value_length);
1489 label_length = read_int (r);
1490 read_string (r, label, MIN (sizeof label, label_length + 1));
1491 if (label_length >= sizeof label)
1493 /* Skip and silently ignore label text after the
1494 first 255 bytes. The maximum documented length
1495 of a label is 120 bytes so this is more than
1497 skip_bytes (r, sizeof label - (label_length + 1));
1500 if (!skip && !var_add_value_label (v, &value, label))
1501 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1502 width, value_str (&value, width), var_get_name (v));
1508 /* Reads record type 7, subtype 18, which lists custom
1509 attributes on individual variables. */
1511 read_variable_attributes (struct sfm_reader *r,
1512 size_t size, size_t count,
1513 struct dictionary *dict)
1515 struct text_record *text = open_text_record (r, size * count);
1518 struct variable *var;
1519 if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var))
1521 read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
1523 close_text_record (r, text);
1529 static void partial_record (struct sfm_reader *r)
1532 static void read_error (struct casereader *, const struct sfm_reader *);
1534 static bool read_case_number (struct sfm_reader *, double *);
1535 static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
1536 static int read_opcode (struct sfm_reader *);
1537 static bool read_compressed_number (struct sfm_reader *, double *);
1538 static bool read_compressed_string (struct sfm_reader *, uint8_t *);
1539 static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
1540 static bool skip_whole_strings (struct sfm_reader *, size_t);
1542 /* Reads and returns one case from READER's file. Returns a null
1543 pointer if not successful. */
1544 static struct ccase *
1545 sys_file_casereader_read (struct casereader *reader, void *r_)
1547 struct sfm_reader *r = r_;
1548 struct ccase *volatile c;
1554 c = case_create (r->proto);
1555 if (setjmp (r->bail_out))
1557 casereader_force_error (reader);
1562 for (i = 0; i < r->sfm_var_cnt; i++)
1564 struct sfm_var *sv = &r->sfm_vars[i];
1565 union value *v = case_data_rw_idx (c, sv->case_index);
1567 if (sv->var_width == 0)
1569 if (!read_case_number (r, &v->f))
1574 uint8_t *s = value_str_rw (v, sv->var_width);
1575 if (!read_case_string (r, s + sv->offset, sv->segment_width))
1577 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
1587 if (r->case_cnt != -1)
1588 read_error (reader, r);
1592 /* Issues an error that R ends in a partial record. */
1594 partial_record (struct sfm_reader *r)
1596 sys_error (r, _("File ends in partial case."));
1599 /* Issues an error that an unspecified error occurred SFM, and
1602 read_error (struct casereader *r, const struct sfm_reader *sfm)
1604 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1605 casereader_force_error (r);
1608 /* Reads a number from R and stores its value in *D.
1609 If R is compressed, reads a compressed number;
1610 otherwise, reads a number in the regular way.
1611 Returns true if successful, false if end of file is
1612 reached immediately. */
1614 read_case_number (struct sfm_reader *r, double *d)
1619 if (!try_read_bytes (r, number, sizeof number))
1621 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1625 return read_compressed_number (r, d);
1628 /* Reads LENGTH string bytes from R into S.
1629 Always reads a multiple of 8 bytes; if LENGTH is not a
1630 multiple of 8, then extra bytes are read and discarded without
1632 Reads compressed strings if S is compressed.
1633 Returns true if successful, false if end of file is
1634 reached immediately. */
1636 read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
1638 size_t whole = ROUND_DOWN (length, 8);
1639 size_t partial = length % 8;
1643 if (!read_whole_strings (r, s, whole))
1650 if (!read_whole_strings (r, bounce, sizeof bounce))
1656 memcpy (s + whole, bounce, partial);
1662 /* Reads and returns the next compression opcode from R. */
1664 read_opcode (struct sfm_reader *r)
1666 assert (r->compressed);
1670 if (r->opcode_idx >= sizeof r->opcodes)
1672 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1676 opcode = r->opcodes[r->opcode_idx++];
1683 /* Reads a compressed number from R and stores its value in D.
1684 Returns true if successful, false if end of file is
1685 reached immediately. */
1687 read_compressed_number (struct sfm_reader *r, double *d)
1689 int opcode = read_opcode (r);
1697 *d = read_float (r);
1701 sys_error (r, _("Compressed data is corrupt."));
1708 *d = opcode - r->bias;
1715 /* Reads a compressed 8-byte string segment from R and stores it
1717 Returns true if successful, false if end of file is
1718 reached immediately. */
1720 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
1722 switch (read_opcode (r))
1729 read_bytes (r, dst, 8);
1733 memset (dst, ' ', 8);
1737 sys_error (r, _("Compressed data is corrupt."));
1743 /* Reads LENGTH string bytes from R into S.
1744 LENGTH must be a multiple of 8.
1745 Reads compressed strings if S is compressed.
1746 Returns true if successful, false if end of file is
1747 reached immediately. */
1749 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
1751 assert (length % 8 == 0);
1753 return try_read_bytes (r, s, length);
1757 for (ofs = 0; ofs < length; ofs += 8)
1758 if (!read_compressed_string (r, s + ofs))
1768 /* Skips LENGTH string bytes from R.
1769 LENGTH must be a multiple of 8.
1770 (LENGTH is also limited to 1024, but that's only because the
1771 current caller never needs more than that many bytes.)
1772 Returns true if successful, false if end of file is
1773 reached immediately. */
1775 skip_whole_strings (struct sfm_reader *r, size_t length)
1777 uint8_t buffer[1024];
1778 assert (length < sizeof buffer);
1779 return read_whole_strings (r, buffer, length);
1782 /* Creates and returns a table that can be used for translating a value
1783 index into a case to a "struct variable *" for DICT. Multiple
1784 system file fields reference variables this way.
1786 This table must be created before processing the very long
1787 string extension record, because that record causes some
1788 values to be deleted from the case and the dictionary to be
1790 static struct variable **
1791 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1793 struct variable **var_by_value_idx;
1797 var_by_value_idx = pool_nmalloc (r->pool,
1798 r->oct_cnt, sizeof *var_by_value_idx);
1799 for (i = 0; i < dict_get_var_cnt (dict); i++)
1801 struct variable *v = dict_get_var (dict, i);
1802 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1805 var_by_value_idx[value_idx++] = v;
1806 for (j = 1; j < nv; j++)
1807 var_by_value_idx[value_idx++] = NULL;
1809 assert (value_idx == r->oct_cnt);
1811 return var_by_value_idx;
1814 /* Returns the "struct variable" corresponding to the given
1815 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index
1817 static struct variable *
1818 lookup_var_by_value_idx (struct sfm_reader *r,
1819 struct variable **var_by_value_idx, int value_idx)
1821 struct variable *var;
1823 if (value_idx < 1 || value_idx > r->oct_cnt)
1824 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1825 value_idx, r->oct_cnt);
1827 var = var_by_value_idx[value_idx - 1];
1829 sys_error (r, _("Variable index %d refers to long string "
1836 /* Returns the variable in D with the given SHORT_NAME,
1837 or a null pointer if there is none. */
1838 static struct variable *
1839 lookup_var_by_short_name (struct dictionary *d, const char *short_name)
1841 struct variable *var;
1845 /* First try looking up by full name. This often succeeds. */
1846 var = dict_lookup_var (d, short_name);
1847 if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
1850 /* Iterate through the whole dictionary as a fallback. */
1851 var_cnt = dict_get_var_cnt (d);
1852 for (i = 0; i < var_cnt; i++)
1854 var = dict_get_var (d, i);
1855 if (!strcasecmp (var_get_short_name (var, 0), short_name))
1862 /* Helpers for reading records that contain structured text
1865 /* Maximum number of warnings to issue for a single text
1867 #define MAX_TEXT_WARNINGS 5
1872 struct substring buffer; /* Record contents. */
1873 size_t pos; /* Current position in buffer. */
1874 int n_warnings; /* Number of warnings issued or suppressed. */
1877 /* Reads SIZE bytes into a text record for R,
1878 and returns the new text record. */
1879 static struct text_record *
1880 open_text_record (struct sfm_reader *r, size_t size)
1882 struct text_record *text = pool_alloc (r->pool, sizeof *text);
1883 char *buffer = pool_malloc (r->pool, size + 1);
1884 read_bytes (r, buffer, size);
1885 text->buffer = ss_buffer (buffer, size);
1887 text->n_warnings = 0;
1891 /* Closes TEXT, frees its storage, and issues a final warning
1892 about suppressed warnings if necesary. */
1894 close_text_record (struct sfm_reader *r, struct text_record *text)
1896 if (text->n_warnings > MAX_TEXT_WARNINGS)
1897 sys_warn (r, _("Suppressed %d additional related warnings."),
1898 text->n_warnings - MAX_TEXT_WARNINGS);
1899 pool_free (r->pool, ss_data (text->buffer));
1902 /* Reads a variable=value pair from TEXT.
1903 Looks up the variable in DICT and stores it into *VAR.
1904 Stores a null-terminated value into *VALUE. */
1906 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
1907 struct text_record *text,
1908 struct variable **var, char **value)
1912 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
1915 *value = text_get_token (text, ss_buffer ("\t\0", 2));
1919 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
1920 ss_buffer ("\t\0", 2));
1928 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
1929 struct text_record *text, struct substring delimiters,
1930 struct variable **var)
1932 char *short_name = text_get_token (text, delimiters);
1933 if (short_name == NULL)
1936 *var = lookup_var_by_short_name (dict, short_name);
1938 text_warn (r, text, _("Variable map refers to unknown variable %s."),
1943 /* Displays a warning for the current file position, limiting the
1944 number to MAX_TEXT_WARNINGS for TEXT. */
1946 text_warn (struct sfm_reader *r, struct text_record *text,
1947 const char *format, ...)
1949 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
1953 va_start (args, format);
1954 sys_msg (r, MW, format, args);
1960 text_get_token (struct text_record *text, struct substring delimiters)
1962 struct substring token;
1964 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
1966 ss_data (token)[ss_length (token)] = '\0';
1967 return ss_data (token);
1971 text_match (struct text_record *text, char c)
1973 if (text->buffer.string[text->pos] == c)
1984 /* Displays a corruption message. */
1986 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
1991 ds_init_empty (&text);
1992 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
1993 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
1994 ds_put_vformat (&text, format, args);
1996 m.category = msg_class_to_category (class);
1997 m.severity = msg_class_to_severity (class);
1998 m.where.file_name = NULL;
1999 m.where.line_number = 0;
2000 m.text = ds_cstr (&text);
2005 /* Displays a warning for the current file position. */
2007 sys_warn (struct sfm_reader *r, const char *format, ...)
2011 va_start (args, format);
2012 sys_msg (r, MW, format, args);
2016 /* Displays an error for the current file position,
2017 marks it as in an error state,
2018 and aborts reading it using longjmp. */
2020 sys_error (struct sfm_reader *r, const char *format, ...)
2024 va_start (args, format);
2025 sys_msg (r, ME, format, args);
2029 longjmp (r->bail_out, 1);
2032 /* Reads BYTE_CNT bytes into BUF.
2033 Returns true if exactly BYTE_CNT bytes are successfully read.
2034 Aborts if an I/O error or a partial read occurs.
2035 If EOF_IS_OK, then an immediate end-of-file causes false to be
2036 returned; otherwise, immediate end-of-file causes an abort
2039 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
2040 void *buf, size_t byte_cnt)
2042 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
2043 if (bytes_read == byte_cnt)
2045 else if (ferror (r->file))
2046 sys_error (r, _("System error: %s."), strerror (errno));
2047 else if (!eof_is_ok || bytes_read != 0)
2048 sys_error (r, _("Unexpected end of file."));
2053 /* Reads BYTE_CNT into BUF.
2054 Aborts upon I/O error or if end-of-file is encountered. */
2056 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2058 read_bytes_internal (r, false, buf, byte_cnt);
2061 /* Reads BYTE_CNT bytes into BUF.
2062 Returns true if exactly BYTE_CNT bytes are successfully read.
2063 Returns false if an immediate end-of-file is encountered.
2064 Aborts if an I/O error or a partial read occurs. */
2066 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2068 return read_bytes_internal (r, true, buf, byte_cnt);
2071 /* Reads a 32-bit signed integer from R and returns its value in
2074 read_int (struct sfm_reader *r)
2077 read_bytes (r, integer, sizeof integer);
2078 return integer_get (r->integer_format, integer, sizeof integer);
2081 /* Reads a 64-bit floating-point number from R and returns its
2082 value in host format. */
2084 read_float (struct sfm_reader *r)
2087 read_bytes (r, number, sizeof number);
2088 return float_get_double (r->float_format, number);
2091 /* Reads exactly SIZE - 1 bytes into BUFFER
2092 and stores a null byte into BUFFER[SIZE - 1]. */
2094 read_string (struct sfm_reader *r, char *buffer, size_t size)
2097 read_bytes (r, buffer, size - 1);
2098 buffer[size - 1] = '\0';
2101 /* Skips BYTES bytes forward in R. */
2103 skip_bytes (struct sfm_reader *r, size_t bytes)
2108 size_t chunk = MIN (sizeof buffer, bytes);
2109 read_bytes (r, buffer, chunk);
2114 static const struct casereader_class sys_file_casereader_class =
2116 sys_file_casereader_read,
2117 sys_file_casereader_destroy,