1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/i18n.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/message.h>
31 #include <libpspp/compiler.h>
32 #include <libpspp/misc.h>
33 #include <libpspp/pool.h>
34 #include <libpspp/str.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/array.h>
38 #include <data/attributes.h>
39 #include <data/case.h>
40 #include <data/casereader-provider.h>
41 #include <data/casereader.h>
42 #include <data/dictionary.h>
43 #include <data/file-handle-def.h>
44 #include <data/file-name.h>
45 #include <data/format.h>
46 #include <data/missing-values.h>
47 #include <data/short-names.h>
48 #include <data/value-labels.h>
49 #include <data/variable.h>
50 #include <data/value.h>
55 #include "unlocked-io.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) (msgid)
63 /* System file reader. */
66 /* Resource tracking. */
67 struct pool *pool; /* All system file state. */
68 jmp_buf bail_out; /* longjmp() target for error handling. */
/* File state. */
71 struct file_handle *fh; /* File handle. */
72 struct fh_lock *lock; /* Mutual exclusion for file handle. */
73 FILE *file; /* File stream. */
74 bool error; /* I/O or corruption error? */
75 struct caseproto *proto; /* Format of output cases. */
/* File format. */
78 enum integer_format integer_format; /* On-disk integer format. */
79 enum float_format float_format; /* On-disk floating point format. */
80 int oct_cnt; /* Number of 8-byte units per case. */
81 struct sfm_var *sfm_vars; /* Variables. */
82 size_t sfm_var_cnt; /* Number of variables. */
83 casenumber case_cnt; /* Number of cases; -1 maps to CASENUMBER_MAX when the casereader is created. */
84 bool has_long_var_names; /* File has a long variable name map, or long names were derived from the short names. */
/* Decompression. */
87 bool compressed; /* File is compressed? */
88 double bias; /* Compression bias, usually 100.0. */
89 uint8_t opcodes[8]; /* Current block of opcodes. */
90 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
/* Forward declarations for the casereader class and the helpers defined
later in this file. */
93 static const struct casereader_class sys_file_casereader_class;
95 static bool close_reader (struct sfm_reader *);
97 static struct variable **make_var_by_value_idx (struct sfm_reader *,
99 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
/* Message helpers; the PRINTF_FORMAT annotations give the positions of the
format string and of its first argument. */
103 static void sys_msg (struct sfm_reader *r, int class,
104 const char *format, va_list args)
105 PRINTF_FORMAT (3, 0);
106 static void sys_warn (struct sfm_reader *, const char *, ...)
107 PRINTF_FORMAT (2, 3);
108 static void sys_error (struct sfm_reader *, const char *, ...)
/* Raw input helpers. */
112 static void read_bytes (struct sfm_reader *, void *, size_t);
113 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
114 static int read_int (struct sfm_reader *);
115 static double read_float (struct sfm_reader *);
116 static void read_string (struct sfm_reader *, char *, size_t);
117 static void skip_bytes (struct sfm_reader *, size_t);
/* Text record helpers, used by the extension records that carry text. */
119 static struct text_record *open_text_record (struct sfm_reader *, size_t size);
120 static void close_text_record (struct sfm_reader *r,
121 struct text_record *);
122 static bool read_variable_to_value_pair (struct sfm_reader *,
124 struct text_record *,
125 struct variable **var, char **value);
126 static void text_warn (struct sfm_reader *r, struct text_record *text,
127 const char *format, ...)
128 PRINTF_FORMAT (3, 4);
129 static char *text_get_token (struct text_record *,
130 struct substring delimiters);
131 static bool text_match (struct text_record *, char c);
132 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
133 struct text_record *,
134 struct substring delimiters,
/* NOTE(review): close_reader is already declared above; this second
declaration is redundant. */
137 static bool close_reader (struct sfm_reader *r);
139 /* Dictionary reader. */
147 static void read_header (struct sfm_reader *, struct dictionary *,
148 int *weight_idx, int *claimed_oct_cnt,
149 struct sfm_read_info *);
150 static void read_variable_record (struct sfm_reader *, struct dictionary *,
151 int *format_warning_cnt);
152 static void parse_format_spec (struct sfm_reader *, unsigned int,
153 enum which_format, struct variable *,
154 int *format_warning_cnt);
155 static void setup_weight (struct sfm_reader *, int weight_idx,
156 struct variable **var_by_value_idx,
157 struct dictionary *);
158 static void read_documents (struct sfm_reader *, struct dictionary *);
159 static void read_value_labels (struct sfm_reader *, struct dictionary *,
160 struct variable **var_by_value_idx);
/* Readers for the subtypes of the type 7 extension record; each receives the
SIZE and COUNT fields read by read_extension_record. */
162 static void read_extension_record (struct sfm_reader *, struct dictionary *,
163 struct sfm_read_info *);
164 static void read_machine_integer_info (struct sfm_reader *,
165 size_t size, size_t count,
166 struct sfm_read_info *,
169 static void read_machine_float_info (struct sfm_reader *,
170 size_t size, size_t count);
171 static void read_display_parameters (struct sfm_reader *,
172 size_t size, size_t count,
173 struct dictionary *);
174 static void read_long_var_name_map (struct sfm_reader *,
175 size_t size, size_t count,
176 struct dictionary *);
177 static void read_long_string_map (struct sfm_reader *,
178 size_t size, size_t count,
179 struct dictionary *);
180 static void read_data_file_attributes (struct sfm_reader *,
181 size_t size, size_t count,
182 struct dictionary *);
183 static void read_variable_attributes (struct sfm_reader *,
184 size_t size, size_t count,
185 struct dictionary *);
186 static void read_long_string_value_labels (struct sfm_reader *,
187 size_t size, size_t count,
188 struct dictionary *);
190 /* Converts the strings in DICT -- variable names, variable labels and
value labels -- from the dictionary's encoding to UTF-8.
get_default_encoding() supplies the source encoding as a fallback
(presumably when DICT has no encoding; the guarding condition is not
visible in this listing). */
192 recode_strings (struct dictionary *dict)
196 const char *enc = dict_get_encoding (dict);
199 enc = get_default_encoding ();
201 for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
203 /* Convert the long variable name */
204 struct variable *var = dict_get_var (dict, i);
205 const char *native_name = var_get_name (var);
206 char *utf8_name = recode_string (UTF8, enc, native_name, -1);
/* Rename only when recoding actually changed the name... */
207 if ( 0 != strcmp (utf8_name, native_name))
/* ...and only when the recoded name does not collide with a variable
already in the dictionary; a collision is reported instead. */
209 if ( NULL == dict_lookup_var (dict, utf8_name))
210 dict_rename_var (dict, var, utf8_name);
213 _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name);
218 /* Convert the variable label */
219 if (var_has_label (var))
221 char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
222 var_set_label (var, utf8_label);
/* Convert the value labels */
226 if (var_has_value_labels (var))
228 const struct val_lab *vl = NULL;
229 const struct val_labs *vlabs = var_get_value_labels (var);
231 for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
233 const union value *val = val_lab_get_value (vl);
234 const char *label = val_lab_get_label (vl);
235 char *new_label = NULL;
237 new_label = recode_string (UTF8, enc, label, -1);
/* Replace the label for the same value with its recoded text. */
239 var_replace_value_label (var, val, new_label);
246 /* Opens the system file designated by file handle FH for
247 reading. Reads the system file's dictionary into *DICT.
248 If INFO is non-null, then it receives additional info about the system file.
On success, returns a sequential casereader built on the new reader. */
251 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
252 struct sfm_read_info *volatile info)
254 struct sfm_reader *volatile r = NULL;
255 struct variable **var_by_value_idx;
256 struct sfm_read_info local_info;
257 int format_warning_cnt = 0;
262 *dict = dict_create ();
264 /* Create and initialize reader. */
265 r = pool_create_container (struct sfm_reader, pool);
271 r->has_long_var_names = false;
/* sizeof r->opcodes (8) marks the opcode block as exhausted. */
272 r->opcode_idx = sizeof r->opcodes;
274 /* TRANSLATORS: this fragment will be interpolated into
275 messages in fh_lock() that identify types of files. */
276 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
280 r->file = fn_open (fh_get_file_name (fh), "rb");
283 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
284 fh_get_file_name (r->fh), strerror (errno));
288 /* Initialize info. */
291 memset (info, 0, sizeof *info);
/* bail_out is the longjmp() target for error handling, so a nonzero return
here means one of the readers below gave up. */
293 if (setjmp (r->bail_out))
298 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
300 /* Read all the variable definition records. */
301 rec_type = read_int (r);
302 while (rec_type == 2)
304 read_variable_record (r, *dict, &format_warning_cnt);
305 rec_type = read_int (r);
308 /* Figure out the case format. */
309 var_by_value_idx = make_var_by_value_idx (r, *dict);
310 setup_weight (r, weight_idx, var_by_value_idx, *dict);
312 /* Read all the rest of the dictionary records. */
313 while (rec_type != 999)
318 read_value_labels (r, *dict, var_by_value_idx);
322 sys_error (r, _("Misplaced type 4 record."));
325 read_documents (r, *dict);
329 read_extension_record (r, *dict, info);
333 sys_error (r, _("Unrecognized record type %d."), rec_type);
335 rec_type = read_int (r);
/* No long variable name map was read: derive each long name by
lowercasing the variable's current name, which is kept as the short name. */
339 if ( ! r->has_long_var_names )
342 for (i = 0; i < dict_get_var_cnt (*dict); i++)
344 struct variable *var = dict_get_var (*dict, i);
345 char short_name[SHORT_NAME_LEN + 1];
346 char long_name[SHORT_NAME_LEN + 1];
348 strcpy (short_name, var_get_name (var));
350 strcpy (long_name, short_name);
351 str_lowercase (long_name);
353 /* Set long name. Renaming a variable may clear the short
354 name, but we want to retain it, so re-set it explicitly. */
356 dict_rename_var (*dict, var, long_name);
357 var_set_short_name (var, 0, short_name);
360 r->has_long_var_names = true;
363 recode_strings (*dict);
365 /* Read record 999 data, which is just filler. */
368 /* Warn if the actual amount of data per case differs from the
369 amount that the header claims. SPSS version 13 gets this
370 wrong when very long strings are involved, so don't warn in that case.
A claimed count of -1 means the header value was rejected as implausible. */
372 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
373 && info->version_major != 13)
374 sys_warn (r, _("File header claims %d variable positions but "
375 "%d were read from file."),
376 claimed_oct_cnt, r->oct_cnt);
378 /* Create an index of dictionary variable widths for
379 sfm_read_case to use. We cannot use the `struct variable's
380 from the dictionary we created, because the caller owns the
381 dictionary and may destroy or modify its variables. */
382 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
383 pool_register (r->pool, free, r->sfm_vars);
384 r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
/* The value-index lookup table is needed only while reading the dictionary. */
386 pool_free (r->pool, var_by_value_idx);
387 return casereader_create_sequential
389 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
390 &sys_file_casereader_class, r);
/* Error path: discard the partially built dictionary. */
394 dict_destroy (*dict);
399 /* Closes a system file after we're done with it.
400 Returns true if an I/O error has occurred on READER, false otherwise.
NOTE(review): the return statement is not visible in this listing; confirm
that the returned value matches this description.
Reports a failure to close the file stream with msg(), then destroys the
reader's pool. */
403 close_reader (struct sfm_reader *r)
412 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
414 msg (ME, _("Error closing system file \"%s\": %s."),
415 fh_get_file_name (r->fh), strerror (errno));
425 pool_destroy (r->pool);
430 /* Destroys READER.
R_ is the auxiliary data pointer; it is treated as the struct sfm_reader
(presumably the one handed to casereader_create_sequential() when the
file was opened). */
432 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
434 struct sfm_reader *r = r_;
438 /* Returns true if FILE is an SPSS system file, false otherwise.
Detection reads the first 4 bytes of FILE and compares them with the
"$FL2" signature. */
441 sfm_detect (FILE *file)
445 if (fread (rec_type, 4, 1, file) != 1)
/* NOTE(review): strcmp() needs rec_type to be null-terminated; its declaration
and terminator are not visible in this listing -- confirm. */
449 return !strcmp ("$FL2", rec_type);
452 /* Reads the global header of the system file.
453 Sets DICT's file label to the system file's label.
454 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
455 or to the value index of the weight variable otherwise.
456 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
457 per case that the file claims to have (although it is not
necessarily accurate), or to -1 if the claimed value is out of range.
Also sets R's integer format, float format, compression flag, compression
bias and case count.
459 Initializes INFO with header information. */
461 read_header (struct sfm_reader *r, struct dictionary *dict,
462 int *weight_idx, int *claimed_oct_cnt,
463 struct sfm_read_info *info)
466 char eye_catcher[61];
467 uint8_t raw_layout_code[4];
469 char creation_date[10];
470 char creation_time[9];
472 struct substring file_label_ss;
473 struct substring product;
475 read_string (r, rec_type, sizeof rec_type);
476 read_string (r, eye_catcher, sizeof eye_catcher);
478 if (strcmp ("$FL2", rec_type) != 0)
479 sys_error (r, _("This is not an SPSS system file."));
481 /* Identify integer format.  The layout code is accepted if it decodes as
2 or 3 in a format that is MSB-first or LSB-first. */
482 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
483 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
485 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
487 || (r->integer_format != INTEGER_MSB_FIRST
488 && r->integer_format != INTEGER_LSB_FIRST))
489 sys_error (r, _("This is not an SPSS system file."));
491 *claimed_oct_cnt = read_int (r);
/* Treat an implausible claim as "no claim" (-1). */
492 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
493 *claimed_oct_cnt = -1;
495 r->compressed = read_int (r) != 0;
497 *weight_idx = read_int (r);
499 r->case_cnt = read_int (r);
500 if ( r->case_cnt > INT_MAX / 2)
504 /* Identify floating-point format and obtain compression bias. */
505 read_bytes (r, raw_bias, sizeof raw_bias);
506 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
508 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
510 if (memcmp (raw_bias, zero_bias, 8))
511 sys_warn (r, _("Compression bias is not the usual "
512 "value of 100, or system file uses unrecognized "
513 "floating-point format."));
516 /* Some software is known to write all-zeros to this
517 field. Such software also writes floating-point
518 numbers in the format that we expect by default
519 (it seems that all software most likely does, in
520 reality), so don't warn in this case. */
/* Fall back on the IEEE double format whose byte order matches the
integer format. */
523 if (r->integer_format == INTEGER_MSB_FIRST)
524 r->float_format = FLOAT_IEEE_DOUBLE_BE;
526 r->float_format = FLOAT_IEEE_DOUBLE_LE;
528 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
530 read_string (r, creation_date, sizeof creation_date);
531 read_string (r, creation_time, sizeof creation_time);
532 read_string (r, file_label, sizeof file_label);
/* Trim spaces from the file label; set it only if something remains. */
535 file_label_ss = ss_cstr (file_label);
536 ss_trim (&file_label_ss, ss_cstr (" "));
537 if (!ss_is_empty (file_label_ss))
539 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
540 dict_set_label (dict, ss_data (file_label_ss));
543 strcpy (info->creation_date, creation_date);
544 strcpy (info->creation_time, creation_time);
545 info->integer_format = r->integer_format;
546 info->float_format = r->float_format;
547 info->compressed = r->compressed;
548 info->case_cnt = r->case_cnt;
/* The product name is the eye catcher minus its fixed prefix and
surrounding spaces, truncated to fit info->product. */
550 product = ss_cstr (eye_catcher);
551 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
552 ss_trim (&product, ss_cstr (" "));
553 str_copy_buf_trunc (info->product, sizeof info->product,
554 ss_data (product), ss_length (product));
557 /* Reads a variable (type 2) record from R and adds the
558 corresponding variable to DICT.
559 Also skips past additional variable records for long string variables.
Invalid print or write formats are reported through parse_format_spec,
which counts them in *FORMAT_WARNING_CNT. */
562 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
563 int *format_warning_cnt)
566 int has_variable_label;
567 int missing_value_code;
572 struct variable *var;
575 width = read_int (r);
576 has_variable_label = read_int (r);
577 missing_value_code = read_int (r);
578 print_format = read_int (r);
579 write_format = read_int (r);
580 read_string (r, name, sizeof name);
/* The name field is space-padded; cut it at the first space. */
581 name[strcspn (name, " ")] = '\0';
583 /* Check variable name. */
584 if (name[0] == '$' || name[0] == '#')
/* Marked for translation like every other message in this function. */
585 sys_error (r, _("Variable name begins with invalid character `%c'."),
587 if (!var_is_plausible_name (name, false))
588 sys_error (r, _("Invalid variable name `%s'."), name);
590 /* Create variable. */
591 if (width < 0 || width > 255)
592 sys_error (r, _("Bad width %d for variable %s."), width, name);
593 var = dict_create_var (dict, name, width);
596 _("Duplicate variable name `%s' within system file."),
599 /* Set the short name the same as the long name. */
600 var_set_short_name (var, 0, var_get_name (var));
602 /* Get variable label, if any. */
603 if (has_variable_label != 0 && has_variable_label != 1)
604 sys_error (r, _("Variable label indicator field is not 0 or 1."));
605 if (has_variable_label == 1)
611 if (len >= sizeof label)
612 sys_error (r, _("Variable %s has label of invalid length %zu."),
614 read_string (r, label, len + 1);
615 var_set_label (var, label);
/* Skip the padding that rounds the label up to a multiple of 4 bytes. */
617 skip_bytes (r, ROUND_UP (len, 4) - len);
620 /* Set missing values. */
621 if (missing_value_code != 0)
623 struct missing_values mv;
626 mv_init_pool (r->pool, &mv, var_get_width (var));
627 if (var_is_numeric (var))
629 if (missing_value_code < -3 || missing_value_code > 3
630 || missing_value_code == -1)
631 sys_error (r, _("Numeric missing value indicator field is not "
632 "-3, -2, 0, 1, 2, or 3."));
/* A negative code means a range comes first; -2 leaves no discrete
values after it and -3 leaves one. */
633 if (missing_value_code < 0)
635 double low = read_float (r);
636 double high = read_float (r);
637 mv_add_range (&mv, low, high);
638 missing_value_code = -missing_value_code - 2;
640 for (i = 0; i < missing_value_code; i++)
641 mv_add_num (&mv, read_float (r));
645 int mv_width = MAX (width, 8);
648 if (missing_value_code < 1 || missing_value_code > 3)
649 sys_error (r, _("String missing value indicator field is not "
652 value_init (&value, mv_width);
653 value_set_missing (&value, mv_width);
654 for (i = 0; i < missing_value_code; i++)
656 uint8_t *s = value_str_rw (&value, mv_width);
/* Each string missing value occupies 8 bytes in the file. */
657 read_bytes (r, s, 8);
660 value_destroy (&value, mv_width);
662 var_set_missing_values (var, &mv);
666 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
667 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
669 /* Account for values.
670 Skip long string continuation records, if any. */
671 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
677 for (i = 1; i < nv; i++)
679 /* Check for record type 2 and width -1. */
680 if (read_int (r) != 2 || read_int (r) != -1)
681 sys_error (r, _("Missing string continuation record."));
683 /* Skip and ignore remaining continuation data. */
684 has_variable_label = read_int (r);
685 missing_value_code = read_int (r);
686 print_format = read_int (r);
687 write_format = read_int (r);
688 read_string (r, name, sizeof name);
690 /* Variable label fields on continuation records have
691 been spotted in system files created by "SPSS Power
692 Macintosh Release 6.1". */
693 if (has_variable_label)
694 skip_bytes (r, ROUND_UP (read_int (r), 4));
699 /* Translates the format spec S from sysfile format to internal format
and installs it as V's print or write format, as selected by WHICH.
An unknown format type is a fatal error.  A spec that fails the output or
width-compatibility check is not installed; it draws a warning instead.
*FORMAT_WARNING_CNT counts such specs across calls, so that warnings stop
after max_format_warnings of them. */
702 parse_format_spec (struct sfm_reader *r, unsigned int s,
703 enum which_format which, struct variable *v,
704 int *format_warning_cnt)
706 const int max_format_warnings = 8;
/* The format type is taken from bits 16 and up of S. */
708 uint8_t raw_type = s >> 16;
714 if (!fmt_from_io (raw_type, &f.type))
715 sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
720 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
725 if (which == PRINT_FORMAT)
726 var_set_print_format (v, &f);
728 var_set_write_format (v, &f);
/* Increment the caller's counter itself.  `*++format_warning_cnt' would
advance the pointer and read whatever follows the counter, so the warning
limit would never take effect. */
730 else if (++*format_warning_cnt <= max_format_warnings)
732 char fmt_string[FMT_STRING_LEN_MAX + 1];
733 sys_warn (r, _("%s variable %s has invalid %s format %s."),
734 var_is_numeric (v) ? _("Numeric") : _("String"),
736 which == PRINT_FORMAT ? _("print") : _("write"),
737 fmt_to_string (&f, fmt_string));
/* Announce once, on the last warning allowed, that the rest are dropped. */
739 if (*format_warning_cnt == max_format_warnings)
740 sys_warn (r, _("Suppressing further invalid format warnings."));
744 /* Sets the weighting variable in DICT to the variable
745 corresponding to the given 1-based WEIGHT_IDX, if WEIGHT_IDX is nonzero
(read_header stores 0 there for an unweighted file).
The variable is looked up through VAR_BY_VALUE_IDX; a non-numeric weighting
variable is a fatal error. */
748 setup_weight (struct sfm_reader *r, int weight_idx,
749 struct variable **var_by_value_idx, struct dictionary *dict)
753 struct variable *weight_var
754 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
755 if (var_is_numeric (weight_var))
756 dict_set_weight (dict, weight_var);
758 sys_error (r, _("Weighting variable must be numeric."));
762 /* Reads a document record, type 6, from system file R, and stores
763 its text as the documents of the associated dictionary DICT.
A second document record, a bad line count, or a null byte inside the
document text is a fatal error. */
766 read_documents (struct sfm_reader *r, struct dictionary *dict)
771 if (dict_get_documents (dict) != NULL)
772 sys_error (r, _("Multiple type 6 (document) records."));
774 line_cnt = read_int (r);
776 sys_error (r, _("Number of document lines (%d) "
777 "must be greater than 0."), line_cnt);
/* NOTE(review): line_cnt comes from the file; confirm that an upper bound is
enforced so that DOC_LINE_LENGTH * line_cnt + 1 cannot overflow. */
779 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
780 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
/* A shorter string means the text contained an embedded null byte. */
781 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
782 dict_set_documents (dict, documents);
784 sys_error (r, _("Document line contains null byte."));
785 pool_free (r->pool, documents);
788 /* Read a type 7 extension record.
Reads the subtype, element SIZE and element COUNT, then hands the record to
the reader for that subtype.  Subtypes that are not used, and unrecognized
ones, are skipped; an unrecognized subtype also draws a warning. */
790 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
791 struct sfm_read_info *info)
793 int subtype = read_int (r);
794 size_t size = read_int (r);
795 size_t count = read_int (r);
796 size_t bytes = size * count;
798 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
799 allows an extra byte for a null terminator, used by some
800 extension processing routines. */
801 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
/* Marked for translation like the other messages in this file. */
802 sys_error (r, _("Record type 7 subtype %d too large."), subtype);
807 read_machine_integer_info (r, size, count, info, dict);
811 read_machine_float_info (r, size, count);
815 /* Variable sets information. We don't use these yet.
816 They only apply to GUIs; see VARSETS on the APPLY
817 DICTIONARY command in SPSS documentation. */
821 /* DATE variable information. We don't use it yet. */
826 /* Used by the MRSETS command. */
830 /* Used by the SPSS Data Entry software. */
834 read_display_parameters (r, size, count, dict);
838 read_long_var_name_map (r, size, count, dict);
842 read_long_string_map (r, size, count, dict);
846 /* New in SPSS v14? Unknown purpose. */
850 read_data_file_attributes (r, size, count, dict);
854 read_variable_attributes (r, size, count, dict);
858 /* New in SPSS 16. Contains a single string that describes
859 the character encoding, e.g. "windows-1252". */
861 char *encoding = pool_calloc (r->pool, size, count + 1);
862 read_string (r, encoding, count + 1);
863 dict_set_encoding (dict, encoding);
868 /* New in SPSS 16. Encodes value labels for long string variables. */
870 read_long_string_value_labels (r, size, count, dict);
874 sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
875 subtype, PACKAGE_BUGREPORT);
/* Skip the SIZE * COUNT bytes of a record that no reader consumed. */
879 skip_bytes (r, bytes);
882 /* Read record type 7, subtype 3.
Reads eight integers, saves the version numbers in INFO, and checks the
float and integer representations claimed by the record against the
formats already detected for R.  A float mismatch is a fatal error; an
integer mismatch only draws a warning.  If DICT has no encoding yet, the
record's character code supplies one as a fallback. */
884 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
885 struct sfm_read_info *info,
886 struct dictionary *dict)
888 int version_major = read_int (r);
889 int version_minor = read_int (r);
890 int version_revision = read_int (r);
891 int machine_code UNUSED = read_int (r);
892 int float_representation = read_int (r);
893 int compression_code UNUSED = read_int (r);
894 int integer_representation = read_int (r);
895 int character_code = read_int (r);
897 int expected_float_format;
898 int expected_integer_format;
900 if (size != 4 || count != 8)
901 sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
905 /* Save version info. */
906 info->version_major = version_major;
907 info->version_minor = version_minor;
908 info->version_revision = version_revision;
910 /* Check floating point format. */
911 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
912 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
913 expected_float_format = 1;
914 else if (r->float_format == FLOAT_Z_LONG)
915 expected_float_format = 2;
916 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
917 expected_float_format = 3;
920 if (float_representation != expected_float_format)
921 sys_error (r, _("Floating-point representation indicated by "
922 "system file (%d) differs from expected (%d)."),
/* Report the code read from the file, not the internal float_format enum. */
923 float_representation, expected_float_format);
925 /* Check integer format. */
926 if (r->integer_format == INTEGER_MSB_FIRST)
927 expected_integer_format = 1;
928 else if (r->integer_format == INTEGER_LSB_FIRST)
929 expected_integer_format = 2;
932 if (integer_representation != expected_integer_format)
/* Code 1 is big endian; the table index is 1 for big endian, 0 otherwise. */
934 static const char *const endian[] = {N_("Little Endian"), N_("Big Endian")};
935 sys_warn (r, _("Integer format indicated by system file (%s) "
936 "differs from expected (%s)."),
937 gettext (endian[integer_representation == 1]),
938 gettext (endian[expected_integer_format == 1]));
943 /* Record 7 (20) provides a much more reliable way of
944 setting the encoding.
945 The character_code is used as a fallback only. */
947 if ( NULL == dict_get_encoding (dict))
949 switch (character_code)
952 dict_set_encoding (dict, "EBCDIC-US");
956 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
957 respectively. However, there are known to be many files
958 in the wild with character code 2, yet have data which are not ASCII.
960 Therefore we ignore these values. */
964 dict_set_encoding (dict, "MS_KANJI");
967 dict_set_encoding (dict, "UTF-7");
970 dict_set_encoding (dict, "UTF-8");
/* Any other code is used as a code page number, giving "CP<code>". */
975 snprintf (enc, 100, "CP%d", character_code);
976 dict_set_encoding (dict, enc);
983 /* Read record type 7, subtype 4.
Reads three floating-point values -- the file's system-missing, highest and
lowest values, in that order -- and warns about each one that differs from
SYSMIS, HIGHEST or LOWEST respectively.  A SIZE other than 8 or a COUNT
other than 3 is a fatal error. */
985 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
987 double sysmis = read_float (r);
988 double highest = read_float (r);
989 double lowest = read_float (r);
991 if (size != 8 || count != 3)
992 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
995 if (sysmis != SYSMIS)
996 sys_warn (r, _("File specifies unexpected value %g as %s."),
999 if (highest != HIGHEST)
1000 sys_warn (r, _("File specifies unexpected value %g as %s."),
1001 highest, "HIGHEST");
1003 if (lowest != LOWEST)
1004 sys_warn (r, _("File specifies unexpected value %g as %s."),
1008 /* Read record type 7, subtype 11, which specifies how variables
1009 should be displayed in GUI environments.
Sets the measure, alignment and (when nonzero) display width of each
variable in DICT.  A record with a bad SIZE or COUNT is skipped with a
warning. */
1011 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
1012 struct dictionary *dict)
1015 bool includes_width;
1016 bool warned = false;
1021 sys_warn (r, _("Bad size %zu on extension 11."), size);
1022 skip_bytes (r, size * count);
/* The record holds either 3 values per variable (measure, width, alignment)
or 2 (measure, alignment). */
1026 n_vars = dict_get_var_cnt (dict);
1027 if (count == 3 * n_vars)
1028 includes_width = true;
1029 else if (count == 2 * n_vars)
1030 includes_width = false;
1033 sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
1035 skip_bytes (r, size * count);
1039 for (i = 0; i < n_vars; ++i)
1041 struct variable *v = dict_get_var (dict, i);
1042 int measure = read_int (r);
1043 int width = includes_width ? read_int (r) : 0;
1044 int align = read_int (r);
1046 /* SPSS 14 sometimes seems to set string variables' measure to zero. */
1048 if (0 == measure && var_is_alpha (v))
1051 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1054 sys_warn (r, _("Invalid variable display parameters "
1055 "for variable %zu (%s). "
1056 "Default parameters substituted."),
1057 i, var_get_name (v));
1062 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1063 : measure == 2 ? MEASURE_ORDINAL
1065 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1066 : align == 1 ? ALIGN_RIGHT
1069 /* Older versions (SPSS 9.0) sometimes set the display
1070 width to zero. This causes confusion in the GUI, so
1071 only set the width if it is nonzero. */
1073 var_set_display_width (v, width);
1077 /* Reads record type 7, subtype 13, which gives the long name
1078 that corresponds to each short name. Modifies variable names
1079 in DICT accordingly.
Invalid and duplicate long names draw a warning.  Marks R as having long
variable names when done. */
1081 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
1082 struct dictionary *dict)
1084 struct text_record *text;
1085 struct variable *var;
1088 text = open_text_record (r, size * count);
1089 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1092 size_t short_name_cnt;
1095 /* Validate long name. */
1096 if (!var_is_valid_name (long_name, false))
1098 sys_warn (r, _("Long variable mapping from %s to invalid "
1099 "variable name `%s'."),
1100 var_get_name (var), long_name);
1104 /* Identify any duplicates.  A long name that matches the variable's own
short name, ignoring case, is not a duplicate. */
1105 if (strcasecmp (var_get_short_name (var, 0), long_name)
1106 && dict_lookup_var (dict, long_name) != NULL)
1108 sys_warn (r, _("Duplicate long variable name `%s' "
1109 "within system file."), long_name);
1113 /* Renaming a variable may clear its short names, but we
1114 want to retain them, so we save them and re-set them afterward. */
1116 short_name_cnt = var_get_short_name_cnt (var);
1117 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
1118 for (i = 0; i < short_name_cnt; i++)
1120 const char *s = var_get_short_name (var, i);
1121 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1124 /* Set long name. */
1125 dict_rename_var (dict, var, long_name);
1127 /* Restore short names. */
1128 for (i = 0; i < short_name_cnt; i++)
1130 var_set_short_name (var, i, short_names[i]);
1131 free (short_names[i]);
1135 close_text_record (r, text);
1136 r->has_long_var_names = true;
1139 /* Reads record type 7, subtype 14, which gives the real length
1140 of each very long string. Rearranges DICT accordingly. */
1142 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
1143 struct dictionary *dict)
1145 struct text_record *text;
1146 struct variable *var;
1149 text = open_text_record (r, size * count);
1150 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1152 size_t idx = var_get_dict_index (var);
1158 length = strtol (length_s, NULL, 10);
1159 if (length < 1 || length > MAX_STRING)
1161 sys_warn (r, _("%s listed as string of invalid length %s "
1162 "in very length string record."),
1163 var_get_name (var), length_s);
1167 /* Check segments. */
1168 segment_cnt = sfm_width_to_segments (length);
1169 if (segment_cnt == 1)
1171 sys_warn (r, _("%s listed in very long string record with width %s, "
1172 "which requires only one segment."),
1173 var_get_name (var), length_s);
1176 if (idx + segment_cnt > dict_get_var_cnt (dict))
1177 sys_error (r, _("Very long string %s overflows dictionary."),
1178 var_get_name (var));
1180 /* Get the short names from the segments and check their
1182 for (i = 0; i < segment_cnt; i++)
1184 struct variable *seg = dict_get_var (dict, idx + i);
1185 int alloc_width = sfm_segment_alloc_width (length, i);
1186 int width = var_get_width (seg);
1189 var_set_short_name (var, i, var_get_short_name (seg, 0));
1190 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1191 sys_error (r, _("Very long string with width %ld has segment %d "
1192 "of width %d (expected %d)"),
1193 length, i, width, alloc_width);
1195 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1196 var_set_width (var, length);
1198 close_text_record (r, text);
1199 dict_compact_values (dict);
1202 /* Reads value labels from sysfile H and inserts them into the
1203 associated dictionary. */
1205 read_value_labels (struct sfm_reader *r,
1206 struct dictionary *dict, struct variable **var_by_value_idx)
1208 struct pool *subpool;
1212 uint8_t raw_value[8]; /* Value as uninterpreted bytes. */
1213 union value value; /* Value. */
1214 char *label; /* Null-terminated label string. */
1217 struct label *labels = NULL;
1218 int label_cnt; /* Number of labels. */
1220 struct variable **var = NULL; /* Associated variables. */
1221 int var_cnt; /* Number of associated variables. */
1222 int max_width; /* Maximum width of string variables. */
1226 subpool = pool_create_subpool (r->pool);
1228 /* Read the type 3 record and record its contents. We can't do
1229 much with the data yet because we don't know whether it is
1230 of numeric or string type. */
1232 /* Read number of labels. */
1233 label_cnt = read_int (r);
1235 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1237 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1242 /* Read each value/label tuple into labels[]. */
1243 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1244 for (i = 0; i < label_cnt; i++)
1246 struct label *label = labels + i;
1247 unsigned char label_len;
1251 read_bytes (r, label->raw_value, sizeof label->raw_value);
1253 /* Read label length. */
1254 read_bytes (r, &label_len, sizeof label_len);
1255 padded_len = ROUND_UP (label_len + 1, 8);
1257 /* Read label, padding. */
1258 label->label = pool_alloc (subpool, padded_len + 1);
1259 read_bytes (r, label->label, padded_len - 1);
1260 label->label[label_len] = 0;
1263 /* Now, read the type 4 record that has the list of variables
1264 to which the value labels are to be applied. */
1266 /* Read record type of type 4 record. */
1267 if (read_int (r) != 4)
1268 sys_error (r, _("Variable index record (type 4) does not immediately "
1269 "follow value label record (type 3) as it should."));
1271 /* Read number of variables associated with value label from type 4
1273 var_cnt = read_int (r);
1274 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1275 sys_error (r, _("Number of variables associated with a value label (%d) "
1276 "is not between 1 and the number of variables (%zu)."),
1277 var_cnt, dict_get_var_cnt (dict));
1279 /* Read the list of variables. */
1280 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1282 for (i = 0; i < var_cnt; i++)
1284 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1285 if (var_get_width (var[i]) > 8)
1286 sys_error (r, _("Value labels may not be added to long string "
1287 "variables (e.g. %s) using records types 3 and 4."),
1288 var_get_name (var[i]));
1289 max_width = MAX (max_width, var_get_width (var[i]));
1292 /* Type check the variables. */
1293 for (i = 1; i < var_cnt; i++)
1294 if (var_get_type (var[i]) != var_get_type (var[0]))
1295 sys_error (r, _("Variables associated with value label are not all of "
1296 "identical type. Variable %s is %s, but variable "
1298 var_get_name (var[0]),
1299 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1300 var_get_name (var[i]),
1301 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1303 /* Fill in labels[].value, now that we know the desired type. */
1304 for (i = 0; i < label_cnt; i++)
1306 struct label *label = labels + i;
1308 value_init_pool (subpool, &label->value, max_width);
1309 if (var_is_alpha (var[0]))
1310 u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
1311 label->raw_value, sizeof label->raw_value, ' ');
1313 label->value.f = float_get_double (r->float_format, label->raw_value);
1316 /* Assign the `value_label's to each variable. */
1317 for (i = 0; i < var_cnt; i++)
1319 struct variable *v = var[i];
1322 /* Add each label to the variable. */
1323 for (j = 0; j < label_cnt; j++)
1325 struct label *label = &labels[j];
1326 if (!var_add_value_label (v, &label->value, label->label))
1328 if (var_is_numeric (var[0]))
1329 sys_warn (r, _("Duplicate value label for %g on %s."),
1330 label->value.f, var_get_name (v));
1332 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1333 max_width, value_str (&label->value, max_width),
1339 pool_destroy (subpool);
/* Parses a set of custom attributes from TEXT and adds them to
   ATTRS.  Each attribute is a key terminated by "(", followed by
   newline-terminated values, the last of which is followed by
   ")".  Parsing stops when a "/" follows an attribute or when no
   further key can be read.

   ATTRS may be a null pointer, in which case the attributes are
   parsed but discarded. */
static void
read_attributes (struct sfm_reader *r, struct text_record *text,
                 struct attrset *attrs)
{
  for (;;)
    {
      struct attribute *attr;
      char *key;
      int index;

      /* Key. */
      key = text_get_token (text, ss_cstr ("("));
      if (key == NULL)
        return;

      /* Values, numbered from 1 for the benefit of warnings. */
      attr = attribute_create (key);
      index = 1;
      for (;;)
        {
          char *value = text_get_token (text, ss_cstr ("\n"));
          size_t length;

          if (value == NULL)
            {
              text_warn (r, text, _("Error parsing attribute value %s[%d]"),
                         key, index);
              break;
            }

          length = strlen (value);
          if (length < 2 || value[0] != '\'' || value[length - 1] != '\'')
            {
              /* Unquoted values are kept verbatim, with a
                 warning. */
              text_warn (r, text,
                         _("Attribute value %s[%d] is not quoted: %s"),
                         key, index, value);
              attribute_add_value (attr, value);
            }
          else
            {
              /* Strip the surrounding single quotes. */
              value[length - 1] = '\0';
              attribute_add_value (attr, value + 1);
            }

          /* A ")" marks the last value of this attribute. */
          if (text_match (text, ')'))
            break;
          index++;
        }

      if (attrs == NULL)
        attribute_destroy (attr);
      else
        attrset_add (attrs, attr);

      /* A "/" ends the attribute set. */
      if (text_match (text, '/'))
        return;
    }
}
/* Reads record type 7, subtype 17, which lists custom
   attributes on the data file, and adds them to DICT's
   attribute set.  The record body is SIZE * COUNT bytes long. */
static void
read_data_file_attributes (struct sfm_reader *r,
                           size_t size, size_t count,
                           struct dictionary *dict)
{
  struct text_record *record;

  record = open_text_record (r, size * count);
  read_attributes (r, record, dict_get_attributes (dict));
  close_text_record (r, record);
}
/* Reads and discards N_LABELS value/label pairs from R.  Each
   pair is a 32-bit value length, that many bytes of value, a
   32-bit label length, and that many bytes of label. */
static void
skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels)
{
  size_t remaining;

  for (remaining = n_labels; remaining > 0; remaining--)
    {
      size_t value_length;
      size_t label_length;

      value_length = read_int (r);
      skip_bytes (r, value_length);

      label_length = read_int (r);
      skip_bytes (r, label_length);
    }
}
1430 read_long_string_value_labels (struct sfm_reader *r,
1431 size_t size, size_t count,
1432 struct dictionary *d)
1434 const off_t start = ftello (r->file);
1435 while (ftello (r->file) - start < size * count)
1437 char var_name[VAR_NAME_LEN + 1];
1445 var_name_len = read_int (r);
1446 if (var_name_len > VAR_NAME_LEN)
1447 sys_error (r, _("Variable name length in long string value label "
1448 "record (%d) exceeds %d-byte limit."),
1449 var_name_len, VAR_NAME_LEN);
1450 read_string (r, var_name, var_name_len + 1);
1451 width = read_int (r);
1452 n_labels = read_int (r);
1454 v = dict_lookup_var (d, var_name);
1457 sys_warn (r, _("Ignoring long string value record for "
1458 "unknown variable %s."), var_name);
1459 skip_long_string_value_labels (r, n_labels);
1462 if (var_is_numeric (v))
1464 sys_warn (r, _("Ignoring long string value record for "
1465 "numeric variable %s."), var_name);
1466 skip_long_string_value_labels (r, n_labels);
1469 if (width != var_get_width (v))
1471 sys_warn (r, _("Ignoring long string value record for variable %s "
1472 "because the record's width (%d) does not match the "
1473 "variable's width (%d)"),
1474 var_name, width, var_get_width (v));
1475 skip_long_string_value_labels (r, n_labels);
1480 value_init_pool (r->pool, &value, width);
1481 for (i = 0; i < n_labels; i++)
1483 size_t value_length, label_length;
1488 value_length = read_int (r);
1489 if (value_length == width)
1490 read_bytes (r, value_str_rw (&value, width), width);
1493 sys_warn (r, _("Ignoring long string value %zu for variable %s, "
1494 "with width %d, that has bad value width %zu."),
1495 i, var_get_name (v), width, value_length);
1496 skip_bytes (r, value_length);
1501 label_length = read_int (r);
1502 read_string (r, label, MIN (sizeof label, label_length + 1));
1503 if (label_length >= sizeof label)
1505 /* Skip and silently ignore label text after the
1506 first 255 bytes. The maximum documented length
1507 of a label is 120 bytes so this is more than
1509 skip_bytes (r, sizeof label - (label_length + 1));
1512 if (!skip && !var_add_value_label (v, &value, label))
1513 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1514 width, value_str (&value, width), var_get_name (v));
/* Reads record type 7, subtype 18, which lists custom
   attributes on individual variables.  The record body is
   SIZE * COUNT bytes long and consists of groups, each a
   variable short name terminated by ":" followed by that
   variable's attributes.  Attributes for a name that matches no
   variable in DICT are parsed and discarded. */
static void
read_variable_attributes (struct sfm_reader *r,
                          size_t size, size_t count,
                          struct dictionary *dict)
{
  struct text_record *record = open_text_record (r, size * count);
  struct variable *var;

  while (text_read_short_name (r, dict, record, ss_cstr (":"), &var))
    {
      struct attrset *attrs = var != NULL ? var_get_attributes (var) : NULL;
      read_attributes (r, record, attrs);
    }

  close_text_record (r, record);
}
/* Forward declarations for the case-reading helpers that are
   defined further down in this file. */

/* Reports a partial case via sys_error(). */
static void partial_record (struct sfm_reader *r)
     NO_RETURN;

/* Reports a generic read error and marks the casereader as
   failed. */
static void read_error (struct casereader *, const struct sfm_reader *);

/* Per-value readers.  Each returns false if end of file is
   reached immediately. */
static bool read_case_number (struct sfm_reader *, double *);
static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
static int read_opcode (struct sfm_reader *);
static bool read_compressed_number (struct sfm_reader *, double *);
static bool read_compressed_string (struct sfm_reader *, uint8_t *);
static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
static bool skip_whole_strings (struct sfm_reader *, size_t);
1554 /* Reads and returns one case from READER's file. Returns a null
1555 pointer if not successful. */
1556 static struct ccase *
1557 sys_file_casereader_read (struct casereader *reader, void *r_)
1559 struct sfm_reader *r = r_;
1560 struct ccase *volatile c;
1566 c = case_create (r->proto);
1567 if (setjmp (r->bail_out))
1569 casereader_force_error (reader);
1574 for (i = 0; i < r->sfm_var_cnt; i++)
1576 struct sfm_var *sv = &r->sfm_vars[i];
1577 union value *v = case_data_rw_idx (c, sv->case_index);
1579 if (sv->var_width == 0)
1581 if (!read_case_number (r, &v->f))
1586 uint8_t *s = value_str_rw (v, sv->var_width);
1587 if (!read_case_string (r, s + sv->offset, sv->segment_width))
1589 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
1599 if (r->case_cnt != -1)
1600 read_error (reader, r);
/* Reports, through sys_error(), that R's file ends in the middle
   of a case. */
static void
partial_record (struct sfm_reader *r)
{
  sys_error (r, _("File ends in partial case."));
}
1611 /* Issues an error that an unspecified error occurred SFM, and
1614 read_error (struct casereader *r, const struct sfm_reader *sfm)
1616 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1617 casereader_force_error (r);
1620 /* Reads a number from R and stores its value in *D.
1621 If R is compressed, reads a compressed number;
1622 otherwise, reads a number in the regular way.
1623 Returns true if successful, false if end of file is
1624 reached immediately. */
1626 read_case_number (struct sfm_reader *r, double *d)
1631 if (!try_read_bytes (r, number, sizeof number))
1633 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1637 return read_compressed_number (r, d);
/* Reads LENGTH string bytes from R into S.
   Data is always consumed from the file in 8-byte units: if
   LENGTH is not a multiple of 8, a final full unit is read into
   a scratch buffer and only its leading LENGTH % 8 bytes are
   copied into S.
   Reads compressed strings if R is compressed.
   Returns true if successful, false if end of file is
   reached immediately.  Hitting end of file after some whole
   units were already read is reported as a partial case. */
static bool
read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
{
  size_t partial = length % 8;
  size_t whole = ROUND_DOWN (length, 8);

  if (whole && !read_whole_strings (r, s, whole))
    return false;

  if (partial)
    {
      uint8_t bounce[8];

      if (!read_whole_strings (r, bounce, sizeof bounce))
        {
          if (whole)
            partial_record (r);
          return false;
        }
      memcpy (s + whole, bounce, partial);
    }

  return true;
}
1674 /* Reads and returns the next compression opcode from R. */
1676 read_opcode (struct sfm_reader *r)
1678 assert (r->compressed);
1682 if (r->opcode_idx >= sizeof r->opcodes)
1684 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1688 opcode = r->opcodes[r->opcode_idx++];
1695 /* Reads a compressed number from R and stores its value in D.
1696 Returns true if successful, false if end of file is
1697 reached immediately. */
1699 read_compressed_number (struct sfm_reader *r, double *d)
1701 int opcode = read_opcode (r);
1709 *d = read_float (r);
1713 sys_error (r, _("Compressed data is corrupt."));
1720 *d = opcode - r->bias;
/* Reads a compressed 8-byte string segment from R and stores it
   in DST.  Opcode -1 or 252 yields false; 253 means that the 8
   bytes follow literally; 254 stands for 8 spaces; any other
   opcode is reported as corruption.
   Returns true if successful, false if end of file is
   reached immediately. */
static bool
read_compressed_string (struct sfm_reader *r, uint8_t *dst)
{
  int opcode = read_opcode (r);

  if (opcode == -1 || opcode == 252)
    return false;

  if (opcode == 253)
    read_bytes (r, dst, 8);
  else if (opcode == 254)
    memset (dst, ' ', 8);
  else
    sys_error (r, _("Compressed data is corrupt."));

  return true;
}
1755 /* Reads LENGTH string bytes from R into S.
1756 LENGTH must be a multiple of 8.
1757 Reads compressed strings if S is compressed.
1758 Returns true if successful, false if end of file is
1759 reached immediately. */
1761 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
1763 assert (length % 8 == 0);
1765 return try_read_bytes (r, s, length);
1769 for (ofs = 0; ofs < length; ofs += 8)
1770 if (!read_compressed_string (r, s + ofs))
/* Reads and discards LENGTH string bytes from R.
   LENGTH must be a multiple of 8 and, because the data passes
   through a fixed-size scratch buffer, less than 1024.
   Returns true if successful, false if end of file is
   reached immediately. */
static bool
skip_whole_strings (struct sfm_reader *r, size_t length)
{
  uint8_t discard[1024];

  assert (length < sizeof discard);
  return read_whole_strings (r, discard, length);
}
1794 /* Creates and returns a table that can be used for translating a value
1795 index into a case to a "struct variable *" for DICT. Multiple
1796 system file fields reference variables this way.
1798 This table must be created before processing the very long
1799 string extension record, because that record causes some
1800 values to be deleted from the case and the dictionary to be
1802 static struct variable **
1803 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1805 struct variable **var_by_value_idx;
1809 var_by_value_idx = pool_nmalloc (r->pool,
1810 r->oct_cnt, sizeof *var_by_value_idx);
1811 for (i = 0; i < dict_get_var_cnt (dict); i++)
1813 struct variable *v = dict_get_var (dict, i);
1814 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1817 var_by_value_idx[value_idx++] = v;
1818 for (j = 1; j < nv; j++)
1819 var_by_value_idx[value_idx++] = NULL;
1821 assert (value_idx == r->oct_cnt);
1823 return var_by_value_idx;
1826 /* Returns the "struct variable" corresponding to the given
1827 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index
1829 static struct variable *
1830 lookup_var_by_value_idx (struct sfm_reader *r,
1831 struct variable **var_by_value_idx, int value_idx)
1833 struct variable *var;
1835 if (value_idx < 1 || value_idx > r->oct_cnt)
1836 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1837 value_idx, r->oct_cnt);
1839 var = var_by_value_idx[value_idx - 1];
1841 sys_error (r, _("Variable index %d refers to long string "
/* Returns the variable in D whose first short name matches
   SHORT_NAME, ignoring case, or a null pointer if there is
   none. */
static struct variable *
lookup_var_by_short_name (struct dictionary *d, const char *short_name)
{
  struct variable *candidate;
  size_t n_vars;
  size_t idx;

  /* Fast path: the variable whose full name is SHORT_NAME, if
     its short name agrees as well. */
  candidate = dict_lookup_var (d, short_name);
  if (candidate != NULL
      && strcasecmp (var_get_short_name (candidate, 0), short_name) == 0)
    return candidate;

  /* Slow path: linear search of the whole dictionary. */
  n_vars = dict_get_var_cnt (d);
  for (idx = 0; idx < n_vars; idx++)
    {
      candidate = dict_get_var (d, idx);
      if (strcasecmp (var_get_short_name (candidate, 0), short_name) == 0)
        return candidate;
    }

  return NULL;
}
/* Helpers for reading records that contain structured text
   strings. */

/* Maximum number of warnings to issue for a single text
   record.  Warnings beyond this number are still counted but
   not displayed. */
#define MAX_TEXT_WARNINGS 5

/* State of a text record that is being parsed. */
struct text_record
  {
    struct substring buffer;    /* Record contents. */
    size_t pos;                 /* Current position in buffer. */
    int n_warnings;             /* Number of warnings issued or suppressed. */
  };
1889 /* Reads SIZE bytes into a text record for R,
1890 and returns the new text record. */
1891 static struct text_record *
1892 open_text_record (struct sfm_reader *r, size_t size)
1894 struct text_record *text = pool_alloc (r->pool, sizeof *text);
1895 char *buffer = pool_malloc (r->pool, size + 1);
1896 read_bytes (r, buffer, size);
1897 text->buffer = ss_buffer (buffer, size);
1899 text->n_warnings = 0;
1903 /* Closes TEXT, frees its storage, and issues a final warning
1904 about suppressed warnings if necesary. */
1906 close_text_record (struct sfm_reader *r, struct text_record *text)
1908 if (text->n_warnings > MAX_TEXT_WARNINGS)
1909 sys_warn (r, _("Suppressed %d additional related warnings."),
1910 text->n_warnings - MAX_TEXT_WARNINGS);
1911 pool_free (r->pool, ss_data (text->buffer));
1914 /* Reads a variable=value pair from TEXT.
1915 Looks up the variable in DICT and stores it into *VAR.
1916 Stores a null-terminated value into *VALUE. */
1918 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
1919 struct text_record *text,
1920 struct variable **var, char **value)
1924 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
1927 *value = text_get_token (text, ss_buffer ("\t\0", 2));
1931 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
1932 ss_buffer ("\t\0", 2));
1940 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
1941 struct text_record *text, struct substring delimiters,
1942 struct variable **var)
1944 char *short_name = text_get_token (text, delimiters);
1945 if (short_name == NULL)
1948 *var = lookup_var_by_short_name (dict, short_name);
1950 text_warn (r, text, _("Variable map refers to unknown variable %s."),
1955 /* Displays a warning for the current file position, limiting the
1956 number to MAX_TEXT_WARNINGS for TEXT. */
1958 text_warn (struct sfm_reader *r, struct text_record *text,
1959 const char *format, ...)
1961 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
1965 va_start (args, format);
1966 sys_msg (r, MW, format, args);
1972 text_get_token (struct text_record *text, struct substring delimiters)
1974 struct substring token;
1976 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
1978 ss_data (token)[ss_length (token)] = '\0';
1979 return ss_data (token);
1983 text_match (struct text_record *text, char c)
1985 if (text->buffer.string[text->pos] == c)
1996 /* Displays a corruption message. */
1998 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
2003 ds_init_empty (&text);
2004 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
2005 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
2006 ds_put_vformat (&text, format, args);
2008 m.category = msg_class_to_category (class);
2009 m.severity = msg_class_to_severity (class);
2010 m.where.file_name = NULL;
2011 m.where.line_number = 0;
2012 m.text = ds_cstr (&text);
2017 /* Displays a warning for the current file position. */
2019 sys_warn (struct sfm_reader *r, const char *format, ...)
2023 va_start (args, format);
2024 sys_msg (r, MW, format, args);
2028 /* Displays an error for the current file position,
2029 marks it as in an error state,
2030 and aborts reading it using longjmp. */
2032 sys_error (struct sfm_reader *r, const char *format, ...)
2036 va_start (args, format);
2037 sys_msg (r, ME, format, args);
2041 longjmp (r->bail_out, 1);
2044 /* Reads BYTE_CNT bytes into BUF.
2045 Returns true if exactly BYTE_CNT bytes are successfully read.
2046 Aborts if an I/O error or a partial read occurs.
2047 If EOF_IS_OK, then an immediate end-of-file causes false to be
2048 returned; otherwise, immediate end-of-file causes an abort
2051 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
2052 void *buf, size_t byte_cnt)
2054 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
2055 if (bytes_read == byte_cnt)
2057 else if (ferror (r->file))
2058 sys_error (r, _("System error: %s."), strerror (errno));
2059 else if (!eof_is_ok || bytes_read != 0)
2060 sys_error (r, _("Unexpected end of file."));
/* Reads BYTE_CNT bytes from R's file into BUF.
   Aborts through sys_error() upon I/O error or if end-of-file is
   encountered before all of the bytes are read. */
static void
read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
  const bool eof_is_ok = false;

  read_bytes_internal (r, eof_is_ok, buf, byte_cnt);
}
/* Reads BYTE_CNT bytes from R's file into BUF.
   Returns true if exactly BYTE_CNT bytes are successfully read.
   Returns false if an immediate end-of-file is encountered.
   Aborts through sys_error() if an I/O error or a partial read
   occurs. */
static bool
try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
  const bool eof_is_ok = true;

  return read_bytes_internal (r, eof_is_ok, buf, byte_cnt);
}
2083 /* Reads a 32-bit signed integer from R and returns its value in
2086 read_int (struct sfm_reader *r)
2089 read_bytes (r, integer, sizeof integer);
2090 return integer_get (r->integer_format, integer, sizeof integer);
2093 /* Reads a 64-bit floating-point number from R and returns its
2094 value in host format. */
2096 read_float (struct sfm_reader *r)
2099 read_bytes (r, number, sizeof number);
2100 return float_get_double (r->float_format, number);
/* Reads exactly SIZE - 1 bytes from R into BUFFER
   and stores a null byte into BUFFER[SIZE - 1].
   SIZE must be positive. */
static void
read_string (struct sfm_reader *r, char *buffer, size_t size)
{
  size_t n_data;

  assert (size > 0);
  n_data = size - 1;
  read_bytes (r, buffer, n_data);
  buffer[n_data] = '\0';
}
/* Skips BYTES bytes forward in R by reading them, at most 1024
   at a time, into a scratch buffer and discarding them.  Aborts
   through read_bytes() if the file ends first. */
static void
skip_bytes (struct sfm_reader *r, size_t bytes)
{
  char discard[1024];
  size_t left;

  for (left = bytes; left > 0; )
    {
      size_t chunk = MIN (sizeof discard, left);

      read_bytes (r, discard, chunk);
      left -= chunk;
    }
}
2126 static const struct casereader_class sys_file_casereader_class =
2128 sys_file_casereader_read,
2129 sys_file_casereader_destroy,