1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/sys-file-reader.h"
20 #include "data/sys-file-private.h"
28 #include "data/attributes.h"
29 #include "data/case.h"
30 #include "data/casereader-provider.h"
31 #include "data/casereader.h"
32 #include "data/dictionary.h"
33 #include "data/file-handle-def.h"
34 #include "data/file-name.h"
35 #include "data/format.h"
36 #include "data/identifier.h"
37 #include "data/missing-values.h"
38 #include "data/mrset.h"
39 #include "data/short-names.h"
40 #include "data/value-labels.h"
41 #include "data/value.h"
42 #include "data/variable.h"
43 #include "libpspp/array.h"
44 #include "libpspp/assertion.h"
45 #include "libpspp/compiler.h"
46 #include "libpspp/i18n.h"
47 #include "libpspp/message.h"
48 #include "libpspp/misc.h"
49 #include "libpspp/pool.h"
50 #include "libpspp/str.h"
51 #include "libpspp/stringi-set.h"
53 #include "gl/c-strtod.h"
54 #include "gl/c-ctype.h"
55 #include "gl/inttostr.h"
56 #include "gl/localcharset.h"
57 #include "gl/minmax.h"
58 #include "gl/unlocked-io.h"
59 #include "gl/xalloc.h"
63 #define _(msgid) gettext (msgid)
64 #define N_(msgid) (msgid)
68 /* subtypes 0-2 unknown */
69 EXT_INTEGER = 3, /* Machine integer info. */
70 EXT_FLOAT = 4, /* Machine floating-point info. */
71 EXT_VAR_SETS = 5, /* Variable sets. */
72 EXT_DATE = 6, /* DATE. */
73 EXT_MRSETS = 7, /* Multiple response sets. */
74 EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */
75 /* subtype 9 unknown */
76 EXT_PRODUCT_INFO = 10, /* Extra product info text. */
77 EXT_DISPLAY = 11, /* Variable display parameters. */
78 /* subtype 12 unknown */
79 EXT_LONG_NAMES = 13, /* Long variable names. */
80 EXT_LONG_STRINGS = 14, /* Long strings. */
81 /* subtype 15 unknown */
82 EXT_NCASES = 16, /* Extended number of cases. */
83 EXT_FILE_ATTRS = 17, /* Data file attributes. */
84 EXT_VAR_ATTRS = 18, /* Variable attributes. */
85 EXT_MRSETS2 = 19, /* Multiple response sets (extended). */
86 EXT_ENCODING = 20, /* Character encoding. */
87 EXT_LONG_LABELS = 21, /* Value labels for long strings. */
88 EXT_LONG_MISSING = 22, /* Missing values for long strings. */
89 EXT_DATAVIEW = 24 /* "Format properties in dataview table". */
92 /* Fields from the top-level header record. */
93 struct sfm_header_record
95 char magic[5]; /* First 4 bytes of file, then null. */
96 int weight_idx; /* 0 if unweighted, otherwise a var index. */
97 int nominal_case_size; /* Number of var positions. */
99 /* These correspond to the members of struct sfm_file_info or a dictionary
100 but in the system file's encoding rather than ASCII. */
101 char creation_date[10]; /* "dd mmm yy". */
102 char creation_time[9]; /* "hh:mm:ss". */
103 char eye_catcher[61]; /* Eye-catcher string, then product name. */
104 char file_label[65]; /* File label. */
107 struct sfm_var_record
114 int missing_value_code;
117 struct variable *var;
120 struct sfm_value_label
126 struct sfm_value_label_record
129 struct sfm_value_label *labels;
136 struct sfm_document_record
143 struct sfm_extension_record
145 int subtype; /* Record subtype. */
146 off_t pos; /* Starting offset in file. */
147 size_t size; /* Size of data elements. */
148 size_t count; /* Number of data elements. */
149 void *data; /* Contents. */
152 /* System file reader. */
155 /* Resource tracking. */
156 struct pool *pool; /* All system file state. */
157 jmp_buf bail_out; /* longjmp() target for error handling. */
160 struct file_handle *fh; /* File handle. */
161 struct fh_lock *lock; /* Mutual exclusion for file handle. */
162 FILE *file; /* File stream. */
163 off_t pos; /* Position in file. */
164 bool error; /* I/O or corruption error? */
165 struct caseproto *proto; /* Format of output cases. */
168 enum integer_format integer_format; /* On-disk integer format. */
169 enum float_format float_format; /* On-disk floating point format. */
170 struct sfm_var *sfm_vars; /* Variables. */
171 size_t sfm_var_cnt; /* Number of variables. */
172 casenumber case_cnt; /* Number of cases */
173 const char *encoding; /* String encoding. */
176 bool compressed; /* File is compressed? */
177 double bias; /* Compression bias, usually 100.0. */
178 uint8_t opcodes[8]; /* Current block of opcodes. */
179 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
180 bool corruption_warning; /* Warned about possible corruption? */
183 static const struct casereader_class sys_file_casereader_class;
185 static bool close_reader (struct sfm_reader *);
187 static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
188 const struct sfm_var_record *,
191 static void sys_msg (struct sfm_reader *r, off_t, int class,
192 const char *format, va_list args)
193 PRINTF_FORMAT (4, 0);
194 static void sys_warn (struct sfm_reader *, off_t, const char *, ...)
195 PRINTF_FORMAT (3, 4);
196 static void sys_error (struct sfm_reader *, off_t, const char *, ...)
200 static void read_bytes (struct sfm_reader *, void *, size_t);
201 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
202 static int read_int (struct sfm_reader *);
203 static double read_float (struct sfm_reader *);
204 static void read_string (struct sfm_reader *, char *, size_t);
205 static void skip_bytes (struct sfm_reader *, size_t);
207 static char *fix_line_ends (const char *);
209 static int parse_int (struct sfm_reader *, const void *data, size_t ofs);
210 static double parse_float (struct sfm_reader *, const void *data, size_t ofs);
212 static void read_variable_record (struct sfm_reader *,
213 struct sfm_var_record *);
214 static void read_value_label_record (struct sfm_reader *,
215 struct sfm_value_label_record *,
217 static struct sfm_document_record *read_document_record (struct sfm_reader *);
218 static struct sfm_extension_record *read_extension_record (
219 struct sfm_reader *, int subtype);
220 static void skip_extension_record (struct sfm_reader *, int subtype);
222 static const char *choose_encoding (
224 const struct sfm_header_record *,
225 const struct sfm_extension_record *ext_integer,
226 const struct sfm_extension_record *ext_encoding);
228 static struct text_record *open_text_record (
229 struct sfm_reader *, const struct sfm_extension_record *,
230 bool recode_to_utf8);
231 static void close_text_record (struct sfm_reader *,
232 struct text_record *);
233 static bool read_variable_to_value_pair (struct sfm_reader *,
235 struct text_record *,
236 struct variable **var, char **value);
237 static void text_warn (struct sfm_reader *r, struct text_record *text,
238 const char *format, ...)
239 PRINTF_FORMAT (3, 4);
240 static char *text_get_token (struct text_record *,
241 struct substring delimiters, char *delimiter);
242 static bool text_match (struct text_record *, char c);
243 static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
244 struct text_record *,
245 struct substring delimiters,
247 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
248 struct text_record *,
249 struct substring delimiters,
251 static const char *text_parse_counted_string (struct sfm_reader *,
252 struct text_record *);
253 static size_t text_pos (const struct text_record *);
254 static const char *text_get_all (const struct text_record *);
256 static bool close_reader (struct sfm_reader *r);
258 /* Dictionary reader. */
266 static void read_header (struct sfm_reader *, struct sfm_read_info *,
267 struct sfm_header_record *);
268 static void parse_header (struct sfm_reader *,
269 const struct sfm_header_record *,
270 struct sfm_read_info *, struct dictionary *);
271 static void parse_variable_records (struct sfm_reader *, struct dictionary *,
272 struct sfm_var_record *, size_t n);
273 static void parse_format_spec (struct sfm_reader *, off_t pos,
274 unsigned int format, enum which_format,
275 struct variable *, int *format_warning_cnt);
276 static void parse_document (struct dictionary *, struct sfm_document_record *);
277 static void parse_display_parameters (struct sfm_reader *,
278 const struct sfm_extension_record *,
279 struct dictionary *);
280 static void parse_machine_integer_info (struct sfm_reader *,
281 const struct sfm_extension_record *,
282 struct sfm_read_info *);
283 static void parse_machine_float_info (struct sfm_reader *,
284 const struct sfm_extension_record *);
285 static void parse_extra_product_info (struct sfm_reader *,
286 const struct sfm_extension_record *,
287 struct sfm_read_info *);
288 static void parse_mrsets (struct sfm_reader *,
289 const struct sfm_extension_record *,
290 struct dictionary *);
291 static void parse_long_var_name_map (struct sfm_reader *,
292 const struct sfm_extension_record *,
293 struct dictionary *);
294 static void parse_long_string_map (struct sfm_reader *,
295 const struct sfm_extension_record *,
296 struct dictionary *);
297 static void parse_value_labels (struct sfm_reader *, struct dictionary *,
298 const struct sfm_var_record *,
300 const struct sfm_value_label_record *);
301 static void parse_data_file_attributes (struct sfm_reader *,
302 const struct sfm_extension_record *,
303 struct dictionary *);
304 static void parse_variable_attributes (struct sfm_reader *,
305 const struct sfm_extension_record *,
306 struct dictionary *);
307 static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
308 static void parse_long_string_value_labels (struct sfm_reader *,
309 const struct sfm_extension_record *,
310 struct dictionary *);
311 static void parse_long_string_missing_values (struct sfm_reader *,
312 const struct sfm_extension_record *,
313 struct dictionary *);
315 /* Frees the strings inside INFO. */
317 sfm_read_info_destroy (struct sfm_read_info *info)
321 free (info->creation_date);
322 free (info->creation_time);
323 free (info->product);
324 free (info->product_ext);
/* NOTE(review): several lines of this function (the return type, braces,
   the record-type `case' labels and most of the error-exit path) are not
   visible in this listing.  The comments added below describe only the
   statements that are visible. */
328 /* Opens the system file designated by file handle FH for reading. Reads the
   system file's dictionary into *DICTP.

   Ordinarily the reader attempts to automatically detect the character
   encoding based on the file's contents. This isn't always possible,
   especially for files written by old versions of SPSS or PSPP, so specifying
   a nonnull ENCODING overrides the choice of character encoding.

   If INFOP is non-null, then it receives additional info about the system file,
   which the caller must eventually free with sfm_read_info_destroy() when it
   is no longer needed. If INFOP is null, a scratch struct sfm_read_info is
   allocated here instead. */
340 sfm_open_reader (struct file_handle *fh, const char *volatile encoding,
341 struct dictionary **dictp, struct sfm_read_info *infop)
343 struct sfm_reader *volatile r = NULL;
344 struct sfm_read_info *volatile info;
346 struct sfm_header_record header;
348 struct sfm_var_record *vars;
349 size_t n_vars, allocated_vars;
351 struct sfm_value_label_record *labels;
352 size_t n_labels, allocated_labels;
354 struct sfm_document_record *document;
/* One slot per type 7 subtype, indexed by subtype number; a null slot
   means that subtype has not been seen. */
356 struct sfm_extension_record *extensions[32];
358 struct dictionary *volatile dict = NULL;
361 /* Create and initialize reader. */
362 r = pool_create_container (struct sfm_reader, pool);
/* An opcode index equal to the size of the opcode array means that no
   opcodes are buffered (see the opcode_idx member). */
368 r->opcode_idx = sizeof r->opcodes;
369 r->corruption_warning = false;
/* Use the caller's INFOP when there is one, otherwise a heap-allocated
   scratch structure; either way start from all-zero contents. */
371 info = infop ? infop : xmalloc (sizeof *info);
372 memset (info, 0, sizeof *info);
374 /* TRANSLATORS: this fragment will be interpolated into
   messages in fh_lock() that identify types of files. */
376 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
380 r->file = fn_open (fh_get_file_name (fh), "rb");
383 msg (ME, _("Error opening `%s' for reading as a system file: %s."),
384 fh_get_file_name (r->fh), strerror (errno));
/* r->bail_out is the longjmp() target used for error handling, so a
   nonzero return here means that one of the calls below bailed out.
   NOTE(review): the statement executed in that case is not visible. */
388 if (setjmp (r->bail_out))
392 read_header (r, info, &header);
395 n_vars = allocated_vars = 0;
398 n_labels = allocated_labels = 0;
402 memset (extensions, 0, sizeof extensions);
/* The statements from here to the "Unrecognized record type" error are
   the per-record handlers.  Nothing is interpreted yet: each record is
   only read into pool memory. */
412 read_int (r); /* Skip filler. */
/* Variable record: grow the array when it is full, then read the record
   into the next free element. */
419 if (n_vars >= allocated_vars)
420 vars = pool_2nrealloc (r->pool, vars, &allocated_vars,
422 read_variable_record (r, &vars[n_vars++]);
/* Value label record: same growth scheme.  The number of variable
   records read so far is passed along so that the reader can validate
   the variable count in the record. */
426 if (n_labels >= allocated_labels)
427 labels = pool_2nrealloc (r->pool, labels, &allocated_labels,
429 read_value_label_record (r, &labels[n_labels++], n_vars);
433 /* A Type 4 record is always immediately after a type 3 record,
   so the code for type 3 records reads the type 4 record too. */
435 sys_error (r, r->pos, _("Misplaced type 4 record."));
/* At most one document record is allowed. */
438 if (document != NULL)
439 sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
440 document = read_document_record (r);
/* Extension record: subtypes that do not fit in extensions[], and
   repeated subtypes, are skipped (the message text below shows that a
   diagnostic is issued; the call that issues it is not visible).
   Otherwise the record is stored in its slot. */
444 subtype = read_int (r);
445 if (subtype < 0 || subtype >= sizeof extensions / sizeof *extensions)
448 _("Unrecognized record type 7, subtype %d. Please "
449 "send a copy of this file, and the syntax which "
450 "created it to %s."),
451 subtype, PACKAGE_BUGREPORT);
452 skip_extension_record (r, subtype);
454 else if (extensions[subtype] != NULL)
457 _("Record type 7, subtype %d found here has the same "
458 "type as the record found near offset 0x%llx. "
459 "Please send a copy of this file, and the syntax "
460 "which created it to %s."),
461 subtype, (long long int) extensions[subtype]->pos,
463 skip_extension_record (r, subtype);
466 extensions[subtype] = read_extension_record (r, subtype);
470 sys_error (r, r->pos, _("Unrecognized record type %d."), type);
475 /* Now actually parse what we read.

   First, figure out the correct character encoding, because this determines
   how the rest of the header data is to be interpreted. */
479 dict = dict_create (encoding
481 : choose_encoding (r, &header, extensions[EXT_INTEGER],
482 extensions[EXT_ENCODING]));
483 r->encoding = dict_get_encoding (dict);
485 /* These records don't use variables at all. */
486 if (document != NULL)
487 parse_document (dict, document);
489 if (extensions[EXT_INTEGER] != NULL)
490 parse_machine_integer_info (r, extensions[EXT_INTEGER], info);
492 if (extensions[EXT_FLOAT] != NULL)
493 parse_machine_float_info (r, extensions[EXT_FLOAT]);
495 if (extensions[EXT_PRODUCT_INFO] != NULL)
496 parse_extra_product_info (r, extensions[EXT_PRODUCT_INFO], info);
498 if (extensions[EXT_FILE_ATTRS] != NULL)
499 parse_data_file_attributes (r, extensions[EXT_FILE_ATTRS], dict);
501 parse_header (r, &header, info, dict);
503 /* Parse the variable records, the basis of almost everything else. */
504 parse_variable_records (r, dict, vars, n_vars);
506 /* Parse value labels and the weight variable immediately after the variable
   records. These records use indexes into var_recs[], so we must parse them
   before those indexes become invalidated by very long string variables. */
509 for (i = 0; i < n_labels; i++)
510 parse_value_labels (r, dict, vars, n_vars, &labels[i]);
511 if (header.weight_idx != 0)
513 struct variable *weight_var;
/* NOTE(review): 76 is passed in the off_t position of the prototype;
   presumably it is the file offset of the weight index field, used for
   error reporting -- confirm. */
515 weight_var = lookup_var_by_index (r, 76, vars, n_vars,
517 if (var_is_numeric (weight_var))
518 dict_set_weight (dict, weight_var);
520 sys_error (r, -1, _("Weighting variable must be numeric "
521 "(not string variable `%s')."),
522 var_get_name (weight_var));
525 if (extensions[EXT_DISPLAY] != NULL)
526 parse_display_parameters (r, extensions[EXT_DISPLAY], dict);
528 /* The following records use short names, so they need to be parsed before
   parse_long_var_name_map() changes short names to long names. */
530 if (extensions[EXT_MRSETS] != NULL)
531 parse_mrsets (r, extensions[EXT_MRSETS], dict);
533 if (extensions[EXT_MRSETS2] != NULL)
534 parse_mrsets (r, extensions[EXT_MRSETS2], dict);
536 if (extensions[EXT_LONG_STRINGS] != NULL)
537 parse_long_string_map (r, extensions[EXT_LONG_STRINGS], dict);
539 /* Now rename variables to their long names. */
540 parse_long_var_name_map (r, extensions[EXT_LONG_NAMES], dict);
542 /* The following records use long names, so they need to follow renaming. */
543 if (extensions[EXT_VAR_ATTRS] != NULL)
545 parse_variable_attributes (r, extensions[EXT_VAR_ATTRS], dict);
547 /* Roles use the $@Role attribute. */
548 assign_variable_roles (r, dict);
551 if (extensions[EXT_LONG_LABELS] != NULL)
552 parse_long_string_value_labels (r, extensions[EXT_LONG_LABELS], dict);
553 if (extensions[EXT_LONG_MISSING] != NULL)
554 parse_long_string_missing_values (r, extensions[EXT_LONG_MISSING], dict);
/* NOTE(review): the comment that follows has lost its last line in this
   listing (it stops at "so don't warn in"), so as written it is not
   closed before the size check below it.  The check itself compares the
   header's nominal case size with the number of variable records read
   and is skipped when the size is -1 or the major version is 13. */
556 /* Warn if the actual amount of data per case differs from the
557 amount that the header claims. SPSS version 13 gets this
558 wrong when very long strings are involved, so don't warn in
560 if (header.nominal_case_size != -1 && header.nominal_case_size != n_vars
561 && info->version_major != 13)
562 sys_warn (r, -1, _("File header claims %d variable positions but "
563 "%zu were read from file."),
564 header.nominal_case_size, n_vars);
566 /* Create an index of dictionary variable widths for
567 sfm_read_case to use. We cannot use the `struct variable's
568 from the dictionary we created, because the caller owns the
569 dictionary and may destroy or modify its variables. */
570 sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
/* Hand the array to the pool so that it is freed along with the rest of
   the reader's state. */
571 pool_register (r->pool, free, r->sfm_vars);
572 r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
/* NOTE(review): sfm_read_info_destroy() appears once before the return
   and once after it; presumably these are the success and failure exits
   and apply only to the scratch INFO allocated above.  The guarding
   conditions are not visible -- confirm. */
577 sfm_read_info_destroy (info);
/* A case count of -1 (unknown) is reported to the casereader as
   CASENUMBER_MAX. */
581 return casereader_create_sequential
583 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
584 &sys_file_casereader_class, r);
589 sfm_read_info_destroy (info);
/* NOTE(review): in this listing the comment below has lost its final line
   (the sentence stops at "false") together with its closing delimiter,
   and the function's return statement is not visible either, so the
   return convention cannot be confirmed from here.  What is visible: a
   failure of fn_close() on the reader's file is reported with msg(), and
   the reader's pool, documented as holding all system file state, is
   destroyed. */
599 /* Closes a system file after we're done with it.
600 Returns true if an I/O error has occurred on READER, false
603 close_reader (struct sfm_reader *r)
612 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
614 msg (ME, _("Error closing system file `%s': %s."),
615 fh_get_file_name (r->fh), strerror (errno));
625 pool_destroy (r->pool);
630 /* Destroys READER. */
632 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
634 struct sfm_reader *r = r_;
638 /* Returns true if FILE is an SPSS system file,
641 sfm_detect (FILE *file)
645 if (fread (magic, 4, 1, file) != 1)
649 return !strcmp (ASCII_MAGIC, magic) || !strcmp (EBCDIC_MAGIC, magic);
/* NOTE(review): a few lines of this function (return type, braces, the
   declaration of raw_bias, the last arguments of the integer_identify()
   calls and some `else' lines) are not visible in this listing. */
652 /* Reads the global header of the system file. Initializes *HEADER and *INFO,
   except for the string fields in *INFO, which parse_header() will initialize
   later once the file's encoding is known. */
656 read_header (struct sfm_reader *r, struct sfm_read_info *info,
657 struct sfm_header_record *header)
659 uint8_t raw_layout_code[4];
/* header->magic is declared as the first 4 bytes of the file plus a null,
   so it can be compared with strcmp() below. */
662 read_string (r, header->magic, sizeof header->magic);
663 read_string (r, header->eye_catcher, sizeof header->eye_catcher);
665 if (strcmp (ASCII_MAGIC, header->magic)
666 && strcmp (EBCDIC_MAGIC, header->magic))
667 sys_error (r, 0, _("This is not an SPSS system file."));
669 /* Identify integer format. */
/* The raw layout code is accepted if it decodes to 2 or to 3 in some
   integer format, and only if that format is plain most- or
   least-significant-byte-first. */
670 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
671 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
673 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
675 || (r->integer_format != INTEGER_MSB_FIRST
676 && r->integer_format != INTEGER_LSB_FIRST))
677 sys_error (r, 64, _("This is not an SPSS system file."));
/* An implausible nominal case size is normalized to -1, which
   sfm_open_reader() treats as "do not compare". */
679 header->nominal_case_size = read_int (r);
680 if (header->nominal_case_size < 0
681 || header->nominal_case_size > INT_MAX / 16)
682 header->nominal_case_size = -1;
684 r->compressed = read_int (r) != 0;
686 header->weight_idx = read_int (r);
/* NOTE(review): the statement guarded by the range check on case_cnt is
   not visible; presumably it stores -1, the value that sfm_open_reader()
   maps to CASENUMBER_MAX -- confirm. */
688 r->case_cnt = read_int (r);
689 if ( r->case_cnt > INT_MAX / 2)
692 /* Identify floating-point format and obtain compression bias. */
693 read_bytes (r, raw_bias, sizeof raw_bias);
694 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
696 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
698 if (memcmp (raw_bias, zero_bias, 8))
699 sys_warn (r, r->pos - 8,
700 _("Compression bias is not the usual "
701 "value of 100, or system file uses unrecognized "
702 "floating-point format."));
705 /* Some software is known to write all-zeros to this
   field. Such software also writes floating-point
   numbers in the format that we expect by default
   (it seems that all software most likely does, in
   reality), so don't warn in this case. */
/* Fall back to IEEE double in the byte order that matches the integer
   format identified above. */
712 if (r->integer_format == INTEGER_MSB_FIRST)
713 r->float_format = FLOAT_IEEE_DOUBLE_BE;
715 r->float_format = FLOAT_IEEE_DOUBLE_LE;
/* Decode the bias bytes into a native double using the chosen format. */
717 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
/* These strings stay in the file's own encoding; parse_header() recodes
   them later. */
719 read_string (r, header->creation_date, sizeof header->creation_date);
720 read_string (r, header->creation_time, sizeof header->creation_time);
721 read_string (r, header->file_label, sizeof header->file_label);
/* Copy the non-string results into INFO. */
724 info->integer_format = r->integer_format;
725 info->float_format = r->float_format;
726 info->compressed = r->compressed;
727 info->case_cnt = r->case_cnt;
/* NOTE(review): some lines of this function (return type, braces, the
   statement that reads the label length into LEN, an `else', and the end
   of the string missing-value message) are not visible in this listing. */
730 /* Reads a variable (type 2) record from R into RECORD.  Nothing is
   interpreted here beyond validating the label indicator and the missing
   value code; parse_variable_records() turns the raw fields into
   dictionary variables later. */
732 read_variable_record (struct sfm_reader *r, struct sfm_var_record *record)
734 int has_variable_label;
736 memset (record, 0, sizeof *record);
/* Fixed part of the record: five integers followed by the raw name. */
738 record->pos = r->pos;
739 record->width = read_int (r);
740 has_variable_label = read_int (r);
741 record->missing_value_code = read_int (r);
742 record->print_format = read_int (r);
743 record->write_format = read_int (r);
744 read_bytes (r, record->name, sizeof record->name);
746 if (has_variable_label == 1)
/* At most MAX_LABEL_LEN bytes of the label are kept; any excess and the
   padding are skipped so that the file position stays in step. */
748 enum { MAX_LABEL_LEN = 255 };
749 size_t len, read_len;
753 /* Read up to MAX_LABEL_LEN bytes of label. */
754 read_len = MIN (MAX_LABEL_LEN, len);
755 record->label = pool_malloc (r->pool, read_len + 1);
756 read_string (r, record->label, read_len + 1);
758 /* Skip unread label bytes. */
759 skip_bytes (r, len - read_len);
761 /* Skip label padding up to multiple of 4 bytes. */
762 skip_bytes (r, ROUND_UP (len, 4) - len);
764 else if (has_variable_label != 0)
765 sys_error (r, record->pos,
766 _("Variable label indicator field is not 0 or 1."));
768 /* Set missing values. */
769 if (record->missing_value_code != 0)
771 int code = record->missing_value_code;
/* Width 0 marks a numeric variable.  Numeric codes may be -3, -2, 1, 2
   or 3; parse_variable_records() treats the negative ones as a range. */
772 if (record->width == 0)
774 if (code < -3 || code > 3 || code == -1)
775 sys_error (r, record->pos,
776 _("Numeric missing value indicator field is not "
777 "-3, -2, 0, 1, 2, or 3."));
/* Otherwise only the codes 1 through 3 are accepted. */
781 if (code < 1 || code > 3)
782 sys_error (r, record->pos,
783 _("String missing value indicator field is not "
/* Each missing value occupies 8 bytes in the file. */
787 read_bytes (r, record->missing, 8 * abs (code));
791 /* Reads value labels from R into RECORD. */
793 read_value_label_record (struct sfm_reader *r,
794 struct sfm_value_label_record *record,
799 /* Read type 3 record. */
800 record->pos = r->pos;
801 record->n_labels = read_int (r);
802 if (record->n_labels > SIZE_MAX / sizeof *record->labels)
803 sys_error (r, r->pos - 4, _("Invalid number of labels %zu."),
805 record->labels = pool_nmalloc (r->pool, record->n_labels,
806 sizeof *record->labels);
807 for (i = 0; i < record->n_labels; i++)
809 struct sfm_value_label *label = &record->labels[i];
810 unsigned char label_len;
813 read_bytes (r, label->value, sizeof label->value);
815 /* Read label length. */
816 read_bytes (r, &label_len, sizeof label_len);
817 padded_len = ROUND_UP (label_len + 1, 8);
819 /* Read label, padding. */
820 label->label = pool_malloc (r->pool, padded_len + 1);
821 read_bytes (r, label->label, padded_len - 1);
822 label->label[label_len] = '\0';
825 /* Read record type of type 4 record. */
826 if (read_int (r) != 4)
827 sys_error (r, r->pos - 4,
828 _("Variable index record (type 4) does not immediately "
829 "follow value label record (type 3) as it should."));
831 /* Read number of variables associated with value label from type 4
833 record->n_vars = read_int (r);
834 if (record->n_vars < 1 || record->n_vars > n_vars)
835 sys_error (r, r->pos - 4,
836 _("Number of variables associated with a value label (%zu) "
837 "is not between 1 and the number of variables (%zu)."),
838 record->n_vars, n_vars);
839 record->vars = pool_nmalloc (r->pool, record->n_vars, sizeof *record->vars);
840 for (i = 0; i < record->n_vars; i++)
841 record->vars[i] = read_int (r);
844 /* Reads a document record from R and returns it. */
845 static struct sfm_document_record *
846 read_document_record (struct sfm_reader *r)
848 struct sfm_document_record *record;
851 record = pool_malloc (r->pool, sizeof *record);
852 record->pos = r->pos;
854 n_lines = read_int (r);
855 if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
856 sys_error (r, record->pos,
857 _("Number of document lines (%d) "
858 "must be greater than 0 and less than %d."),
859 n_lines, INT_MAX / DOC_LINE_LENGTH);
861 record->n_lines = n_lines;
862 record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
863 read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines);
869 read_extension_record_header (struct sfm_reader *r, int subtype,
870 struct sfm_extension_record *record)
872 record->subtype = subtype;
873 record->pos = r->pos;
874 record->size = read_int (r);
875 record->count = read_int (r);
877 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
878 allows an extra byte for a null terminator, used by some
879 extension processing routines. */
880 if (record->size != 0
881 && size_overflow_p (xsum (1, xtimes (record->count, record->size))))
882 sys_error (r, record->pos, "Record type 7 subtype %d too large.", subtype);
/* NOTE(review): some lines of this function (braces, the members of
   struct extension_record_type, two table rows, the declaration of
   n_bytes, and the return statements) are not visible in this listing. */
885 /* Reads the extension (type 7) record with the given SUBTYPE from R.
   The record is allocated from R's pool.  NOTE(review): the statements
   that return it, and whatever is returned for ignored or unrecognized
   subtypes, are not visible here -- confirm. */
886 static struct sfm_extension_record *
887 read_extension_record (struct sfm_reader *r, int subtype)
889 struct extension_record_type
/* Each row is { subtype, expected element size, expected element count }.
   As used by the loop below, a zero size or count is not checked, and a
   row with both zero marks a subtype whose data is ignored. */
896 static const struct extension_record_type types[] =
898 /* Implemented record types. */
899 { EXT_INTEGER, 4, 8 },
901 { EXT_MRSETS, 1, 0 },
902 { EXT_PRODUCT_INFO, 1, 0 },
903 { EXT_DISPLAY, 4, 0 },
904 { EXT_LONG_NAMES, 1, 0 },
905 { EXT_LONG_STRINGS, 1, 0 },
906 { EXT_NCASES, 8, 2 },
907 { EXT_FILE_ATTRS, 1, 0 },
908 { EXT_VAR_ATTRS, 1, 0 },
909 { EXT_MRSETS2, 1, 0 },
910 { EXT_ENCODING, 1, 0 },
911 { EXT_LONG_LABELS, 1, 0 },
912 { EXT_LONG_MISSING, 1, 0 },
914 /* Ignored record types. */
915 { EXT_VAR_SETS, 0, 0 },
917 { EXT_DATA_ENTRY, 0, 0 },
918 { EXT_DATAVIEW, 0, 0 },
921 const struct extension_record_type *type;
922 struct sfm_extension_record *record;
925 record = pool_malloc (r->pool, sizeof *record);
926 read_extension_record_header (r, subtype, record);
/* The header reader has already rejected sizes for which
   count * size + 1 would overflow. */
927 n_bytes = record->count * record->size;
/* Look the subtype up in the table and validate the record against its
   row. */
929 for (type = types; type < &types[sizeof types / sizeof *types]; type++)
930 if (subtype == type->subtype)
932 if (type->size > 0 && record->size != type->size)
933 sys_warn (r, record->pos,
934 _("Record type 7, subtype %d has bad size %zu "
935 "(expected %d)."), subtype, record->size, type->size);
936 else if (type->count > 0 && record->count != type->count)
937 sys_warn (r, record->pos,
938 _("Record type 7, subtype %d has bad count %zu "
939 "(expected %d)."), subtype, record->count, type->count);
940 else if (type->count == 0 && type->size == 0)
942 /* Ignore this record. */
/* The data buffer gets one extra byte, set to a null terminator, beyond
   the n_bytes read from the file. */
946 char *data = pool_malloc (r->pool, n_bytes + 1);
947 data[n_bytes] = '\0';
950 read_bytes (r, record->data, n_bytes);
957 sys_warn (r, record->pos,
958 _("Unrecognized record type 7, subtype %d. Please send a "
959 "copy of this file, and the syntax which created it to %s."),
960 subtype, PACKAGE_BUGREPORT);
/* Data that was not stored is skipped so that the file position stays in
   step. */
963 skip_bytes (r, n_bytes);
968 skip_extension_record (struct sfm_reader *r, int subtype)
970 struct sfm_extension_record record;
972 read_extension_record_header (r, subtype, &record);
973 skip_bytes (r, record.count * record.size);
/* Transfers the string fields of HEADER, which are still in the
   dictionary's encoding, into DICT and INFO as UTF-8: the file label goes
   to DICT, and the creation date, creation time and product name go to
   INFO.
   NOTE(review): a few lines of this function (return type, braces, the
   declaration of fixed_label and whatever follows dict_set_label()) are
   not visible in this listing. */
977 parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
978 struct sfm_read_info *info, struct dictionary *dict)
980 const char *dict_encoding = dict_get_encoding (dict);
981 struct substring product;
982 struct substring label;
985 /* Convert file label to UTF-8 and put it into DICT. */
986 label = recode_substring_pool ("UTF-8", dict_encoding,
987 ss_cstr (header->file_label), r->pool);
/* Strip surrounding spaces, then terminate the string in place so that it
   can be passed on as a C string. */
988 ss_trim (&label, ss_cstr (" "));
989 label.string[label.length] = '\0';
990 fixed_label = fix_line_ends (label.string);
991 dict_set_label (dict, fixed_label);
994 /* Put creation date and time in UTF-8 into INFO. */
995 info->creation_date = recode_string ("UTF-8", dict_encoding,
996 header->creation_date, -1);
997 info->creation_time = recode_string ("UTF-8", dict_encoding,
998 header->creation_time, -1);
1000 /* Put product name into INFO, dropping eye-catcher string if present. */
1001 product = recode_substring_pool ("UTF-8", dict_encoding,
1002 ss_cstr (header->eye_catcher), r->pool);
1003 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
1004 ss_trim (&product, ss_cstr (" "));
/* INFO gets its own copy of the product name; sfm_read_info_destroy()
   frees it. */
1005 info->product = ss_xstrdup (product);
/* NOTE(review): many lines of this function are not visible in this
   listing, including the end of the header comment below.  That comment
   stops at "for long string" and has no closing delimiter, so as written
   it runs on through the first statements of the loop.  The lines up to
   and including "Set the short name..." are therefore left untouched. */
1008 /* Reads a variable (type 2) record from R and adds the
1009 corresponding variable to DICT.
1010 Also skips past additional variable records for long string
1013 parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
1014 struct sfm_var_record *var_recs, size_t n_var_recs)
1016 const char *dict_encoding = dict_get_encoding (dict);
1017 struct sfm_var_record *rec;
1020 for (rec = var_recs; rec < &var_recs[n_var_recs]; )
1022 struct variable *var;
1027 name = recode_string_pool ("UTF-8", dict_encoding,
1028 rec->name, 8, r->pool);
1029 name[strcspn (name, " ")] = '\0';
1031 if (!dict_id_is_valid (dict, name, false)
1032 || name[0] == '$' || name[0] == '#')
1033 sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
1035 if (rec->width < 0 || rec->width > 255)
1036 sys_error (r, rec->pos,
1037 _("Bad width %d for variable %s."), rec->width, name);
1039 var = rec->var = dict_create_var (dict, name, rec->width);
1041 sys_error (r, rec->pos, _("Duplicate variable name `%s'."), name);
1043 /* Set the short name the same as the long name. */
1044 var_set_short_name (var, 0, name);
1046 /* Get variable label, if any. */
1051 utf8_label = recode_string_pool ("UTF-8", dict_encoding,
1052 rec->label, -1, r->pool);
1053 var_set_label (var, utf8_label, false);
1056 /* Set missing values. */
1057 if (rec->missing_value_code != 0)
1059 int width = var_get_width (var);
1060 struct missing_values mv;
1062 mv_init_pool (r->pool, &mv, width);
1063 if (var_is_numeric (var))
/* A negative code means that a range is present.  With a range, one
   discrete value follows only when the code is -3; without a range, the
   code itself is the number of discrete values. */
1065 bool has_range = rec->missing_value_code < 0;
1066 int n_discrete = (has_range
1067 ? rec->missing_value_code == -3
1068 : rec->missing_value_code);
/* The range's low and high bounds are the first two 8-byte values. */
1073 double low = parse_float (r, rec->missing, 0);
1074 double high = parse_float (r, rec->missing, 8);
1076 /* Deal with SPSS 21 change in representation. */
1080 mv_add_range (&mv, low, high);
1084 for (i = 0; i < n_discrete; i++)
1086 mv_add_num (&mv, parse_float (r, rec->missing, ofs));
/* String variable: each missing value is an 8-byte slot, of which at most
   the variable's width is used. */
1094 value_init_pool (r->pool, &value, width);
1095 value_set_missing (&value, width);
1096 for (i = 0; i < rec->missing_value_code; i++)
1097 mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
1099 var_set_missing_values (var, &mv);
/* The print and write formats are the 4th and 5th integers that
   read_variable_record() reads after saving the record position.  The
   offsets 12 and 16 are presumably their byte offsets, used only for
   diagnostics -- this assumes 4-byte integers. */
1103 parse_format_spec (r, rec->pos + 12, rec->print_format,
1104 PRINT_FORMAT, var, &n_warnings);
1105 parse_format_spec (r, rec->pos + 16, rec->write_format,
1106 WRITE_FORMAT, var, &n_warnings);
1108 /* Account for values.
   Skip long string continuation records, if any.  A variable of nonzero
   width W takes DIV_RND_UP (W, 8) values, and every value after the first
   must be backed by a record whose width is -1. */
1110 n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
1111 for (i = 1; i < n_values; i++)
1112 if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
1113 sys_error (r, rec->pos, _("Missing string continuation record."));
/* NOTE(review): the header comment below has lost its last line and its
   closing delimiter in this listing, so as written it runs on through the
   first half of the function.  Several declarations and the ends of the
   two warning messages are also not visible.  The lines up to and
   including "Actually observed in the wild..." are left untouched.

   From the visible statements: FORMAT carries a raw type in bits 16 and
   up and a width in bits 8-15.  A format that passes the checks is
   installed as V's print or write format according to WHICH.  A FORMAT of
   0 is accepted silently.  Anything else counts as an invalid format and
   is reported at file position POS. */
1118 /* Translates the format spec from sysfile format to internal
1121 parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
1122 enum which_format which, struct variable *v,
1125 const int max_warnings = 8;
1126 uint8_t raw_type = format >> 16;
1127 uint8_t w = format >> 8;
1136 ok = (fmt_from_io (raw_type, &f.type)
1137 && fmt_check_output (&f)
1138 && fmt_check_width_compat (&f, var_get_width (v)));
1143 if (which == PRINT_FORMAT)
1144 var_set_print_format (v, &f);
1146 var_set_write_format (v, &f);
1148 else if (format == 0)
1150 /* Actually observed in the wild. No point in warning about it. */
/* Every invalid format is counted in *N_WARNINGS, but only the first
   MAX_WARNINGS of them are reported.  The last one reported is followed
   by a notice that further warnings are suppressed. */
1152 else if (++*n_warnings <= max_warnings)
1154 if (which == PRINT_FORMAT)
1155 sys_warn (r, pos, _("Variable %s with width %d has invalid print "
1157 var_get_name (v), var_get_width (v), format);
1159 sys_warn (r, pos, _("Variable %s with width %d has invalid write "
1161 var_get_name (v), var_get_width (v), format);
1163 if (*n_warnings == max_warnings)
1164 sys_warn (r, -1, _("Suppressing further invalid format warnings."));
/* Adds the document lines held in RECORD to DICT.

   RECORD holds n_lines lines of DOC_LINE_LENGTH bytes each.  Each line is
   recoded from the encoding of DICT into UTF-8, has trailing spaces
   trimmed, and is null-terminated before it is handed to
   dict_add_document_line. */
1169 parse_document (struct dictionary *dict, struct sfm_document_record *record)
1173 for (p = record->documents;
1174 p < record->documents + DOC_LINE_LENGTH * record->n_lines;
1175 p += DOC_LINE_LENGTH)
1177 struct substring line;
1179 line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
1180 ss_buffer (p, DOC_LINE_LENGTH), NULL);
1181 ss_rtrim (&line, ss_cstr (" "));
/* NOTE(review): this writes one byte past the trimmed length; it presumably
   relies on recode_substring_pool leaving room for a terminator -- confirm. */
1182 line.string[line.length] = '\0';
1184 dict_add_document_line (dict, line.string, false);
1190 /* Parses record type 7, subtype 3.

   Stores the three version numbers found at offsets 0, 4, and 8 of the
   data in RECORD into INFO.  Then compares the floating-point
   representation code at offset 16 and the integer representation code at
   offset 24 with the codes expected for the float_format and
   integer_format of R.  A floating-point mismatch is reported with
   sys_error; an integer mismatch is reported with sys_warn. */
1192 parse_machine_integer_info (struct sfm_reader *r,
1193 const struct sfm_extension_record *record,
1194 struct sfm_read_info *info)
1196 int float_representation, expected_float_format;
1197 int integer_representation, expected_integer_format;
1199 /* Save version info. */
1200 info->version_major = parse_int (r, record->data, 0);
1201 info->version_minor = parse_int (r, record->data, 4);
1202 info->version_revision = parse_int (r, record->data, 8);
1204 /* Check floating point format. */
1205 float_representation = parse_int (r, record->data, 16);
/* Expected codes: 1 for IEEE double in either byte order, 2 for
   FLOAT_Z_LONG, 3 for VAX G or VAX D. */
1206 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
1207 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
1208 expected_float_format = 1;
1209 else if (r->float_format == FLOAT_Z_LONG)
1210 expected_float_format = 2;
1211 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
1212 expected_float_format = 3;
1215 if (float_representation != expected_float_format)
1216 sys_error (r, record->pos, _("Floating-point representation indicated by "
1217 "system file (%d) differs from expected (%d)."),
1218 float_representation, expected_float_format);
1220 /* Check integer format. */
1221 integer_representation = parse_int (r, record->data, 24);
/* Expected codes: 1 for most-significant-byte first, 2 for
   least-significant-byte first. */
1222 if (r->integer_format == INTEGER_MSB_FIRST)
1223 expected_integer_format = 1;
1224 else if (r->integer_format == INTEGER_LSB_FIRST)
1225 expected_integer_format = 2;
1228 if (integer_representation != expected_integer_format)
1229 sys_warn (r, record->pos,
1230 _("Integer format indicated by system file (%d) "
1231 "differs from expected (%d)."),
1232 integer_representation, expected_integer_format);
/* Returns the name of the character encoding to use for the dictionary.

   Judging from the lines visible here, the candidates are tried in this
   order: the data of EXT_ENCODING; the encoding that
   sys_get_encoding_from_codepage reports for the codepage stored at offset
   7 * 4 in the data of EXT_INTEGER; a comparison of the magic in HEADER
   against EBCDIC_MAGIC; and finally locale_charset ().
   NOTE(review): the conditions guarding each step, the codepage values
   that are ignored, and the value returned for EBCDIC files are not
   visible in this excerpt -- confirm against the full function. */
1237 choose_encoding (struct sfm_reader *r,
1238 const struct sfm_header_record *header,
1239 const struct sfm_extension_record *ext_integer,
1240 const struct sfm_extension_record *ext_encoding)
1242 /* The EXT_ENCODING record is a more reliable way to determine dictionary
1245 return ext_encoding->data;
1247 /* But EXT_INTEGER is better than nothing as a fallback. */
1250 int codepage = parse_int (r, ext_integer->data, 7 * 4);
1251 const char *encoding;
1260 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
1261 respectively. However, there are known to be many files in the wild
1262 with character code 2, yet have data which are clearly not ASCII.
1263 Therefore we ignore these values. */
1270 encoding = sys_get_encoding_from_codepage (codepage);
1271 if (encoding != NULL)
1277 /* If the file magic number is EBCDIC then its character data is too. */
1278 if (!strcmp (header->magic, EBCDIC_MAGIC))
1281 return locale_charset ();
1284 /* Parses record type 7, subtype 4.

   Reads the values that the file uses for SYSMIS, HIGHEST, and LOWEST from
   offsets 0, 8, and 16 of the data in RECORD and issues a warning, at the
   position of RECORD, for each one that differs from the value this
   program uses.  Each warning prints the values both in %g and in %a
   (hexadecimal floating-point) form. */
1286 parse_machine_float_info (struct sfm_reader *r,
1287 const struct sfm_extension_record *record)
1289 double sysmis = parse_float (r, record->data, 0);
1290 double highest = parse_float (r, record->data, 8);
1291 double lowest = parse_float (r, record->data, 16);
1293 if (sysmis != SYSMIS)
1294 sys_warn (r, record->pos,
1295 _("File specifies unexpected value %g (%a) as %s, "
1296 "instead of %g (%a)."),
1297 sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
1299 if (highest != HIGHEST)
1300 sys_warn (r, record->pos,
1301 _("File specifies unexpected value %g (%a) as %s, "
1302 "instead of %g (%a)."),
1303 highest, highest, "HIGHEST", HIGHEST, HIGHEST);
1305 /* SPSS before version 21 used a unique value just bigger than SYSMIS as
1306 LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
1307 appears in a context (missing values) where SYSMIS cannot. */
1308 if (lowest != LOWEST && lowest != SYSMIS)
1309 sys_warn (r, record->pos,
1310 _("File specifies unexpected value %g (%a) as %s, "
1311 "instead of %g (%a) or %g (%a)."),
1312 lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
1315 /* Parses record type 7, subtype 10.

   Opens RECORD as a text record recoded to UTF-8, passes its entire text
   through fix_line_ends, stores the result in the product_ext member of
   INFO, and closes the text record again. */
1317 parse_extra_product_info (struct sfm_reader *r,
1318 const struct sfm_extension_record *record,
1319 struct sfm_read_info *info)
1321 struct text_record *text;
1323 text = open_text_record (r, record, true);
1324 info->product_ext = fix_line_ends (text_get_all (text));
1325 close_text_record (r, text);
1328 /* Parses record type 7, subtype 7 or 19.

   RECORD is read as a text record that is not recoded as a whole; the
   individual names and labels are recoded to UTF-8 from the encoding of R
   as they are extracted.  For each multiple response set the visible code
   reads, in order:

   - the set name, terminated by `=' (a warning is issued if it does not
     begin with `$');
   - a type letter: `C' selects MRSET_MC; `D' selects MRSET_MD with
     MRSET_VARLABELS; `E' selects MRSET_MD with MRSET_COUNTEDVALUES and is
     followed by a label source number ("11" sets label_from_var_label,
     anything other than "1" draws a warning);
   - for MRSET_MD sets, a counted string holding the counted value;
   - a counted string holding the label;
   - variable names separated by spaces and ended by a new-line.

   Duplicate variable names and sets mixing string and numeric variables
   are warned about.  A set with fewer than 2 variables is warned about and
   destroyed; a set that survives is added to DICT with dict_add_mrset.
   NOTE(review): the loop structure and the error exits are not visible in
   this excerpt -- confirm against the full function. */
1330 parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
1331 struct dictionary *dict)
1333 struct text_record *text;
1334 struct mrset *mrset;
1336 text = open_text_record (r, record, false);
1339 const char *counted = NULL;
1342 struct stringi_set var_names;
1343 size_t allocated_vars;
1347 mrset = xzalloc (sizeof *mrset);
1349 name = text_get_token (text, ss_cstr ("="), NULL);
1352 mrset->name = recode_string ("UTF-8", r->encoding, name, -1);
1354 if (mrset->name[0] != '$')
1356 sys_warn (r, record->pos,
1357 _("`%s' does not begin with `$' at offset %zu "
1358 "in MRSETS record."), mrset->name, text_pos (text));
1362 if (text_match (text, 'C'))
1364 mrset->type = MRSET_MC;
1365 if (!text_match (text, ' '))
1367 sys_warn (r, record->pos,
1368 _("Missing space following `%c' at offset %zu "
1369 "in MRSETS record."), 'C', text_pos (text));
1373 else if (text_match (text, 'D'))
1375 mrset->type = MRSET_MD;
1376 mrset->cat_source = MRSET_VARLABELS;
1378 else if (text_match (text, 'E'))
1382 mrset->type = MRSET_MD;
1383 mrset->cat_source = MRSET_COUNTEDVALUES;
1384 if (!text_match (text, ' '))
1386 sys_warn (r, record->pos,
1387 _("Missing space following `%c' at offset %zu "
1388 "in MRSETS record."), 'E', text_pos (text));
1392 number = text_get_token (text, ss_cstr (" "), NULL);
1393 if (!strcmp (number, "11"))
1394 mrset->label_from_var_label = true;
1395 else if (strcmp (number, "1"))
1396 sys_warn (r, record->pos,
1397 _("Unexpected label source value `%s' following `E' "
1398 "at offset %zu in MRSETS record."),
1399 number, text_pos (text));
1403 sys_warn (r, record->pos,
1404 _("Missing `C', `D', or `E' at offset %zu "
1405 "in MRSETS record."),
1410 if (mrset->type == MRSET_MD)
1412 counted = text_parse_counted_string (r, text);
1413 if (counted == NULL)
1417 label = text_parse_counted_string (r, text);
1420 if (label[0] != '\0')
1421 mrset->label = recode_string ("UTF-8", r->encoding, label, -1);
1423 stringi_set_init (&var_names);
1428 const char *raw_var_name;
1429 struct variable *var;
1432 raw_var_name = text_get_token (text, ss_cstr (" \n"), &delimiter);
1433 if (raw_var_name == NULL)
1435 sys_warn (r, record->pos,
1436 _("Missing new-line parsing variable names "
1437 "at offset %zu in MRSETS record."),
1441 var_name = recode_string ("UTF-8", r->encoding, raw_var_name, -1);
1443 var = dict_lookup_var (dict, var_name);
/* VAR_NAMES records the names seen so far so that a repeated name is
   detected by a failed insertion. */
1449 if (!stringi_set_insert (&var_names, var_name))
1451 sys_warn (r, record->pos,
1452 _("Duplicate variable name %s "
1453 "at offset %zu in MRSETS record."),
1454 var_name, text_pos (text));
1460 if (mrset->label == NULL && mrset->label_from_var_label
1461 && var_has_label (var))
1462 mrset->label = xstrdup (var_get_label (var));
1465 && var_get_type (var) != var_get_type (mrset->vars[0]))
/* NOTE(review): this message prints NAME, the token as read from the
   record, whereas the other messages print mrset->name, the copy recoded
   to UTF-8. */
1467 sys_warn (r, record->pos,
1468 _("MRSET %s contains both string and "
1469 "numeric variables."), name);
/* WIDTH ends up as the smallest width among the variables in the set. */
1472 width = MIN (width, var_get_width (var));
1474 if (mrset->n_vars >= allocated_vars)
1475 mrset->vars = x2nrealloc (mrset->vars, &allocated_vars,
1476 sizeof *mrset->vars);
1477 mrset->vars[mrset->n_vars++] = var;
1479 while (delimiter != '\n');
1481 if (mrset->n_vars < 2)
1483 sys_warn (r, record->pos,
1484 _("MRSET %s has only %zu variables."), mrset->name,
1486 mrset_destroy (mrset);
1487 stringi_set_destroy (&var_names);
1491 if (mrset->type == MRSET_MD)
1493 mrset->width = width;
1494 value_init (&mrset->counted, width);
/* The counted value is either parsed as a number or copied as a string
   padded on the right with spaces; the test choosing between the two is
   not visible in this excerpt. */
1496 mrset->counted.f = c_strtod (counted, NULL);
1498 value_copy_str_rpad (&mrset->counted, width,
1499 (const uint8_t *) counted, ' ');
1502 dict_add_mrset (dict, mrset);
1504 stringi_set_destroy (&var_names);
1506 mrset_destroy (mrset);
1507 close_text_record (r, text);
1510 /* Read record type 7, subtype 11, which specifies how variables
1511 should be displayed in GUI environments.

   The count in RECORD must be either 3 times or 2 times the number of
   variables in DICT, depending on whether each entry includes a display
   width; any other count is reported with sys_warn.  For each variable the
   measure, the width (when present), and the alignment are read as
   integers from the data in RECORD.  A measure outside 1...3 or an
   alignment outside 0...2 draws a warning; values are otherwise mapped to
   the MEASURE_* and ALIGN_* constants and set on the variable. */
1513 parse_display_parameters (struct sfm_reader *r,
1514 const struct sfm_extension_record *record,
1515 struct dictionary *dict)
1517 bool includes_width;
1518 bool warned = false;
1523 n_vars = dict_get_var_cnt (dict);
1524 if (record->count == 3 * n_vars)
1525 includes_width = true;
1526 else if (record->count == 2 * n_vars)
1527 includes_width = false;
1530 sys_warn (r, record->pos,
1531 _("Extension 11 has bad count %zu (for %zu variables)."),
1532 record->count, n_vars);
1537 for (i = 0; i < n_vars; ++i)
1539 struct variable *v = dict_get_var (dict, i);
1540 int measure, width, align;
1542 measure = parse_int (r, record->data, ofs);
1547 width = parse_int (r, record->data, ofs);
1553 align = parse_int (r, record->data, ofs);
1556 /* SPSS sometimes seems to set variables' measure to zero. */
1560 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1563 sys_warn (r, record->pos,
1564 _("Invalid variable display parameters for variable "
1565 "%zu (%s). Default parameters substituted."),
1566 i, var_get_name (v));
1571 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1572 : measure == 2 ? MEASURE_ORDINAL
1574 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1575 : align == 1 ? ALIGN_RIGHT
1578 /* Older versions (SPSS 9.0) sometimes set the display
1579 width to zero. This causes confusion in the GUI, so
1580 only set the width if it is nonzero. */
1582 var_set_display_width (v, width);
/* Renames VAR in DICT to NEW_NAME while keeping the short names of VAR.

   Copies of all of the short names of VAR are taken (a missing short name
   is kept as a null pointer) before dict_rename_var is called; afterward
   each copy is set on VAR again at its original index and then freed. */
1587 rename_var_and_save_short_names (struct dictionary *dict, struct variable *var,
1588 const char *new_name)
1590 size_t n_short_names;
1594 /* Renaming a variable may clear its short names, but we
1595 want to retain them, so we save them and re-set them
1597 n_short_names = var_get_short_name_cnt (var);
1598 short_names = xnmalloc (n_short_names, sizeof *short_names);
1599 for (i = 0; i < n_short_names; i++)
1601 const char *s = var_get_short_name (var, i);
1602 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1605 /* Set long name. */
1606 dict_rename_var (dict, var, new_name);
1608 /* Restore short names. */
1609 for (i = 0; i < n_short_names; i++)
1611 var_set_short_name (var, i, short_names[i]);
1612 free (short_names[i]);
1617 /* Parses record type 7, subtype 13, which gives the long name that corresponds
1618 to each short name. Modifies variable names in DICT accordingly.

   When there are no long variable names, every variable in DICT is renamed
   to the lowercase form of its current name.  (NOTE(review): the test that
   selects this case is not visible in this excerpt.)  Otherwise each
   short-name/long-name pair read from RECORD is checked with
   dict_id_is_valid and checked against the names already in DICT; a pair
   that fails either check draws a warning.  Renaming is done with
   rename_var_and_save_short_names in both cases. */
1620 parse_long_var_name_map (struct sfm_reader *r,
1621 const struct sfm_extension_record *record,
1622 struct dictionary *dict)
1624 struct text_record *text;
1625 struct variable *var;
1630 /* There are no long variable names. Use the short variable names,
1631 converted to lowercase, as the long variable names. */
1634 for (i = 0; i < dict_get_var_cnt (dict); i++)
1636 struct variable *var = dict_get_var (dict, i);
1639 new_name = utf8_to_lower (var_get_name (var));
1640 rename_var_and_save_short_names (dict, var, new_name);
1647 /* Rename each of the variables, one by one. (In a correctly constructed
1648 system file, this cannot create any intermediate duplicate variable names,
1649 because all of the new variable names are longer than any of the old
1650 variable names and thus there cannot be any overlaps.) */
1651 text = open_text_record (r, record, true);
1652 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1654 /* Validate long name. */
1655 if (!dict_id_is_valid (dict, long_name, false))
1657 sys_warn (r, record->pos,
1658 _("Long variable mapping from %s to invalid "
1659 "variable name `%s'."),
1660 var_get_name (var), long_name);
1664 /* Identify any duplicates. */
/* A long name that differs from the first short name of VAR only in case is
   not treated as a duplicate, since looking it up would presumably find
   VAR itself. */
1665 if (utf8_strcasecmp (var_get_short_name (var, 0), long_name)
1666 && dict_lookup_var (dict, long_name) != NULL)
1668 sys_warn (r, record->pos,
1669 _("Duplicate long variable name `%s'."), long_name);
1673 rename_var_and_save_short_names (dict, var, long_name);
1675 close_text_record (r, text);
1678 /* Reads record type 7, subtype 14, which gives the real length
1679 of each very long string. Rearranges DICT accordingly.

   For each variable=length pair in RECORD:

   - the length is parsed with strtol and must lie in 1...MAX_STRING;
   - sfm_width_to_segments gives the number of segments, which must not be
     1 (warning) and must not run past the end of DICT (error);
   - the width of each segment, rounded up to a multiple of 8, must equal
     the expected allocation width rounded up the same way (error
     otherwise), and the first short name of each segment becomes short
     name number i of the variable;
   - the segments after the first are deleted from DICT and the width of
     the variable is set to the full length.

   dict_compact_values is called on DICT once all pairs are processed. */
1681 parse_long_string_map (struct sfm_reader *r,
1682 const struct sfm_extension_record *record,
1683 struct dictionary *dict)
1685 struct text_record *text;
1686 struct variable *var;
1689 text = open_text_record (r, record, true);
1690 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1692 size_t idx = var_get_dict_index (var);
1698 length = strtol (length_s, NULL, 10);
1699 if (length < 1 || length > MAX_STRING)
1701 sys_warn (r, record->pos,
1702 _("%s listed as string of invalid length %s "
1703 "in very long string record."),
1704 var_get_name (var), length_s);
1708 /* Check segments. */
1709 segment_cnt = sfm_width_to_segments (length);
1710 if (segment_cnt == 1)
1712 sys_warn (r, record->pos,
1713 _("%s listed in very long string record with width %s, "
1714 "which requires only one segment."),
1715 var_get_name (var), length_s);
1718 if (idx + segment_cnt > dict_get_var_cnt (dict))
1719 sys_error (r, record->pos,
1720 _("Very long string %s overflows dictionary."),
1721 var_get_name (var));
1723 /* Get the short names from the segments and check their
1725 for (i = 0; i < segment_cnt; i++)
1727 struct variable *seg = dict_get_var (dict, idx + i);
1728 int alloc_width = sfm_segment_alloc_width (length, i);
1729 int width = var_get_width (seg);
1732 var_set_short_name (var, i, var_get_short_name (seg, 0));
1733 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1734 sys_error (r, record->pos,
1735 _("Very long string with width %ld has segment %d "
1736 "of width %d (expected %d)."),
1737 length, i, width, alloc_width);
1739 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1740 var_set_width (var, length);
1742 close_text_record (r, text);
1743 dict_compact_values (dict);
/* Applies the value labels in RECORD to the variables that RECORD lists.

   The labels are first recoded to UTF-8 from the encoding of DICT, and the
   variable indexes in RECORD are resolved through lookup_var_by_index
   against the N_VAR_RECS records at VAR_RECS.  All of the variables must
   have the same type as the first one, or sys_error is raised.  Then every
   label is added to every variable; a label that var_add_value_label
   rejects is reported as a duplicate with sys_warn.  The temporary arrays
   are returned to the pool of R at the end.
   NOTE(review): the width test behind the "long string variables" error
   and the numeric/string test choosing how a value is read are not visible
   in this excerpt. */
1747 parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
1748 const struct sfm_var_record *var_recs, size_t n_var_recs,
1749 const struct sfm_value_label_record *record)
1751 struct variable **vars;
1755 utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
1756 for (i = 0; i < record->n_labels; i++)
1757 utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
1758 record->labels[i].label, -1,
1761 vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
1762 for (i = 0; i < record->n_vars; i++)
1763 vars[i] = lookup_var_by_index (r, record->pos,
1764 var_recs, n_var_recs, record->vars[i]);
/* Every variable after the first is compared with the first, so the set is
   either all numeric or all string. */
1766 for (i = 1; i < record->n_vars; i++)
1767 if (var_get_type (vars[i]) != var_get_type (vars[0]))
1768 sys_error (r, record->pos,
1769 _("Variables associated with value label are not all of "
1770 "identical type. Variable %s is %s, but variable "
1772 var_get_name (vars[0]),
1773 var_is_numeric (vars[0]) ? _("numeric") : _("string"),
1774 var_get_name (vars[i]),
1775 var_is_numeric (vars[i]) ? _("numeric") : _("string"));
1777 for (i = 0; i < record->n_vars; i++)
1779 struct variable *var = vars[i];
1783 width = var_get_width (var);
1785 sys_error (r, record->pos,
1786 _("Value labels may not be added to long string "
1787 "variables (e.g. %s) using records types 3 and 4."),
1788 var_get_name (var));
1790 for (j = 0; j < record->n_labels; j++)
1792 struct sfm_value_label *label = &record->labels[j];
1795 value_init (&value, width);
/* The raw value of the label is either parsed as a floating-point number
   or copied as WIDTH string bytes. */
1797 value.f = parse_float (r, label->value, 0);
1799 memcpy (value_str_rw (&value, width), label->value, width);
1801 if (!var_add_value_label (var, &value, utf8_labels[j]))
1803 if (var_is_numeric (var))
1804 sys_warn (r, record->pos,
1805 _("Duplicate value label for %g on %s."),
1806 value.f, var_get_name (var));
1808 sys_warn (r, record->pos,
1809 _("Duplicate value label for `%.*s' on %s."),
1810 width, value_str (&value, width),
1811 var_get_name (var));
1814 value_destroy (&value, width);
1818 pool_free (r->pool, vars);
1819 for (i = 0; i < record->n_labels; i++)
1820 pool_free (r->pool, utf8_labels[i]);
1821 pool_free (r->pool, utf8_labels);
/* Looks up the variable record with 1-based index IDX among the N_VAR_RECS
   records at VAR_RECS.

   Raises sys_error at file position OFFSET if IDX is outside
   1...N_VAR_RECS or if the selected record has no variable attached, which
   the error message describes as a long string continuation.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably the variable of the selected record is returned. */
1824 static struct variable *
1825 lookup_var_by_index (struct sfm_reader *r, off_t offset,
1826 const struct sfm_var_record *var_recs, size_t n_var_recs,
1829 const struct sfm_var_record *rec;
1831 if (idx < 1 || idx > n_var_recs)
1833 sys_error (r, offset,
1834 _("Variable index %d not in valid range 1...%zu."),
/* IDX counts from 1, the array from 0. */
1839 rec = &var_recs[idx - 1];
1840 if (rec->var == NULL)
1842 sys_error (r, offset,
1843 _("Variable index %d refers to long string continuation."),
1851 /* Parses a set of custom attributes from TEXT into ATTRS.
1852 ATTRS may be a null pointer, in which case the attributes are
1853 read but discarded.

   Each attribute is a key terminated by `(' followed by one value per
   line.  A value enclosed in single quotes is added without the quotes;
   any other value is added unchanged, with a warning that it is not
   quoted.  A `)' after a value ends the attribute and a `/' ends the set
   of attributes. */
1855 parse_attributes (struct sfm_reader *r, struct text_record *text,
1856 struct attrset *attrs)
1860 struct attribute *attr;
1864 /* Parse the key. */
1865 key = text_get_token (text, ss_cstr ("("), NULL);
1869 attr = attribute_create (key);
/* INDEX counts the values from 1 and is used only in the warnings. */
1870 for (index = 1; ; index++)
1872 /* Parse the value. */
1876 value = text_get_token (text, ss_cstr ("\n"), NULL);
1879 text_warn (r, text, _("Error parsing attribute value %s[%d]."),
1884 length = strlen (value);
1885 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
/* Strip the quotes: overwrite the closing quote with a terminator and skip
   the opening one. */
1887 value[length - 1] = '\0';
1888 attribute_add_value (attr, value + 1);
1893 _("Attribute value %s[%d] is not quoted: %s."),
1895 attribute_add_value (attr, value);
1898 /* Was this the last value for this attribute? */
1899 if (text_match (text, ')'))
/* The finished attribute is either added to ATTRS or destroyed. */
1903 attrset_add (attrs, attr);
1905 attribute_destroy (attr);
1907 while (!text_match (text, '/'));
1910 /* Reads record type 7, subtype 17, which lists custom
1911 attributes on the data file.

   RECORD is opened as a text record recoded to UTF-8 and its attributes
   are parsed by parse_attributes into the attribute set of DICT. */
1913 parse_data_file_attributes (struct sfm_reader *r,
1914 const struct sfm_extension_record *record,
1915 struct dictionary *dict)
1917 struct text_record *text = open_text_record (r, record, true);
1918 parse_attributes (r, text, dict_get_attributes (dict));
1919 close_text_record (r, text);
1922 /* Parses record type 7, subtype 18, which lists custom
1923 attributes on individual variables.

   RECORD is opened as a text record recoded to UTF-8.  Each entry is a
   variable name terminated by `:' followed by a set of attributes.  When
   the name did not resolve to a variable (VAR is null), the attributes are
   still parsed, so that the text position advances, but they are
   discarded. */
1925 parse_variable_attributes (struct sfm_reader *r,
1926 const struct sfm_extension_record *record,
1927 struct dictionary *dict)
1929 struct text_record *text;
1930 struct variable *var;
1932 text = open_text_record (r, record, true);
1933 while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
1934 parse_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
1935 close_text_record (r, text);
/* Sets the role of variables in DICT from their "$@Role" attribute.

   The first value of the attribute is converted to an integer with atoi
   and mapped to a role, which is set with var_set_role.  The first
   variable with an invalid role is reported by name; further ones are only
   counted and summarized in a final warning.
   NOTE(review): the mapping from integers to roles and the handling of
   variables without the attribute are not visible in this excerpt. */
1939 assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
1941 size_t n_warnings = 0;
1944 for (i = 0; i < dict_get_var_cnt (dict); i++)
1946 struct variable *var = dict_get_var (dict, i);
1947 struct attrset *attrs = var_get_attributes (var);
1948 const struct attribute *attr = attrset_lookup (attrs, "$@Role");
1951 int value = atoi (attribute_get_value (attr, 0));
1973 role = ROLE_PARTITION;
/* Only the first invalid role gets its own message; the post-increment
   keeps counting the rest. */
1982 if (n_warnings++ == 0)
1983 sys_warn (r, -1, _("Invalid role for variable %s."),
1984 var_get_name (var));
1987 var_set_role (var, role);
1992 sys_warn (r, -1, _("%zu other variables had invalid roles."),
/* Verifies that LENGTH bytes starting at offset OFS lie within the data of
   RECORD, which is size * count bytes long.  Raises sys_error, positioned
   at the end of the record, if they do not. */
1997 check_overflow (struct sfm_reader *r,
1998 const struct sfm_extension_record *record,
1999 size_t ofs, size_t length)
2001 size_t end = record->size * record->count;
/* LENGTH is tested on its own first, so that a huge LENGTH is rejected even
   if OFS + LENGTH would wrap around. */
2002 if (length >= end || ofs + length > end)
2003 sys_error (r, record->pos + end,
2004 _("Extension record subtype %d ends unexpectedly."),
/* Parses the long string value labels in RECORD and adds them to the
   variables in DICT.

   As read by the visible code, the data in RECORD is a sequence of
   entries, each laid out as: a 4-byte variable name length, the variable
   name, a 4-byte width, a 4-byte number of labels, and then for each label
   a 4-byte value length, the value, a 4-byte label length, and the label.
   Every read is preceded by check_overflow.  Names and labels are recoded
   to UTF-8 from the encoding of DICT.

   An entry is ignored, with a warning, when its variable is unknown or
   numeric or when its width differs from the width of the variable.  A
   label whose value length differs from the width is ignored with a
   warning, and a label that var_add_value_label rejects is reported as a
   duplicate. */
2009 parse_long_string_value_labels (struct sfm_reader *r,
2010 const struct sfm_extension_record *record,
2011 struct dictionary *dict)
2013 const char *dict_encoding = dict_get_encoding (dict);
2014 size_t end = record->size * record->count;
2021 struct variable *var;
2026 /* Parse variable name length. */
2027 check_overflow (r, record, ofs, 4);
2028 var_name_len = parse_int (r, record->data, ofs);
2031 /* Parse variable name, width, and number of labels. */
2032 check_overflow (r, record, ofs, var_name_len + 8);
2033 var_name = recode_string_pool ("UTF-8", dict_encoding,
2034 (const char *) record->data + ofs,
2035 var_name_len, r->pool);
2036 width = parse_int (r, record->data, ofs + var_name_len);
2037 n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
2038 ofs += var_name_len + 8;
2040 /* Look up 'var' and validate. */
2041 var = dict_lookup_var (dict, var_name);
2043 sys_warn (r, record->pos + ofs,
2044 _("Ignoring long string value label record for "
2045 "unknown variable %s."), var_name);
2046 else if (var_is_numeric (var))
2048 sys_warn (r, record->pos + ofs,
2049 _("Ignoring long string value label record for "
2050 "numeric variable %s."), var_name);
2053 else if (width != var_get_width (var))
2055 sys_warn (r, record->pos + ofs,
2056 _("Ignoring long string value label record for variable "
2057 "%s because the record's width (%d) does not match the "
2058 "variable's width (%d)."),
2059 var_name, width, var_get_width (var));
2064 value_init_pool (r->pool, &value, width);
2065 for (i = 0; i < n_labels; i++)
2067 size_t value_length, label_length;
/* With no usable variable the labels are still walked, so that OFS advances
   past them, but they are skipped. */
2068 bool skip = var == NULL;
2070 /* Parse value length. */
2071 check_overflow (r, record, ofs, 4);
2072 value_length = parse_int (r, record->data, ofs);
2076 check_overflow (r, record, ofs, value_length);
2079 if (value_length == width)
2080 memcpy (value_str_rw (&value, width),
2081 (const uint8_t *) record->data + ofs, width);
2084 sys_warn (r, record->pos + ofs,
2085 _("Ignoring long string value label %zu for "
2086 "variable %s, with width %d, that has bad value "
2088 i, var_get_name (var), width, value_length);
2092 ofs += value_length;
2094 /* Parse label length. */
2095 check_overflow (r, record, ofs, 4);
2096 label_length = parse_int (r, record->data, ofs);
2100 check_overflow (r, record, ofs, label_length);
2105 label = recode_string_pool ("UTF-8", dict_encoding,
2106 (const char *) record->data + ofs,
2107 label_length, r->pool);
2108 if (!var_add_value_label (var, &value, label))
2109 sys_warn (r, record->pos + ofs,
2110 _("Duplicate value label for `%.*s' on %s."),
2111 width, value_str (&value, width),
2112 var_get_name (var));
2113 pool_free (r->pool, label);
2115 ofs += label_length;
/* Parses the long string missing values in RECORD and sets them on the
   variables in DICT.

   As read by the visible code, the data in RECORD is a sequence of
   entries, each laid out as: a 4-byte variable name length, the variable
   name, one byte giving the number of missing values, and then for each
   missing value a 4-byte value length followed by the value.  Every
   multi-byte read is preceded by check_overflow.  The variable name is
   recoded to UTF-8 from the encoding of DICT.

   A count outside 1...3 draws a warning, as does an unknown or numeric
   variable (whose entry is ignored) and a value that mv_add_str rejects.
   The collected missing values are set with var_set_missing_values. */
2121 parse_long_string_missing_values (struct sfm_reader *r,
2122 const struct sfm_extension_record *record,
2123 struct dictionary *dict)
2125 const char *dict_encoding = dict_get_encoding (dict);
2126 size_t end = record->size * record->count;
2131 struct missing_values mv;
2133 struct variable *var;
2134 int n_missing_values;
2138 /* Parse variable name length. */
2139 check_overflow (r, record, ofs, 4);
2140 var_name_len = parse_int (r, record->data, ofs);
2143 /* Parse variable name. */
/* The extra byte checked here is the count of missing values read below. */
2144 check_overflow (r, record, ofs, var_name_len + 1);
2145 var_name = recode_string_pool ("UTF-8", dict_encoding,
2146 (const char *) record->data + ofs,
2147 var_name_len, r->pool);
2148 ofs += var_name_len;
2150 /* Parse number of missing values. */
2151 n_missing_values = ((const uint8_t *) record->data)[ofs];
2152 if (n_missing_values < 1 || n_missing_values > 3)
2153 sys_warn (r, record->pos + ofs,
2154 _("Long string missing values record says variable %s "
2155 "has %d missing values, but only 1 to 3 missing values "
2157 var_name, n_missing_values);
2160 /* Look up 'var' and validate. */
2161 var = dict_lookup_var (dict, var_name);
2163 sys_warn (r, record->pos + ofs,
2164 _("Ignoring long string missing value record for "
2165 "unknown variable %s."), var_name);
2166 else if (var_is_numeric (var))
2168 sys_warn (r, record->pos + ofs,
2169 _("Ignoring long string missing value record for "
2170 "numeric variable %s."), var_name);
/* A placeholder width of 8 is used when there is no variable to take the
   width from. */
2175 mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
2176 for (i = 0; i < n_missing_values; i++)
2178 size_t value_length;
2180 /* Parse value length. */
2181 check_overflow (r, record, ofs, 4);
2182 value_length = parse_int (r, record->data, ofs);
2186 check_overflow (r, record, ofs, value_length);
2189 && !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
2191 sys_warn (r, record->pos + ofs,
2192 _("Ignoring long string missing value %zu for variable "
2193 "%s, with width %d, that has bad value width %zu."),
2194 i, var_get_name (var), var_get_width (var),
2196 ofs += value_length;
2199 var_set_missing_values (var, &mv);
/* Forward declarations of the helpers that read case data. */
2205 static void partial_record (struct sfm_reader *r)
2208 static void read_error (struct casereader *, const struct sfm_reader *);
2210 static bool read_case_number (struct sfm_reader *, double *);
2211 static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
2212 static int read_opcode (struct sfm_reader *);
2213 static bool read_compressed_number (struct sfm_reader *, double *);
2214 static bool read_compressed_string (struct sfm_reader *, uint8_t *);
2215 static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
2216 static bool skip_whole_strings (struct sfm_reader *, size_t);
2218 /* Reads and returns one case from READER's file. Returns a null
2219 pointer if not successful.

   R_ is the struct sfm_reader for the file.  A case is created from the
   prototype in R and then filled in from each of the sfm_vars in R: a
   number is read when var_width is 0; otherwise segment_width string bytes
   are read to offset sv->offset within the value, and the padding, rounded
   down to a multiple of 8 bytes, is skipped.

   The setjmp on r->bail_out provides a landing point that forces an error
   on READER.  NOTE(review): what jumps to it and the cleanup on the
   failure paths are not visible in this excerpt. */
2220 static struct ccase *
2221 sys_file_casereader_read (struct casereader *reader, void *r_)
2223 struct sfm_reader *r = r_;
2224 struct ccase *volatile c;
2230 c = case_create (r->proto);
2231 if (setjmp (r->bail_out))
2233 casereader_force_error (reader);
2238 for (i = 0; i < r->sfm_var_cnt; i++)
2240 struct sfm_var *sv = &r->sfm_vars[i];
2241 union value *v = case_data_rw_idx (c, sv->case_index);
2243 if (sv->var_width == 0)
2245 if (!read_case_number (r, &v->f))
2250 uint8_t *s = value_str_rw (v, sv->var_width);
2251 if (!read_case_string (r, s + sv->offset, sv->segment_width))
2253 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
/* A case_cnt of -1 is exempt from the read error. */
2262 if (r->case_cnt != -1)
2263 read_error (reader, r);
2268 /* Issues an error that R ends in a partial record.  The error is raised
   with sys_error at the current position of R. */
2270 partial_record (struct sfm_reader *r)
2272 sys_error (r, r->pos, _("File ends in partial case."));
2275 /* Issues an error that an unspecified error occurred while reading SFM, and forces an error on casereader R. */
2278 read_error (struct casereader *r, const struct sfm_reader *sfm)
/* The message names the file through the file handle of SFM. */
2280 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
2281 casereader_force_error (r);
2284 /* Reads a number from R and stores its value in *D.
2285 If R is compressed, reads a compressed number;
2286 otherwise, reads a number in the regular way.
2287 Returns true if successful, false if end of file is
2288 reached immediately.

   In the regular case the raw bytes are converted from the floating-point
   format of R to the native double format. */
2290 read_case_number (struct sfm_reader *r, double *d)
2295 if (!try_read_bytes (r, number, sizeof number))
2297 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
2301 return read_compressed_number (r, d);
2304 /* Reads LENGTH string bytes from R into S.
2305 Always reads a multiple of 8 bytes; if LENGTH is not a
2306 multiple of 8, then extra bytes are read and discarded without
   comment.
2308 Reads compressed strings if R is compressed.
2309 Returns true if successful, false if end of file is
2310 reached immediately. */
2312 read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
/* WHOLE is the part of LENGTH made up of complete 8-byte units; PARTIAL is
   the number of bytes left over. */
2314 size_t whole = ROUND_DOWN (length, 8);
2315 size_t partial = length % 8;
2319 if (!read_whole_strings (r, s, whole))
/* The leftover bytes are read as a full unit into a bounce buffer, and only
   the first PARTIAL bytes of it are copied into S. */
2326 if (!read_whole_strings (r, bounce, sizeof bounce))
2332 memcpy (s + whole, bounce, partial);
2338 /* Reads and returns the next compression opcode from R.
   R must be compressed.  Opcodes are taken from the opcodes buffer in R,
   which is refilled from the file once opcode_idx has reached its end. */
2340 read_opcode (struct sfm_reader *r)
2342 assert (r->compressed);
2346 if (r->opcode_idx >= sizeof r->opcodes)
2348 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
2352 opcode = r->opcodes[r->opcode_idx++];
2359 /* Reads a compressed number from R and stores its value in D.
2360 Returns true if successful, false if end of file is
2361 reached immediately.

   Depending on the opcode, the visible code stores a number read with
   read_float, the conversion of a string of spaces (with a one-time
   corruption warning), or the opcode minus the bias of R.
   NOTE(review): the opcode values selecting each case are not visible in
   this excerpt. */
2363 read_compressed_number (struct sfm_reader *r, double *d)
2365 int opcode = read_opcode (r);
2373 *d = read_float (r);
2377 float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
/* corruption_warning ensures that the warning is issued only once per
   reader. */
2378 if (!r->corruption_warning)
2380 r->corruption_warning = true;
2381 sys_warn (r, r->pos,
2382 _("Possible compressed data corruption: "
2383 "compressed spaces appear in numeric field."));
2392 *d = opcode - r->bias;
2399 /* Reads a compressed 8-byte string segment from R and stores it
   in DST.
2401 Returns true if successful, false if end of file is
2402 reached immediately.

   Depending on the opcode, the visible code reads 8 literal bytes, fills
   DST with 8 spaces, or stores the opcode minus the bias of R converted to
   the floating-point format of R.
   NOTE(review): the opcode values selecting each case are not visible in
   this excerpt. */
2404 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
2406 int opcode = read_opcode (r);
2414 read_bytes (r, dst, 8);
2418 memset (dst, ' ', 8);
2423 double value = opcode - r->bias;
2424 float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
2427 /* This has actually been seen "in the wild". The submitter of the
2428 file that showed that the contents decoded as spaces, but they
2429 were at the end of the field so it's possible that the null
2430 bytes just acted as null terminators. */
2432 else if (!r->corruption_warning)
2434 r->corruption_warning = true;
2435 sys_warn (r, r->pos,
2436 _("Possible compressed data corruption: "
2437 "string contains compressed integer (opcode %d)."),
2447 /* Reads LENGTH string bytes from R into S.
2448 LENGTH must be a multiple of 8.
2449 Reads compressed strings if R is compressed.
2450 Returns true if successful, false if end of file is
2451 reached immediately. */
2453 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
2455 assert (length % 8 == 0);
2457 return try_read_bytes (r, s, length);
/* Compressed data is decoded one 8-byte segment at a time. */
2461 for (ofs = 0; ofs < length; ofs += 8)
2462 if (!read_compressed_string (r, s + ofs))
2472 /* Skips LENGTH string bytes from R.
2473 LENGTH must be a multiple of 8.
2474 (LENGTH must also be less than 1024, but that's only because the
2475 current caller never needs more than that many bytes.)
2476 Returns true if successful, false if end of file is
2477 reached immediately. */
2479 skip_whole_strings (struct sfm_reader *r, size_t length)
/* The bytes are read into a scratch buffer that is then discarded. */
2481 uint8_t buffer[1024];
2482 assert (length < sizeof buffer);
2483 return read_whole_strings (r, buffer, length);
2486 /* Helpers for reading records that contain structured text strings. */
2489 /* Maximum number of warnings to issue for a single text record. */
2491 #define MAX_TEXT_WARNINGS 5
2496 struct substring buffer; /* Record contents. */
2497 off_t start; /* Starting offset in file. */
2498 size_t pos; /* Current position in buffer. */
2499 int n_warnings; /* Number of warnings issued or suppressed. */
2500 bool recoded; /* Recoded into UTF-8? */
/* Creates a text record for reading the contents of RECORD as text.

   The text record is allocated from the pool of R.  Its buffer covers
   size * count bytes of the data in RECORD; when RECODE_TO_UTF8 is true
   the buffer is a copy recoded to UTF-8 from the encoding of R, allocated
   from the same pool.  The start is set to the position of RECORD and the
   warning count to 0.
   NOTE(review): the buffer used when RECODE_TO_UTF8 is false and the
   return statement are not visible in this excerpt. */
2503 static struct text_record *
2504 open_text_record (struct sfm_reader *r,
2505 const struct sfm_extension_record *record,
2506 bool recode_to_utf8)
2508 struct text_record *text;
2509 struct substring raw;
2511 text = pool_alloc (r->pool, sizeof *text);
2512 raw = ss_buffer (record->data, record->size * record->count);
2513 text->start = record->pos;
2514 text->buffer = (recode_to_utf8
2515 ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
2518 text->n_warnings = 0;
2519 text->recoded = recode_to_utf8;
2524 /* Closes TEXT, frees its storage, and issues a final warning
2525 about suppressed warnings if necessary.

   The final warning reports how many warnings beyond MAX_TEXT_WARNINGS
   were counted for TEXT. */
2527 close_text_record (struct sfm_reader *r, struct text_record *text)
2529 if (text->n_warnings > MAX_TEXT_WARNINGS)
2530 sys_warn (r, -1, _("Suppressed %d additional related warnings."),
2531 text->n_warnings - MAX_TEXT_WARNINGS);
2533 pool_free (r->pool, ss_data (text->buffer));
2536 /* Reads a variable=value pair from TEXT.
2537 Looks up the variable in DICT and stores it into *VAR.
2538 Stores a null-terminated value into *VALUE. */
2540 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
2541 struct text_record *text,
2542 struct variable **var, char **value)
/* The variable name is the token that ends at "=". */
2546 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
/* The value is the token that ends at a tab or a null byte (the delimiter
   set is the two bytes "\t\0"). */
2549 *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
/* Advance the position by the ss_span() result for the remaining text and
   the same two-byte set -- presumably this skips any further run of tabs
   and null bytes before the next pair; confirm against ss_span(). */
2553 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
2554 ss_buffer ("\t\0", 2));
/* Reads a token from TEXT, ending at one of DELIMITERS, looks it up as a
   variable name in DICT, and stores the lookup result into *VAR.  A warning
   about an unknown variable is issued through text_warn().
   NOTE(review): the return type, the checks around the lookup, and the
   return statements are not visible in this listing. */
2562 text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
2563 struct text_record *text, struct substring delimiters,
2564 struct variable **var)
2568 name = text_get_token (text, delimiters, NULL);
2572 *var = dict_lookup_var (dict, name);
2576 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
/* Reads a token from TEXT, ending at one of DELIMITERS, and looks it up in
   DICT with dict_lookup_var(), storing the lookup result into *VAR.  A
   warning about an unknown variable is issued through text_warn().
   NOTE(review): the return type and the return statements are not visible
   in this listing; the caller in this file tests the result for failure. */
2583 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
2584 struct text_record *text, struct substring delimiters,
2585 struct variable **var)
2587 char *short_name = text_get_token (text, delimiters, NULL);
/* No token could be read from TEXT. */
2588 if (short_name == NULL)
2591 *var = dict_lookup_var (dict, short_name);
2593 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
2598 /* Displays a warning for the current file position, limiting the
2599 number to MAX_TEXT_WARNINGS for TEXT.
   FORMAT and the arguments that follow it are printf-style. */
2601 text_warn (struct sfm_reader *r, struct text_record *text,
2602 const char *format, ...)
/* Every call is counted, even after output stops, so that
   close_text_record() can report how many warnings were suppressed. */
2604 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
2608 va_start (args, format);
/* The reported offset is the record's starting offset in the file plus the
   current position within the record. */
2609 sys_msg (r, text->start + text->pos, MW, format, args);
/* Extracts the next token from TEXT with ss_tokenize(), which is given
   DELIMITERS and TEXT's current position, and returns a pointer to the
   token's first byte inside TEXT's buffer.
   NOTE(review): the rest of the parameter list (a DELIMITER argument is
   tested below), the failure return, and the statements that use END are
   not visible in this listing. */
2615 text_get_token (struct text_record *text, struct substring delimiters,
2618 struct substring token;
2621 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
/* END points at the byte just past the token. */
2624 end = &ss_data (token)[ss_length (token)];
/* Callers may pass NULL for DELIMITER. */
2625 if (delimiter != NULL)
2628 return ss_data (token);
2631 /* Reads an integer value expressed in decimal, then a space, then a string that
2632 consists of exactly as many bytes as specified by the integer, then a space,
2633 from TEXT. Returns the string, null-terminated, as a subset of TEXT's
2634 buffer (so the caller should not free the string).
   Each malformed-input case below issues a warning through sys_warn().
   NOTE(review): the declarations, the return statements, and the final
   steps that terminate the string are not visible in this listing. */
2636 text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
/* Accumulate the decimal length prefix into N, stopping at the first
   non-digit byte or at the end of the buffer. */
2644 while (text->pos < text->buffer.length)
2646 int c = text->buffer.string[text->pos];
2647 if (c < '0' || c > '9')
/* NOTE(review): N is not bounded while digits are accumulated, so a long
   enough run of digits can overflow it. */
2649 n = (n * 10) + (c - '0');
/* Fail if the buffer ended during the number or if no digit was consumed. */
2652 if (text->pos >= text->buffer.length || start == text->pos)
2654 sys_warn (r, text->start,
2655 _("Expecting digit at offset %zu in MRSETS record."),
2660 if (!text_match (text, ' '))
2662 sys_warn (r, text->start,
2663 _("Expecting space at offset %zu in MRSETS record."),
/* NOTE(review): N is printed with %zu, so it is presumably a size_t; in
   that case text->pos + n can wrap around for a huge N and defeat this
   check.  Comparing N against text->buffer.length - text->pos would avoid
   the wrap -- confirm and fix. */
2668 if (text->pos + n > text->buffer.length)
2670 sys_warn (r, text->start,
2671 _("%zu-byte string starting at offset %zu "
2672 "exceeds record length %zu."),
2673 n, text->pos, text->buffer.length);
/* S points at the first byte of the counted string inside TEXT's buffer. */
2677 s = &text->buffer.string[text->pos];
2680 sys_warn (r, text->start,
2681 _("Expecting space at offset %zu following %zu-byte string."),
2691 text_match (struct text_record *text, char c)
2693 if (text->buffer.string[text->pos] == c)
2702 /* Returns the current byte offset (as converted to UTF-8, if it was converted)
2703 inside TEXT's string.
   NOTE(review): the body of this function is not visible in this listing. */
2705 text_pos (const struct text_record *text)
/* Returns a pointer to the start of TEXT's buffer, regardless of the
   current position within it.  The pointer refers to TEXT's own storage. */
2711 text_get_all (const struct text_record *text)
2713 return text->buffer.string;
2718 /* Displays a corruption message.
   The message text is FORMAT expanded with ARGS, prefixed by the name of
   R's file and, in one of the two prefix forms, by OFFSET in hexadecimal.
   CLASS determines the message's category and severity.
   NOTE(review): the test that chooses between the two prefixes and the
   call that emits the message are not visible in this listing. */
2720 sys_msg (struct sfm_reader *r, off_t offset,
2721 int class, const char *format, va_list args)
2726 ds_init_empty (&text);
/* Prefix form that includes the file offset. */
2728 ds_put_format (&text, _("`%s' near offset 0x%llx: "),
2729 fh_get_file_name (r->fh), (long long int) offset);
/* Prefix form that names only the file. */
2731 ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
2732 ds_put_vformat (&text, format, args);
2734 m.category = msg_class_to_category (class);
2735 m.severity = msg_class_to_severity (class);
2741 m.text = ds_cstr (&text);
2746 /* Displays a warning for offset OFFSET in the file.
   FORMAT and the arguments that follow it are printf-style; they are
   forwarded to sys_msg() with message class MW. */
2748 sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
2752 va_start (args, format);
2753 sys_msg (r, offset, MW, format, args);
2757 /* Displays an error for the current file position,
2758 marks it as in an error state,
2759 and aborts reading it using longjmp.
   FORMAT and the arguments that follow it are printf-style; they are
   forwarded to sys_msg() with message class ME.  This function does not
   return to its caller.
   NOTE(review): the statement that records the error state is not visible
   in this listing. */
2761 sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
2765 va_start (args, format);
2766 sys_msg (r, offset, ME, format, args);
/* Transfer control to the point saved in R->bail_out. */
2770 longjmp (r->bail_out, 1);
2773 /* Reads BYTE_CNT bytes into BUF.
2774 Returns true if exactly BYTE_CNT bytes are successfully read.
2775 Aborts if an I/O error or a partial read occurs.
2776 If EOF_IS_OK, then an immediate end-of-file causes false to be
2777 returned; otherwise, immediate end-of-file causes an abort too. */
2780 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
2781 void *buf, size_t byte_cnt)
2783 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
/* Advance R's recorded position by what was actually consumed, even on a
   short read, so that error messages report the right offset. */
2784 r->pos += bytes_read;
2785 if (bytes_read == byte_cnt)
/* A short read with the stream's error indicator set is an I/O error. */
2787 else if (ferror (r->file))
2788 sys_error (r, r->pos, _("System error: %s."), strerror (errno));
/* Otherwise the short read is an end of file.  It is fatal unless it was
   immediate (no bytes read at all) and the caller allows that. */
2789 else if (!eof_is_ok || bytes_read != 0)
2790 sys_error (r, r->pos, _("Unexpected end of file."));
2795 /* Reads BYTE_CNT bytes into BUF.
2796 Aborts upon I/O error or if end-of-file is encountered.
   This is read_bytes_internal() with EOF_IS_OK false; its result is not
   used here. */
2798 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2800 read_bytes_internal (r, false, buf, byte_cnt);
2803 /* Reads BYTE_CNT bytes into BUF.
2804 Returns true if exactly BYTE_CNT bytes are successfully read.
2805 Returns false if an immediate end-of-file is encountered.
2806 Aborts if an I/O error or a partial read occurs.
   This is read_bytes_internal() with EOF_IS_OK true. */
2808 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2810 return read_bytes_internal (r, true, buf, byte_cnt);
2813 /* Reads a 32-bit signed integer from R and returns its value in
   host format.  The raw bytes are interpreted according to R's integer
   format.  Aborts on I/O error or end of file, as read_bytes() does.
   NOTE(review): the declaration of the INTEGER buffer is not visible in
   this listing. */
2816 read_int (struct sfm_reader *r)
2819 read_bytes (r, integer, sizeof integer);
2820 return integer_get (r->integer_format, integer, sizeof integer);
2823 /* Reads a 64-bit floating-point number from R and returns its
2824 value in host format.  The raw bytes are interpreted according to R's
   float format.  Aborts on I/O error or end of file, as read_bytes() does.
   NOTE(review): the declaration of the NUMBER buffer is not visible in
   this listing. */
2826 read_float (struct sfm_reader *r)
2829 read_bytes (r, number, sizeof number);
2830 return float_get_double (r->float_format, number);
/* Returns the integer stored in the 4 bytes at byte offset OFS within DATA,
   interpreted according to R's integer format.  Nothing is read from the
   file; the caller must ensure that DATA holds at least OFS + 4 bytes. */
2834 parse_int (struct sfm_reader *r, const void *data, size_t ofs)
2836 return integer_get (r->integer_format, (const uint8_t *) data + ofs, 4);
/* Returns the floating-point number stored at byte offset OFS within DATA,
   interpreted according to R's float format.  Nothing is read from the
   file.
   NOTE(review): the number of bytes consumed is decided inside
   float_get_double(); presumably 8, as in read_float() -- confirm. */
2840 parse_float (struct sfm_reader *r, const void *data, size_t ofs)
2842 return float_get_double (r->float_format, (const uint8_t *) data + ofs);
2845 /* Reads exactly SIZE - 1 bytes into BUFFER
2846 and stores a null byte into BUFFER[SIZE - 1].
   SIZE is therefore the full capacity of BUFFER and must be at least 1,
   since SIZE - 1 would wrap around for a SIZE of 0.
   NOTE(review): a line between the signature and the read is not visible
   in this listing; it may already check SIZE -- confirm. */
2848 read_string (struct sfm_reader *r, char *buffer, size_t size)
2851 read_bytes (r, buffer, size - 1);
2852 buffer[size - 1] = '\0';
2855 /* Skips BYTES bytes forward in R.
   The bytes are consumed by reading them into a scratch buffer, at most one
   buffer's worth per read_bytes() call, so an I/O error or end of file
   aborts as it does in read_bytes().
   NOTE(review): the loop header, the buffer declaration, and the update of
   BYTES are not visible in this listing. */
2857 skip_bytes (struct sfm_reader *r, size_t bytes)
/* Never ask for more than the scratch buffer can hold. */
2862 size_t chunk = MIN (sizeof buffer, bytes);
2863 read_bytes (r, buffer, chunk);
2868 /* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have
2869 been replaced by LFs.  The caller owns the returned string.
2871 (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
2872 files that use CR-only line ends in the file label and extra product
   info.)
   NOTE(review): the copying loop and the return statement are not visible
   in this listing. */
2875 fix_line_ends (const char *s)
/* Each replacement described above keeps or shortens the text, so a buffer
   the size of S, plus its terminator, is large enough for the copy. */
2879 d = dst = xmalloc (strlen (s) + 1);
2897 static const struct casereader_class sys_file_casereader_class =
2899 sys_file_casereader_read,
2900 sys_file_casereader_destroy,