1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/sys-file-reader.h"
20 #include "data/sys-file-private.h"
28 #include "data/attributes.h"
29 #include "data/case.h"
30 #include "data/casereader-provider.h"
31 #include "data/casereader.h"
32 #include "data/dictionary.h"
33 #include "data/file-handle-def.h"
34 #include "data/file-name.h"
35 #include "data/format.h"
36 #include "data/identifier.h"
37 #include "data/missing-values.h"
38 #include "data/mrset.h"
39 #include "data/short-names.h"
40 #include "data/value-labels.h"
41 #include "data/value.h"
42 #include "data/variable.h"
43 #include "libpspp/array.h"
44 #include "libpspp/assertion.h"
45 #include "libpspp/compiler.h"
46 #include "libpspp/i18n.h"
47 #include "libpspp/message.h"
48 #include "libpspp/misc.h"
49 #include "libpspp/pool.h"
50 #include "libpspp/str.h"
51 #include "libpspp/stringi-set.h"
53 #include "gl/c-strtod.h"
54 #include "gl/c-ctype.h"
55 #include "gl/inttostr.h"
56 #include "gl/localcharset.h"
57 #include "gl/minmax.h"
58 #include "gl/unlocked-io.h"
59 #include "gl/xalloc.h"
63 #define _(msgid) gettext (msgid)
64 #define N_(msgid) (msgid)
68 /* subtypes 0-2 unknown */
69 EXT_INTEGER = 3, /* Machine integer info. */
70 EXT_FLOAT = 4, /* Machine floating-point info. */
71 EXT_VAR_SETS = 5, /* Variable sets. */
72 EXT_DATE = 6, /* DATE. */
73 EXT_MRSETS = 7, /* Multiple response sets. */
74 EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */
75 /* subtype 9 unknown */
76 EXT_PRODUCT_INFO = 10, /* Extra product info text. */
77 EXT_DISPLAY = 11, /* Variable display parameters. */
78 /* subtype 12 unknown */
79 EXT_LONG_NAMES = 13, /* Long variable names. */
80 EXT_LONG_STRINGS = 14, /* Long strings. */
81 /* subtype 15 unknown */
82 EXT_NCASES = 16, /* Extended number of cases. */
83 EXT_FILE_ATTRS = 17, /* Data file attributes. */
84 EXT_VAR_ATTRS = 18, /* Variable attributes. */
85 EXT_MRSETS2 = 19, /* Multiple response sets (extended). */
86 EXT_ENCODING = 20, /* Character encoding. */
87 EXT_LONG_LABELS = 21, /* Value labels for long strings. */
88 EXT_DATAVIEW = 24 /* "Format properties in dataview table". */
91 /* Fields from the top-level header record. */
92 struct sfm_header_record
94 char magic[5]; /* First 4 bytes of file, then null. */
95 int weight_idx; /* 0 if unweighted, otherwise a var index. */
96 int nominal_case_size; /* Number of var positions. */
98 /* These correspond to the members of struct sfm_file_info or a dictionary
99 but in the system file's encoding rather than ASCII. */
100 char creation_date[10]; /* "dd mmm yy". */
101 char creation_time[9]; /* "hh:mm:ss". */
102 char eye_catcher[61]; /* Eye-catcher string, then product name. */
103 char file_label[65]; /* File label. */
106 struct sfm_var_record
113 int missing_value_code;
116 struct variable *var;
119 struct sfm_value_label
125 struct sfm_value_label_record
128 struct sfm_value_label *labels;
135 struct sfm_document_record
142 struct sfm_extension_record
144 off_t pos; /* Starting offset in file. */
145 size_t size; /* Size of data elements. */
146 size_t count; /* Number of data elements. */
147 void *data; /* Contents. */
150 /* System file reader. */
153 /* Resource tracking. */
154 struct pool *pool; /* All system file state. */
155 jmp_buf bail_out; /* longjmp() target for error handling. */
158 struct file_handle *fh; /* File handle. */
159 struct fh_lock *lock; /* Mutual exclusion for file handle. */
160 FILE *file; /* File stream. */
161 off_t pos; /* Position in file. */
162 bool error; /* I/O or corruption error? */
163 struct caseproto *proto; /* Format of output cases. */
166 enum integer_format integer_format; /* On-disk integer format. */
167 enum float_format float_format; /* On-disk floating point format. */
168 struct sfm_var *sfm_vars; /* Variables. */
169 size_t sfm_var_cnt; /* Number of variables. */
170 casenumber case_cnt; /* Number of cases */
171 const char *encoding; /* String encoding. */
174 bool compressed; /* File is compressed? */
175 double bias; /* Compression bias, usually 100.0. */
176 uint8_t opcodes[8]; /* Current block of opcodes. */
177 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
178 bool corruption_warning; /* Warned about possible corruption? */
181 static const struct casereader_class sys_file_casereader_class;
183 static bool close_reader (struct sfm_reader *);
185 static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
186 const struct sfm_var_record *,
189 static void sys_msg (struct sfm_reader *r, off_t, int class,
190 const char *format, va_list args)
191 PRINTF_FORMAT (4, 0);
192 static void sys_warn (struct sfm_reader *, off_t, const char *, ...)
193 PRINTF_FORMAT (3, 4);
194 static void sys_error (struct sfm_reader *, off_t, const char *, ...)
198 static void read_bytes (struct sfm_reader *, void *, size_t);
199 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
200 static int read_int (struct sfm_reader *);
201 static double read_float (struct sfm_reader *);
202 static void read_string (struct sfm_reader *, char *, size_t);
203 static void skip_bytes (struct sfm_reader *, size_t);
205 static char *fix_line_ends (const char *);
207 static int parse_int (struct sfm_reader *, const void *data, size_t ofs);
208 static double parse_float (struct sfm_reader *, const void *data, size_t ofs);
210 static void read_variable_record (struct sfm_reader *,
211 struct sfm_var_record *);
212 static void read_value_label_record (struct sfm_reader *,
213 struct sfm_value_label_record *,
215 static struct sfm_document_record *read_document_record (struct sfm_reader *);
216 static struct sfm_extension_record *read_extension_record (
217 struct sfm_reader *, int subtype);
218 static void skip_extension_record (struct sfm_reader *, int subtype);
220 static const char *choose_encoding (
222 const struct sfm_header_record *,
223 const struct sfm_extension_record *ext_integer,
224 const struct sfm_extension_record *ext_encoding);
226 static struct text_record *open_text_record (
227 struct sfm_reader *, const struct sfm_extension_record *,
228 bool recode_to_utf8);
229 static void close_text_record (struct sfm_reader *,
230 struct text_record *);
231 static bool read_variable_to_value_pair (struct sfm_reader *,
233 struct text_record *,
234 struct variable **var, char **value);
235 static void text_warn (struct sfm_reader *r, struct text_record *text,
236 const char *format, ...)
237 PRINTF_FORMAT (3, 4);
238 static char *text_get_token (struct text_record *,
239 struct substring delimiters, char *delimiter);
240 static bool text_match (struct text_record *, char c);
241 static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
242 struct text_record *,
243 struct substring delimiters,
245 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
246 struct text_record *,
247 struct substring delimiters,
249 static const char *text_parse_counted_string (struct sfm_reader *,
250 struct text_record *);
251 static size_t text_pos (const struct text_record *);
252 static const char *text_get_all (const struct text_record *);
254 static bool close_reader (struct sfm_reader *r);
256 /* Dictionary reader. */
264 static void read_header (struct sfm_reader *, struct sfm_read_info *,
265 struct sfm_header_record *);
266 static void parse_header (struct sfm_reader *,
267 const struct sfm_header_record *,
268 struct sfm_read_info *, struct dictionary *);
269 static void parse_variable_records (struct sfm_reader *, struct dictionary *,
270 struct sfm_var_record *, size_t n);
271 static void parse_format_spec (struct sfm_reader *, off_t pos,
272 unsigned int format, enum which_format,
273 struct variable *, int *format_warning_cnt);
274 static void parse_document (struct dictionary *, struct sfm_document_record *);
275 static void parse_display_parameters (struct sfm_reader *,
276 const struct sfm_extension_record *,
277 struct dictionary *);
278 static void parse_machine_integer_info (struct sfm_reader *,
279 const struct sfm_extension_record *,
280 struct sfm_read_info *);
281 static void parse_machine_float_info (struct sfm_reader *,
282 const struct sfm_extension_record *);
283 static void parse_extra_product_info (struct sfm_reader *,
284 const struct sfm_extension_record *,
285 struct sfm_read_info *);
286 static void parse_mrsets (struct sfm_reader *,
287 const struct sfm_extension_record *,
288 struct dictionary *);
289 static void parse_long_var_name_map (struct sfm_reader *,
290 const struct sfm_extension_record *,
291 struct dictionary *);
292 static void parse_long_string_map (struct sfm_reader *,
293 const struct sfm_extension_record *,
294 struct dictionary *);
295 static void parse_value_labels (struct sfm_reader *, struct dictionary *,
296 const struct sfm_var_record *,
298 const struct sfm_value_label_record *);
299 static void parse_data_file_attributes (struct sfm_reader *,
300 const struct sfm_extension_record *,
301 struct dictionary *);
302 static void parse_variable_attributes (struct sfm_reader *,
303 const struct sfm_extension_record *,
304 struct dictionary *);
305 static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
306 static void parse_long_string_value_labels (struct sfm_reader *,
307 const struct sfm_extension_record *,
308 struct dictionary *);
310 /* Frees the strings inside INFO. */
312 sfm_read_info_destroy (struct sfm_read_info *info)
316 free (info->creation_date);
317 free (info->creation_time);
318 free (info->product);
319 free (info->product_ext);
323 /* Opens the system file designated by file handle FH for reading. Reads the
324 system file's dictionary into *DICT.
326 Ordinarily the reader attempts to automatically detect the character
327 encoding based on the file's contents. This isn't always possible,
328 especially for files written by old versions of SPSS or PSPP, so specifying
329 a nonnull ENCODING overrides the choice of character encoding.
331 If INFO is non-null, then it receives additional info about the system file,
332 which the caller must eventually free with sfm_read_info_destroy() when it
333 is no longer needed. */
335 sfm_open_reader (struct file_handle *fh, const char *volatile encoding,
336 struct dictionary **dictp, struct sfm_read_info *infop)
338 struct sfm_reader *volatile r = NULL;
339 struct sfm_read_info *volatile info;
341 struct sfm_header_record header;
343 struct sfm_var_record *vars;
344 size_t n_vars, allocated_vars;
346 struct sfm_value_label_record *labels;
347 size_t n_labels, allocated_labels;
349 struct sfm_document_record *document;
351 struct sfm_extension_record *extensions[32];
353 struct dictionary *volatile dict = NULL;
356 /* Create and initialize reader. */
357 r = pool_create_container (struct sfm_reader, pool);
363 r->opcode_idx = sizeof r->opcodes;
364 r->corruption_warning = false;
366 info = infop ? infop : xmalloc (sizeof *info);
367 memset (info, 0, sizeof *info);
369 /* TRANSLATORS: this fragment will be interpolated into
370 messages in fh_lock() that identify types of files. */
371 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
375 r->file = fn_open (fh_get_file_name (fh), "rb");
378 msg (ME, _("Error opening `%s' for reading as a system file: %s."),
379 fh_get_file_name (r->fh), strerror (errno));
383 if (setjmp (r->bail_out))
387 read_header (r, info, &header);
390 n_vars = allocated_vars = 0;
393 n_labels = allocated_labels = 0;
397 memset (extensions, 0, sizeof extensions);
407 read_int (r); /* Skip filler. */
414 if (n_vars >= allocated_vars)
415 vars = pool_2nrealloc (r->pool, vars, &allocated_vars,
417 read_variable_record (r, &vars[n_vars++]);
421 if (n_labels >= allocated_labels)
422 labels = pool_2nrealloc (r->pool, labels, &allocated_labels,
424 read_value_label_record (r, &labels[n_labels++], n_vars);
428 /* A Type 4 record is always immediately after a type 3 record,
429 so the code for type 3 records reads the type 4 record too. */
430 sys_error (r, r->pos, _("Misplaced type 4 record."));
433 if (document != NULL)
434 sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
435 document = read_document_record (r);
439 subtype = read_int (r);
440 if (subtype < 0 || subtype >= sizeof extensions / sizeof *extensions)
443 _("Unrecognized record type 7, subtype %d. Please "
444 "send a copy of this file, and the syntax which "
445 "created it to %s."),
446 subtype, PACKAGE_BUGREPORT);
447 skip_extension_record (r, subtype);
449 else if (extensions[subtype] != NULL)
452 _("Record type 7, subtype %d found here has the same "
453 "type as the record found near offset 0x%llx. "
454 "Please send a copy of this file, and the syntax "
455 "which created it to %s."),
456 subtype, (long long int) extensions[subtype]->pos,
458 skip_extension_record (r, subtype);
461 extensions[subtype] = read_extension_record (r, subtype);
465 sys_error (r, r->pos, _("Unrecognized record type %d."), type);
470 /* Now actually parse what we read.
472 First, figure out the correct character encoding, because this determines
473 how the rest of the header data is to be interpreted. */
474 dict = dict_create (encoding
476 : choose_encoding (r, &header, extensions[EXT_INTEGER],
477 extensions[EXT_ENCODING]));
478 r->encoding = dict_get_encoding (dict);
480 /* These records don't use variables at all. */
481 if (document != NULL)
482 parse_document (dict, document);
484 if (extensions[EXT_INTEGER] != NULL)
485 parse_machine_integer_info (r, extensions[EXT_INTEGER], info);
487 if (extensions[EXT_FLOAT] != NULL)
488 parse_machine_float_info (r, extensions[EXT_FLOAT]);
490 if (extensions[EXT_PRODUCT_INFO] != NULL)
491 parse_extra_product_info (r, extensions[EXT_PRODUCT_INFO], info);
493 if (extensions[EXT_FILE_ATTRS] != NULL)
494 parse_data_file_attributes (r, extensions[EXT_FILE_ATTRS], dict);
496 parse_header (r, &header, info, dict);
498 /* Parse the variable records, the basis of almost everything else. */
499 parse_variable_records (r, dict, vars, n_vars);
501 /* Parse value labels and the weight variable immediately after the variable
502 records. These records use indexes into var_recs[], so we must parse them
503 before those indexes become invalidated by very long string variables. */
504 for (i = 0; i < n_labels; i++)
505 parse_value_labels (r, dict, vars, n_vars, &labels[i]);
506 if (header.weight_idx != 0)
508 struct variable *weight_var;
510 weight_var = lookup_var_by_index (r, 76, vars, n_vars,
512 if (var_is_numeric (weight_var))
513 dict_set_weight (dict, weight_var);
515 sys_error (r, -1, _("Weighting variable must be numeric "
516 "(not string variable `%s')."),
517 var_get_name (weight_var));
520 if (extensions[EXT_DISPLAY] != NULL)
521 parse_display_parameters (r, extensions[EXT_DISPLAY], dict);
523 /* The following records use short names, so they need to be parsed before
524 parse_long_var_name_map() changes short names to long names. */
525 if (extensions[EXT_MRSETS] != NULL)
526 parse_mrsets (r, extensions[EXT_MRSETS], dict);
528 if (extensions[EXT_MRSETS2] != NULL)
529 parse_mrsets (r, extensions[EXT_MRSETS2], dict);
531 if (extensions[EXT_LONG_STRINGS] != NULL)
532 parse_long_string_map (r, extensions[EXT_LONG_STRINGS], dict);
534 /* Now rename variables to their long names. */
535 parse_long_var_name_map (r, extensions[EXT_LONG_NAMES], dict);
537 /* The following records use long names, so they need to follow renaming. */
538 if (extensions[EXT_VAR_ATTRS] != NULL)
540 parse_variable_attributes (r, extensions[EXT_VAR_ATTRS], dict);
542 /* Roles use the $@Role attribute. */
543 assign_variable_roles (r, dict);
546 if (extensions[EXT_LONG_LABELS] != NULL)
547 parse_long_string_value_labels (r, extensions[EXT_LONG_LABELS], dict);
549 /* Warn if the actual amount of data per case differs from the
550 amount that the header claims. SPSS version 13 gets this
551 wrong when very long strings are involved, so don't warn in
553 if (header.nominal_case_size != -1 && header.nominal_case_size != n_vars
554 && info->version_major != 13)
555 sys_warn (r, -1, _("File header claims %d variable positions but "
556 "%zu were read from file."),
557 header.nominal_case_size, n_vars);
559 /* Create an index of dictionary variable widths for
560 sfm_read_case to use. We cannot use the `struct variable's
561 from the dictionary we created, because the caller owns the
562 dictionary and may destroy or modify its variables. */
563 sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
564 pool_register (r->pool, free, r->sfm_vars);
565 r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
570 sfm_read_info_destroy (info);
574 return casereader_create_sequential
576 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
577 &sys_file_casereader_class, r);
582 sfm_read_info_destroy (info);
592 /* Closes a system file after we're done with it.
593 Returns true if an I/O error has occurred on READER, false
596 close_reader (struct sfm_reader *r)
605 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
607 msg (ME, _("Error closing system file `%s': %s."),
608 fh_get_file_name (r->fh), strerror (errno));
618 pool_destroy (r->pool);
623 /* Destroys READER. */
625 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
627 struct sfm_reader *r = r_;
631 /* Returns true if FILE is an SPSS system file,
634 sfm_detect (FILE *file)
638 if (fread (magic, 4, 1, file) != 1)
642 return !strcmp (ASCII_MAGIC, magic) || !strcmp (EBCDIC_MAGIC, magic);
645 /* Reads the global header of the system file. Initializes *HEADER and *INFO,
646 except for the string fields in *INFO, which parse_header() will initialize
647 later once the file's encoding is known. */
649 read_header (struct sfm_reader *r, struct sfm_read_info *info,
650 struct sfm_header_record *header)
652 uint8_t raw_layout_code[4];
655 read_string (r, header->magic, sizeof header->magic);
656 read_string (r, header->eye_catcher, sizeof header->eye_catcher);
658 if (strcmp (ASCII_MAGIC, header->magic)
659 && strcmp (EBCDIC_MAGIC, header->magic))
660 sys_error (r, 0, _("This is not an SPSS system file."));
662 /* Identify integer format. */
663 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
664 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
666 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
668 || (r->integer_format != INTEGER_MSB_FIRST
669 && r->integer_format != INTEGER_LSB_FIRST))
670 sys_error (r, 64, _("This is not an SPSS system file."));
672 header->nominal_case_size = read_int (r);
673 if (header->nominal_case_size < 0
674 || header->nominal_case_size > INT_MAX / 16)
675 header->nominal_case_size = -1;
677 r->compressed = read_int (r) != 0;
679 header->weight_idx = read_int (r);
681 r->case_cnt = read_int (r);
682 if ( r->case_cnt > INT_MAX / 2)
685 /* Identify floating-point format and obtain compression bias. */
686 read_bytes (r, raw_bias, sizeof raw_bias);
687 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
689 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
691 if (memcmp (raw_bias, zero_bias, 8))
692 sys_warn (r, r->pos - 8,
693 _("Compression bias is not the usual "
694 "value of 100, or system file uses unrecognized "
695 "floating-point format."));
698 /* Some software is known to write all-zeros to this
699 field. Such software also writes floating-point
700 numbers in the format that we expect by default
701 (it seems that all software most likely does, in
702 reality), so don't warn in this case. */
705 if (r->integer_format == INTEGER_MSB_FIRST)
706 r->float_format = FLOAT_IEEE_DOUBLE_BE;
708 r->float_format = FLOAT_IEEE_DOUBLE_LE;
710 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
712 read_string (r, header->creation_date, sizeof header->creation_date);
713 read_string (r, header->creation_time, sizeof header->creation_time);
714 read_string (r, header->file_label, sizeof header->file_label);
717 info->integer_format = r->integer_format;
718 info->float_format = r->float_format;
719 info->compressed = r->compressed;
720 info->case_cnt = r->case_cnt;
723 /* Reads a variable (type 2) record from R into RECORD. */
725 read_variable_record (struct sfm_reader *r, struct sfm_var_record *record)
727 int has_variable_label;
729 memset (record, 0, sizeof *record);
731 record->pos = r->pos;
732 record->width = read_int (r);
733 has_variable_label = read_int (r);
734 record->missing_value_code = read_int (r);
735 record->print_format = read_int (r);
736 record->write_format = read_int (r);
737 read_bytes (r, record->name, sizeof record->name);
739 if (has_variable_label == 1)
741 enum { MAX_LABEL_LEN = 255 };
742 size_t len, read_len;
746 /* Read up to MAX_LABEL_LEN bytes of label. */
747 read_len = MIN (MAX_LABEL_LEN, len);
748 record->label = pool_malloc (r->pool, read_len + 1);
749 read_string (r, record->label, read_len + 1);
751 /* Skip unread label bytes. */
752 skip_bytes (r, len - read_len);
754 /* Skip label padding up to multiple of 4 bytes. */
755 skip_bytes (r, ROUND_UP (len, 4) - len);
757 else if (has_variable_label != 0)
758 sys_error (r, record->pos,
759 _("Variable label indicator field is not 0 or 1."));
761 /* Set missing values. */
762 if (record->missing_value_code != 0)
764 int code = record->missing_value_code;
765 if (record->width == 0)
767 if (code < -3 || code > 3 || code == -1)
768 sys_error (r, record->pos,
769 _("Numeric missing value indicator field is not "
770 "-3, -2, 0, 1, 2, or 3."));
774 if (code < 1 || code > 3)
775 sys_error (r, record->pos,
776 _("String missing value indicator field is not "
780 read_bytes (r, record->missing, 8 * abs (code));
784 /* Reads value labels from R into RECORD. */
786 read_value_label_record (struct sfm_reader *r,
787 struct sfm_value_label_record *record,
792 /* Read type 3 record. */
793 record->pos = r->pos;
794 record->n_labels = read_int (r);
795 if (record->n_labels > SIZE_MAX / sizeof *record->labels)
796 sys_error (r, r->pos - 4, _("Invalid number of labels %zu."),
798 record->labels = pool_nmalloc (r->pool, record->n_labels,
799 sizeof *record->labels);
800 for (i = 0; i < record->n_labels; i++)
802 struct sfm_value_label *label = &record->labels[i];
803 unsigned char label_len;
806 read_bytes (r, label->value, sizeof label->value);
808 /* Read label length. */
809 read_bytes (r, &label_len, sizeof label_len);
810 padded_len = ROUND_UP (label_len + 1, 8);
812 /* Read label, padding. */
813 label->label = pool_malloc (r->pool, padded_len + 1);
814 read_bytes (r, label->label, padded_len - 1);
815 label->label[label_len] = '\0';
818 /* Read record type of type 4 record. */
819 if (read_int (r) != 4)
820 sys_error (r, r->pos - 4,
821 _("Variable index record (type 4) does not immediately "
822 "follow value label record (type 3) as it should."));
824 /* Read number of variables associated with value label from type 4
826 record->n_vars = read_int (r);
827 if (record->n_vars < 1 || record->n_vars > n_vars)
828 sys_error (r, r->pos - 4,
829 _("Number of variables associated with a value label (%zu) "
830 "is not between 1 and the number of variables (%zu)."),
831 record->n_vars, n_vars);
832 record->vars = pool_nmalloc (r->pool, record->n_vars, sizeof *record->vars);
833 for (i = 0; i < record->n_vars; i++)
834 record->vars[i] = read_int (r);
837 /* Reads a document record from R and returns it. */
838 static struct sfm_document_record *
839 read_document_record (struct sfm_reader *r)
841 struct sfm_document_record *record;
844 record = pool_malloc (r->pool, sizeof *record);
845 record->pos = r->pos;
847 n_lines = read_int (r);
848 if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
849 sys_error (r, record->pos,
850 _("Number of document lines (%d) "
851 "must be greater than 0 and less than %d."),
852 n_lines, INT_MAX / DOC_LINE_LENGTH);
854 record->n_lines = n_lines;
855 record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
856 read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines);
862 read_extension_record_header (struct sfm_reader *r, int subtype,
863 struct sfm_extension_record *record)
865 record->pos = r->pos;
866 record->size = read_int (r);
867 record->count = read_int (r);
869 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
870 allows an extra byte for a null terminator, used by some
871 extension processing routines. */
872 if (record->size != 0
873 && size_overflow_p (xsum (1, xtimes (record->count, record->size))))
874 sys_error (r, record->pos, "Record type 7 subtype %d too large.", subtype);
877 /* Reads an extension record from R into RECORD. */
878 static struct sfm_extension_record *
879 read_extension_record (struct sfm_reader *r, int subtype)
881 struct extension_record_type
888 static const struct extension_record_type types[] =
890 /* Implemented record types. */
891 { EXT_INTEGER, 4, 8 },
893 { EXT_MRSETS, 1, 0 },
894 { EXT_PRODUCT_INFO, 1, 0 },
895 { EXT_DISPLAY, 4, 0 },
896 { EXT_LONG_NAMES, 1, 0 },
897 { EXT_LONG_STRINGS, 1, 0 },
898 { EXT_NCASES, 8, 2 },
899 { EXT_FILE_ATTRS, 1, 0 },
900 { EXT_VAR_ATTRS, 1, 0 },
901 { EXT_MRSETS2, 1, 0 },
902 { EXT_ENCODING, 1, 0 },
903 { EXT_LONG_LABELS, 1, 0 },
905 /* Ignored record types. */
906 { EXT_VAR_SETS, 0, 0 },
908 { EXT_DATA_ENTRY, 0, 0 },
909 { EXT_DATAVIEW, 0, 0 },
912 const struct extension_record_type *type;
913 struct sfm_extension_record *record;
916 record = pool_malloc (r->pool, sizeof *record);
917 read_extension_record_header (r, subtype, record);
918 n_bytes = record->count * record->size;
920 for (type = types; type < &types[sizeof types / sizeof *types]; type++)
921 if (subtype == type->subtype)
923 if (type->size > 0 && record->size != type->size)
924 sys_warn (r, record->pos,
925 _("Record type 7, subtype %d has bad size %zu "
926 "(expected %d)."), subtype, record->size, type->size);
927 else if (type->count > 0 && record->count != type->count)
928 sys_warn (r, record->pos,
929 _("Record type 7, subtype %d has bad count %zu "
930 "(expected %d)."), subtype, record->count, type->count);
931 else if (type->count == 0 && type->size == 0)
933 /* Ignore this record. */
937 char *data = pool_malloc (r->pool, n_bytes + 1);
938 data[n_bytes] = '\0';
941 read_bytes (r, record->data, n_bytes);
948 sys_warn (r, record->pos,
949 _("Unrecognized record type 7, subtype %d. Please send a "
950 "copy of this file, and the syntax which created it to %s."),
951 subtype, PACKAGE_BUGREPORT);
954 skip_bytes (r, n_bytes);
959 skip_extension_record (struct sfm_reader *r, int subtype)
961 struct sfm_extension_record record;
963 read_extension_record_header (r, subtype, &record);
964 skip_bytes (r, record.count * record.size);
968 parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
969 struct sfm_read_info *info, struct dictionary *dict)
971 const char *dict_encoding = dict_get_encoding (dict);
972 struct substring product;
973 struct substring label;
976 /* Convert file label to UTF-8 and put it into DICT. */
977 label = recode_substring_pool ("UTF-8", dict_encoding,
978 ss_cstr (header->file_label), r->pool);
979 ss_trim (&label, ss_cstr (" "));
980 label.string[label.length] = '\0';
981 fixed_label = fix_line_ends (label.string);
982 dict_set_label (dict, fixed_label);
985 /* Put creation date and time in UTF-8 into INFO. */
986 info->creation_date = recode_string ("UTF-8", dict_encoding,
987 header->creation_date, -1);
988 info->creation_time = recode_string ("UTF-8", dict_encoding,
989 header->creation_time, -1);
991 /* Put product name into INFO, dropping eye-catcher string if present. */
992 product = recode_substring_pool ("UTF-8", dict_encoding,
993 ss_cstr (header->eye_catcher), r->pool);
994 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
995 ss_trim (&product, ss_cstr (" "));
996 info->product = ss_xstrdup (product);
999 /* Reads a variable (type 2) record from R and adds the
1000 corresponding variable to DICT.
1001 Also skips past additional variable records for long string
1004 parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
1005 struct sfm_var_record *var_recs, size_t n_var_recs)
1007 const char *dict_encoding = dict_get_encoding (dict);
1008 struct sfm_var_record *rec;
1011 for (rec = var_recs; rec < &var_recs[n_var_recs]; )
1013 struct variable *var;
1018 name = recode_string_pool ("UTF-8", dict_encoding,
1019 rec->name, 8, r->pool);
1020 name[strcspn (name, " ")] = '\0';
1022 if (!dict_id_is_valid (dict, name, false)
1023 || name[0] == '$' || name[0] == '#')
1024 sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
1026 if (rec->width < 0 || rec->width > 255)
1027 sys_error (r, rec->pos,
1028 _("Bad width %d for variable %s."), rec->width, name);
1030 var = rec->var = dict_create_var (dict, name, rec->width);
1032 sys_error (r, rec->pos, _("Duplicate variable name `%s'."), name);
1034 /* Set the short name the same as the long name. */
1035 var_set_short_name (var, 0, name);
1037 /* Get variable label, if any. */
1042 utf8_label = recode_string_pool ("UTF-8", dict_encoding,
1043 rec->label, -1, r->pool);
1044 var_set_label (var, utf8_label, false);
1047 /* Set missing values. */
1048 if (rec->missing_value_code != 0)
1050 int width = var_get_width (var);
1051 struct missing_values mv;
1053 mv_init_pool (r->pool, &mv, width);
1054 if (var_is_numeric (var))
1056 bool has_range = rec->missing_value_code < 0;
1057 int n_discrete = (has_range
1058 ? rec->missing_value_code == -3
1059 : rec->missing_value_code);
1064 double low = parse_float (r, rec->missing, 0);
1065 double high = parse_float (r, rec->missing, 8);
1067 /* Deal with SPSS 21 change in representation. */
1071 mv_add_range (&mv, low, high);
1075 for (i = 0; i < n_discrete; i++)
1077 mv_add_num (&mv, parse_float (r, rec->missing, ofs));
1085 value_init_pool (r->pool, &value, width);
1086 value_set_missing (&value, width);
1087 for (i = 0; i < rec->missing_value_code; i++)
1089 uint8_t *s = value_str_rw (&value, width);
1090 memcpy (s, rec->missing + 8 * i, MIN (width, 8));
1091 mv_add_str (&mv, s);
1094 var_set_missing_values (var, &mv);
1098 parse_format_spec (r, rec->pos + 12, rec->print_format,
1099 PRINT_FORMAT, var, &n_warnings);
1100 parse_format_spec (r, rec->pos + 16, rec->write_format,
1101 WRITE_FORMAT, var, &n_warnings);
1103 /* Account for values.
1104 Skip long string continuation records, if any. */
1105 n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
1106 for (i = 1; i < n_values; i++)
1107 if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
1108 sys_error (r, rec->pos, _("Missing string continuation record."));
1113 /* Translates the format spec from sysfile format to internal
1116 parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
1117 enum which_format which, struct variable *v,
1120 const int max_warnings = 8;
1121 uint8_t raw_type = format >> 16;
1122 uint8_t w = format >> 8;
1131 ok = (fmt_from_io (raw_type, &f.type)
1132 && fmt_check_output (&f)
1133 && fmt_check_width_compat (&f, var_get_width (v)));
1138 if (which == PRINT_FORMAT)
1139 var_set_print_format (v, &f);
1141 var_set_write_format (v, &f);
1143 else if (format == 0)
1145 /* Actually observed in the wild. No point in warning about it. */
1147 else if (++*n_warnings <= max_warnings)
1149 if (which == PRINT_FORMAT)
1150 sys_warn (r, pos, _("Variable %s with width %d has invalid print "
1152 var_get_name (v), var_get_width (v), format);
1154 sys_warn (r, pos, _("Variable %s with width %d has invalid write "
1156 var_get_name (v), var_get_width (v), format);
1158 if (*n_warnings == max_warnings)
1159 sys_warn (r, -1, _("Suppressing further invalid format warnings."));
1164 parse_document (struct dictionary *dict, struct sfm_document_record *record)
1168 for (p = record->documents;
1169 p < record->documents + DOC_LINE_LENGTH * record->n_lines;
1170 p += DOC_LINE_LENGTH)
1172 struct substring line;
1174 line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
1175 ss_buffer (p, DOC_LINE_LENGTH), NULL);
1176 ss_rtrim (&line, ss_cstr (" "));
1177 line.string[line.length] = '\0';
1179 dict_add_document_line (dict, line.string, false);
1185 /* Parses record type 7, subtype 3. */
1187 parse_machine_integer_info (struct sfm_reader *r,
1188 const struct sfm_extension_record *record,
1189 struct sfm_read_info *info)
1191 int float_representation, expected_float_format;
1192 int integer_representation, expected_integer_format;
1194 /* Save version info. */
1195 info->version_major = parse_int (r, record->data, 0);
1196 info->version_minor = parse_int (r, record->data, 4);
1197 info->version_revision = parse_int (r, record->data, 8);
1199 /* Check floating point format. */
1200 float_representation = parse_int (r, record->data, 16);
1201 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
1202 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
1203 expected_float_format = 1;
1204 else if (r->float_format == FLOAT_Z_LONG)
1205 expected_float_format = 2;
1206 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
1207 expected_float_format = 3;
1210 if (float_representation != expected_float_format)
1211 sys_error (r, record->pos, _("Floating-point representation indicated by "
1212 "system file (%d) differs from expected (%d)."),
1213 float_representation, expected_float_format);
1215 /* Check integer format. */
1216 integer_representation = parse_int (r, record->data, 24);
1217 if (r->integer_format == INTEGER_MSB_FIRST)
1218 expected_integer_format = 1;
1219 else if (r->integer_format == INTEGER_LSB_FIRST)
1220 expected_integer_format = 2;
1223 if (integer_representation != expected_integer_format)
1224 sys_warn (r, record->pos,
1225 _("Integer format indicated by system file (%d) "
1226 "differs from expected (%d)."),
1227 integer_representation, expected_integer_format);
1232 choose_encoding (struct sfm_reader *r,
1233 const struct sfm_header_record *header,
1234 const struct sfm_extension_record *ext_integer,
1235 const struct sfm_extension_record *ext_encoding)
1237 /* The EXT_ENCODING record is a more reliable way to determine dictionary
1240 return ext_encoding->data;
1242 /* But EXT_INTEGER is better than nothing as a fallback. */
1245 int codepage = parse_int (r, ext_integer->data, 7 * 4);
1246 const char *encoding;
1255 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
1256 respectively. However, there are known to be many files in the wild
1257 with character code 2, yet have data which are clearly not ASCII.
1258 Therefore we ignore these values. */
1265 encoding = sys_get_encoding_from_codepage (codepage);
1266 if (encoding != NULL)
1272 /* If the file magic number is EBCDIC then its character data is too. */
1273 if (!strcmp (header->magic, EBCDIC_MAGIC))
1276 return locale_charset ();
1279 /* Parses record type 7, subtype 4. */
1281 parse_machine_float_info (struct sfm_reader *r,
1282 const struct sfm_extension_record *record)
1284 double sysmis = parse_float (r, record->data, 0);
1285 double highest = parse_float (r, record->data, 8);
1286 double lowest = parse_float (r, record->data, 16);
1288 if (sysmis != SYSMIS)
1289 sys_warn (r, record->pos,
1290 _("File specifies unexpected value %g (%a) as %s, "
1291 "instead of %g (%a)."),
1292 sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
1294 if (highest != HIGHEST)
1295 sys_warn (r, record->pos,
1296 _("File specifies unexpected value %g (%a) as %s, "
1297 "instead of %g (%a)."),
1298 highest, highest, "HIGHEST", HIGHEST, HIGHEST);
1300 /* SPSS before version 21 used a unique value just bigger than SYSMIS as
1301 LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
1302 appears in a context (missing values) where SYSMIS cannot. */
1303 if (lowest != LOWEST && lowest != SYSMIS)
1304 sys_warn (r, record->pos,
1305 _("File specifies unexpected value %g (%a) as %s, "
1306 "instead of %g (%a) or %g (%a)."),
1307 lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
1310 /* Parses record type 7, subtype 10. */
1312 parse_extra_product_info (struct sfm_reader *r,
1313 const struct sfm_extension_record *record,
1314 struct sfm_read_info *info)
1316 struct text_record *text;
1318 text = open_text_record (r, record, true);
1319 info->product_ext = fix_line_ends (text_get_all (text));
1320 close_text_record (r, text);
1323 /* Parses record type 7, subtype 7 or 19. */
1325 parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
1326 struct dictionary *dict)
1328 struct text_record *text;
1329 struct mrset *mrset;
1331 text = open_text_record (r, record, false);
1334 const char *counted = NULL;
1337 struct stringi_set var_names;
1338 size_t allocated_vars;
1342 mrset = xzalloc (sizeof *mrset);
1344 name = text_get_token (text, ss_cstr ("="), NULL);
1347 mrset->name = recode_string ("UTF-8", r->encoding, name, -1);
1349 if (mrset->name[0] != '$')
1351 sys_warn (r, record->pos,
1352 _("`%s' does not begin with `$' at offset %zu "
1353 "in MRSETS record."), mrset->name, text_pos (text));
1357 if (text_match (text, 'C'))
1359 mrset->type = MRSET_MC;
1360 if (!text_match (text, ' '))
1362 sys_warn (r, record->pos,
1363 _("Missing space following `%c' at offset %zu "
1364 "in MRSETS record."), 'C', text_pos (text));
1368 else if (text_match (text, 'D'))
1370 mrset->type = MRSET_MD;
1371 mrset->cat_source = MRSET_VARLABELS;
1373 else if (text_match (text, 'E'))
1377 mrset->type = MRSET_MD;
1378 mrset->cat_source = MRSET_COUNTEDVALUES;
1379 if (!text_match (text, ' '))
1381 sys_warn (r, record->pos,
1382 _("Missing space following `%c' at offset %zu "
1383 "in MRSETS record."), 'E', text_pos (text));
1387 number = text_get_token (text, ss_cstr (" "), NULL);
1388 if (!strcmp (number, "11"))
1389 mrset->label_from_var_label = true;
1390 else if (strcmp (number, "1"))
1391 sys_warn (r, record->pos,
1392 _("Unexpected label source value `%s' following `E' "
1393 "at offset %zu in MRSETS record."),
1394 number, text_pos (text));
1398 sys_warn (r, record->pos,
1399 _("Missing `C', `D', or `E' at offset %zu "
1400 "in MRSETS record."),
1405 if (mrset->type == MRSET_MD)
1407 counted = text_parse_counted_string (r, text);
1408 if (counted == NULL)
1412 label = text_parse_counted_string (r, text);
1415 if (label[0] != '\0')
1416 mrset->label = recode_string ("UTF-8", r->encoding, label, -1);
1418 stringi_set_init (&var_names);
1423 const char *raw_var_name;
1424 struct variable *var;
1427 raw_var_name = text_get_token (text, ss_cstr (" \n"), &delimiter);
1428 if (raw_var_name == NULL)
1430 sys_warn (r, record->pos,
1431 _("Missing new-line parsing variable names "
1432 "at offset %zu in MRSETS record."),
1436 var_name = recode_string ("UTF-8", r->encoding, raw_var_name, -1);
1438 var = dict_lookup_var (dict, var_name);
1444 if (!stringi_set_insert (&var_names, var_name))
1446 sys_warn (r, record->pos,
1447 _("Duplicate variable name %s "
1448 "at offset %zu in MRSETS record."),
1449 var_name, text_pos (text));
1455 if (mrset->label == NULL && mrset->label_from_var_label
1456 && var_has_label (var))
1457 mrset->label = xstrdup (var_get_label (var));
1460 && var_get_type (var) != var_get_type (mrset->vars[0]))
1462 sys_warn (r, record->pos,
1463 _("MRSET %s contains both string and "
1464 "numeric variables."), name);
1467 width = MIN (width, var_get_width (var));
1469 if (mrset->n_vars >= allocated_vars)
1470 mrset->vars = x2nrealloc (mrset->vars, &allocated_vars,
1471 sizeof *mrset->vars);
1472 mrset->vars[mrset->n_vars++] = var;
1474 while (delimiter != '\n');
1476 if (mrset->n_vars < 2)
1478 sys_warn (r, record->pos,
1479 _("MRSET %s has only %zu variables."), mrset->name,
1481 mrset_destroy (mrset);
1482 stringi_set_destroy (&var_names);
1486 if (mrset->type == MRSET_MD)
1488 mrset->width = width;
1489 value_init (&mrset->counted, width);
1491 mrset->counted.f = c_strtod (counted, NULL);
1493 value_copy_str_rpad (&mrset->counted, width,
1494 (const uint8_t *) counted, ' ');
1497 dict_add_mrset (dict, mrset);
1499 stringi_set_destroy (&var_names);
1501 mrset_destroy (mrset);
1502 close_text_record (r, text);
1505 /* Read record type 7, subtype 11, which specifies how variables
1506 should be displayed in GUI environments. */
1508 parse_display_parameters (struct sfm_reader *r,
1509 const struct sfm_extension_record *record,
1510 struct dictionary *dict)
1512 bool includes_width;
1513 bool warned = false;
1518 n_vars = dict_get_var_cnt (dict);
1519 if (record->count == 3 * n_vars)
1520 includes_width = true;
1521 else if (record->count == 2 * n_vars)
1522 includes_width = false;
1525 sys_warn (r, record->pos,
1526 _("Extension 11 has bad count %zu (for %zu variables)."),
1527 record->count, n_vars);
1532 for (i = 0; i < n_vars; ++i)
1534 struct variable *v = dict_get_var (dict, i);
1535 int measure, width, align;
1537 measure = parse_int (r, record->data, ofs);
1542 width = parse_int (r, record->data, ofs);
1548 align = parse_int (r, record->data, ofs);
1551 /* SPSS sometimes seems to set variables' measure to zero. */
1555 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1558 sys_warn (r, record->pos,
1559 _("Invalid variable display parameters for variable "
1560 "%zu (%s). Default parameters substituted."),
1561 i, var_get_name (v));
1566 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1567 : measure == 2 ? MEASURE_ORDINAL
1569 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1570 : align == 1 ? ALIGN_RIGHT
1573 /* Older versions (SPSS 9.0) sometimes set the display
1574 width to zero. This causes confusion in the GUI, so
1575 only set the width if it is nonzero. */
1577 var_set_display_width (v, width);
1582 rename_var_and_save_short_names (struct dictionary *dict, struct variable *var,
1583 const char *new_name)
1585 size_t n_short_names;
1589 /* Renaming a variable may clear its short names, but we
1590 want to retain them, so we save them and re-set them
1592 n_short_names = var_get_short_name_cnt (var);
1593 short_names = xnmalloc (n_short_names, sizeof *short_names);
1594 for (i = 0; i < n_short_names; i++)
1596 const char *s = var_get_short_name (var, i);
1597 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1600 /* Set long name. */
1601 dict_rename_var (dict, var, new_name);
1603 /* Restore short names. */
1604 for (i = 0; i < n_short_names; i++)
1606 var_set_short_name (var, i, short_names[i]);
1607 free (short_names[i]);
1612 /* Parses record type 7, subtype 13, which gives the long name that corresponds
1613 to each short name. Modifies variable names in DICT accordingly. */
1615 parse_long_var_name_map (struct sfm_reader *r,
1616 const struct sfm_extension_record *record,
1617 struct dictionary *dict)
1619 struct text_record *text;
1620 struct variable *var;
1625 /* There are no long variable names. Use the short variable names,
1626 converted to lowercase, as the long variable names. */
1629 for (i = 0; i < dict_get_var_cnt (dict); i++)
1631 struct variable *var = dict_get_var (dict, i);
1634 new_name = utf8_to_lower (var_get_name (var));
1635 rename_var_and_save_short_names (dict, var, new_name);
1642 /* Rename each of the variables, one by one. (In a correctly constructed
1643 system file, this cannot create any intermediate duplicate variable names,
1644 because all of the new variable names are longer than any of the old
1645 variable names and thus there cannot be any overlaps.) */
1646 text = open_text_record (r, record, true);
1647 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1649 /* Validate long name. */
1650 if (!dict_id_is_valid (dict, long_name, false))
1652 sys_warn (r, record->pos,
1653 _("Long variable mapping from %s to invalid "
1654 "variable name `%s'."),
1655 var_get_name (var), long_name);
1659 /* Identify any duplicates. */
1660 if (utf8_strcasecmp (var_get_short_name (var, 0), long_name)
1661 && dict_lookup_var (dict, long_name) != NULL)
1663 sys_warn (r, record->pos,
1664 _("Duplicate long variable name `%s'."), long_name);
1668 rename_var_and_save_short_names (dict, var, long_name);
1670 close_text_record (r, text);
1673 /* Reads record type 7, subtype 14, which gives the real length
1674 of each very long string. Rearranges DICT accordingly. */
1676 parse_long_string_map (struct sfm_reader *r,
1677 const struct sfm_extension_record *record,
1678 struct dictionary *dict)
1680 struct text_record *text;
1681 struct variable *var;
1684 text = open_text_record (r, record, true);
1685 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1687 size_t idx = var_get_dict_index (var);
1693 length = strtol (length_s, NULL, 10);
1694 if (length < 1 || length > MAX_STRING)
1696 sys_warn (r, record->pos,
1697 _("%s listed as string of invalid length %s "
1698 "in very long string record."),
1699 var_get_name (var), length_s);
1703 /* Check segments. */
1704 segment_cnt = sfm_width_to_segments (length);
1705 if (segment_cnt == 1)
1707 sys_warn (r, record->pos,
1708 _("%s listed in very long string record with width %s, "
1709 "which requires only one segment."),
1710 var_get_name (var), length_s);
1713 if (idx + segment_cnt > dict_get_var_cnt (dict))
1714 sys_error (r, record->pos,
1715 _("Very long string %s overflows dictionary."),
1716 var_get_name (var));
1718 /* Get the short names from the segments and check their
1720 for (i = 0; i < segment_cnt; i++)
1722 struct variable *seg = dict_get_var (dict, idx + i);
1723 int alloc_width = sfm_segment_alloc_width (length, i);
1724 int width = var_get_width (seg);
1727 var_set_short_name (var, i, var_get_short_name (seg, 0));
1728 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1729 sys_error (r, record->pos,
1730 _("Very long string with width %ld has segment %d "
1731 "of width %d (expected %d)."),
1732 length, i, width, alloc_width);
1734 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1735 var_set_width (var, length);
1737 close_text_record (r, text);
1738 dict_compact_values (dict);
1742 parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
1743 const struct sfm_var_record *var_recs, size_t n_var_recs,
1744 const struct sfm_value_label_record *record)
1746 struct variable **vars;
1750 utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
1751 for (i = 0; i < record->n_labels; i++)
1752 utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
1753 record->labels[i].label, -1,
1756 vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
1757 for (i = 0; i < record->n_vars; i++)
1758 vars[i] = lookup_var_by_index (r, record->pos,
1759 var_recs, n_var_recs, record->vars[i]);
1761 for (i = 1; i < record->n_vars; i++)
1762 if (var_get_type (vars[i]) != var_get_type (vars[0]))
1763 sys_error (r, record->pos,
1764 _("Variables associated with value label are not all of "
1765 "identical type. Variable %s is %s, but variable "
1767 var_get_name (vars[0]),
1768 var_is_numeric (vars[0]) ? _("numeric") : _("string"),
1769 var_get_name (vars[i]),
1770 var_is_numeric (vars[i]) ? _("numeric") : _("string"));
1772 for (i = 0; i < record->n_vars; i++)
1774 struct variable *var = vars[i];
1778 width = var_get_width (var);
1780 sys_error (r, record->pos,
1781 _("Value labels may not be added to long string "
1782 "variables (e.g. %s) using records types 3 and 4."),
1783 var_get_name (var));
1785 for (j = 0; j < record->n_labels; j++)
1787 struct sfm_value_label *label = &record->labels[j];
1790 value_init (&value, width);
1792 value.f = parse_float (r, label->value, 0);
1794 memcpy (value_str_rw (&value, width), label->value, width);
1796 if (!var_add_value_label (var, &value, utf8_labels[j]))
1798 if (var_is_numeric (var))
1799 sys_warn (r, record->pos,
1800 _("Duplicate value label for %g on %s."),
1801 value.f, var_get_name (var));
1803 sys_warn (r, record->pos,
1804 _("Duplicate value label for `%.*s' on %s."),
1805 width, value_str (&value, width),
1806 var_get_name (var));
1809 value_destroy (&value, width);
1813 pool_free (r->pool, vars);
1814 for (i = 0; i < record->n_labels; i++)
1815 pool_free (r->pool, utf8_labels[i]);
1816 pool_free (r->pool, utf8_labels);
1819 static struct variable *
1820 lookup_var_by_index (struct sfm_reader *r, off_t offset,
1821 const struct sfm_var_record *var_recs, size_t n_var_recs,
1824 const struct sfm_var_record *rec;
1826 if (idx < 1 || idx > n_var_recs)
1828 sys_error (r, offset,
1829 _("Variable index %d not in valid range 1...%zu."),
1834 rec = &var_recs[idx - 1];
1835 if (rec->var == NULL)
1837 sys_error (r, offset,
1838 _("Variable index %d refers to long string continuation."),
1846 /* Parses a set of custom attributes from TEXT into ATTRS.
1847 ATTRS may be a null pointer, in which case the attributes are
1848 read but discarded. */
1850 parse_attributes (struct sfm_reader *r, struct text_record *text,
1851 struct attrset *attrs)
1855 struct attribute *attr;
1859 /* Parse the key. */
1860 key = text_get_token (text, ss_cstr ("("), NULL);
1864 attr = attribute_create (key);
1865 for (index = 1; ; index++)
1867 /* Parse the value. */
1871 value = text_get_token (text, ss_cstr ("\n"), NULL);
1874 text_warn (r, text, _("Error parsing attribute value %s[%d]."),
1879 length = strlen (value);
1880 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
1882 value[length - 1] = '\0';
1883 attribute_add_value (attr, value + 1);
1888 _("Attribute value %s[%d] is not quoted: %s."),
1890 attribute_add_value (attr, value);
1893 /* Was this the last value for this attribute? */
1894 if (text_match (text, ')'))
1898 attrset_add (attrs, attr);
1900 attribute_destroy (attr);
1902 while (!text_match (text, '/'));
1905 /* Reads record type 7, subtype 17, which lists custom
1906 attributes on the data file. */
1908 parse_data_file_attributes (struct sfm_reader *r,
1909 const struct sfm_extension_record *record,
1910 struct dictionary *dict)
1912 struct text_record *text = open_text_record (r, record, true);
1913 parse_attributes (r, text, dict_get_attributes (dict));
1914 close_text_record (r, text);
1917 /* Parses record type 7, subtype 18, which lists custom
1918 attributes on individual variables. */
1920 parse_variable_attributes (struct sfm_reader *r,
1921 const struct sfm_extension_record *record,
1922 struct dictionary *dict)
1924 struct text_record *text;
1925 struct variable *var;
1927 text = open_text_record (r, record, true);
1928 while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
1929 parse_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
1930 close_text_record (r, text);
1934 assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
1936 size_t n_warnings = 0;
1939 for (i = 0; i < dict_get_var_cnt (dict); i++)
1941 struct variable *var = dict_get_var (dict, i);
1942 struct attrset *attrs = var_get_attributes (var);
1943 const struct attribute *attr = attrset_lookup (attrs, "$@Role");
1946 int value = atoi (attribute_get_value (attr, 0));
1968 role = ROLE_PARTITION;
1977 if (n_warnings++ == 0)
1978 sys_warn (r, -1, _("Invalid role for variable %s."),
1979 var_get_name (var));
1982 var_set_role (var, role);
1987 sys_warn (r, -1, _("%zu other variables had invalid roles."),
1992 check_overflow (struct sfm_reader *r,
1993 const struct sfm_extension_record *record,
1994 size_t ofs, size_t length)
1996 size_t end = record->size * record->count;
1997 if (length >= end || ofs + length > end)
1998 sys_error (r, record->pos + end,
1999 _("Long string value label record ends unexpectedly."));
2003 parse_long_string_value_labels (struct sfm_reader *r,
2004 const struct sfm_extension_record *record,
2005 struct dictionary *dict)
2007 const char *dict_encoding = dict_get_encoding (dict);
2008 size_t end = record->size * record->count;
2015 struct variable *var;
2020 /* Parse variable name length. */
2021 check_overflow (r, record, ofs, 4);
2022 var_name_len = parse_int (r, record->data, ofs);
2025 /* Parse variable name, width, and number of labels. */
2026 check_overflow (r, record, ofs, var_name_len + 8);
2027 var_name = recode_string_pool ("UTF-8", dict_encoding,
2028 (const char *) record->data + ofs,
2029 var_name_len, r->pool);
2030 width = parse_int (r, record->data, ofs + var_name_len);
2031 n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
2032 ofs += var_name_len + 8;
2034 /* Look up 'var' and validate. */
2035 var = dict_lookup_var (dict, var_name);
2037 sys_warn (r, record->pos + ofs,
2038 _("Ignoring long string value record for "
2039 "unknown variable %s."), var_name);
2040 else if (var_is_numeric (var))
2042 sys_warn (r, record->pos + ofs,
2043 _("Ignoring long string value record for "
2044 "numeric variable %s."), var_name);
2047 else if (width != var_get_width (var))
2049 sys_warn (r, record->pos + ofs,
2050 _("Ignoring long string value record for variable %s "
2051 "because the record's width (%d) does not match the "
2052 "variable's width (%d)."),
2053 var_name, width, var_get_width (var));
2058 value_init_pool (r->pool, &value, width);
2059 for (i = 0; i < n_labels; i++)
2061 size_t value_length, label_length;
2062 bool skip = var == NULL;
2064 /* Parse value length. */
2065 check_overflow (r, record, ofs, 4);
2066 value_length = parse_int (r, record->data, ofs);
2070 check_overflow (r, record, ofs, value_length);
2073 if (value_length == width)
2074 memcpy (value_str_rw (&value, width),
2075 (const uint8_t *) record->data + ofs, width);
2078 sys_warn (r, record->pos + ofs,
2079 _("Ignoring long string value %zu for variable "
2080 "%s, with width %d, that has bad value "
2082 i, var_get_name (var), width, value_length);
2086 ofs += value_length;
2088 /* Parse label length. */
2089 check_overflow (r, record, ofs, 4);
2090 label_length = parse_int (r, record->data, ofs);
2094 check_overflow (r, record, ofs, label_length);
2099 label = recode_string_pool ("UTF-8", dict_encoding,
2100 (const char *) record->data + ofs,
2101 label_length, r->pool);
2102 if (!var_add_value_label (var, &value, label))
2103 sys_warn (r, record->pos + ofs,
2104 _("Duplicate value label for `%.*s' on %s."),
2105 width, value_str (&value, width),
2106 var_get_name (var));
2107 pool_free (r->pool, label);
2109 ofs += label_length;
2116 static void partial_record (struct sfm_reader *r)
2119 static void read_error (struct casereader *, const struct sfm_reader *);
2121 static bool read_case_number (struct sfm_reader *, double *);
2122 static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
2123 static int read_opcode (struct sfm_reader *);
2124 static bool read_compressed_number (struct sfm_reader *, double *);
2125 static bool read_compressed_string (struct sfm_reader *, uint8_t *);
2126 static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
2127 static bool skip_whole_strings (struct sfm_reader *, size_t);
2129 /* Reads and returns one case from READER's file. Returns a null
2130 pointer if not successful. */
2131 static struct ccase *
2132 sys_file_casereader_read (struct casereader *reader, void *r_)
2134 struct sfm_reader *r = r_;
2135 struct ccase *volatile c;
2141 c = case_create (r->proto);
2142 if (setjmp (r->bail_out))
2144 casereader_force_error (reader);
2149 for (i = 0; i < r->sfm_var_cnt; i++)
2151 struct sfm_var *sv = &r->sfm_vars[i];
2152 union value *v = case_data_rw_idx (c, sv->case_index);
2154 if (sv->var_width == 0)
2156 if (!read_case_number (r, &v->f))
2161 uint8_t *s = value_str_rw (v, sv->var_width);
2162 if (!read_case_string (r, s + sv->offset, sv->segment_width))
2164 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
2173 if (r->case_cnt != -1)
2174 read_error (reader, r);
2179 /* Issues an error that R ends in a partial record. */
2181 partial_record (struct sfm_reader *r)
2183 sys_error (r, r->pos, _("File ends in partial case."));
2186 /* Issues an error that an unspecified error occurred SFM, and
2189 read_error (struct casereader *r, const struct sfm_reader *sfm)
2191 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
2192 casereader_force_error (r);
2195 /* Reads a number from R and stores its value in *D.
2196 If R is compressed, reads a compressed number;
2197 otherwise, reads a number in the regular way.
2198 Returns true if successful, false if end of file is
2199 reached immediately. */
2201 read_case_number (struct sfm_reader *r, double *d)
2206 if (!try_read_bytes (r, number, sizeof number))
2208 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
2212 return read_compressed_number (r, d);
2215 /* Reads LENGTH string bytes from R into S.
2216 Always reads a multiple of 8 bytes; if LENGTH is not a
2217 multiple of 8, then extra bytes are read and discarded without
2219 Reads compressed strings if S is compressed.
2220 Returns true if successful, false if end of file is
2221 reached immediately. */
2223 read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
2225 size_t whole = ROUND_DOWN (length, 8);
2226 size_t partial = length % 8;
2230 if (!read_whole_strings (r, s, whole))
2237 if (!read_whole_strings (r, bounce, sizeof bounce))
2243 memcpy (s + whole, bounce, partial);
2249 /* Reads and returns the next compression opcode from R. */
2251 read_opcode (struct sfm_reader *r)
2253 assert (r->compressed);
2257 if (r->opcode_idx >= sizeof r->opcodes)
2259 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
2263 opcode = r->opcodes[r->opcode_idx++];
2270 /* Reads a compressed number from R and stores its value in D.
2271 Returns true if successful, false if end of file is
2272 reached immediately. */
2274 read_compressed_number (struct sfm_reader *r, double *d)
2276 int opcode = read_opcode (r);
2284 *d = read_float (r);
2288 float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
2289 if (!r->corruption_warning)
2291 r->corruption_warning = true;
2292 sys_warn (r, r->pos,
2293 _("Possible compressed data corruption: "
2294 "compressed spaces appear in numeric field."));
2303 *d = opcode - r->bias;
2310 /* Reads a compressed 8-byte string segment from R and stores it
2312 Returns true if successful, false if end of file is
2313 reached immediately. */
2315 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
2317 int opcode = read_opcode (r);
2325 read_bytes (r, dst, 8);
2329 memset (dst, ' ', 8);
2334 double value = opcode - r->bias;
2335 float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
2338 /* This has actually been seen "in the wild". The submitter of the
2339 file that showed that the contents decoded as spaces, but they
2340 were at the end of the field so it's possible that the null
2341 bytes just acted as null terminators. */
2343 else if (!r->corruption_warning)
2345 r->corruption_warning = true;
2346 sys_warn (r, r->pos,
2347 _("Possible compressed data corruption: "
2348 "string contains compressed integer (opcode %d)."),
2358 /* Reads LENGTH string bytes from R into S.
2359 LENGTH must be a multiple of 8.
2360 Reads compressed strings if S is compressed.
2361 Returns true if successful, false if end of file is
2362 reached immediately. */
2364 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
2366 assert (length % 8 == 0);
2368 return try_read_bytes (r, s, length);
2372 for (ofs = 0; ofs < length; ofs += 8)
2373 if (!read_compressed_string (r, s + ofs))
2383 /* Skips LENGTH string bytes from R.
2384 LENGTH must be a multiple of 8.
2385 (LENGTH is also limited to 1024, but that's only because the
2386 current caller never needs more than that many bytes.)
2387 Returns true if successful, false if end of file is
2388 reached immediately. */
2390 skip_whole_strings (struct sfm_reader *r, size_t length)
2392 uint8_t buffer[1024];
2393 assert (length < sizeof buffer);
2394 return read_whole_strings (r, buffer, length);
2397 /* Helpers for reading records that contain structured text
2400 /* Maximum number of warnings to issue for a single text
2402 #define MAX_TEXT_WARNINGS 5
2407 struct substring buffer; /* Record contents. */
2408 off_t start; /* Starting offset in file. */
2409 size_t pos; /* Current position in buffer. */
2410 int n_warnings; /* Number of warnings issued or suppressed. */
2411 bool recoded; /* Recoded into UTF-8? */
2414 static struct text_record *
2415 open_text_record (struct sfm_reader *r,
2416 const struct sfm_extension_record *record,
2417 bool recode_to_utf8)
2419 struct text_record *text;
2420 struct substring raw;
2422 text = pool_alloc (r->pool, sizeof *text);
2423 raw = ss_buffer (record->data, record->size * record->count);
2424 text->start = record->pos;
2425 text->buffer = (recode_to_utf8
2426 ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
2429 text->n_warnings = 0;
2430 text->recoded = recode_to_utf8;
2435 /* Closes TEXT, frees its storage, and issues a final warning
2436 about suppressed warnings if necesary. */
2438 close_text_record (struct sfm_reader *r, struct text_record *text)
2440 if (text->n_warnings > MAX_TEXT_WARNINGS)
2441 sys_warn (r, -1, _("Suppressed %d additional related warnings."),
2442 text->n_warnings - MAX_TEXT_WARNINGS);
2444 pool_free (r->pool, ss_data (text->buffer));
2447 /* Reads a variable=value pair from TEXT.
2448 Looks up the variable in DICT and stores it into *VAR.
2449 Stores a null-terminated value into *VALUE. */
2451 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
2452 struct text_record *text,
2453 struct variable **var, char **value)
2457 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
2460 *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
2464 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
2465 ss_buffer ("\t\0", 2));
2473 text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
2474 struct text_record *text, struct substring delimiters,
2475 struct variable **var)
2479 name = text_get_token (text, delimiters, NULL);
2483 *var = dict_lookup_var (dict, name);
2487 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
2494 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
2495 struct text_record *text, struct substring delimiters,
2496 struct variable **var)
2498 char *short_name = text_get_token (text, delimiters, NULL);
2499 if (short_name == NULL)
2502 *var = dict_lookup_var (dict, short_name);
2504 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
2509 /* Displays a warning for the current file position, limiting the
2510 number to MAX_TEXT_WARNINGS for TEXT. */
2512 text_warn (struct sfm_reader *r, struct text_record *text,
2513 const char *format, ...)
2515 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
2519 va_start (args, format);
2520 sys_msg (r, text->start + text->pos, MW, format, args);
2526 text_get_token (struct text_record *text, struct substring delimiters,
2529 struct substring token;
2532 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
2535 end = &ss_data (token)[ss_length (token)];
2536 if (delimiter != NULL)
2539 return ss_data (token);
2542 /* Reads a integer value expressed in decimal, then a space, then a string that
2543 consists of exactly as many bytes as specified by the integer, then a space,
2544 from TEXT. Returns the string, null-terminated, as a subset of TEXT's
2545 buffer (so the caller should not free the string). */
2547 text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
2555 while (text->pos < text->buffer.length)
2557 int c = text->buffer.string[text->pos];
2558 if (c < '0' || c > '9')
2560 n = (n * 10) + (c - '0');
2563 if (text->pos >= text->buffer.length || start == text->pos)
2565 sys_warn (r, text->start,
2566 _("Expecting digit at offset %zu in MRSETS record."),
2571 if (!text_match (text, ' '))
2573 sys_warn (r, text->start,
2574 _("Expecting space at offset %zu in MRSETS record."),
2579 if (text->pos + n > text->buffer.length)
2581 sys_warn (r, text->start,
2582 _("%zu-byte string starting at offset %zu "
2583 "exceeds record length %zu."),
2584 n, text->pos, text->buffer.length);
2588 s = &text->buffer.string[text->pos];
2591 sys_warn (r, text->start,
2592 _("Expecting space at offset %zu following %zu-byte string."),
2602 text_match (struct text_record *text, char c)
2604 if (text->buffer.string[text->pos] == c)
2613 /* Returns the current byte offset (as converted to UTF-8, if it was converted)
2614 inside the TEXT's string. */
2616 text_pos (const struct text_record *text)
2622 text_get_all (const struct text_record *text)
2624 return text->buffer.string;
2629 /* Displays a corruption message. */
2631 sys_msg (struct sfm_reader *r, off_t offset,
2632 int class, const char *format, va_list args)
2637 ds_init_empty (&text);
2639 ds_put_format (&text, _("`%s' near offset 0x%llx: "),
2640 fh_get_file_name (r->fh), (long long int) offset);
2642 ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
2643 ds_put_vformat (&text, format, args);
2645 m.category = msg_class_to_category (class);
2646 m.severity = msg_class_to_severity (class);
2652 m.text = ds_cstr (&text);
2657 /* Displays a warning for offset OFFSET in the file. */
2659 sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
2663 va_start (args, format);
2664 sys_msg (r, offset, MW, format, args);
2668 /* Displays an error for the current file position,
2669 marks it as in an error state,
2670 and aborts reading it using longjmp. */
2672 sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
2676 va_start (args, format);
2677 sys_msg (r, offset, ME, format, args);
2681 longjmp (r->bail_out, 1);
2684 /* Reads BYTE_CNT bytes into BUF.
2685 Returns true if exactly BYTE_CNT bytes are successfully read.
2686 Aborts if an I/O error or a partial read occurs.
2687 If EOF_IS_OK, then an immediate end-of-file causes false to be
2688 returned; otherwise, immediate end-of-file causes an abort
2691 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
2692 void *buf, size_t byte_cnt)
2694 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
2695 r->pos += bytes_read;
2696 if (bytes_read == byte_cnt)
2698 else if (ferror (r->file))
2699 sys_error (r, r->pos, _("System error: %s."), strerror (errno));
2700 else if (!eof_is_ok || bytes_read != 0)
2701 sys_error (r, r->pos, _("Unexpected end of file."));
2706 /* Reads BYTE_CNT into BUF.
2707 Aborts upon I/O error or if end-of-file is encountered. */
2709 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2711 read_bytes_internal (r, false, buf, byte_cnt);
2714 /* Reads BYTE_CNT bytes into BUF.
2715 Returns true if exactly BYTE_CNT bytes are successfully read.
2716 Returns false if an immediate end-of-file is encountered.
2717 Aborts if an I/O error or a partial read occurs. */
2719 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2721 return read_bytes_internal (r, true, buf, byte_cnt);
2724 /* Reads a 32-bit signed integer from R and returns its value in
2727 read_int (struct sfm_reader *r)
2730 read_bytes (r, integer, sizeof integer);
2731 return integer_get (r->integer_format, integer, sizeof integer);
2734 /* Reads a 64-bit floating-point number from R and returns its
2735 value in host format. */
2737 read_float (struct sfm_reader *r)
2740 read_bytes (r, number, sizeof number);
2741 return float_get_double (r->float_format, number);
2745 parse_int (struct sfm_reader *r, const void *data, size_t ofs)
2747 return integer_get (r->integer_format, (const uint8_t *) data + ofs, 4);
2751 parse_float (struct sfm_reader *r, const void *data, size_t ofs)
2753 return float_get_double (r->float_format, (const uint8_t *) data + ofs);
2756 /* Reads exactly SIZE - 1 bytes into BUFFER
2757 and stores a null byte into BUFFER[SIZE - 1]. */
2759 read_string (struct sfm_reader *r, char *buffer, size_t size)
2762 read_bytes (r, buffer, size - 1);
2763 buffer[size - 1] = '\0';
2766 /* Skips BYTES bytes forward in R. */
2768 skip_bytes (struct sfm_reader *r, size_t bytes)
2773 size_t chunk = MIN (sizeof buffer, bytes);
2774 read_bytes (r, buffer, chunk);
2779 /* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have
2780 been replaced by LFs.
2782 (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
2783 files that use CR-only line ends in the file label and extra product
2786 fix_line_ends (const char *s)
2790 d = dst = xmalloc (strlen (s) + 1);
2808 static const struct casereader_class sys_file_casereader_class =
2810 sys_file_casereader_read,
2811 sys_file_casereader_destroy,