1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/sys-file-reader.h"
20 #include "data/sys-file-private.h"
28 #include "data/attributes.h"
29 #include "data/case.h"
30 #include "data/casereader-provider.h"
31 #include "data/casereader.h"
32 #include "data/dictionary.h"
33 #include "data/file-handle-def.h"
34 #include "data/file-name.h"
35 #include "data/format.h"
36 #include "data/identifier.h"
37 #include "data/missing-values.h"
38 #include "data/mrset.h"
39 #include "data/short-names.h"
40 #include "data/value-labels.h"
41 #include "data/value.h"
42 #include "data/variable.h"
43 #include "libpspp/array.h"
44 #include "libpspp/assertion.h"
45 #include "libpspp/compiler.h"
46 #include "libpspp/i18n.h"
47 #include "libpspp/message.h"
48 #include "libpspp/misc.h"
49 #include "libpspp/pool.h"
50 #include "libpspp/str.h"
51 #include "libpspp/stringi-set.h"
53 #include "gl/c-strtod.h"
54 #include "gl/c-ctype.h"
55 #include "gl/inttostr.h"
56 #include "gl/localcharset.h"
57 #include "gl/minmax.h"
58 #include "gl/unlocked-io.h"
59 #include "gl/xalloc.h"
63 #define _(msgid) gettext (msgid)
64 #define N_(msgid) (msgid)
68 /* subtypes 0-2 unknown */
69 EXT_INTEGER = 3, /* Machine integer info. */
70 EXT_FLOAT = 4, /* Machine floating-point info. */
71 EXT_VAR_SETS = 5, /* Variable sets. */
72 EXT_DATE = 6, /* DATE. */
73 EXT_MRSETS = 7, /* Multiple response sets. */
74 EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */
75 /* subtype 9 unknown */
76 EXT_PRODUCT_INFO = 10, /* Extra product info text. */
77 EXT_DISPLAY = 11, /* Variable display parameters. */
78 /* subtype 12 unknown */
79 EXT_LONG_NAMES = 13, /* Long variable names. */
80 EXT_LONG_STRINGS = 14, /* Long strings. */
81 /* subtype 15 unknown */
82 EXT_NCASES = 16, /* Extended number of cases. */
83 EXT_FILE_ATTRS = 17, /* Data file attributes. */
84 EXT_VAR_ATTRS = 18, /* Variable attributes. */
85 EXT_MRSETS2 = 19, /* Multiple response sets (extended). */
86 EXT_ENCODING = 20, /* Character encoding. */
87 EXT_LONG_LABELS = 21, /* Value labels for long strings. */
88 EXT_DATAVIEW = 24 /* "Format properties in dataview table". */
91 /* Fields from the top-level header record. */
92 struct sfm_header_record
94 char magic[5]; /* First 4 bytes of file, then null. */
95 int weight_idx; /* 0 if unweighted, otherwise a var index. */
96 int nominal_case_size; /* Number of var positions. */
98 /* These correspond to the members of struct sfm_file_info or a dictionary
99 but in the system file's encoding rather than ASCII. */
100 char creation_date[10]; /* "dd mmm yy". */
101 char creation_time[9]; /* "hh:mm:ss". */
102 char eye_catcher[61]; /* Eye-catcher string, then product name. */
103 char file_label[65]; /* File label. */
106 struct sfm_var_record
113 int missing_value_code;
116 struct variable *var;
119 struct sfm_value_label
125 struct sfm_value_label_record
128 struct sfm_value_label *labels;
135 struct sfm_document_record
142 struct sfm_extension_record
144 off_t pos; /* Starting offset in file. */
145 size_t size; /* Size of data elements. */
146 size_t count; /* Number of data elements. */
147 void *data; /* Contents. */
150 /* System file reader. */
153 /* Resource tracking. */
154 struct pool *pool; /* All system file state. */
155 jmp_buf bail_out; /* longjmp() target for error handling. */
158 struct file_handle *fh; /* File handle. */
159 struct fh_lock *lock; /* Mutual exclusion for file handle. */
160 FILE *file; /* File stream. */
161 off_t pos; /* Position in file. */
162 bool error; /* I/O or corruption error? */
163 struct caseproto *proto; /* Format of output cases. */
166 enum integer_format integer_format; /* On-disk integer format. */
167 enum float_format float_format; /* On-disk floating point format. */
168 struct sfm_var *sfm_vars; /* Variables. */
169 size_t sfm_var_cnt; /* Number of variables. */
170 casenumber case_cnt; /* Number of cases */
171 const char *encoding; /* String encoding. */
174 bool compressed; /* File is compressed? */
175 double bias; /* Compression bias, usually 100.0. */
176 uint8_t opcodes[8]; /* Current block of opcodes. */
177 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
178 bool corruption_warning; /* Warned about possible corruption? */
181 static const struct casereader_class sys_file_casereader_class;
183 static bool close_reader (struct sfm_reader *);
185 static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
186 const struct sfm_var_record *,
189 static void sys_msg (struct sfm_reader *r, off_t, int class,
190 const char *format, va_list args)
191 PRINTF_FORMAT (4, 0);
192 static void sys_warn (struct sfm_reader *, off_t, const char *, ...)
193 PRINTF_FORMAT (3, 4);
194 static void sys_error (struct sfm_reader *, off_t, const char *, ...)
198 static void read_bytes (struct sfm_reader *, void *, size_t);
199 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
200 static int read_int (struct sfm_reader *);
201 static double read_float (struct sfm_reader *);
202 static void read_string (struct sfm_reader *, char *, size_t);
203 static void skip_bytes (struct sfm_reader *, size_t);
205 static char *fix_line_ends (const char *);
207 static int parse_int (struct sfm_reader *, const void *data, size_t ofs);
208 static double parse_float (struct sfm_reader *, const void *data, size_t ofs);
210 static void read_variable_record (struct sfm_reader *,
211 struct sfm_var_record *);
212 static void read_value_label_record (struct sfm_reader *,
213 struct sfm_value_label_record *,
215 static struct sfm_document_record *read_document_record (struct sfm_reader *);
216 static struct sfm_extension_record *read_extension_record (
217 struct sfm_reader *, int subtype);
218 static void skip_extension_record (struct sfm_reader *, int subtype);
220 static const char *choose_encoding (
222 const struct sfm_header_record *,
223 const struct sfm_extension_record *ext_integer,
224 const struct sfm_extension_record *ext_encoding);
226 static struct text_record *open_text_record (
227 struct sfm_reader *, const struct sfm_extension_record *,
228 bool recode_to_utf8);
229 static void close_text_record (struct sfm_reader *,
230 struct text_record *);
231 static bool read_variable_to_value_pair (struct sfm_reader *,
233 struct text_record *,
234 struct variable **var, char **value);
235 static void text_warn (struct sfm_reader *r, struct text_record *text,
236 const char *format, ...)
237 PRINTF_FORMAT (3, 4);
238 static char *text_get_token (struct text_record *,
239 struct substring delimiters, char *delimiter);
240 static bool text_match (struct text_record *, char c);
241 static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
242 struct text_record *,
243 struct substring delimiters,
245 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
246 struct text_record *,
247 struct substring delimiters,
249 static const char *text_parse_counted_string (struct sfm_reader *,
250 struct text_record *);
251 static size_t text_pos (const struct text_record *);
252 static const char *text_get_all (const struct text_record *);
254 static bool close_reader (struct sfm_reader *r);
256 /* Dictionary reader. */
264 static void read_header (struct sfm_reader *, struct sfm_read_info *,
265 struct sfm_header_record *);
266 static void parse_header (struct sfm_reader *,
267 const struct sfm_header_record *,
268 struct sfm_read_info *, struct dictionary *);
269 static void parse_variable_records (struct sfm_reader *, struct dictionary *,
270 struct sfm_var_record *, size_t n);
271 static void parse_format_spec (struct sfm_reader *, off_t pos,
272 unsigned int format, enum which_format,
273 struct variable *, int *format_warning_cnt);
274 static void parse_document (struct dictionary *, struct sfm_document_record *);
275 static void parse_display_parameters (struct sfm_reader *,
276 const struct sfm_extension_record *,
277 struct dictionary *);
278 static void parse_machine_integer_info (struct sfm_reader *,
279 const struct sfm_extension_record *,
280 struct sfm_read_info *);
281 static void parse_machine_float_info (struct sfm_reader *,
282 const struct sfm_extension_record *);
283 static void parse_extra_product_info (struct sfm_reader *,
284 const struct sfm_extension_record *,
285 struct sfm_read_info *);
286 static void parse_mrsets (struct sfm_reader *,
287 const struct sfm_extension_record *,
288 struct dictionary *);
289 static void parse_long_var_name_map (struct sfm_reader *,
290 const struct sfm_extension_record *,
291 struct dictionary *);
292 static void parse_long_string_map (struct sfm_reader *,
293 const struct sfm_extension_record *,
294 struct dictionary *);
295 static void parse_value_labels (struct sfm_reader *, struct dictionary *,
296 const struct sfm_var_record *,
298 const struct sfm_value_label_record *);
299 static void parse_data_file_attributes (struct sfm_reader *,
300 const struct sfm_extension_record *,
301 struct dictionary *);
302 static void parse_variable_attributes (struct sfm_reader *,
303 const struct sfm_extension_record *,
304 struct dictionary *);
305 static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
306 static void parse_long_string_value_labels (struct sfm_reader *,
307 const struct sfm_extension_record *,
308 struct dictionary *);
310 /* Frees the strings inside INFO. */
312 sfm_read_info_destroy (struct sfm_read_info *info)
316 free (info->creation_date);
317 free (info->creation_time);
318 free (info->product);
319 free (info->product_ext);
323 /* Opens the system file designated by file handle FH for reading. Reads the
324 system file's dictionary into *DICT.
326 Ordinarily the reader attempts to automatically detect the character
327 encoding based on the file's contents. This isn't always possible,
328 especially for files written by old versions of SPSS or PSPP, so specifying
329 a nonnull ENCODING overrides the choice of character encoding.
331 If INFO is non-null, then it receives additional info about the system file,
332 which the caller must eventually free with sfm_read_info_destroy() when it
333 is no longer needed. */
335 sfm_open_reader (struct file_handle *fh, const char *volatile encoding,
336 struct dictionary **dictp, struct sfm_read_info *infop)
338 struct sfm_reader *volatile r = NULL;
339 struct sfm_read_info *volatile info;
341 struct sfm_header_record header;
343 struct sfm_var_record *vars;
344 size_t n_vars, allocated_vars;
346 struct sfm_value_label_record *labels;
347 size_t n_labels, allocated_labels;
349 struct sfm_document_record *document;
351 struct sfm_extension_record *extensions[32];
353 struct dictionary *volatile dict = NULL;
356 /* Create and initialize reader. */
357 r = pool_create_container (struct sfm_reader, pool);
363 r->opcode_idx = sizeof r->opcodes;
364 r->corruption_warning = false;
366 info = infop ? infop : xmalloc (sizeof *info);
367 memset (info, 0, sizeof *info);
369 /* TRANSLATORS: this fragment will be interpolated into
370 messages in fh_lock() that identify types of files. */
371 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
375 r->file = fn_open (fh_get_file_name (fh), "rb");
378 msg (ME, _("Error opening `%s' for reading as a system file: %s."),
379 fh_get_file_name (r->fh), strerror (errno));
383 if (setjmp (r->bail_out))
387 read_header (r, info, &header);
390 n_vars = allocated_vars = 0;
393 n_labels = allocated_labels = 0;
397 memset (extensions, 0, sizeof extensions);
407 read_int (r); /* Skip filler. */
414 if (n_vars >= allocated_vars)
415 vars = pool_2nrealloc (r->pool, vars, &allocated_vars,
417 read_variable_record (r, &vars[n_vars++]);
421 if (n_labels >= allocated_labels)
422 labels = pool_2nrealloc (r->pool, labels, &allocated_labels,
424 read_value_label_record (r, &labels[n_labels++], n_vars);
428 /* A Type 4 record is always immediately after a type 3 record,
429 so the code for type 3 records reads the type 4 record too. */
430 sys_error (r, r->pos, _("Misplaced type 4 record."));
433 if (document != NULL)
434 sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
435 document = read_document_record (r);
439 subtype = read_int (r);
440 if (subtype < 0 || subtype >= sizeof extensions / sizeof *extensions)
443 _("Unrecognized record type 7, subtype %d. Please "
444 "send a copy of this file, and the syntax which "
445 "created it to %s."),
446 subtype, PACKAGE_BUGREPORT);
447 skip_extension_record (r, subtype);
449 else if (extensions[subtype] != NULL)
452 _("Record type 7, subtype %d found here has the same "
453 "type as the record found near offset 0x%llx. "
454 "Please send a copy of this file, and the syntax "
455 "which created it to %s."),
456 subtype, (long long int) extensions[subtype]->pos,
458 skip_extension_record (r, subtype);
461 extensions[subtype] = read_extension_record (r, subtype);
465 sys_error (r, r->pos, _("Unrecognized record type %d."), type);
470 /* Now actually parse what we read.
472 First, figure out the correct character encoding, because this determines
473 how the rest of the header data is to be interpreted. */
474 dict = dict_create (encoding
476 : choose_encoding (r, &header, extensions[EXT_INTEGER],
477 extensions[EXT_ENCODING]));
478 r->encoding = dict_get_encoding (dict);
480 /* These records don't use variables at all. */
481 if (document != NULL)
482 parse_document (dict, document);
484 if (extensions[EXT_INTEGER] != NULL)
485 parse_machine_integer_info (r, extensions[EXT_INTEGER], info);
487 if (extensions[EXT_FLOAT] != NULL)
488 parse_machine_float_info (r, extensions[EXT_FLOAT]);
490 if (extensions[EXT_PRODUCT_INFO] != NULL)
491 parse_extra_product_info (r, extensions[EXT_PRODUCT_INFO], info);
493 if (extensions[EXT_FILE_ATTRS] != NULL)
494 parse_data_file_attributes (r, extensions[EXT_FILE_ATTRS], dict);
496 parse_header (r, &header, info, dict);
498 /* Parse the variable records, the basis of almost everything else. */
499 parse_variable_records (r, dict, vars, n_vars);
501 /* Parse value labels and the weight variable immediately after the variable
502 records. These records use indexes into var_recs[], so we must parse them
503 before those indexes become invalidated by very long string variables. */
504 for (i = 0; i < n_labels; i++)
505 parse_value_labels (r, dict, vars, n_vars, &labels[i]);
506 if (header.weight_idx != 0)
508 struct variable *weight_var;
510 weight_var = lookup_var_by_index (r, 76, vars, n_vars,
512 if (var_is_numeric (weight_var))
513 dict_set_weight (dict, weight_var);
515 sys_error (r, -1, _("Weighting variable must be numeric "
516 "(not string variable `%s')."),
517 var_get_name (weight_var));
520 if (extensions[EXT_DISPLAY] != NULL)
521 parse_display_parameters (r, extensions[EXT_DISPLAY], dict);
523 /* The following records use short names, so they need to be parsed before
524 parse_long_var_name_map() changes short names to long names. */
525 if (extensions[EXT_MRSETS] != NULL)
526 parse_mrsets (r, extensions[EXT_MRSETS], dict);
528 if (extensions[EXT_MRSETS2] != NULL)
529 parse_mrsets (r, extensions[EXT_MRSETS2], dict);
531 if (extensions[EXT_LONG_STRINGS] != NULL)
532 parse_long_string_map (r, extensions[EXT_LONG_STRINGS], dict);
534 /* Now rename variables to their long names. */
535 parse_long_var_name_map (r, extensions[EXT_LONG_NAMES], dict);
537 /* The following records use long names, so they need to follow renaming. */
538 if (extensions[EXT_VAR_ATTRS] != NULL)
540 parse_variable_attributes (r, extensions[EXT_VAR_ATTRS], dict);
542 /* Roles use the $@Role attribute. */
543 assign_variable_roles (r, dict);
546 if (extensions[EXT_LONG_LABELS] != NULL)
547 parse_long_string_value_labels (r, extensions[EXT_LONG_LABELS], dict);
549 /* Warn if the actual amount of data per case differs from the
550 amount that the header claims. SPSS version 13 gets this
551 wrong when very long strings are involved, so don't warn in
553 if (header.nominal_case_size != -1 && header.nominal_case_size != n_vars
554 && info->version_major != 13)
555 sys_warn (r, -1, _("File header claims %d variable positions but "
556 "%zu were read from file."),
557 header.nominal_case_size, n_vars);
559 /* Create an index of dictionary variable widths for
560 sfm_read_case to use. We cannot use the `struct variable's
561 from the dictionary we created, because the caller owns the
562 dictionary and may destroy or modify its variables. */
563 sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
564 pool_register (r->pool, free, r->sfm_vars);
565 r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
570 sfm_read_info_destroy (info);
574 return casereader_create_sequential
576 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
577 &sys_file_casereader_class, r);
582 sfm_read_info_destroy (info);
592 /* Closes a system file after we're done with it.
593 Returns true if an I/O error has occurred on READER, false
596 close_reader (struct sfm_reader *r)
605 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
607 msg (ME, _("Error closing system file `%s': %s."),
608 fh_get_file_name (r->fh), strerror (errno));
618 pool_destroy (r->pool);
623 /* Destroys READER. */
625 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
627 struct sfm_reader *r = r_;
631 /* Returns true if FILE is an SPSS system file,
634 sfm_detect (FILE *file)
638 if (fread (magic, 4, 1, file) != 1)
642 return !strcmp (ASCII_MAGIC, magic) || !strcmp (EBCDIC_MAGIC, magic);
645 /* Reads the global header of the system file. Initializes *HEADER and *INFO,
646 except for the string fields in *INFO, which parse_header() will initialize
647 later once the file's encoding is known. */
649 read_header (struct sfm_reader *r, struct sfm_read_info *info,
650 struct sfm_header_record *header)
652 uint8_t raw_layout_code[4];
655 read_string (r, header->magic, sizeof header->magic);
656 read_string (r, header->eye_catcher, sizeof header->eye_catcher);
658 if (strcmp (ASCII_MAGIC, header->magic)
659 && strcmp (EBCDIC_MAGIC, header->magic))
660 sys_error (r, 0, _("This is not an SPSS system file."));
662 /* Identify integer format. */
663 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
664 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
666 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
668 || (r->integer_format != INTEGER_MSB_FIRST
669 && r->integer_format != INTEGER_LSB_FIRST))
670 sys_error (r, 64, _("This is not an SPSS system file."));
672 header->nominal_case_size = read_int (r);
673 if (header->nominal_case_size < 0
674 || header->nominal_case_size > INT_MAX / 16)
675 header->nominal_case_size = -1;
677 r->compressed = read_int (r) != 0;
679 header->weight_idx = read_int (r);
681 r->case_cnt = read_int (r);
682 if ( r->case_cnt > INT_MAX / 2)
685 /* Identify floating-point format and obtain compression bias. */
686 read_bytes (r, raw_bias, sizeof raw_bias);
687 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
689 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
691 if (memcmp (raw_bias, zero_bias, 8))
692 sys_warn (r, r->pos - 8,
693 _("Compression bias is not the usual "
694 "value of 100, or system file uses unrecognized "
695 "floating-point format."));
698 /* Some software is known to write all-zeros to this
699 field. Such software also writes floating-point
700 numbers in the format that we expect by default
701 (it seems that all software most likely does, in
702 reality), so don't warn in this case. */
705 if (r->integer_format == INTEGER_MSB_FIRST)
706 r->float_format = FLOAT_IEEE_DOUBLE_BE;
708 r->float_format = FLOAT_IEEE_DOUBLE_LE;
710 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
712 read_string (r, header->creation_date, sizeof header->creation_date);
713 read_string (r, header->creation_time, sizeof header->creation_time);
714 read_string (r, header->file_label, sizeof header->file_label);
717 info->integer_format = r->integer_format;
718 info->float_format = r->float_format;
719 info->compressed = r->compressed;
720 info->case_cnt = r->case_cnt;
723 /* Reads a variable (type 2) record from R into RECORD. */
725 read_variable_record (struct sfm_reader *r, struct sfm_var_record *record)
727 int has_variable_label;
729 memset (record, 0, sizeof *record);
731 record->pos = r->pos;
732 record->width = read_int (r);
733 has_variable_label = read_int (r);
734 record->missing_value_code = read_int (r);
735 record->print_format = read_int (r);
736 record->write_format = read_int (r);
737 read_bytes (r, record->name, sizeof record->name);
739 if (has_variable_label == 1)
741 enum { MAX_LABEL_LEN = 255 };
742 size_t len, read_len;
746 /* Read up to MAX_LABEL_LEN bytes of label. */
747 read_len = MIN (MAX_LABEL_LEN, len);
748 record->label = pool_malloc (r->pool, read_len + 1);
749 read_string (r, record->label, read_len + 1);
751 /* Skip unread label bytes. */
752 skip_bytes (r, len - read_len);
754 /* Skip label padding up to multiple of 4 bytes. */
755 skip_bytes (r, ROUND_UP (len, 4) - len);
757 else if (has_variable_label != 0)
758 sys_error (r, record->pos,
759 _("Variable label indicator field is not 0 or 1."));
761 /* Set missing values. */
762 if (record->missing_value_code != 0)
764 int code = record->missing_value_code;
765 if (record->width == 0)
767 if (code < -3 || code > 3 || code == -1)
768 sys_error (r, record->pos,
769 _("Numeric missing value indicator field is not "
770 "-3, -2, 0, 1, 2, or 3."));
774 if (code < 1 || code > 3)
775 sys_error (r, record->pos,
776 _("String missing value indicator field is not "
780 read_bytes (r, record->missing, 8 * abs (code));
784 /* Reads value labels from R into RECORD. */
786 read_value_label_record (struct sfm_reader *r,
787 struct sfm_value_label_record *record,
792 /* Read type 3 record. */
793 record->pos = r->pos;
794 record->n_labels = read_int (r);
795 if (record->n_labels > SIZE_MAX / sizeof *record->labels)
796 sys_error (r, r->pos - 4, _("Invalid number of labels %zu."),
798 record->labels = pool_nmalloc (r->pool, record->n_labels,
799 sizeof *record->labels);
800 for (i = 0; i < record->n_labels; i++)
802 struct sfm_value_label *label = &record->labels[i];
803 unsigned char label_len;
806 read_bytes (r, label->value, sizeof label->value);
808 /* Read label length. */
809 read_bytes (r, &label_len, sizeof label_len);
810 padded_len = ROUND_UP (label_len + 1, 8);
812 /* Read label, padding. */
813 label->label = pool_malloc (r->pool, padded_len + 1);
814 read_bytes (r, label->label, padded_len - 1);
815 label->label[label_len] = '\0';
818 /* Read record type of type 4 record. */
819 if (read_int (r) != 4)
820 sys_error (r, r->pos - 4,
821 _("Variable index record (type 4) does not immediately "
822 "follow value label record (type 3) as it should."));
824 /* Read number of variables associated with value label from type 4
826 record->n_vars = read_int (r);
827 if (record->n_vars < 1 || record->n_vars > n_vars)
828 sys_error (r, r->pos - 4,
829 _("Number of variables associated with a value label (%zu) "
830 "is not between 1 and the number of variables (%zu)."),
831 record->n_vars, n_vars);
832 record->vars = pool_nmalloc (r->pool, record->n_vars, sizeof *record->vars);
833 for (i = 0; i < record->n_vars; i++)
834 record->vars[i] = read_int (r);
837 /* Reads a document record from R and returns it. */
838 static struct sfm_document_record *
839 read_document_record (struct sfm_reader *r)
841 struct sfm_document_record *record;
844 record = pool_malloc (r->pool, sizeof *record);
845 record->pos = r->pos;
847 n_lines = read_int (r);
848 if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
849 sys_error (r, record->pos,
850 _("Number of document lines (%d) "
851 "must be greater than 0 and less than %d."),
852 n_lines, INT_MAX / DOC_LINE_LENGTH);
854 record->n_lines = n_lines;
855 record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
856 read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines);
862 read_extension_record_header (struct sfm_reader *r, int subtype,
863 struct sfm_extension_record *record)
865 record->pos = r->pos;
866 record->size = read_int (r);
867 record->count = read_int (r);
869 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
870 allows an extra byte for a null terminator, used by some
871 extension processing routines. */
872 if (record->size != 0
873 && size_overflow_p (xsum (1, xtimes (record->count, record->size))))
874 sys_error (r, record->pos, "Record type 7 subtype %d too large.", subtype);
877 /* Reads an extension record from R into RECORD. */
878 static struct sfm_extension_record *
879 read_extension_record (struct sfm_reader *r, int subtype)
881 struct extension_record_type
888 static const struct extension_record_type types[] =
890 /* Implemented record types. */
891 { EXT_INTEGER, 4, 8 },
893 { EXT_MRSETS, 1, 0 },
894 { EXT_PRODUCT_INFO, 1, 0 },
895 { EXT_DISPLAY, 4, 0 },
896 { EXT_LONG_NAMES, 1, 0 },
897 { EXT_LONG_STRINGS, 1, 0 },
898 { EXT_NCASES, 8, 2 },
899 { EXT_FILE_ATTRS, 1, 0 },
900 { EXT_VAR_ATTRS, 1, 0 },
901 { EXT_MRSETS2, 1, 0 },
902 { EXT_ENCODING, 1, 0 },
903 { EXT_LONG_LABELS, 1, 0 },
905 /* Ignored record types. */
906 { EXT_VAR_SETS, 0, 0 },
908 { EXT_DATA_ENTRY, 0, 0 },
909 { EXT_DATAVIEW, 0, 0 },
912 const struct extension_record_type *type;
913 struct sfm_extension_record *record;
916 record = pool_malloc (r->pool, sizeof *record);
917 read_extension_record_header (r, subtype, record);
918 n_bytes = record->count * record->size;
920 for (type = types; type < &types[sizeof types / sizeof *types]; type++)
921 if (subtype == type->subtype)
923 if (type->size > 0 && record->size != type->size)
924 sys_warn (r, record->pos,
925 _("Record type 7, subtype %d has bad size %zu "
926 "(expected %d)."), subtype, record->size, type->size);
927 else if (type->count > 0 && record->count != type->count)
928 sys_warn (r, record->pos,
929 _("Record type 7, subtype %d has bad count %zu "
930 "(expected %d)."), subtype, record->count, type->count);
931 else if (type->count == 0 && type->size == 0)
933 /* Ignore this record. */
937 char *data = pool_malloc (r->pool, n_bytes + 1);
938 data[n_bytes] = '\0';
941 read_bytes (r, record->data, n_bytes);
948 sys_warn (r, record->pos,
949 _("Unrecognized record type 7, subtype %d. Please send a "
950 "copy of this file, and the syntax which created it to %s."),
951 subtype, PACKAGE_BUGREPORT);
954 skip_bytes (r, n_bytes);
959 skip_extension_record (struct sfm_reader *r, int subtype)
961 struct sfm_extension_record record;
963 read_extension_record_header (r, subtype, &record);
964 skip_bytes (r, record.count * record.size);
968 parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
969 struct sfm_read_info *info, struct dictionary *dict)
971 const char *dict_encoding = dict_get_encoding (dict);
972 struct substring product;
973 struct substring label;
976 /* Convert file label to UTF-8 and put it into DICT. */
977 label = recode_substring_pool ("UTF-8", dict_encoding,
978 ss_cstr (header->file_label), r->pool);
979 ss_trim (&label, ss_cstr (" "));
980 label.string[label.length] = '\0';
981 fixed_label = fix_line_ends (label.string);
982 dict_set_label (dict, fixed_label);
985 /* Put creation date and time in UTF-8 into INFO. */
986 info->creation_date = recode_string ("UTF-8", dict_encoding,
987 header->creation_date, -1);
988 info->creation_time = recode_string ("UTF-8", dict_encoding,
989 header->creation_time, -1);
991 /* Put product name into INFO, dropping eye-catcher string if present. */
992 product = recode_substring_pool ("UTF-8", dict_encoding,
993 ss_cstr (header->eye_catcher), r->pool);
994 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
995 ss_trim (&product, ss_cstr (" "));
996 info->product = ss_xstrdup (product);
999 /* Reads a variable (type 2) record from R and adds the
1000 corresponding variable to DICT.
1001 Also skips past additional variable records for long string
1004 parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
1005 struct sfm_var_record *var_recs, size_t n_var_recs)
1007 const char *dict_encoding = dict_get_encoding (dict);
1008 struct sfm_var_record *rec;
1011 for (rec = var_recs; rec < &var_recs[n_var_recs]; )
1013 struct variable *var;
1018 name = recode_string_pool ("UTF-8", dict_encoding,
1019 rec->name, 8, r->pool);
1020 name[strcspn (name, " ")] = '\0';
1022 if (!dict_id_is_valid (dict, name, false)
1023 || name[0] == '$' || name[0] == '#')
1024 sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
1026 if (rec->width < 0 || rec->width > 255)
1027 sys_error (r, rec->pos,
1028 _("Bad width %d for variable %s."), rec->width, name);
1030 var = rec->var = dict_create_var (dict, name, rec->width);
1032 sys_error (r, rec->pos, _("Duplicate variable name `%s'."), name);
1034 /* Set the short name the same as the long name. */
1035 var_set_short_name (var, 0, name);
1037 /* Get variable label, if any. */
1042 utf8_label = recode_string_pool ("UTF-8", dict_encoding,
1043 rec->label, -1, r->pool);
1044 var_set_label (var, utf8_label, false);
1047 /* Set missing values. */
1048 if (rec->missing_value_code != 0)
1050 int width = var_get_width (var);
1051 struct missing_values mv;
1053 mv_init_pool (r->pool, &mv, width);
1054 if (var_is_numeric (var))
1056 bool has_range = rec->missing_value_code < 0;
1057 int n_discrete = (has_range
1058 ? rec->missing_value_code == -3
1059 : rec->missing_value_code);
1064 double low = parse_float (r, rec->missing, 0);
1065 double high = parse_float (r, rec->missing, 8);
1067 /* Deal with SPSS 21 change in representation. */
1071 mv_add_range (&mv, low, high);
1075 for (i = 0; i < n_discrete; i++)
1077 mv_add_num (&mv, parse_float (r, rec->missing, ofs));
1085 value_init_pool (r->pool, &value, width);
1086 value_set_missing (&value, width);
1087 for (i = 0; i < rec->missing_value_code; i++)
1088 mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
1090 var_set_missing_values (var, &mv);
1094 parse_format_spec (r, rec->pos + 12, rec->print_format,
1095 PRINT_FORMAT, var, &n_warnings);
1096 parse_format_spec (r, rec->pos + 16, rec->write_format,
1097 WRITE_FORMAT, var, &n_warnings);
1099 /* Account for values.
1100 Skip long string continuation records, if any. */
1101 n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
1102 for (i = 1; i < n_values; i++)
1103 if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
1104 sys_error (r, rec->pos, _("Missing string continuation record."));
1109 /* Translates the format spec from sysfile format to internal
1112 parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
1113 enum which_format which, struct variable *v,
1116 const int max_warnings = 8;
1117 uint8_t raw_type = format >> 16;
1118 uint8_t w = format >> 8;
1127 ok = (fmt_from_io (raw_type, &f.type)
1128 && fmt_check_output (&f)
1129 && fmt_check_width_compat (&f, var_get_width (v)));
1134 if (which == PRINT_FORMAT)
1135 var_set_print_format (v, &f);
1137 var_set_write_format (v, &f);
1139 else if (format == 0)
1141 /* Actually observed in the wild. No point in warning about it. */
1143 else if (++*n_warnings <= max_warnings)
1145 if (which == PRINT_FORMAT)
1146 sys_warn (r, pos, _("Variable %s with width %d has invalid print "
1148 var_get_name (v), var_get_width (v), format);
1150 sys_warn (r, pos, _("Variable %s with width %d has invalid write "
1152 var_get_name (v), var_get_width (v), format);
1154 if (*n_warnings == max_warnings)
1155 sys_warn (r, -1, _("Suppressing further invalid format warnings."));
1160 parse_document (struct dictionary *dict, struct sfm_document_record *record)
1164 for (p = record->documents;
1165 p < record->documents + DOC_LINE_LENGTH * record->n_lines;
1166 p += DOC_LINE_LENGTH)
1168 struct substring line;
1170 line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
1171 ss_buffer (p, DOC_LINE_LENGTH), NULL);
1172 ss_rtrim (&line, ss_cstr (" "));
1173 line.string[line.length] = '\0';
1175 dict_add_document_line (dict, line.string, false);
1181 /* Parses record type 7, subtype 3. */
1183 parse_machine_integer_info (struct sfm_reader *r,
1184 const struct sfm_extension_record *record,
1185 struct sfm_read_info *info)
1187 int float_representation, expected_float_format;
1188 int integer_representation, expected_integer_format;
1190 /* Save version info. */
1191 info->version_major = parse_int (r, record->data, 0);
1192 info->version_minor = parse_int (r, record->data, 4);
1193 info->version_revision = parse_int (r, record->data, 8);
1195 /* Check floating point format. */
1196 float_representation = parse_int (r, record->data, 16);
1197 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
1198 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
1199 expected_float_format = 1;
1200 else if (r->float_format == FLOAT_Z_LONG)
1201 expected_float_format = 2;
1202 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
1203 expected_float_format = 3;
1206 if (float_representation != expected_float_format)
1207 sys_error (r, record->pos, _("Floating-point representation indicated by "
1208 "system file (%d) differs from expected (%d)."),
1209 float_representation, expected_float_format);
1211 /* Check integer format. */
1212 integer_representation = parse_int (r, record->data, 24);
1213 if (r->integer_format == INTEGER_MSB_FIRST)
1214 expected_integer_format = 1;
1215 else if (r->integer_format == INTEGER_LSB_FIRST)
1216 expected_integer_format = 2;
1219 if (integer_representation != expected_integer_format)
1220 sys_warn (r, record->pos,
1221 _("Integer format indicated by system file (%d) "
1222 "differs from expected (%d)."),
1223 integer_representation, expected_integer_format);
1228 choose_encoding (struct sfm_reader *r,
1229 const struct sfm_header_record *header,
1230 const struct sfm_extension_record *ext_integer,
1231 const struct sfm_extension_record *ext_encoding)
1233 /* The EXT_ENCODING record is a more reliable way to determine dictionary
1236 return ext_encoding->data;
1238 /* But EXT_INTEGER is better than nothing as a fallback. */
1241 int codepage = parse_int (r, ext_integer->data, 7 * 4);
1242 const char *encoding;
1251 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
1252 respectively. However, there are known to be many files in the wild
1253 with character code 2, yet have data which are clearly not ASCII.
1254 Therefore we ignore these values. */
1261 encoding = sys_get_encoding_from_codepage (codepage);
1262 if (encoding != NULL)
1268 /* If the file magic number is EBCDIC then its character data is too. */
1269 if (!strcmp (header->magic, EBCDIC_MAGIC))
1272 return locale_charset ();
1275 /* Parses record type 7, subtype 4. */
1277 parse_machine_float_info (struct sfm_reader *r,
1278 const struct sfm_extension_record *record)
1280 double sysmis = parse_float (r, record->data, 0);
1281 double highest = parse_float (r, record->data, 8);
1282 double lowest = parse_float (r, record->data, 16);
1284 if (sysmis != SYSMIS)
1285 sys_warn (r, record->pos,
1286 _("File specifies unexpected value %g (%a) as %s, "
1287 "instead of %g (%a)."),
1288 sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
1290 if (highest != HIGHEST)
1291 sys_warn (r, record->pos,
1292 _("File specifies unexpected value %g (%a) as %s, "
1293 "instead of %g (%a)."),
1294 highest, highest, "HIGHEST", HIGHEST, HIGHEST);
1296 /* SPSS before version 21 used a unique value just bigger than SYSMIS as
1297 LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
1298 appears in a context (missing values) where SYSMIS cannot. */
1299 if (lowest != LOWEST && lowest != SYSMIS)
1300 sys_warn (r, record->pos,
1301 _("File specifies unexpected value %g (%a) as %s, "
1302 "instead of %g (%a) or %g (%a)."),
1303 lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
1306 /* Parses record type 7, subtype 10. */
1308 parse_extra_product_info (struct sfm_reader *r,
1309 const struct sfm_extension_record *record,
1310 struct sfm_read_info *info)
1312 struct text_record *text;
1314 text = open_text_record (r, record, true);
1315 info->product_ext = fix_line_ends (text_get_all (text));
1316 close_text_record (r, text);
1319 /* Parses record type 7, subtype 7 or 19. */
1321 parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
1322 struct dictionary *dict)
1324 struct text_record *text;
1325 struct mrset *mrset;
1327 text = open_text_record (r, record, false);
1330 const char *counted = NULL;
1333 struct stringi_set var_names;
1334 size_t allocated_vars;
1338 mrset = xzalloc (sizeof *mrset);
1340 name = text_get_token (text, ss_cstr ("="), NULL);
1343 mrset->name = recode_string ("UTF-8", r->encoding, name, -1);
1345 if (mrset->name[0] != '$')
1347 sys_warn (r, record->pos,
1348 _("`%s' does not begin with `$' at offset %zu "
1349 "in MRSETS record."), mrset->name, text_pos (text));
1353 if (text_match (text, 'C'))
1355 mrset->type = MRSET_MC;
1356 if (!text_match (text, ' '))
1358 sys_warn (r, record->pos,
1359 _("Missing space following `%c' at offset %zu "
1360 "in MRSETS record."), 'C', text_pos (text));
1364 else if (text_match (text, 'D'))
1366 mrset->type = MRSET_MD;
1367 mrset->cat_source = MRSET_VARLABELS;
1369 else if (text_match (text, 'E'))
1373 mrset->type = MRSET_MD;
1374 mrset->cat_source = MRSET_COUNTEDVALUES;
1375 if (!text_match (text, ' '))
1377 sys_warn (r, record->pos,
1378 _("Missing space following `%c' at offset %zu "
1379 "in MRSETS record."), 'E', text_pos (text));
1383 number = text_get_token (text, ss_cstr (" "), NULL);
1384 if (!strcmp (number, "11"))
1385 mrset->label_from_var_label = true;
1386 else if (strcmp (number, "1"))
1387 sys_warn (r, record->pos,
1388 _("Unexpected label source value `%s' following `E' "
1389 "at offset %zu in MRSETS record."),
1390 number, text_pos (text));
1394 sys_warn (r, record->pos,
1395 _("Missing `C', `D', or `E' at offset %zu "
1396 "in MRSETS record."),
1401 if (mrset->type == MRSET_MD)
1403 counted = text_parse_counted_string (r, text);
1404 if (counted == NULL)
1408 label = text_parse_counted_string (r, text);
1411 if (label[0] != '\0')
1412 mrset->label = recode_string ("UTF-8", r->encoding, label, -1);
1414 stringi_set_init (&var_names);
1419 const char *raw_var_name;
1420 struct variable *var;
1423 raw_var_name = text_get_token (text, ss_cstr (" \n"), &delimiter);
1424 if (raw_var_name == NULL)
1426 sys_warn (r, record->pos,
1427 _("Missing new-line parsing variable names "
1428 "at offset %zu in MRSETS record."),
1432 var_name = recode_string ("UTF-8", r->encoding, raw_var_name, -1);
1434 var = dict_lookup_var (dict, var_name);
1440 if (!stringi_set_insert (&var_names, var_name))
1442 sys_warn (r, record->pos,
1443 _("Duplicate variable name %s "
1444 "at offset %zu in MRSETS record."),
1445 var_name, text_pos (text));
1451 if (mrset->label == NULL && mrset->label_from_var_label
1452 && var_has_label (var))
1453 mrset->label = xstrdup (var_get_label (var));
1456 && var_get_type (var) != var_get_type (mrset->vars[0]))
1458 sys_warn (r, record->pos,
1459 _("MRSET %s contains both string and "
1460 "numeric variables."), name);
1463 width = MIN (width, var_get_width (var));
1465 if (mrset->n_vars >= allocated_vars)
1466 mrset->vars = x2nrealloc (mrset->vars, &allocated_vars,
1467 sizeof *mrset->vars);
1468 mrset->vars[mrset->n_vars++] = var;
1470 while (delimiter != '\n');
1472 if (mrset->n_vars < 2)
1474 sys_warn (r, record->pos,
1475 _("MRSET %s has only %zu variables."), mrset->name,
1477 mrset_destroy (mrset);
1478 stringi_set_destroy (&var_names);
1482 if (mrset->type == MRSET_MD)
1484 mrset->width = width;
1485 value_init (&mrset->counted, width);
1487 mrset->counted.f = c_strtod (counted, NULL);
1489 value_copy_str_rpad (&mrset->counted, width,
1490 (const uint8_t *) counted, ' ');
1493 dict_add_mrset (dict, mrset);
1495 stringi_set_destroy (&var_names);
1497 mrset_destroy (mrset);
1498 close_text_record (r, text);
1501 /* Read record type 7, subtype 11, which specifies how variables
1502 should be displayed in GUI environments. */
1504 parse_display_parameters (struct sfm_reader *r,
1505 const struct sfm_extension_record *record,
1506 struct dictionary *dict)
1508 bool includes_width;
1509 bool warned = false;
1514 n_vars = dict_get_var_cnt (dict);
1515 if (record->count == 3 * n_vars)
1516 includes_width = true;
1517 else if (record->count == 2 * n_vars)
1518 includes_width = false;
1521 sys_warn (r, record->pos,
1522 _("Extension 11 has bad count %zu (for %zu variables)."),
1523 record->count, n_vars);
1528 for (i = 0; i < n_vars; ++i)
1530 struct variable *v = dict_get_var (dict, i);
1531 int measure, width, align;
1533 measure = parse_int (r, record->data, ofs);
1538 width = parse_int (r, record->data, ofs);
1544 align = parse_int (r, record->data, ofs);
1547 /* SPSS sometimes seems to set variables' measure to zero. */
1551 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1554 sys_warn (r, record->pos,
1555 _("Invalid variable display parameters for variable "
1556 "%zu (%s). Default parameters substituted."),
1557 i, var_get_name (v));
1562 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1563 : measure == 2 ? MEASURE_ORDINAL
1565 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1566 : align == 1 ? ALIGN_RIGHT
1569 /* Older versions (SPSS 9.0) sometimes set the display
1570 width to zero. This causes confusion in the GUI, so
1571 only set the width if it is nonzero. */
1573 var_set_display_width (v, width);
1578 rename_var_and_save_short_names (struct dictionary *dict, struct variable *var,
1579 const char *new_name)
1581 size_t n_short_names;
1585 /* Renaming a variable may clear its short names, but we
1586 want to retain them, so we save them and re-set them
1588 n_short_names = var_get_short_name_cnt (var);
1589 short_names = xnmalloc (n_short_names, sizeof *short_names);
1590 for (i = 0; i < n_short_names; i++)
1592 const char *s = var_get_short_name (var, i);
1593 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1596 /* Set long name. */
1597 dict_rename_var (dict, var, new_name);
1599 /* Restore short names. */
1600 for (i = 0; i < n_short_names; i++)
1602 var_set_short_name (var, i, short_names[i]);
1603 free (short_names[i]);
1608 /* Parses record type 7, subtype 13, which gives the long name that corresponds
1609 to each short name. Modifies variable names in DICT accordingly. */
1611 parse_long_var_name_map (struct sfm_reader *r,
1612 const struct sfm_extension_record *record,
1613 struct dictionary *dict)
1615 struct text_record *text;
1616 struct variable *var;
1621 /* There are no long variable names. Use the short variable names,
1622 converted to lowercase, as the long variable names. */
1625 for (i = 0; i < dict_get_var_cnt (dict); i++)
1627 struct variable *var = dict_get_var (dict, i);
1630 new_name = utf8_to_lower (var_get_name (var));
1631 rename_var_and_save_short_names (dict, var, new_name);
1638 /* Rename each of the variables, one by one. (In a correctly constructed
1639 system file, this cannot create any intermediate duplicate variable names,
1640 because all of the new variable names are longer than any of the old
1641 variable names and thus there cannot be any overlaps.) */
1642 text = open_text_record (r, record, true);
1643 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1645 /* Validate long name. */
1646 if (!dict_id_is_valid (dict, long_name, false))
1648 sys_warn (r, record->pos,
1649 _("Long variable mapping from %s to invalid "
1650 "variable name `%s'."),
1651 var_get_name (var), long_name);
1655 /* Identify any duplicates. */
1656 if (utf8_strcasecmp (var_get_short_name (var, 0), long_name)
1657 && dict_lookup_var (dict, long_name) != NULL)
1659 sys_warn (r, record->pos,
1660 _("Duplicate long variable name `%s'."), long_name);
1664 rename_var_and_save_short_names (dict, var, long_name);
1666 close_text_record (r, text);
1669 /* Reads record type 7, subtype 14, which gives the real length
1670 of each very long string. Rearranges DICT accordingly. */
1672 parse_long_string_map (struct sfm_reader *r,
1673 const struct sfm_extension_record *record,
1674 struct dictionary *dict)
1676 struct text_record *text;
1677 struct variable *var;
1680 text = open_text_record (r, record, true);
1681 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1683 size_t idx = var_get_dict_index (var);
1689 length = strtol (length_s, NULL, 10);
1690 if (length < 1 || length > MAX_STRING)
1692 sys_warn (r, record->pos,
1693 _("%s listed as string of invalid length %s "
1694 "in very long string record."),
1695 var_get_name (var), length_s);
1699 /* Check segments. */
1700 segment_cnt = sfm_width_to_segments (length);
1701 if (segment_cnt == 1)
1703 sys_warn (r, record->pos,
1704 _("%s listed in very long string record with width %s, "
1705 "which requires only one segment."),
1706 var_get_name (var), length_s);
1709 if (idx + segment_cnt > dict_get_var_cnt (dict))
1710 sys_error (r, record->pos,
1711 _("Very long string %s overflows dictionary."),
1712 var_get_name (var));
1714 /* Get the short names from the segments and check their
1716 for (i = 0; i < segment_cnt; i++)
1718 struct variable *seg = dict_get_var (dict, idx + i);
1719 int alloc_width = sfm_segment_alloc_width (length, i);
1720 int width = var_get_width (seg);
1723 var_set_short_name (var, i, var_get_short_name (seg, 0));
1724 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1725 sys_error (r, record->pos,
1726 _("Very long string with width %ld has segment %d "
1727 "of width %d (expected %d)."),
1728 length, i, width, alloc_width);
1730 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1731 var_set_width (var, length);
1733 close_text_record (r, text);
1734 dict_compact_values (dict);
1738 parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
1739 const struct sfm_var_record *var_recs, size_t n_var_recs,
1740 const struct sfm_value_label_record *record)
1742 struct variable **vars;
1746 utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
1747 for (i = 0; i < record->n_labels; i++)
1748 utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
1749 record->labels[i].label, -1,
1752 vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
1753 for (i = 0; i < record->n_vars; i++)
1754 vars[i] = lookup_var_by_index (r, record->pos,
1755 var_recs, n_var_recs, record->vars[i]);
1757 for (i = 1; i < record->n_vars; i++)
1758 if (var_get_type (vars[i]) != var_get_type (vars[0]))
1759 sys_error (r, record->pos,
1760 _("Variables associated with value label are not all of "
1761 "identical type. Variable %s is %s, but variable "
1763 var_get_name (vars[0]),
1764 var_is_numeric (vars[0]) ? _("numeric") : _("string"),
1765 var_get_name (vars[i]),
1766 var_is_numeric (vars[i]) ? _("numeric") : _("string"));
1768 for (i = 0; i < record->n_vars; i++)
1770 struct variable *var = vars[i];
1774 width = var_get_width (var);
1776 sys_error (r, record->pos,
1777 _("Value labels may not be added to long string "
1778 "variables (e.g. %s) using records types 3 and 4."),
1779 var_get_name (var));
1781 for (j = 0; j < record->n_labels; j++)
1783 struct sfm_value_label *label = &record->labels[j];
1786 value_init (&value, width);
1788 value.f = parse_float (r, label->value, 0);
1790 memcpy (value_str_rw (&value, width), label->value, width);
1792 if (!var_add_value_label (var, &value, utf8_labels[j]))
1794 if (var_is_numeric (var))
1795 sys_warn (r, record->pos,
1796 _("Duplicate value label for %g on %s."),
1797 value.f, var_get_name (var));
1799 sys_warn (r, record->pos,
1800 _("Duplicate value label for `%.*s' on %s."),
1801 width, value_str (&value, width),
1802 var_get_name (var));
1805 value_destroy (&value, width);
1809 pool_free (r->pool, vars);
1810 for (i = 0; i < record->n_labels; i++)
1811 pool_free (r->pool, utf8_labels[i]);
1812 pool_free (r->pool, utf8_labels);
1815 static struct variable *
1816 lookup_var_by_index (struct sfm_reader *r, off_t offset,
1817 const struct sfm_var_record *var_recs, size_t n_var_recs,
1820 const struct sfm_var_record *rec;
1822 if (idx < 1 || idx > n_var_recs)
1824 sys_error (r, offset,
1825 _("Variable index %d not in valid range 1...%zu."),
1830 rec = &var_recs[idx - 1];
1831 if (rec->var == NULL)
1833 sys_error (r, offset,
1834 _("Variable index %d refers to long string continuation."),
1842 /* Parses a set of custom attributes from TEXT into ATTRS.
1843 ATTRS may be a null pointer, in which case the attributes are
1844 read but discarded. */
1846 parse_attributes (struct sfm_reader *r, struct text_record *text,
1847 struct attrset *attrs)
1851 struct attribute *attr;
1855 /* Parse the key. */
1856 key = text_get_token (text, ss_cstr ("("), NULL);
1860 attr = attribute_create (key);
1861 for (index = 1; ; index++)
1863 /* Parse the value. */
1867 value = text_get_token (text, ss_cstr ("\n"), NULL);
1870 text_warn (r, text, _("Error parsing attribute value %s[%d]."),
1875 length = strlen (value);
1876 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
1878 value[length - 1] = '\0';
1879 attribute_add_value (attr, value + 1);
1884 _("Attribute value %s[%d] is not quoted: %s."),
1886 attribute_add_value (attr, value);
1889 /* Was this the last value for this attribute? */
1890 if (text_match (text, ')'))
1894 attrset_add (attrs, attr);
1896 attribute_destroy (attr);
1898 while (!text_match (text, '/'));
1901 /* Reads record type 7, subtype 17, which lists custom
1902 attributes on the data file. */
1904 parse_data_file_attributes (struct sfm_reader *r,
1905 const struct sfm_extension_record *record,
1906 struct dictionary *dict)
1908 struct text_record *text = open_text_record (r, record, true);
1909 parse_attributes (r, text, dict_get_attributes (dict));
1910 close_text_record (r, text);
1913 /* Parses record type 7, subtype 18, which lists custom
1914 attributes on individual variables. */
1916 parse_variable_attributes (struct sfm_reader *r,
1917 const struct sfm_extension_record *record,
1918 struct dictionary *dict)
1920 struct text_record *text;
1921 struct variable *var;
1923 text = open_text_record (r, record, true);
1924 while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
1925 parse_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
1926 close_text_record (r, text);
1930 assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
1932 size_t n_warnings = 0;
1935 for (i = 0; i < dict_get_var_cnt (dict); i++)
1937 struct variable *var = dict_get_var (dict, i);
1938 struct attrset *attrs = var_get_attributes (var);
1939 const struct attribute *attr = attrset_lookup (attrs, "$@Role");
1942 int value = atoi (attribute_get_value (attr, 0));
1964 role = ROLE_PARTITION;
1973 if (n_warnings++ == 0)
1974 sys_warn (r, -1, _("Invalid role for variable %s."),
1975 var_get_name (var));
1978 var_set_role (var, role);
1983 sys_warn (r, -1, _("%zu other variables had invalid roles."),
1988 check_overflow (struct sfm_reader *r,
1989 const struct sfm_extension_record *record,
1990 size_t ofs, size_t length)
1992 size_t end = record->size * record->count;
1993 if (length >= end || ofs + length > end)
1994 sys_error (r, record->pos + end,
1995 _("Long string value label record ends unexpectedly."));
1999 parse_long_string_value_labels (struct sfm_reader *r,
2000 const struct sfm_extension_record *record,
2001 struct dictionary *dict)
2003 const char *dict_encoding = dict_get_encoding (dict);
2004 size_t end = record->size * record->count;
2011 struct variable *var;
2016 /* Parse variable name length. */
2017 check_overflow (r, record, ofs, 4);
2018 var_name_len = parse_int (r, record->data, ofs);
2021 /* Parse variable name, width, and number of labels. */
2022 check_overflow (r, record, ofs, var_name_len + 8);
2023 var_name = recode_string_pool ("UTF-8", dict_encoding,
2024 (const char *) record->data + ofs,
2025 var_name_len, r->pool);
2026 width = parse_int (r, record->data, ofs + var_name_len);
2027 n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
2028 ofs += var_name_len + 8;
2030 /* Look up 'var' and validate. */
2031 var = dict_lookup_var (dict, var_name);
2033 sys_warn (r, record->pos + ofs,
2034 _("Ignoring long string value record for "
2035 "unknown variable %s."), var_name);
2036 else if (var_is_numeric (var))
2038 sys_warn (r, record->pos + ofs,
2039 _("Ignoring long string value record for "
2040 "numeric variable %s."), var_name);
2043 else if (width != var_get_width (var))
2045 sys_warn (r, record->pos + ofs,
2046 _("Ignoring long string value record for variable %s "
2047 "because the record's width (%d) does not match the "
2048 "variable's width (%d)."),
2049 var_name, width, var_get_width (var));
2054 value_init_pool (r->pool, &value, width);
2055 for (i = 0; i < n_labels; i++)
2057 size_t value_length, label_length;
2058 bool skip = var == NULL;
2060 /* Parse value length. */
2061 check_overflow (r, record, ofs, 4);
2062 value_length = parse_int (r, record->data, ofs);
2066 check_overflow (r, record, ofs, value_length);
2069 if (value_length == width)
2070 memcpy (value_str_rw (&value, width),
2071 (const uint8_t *) record->data + ofs, width);
2074 sys_warn (r, record->pos + ofs,
2075 _("Ignoring long string value %zu for variable "
2076 "%s, with width %d, that has bad value "
2078 i, var_get_name (var), width, value_length);
2082 ofs += value_length;
2084 /* Parse label length. */
2085 check_overflow (r, record, ofs, 4);
2086 label_length = parse_int (r, record->data, ofs);
2090 check_overflow (r, record, ofs, label_length);
2095 label = recode_string_pool ("UTF-8", dict_encoding,
2096 (const char *) record->data + ofs,
2097 label_length, r->pool);
2098 if (!var_add_value_label (var, &value, label))
2099 sys_warn (r, record->pos + ofs,
2100 _("Duplicate value label for `%.*s' on %s."),
2101 width, value_str (&value, width),
2102 var_get_name (var));
2103 pool_free (r->pool, label);
2105 ofs += label_length;
2112 static void partial_record (struct sfm_reader *r)
2115 static void read_error (struct casereader *, const struct sfm_reader *);
2117 static bool read_case_number (struct sfm_reader *, double *);
2118 static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
2119 static int read_opcode (struct sfm_reader *);
2120 static bool read_compressed_number (struct sfm_reader *, double *);
2121 static bool read_compressed_string (struct sfm_reader *, uint8_t *);
2122 static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
2123 static bool skip_whole_strings (struct sfm_reader *, size_t);
2125 /* Reads and returns one case from READER's file. Returns a null
2126 pointer if not successful. */
2127 static struct ccase *
2128 sys_file_casereader_read (struct casereader *reader, void *r_)
2130 struct sfm_reader *r = r_;
2131 struct ccase *volatile c;
2137 c = case_create (r->proto);
2138 if (setjmp (r->bail_out))
2140 casereader_force_error (reader);
2145 for (i = 0; i < r->sfm_var_cnt; i++)
2147 struct sfm_var *sv = &r->sfm_vars[i];
2148 union value *v = case_data_rw_idx (c, sv->case_index);
2150 if (sv->var_width == 0)
2152 if (!read_case_number (r, &v->f))
2157 uint8_t *s = value_str_rw (v, sv->var_width);
2158 if (!read_case_string (r, s + sv->offset, sv->segment_width))
2160 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
2169 if (r->case_cnt != -1)
2170 read_error (reader, r);
2175 /* Issues an error that R ends in a partial record. */
2177 partial_record (struct sfm_reader *r)
2179 sys_error (r, r->pos, _("File ends in partial case."));
2182 /* Issues an error that an unspecified error occurred SFM, and
2185 read_error (struct casereader *r, const struct sfm_reader *sfm)
2187 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
2188 casereader_force_error (r);
2191 /* Reads a number from R and stores its value in *D.
2192 If R is compressed, reads a compressed number;
2193 otherwise, reads a number in the regular way.
2194 Returns true if successful, false if end of file is
2195 reached immediately. */
2197 read_case_number (struct sfm_reader *r, double *d)
2202 if (!try_read_bytes (r, number, sizeof number))
2204 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
2208 return read_compressed_number (r, d);
2211 /* Reads LENGTH string bytes from R into S.
2212 Always reads a multiple of 8 bytes; if LENGTH is not a
2213 multiple of 8, then extra bytes are read and discarded without
2215 Reads compressed strings if S is compressed.
2216 Returns true if successful, false if end of file is
2217 reached immediately. */
2219 read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
2221 size_t whole = ROUND_DOWN (length, 8);
2222 size_t partial = length % 8;
2226 if (!read_whole_strings (r, s, whole))
2233 if (!read_whole_strings (r, bounce, sizeof bounce))
2239 memcpy (s + whole, bounce, partial);
2245 /* Reads and returns the next compression opcode from R. */
2247 read_opcode (struct sfm_reader *r)
2249 assert (r->compressed);
2253 if (r->opcode_idx >= sizeof r->opcodes)
2255 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
2259 opcode = r->opcodes[r->opcode_idx++];
2266 /* Reads a compressed number from R and stores its value in D.
2267 Returns true if successful, false if end of file is
2268 reached immediately. */
2270 read_compressed_number (struct sfm_reader *r, double *d)
2272 int opcode = read_opcode (r);
2280 *d = read_float (r);
2284 float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
2285 if (!r->corruption_warning)
2287 r->corruption_warning = true;
2288 sys_warn (r, r->pos,
2289 _("Possible compressed data corruption: "
2290 "compressed spaces appear in numeric field."));
2299 *d = opcode - r->bias;
2306 /* Reads a compressed 8-byte string segment from R and stores it
2308 Returns true if successful, false if end of file is
2309 reached immediately. */
2311 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
2313 int opcode = read_opcode (r);
2321 read_bytes (r, dst, 8);
2325 memset (dst, ' ', 8);
2330 double value = opcode - r->bias;
2331 float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
2334 /* This has actually been seen "in the wild". The submitter of the
2335 file that showed that the contents decoded as spaces, but they
2336 were at the end of the field so it's possible that the null
2337 bytes just acted as null terminators. */
2339 else if (!r->corruption_warning)
2341 r->corruption_warning = true;
2342 sys_warn (r, r->pos,
2343 _("Possible compressed data corruption: "
2344 "string contains compressed integer (opcode %d)."),
2354 /* Reads LENGTH string bytes from R into S.
2355 LENGTH must be a multiple of 8.
2356 Reads compressed strings if S is compressed.
2357 Returns true if successful, false if end of file is
2358 reached immediately. */
2360 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
2362 assert (length % 8 == 0);
2364 return try_read_bytes (r, s, length);
2368 for (ofs = 0; ofs < length; ofs += 8)
2369 if (!read_compressed_string (r, s + ofs))
2379 /* Skips LENGTH string bytes from R.
2380 LENGTH must be a multiple of 8.
2381 (LENGTH is also limited to 1024, but that's only because the
2382 current caller never needs more than that many bytes.)
2383 Returns true if successful, false if end of file is
2384 reached immediately. */
2386 skip_whole_strings (struct sfm_reader *r, size_t length)
2388 uint8_t buffer[1024];
2389 assert (length < sizeof buffer);
2390 return read_whole_strings (r, buffer, length);
2393 /* Helpers for reading records that contain structured text
2396 /* Maximum number of warnings to issue for a single text
2398 #define MAX_TEXT_WARNINGS 5
2403 struct substring buffer; /* Record contents. */
2404 off_t start; /* Starting offset in file. */
2405 size_t pos; /* Current position in buffer. */
2406 int n_warnings; /* Number of warnings issued or suppressed. */
2407 bool recoded; /* Recoded into UTF-8? */
2410 static struct text_record *
2411 open_text_record (struct sfm_reader *r,
2412 const struct sfm_extension_record *record,
2413 bool recode_to_utf8)
2415 struct text_record *text;
2416 struct substring raw;
2418 text = pool_alloc (r->pool, sizeof *text);
2419 raw = ss_buffer (record->data, record->size * record->count);
2420 text->start = record->pos;
2421 text->buffer = (recode_to_utf8
2422 ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
2425 text->n_warnings = 0;
2426 text->recoded = recode_to_utf8;
2431 /* Closes TEXT, frees its storage, and issues a final warning
2432 about suppressed warnings if necesary. */
2434 close_text_record (struct sfm_reader *r, struct text_record *text)
2436 if (text->n_warnings > MAX_TEXT_WARNINGS)
2437 sys_warn (r, -1, _("Suppressed %d additional related warnings."),
2438 text->n_warnings - MAX_TEXT_WARNINGS);
2440 pool_free (r->pool, ss_data (text->buffer));
2443 /* Reads a variable=value pair from TEXT.
2444 Looks up the variable in DICT and stores it into *VAR.
2445 Stores a null-terminated value into *VALUE. */
2447 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
2448 struct text_record *text,
2449 struct variable **var, char **value)
2453 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
2456 *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
2460 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
2461 ss_buffer ("\t\0", 2));
2469 text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
2470 struct text_record *text, struct substring delimiters,
2471 struct variable **var)
2475 name = text_get_token (text, delimiters, NULL);
2479 *var = dict_lookup_var (dict, name);
2483 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
2490 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
2491 struct text_record *text, struct substring delimiters,
2492 struct variable **var)
2494 char *short_name = text_get_token (text, delimiters, NULL);
2495 if (short_name == NULL)
2498 *var = dict_lookup_var (dict, short_name);
2500 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
2505 /* Displays a warning for the current file position, limiting the
2506 number to MAX_TEXT_WARNINGS for TEXT. */
2508 text_warn (struct sfm_reader *r, struct text_record *text,
2509 const char *format, ...)
2511 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
2515 va_start (args, format);
2516 sys_msg (r, text->start + text->pos, MW, format, args);
2522 text_get_token (struct text_record *text, struct substring delimiters,
2525 struct substring token;
2528 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
2531 end = &ss_data (token)[ss_length (token)];
2532 if (delimiter != NULL)
2535 return ss_data (token);
2538 /* Reads a integer value expressed in decimal, then a space, then a string that
2539 consists of exactly as many bytes as specified by the integer, then a space,
2540 from TEXT. Returns the string, null-terminated, as a subset of TEXT's
2541 buffer (so the caller should not free the string). */
2543 text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
2551 while (text->pos < text->buffer.length)
2553 int c = text->buffer.string[text->pos];
2554 if (c < '0' || c > '9')
2556 n = (n * 10) + (c - '0');
2559 if (text->pos >= text->buffer.length || start == text->pos)
2561 sys_warn (r, text->start,
2562 _("Expecting digit at offset %zu in MRSETS record."),
2567 if (!text_match (text, ' '))
2569 sys_warn (r, text->start,
2570 _("Expecting space at offset %zu in MRSETS record."),
2575 if (text->pos + n > text->buffer.length)
2577 sys_warn (r, text->start,
2578 _("%zu-byte string starting at offset %zu "
2579 "exceeds record length %zu."),
2580 n, text->pos, text->buffer.length);
2584 s = &text->buffer.string[text->pos];
2587 sys_warn (r, text->start,
2588 _("Expecting space at offset %zu following %zu-byte string."),
2598 text_match (struct text_record *text, char c)
2600 if (text->buffer.string[text->pos] == c)
2609 /* Returns the current byte offset (as converted to UTF-8, if it was converted)
2610 inside the TEXT's string. */
2612 text_pos (const struct text_record *text)
2618 text_get_all (const struct text_record *text)
2620 return text->buffer.string;
2625 /* Displays a corruption message. */
2627 sys_msg (struct sfm_reader *r, off_t offset,
2628 int class, const char *format, va_list args)
2633 ds_init_empty (&text);
2635 ds_put_format (&text, _("`%s' near offset 0x%llx: "),
2636 fh_get_file_name (r->fh), (long long int) offset);
2638 ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
2639 ds_put_vformat (&text, format, args);
2641 m.category = msg_class_to_category (class);
2642 m.severity = msg_class_to_severity (class);
2648 m.text = ds_cstr (&text);
2653 /* Displays a warning for offset OFFSET in the file. */
2655 sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
2659 va_start (args, format);
2660 sys_msg (r, offset, MW, format, args);
2664 /* Displays an error for the current file position,
2665 marks it as in an error state,
2666 and aborts reading it using longjmp. */
2668 sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
2672 va_start (args, format);
2673 sys_msg (r, offset, ME, format, args);
2677 longjmp (r->bail_out, 1);
2680 /* Reads BYTE_CNT bytes into BUF.
2681 Returns true if exactly BYTE_CNT bytes are successfully read.
2682 Aborts if an I/O error or a partial read occurs.
2683 If EOF_IS_OK, then an immediate end-of-file causes false to be
2684 returned; otherwise, immediate end-of-file causes an abort
2687 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
2688 void *buf, size_t byte_cnt)
2690 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
2691 r->pos += bytes_read;
2692 if (bytes_read == byte_cnt)
2694 else if (ferror (r->file))
2695 sys_error (r, r->pos, _("System error: %s."), strerror (errno));
2696 else if (!eof_is_ok || bytes_read != 0)
2697 sys_error (r, r->pos, _("Unexpected end of file."));
2702 /* Reads BYTE_CNT into BUF.
2703 Aborts upon I/O error or if end-of-file is encountered. */
2705 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2707 read_bytes_internal (r, false, buf, byte_cnt);
2710 /* Reads BYTE_CNT bytes into BUF.
2711 Returns true if exactly BYTE_CNT bytes are successfully read.
2712 Returns false if an immediate end-of-file is encountered.
2713 Aborts if an I/O error or a partial read occurs. */
2715 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
2717 return read_bytes_internal (r, true, buf, byte_cnt);
2720 /* Reads a 32-bit signed integer from R and returns its value in
2723 read_int (struct sfm_reader *r)
2726 read_bytes (r, integer, sizeof integer);
2727 return integer_get (r->integer_format, integer, sizeof integer);
2730 /* Reads a 64-bit floating-point number from R and returns its
2731 value in host format. */
2733 read_float (struct sfm_reader *r)
2736 read_bytes (r, number, sizeof number);
2737 return float_get_double (r->float_format, number);
2741 parse_int (struct sfm_reader *r, const void *data, size_t ofs)
2743 return integer_get (r->integer_format, (const uint8_t *) data + ofs, 4);
2747 parse_float (struct sfm_reader *r, const void *data, size_t ofs)
2749 return float_get_double (r->float_format, (const uint8_t *) data + ofs);
2752 /* Reads exactly SIZE - 1 bytes into BUFFER
2753 and stores a null byte into BUFFER[SIZE - 1]. */
2755 read_string (struct sfm_reader *r, char *buffer, size_t size)
2758 read_bytes (r, buffer, size - 1);
2759 buffer[size - 1] = '\0';
2762 /* Skips BYTES bytes forward in R. */
2764 skip_bytes (struct sfm_reader *r, size_t bytes)
2769 size_t chunk = MIN (sizeof buffer, bytes);
2770 read_bytes (r, buffer, chunk);
2775 /* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have
2776 been replaced by LFs.
2778 (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
2779 files that use CR-only line ends in the file label and extra product
2782 fix_line_ends (const char *s)
2786 d = dst = xmalloc (strlen (s) + 1);
2804 static const struct casereader_class sys_file_casereader_class =
2806 sys_file_casereader_read,
2807 sys_file_casereader_destroy,