1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/sys-file-private.h"
28 #include "data/any-reader.h"
29 #include "data/attributes.h"
30 #include "data/case.h"
31 #include "data/casereader-provider.h"
32 #include "data/casereader.h"
33 #include "data/dictionary.h"
34 #include "data/file-handle-def.h"
35 #include "data/file-name.h"
36 #include "data/format.h"
37 #include "data/identifier.h"
38 #include "data/missing-values.h"
39 #include "data/mrset.h"
40 #include "data/short-names.h"
41 #include "data/value-labels.h"
42 #include "data/value.h"
43 #include "data/variable.h"
44 #include "libpspp/array.h"
45 #include "libpspp/assertion.h"
46 #include "libpspp/compiler.h"
47 #include "libpspp/i18n.h"
48 #include "libpspp/ll.h"
49 #include "libpspp/message.h"
50 #include "libpspp/misc.h"
51 #include "libpspp/pool.h"
52 #include "libpspp/str.h"
53 #include "libpspp/stringi-set.h"
55 #include "gl/c-strtod.h"
56 #include "gl/c-ctype.h"
57 #include "gl/inttostr.h"
58 #include "gl/localcharset.h"
59 #include "gl/minmax.h"
60 #include "gl/unlocked-io.h"
61 #include "gl/xalloc.h"
62 #include "gl/xalloc-oversized.h"
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) (msgid)
71 /* subtypes 0-2 unknown */
72 EXT_INTEGER = 3, /* Machine integer info. */
73 EXT_FLOAT = 4, /* Machine floating-point info. */
74 EXT_VAR_SETS = 5, /* Variable sets. */
75 EXT_DATE = 6, /* DATE. */
76 EXT_MRSETS = 7, /* Multiple response sets. */
77 EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */
78 /* subtype 9 unknown */
79 EXT_PRODUCT_INFO = 10, /* Extra product info text. */
80 EXT_DISPLAY = 11, /* Variable display parameters. */
81 /* subtype 12 unknown */
82 EXT_LONG_NAMES = 13, /* Long variable names. */
83 EXT_LONG_STRINGS = 14, /* Long strings. */
84 /* subtype 15 unknown */
85 EXT_NCASES = 16, /* Extended number of cases. */
86 EXT_FILE_ATTRS = 17, /* Data file attributes. */
87 EXT_VAR_ATTRS = 18, /* Variable attributes. */
88 EXT_MRSETS2 = 19, /* Multiple response sets (extended). */
89 EXT_ENCODING = 20, /* Character encoding. */
90 EXT_LONG_LABELS = 21, /* Value labels for long strings. */
91 EXT_LONG_MISSING = 22, /* Missing values for long strings. */
92 EXT_DATAVIEW = 24 /* "Format properties in dataview table". */
95 /* Fields from the top-level header record. */
96 struct sfm_header_record
98 char magic[5]; /* First 4 bytes of file, then null. */
99 int weight_idx; /* 0 if unweighted, otherwise a var index. */
100 int nominal_case_size; /* Number of var positions. */
102 /* These correspond to the members of struct any_file_info or a dictionary
103 but in the system file's encoding rather than ASCII. */
104 char creation_date[10]; /* "dd mmm yy". */
105 char creation_time[9]; /* "hh:mm:ss". */
106 char eye_catcher[61]; /* Eye-catcher string, then product name. */
107 char file_label[65]; /* File label. */
110 struct sfm_var_record
117 int missing_value_code;
120 struct variable *var;
123 struct sfm_value_label
129 struct sfm_value_label_record
132 struct sfm_value_label *labels;
133 unsigned int n_labels;
139 struct sfm_document_record
148 const char *name; /* Name. */
149 const char *label; /* Human-readable label for group. */
150 enum mrset_type type; /* Group type. */
151 const char **vars; /* Constituent variables' names. */
152 size_t n_vars; /* Number of constituent variables. */
155 enum mrset_md_cat_source cat_source; /* Source of category labels. */
156 bool label_from_var_label; /* 'label' taken from variable label? */
157 const char *counted; /* Counted value, as string. */
160 struct sfm_extension_record
162 struct ll ll; /* In struct sfm_reader 'var_attrs' list. */
163 int subtype; /* Record subtype. */
164 off_t pos; /* Starting offset in file. */
165 unsigned int size; /* Size of data elements. */
166 unsigned int count; /* Number of data elements. */
167 void *data; /* Contents. */
170 /* System file reader. */
173 struct any_reader any_reader;
175 /* Resource tracking. */
176 struct pool *pool; /* All system file state. */
179 struct any_read_info info;
180 struct sfm_header_record header;
181 struct sfm_var_record *vars;
183 struct sfm_value_label_record *labels;
185 struct sfm_document_record *document;
186 struct sfm_mrset *mrsets;
188 struct sfm_extension_record *extensions[32];
189 struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */
192 struct file_handle *fh; /* File handle. */
193 struct fh_lock *lock; /* Mutual exclusion for file handle. */
194 FILE *file; /* File stream. */
195 off_t pos; /* Position in file. */
196 bool error; /* I/O or corruption error? */
197 struct caseproto *proto; /* Format of output cases. */
200 enum integer_format integer_format; /* On-disk integer format. */
201 enum float_format float_format; /* On-disk floating point format. */
202 struct sfm_var *sfm_vars; /* Variables. */
203 size_t sfm_var_cnt; /* Number of variables. */
204 int case_cnt; /* Number of cases */
205 const char *encoding; /* String encoding. */
208 enum any_compression compression;
209 double bias; /* Compression bias, usually 100.0. */
210 uint8_t opcodes[8]; /* Current block of opcodes. */
211 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
212 bool corruption_warning; /* Warned about possible corruption? */
214 /* ZLIB decompression. */
215 long long int ztrailer_ofs; /* Offset of ZLIB trailer at end of file. */
216 #define ZIN_BUF_SIZE 4096
217 uint8_t *zin_buf; /* Inflation input buffer. */
218 #define ZOUT_BUF_SIZE 16384
219 uint8_t *zout_buf; /* Inflation output buffer. */
220 unsigned int zout_end; /* Number of bytes of data in zout_buf. */
221 unsigned int zout_pos; /* First unconsumed byte in zout_buf. */
222 z_stream zstream; /* ZLIB inflater. */
225 static const struct casereader_class sys_file_casereader_class;
/* Converts R_, which must be the 'any_reader' member embedded in a struct
   sfm_reader, into a pointer to that struct sfm_reader.  Asserts that R_'s
   class is sys_file_reader_class, so passing any other kind of reader is a
   programming error. */
227 static struct sfm_reader *
228 sfm_reader_cast (const struct any_reader *r_)
230 assert (r_->klass == &sys_file_reader_class);
231 return UP_CAST (r_, struct sfm_reader, any_reader);
234 static bool sfm_close (struct any_reader *);
236 static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
237 const struct sfm_var_record *,
240 static void sys_msg (struct sfm_reader *r, off_t, int class,
241 const char *format, va_list args)
242 PRINTF_FORMAT (4, 0);
243 static void sys_warn (struct sfm_reader *, off_t, const char *, ...)
244 PRINTF_FORMAT (3, 4);
245 static void sys_error (struct sfm_reader *, off_t, const char *, ...)
246 PRINTF_FORMAT (3, 4);
248 static bool read_bytes (struct sfm_reader *, void *, size_t)
250 static int try_read_bytes (struct sfm_reader *, void *, size_t)
252 static bool read_int (struct sfm_reader *, int *) WARN_UNUSED_RESULT;
253 static bool read_uint (struct sfm_reader *, unsigned int *) WARN_UNUSED_RESULT;
254 static bool read_int64 (struct sfm_reader *, long long int *)
256 static bool read_uint64 (struct sfm_reader *, unsigned long long int *)
258 static bool read_string (struct sfm_reader *, char *, size_t)
260 static bool skip_bytes (struct sfm_reader *, size_t) WARN_UNUSED_RESULT;
262 /* ZLIB compressed data handling. */
263 static bool read_zheader (struct sfm_reader *) WARN_UNUSED_RESULT;
264 static bool open_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
265 static bool close_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
266 static int read_bytes_zlib (struct sfm_reader *, void *, size_t)
268 static int read_compressed_bytes (struct sfm_reader *, void *, size_t)
270 static int try_read_compressed_bytes (struct sfm_reader *, void *, size_t)
272 static bool read_compressed_float (struct sfm_reader *, double *)
275 static char *fix_line_ends (const char *);
277 static int parse_int (const struct sfm_reader *, const void *data, size_t ofs);
278 static double parse_float (const struct sfm_reader *,
279 const void *data, size_t ofs);
281 static bool read_variable_record (struct sfm_reader *,
282 struct sfm_var_record *);
283 static bool read_value_label_record (struct sfm_reader *,
284 struct sfm_value_label_record *);
285 static struct sfm_document_record *read_document_record (struct sfm_reader *);
286 static bool read_extension_record (struct sfm_reader *, int subtype,
287 struct sfm_extension_record **);
288 static bool skip_extension_record (struct sfm_reader *, int subtype);
290 static struct text_record *open_text_record (
291 struct sfm_reader *, const struct sfm_extension_record *,
292 bool recode_to_utf8);
293 static void close_text_record (struct sfm_reader *,
294 struct text_record *);
295 static bool read_variable_to_value_pair (struct sfm_reader *,
297 struct text_record *,
298 struct variable **var, char **value);
299 static void text_warn (struct sfm_reader *r, struct text_record *text,
300 const char *format, ...) PRINTF_FORMAT (3, 4);
301 static char *text_get_token (struct text_record *,
302 struct substring delimiters, char *delimiter);
303 static bool text_match (struct text_record *, char c);
304 static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
305 struct text_record *,
306 struct substring delimiters,
308 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
309 struct text_record *,
310 struct substring delimiters,
312 static const char *text_parse_counted_string (struct sfm_reader *,
313 struct text_record *);
314 static size_t text_pos (const struct text_record *);
315 static const char *text_get_all (const struct text_record *);
317 /* Dictionary reader. */
325 static bool read_dictionary (struct sfm_reader *);
326 static bool read_record (struct sfm_reader *, int type,
327 size_t *allocated_vars, size_t *allocated_labels);
328 static bool read_header (struct sfm_reader *, struct any_read_info *,
329 struct sfm_header_record *);
330 static void parse_header (struct sfm_reader *,
331 const struct sfm_header_record *,
332 struct any_read_info *, struct dictionary *);
333 static bool parse_variable_records (struct sfm_reader *, struct dictionary *,
334 struct sfm_var_record *, size_t n);
335 static void parse_format_spec (struct sfm_reader *, off_t pos,
336 unsigned int format, enum which_format,
337 struct variable *, int *format_warning_cnt);
338 static void parse_document (struct dictionary *, struct sfm_document_record *);
339 static void parse_display_parameters (struct sfm_reader *,
340 const struct sfm_extension_record *,
341 struct dictionary *);
342 static bool parse_machine_integer_info (struct sfm_reader *,
343 const struct sfm_extension_record *,
344 struct any_read_info *);
345 static void parse_machine_float_info (struct sfm_reader *,
346 const struct sfm_extension_record *);
347 static void parse_extra_product_info (struct sfm_reader *,
348 const struct sfm_extension_record *,
349 struct any_read_info *);
350 static void parse_mrsets (struct sfm_reader *,
351 const struct sfm_extension_record *,
352 size_t *allocated_mrsets);
353 static void decode_mrsets (struct sfm_reader *, struct dictionary *);
354 static void parse_long_var_name_map (struct sfm_reader *,
355 const struct sfm_extension_record *,
356 struct dictionary *);
357 static bool parse_long_string_map (struct sfm_reader *,
358 const struct sfm_extension_record *,
359 struct dictionary *);
360 static bool parse_value_labels (struct sfm_reader *, struct dictionary *,
361 const struct sfm_var_record *,
363 const struct sfm_value_label_record *);
364 static void parse_data_file_attributes (struct sfm_reader *,
365 const struct sfm_extension_record *,
366 struct dictionary *);
367 static void parse_variable_attributes (struct sfm_reader *,
368 const struct sfm_extension_record *,
369 struct dictionary *);
370 static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
371 static void parse_long_string_value_labels (struct sfm_reader *,
372 const struct sfm_extension_record *,
373 struct dictionary *);
374 static void parse_long_string_missing_values (
375 struct sfm_reader *, const struct sfm_extension_record *,
376 struct dictionary *);
378 /* Frees the strings inside INFO. */
380 any_read_info_destroy (struct any_read_info *info)
384 free (info->creation_date);
385 free (info->creation_time);
386 free (info->product);
387 free (info->product_ext);
391 /* Tries to open FH for reading as a system file. Returns an sfm_reader if
392 successful, otherwise NULL. */
393 static struct any_reader *
394 sfm_open (struct file_handle *fh)
396 size_t allocated_mrsets = 0;
397 struct sfm_reader *r;
399 /* Create and initialize reader. */
400 r = xzalloc (sizeof *r);
401 r->any_reader.klass = &sys_file_reader_class;
402 r->pool = pool_create ();
403 pool_register (r->pool, free, r);
405 r->opcode_idx = sizeof r->opcodes;
406 ll_init (&r->var_attrs);
408 /* TRANSLATORS: this fragment will be interpolated into
409 messages in fh_lock() that identify types of files. */
410 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
414 r->file = fn_open (fh, "rb");
417 msg (ME, _("Error opening `%s' for reading as a system file: %s."),
418 fh_get_file_name (r->fh), strerror (errno));
422 if (!read_dictionary (r))
425 if (r->extensions[EXT_MRSETS] != NULL)
426 parse_mrsets (r, r->extensions[EXT_MRSETS], &allocated_mrsets);
428 if (r->extensions[EXT_MRSETS2] != NULL)
429 parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
431 return &r->any_reader;
435 sfm_close (&r->any_reader);
440 read_dictionary (struct sfm_reader *r)
442 size_t allocated_vars;
443 size_t allocated_labels;
445 if (!read_header (r, &r->info, &r->header))
449 allocated_labels = 0;
454 if (!read_int (r, &type))
458 if (!read_record (r, type, &allocated_vars, &allocated_labels))
462 if (!skip_bytes (r, 4))
465 if (r->compression == ANY_COMP_ZLIB && !read_zheader (r))
472 read_record (struct sfm_reader *r, int type,
473 size_t *allocated_vars, size_t *allocated_labels)
480 if (r->n_vars >= *allocated_vars)
481 r->vars = pool_2nrealloc (r->pool, r->vars, allocated_vars,
483 return read_variable_record (r, &r->vars[r->n_vars++]);
486 if (r->n_labels >= *allocated_labels)
487 r->labels = pool_2nrealloc (r->pool, r->labels, allocated_labels,
489 return read_value_label_record (r, &r->labels[r->n_labels++]);
492 /* A Type 4 record is always immediately after a type 3 record,
493 so the code for type 3 records reads the type 4 record too. */
494 sys_error (r, r->pos, _("Misplaced type 4 record."));
498 if (r->document != NULL)
500 sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
503 r->document = read_document_record (r);
504 return r->document != NULL;
507 if (!read_int (r, &subtype))
510 || subtype >= sizeof r->extensions / sizeof *r->extensions)
513 _("Unrecognized record type 7, subtype %d. For help, "
514 "please send this file to %s and mention that you were "
516 subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
517 return skip_extension_record (r, subtype);
519 else if (subtype == 18)
521 /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
522 put each variable attribute into a separate record with subtype
523 18. I'm surprised that SPSS puts up with this. */
524 struct sfm_extension_record *ext;
525 bool ok = read_extension_record (r, subtype, &ext);
527 ll_push_tail (&r->var_attrs, &ext->ll);
530 else if (r->extensions[subtype] != NULL)
533 _("Record type 7, subtype %d found here has the same "
534 "type as the record found near offset 0x%llx. For "
535 "help, please send this file to %s and mention that "
536 "you were using %s."),
537 subtype, (long long int) r->extensions[subtype]->pos,
538 PACKAGE_BUGREPORT, PACKAGE_STRING);
539 return skip_extension_record (r, subtype);
542 return read_extension_record (r, subtype, &r->extensions[subtype]);
545 sys_error (r, r->pos, _("Unrecognized record type %d."), type);
552 /* Returns the character encoding obtained from R, or a null pointer if R
553 doesn't have an indication of its character encoding. */
555 sfm_get_encoding (const struct sfm_reader *r)
557 /* The EXT_ENCODING record is the best way to determine dictionary
559 if (r->extensions[EXT_ENCODING])
560 return r->extensions[EXT_ENCODING]->data;
562 /* But EXT_INTEGER is better than nothing as a fallback. */
563 if (r->extensions[EXT_INTEGER])
565 int codepage = parse_int (r, r->extensions[EXT_INTEGER]->data, 7 * 4);
566 const char *encoding;
575 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
576 respectively. However, many files have character code 2 but data
577 which are clearly not ASCII. Therefore, ignore these values. */
584 encoding = sys_get_encoding_from_codepage (codepage);
585 if (encoding != NULL)
591 /* If the file magic number is EBCDIC then its character data is too. */
592 if (!strcmp (r->header.magic, EBCDIC_MAGIC))
598 struct get_strings_aux
609 add_string__ (struct get_strings_aux *aux,
610 const char *string, bool id, char *title)
/* Stores one entry at index AUX->n of AUX's three parallel arrays (titles,
   strings, ids).  STRING is duplicated into AUX's pool; TITLE is stored as
   passed, without copying; ID is stored by value. */
/* All three arrays share a single capacity, AUX->allocated, so they are
   enlarged together whenever the next index would fall outside it. */
612 if (aux->n >= aux->allocated)
614 aux->allocated = 2 * (aux->allocated + 1);
615 aux->titles = pool_realloc (aux->pool, aux->titles,
616 aux->allocated * sizeof *aux->titles);
617 aux->strings = pool_realloc (aux->pool, aux->strings,
618 aux->allocated * sizeof *aux->strings);
619 aux->ids = pool_realloc (aux->pool, aux->ids,
620 aux->allocated * sizeof *aux->ids);
623 aux->titles[aux->n] = title;
624 aux->strings[aux->n] = pool_strdup (aux->pool, string);
625 aux->ids[aux->n] = id;
/* Adds STRING to AUX with its ID flag false.  The entry's title is formatted
   into AUX's pool by pool_vasprintf() from the printf-style format TITLE and
   the arguments that follow it. */
629 static void PRINTF_FORMAT (3, 4)
630 add_string (struct get_strings_aux *aux,
631 const char *string, const char *title, ...)
635 va_start (args, title);
636 add_string__ (aux, string, false, pool_vasprintf (aux->pool, title, args));
/* Adds ID to AUX with its ID flag true.  The entry's title is formatted into
   AUX's pool by pool_vasprintf() from the printf-style format TITLE and the
   arguments that follow it. */
640 static void PRINTF_FORMAT (3, 4)
641 add_id (struct get_strings_aux *aux, const char *id, const char *title, ...)
645 va_start (args, title);
646 add_string__ (aux, id, true, pool_vasprintf (aux->pool, title, args));
650 /* Retrieves significant string data from R in its raw format, to allow the
651 caller to try to detect the encoding in use.
653 Returns the number of strings retrieved N. Sets each of *TITLESP, *IDSP,
654 and *STRINGSP to an array of N elements allocated from POOL. For each I in
655 0...N-1, UTF-8 string *TITLESP[I] describes *STRINGSP[I], which is in
656 whatever encoding system file R uses. *IDSP[I] is true if *STRINGSP[I] must
657 be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
660 sfm_get_strings (const struct any_reader *r_, struct pool *pool,
661 char ***titlesp, bool **idsp, char ***stringsp)
663 struct sfm_reader *r = sfm_reader_cast (r_);
664 const struct sfm_mrset *mrset;
665 struct get_strings_aux aux;
677 for (i = 0; i < r->n_vars; i++)
678 if (r->vars[i].width != -1)
679 add_id (&aux, r->vars[i].name, _("Variable %zu"), ++var_idx);
682 for (i = 0; i < r->n_vars; i++)
683 if (r->vars[i].width != -1)
686 if (r->vars[i].label)
687 add_string (&aux, r->vars[i].label, _("Variable %zu Label"),
692 for (i = 0; i < r->n_labels; i++)
693 for (j = 0; j < r->labels[i].n_labels; j++)
694 add_string (&aux, r->labels[i].labels[j].label,
695 _("Value Label %zu"), k++);
697 add_string (&aux, r->header.creation_date, _("Creation Date"));
698 add_string (&aux, r->header.creation_time, _("Creation Time"));
699 add_string (&aux, r->header.eye_catcher, _("Product"));
700 add_string (&aux, r->header.file_label, _("File Label"));
702 if (r->extensions[EXT_PRODUCT_INFO])
703 add_string (&aux, r->extensions[EXT_PRODUCT_INFO]->data,
704 _("Extra Product Info"));
710 for (i = 0; i < r->document->n_lines; i++)
714 memcpy (line, r->document->documents + i * 80, 80);
717 add_string (&aux, line, _("Document Line %zu"), i + 1);
721 for (mrset = r->mrsets; mrset < &r->mrsets[r->n_mrsets]; mrset++)
723 size_t mrset_idx = mrset - r->mrsets + 1;
725 add_id (&aux, mrset->name, _("MRSET %zu"), mrset_idx);
727 add_string (&aux, mrset->label, _("MRSET %zu Label"), mrset_idx);
729 /* Skip the variables because they ought to be duplicates. */
732 add_string (&aux, mrset->counted, _("MRSET %zu Counted Value"),
736 /* data file attributes */
737 /* variable attributes */
739 /* long string value labels */
740 /* long string missing values */
742 *titlesp = aux.titles;
744 *stringsp = aux.strings;
748 /* Decodes the dictionary read from R, saving it into *DICT. Character
749 strings in R are decoded using ENCODING, or an encoding obtained from R if
750 ENCODING is null, or the locale encoding if R specifies no encoding.
752 If INFOP is non-null, then it receives additional info about the system
753 file, which the caller must eventually free with any_read_info_destroy()
754 when it is no longer needed.
756 This function consumes R. The caller must not use it again later, even to
757 destroy it with sfm_close(). */
758 static struct casereader *
759 sfm_decode (struct any_reader *r_, const char *encoding,
760 struct dictionary **dictp, struct any_read_info *infop)
762 struct sfm_reader *r = sfm_reader_cast (r_);
763 struct dictionary *dict;
766 if (encoding == NULL)
768 encoding = sfm_get_encoding (r);
769 if (encoding == NULL)
771 sys_warn (r, -1, _("This system file does not indicate its own "
772 "character encoding. Using default encoding "
773 "%s. For best results, specify an encoding "
774 "explicitly. Use SYSFILE INFO with "
775 "ENCODING=\"DETECT\" to analyze the possible "
778 encoding = locale_charset ();
782 dict = dict_create (encoding);
783 r->encoding = dict_get_encoding (dict);
785 /* These records don't use variables at all. */
786 if (r->document != NULL)
787 parse_document (dict, r->document);
789 if (r->extensions[EXT_INTEGER] != NULL
790 && !parse_machine_integer_info (r, r->extensions[EXT_INTEGER], &r->info))
793 if (r->extensions[EXT_FLOAT] != NULL)
794 parse_machine_float_info (r, r->extensions[EXT_FLOAT]);
796 if (r->extensions[EXT_PRODUCT_INFO] != NULL)
797 parse_extra_product_info (r, r->extensions[EXT_PRODUCT_INFO], &r->info);
799 if (r->extensions[EXT_FILE_ATTRS] != NULL)
800 parse_data_file_attributes (r, r->extensions[EXT_FILE_ATTRS], dict);
802 parse_header (r, &r->header, &r->info, dict);
804 /* Parse the variable records, the basis of almost everything else. */
805 if (!parse_variable_records (r, dict, r->vars, r->n_vars))
808 /* Parse value labels and the weight variable immediately after the variable
809 records. These records use indexes into r->vars[], so we must parse them
810 before those indexes become invalidated by very long string variables. */
811 for (i = 0; i < r->n_labels; i++)
812 if (!parse_value_labels (r, dict, r->vars, r->n_vars, &r->labels[i]))
814 if (r->header.weight_idx != 0)
816 struct variable *weight_var;
818 weight_var = lookup_var_by_index (r, 76, r->vars, r->n_vars,
819 r->header.weight_idx);
820 if (weight_var != NULL)
822 if (var_is_numeric (weight_var))
823 dict_set_weight (dict, weight_var);
825 sys_warn (r, -1, _("Ignoring string variable `%s' set "
826 "as weighting variable."),
827 var_get_name (weight_var));
831 if (r->extensions[EXT_DISPLAY] != NULL)
832 parse_display_parameters (r, r->extensions[EXT_DISPLAY], dict);
834 /* The following records use short names, so they need to be parsed before
835 parse_long_var_name_map() changes short names to long names. */
836 decode_mrsets (r, dict);
838 if (r->extensions[EXT_LONG_STRINGS] != NULL
839 && !parse_long_string_map (r, r->extensions[EXT_LONG_STRINGS], dict))
842 /* Now rename variables to their long names. */
843 parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict);
845 /* The following records use long names, so they need to follow renaming. */
846 if (!ll_is_empty (&r->var_attrs))
848 struct sfm_extension_record *ext;
849 ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs)
850 parse_variable_attributes (r, ext, dict);
852 /* Roles use the $@Role attribute. */
853 assign_variable_roles (r, dict);
855 if (r->extensions[EXT_LONG_LABELS] != NULL)
856 parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict);
857 if (r->extensions[EXT_LONG_MISSING] != NULL)
858 parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
861 /* Warn if the actual amount of data per case differs from the
862 amount that the header claims. SPSS version 13 gets this
863 wrong when very long strings are involved, so don't warn in
865 if (r->header.nominal_case_size != -1
866 && r->header.nominal_case_size != r->n_vars
867 && r->info.version_major != 13)
868 sys_warn (r, -1, _("File header claims %d variable positions but "
869 "%zu were read from file."),
870 r->header.nominal_case_size, r->n_vars);
872 /* Create an index of dictionary variable widths for
873 sfm_read_case to use. We cannot use the `struct variable's
874 from the dictionary we created, because the caller owns the
875 dictionary and may destroy or modify its variables. */
876 sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
877 pool_register (r->pool, free, r->sfm_vars);
878 r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
884 memset (&r->info, 0, sizeof r->info);
887 return casereader_create_sequential
889 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
890 &sys_file_casereader_class, r);
899 /* Closes R, which should have been returned by sfm_open() but not already
900 closed with sfm_decode() or this function.
901 Returns true if an I/O error has occurred on READER, false
904 sfm_close (struct any_reader *r_)
906 struct sfm_reader *r = sfm_reader_cast (r_);
911 if (fn_close (r->fh, r->file) == EOF)
913 msg (ME, _("Error closing system file `%s': %s."),
914 fh_get_file_name (r->fh), strerror (errno));
920 any_read_info_destroy (&r->info);
925 pool_destroy (r->pool);
930 /* Destroys READER. */
932 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
934 struct sfm_reader *r = r_;
935 sfm_close (&r->any_reader);
938 /* Detects whether FILE is an SPSS system file. Returns 1 if so, 0 if not, and
939 a negative errno value if there is an error reading FILE. */
941 sfm_detect (FILE *file)
945 if (fseek (file, 0, SEEK_SET) != 0)
947 if (fread (magic, 4, 1, file) != 1)
948 return ferror (file) ? -errno : 0;
951 return (!strcmp (ASCII_MAGIC, magic)
952 || !strcmp (ASCII_ZMAGIC, magic)
953 || !strcmp (EBCDIC_MAGIC, magic));
956 /* Reads the global header of the system file. Initializes *HEADER and *INFO,
957 except for the string fields in *INFO, which parse_header() will initialize
958 later once the file's encoding is known. */
960 read_header (struct sfm_reader *r, struct any_read_info *info,
961 struct sfm_header_record *header)
963 uint8_t raw_layout_code[4];
968 if (!read_string (r, header->magic, sizeof header->magic)
969 || !read_string (r, header->eye_catcher, sizeof header->eye_catcher))
972 if (!strcmp (ASCII_MAGIC, header->magic)
973 || !strcmp (EBCDIC_MAGIC, header->magic))
975 else if (!strcmp (ASCII_ZMAGIC, header->magic))
979 sys_error (r, 0, _("This is not an SPSS system file."));
983 /* Identify integer format. */
984 if (!read_bytes (r, raw_layout_code, sizeof raw_layout_code))
986 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
988 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
990 || (r->integer_format != INTEGER_MSB_FIRST
991 && r->integer_format != INTEGER_LSB_FIRST))
993 sys_error (r, 64, _("This is not an SPSS system file."));
997 if (!read_int (r, &header->nominal_case_size))
1000 if (header->nominal_case_size < 0
1001 || header->nominal_case_size > INT_MAX / 16)
1002 header->nominal_case_size = -1;
1004 if (!read_int (r, &compressed))
1008 if (compressed == 0)
1009 r->compression = ANY_COMP_NONE;
1010 else if (compressed == 1)
1011 r->compression = ANY_COMP_SIMPLE;
1012 else if (compressed != 0)
1014 sys_error (r, 0, "System file header has invalid compression "
1015 "value %d.", compressed);
1021 if (compressed == 2)
1022 r->compression = ANY_COMP_ZLIB;
1025 sys_error (r, 0, "ZLIB-compressed system file header has invalid "
1026 "compression value %d.", compressed);
1031 if (!read_int (r, &header->weight_idx))
1034 if (!read_int (r, &r->case_cnt))
1036 if ( r->case_cnt > INT_MAX / 2)
1039 /* Identify floating-point format and obtain compression bias. */
1040 if (!read_bytes (r, raw_bias, sizeof raw_bias))
1042 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
1044 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
1046 if (memcmp (raw_bias, zero_bias, 8))
1047 sys_warn (r, r->pos - 8,
1048 _("Compression bias is not the usual "
1049 "value of 100, or system file uses unrecognized "
1050 "floating-point format."));
1053 /* Some software is known to write all-zeros to this
1054 field. Such software also writes floating-point
1055 numbers in the format that we expect by default
1056 (it seems that all software most likely does, in
1057 reality), so don't warn in this case. */
1060 if (r->integer_format == INTEGER_MSB_FIRST)
1061 r->float_format = FLOAT_IEEE_DOUBLE_BE;
1063 r->float_format = FLOAT_IEEE_DOUBLE_LE;
1065 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
1067 if (!read_string (r, header->creation_date, sizeof header->creation_date)
1068 || !read_string (r, header->creation_time, sizeof header->creation_time)
1069 || !read_string (r, header->file_label, sizeof header->file_label)
1070 || !skip_bytes (r, 3))
1073 info->integer_format = r->integer_format;
1074 info->float_format = r->float_format;
1075 info->compression = r->compression;
1076 info->case_cnt = r->case_cnt;
1081 /* Reads a variable (type 2) record from R into RECORD. */
1083 read_variable_record (struct sfm_reader *r, struct sfm_var_record *record)
1085 int has_variable_label;
1087 memset (record, 0, sizeof *record);
1089 record->pos = r->pos;
1090 if (!read_int (r, &record->width)
1091 || !read_int (r, &has_variable_label)
1092 || !read_int (r, &record->missing_value_code)
1093 || !read_int (r, &record->print_format)
1094 || !read_int (r, &record->write_format)
1095 || !read_string (r, record->name, sizeof record->name))
1098 if (has_variable_label == 1)
1100 enum { MAX_LABEL_LEN = 65536 };
1101 unsigned int len, read_len;
1103 if (!read_uint (r, &len))
1106 /* Read up to MAX_LABEL_LEN bytes of label. */
1107 read_len = MIN (MAX_LABEL_LEN, len);
1108 record->label = pool_malloc (r->pool, read_len + 1);
1109 if (!read_string (r, record->label, read_len + 1))
1112 /* Skip unread label bytes. */
1113 if (!skip_bytes (r, len - read_len))
1116 /* Skip label padding up to multiple of 4 bytes. */
1117 if (!skip_bytes (r, ROUND_UP (len, 4) - len))
1120 else if (has_variable_label != 0)
1122 sys_error (r, record->pos,
1123 _("Variable label indicator field is not 0 or 1."));
1127 /* Set missing values. */
1128 if (record->missing_value_code != 0)
1130 int code = record->missing_value_code;
1131 if (record->width == 0)
1133 if (code < -3 || code > 3 || code == -1)
1135 sys_error (r, record->pos,
1136 _("Numeric missing value indicator field is not "
1137 "-3, -2, 0, 1, 2, or 3."));
1143 if (code < 1 || code > 3)
1145 sys_error (r, record->pos,
1146 _("String missing value indicator field is not "
1152 if (!read_bytes (r, record->missing, 8 * abs (code)))
1159 /* Reads value labels from R into RECORD. */
1161 read_value_label_record (struct sfm_reader *r,
1162 struct sfm_value_label_record *record)
1167 /* Read type 3 record. */
1168 record->pos = r->pos;
1169 if (!read_uint (r, &record->n_labels))
1171 if (record->n_labels > UINT_MAX / sizeof *record->labels)
1173 sys_error (r, r->pos - 4, _("Invalid number of labels %u."),
1177 record->labels = pool_nmalloc (r->pool, record->n_labels,
1178 sizeof *record->labels);
1179 for (i = 0; i < record->n_labels; i++)
1181 struct sfm_value_label *label = &record->labels[i];
1182 unsigned char label_len;
1185 if (!read_bytes (r, label->value, sizeof label->value))
1188 /* Read label length. */
1189 if (!read_bytes (r, &label_len, sizeof label_len))
1191 padded_len = ROUND_UP (label_len + 1, 8);
1193 /* Read label, padding. */
1194 label->label = pool_malloc (r->pool, padded_len + 1);
1195 if (!read_bytes (r, label->label, padded_len - 1))
1197 label->label[label_len] = '\0';
1200 /* Read record type of type 4 record. */
1201 if (!read_int (r, &type))
1205 sys_error (r, r->pos - 4,
1206 _("Variable index record (type 4) does not immediately "
1207 "follow value label record (type 3) as it should."));
1211 /* Read number of variables associated with value label from type 4
1213 if (!read_uint (r, &record->n_vars))
1215 if (record->n_vars < 1 || record->n_vars > r->n_vars)
1217 sys_error (r, r->pos - 4,
1218 _("Number of variables associated with a value label (%u) "
1219 "is not between 1 and the number of variables (%zu)."),
1220 record->n_vars, r->n_vars);
1224 record->vars = pool_nmalloc (r->pool, record->n_vars, sizeof *record->vars);
1225 for (i = 0; i < record->n_vars; i++)
1226 if (!read_int (r, &record->vars[i]))
1232 /* Reads a document record from R and returns it. */
1233 static struct sfm_document_record *
1234 read_document_record (struct sfm_reader *r)
1236 struct sfm_document_record *record;
1239 record = pool_malloc (r->pool, sizeof *record);
1240 record->pos = r->pos;
1242 if (!read_int (r, &n_lines))
1244 if (n_lines <= 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
1246 sys_error (r, record->pos,
1247 _("Number of document lines (%d) "
1248 "must be greater than 0 and less than %d."),
1249 n_lines, INT_MAX / DOC_LINE_LENGTH);
1253 record->n_lines = n_lines;
1254 record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
1255 if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines))
1262 read_extension_record_header (struct sfm_reader *r, int subtype,
1263 struct sfm_extension_record *record)
1265 record->subtype = subtype;
1266 record->pos = r->pos;
1267 if (!read_uint (r, &record->size) || !read_uint (r, &record->count))
1270 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
1271 allows an extra byte for a null terminator, used by some
1272 extension processing routines. */
1273 if (record->size != 0
1274 && xsum (1, xtimes (record->count, record->size)) >= UINT_MAX)
1276 sys_error (r, record->pos, "Record type 7 subtype %d too large.",
1284 /* Reads an extension record from R into RECORD. */
1286 read_extension_record (struct sfm_reader *r, int subtype,
1287 struct sfm_extension_record **recordp)
1289 struct extension_record_type
1296 static const struct extension_record_type types[] =
1298 /* Implemented record types. */
1299 { EXT_INTEGER, 4, 8 },
1300 { EXT_FLOAT, 8, 3 },
1301 { EXT_MRSETS, 1, 0 },
1302 { EXT_PRODUCT_INFO, 1, 0 },
1303 { EXT_DISPLAY, 4, 0 },
1304 { EXT_LONG_NAMES, 1, 0 },
1305 { EXT_LONG_STRINGS, 1, 0 },
1306 { EXT_NCASES, 8, 2 },
1307 { EXT_FILE_ATTRS, 1, 0 },
1308 { EXT_VAR_ATTRS, 1, 0 },
1309 { EXT_MRSETS2, 1, 0 },
1310 { EXT_ENCODING, 1, 0 },
1311 { EXT_LONG_LABELS, 1, 0 },
1312 { EXT_LONG_MISSING, 1, 0 },
1314 /* Ignored record types. */
1315 { EXT_VAR_SETS, 0, 0 },
1317 { EXT_DATA_ENTRY, 0, 0 },
1318 { EXT_DATAVIEW, 0, 0 },
1321 const struct extension_record_type *type;
1322 struct sfm_extension_record *record;
1326 record = pool_malloc (r->pool, sizeof *record);
1327 if (!read_extension_record_header (r, subtype, record))
1329 n_bytes = record->count * record->size;
1331 for (type = types; type < &types[sizeof types / sizeof *types]; type++)
1332 if (subtype == type->subtype)
1334 if (type->size > 0 && record->size != type->size)
1335 sys_warn (r, record->pos,
1336 _("Record type 7, subtype %d has bad size %u "
1337 "(expected %d)."), subtype, record->size, type->size);
1338 else if (type->count > 0 && record->count != type->count)
1339 sys_warn (r, record->pos,
1340 _("Record type 7, subtype %d has bad count %u "
1341 "(expected %d)."), subtype, record->count, type->count);
1342 else if (type->count == 0 && type->size == 0)
1344 /* Ignore this record. */
1348 char *data = pool_malloc (r->pool, n_bytes + 1);
1349 data[n_bytes] = '\0';
1351 record->data = data;
1352 if (!read_bytes (r, record->data, n_bytes))
1361 sys_warn (r, record->pos,
1362 _("Unrecognized record type 7, subtype %d. For help, please "
1363 "send this file to %s and mention that you were using %s."),
1364 subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
1367 return skip_bytes (r, n_bytes);
1371 skip_extension_record (struct sfm_reader *r, int subtype)
1373 struct sfm_extension_record record;
1375 return (read_extension_record_header (r, subtype, &record)
1376 && skip_bytes (r, record.count * record.size));
1380 parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
1381 struct any_read_info *info, struct dictionary *dict)
1383 const char *dict_encoding = dict_get_encoding (dict);
1384 struct substring product;
1385 struct substring label;
1388 /* Convert file label to UTF-8 and put it into DICT. */
1389 label = recode_substring_pool ("UTF-8", dict_encoding,
1390 ss_cstr (header->file_label), r->pool);
1391 ss_trim (&label, ss_cstr (" "));
1392 label.string[label.length] = '\0';
1393 fixed_label = fix_line_ends (label.string);
1394 dict_set_label (dict, fixed_label);
1397 /* Put creation date and time in UTF-8 into INFO. */
1398 info->creation_date = recode_string ("UTF-8", dict_encoding,
1399 header->creation_date, -1);
1400 info->creation_time = recode_string ("UTF-8", dict_encoding,
1401 header->creation_time, -1);
1403 /* Put product name into INFO, dropping eye-catcher string if present. */
1404 product = recode_substring_pool ("UTF-8", dict_encoding,
1405 ss_cstr (header->eye_catcher), r->pool);
1406 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
1407 ss_trim (&product, ss_cstr (" "));
1408 info->product = ss_xstrdup (product);
1411 /* Reads a variable (type 2) record from R and adds the
1412 corresponding variable to DICT.
1413 Also skips past additional variable records for long string
1416 parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
1417 struct sfm_var_record *var_recs, size_t n_var_recs)
1419 const char *dict_encoding = dict_get_encoding (dict);
1420 struct sfm_var_record *rec;
1423 for (rec = var_recs; rec < &var_recs[n_var_recs]; )
1425 struct variable *var;
1430 name = recode_string_pool ("UTF-8", dict_encoding,
1431 rec->name, -1, r->pool);
1432 name[strcspn (name, " ")] = '\0';
1434 if (!dict_id_is_valid (dict, name, false)
1435 || name[0] == '$' || name[0] == '#')
1437 sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
1441 if (rec->width < 0 || rec->width > 255)
1443 sys_error (r, rec->pos,
1444 _("Bad width %d for variable %s."), rec->width, name);
1448 var = rec->var = dict_create_var (dict, name, rec->width);
1451 char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
1452 sys_warn (r, rec->pos, _("Renaming variable with duplicate name "
1455 var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
1459 /* Set the short name the same as the long name. */
1460 var_set_short_name (var, 0, name);
1462 /* Get variable label, if any. */
1467 utf8_label = recode_string_pool ("UTF-8", dict_encoding,
1468 rec->label, -1, r->pool);
1469 var_set_label (var, utf8_label);
1472 /* Set missing values. */
1473 if (rec->missing_value_code != 0)
1475 int width = var_get_width (var);
1476 struct missing_values mv;
1478 mv_init_pool (r->pool, &mv, width);
1479 if (var_is_numeric (var))
1481 bool has_range = rec->missing_value_code < 0;
1482 int n_discrete = (has_range
1483 ? rec->missing_value_code == -3
1484 : rec->missing_value_code);
1489 double low = parse_float (r, rec->missing, 0);
1490 double high = parse_float (r, rec->missing, 8);
1492 /* Deal with SPSS 21 change in representation. */
1496 mv_add_range (&mv, low, high);
1500 for (i = 0; i < n_discrete; i++)
1502 mv_add_num (&mv, parse_float (r, rec->missing, ofs));
1507 for (i = 0; i < rec->missing_value_code; i++)
1508 mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
1509 var_set_missing_values (var, &mv);
1513 parse_format_spec (r, rec->pos + 12, rec->print_format,
1514 PRINT_FORMAT, var, &n_warnings);
1515 parse_format_spec (r, rec->pos + 16, rec->write_format,
1516 WRITE_FORMAT, var, &n_warnings);
1518 /* Account for values.
1519 Skip long string continuation records, if any. */
1520 n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
1521 for (i = 1; i < n_values; i++)
1522 if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
1524 sys_error (r, rec->pos, _("Missing string continuation record."));
1533 /* Translates the format spec from sysfile format to internal
1536 parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
1537 enum which_format which, struct variable *v,
1540 const int max_warnings = 8;
1541 uint8_t raw_type = format >> 16;
1542 uint8_t w = format >> 8;
1551 ok = (fmt_from_io (raw_type, &f.type)
1552 && fmt_check_output (&f)
1553 && fmt_check_width_compat (&f, var_get_width (v)));
1558 if (which == PRINT_FORMAT)
1559 var_set_print_format (v, &f);
1561 var_set_write_format (v, &f);
1563 else if (format == 0)
1565 /* Actually observed in the wild. No point in warning about it. */
1567 else if (++*n_warnings <= max_warnings)
1569 if (which == PRINT_FORMAT)
1570 sys_warn (r, pos, _("Variable %s with width %d has invalid print "
1572 var_get_name (v), var_get_width (v), format);
1574 sys_warn (r, pos, _("Variable %s with width %d has invalid write "
1576 var_get_name (v), var_get_width (v), format);
1578 if (*n_warnings == max_warnings)
1579 sys_warn (r, -1, _("Suppressing further invalid format warnings."));
1584 parse_document (struct dictionary *dict, struct sfm_document_record *record)
1588 for (p = record->documents;
1589 p < record->documents + DOC_LINE_LENGTH * record->n_lines;
1590 p += DOC_LINE_LENGTH)
1592 struct substring line;
1594 line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
1595 ss_buffer (p, DOC_LINE_LENGTH), NULL);
1596 ss_rtrim (&line, ss_cstr (" "));
1597 line.string[line.length] = '\0';
1599 dict_add_document_line (dict, line.string, false);
1605 /* Parses record type 7, subtype 3. */
1607 parse_machine_integer_info (struct sfm_reader *r,
1608 const struct sfm_extension_record *record,
1609 struct any_read_info *info)
1611 int float_representation, expected_float_format;
1612 int integer_representation, expected_integer_format;
1614 /* Save version info. */
1615 info->version_major = parse_int (r, record->data, 0);
1616 info->version_minor = parse_int (r, record->data, 4);
1617 info->version_revision = parse_int (r, record->data, 8);
1619 /* Check floating point format. */
1620 float_representation = parse_int (r, record->data, 16);
1621 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
1622 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
1623 expected_float_format = 1;
1624 else if (r->float_format == FLOAT_Z_LONG)
1625 expected_float_format = 2;
1626 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
1627 expected_float_format = 3;
1630 if (float_representation != expected_float_format)
1632 sys_error (r, record->pos,
1633 _("Floating-point representation indicated by "
1634 "system file (%d) differs from expected (%d)."),
1635 float_representation, expected_float_format);
1639 /* Check integer format. */
1640 integer_representation = parse_int (r, record->data, 24);
1641 if (r->integer_format == INTEGER_MSB_FIRST)
1642 expected_integer_format = 1;
1643 else if (r->integer_format == INTEGER_LSB_FIRST)
1644 expected_integer_format = 2;
1647 if (integer_representation != expected_integer_format)
1648 sys_warn (r, record->pos,
1649 _("Integer format indicated by system file (%d) "
1650 "differs from expected (%d)."),
1651 integer_representation, expected_integer_format);
1656 /* Parses record type 7, subtype 4. */
1658 parse_machine_float_info (struct sfm_reader *r,
1659 const struct sfm_extension_record *record)
1661 double sysmis = parse_float (r, record->data, 0);
1662 double highest = parse_float (r, record->data, 8);
1663 double lowest = parse_float (r, record->data, 16);
1665 if (sysmis != SYSMIS)
1666 sys_warn (r, record->pos,
1667 _("File specifies unexpected value %g (%a) as %s, "
1668 "instead of %g (%a)."),
1669 sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
1671 if (highest != HIGHEST)
1672 sys_warn (r, record->pos,
1673 _("File specifies unexpected value %g (%a) as %s, "
1674 "instead of %g (%a)."),
1675 highest, highest, "HIGHEST", HIGHEST, HIGHEST);
1677 /* SPSS before version 21 used a unique value just bigger than SYSMIS as
1678 LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
1679 appears in a context (missing values) where SYSMIS cannot. */
1680 if (lowest != LOWEST && lowest != SYSMIS)
1681 sys_warn (r, record->pos,
1682 _("File specifies unexpected value %g (%a) as %s, "
1683 "instead of %g (%a) or %g (%a)."),
1684 lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
1687 /* Parses record type 7, subtype 10. */
1689 parse_extra_product_info (struct sfm_reader *r,
1690 const struct sfm_extension_record *record,
1691 struct any_read_info *info)
1693 struct text_record *text;
1695 text = open_text_record (r, record, true);
1696 info->product_ext = fix_line_ends (text_get_all (text));
1697 close_text_record (r, text);
1700 /* Parses record type 7, subtype 7 or 19. */
1702 parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
1703 size_t *allocated_mrsets)
1705 struct text_record *text;
1707 text = open_text_record (r, record, false);
1710 struct sfm_mrset *mrset;
1711 size_t allocated_vars;
1714 /* Skip extra line feeds if present. */
1715 while (text_match (text, '\n'))
1718 if (r->n_mrsets >= *allocated_mrsets)
1719 r->mrsets = pool_2nrealloc (r->pool, r->mrsets, allocated_mrsets,
1721 mrset = &r->mrsets[r->n_mrsets];
1722 memset(mrset, 0, sizeof *mrset);
1724 mrset->name = text_get_token (text, ss_cstr ("="), NULL);
1725 if (mrset->name == NULL)
1728 if (text_match (text, 'C'))
1730 mrset->type = MRSET_MC;
1731 if (!text_match (text, ' '))
1733 sys_warn (r, record->pos,
1734 _("Missing space following `%c' at offset %zu "
1735 "in MRSETS record."), 'C', text_pos (text));
1739 else if (text_match (text, 'D'))
1741 mrset->type = MRSET_MD;
1742 mrset->cat_source = MRSET_VARLABELS;
1744 else if (text_match (text, 'E'))
1748 mrset->type = MRSET_MD;
1749 mrset->cat_source = MRSET_COUNTEDVALUES;
1750 if (!text_match (text, ' '))
1752 sys_warn (r, record->pos,
1753 _("Missing space following `%c' at offset %zu "
1754 "in MRSETS record."), 'E', text_pos (text));
1758 number = text_get_token (text, ss_cstr (" "), NULL);
1759 if (!strcmp (number, "11"))
1760 mrset->label_from_var_label = true;
1761 else if (strcmp (number, "1"))
1762 sys_warn (r, record->pos,
1763 _("Unexpected label source value following `E' "
1764 "at offset %zu in MRSETS record."),
1769 sys_warn (r, record->pos,
1770 _("Missing `C', `D', or `E' at offset %zu "
1771 "in MRSETS record."),
1776 if (mrset->type == MRSET_MD)
1778 mrset->counted = text_parse_counted_string (r, text);
1779 if (mrset->counted == NULL)
1783 mrset->label = text_parse_counted_string (r, text);
1784 if (mrset->label == NULL)
1792 var = text_get_token (text, ss_cstr (" \n"), &delimiter);
1795 if (delimiter != '\n')
1796 sys_warn (r, record->pos,
1797 _("Missing new-line parsing variable names "
1798 "at offset %zu in MRSETS record."),
1803 if (mrset->n_vars >= allocated_vars)
1804 mrset->vars = pool_2nrealloc (r->pool, mrset->vars,
1806 sizeof *mrset->vars);
1807 mrset->vars[mrset->n_vars++] = var;
1809 while (delimiter != '\n');
1813 close_text_record (r, text);
1817 decode_mrsets (struct sfm_reader *r, struct dictionary *dict)
1819 const struct sfm_mrset *s;
1821 for (s = r->mrsets; s < &r->mrsets[r->n_mrsets]; s++)
1823 struct stringi_set var_names;
1824 struct mrset *mrset;
1829 name = recode_string ("UTF-8", r->encoding, s->name, -1);
1832 sys_warn (r, -1, _("Multiple response set name `%s' does not begin "
1839 mrset = xzalloc (sizeof *mrset);
1841 mrset->type = s->type;
1842 mrset->cat_source = s->cat_source;
1843 mrset->label_from_var_label = s->label_from_var_label;
1844 if (s->label[0] != '\0')
1845 mrset->label = recode_string ("UTF-8", r->encoding, s->label, -1);
1847 stringi_set_init (&var_names);
1848 mrset->vars = xmalloc (s->n_vars * sizeof *mrset->vars);
1850 for (i = 0; i < s->n_vars; i++)
1852 struct variable *var;
1855 var_name = recode_string ("UTF-8", r->encoding, s->vars[i], -1);
1857 var = dict_lookup_var (dict, var_name);
1863 if (!stringi_set_insert (&var_names, var_name))
1866 _("MRSET %s contains duplicate variable name %s."),
1867 mrset->name, var_name);
1873 if (mrset->label == NULL && mrset->label_from_var_label
1874 && var_has_label (var))
1875 mrset->label = xstrdup (var_get_label (var));
1878 && var_get_type (var) != var_get_type (mrset->vars[0]))
1881 _("MRSET %s contains both string and "
1882 "numeric variables."), mrset->name);
1885 width = MIN (width, var_get_width (var));
1887 mrset->vars[mrset->n_vars++] = var;
1890 if (mrset->n_vars < 2)
1892 if (mrset->n_vars == 0)
1893 sys_warn (r, -1, _("MRSET %s has no variables."), mrset->name);
1895 sys_warn (r, -1, _("MRSET %s has only one variable."),
1897 mrset_destroy (mrset);
1898 stringi_set_destroy (&var_names);
1902 if (mrset->type == MRSET_MD)
1904 mrset->width = width;
1905 value_init (&mrset->counted, width);
1907 mrset->counted.f = c_strtod (s->counted, NULL);
1909 value_copy_str_rpad (&mrset->counted, width,
1910 (const uint8_t *) s->counted, ' ');
1913 dict_add_mrset (dict, mrset);
1914 stringi_set_destroy (&var_names);
1918 /* Read record type 7, subtype 11, which specifies how variables
1919 should be displayed in GUI environments. */
1921 parse_display_parameters (struct sfm_reader *r,
1922 const struct sfm_extension_record *record,
1923 struct dictionary *dict)
1925 bool includes_width;
1926 bool warned = false;
1931 n_vars = dict_get_var_cnt (dict);
1932 if (record->count == 3 * n_vars)
1933 includes_width = true;
1934 else if (record->count == 2 * n_vars)
1935 includes_width = false;
1938 sys_warn (r, record->pos,
1939 _("Extension 11 has bad count %u (for %zu variables)."),
1940 record->count, n_vars);
1945 for (i = 0; i < n_vars; ++i)
1947 struct variable *v = dict_get_var (dict, i);
1948 int measure, width, align;
1950 measure = parse_int (r, record->data, ofs);
1955 width = parse_int (r, record->data, ofs);
1961 align = parse_int (r, record->data, ofs);
1964 /* SPSS sometimes seems to set variables' measure to zero. */
1968 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1971 sys_warn (r, record->pos,
1972 _("Invalid variable display parameters for variable "
1973 "%zu (%s). Default parameters substituted."),
1974 i, var_get_name (v));
1979 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1980 : measure == 2 ? MEASURE_ORDINAL
1982 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1983 : align == 1 ? ALIGN_RIGHT
1986 /* Older versions (SPSS 9.0) sometimes set the display
1987 width to zero. This causes confusion in the GUI, so
1988 only set the width if it is nonzero. */
1990 var_set_display_width (v, width);
/* Renames VAR, which is in DICT, to NEW_NAME, keeping all of its
   short names as they were. */
static void
rename_var_and_save_short_names (struct dictionary *dict, struct variable *var,
                                 const char *new_name)
{
  size_t n_saved = var_get_short_name_cnt (var);
  char **saved = xnmalloc (n_saved, sizeof *saved);
  size_t idx;

  /* Renaming may clear the short names, so take copies first. */
  for (idx = 0; idx < n_saved; idx++)
    {
      const char *short_name = var_get_short_name (var, idx);
      saved[idx] = short_name != NULL ? xstrdup (short_name) : NULL;
    }

  dict_rename_var (dict, var, new_name);

  /* Put the short names back, releasing the copies as we go. */
  for (idx = 0; idx < n_saved; idx++)
    {
      var_set_short_name (var, idx, saved[idx]);
      free (saved[idx]);
    }
  free (saved);
}
2025 /* Parses record type 7, subtype 13, which gives the long name that corresponds
2026 to each short name. Modifies variable names in DICT accordingly. */
2028 parse_long_var_name_map (struct sfm_reader *r,
2029 const struct sfm_extension_record *record,
2030 struct dictionary *dict)
2032 struct text_record *text;
2033 struct variable *var;
2038 /* There are no long variable names. Use the short variable names,
2039 converted to lowercase, as the long variable names. */
2042 for (i = 0; i < dict_get_var_cnt (dict); i++)
2044 struct variable *var = dict_get_var (dict, i);
2047 new_name = utf8_to_lower (var_get_name (var));
2048 rename_var_and_save_short_names (dict, var, new_name);
2055 /* Rename each of the variables, one by one. (In a correctly constructed
2056 system file, this cannot create any intermediate duplicate variable names,
2057 because all of the new variable names are longer than any of the old
2058 variable names and thus there cannot be any overlaps.) */
2059 text = open_text_record (r, record, true);
2060 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
2062 /* Validate long name. */
2063 if (!dict_id_is_valid (dict, long_name, false)
2064 || long_name[0] == '$' || long_name[0] == '#')
2066 sys_warn (r, record->pos,
2067 _("Long variable mapping from %s to invalid "
2068 "variable name `%s'."),
2069 var_get_name (var), long_name);
2073 /* Identify any duplicates. */
2074 if (utf8_strcasecmp (var_get_short_name (var, 0), long_name)
2075 && dict_lookup_var (dict, long_name) != NULL)
2077 sys_warn (r, record->pos,
2078 _("Duplicate long variable name `%s'."), long_name);
2082 rename_var_and_save_short_names (dict, var, long_name);
2084 close_text_record (r, text);
2087 /* Reads record type 7, subtype 14, which gives the real length
2088 of each very long string. Rearranges DICT accordingly. */
2090 parse_long_string_map (struct sfm_reader *r,
2091 const struct sfm_extension_record *record,
2092 struct dictionary *dict)
2094 struct text_record *text;
2095 struct variable *var;
2098 text = open_text_record (r, record, true);
2099 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
2101 size_t idx = var_get_dict_index (var);
2107 length = strtol (length_s, NULL, 10);
2108 if (length < 1 || length > MAX_STRING)
2110 sys_warn (r, record->pos,
2111 _("%s listed as string of invalid length %s "
2112 "in very long string record."),
2113 var_get_name (var), length_s);
2117 /* Check segments. */
2118 segment_cnt = sfm_width_to_segments (length);
2119 if (segment_cnt == 1)
2121 sys_warn (r, record->pos,
2122 _("%s listed in very long string record with width %s, "
2123 "which requires only one segment."),
2124 var_get_name (var), length_s);
2127 if (idx + segment_cnt > dict_get_var_cnt (dict))
2129 sys_error (r, record->pos,
2130 _("Very long string %s overflows dictionary."),
2131 var_get_name (var));
2135 /* Get the short names from the segments and check their
2137 for (i = 0; i < segment_cnt; i++)
2139 struct variable *seg = dict_get_var (dict, idx + i);
2140 int alloc_width = sfm_segment_alloc_width (length, i);
2141 int width = var_get_width (seg);
2144 var_set_short_name (var, i, var_get_short_name (seg, 0));
2145 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
2147 sys_error (r, record->pos,
2148 _("Very long string with width %ld has segment %d "
2149 "of width %d (expected %d)."),
2150 length, i, width, alloc_width);
2154 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
2155 var_set_width (var, length);
2157 close_text_record (r, text);
2158 dict_compact_values (dict);
2164 parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
2165 const struct sfm_var_record *var_recs, size_t n_var_recs,
2166 const struct sfm_value_label_record *record)
2168 struct variable **vars;
2172 utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
2173 for (i = 0; i < record->n_labels; i++)
2174 utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
2175 record->labels[i].label, -1,
2178 vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
2179 for (i = 0; i < record->n_vars; i++)
2181 vars[i] = lookup_var_by_index (r, record->pos,
2182 var_recs, n_var_recs, record->vars[i]);
2183 if (vars[i] == NULL)
2187 for (i = 1; i < record->n_vars; i++)
2188 if (var_get_type (vars[i]) != var_get_type (vars[0]))
2190 sys_error (r, record->pos,
2191 _("Variables associated with value label are not all of "
2192 "identical type. Variable %s is %s, but variable "
2194 var_get_name (vars[0]),
2195 var_is_numeric (vars[0]) ? _("numeric") : _("string"),
2196 var_get_name (vars[i]),
2197 var_is_numeric (vars[i]) ? _("numeric") : _("string"));
2201 for (i = 0; i < record->n_vars; i++)
2203 struct variable *var = vars[i];
2207 width = var_get_width (var);
2210 sys_error (r, record->pos,
2211 _("Value labels may not be added to long string "
2212 "variables (e.g. %s) using records types 3 and 4."),
2213 var_get_name (var));
2217 for (j = 0; j < record->n_labels; j++)
2219 struct sfm_value_label *label = &record->labels[j];
2222 value_init (&value, width);
2224 value.f = parse_float (r, label->value, 0);
2226 memcpy (value_str_rw (&value, width), label->value, width);
2228 if (!var_add_value_label (var, &value, utf8_labels[j]))
2230 if (var_is_numeric (var))
2231 sys_warn (r, record->pos,
2232 _("Duplicate value label for %g on %s."),
2233 value.f, var_get_name (var));
2235 sys_warn (r, record->pos,
2236 _("Duplicate value label for `%.*s' on %s."),
2237 width, value_str (&value, width),
2238 var_get_name (var));
2241 value_destroy (&value, width);
2245 pool_free (r->pool, vars);
2246 for (i = 0; i < record->n_labels; i++)
2247 pool_free (r->pool, utf8_labels[i]);
2248 pool_free (r->pool, utf8_labels);
2253 static struct variable *
2254 lookup_var_by_index (struct sfm_reader *r, off_t offset,
2255 const struct sfm_var_record *var_recs, size_t n_var_recs,
2258 const struct sfm_var_record *rec;
2260 if (idx < 1 || idx > n_var_recs)
2262 sys_error (r, offset,
2263 _("Variable index %d not in valid range 1...%zu."),
2268 rec = &var_recs[idx - 1];
2269 if (rec->var == NULL)
2271 sys_error (r, offset,
2272 _("Variable index %d refers to long string continuation."),
/* Parses a set of custom attributes from TEXT into ATTRS.
   ATTRS may be a null pointer, in which case the attributes are
   read but discarded.

   Each attribute is a key that ends at `(', followed by one or
   more values that each end at a new-line, followed by `)'.
   Values should be enclosed in single quotes, which are removed; a
   value that is not quoted is used as is, with a warning.  After
   an attribute, `/' means that another attribute follows.

   An attribute for which no value could be parsed is discarded,
   so that no attribute with an empty list of values is ever added
   to ATTRS.  An attribute whose key is already in ATTRS is
   discarded with a warning. */
static void
parse_attributes (struct sfm_reader *r, struct text_record *text,
                  struct attrset *attrs)
{
  do
    {
      struct attribute *attr;
      size_t n_values = 0;
      char *key;
      int index;

      /* Parse the key. */
      key = text_get_token (text, ss_cstr ("("), NULL);
      if (key == NULL)
        return;

      attr = attribute_create (key);
      for (index = 1; ; index++)
        {
          /* Parse the value. */
          char *value;
          size_t length;

          value = text_get_token (text, ss_cstr ("\n"), NULL);
          if (value == NULL)
            {
              text_warn (r, text, _("Error parsing attribute value %s[%d]."),
                         key, index);
              break;
            }

          length = strlen (value);
          if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
            {
              /* Strip the enclosing quotes. */
              value[length - 1] = '\0';
              attribute_add_value (attr, value + 1);
            }
          else
            {
              text_warn (r, text,
                         _("Attribute value %s[%d] is not quoted: %s."),
                         key, index, value);
              attribute_add_value (attr, value);
            }
          n_values++;

          /* Was this the last value for this attribute? */
          if (text_match (text, ')'))
            break;
        }

      if (attrs == NULL || n_values == 0)
        {
          /* Either the caller wants the attributes discarded, or
             the first value failed to parse (already warned). */
          attribute_destroy (attr);
        }
      else if (attrset_lookup (attrs, key) != NULL)
        {
          /* NOTE(review): attrset_add() presumably requires that
             the name not already be in the set -- confirm. */
          text_warn (r, text, _("Duplicate attribute %s."), key);
          attribute_destroy (attr);
        }
      else
        attrset_add (attrs, attr);
    }
  while (!text_match (text, '/'));
}
/* Parses record type 7, subtype 17, which holds custom attributes
   of the data file as a whole, into DICT's attribute set. */
static void
parse_data_file_attributes (struct sfm_reader *r,
                            const struct sfm_extension_record *record,
                            struct dictionary *dict)
{
  struct attrset *file_attrs = dict_get_attributes (dict);
  struct text_record *text;

  text = open_text_record (r, record, true);
  parse_attributes (r, text, file_attrs);
  close_text_record (r, text);
}
/* Parses record type 7, subtype 18, which holds custom attributes
   of individual variables.  Each group of attributes follows a
   variable name that ends at `:'.  If no variable is found for a
   name, that group is still read, but it is discarded. */
static void
parse_variable_attributes (struct sfm_reader *r,
                           const struct sfm_extension_record *record,
                           struct dictionary *dict)
{
  struct text_record *text = open_text_record (r, record, true);
  struct variable *var;

  while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
    {
      struct attrset *var_attrs = NULL;

      if (var != NULL)
        var_attrs = var_get_attributes (var);
      parse_attributes (r, text, var_attrs);
    }
  close_text_record (r, text);
}
2368 assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
2370 size_t n_warnings = 0;
2373 for (i = 0; i < dict_get_var_cnt (dict); i++)
2375 struct variable *var = dict_get_var (dict, i);
2376 struct attrset *attrs = var_get_attributes (var);
2377 const struct attribute *attr = attrset_lookup (attrs, "$@Role");
2380 int value = atoi (attribute_get_value (attr, 0));
2402 role = ROLE_PARTITION;
2411 if (n_warnings++ == 0)
2412 sys_warn (r, -1, _("Invalid role for variable %s."),
2413 var_get_name (var));
2416 var_set_role (var, role);
2421 sys_warn (r, -1, _("%zu other variables had invalid roles."),
2426 check_overflow (struct sfm_reader *r,
2427 const struct sfm_extension_record *record,
2428 size_t ofs, size_t length)
2430 size_t end = record->size * record->count;
2431 if (length >= end || ofs + length > end)
2433 sys_warn (r, record->pos + end,
2434 _("Extension record subtype %d ends unexpectedly."),
2442 parse_long_string_value_labels (struct sfm_reader *r,
2443 const struct sfm_extension_record *record,
2444 struct dictionary *dict)
2446 const char *dict_encoding = dict_get_encoding (dict);
2447 size_t end = record->size * record->count;
2454 struct variable *var;
2459 /* Parse variable name length. */
2460 if (!check_overflow (r, record, ofs, 4))
2462 var_name_len = parse_int (r, record->data, ofs);
2465 /* Parse variable name, width, and number of labels. */
2466 if (!check_overflow (r, record, ofs, var_name_len + 8))
2468 var_name = recode_string_pool ("UTF-8", dict_encoding,
2469 (const char *) record->data + ofs,
2470 var_name_len, r->pool);
2471 width = parse_int (r, record->data, ofs + var_name_len);
2472 n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
2473 ofs += var_name_len + 8;
2475 /* Look up 'var' and validate. */
2476 var = dict_lookup_var (dict, var_name);
2478 sys_warn (r, record->pos + ofs,
2479 _("Ignoring long string value label record for "
2480 "unknown variable %s."), var_name);
2481 else if (var_is_numeric (var))
2483 sys_warn (r, record->pos + ofs,
2484 _("Ignoring long string value label record for "
2485 "numeric variable %s."), var_name);
2488 else if (width != var_get_width (var))
2490 sys_warn (r, record->pos + ofs,
2491 _("Ignoring long string value label record for variable "
2492 "%s because the record's width (%d) does not match the "
2493 "variable's width (%d)."),
2494 var_name, width, var_get_width (var));
2499 value_init_pool (r->pool, &value, width);
2500 for (i = 0; i < n_labels; i++)
2502 size_t value_length, label_length;
2503 bool skip = var == NULL;
2505 /* Parse value length. */
2506 if (!check_overflow (r, record, ofs, 4))
2508 value_length = parse_int (r, record->data, ofs);
2512 if (!check_overflow (r, record, ofs, value_length))
2516 if (value_length == width)
2517 memcpy (value_str_rw (&value, width),
2518 (const uint8_t *) record->data + ofs, width);
2521 sys_warn (r, record->pos + ofs,
2522 _("Ignoring long string value label %zu for "
2523 "variable %s, with width %d, that has bad value "
2525 i, var_get_name (var), width, value_length);
2529 ofs += value_length;
2531 /* Parse label length. */
2532 if (!check_overflow (r, record, ofs, 4))
2534 label_length = parse_int (r, record->data, ofs);
2538 if (!check_overflow (r, record, ofs, label_length))
2544 label = recode_string_pool ("UTF-8", dict_encoding,
2545 (const char *) record->data + ofs,
2546 label_length, r->pool);
2547 if (!var_add_value_label (var, &value, label))
2548 sys_warn (r, record->pos + ofs,
2549 _("Duplicate value label for `%.*s' on %s."),
2550 width, value_str (&value, width),
2551 var_get_name (var));
2552 pool_free (r->pool, label);
2554 ofs += label_length;
/* Parses extension RECORD, which carries missing values for long string
   variables, and applies them to the matching variables in DICT.

   As read by the code below, each entry consists of a 4-byte variable name
   length, the variable name, a 1-byte count of missing values and then, for
   each missing value, a 4-byte length followed by that many bytes of data.
   check_overflow() is consulted before each field is consumed.  A count
   outside 1...3, an unknown or numeric variable, and a value rejected by
   mv_add_str() are each reported with sys_warn(). */
2560 parse_long_string_missing_values (struct sfm_reader *r,
2561 const struct sfm_extension_record *record,
2562 struct dictionary *dict)
2564 const char *dict_encoding = dict_get_encoding (dict);
2565 size_t end = record->size * record->count;
2570 struct missing_values mv;
2572 struct variable *var;
2573 int n_missing_values;
2577 /* Parse variable name length. */
2578 if (!check_overflow (r, record, ofs, 4))
2580 var_name_len = parse_int (r, record->data, ofs);
2583 /* Parse variable name. */
/* The "+ 1" also covers the count byte that follows the name, which is read
   below without a separate check_overflow() call. */
2584 if (!check_overflow (r, record, ofs, var_name_len + 1))
2586 var_name = recode_string_pool ("UTF-8", dict_encoding,
2587 (const char *) record->data + ofs,
2588 var_name_len, r->pool);
2589 ofs += var_name_len;
2591 /* Parse number of missing values. */
2592 n_missing_values = ((const uint8_t *) record->data)[ofs];
2593 if (n_missing_values < 1 || n_missing_values > 3)
2594 sys_warn (r, record->pos + ofs,
2595 _("Long string missing values record says variable %s "
2596 "has %d missing values, but only 1 to 3 missing values "
2598 var_name, n_missing_values);
2601 /* Look up 'var' and validate. */
2602 var = dict_lookup_var (dict, var_name);
2604 sys_warn (r, record->pos + ofs,
2605 _("Ignoring long string missing value record for "
2606 "unknown variable %s."), var_name);
2607 else if (var_is_numeric (var))
2609 sys_warn (r, record->pos + ofs,
2610 _("Ignoring long string missing value record for "
2611 "numeric variable %s."), var_name);
/* MV takes the variable's width, or width 8 when VAR is null. */
2616 mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
2617 for (i = 0; i < n_missing_values; i++)
2619 size_t value_length;
2621 /* Parse value length. */
2622 if (!check_overflow (r, record, ofs, 4))
2624 value_length = parse_int (r, record->data, ofs);
2628 if (!check_overflow (r, record, ofs, value_length))
2632 && !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
2634 sys_warn (r, record->pos + ofs,
2635 _("Ignoring long string missing value %zu for variable "
2636 "%s, with width %d, that has bad value width %zu."),
2637 i, var_get_name (var), var_get_width (var),
/* The value's bytes are stepped over whether or not the value was added. */
2639 ofs += value_length;
2642 var_set_missing_values (var, &mv);
/* Prototypes for the case-reading helpers that are defined after
   sys_file_casereader_read() below. */
2648 static void partial_record (struct sfm_reader *);
2650 static void read_error (struct casereader *, const struct sfm_reader *);
2652 static bool read_case_number (struct sfm_reader *, double *);
2653 static int read_case_string (struct sfm_reader *, uint8_t *, size_t);
2654 static int read_opcode (struct sfm_reader *);
2655 static bool read_compressed_number (struct sfm_reader *, double *);
2656 static int read_compressed_string (struct sfm_reader *, uint8_t *);
2657 static int read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
2658 static bool skip_whole_strings (struct sfm_reader *, size_t);
2660 /* Reads and returns one case from READER's file. Returns a null
2661 pointer if not successful. */
/* R_ is the struct sfm_reader for the file.  Nothing is read when R already
   has its error flag set or has no entries in sfm_vars.  Otherwise a case is
   created from R's prototype and each sfm_vars entry fills in the value at
   its case_index: entries with var_width 0 are read as numbers, all others
   as SEGMENT_WIDTH string bytes stored OFFSET bytes into the value. */
2662 static struct ccase *
2663 sys_file_casereader_read (struct casereader *reader, void *r_)
2665 struct sfm_reader *r = r_;
2670 if (r->error || !r->sfm_var_cnt)
2673 c = case_create (r->proto);
2675 for (i = 0; i < r->sfm_var_cnt; i++)
2677 struct sfm_var *sv = &r->sfm_vars[i];
2678 union value *v = case_data_rw_idx (c, sv->case_index);
2680 if (sv->var_width == 0)
2681 retval = read_case_number (r, &v->f);
2684 uint8_t *s = value_str_rw (v, sv->var_width);
2685 retval = read_case_string (r, s + sv->offset, sv->segment_width);
/* Discard the segment's padding, rounded down to a whole number of 8-byte
   units. */
2688 retval = skip_whole_strings (r, ROUND_DOWN (sv->padding, 8));
2690 sys_error (r, r->pos, _("File ends in partial string value."));
/* A case count of -1 suppresses the read_error() report. */
2702 if (r->case_cnt != -1)
2703 read_error (reader, r);
2708 /* Issues an error that R ends in a partial record.  The error is reported
   through sys_error() at R's current position, R->pos. */
2710 partial_record (struct sfm_reader *r)
2712 sys_error (r, r->pos, _("File ends in partial case."));
2715 /* Issues an error that an unspecified error occurred while reading SFM, and
   forces casereader R into an error state. */
/* R is the casereader that gets flagged with casereader_force_error(); SFM
   only supplies the file handle whose name appears in the message. */
2718 read_error (struct casereader *r, const struct sfm_reader *sfm)
2720 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
2721 casereader_force_error (r);
2724 /* Reads a number from R and stores its value in *D.
2725 If R is compressed, reads a compressed number;
2726 otherwise, reads a number in the regular way.
2727 Returns true if successful, false if end of file is
2728 reached immediately. */
2730 read_case_number (struct sfm_reader *r, double *d)
2732 if (r->compression == ANY_COMP_NONE)
2735 if (!try_read_bytes (r, number, sizeof number))
2737 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
2741 return read_compressed_number (r, d);
2744 /* Reads LENGTH string bytes from R into S. Always reads a multiple of 8
2745 bytes; if LENGTH is not a multiple of 8, then extra bytes are read and
2746 discarded without being written to S. Reads compressed strings if R is
2747 compressed. Returns 1 if successful, 0 if end of file is reached
2748 immediately, or -1 for some kind of error. */
2750 read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
/* WHOLE is the part of LENGTH that is a multiple of 8 and is read straight
   into S; PARTIAL is the remainder of fewer than 8 bytes. */
2752 size_t whole = ROUND_DOWN (length, 8);
2753 size_t partial = length % 8;
2757 int retval = read_whole_strings (r, s, whole);
/* The trailing fragment is read as a full unit into BOUNCE so that only its
   first PARTIAL bytes need to be copied into S. */
2765 int retval = read_whole_strings (r, bounce, sizeof bounce);
2777 memcpy (s + whole, bounce, partial);
2783 /* Reads and returns the next compression opcode from R.
   Must only be called when R is compressed (asserted below).  Opcodes are
   served from the R->opcodes buffer; once R->opcode_idx has run past the end
   of that buffer it is refilled with try_read_compressed_bytes(). */
2785 read_opcode (struct sfm_reader *r)
2787 assert (r->compression != ANY_COMP_NONE);
2791 if (r->opcode_idx >= sizeof r->opcodes)
2794 int retval = try_read_compressed_bytes (r, r->opcodes,
2800 opcode = r->opcodes[r->opcode_idx++];
2807 /* Reads a compressed number from R and stores its value in D.
2808 Returns true if successful, false if end of file is
2809 reached immediately. */
2811 read_compressed_number (struct sfm_reader *r, double *d)
2813 int opcode = read_opcode (r);
2821 return read_compressed_float (r, d);
2824 float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
2825 if (!r->corruption_warning)
2827 r->corruption_warning = true;
2828 sys_warn (r, r->pos,
2829 _("Possible compressed data corruption: "
2830 "compressed spaces appear in numeric field."));
2839 *d = opcode - r->bias;
2846 /* Reads a compressed 8-byte string segment from R and stores it in DST.
   The opcode from read_opcode() decides where the 8 bytes come from: literal
   bytes read with read_compressed_bytes(), eight spaces, or, for opcodes
   that encode a number, the value OPCODE - BIAS converted into R's float
   format.  The last case is not expected in string data and causes one
   corruption warning per reader. */
2848 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
2853 opcode = read_opcode (r);
2861 retval = read_compressed_bytes (r, dst, 8);
/* Anything other than a complete read is reported to the caller as an
   error. */
2862 return retval == 1 ? 1 : -1;
2865 memset (dst, ' ', 8);
2870 double value = opcode - r->bias;
2871 float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
2874 /* This has actually been seen "in the wild". The submitter of the
2875 file that showed that the contents decoded as spaces, but they
2876 were at the end of the field so it's possible that the null
2877 bytes just acted as null terminators. */
2879 else if (!r->corruption_warning)
2881 r->corruption_warning = true;
2882 sys_warn (r, r->pos,
2883 _("Possible compressed data corruption: "
2884 "string contains compressed integer (opcode %d)."),
2892 /* Reads LENGTH string bytes from R into S. LENGTH must be a multiple of 8.
2893 Reads compressed strings if R is compressed. Returns 1 if successful, 0 if
2894 end of file is reached immediately, or -1 for some kind of error. */
2896 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
2898 assert (length % 8 == 0);
/* Uncompressed data is read in a single call; compressed data is decoded one
   8-byte segment at a time. */
2899 if (r->compression == ANY_COMP_NONE)
2900 return try_read_bytes (r, s, length);
2905 for (ofs = 0; ofs < length; ofs += 8)
2907 int retval = read_compressed_string (r, s + ofs);
/* Skips LENGTH string bytes from R.
   LENGTH must be a multiple of 8.
   (LENGTH is also limited to 1024, but that's only because the
   current caller never needs more than that many bytes.)
   Returns true if successful, false if end of file is
   reached immediately or if an error occurs. */
static bool
skip_whole_strings (struct sfm_reader *r, size_t length)
{
  uint8_t buffer[1024];

  /* BUFFER can hold exactly the documented maximum, so LENGTH equal to
     sizeof buffer is acceptable. */
  assert (length <= sizeof buffer);

  /* read_whole_strings() returns 1, 0, or -1.  Converting that directly to
     bool would turn the -1 error return into true, so compare explicitly
     against the success value. */
  return read_whole_strings (r, buffer, length) == 1;
}
2936 /* Helpers for reading records that contain structured text
   strings. */
2939 /* Maximum number of warnings to issue for a single text
   record. */
/* Limit on the warnings that text_warn() emits for one text record; see
   text_warn() and close_text_record(), which compare n_warnings against it. */
2941 #define MAX_TEXT_WARNINGS 5
/* Parsing state for one text record, as used by the text_*() helpers that
   follow. */
2946 struct substring buffer; /* Record contents. */
2947 off_t start; /* Starting offset in file. */
2948 size_t pos; /* Current position in buffer. */
2949 int n_warnings; /* Number of warnings issued or suppressed. */
2950 bool recoded; /* Recoded into UTF-8? */
/* Allocates a text_record from R's pool for parsing extension RECORD.
   The raw contents are RECORD's data, taken as record->size * record->count
   bytes, and the record's file position becomes the starting offset used
   for warnings.  When RECODE_TO_UTF8 is set, the buffer is instead the
   result of recode_substring_pool() called with "UTF-8" and R's encoding,
   which is allocated from R's pool. */
2953 static struct text_record *
2954 open_text_record (struct sfm_reader *r,
2955 const struct sfm_extension_record *record,
2956 bool recode_to_utf8)
2958 struct text_record *text;
2959 struct substring raw;
2961 text = pool_alloc (r->pool, sizeof *text);
2962 raw = ss_buffer (record->data, record->size * record->count);
2963 text->start = record->pos;
2964 text->buffer = (recode_to_utf8
2965 ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
2968 text->n_warnings = 0;
2969 text->recoded = recode_to_utf8;
2974 /* Closes TEXT, frees its storage, and issues a final warning
2975 about suppressed warnings if necessary. */
2977 close_text_record (struct sfm_reader *r, struct text_record *text)
/* n_warnings counts suppressed warnings as well as issued ones, so the
   excess over MAX_TEXT_WARNINGS is the number that were suppressed.  The
   summary is issued with offset -1. */
2979 if (text->n_warnings > MAX_TEXT_WARNINGS)
2980 sys_warn (r, -1, _("Suppressed %d additional related warnings."),
2981 text->n_warnings - MAX_TEXT_WARNINGS);
2983 pool_free (r->pool, ss_data (text->buffer));
2986 /* Reads a variable=value pair from TEXT.
2987 Looks up the variable in DICT and stores it into *VAR.
2988 Stores a null-terminated value into *VALUE. */
2990 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
2991 struct text_record *text,
2992 struct variable **var, char **value)
/* The variable name runs up to "="; the value runs up to a tab or a null
   byte. */
2996 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
2999 *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
/* Advance past the tab and null bytes that follow the value (ss_span()
   measures that leading run in the rest of the buffer). */
3003 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
3004 ss_buffer ("\t\0", 2));
/* Reads a token ending at one of DELIMITERS from TEXT, looks it up as a
   variable name in DICT, and stores the result of the lookup in *VAR.
   A name that DICT does not contain is reported with text_warn(). */
3012 text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
3013 struct text_record *text, struct substring delimiters,
3014 struct variable **var)
3018 name = text_get_token (text, delimiters, NULL);
3022 *var = dict_lookup_var (dict, name);
3026 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
/* Reads a token ending at one of DELIMITERS from TEXT and looks it up in
   DICT with dict_lookup_var(), storing the result of the lookup in *VAR.
   An unknown name is reported with text_warn().  Nothing is looked up when
   TEXT yields no further token. */
3033 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
3034 struct text_record *text, struct substring delimiters,
3035 struct variable **var)
3037 char *short_name = text_get_token (text, delimiters, NULL);
3038 if (short_name == NULL)
3041 *var = dict_lookup_var (dict, short_name);
3043 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
3048 /* Displays a warning for the current file position, limiting the
3049 number to MAX_TEXT_WARNINGS for TEXT.
   FORMAT and the arguments after it are printf-style.  The warning counter
   is incremented on every call, including calls whose warning is
   suppressed, and the reported offset is TEXT's starting offset plus its
   current position. */
3051 text_warn (struct sfm_reader *r, struct text_record *text,
3052 const char *format, ...)
3054 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
3058 va_start (args, format);
3059 sys_msg (r, text->start + text->pos, MW, format, args);
/* Extracts the next token from TEXT's buffer, using ss_tokenize() with
   DELIMITERS and advancing TEXT's position.  The returned pointer is the
   token's data inside TEXT's buffer, so the caller does not own it.
   DELIMITER may be null; it is only consulted when it is non-null. */
3065 text_get_token (struct text_record *text, struct substring delimiters,
3068 struct substring token;
3071 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
/* END addresses the byte just past the token. */
3074 end = &ss_data (token)[ss_length (token)];
3075 if (delimiter != NULL)
3078 return ss_data (token);
3081 /* Reads a integer value expressed in decimal, then a space, then a string that
3082 consists of exactly as many bytes as specified by the integer, then a space,
3083 from TEXT. Returns the string, null-terminated, as a subset of TEXT's
3084 buffer (so the caller should not free the string). */
3086 text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
3094 while (text->pos < text->buffer.length)
3096 int c = text->buffer.string[text->pos];
3097 if (c < '0' || c > '9')
3099 n = (n * 10) + (c - '0');
3102 if (text->pos >= text->buffer.length || start == text->pos)
3104 sys_warn (r, text->start,
3105 _("Expecting digit at offset %zu in MRSETS record."),
3110 if (!text_match (text, ' '))
3112 sys_warn (r, text->start,
3113 _("Expecting space at offset %zu in MRSETS record."),
3118 if (text->pos + n > text->buffer.length)
3120 sys_warn (r, text->start,
3121 _("%zu-byte string starting at offset %zu "
3122 "exceeds record length %zu."),
3123 n, text->pos, text->buffer.length);
3127 s = &text->buffer.string[text->pos];
3130 sys_warn (r, text->start,
3131 _("Expecting space at offset %zu following %zu-byte string."),
/* Tests whether the byte at TEXT's current position is C.  The buffer is
   not examined when the position is already at or past its end. */
3141 text_match (struct text_record *text, char c)
3143 if (text->pos >= text->buffer.length)
3146 if (text->buffer.string[text->pos] == c)
3155 /* Returns the current byte offset (as converted to UTF-8, if it was converted)
3156 inside TEXT's string. */
3158 text_pos (const struct text_record *text)
/* Returns a pointer to the start of TEXT's buffer, regardless of the current
   position. */
3164 text_get_all (const struct text_record *text)
3166 return text->buffer.string;
3171 /* Displays a corruption message.
   The text is built from a prefix naming R's file, either with or without
   OFFSET printed in hexadecimal, followed by FORMAT expanded with ARGS.
   CLASS determines the message's category and severity.
   NOTE(review): close_text_record() passes -1 as OFFSET, which suggests that
   negative offsets select the prefix without an offset -- confirm. */
3173 sys_msg (struct sfm_reader *r, off_t offset,
3174 int class, const char *format, va_list args)
3179 ds_init_empty (&text);
3181 ds_put_format (&text, _("`%s' near offset 0x%llx: "),
3182 fh_get_file_name (r->fh), (long long int) offset);
3184 ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
3185 ds_put_vformat (&text, format, args);
3187 m.category = msg_class_to_category (class);
3188 m.severity = msg_class_to_severity (class);
3194 m.text = ds_cstr (&text);
3199 /* Displays a warning for offset OFFSET in the file.
   FORMAT and the arguments after it are printf-style; they are forwarded to
   sys_msg() with message class MW. */
3201 sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
3205 va_start (args, format);
3206 sys_msg (r, offset, MW, format, args);
3210 /* Displays an error for offset OFFSET in the file and marks R as in an error
   state. */
/* FORMAT and the arguments after it are printf-style; they are forwarded to
   sys_msg() with message class ME. */
3213 sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
3217 va_start (args, format);
3218 sys_msg (r, offset, ME, format, args);
3224 /* Reads BYTE_CNT bytes into BUF.
3225 Returns 1 if exactly BYTE_CNT bytes are successfully read.
3226 Returns -1 if an I/O error or a partial read occurs.
3227 Returns 0 for an immediate end-of-file and, if EOF_IS_OK is false, reports
   an error. */
/* Common implementation of read_bytes() and try_read_bytes(), which differ
   only in the EOF_IS_OK argument they pass. */
3230 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
3231 void *buf, size_t byte_cnt)
3233 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
/* R's position advances by what was actually read, even on a short read. */
3234 r->pos += bytes_read;
3235 if (bytes_read == byte_cnt)
3237 else if (ferror (r->file))
3239 sys_error (r, r->pos, _("System error: %s."), strerror (errno));
/* A short read without a stream error is end of file.  It is reported unless
   the caller tolerates end of file and nothing at all was read. */
3242 else if (!eof_is_ok || bytes_read != 0)
3244 sys_error (r, r->pos, _("Unexpected end of file."));
3251 /* Reads BYTE_CNT bytes into BUF.
3252 Returns true if successful.
3253 Returns false upon I/O error or if end-of-file is encountered.
   End of file is never acceptable here: read_bytes_internal() is called with
   EOF_IS_OK false. */
3255 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3257 return read_bytes_internal (r, false, buf, byte_cnt) == 1;
3260 /* Reads BYTE_CNT bytes into BUF.
3261 Returns 1 if exactly BYTE_CNT bytes are successfully read.
3262 Returns 0 if an immediate end-of-file is encountered.
3263 Returns -1 if an I/O error or a partial read occurs.
   Because the result has three states, callers must compare it against 1
   rather than testing it for truth. */
3265 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3267 return read_bytes_internal (r, true, buf, byte_cnt);
3270 /* Reads a 32-bit signed integer from R and stores its value in host format in
3271 *X. Returns true if successful, otherwise false.
   The bytes are decoded by integer_get() according to R's integer format. */
3273 read_int (struct sfm_reader *r, int *x)
3276 if (read_bytes (r, integer, sizeof integer) != 1)
3278 *x = integer_get (r->integer_format, integer, sizeof integer);
/* Reads a 32-bit unsigned integer from R and stores its value in host format
   in *X.  Returns true if successful, otherwise false, in which case *X is
   left unmodified. */
static bool
read_uint (struct sfm_reader *r, unsigned int *x)
{
  int y;

  /* read_int() only sets Y when it succeeds, so Y must not be copied into
     *X after a failed read. */
  if (!read_int (r, &y))
    return false;

  *x = y;
  return true;
}
3293 /* Reads a 64-bit signed integer from R and stores its value in host format
   in *X. */
/* The bytes are read with read_bytes() and decoded by integer_get()
   according to R's integer format.  *X is only assigned after a successful
   read. */
3296 read_int64 (struct sfm_reader *r, long long int *x)
3299 if (read_bytes (r, integer, sizeof integer) != 1)
3301 *x = integer_get (r->integer_format, integer, sizeof integer);
/* Reads a 64-bit unsigned integer from R and stores its value in host format
   in *X.  Returns true if successful, otherwise false, in which case *X is
   left unmodified. */
static bool
read_uint64 (struct sfm_reader *r, unsigned long long int *x)
{
  long long int y;

  /* read_int64() only sets Y when it succeeds, so Y must not be copied into
     *X after a failed read. */
  if (!read_int64 (r, &y))
    return false;

  *x = y;
  return true;
}
/* Returns the 4-byte integer found OFS bytes into DATA, decoded by
   integer_get() according to R's integer format.  No bounds checking is done
   here; the caller must ensure that DATA has 4 bytes at OFS. */
3319 parse_int (const struct sfm_reader *r, const void *data, size_t ofs)
3321 return integer_get (r->integer_format, (const uint8_t *) data + ofs, 4);
/* Returns the floating-point number found OFS bytes into DATA, decoded by
   float_get_double() according to R's float format.  No bounds checking is
   done here. */
3325 parse_float (const struct sfm_reader *r, const void *data, size_t ofs)
3327 return float_get_double (r->float_format, (const uint8_t *) data + ofs);
3330 /* Reads exactly SIZE - 1 bytes into BUFFER
3331 and stores a null byte into BUFFER[SIZE - 1].
   BUFFER must therefore have room for SIZE bytes, and SIZE must be at least
   1 for the SIZE - 1 arithmetic to be meaningful. */
3333 read_string (struct sfm_reader *r, char *buffer, size_t size)
3338 ok = read_bytes (r, buffer, size - 1);
3340 buffer[size - 1] = '\0';
3344 /* Skips BYTES bytes forward in R.
   The bytes are consumed by reading them with read_bytes() into a scratch
   buffer, at most sizeof buffer bytes per read, rather than by seeking. */
3346 skip_bytes (struct sfm_reader *r, size_t bytes)
3351 size_t chunk = MIN (sizeof buffer, bytes);
3352 if (!read_bytes (r, buffer, chunk))
3360 /* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have
3361 been replaced by LFs.
3363 (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
3364 files that use CR-only line ends in the file label and extra product
   info.) */
/* S must be a null-terminated string.  The result is allocated with
   xmalloc() and sized for strlen (S) + 1 bytes. */
3367 fix_line_ends (const char *s)
3371 d = dst = xmalloc (strlen (s) + 1);
/* Forward declaration: read_zheader() below calls read_ztrailer() before its
   definition. */
3390 read_ztrailer (struct sfm_reader *r,
3391 long long int zheader_ofs,
3392 long long int ztrailer_len);
/* Allocation callback that read_zheader() installs in the zlib stream.
   POOL_ is the stream's opaque pointer, which read_zheader() sets to the
   reader's pool.  pool_malloc() is only called when SIZE is nonzero and
   xalloc_oversized() does not flag ITEMS * SIZE. */
3395 zalloc (voidpf pool_, uInt items, uInt size)
3397 struct pool *pool = pool_;
3399 return (!size || xalloc_oversized (items, size)
3401 : pool_malloc (pool, items * size));
/* Deallocation callback that read_zheader() installs in the zlib stream.
   Releases ADDRESS back to the pool passed as POOL_. */
3405 zfree (voidpf pool_, voidpf address)
3407 struct pool *pool = pool_;
3409 pool_free (pool, address);
/* Reads and validates the ZLIB data header from R, then prepares R's zlib
   stream for reading compressed data.

   The header consists of three 64-bit integers: the header's own offset, the
   trailer's offset, and the trailer's length.  Each is checked before the
   trailer itself is examined by read_ztrailer().  The return value is the
   result of open_zstream(). */
3413 read_zheader (struct sfm_reader *r)
3416 long long int zheader_ofs;
3417 long long int ztrailer_ofs;
3418 long long int ztrailer_len;
3420 if (!read_int64 (r, &zheader_ofs)
3421 || !read_int64 (r, &ztrailer_ofs)
3422 || !read_int64 (r, &ztrailer_len))
/* NOTE(review): POS is presumably R's position before the header was read --
   confirm against its declaration. */
3425 if (zheader_ofs != pos)
3427 sys_error (r, pos, _("Wrong ZLIB data header offset %#llx "
3428 "(expected %#llx)."),
3429 zheader_ofs, (long long int) pos);
/* The trailer cannot begin before the current position, just past the
   header. */
3433 if (ztrailer_ofs < r->pos)
3435 sys_error (r, pos, _("Impossible ZLIB trailer offset 0x%llx."),
/* The length must be a nonzero multiple of 24 bytes. */
3440 if (ztrailer_len < 24 || ztrailer_len % 24)
3442 sys_error (r, pos, _("Invalid ZLIB trailer length %lld."), ztrailer_len);
3446 r->ztrailer_ofs = ztrailer_ofs;
3447 if (!read_ztrailer (r, zheader_ofs, ztrailer_len))
/* The input and output buffers are allocated from R's pool only if R does
   not have them yet. */
3450 if (r->zin_buf == NULL)
3452 r->zin_buf = pool_malloc (r->pool, ZIN_BUF_SIZE);
3453 r->zout_buf = pool_malloc (r->pool, ZOUT_BUF_SIZE);
3454 r->zstream.next_in = NULL;
3455 r->zstream.avail_in = 0;
/* Route zlib's own allocations through R's pool. */
3458 r->zstream.zalloc = zalloc;
3459 r->zstream.zfree = zfree;
3460 r->zstream.opaque = r->pool;
3462 return open_zstream (r);
/* Positions R's file at absolute offset OFFSET with fseeko().  A failure is
   reported through sys_error() with the file name and the system error
   text. */
3466 seek (struct sfm_reader *r, off_t offset)
3468 if (fseeko (r->file, offset, SEEK_SET))
3469 sys_error (r, 0, _("%s: seek failed (%s)."),
3470 fh_get_file_name (r->fh), strerror (errno));
3474 /* Performs some additional consistency checks on the ZLIB compressed data
3477 read_ztrailer (struct sfm_reader *r,
3478 long long int zheader_ofs,
3479 long long int ztrailer_len)
3481 long long int expected_uncmp_ofs;
3482 long long int expected_cmp_ofs;
3485 unsigned int block_size;
3486 unsigned int n_blocks;
3490 if (fstat (fileno (r->file), &s))
3492 sys_error (ME, 0, _("%s: stat failed (%s)."),
3493 fh_get_file_name (r->fh), strerror (errno));
3497 if (!S_ISREG (s.st_mode))
3499 /* We can't seek to the trailer and then back to the data in this file,
3500 so skip doing extra checks. */
3504 if (r->ztrailer_ofs + ztrailer_len != s.st_size)
3505 sys_warn (r, r->pos,
3506 _("End of ZLIB trailer (0x%llx) is not file size (0x%llx)."),
3507 r->ztrailer_ofs + ztrailer_len, (long long int) s.st_size);
3509 seek (r, r->ztrailer_ofs);
3511 /* Read fixed header from ZLIB data trailer. */
3512 if (!read_int64 (r, &bias))
3514 if (-bias != r->bias)
3516 sys_error (r, r->pos, _("ZLIB trailer bias (%lld) differs from "
3517 "file header bias (%.2f)."),
3522 if (!read_int64 (r, &zero))
3525 sys_warn (r, r->pos,
3526 _("ZLIB trailer \"zero\" field has nonzero value %lld."), zero);
3528 if (!read_uint (r, &block_size))
3530 if (block_size != ZBLOCK_SIZE)
3531 sys_warn (r, r->pos,
3532 _("ZLIB trailer specifies unexpected %u-byte block size."),
3535 if (!read_uint (r, &n_blocks))
3537 if (n_blocks != (ztrailer_len - 24) / 24)
3539 sys_error (r, r->pos,
3540 _("%lld-byte ZLIB trailer specifies %u data blocks (expected "
3542 ztrailer_len, n_blocks, (ztrailer_len - 24) / 24);
3546 expected_uncmp_ofs = zheader_ofs;
3547 expected_cmp_ofs = zheader_ofs + 24;
3548 for (i = 0; i < n_blocks; i++)
3550 off_t desc_ofs = r->pos;
3551 unsigned long long int uncompressed_ofs;
3552 unsigned long long int compressed_ofs;
3553 unsigned int uncompressed_size;
3554 unsigned int compressed_size;
3556 if (!read_uint64 (r, &uncompressed_ofs)
3557 || !read_uint64 (r, &compressed_ofs)
3558 || !read_uint (r, &uncompressed_size)
3559 || !read_uint (r, &compressed_size))
3562 if (uncompressed_ofs != expected_uncmp_ofs)
3564 sys_error (r, desc_ofs,
3565 _("ZLIB block descriptor %u reported uncompressed data "
3566 "offset %#llx, when %#llx was expected."),
3567 i, uncompressed_ofs, expected_uncmp_ofs);
3571 if (compressed_ofs != expected_cmp_ofs)
3573 sys_error (r, desc_ofs,
3574 _("ZLIB block descriptor %u reported compressed data "
3575 "offset %#llx, when %#llx was expected."),
3576 i, compressed_ofs, expected_cmp_ofs);
3580 if (i < n_blocks - 1)
3582 if (uncompressed_size != block_size)
3583 sys_warn (r, desc_ofs,
3584 _("ZLIB block descriptor %u reported block size %#x, "
3585 "when %#x was expected."),
3586 i, uncompressed_size, block_size);
3590 if (uncompressed_size > block_size)
3591 sys_warn (r, desc_ofs,
3592 _("ZLIB block descriptor %u reported block size %#x, "
3593 "when at most %#x was expected."),
3594 i, uncompressed_size, block_size);
3597 /* http://www.zlib.net/zlib_tech.html says that the maximum expansion
3598 from compression, with worst-case parameters, is 13.5% plus 11 bytes.
3599 This code checks for an expansion of more than 14.3% plus 11
3601 if (compressed_size > uncompressed_size + uncompressed_size / 7 + 11)
3603 sys_error (r, desc_ofs,
3604 _("ZLIB block descriptor %u reports compressed size %u "
3605 "and uncompressed size %u."),
3606 i, compressed_size, uncompressed_size);
3610 expected_uncmp_ofs += uncompressed_size;
3611 expected_cmp_ofs += compressed_size;
3614 if (expected_cmp_ofs != r->ztrailer_ofs)
3616 sys_error (r, r->pos, _("ZLIB trailer is at offset %#llx but %#llx "
3617 "would be expected from block descriptors."),
3618 r->ztrailer_ofs, expected_cmp_ofs);
3622 seek (r, zheader_ofs + 24);
/* Starts a new zlib inflate stream on R: the inflated-output window is
   emptied and inflateInit() is called.  A failure of inflateInit() is
   reported through sys_error(). */
3627 open_zstream (struct sfm_reader *r)
3631 r->zout_pos = r->zout_end = 0;
3632 error = inflateInit (&r->zstream);
3635 sys_error (r, r->pos, _("ZLIB initialization failed (%s)."),
/* Ends R's zlib inflate stream with inflateEnd().  A failure of inflateEnd()
   is reported through sys_error(). */
3643 close_zstream (struct sfm_reader *r)
3647 error = inflateEnd (&r->zstream);
3650 sys_error (r, r->pos, _("Inconsistency at end of ZLIB stream (%s)."),
/* Reads BYTE_CNT bytes of inflated data from R's ZLIB stream into BUF_.

   Data is copied out of the inflated-output buffer R->zout_buf, between
   R->zout_pos and R->zout_end.  When that runs dry, more compressed input is
   read from the file into R->zin_buf if none is pending, never reading past
   the ZLIB trailer, and inflate() refills the output buffer.  When inflate()
   produces no output at the end of a stream, the stream is closed and a new
   one is opened. */
3658 read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt)
3660 uint8_t *buf = buf_;
3669 /* Use already inflated data if there is any. */
3670 if (r->zout_pos < r->zout_end)
3672 unsigned int n = MIN (byte_cnt, r->zout_end - r->zout_pos);
3673 memcpy (buf, &r->zout_buf[r->zout_pos], n);
3682 /* We need to inflate some more data.
3683 Get some more input data if we don't have any. */
3684 if (r->zstream.avail_in == 0)
/* Compressed data ends where the trailer begins, so never read beyond
   R->ztrailer_ofs. */
3686 unsigned int n = MIN (ZIN_BUF_SIZE, r->ztrailer_ofs - r->pos);
3691 int retval = try_read_bytes (r, r->zin_buf, n);
3694 r->zstream.avail_in = n;
3695 r->zstream.next_in = r->zin_buf;
3699 /* Inflate the (remaining) input data. */
3700 r->zstream.avail_out = ZOUT_BUF_SIZE;
3701 r->zstream.next_out = r->zout_buf;
3702 error = inflate (&r->zstream, Z_SYNC_FLUSH);
/* The distance that next_out advanced is the number of bytes inflated. */
3704 r->zout_end = r->zstream.next_out - r->zout_buf;
3705 if (r->zout_end == 0)
3707 if (error != Z_STREAM_END)
3709 sys_error (r, r->pos, _("ZLIB stream inconsistency (%s)."),
3713 else if (!close_zstream (r) || !open_zstream (r))
3718 /* Process the output data and ignore 'error' for now. ZLIB will
3719 present it to us again on the next inflate() call. */
/* Reads BYTE_CNT bytes of compressed-case data from R into BUF, for which
   end of file is an error.  With ANY_COMP_SIMPLE compression the bytes come
   straight from the file through read_bytes(); otherwise they come from the
   ZLIB stream through read_bytes_zlib(), and running out of ZLIB data is
   reported here. */
3725 read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3727 if (r->compression == ANY_COMP_SIMPLE)
3728 return read_bytes (r, buf, byte_cnt);
3731 int retval = read_bytes_zlib (r, buf, byte_cnt);
3733 sys_error (r, r->pos, _("Unexpected end of ZLIB compressed data."));
/* Reads BYTE_CNT bytes of compressed-case data from R into BUF and passes
   the underlying reader's result through unchanged.  With ANY_COMP_SIMPLE
   compression that reader is try_read_bytes(); otherwise it is
   read_bytes_zlib(). */
3739 try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3741 if (r->compression == ANY_COMP_SIMPLE)
3742 return try_read_bytes (r, buf, byte_cnt);
3744 return read_bytes_zlib (r, buf, byte_cnt);
3747 /* Reads a 64-bit floating-point number from R and returns its
3748 value in host format. */
3750 read_compressed_float (struct sfm_reader *r, double *d)
3754 if (!read_compressed_bytes (r, number, sizeof number))
3757 *d = float_get_double (r->float_format, number);
/* Casereader class for system files.  Its first two members are the
   functions that read a case and destroy the reader. */
3761 static const struct casereader_class sys_file_casereader_class =
3763 sys_file_casereader_read,
3764 sys_file_casereader_destroy,
3769 const struct any_reader_class sys_file_reader_class =
3771 N_("SPSS System File"),