1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/sys-file-private.h"
28 #include "data/any-reader.h"
29 #include "data/attributes.h"
30 #include "data/case.h"
31 #include "data/casereader-provider.h"
32 #include "data/casereader.h"
33 #include "data/dictionary.h"
34 #include "data/file-handle-def.h"
35 #include "data/file-name.h"
36 #include "data/format.h"
37 #include "data/identifier.h"
38 #include "data/missing-values.h"
39 #include "data/mrset.h"
40 #include "data/short-names.h"
41 #include "data/value-labels.h"
42 #include "data/value.h"
43 #include "data/variable.h"
44 #include "libpspp/array.h"
45 #include "libpspp/assertion.h"
46 #include "libpspp/compiler.h"
47 #include "libpspp/i18n.h"
48 #include "libpspp/ll.h"
49 #include "libpspp/message.h"
50 #include "libpspp/misc.h"
51 #include "libpspp/pool.h"
52 #include "libpspp/str.h"
53 #include "libpspp/stringi-set.h"
55 #include "gl/c-strtod.h"
56 #include "gl/c-ctype.h"
57 #include "gl/inttostr.h"
58 #include "gl/localcharset.h"
59 #include "gl/minmax.h"
60 #include "gl/unlocked-io.h"
61 #include "gl/xalloc.h"
62 #include "gl/xalloc-oversized.h"
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) (msgid)
71 /* subtypes 0-2 unknown */
72 EXT_INTEGER = 3, /* Machine integer info. */
73 EXT_FLOAT = 4, /* Machine floating-point info. */
74 EXT_VAR_SETS = 5, /* Variable sets. */
75 EXT_DATE = 6, /* DATE. */
76 EXT_MRSETS = 7, /* Multiple response sets. */
77 EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */
78 /* subtype 9 unknown */
79 EXT_PRODUCT_INFO = 10, /* Extra product info text. */
80 EXT_DISPLAY = 11, /* Variable display parameters. */
81 /* subtype 12 unknown */
82 EXT_LONG_NAMES = 13, /* Long variable names. */
83 EXT_LONG_STRINGS = 14, /* Long strings. */
84 /* subtype 15 unknown */
85 EXT_NCASES = 16, /* Extended number of cases. */
86 EXT_FILE_ATTRS = 17, /* Data file attributes. */
87 EXT_VAR_ATTRS = 18, /* Variable attributes. */
88 EXT_MRSETS2 = 19, /* Multiple response sets (extended). */
89 EXT_ENCODING = 20, /* Character encoding. */
90 EXT_LONG_LABELS = 21, /* Value labels for long strings. */
91 EXT_LONG_MISSING = 22, /* Missing values for long strings. */
92 EXT_DATAVIEW = 24 /* "Format properties in dataview table". */
95 /* Fields from the top-level header record. */
96 struct sfm_header_record
98 char magic[5]; /* First 4 bytes of file, then null. */
99 int weight_idx; /* 0 if unweighted, otherwise a var index. */
100 int nominal_case_size; /* Number of var positions. */
102 /* These correspond to the members of struct any_read_info or a dictionary
103 but in the system file's encoding rather than ASCII. */
104 char creation_date[10]; /* "dd mmm yy". */
105 char creation_time[9]; /* "hh:mm:ss". */
106 char eye_catcher[61]; /* Eye-catcher string, then product name. */
107 char file_label[65]; /* File label. */
110 struct sfm_var_record
117 int missing_value_code;
120 struct variable *var;
123 struct sfm_value_label
129 struct sfm_value_label_record
132 struct sfm_value_label *labels;
133 unsigned int n_labels;
139 struct sfm_document_record
148 const char *name; /* Name. */
149 const char *label; /* Human-readable label for group. */
150 enum mrset_type type; /* Group type. */
151 const char **vars; /* Constituent variables' names. */
152 size_t n_vars; /* Number of constituent variables. */
155 enum mrset_md_cat_source cat_source; /* Source of category labels. */
156 bool label_from_var_label; /* 'label' taken from variable label? */
157 const char *counted; /* Counted value, as string. */
160 struct sfm_extension_record
162 struct ll ll; /* In struct sfm_reader 'var_attrs' list. */
163 int subtype; /* Record subtype. */
164 off_t pos; /* Starting offset in file. */
165 unsigned int size; /* Size of data elements. */
166 unsigned int count; /* Number of data elements. */
167 void *data; /* Contents. */
170 /* System file reader. */
173 struct any_reader any_reader;
175 /* Resource tracking. */
176 struct pool *pool; /* All system file state. */
179 struct any_read_info info;
180 struct sfm_header_record header;
181 struct sfm_var_record *vars;
183 struct sfm_value_label_record *labels;
185 struct sfm_document_record *document;
186 struct sfm_mrset *mrsets;
188 struct sfm_extension_record *extensions[32];
189 struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */
192 struct file_handle *fh; /* File handle. */
193 struct fh_lock *lock; /* Mutual exclusion for file handle. */
194 FILE *file; /* File stream. */
195 off_t pos; /* Position in file. */
196 bool error; /* I/O or corruption error? */
197 struct caseproto *proto; /* Format of output cases. */
200 enum integer_format integer_format; /* On-disk integer format. */
201 enum float_format float_format; /* On-disk floating point format. */
202 struct sfm_var *sfm_vars; /* Variables. */
203 size_t sfm_var_cnt; /* Number of variables. */
204 int case_cnt; /* Number of cases */
205 const char *encoding; /* String encoding. */
208 enum any_compression compression;
209 double bias; /* Compression bias, usually 100.0. */
210 uint8_t opcodes[8]; /* Current block of opcodes. */
211 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
212 bool corruption_warning; /* Warned about possible corruption? */
214 /* ZLIB decompression. */
215 long long int ztrailer_ofs; /* Offset of ZLIB trailer at end of file. */
216 #define ZIN_BUF_SIZE 4096
217 uint8_t *zin_buf; /* Inflation input buffer. */
218 #define ZOUT_BUF_SIZE 16384
219 uint8_t *zout_buf; /* Inflation output buffer. */
220 unsigned int zout_end; /* Number of bytes of data in zout_buf. */
221 unsigned int zout_pos; /* First unconsumed byte in zout_buf. */
222 z_stream zstream; /* ZLIB inflater. */
225 static const struct casereader_class sys_file_casereader_class;
/* Converts the generic reader R_ into the struct sfm_reader that holds it as
   its 'any_reader' member.  Asserts first that R_'s class is
   sys_file_reader_class, i.e. that R_ really is a system file reader. */
227 static struct sfm_reader *
228 sfm_reader_cast (const struct any_reader *r_)
230 assert (r_->klass == &sys_file_reader_class);
231 return UP_CAST (r_, struct sfm_reader, any_reader);
234 static bool sfm_close (struct any_reader *);
236 static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
237 const struct sfm_var_record *,
240 static void sys_msg (struct sfm_reader *r, off_t, int class,
241 const char *format, va_list args)
242 PRINTF_FORMAT (4, 0);
243 static void sys_warn (struct sfm_reader *, off_t, const char *, ...)
244 PRINTF_FORMAT (3, 4);
245 static void sys_error (struct sfm_reader *, off_t, const char *, ...)
246 PRINTF_FORMAT (3, 4);
248 static bool read_bytes (struct sfm_reader *, void *, size_t)
250 static int try_read_bytes (struct sfm_reader *, void *, size_t)
252 static bool read_int (struct sfm_reader *, int *) WARN_UNUSED_RESULT;
253 static bool read_uint (struct sfm_reader *, unsigned int *) WARN_UNUSED_RESULT;
254 static bool read_int64 (struct sfm_reader *, long long int *)
256 static bool read_uint64 (struct sfm_reader *, unsigned long long int *)
258 static bool read_string (struct sfm_reader *, char *, size_t)
260 static bool skip_bytes (struct sfm_reader *, size_t) WARN_UNUSED_RESULT;
262 /* ZLIB compressed data handling. */
263 static bool read_zheader (struct sfm_reader *) WARN_UNUSED_RESULT;
264 static bool open_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
265 static bool close_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
266 static int read_bytes_zlib (struct sfm_reader *, void *, size_t)
268 static int read_compressed_bytes (struct sfm_reader *, void *, size_t)
270 static int try_read_compressed_bytes (struct sfm_reader *, void *, size_t)
272 static bool read_compressed_float (struct sfm_reader *, double *)
275 static char *fix_line_ends (const char *);
277 static int parse_int (const struct sfm_reader *, const void *data, size_t ofs);
278 static double parse_float (const struct sfm_reader *,
279 const void *data, size_t ofs);
281 static bool read_variable_record (struct sfm_reader *,
282 struct sfm_var_record *);
283 static bool read_value_label_record (struct sfm_reader *,
284 struct sfm_value_label_record *);
285 static bool read_document_record (struct sfm_reader *);
286 static bool read_extension_record (struct sfm_reader *, int subtype,
287 struct sfm_extension_record **);
288 static bool skip_extension_record (struct sfm_reader *, int subtype);
290 static struct text_record *open_text_record (
291 struct sfm_reader *, const struct sfm_extension_record *,
292 bool recode_to_utf8);
293 static void close_text_record (struct sfm_reader *,
294 struct text_record *);
295 static bool read_variable_to_value_pair (struct sfm_reader *,
297 struct text_record *,
298 struct variable **var, char **value);
299 static void text_warn (struct sfm_reader *r, struct text_record *text,
300 const char *format, ...) PRINTF_FORMAT (3, 4);
301 static char *text_get_token (struct text_record *,
302 struct substring delimiters, char *delimiter);
303 static bool text_match (struct text_record *, char c);
304 static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
305 struct text_record *,
306 struct substring delimiters,
308 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
309 struct text_record *,
310 struct substring delimiters,
312 static const char *text_parse_counted_string (struct sfm_reader *,
313 struct text_record *);
314 static size_t text_pos (const struct text_record *);
315 static const char *text_get_all (const struct text_record *);
317 /* Dictionary reader. */
325 static bool read_dictionary (struct sfm_reader *);
326 static bool read_record (struct sfm_reader *, int type,
327 size_t *allocated_vars, size_t *allocated_labels);
328 static bool read_header (struct sfm_reader *, struct any_read_info *,
329 struct sfm_header_record *);
330 static void parse_header (struct sfm_reader *,
331 const struct sfm_header_record *,
332 struct any_read_info *, struct dictionary *);
333 static bool parse_variable_records (struct sfm_reader *, struct dictionary *,
334 struct sfm_var_record *, size_t n);
335 static void parse_format_spec (struct sfm_reader *, off_t pos,
336 unsigned int format, enum which_format,
337 struct variable *, int *format_warning_cnt);
338 static void parse_document (struct dictionary *, struct sfm_document_record *);
339 static void parse_display_parameters (struct sfm_reader *,
340 const struct sfm_extension_record *,
341 struct dictionary *);
342 static bool parse_machine_integer_info (struct sfm_reader *,
343 const struct sfm_extension_record *,
344 struct any_read_info *);
345 static void parse_machine_float_info (struct sfm_reader *,
346 const struct sfm_extension_record *);
347 static void parse_extra_product_info (struct sfm_reader *,
348 const struct sfm_extension_record *,
349 struct any_read_info *);
350 static void parse_mrsets (struct sfm_reader *,
351 const struct sfm_extension_record *,
352 size_t *allocated_mrsets);
353 static void decode_mrsets (struct sfm_reader *, struct dictionary *);
354 static void parse_long_var_name_map (struct sfm_reader *,
355 const struct sfm_extension_record *,
356 struct dictionary *);
357 static bool parse_long_string_map (struct sfm_reader *,
358 const struct sfm_extension_record *,
359 struct dictionary *);
360 static bool parse_value_labels (struct sfm_reader *, struct dictionary *,
361 const struct sfm_var_record *,
363 const struct sfm_value_label_record *);
364 static void parse_data_file_attributes (struct sfm_reader *,
365 const struct sfm_extension_record *,
366 struct dictionary *);
367 static void parse_variable_attributes (struct sfm_reader *,
368 const struct sfm_extension_record *,
369 struct dictionary *);
370 static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
371 static void parse_long_string_value_labels (struct sfm_reader *,
372 const struct sfm_extension_record *,
373 struct dictionary *);
374 static void parse_long_string_missing_values (
375 struct sfm_reader *, const struct sfm_extension_record *,
376 struct dictionary *);
378 /* Frees the strings inside INFO: its creation_date, creation_time, product, and product_ext members. */
380 any_read_info_destroy (struct any_read_info *info)
384 free (info->creation_date);
385 free (info->creation_time);
386 free (info->product);
387 free (info->product_ext);
391 /* Tries to open FH for reading as a system file. Returns the any_reader
392 embedded in a newly created sfm_reader if successful, otherwise NULL. */
393 static struct any_reader *
394 sfm_open (struct file_handle *fh)
396 size_t allocated_mrsets = 0;
397 struct sfm_reader *r;
399 /* Create and initialize reader. */
400 r = xzalloc (sizeof *r);
401 r->any_reader.klass = &sys_file_reader_class;
402 r->pool = pool_create ();
/* R is registered with its own pool together with free(); presumably this
   makes destroying the pool release R as well (confirm against pool.h). */
403 pool_register (r->pool, free, r);
/* Start with the opcode index at the size of the opcode array, which the
   'opcode_idx' member documents as meaning that no opcodes are left. */
405 r->opcode_idx = sizeof r->opcodes;
406 ll_init (&r->var_attrs);
408 /* TRANSLATORS: this fragment will be interpolated into
409 messages in fh_lock() that identify types of files. */
410 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
/* Open in binary mode ("rb"). */
414 r->file = fn_open (fh, "rb");
417 msg (ME, _("Error opening `%s' for reading as a system file: %s."),
418 fh_get_file_name (r->fh), strerror (errno));
422 if (!read_dictionary (r))
/* Both multiple response set record subtypes go through parse_mrsets(),
   sharing ALLOCATED_MRSETS between the two calls. */
425 if (r->extensions[EXT_MRSETS] != NULL)
426 parse_mrsets (r, r->extensions[EXT_MRSETS], &allocated_mrsets);
428 if (r->extensions[EXT_MRSETS2] != NULL)
429 parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
431 return &r->any_reader;
/* NOTE(review): presumably the failure path; it disposes of R through
   sfm_close(). */
435 sfm_close (&r->any_reader);
/* Reads the raw dictionary from R: the file header via read_header(), then
   records whose type is obtained with read_int() and which are handed to
   read_record().  Afterward 4 bytes are skipped and, when the file uses ZLIB
   compression, the ZLIB header is read with read_zheader().  Nothing is
   decoded here; the results are stored in R. */
440 read_dictionary (struct sfm_reader *r)
/* Allocated capacities of r->vars and r->labels, passed to read_record(). */
442 size_t allocated_vars;
443 size_t allocated_labels;
445 if (!read_header (r, &r->info, &r->header))
449 allocated_labels = 0;
454 if (!read_int (r, &type))
458 if (!read_record (r, type, &allocated_vars, &allocated_labels))
462 if (!skip_bytes (r, 4))
465 if (r->compression == ANY_COMP_ZLIB && !read_zheader (r))
/* Reads one record of the given TYPE from R and stores it in R.
   *ALLOCATED_VARS and *ALLOCATED_LABELS are the current capacities of
   r->vars and r->labels; they are passed to pool_2nrealloc() when the
   corresponding array is full. */
472 read_record (struct sfm_reader *r, int type,
473 size_t *allocated_vars, size_t *allocated_labels)
/* Variable record: make room in r->vars if it is full, then read into the
   next free element. */
480 if (r->n_vars >= *allocated_vars)
481 r->vars = pool_2nrealloc (r->pool, r->vars, allocated_vars,
483 return read_variable_record (r, &r->vars[r->n_vars++]);
/* Value label record: same scheme with r->labels. */
486 if (r->n_labels >= *allocated_labels)
487 r->labels = pool_2nrealloc (r->pool, r->labels, allocated_labels,
489 return read_value_label_record (r, &r->labels[r->n_labels++]);
492 /* A Type 4 record is always immediately after a type 3 record,
493 so the code for type 3 records reads the type 4 record too. */
494 sys_error (r, r->pos, _("Misplaced type 4 record."));
/* Only a single document record is accepted. */
498 if (r->document != NULL)
500 sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
503 return read_document_record (r);
/* Extension record: the subtype selects a slot in r->extensions[], so a
   subtype at or beyond the size of that array cannot be stored and the
   record is skipped. */
506 if (!read_int (r, &subtype))
509 || subtype >= sizeof r->extensions / sizeof *r->extensions)
512 _("Unrecognized record type 7, subtype %d. For help, "
513 "please send this file to %s and mention that you were "
515 subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
516 return skip_extension_record (r, subtype);
/* Subtype 18 is EXT_VAR_ATTRS.  These records are collected on the
   r->var_attrs list instead of in r->extensions[], so more than one of
   them can be kept. */
518 else if (subtype == 18)
520 /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
521 put each variable attribute into a separate record with subtype
522 18. I'm surprised that SPSS puts up with this. */
523 struct sfm_extension_record *ext;
524 bool ok = read_extension_record (r, subtype, &ext);
526 ll_push_tail (&r->var_attrs, &ext->ll);
/* Any other subtype may appear only once; a repeat is reported, citing
   the position of the record already stored, and then skipped. */
529 else if (r->extensions[subtype] != NULL)
532 _("Record type 7, subtype %d found here has the same "
533 "type as the record found near offset 0x%llx. For "
534 "help, please send this file to %s and mention that "
535 "you were using %s."),
536 subtype, (long long int) r->extensions[subtype]->pos,
537 PACKAGE_BUGREPORT, PACKAGE_STRING);
538 return skip_extension_record (r, subtype);
541 return read_extension_record (r, subtype, &r->extensions[subtype]);
544 sys_error (r, r->pos, _("Unrecognized record type %d."), type);
551 /* Returns the character encoding obtained from R, or a null pointer if R
552 doesn't have an indication of its character encoding. */
554 sfm_get_encoding (const struct sfm_reader *r)
556 /* The EXT_ENCODING record is the best way to determine dictionary encoding. */
558 if (r->extensions[EXT_ENCODING])
559 return r->extensions[EXT_ENCODING]->data;
561 /* But EXT_INTEGER is better than nothing as a fallback. */
562 if (r->extensions[EXT_INTEGER])
564 int codepage = parse_int (r, r->extensions[EXT_INTEGER]->data, 7 * 4);
565 const char *encoding;
574 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
575 respectively. However, many files have character code 2 but data
576 which are clearly not ASCII. Therefore, ignore these values. */
583 encoding = sys_get_encoding_from_codepage (codepage);
584 if (encoding != NULL)
590 /* If the file magic number is EBCDIC then its character data is too. */
591 if (!strcmp (r->header.magic, EBCDIC_MAGIC))
597 struct get_strings_aux
/* Stores one entry at index aux->n of AUX's three parallel arrays: TITLE is
   stored as given (the caller supplies an already allocated string), STRING
   is copied into AUX's pool with pool_strdup(), and ID is recorded in the
   'ids' array. */
608 add_string__ (struct get_strings_aux *aux,
609 const char *string, bool id, char *title)
611 if (aux->n >= aux->allocated)
/* The "+ 1" lets the capacity grow even when it is currently 0.  All three
   arrays are resized to the same new capacity. */
613 aux->allocated = 2 * (aux->allocated + 1);
614 aux->titles = pool_realloc (aux->pool, aux->titles,
615 aux->allocated * sizeof *aux->titles);
616 aux->strings = pool_realloc (aux->pool, aux->strings,
617 aux->allocated * sizeof *aux->strings);
618 aux->ids = pool_realloc (aux->pool, aux->ids,
619 aux->allocated * sizeof *aux->ids);
622 aux->titles[aux->n] = title;
623 aux->strings[aux->n] = pool_strdup (aux->pool, string);
624 aux->ids[aux->n] = id;
/* Adds STRING to AUX as a non-identifier entry (id = false).  Its title is
   built printf-style from TITLE and the following arguments, allocated from
   AUX's pool with pool_vasprintf(). */
628 static void PRINTF_FORMAT (3, 4)
629 add_string (struct get_strings_aux *aux,
630 const char *string, const char *title, ...)
634 va_start (args, title);
635 add_string__ (aux, string, false, pool_vasprintf (aux->pool, title, args));
/* Adds ID to AUX as an identifier entry (id = true).  Its title is built
   printf-style from TITLE and the following arguments, allocated from AUX's
   pool with pool_vasprintf(). */
639 static void PRINTF_FORMAT (3, 4)
640 add_id (struct get_strings_aux *aux, const char *id, const char *title, ...)
644 va_start (args, title);
645 add_string__ (aux, id, true, pool_vasprintf (aux->pool, title, args));
649 /* Retrieves significant string data from R in its raw format, to allow the
650 caller to try to detect the encoding in use.
652 Returns the number of strings retrieved N. Sets each of *TITLESP, *IDSP,
653 and *STRINGSP to an array of N elements allocated from POOL. For each I in
654 0...N-1, UTF-8 string *TITLESP[I] describes *STRINGSP[I], which is in
655 whatever encoding system file R uses. *IDSP[I] is true if *STRINGSP[I] must
656 be a valid PSPP language identifier, false if *STRINGSP[I] is free-form text. */
659 sfm_get_strings (const struct any_reader *r_, struct pool *pool,
660 char ***titlesp, bool **idsp, char ***stringsp)
662 struct sfm_reader *r = sfm_reader_cast (r_);
663 const struct sfm_mrset *mrset;
664 struct get_strings_aux aux;
676 for (i = 0; i < r->n_vars; i++)
677 if (r->vars[i].width != -1)
678 add_id (&aux, r->vars[i].name, _("Variable %zu"), ++var_idx);
681 for (i = 0; i < r->n_vars; i++)
682 if (r->vars[i].width != -1)
685 if (r->vars[i].label)
686 add_string (&aux, r->vars[i].label, _("Variable %zu Label"),
691 for (i = 0; i < r->n_labels; i++)
692 for (j = 0; j < r->labels[i].n_labels; j++)
693 add_string (&aux, r->labels[i].labels[j].label,
694 _("Value Label %zu"), k++);
696 add_string (&aux, r->header.creation_date, _("Creation Date"));
697 add_string (&aux, r->header.creation_time, _("Creation Time"));
698 add_string (&aux, r->header.eye_catcher, _("Product"));
699 add_string (&aux, r->header.file_label, _("File Label"));
701 if (r->extensions[EXT_PRODUCT_INFO])
702 add_string (&aux, r->extensions[EXT_PRODUCT_INFO]->data,
703 _("Extra Product Info"));
709 for (i = 0; i < r->document->n_lines; i++)
713 memcpy (line, r->document->documents + i * 80, 80);
716 add_string (&aux, line, _("Document Line %zu"), i + 1);
720 for (mrset = r->mrsets; mrset < &r->mrsets[r->n_mrsets]; mrset++)
722 size_t mrset_idx = mrset - r->mrsets + 1;
724 add_id (&aux, mrset->name, _("MRSET %zu"), mrset_idx);
726 add_string (&aux, mrset->label, _("MRSET %zu Label"), mrset_idx);
728 /* Skip the variables because they ought to be duplicates. */
731 add_string (&aux, mrset->counted, _("MRSET %zu Counted Value"),
735 /* data file attributes */
736 /* variable attributes */
738 /* long string value labels */
739 /* long string missing values */
741 *titlesp = aux.titles;
743 *stringsp = aux.strings;
747 /* Decodes the dictionary read from R, saving it into *DICT. Character
748 strings in R are decoded using ENCODING, or an encoding obtained from R if
749 ENCODING is null, or the locale encoding if R specifies no encoding.
751 If INFOP is non-null, then it receives additional info about the system
752 file, which the caller must eventually free with any_read_info_destroy()
753 when it is no longer needed.
755 This function consumes R. The caller must not use it again later, even to
756 destroy it with sfm_close(). */
757 static struct casereader *
758 sfm_decode (struct any_reader *r_, const char *encoding,
759 struct dictionary **dictp, struct any_read_info *infop)
761 struct sfm_reader *r = sfm_reader_cast (r_);
762 struct dictionary *dict;
765 if (encoding == NULL)
767 encoding = sfm_get_encoding (r);
768 if (encoding == NULL)
770 sys_warn (r, -1, _("This system file does not indicate its own "
771 "character encoding. Using default encoding "
772 "%s. For best results, specify an encoding "
773 "explicitly. Use SYSFILE INFO with "
774 "ENCODING=\"DETECT\" to analyze the possible "
777 encoding = locale_charset ();
781 dict = dict_create (encoding);
782 r->encoding = dict_get_encoding (dict);
784 /* These records don't use variables at all. */
785 if (r->document != NULL)
786 parse_document (dict, r->document);
788 if (r->extensions[EXT_INTEGER] != NULL
789 && !parse_machine_integer_info (r, r->extensions[EXT_INTEGER], &r->info))
792 if (r->extensions[EXT_FLOAT] != NULL)
793 parse_machine_float_info (r, r->extensions[EXT_FLOAT]);
795 if (r->extensions[EXT_PRODUCT_INFO] != NULL)
796 parse_extra_product_info (r, r->extensions[EXT_PRODUCT_INFO], &r->info);
798 if (r->extensions[EXT_FILE_ATTRS] != NULL)
799 parse_data_file_attributes (r, r->extensions[EXT_FILE_ATTRS], dict);
801 parse_header (r, &r->header, &r->info, dict);
803 /* Parse the variable records, the basis of almost everything else. */
804 if (!parse_variable_records (r, dict, r->vars, r->n_vars))
807 /* Parse value labels and the weight variable immediately after the variable
808 records. These records use indexes into var_recs[], so we must parse them
809 before those indexes become invalidated by very long string variables. */
810 for (i = 0; i < r->n_labels; i++)
811 if (!parse_value_labels (r, dict, r->vars, r->n_vars, &r->labels[i]))
813 if (r->header.weight_idx != 0)
815 struct variable *weight_var;
817 weight_var = lookup_var_by_index (r, 76, r->vars, r->n_vars,
818 r->header.weight_idx);
819 if (weight_var != NULL)
821 if (var_is_numeric (weight_var))
822 dict_set_weight (dict, weight_var);
824 sys_warn (r, -1, _("Ignoring string variable `%s' set "
825 "as weighting variable."),
826 var_get_name (weight_var));
830 if (r->extensions[EXT_DISPLAY] != NULL)
831 parse_display_parameters (r, r->extensions[EXT_DISPLAY], dict);
833 /* The following records use short names, so they need to be parsed before
834 parse_long_var_name_map() changes short names to long names. */
835 decode_mrsets (r, dict);
837 if (r->extensions[EXT_LONG_STRINGS] != NULL
838 && !parse_long_string_map (r, r->extensions[EXT_LONG_STRINGS], dict))
841 /* Now rename variables to their long names. */
842 parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict);
844 /* The following records use long names, so they need to follow renaming. */
845 if (!ll_is_empty (&r->var_attrs))
847 struct sfm_extension_record *ext;
848 ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs)
849 parse_variable_attributes (r, ext, dict);
851 /* Roles use the $@Role attribute. */
852 assign_variable_roles (r, dict);
854 if (r->extensions[EXT_LONG_LABELS] != NULL)
855 parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict);
856 if (r->extensions[EXT_LONG_MISSING] != NULL)
857 parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
860 /* Warn if the actual amount of data per case differs from the
861 amount that the header claims. SPSS version 13 gets this
862 wrong when very long strings are involved, so don't warn in that case. */
864 if (r->header.nominal_case_size > 0
865 && r->header.nominal_case_size != r->n_vars
866 && r->info.version_major != 13)
867 sys_warn (r, -1, _("File header claims %d variable positions but "
868 "%zu were read from file."),
869 r->header.nominal_case_size, r->n_vars);
871 /* Create an index of dictionary variable widths for
872 sfm_read_case to use. We cannot use the `struct variable's
873 from the dictionary we created, because the caller owns the
874 dictionary and may destroy or modify its variables. */
875 sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
876 pool_register (r->pool, free, r->sfm_vars);
877 r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
883 memset (&r->info, 0, sizeof r->info);
886 return casereader_create_sequential
888 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
889 &sys_file_casereader_class, r);
898 /* Closes R, which should have been returned by sfm_open() but not already
899 closed with sfm_decode() or this function.
900 Returns true if an I/O error has occurred on READER, false otherwise. */
903 sfm_close (struct any_reader *r_)
905 struct sfm_reader *r = sfm_reader_cast (r_);
910 if (fn_close (r->fh, r->file) == EOF)
912 msg (ME, _("Error closing system file `%s': %s."),
913 fh_get_file_name (r->fh), strerror (errno));
919 any_read_info_destroy (&r->info);
924 pool_destroy (r->pool);
929 /* Destroys READER by closing the system file reader passed as R_ with sfm_close(). The READER argument itself is unused. */
931 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
933 struct sfm_reader *r = r_;
934 sfm_close (&r->any_reader);
937 /* Detects whether FILE is an SPSS system file. Returns 1 if so, 0 if not, and
938 a negative errno value if there is an error reading FILE. Detection seeks to
the start of FILE and compares its first 4 bytes against the ASCII, ASCII ZLIB,
and EBCDIC magic strings. */
940 sfm_detect (FILE *file)
944 if (fseek (file, 0, SEEK_SET) != 0)
/* A failed read with no stream error yields 0 ("not a system file"); only a
   stream error yields a negative errno value. */
946 if (fread (magic, 4, 1, file) != 1)
947 return ferror (file) ? -errno : 0;
/* Any one of the three known magic strings identifies a system file. */
950 return (!strcmp (ASCII_MAGIC, magic)
951 || !strcmp (ASCII_ZMAGIC, magic)
952 || !strcmp (EBCDIC_MAGIC, magic));
955 /* Reads the global header of the system file. Initializes *HEADER and *INFO,
956 except for the string fields in *INFO, which parse_header() will initialize
957 later once the file's encoding is known. Also sets R's integer_format,
float_format, compression, case_cnt, and bias members from the header. */
959 read_header (struct sfm_reader *r, struct any_read_info *info,
960 struct sfm_header_record *header)
962 uint8_t raw_layout_code[4];
/* The magic number and the eye-catcher are the first two header fields. */
967 if (!read_string (r, header->magic, sizeof header->magic)
968 || !read_string (r, header->eye_catcher, sizeof header->eye_catcher))
/* The magic must be one of the three known values; the ZLIB magic is
   distinguished from the other two. */
971 if (!strcmp (ASCII_MAGIC, header->magic)
972 || !strcmp (EBCDIC_MAGIC, header->magic))
974 else if (!strcmp (ASCII_ZMAGIC, header->magic))
978 sys_error (r, 0, _("This is not an SPSS system file."));
982 /* Identify integer format. */
983 if (!read_bytes (r, raw_layout_code, sizeof raw_layout_code))
/* The layout code must decode as the integer 2 or 3 in some byte order, and
   that byte order must be MSB-first or LSB-first.  The error is reported at
   offset 64; presumably that is where the layout code starts, after the
   magic and eye-catcher fields (confirm against read_string()). */
985 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
987 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
989 || (r->integer_format != INTEGER_MSB_FIRST
990 && r->integer_format != INTEGER_LSB_FIRST))
992 sys_error (r, 64, _("This is not an SPSS system file."));
996 if (!read_int (r, &header->nominal_case_size))
/* A negative or very large nominal case size is replaced by -1. */
999 if (header->nominal_case_size < 0
1000 || header->nominal_case_size > INT_MAX / 16)
1001 header->nominal_case_size = -1;
1003 if (!read_int (r, &compressed))
/* Compression code: 0 is no compression and 1 is simple compression. */
1007 if (compressed == 0)
1008 r->compression = ANY_COMP_NONE;
1009 else if (compressed == 1)
1010 r->compression = ANY_COMP_SIMPLE;
/* COMPRESSED is neither 0 nor 1 at this point, so this test always
   succeeds.  NOTE(review): unlike the other messages in this function, the
   two compression error messages are not wrapped in _(). */
1011 else if (compressed != 0)
1013 sys_error (r, 0, "System file header has invalid compression "
1014 "value %d.", compressed);
/* Compression code 2 is ZLIB compression. */
1020 if (compressed == 2)
1021 r->compression = ANY_COMP_ZLIB;
1024 sys_error (r, 0, "ZLIB-compressed system file header has invalid "
1025 "compression value %d.", compressed);
1030 if (!read_int (r, &header->weight_idx))
1033 if (!read_int (r, &r->case_cnt))
1035 if ( r->case_cnt > INT_MAX / 2)
1038 /* Identify floating-point format and obtain compression bias. */
1039 if (!read_bytes (r, raw_bias, sizeof raw_bias))
/* The bias field is matched against the value 100.0 to detect the
   floating-point format. */
1041 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
1043 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
/* Warn only when the bias field is not all zeros. */
1045 if (memcmp (raw_bias, zero_bias, 8))
1046 sys_warn (r, r->pos - 8,
1047 _("Compression bias is not the usual "
1048 "value of 100, or system file uses unrecognized "
1049 "floating-point format."));
1052 /* Some software is known to write all-zeros to this
1053 field. Such software also writes floating-point
1054 numbers in the format that we expect by default
1055 (it seems that all software most likely does, in
1056 reality), so don't warn in this case. */
/* Choose the IEEE double format whose byte order matches the integer
   format identified above. */
1059 if (r->integer_format == INTEGER_MSB_FIRST)
1060 r->float_format = FLOAT_IEEE_DOUBLE_BE;
1062 r->float_format = FLOAT_IEEE_DOUBLE_LE;
/* Convert the raw bias from the file's format to a native double. */
1064 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
/* The remaining header fields are raw strings, followed by 3 bytes that are
   skipped. */
1066 if (!read_string (r, header->creation_date, sizeof header->creation_date)
1067 || !read_string (r, header->creation_time, sizeof header->creation_time)
1068 || !read_string (r, header->file_label, sizeof header->file_label)
1069 || !skip_bytes (r, 3))
/* Copy the non-string results into *INFO. */
1072 info->integer_format = r->integer_format;
1073 info->float_format = r->float_format;
1074 info->compression = r->compression;
1075 info->case_cnt = r->case_cnt;
1080 /* Reads a variable (type 2) record from R into RECORD. RECORD is zeroed first; its label, if any, is allocated from R's pool. */
1082 read_variable_record (struct sfm_reader *r, struct sfm_var_record *record)
1084 int has_variable_label;
1086 memset (record, 0, sizeof *record);
/* Remember where the record starts, for use in error messages below. */
1088 record->pos = r->pos;
/* Fixed part of the record: five integers followed by the name. */
1089 if (!read_int (r, &record->width)
1090 || !read_int (r, &has_variable_label)
1091 || !read_int (r, &record->missing_value_code)
1092 || !read_int (r, &record->print_format)
1093 || !read_int (r, &record->write_format)
1094 || !read_string (r, record->name, sizeof record->name))
/* The label indicator must be 0 (no label) or 1 (a label follows). */
1097 if (has_variable_label == 1)
1099 enum { MAX_LABEL_LEN = 65536 };
1100 unsigned int len, read_len;
1102 if (!read_uint (r, &len))
1105 /* Read up to MAX_LABEL_LEN bytes of label. */
1106 read_len = MIN (MAX_LABEL_LEN, len);
1107 record->label = pool_malloc (r->pool, read_len + 1);
1108 if (!read_string (r, record->label, read_len + 1))
1111 /* Skip unread label bytes. */
/* LEN - READ_LEN is nonzero only when the label is longer than
   MAX_LABEL_LEN. */
1112 if (!skip_bytes (r, len - read_len))
1115 /* Skip label padding up to multiple of 4 bytes. */
1116 if (!skip_bytes (r, ROUND_UP (len, 4) - len))
1119 else if (has_variable_label != 0)
1121 sys_error (r, record->pos,
1122 _("Variable label indicator field is not 0 or 1."));
1126 /* Set missing values. */
1127 if (record->missing_value_code != 0)
1129 int code = record->missing_value_code;
/* A width of 0 takes the numeric check; the other check is the string
   one, as the two error messages show. */
1130 if (record->width == 0)
1132 if (code < -3 || code > 3 || code == -1)
1134 sys_error (r, record->pos,
1135 _("Numeric missing value indicator field is not "
1136 "-3, -2, 0, 1, 2, or 3."));
1142 if (code < 1 || code > 3)
1144 sys_error (r, record->pos,
1145 _("String missing value indicator field is not "
/* Read 8 bytes per missing value; abs() is needed because the numeric
   check above accepts the negative codes -2 and -3. */
1151 if (!read_bytes (r, record->missing, 8 * abs (code)))
1158 /* Reads value labels from R into RECORD. */
1160 read_value_label_record (struct sfm_reader *r,
1161 struct sfm_value_label_record *record)
1166 /* Read type 3 record. */
1167 record->pos = r->pos;
1168 if (!read_uint (r, &record->n_labels))
1170 if (record->n_labels > UINT_MAX / sizeof *record->labels)
1172 sys_error (r, r->pos - 4, _("Invalid number of labels %u."),
1176 record->labels = pool_nmalloc (r->pool, record->n_labels,
1177 sizeof *record->labels);
1178 for (i = 0; i < record->n_labels; i++)
1180 struct sfm_value_label *label = &record->labels[i];
1181 unsigned char label_len;
1184 if (!read_bytes (r, label->value, sizeof label->value))
1187 /* Read label length. */
1188 if (!read_bytes (r, &label_len, sizeof label_len))
1190 padded_len = ROUND_UP (label_len + 1, 8);
1192 /* Read label, padding. */
1193 label->label = pool_malloc (r->pool, padded_len + 1);
1194 if (!read_bytes (r, label->label, padded_len - 1))
1196 label->label[label_len] = '\0';
1199 /* Read record type of type 4 record. */
1200 if (!read_int (r, &type))
1204 sys_error (r, r->pos - 4,
1205 _("Variable index record (type 4) does not immediately "
1206 "follow value label record (type 3) as it should."));
1210 /* Read number of variables associated with value label from type 4
1212 if (!read_uint (r, &record->n_vars))
1214 if (record->n_vars < 1 || record->n_vars > r->n_vars)
1216 sys_error (r, r->pos - 4,
1217 _("Number of variables associated with a value label (%u) "
1218 "is not between 1 and the number of variables (%zu)."),
1219 record->n_vars, r->n_vars);
1223 record->vars = pool_nmalloc (r->pool, record->n_vars, sizeof *record->vars);
1224 for (i = 0; i < record->n_vars; i++)
1225 if (!read_int (r, &record->vars[i]))
1231 /* Reads a document record from R. Returns true if successful, false on
1234 read_document_record (struct sfm_reader *r)
1237 if (!read_int (r, &n_lines))
1239 else if (n_lines == 0)
1241 else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
1243 sys_error (r, r->pos,
1244 _("Number of document lines (%d) "
1245 "must be greater than 0 and less than %d."),
1246 n_lines, INT_MAX / DOC_LINE_LENGTH);
1250 struct sfm_document_record *record;
1251 record = pool_malloc (r->pool, sizeof *record);
1252 record->pos = r->pos;
1253 record->n_lines = n_lines;
1254 record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
1255 if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines))
1258 r->document = record;
1263 read_extension_record_header (struct sfm_reader *r, int subtype,
1264 struct sfm_extension_record *record)
1266 record->subtype = subtype;
1267 record->pos = r->pos;
1268 if (!read_uint (r, &record->size) || !read_uint (r, &record->count))
1271 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
1272 allows an extra byte for a null terminator, used by some
1273 extension processing routines. */
1274 if (record->size != 0
1275 && xsum (1, xtimes (record->count, record->size)) >= UINT_MAX)
1277 sys_error (r, record->pos, "Record type 7 subtype %d too large.",
1285 /* Reads an extension record from R into RECORD. */
1287 read_extension_record (struct sfm_reader *r, int subtype,
1288 struct sfm_extension_record **recordp)
1290 struct extension_record_type
1297 static const struct extension_record_type types[] =
1299 /* Implemented record types. */
1300 { EXT_INTEGER, 4, 8 },
1301 { EXT_FLOAT, 8, 3 },
1302 { EXT_MRSETS, 1, 0 },
1303 { EXT_PRODUCT_INFO, 1, 0 },
1304 { EXT_DISPLAY, 4, 0 },
1305 { EXT_LONG_NAMES, 1, 0 },
1306 { EXT_LONG_STRINGS, 1, 0 },
1307 { EXT_NCASES, 8, 2 },
1308 { EXT_FILE_ATTRS, 1, 0 },
1309 { EXT_VAR_ATTRS, 1, 0 },
1310 { EXT_MRSETS2, 1, 0 },
1311 { EXT_ENCODING, 1, 0 },
1312 { EXT_LONG_LABELS, 1, 0 },
1313 { EXT_LONG_MISSING, 1, 0 },
1315 /* Ignored record types. */
1316 { EXT_VAR_SETS, 0, 0 },
1318 { EXT_DATA_ENTRY, 0, 0 },
1319 { EXT_DATAVIEW, 0, 0 },
1322 const struct extension_record_type *type;
1323 struct sfm_extension_record *record;
1327 record = pool_malloc (r->pool, sizeof *record);
1328 if (!read_extension_record_header (r, subtype, record))
1330 n_bytes = record->count * record->size;
1332 for (type = types; type < &types[sizeof types / sizeof *types]; type++)
1333 if (subtype == type->subtype)
1335 if (type->size > 0 && record->size != type->size)
1336 sys_warn (r, record->pos,
1337 _("Record type 7, subtype %d has bad size %u "
1338 "(expected %d)."), subtype, record->size, type->size);
1339 else if (type->count > 0 && record->count != type->count)
1340 sys_warn (r, record->pos,
1341 _("Record type 7, subtype %d has bad count %u "
1342 "(expected %d)."), subtype, record->count, type->count);
1343 else if (type->count == 0 && type->size == 0)
1345 /* Ignore this record. */
1349 char *data = pool_malloc (r->pool, n_bytes + 1);
1350 data[n_bytes] = '\0';
1352 record->data = data;
1353 if (!read_bytes (r, record->data, n_bytes))
1362 sys_warn (r, record->pos,
1363 _("Unrecognized record type 7, subtype %d. For help, please "
1364 "send this file to %s and mention that you were using %s."),
1365 subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
1368 return skip_bytes (r, n_bytes);
1372 skip_extension_record (struct sfm_reader *r, int subtype)
1374 struct sfm_extension_record record;
1376 return (read_extension_record_header (r, subtype, &record)
1377 && skip_bytes (r, record.count * record.size));
1381 parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
1382 struct any_read_info *info, struct dictionary *dict)
1384 const char *dict_encoding = dict_get_encoding (dict);
1385 struct substring product;
1386 struct substring label;
1389 /* Convert file label to UTF-8 and put it into DICT. */
1390 label = recode_substring_pool ("UTF-8", dict_encoding,
1391 ss_cstr (header->file_label), r->pool);
1392 ss_trim (&label, ss_cstr (" "));
1393 label.string[label.length] = '\0';
1394 fixed_label = fix_line_ends (label.string);
1395 dict_set_label (dict, fixed_label);
1398 /* Put creation date and time in UTF-8 into INFO. */
1399 info->creation_date = recode_string ("UTF-8", dict_encoding,
1400 header->creation_date, -1);
1401 info->creation_time = recode_string ("UTF-8", dict_encoding,
1402 header->creation_time, -1);
1404 /* Put product name into INFO, dropping eye-catcher string if present. */
1405 product = recode_substring_pool ("UTF-8", dict_encoding,
1406 ss_cstr (header->eye_catcher), r->pool);
1407 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
1408 ss_trim (&product, ss_cstr (" "));
1409 info->product = ss_xstrdup (product);
1412 /* Reads a variable (type 2) record from R and adds the
1413 corresponding variable to DICT.
1414 Also skips past additional variable records for long string
1417 parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
1418 struct sfm_var_record *var_recs, size_t n_var_recs)
1420 const char *dict_encoding = dict_get_encoding (dict);
1421 struct sfm_var_record *rec;
1424 for (rec = var_recs; rec < &var_recs[n_var_recs]; )
1426 struct variable *var;
1431 name = recode_string_pool ("UTF-8", dict_encoding,
1432 rec->name, -1, r->pool);
1433 name[strcspn (name, " ")] = '\0';
1435 if (!dict_id_is_valid (dict, name, false)
1436 || name[0] == '$' || name[0] == '#')
1438 sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
1442 if (rec->width < 0 || rec->width > 255)
1444 sys_error (r, rec->pos,
1445 _("Bad width %d for variable %s."), rec->width, name);
1449 var = rec->var = dict_create_var (dict, name, rec->width);
1452 char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
1453 sys_warn (r, rec->pos, _("Renaming variable with duplicate name "
1456 var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
1460 /* Set the short name the same as the long name. */
1461 var_set_short_name (var, 0, name);
1463 /* Get variable label, if any. */
1468 utf8_label = recode_string_pool ("UTF-8", dict_encoding,
1469 rec->label, -1, r->pool);
1470 var_set_label (var, utf8_label);
1473 /* Set missing values. */
1474 if (rec->missing_value_code != 0)
1476 int width = var_get_width (var);
1477 struct missing_values mv;
1479 mv_init_pool (r->pool, &mv, width);
1480 if (var_is_numeric (var))
1482 bool has_range = rec->missing_value_code < 0;
1483 int n_discrete = (has_range
1484 ? rec->missing_value_code == -3
1485 : rec->missing_value_code);
1490 double low = parse_float (r, rec->missing, 0);
1491 double high = parse_float (r, rec->missing, 8);
1493 /* Deal with SPSS 21 change in representation. */
1497 mv_add_range (&mv, low, high);
1501 for (i = 0; i < n_discrete; i++)
1503 mv_add_num (&mv, parse_float (r, rec->missing, ofs));
1508 for (i = 0; i < rec->missing_value_code; i++)
1509 mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
1510 var_set_missing_values (var, &mv);
1514 parse_format_spec (r, rec->pos + 12, rec->print_format,
1515 PRINT_FORMAT, var, &n_warnings);
1516 parse_format_spec (r, rec->pos + 16, rec->write_format,
1517 WRITE_FORMAT, var, &n_warnings);
1519 /* Account for values.
1520 Skip long string continuation records, if any. */
1521 n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
1522 for (i = 1; i < n_values; i++)
1523 if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
1525 sys_error (r, rec->pos, _("Missing string continuation record."));
1534 /* Translates the format spec from sysfile format to internal
1537 parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
1538 enum which_format which, struct variable *v,
1541 const int max_warnings = 8;
1542 uint8_t raw_type = format >> 16;
1543 uint8_t w = format >> 8;
1552 ok = (fmt_from_io (raw_type, &f.type)
1553 && fmt_check_output (&f)
1554 && fmt_check_width_compat (&f, var_get_width (v)));
1559 if (which == PRINT_FORMAT)
1560 var_set_print_format (v, &f);
1562 var_set_write_format (v, &f);
1564 else if (format == 0)
1566 /* Actually observed in the wild. No point in warning about it. */
1568 else if (++*n_warnings <= max_warnings)
1570 if (which == PRINT_FORMAT)
1571 sys_warn (r, pos, _("Variable %s with width %d has invalid print "
1573 var_get_name (v), var_get_width (v), format);
1575 sys_warn (r, pos, _("Variable %s with width %d has invalid write "
1577 var_get_name (v), var_get_width (v), format);
1579 if (*n_warnings == max_warnings)
1580 sys_warn (r, -1, _("Suppressing further invalid format warnings."));
1585 parse_document (struct dictionary *dict, struct sfm_document_record *record)
1589 for (p = record->documents;
1590 p < record->documents + DOC_LINE_LENGTH * record->n_lines;
1591 p += DOC_LINE_LENGTH)
1593 struct substring line;
1595 line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
1596 ss_buffer (p, DOC_LINE_LENGTH), NULL);
1597 ss_rtrim (&line, ss_cstr (" "));
1598 line.string[line.length] = '\0';
1600 dict_add_document_line (dict, line.string, false);
1606 /* Parses record type 7, subtype 3. */
1608 parse_machine_integer_info (struct sfm_reader *r,
1609 const struct sfm_extension_record *record,
1610 struct any_read_info *info)
1612 int float_representation, expected_float_format;
1613 int integer_representation, expected_integer_format;
1615 /* Save version info. */
1616 info->version_major = parse_int (r, record->data, 0);
1617 info->version_minor = parse_int (r, record->data, 4);
1618 info->version_revision = parse_int (r, record->data, 8);
1620 /* Check floating point format. */
1621 float_representation = parse_int (r, record->data, 16);
1622 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
1623 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
1624 expected_float_format = 1;
1625 else if (r->float_format == FLOAT_Z_LONG)
1626 expected_float_format = 2;
1627 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
1628 expected_float_format = 3;
1631 if (float_representation != expected_float_format)
1633 sys_error (r, record->pos,
1634 _("Floating-point representation indicated by "
1635 "system file (%d) differs from expected (%d)."),
1636 float_representation, expected_float_format);
1640 /* Check integer format. */
1641 integer_representation = parse_int (r, record->data, 24);
1642 if (r->integer_format == INTEGER_MSB_FIRST)
1643 expected_integer_format = 1;
1644 else if (r->integer_format == INTEGER_LSB_FIRST)
1645 expected_integer_format = 2;
1648 if (integer_representation != expected_integer_format)
1649 sys_warn (r, record->pos,
1650 _("Integer format indicated by system file (%d) "
1651 "differs from expected (%d)."),
1652 integer_representation, expected_integer_format);
1657 /* Parses record type 7, subtype 4. */
1659 parse_machine_float_info (struct sfm_reader *r,
1660 const struct sfm_extension_record *record)
1662 double sysmis = parse_float (r, record->data, 0);
1663 double highest = parse_float (r, record->data, 8);
1664 double lowest = parse_float (r, record->data, 16);
1666 if (sysmis != SYSMIS)
1667 sys_warn (r, record->pos,
1668 _("File specifies unexpected value %g (%a) as %s, "
1669 "instead of %g (%a)."),
1670 sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
1672 if (highest != HIGHEST)
1673 sys_warn (r, record->pos,
1674 _("File specifies unexpected value %g (%a) as %s, "
1675 "instead of %g (%a)."),
1676 highest, highest, "HIGHEST", HIGHEST, HIGHEST);
1678 /* SPSS before version 21 used a unique value just bigger than SYSMIS as
1679 LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
1680 appears in a context (missing values) where SYSMIS cannot. */
1681 if (lowest != LOWEST && lowest != SYSMIS)
1682 sys_warn (r, record->pos,
1683 _("File specifies unexpected value %g (%a) as %s, "
1684 "instead of %g (%a) or %g (%a)."),
1685 lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
1688 /* Parses record type 7, subtype 10. */
1690 parse_extra_product_info (struct sfm_reader *r,
1691 const struct sfm_extension_record *record,
1692 struct any_read_info *info)
1694 struct text_record *text;
1696 text = open_text_record (r, record, true);
1697 info->product_ext = fix_line_ends (text_get_all (text));
1698 close_text_record (r, text);
1701 /* Parses record type 7, subtype 7 or 19. */
1703 parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
1704 size_t *allocated_mrsets)
1706 struct text_record *text;
1708 text = open_text_record (r, record, false);
1711 struct sfm_mrset *mrset;
1712 size_t allocated_vars;
1715 /* Skip extra line feeds if present. */
1716 while (text_match (text, '\n'))
1719 if (r->n_mrsets >= *allocated_mrsets)
1720 r->mrsets = pool_2nrealloc (r->pool, r->mrsets, allocated_mrsets,
1722 mrset = &r->mrsets[r->n_mrsets];
1723 memset(mrset, 0, sizeof *mrset);
1725 mrset->name = text_get_token (text, ss_cstr ("="), NULL);
1726 if (mrset->name == NULL)
1729 if (text_match (text, 'C'))
1731 mrset->type = MRSET_MC;
1732 if (!text_match (text, ' '))
1734 sys_warn (r, record->pos,
1735 _("Missing space following `%c' at offset %zu "
1736 "in MRSETS record."), 'C', text_pos (text));
1740 else if (text_match (text, 'D'))
1742 mrset->type = MRSET_MD;
1743 mrset->cat_source = MRSET_VARLABELS;
1745 else if (text_match (text, 'E'))
1749 mrset->type = MRSET_MD;
1750 mrset->cat_source = MRSET_COUNTEDVALUES;
1751 if (!text_match (text, ' '))
1753 sys_warn (r, record->pos,
1754 _("Missing space following `%c' at offset %zu "
1755 "in MRSETS record."), 'E', text_pos (text));
1759 number = text_get_token (text, ss_cstr (" "), NULL);
1760 if (!strcmp (number, "11"))
1761 mrset->label_from_var_label = true;
1762 else if (strcmp (number, "1"))
1763 sys_warn (r, record->pos,
1764 _("Unexpected label source value following `E' "
1765 "at offset %zu in MRSETS record."),
1770 sys_warn (r, record->pos,
1771 _("Missing `C', `D', or `E' at offset %zu "
1772 "in MRSETS record."),
1777 if (mrset->type == MRSET_MD)
1779 mrset->counted = text_parse_counted_string (r, text);
1780 if (mrset->counted == NULL)
1784 mrset->label = text_parse_counted_string (r, text);
1785 if (mrset->label == NULL)
1793 var = text_get_token (text, ss_cstr (" \n"), &delimiter);
1796 if (delimiter != '\n')
1797 sys_warn (r, record->pos,
1798 _("Missing new-line parsing variable names "
1799 "at offset %zu in MRSETS record."),
1804 if (mrset->n_vars >= allocated_vars)
1805 mrset->vars = pool_2nrealloc (r->pool, mrset->vars,
1807 sizeof *mrset->vars);
1808 mrset->vars[mrset->n_vars++] = var;
1810 while (delimiter != '\n');
1814 close_text_record (r, text);
1818 decode_mrsets (struct sfm_reader *r, struct dictionary *dict)
1820 const struct sfm_mrset *s;
1822 for (s = r->mrsets; s < &r->mrsets[r->n_mrsets]; s++)
1824 struct stringi_set var_names;
1825 struct mrset *mrset;
1830 name = recode_string ("UTF-8", r->encoding, s->name, -1);
1833 sys_warn (r, -1, _("Multiple response set name `%s' does not begin "
1840 mrset = xzalloc (sizeof *mrset);
1842 mrset->type = s->type;
1843 mrset->cat_source = s->cat_source;
1844 mrset->label_from_var_label = s->label_from_var_label;
1845 if (s->label[0] != '\0')
1846 mrset->label = recode_string ("UTF-8", r->encoding, s->label, -1);
1848 stringi_set_init (&var_names);
1849 mrset->vars = xmalloc (s->n_vars * sizeof *mrset->vars);
1851 for (i = 0; i < s->n_vars; i++)
1853 struct variable *var;
1856 var_name = recode_string ("UTF-8", r->encoding, s->vars[i], -1);
1858 var = dict_lookup_var (dict, var_name);
1864 if (!stringi_set_insert (&var_names, var_name))
1867 _("MRSET %s contains duplicate variable name %s."),
1868 mrset->name, var_name);
1874 if (mrset->label == NULL && mrset->label_from_var_label
1875 && var_has_label (var))
1876 mrset->label = xstrdup (var_get_label (var));
1879 && var_get_type (var) != var_get_type (mrset->vars[0]))
1882 _("MRSET %s contains both string and "
1883 "numeric variables."), mrset->name);
1886 width = MIN (width, var_get_width (var));
1888 mrset->vars[mrset->n_vars++] = var;
1891 if (mrset->n_vars < 2)
1893 if (mrset->n_vars == 0)
1894 sys_warn (r, -1, _("MRSET %s has no variables."), mrset->name);
1896 sys_warn (r, -1, _("MRSET %s has only one variable."),
1898 mrset_destroy (mrset);
1899 stringi_set_destroy (&var_names);
1903 if (mrset->type == MRSET_MD)
1905 mrset->width = width;
1906 value_init (&mrset->counted, width);
1908 mrset->counted.f = c_strtod (s->counted, NULL);
1910 value_copy_str_rpad (&mrset->counted, width,
1911 (const uint8_t *) s->counted, ' ');
1914 dict_add_mrset (dict, mrset);
1915 stringi_set_destroy (&var_names);
1919 /* Read record type 7, subtype 11, which specifies how variables
1920 should be displayed in GUI environments. */
1922 parse_display_parameters (struct sfm_reader *r,
1923 const struct sfm_extension_record *record,
1924 struct dictionary *dict)
1926 bool includes_width;
1927 bool warned = false;
1932 n_vars = dict_get_var_cnt (dict);
1933 if (record->count == 3 * n_vars)
1934 includes_width = true;
1935 else if (record->count == 2 * n_vars)
1936 includes_width = false;
1939 sys_warn (r, record->pos,
1940 _("Extension 11 has bad count %u (for %zu variables)."),
1941 record->count, n_vars);
1946 for (i = 0; i < n_vars; ++i)
1948 struct variable *v = dict_get_var (dict, i);
1949 int measure, width, align;
1951 measure = parse_int (r, record->data, ofs);
1956 width = parse_int (r, record->data, ofs);
1962 align = parse_int (r, record->data, ofs);
1965 /* SPSS sometimes seems to set variables' measure to zero. */
1969 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1972 sys_warn (r, record->pos,
1973 _("Invalid variable display parameters for variable "
1974 "%zu (%s). Default parameters substituted."),
1975 i, var_get_name (v));
1980 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1981 : measure == 2 ? MEASURE_ORDINAL
1983 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1984 : align == 1 ? ALIGN_RIGHT
1987 /* Older versions (SPSS 9.0) sometimes set the display
1988 width to zero. This causes confusion in the GUI, so
1989 only set the width if it is nonzero. */
1991 var_set_display_width (v, width);
/* Renames VAR, which is in DICT, to NEW_NAME, keeping VAR's short names as
   they were before the rename. */
static void
rename_var_and_save_short_names (struct dictionary *dict, struct variable *var,
                                 const char *new_name)
{
  size_t n_short_names = var_get_short_name_cnt (var);
  char **saved;
  size_t i;

  /* Renaming a variable may clear its short names, but we
     want to retain them, so take copies first... */
  saved = xnmalloc (n_short_names, sizeof *saved);
  for (i = 0; i < n_short_names; i++)
    {
      const char *short_name = var_get_short_name (var, i);
      saved[i] = short_name != NULL ? xstrdup (short_name) : NULL;
    }

  /* ...set the long name... */
  dict_rename_var (dict, var, new_name);

  /* ...and put the short names back. */
  for (i = 0; i < n_short_names; i++)
    {
      var_set_short_name (var, i, saved[i]);
      free (saved[i]);
    }
  free (saved);
}
2026 /* Parses record type 7, subtype 13, which gives the long name that corresponds
2027 to each short name. Modifies variable names in DICT accordingly. */
2029 parse_long_var_name_map (struct sfm_reader *r,
2030 const struct sfm_extension_record *record,
2031 struct dictionary *dict)
2033 struct text_record *text;
2034 struct variable *var;
2039 /* There are no long variable names. Use the short variable names,
2040 converted to lowercase, as the long variable names. */
2043 for (i = 0; i < dict_get_var_cnt (dict); i++)
2045 struct variable *var = dict_get_var (dict, i);
2048 new_name = utf8_to_lower (var_get_name (var));
2049 rename_var_and_save_short_names (dict, var, new_name);
2056 /* Rename each of the variables, one by one. (In a correctly constructed
2057 system file, this cannot create any intermediate duplicate variable names,
2058 because all of the new variable names are longer than any of the old
2059 variable names and thus there cannot be any overlaps.) */
2060 text = open_text_record (r, record, true);
2061 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
2063 /* Validate long name. */
2064 if (!dict_id_is_valid (dict, long_name, false)
2065 || long_name[0] == '$' || long_name[0] == '#')
2067 sys_warn (r, record->pos,
2068 _("Long variable mapping from %s to invalid "
2069 "variable name `%s'."),
2070 var_get_name (var), long_name);
2074 /* Identify any duplicates. */
2075 if (utf8_strcasecmp (var_get_short_name (var, 0), long_name)
2076 && dict_lookup_var (dict, long_name) != NULL)
2078 sys_warn (r, record->pos,
2079 _("Duplicate long variable name `%s'."), long_name);
2083 rename_var_and_save_short_names (dict, var, long_name);
2085 close_text_record (r, text);
2088 /* Reads record type 7, subtype 14, which gives the real length
2089 of each very long string. Rearranges DICT accordingly. */
2091 parse_long_string_map (struct sfm_reader *r,
2092 const struct sfm_extension_record *record,
2093 struct dictionary *dict)
2095 struct text_record *text;
2096 struct variable *var;
2099 text = open_text_record (r, record, true);
2100 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
2102 size_t idx = var_get_dict_index (var);
2108 length = strtol (length_s, NULL, 10);
2109 if (length < 1 || length > MAX_STRING)
2111 sys_warn (r, record->pos,
2112 _("%s listed as string of invalid length %s "
2113 "in very long string record."),
2114 var_get_name (var), length_s);
2118 /* Check segments. */
2119 segment_cnt = sfm_width_to_segments (length);
2120 if (segment_cnt == 1)
2122 sys_warn (r, record->pos,
2123 _("%s listed in very long string record with width %s, "
2124 "which requires only one segment."),
2125 var_get_name (var), length_s);
2128 if (idx + segment_cnt > dict_get_var_cnt (dict))
2130 sys_error (r, record->pos,
2131 _("Very long string %s overflows dictionary."),
2132 var_get_name (var));
2136 /* Get the short names from the segments and check their
2138 for (i = 0; i < segment_cnt; i++)
2140 struct variable *seg = dict_get_var (dict, idx + i);
2141 int alloc_width = sfm_segment_alloc_width (length, i);
2142 int width = var_get_width (seg);
2145 var_set_short_name (var, i, var_get_short_name (seg, 0));
2146 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
2148 sys_error (r, record->pos,
2149 _("Very long string with width %ld has segment %d "
2150 "of width %d (expected %d)."),
2151 length, i, width, alloc_width);
2155 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
2156 var_set_width (var, length);
2158 close_text_record (r, text);
2159 dict_compact_values (dict);
2165 parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
2166 const struct sfm_var_record *var_recs, size_t n_var_recs,
2167 const struct sfm_value_label_record *record)
2169 struct variable **vars;
2173 utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
2174 for (i = 0; i < record->n_labels; i++)
2175 utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
2176 record->labels[i].label, -1,
2179 vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
2180 for (i = 0; i < record->n_vars; i++)
2182 vars[i] = lookup_var_by_index (r, record->pos,
2183 var_recs, n_var_recs, record->vars[i]);
2184 if (vars[i] == NULL)
2188 for (i = 1; i < record->n_vars; i++)
2189 if (var_get_type (vars[i]) != var_get_type (vars[0]))
2191 sys_error (r, record->pos,
2192 _("Variables associated with value label are not all of "
2193 "identical type. Variable %s is %s, but variable "
2195 var_get_name (vars[0]),
2196 var_is_numeric (vars[0]) ? _("numeric") : _("string"),
2197 var_get_name (vars[i]),
2198 var_is_numeric (vars[i]) ? _("numeric") : _("string"));
2202 for (i = 0; i < record->n_vars; i++)
2204 struct variable *var = vars[i];
2208 width = var_get_width (var);
2211 sys_error (r, record->pos,
2212 _("Value labels may not be added to long string "
2213 "variables (e.g. %s) using records types 3 and 4."),
2214 var_get_name (var));
2218 for (j = 0; j < record->n_labels; j++)
2220 struct sfm_value_label *label = &record->labels[j];
2223 value_init (&value, width);
2225 value.f = parse_float (r, label->value, 0);
2227 memcpy (value_str_rw (&value, width), label->value, width);
2229 if (!var_add_value_label (var, &value, utf8_labels[j]))
2231 if (var_is_numeric (var))
2232 sys_warn (r, record->pos,
2233 _("Duplicate value label for %g on %s."),
2234 value.f, var_get_name (var));
2236 sys_warn (r, record->pos,
2237 _("Duplicate value label for `%.*s' on %s."),
2238 width, value_str (&value, width),
2239 var_get_name (var));
2242 value_destroy (&value, width);
2246 pool_free (r->pool, vars);
2247 for (i = 0; i < record->n_labels; i++)
2248 pool_free (r->pool, utf8_labels[i]);
2249 pool_free (r->pool, utf8_labels);
2254 static struct variable *
2255 lookup_var_by_index (struct sfm_reader *r, off_t offset,
2256 const struct sfm_var_record *var_recs, size_t n_var_recs,
2259 const struct sfm_var_record *rec;
2261 if (idx < 1 || idx > n_var_recs)
2263 sys_error (r, offset,
2264 _("Variable index %d not in valid range 1...%zu."),
2269 rec = &var_recs[idx - 1];
2270 if (rec->var == NULL)
2272 sys_error (r, offset,
2273 _("Variable index %d refers to long string continuation."),
/* Parses a set of custom attributes from TEXT into ATTRS.
   ATTRS may be a null pointer, in which case the attributes are
   read but discarded.

   Each attribute is a key followed by `(', then one value per line, then
   `)'.  Values are expected to be enclosed in single quotes, which are
   removed; a value without them draws a warning and is used as is.
   Attributes are parsed until one is not followed by `/' or no further key
   can be read. */
static void
parse_attributes (struct sfm_reader *r, struct text_record *text,
                  struct attrset *attrs)
{
  do
    {
      struct attribute *attr;
      char *key;
      int index;

      /* Parse the key. */
      key = text_get_token (text, ss_cstr ("("), NULL);
      if (key == NULL)
        return;

      attr = attribute_create (key);
      index = 1;
      for (;;)
        {
          /* Parse the value. */
          char *value = text_get_token (text, ss_cstr ("\n"), NULL);
          size_t length;

          if (value == NULL)
            {
              text_warn (r, text, _("Error parsing attribute value %s[%d]."),
                         key, index);
              break;
            }

          length = strlen (value);
          if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
            {
              /* Strip the quotes. */
              value[length - 1] = '\0';
              attribute_add_value (attr, value + 1);
            }
          else
            {
              text_warn (r, text,
                         _("Attribute value %s[%d] is not quoted: %s."),
                         key, index, value);
              attribute_add_value (attr, value);
            }

          /* Was this the last value for this attribute? */
          if (text_match (text, ')'))
            break;

          index++;
        }

      if (attrs != NULL)
        attrset_add (attrs, attr);
      else
        attribute_destroy (attr);
    }
  while (!text_match (text, '/'));
}
/* Parses record type 7, subtype 17, which lists custom attributes on the
   data file, and adds the attributes to DICT's own attribute set. */
static void
parse_data_file_attributes (struct sfm_reader *r,
                            const struct sfm_extension_record *record,
                            struct dictionary *dict)
{
  struct attrset *file_attrs = dict_get_attributes (dict);
  struct text_record *text;

  text = open_text_record (r, record, true);
  parse_attributes (r, text, file_attrs);
  close_text_record (r, text);
}
/* Parses record type 7, subtype 18, which lists custom attributes on
   individual variables.  Each entry is a variable name, then `:', then that
   variable's attributes, which are added to the variable in DICT.  When an
   entry yields no variable, its attributes are parsed and discarded. */
static void
parse_variable_attributes (struct sfm_reader *r,
                           const struct sfm_extension_record *record,
                           struct dictionary *dict)
{
  struct text_record *text;
  struct variable *var;

  text = open_text_record (r, record, true);
  while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
    {
      struct attrset *attrs = NULL;

      if (var != NULL)
        attrs = var_get_attributes (var);
      parse_attributes (r, text, attrs);
    }
  close_text_record (r, text);
}
2369 assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
2371 size_t n_warnings = 0;
2374 for (i = 0; i < dict_get_var_cnt (dict); i++)
2376 struct variable *var = dict_get_var (dict, i);
2377 struct attrset *attrs = var_get_attributes (var);
2378 const struct attribute *attr = attrset_lookup (attrs, "$@Role");
2381 int value = atoi (attribute_get_value (attr, 0));
2403 role = ROLE_PARTITION;
2412 if (n_warnings++ == 0)
2413 sys_warn (r, -1, _("Invalid role for variable %s."),
2414 var_get_name (var));
2417 var_set_role (var, role);
2422 sys_warn (r, -1, _("%zu other variables had invalid roles."),
2427 check_overflow (struct sfm_reader *r,
2428 const struct sfm_extension_record *record,
2429 size_t ofs, size_t length)
2431 size_t end = record->size * record->count;
2432 if (length >= end || ofs + length > end)
2434 sys_warn (r, record->pos + end,
2435 _("Extension record subtype %d ends unexpectedly."),
2443 parse_long_string_value_labels (struct sfm_reader *r,
2444 const struct sfm_extension_record *record,
2445 struct dictionary *dict)
2447 const char *dict_encoding = dict_get_encoding (dict);
2448 size_t end = record->size * record->count;
2455 struct variable *var;
2460 /* Parse variable name length. */
2461 if (!check_overflow (r, record, ofs, 4))
2463 var_name_len = parse_int (r, record->data, ofs);
2466 /* Parse variable name, width, and number of labels. */
2467 if (!check_overflow (r, record, ofs, var_name_len)
2468 || !check_overflow (r, record, ofs, var_name_len + 8))
2470 var_name = recode_string_pool ("UTF-8", dict_encoding,
2471 (const char *) record->data + ofs,
2472 var_name_len, r->pool);
2473 width = parse_int (r, record->data, ofs + var_name_len);
2474 n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
2475 ofs += var_name_len + 8;
2477 /* Look up 'var' and validate. */
2478 var = dict_lookup_var (dict, var_name);
2480 sys_warn (r, record->pos + ofs,
2481 _("Ignoring long string value label record for "
2482 "unknown variable %s."), var_name);
2483 else if (var_is_numeric (var))
2485 sys_warn (r, record->pos + ofs,
2486 _("Ignoring long string value label record for "
2487 "numeric variable %s."), var_name);
2490 else if (width != var_get_width (var))
2492 sys_warn (r, record->pos + ofs,
2493 _("Ignoring long string value label record for variable "
2494 "%s because the record's width (%d) does not match the "
2495 "variable's width (%d)."),
2496 var_name, width, var_get_width (var));
2501 value_init_pool (r->pool, &value, width);
2502 for (i = 0; i < n_labels; i++)
2504 size_t value_length, label_length;
2505 bool skip = var == NULL;
2507 /* Parse value length. */
2508 if (!check_overflow (r, record, ofs, 4))
2510 value_length = parse_int (r, record->data, ofs);
2514 if (!check_overflow (r, record, ofs, value_length))
2518 if (value_length == width)
2519 memcpy (value_str_rw (&value, width),
2520 (const uint8_t *) record->data + ofs, width);
2523 sys_warn (r, record->pos + ofs,
2524 _("Ignoring long string value label %zu for "
2525 "variable %s, with width %d, that has bad value "
2527 i, var_get_name (var), width, value_length);
2531 ofs += value_length;
2533 /* Parse label length. */
2534 if (!check_overflow (r, record, ofs, 4))
2536 label_length = parse_int (r, record->data, ofs);
2540 if (!check_overflow (r, record, ofs, label_length))
2546 label = recode_string_pool ("UTF-8", dict_encoding,
2547 (const char *) record->data + ofs,
2548 label_length, r->pool);
2549 if (!var_add_value_label (var, &value, label))
2550 sys_warn (r, record->pos + ofs,
2551 _("Duplicate value label for `%.*s' on %s."),
2552 width, value_str (&value, width),
2553 var_get_name (var));
2554 pool_free (r->pool, label);
2556 ofs += label_length;
/* Parses the long string missing values extension RECORD, whose payload is
   RECORD->size * RECORD->count bytes long, and applies the missing values it
   lists to variables looked up by name in DICT.  Reads are guarded by
   check_overflow() and malformed entries are reported through sys_warn(). */
2562 parse_long_string_missing_values (struct sfm_reader *r,
2563 const struct sfm_extension_record *record,
2564 struct dictionary *dict)
2566 const char *dict_encoding = dict_get_encoding (dict);
2567 size_t end = record->size * record->count;
2572 struct missing_values mv;
2574 struct variable *var;
2575 int n_missing_values;
2579 /* Parse variable name length. */
2580 if (!check_overflow (r, record, ofs, 4))
2582 var_name_len = parse_int (r, record->data, ofs);
2585 /* Parse variable name. */
/* The second check also covers the one-byte missing value count that
   immediately follows the name. */
2586 if (!check_overflow (r, record, ofs, var_name_len)
2587 || !check_overflow (r, record, ofs, var_name_len + 1))
2589 var_name = recode_string_pool ("UTF-8", dict_encoding,
2590 (const char *) record->data + ofs,
2591 var_name_len, r->pool);
2592 ofs += var_name_len;
2594 /* Parse number of missing values. */
/* The count is a single unsigned byte.  NOTE(review): a count outside 1..3
   only produces a warning in the lines visible here -- confirm how the value
   loop below is bounded in that case. */
2595 n_missing_values = ((const uint8_t *) record->data)[ofs];
2596 if (n_missing_values < 1 || n_missing_values > 3)
2597 sys_warn (r, record->pos + ofs,
2598 _("Long string missing values record says variable %s "
2599 "has %d missing values, but only 1 to 3 missing values "
2601 var_name, n_missing_values);
2604 /* Look up 'var' and validate. */
2605 var = dict_lookup_var (dict, var_name);
2607 sys_warn (r, record->pos + ofs,
2608 _("Ignoring long string missing value record for "
2609 "unknown variable %s."), var_name);
2610 else if (var_is_numeric (var))
2612 sys_warn (r, record->pos + ofs,
2613 _("Ignoring long string missing value record for "
2614 "numeric variable %s."), var_name);
/* MV takes the variable's width, or 8 when VAR is null. */
2619 mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
2620 for (i = 0; i < n_missing_values; i++)
2622 size_t value_length;
2624 /* Parse value length. */
2625 if (!check_overflow (r, record, ofs, 4))
2627 value_length = parse_int (r, record->data, ofs);
2631 if (!check_overflow (r, record, ofs, value_length))
/* A value that mv_add_str() rejects is reported and otherwise ignored. */
2635 && !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
2637 sys_warn (r, record->pos + ofs,
2638 _("Ignoring long string missing value %zu for variable "
2639 "%s, with width %d, that has bad value width %zu."),
2640 i, var_get_name (var), var_get_width (var),
2642 ofs += value_length;
2645 var_set_missing_values (var, &mv);
/* Prototypes for the case-reading helpers. */
2651 static void partial_record (struct sfm_reader *);
2653 static void read_error (struct casereader *, const struct sfm_reader *);
2655 static bool read_case_number (struct sfm_reader *, double *);
2656 static int read_case_string (struct sfm_reader *, uint8_t *, size_t);
2657 static int read_opcode (struct sfm_reader *);
2658 static bool read_compressed_number (struct sfm_reader *, double *);
2659 static int read_compressed_string (struct sfm_reader *, uint8_t *);
2660 static int read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
2661 static bool skip_whole_strings (struct sfm_reader *, size_t);
2663 /* Reads and returns one case from READER's file. Returns a null
2664 pointer if not successful. */
2665 static struct ccase *
2666 sys_file_casereader_read (struct casereader *reader, void *r_)
2668 struct sfm_reader *r = r_;
/* Nothing is read once R is in an error state or when it has no
   variables. */
2673 if (r->error || !r->sfm_var_cnt)
2676 c = case_create (r->proto);
2678 for (i = 0; i < r->sfm_var_cnt; i++)
2680 struct sfm_var *sv = &r->sfm_vars[i];
2681 union value *v = case_data_rw_idx (c, sv->case_index);
/* A var_width of 0 selects the numeric read path; otherwise one string
   segment is read into the value at the segment's offset. */
2683 if (sv->var_width == 0)
2684 retval = read_case_number (r, &v->f);
2687 uint8_t *s = value_str_rw (v, sv->var_width);
2688 retval = read_case_string (r, s + sv->offset, sv->segment_width);
/* Skip the padding that follows the segment, rounded down to a multiple of
   8 bytes. */
2691 retval = skip_whole_strings (r, ROUND_DOWN (sv->padding, 8));
2693 sys_error (r, r->pos, _("File ends in partial string value."));
/* NOTE(review): presumably a case count of -1 means "unknown", so running
   out of data is only reported when a count was given -- confirm. */
2705 if (r->case_cnt != -1)
2706 read_error (reader, r);
2711 /* Issues an error that R ends in a partial record. */
2713 partial_record (struct sfm_reader *r)
2715 sys_error (r, r->pos, _("File ends in partial case."));
2718 /* Issues an error that an unspecified error occurred SFM, and
2721 read_error (struct casereader *r, const struct sfm_reader *sfm)
2723 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
2724 casereader_force_error (r);
2727 /* Reads a number from R and stores its value in *D.
2728 If R is compressed, reads a compressed number;
2729 otherwise, reads a number in the regular way.
2730 Returns true if successful, false if end of file is
2731 reached immediately. */
2733 read_case_number (struct sfm_reader *r, double *d)
2735 if (r->compression == ANY_COMP_NONE)
2738 if (!try_read_bytes (r, number, sizeof number))
2740 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
2744 return read_compressed_number (r, d);
/* Reads LENGTH string bytes from R into S.  Always reads a multiple of 8
   bytes; if LENGTH is not a multiple of 8, then extra bytes are read and
   discarded without being written to S.  Reads compressed strings if R is
   compressed.  Returns 1 if successful, 0 if end of file is reached
   immediately, or -1 for some kind of error. */
static int
read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
{
  const size_t partial = length % 8;
  const size_t whole = ROUND_DOWN (length, 8);
  uint8_t bounce[8];
  int retval;

  if (whole != 0)
    {
      retval = read_whole_strings (r, s, whole);
      if (retval != 1)
        return retval;
    }

  if (partial == 0)
    return 1;

  /* The trailing fragment still occupies a full 8-byte unit in the file, so
     read that unit into a bounce buffer and keep only the leading bytes. */
  retval = read_whole_strings (r, bounce, sizeof bounce);
  if (retval == -1)
    return -1;
  if (retval == 0)
    {
      /* End of file is only acceptable if nothing has been read yet. */
      if (whole == 0)
        return 0;
      partial_record (r);
      return -1;
    }

  memcpy (s + whole, bounce, partial);
  return 1;
}
2786 /* Reads and returns the next compression opcode from R. */
2788 read_opcode (struct sfm_reader *r)
2790 assert (r->compression != ANY_COMP_NONE);
2794 if (r->opcode_idx >= sizeof r->opcodes)
2797 int retval = try_read_compressed_bytes (r, r->opcodes,
2803 opcode = r->opcodes[r->opcode_idx++];
2810 /* Reads a compressed number from R and stores its value in D.
2811 Returns true if successful, false if end of file is
2812 reached immediately. */
2814 read_compressed_number (struct sfm_reader *r, double *d)
2816 int opcode = read_opcode (r);
2824 return read_compressed_float (r, d);
2827 float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
2828 if (!r->corruption_warning)
2830 r->corruption_warning = true;
2831 sys_warn (r, r->pos,
2832 _("Possible compressed data corruption: "
2833 "compressed spaces appear in numeric field."));
2842 *d = opcode - r->bias;
2849 /* Reads a compressed 8-byte string segment from R and stores it in DST. */
2851 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
2856 opcode = read_opcode (r);
2864 retval = read_compressed_bytes (r, dst, 8);
2865 return retval == 1 ? 1 : -1;
2868 memset (dst, ' ', 8);
2873 double value = opcode - r->bias;
2874 float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
2877 /* This has actually been seen "in the wild". The submitter of the
2878 file that showed that the contents decoded as spaces, but they
2879 were at the end of the field so it's possible that the null
2880 bytes just acted as null terminators. */
2882 else if (!r->corruption_warning)
2884 r->corruption_warning = true;
2885 sys_warn (r, r->pos,
2886 _("Possible compressed data corruption: "
2887 "string contains compressed integer (opcode %d)."),
2895 /* Reads LENGTH string bytes from R into S. LENGTH must be a multiple of 8.
2896 Reads compressed strings if S is compressed. Returns 1 if successful, 0 if
2897 end of file is reached immediately, or -1 for some kind of error. */
2899 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
2901 assert (length % 8 == 0);
2902 if (r->compression == ANY_COMP_NONE)
2903 return try_read_bytes (r, s, length);
2908 for (ofs = 0; ofs < length; ofs += 8)
2910 int retval = read_compressed_string (r, s + ofs);
2925 /* Skips LENGTH string bytes from R.
2926 LENGTH must be a multiple of 8.
2927 (LENGTH is also limited to 1024, but that's only because the
2928 current caller never needs more than that many bytes.)
2929 Returns true if successful, false if end of file is
2930 reached immediately. */
2932 skip_whole_strings (struct sfm_reader *r, size_t length)
2934 uint8_t buffer[1024];
2935 assert (length < sizeof buffer);
2936 return read_whole_strings (r, buffer, length);
2939 /* Helpers for reading records that contain structured text
2942 /* Maximum number of warnings to issue for a single text
2944 #define MAX_TEXT_WARNINGS 5
2949 struct substring buffer; /* Record contents. */
2950 off_t start; /* Starting offset in file. */
2951 size_t pos; /* Current position in buffer. */
2952 int n_warnings; /* Number of warnings issued or suppressed. */
2953 bool recoded; /* Recoded into UTF-8? */
2956 static struct text_record *
2957 open_text_record (struct sfm_reader *r,
2958 const struct sfm_extension_record *record,
2959 bool recode_to_utf8)
2961 struct text_record *text;
2962 struct substring raw;
2964 text = pool_alloc (r->pool, sizeof *text);
2965 raw = ss_buffer (record->data, record->size * record->count);
2966 text->start = record->pos;
2967 text->buffer = (recode_to_utf8
2968 ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
2971 text->n_warnings = 0;
2972 text->recoded = recode_to_utf8;
2977 /* Closes TEXT, frees its storage, and issues a final warning
2978 about suppressed warnings if necesary. */
2980 close_text_record (struct sfm_reader *r, struct text_record *text)
2982 if (text->n_warnings > MAX_TEXT_WARNINGS)
2983 sys_warn (r, -1, _("Suppressed %d additional related warnings."),
2984 text->n_warnings - MAX_TEXT_WARNINGS);
2986 pool_free (r->pool, ss_data (text->buffer));
2989 /* Reads a variable=value pair from TEXT.
2990 Looks up the variable in DICT and stores it into *VAR.
2991 Stores a null-terminated value into *VALUE. */
2993 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
2994 struct text_record *text,
2995 struct variable **var, char **value)
2999 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
3002 *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
3006 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
3007 ss_buffer ("\t\0", 2));
3015 text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
3016 struct text_record *text, struct substring delimiters,
3017 struct variable **var)
3021 name = text_get_token (text, delimiters, NULL);
3025 *var = dict_lookup_var (dict, name);
3029 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
3036 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
3037 struct text_record *text, struct substring delimiters,
3038 struct variable **var)
3040 char *short_name = text_get_token (text, delimiters, NULL);
3041 if (short_name == NULL)
3044 *var = dict_lookup_var (dict, short_name);
3046 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
3051 /* Displays a warning for the current file position, limiting the
3052 number to MAX_TEXT_WARNINGS for TEXT. */
3054 text_warn (struct sfm_reader *r, struct text_record *text,
3055 const char *format, ...)
3057 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
3061 va_start (args, format);
3062 sys_msg (r, text->start + text->pos, MW, format, args);
3068 text_get_token (struct text_record *text, struct substring delimiters,
3071 struct substring token;
3074 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
3077 end = &ss_data (token)[ss_length (token)];
3078 if (delimiter != NULL)
3081 return ss_data (token);
3084 /* Reads a integer value expressed in decimal, then a space, then a string that
3085 consists of exactly as many bytes as specified by the integer, then a space,
3086 from TEXT. Returns the string, null-terminated, as a subset of TEXT's
3087 buffer (so the caller should not free the string). */
3089 text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
3097 while (text->pos < text->buffer.length)
3099 int c = text->buffer.string[text->pos];
3100 if (c < '0' || c > '9')
3102 n = (n * 10) + (c - '0');
3105 if (text->pos >= text->buffer.length || start == text->pos)
3107 sys_warn (r, text->start,
3108 _("Expecting digit at offset %zu in MRSETS record."),
3113 if (!text_match (text, ' '))
3115 sys_warn (r, text->start,
3116 _("Expecting space at offset %zu in MRSETS record."),
3121 if (text->pos + n > text->buffer.length)
3123 sys_warn (r, text->start,
3124 _("%zu-byte string starting at offset %zu "
3125 "exceeds record length %zu."),
3126 n, text->pos, text->buffer.length);
3130 s = &text->buffer.string[text->pos];
3133 sys_warn (r, text->start,
3134 _("Expecting space at offset %zu following %zu-byte string."),
3144 text_match (struct text_record *text, char c)
3146 if (text->pos >= text->buffer.length)
3149 if (text->buffer.string[text->pos] == c)
3158 /* Returns the current byte offset (as converted to UTF-8, if it was converted)
3159 inside the TEXT's string. */
3161 text_pos (const struct text_record *text)
3167 text_get_all (const struct text_record *text)
3169 return text->buffer.string;
3174 /* Displays a corruption message. */
/* R supplies the file name, OFFSET the file position to mention, CLASS the
   message class from which category and severity are derived, and FORMAT and
   ARGS the printf-style message body. */
3176 sys_msg (struct sfm_reader *r, off_t offset,
3177 int class, const char *format, va_list args)
3182 ds_init_empty (&text);
/* The message starts with a prefix naming the file; one form of the prefix
   also gives the offset in hexadecimal. */
3184 ds_put_format (&text, _("`%s' near offset 0x%llx: "),
3185 fh_get_file_name (r->fh), (long long int) offset);
3187 ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
3188 ds_put_vformat (&text, format, args);
3190 m.category = msg_class_to_category (class);
3191 m.severity = msg_class_to_severity (class);
3197 m.text = ds_cstr (&text);
3202 /* Displays a warning for offset OFFSET in the file. */
3204 sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
3208 va_start (args, format);
3209 sys_msg (r, offset, MW, format, args);
3213 /* Displays an error for the current file position and marks it as in an error
3216 sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
3220 va_start (args, format);
3221 sys_msg (r, offset, ME, format, args);
3227 /* Reads BYTE_CNT bytes into BUF.
3228 Returns 1 if exactly BYTE_CNT bytes are successfully read.
3229 Returns -1 if an I/O error or a partial read occurs.
3230 Returns 0 for an immediate end-of-file and, if EOF_IS_OK is false, reports
3233 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
3234 void *buf, size_t byte_cnt)
3236 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
3237 r->pos += bytes_read;
3238 if (bytes_read == byte_cnt)
3240 else if (ferror (r->file))
3242 sys_error (r, r->pos, _("System error: %s."), strerror (errno));
3245 else if (!eof_is_ok || bytes_read != 0)
3247 sys_error (r, r->pos, _("Unexpected end of file."));
/* Reads BYTE_CNT bytes into BUF.
   Returns true if successful.
   Returns false upon I/O error or if end-of-file is encountered. */
static bool
read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
  int status = read_bytes_internal (r, false, buf, byte_cnt);

  return status == 1;
}
/* Reads BYTE_CNT bytes into BUF, treating an immediate end-of-file as a
   normal outcome.
   Returns 1 if exactly BYTE_CNT bytes are successfully read.
   Returns 0 if an immediate end-of-file is encountered.
   Returns -1 if an I/O error or a partial read occurs. */
static int
try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
  int status = read_bytes_internal (r, true, buf, byte_cnt);

  return status;
}
3273 /* Reads a 32-bit signed integer from R and stores its value in host format in
3274 *X. Returns true if successful, otherwise false. */
3276 read_int (struct sfm_reader *r, int *x)
3279 if (read_bytes (r, integer, sizeof integer) != 1)
3281 *x = integer_get (r->integer_format, integer, sizeof integer);
/* Reads a 32-bit unsigned integer from R and stores its value in host format
   in *X.  Returns true if successful, otherwise false, in which case *X is
   set to 0. */
static bool
read_uint (struct sfm_reader *r, unsigned int *x)
{
  /* Initialized so that a failed read never copies an indeterminate value
     into *X. */
  int y = 0;
  bool ok;

  ok = read_int (r, &y);
  *x = y;
  return ok;
}
3296 /* Reads a 64-bit signed integer from R and returns its value in
3299 read_int64 (struct sfm_reader *r, long long int *x)
3302 if (read_bytes (r, integer, sizeof integer) != 1)
3304 *x = integer_get (r->integer_format, integer, sizeof integer);
/* Reads a 64-bit unsigned integer from R and stores its value in host format
   in *X.  Returns true if successful, otherwise false, in which case *X is
   set to 0. */
static bool
read_uint64 (struct sfm_reader *r, unsigned long long int *x)
{
  /* Initialized so that a failed read never copies an indeterminate value
     into *X. */
  long long int y = 0;
  bool ok;

  ok = read_int64 (r, &y);
  *x = y;
  return ok;
}
3322 parse_int (const struct sfm_reader *r, const void *data, size_t ofs)
3324 return integer_get (r->integer_format, (const uint8_t *) data + ofs, 4);
3328 parse_float (const struct sfm_reader *r, const void *data, size_t ofs)
3330 return float_get_double (r->float_format, (const uint8_t *) data + ofs);
/* Reads exactly SIZE - 1 bytes into BUFFER
   and stores a null byte into BUFFER[SIZE - 1].
   SIZE must be positive.  Returns true if successful; on failure BUFFER is
   not terminated. */
static bool
read_string (struct sfm_reader *r, char *buffer, size_t size)
{
  assert (size > 0);
  if (!read_bytes (r, buffer, size - 1))
    return false;

  buffer[size - 1] = '\0';
  return true;
}
/* Skips BYTES bytes forward in R by reading and discarding them in chunks of
   at most 1024 bytes.  Returns true if successful, false if a read fails. */
static bool
skip_bytes (struct sfm_reader *r, size_t bytes)
{
  char discard[1024];
  size_t left;

  for (left = bytes; left > 0; )
    {
      size_t chunk = MIN (sizeof discard, left);
      if (!read_bytes (r, discard, chunk))
        return false;
      left -= chunk;
    }

  return true;
}
/* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have
   been replaced by LFs.

   (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
   files that use CR-only line ends in the file label and extra product
   info). */
static char *
fix_line_ends (const char *s)
{
  char *dst = xmalloc (strlen (s) + 1);
  char *d = dst;

  for (; *s != '\0'; s++)
    {
      if (*s != '\r')
        {
          *d++ = *s;
          continue;
        }

      /* Fold a CR LF pair into a single LF by stepping over the CR here and
         letting the loop increment step over the LF. */
      if (s[1] == '\n')
        s++;
      *d++ = '\n';
    }
  *d = '\0';

  return dst;
}
/* Prototype for read_ztrailer(). */
3393 read_ztrailer (struct sfm_reader *r,
3394 long long int zheader_ofs,
3395 long long int ztrailer_len);
3398 zalloc (voidpf pool_, uInt items, uInt size)
3400 struct pool *pool = pool_;
3402 return (!size || xalloc_oversized (items, size)
3404 : pool_malloc (pool, items * size));
3408 zfree (voidpf pool_, voidpf address)
3410 struct pool *pool = pool_;
3412 pool_free (pool, address);
3416 read_zheader (struct sfm_reader *r)
3419 long long int zheader_ofs;
3420 long long int ztrailer_ofs;
3421 long long int ztrailer_len;
3423 if (!read_int64 (r, &zheader_ofs)
3424 || !read_int64 (r, &ztrailer_ofs)
3425 || !read_int64 (r, &ztrailer_len))
3428 if (zheader_ofs != pos)
3430 sys_error (r, pos, _("Wrong ZLIB data header offset %#llx "
3431 "(expected %#llx)."),
3432 zheader_ofs, (long long int) pos);
3436 if (ztrailer_ofs < r->pos)
3438 sys_error (r, pos, _("Impossible ZLIB trailer offset 0x%llx."),
3443 if (ztrailer_len < 24 || ztrailer_len % 24)
3445 sys_error (r, pos, _("Invalid ZLIB trailer length %lld."), ztrailer_len);
3449 r->ztrailer_ofs = ztrailer_ofs;
3450 if (!read_ztrailer (r, zheader_ofs, ztrailer_len))
3453 if (r->zin_buf == NULL)
3455 r->zin_buf = pool_malloc (r->pool, ZIN_BUF_SIZE);
3456 r->zout_buf = pool_malloc (r->pool, ZOUT_BUF_SIZE);
3457 r->zstream.next_in = NULL;
3458 r->zstream.avail_in = 0;
3461 r->zstream.zalloc = zalloc;
3462 r->zstream.zfree = zfree;
3463 r->zstream.opaque = r->pool;
3465 return open_zstream (r);
3469 seek (struct sfm_reader *r, off_t offset)
3471 if (fseeko (r->file, offset, SEEK_SET))
3472 sys_error (r, 0, _("%s: seek failed (%s)."),
3473 fh_get_file_name (r->fh), strerror (errno));
3477 /* Performs some additional consistency checks on the ZLIB compressed data
3480 read_ztrailer (struct sfm_reader *r,
3481 long long int zheader_ofs,
3482 long long int ztrailer_len)
3484 long long int expected_uncmp_ofs;
3485 long long int expected_cmp_ofs;
3488 unsigned int block_size;
3489 unsigned int n_blocks;
3493 if (fstat (fileno (r->file), &s))
3495 sys_error (ME, 0, _("%s: stat failed (%s)."),
3496 fh_get_file_name (r->fh), strerror (errno));
3500 if (!S_ISREG (s.st_mode))
3502 /* We can't seek to the trailer and then back to the data in this file,
3503 so skip doing extra checks. */
3507 if (r->ztrailer_ofs + ztrailer_len != s.st_size)
3508 sys_warn (r, r->pos,
3509 _("End of ZLIB trailer (0x%llx) is not file size (0x%llx)."),
3510 r->ztrailer_ofs + ztrailer_len, (long long int) s.st_size);
3512 seek (r, r->ztrailer_ofs);
3514 /* Read fixed header from ZLIB data trailer. */
3515 if (!read_int64 (r, &bias))
3517 if (-bias != r->bias)
3519 sys_error (r, r->pos, _("ZLIB trailer bias (%lld) differs from "
3520 "file header bias (%.2f)."),
3525 if (!read_int64 (r, &zero))
3528 sys_warn (r, r->pos,
3529 _("ZLIB trailer \"zero\" field has nonzero value %lld."), zero);
3531 if (!read_uint (r, &block_size))
3533 if (block_size != ZBLOCK_SIZE)
3534 sys_warn (r, r->pos,
3535 _("ZLIB trailer specifies unexpected %u-byte block size."),
3538 if (!read_uint (r, &n_blocks))
3540 if (n_blocks != (ztrailer_len - 24) / 24)
3542 sys_error (r, r->pos,
3543 _("%lld-byte ZLIB trailer specifies %u data blocks (expected "
3545 ztrailer_len, n_blocks, (ztrailer_len - 24) / 24);
3549 expected_uncmp_ofs = zheader_ofs;
3550 expected_cmp_ofs = zheader_ofs + 24;
3551 for (i = 0; i < n_blocks; i++)
3553 off_t desc_ofs = r->pos;
3554 unsigned long long int uncompressed_ofs;
3555 unsigned long long int compressed_ofs;
3556 unsigned int uncompressed_size;
3557 unsigned int compressed_size;
3559 if (!read_uint64 (r, &uncompressed_ofs)
3560 || !read_uint64 (r, &compressed_ofs)
3561 || !read_uint (r, &uncompressed_size)
3562 || !read_uint (r, &compressed_size))
3565 if (uncompressed_ofs != expected_uncmp_ofs)
3567 sys_error (r, desc_ofs,
3568 _("ZLIB block descriptor %u reported uncompressed data "
3569 "offset %#llx, when %#llx was expected."),
3570 i, uncompressed_ofs, expected_uncmp_ofs);
3574 if (compressed_ofs != expected_cmp_ofs)
3576 sys_error (r, desc_ofs,
3577 _("ZLIB block descriptor %u reported compressed data "
3578 "offset %#llx, when %#llx was expected."),
3579 i, compressed_ofs, expected_cmp_ofs);
3583 if (i < n_blocks - 1)
3585 if (uncompressed_size != block_size)
3586 sys_warn (r, desc_ofs,
3587 _("ZLIB block descriptor %u reported block size %#x, "
3588 "when %#x was expected."),
3589 i, uncompressed_size, block_size);
3593 if (uncompressed_size > block_size)
3594 sys_warn (r, desc_ofs,
3595 _("ZLIB block descriptor %u reported block size %#x, "
3596 "when at most %#x was expected."),
3597 i, uncompressed_size, block_size);
3600 /* http://www.zlib.net/zlib_tech.html says that the maximum expansion
3601 from compression, with worst-case parameters, is 13.5% plus 11 bytes.
3602 This code checks for an expansion of more than 14.3% plus 11
3604 if (compressed_size > uncompressed_size + uncompressed_size / 7 + 11)
3606 sys_error (r, desc_ofs,
3607 _("ZLIB block descriptor %u reports compressed size %u "
3608 "and uncompressed size %u."),
3609 i, compressed_size, uncompressed_size);
3613 expected_uncmp_ofs += uncompressed_size;
3614 expected_cmp_ofs += compressed_size;
3617 if (expected_cmp_ofs != r->ztrailer_ofs)
3619 sys_error (r, r->pos, _("ZLIB trailer is at offset %#llx but %#llx "
3620 "would be expected from block descriptors."),
3621 r->ztrailer_ofs, expected_cmp_ofs);
3625 seek (r, zheader_ofs + 24);
3630 open_zstream (struct sfm_reader *r)
3634 r->zout_pos = r->zout_end = 0;
3635 error = inflateInit (&r->zstream);
3638 sys_error (r, r->pos, _("ZLIB initialization failed (%s)."),
3646 close_zstream (struct sfm_reader *r)
3650 error = inflateEnd (&r->zstream);
3653 sys_error (r, r->pos, _("Inconsistency at end of ZLIB stream (%s)."),
/* Copies BYTE_CNT bytes of inflated data from R into BUF_, reading more
   compressed input from the file and inflating it as needed.
   NOTE(review): the return statements are not visible here; the callers treat
   the result as 1 for success, 0 for end of data and -1 for an error --
   confirm. */
3661 read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt)
3663 uint8_t *buf = buf_;
3672 /* Use already inflated data if there is any. */
3673 if (r->zout_pos < r->zout_end)
3675 unsigned int n = MIN (byte_cnt, r->zout_end - r->zout_pos);
3676 memcpy (buf, &r->zout_buf[r->zout_pos], n);
3685 /* We need to inflate some more data.
3686 Get some more input data if we don't have any. */
3687 if (r->zstream.avail_in == 0)
/* Never read compressed input past the start of the ZLIB trailer. */
3689 unsigned int n = MIN (ZIN_BUF_SIZE, r->ztrailer_ofs - r->pos);
3694 int retval = try_read_bytes (r, r->zin_buf, n);
3697 r->zstream.avail_in = n;
3698 r->zstream.next_in = r->zin_buf;
3702 /* Inflate the (remaining) input data. */
3703 r->zstream.avail_out = ZOUT_BUF_SIZE;
3704 r->zstream.next_out = r->zout_buf;
3705 error = inflate (&r->zstream, Z_SYNC_FLUSH);
/* The amount of output produced is how far inflate() advanced next_out. */
3707 r->zout_end = r->zstream.next_out - r->zout_buf;
3708 if (r->zout_end == 0)
/* With no output, anything other than a clean end of stream is an error; at
   a clean end the stream is closed and reopened. */
3710 if (error != Z_STREAM_END)
3712 sys_error (r, r->pos, _("ZLIB stream inconsistency (%s)."),
3716 else if (!close_zstream (r) || !open_zstream (r))
3721 /* Process the output data and ignore 'error' for now. ZLIB will
3722 present it to us again on the next inflate() call. */
3728 read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3730 if (r->compression == ANY_COMP_SIMPLE)
3731 return read_bytes (r, buf, byte_cnt);
3734 int retval = read_bytes_zlib (r, buf, byte_cnt);
3736 sys_error (r, r->pos, _("Unexpected end of ZLIB compressed data."));
3742 try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3744 if (r->compression == ANY_COMP_SIMPLE)
3745 return try_read_bytes (r, buf, byte_cnt);
3747 return read_bytes_zlib (r, buf, byte_cnt);
3750 /* Reads a 64-bit floating-point number from R and returns its
3751 value in host format. */
3753 read_compressed_float (struct sfm_reader *r, double *d)
3757 if (!read_compressed_bytes (r, number, sizeof number))
3760 *d = float_get_double (r->float_format, number);
/* Casereader class for system files; the members visible here are its read
   and destroy callbacks. */
3764 static const struct casereader_class sys_file_casereader_class =
3766 sys_file_casereader_read,
3767 sys_file_casereader_destroy,
3772 const struct any_reader_class sys_file_reader_class =
3774 N_("SPSS System File"),