1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/sys-file-private.h"
28 #include "data/any-reader.h"
29 #include "data/attributes.h"
30 #include "data/case.h"
31 #include "data/casereader-provider.h"
32 #include "data/casereader.h"
33 #include "data/dictionary.h"
34 #include "data/file-handle-def.h"
35 #include "data/file-name.h"
36 #include "data/format.h"
37 #include "data/identifier.h"
38 #include "data/missing-values.h"
39 #include "data/mrset.h"
40 #include "data/short-names.h"
41 #include "data/value-labels.h"
42 #include "data/value.h"
43 #include "data/variable.h"
44 #include "libpspp/array.h"
45 #include "libpspp/assertion.h"
46 #include "libpspp/compiler.h"
47 #include "libpspp/i18n.h"
48 #include "libpspp/ll.h"
49 #include "libpspp/message.h"
50 #include "libpspp/misc.h"
51 #include "libpspp/pool.h"
52 #include "libpspp/str.h"
53 #include "libpspp/stringi-set.h"
55 #include "gl/c-strtod.h"
56 #include "gl/c-ctype.h"
57 #include "gl/inttostr.h"
58 #include "gl/localcharset.h"
59 #include "gl/minmax.h"
60 #include "gl/unlocked-io.h"
61 #include "gl/xalloc.h"
62 #include "gl/xalloc-oversized.h"
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) (msgid)
71 /* subtypes 0-2 unknown */
72 EXT_INTEGER = 3, /* Machine integer info. */
73 EXT_FLOAT = 4, /* Machine floating-point info. */
74 EXT_VAR_SETS = 5, /* Variable sets. */
75 EXT_DATE = 6, /* DATE. */
76 EXT_MRSETS = 7, /* Multiple response sets. */
77 EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */
78 /* subtype 9 unknown */
79 EXT_PRODUCT_INFO = 10, /* Extra product info text. */
80 EXT_DISPLAY = 11, /* Variable display parameters. */
81 /* subtype 12 unknown */
82 EXT_LONG_NAMES = 13, /* Long variable names. */
83 EXT_LONG_STRINGS = 14, /* Long strings. */
84 /* subtype 15 unknown */
85 EXT_NCASES = 16, /* Extended number of cases. */
86 EXT_FILE_ATTRS = 17, /* Data file attributes. */
87 EXT_VAR_ATTRS = 18, /* Variable attributes. */
88 EXT_MRSETS2 = 19, /* Multiple response sets (extended). */
89 EXT_ENCODING = 20, /* Character encoding. */
90 EXT_LONG_LABELS = 21, /* Value labels for long strings. */
91 EXT_LONG_MISSING = 22, /* Missing values for long strings. */
92 EXT_DATAVIEW = 24 /* "Format properties in dataview table". */
95 /* Fields from the top-level header record. */
96 struct sfm_header_record
98 char magic[5]; /* First 4 bytes of file, then null. */
99 int weight_idx; /* 0 if unweighted, otherwise a var index. */
100 int nominal_case_size; /* Number of var positions. */
102 /* These correspond to the members of struct any_read_info or a dictionary
103 but in the system file's encoding rather than ASCII. */
104 char creation_date[10]; /* "dd mmm yy". */
105 char creation_time[9]; /* "hh:mm:ss". */
106 char eye_catcher[61]; /* Eye-catcher string, then product name. */
107 char file_label[65]; /* File label. */
110 struct sfm_var_record
117 int missing_value_code;
120 struct variable *var;
123 struct sfm_value_label
129 struct sfm_value_label_record
132 struct sfm_value_label *labels;
133 unsigned int n_labels;
139 struct sfm_document_record
148 const char *name; /* Name. */
149 const char *label; /* Human-readable label for group. */
150 enum mrset_type type; /* Group type. */
151 const char **vars; /* Constituent variables' names. */
152 size_t n_vars; /* Number of constituent variables. */
155 enum mrset_md_cat_source cat_source; /* Source of category labels. */
156 bool label_from_var_label; /* 'label' taken from variable label? */
157 const char *counted; /* Counted value, as string. */
160 struct sfm_extension_record
162 struct ll ll; /* In struct sfm_reader 'var_attrs' list. */
163 int subtype; /* Record subtype. */
164 off_t pos; /* Starting offset in file. */
165 unsigned int size; /* Size of data elements. */
166 unsigned int count; /* Number of data elements. */
167 void *data; /* Contents. */
170 /* System file reader. */
173 struct any_reader any_reader;
175 /* Resource tracking. */
176 struct pool *pool; /* All system file state. */
179 struct any_read_info info;
180 struct sfm_header_record header;
181 struct sfm_var_record *vars;
183 struct sfm_value_label_record *labels;
185 struct sfm_document_record *document;
186 struct sfm_mrset *mrsets;
188 struct sfm_extension_record *extensions[32];
189 struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */
192 struct file_handle *fh; /* File handle. */
193 struct fh_lock *lock; /* Mutual exclusion for file handle. */
194 FILE *file; /* File stream. */
195 off_t pos; /* Position in file. */
196 bool error; /* I/O or corruption error? */
197 struct caseproto *proto; /* Format of output cases. */
200 enum integer_format integer_format; /* On-disk integer format. */
201 enum float_format float_format; /* On-disk floating point format. */
202 struct sfm_var *sfm_vars; /* Variables. */
203 size_t sfm_var_cnt; /* Number of variables. */
204 int case_cnt; /* Number of cases */
205 const char *encoding; /* String encoding. */
206 bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */
209 enum any_compression compression;
210 double bias; /* Compression bias, usually 100.0. */
211 uint8_t opcodes[8]; /* Current block of opcodes. */
212 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
213 bool corruption_warning; /* Warned about possible corruption? */
215 /* ZLIB decompression. */
216 long long int ztrailer_ofs; /* Offset of ZLIB trailer at end of file. */
217 #define ZIN_BUF_SIZE 4096
218 uint8_t *zin_buf; /* Inflation input buffer. */
219 #define ZOUT_BUF_SIZE 16384
220 uint8_t *zout_buf; /* Inflation output buffer. */
221 unsigned int zout_end; /* Number of bytes of data in zout_buf. */
222 unsigned int zout_pos; /* First unconsumed byte in zout_buf. */
223 z_stream zstream; /* ZLIB inflater. */
226 static const struct casereader_class sys_file_casereader_class;
/* Downcasts R_ to the struct sfm_reader that embeds it as its 'any_reader'
   member, after asserting that R_'s class is sys_file_reader_class. */
228 static struct sfm_reader *
229 sfm_reader_cast (const struct any_reader *r_)
231 assert (r_->klass == &sys_file_reader_class);
232 return UP_CAST (r_, struct sfm_reader, any_reader);
235 static bool sfm_close (struct any_reader *);
237 static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
238 const struct sfm_var_record *,
241 static void sys_msg (struct sfm_reader *r, off_t, int class,
242 const char *format, va_list args)
243 PRINTF_FORMAT (4, 0);
244 static void sys_warn (struct sfm_reader *, off_t, const char *, ...)
245 PRINTF_FORMAT (3, 4);
246 static void sys_error (struct sfm_reader *, off_t, const char *, ...)
247 PRINTF_FORMAT (3, 4);
249 static bool read_bytes (struct sfm_reader *, void *, size_t)
251 static int try_read_bytes (struct sfm_reader *, void *, size_t)
253 static bool read_int (struct sfm_reader *, int *) WARN_UNUSED_RESULT;
254 static bool read_uint (struct sfm_reader *, unsigned int *) WARN_UNUSED_RESULT;
255 static bool read_int64 (struct sfm_reader *, long long int *)
257 static bool read_uint64 (struct sfm_reader *, unsigned long long int *)
259 static bool read_string (struct sfm_reader *, char *, size_t)
261 static bool skip_bytes (struct sfm_reader *, size_t) WARN_UNUSED_RESULT;
263 /* ZLIB compressed data handling. */
264 static bool read_zheader (struct sfm_reader *) WARN_UNUSED_RESULT;
265 static bool open_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
266 static bool close_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
267 static int read_bytes_zlib (struct sfm_reader *, void *, size_t)
269 static int read_compressed_bytes (struct sfm_reader *, void *, size_t)
271 static int try_read_compressed_bytes (struct sfm_reader *, void *, size_t)
273 static bool read_compressed_float (struct sfm_reader *, double *)
276 static char *fix_line_ends (const char *);
278 static int parse_int (const struct sfm_reader *, const void *data, size_t ofs);
279 static double parse_float (const struct sfm_reader *,
280 const void *data, size_t ofs);
282 static bool read_variable_record (struct sfm_reader *,
283 struct sfm_var_record *);
284 static bool read_value_label_record (struct sfm_reader *,
285 struct sfm_value_label_record *);
286 static bool read_document_record (struct sfm_reader *);
287 static bool read_extension_record (struct sfm_reader *, int subtype,
288 struct sfm_extension_record **);
289 static bool skip_extension_record (struct sfm_reader *, int subtype);
291 static struct text_record *open_text_record (
292 struct sfm_reader *, const struct sfm_extension_record *,
293 bool recode_to_utf8);
294 static void close_text_record (struct sfm_reader *,
295 struct text_record *);
296 static bool read_variable_to_value_pair (struct sfm_reader *,
298 struct text_record *,
299 struct variable **var, char **value);
300 static void text_warn (struct sfm_reader *r, struct text_record *text,
301 const char *format, ...) PRINTF_FORMAT (3, 4);
302 static char *text_get_token (struct text_record *,
303 struct substring delimiters, char *delimiter);
304 static bool text_match (struct text_record *, char c);
305 static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
306 struct text_record *,
307 struct substring delimiters,
309 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
310 struct text_record *,
311 struct substring delimiters,
313 static const char *text_parse_counted_string (struct sfm_reader *,
314 struct text_record *);
315 static size_t text_pos (const struct text_record *);
316 static const char *text_get_all (const struct text_record *);
318 /* Dictionary reader. */
326 static bool read_dictionary (struct sfm_reader *);
327 static bool read_record (struct sfm_reader *, int type,
328 size_t *allocated_vars, size_t *allocated_labels);
329 static bool read_header (struct sfm_reader *, struct any_read_info *,
330 struct sfm_header_record *);
331 static void parse_header (struct sfm_reader *,
332 const struct sfm_header_record *,
333 struct any_read_info *, struct dictionary *);
334 static bool parse_variable_records (struct sfm_reader *, struct dictionary *,
335 struct sfm_var_record *, size_t n);
336 static void parse_format_spec (struct sfm_reader *, off_t pos,
337 unsigned int format, enum which_format,
338 struct variable *, int *format_warning_cnt);
339 static void parse_document (struct dictionary *, struct sfm_document_record *);
340 static void parse_display_parameters (struct sfm_reader *,
341 const struct sfm_extension_record *,
342 struct dictionary *);
343 static bool parse_machine_integer_info (struct sfm_reader *,
344 const struct sfm_extension_record *,
345 struct any_read_info *);
346 static void parse_machine_float_info (struct sfm_reader *,
347 const struct sfm_extension_record *);
348 static void parse_extra_product_info (struct sfm_reader *,
349 const struct sfm_extension_record *,
350 struct any_read_info *);
351 static void parse_mrsets (struct sfm_reader *,
352 const struct sfm_extension_record *,
353 size_t *allocated_mrsets);
354 static void decode_mrsets (struct sfm_reader *, struct dictionary *);
355 static void parse_long_var_name_map (struct sfm_reader *,
356 const struct sfm_extension_record *,
357 struct dictionary *);
358 static bool parse_long_string_map (struct sfm_reader *,
359 const struct sfm_extension_record *,
360 struct dictionary *);
361 static bool parse_value_labels (struct sfm_reader *, struct dictionary *,
362 const struct sfm_var_record *,
364 const struct sfm_value_label_record *);
365 static void parse_data_file_attributes (struct sfm_reader *,
366 const struct sfm_extension_record *,
367 struct dictionary *);
368 static void parse_variable_attributes (struct sfm_reader *,
369 const struct sfm_extension_record *,
370 struct dictionary *);
371 static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
372 static void parse_long_string_value_labels (struct sfm_reader *,
373 const struct sfm_extension_record *,
374 struct dictionary *);
375 static void parse_long_string_missing_values (
376 struct sfm_reader *, const struct sfm_extension_record *,
377 struct dictionary *);
379 /* Frees the strings inside INFO: its creation date, creation time, product,
   and extended product strings.  Each is released with free(). */
381 any_read_info_destroy (struct any_read_info *info)
385 free (info->creation_date);
386 free (info->creation_time);
387 free (info->product);
388 free (info->product_ext);
392 /* Tries to open FH for reading as a system file. Returns an sfm_reader if
393 successful, otherwise NULL. */
394 static struct any_reader *
395 sfm_open (struct file_handle *fh)
397 size_t allocated_mrsets = 0;
398 struct sfm_reader *r;
400 /* Create and initialize reader. */
401 r = xzalloc (sizeof *r);
402 r->any_reader.klass = &sys_file_reader_class;
/* R is registered with its own pool together with free(), presumably so that
   destroying the pool also releases R itself -- TODO confirm against
   pool_register(). */
403 r->pool = pool_create ();
404 pool_register (r->pool, free, r);
/* Mark the opcode block as exhausted: opcode_idx equal to the size of
   'opcodes' means no opcodes are left to interpret. */
406 r->opcode_idx = sizeof r->opcodes;
407 ll_init (&r->var_attrs);
409 /* TRANSLATORS: this fragment will be interpolated into
410 messages in fh_lock() that identify types of files. */
411 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
/* The file is opened in binary mode for reading. */
415 r->file = fn_open (fh, "rb");
418 msg (ME, _("Error opening `%s' for reading as a system file: %s."),
419 fh_get_file_name (r->fh), strerror (errno));
423 if (!read_dictionary (r))
/* Both kinds of multiple response set record, if present, are parsed with the
   same ALLOCATED_MRSETS capacity counter. */
426 if (r->extensions[EXT_MRSETS] != NULL)
427 parse_mrsets (r, r->extensions[EXT_MRSETS], &allocated_mrsets);
429 if (r->extensions[EXT_MRSETS2] != NULL)
430 parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
432 return &r->any_reader;
/* NOTE(review): presumably the error path (its label is not visible in this
   listing); the reader is closed with sfm_close(). */
436 sfm_close (&r->any_reader);
/* Reads R's header with read_header() and then record data: a record type is
   read with read_int() and passed to read_record() along with the running
   capacities of the variable and value label arrays.  Afterward skips 4
   bytes and, if the file is ZLIB-compressed, reads the ZLIB header.
   NOTE(review): the loop around the record reads is not visible in this
   listing -- confirm its termination condition against the full source. */
441 read_dictionary (struct sfm_reader *r)
443 size_t allocated_vars;
444 size_t allocated_labels;
446 if (!read_header (r, &r->info, &r->header))
450 allocated_labels = 0;
455 if (!read_int (r, &type))
459 if (!read_record (r, type, &allocated_vars, &allocated_labels))
463 if (!skip_bytes (r, 4))
466 if (r->compression == ANY_COMP_ZLIB && !read_zheader (r))
/* Reads the body of one record of the given TYPE from R.  The type code itself
   has already been read by the caller (see read_dictionary()).
   *ALLOCATED_VARS and *ALLOCATED_LABELS are the capacities of r->vars and
   r->labels; they are passed to pool_2nrealloc() when those arrays are full,
   which presumably updates them.
   NOTE(review): the 'switch' and 'case' lines are not visible in this
   listing, so the mapping of TYPE values to branches below is inferred from
   the error messages only. */
473 read_record (struct sfm_reader *r, int type,
474 size_t *allocated_vars, size_t *allocated_labels)
/* Variable record: grow r->vars if it is full, then read into the next free
   slot. */
481 if (r->n_vars >= *allocated_vars)
482 r->vars = pool_2nrealloc (r->pool, r->vars, allocated_vars,
484 return read_variable_record (r, &r->vars[r->n_vars++]);
/* Value label record: likewise for r->labels. */
487 if (r->n_labels >= *allocated_labels)
488 r->labels = pool_2nrealloc (r->pool, r->labels, allocated_labels,
490 return read_value_label_record (r, &r->labels[r->n_labels++]);
493 /* A Type 4 record is always immediately after a type 3 record,
494 so the code for type 3 records reads the type 4 record too. */
495 sys_error (r, r->pos, _("Misplaced type 4 record."));
/* Only one document record is allowed per file. */
499 if (r->document != NULL)
501 sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
504 return read_document_record (r);
/* Extension record: the subtype decides where the record is stored.  A
   subtype that does not index into r->extensions[] is reported and
   skipped. */
507 if (!read_int (r, &subtype))
510 || subtype >= sizeof r->extensions / sizeof *r->extensions)
513 _("Unrecognized record type 7, subtype %d. For help, "
514 "please send this file to %s and mention that you were "
516 subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
517 return skip_extension_record (r, subtype);
519 else if (subtype == 18)
521 /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
522 put each variable attribute into a separate record with subtype
523 18. I'm surprised that SPSS puts up with this. */
524 struct sfm_extension_record *ext;
525 bool ok = read_extension_record (r, subtype, &ext);
/* Subtype 18 records are chained onto r->var_attrs rather than stored in
   r->extensions[], so any number of them can be kept. */
527 ll_push_tail (&r->var_attrs, &ext->ll);
/* Any other subtype may be stored only once; a repeat is reported, citing the
   position of the record already stored, and skipped. */
530 else if (r->extensions[subtype] != NULL)
533 _("Record type 7, subtype %d found here has the same "
534 "type as the record found near offset 0x%llx. For "
535 "help, please send this file to %s and mention that "
536 "you were using %s."),
537 subtype, (long long int) r->extensions[subtype]->pos,
538 PACKAGE_BUGREPORT, PACKAGE_STRING);
539 return skip_extension_record (r, subtype);
/* First record of this subtype: keep it in r->extensions[subtype]. */
542 return read_extension_record (r, subtype, &r->extensions[subtype]);
545 sys_error (r, r->pos, _("Unrecognized record type %d."), type);
552 /* Returns the character encoding obtained from R, or a null pointer if R
553 doesn't have an indication of its character encoding. */
555 sfm_get_encoding (const struct sfm_reader *r)
557 /* The EXT_ENCODING record is the best way to determine dictionary
559 if (r->extensions[EXT_ENCODING])
560 return r->extensions[EXT_ENCODING]->data;
562 /* But EXT_INTEGER is better than nothing as a fallback. */
563 if (r->extensions[EXT_INTEGER])
/* The code page is the integer at byte offset 7 * 4 of the machine integer
   info record's data. */
565 int codepage = parse_int (r, r->extensions[EXT_INTEGER]->data, 7 * 4);
566 const char *encoding;
575 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
576 respectively. However, many files have character code 2 but data
577 which are clearly not ASCII. Therefore, ignore these values. */
/* Otherwise translate the code page number into an encoding name, if one is
   known. */
584 encoding = sys_get_encoding_from_codepage (codepage);
585 if (encoding != NULL)
591 /* If the file magic number is EBCDIC then its character data is too. */
/* header.magic holds the first 4 bytes of the file followed by a null, so it
   can be compared as a C string. */
592 if (!strcmp (r->header.magic, EBCDIC_MAGIC))
598 struct get_strings_aux
/* Stores an entry at index aux->n of AUX's three parallel arrays: TITLE as
   given, a copy of STRING allocated from AUX's pool, and the ID flag.  When
   the arrays are full they are first regrown to 2 * (allocated + 1)
   elements. */
609 add_string__ (struct get_strings_aux *aux,
610 const char *string, bool id, char *title)
612 if (aux->n >= aux->allocated)
614 aux->allocated = 2 * (aux->allocated + 1);
/* All three arrays share the single capacity aux->allocated. */
615 aux->titles = pool_realloc (aux->pool, aux->titles,
616 aux->allocated * sizeof *aux->titles);
617 aux->strings = pool_realloc (aux->pool, aux->strings,
618 aux->allocated * sizeof *aux->strings);
619 aux->ids = pool_realloc (aux->pool, aux->ids,
620 aux->allocated * sizeof *aux->ids);
/* TITLE is stored as passed (not copied); STRING is duplicated. */
623 aux->titles[aux->n] = title;
624 aux->strings[aux->n] = pool_strdup (aux->pool, string);
625 aux->ids[aux->n] = id;
/* Adds free-form STRING to AUX (ID flag false).  The entry's title is built
   printf-style from TITLE and the arguments that follow it, allocated from
   AUX's pool. */
629 static void PRINTF_FORMAT (3, 4)
630 add_string (struct get_strings_aux *aux,
631 const char *string, const char *title, ...)
635 va_start (args, title);
636 add_string__ (aux, string, false, pool_vasprintf (aux->pool, title, args));
/* Adds identifier ID to AUX (ID flag true).  The entry's title is built
   printf-style from TITLE and the arguments that follow it, allocated from
   AUX's pool. */
640 static void PRINTF_FORMAT (3, 4)
641 add_id (struct get_strings_aux *aux, const char *id, const char *title, ...)
645 va_start (args, title);
646 add_string__ (aux, id, true, pool_vasprintf (aux->pool, title, args));
650 /* Retrieves significant string data from R in its raw format, to allow the
651 caller to try to detect the encoding in use.
653 Returns the number of strings retrieved N. Sets each of *TITLESP, *IDSP,
654 and *STRINGSP to an array of N elements allocated from POOL. For each I in
655 0...N-1, UTF-8 string *TITLESP[I] describes *STRINGSP[I], which is in
656 whatever encoding system file R uses. *IDSP[I] is true if *STRINGSP[I] must
657 be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
text. */
660 sfm_get_strings (const struct any_reader *r_, struct pool *pool,
661 char ***titlesp, bool **idsp, char ***stringsp)
663 struct sfm_reader *r = sfm_reader_cast (r_);
664 const struct sfm_mrset *mrset;
665 struct get_strings_aux aux;
/* Variable names, added as identifiers.  Records whose width is -1 are
   skipped and do not consume a variable index. */
677 for (i = 0; i < r->n_vars; i++)
678 if (r->vars[i].width != -1)
679 add_id (&aux, r->vars[i].name, _("Variable %zu"), ++var_idx);
/* Variable labels, for the same records, when a label is present. */
682 for (i = 0; i < r->n_vars; i++)
683 if (r->vars[i].width != -1)
686 if (r->vars[i].label)
687 add_string (&aux, r->vars[i].label, _("Variable %zu Label"),
/* Value labels from every value label record, titled with the running
   counter K. */
692 for (i = 0; i < r->n_labels; i++)
693 for (j = 0; j < r->labels[i].n_labels; j++)
694 add_string (&aux, r->labels[i].labels[j].label,
695 _("Value Label %zu"), k++);
/* Strings from the file header, still in the file's own encoding. */
697 add_string (&aux, r->header.creation_date, _("Creation Date"));
698 add_string (&aux, r->header.creation_time, _("Creation Time"));
699 add_string (&aux, r->header.eye_catcher, _("Product"));
700 add_string (&aux, r->header.file_label, _("File Label"));
702 if (r->extensions[EXT_PRODUCT_INFO])
703 add_string (&aux, r->extensions[EXT_PRODUCT_INFO]->data,
704 _("Extra Product Info"));
/* Document lines: line I is the 80-byte slice starting at offset I * 80 of
   the document block, copied into LINE before being added. */
710 for (i = 0; i < r->document->n_lines; i++)
714 memcpy (line, r->document->documents + i * 80, 80);
717 add_string (&aux, line, _("Document Line %zu"), i + 1);
/* Multiple response sets, numbered from 1 by position in r->mrsets. */
721 for (mrset = r->mrsets; mrset < &r->mrsets[r->n_mrsets]; mrset++)
723 size_t mrset_idx = mrset - r->mrsets + 1;
725 add_id (&aux, mrset->name, _("MRSET %zu"), mrset_idx);
727 add_string (&aux, mrset->label, _("MRSET %zu Label"), mrset_idx);
729 /* Skip the variables because they ought to be duplicates. */
732 add_string (&aux, mrset->counted, _("MRSET %zu Counted Value"),
/* NOTE(review): the comments below name kinds of records for which the
   visible code adds no strings. */
736 /* data file attributes */
737 /* variable attributes */
739 /* long string value labels */
740 /* long string missing values */
/* Hand the collected arrays back to the caller. */
742 *titlesp = aux.titles;
744 *stringsp = aux.strings;
748 /* Decodes the dictionary read from R, saving it into *DICT. Character
749 strings in R are decoded using ENCODING, or an encoding obtained from R if
750 ENCODING is null, or the locale encoding if R specifies no encoding.
752 If INFOP is non-null, then it receives additional info about the system
753 file, which the caller must eventually free with any_read_info_destroy()
754 when it is no longer needed.
756 This function consumes R. The caller must not use it again later, even to
757 destroy it with sfm_close(). */
758 static struct casereader *
759 sfm_decode (struct any_reader *r_, const char *encoding,
760 struct dictionary **dictp, struct any_read_info *infop)
762 struct sfm_reader *r = sfm_reader_cast (r_);
763 struct dictionary *dict;
/* Choose the encoding: the caller's ENCODING if given, otherwise whatever
   sfm_get_encoding() finds in the file, otherwise the locale's character
   set, with a warning in that last case. */
766 if (encoding == NULL)
768 encoding = sfm_get_encoding (r);
769 if (encoding == NULL)
771 sys_warn (r, -1, _("This system file does not indicate its own "
772 "character encoding. Using default encoding "
773 "%s. For best results, specify an encoding "
774 "explicitly. Use SYSFILE INFO with "
775 "ENCODING=\"DETECT\" to analyze the possible "
778 encoding = locale_charset ();
/* r->encoding is taken from the new dictionary rather than from ENCODING
   directly. */
782 dict = dict_create (encoding);
783 r->encoding = dict_get_encoding (dict);
785 /* These records don't use variables at all. */
786 if (r->document != NULL)
787 parse_document (dict, r->document);
789 if (r->extensions[EXT_INTEGER] != NULL
790 && !parse_machine_integer_info (r, r->extensions[EXT_INTEGER], &r->info))
793 if (r->extensions[EXT_FLOAT] != NULL)
794 parse_machine_float_info (r, r->extensions[EXT_FLOAT]);
796 if (r->extensions[EXT_PRODUCT_INFO] != NULL)
797 parse_extra_product_info (r, r->extensions[EXT_PRODUCT_INFO], &r->info);
799 if (r->extensions[EXT_FILE_ATTRS] != NULL)
800 parse_data_file_attributes (r, r->extensions[EXT_FILE_ATTRS], dict);
802 parse_header (r, &r->header, &r->info, dict);
804 /* Parse the variable records, the basis of almost everything else. */
805 if (!parse_variable_records (r, dict, r->vars, r->n_vars))
808 /* Parse value labels and the weight variable immediately after the variable
809 records. These records use indexes into var_recs[], so we must parse them
810 before those indexes become invalidated by very long string variables. */
811 for (i = 0; i < r->n_labels; i++)
812 if (!parse_value_labels (r, dict, r->vars, r->n_vars, &r->labels[i]))
/* A weight index of 0 means the file is unweighted. */
814 if (r->header.weight_idx != 0)
816 struct variable *weight_var;
/* NOTE(review): 76 is passed as the off_t argument of lookup_var_by_index();
   presumably it is the file offset of the weight index, used for error
   messages -- TODO confirm. */
818 weight_var = lookup_var_by_index (r, 76, r->vars, r->n_vars,
819 r->header.weight_idx);
820 if (weight_var != NULL)
/* Only a numeric variable is accepted as the weight; a string variable is
   ignored with a warning. */
822 if (var_is_numeric (weight_var))
823 dict_set_weight (dict, weight_var);
825 sys_warn (r, -1, _("Ignoring string variable `%s' set "
826 "as weighting variable."),
827 var_get_name (weight_var));
831 if (r->extensions[EXT_DISPLAY] != NULL)
832 parse_display_parameters (r, r->extensions[EXT_DISPLAY], dict);
834 /* The following records use short names, so they need to be parsed before
835 parse_long_var_name_map() changes short names to long names. */
836 decode_mrsets (r, dict);
838 if (r->extensions[EXT_LONG_STRINGS] != NULL
839 && !parse_long_string_map (r, r->extensions[EXT_LONG_STRINGS], dict))
842 /* Now rename variables to their long names. */
843 parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict);
845 /* The following records use long names, so they need to follow renaming. */
846 if (!ll_is_empty (&r->var_attrs))
848 struct sfm_extension_record *ext;
849 ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs)
850 parse_variable_attributes (r, ext, dict);
852 /* Roles use the $@Role attribute. */
853 assign_variable_roles (r, dict);
855 if (r->extensions[EXT_LONG_LABELS] != NULL)
856 parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict);
857 if (r->extensions[EXT_LONG_MISSING] != NULL)
858 parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
861 /* Warn if the actual amount of data per case differs from the
862 amount that the header claims. SPSS version 13 gets this
863 wrong when very long strings are involved, so don't warn in
865 if (r->header.nominal_case_size > 0
866 && r->header.nominal_case_size != r->n_vars
867 && r->info.version_major != 13)
868 sys_warn (r, -1, _("File header claims %d variable positions but "
869 "%zu were read from file."),
870 r->header.nominal_case_size, r->n_vars);
872 /* Create an index of dictionary variable widths for
873 sfm_read_case to use. We cannot use the `struct variable's
874 from the dictionary we created, because the caller owns the
875 dictionary and may destroy or modify its variables. */
876 sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
/* r->sfm_vars is registered with R's pool together with free(), presumably so
   that it is released when the pool is destroyed. */
877 pool_register (r->pool, free, r->sfm_vars);
878 r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
/* r->info is zeroed here.  NOTE(review): presumably its contents were just
   handed to the caller through INFOP, so that the any_read_info_destroy()
   call in sfm_close() has nothing left to free -- the surrounding lines are
   not visible in this listing. */
884 memset (&r->info, 0, sizeof r->info);
/* A case count of -1 is reported to the casereader as CASENUMBER_MAX. */
887 return casereader_create_sequential
889 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
890 &sys_file_casereader_class, r);
899 /* Closes R, which should have been returned by sfm_open() but not already
900 closed with sfm_decode() or this function.
901 Returns true if an I/O error has occurred on READER, false
otherwise. */
904 sfm_close (struct any_reader *r_)
906 struct sfm_reader *r = sfm_reader_cast (r_);
/* A failure to close the file is reported to the user along with the
   strerror() text for errno. */
911 if (fn_close (r->fh, r->file) == EOF)
913 msg (ME, _("Error closing system file `%s': %s."),
914 fh_get_file_name (r->fh), strerror (errno));
/* Release the strings held in r->info. */
920 any_read_info_destroy (&r->info);
/* R's pool tracks all system file state, and sfm_open() registers R itself
   with it, so R must not be touched after this call. */
925 pool_destroy (r->pool);
930 /* Destroys READER by closing the system file reader R_ with sfm_close().
   READER itself is not used. */
932 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
934 struct sfm_reader *r = r_;
935 sfm_close (&r->any_reader);
938 /* Detects whether FILE is an SPSS system file. Returns 1 if so, 0 if not, and
939 a negative errno value if there is an error reading FILE. */
941 sfm_detect (FILE *file)
/* Rewind so that the magic number is read from the start of FILE. */
945 if (fseek (file, 0, SEEK_SET) != 0)
/* If the 4-byte magic cannot be read, a stream error yields -errno and
   anything else (such as a file that is too short) yields 0. */
947 if (fread (magic, 4, 1, file) != 1)
948 return ferror (file) ? -errno : 0;
/* Any of the three known magic numbers identifies a system file. */
951 return (!strcmp (ASCII_MAGIC, magic)
952 || !strcmp (ASCII_ZMAGIC, magic)
953 || !strcmp (EBCDIC_MAGIC, magic));
956 /* Reads the global header of the system file. Initializes *HEADER and *INFO,
957 except for the string fields in *INFO, which parse_header() will initialize
958 later once the file's encoding is known. */
960 read_header (struct sfm_reader *r, struct any_read_info *info,
961 struct sfm_header_record *header)
963 uint8_t raw_layout_code[4];
/* The magic number and eye-catcher are read as strings sized by their
   destination arrays. */
968 if (!read_string (r, header->magic, sizeof header->magic)
969 || !read_string (r, header->eye_catcher, sizeof header->eye_catcher))
/* Remember whether the eye-catcher mentions the ReadStat project URL. */
971 r->written_by_readstat = strstr (header->eye_catcher,
972 "https://github.com/WizardMac/ReadStat");
/* Accept the plain ASCII or EBCDIC magic, or the ZLIB magic; anything else is
   not a system file. */
974 if (!strcmp (ASCII_MAGIC, header->magic)
975 || !strcmp (EBCDIC_MAGIC, header->magic))
977 else if (!strcmp (ASCII_ZMAGIC, header->magic))
981 sys_error (r, 0, _("This is not an SPSS system file."));
985 /* Identify integer format. */
986 if (!read_bytes (r, raw_layout_code, sizeof raw_layout_code))
/* The layout code must be recognizable as the value 2 or 3, and the resulting
   byte order must be MSB-first or LSB-first. */
988 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
990 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
992 || (r->integer_format != INTEGER_MSB_FIRST
993 && r->integer_format != INTEGER_LSB_FIRST))
995 sys_error (r, 64, _("This is not an SPSS system file."));
999 if (!read_int (r, &header->nominal_case_size))
/* An implausible nominal case size (negative or above INT_MAX / 16) is
   replaced by -1 rather than treated as an error. */
1002 if (header->nominal_case_size < 0
1003 || header->nominal_case_size > INT_MAX / 16)
1004 header->nominal_case_size = -1;
1006 if (!read_int (r, &compressed))
/* The compression code selects r->compression: 0 is none, 1 is simple, and 2
   is ZLIB.  Judging by the error messages, 0 and 1 belong to files without
   the ZLIB magic and 2 to files with it; the test that separates the two
   groups is not visible in this listing.
   NOTE(review): 'else if (compressed != 0)' below follows the tests for 0 and
   1, so its condition is always true when it is reached. */
1010 if (compressed == 0)
1011 r->compression = ANY_COMP_NONE;
1012 else if (compressed == 1)
1013 r->compression = ANY_COMP_SIMPLE;
1014 else if (compressed != 0)
1016 sys_error (r, 0, "System file header has invalid compression "
1017 "value %d.", compressed);
1023 if (compressed == 2)
1024 r->compression = ANY_COMP_ZLIB;
1027 sys_error (r, 0, "ZLIB-compressed system file header has invalid "
1028 "compression value %d.", compressed);
1033 if (!read_int (r, &header->weight_idx))
1036 if (!read_int (r, &r->case_cnt))
/* NOTE(review): case counts above INT_MAX / 2 get special handling here; the
   statement itself is not visible in this listing (presumably the count is
   treated as unknown). */
1038 if ( r->case_cnt > INT_MAX / 2)
1041 /* Identify floating-point format and obtain compression bias. */
1042 if (!read_bytes (r, raw_bias, sizeof raw_bias))
/* If the raw bias cannot be recognized as 100.0 in any floating-point format,
   warn unless it is all zero bytes, then fall back to IEEE double in the
   same byte order as the integers. */
1044 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
1046 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
1048 if (memcmp (raw_bias, zero_bias, 8))
1049 sys_warn (r, r->pos - 8,
1050 _("Compression bias is not the usual "
1051 "value of 100, or system file uses unrecognized "
1052 "floating-point format."));
1055 /* Some software is known to write all-zeros to this
1056 field. Such software also writes floating-point
1057 numbers in the format that we expect by default
1058 (it seems that all software most likely does, in
1059 reality), so don't warn in this case. */
1062 if (r->integer_format == INTEGER_MSB_FIRST)
1063 r->float_format = FLOAT_IEEE_DOUBLE_BE;
1065 r->float_format = FLOAT_IEEE_DOUBLE_LE;
/* Convert the raw bias from the file's format into a native double. */
1067 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
/* NOTE(review): the 3 bytes skipped after the file label are presumably
   padding -- TODO confirm against the file format description. */
1069 if (!read_string (r, header->creation_date, sizeof header->creation_date)
1070 || !read_string (r, header->creation_time, sizeof header->creation_time)
1071 || !read_string (r, header->file_label, sizeof header->file_label)
1072 || !skip_bytes (r, 3))
/* Copy the detected formats, compression, and case count into INFO. */
1075 info->integer_format = r->integer_format;
1076 info->float_format = r->float_format;
1077 info->compression = r->compression;
1078 info->case_cnt = r->case_cnt;
1083 /* Reads a variable (type 2) record from R into RECORD. */
1085 read_variable_record (struct sfm_reader *r, struct sfm_var_record *record)
1087 int has_variable_label;
/* Start from an all-zero record, so that fields not filled in below (such as
   the label when there is none) stay zero. */
1089 memset (record, 0, sizeof *record);
/* Remember where the record starts, for use in error messages. */
1091 record->pos = r->pos;
/* Fixed part of the record: width, label flag, missing value code, print and
   write formats, and the name. */
1092 if (!read_int (r, &record->width)
1093 || !read_int (r, &has_variable_label)
1094 || !read_int (r, &record->missing_value_code)
1095 || !read_int (r, &record->print_format)
1096 || !read_int (r, &record->write_format)
1097 || !read_string (r, record->name, sizeof record->name))
1100 if (has_variable_label == 1)
1102 enum { MAX_LABEL_LEN = 65536 };
1103 unsigned int len, read_len;
1105 if (!read_uint (r, &len))
1108 /* Read up to MAX_LABEL_LEN bytes of label. */
1109 read_len = MIN (MAX_LABEL_LEN, len);
1110 record->label = pool_malloc (r->pool, read_len + 1);
1111 if (!read_string (r, record->label, read_len + 1))
1114 /* Skip unread label bytes. */
/* len - read_len is zero unless the label was cut off at MAX_LABEL_LEN. */
1115 if (!skip_bytes (r, len - read_len))
1118 /* Skip label padding up to multiple of 4 bytes. */
1119 if (!skip_bytes (r, ROUND_UP (len, 4) - len))
1122 else if (has_variable_label != 0)
1124 sys_error (r, record->pos,
1125 _("Variable label indicator field is not 0 or 1."));
1129 /* Set missing values. */
1130 if (record->missing_value_code != 0)
1132 int code = record->missing_value_code;
/* A width of 0 selects the numeric check; other widths get the string
   check, which allows only the codes 1 to 3. */
1133 if (record->width == 0)
1135 if (code < -3 || code > 3 || code == -1)
1137 sys_error (r, record->pos,
1138 _("Numeric missing value indicator field is not "
1139 "-3, -2, 0, 1, 2, or 3."));
1145 if (code < 1 || code > 3)
1147 sys_error (r, record->pos,
1148 _("String missing value indicator field is not "
/* Each missing value occupies 8 bytes, and the magnitude of the code gives
   the number of values stored, whether the code is positive or negative. */
1154 if (!read_bytes (r, record->missing, 8 * abs (code)))
1161 /* Reads value labels from R into RECORD. */
1163 read_value_label_record (struct sfm_reader *r,
1164 struct sfm_value_label_record *record)
1169 /* Read type 3 record. */
1170 record->pos = r->pos;
1171 if (!read_uint (r, &record->n_labels))
/* Reject a label count above UINT_MAX / sizeof *record->labels before
   allocating the array. */
1173 if (record->n_labels > UINT_MAX / sizeof *record->labels)
1175 sys_error (r, r->pos - 4, _("Invalid number of labels %u."),
1179 record->labels = pool_nmalloc (r->pool, record->n_labels,
1180 sizeof *record->labels);
1181 for (i = 0; i < record->n_labels; i++)
1183 struct sfm_value_label *label = &record->labels[i];
1184 unsigned char label_len;
/* The raw value is read first, filling all of label->value. */
1187 if (!read_bytes (r, label->value, sizeof label->value))
1190 /* Read label length. */
1191 if (!read_bytes (r, &label_len, sizeof label_len))
/* The length byte and the label text together are padded to a multiple of 8
   bytes, so padded_len - 1 bytes follow the length byte just read. */
1193 padded_len = ROUND_UP (label_len + 1, 8);
1195 /* Read label, padding. */
1196 label->label = pool_malloc (r->pool, padded_len + 1);
1197 if (!read_bytes (r, label->label, padded_len - 1))
/* Terminate the string at the true label length, cutting off the padding. */
1199 label->label[label_len] = '\0';
1202 /* Read record type of type 4 record. */
1203 if (!read_int (r, &type))
1207 sys_error (r, r->pos - 4,
1208 _("Variable index record (type 4) does not immediately "
1209 "follow value label record (type 3) as it should."));
1213 /* Read number of variables associated with value label from type 4
1215 if (!read_uint (r, &record->n_vars))
1217 if (record->n_vars < 1 || record->n_vars > r->n_vars)
1219 sys_error (r, r->pos - 4,
1220 _("Number of variables associated with a value label (%u) "
1221 "is not between 1 and the number of variables (%zu)."),
1222 record->n_vars, r->n_vars);
1226 record->vars = pool_nmalloc (r->pool, record->n_vars, sizeof *record->vars);
1227 for (i = 0; i < record->n_vars; i++)
1228 if (!read_int (r, &record->vars[i]))
1234 /* Reads a document record from R. Returns true if successful, false on
1237 read_document_record (struct sfm_reader *r)
1240 if (!read_int (r, &n_lines))
1242 else if (n_lines == 0)
1244 else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
1246 sys_error (r, r->pos,
1247 _("Number of document lines (%d) "
1248 "must be greater than 0 and less than %d."),
1249 n_lines, INT_MAX / DOC_LINE_LENGTH);
1253 struct sfm_document_record *record;
1254 record = pool_malloc (r->pool, sizeof *record);
1255 record->pos = r->pos;
1256 record->n_lines = n_lines;
1257 record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
1258 if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines))
1261 r->document = record;
1266 read_extension_record_header (struct sfm_reader *r, int subtype,
1267 struct sfm_extension_record *record)
1269 record->subtype = subtype;
1270 record->pos = r->pos;
1271 if (!read_uint (r, &record->size) || !read_uint (r, &record->count))
1274 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
1275 allows an extra byte for a null terminator, used by some
1276 extension processing routines. */
1277 if (record->size != 0
1278 && xsum (1, xtimes (record->count, record->size)) >= UINT_MAX)
1280 sys_error (r, record->pos, "Record type 7 subtype %d too large.",
1288 /* Reads an extension record from R into RECORD. */
1290 read_extension_record (struct sfm_reader *r, int subtype,
1291 struct sfm_extension_record **recordp)
1293 struct extension_record_type
1300 static const struct extension_record_type types[] =
1302 /* Implemented record types. */
1303 { EXT_INTEGER, 4, 8 },
1304 { EXT_FLOAT, 8, 3 },
1305 { EXT_MRSETS, 1, 0 },
1306 { EXT_PRODUCT_INFO, 1, 0 },
1307 { EXT_DISPLAY, 4, 0 },
1308 { EXT_LONG_NAMES, 1, 0 },
1309 { EXT_LONG_STRINGS, 1, 0 },
1310 { EXT_NCASES, 8, 2 },
1311 { EXT_FILE_ATTRS, 1, 0 },
1312 { EXT_VAR_ATTRS, 1, 0 },
1313 { EXT_MRSETS2, 1, 0 },
1314 { EXT_ENCODING, 1, 0 },
1315 { EXT_LONG_LABELS, 1, 0 },
1316 { EXT_LONG_MISSING, 1, 0 },
1318 /* Ignored record types. */
1319 { EXT_VAR_SETS, 0, 0 },
1321 { EXT_DATA_ENTRY, 0, 0 },
1322 { EXT_DATAVIEW, 0, 0 },
1325 const struct extension_record_type *type;
1326 struct sfm_extension_record *record;
1330 record = pool_malloc (r->pool, sizeof *record);
1331 if (!read_extension_record_header (r, subtype, record))
1333 n_bytes = record->count * record->size;
1335 for (type = types; type < &types[sizeof types / sizeof *types]; type++)
1336 if (subtype == type->subtype)
1338 if (type->size > 0 && record->size != type->size)
1339 sys_warn (r, record->pos,
1340 _("Record type 7, subtype %d has bad size %u "
1341 "(expected %d)."), subtype, record->size, type->size);
1342 else if (type->count > 0 && record->count != type->count)
1343 sys_warn (r, record->pos,
1344 _("Record type 7, subtype %d has bad count %u "
1345 "(expected %d)."), subtype, record->count, type->count);
1346 else if (type->count == 0 && type->size == 0)
1348 /* Ignore this record. */
1352 char *data = pool_malloc (r->pool, n_bytes + 1);
1353 data[n_bytes] = '\0';
1355 record->data = data;
1356 if (!read_bytes (r, record->data, n_bytes))
1365 sys_warn (r, record->pos,
1366 _("Unrecognized record type 7, subtype %d. For help, please "
1367 "send this file to %s and mention that you were using %s."),
1368 subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
1371 return skip_bytes (r, n_bytes);
1375 skip_extension_record (struct sfm_reader *r, int subtype)
1377 struct sfm_extension_record record;
1379 return (read_extension_record_header (r, subtype, &record)
1380 && skip_bytes (r, record.count * record.size));
1384 parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
1385 struct any_read_info *info, struct dictionary *dict)
1387 const char *dict_encoding = dict_get_encoding (dict);
1388 struct substring product;
1389 struct substring label;
1392 /* Convert file label to UTF-8 and put it into DICT. */
1393 label = recode_substring_pool ("UTF-8", dict_encoding,
1394 ss_cstr (header->file_label), r->pool);
1395 ss_trim (&label, ss_cstr (" "));
1396 label.string[label.length] = '\0';
1397 fixed_label = fix_line_ends (label.string);
1398 dict_set_label (dict, fixed_label);
1401 /* Put creation date and time in UTF-8 into INFO. */
1402 info->creation_date = recode_string ("UTF-8", dict_encoding,
1403 header->creation_date, -1);
1404 info->creation_time = recode_string ("UTF-8", dict_encoding,
1405 header->creation_time, -1);
1407 /* Put product name into INFO, dropping eye-catcher string if present. */
1408 product = recode_substring_pool ("UTF-8", dict_encoding,
1409 ss_cstr (header->eye_catcher), r->pool);
1410 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
1411 ss_trim (&product, ss_cstr (" "));
1412 info->product = ss_xstrdup (product);
1415 /* Reads a variable (type 2) record from R and adds the
1416 corresponding variable to DICT.
1417 Also skips past additional variable records for long string
1420 parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
1421 struct sfm_var_record *var_recs, size_t n_var_recs)
1423 const char *dict_encoding = dict_get_encoding (dict);
1424 struct sfm_var_record *rec;
1427 for (rec = var_recs; rec < &var_recs[n_var_recs]; )
1429 struct variable *var;
1434 name = recode_string_pool ("UTF-8", dict_encoding,
1435 rec->name, -1, r->pool);
1436 name[strcspn (name, " ")] = '\0';
1438 if (!dict_id_is_valid (dict, name, false)
1439 || name[0] == '$' || name[0] == '#')
1441 sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
1445 if (rec->width < 0 || rec->width > 255)
1447 sys_error (r, rec->pos,
1448 _("Bad width %d for variable %s."), rec->width, name);
1452 var = rec->var = dict_create_var (dict, name, rec->width);
1455 char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
1456 sys_warn (r, rec->pos, _("Renaming variable with duplicate name "
1459 var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
1460 var_set_short_name (var, 0, new_name);
1464 /* Set the short name the same as the long name (even if we renamed
1466 var_set_short_name (var, 0, var_get_name (var));
1468 /* Get variable label, if any. */
1473 utf8_label = recode_string_pool ("UTF-8", dict_encoding,
1474 rec->label, -1, r->pool);
1475 var_set_label (var, utf8_label);
1478 /* Set missing values. */
1479 if (rec->missing_value_code != 0)
1481 int width = var_get_width (var);
1482 struct missing_values mv;
1484 mv_init_pool (r->pool, &mv, width);
1485 if (var_is_numeric (var))
1487 bool has_range = rec->missing_value_code < 0;
1488 int n_discrete = (has_range
1489 ? rec->missing_value_code == -3
1490 : rec->missing_value_code);
1495 double low = parse_float (r, rec->missing, 0);
1496 double high = parse_float (r, rec->missing, 8);
1498 /* Deal with SPSS 21 change in representation. */
1502 mv_add_range (&mv, low, high);
1506 for (i = 0; i < n_discrete; i++)
1508 mv_add_num (&mv, parse_float (r, rec->missing, ofs));
1513 for (i = 0; i < rec->missing_value_code; i++)
1514 mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
1515 var_set_missing_values (var, &mv);
1519 parse_format_spec (r, rec->pos + 12, rec->print_format,
1520 PRINT_FORMAT, var, &n_warnings);
1521 parse_format_spec (r, rec->pos + 16, rec->write_format,
1522 WRITE_FORMAT, var, &n_warnings);
1524 /* Account for values.
1525 Skip long string continuation records, if any. */
1526 n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
1527 for (i = 1; i < n_values; i++)
1528 if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
1530 sys_error (r, rec->pos, _("Missing string continuation record."));
1539 /* Translates the format spec from sysfile format to internal
1542 parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
1543 enum which_format which, struct variable *v,
1546 const int max_warnings = 8;
1547 uint8_t raw_type = format >> 16;
1548 uint8_t w = format >> 8;
1557 ok = (fmt_from_io (raw_type, &f.type)
1558 && fmt_check_output (&f)
1559 && fmt_check_width_compat (&f, var_get_width (v)));
1564 if (which == PRINT_FORMAT)
1565 var_set_print_format (v, &f);
1567 var_set_write_format (v, &f);
1569 else if (format == 0)
1571 /* Actually observed in the wild. No point in warning about it. */
1573 else if (++*n_warnings <= max_warnings)
1575 if (which == PRINT_FORMAT)
1576 sys_warn (r, pos, _("Variable %s with width %d has invalid print "
1578 var_get_name (v), var_get_width (v), format);
1580 sys_warn (r, pos, _("Variable %s with width %d has invalid write "
1582 var_get_name (v), var_get_width (v), format);
1584 if (*n_warnings == max_warnings)
1585 sys_warn (r, -1, _("Suppressing further invalid format warnings."));
1590 parse_document (struct dictionary *dict, struct sfm_document_record *record)
1594 for (p = record->documents;
1595 p < record->documents + DOC_LINE_LENGTH * record->n_lines;
1596 p += DOC_LINE_LENGTH)
1598 struct substring line;
1600 line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
1601 ss_buffer (p, DOC_LINE_LENGTH), NULL);
1602 ss_rtrim (&line, ss_cstr (" "));
1603 line.string[line.length] = '\0';
1605 dict_add_document_line (dict, line.string, false);
1611 /* Parses record type 7, subtype 3. */
1613 parse_machine_integer_info (struct sfm_reader *r,
1614 const struct sfm_extension_record *record,
1615 struct any_read_info *info)
1617 int float_representation, expected_float_format;
1618 int integer_representation, expected_integer_format;
1620 /* Save version info. */
1621 info->version_major = parse_int (r, record->data, 0);
1622 info->version_minor = parse_int (r, record->data, 4);
1623 info->version_revision = parse_int (r, record->data, 8);
1625 /* Check floating point format. */
1626 float_representation = parse_int (r, record->data, 16);
1627 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
1628 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
1629 expected_float_format = 1;
1630 else if (r->float_format == FLOAT_Z_LONG)
1631 expected_float_format = 2;
1632 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
1633 expected_float_format = 3;
1636 if (float_representation != expected_float_format)
1638 sys_error (r, record->pos,
1639 _("Floating-point representation indicated by "
1640 "system file (%d) differs from expected (%d)."),
1641 float_representation, expected_float_format);
1645 /* Check integer format. */
1646 integer_representation = parse_int (r, record->data, 24);
1647 if (r->integer_format == INTEGER_MSB_FIRST)
1648 expected_integer_format = 1;
1649 else if (r->integer_format == INTEGER_LSB_FIRST)
1650 expected_integer_format = 2;
1653 if (integer_representation != expected_integer_format)
1654 sys_warn (r, record->pos,
1655 _("Integer format indicated by system file (%d) "
1656 "differs from expected (%d)."),
1657 integer_representation, expected_integer_format);
1662 /* Parses record type 7, subtype 4. */
1664 parse_machine_float_info (struct sfm_reader *r,
1665 const struct sfm_extension_record *record)
1667 double sysmis = parse_float (r, record->data, 0);
1668 double highest = parse_float (r, record->data, 8);
1669 double lowest = parse_float (r, record->data, 16);
1671 if (sysmis != SYSMIS)
1672 sys_warn (r, record->pos,
1673 _("File specifies unexpected value %g (%a) as %s, "
1674 "instead of %g (%a)."),
1675 sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
1677 if (highest != HIGHEST)
1678 sys_warn (r, record->pos,
1679 _("File specifies unexpected value %g (%a) as %s, "
1680 "instead of %g (%a)."),
1681 highest, highest, "HIGHEST", HIGHEST, HIGHEST);
1683 /* SPSS before version 21 used a unique value just bigger than SYSMIS as
1684 LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
1685 appears in a context (missing values) where SYSMIS cannot. */
1686 if (lowest != LOWEST && lowest != SYSMIS)
1687 sys_warn (r, record->pos,
1688 _("File specifies unexpected value %g (%a) as %s, "
1689 "instead of %g (%a) or %g (%a)."),
1690 lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
1693 /* Parses record type 7, subtype 10. */
1695 parse_extra_product_info (struct sfm_reader *r,
1696 const struct sfm_extension_record *record,
1697 struct any_read_info *info)
1699 struct text_record *text;
1701 text = open_text_record (r, record, true);
1702 info->product_ext = fix_line_ends (text_get_all (text));
1703 close_text_record (r, text);
1706 /* Parses record type 7, subtype 7 or 19. */
1708 parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
1709 size_t *allocated_mrsets)
1711 struct text_record *text;
1713 text = open_text_record (r, record, false);
1716 struct sfm_mrset *mrset;
1717 size_t allocated_vars;
1720 /* Skip extra line feeds if present. */
1721 while (text_match (text, '\n'))
1724 if (r->n_mrsets >= *allocated_mrsets)
1725 r->mrsets = pool_2nrealloc (r->pool, r->mrsets, allocated_mrsets,
1727 mrset = &r->mrsets[r->n_mrsets];
1728 memset(mrset, 0, sizeof *mrset);
1730 mrset->name = text_get_token (text, ss_cstr ("="), NULL);
1731 if (mrset->name == NULL)
1734 if (text_match (text, 'C'))
1736 mrset->type = MRSET_MC;
1737 if (!text_match (text, ' '))
1739 sys_warn (r, record->pos,
1740 _("Missing space following `%c' at offset %zu "
1741 "in MRSETS record."), 'C', text_pos (text));
1745 else if (text_match (text, 'D'))
1747 mrset->type = MRSET_MD;
1748 mrset->cat_source = MRSET_VARLABELS;
1750 else if (text_match (text, 'E'))
1754 mrset->type = MRSET_MD;
1755 mrset->cat_source = MRSET_COUNTEDVALUES;
1756 if (!text_match (text, ' '))
1758 sys_warn (r, record->pos,
1759 _("Missing space following `%c' at offset %zu "
1760 "in MRSETS record."), 'E', text_pos (text));
1764 number = text_get_token (text, ss_cstr (" "), NULL);
1766 sys_warn (r, record->pos,
1767 _("Missing label source value "
1768 "following `E' at offset %zu in MRSETS record."),
1770 else if (!strcmp (number, "11"))
1771 mrset->label_from_var_label = true;
1772 else if (strcmp (number, "1"))
1773 sys_warn (r, record->pos,
1774 _("Unexpected label source value following `E' "
1775 "at offset %zu in MRSETS record."),
1780 sys_warn (r, record->pos,
1781 _("Missing `C', `D', or `E' at offset %zu "
1782 "in MRSETS record."),
1787 if (mrset->type == MRSET_MD)
1789 mrset->counted = text_parse_counted_string (r, text);
1790 if (mrset->counted == NULL)
1794 mrset->label = text_parse_counted_string (r, text);
1795 if (mrset->label == NULL)
1803 var = text_get_token (text, ss_cstr (" \n"), &delimiter);
1806 if (delimiter != '\n')
1807 sys_warn (r, record->pos,
1808 _("Missing new-line parsing variable names "
1809 "at offset %zu in MRSETS record."),
1814 if (mrset->n_vars >= allocated_vars)
1815 mrset->vars = pool_2nrealloc (r->pool, mrset->vars,
1817 sizeof *mrset->vars);
1818 mrset->vars[mrset->n_vars++] = var;
1820 while (delimiter != '\n');
1824 close_text_record (r, text);
1828 decode_mrsets (struct sfm_reader *r, struct dictionary *dict)
1830 const struct sfm_mrset *s;
1832 for (s = r->mrsets; s < &r->mrsets[r->n_mrsets]; s++)
1834 struct stringi_set var_names;
1835 struct mrset *mrset;
1840 name = recode_string ("UTF-8", r->encoding, s->name, -1);
1841 if (!mrset_is_valid_name (name, dict_get_encoding (dict), false))
1843 sys_warn (r, -1, _("Invalid multiple response set name `%s'."),
1849 mrset = xzalloc (sizeof *mrset);
1851 mrset->type = s->type;
1852 mrset->cat_source = s->cat_source;
1853 mrset->label_from_var_label = s->label_from_var_label;
1854 if (s->label[0] != '\0')
1855 mrset->label = recode_string ("UTF-8", r->encoding, s->label, -1);
1857 stringi_set_init (&var_names);
1858 mrset->vars = xmalloc (s->n_vars * sizeof *mrset->vars);
1860 for (i = 0; i < s->n_vars; i++)
1862 struct variable *var;
1865 var_name = recode_string ("UTF-8", r->encoding, s->vars[i], -1);
1867 var = dict_lookup_var (dict, var_name);
1873 if (!stringi_set_insert (&var_names, var_name))
1876 _("MRSET %s contains duplicate variable name %s."),
1877 mrset->name, var_name);
1883 if (mrset->label == NULL && mrset->label_from_var_label
1884 && var_has_label (var))
1885 mrset->label = xstrdup (var_get_label (var));
1888 && var_get_type (var) != var_get_type (mrset->vars[0]))
1891 _("MRSET %s contains both string and "
1892 "numeric variables."), mrset->name);
1895 width = MIN (width, var_get_width (var));
1897 mrset->vars[mrset->n_vars++] = var;
1900 if (mrset->n_vars < 2)
1902 if (mrset->n_vars == 0)
1903 sys_warn (r, -1, _("MRSET %s has no variables."), mrset->name);
1905 sys_warn (r, -1, _("MRSET %s has only one variable."),
1907 mrset_destroy (mrset);
1908 stringi_set_destroy (&var_names);
1912 if (mrset->type == MRSET_MD)
1914 mrset->width = width;
1915 value_init (&mrset->counted, width);
1917 mrset->counted.f = c_strtod (s->counted, NULL);
1919 value_copy_str_rpad (&mrset->counted, width,
1920 (const uint8_t *) s->counted, ' ');
1923 dict_add_mrset (dict, mrset);
1924 stringi_set_destroy (&var_names);
1928 /* Read record type 7, subtype 11, which specifies how variables
1929 should be displayed in GUI environments. */
1931 parse_display_parameters (struct sfm_reader *r,
1932 const struct sfm_extension_record *record,
1933 struct dictionary *dict)
1935 bool includes_width;
1936 bool warned = false;
1941 n_vars = dict_get_var_cnt (dict);
1942 if (record->count == 3 * n_vars)
1943 includes_width = true;
1944 else if (record->count == 2 * n_vars)
1945 includes_width = false;
1948 sys_warn (r, record->pos,
1949 _("Extension 11 has bad count %u (for %zu variables)."),
1950 record->count, n_vars);
1955 for (i = 0; i < n_vars; ++i)
1957 struct variable *v = dict_get_var (dict, i);
1958 int measure, width, align;
1960 measure = parse_int (r, record->data, ofs);
1965 width = parse_int (r, record->data, ofs);
1971 align = parse_int (r, record->data, ofs);
1974 /* SPSS sometimes seems to set variables' measure to zero. */
1978 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1981 sys_warn (r, record->pos,
1982 _("Invalid variable display parameters for variable "
1983 "%zu (%s). Default parameters substituted."),
1984 i, var_get_name (v));
1989 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1990 : measure == 2 ? MEASURE_ORDINAL
1992 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1993 : align == 1 ? ALIGN_RIGHT
1996 /* Older versions (SPSS 9.0) sometimes set the display
1997 width to zero. This causes confusion in the GUI, so
1998 only set the width if it is nonzero. */
2000 var_set_display_width (v, width);
/* Renames VAR in DICT to NEW_NAME while keeping VAR's short names.  If the
   rename fails, warns about a duplicate long variable name at file offset
   POS; the short names are restored either way. */
static void
rename_var_and_save_short_names (struct sfm_reader *r, off_t pos,
                                 struct dictionary *dict,
                                 struct variable *var, const char *new_name)
{
  /* Renaming a variable may clear its short names, but we want to retain
     them, so take private copies now and put them back afterward. */
  const size_t n_saved = var_get_short_name_cnt (var);
  char **saved = xnmalloc (n_saved, sizeof *saved);
  size_t k;

  for (k = 0; k < n_saved; k++)
    {
      const char *short_name = var_get_short_name (var, k);

      if (short_name != NULL)
        saved[k] = xstrdup (short_name);
      else
        saved[k] = NULL;
    }

  /* Set long name. */
  if (!dict_try_rename_var (dict, var, new_name))
    sys_warn (r, pos, _("Duplicate long variable name `%s'."), new_name);

  /* Restore short names and release the copies. */
  for (k = 0; k < n_saved; k++)
    {
      var_set_short_name (var, k, saved[k]);
      free (saved[k]);
    }
  free (saved);
}
2037 /* Parses record type 7, subtype 13, which gives the long name that corresponds
2038 to each short name. Modifies variable names in DICT accordingly. */
2040 parse_long_var_name_map (struct sfm_reader *r,
2041 const struct sfm_extension_record *record,
2042 struct dictionary *dict)
2044 struct text_record *text;
2045 struct variable *var;
2050 /* There are no long variable names. Use the short variable names,
2051 converted to lowercase, as the long variable names. */
2054 for (i = 0; i < dict_get_var_cnt (dict); i++)
2056 struct variable *var = dict_get_var (dict, i);
2059 new_name = utf8_to_lower (var_get_name (var));
2060 rename_var_and_save_short_names (r, -1, dict, var, new_name);
2067 /* Rename each of the variables, one by one. (In a correctly constructed
2068 system file, this cannot create any intermediate duplicate variable names,
2069 because all of the new variable names are longer than any of the old
2070 variable names and thus there cannot be any overlaps.) */
2071 text = open_text_record (r, record, true);
2072 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
2074 /* Validate long name. */
2075 if (!dict_id_is_valid (dict, long_name, false)
2076 || long_name[0] == '$' || long_name[0] == '#')
2078 sys_warn (r, record->pos,
2079 _("Long variable mapping from %s to invalid "
2080 "variable name `%s'."),
2081 var_get_name (var), long_name);
2085 rename_var_and_save_short_names (r, record->pos, dict, var, long_name);
2087 close_text_record (r, text);
2090 /* Reads record type 7, subtype 14, which gives the real length
2091 of each very long string. Rearranges DICT accordingly. */
2093 parse_long_string_map (struct sfm_reader *r,
2094 const struct sfm_extension_record *record,
2095 struct dictionary *dict)
2097 struct text_record *text;
2098 struct variable *var;
2101 text = open_text_record (r, record, true);
2102 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
2104 size_t idx = var_get_dict_index (var);
2110 length = strtol (length_s, NULL, 10);
2111 if (length < 1 || length > MAX_STRING)
2113 sys_warn (r, record->pos,
2114 _("%s listed as string of invalid length %s "
2115 "in very long string record."),
2116 var_get_name (var), length_s);
2120 /* Check segments. */
2121 segment_cnt = sfm_width_to_segments (length);
2122 if (segment_cnt == 1)
2124 sys_warn (r, record->pos,
2125 _("%s listed in very long string record with width %s, "
2126 "which requires only one segment."),
2127 var_get_name (var), length_s);
2130 if (idx + segment_cnt > dict_get_var_cnt (dict))
2132 sys_error (r, record->pos,
2133 _("Very long string %s overflows dictionary."),
2134 var_get_name (var));
2138 /* Get the short names from the segments and check their
2140 for (i = 0; i < segment_cnt; i++)
2142 struct variable *seg = dict_get_var (dict, idx + i);
2143 int alloc_width = sfm_segment_alloc_width (length, i);
2144 int width = var_get_width (seg);
2147 var_set_short_name (var, i, var_get_short_name (seg, 0));
2148 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
2150 sys_error (r, record->pos,
2151 _("Very long string with width %ld has segment %d "
2152 "of width %d (expected %d)."),
2153 length, i, width, alloc_width);
2157 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
2158 var_set_width (var, length);
2160 close_text_record (r, text);
2161 dict_compact_values (dict);
2167 parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
2168 const struct sfm_var_record *var_recs, size_t n_var_recs,
2169 const struct sfm_value_label_record *record)
2171 struct variable **vars;
2175 utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
2176 for (i = 0; i < record->n_labels; i++)
2177 utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
2178 record->labels[i].label, -1,
2181 vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
2182 for (i = 0; i < record->n_vars; i++)
2184 vars[i] = lookup_var_by_index (r, record->pos,
2185 var_recs, n_var_recs, record->vars[i]);
2186 if (vars[i] == NULL)
2190 for (i = 1; i < record->n_vars; i++)
2191 if (var_get_type (vars[i]) != var_get_type (vars[0]))
2193 sys_error (r, record->pos,
2194 _("Variables associated with value label are not all of "
2195 "identical type. Variable %s is %s, but variable "
2197 var_get_name (vars[0]),
2198 var_is_numeric (vars[0]) ? _("numeric") : _("string"),
2199 var_get_name (vars[i]),
2200 var_is_numeric (vars[i]) ? _("numeric") : _("string"));
2204 for (i = 0; i < record->n_vars; i++)
2206 struct variable *var = vars[i];
2210 width = var_get_width (var);
2213 sys_error (r, record->pos,
2214 _("Value labels may not be added to long string "
2215 "variables (e.g. %s) using records types 3 and 4."),
2216 var_get_name (var));
2220 for (j = 0; j < record->n_labels; j++)
2222 struct sfm_value_label *label = &record->labels[j];
2225 value_init (&value, width);
2227 value.f = parse_float (r, label->value, 0);
2229 memcpy (value_str_rw (&value, width), label->value, width);
2231 if (!var_add_value_label (var, &value, utf8_labels[j]))
2233 if (r->written_by_readstat)
2235 /* Ignore the problem. ReadStat is buggy and emits value
2236 labels whose values are longer than string variables'
2237 widths, that are identical in the actual width of the
2238 variable, e.g. both values "ABC123" and "ABC456" for a
2239 string variable with width 3. */
2241 else if (var_is_numeric (var))
2242 sys_warn (r, record->pos,
2243 _("Duplicate value label for %g on %s."),
2244 value.f, var_get_name (var));
2246 sys_warn (r, record->pos,
2247 _("Duplicate value label for `%.*s' on %s."),
2248 width, value_str (&value, width),
2249 var_get_name (var));
2252 value_destroy (&value, width);
2256 pool_free (r->pool, vars);
2257 for (i = 0; i < record->n_labels; i++)
2258 pool_free (r->pool, utf8_labels[i]);
2259 pool_free (r->pool, utf8_labels);
2264 static struct variable *
2265 lookup_var_by_index (struct sfm_reader *r, off_t offset,
2266 const struct sfm_var_record *var_recs, size_t n_var_recs,
2269 const struct sfm_var_record *rec;
2271 if (idx < 1 || idx > n_var_recs)
2273 sys_error (r, offset,
2274 _("Variable index %d not in valid range 1...%zu."),
2279 rec = &var_recs[idx - 1];
2280 if (rec->var == NULL)
2282 sys_error (r, offset,
2283 _("Variable index %d refers to long string continuation."),
/* Parses a set of custom attributes from TEXT into ATTRS.  ATTRS may be a
   null pointer, in which case the attributes are read but discarded.

   Each attribute is a key followed by `(' and one or more values, one per
   line, with the last value followed by `)'.  Parsing continues with the
   next attribute until a `/' follows an attribute or no further key can
   be read. */
static void
parse_attributes (struct sfm_reader *r, struct text_record *text,
                  struct attrset *attrs)
{
  for (;;)
    {
      struct attribute *attr;
      char *key;
      int value_no;

      /* Parse the key. */
      key = text_get_token (text, ss_cstr ("("), NULL);
      if (key == NULL)
        return;

      attr = attribute_create (key);
      for (value_no = 1; ; value_no++)
        {
          char *value;
          size_t len;

          /* Parse the value. */
          value = text_get_token (text, ss_cstr ("\n"), NULL);
          if (value == NULL)
            {
              text_warn (r, text, _("Error parsing attribute value %s[%d]."),
                         key, value_no);
              break;
            }

          /* A value should be enclosed in single quotes, which are
             stripped.  An unquoted value is kept as is, with a
             warning. */
          len = strlen (value);
          if (len < 2 || value[0] != '\'' || value[len - 1] != '\'')
            {
              text_warn (r, text,
                         _("Attribute value %s[%d] is not quoted: %s."),
                         key, value_no, value);
              attribute_add_value (attr, value);
            }
          else
            {
              value[len - 1] = '\0';
              attribute_add_value (attr, value + 1);
            }

          /* Was this the last value for this attribute? */
          if (text_match (text, ')'))
            break;
        }

      /* Keep the attribute only if there is somewhere to put it, it has
         at least one value, and its name is not already taken. */
      if (attrs == NULL || attribute_get_n_values (attr) == 0)
        attribute_destroy (attr);
      else if (!attrset_try_add (attrs, attr))
        {
          text_warn (r, text, _("Duplicate attribute %s."),
                     attribute_get_name (attr));
          attribute_destroy (attr);
        }

      if (text_match (text, '/'))
        break;
    }
}
/* Parses record type 7, subtype 17, which lists custom attributes on the
   data file, and adds them to DICT's attribute set. */
static void
parse_data_file_attributes (struct sfm_reader *r,
                            const struct sfm_extension_record *record,
                            struct dictionary *dict)
{
  struct text_record *text;

  text = open_text_record (r, record, true);
  parse_attributes (r, text, dict_get_attributes (dict));
  close_text_record (r, text);
}
/* Parses record type 7, subtype 18, which lists custom attributes on
   individual variables.  Each entry is a variable name, a `:', and that
   variable's attributes.  When the name lookup yields no variable, the
   attributes are still parsed but are discarded. */
static void
parse_variable_attributes (struct sfm_reader *r,
                           const struct sfm_extension_record *record,
                           struct dictionary *dict)
{
  struct text_record *text = open_text_record (r, record, true);
  struct variable *var;

  while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
    {
      struct attrset *dest = NULL;

      if (var != NULL)
        dest = var_get_attributes (var);
      parse_attributes (r, text, dest);
    }
  close_text_record (r, text);
}
2386 assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
2388 size_t n_warnings = 0;
2391 for (i = 0; i < dict_get_var_cnt (dict); i++)
2393 struct variable *var = dict_get_var (dict, i);
2394 struct attrset *attrs = var_get_attributes (var);
2395 const struct attribute *attr = attrset_lookup (attrs, "$@Role");
2396 if (attr != NULL && attribute_get_n_values (attr) > 0)
2398 int value = atoi (attribute_get_value (attr, 0));
2420 role = ROLE_PARTITION;
2429 if (n_warnings++ == 0)
2430 sys_warn (r, -1, _("Invalid role for variable %s."),
2431 var_get_name (var));
2434 var_set_role (var, role);
2439 sys_warn (r, -1, _("%zu other variables had invalid roles."),
2444 check_overflow (struct sfm_reader *r,
2445 const struct sfm_extension_record *record,
2446 size_t ofs, size_t length)
2448 size_t end = record->size * record->count;
2449 if (length >= end || ofs + length > end)
2451 sys_warn (r, record->pos + end,
2452 _("Extension record subtype %d ends unexpectedly."),
/* Parses the long string value labels held in extension RECORD and adds them
   to the matching variables in DICT.

   As read below, the record is a sequence of entries.  Each entry has a
   4-byte variable name length, the variable name, a 4-byte width and a
   4-byte label count, followed by that many groups of (4-byte value length,
   value bytes, 4-byte label length, label bytes).  Every length taken from
   the record is validated with check_overflow() before the bytes it covers
   are used.  Names and labels are passed through recode_string_pool() with
   "UTF-8" and the dictionary's encoding. */
2460 parse_long_string_value_labels (struct sfm_reader *r,
2461 const struct sfm_extension_record *record,
2462 struct dictionary *dict)
2464 const char *dict_encoding = dict_get_encoding (dict);
/* Total number of data bytes in the record. */
2465 size_t end = record->size * record->count;
2472 struct variable *var;
2477 /* Parse variable name length. */
2478 if (!check_overflow (r, record, ofs, 4))
2480 var_name_len = parse_int (r, record->data, ofs);
2483 /* Parse variable name, width, and number of labels. */
2484 if (!check_overflow (r, record, ofs, var_name_len)
2485 || !check_overflow (r, record, ofs, var_name_len + 8))
2487 var_name = recode_string_pool ("UTF-8", dict_encoding,
2488 (const char *) record->data + ofs,
2489 var_name_len, r->pool);
/* The width and the label count are the two 4-byte integers that directly
   follow the name. */
2490 width = parse_int (r, record->data, ofs + var_name_len);
2491 n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
2492 ofs += var_name_len + 8;
2494 /* Look up 'var' and validate. */
2495 var = dict_lookup_var (dict, var_name);
2497 sys_warn (r, record->pos + ofs,
2498 _("Ignoring long string value label record for "
2499 "unknown variable %s."), var_name);
2500 else if (var_is_numeric (var))
2502 sys_warn (r, record->pos + ofs,
2503 _("Ignoring long string value label record for "
2504 "numeric variable %s."), var_name);
2507 else if (width != var_get_width (var))
2509 sys_warn (r, record->pos + ofs,
2510 _("Ignoring long string value label record for variable "
2511 "%s because the record's width (%d) does not match the "
2512 "variable's width (%d)."),
2513 var_name, width, var_get_width (var));
/* VALUE is a scratch value of the record's width, allocated from R's pool and
   reused for every label of this variable. */
2518 value_init_pool (r->pool, &value, width);
2519 for (i = 0; i < n_labels; i++)
2521 size_t value_length, label_length;
/* A label is skipped from the start when there is no variable to attach it
   to. */
2522 bool skip = var == NULL;
2524 /* Parse value length. */
2525 if (!check_overflow (r, record, ofs, 4))
2527 value_length = parse_int (r, record->data, ofs);
2531 if (!check_overflow (r, record, ofs, value_length))
/* Only a value whose length equals the declared width is copied into VALUE;
   any other length is reported by the warning below. */
2535 if (value_length == width)
2536 memcpy (value_str_rw (&value, width),
2537 (const uint8_t *) record->data + ofs, width);
2540 sys_warn (r, record->pos + ofs,
2541 _("Ignoring long string value label %zu for "
2542 "variable %s, with width %d, that has bad value "
2544 i, var_get_name (var), width, value_length);
2548 ofs += value_length;
2550 /* Parse label length. */
2551 if (!check_overflow (r, record, ofs, 4))
2553 label_length = parse_int (r, record->data, ofs);
2557 if (!check_overflow (r, record, ofs, label_length))
2563 label = recode_string_pool ("UTF-8", dict_encoding,
2564 (const char *) record->data + ofs,
2565 label_length, r->pool);
/* A false return from var_add_value_label() is reported as a duplicate
   label. */
2566 if (!var_add_value_label (var, &value, label))
2567 sys_warn (r, record->pos + ofs,
2568 _("Duplicate value label for `%.*s' on %s."),
2569 width, value_str (&value, width),
2570 var_get_name (var));
/* LABEL was only needed for the call above, so give it back to the pool. */
2571 pool_free (r->pool, label);
2573 ofs += label_length;
/* Parses the long string missing values held in extension RECORD and sets
   them on the matching variables in DICT.

   As read below, each entry has a 4-byte variable name length, the variable
   name, a 1-byte count of missing values, and then that many groups of
   (4-byte value length, value bytes).  Every length taken from the record is
   validated with check_overflow() before the bytes it covers are used. */
2579 parse_long_string_missing_values (struct sfm_reader *r,
2580 const struct sfm_extension_record *record,
2581 struct dictionary *dict)
2583 const char *dict_encoding = dict_get_encoding (dict);
/* Total number of data bytes in the record. */
2584 size_t end = record->size * record->count;
2589 struct missing_values mv;
2591 struct variable *var;
2592 int n_missing_values;
2596 /* Parse variable name length. */
2597 if (!check_overflow (r, record, ofs, 4))
2599 var_name_len = parse_int (r, record->data, ofs);
2602 /* Parse variable name. */
/* The second check also covers the 1-byte count that follows the name. */
2603 if (!check_overflow (r, record, ofs, var_name_len)
2604 || !check_overflow (r, record, ofs, var_name_len + 1))
2606 var_name = recode_string_pool ("UTF-8", dict_encoding,
2607 (const char *) record->data + ofs,
2608 var_name_len, r->pool);
2609 ofs += var_name_len;
2611 /* Parse number of missing values. */
2612 n_missing_values = ((const uint8_t *) record->data)[ofs];
2613 if (n_missing_values < 1 || n_missing_values > 3)
2614 sys_warn (r, record->pos + ofs,
2615 _("Long string missing values record says variable %s "
2616 "has %d missing values, but only 1 to 3 missing values "
2618 var_name, n_missing_values);
2621 /* Look up 'var' and validate. */
2622 var = dict_lookup_var (dict, var_name);
2624 sys_warn (r, record->pos + ofs,
2625 _("Ignoring long string missing value record for "
2626 "unknown variable %s."), var_name);
2627 else if (var_is_numeric (var))
2629 sys_warn (r, record->pos + ofs,
2630 _("Ignoring long string missing value record for "
2631 "numeric variable %s."), var_name);
/* MV collects this variable's missing values.  When there is no usable
   variable, a width of 8 is used as a placeholder. */
2636 mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
2637 for (i = 0; i < n_missing_values; i++)
2639 size_t value_length;
2641 /* Parse value length. */
2642 if (!check_overflow (r, record, ofs, 4))
2644 value_length = parse_int (r, record->data, ofs);
2648 if (!check_overflow (r, record, ofs, value_length))
/* A value that mv_add_str() refuses is reported and otherwise ignored. */
2652 && !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
2654 sys_warn (r, record->pos + ofs,
2655 _("Ignoring long string missing value %zu for variable "
2656 "%s, with width %d, that has bad value width %zu."),
2657 i, var_get_name (var), var_get_width (var),
2659 ofs += value_length;
2662 var_set_missing_values (var, &mv);
/* Forward declarations for the case-reading helpers defined below. */
2668 static void partial_record (struct sfm_reader *);
2670 static void read_error (struct casereader *, const struct sfm_reader *);
/* Readers for a single numeric or string value of a case. */
2672 static bool read_case_number (struct sfm_reader *, double *);
2673 static int read_case_string (struct sfm_reader *, uint8_t *, size_t);
/* Readers for compressed case data. */
2674 static int read_opcode (struct sfm_reader *);
2675 static bool read_compressed_number (struct sfm_reader *, double *);
2676 static int read_compressed_string (struct sfm_reader *, uint8_t *);
/* Readers for string data in whole 8-byte units. */
2677 static int read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
2678 static bool skip_whole_strings (struct sfm_reader *, size_t);
2680 /* Reads and returns one case from READER's file. Returns a null
2681 pointer if not successful. */
2682 static struct ccase *
2683 sys_file_casereader_read (struct casereader *reader, void *r_)
2685 struct sfm_reader *r = r_;
/* There is nothing to read if R is already in an error state or has no
   variables. */
2690 if (r->error || !r->sfm_var_cnt)
2693 c = case_create (r->proto);
/* Fill in the case one sfm_var entry at a time. */
2695 for (i = 0; i < r->sfm_var_cnt; i++)
2697 struct sfm_var *sv = &r->sfm_vars[i];
2698 union value *v = case_data_rw_idx (c, sv->case_index);
/* A width of 0 selects the numeric reader; anything else is string data. */
2700 if (sv->var_width == 0)
2701 retval = read_case_number (r, &v->f);
/* Read this entry's SEGMENT_WIDTH bytes into the value, starting at the
   entry's OFFSET within the string. */
2704 uint8_t *s = value_str_rw (v, sv->var_width);
2705 retval = read_case_string (r, s + sv->offset, sv->segment_width);
/* Discard the whole 8-byte units of padding recorded for this entry. */
2708 retval = skip_whole_strings (r, ROUND_DOWN (sv->padding, 8));
2710 sys_error (r, r->pos, _("File ends in partial string value."));
/* NOTE(review): a CASE_CNT of -1 presumably means the number of cases is
   unknown, in which case running out of data is not reported here -- confirm
   against where CASE_CNT is set. */
2722 if (r->case_cnt != -1)
2723 read_error (reader, r);
2728 /* Issues an error, at R's current file position, that R ends in a
   partial record (that is, partway through a case). */
2730 partial_record (struct sfm_reader *r)
2732 sys_error (r, r->pos, _("File ends in partial case."));
2735 /* Issues an error that an unspecified error occurred reading a case from
   SFM's file, and forces casereader R into an error state. */
2738 read_error (struct casereader *r, const struct sfm_reader *sfm)
2740 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
2741 casereader_force_error (r);
2744 /* Reads a number from R and stores its value in *D.
2745 If R is compressed, reads a compressed number;
2746 otherwise, reads a number in the regular way.
2747 Returns true if successful, false if end of file is
2748 reached immediately. */
2750 read_case_number (struct sfm_reader *r, double *d)
2752 if (r->compression == ANY_COMP_NONE)
2755 if (!try_read_bytes (r, number, sizeof number))
2757 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
2761 return read_compressed_number (r, d);
2764 /* Reads LENGTH string bytes from R into S. Always reads a multiple of 8
2765 bytes; if LENGTH is not a multiple of 8, then extra bytes are read and
2766 discarded without being written to S. Reads compressed strings if R is
2767 compressed. Returns 1 if successful, 0 if end of file is reached
2768 immediately, or -1 for some kind of error. */
2770 read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
/* Split LENGTH into the part made of complete 8-byte units and the leftover
   bytes of a final, incomplete unit. */
2772 size_t whole = ROUND_DOWN (length, 8);
2773 size_t partial = length % 8;
/* The complete units are read directly into S. */
2777 int retval = read_whole_strings (r, s, whole);
/* The final unit is read into a bounce buffer so that only its first PARTIAL
   bytes end up in S. */
2785 int retval = read_whole_strings (r, bounce, sizeof bounce);
2797 memcpy (s + whole, bounce, partial);
2803 /* Reads and returns the next compression opcode from R.  Opcodes are
   consumed one at a time from the R->opcodes buffer.  R must be
   compressed. */
2805 read_opcode (struct sfm_reader *r)
2807 assert (r->compression != ANY_COMP_NONE);
/* Once every buffered opcode has been consumed, fetch another buffer's worth
   from the compressed data. */
2811 if (r->opcode_idx >= sizeof r->opcodes)
2814 int retval = try_read_compressed_bytes (r, r->opcodes,
2820 opcode = r->opcodes[r->opcode_idx++];
2827 /* Reads a compressed number from R and stores its value in D.
2828 Returns true if successful, false if end of file is
2829 reached immediately. */
2831 read_compressed_number (struct sfm_reader *r, double *d)
/* The opcode decides how the number is represented. */
2833 int opcode = read_opcode (r);
/* Opcode case: the number is stored as a full floating-point value in the
   compressed data. */
2841 return read_compressed_float (r, d);
/* Opcode case: compressed spaces, which are not expected in a numeric field.
   The spaces are still converted, and a corruption warning is issued at most
   once per reader. */
2844 float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
2845 if (!r->corruption_warning)
2847 r->corruption_warning = true;
2848 sys_warn (r, r->pos,
2849 _("Possible compressed data corruption: "
2850 "compressed spaces appear in numeric field."));
/* Opcode case: the value is the opcode itself less the file's bias. */
2859 *d = opcode - r->bias;
2866 /* Reads a compressed 8-byte string segment from R and stores it in DST.
   The opcode read first decides where the 8 bytes come from. */
2868 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
2873 opcode = read_opcode (r);
/* Opcode case: the 8 bytes are stored literally in the compressed data.
   Anything other than a complete read counts as an error. */
2881 retval = read_compressed_bytes (r, dst, 8);
2882 return retval == 1 ? 1 : -1;
/* Opcode case: the segment is 8 spaces. */
2885 memset (dst, ' ', 8);
/* Opcode case: a compressed integer where string data was expected.  The
   value (opcode less bias) is stored in DST in the file's floating-point
   format. */
2890 double value = opcode - r->bias;
2891 float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
2894 /* This has actually been seen "in the wild". The submitter of the
2895 file that showed that the contents decoded as spaces, but they
2896 were at the end of the field so it's possible that the null
2897 bytes just acted as null terminators. */
/* Otherwise warn about possible corruption, at most once per reader. */
2899 else if (!r->corruption_warning)
2901 r->corruption_warning = true;
2902 sys_warn (r, r->pos,
2903 _("Possible compressed data corruption: "
2904 "string contains compressed integer (opcode %d)."),
2912 /* Reads LENGTH string bytes from R into S. LENGTH must be a multiple of 8.
2913 Reads compressed strings if R is compressed. Returns 1 if successful, 0 if
2914 end of file is reached immediately, or -1 for some kind of error. */
2916 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
2918 assert (length % 8 == 0);
/* Uncompressed data can be read in a single call. */
2919 if (r->compression == ANY_COMP_NONE)
2920 return try_read_bytes (r, s, length);
/* Compressed data is decoded one 8-byte segment at a time. */
2925 for (ofs = 0; ofs < length; ofs += 8)
2927 int retval = read_compressed_string (r, s + ofs);
/* Skips LENGTH string bytes from R.
   LENGTH must be a multiple of 8.
   (LENGTH must also be less than 1024, but that's only because the
   current caller never needs more than that many bytes.)
   Returns true if successful, false if end of file is
   reached immediately or if an error occurs. */
static bool
skip_whole_strings (struct sfm_reader *r, size_t length)
{
  uint8_t buffer[1024];
  assert (length < sizeof buffer);

  /* read_whole_strings() returns 1 for success, 0 for end of file, and -1
     for an error.  Returning that int directly from a bool function would
     turn -1 into true, so that a failed skip looked like a successful one.
     Only 1 counts as success. */
  return read_whole_strings (r, buffer, length) == 1;
}
2956 /* Helpers for reading records that contain structured text. */
2959 /* Maximum number of warnings to issue for a single text record.
   Warnings beyond this number are counted but not displayed. */
2961 #define MAX_TEXT_WARNINGS 5
/* State for reading one text record. */
2966 struct substring buffer; /* Record contents. */
2967 off_t start; /* Starting offset in file. */
2968 size_t pos; /* Current position in buffer. */
2969 int n_warnings; /* Number of warnings issued or suppressed. */
2970 bool recoded; /* Recoded into UTF-8? */
/* Creates and returns a text record, allocated from R's pool, for reading
   the contents of extension RECORD.  If RECODE_TO_UTF8 is true, the record's
   contents are passed through recode_substring_pool() with "UTF-8" and R's
   encoding, and the result is what gets read. */
2973 static struct text_record *
2974 open_text_record (struct sfm_reader *r,
2975 const struct sfm_extension_record *record,
2976 bool recode_to_utf8)
2978 struct text_record *text;
2979 struct substring raw;
2981 text = pool_alloc (r->pool, sizeof *text);
/* RAW covers all of the record's data bytes. */
2982 raw = ss_buffer (record->data, record->size * record->count);
2983 text->start = record->pos;
2984 text->buffer = (recode_to_utf8
2985 ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
2988 text->n_warnings = 0;
2989 text->recoded = recode_to_utf8;
2994 /* Closes TEXT, frees its storage, and issues a final warning
2995 about suppressed warnings if necessary. */
2997 close_text_record (struct sfm_reader *r, struct text_record *text)
/* text_warn() keeps counting after it stops displaying, so the excess over
   MAX_TEXT_WARNINGS is the number of warnings that were suppressed. */
2999 if (text->n_warnings > MAX_TEXT_WARNINGS)
3000 sys_warn (r, -1, _("Suppressed %d additional related warnings."),
3001 text->n_warnings - MAX_TEXT_WARNINGS);
3003 pool_free (r->pool, ss_data (text->buffer));
3006 /* Reads a variable=value pair from TEXT.
3007 Looks up the variable in DICT and stores it into *VAR.
3008 Stores a null-terminated value into *VALUE. */
3010 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
3011 struct text_record *text,
3012 struct variable **var, char **value)
/* The variable's short name runs up to the "=". */
3016 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
/* The value runs up to the next tab or null byte. */
3019 *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
/* Advance past any run of tabs and null bytes that follows. */
3023 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
3024 ss_buffer ("\t\0", 2));
/* Reads a variable name from TEXT, ending at one of DELIMITERS, looks it up
   in DICT, and stores the result in *VAR.  Warns, subject to TEXT's warning
   limit, if the dictionary has no variable with that name. */
3032 text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
3033 struct text_record *text, struct substring delimiters,
3034 struct variable **var)
3038 name = text_get_token (text, delimiters, NULL);
3042 *var = dict_lookup_var (dict, name);
3046 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
/* Reads a short variable name from TEXT, ending at one of DELIMITERS, looks
   it up in DICT, and stores the result in *VAR.  Warns, subject to TEXT's
   warning limit, if the dictionary has no variable with that name. */
3053 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
3054 struct text_record *text, struct substring delimiters,
3055 struct variable **var)
3057 char *short_name = text_get_token (text, delimiters, NULL);
/* A null token means that TEXT has nothing more to read. */
3058 if (short_name == NULL)
3061 *var = dict_lookup_var (dict, short_name);
3063 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
3068 /* Displays a warning for the current file position, limiting the
3069 number to MAX_TEXT_WARNINGS for TEXT. */
3071 text_warn (struct sfm_reader *r, struct text_record *text,
3072 const char *format, ...)
/* The counter is incremented for every call, displayed or not, so that
   close_text_record() can report how many warnings were suppressed. */
3074 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
3078 va_start (args, format);
/* The offset reported is the record's start plus the position within it. */
3079 sys_msg (r, text->start + text->pos, MW, format, args);
/* Obtains the next token from TEXT, using ss_tokenize() with DELIMITERS and
   advancing TEXT's position, and returns a pointer to the token's bytes
   within TEXT's buffer.  DELIMITER may be null; what is stored through a
   non-null DELIMITER is on lines not visible in this listing. */
3085 text_get_token (struct text_record *text, struct substring delimiters,
3088 struct substring token;
3091 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
/* END points just past the last byte of the token. */
3094 end = &ss_data (token)[ss_length (token)];
3095 if (delimiter != NULL)
3098 return ss_data (token);
3101 /* Reads a integer value expressed in decimal, then a space, then a string that
3102 consists of exactly as many bytes as specified by the integer, then a space,
3103 from TEXT. Returns the string, null-terminated, as a subset of TEXT's
3104 buffer (so the caller should not free the string). */
3106 text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
3114 while (text->pos < text->buffer.length)
3116 int c = text->buffer.string[text->pos];
3117 if (c < '0' || c > '9')
3119 n = (n * 10) + (c - '0');
3122 if (text->pos >= text->buffer.length || start == text->pos)
3124 sys_warn (r, text->start,
3125 _("Expecting digit at offset %zu in MRSETS record."),
3130 if (!text_match (text, ' '))
3132 sys_warn (r, text->start,
3133 _("Expecting space at offset %zu in MRSETS record."),
3138 if (text->pos + n > text->buffer.length)
3140 sys_warn (r, text->start,
3141 _("%zu-byte string starting at offset %zu "
3142 "exceeds record length %zu."),
3143 n, text->pos, text->buffer.length);
3147 s = &text->buffer.string[text->pos];
3150 sys_warn (r, text->start,
3151 _("Expecting space at offset %zu following %zu-byte string."),
/* Checks whether the byte at TEXT's current position is C.  Nothing can
   match once the position has reached the end of the buffer. */
3161 text_match (struct text_record *text, char c)
3163 if (text->pos >= text->buffer.length)
3166 if (text->buffer.string[text->pos] == c)
3175 /* Returns the current byte offset inside TEXT's string.  If the record
   was converted to UTF-8, this is an offset into the converted text. */
3178 text_pos (const struct text_record *text)
/* Returns the start of TEXT's buffer, that is, the whole record contents
   regardless of the current read position. */
3184 text_get_all (const struct text_record *text)
3186 return text->buffer.string;
3191 /* Displays a corruption message of message class CLASS about R's file.
   The text is a prefix naming the file, with or without OFFSET, followed by
   FORMAT expanded with ARGS. */
3193 sys_msg (struct sfm_reader *r, off_t offset,
3194 int class, const char *format, va_list args)
3199 ds_init_empty (&text);
/* Prefix that includes the offset. */
3201 ds_put_format (&text, _("`%s' near offset 0x%llx: "),
3202 fh_get_file_name (r->fh), (long long int) offset);
/* Prefix without an offset. */
3204 ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
3205 ds_put_vformat (&text, format, args);
3207 m.category = msg_class_to_category (class);
3208 m.severity = msg_class_to_severity (class);
3214 m.text = ds_cstr (&text);
3219 /* Displays a warning for offset OFFSET in the file.  FORMAT and the
   arguments that follow it are as for printf(). */
3221 sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
3225 va_start (args, format);
3226 sys_msg (r, offset, MW, format, args);
3230 /* Displays an error for offset OFFSET in the file and marks it as in an
   error state.  FORMAT and the arguments that follow it are as for
   printf(). */
3233 sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
3237 va_start (args, format);
3238 sys_msg (r, offset, ME, format, args);
3244 /* Reads BYTE_CNT bytes into BUF.
3245 Returns 1 if exactly BYTE_CNT bytes are successfully read.
3246 Returns -1 if an I/O error or a partial read occurs.
3247 Returns 0 for an immediate end-of-file and, if EOF_IS_OK is false, reports
   an error. */
3250 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
3251 void *buf, size_t byte_cnt)
3253 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
/* R->pos tracks the file position, so it advances even on a short read. */
3254 r->pos += bytes_read;
3255 if (bytes_read == byte_cnt)
3257 else if (ferror (r->file))
3259 sys_error (r, r->pos, _("System error: %s."), strerror (errno));
/* A short read without an I/O error means end of file.  That is unexpected
   unless the caller allows end of file and no bytes at all were read. */
3262 else if (!eof_is_ok || bytes_read != 0)
3264 sys_error (r, r->pos, _("Unexpected end of file."));
3271 /* Reads BYTE_CNT into BUF.
3272 Returns true if successful.
3273 Returns false upon I/O error or if end-of-file is encountered. */
3275 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
/* End of file is not acceptable here, so read_bytes_internal() reports it as
   an error, and only its result of 1 counts as success. */
3277 return read_bytes_internal (r, false, buf, byte_cnt) == 1;
3280 /* Reads BYTE_CNT bytes into BUF.
3281 Returns 1 if exactly BYTE_CNT bytes are successfully read.
3282 Returns 0 if an immediate end-of-file is encountered.
3283 Returns -1 if an I/O error or a partial read occurs. */
3285 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
/* Unlike read_bytes(), an immediate end of file is acceptable here.  Callers
   must tell the three possible results apart. */
3287 return read_bytes_internal (r, true, buf, byte_cnt);
3290 /* Reads a 32-bit signed integer from R and stores its value in host format in
3291 *X. Returns true if successful, otherwise false. */
3293 read_int (struct sfm_reader *r, int *x)
3296 if (read_bytes (r, integer, sizeof integer) != 1)
/* Convert from the file's integer format. */
3298 *x = integer_get (r->integer_format, integer, sizeof integer);
/* Reads a 32-bit integer from R by way of read_int(), for storing into *X as
   an unsigned value. */
3303 read_uint (struct sfm_reader *r, unsigned int *x)
3308 ok = read_int (r, &y);
3313 /* Reads a 64-bit signed integer from R and stores its value, converted
   from the file's integer format, in *X. */
3316 read_int64 (struct sfm_reader *r, long long int *x)
3319 if (read_bytes (r, integer, sizeof integer) != 1)
3321 *x = integer_get (r->integer_format, integer, sizeof integer);
3325 /* Reads a 64-bit unsigned integer from R, by way of read_int64(), for
   storing into *X. */
3328 read_uint64 (struct sfm_reader *r, unsigned long long int *x)
3333 ok = read_int64 (r, &y);
/* Returns the 4-byte integer at offset OFS within DATA, converted from R's
   integer format.  The caller must ensure that DATA has 4 bytes at OFS. */
3339 parse_int (const struct sfm_reader *r, const void *data, size_t ofs)
3341 return integer_get (r->integer_format, (const uint8_t *) data + ofs, 4);
/* Returns the floating-point number at offset OFS within DATA, converted
   from R's floating-point format to a double. */
3345 parse_float (const struct sfm_reader *r, const void *data, size_t ofs)
3347 return float_get_double (r->float_format, (const uint8_t *) data + ofs);
3350 /* Reads exactly SIZE - 1 bytes into BUFFER
3351 and stores a null byte into BUFFER[SIZE - 1]. */
3353 read_string (struct sfm_reader *r, char *buffer, size_t size)
3358 ok = read_bytes (r, buffer, size - 1);
/* BUFFER must have room for SIZE bytes, including this terminator. */
3360 buffer[size - 1] = '\0';
3364 /* Skips BYTES bytes forward in R. */
3366 skip_bytes (struct sfm_reader *r, size_t bytes)
/* The bytes are skipped by reading them into a scratch buffer, at most one
   bufferful at a time. */
3371 size_t chunk = MIN (sizeof buffer, bytes);
3372 if (!read_bytes (r, buffer, chunk))
3380 /* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have
3381 been replaced by LFs.
3383 (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
3384 files that use CR-only line ends in the file label and extra product
   information.) */
3387 fix_line_ends (const char *s)
/* The result is never longer than S, so this allocation suffices. */
3391 d = dst = xmalloc (strlen (s) + 1);
/* Forward declaration: read_zheader() below calls read_ztrailer() before its
   definition. */
3410 read_ztrailer (struct sfm_reader *r,
3411 long long int zheader_ofs,
3412 long long int ztrailer_len);
/* Memory allocation callback for zlib (installed as zstream.zalloc by
   read_zheader()).  POOL_ is the pool installed as zstream.opaque.
   Allocates ITEMS * SIZE bytes from that pool, unless SIZE is zero or
   xalloc_oversized() reports that the product is too large. */
3415 zalloc (voidpf pool_, uInt items, uInt size)
3417 struct pool *pool = pool_;
3419 return (!size || xalloc_oversized (items, size)
3421 : pool_malloc (pool, items * size));
/* Memory release callback for zlib (installed as zstream.zfree by
   read_zheader()).  Returns ADDRESS to the pool POOL_. */
3425 zfree (voidpf pool_, voidpf address)
3427 struct pool *pool = pool_;
3429 pool_free (pool, address);
/* Reads the ZLIB data header from R, which consists of three 64-bit integers:
   the header's own offset, the trailer's offset, and the trailer's length.
   Validates them, has read_ztrailer() check the trailer, sets up R's zlib
   buffers and callbacks if that has not been done yet, and opens the zlib
   stream. */
3433 read_zheader (struct sfm_reader *r)
3436 long long int zheader_ofs;
3437 long long int ztrailer_ofs;
3438 long long int ztrailer_len;
3440 if (!read_int64 (r, &zheader_ofs)
3441 || !read_int64 (r, &ztrailer_ofs)
3442 || !read_int64 (r, &ztrailer_len))
/* The header must record the offset at which it was actually found. */
3445 if (zheader_ofs != pos)
3447 sys_error (r, pos, _("Wrong ZLIB data header offset %#llx "
3448 "(expected %#llx)."),
3449 zheader_ofs, (long long int) pos);
/* The trailer cannot start before the position already reached. */
3453 if (ztrailer_ofs < r->pos)
3455 sys_error (r, pos, _("Impossible ZLIB trailer offset 0x%llx."),
/* The trailer length must be a nonzero multiple of 24 bytes. */
3460 if (ztrailer_len < 24 || ztrailer_len % 24)
3462 sys_error (r, pos, _("Invalid ZLIB trailer length %lld."), ztrailer_len);
3466 r->ztrailer_ofs = ztrailer_ofs;
3467 if (!read_ztrailer (r, zheader_ofs, ztrailer_len))
/* Allocate the inflate buffers from R's pool only once. */
3470 if (r->zin_buf == NULL)
3472 r->zin_buf = pool_malloc (r->pool, ZIN_BUF_SIZE);
3473 r->zout_buf = pool_malloc (r->pool, ZOUT_BUF_SIZE);
3474 r->zstream.next_in = NULL;
3475 r->zstream.avail_in = 0;
/* Have zlib allocate and free its memory through R's pool. */
3478 r->zstream.zalloc = zalloc;
3479 r->zstream.zfree = zfree;
3480 r->zstream.opaque = r->pool;
3482 return open_zstream (r);
/* Positions R's file at absolute offset OFFSET.  Reports an error if the
   seek fails. */
3486 seek (struct sfm_reader *r, off_t offset)
3488 if (fseeko (r->file, offset, SEEK_SET))
3489 sys_error (r, 0, _("%s: seek failed (%s)."),
3490 fh_get_file_name (r->fh), strerror (errno));
3494 /* Performs some additional consistency checks on the ZLIB compressed data
   trailer.

   ZHEADER_OFS is the offset of the ZLIB data header and ZTRAILER_LEN is the
   trailer length taken from that header.  The trailer is read from
   R->ztrailer_ofs, and afterward the file is positioned just past the
   24-byte header at ZHEADER_OFS. */
3497 read_ztrailer (struct sfm_reader *r,
3498 long long int zheader_ofs,
3499 long long int ztrailer_len)
3501 long long int expected_uncmp_ofs;
3502 long long int expected_cmp_ofs;
3505 unsigned int block_size;
3506 unsigned int n_blocks;
3510 if (fstat (fileno (r->file), &s))
3512 sys_error (r, 0, _("%s: stat failed (%s)."),
3513 fh_get_file_name (r->fh), strerror (errno));
3517 if (!S_ISREG (s.st_mode))
3519 /* We can't seek to the trailer and then back to the data in this file,
3520 so skip doing extra checks. */
/* The trailer is expected to be the last thing in the file. */
3524 if (r->ztrailer_ofs + ztrailer_len != s.st_size)
3525 sys_warn (r, r->pos,
3526 _("End of ZLIB trailer (0x%llx) is not file size (0x%llx)."),
3527 r->ztrailer_ofs + ztrailer_len, (long long int) s.st_size);
3529 seek (r, r->ztrailer_ofs);
3531 /* Read fixed header from ZLIB data trailer. */
3532 if (!read_int64 (r, &bias))
/* The trailer's bias is compared, negated, against the bias in R. */
3534 if (-bias != r->bias)
3536 sys_error (r, r->pos, _("ZLIB trailer bias (%lld) differs from "
3537 "file header bias (%.2f)."),
3542 if (!read_int64 (r, &zero))
3545 sys_warn (r, r->pos,
3546 _("ZLIB trailer \"zero\" field has nonzero value %lld."), zero);
3548 if (!read_uint (r, &block_size))
3550 if (block_size != ZBLOCK_SIZE)
3551 sys_warn (r, r->pos,
3552 _("ZLIB trailer specifies unexpected %u-byte block size."),
3555 if (!read_uint (r, &n_blocks))
/* After the 24-byte fixed part, the trailer holds one 24-byte descriptor per
   block, so its length determines the block count. */
3557 if (n_blocks != (ztrailer_len - 24) / 24)
3559 sys_error (r, r->pos,
3560 _("%lld-byte ZLIB trailer specifies %u data blocks (expected "
3562 ztrailer_len, n_blocks, (ztrailer_len - 24) / 24);
/* Walk the block descriptors, checking that the blocks are contiguous.  The
   first block's uncompressed offset is expected to be ZHEADER_OFS, and its
   compressed offset the byte just past the 24-byte header. */
3566 expected_uncmp_ofs = zheader_ofs;
3567 expected_cmp_ofs = zheader_ofs + 24;
3568 for (i = 0; i < n_blocks; i++)
3570 off_t desc_ofs = r->pos;
3571 unsigned long long int uncompressed_ofs;
3572 unsigned long long int compressed_ofs;
3573 unsigned int uncompressed_size;
3574 unsigned int compressed_size;
3576 if (!read_uint64 (r, &uncompressed_ofs)
3577 || !read_uint64 (r, &compressed_ofs)
3578 || !read_uint (r, &uncompressed_size)
3579 || !read_uint (r, &compressed_size))
3582 if (uncompressed_ofs != expected_uncmp_ofs)
3584 sys_error (r, desc_ofs,
3585 _("ZLIB block descriptor %u reported uncompressed data "
3586 "offset %#llx, when %#llx was expected."),
3587 i, uncompressed_ofs, expected_uncmp_ofs);
3591 if (compressed_ofs != expected_cmp_ofs)
3593 sys_error (r, desc_ofs,
3594 _("ZLIB block descriptor %u reported compressed data "
3595 "offset %#llx, when %#llx was expected."),
3596 i, compressed_ofs, expected_cmp_ofs);
/* Every block except the last is expected to be exactly BLOCK_SIZE bytes
   when uncompressed.  The last may be shorter but not longer. */
3600 if (i < n_blocks - 1)
3602 if (uncompressed_size != block_size)
3603 sys_warn (r, desc_ofs,
3604 _("ZLIB block descriptor %u reported block size %#x, "
3605 "when %#x was expected."),
3606 i, uncompressed_size, block_size);
3610 if (uncompressed_size > block_size)
3611 sys_warn (r, desc_ofs,
3612 _("ZLIB block descriptor %u reported block size %#x, "
3613 "when at most %#x was expected."),
3614 i, uncompressed_size, block_size);
3617 /* http://www.zlib.net/zlib_tech.html says that the maximum expansion
3618 from compression, with worst-case parameters, is 13.5% plus 11 bytes.
3619 This code checks for an expansion of more than 14.3% plus 11 bytes. */
3621 if (compressed_size > uncompressed_size + uncompressed_size / 7 + 11)
3623 sys_error (r, desc_ofs,
3624 _("ZLIB block descriptor %u reports compressed size %u "
3625 "and uncompressed size %u."),
3626 i, compressed_size, uncompressed_size);
3630 expected_uncmp_ofs += uncompressed_size;
3631 expected_cmp_ofs += compressed_size;
/* The compressed blocks must end exactly where the trailer begins. */
3634 if (expected_cmp_ofs != r->ztrailer_ofs)
3636 sys_error (r, r->pos, _("ZLIB trailer is at offset %#llx but %#llx "
3637 "would be expected from block descriptors."),
3638 r->ztrailer_ofs, expected_cmp_ofs);
/* Go back to the start of the compressed data. */
3642 seek (r, zheader_ofs + 24);
/* Discards any buffered inflated output and initializes R's zlib stream for
   inflation.  Reports an error if zlib initialization fails. */
3647 open_zstream (struct sfm_reader *r)
3651 r->zout_pos = r->zout_end = 0;
3652 error = inflateInit (&r->zstream);
3655 sys_error (r, r->pos, _("ZLIB initialization failed (%s)."),
/* Ends inflation on R's zlib stream.  Reports an error if zlib indicates an
   inconsistency at the end of the stream. */
3663 close_zstream (struct sfm_reader *r)
3667 error = inflateEnd (&r->zstream);
3670 sys_error (r, r->pos, _("Inconsistency at end of ZLIB stream (%s)."),
/* Reads BYTE_CNT bytes of inflated data from R's ZLIB-compressed data into
   BUF_.  Already inflated data in R->zout_buf is used first.  When that runs
   out, more compressed input is read from the file, never past the trailer,
   and inflated into R->zout_buf. */
3678 read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt)
3680 uint8_t *buf = buf_;
3689 /* Use already inflated data if there is any. */
3690 if (r->zout_pos < r->zout_end)
3692 unsigned int n = MIN (byte_cnt, r->zout_end - r->zout_pos);
3693 memcpy (buf, &r->zout_buf[r->zout_pos], n);
3702 /* We need to inflate some more data.
3703 Get some more input data if we don't have any. */
3704 if (r->zstream.avail_in == 0)
/* Read at most one input buffer's worth, and stop at the trailer's offset. */
3706 unsigned int n = MIN (ZIN_BUF_SIZE, r->ztrailer_ofs - r->pos);
3711 int retval = try_read_bytes (r, r->zin_buf, n);
3714 r->zstream.avail_in = n;
3715 r->zstream.next_in = r->zin_buf;
3719 /* Inflate the (remaining) input data. */
3720 r->zstream.avail_out = ZOUT_BUF_SIZE;
3721 r->zstream.next_out = r->zout_buf;
3722 error = inflate (&r->zstream, Z_SYNC_FLUSH);
/* The number of bytes produced is how far zlib advanced its output
   pointer. */
3724 r->zout_end = r->zstream.next_out - r->zout_buf;
3725 if (r->zout_end == 0)
/* No output was produced.  Any result other than the end of the stream is
   reported as an inconsistency.  At the end of a stream, the stream is
   closed and reopened. */
3727 if (error != Z_STREAM_END)
3729 sys_error (r, r->pos, _("ZLIB stream inconsistency (%s)."),
3733 else if (!close_zstream (r) || !open_zstream (r))
3738 /* Process the output data and ignore 'error' for now. ZLIB will
3739 present it to us again on the next inflate() call. */
/* Reads BYTE_CNT bytes of compressed case data into BUF, directly from the
   file for simple compression and through read_bytes_zlib() otherwise.  End
   of file is not acceptable here, so it is reported as an error on either
   path. */
3745 read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3747 if (r->compression == ANY_COMP_SIMPLE)
3748 return read_bytes (r, buf, byte_cnt);
3751 int retval = read_bytes_zlib (r, buf, byte_cnt);
3753 sys_error (r, r->pos, _("Unexpected end of ZLIB compressed data."));
/* Reads BYTE_CNT bytes of compressed case data into BUF, directly from the
   file for simple compression and through read_bytes_zlib() otherwise.
   Unlike read_compressed_bytes(), this function does not itself report an
   error when the data has run out. */
3759 try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3761 if (r->compression == ANY_COMP_SIMPLE)
3762 return try_read_bytes (r, buf, byte_cnt);
3764 return read_bytes_zlib (r, buf, byte_cnt);
3767 /* Reads a 64-bit floating-point number from R and returns its
3768 value in host format. */
3770 read_compressed_float (struct sfm_reader *r, double *d)
3774 if (!read_compressed_bytes (r, number, sizeof number))
3777 *d = float_get_double (r->float_format, number);
/* Casereader class for system files: the callbacks through which the
   casereader reads cases from, and destroys, a system file reader. */
3781 static const struct casereader_class sys_file_casereader_class =
3783 sys_file_casereader_read,
3784 sys_file_casereader_destroy,
3789 const struct any_reader_class sys_file_reader_class =
3791 N_("SPSS System File"),