1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/sys-file-private.h"
29 #include "data/any-reader.h"
30 #include "data/attributes.h"
31 #include "data/case.h"
32 #include "data/casereader-provider.h"
33 #include "data/casereader.h"
34 #include "data/dictionary.h"
35 #include "data/file-handle-def.h"
36 #include "data/file-name.h"
37 #include "data/format.h"
38 #include "data/identifier.h"
39 #include "data/missing-values.h"
40 #include "data/mrset.h"
41 #include "data/short-names.h"
42 #include "data/value-labels.h"
43 #include "data/value.h"
44 #include "data/variable.h"
45 #include "libpspp/array.h"
46 #include "libpspp/assertion.h"
47 #include "libpspp/compiler.h"
48 #include "libpspp/fbuf.h"
49 #include "libpspp/i18n.h"
50 #include "libpspp/ll.h"
51 #include "libpspp/message.h"
52 #include "libpspp/misc.h"
53 #include "libpspp/pool.h"
54 #include "libpspp/str.h"
55 #include "libpspp/stringi-set.h"
57 #include "gl/c-strtod.h"
58 #include "gl/c-ctype.h"
59 #include "gl/inttostr.h"
60 #include "gl/localcharset.h"
61 #include "gl/minmax.h"
62 #include "gl/unlocked-io.h"
63 #include "gl/xalloc.h"
64 #include "gl/xalloc-oversized.h"
68 #define _(msgid) gettext (msgid)
69 #define N_(msgid) (msgid)
73 /* subtypes 0-2 unknown */
74 EXT_INTEGER = 3, /* Machine integer info. */
75 EXT_FLOAT = 4, /* Machine floating-point info. */
76 EXT_VAR_SETS = 5, /* Variable sets. */
77 EXT_DATE = 6, /* DATE. */
78 EXT_MRSETS = 7, /* Multiple response sets. */
79 EXT_DATA_ENTRY = 8, /* SPSS Data Entry. */
80 /* subtype 9 unknown */
81 EXT_PRODUCT_INFO = 10, /* Extra product info text. */
82 EXT_DISPLAY = 11, /* Variable display parameters. */
83 /* subtype 12 unknown */
84 EXT_LONG_NAMES = 13, /* Long variable names. */
85 EXT_LONG_STRINGS = 14, /* Long strings. */
86 /* subtype 15 unknown */
87 EXT_NCASES = 16, /* Extended number of cases. */
88 EXT_FILE_ATTRS = 17, /* Data file attributes. */
89 EXT_VAR_ATTRS = 18, /* Variable attributes. */
90 EXT_MRSETS2 = 19, /* Multiple response sets (extended). */
91 EXT_ENCODING = 20, /* Character encoding. */
92 EXT_LONG_LABELS = 21, /* Value labels for long strings. */
93 EXT_LONG_MISSING = 22, /* Missing values for long strings. */
94 EXT_DATAVIEW = 24 /* "Format properties in dataview table". */
97 /* Fields from the top-level header record. */
98 struct sfm_header_record
100 char magic[5]; /* First 4 bytes of file, then null. */
101 int weight_idx; /* 0 if unweighted, otherwise a var index. */
102 int nominal_case_size; /* Number of var positions. */
104 /* These correspond to the members of struct any_read_info or a dictionary
105 but in the system file's encoding rather than ASCII. */
106 char creation_date[10]; /* "dd mmm yy". */
107 char creation_time[9]; /* "hh:mm:ss". */
108 char eye_catcher[61]; /* Eye-catcher string, then product name. */
109 char file_label[65]; /* File label. */
112 struct sfm_var_record
119 int missing_value_code;
122 struct variable *var;
125 struct sfm_value_label
131 struct sfm_value_label_record
134 struct sfm_value_label *labels;
135 unsigned int n_labels;
141 struct sfm_document_record
150 const char *name; /* Name. */
151 const char *label; /* Human-readable label for group. */
152 enum mrset_type type; /* Group type. */
153 const char **vars; /* Constituent variables' names. */
154 size_t n_vars; /* Number of constituent variables. */
157 enum mrset_md_cat_source cat_source; /* Source of category labels. */
158 bool label_from_var_label; /* 'label' taken from variable label? */
159 const char *counted; /* Counted value, as string. */
162 struct sfm_extension_record
164 struct ll ll; /* In struct sfm_reader 'var_attrs' list. */
165 int subtype; /* Record subtype. */
166 off_t pos; /* Starting offset in file. */
167 unsigned int size; /* Size of data elements. */
168 unsigned int count; /* Number of data elements. */
169 void *data; /* Contents. */
172 /* System file reader. */
175 struct any_reader any_reader;
177 /* Resource tracking. */
178 struct pool *pool; /* All system file state. */
181 struct any_read_info info;
182 struct sfm_header_record header;
183 struct sfm_var_record *vars;
185 struct sfm_value_label_record *labels;
187 struct sfm_document_record *document;
188 struct sfm_mrset *mrsets;
190 struct sfm_extension_record *extensions[32];
191 struct ll_list var_attrs; /* Contains "struct sfm_extension_record"s. */
194 struct file_handle *fh; /* File handle. */
195 struct fh_lock *lock; /* Mutual exclusion for file handle. */
196 struct fbuf *fbuf; /* File stream. */
197 off_t pos; /* Position in file. */
198 bool error; /* I/O or corruption error? */
199 struct caseproto *proto; /* Format of output cases. */
202 enum integer_format integer_format; /* On-disk integer format. */
203 enum float_format float_format; /* On-disk floating point format. */
204 struct sfm_var *sfm_vars; /* Variables. */
205 size_t sfm_var_cnt; /* Number of variables. */
206 int case_cnt; /* Number of cases */
207 const char *encoding; /* String encoding. */
208 bool written_by_readstat; /* From https://github.com/WizardMac/ReadStat? */
211 enum any_compression compression;
212 double bias; /* Compression bias, usually 100.0. */
213 uint8_t opcodes[8]; /* Current block of opcodes. */
214 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
215 bool corruption_warning; /* Warned about possible corruption? */
217 /* ZLIB decompression. */
218 long long int ztrailer_ofs; /* Offset of ZLIB trailer at end of file. */
219 #define ZIN_BUF_SIZE 4096
220 uint8_t *zin_buf; /* Inflation input buffer. */
221 #define ZOUT_BUF_SIZE 16384
222 uint8_t *zout_buf; /* Inflation output buffer. */
223 unsigned int zout_end; /* Number of bytes of data in zout_buf. */
224 unsigned int zout_pos; /* First unconsumed byte in zout_buf. */
225 z_stream zstream; /* ZLIB inflater. */
228 static const struct casereader_class sys_file_casereader_class;
230 static struct sfm_reader *
231 sfm_reader_cast (const struct any_reader *r_)
233 assert (r_->klass == &sys_file_reader_class);
234 return UP_CAST (r_, struct sfm_reader, any_reader);
237 static bool sfm_close (struct any_reader *);
239 static struct variable *lookup_var_by_index (struct sfm_reader *, off_t,
240 const struct sfm_var_record *,
243 static void sys_msg (struct sfm_reader *r, off_t, int class,
244 const char *format, va_list args)
245 PRINTF_FORMAT (4, 0);
246 static void sys_warn (struct sfm_reader *, off_t, const char *, ...)
247 PRINTF_FORMAT (3, 4);
248 static void sys_error (struct sfm_reader *, off_t, const char *, ...)
249 PRINTF_FORMAT (3, 4);
251 static bool read_bytes (struct sfm_reader *, void *, size_t)
253 static int try_read_bytes (struct sfm_reader *, void *, size_t)
255 static bool read_int (struct sfm_reader *, int *) WARN_UNUSED_RESULT;
256 static bool read_uint (struct sfm_reader *, unsigned int *) WARN_UNUSED_RESULT;
257 static bool read_int64 (struct sfm_reader *, long long int *)
259 static bool read_uint64 (struct sfm_reader *, unsigned long long int *)
261 static bool read_string (struct sfm_reader *, char *, size_t)
263 static bool skip_bytes (struct sfm_reader *, size_t) WARN_UNUSED_RESULT;
265 /* ZLIB compressed data handling. */
266 static bool read_zheader (struct sfm_reader *) WARN_UNUSED_RESULT;
267 static bool open_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
268 static bool close_zstream (struct sfm_reader *) WARN_UNUSED_RESULT;
269 static int read_bytes_zlib (struct sfm_reader *, void *, size_t)
271 static int read_compressed_bytes (struct sfm_reader *, void *, size_t)
273 static int try_read_compressed_bytes (struct sfm_reader *, void *, size_t)
275 static bool read_compressed_float (struct sfm_reader *, double *)
278 static char *fix_line_ends (const char *);
280 static int parse_int (const struct sfm_reader *, const void *data, size_t ofs);
281 static double parse_float (const struct sfm_reader *,
282 const void *data, size_t ofs);
284 static bool read_variable_record (struct sfm_reader *,
285 struct sfm_var_record *);
286 static bool read_value_label_record (struct sfm_reader *,
287 struct sfm_value_label_record *);
288 static bool read_document_record (struct sfm_reader *);
289 static bool read_extension_record (struct sfm_reader *, int subtype,
290 struct sfm_extension_record **);
291 static bool skip_extension_record (struct sfm_reader *, int subtype);
293 static struct text_record *open_text_record (
294 struct sfm_reader *, const struct sfm_extension_record *,
295 bool recode_to_utf8);
296 static void close_text_record (struct sfm_reader *,
297 struct text_record *);
298 static bool read_variable_to_value_pair (struct sfm_reader *,
300 struct text_record *,
301 struct variable **var, char **value);
302 static void text_warn (struct sfm_reader *r, struct text_record *text,
303 const char *format, ...) PRINTF_FORMAT (3, 4);
304 static char *text_get_token (struct text_record *,
305 struct substring delimiters, char *delimiter);
306 static bool text_match (struct text_record *, char c);
307 static bool text_read_variable_name (struct sfm_reader *, struct dictionary *,
308 struct text_record *,
309 struct substring delimiters,
311 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
312 struct text_record *,
313 struct substring delimiters,
315 static const char *text_parse_counted_string (struct sfm_reader *,
316 struct text_record *);
317 static size_t text_pos (const struct text_record *);
318 static const char *text_get_all (const struct text_record *);
320 /* Dictionary reader. */
328 static bool read_dictionary (struct sfm_reader *);
329 static bool read_record (struct sfm_reader *, int type,
330 size_t *allocated_vars, size_t *allocated_labels);
331 static bool read_header (struct sfm_reader *, struct any_read_info *,
332 struct sfm_header_record *);
333 static void parse_header (struct sfm_reader *,
334 const struct sfm_header_record *,
335 struct any_read_info *, struct dictionary *);
336 static bool parse_variable_records (struct sfm_reader *, struct dictionary *,
337 struct sfm_var_record *, size_t n);
338 static void parse_format_spec (struct sfm_reader *, off_t pos,
339 unsigned int format, enum which_format,
340 struct variable *, int *format_warning_cnt);
341 static void parse_document (struct dictionary *, struct sfm_document_record *);
342 static void parse_display_parameters (struct sfm_reader *,
343 const struct sfm_extension_record *,
344 struct dictionary *);
345 static bool parse_machine_integer_info (struct sfm_reader *,
346 const struct sfm_extension_record *,
347 struct any_read_info *);
348 static void parse_machine_float_info (struct sfm_reader *,
349 const struct sfm_extension_record *);
350 static void parse_extra_product_info (struct sfm_reader *,
351 const struct sfm_extension_record *,
352 struct any_read_info *);
353 static void parse_mrsets (struct sfm_reader *,
354 const struct sfm_extension_record *,
355 size_t *allocated_mrsets);
356 static void decode_mrsets (struct sfm_reader *, struct dictionary *);
357 static void parse_long_var_name_map (struct sfm_reader *,
358 const struct sfm_extension_record *,
359 struct dictionary *);
360 static bool parse_long_string_map (struct sfm_reader *,
361 const struct sfm_extension_record *,
362 struct dictionary *);
363 static bool parse_value_labels (struct sfm_reader *, struct dictionary *,
364 const struct sfm_var_record *,
366 const struct sfm_value_label_record *);
367 static void parse_data_file_attributes (struct sfm_reader *,
368 const struct sfm_extension_record *,
369 struct dictionary *);
370 static void parse_variable_attributes (struct sfm_reader *,
371 const struct sfm_extension_record *,
372 struct dictionary *);
373 static void assign_variable_roles (struct sfm_reader *, struct dictionary *);
374 static void parse_long_string_value_labels (struct sfm_reader *,
375 const struct sfm_extension_record *,
376 struct dictionary *);
377 static void parse_long_string_missing_values (
378 struct sfm_reader *, const struct sfm_extension_record *,
379 struct dictionary *);
381 /* Frees the strings inside INFO. */
383 any_read_info_destroy (struct any_read_info *info)
387 free (info->creation_date);
388 free (info->creation_time);
389 free (info->product);
390 free (info->product_ext);
394 /* Tries to open FH for reading as a system file. Returns an sfm_reader if
395 successful, otherwise NULL. */
396 static struct any_reader *
397 sfm_open (struct file_handle *fh)
399 size_t allocated_mrsets = 0;
400 struct sfm_reader *r;
402 /* Create and initialize reader. */
403 r = xzalloc (sizeof *r);
404 r->any_reader.klass = &sys_file_reader_class;
405 r->pool = pool_create ();
406 pool_register (r->pool, free, r);
408 r->opcode_idx = sizeof r->opcodes;
409 ll_init (&r->var_attrs);
411 /* TRANSLATORS: this fragment will be interpolated into
412 messages in fh_lock() that identify types of files. */
413 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
417 int fd = fn_open (fh, O_RDONLY | O_BINARY, 0);
420 msg (ME, _("Error opening `%s' for reading as a system file: %s."),
421 fh_get_file_name (r->fh), strerror (errno));
424 r->fbuf = fbuf_open_fd (fd);
426 if (!read_dictionary (r))
429 if (r->extensions[EXT_MRSETS] != NULL)
430 parse_mrsets (r, r->extensions[EXT_MRSETS], &allocated_mrsets);
432 if (r->extensions[EXT_MRSETS2] != NULL)
433 parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
435 return &r->any_reader;
439 sfm_close (&r->any_reader);
444 read_dictionary (struct sfm_reader *r)
446 size_t allocated_vars;
447 size_t allocated_labels;
449 if (!read_header (r, &r->info, &r->header))
453 allocated_labels = 0;
458 if (!read_int (r, &type))
462 if (!read_record (r, type, &allocated_vars, &allocated_labels))
466 if (!skip_bytes (r, 4))
469 if (r->compression == ANY_COMP_ZLIB && !read_zheader (r))
476 read_record (struct sfm_reader *r, int type,
477 size_t *allocated_vars, size_t *allocated_labels)
484 if (r->n_vars >= *allocated_vars)
485 r->vars = pool_2nrealloc (r->pool, r->vars, allocated_vars,
487 return read_variable_record (r, &r->vars[r->n_vars++]);
490 if (r->n_labels >= *allocated_labels)
491 r->labels = pool_2nrealloc (r->pool, r->labels, allocated_labels,
493 return read_value_label_record (r, &r->labels[r->n_labels++]);
496 /* A Type 4 record is always immediately after a type 3 record,
497 so the code for type 3 records reads the type 4 record too. */
498 sys_error (r, r->pos, _("Misplaced type 4 record."));
502 if (r->document != NULL)
504 sys_error (r, r->pos, _("Duplicate type 6 (document) record."));
507 return read_document_record (r);
510 if (!read_int (r, &subtype))
513 || subtype >= sizeof r->extensions / sizeof *r->extensions)
516 _("Unrecognized record type 7, subtype %d. For help, "
517 "please send this file to %s and mention that you were "
519 subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
520 return skip_extension_record (r, subtype);
522 else if (subtype == 18)
524 /* System files written by "Stata 14.1/-savespss- 1.77 by S.Radyakin"
525 put each variable attribute into a separate record with subtype
526 18. I'm surprised that SPSS puts up with this. */
527 struct sfm_extension_record *ext;
528 bool ok = read_extension_record (r, subtype, &ext);
530 ll_push_tail (&r->var_attrs, &ext->ll);
533 else if (r->extensions[subtype] != NULL)
536 _("Record type 7, subtype %d found here has the same "
537 "type as the record found near offset 0x%llx. For "
538 "help, please send this file to %s and mention that "
539 "you were using %s."),
540 subtype, (long long int) r->extensions[subtype]->pos,
541 PACKAGE_BUGREPORT, PACKAGE_STRING);
542 return skip_extension_record (r, subtype);
545 return read_extension_record (r, subtype, &r->extensions[subtype]);
548 sys_error (r, r->pos, _("Unrecognized record type %d."), type);
555 /* Returns the character encoding obtained from R, or a null pointer if R
556 doesn't have an indication of its character encoding. */
558 sfm_get_encoding (const struct sfm_reader *r)
560 /* The EXT_ENCODING record is the best way to determine dictionary
562 if (r->extensions[EXT_ENCODING])
563 return r->extensions[EXT_ENCODING]->data;
565 /* But EXT_INTEGER is better than nothing as a fallback. */
566 if (r->extensions[EXT_INTEGER])
568 int codepage = parse_int (r, r->extensions[EXT_INTEGER]->data, 7 * 4);
569 const char *encoding;
578 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
579 respectively. However, many files have character code 2 but data
580 which are clearly not ASCII. Therefore, ignore these values. */
587 encoding = sys_get_encoding_from_codepage (codepage);
588 if (encoding != NULL)
594 /* If the file magic number is EBCDIC then its character data is too. */
595 if (!strcmp (r->header.magic, EBCDIC_MAGIC))
601 struct get_strings_aux
612 add_string__ (struct get_strings_aux *aux,
613 const char *string, bool id, char *title)
615 if (aux->n >= aux->allocated)
617 aux->allocated = 2 * (aux->allocated + 1);
618 aux->titles = pool_realloc (aux->pool, aux->titles,
619 aux->allocated * sizeof *aux->titles);
620 aux->strings = pool_realloc (aux->pool, aux->strings,
621 aux->allocated * sizeof *aux->strings);
622 aux->ids = pool_realloc (aux->pool, aux->ids,
623 aux->allocated * sizeof *aux->ids);
626 aux->titles[aux->n] = title;
627 aux->strings[aux->n] = pool_strdup (aux->pool, string);
628 aux->ids[aux->n] = id;
632 static void PRINTF_FORMAT (3, 4)
633 add_string (struct get_strings_aux *aux,
634 const char *string, const char *title, ...)
638 va_start (args, title);
639 add_string__ (aux, string, false, pool_vasprintf (aux->pool, title, args));
643 static void PRINTF_FORMAT (3, 4)
644 add_id (struct get_strings_aux *aux, const char *id, const char *title, ...)
648 va_start (args, title);
649 add_string__ (aux, id, true, pool_vasprintf (aux->pool, title, args));
653 /* Retrieves significant string data from R in its raw format, to allow the
654 caller to try to detect the encoding in use.
656 Returns the number of strings retrieved N. Sets each of *TITLESP, *IDSP,
657 and *STRINGSP to an array of N elements allocated from POOL. For each I in
658 0...N-1, UTF-8 string *TITLESP[I] describes *STRINGSP[I], which is in
659 whatever encoding system file R uses. *IDSP[I] is true if *STRINGSP[I] must
660 be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
663 sfm_get_strings (const struct any_reader *r_, struct pool *pool,
664 char ***titlesp, bool **idsp, char ***stringsp)
666 struct sfm_reader *r = sfm_reader_cast (r_);
667 const struct sfm_mrset *mrset;
668 struct get_strings_aux aux;
680 for (i = 0; i < r->n_vars; i++)
681 if (r->vars[i].width != -1)
682 add_id (&aux, r->vars[i].name, _("Variable %zu"), ++var_idx);
685 for (i = 0; i < r->n_vars; i++)
686 if (r->vars[i].width != -1)
689 if (r->vars[i].label)
690 add_string (&aux, r->vars[i].label, _("Variable %zu Label"),
695 for (i = 0; i < r->n_labels; i++)
696 for (j = 0; j < r->labels[i].n_labels; j++)
697 add_string (&aux, r->labels[i].labels[j].label,
698 _("Value Label %zu"), k++);
700 add_string (&aux, r->header.creation_date, _("Creation Date"));
701 add_string (&aux, r->header.creation_time, _("Creation Time"));
702 add_string (&aux, r->header.eye_catcher, _("Product"));
703 add_string (&aux, r->header.file_label, _("File Label"));
705 if (r->extensions[EXT_PRODUCT_INFO])
706 add_string (&aux, r->extensions[EXT_PRODUCT_INFO]->data,
707 _("Extra Product Info"));
713 for (i = 0; i < r->document->n_lines; i++)
717 memcpy (line, r->document->documents + i * 80, 80);
720 add_string (&aux, line, _("Document Line %zu"), i + 1);
724 for (mrset = r->mrsets; mrset < &r->mrsets[r->n_mrsets]; mrset++)
726 size_t mrset_idx = mrset - r->mrsets + 1;
728 add_id (&aux, mrset->name, _("MRSET %zu"), mrset_idx);
730 add_string (&aux, mrset->label, _("MRSET %zu Label"), mrset_idx);
732 /* Skip the variables because they ought to be duplicates. */
735 add_string (&aux, mrset->counted, _("MRSET %zu Counted Value"),
739 /* data file attributes */
740 /* variable attributes */
742 /* long string value labels */
743 /* long string missing values */
745 *titlesp = aux.titles;
747 *stringsp = aux.strings;
751 /* Decodes the dictionary read from R, saving it into *DICT. Character
752 strings in R are decoded using ENCODING, or an encoding obtained from R if
753 ENCODING is null, or the locale encoding if R specifies no encoding.
755 If INFOP is non-null, then it receives additional info about the system
756 file, which the caller must eventually free with any_read_info_destroy()
757 when it is no longer needed.
759 This function consumes R. The caller must not use it again later, even to
760 destroy it with sfm_close(). */
761 static struct casereader *
762 sfm_decode (struct any_reader *r_, const char *encoding,
763 struct dictionary **dictp, struct any_read_info *infop)
765 struct sfm_reader *r = sfm_reader_cast (r_);
766 struct dictionary *dict;
769 if (encoding == NULL)
771 encoding = sfm_get_encoding (r);
772 if (encoding == NULL)
774 sys_warn (r, -1, _("This system file does not indicate its own "
775 "character encoding. Using default encoding "
776 "%s. For best results, specify an encoding "
777 "explicitly. Use SYSFILE INFO with "
778 "ENCODING=\"DETECT\" to analyze the possible "
781 encoding = locale_charset ();
785 dict = dict_create (encoding);
786 r->encoding = dict_get_encoding (dict);
788 /* These records don't use variables at all. */
789 if (r->document != NULL)
790 parse_document (dict, r->document);
792 if (r->extensions[EXT_INTEGER] != NULL
793 && !parse_machine_integer_info (r, r->extensions[EXT_INTEGER], &r->info))
796 if (r->extensions[EXT_FLOAT] != NULL)
797 parse_machine_float_info (r, r->extensions[EXT_FLOAT]);
799 if (r->extensions[EXT_PRODUCT_INFO] != NULL)
800 parse_extra_product_info (r, r->extensions[EXT_PRODUCT_INFO], &r->info);
802 if (r->extensions[EXT_FILE_ATTRS] != NULL)
803 parse_data_file_attributes (r, r->extensions[EXT_FILE_ATTRS], dict);
805 parse_header (r, &r->header, &r->info, dict);
807 /* Parse the variable records, the basis of almost everything else. */
808 if (!parse_variable_records (r, dict, r->vars, r->n_vars))
811 /* Parse value labels and the weight variable immediately after the variable
812 records. These records use indexes into r->vars[], so we must parse them
813 before those indexes become invalidated by very long string variables. */
814 for (i = 0; i < r->n_labels; i++)
815 if (!parse_value_labels (r, dict, r->vars, r->n_vars, &r->labels[i]))
817 if (r->header.weight_idx != 0)
819 struct variable *weight_var;
821 weight_var = lookup_var_by_index (r, 76, r->vars, r->n_vars,
822 r->header.weight_idx);
823 if (weight_var != NULL)
825 if (var_is_numeric (weight_var))
826 dict_set_weight (dict, weight_var);
828 sys_warn (r, -1, _("Ignoring string variable `%s' set "
829 "as weighting variable."),
830 var_get_name (weight_var));
834 if (r->extensions[EXT_DISPLAY] != NULL)
835 parse_display_parameters (r, r->extensions[EXT_DISPLAY], dict);
837 /* The following records use short names, so they need to be parsed before
838 parse_long_var_name_map() changes short names to long names. */
839 decode_mrsets (r, dict);
841 if (r->extensions[EXT_LONG_STRINGS] != NULL
842 && !parse_long_string_map (r, r->extensions[EXT_LONG_STRINGS], dict))
845 /* Now rename variables to their long names. */
846 parse_long_var_name_map (r, r->extensions[EXT_LONG_NAMES], dict);
848 /* The following records use long names, so they need to follow renaming. */
849 if (!ll_is_empty (&r->var_attrs))
851 struct sfm_extension_record *ext;
852 ll_for_each (ext, struct sfm_extension_record, ll, &r->var_attrs)
853 parse_variable_attributes (r, ext, dict);
855 /* Roles use the $@Role attribute. */
856 assign_variable_roles (r, dict);
858 if (r->extensions[EXT_LONG_LABELS] != NULL)
859 parse_long_string_value_labels (r, r->extensions[EXT_LONG_LABELS], dict);
860 if (r->extensions[EXT_LONG_MISSING] != NULL)
861 parse_long_string_missing_values (r, r->extensions[EXT_LONG_MISSING],
864 /* Warn if the actual amount of data per case differs from the
865 amount that the header claims. SPSS version 13 gets this
866 wrong when very long strings are involved, so don't warn in
868 if (r->header.nominal_case_size > 0
869 && r->header.nominal_case_size != r->n_vars
870 && r->info.version_major != 13)
871 sys_warn (r, -1, _("File header claims %d variable positions but "
872 "%zu were read from file."),
873 r->header.nominal_case_size, r->n_vars);
875 /* Create an index of dictionary variable widths for
876 sfm_read_case to use. We cannot use the `struct variable's
877 from the dictionary we created, because the caller owns the
878 dictionary and may destroy or modify its variables. */
879 sfm_dictionary_to_sfm_vars (dict, &r->sfm_vars, &r->sfm_var_cnt);
880 pool_register (r->pool, free, r->sfm_vars);
881 r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
887 memset (&r->info, 0, sizeof r->info);
890 return casereader_create_sequential
892 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
893 &sys_file_casereader_class, r);
902 /* Closes R, which should have been returned by sfm_open() but not already
903 closed with sfm_decode() or this function.
904 Returns true if an I/O error has occurred on READER, false
907 sfm_close (struct any_reader *r_)
909 struct sfm_reader *r = sfm_reader_cast (r_);
914 int error = fbuf_close (r->fbuf);
917 msg (ME, _("Error closing system file `%s': %s."),
918 fh_get_file_name (r->fh), strerror (error));
924 any_read_info_destroy (&r->info);
929 pool_destroy (r->pool);
934 /* Destroys READER. */
936 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
938 struct sfm_reader *r = r_;
939 sfm_close (&r->any_reader);
942 /* Detects whether FILE is an SPSS system file. Returns 1 if so, 0 if not, and
943 a negative errno value if there is an error reading FILE. */
945 sfm_detect (FILE *file)
949 if (fseek (file, 0, SEEK_SET) != 0)
951 if (fread (magic, 4, 1, file) != 1)
952 return ferror (file) ? -errno : 0;
955 return (!strcmp (ASCII_MAGIC, magic)
956 || !strcmp (ASCII_ZMAGIC, magic)
957 || !strcmp (EBCDIC_MAGIC, magic));
960 /* Reads the global header of the system file. Initializes *HEADER and *INFO,
961 except for the string fields in *INFO, which parse_header() will initialize
962 later once the file's encoding is known. */
964 read_header (struct sfm_reader *r, struct any_read_info *info,
965 struct sfm_header_record *header)
967 uint8_t raw_layout_code[4];
972 if (!read_string (r, header->magic, sizeof header->magic)
973 || !read_string (r, header->eye_catcher, sizeof header->eye_catcher))
975 r->written_by_readstat = strstr (header->eye_catcher,
976 "https://github.com/WizardMac/ReadStat");
978 if (!strcmp (ASCII_MAGIC, header->magic)
979 || !strcmp (EBCDIC_MAGIC, header->magic))
981 else if (!strcmp (ASCII_ZMAGIC, header->magic))
985 sys_error (r, 0, _("This is not an SPSS system file."));
989 /* Identify integer format. */
990 if (!read_bytes (r, raw_layout_code, sizeof raw_layout_code))
992 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
994 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
996 || (r->integer_format != INTEGER_MSB_FIRST
997 && r->integer_format != INTEGER_LSB_FIRST))
999 sys_error (r, 64, _("This is not an SPSS system file."));
1003 if (!read_int (r, &header->nominal_case_size))
1006 if (header->nominal_case_size < 0
1007 || header->nominal_case_size > INT_MAX / 16)
1008 header->nominal_case_size = -1;
1010 if (!read_int (r, &compressed))
1014 if (compressed == 0)
1015 r->compression = ANY_COMP_NONE;
1016 else if (compressed == 1)
1017 r->compression = ANY_COMP_SIMPLE;
1018 else if (compressed != 0)
1020 sys_error (r, 0, "System file header has invalid compression "
1021 "value %d.", compressed);
1027 if (compressed == 2)
1028 r->compression = ANY_COMP_ZLIB;
1031 sys_error (r, 0, "ZLIB-compressed system file header has invalid "
1032 "compression value %d.", compressed);
1037 if (!read_int (r, &header->weight_idx))
1040 if (!read_int (r, &r->case_cnt))
1042 if ( r->case_cnt > INT_MAX / 2)
1045 /* Identify floating-point format and obtain compression bias. */
1046 if (!read_bytes (r, raw_bias, sizeof raw_bias))
1048 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
1050 uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
1052 if (memcmp (raw_bias, zero_bias, 8))
1053 sys_warn (r, r->pos - 8,
1054 _("Compression bias is not the usual "
1055 "value of 100, or system file uses unrecognized "
1056 "floating-point format."));
1059 /* Some software is known to write all-zeros to this
1060 field. Such software also writes floating-point
1061 numbers in the format that we expect by default
1062 (it seems that all software most likely does, in
1063 reality), so don't warn in this case. */
1066 if (r->integer_format == INTEGER_MSB_FIRST)
1067 r->float_format = FLOAT_IEEE_DOUBLE_BE;
1069 r->float_format = FLOAT_IEEE_DOUBLE_LE;
1071 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
1073 if (!read_string (r, header->creation_date, sizeof header->creation_date)
1074 || !read_string (r, header->creation_time, sizeof header->creation_time)
1075 || !read_string (r, header->file_label, sizeof header->file_label)
1076 || !skip_bytes (r, 3))
1079 info->integer_format = r->integer_format;
1080 info->float_format = r->float_format;
1081 info->compression = r->compression;
1082 info->case_cnt = r->case_cnt;
1087 /* Reads a variable (type 2) record from R into RECORD. */
1089 read_variable_record (struct sfm_reader *r, struct sfm_var_record *record)
1091 int has_variable_label;
1093 memset (record, 0, sizeof *record);
1095 record->pos = r->pos;
1096 if (!read_int (r, &record->width)
1097 || !read_int (r, &has_variable_label)
1098 || !read_int (r, &record->missing_value_code)
1099 || !read_int (r, &record->print_format)
1100 || !read_int (r, &record->write_format)
1101 || !read_string (r, record->name, sizeof record->name))
1104 if (has_variable_label == 1)
1106 enum { MAX_LABEL_LEN = 65536 };
1107 unsigned int len, read_len;
1109 if (!read_uint (r, &len))
1112 /* Read up to MAX_LABEL_LEN bytes of label. */
1113 read_len = MIN (MAX_LABEL_LEN, len);
1114 record->label = pool_malloc (r->pool, read_len + 1);
1115 if (!read_string (r, record->label, read_len + 1))
1118 /* Skip unread label bytes. */
1119 if (!skip_bytes (r, len - read_len))
1122 /* Skip label padding up to multiple of 4 bytes. */
1123 if (!skip_bytes (r, ROUND_UP (len, 4) - len))
1126 else if (has_variable_label != 0)
1128 sys_error (r, record->pos,
1129 _("Variable label indicator field is not 0 or 1."));
1133 /* Set missing values. */
1134 if (record->missing_value_code != 0)
1136 int code = record->missing_value_code;
1137 if (record->width == 0)
1139 if (code < -3 || code > 3 || code == -1)
1141 sys_error (r, record->pos,
1142 _("Numeric missing value indicator field is not "
1143 "-3, -2, 0, 1, 2, or 3."));
1149 if (code < 1 || code > 3)
1151 sys_error (r, record->pos,
1152 _("String missing value indicator field is not "
1158 if (!read_bytes (r, record->missing, 8 * abs (code)))
1165 /* Reads value labels from R into RECORD. */
1167 read_value_label_record (struct sfm_reader *r,
1168 struct sfm_value_label_record *record)
1173 /* Read type 3 record. */
1174 record->pos = r->pos;
1175 if (!read_uint (r, &record->n_labels))
1177 if (record->n_labels > UINT_MAX / sizeof *record->labels)
1179 sys_error (r, r->pos - 4, _("Invalid number of labels %u."),
1183 record->labels = pool_nmalloc (r->pool, record->n_labels,
1184 sizeof *record->labels);
1185 for (i = 0; i < record->n_labels; i++)
1187 struct sfm_value_label *label = &record->labels[i];
1188 unsigned char label_len;
1191 if (!read_bytes (r, label->value, sizeof label->value))
1194 /* Read label length. */
1195 if (!read_bytes (r, &label_len, sizeof label_len))
1197 padded_len = ROUND_UP (label_len + 1, 8);
1199 /* Read label, padding. */
1200 label->label = pool_malloc (r->pool, padded_len + 1);
1201 if (!read_bytes (r, label->label, padded_len - 1))
1203 label->label[label_len] = '\0';
1206 /* Read record type of type 4 record. */
1207 if (!read_int (r, &type))
1211 sys_error (r, r->pos - 4,
1212 _("Variable index record (type 4) does not immediately "
1213 "follow value label record (type 3) as it should."));
1217 /* Read number of variables associated with value label from type 4
1219 if (!read_uint (r, &record->n_vars))
1221 if (record->n_vars < 1 || record->n_vars > r->n_vars)
1223 sys_error (r, r->pos - 4,
1224 _("Number of variables associated with a value label (%u) "
1225 "is not between 1 and the number of variables (%zu)."),
1226 record->n_vars, r->n_vars);
1230 record->vars = pool_nmalloc (r->pool, record->n_vars, sizeof *record->vars);
1231 for (i = 0; i < record->n_vars; i++)
1232 if (!read_int (r, &record->vars[i]))
1238 /* Reads a document record from R. Returns true if successful, false on
1241 read_document_record (struct sfm_reader *r)
1244 if (!read_int (r, &n_lines))
1246 else if (n_lines == 0)
1248 else if (n_lines < 0 || n_lines >= INT_MAX / DOC_LINE_LENGTH)
1250 sys_error (r, r->pos,
1251 _("Number of document lines (%d) "
1252 "must be greater than 0 and less than %d."),
1253 n_lines, INT_MAX / DOC_LINE_LENGTH);
1257 struct sfm_document_record *record;
1258 record = pool_malloc (r->pool, sizeof *record);
1259 record->pos = r->pos;
1260 record->n_lines = n_lines;
1261 record->documents = pool_malloc (r->pool, DOC_LINE_LENGTH * n_lines);
1262 if (!read_bytes (r, record->documents, DOC_LINE_LENGTH * n_lines))
1265 r->document = record;
1270 read_extension_record_header (struct sfm_reader *r, int subtype,
1271 struct sfm_extension_record *record)
1273 record->subtype = subtype;
1274 record->pos = r->pos;
1275 if (!read_uint (r, &record->size) || !read_uint (r, &record->count))
1278 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
1279 allows an extra byte for a null terminator, used by some
1280 extension processing routines. */
1281 if (record->size != 0
1282 && xsum (1, xtimes (record->count, record->size)) >= UINT_MAX)
1284 sys_error (r, record->pos, "Record type 7 subtype %d too large.",
1292 /* Reads an extension record from R into RECORD. */
1294 read_extension_record (struct sfm_reader *r, int subtype,
1295 struct sfm_extension_record **recordp)
1297 struct extension_record_type
1304 static const struct extension_record_type types[] =
1306 /* Implemented record types. */
1307 { EXT_INTEGER, 4, 8 },
1308 { EXT_FLOAT, 8, 3 },
1309 { EXT_MRSETS, 1, 0 },
1310 { EXT_PRODUCT_INFO, 1, 0 },
1311 { EXT_DISPLAY, 4, 0 },
1312 { EXT_LONG_NAMES, 1, 0 },
1313 { EXT_LONG_STRINGS, 1, 0 },
1314 { EXT_NCASES, 8, 2 },
1315 { EXT_FILE_ATTRS, 1, 0 },
1316 { EXT_VAR_ATTRS, 1, 0 },
1317 { EXT_MRSETS2, 1, 0 },
1318 { EXT_ENCODING, 1, 0 },
1319 { EXT_LONG_LABELS, 1, 0 },
1320 { EXT_LONG_MISSING, 1, 0 },
1322 /* Ignored record types. */
1323 { EXT_VAR_SETS, 0, 0 },
1325 { EXT_DATA_ENTRY, 0, 0 },
1326 { EXT_DATAVIEW, 0, 0 },
1329 const struct extension_record_type *type;
1330 struct sfm_extension_record *record;
1334 record = pool_malloc (r->pool, sizeof *record);
1335 if (!read_extension_record_header (r, subtype, record))
1337 n_bytes = record->count * record->size;
1339 for (type = types; type < &types[sizeof types / sizeof *types]; type++)
1340 if (subtype == type->subtype)
1342 if (type->size > 0 && record->size != type->size)
1343 sys_warn (r, record->pos,
1344 _("Record type 7, subtype %d has bad size %u "
1345 "(expected %d)."), subtype, record->size, type->size);
1346 else if (type->count > 0 && record->count != type->count)
1347 sys_warn (r, record->pos,
1348 _("Record type 7, subtype %d has bad count %u "
1349 "(expected %d)."), subtype, record->count, type->count);
1350 else if (type->count == 0 && type->size == 0)
1352 /* Ignore this record. */
1356 char *data = pool_malloc (r->pool, n_bytes + 1);
1357 data[n_bytes] = '\0';
1359 record->data = data;
1360 if (!read_bytes (r, record->data, n_bytes))
1369 sys_warn (r, record->pos,
1370 _("Unrecognized record type 7, subtype %d. For help, please "
1371 "send this file to %s and mention that you were using %s."),
1372 subtype, PACKAGE_BUGREPORT, PACKAGE_STRING);
1375 return skip_bytes (r, n_bytes);
1379 skip_extension_record (struct sfm_reader *r, int subtype)
1381 struct sfm_extension_record record;
1383 return (read_extension_record_header (r, subtype, &record)
1384 && skip_bytes (r, record.count * record.size));
1388 parse_header (struct sfm_reader *r, const struct sfm_header_record *header,
1389 struct any_read_info *info, struct dictionary *dict)
1391 const char *dict_encoding = dict_get_encoding (dict);
1392 struct substring product;
1393 struct substring label;
1396 /* Convert file label to UTF-8 and put it into DICT. */
1397 label = recode_substring_pool ("UTF-8", dict_encoding,
1398 ss_cstr (header->file_label), r->pool);
1399 ss_trim (&label, ss_cstr (" "));
1400 label.string[label.length] = '\0';
1401 fixed_label = fix_line_ends (label.string);
1402 dict_set_label (dict, fixed_label);
1405 /* Put creation date and time in UTF-8 into INFO. */
1406 info->creation_date = recode_string ("UTF-8", dict_encoding,
1407 header->creation_date, -1);
1408 info->creation_time = recode_string ("UTF-8", dict_encoding,
1409 header->creation_time, -1);
1411 /* Put product name into INFO, dropping eye-catcher string if present. */
1412 product = recode_substring_pool ("UTF-8", dict_encoding,
1413 ss_cstr (header->eye_catcher), r->pool);
1414 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
1415 ss_trim (&product, ss_cstr (" "));
1416 info->product = ss_xstrdup (product);
1419 /* Reads a variable (type 2) record from R and adds the
1420 corresponding variable to DICT.
1421 Also skips past additional variable records for long string
1424 parse_variable_records (struct sfm_reader *r, struct dictionary *dict,
1425 struct sfm_var_record *var_recs, size_t n_var_recs)
1427 const char *dict_encoding = dict_get_encoding (dict);
1428 struct sfm_var_record *rec;
1431 for (rec = var_recs; rec < &var_recs[n_var_recs]; )
1433 struct variable *var;
1438 name = recode_string_pool ("UTF-8", dict_encoding,
1439 rec->name, -1, r->pool);
1440 name[strcspn (name, " ")] = '\0';
1442 if (!dict_id_is_valid (dict, name, false)
1443 || name[0] == '$' || name[0] == '#')
1445 sys_error (r, rec->pos, _("Invalid variable name `%s'."), name);
1449 if (rec->width < 0 || rec->width > 255)
1451 sys_error (r, rec->pos,
1452 _("Bad width %d for variable %s."), rec->width, name);
1456 var = rec->var = dict_create_var (dict, name, rec->width);
1459 char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
1460 sys_warn (r, rec->pos, _("Renaming variable with duplicate name "
1463 var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
1464 var_set_short_name (var, 0, new_name);
1468 /* Set the short name the same as the long name (even if we renamed
1470 var_set_short_name (var, 0, var_get_name (var));
1472 /* Get variable label, if any. */
1477 utf8_label = recode_string_pool ("UTF-8", dict_encoding,
1478 rec->label, -1, r->pool);
1479 var_set_label (var, utf8_label);
1482 /* Set missing values. */
1483 if (rec->missing_value_code != 0)
1485 int width = var_get_width (var);
1486 struct missing_values mv;
1488 mv_init_pool (r->pool, &mv, width);
1489 if (var_is_numeric (var))
1491 bool has_range = rec->missing_value_code < 0;
1492 int n_discrete = (has_range
1493 ? rec->missing_value_code == -3
1494 : rec->missing_value_code);
1499 double low = parse_float (r, rec->missing, 0);
1500 double high = parse_float (r, rec->missing, 8);
1502 /* Deal with SPSS 21 change in representation. */
1506 mv_add_range (&mv, low, high);
1510 for (i = 0; i < n_discrete; i++)
1512 mv_add_num (&mv, parse_float (r, rec->missing, ofs));
1517 for (i = 0; i < rec->missing_value_code; i++)
1518 mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8));
1519 var_set_missing_values (var, &mv);
1523 parse_format_spec (r, rec->pos + 12, rec->print_format,
1524 PRINT_FORMAT, var, &n_warnings);
1525 parse_format_spec (r, rec->pos + 16, rec->write_format,
1526 WRITE_FORMAT, var, &n_warnings);
1528 /* Account for values.
1529 Skip long string continuation records, if any. */
1530 n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8);
1531 for (i = 1; i < n_values; i++)
1532 if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1)
1534 sys_error (r, rec->pos, _("Missing string continuation record."));
1543 /* Translates the format spec from sysfile format to internal
1546 parse_format_spec (struct sfm_reader *r, off_t pos, unsigned int format,
1547 enum which_format which, struct variable *v,
1550 const int max_warnings = 8;
1551 uint8_t raw_type = format >> 16;
1552 uint8_t w = format >> 8;
1561 ok = (fmt_from_io (raw_type, &f.type)
1562 && fmt_check_output (&f)
1563 && fmt_check_width_compat (&f, var_get_width (v)));
1568 if (which == PRINT_FORMAT)
1569 var_set_print_format (v, &f);
1571 var_set_write_format (v, &f);
1573 else if (format == 0)
1575 /* Actually observed in the wild. No point in warning about it. */
1577 else if (++*n_warnings <= max_warnings)
1579 if (which == PRINT_FORMAT)
1580 sys_warn (r, pos, _("Variable %s with width %d has invalid print "
1582 var_get_name (v), var_get_width (v), format);
1584 sys_warn (r, pos, _("Variable %s with width %d has invalid write "
1586 var_get_name (v), var_get_width (v), format);
1588 if (*n_warnings == max_warnings)
1589 sys_warn (r, -1, _("Suppressing further invalid format warnings."));
1594 parse_document (struct dictionary *dict, struct sfm_document_record *record)
1598 for (p = record->documents;
1599 p < record->documents + DOC_LINE_LENGTH * record->n_lines;
1600 p += DOC_LINE_LENGTH)
1602 struct substring line;
1604 line = recode_substring_pool ("UTF-8", dict_get_encoding (dict),
1605 ss_buffer (p, DOC_LINE_LENGTH), NULL);
1606 ss_rtrim (&line, ss_cstr (" "));
1607 line.string[line.length] = '\0';
1609 dict_add_document_line (dict, line.string, false);
1615 /* Parses record type 7, subtype 3. */
1617 parse_machine_integer_info (struct sfm_reader *r,
1618 const struct sfm_extension_record *record,
1619 struct any_read_info *info)
1621 int float_representation, expected_float_format;
1622 int integer_representation, expected_integer_format;
1624 /* Save version info. */
1625 info->version_major = parse_int (r, record->data, 0);
1626 info->version_minor = parse_int (r, record->data, 4);
1627 info->version_revision = parse_int (r, record->data, 8);
1629 /* Check floating point format. */
1630 float_representation = parse_int (r, record->data, 16);
1631 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
1632 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
1633 expected_float_format = 1;
1634 else if (r->float_format == FLOAT_Z_LONG)
1635 expected_float_format = 2;
1636 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
1637 expected_float_format = 3;
1640 if (float_representation != expected_float_format)
1642 sys_error (r, record->pos,
1643 _("Floating-point representation indicated by "
1644 "system file (%d) differs from expected (%d)."),
1645 float_representation, expected_float_format);
1649 /* Check integer format. */
1650 integer_representation = parse_int (r, record->data, 24);
1651 if (r->integer_format == INTEGER_MSB_FIRST)
1652 expected_integer_format = 1;
1653 else if (r->integer_format == INTEGER_LSB_FIRST)
1654 expected_integer_format = 2;
1657 if (integer_representation != expected_integer_format)
1658 sys_warn (r, record->pos,
1659 _("Integer format indicated by system file (%d) "
1660 "differs from expected (%d)."),
1661 integer_representation, expected_integer_format);
1666 /* Parses record type 7, subtype 4. */
1668 parse_machine_float_info (struct sfm_reader *r,
1669 const struct sfm_extension_record *record)
1671 double sysmis = parse_float (r, record->data, 0);
1672 double highest = parse_float (r, record->data, 8);
1673 double lowest = parse_float (r, record->data, 16);
1675 if (sysmis != SYSMIS)
1676 sys_warn (r, record->pos,
1677 _("File specifies unexpected value %g (%a) as %s, "
1678 "instead of %g (%a)."),
1679 sysmis, sysmis, "SYSMIS", SYSMIS, SYSMIS);
1681 if (highest != HIGHEST)
1682 sys_warn (r, record->pos,
1683 _("File specifies unexpected value %g (%a) as %s, "
1684 "instead of %g (%a)."),
1685 highest, highest, "HIGHEST", HIGHEST, HIGHEST);
1687 /* SPSS before version 21 used a unique value just bigger than SYSMIS as
1688 LOWEST. SPSS 21 uses SYSMIS for LOWEST, which is OK because LOWEST only
1689 appears in a context (missing values) where SYSMIS cannot. */
1690 if (lowest != LOWEST && lowest != SYSMIS)
1691 sys_warn (r, record->pos,
1692 _("File specifies unexpected value %g (%a) as %s, "
1693 "instead of %g (%a) or %g (%a)."),
1694 lowest, lowest, "LOWEST", LOWEST, LOWEST, SYSMIS, SYSMIS);
1697 /* Parses record type 7, subtype 10. */
1699 parse_extra_product_info (struct sfm_reader *r,
1700 const struct sfm_extension_record *record,
1701 struct any_read_info *info)
1703 struct text_record *text;
1705 text = open_text_record (r, record, true);
1706 info->product_ext = fix_line_ends (text_get_all (text));
1707 close_text_record (r, text);
1710 /* Parses record type 7, subtype 7 or 19. */
1712 parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
1713 size_t *allocated_mrsets)
1715 struct text_record *text;
1717 text = open_text_record (r, record, false);
1720 struct sfm_mrset *mrset;
1721 size_t allocated_vars;
1724 /* Skip extra line feeds if present. */
1725 while (text_match (text, '\n'))
1728 if (r->n_mrsets >= *allocated_mrsets)
1729 r->mrsets = pool_2nrealloc (r->pool, r->mrsets, allocated_mrsets,
1731 mrset = &r->mrsets[r->n_mrsets];
1732 memset(mrset, 0, sizeof *mrset);
1734 mrset->name = text_get_token (text, ss_cstr ("="), NULL);
1735 if (mrset->name == NULL)
1738 if (text_match (text, 'C'))
1740 mrset->type = MRSET_MC;
1741 if (!text_match (text, ' '))
1743 sys_warn (r, record->pos,
1744 _("Missing space following `%c' at offset %zu "
1745 "in MRSETS record."), 'C', text_pos (text));
1749 else if (text_match (text, 'D'))
1751 mrset->type = MRSET_MD;
1752 mrset->cat_source = MRSET_VARLABELS;
1754 else if (text_match (text, 'E'))
1758 mrset->type = MRSET_MD;
1759 mrset->cat_source = MRSET_COUNTEDVALUES;
1760 if (!text_match (text, ' '))
1762 sys_warn (r, record->pos,
1763 _("Missing space following `%c' at offset %zu "
1764 "in MRSETS record."), 'E', text_pos (text));
1768 number = text_get_token (text, ss_cstr (" "), NULL);
1769 if (!strcmp (number, "11"))
1770 mrset->label_from_var_label = true;
1771 else if (strcmp (number, "1"))
1772 sys_warn (r, record->pos,
1773 _("Unexpected label source value following `E' "
1774 "at offset %zu in MRSETS record."),
1779 sys_warn (r, record->pos,
1780 _("Missing `C', `D', or `E' at offset %zu "
1781 "in MRSETS record."),
1786 if (mrset->type == MRSET_MD)
1788 mrset->counted = text_parse_counted_string (r, text);
1789 if (mrset->counted == NULL)
1793 mrset->label = text_parse_counted_string (r, text);
1794 if (mrset->label == NULL)
1802 var = text_get_token (text, ss_cstr (" \n"), &delimiter);
1805 if (delimiter != '\n')
1806 sys_warn (r, record->pos,
1807 _("Missing new-line parsing variable names "
1808 "at offset %zu in MRSETS record."),
1813 if (mrset->n_vars >= allocated_vars)
1814 mrset->vars = pool_2nrealloc (r->pool, mrset->vars,
1816 sizeof *mrset->vars);
1817 mrset->vars[mrset->n_vars++] = var;
1819 while (delimiter != '\n');
1823 close_text_record (r, text);
1827 decode_mrsets (struct sfm_reader *r, struct dictionary *dict)
1829 const struct sfm_mrset *s;
1831 for (s = r->mrsets; s < &r->mrsets[r->n_mrsets]; s++)
1833 struct stringi_set var_names;
1834 struct mrset *mrset;
1839 name = recode_string ("UTF-8", r->encoding, s->name, -1);
1840 if (!mrset_is_valid_name (name, dict_get_encoding (dict), false))
1842 sys_warn (r, -1, _("Invalid multiple response set name `%s'."),
1848 mrset = xzalloc (sizeof *mrset);
1850 mrset->type = s->type;
1851 mrset->cat_source = s->cat_source;
1852 mrset->label_from_var_label = s->label_from_var_label;
1853 if (s->label[0] != '\0')
1854 mrset->label = recode_string ("UTF-8", r->encoding, s->label, -1);
1856 stringi_set_init (&var_names);
1857 mrset->vars = xmalloc (s->n_vars * sizeof *mrset->vars);
1859 for (i = 0; i < s->n_vars; i++)
1861 struct variable *var;
1864 var_name = recode_string ("UTF-8", r->encoding, s->vars[i], -1);
1866 var = dict_lookup_var (dict, var_name);
1872 if (!stringi_set_insert (&var_names, var_name))
1875 _("MRSET %s contains duplicate variable name %s."),
1876 mrset->name, var_name);
1882 if (mrset->label == NULL && mrset->label_from_var_label
1883 && var_has_label (var))
1884 mrset->label = xstrdup (var_get_label (var));
1887 && var_get_type (var) != var_get_type (mrset->vars[0]))
1890 _("MRSET %s contains both string and "
1891 "numeric variables."), mrset->name);
1894 width = MIN (width, var_get_width (var));
1896 mrset->vars[mrset->n_vars++] = var;
1899 if (mrset->n_vars < 2)
1901 if (mrset->n_vars == 0)
1902 sys_warn (r, -1, _("MRSET %s has no variables."), mrset->name);
1904 sys_warn (r, -1, _("MRSET %s has only one variable."),
1906 mrset_destroy (mrset);
1907 stringi_set_destroy (&var_names);
1911 if (mrset->type == MRSET_MD)
1913 mrset->width = width;
1914 value_init (&mrset->counted, width);
1916 mrset->counted.f = c_strtod (s->counted, NULL);
1918 value_copy_str_rpad (&mrset->counted, width,
1919 (const uint8_t *) s->counted, ' ');
1922 dict_add_mrset (dict, mrset);
1923 stringi_set_destroy (&var_names);
1927 /* Read record type 7, subtype 11, which specifies how variables
1928 should be displayed in GUI environments. */
1930 parse_display_parameters (struct sfm_reader *r,
1931 const struct sfm_extension_record *record,
1932 struct dictionary *dict)
1934 bool includes_width;
1935 bool warned = false;
1940 n_vars = dict_get_var_cnt (dict);
1941 if (record->count == 3 * n_vars)
1942 includes_width = true;
1943 else if (record->count == 2 * n_vars)
1944 includes_width = false;
1947 sys_warn (r, record->pos,
1948 _("Extension 11 has bad count %u (for %zu variables)."),
1949 record->count, n_vars);
1954 for (i = 0; i < n_vars; ++i)
1956 struct variable *v = dict_get_var (dict, i);
1957 int measure, width, align;
1959 measure = parse_int (r, record->data, ofs);
1964 width = parse_int (r, record->data, ofs);
1970 align = parse_int (r, record->data, ofs);
1973 /* SPSS sometimes seems to set variables' measure to zero. */
1977 if (measure < 1 || measure > 3 || align < 0 || align > 2)
1980 sys_warn (r, record->pos,
1981 _("Invalid variable display parameters for variable "
1982 "%zu (%s). Default parameters substituted."),
1983 i, var_get_name (v));
1988 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
1989 : measure == 2 ? MEASURE_ORDINAL
1991 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
1992 : align == 1 ? ALIGN_RIGHT
1995 /* Older versions (SPSS 9.0) sometimes set the display
1996 width to zero. This causes confusion in the GUI, so
1997 only set the width if it is nonzero. */
1999 var_set_display_width (v, width);
/* Renames VAR in DICT to NEW_NAME while keeping all of VAR's short names.
   If the rename fails, a warning that NEW_NAME is a duplicate is issued at
   file offset POS and VAR keeps its old name. */
static void
rename_var_and_save_short_names (struct sfm_reader *r, off_t pos,
                                 struct dictionary *dict,
                                 struct variable *var, const char *new_name)
{
  size_t n_short_names;
  char **short_names;
  size_t i;

  /* Renaming a variable may clear its short names, but we
     want to retain them, so we save them and re-set them
     later. */
  n_short_names = var_get_short_name_cnt (var);
  short_names = xnmalloc (n_short_names, sizeof *short_names);
  for (i = 0; i < n_short_names; i++)
    {
      const char *s = var_get_short_name (var, i);
      short_names[i] = s != NULL ? xstrdup (s) : NULL;
    }

  /* Set long name. */
  if (!dict_try_rename_var (dict, var, new_name))
    sys_warn (r, pos, _("Duplicate long variable name `%s'."), new_name);

  /* Restore short names. */
  for (i = 0; i < n_short_names; i++)
    {
      var_set_short_name (var, i, short_names[i]);
      free (short_names[i]);
    }
  free (short_names);
}
2036 /* Parses record type 7, subtype 13, which gives the long name that corresponds
2037 to each short name. Modifies variable names in DICT accordingly. */
2039 parse_long_var_name_map (struct sfm_reader *r,
2040 const struct sfm_extension_record *record,
2041 struct dictionary *dict)
2043 struct text_record *text;
2044 struct variable *var;
2049 /* There are no long variable names. Use the short variable names,
2050 converted to lowercase, as the long variable names. */
2053 for (i = 0; i < dict_get_var_cnt (dict); i++)
2055 struct variable *var = dict_get_var (dict, i);
2058 new_name = utf8_to_lower (var_get_name (var));
2059 rename_var_and_save_short_names (r, -1, dict, var, new_name);
2066 /* Rename each of the variables, one by one. (In a correctly constructed
2067 system file, this cannot create any intermediate duplicate variable names,
2068 because all of the new variable names are longer than any of the old
2069 variable names and thus there cannot be any overlaps.) */
2070 text = open_text_record (r, record, true);
2071 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
2073 /* Validate long name. */
2074 if (!dict_id_is_valid (dict, long_name, false)
2075 || long_name[0] == '$' || long_name[0] == '#')
2077 sys_warn (r, record->pos,
2078 _("Long variable mapping from %s to invalid "
2079 "variable name `%s'."),
2080 var_get_name (var), long_name);
2084 rename_var_and_save_short_names (r, record->pos, dict, var, long_name);
2086 close_text_record (r, text);
2089 /* Reads record type 7, subtype 14, which gives the real length
2090 of each very long string. Rearranges DICT accordingly. */
2092 parse_long_string_map (struct sfm_reader *r,
2093 const struct sfm_extension_record *record,
2094 struct dictionary *dict)
2096 struct text_record *text;
2097 struct variable *var;
2100 text = open_text_record (r, record, true);
2101 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
2103 size_t idx = var_get_dict_index (var);
2109 length = strtol (length_s, NULL, 10);
2110 if (length < 1 || length > MAX_STRING)
2112 sys_warn (r, record->pos,
2113 _("%s listed as string of invalid length %s "
2114 "in very long string record."),
2115 var_get_name (var), length_s);
2119 /* Check segments. */
2120 segment_cnt = sfm_width_to_segments (length);
2121 if (segment_cnt == 1)
2123 sys_warn (r, record->pos,
2124 _("%s listed in very long string record with width %s, "
2125 "which requires only one segment."),
2126 var_get_name (var), length_s);
2129 if (idx + segment_cnt > dict_get_var_cnt (dict))
2131 sys_error (r, record->pos,
2132 _("Very long string %s overflows dictionary."),
2133 var_get_name (var));
2137 /* Get the short names from the segments and check their
2139 for (i = 0; i < segment_cnt; i++)
2141 struct variable *seg = dict_get_var (dict, idx + i);
2142 int alloc_width = sfm_segment_alloc_width (length, i);
2143 int width = var_get_width (seg);
2146 var_set_short_name (var, i, var_get_short_name (seg, 0));
2147 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
2149 sys_error (r, record->pos,
2150 _("Very long string with width %ld has segment %d "
2151 "of width %d (expected %d)."),
2152 length, i, width, alloc_width);
2156 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
2157 var_set_width (var, length);
2159 close_text_record (r, text);
2160 dict_compact_values (dict);
2166 parse_value_labels (struct sfm_reader *r, struct dictionary *dict,
2167 const struct sfm_var_record *var_recs, size_t n_var_recs,
2168 const struct sfm_value_label_record *record)
2170 struct variable **vars;
2174 utf8_labels = pool_nmalloc (r->pool, record->n_labels, sizeof *utf8_labels);
2175 for (i = 0; i < record->n_labels; i++)
2176 utf8_labels[i] = recode_string_pool ("UTF-8", dict_get_encoding (dict),
2177 record->labels[i].label, -1,
2180 vars = pool_nmalloc (r->pool, record->n_vars, sizeof *vars);
2181 for (i = 0; i < record->n_vars; i++)
2183 vars[i] = lookup_var_by_index (r, record->pos,
2184 var_recs, n_var_recs, record->vars[i]);
2185 if (vars[i] == NULL)
2189 for (i = 1; i < record->n_vars; i++)
2190 if (var_get_type (vars[i]) != var_get_type (vars[0]))
2192 sys_error (r, record->pos,
2193 _("Variables associated with value label are not all of "
2194 "identical type. Variable %s is %s, but variable "
2196 var_get_name (vars[0]),
2197 var_is_numeric (vars[0]) ? _("numeric") : _("string"),
2198 var_get_name (vars[i]),
2199 var_is_numeric (vars[i]) ? _("numeric") : _("string"));
2203 for (i = 0; i < record->n_vars; i++)
2205 struct variable *var = vars[i];
2209 width = var_get_width (var);
2212 sys_error (r, record->pos,
2213 _("Value labels may not be added to long string "
2214 "variables (e.g. %s) using records types 3 and 4."),
2215 var_get_name (var));
2219 for (j = 0; j < record->n_labels; j++)
2221 struct sfm_value_label *label = &record->labels[j];
2224 value_init (&value, width);
2226 value.f = parse_float (r, label->value, 0);
2228 memcpy (value_str_rw (&value, width), label->value, width);
2230 if (!var_add_value_label (var, &value, utf8_labels[j]))
2232 if (r->written_by_readstat)
2234 /* Ignore the problem. ReadStat is buggy and emits value
2235 labels whose values are longer than string variables'
2236 widths, that are identical in the actual width of the
2237 variable, e.g. both values "ABC123" and "ABC456" for a
2238 string variable with width 3. */
2240 else if (var_is_numeric (var))
2241 sys_warn (r, record->pos,
2242 _("Duplicate value label for %g on %s."),
2243 value.f, var_get_name (var));
2245 sys_warn (r, record->pos,
2246 _("Duplicate value label for `%.*s' on %s."),
2247 width, value_str (&value, width),
2248 var_get_name (var));
2251 value_destroy (&value, width);
2255 pool_free (r->pool, vars);
2256 for (i = 0; i < record->n_labels; i++)
2257 pool_free (r->pool, utf8_labels[i]);
2258 pool_free (r->pool, utf8_labels);
2263 static struct variable *
2264 lookup_var_by_index (struct sfm_reader *r, off_t offset,
2265 const struct sfm_var_record *var_recs, size_t n_var_recs,
2268 const struct sfm_var_record *rec;
2270 if (idx < 1 || idx > n_var_recs)
2272 sys_error (r, offset,
2273 _("Variable index %d not in valid range 1...%zu."),
2278 rec = &var_recs[idx - 1];
2279 if (rec->var == NULL)
2281 sys_error (r, offset,
2282 _("Variable index %d refers to long string continuation."),
/* Parses a set of custom attributes from TEXT into ATTRS.
   ATTRS may be a null pointer, in which case the attributes are
   read but discarded.

   Each attribute is a key followed by '(' and one or more values, one per
   line, ended by ')'.  Attributes are read until one is not followed by
   '/'.  Parsing stops early at a missing key or a missing value. */
static void
parse_attributes (struct sfm_reader *r, struct text_record *text,
                  struct attrset *attrs)
{
  do
    {
      struct attribute *attr;
      char *key;
      int index;

      /* Parse the key. */
      key = text_get_token (text, ss_cstr ("("), NULL);
      if (key == NULL)
        return;

      attr = attribute_create (key);
      for (index = 1; ; index++)
        {
          /* Parse the value. */
          char *value;
          size_t length;

          value = text_get_token (text, ss_cstr ("\n"), NULL);
          if (value == NULL)
            {
              text_warn (r, text, _("Error parsing attribute value %s[%d]."),
                         key, index);
              /* ATTR has not been handed to ATTRS yet, so it must be
                 released here or it leaks. */
              attribute_destroy (attr);
              return;
            }

          length = strlen (value);
          if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
            {
              /* Strip the surrounding quotes. */
              value[length - 1] = '\0';
              attribute_add_value (attr, value + 1);
            }
          else
            {
              text_warn (r, text,
                         _("Attribute value %s[%d] is not quoted: %s."),
                         key, index, value);
              attribute_add_value (attr, value);
            }

          /* Was this the last value for this attribute? */
          if (text_match (text, ')'))
            break;
        }
      if (attrs != NULL)
        {
          if (!attrset_try_add (attrs, attr))
            {
              text_warn (r, text, _("Duplicate attribute %s."),
                         attribute_get_name (attr));
              attribute_destroy (attr);
            }
        }
      else
        attribute_destroy (attr);
    }
  while (!text_match (text, '/'));
}
/* Reads record type 7, subtype 17, which lists custom
   attributes on the data file.  The attributes are added to DICT's own
   attribute set. */
static void
parse_data_file_attributes (struct sfm_reader *r,
                            const struct sfm_extension_record *record,
                            struct dictionary *dict)
{
  struct text_record *text = open_text_record (r, record, true);
  parse_attributes (r, text, dict_get_attributes (dict));
  close_text_record (r, text);
}
/* Parses record type 7, subtype 18, which lists custom
   attributes on individual variables.

   RECORD is a sequence of variable names, each followed by ':' and that
   variable's attributes.  When a name yields no variable, its attributes
   are still parsed but are discarded. */
static void
parse_variable_attributes (struct sfm_reader *r,
                           const struct sfm_extension_record *record,
                           struct dictionary *dict)
{
  struct text_record *text;
  struct variable *var;

  text = open_text_record (r, record, true);
  while (text_read_variable_name (r, dict, text, ss_cstr (":"), &var))
    parse_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
  close_text_record (r, text);
}
2385 assign_variable_roles (struct sfm_reader *r, struct dictionary *dict)
2387 size_t n_warnings = 0;
2390 for (i = 0; i < dict_get_var_cnt (dict); i++)
2392 struct variable *var = dict_get_var (dict, i);
2393 struct attrset *attrs = var_get_attributes (var);
2394 const struct attribute *attr = attrset_lookup (attrs, "$@Role");
2397 int value = atoi (attribute_get_value (attr, 0));
2419 role = ROLE_PARTITION;
2428 if (n_warnings++ == 0)
2429 sys_warn (r, -1, _("Invalid role for variable %s."),
2430 var_get_name (var));
2433 var_set_role (var, role);
2438 sys_warn (r, -1, _("%zu other variables had invalid roles."),
2443 check_overflow (struct sfm_reader *r,
2444 const struct sfm_extension_record *record,
2445 size_t ofs, size_t length)
2447 size_t end = record->size * record->count;
2448 if (length >= end || ofs + length > end)
2450 sys_warn (r, record->pos + end,
2451 _("Extension record subtype %d ends unexpectedly."),
/* Parses RECORD, an extension record of value labels for long string
   variables, and adds the labels to the matching variables in DICT.
   Every read from the record data is preceded by a check_overflow() call;
   problems are reported with sys_warn() at the relevant file offset. */
2459 parse_long_string_value_labels (struct sfm_reader *r,
2460 const struct sfm_extension_record *record,
2461 struct dictionary *dict)
2463 const char *dict_encoding = dict_get_encoding (dict);
/* Size of the record payload in bytes: element size times element count. */
2464 size_t end = record->size * record->count;
2471 struct variable *var;
2476 /* Parse variable name length. */
2477 if (!check_overflow (r, record, ofs, 4))
2479 var_name_len = parse_int (r, record->data, ofs);
2482 /* Parse variable name, width, and number of labels. */
/* The second check also covers the two 4-byte integers (width and label
   count) that follow the name. */
2483 if (!check_overflow (r, record, ofs, var_name_len)
2484 || !check_overflow (r, record, ofs, var_name_len + 8))
2486 var_name = recode_string_pool ("UTF-8", dict_encoding,
2487 (const char *) record->data + ofs,
2488 var_name_len, r->pool);
2489 width = parse_int (r, record->data, ofs + var_name_len);
2490 n_labels = parse_int (r, record->data, ofs + var_name_len + 4);
2491 ofs += var_name_len + 8;
2493 /* Look up 'var' and validate. */
2494 var = dict_lookup_var (dict, var_name);
2496 sys_warn (r, record->pos + ofs,
2497 _("Ignoring long string value label record for "
2498 "unknown variable %s."), var_name);
2499 else if (var_is_numeric (var))
2501 sys_warn (r, record->pos + ofs,
2502 _("Ignoring long string value label record for "
2503 "numeric variable %s."), var_name);
2506 else if (width != var_get_width (var))
2508 sys_warn (r, record->pos + ofs,
2509 _("Ignoring long string value label record for variable "
2510 "%s because the record's width (%d) does not match the "
2511 "variable's width (%d)."),
2512 var_name, width, var_get_width (var));
/* Scratch value of the record's declared width, filled in for each label. */
2517 value_init_pool (r->pool, &value, width);
2518 for (i = 0; i < n_labels; i++)
2520 size_t value_length, label_length;
/* SKIP starts out true whenever VAR is null at this point. */
2521 bool skip = var == NULL;
2523 /* Parse value length. */
2524 if (!check_overflow (r, record, ofs, 4))
2526 value_length = parse_int (r, record->data, ofs);
2530 if (!check_overflow (r, record, ofs, value_length))
/* A value is copied only when its stored length equals the declared
   width; a mismatch is reported by the warning that follows. */
2534 if (value_length == width)
2535 memcpy (value_str_rw (&value, width),
2536 (const uint8_t *) record->data + ofs, width);
2539 sys_warn (r, record->pos + ofs,
2540 _("Ignoring long string value label %zu for "
2541 "variable %s, with width %d, that has bad value "
2543 i, var_get_name (var), width, value_length);
2547 ofs += value_length;
2549 /* Parse label length. */
2550 if (!check_overflow (r, record, ofs, 4))
2552 label_length = parse_int (r, record->data, ofs);
2556 if (!check_overflow (r, record, ofs, label_length))
2562 label = recode_string_pool ("UTF-8", dict_encoding,
2563 (const char *) record->data + ofs,
2564 label_length, r->pool);
2565 if (!var_add_value_label (var, &value, label))
2566 sys_warn (r, record->pos + ofs,
2567 _("Duplicate value label for `%.*s' on %s."),
2568 width, value_str (&value, width),
2569 var_get_name (var));
/* The recoded copy of the label is released once it has been offered to
   var_add_value_label(). */
2570 pool_free (r->pool, label);
2572 ofs += label_length;
/* Parses RECORD, an extension record of missing values for long string
   variables, and stores the values on the matching variables in DICT with
   var_set_missing_values().  Reads from the record data are guarded by
   check_overflow() and problems are reported with sys_warn(). */
2578 parse_long_string_missing_values (struct sfm_reader *r,
2579 const struct sfm_extension_record *record,
2580 struct dictionary *dict)
2582 const char *dict_encoding = dict_get_encoding (dict);
/* Size of the record payload in bytes: element size times element count. */
2583 size_t end = record->size * record->count;
2588 struct missing_values mv;
2590 struct variable *var;
2591 int n_missing_values;
2595 /* Parse variable name length. */
2596 if (!check_overflow (r, record, ofs, 4))
2598 var_name_len = parse_int (r, record->data, ofs);
2601 /* Parse variable name. */
/* The second check also covers the one-byte count that follows the name. */
2602 if (!check_overflow (r, record, ofs, var_name_len)
2603 || !check_overflow (r, record, ofs, var_name_len + 1))
2605 var_name = recode_string_pool ("UTF-8", dict_encoding,
2606 (const char *) record->data + ofs,
2607 var_name_len, r->pool);
2608 ofs += var_name_len;
2610 /* Parse number of missing values. */
/* The count is a single byte located right after the name. */
2611 n_missing_values = ((const uint8_t *) record->data)[ofs];
2612 if (n_missing_values < 1 || n_missing_values > 3)
2613 sys_warn (r, record->pos + ofs,
2614 _("Long string missing values record says variable %s "
2615 "has %d missing values, but only 1 to 3 missing values "
2617 var_name, n_missing_values);
2620 /* Look up 'var' and validate. */
2621 var = dict_lookup_var (dict, var_name);
2623 sys_warn (r, record->pos + ofs,
2624 _("Ignoring long string missing value record for "
2625 "unknown variable %s."), var_name);
2626 else if (var_is_numeric (var))
2628 sys_warn (r, record->pos + ofs,
2629 _("Ignoring long string missing value record for "
2630 "numeric variable %s."), var_name);
/* When VAR is null, a width of 8 is used for the scratch missing-value
   set. */
2635 mv_init_pool (r->pool, &mv, var ? var_get_width (var) : 8);
2636 for (i = 0; i < n_missing_values; i++)
2638 size_t value_length;
2640 /* Parse value length. */
2641 if (!check_overflow (r, record, ofs, 4))
2643 value_length = parse_int (r, record->data, ofs);
2647 if (!check_overflow (r, record, ofs, value_length))
2651 && !mv_add_str (&mv, (const uint8_t *) record->data + ofs,
2653 sys_warn (r, record->pos + ofs,
2654 _("Ignoring long string missing value %zu for variable "
2655 "%s, with width %d, that has bad value width %zu."),
2656 i, var_get_name (var), var_get_width (var),
2658 ofs += value_length;
2661 var_set_missing_values (var, &mv);
/* Forward declarations for the case-reading helpers defined further down. */
2667 static void partial_record (struct sfm_reader *);
2669 static void read_error (struct casereader *, const struct sfm_reader *);
2671 static bool read_case_number (struct sfm_reader *, double *);
2672 static int read_case_string (struct sfm_reader *, uint8_t *, size_t);
2673 static int read_opcode (struct sfm_reader *);
2674 static bool read_compressed_number (struct sfm_reader *, double *);
2675 static int read_compressed_string (struct sfm_reader *, uint8_t *);
2676 static int read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
2677 static bool skip_whole_strings (struct sfm_reader *, size_t);
2679 /* Reads and returns one case from READER's file. Returns a null
2680 pointer if not successful. */
2681 static struct ccase *
2682 sys_file_casereader_read (struct casereader *reader, void *r_)
2684 struct sfm_reader *r = r_;
/* Guard: an error was already recorded, or there are no variables to read. */
2689 if (r->error || !r->sfm_var_cnt)
2692 c = case_create (r->proto);
2694 for (i = 0; i < r->sfm_var_cnt; i++)
2696 struct sfm_var *sv = &r->sfm_vars[i];
2697 union value *v = case_data_rw_idx (c, sv->case_index);
/* A width of 0 selects the numeric path; any other width is read as a
   string segment placed at SV->offset within the value. */
2699 if (sv->var_width == 0)
2700 retval = read_case_number (r, &v->f);
2703 uint8_t *s = value_str_rw (v, sv->var_width);
2704 retval = read_case_string (r, s + sv->offset, sv->segment_width);
/* Padding after the segment is skipped in whole 8-byte units. */
2707 retval = skip_whole_strings (r, ROUND_DOWN (sv->padding, 8));
2709 sys_error (r, r->pos, _("File ends in partial string value."));
/* NOTE(review): presumably a case count of -1 means the count is unknown,
   so a failed read is only reported when a count was given -- confirm. */
2721 if (r->case_cnt != -1)
2722 read_error (reader, r);
2727 /* Issues an error that R ends in a partial record. */
/* The error is reported through sys_error() at R's current position. */
2729 partial_record (struct sfm_reader *r)
2731 sys_error (r, r->pos, _("File ends in partial case."));
/* Emits a class ME message naming the file behind SFM, then calls
   casereader_force_error() on casereader R. */
2734 /* Issues an error that an unspecified error occurred SFM, and
2737 read_error (struct casereader *r, const struct sfm_reader *sfm)
2739 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
2740 casereader_force_error (r);
2743 /* Reads a number from R and stores its value in *D.
2744 If R is compressed, reads a compressed number;
2745 otherwise, reads a number in the regular way.
2746 Returns true if successful, false if end of file is
2747 reached immediately. */
2749 read_case_number (struct sfm_reader *r, double *d)
/* Uncompressed data: the raw bytes are read and then converted from the
   file's float format to a native double. */
2751 if (r->compression == ANY_COMP_NONE)
2754 if (!try_read_bytes (r, number, sizeof number))
2756 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
/* Compressed data is decoded by read_compressed_number(). */
2760 return read_compressed_number (r, d);
2763 /* Reads LENGTH string bytes from R into S. Always reads a multiple of 8
2764 bytes; if LENGTH is not a multiple of 8, then extra bytes are read and
2765 discarded without being written to S. Reads compressed strings if S is
2766 compressed. Returns 1 if successful, 0 if end of file is reached
2767 immediately, or -1 for some kind of error. */
2769 read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
/* Split LENGTH into the part made of whole 8-byte units and the remainder. */
2771 size_t whole = ROUND_DOWN (length, 8);
2772 size_t partial = length % 8;
/* The whole units go straight into S. */
2776 int retval = read_whole_strings (r, s, whole);
/* The remainder is read as a full unit into BOUNCE so that only PARTIAL
   bytes are copied on to S. */
2784 int retval = read_whole_strings (r, bounce, sizeof bounce);
2796 memcpy (s + whole, bounce, partial);
2802 /* Reads and returns the next compression opcode from R. */
2804 read_opcode (struct sfm_reader *r)
/* Opcodes are only read from compressed files. */
2806 assert (r->compression != ANY_COMP_NONE);
/* Every buffered opcode has been consumed: fetch the next batch into
   R->opcodes. */
2810 if (r->opcode_idx >= sizeof r->opcodes)
2813 int retval = try_read_compressed_bytes (r, r->opcodes,
2819 opcode = r->opcodes[r->opcode_idx++];
2826 /* Reads a compressed number from R and stores its value in D.
2827 Returns true if successful, false if end of file is
2828 reached immediately. */
2830 read_compressed_number (struct sfm_reader *r, double *d)
2832 int opcode = read_opcode (r);
/* For this opcode the number itself follows in the data stream. */
2840 return read_compressed_float (r, d);
/* Compressed spaces where a number was expected: the blank bytes are
   converted anyway and a warning is issued once per reader. */
2843 float_convert (r->float_format, " ", FLOAT_NATIVE_DOUBLE, d);
2844 if (!r->corruption_warning)
2846 r->corruption_warning = true;
2847 sys_warn (r, r->pos,
2848 _("Possible compressed data corruption: "
2849 "compressed spaces appear in numeric field."));
/* Here the opcode encodes the value directly, offset by the bias. */
2858 *d = opcode - r->bias;
2865 /* Reads a compressed 8-byte string segment from R and stores it in DST. */
2867 read_compressed_string (struct sfm_reader *r, uint8_t *dst)
2872 opcode = read_opcode (r);
/* Literal data follows: anything other than a complete 8-byte read,
   including end of file, is returned as an error (-1). */
2880 retval = read_compressed_bytes (r, dst, 8);
2881 return retval == 1 ? 1 : -1;
/* This opcode stands for eight spaces. */
2884 memset (dst, ' ', 8);
/* An opcode that encodes a number: opcode minus bias is written to DST
   in the file's float format. */
2889 double value = opcode - r->bias;
2890 float_convert (FLOAT_NATIVE_DOUBLE, &value, r->float_format, dst);
2893 /* This has actually been seen "in the wild". The submitter of the
2894 file that showed that the contents decoded as spaces, but they
2895 were at the end of the field so it's possible that the null
2896 bytes just acted as null terminators. */
/* The corruption warning is issued at most once per reader. */
2898 else if (!r->corruption_warning)
2900 r->corruption_warning = true;
2901 sys_warn (r, r->pos,
2902 _("Possible compressed data corruption: "
2903 "string contains compressed integer (opcode %d)."),
2911 /* Reads LENGTH string bytes from R into S. LENGTH must be a multiple of 8.
2912 Reads compressed strings if S is compressed. Returns 1 if successful, 0 if
2913 end of file is reached immediately, or -1 for some kind of error. */
2915 read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
2917 assert (length % 8 == 0);
/* Uncompressed data is read in a single call. */
2918 if (r->compression == ANY_COMP_NONE)
2919 return try_read_bytes (r, s, length);
/* Compressed data is decoded one 8-byte segment at a time. */
2924 for (ofs = 0; ofs < length; ofs += 8)
2926 int retval = read_compressed_string (r, s + ofs);
2941 /* Skips LENGTH string bytes from R.
2942 LENGTH must be a multiple of 8.
2943 (LENGTH is also limited to 1024, but that's only because the
2944 current caller never needs more than that many bytes.)
2945 Returns true if successful, false if end of file is
2946 reached immediately. */
/* The bytes are read into a local scratch buffer and thrown away.
   read_whole_strings() returns an int (1, 0 or -1) while this function is
   declared bool, so the result is compared with 1 explicitly; returning
   the int directly would turn the error value -1 into true. */
2948 skip_whole_strings (struct sfm_reader *r, size_t length)
2950 uint8_t buffer[1024];
/* NOTE(review): the test is strict, so LENGTH must stay below the buffer
   size rather than merely within it. */
2951 assert (length < sizeof buffer);
2952 return read_whole_strings (r, buffer, length) == 1;
/* State for parsing one text record: the cap on reported warnings and the
   members of the per-record cursor. */
2955 /* Helpers for reading records that contain structured text
2958 /* Maximum number of warnings to issue for a single text
2960 #define MAX_TEXT_WARNINGS 5
2965 struct substring buffer; /* Record contents. */
2966 off_t start; /* Starting offset in file. */
2967 size_t pos; /* Current position in buffer. */
/* n_warnings keeps counting past MAX_TEXT_WARNINGS; close_text_record()
   uses the excess to report how many warnings were suppressed. */
2968 int n_warnings; /* Number of warnings issued or suppressed. */
2969 bool recoded; /* Recoded into UTF-8? */
/* Creates a text_record for RECORD, allocated from R's pool.  The raw
   contents span RECORD's size times count bytes.  When RECODE_TO_UTF8 is
   true the contents go through recode_substring_pool() with "UTF-8" and
   R's encoding.  The warning counter starts at zero. */
2972 static struct text_record *
2973 open_text_record (struct sfm_reader *r,
2974 const struct sfm_extension_record *record,
2975 bool recode_to_utf8)
2977 struct text_record *text;
2978 struct substring raw;
2980 text = pool_alloc (r->pool, sizeof *text);
2981 raw = ss_buffer (record->data, record->size * record->count);
/* Offsets in later warnings are relative to the record's file position. */
2982 text->start = record->pos;
2983 text->buffer = (recode_to_utf8
2984 ? recode_substring_pool ("UTF-8", r->encoding, raw, r->pool)
2987 text->n_warnings = 0;
2988 text->recoded = recode_to_utf8;
2993 /* Closes TEXT, frees its storage, and issues a final warning
2994 about suppressed warnings if necessary. */
2996 close_text_record (struct sfm_reader *r, struct text_record *text)
/* n_warnings counts every warning, shown or not, so the excess over the
   cap is the number that was suppressed. */
2998 if (text->n_warnings > MAX_TEXT_WARNINGS)
2999 sys_warn (r, -1, _("Suppressed %d additional related warnings."),
3000 text->n_warnings - MAX_TEXT_WARNINGS);
/* The buffer contents are handed back to R's pool. */
3002 pool_free (r->pool, ss_data (text->buffer));
3005 /* Reads a variable=value pair from TEXT.
3006 Looks up the variable in DICT and stores it into *VAR.
3007 Stores a null-terminated value into *VALUE. */
3009 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
3010 struct text_record *text,
3011 struct variable **var, char **value)
/* The variable name runs up to the "=" delimiter. */
3015 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
/* The value runs up to a tab or a null byte. */
3018 *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL);
/* Step over any run of tab and null bytes that follows the value. */
3022 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
3023 ss_buffer ("\t\0", 2));
/* Reads a token from TEXT that ends at one of DELIMITERS, looks it up in
   DICT with dict_lookup_var(), and stores the result in *VAR.  A name that
   is not in DICT is reported with text_warn(). */
3031 text_read_variable_name (struct sfm_reader *r, struct dictionary *dict,
3032 struct text_record *text, struct substring delimiters,
3033 struct variable **var)
3037 name = text_get_token (text, delimiters, NULL);
3041 *var = dict_lookup_var (dict, name);
3045 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
/* Reads a token from TEXT that ends at one of DELIMITERS and looks it up
   in DICT with dict_lookup_var(), storing the result in *VAR.  A missing
   token is detected first; an unknown name is reported with text_warn(). */
3052 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
3053 struct text_record *text, struct substring delimiters,
3054 struct variable **var)
3056 char *short_name = text_get_token (text, delimiters, NULL);
3057 if (short_name == NULL)
3060 *var = dict_lookup_var (dict, short_name);
3062 text_warn (r, text, _("Dictionary record refers to unknown variable %s."),
3067 /* Displays a warning for the current file position, limiting the
3068 number to MAX_TEXT_WARNINGS for TEXT. */
3070 text_warn (struct sfm_reader *r, struct text_record *text,
3071 const char *format, ...)
/* The counter advances on every call, including calls whose message is
   suppressed. */
3073 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
3077 va_start (args, format);
/* The reported offset is the record start plus the position within it. */
3078 sys_msg (r, text->start + text->pos, MW, format, args);
/* Extracts the next token from TEXT's buffer with ss_tokenize(), which
   splits at DELIMITERS and advances TEXT->pos.  The function returns a
   pointer to the token's data inside the buffer. */
3084 text_get_token (struct text_record *text, struct substring delimiters,
3087 struct substring token;
3090 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
/* END points just past the last byte of the token. */
3093 end = &ss_data (token)[ss_length (token)];
3094 if (delimiter != NULL)
3097 return ss_data (token);
3100 /* Reads an integer value expressed in decimal, then a space, then a string that
3101 consists of exactly as many bytes as specified by the integer, then a space,
3102 from TEXT. Returns the string, null-terminated, as a subset of TEXT's
3103 buffer (so the caller should not free the string). */
3105 text_parse_counted_string (struct sfm_reader *r, struct text_record *text)
/* Accumulate the decimal byte count.
   NOTE(review): the accumulation has no visible overflow check, and the
   later TEXT->pos + N comparison could wrap for a huge count -- confirm. */
3113 while (text->pos < text->buffer.length)
3115 int c = text->buffer.string[text->pos];
3116 if (c < '0' || c > '9')
3118 n = (n * 10) + (c - '0');
/* Fails when the record ended during the number or no digit was read. */
3121 if (text->pos >= text->buffer.length || start == text->pos)
3123 sys_warn (r, text->start,
3124 _("Expecting digit at offset %zu in MRSETS record."),
3129 if (!text_match (text, ' '))
3131 sys_warn (r, text->start,
3132 _("Expecting space at offset %zu in MRSETS record."),
/* The counted string must fit inside what is left of the buffer. */
3137 if (text->pos + n > text->buffer.length)
3139 sys_warn (r, text->start,
3140 _("%zu-byte string starting at offset %zu "
3141 "exceeds record length %zu."),
3142 n, text->pos, text->buffer.length);
3146 s = &text->buffer.string[text->pos];
3149 sys_warn (r, text->start,
3150 _("Expecting space at offset %zu following %zu-byte string."),
/* Tests whether the byte at TEXT's current position equals C.  The first
   test handles a position at or past the end of the buffer. */
3160 text_match (struct text_record *text, char c)
3162 if (text->pos >= text->buffer.length)
3165 if (text->buffer.string[text->pos] == c)
3174 /* Returns the current byte offset (as converted to UTF-8, if it was converted)
3175 inside the TEXT's string. */
/* TEXT is taken as a pointer to const and is only read. */
3177 text_pos (const struct text_record *text)
/* Returns a pointer to the start of TEXT's buffer, independent of the
   current position. */
3183 text_get_all (const struct text_record *text)
3185 return text->buffer.string;
3190 /* Displays a corruption message. */
/* The message text is assembled in a dynamic string: the file name, with
   or without a "near offset" part, followed by FORMAT expanded with ARGS.
   CLASS determines the category and severity of the message. */
3192 sys_msg (struct sfm_reader *r, off_t offset,
3193 int class, const char *format, va_list args)
3198 ds_init_empty (&text);
3200 ds_put_format (&text, _("`%s' near offset 0x%llx: "),
3201 fh_get_file_name (r->fh), (long long int) offset);
3203 ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
3204 ds_put_vformat (&text, format, args);
3206 m.category = msg_class_to_category (class);
3207 m.severity = msg_class_to_severity (class);
3213 m.text = ds_cstr (&text);
3218 /* Displays a warning for offset OFFSET in the file. */
/* Variadic wrapper: FORMAT and its arguments are forwarded to sys_msg()
   with message class MW. */
3220 sys_warn (struct sfm_reader *r, off_t offset, const char *format, ...)
3224 va_start (args, format);
3225 sys_msg (r, offset, MW, format, args);
/* Variadic wrapper: FORMAT and its arguments are forwarded to sys_msg()
   with message class ME and the given OFFSET. */
3229 /* Displays an error for the current file position and marks it as in an error
3232 sys_error (struct sfm_reader *r, off_t offset, const char *format, ...)
3236 va_start (args, format);
3237 sys_msg (r, offset, ME, format, args);
/* R->pos advances by the number of bytes actually read, even on a short
   read.  When the read comes up short without a system error, "Unexpected
   end of file" is reported if EOF_IS_OK is false or if some bytes had
   already been read. */
3243 /* Reads BYTE_CNT bytes into BUF.
3244 Returns 1 if exactly BYTE_CNT bytes are successfully read.
3245 Returns -1 if an I/O error or a partial read occurs.
3246 Returns 0 for an immediate end-of-file and, if EOF_IS_OK is false, reports
3249 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
3250 void *buf, size_t byte_cnt)
3252 size_t bytes_read = fbuf_read (r->fbuf, buf, byte_cnt);
3253 r->pos += bytes_read;
3254 if (bytes_read == byte_cnt)
3257 int status = fbuf_get_status (r->fbuf);
3260 sys_error (r, r->pos, _("System error: %s."), strerror (errno));
3263 else if (!eof_is_ok || bytes_read != 0)
3265 sys_error (r, r->pos, _("Unexpected end of file."));
3272 /* Reads BYTE_CNT into BUF.
3273 Returns true if successful.
3274 Returns false upon I/O error or if end-of-file is encountered. */
3276 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
/* EOF_IS_OK is passed as false, and only a complete read (result 1) counts
   as success. */
3278 return read_bytes_internal (r, false, buf, byte_cnt) == 1;
3281 /* Reads BYTE_CNT bytes into BUF.
3282 Returns 1 if exactly BYTE_CNT bytes are successfully read.
3283 Returns 0 if an immediate end-of-file is encountered.
3284 Returns -1 if an I/O error or a partial read occurs. */
3286 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
/* EOF_IS_OK is passed as true, and the three-way result is returned
   unchanged. */
3288 return read_bytes_internal (r, true, buf, byte_cnt);
3291 /* Reads a 32-bit signed integer from R and stores its value in host format in
3292 *X. Returns true if successful, otherwise false. */
3294 read_int (struct sfm_reader *r, int *x)
/* read_bytes() yields 1 only for a complete read. */
3297 if (read_bytes (r, integer, sizeof integer) != 1)
/* The raw bytes are decoded according to R's integer format. */
3299 *x = integer_get (r->integer_format, integer, sizeof integer);
/* Unsigned counterpart of read_int(): the value is read with read_int()
   into the signed temporary Y, and X points to an unsigned int. */
3304 read_uint (struct sfm_reader *r, unsigned int *x)
3309 ok = read_int (r, &y);
/* The bytes are read with read_bytes(), decoded by integer_get() using R's
   integer format, and the result is stored in *X. */
3314 /* Reads a 64-bit signed integer from R and returns its value in
3317 read_int64 (struct sfm_reader *r, long long int *x)
3320 if (read_bytes (r, integer, sizeof integer) != 1)
3322 *x = integer_get (r->integer_format, integer, sizeof integer);
/* Unsigned counterpart of read_int64(): the value is read with read_int64()
   into the signed temporary Y, and X points to an unsigned long long int. */
3326 /* Reads a 64-bit unsigned integer from R and returns its value in
3329 read_uint64 (struct sfm_reader *r, unsigned long long int *x)
3334 ok = read_int64 (r, &y);
/* Decodes the 4 bytes at byte offset OFS within DATA as an integer in R's
   integer format.  No bounds check is made here. */
3340 parse_int (const struct sfm_reader *r, const void *data, size_t ofs)
3342 return integer_get (r->integer_format, (const uint8_t *) data + ofs, 4);
/* Decodes the floating-point number at byte offset OFS within DATA using
   R's float format.  No bounds check is made here. */
3346 parse_float (const struct sfm_reader *r, const void *data, size_t ofs)
3348 return float_get_double (r->float_format, (const uint8_t *) data + ofs);
3351 /* Reads exactly SIZE - 1 bytes into BUFFER
3352 and stores a null byte into BUFFER[SIZE - 1]. */
3354 read_string (struct sfm_reader *r, char *buffer, size_t size)
/* NOTE(review): SIZE - 1 assumes SIZE is at least 1; lines not visible
   here may check that -- confirm. */
3359 ok = read_bytes (r, buffer, size - 1);
/* The last byte of BUFFER is reserved for the terminator. */
3361 buffer[size - 1] = '\0';
3365 /* Skips BYTES bytes forward in R. */
3367 skip_bytes (struct sfm_reader *r, size_t bytes)
/* Data is read and discarded in chunks no larger than the local BUFFER. */
3372 size_t chunk = MIN (sizeof buffer, bytes);
3373 if (!read_bytes (r, buffer, chunk))
/* The copy is allocated with strlen (S) + 1 bytes, which is always enough
   because replacing a CR or a CR LF pair by LF never lengthens the text. */
3381 /* Returns a malloc()'d copy of S in which all lone CRs and CR LF pairs have
3382 been replaced by LFs.
3384 (A product that identifies itself as VOXCO INTERVIEWER 4.3 produces system
3385 files that use CR-only line ends in the file label and extra product
3388 fix_line_ends (const char *s)
3392 d = dst = xmalloc (strlen (s) + 1);
/* Forward declaration; read_zheader() calls this before its definition. */
3411 read_ztrailer (struct sfm_reader *r,
3412 long long int zheader_ofs,
3413 long long int ztrailer_len);
/* Allocation callback installed as the zalloc member of R's z_stream.
   POOL_ is the pool registered as the stream's opaque pointer.  A request
   with a zero SIZE, or whose ITEMS * SIZE product would overflow, does not
   reach pool_malloc(). */
3416 zalloc (voidpf pool_, uInt items, uInt size)
3418 struct pool *pool = pool_;
3420 return (!size || xalloc_oversized (items, size)
3422 : pool_malloc (pool, items * size));
/* Deallocation callback installed as the zfree member of R's z_stream:
   hands ADDRESS back to the pool passed as POOL_. */
3426 zfree (voidpf pool_, voidpf address)
3428 struct pool *pool = pool_;
3430 pool_free (pool, address);
/* Reads the ZLIB data header: three 64-bit integers giving the header
   offset, the trailer offset, and the trailer length.  After sanity checks
   on those values it validates the trailer with read_ztrailer(), sets up
   the inflate buffers and allocator callbacks, and opens the stream with
   open_zstream(). */
3434 read_zheader (struct sfm_reader *r)
3437 long long int zheader_ofs;
3438 long long int ztrailer_ofs;
3439 long long int ztrailer_len;
3441 if (!read_int64 (r, &zheader_ofs)
3442 || !read_int64 (r, &ztrailer_ofs)
3443 || !read_int64 (r, &ztrailer_len))
/* The header records its own file offset, which must match POS. */
3446 if (zheader_ofs != pos)
3448 sys_error (r, pos, _("Wrong ZLIB data header offset %#llx "
3449 "(expected %#llx)."),
3450 zheader_ofs, (long long int) pos);
/* The trailer cannot start before the current read position. */
3454 if (ztrailer_ofs < r->pos)
3456 sys_error (r, pos, _("Impossible ZLIB trailer offset 0x%llx."),
/* The trailer length must be at least 24 bytes and a multiple of 24. */
3461 if (ztrailer_len < 24 || ztrailer_len % 24)
3463 sys_error (r, pos, _("Invalid ZLIB trailer length %lld."), ztrailer_len);
3467 r->ztrailer_ofs = ztrailer_ofs;
3468 if (!read_ztrailer (r, zheader_ofs, ztrailer_len))
/* The inflate buffers are allocated from the pool only when not yet set. */
3471 if (r->zin_buf == NULL)
3473 r->zin_buf = pool_malloc (r->pool, ZIN_BUF_SIZE);
3474 r->zout_buf = pool_malloc (r->pool, ZOUT_BUF_SIZE);
3475 r->zstream.next_in = NULL;
3476 r->zstream.avail_in = 0;
/* zlib allocates and frees through the reader pool via these callbacks. */
3479 r->zstream.zalloc = zalloc;
3480 r->zstream.zfree = zfree;
3481 r->zstream.opaque = r->pool;
3483 return open_zstream (r);
/* Repositions R's file buffer to OFFSET with fbuf_seek().  A failure is
   reported through sys_error(), using strerror() on the returned code. */
3487 seek (struct sfm_reader *r, off_t offset)
3489 int error = fbuf_seek (r->fbuf, offset);
3491 sys_error (r, 0, _("%s: seek failed (%s)."),
3492 fh_get_file_name (r->fh), strerror (error));
/* Validates the ZLIB data trailer located at R->ztrailer_ofs against
   ZHEADER_OFS and ZTRAILER_LEN: the bias, the block size, the block count
   and every block descriptor are checked, and the read position is finally
   moved back to ZHEADER_OFS + 24. */
3496 /* Performs some additional consistency checks on the ZLIB compressed data
3499 read_ztrailer (struct sfm_reader *r,
3500 long long int zheader_ofs,
3501 long long int ztrailer_len)
3503 long long int expected_uncmp_ofs;
3504 long long int expected_cmp_ofs;
3507 unsigned int block_size;
3508 unsigned int n_blocks;
3511 int seekable = fbuf_is_seekable (r->fbuf);
3514 sys_error (r, 0, _("%s: stat failed (%s)."),
3515 fh_get_file_name (r->fh), strerror (-seekable));
3520 /* We can't seek to the trailer and then back to the data in this file,
3521 so skip doing extra checks. */
/* A trailer that does not end exactly at the end of the file only draws a
   warning. */
3525 off_t size = fbuf_get_size (r->fbuf);
3526 if (size >= 0 && r->ztrailer_ofs + ztrailer_len != size)
3527 sys_warn (r, r->pos,
3528 _("End of ZLIB trailer (0x%llx) is not file size (0x%llx)."),
3529 r->ztrailer_ofs + ztrailer_len, (long long int) size);
/* NOTE(review): the result of seek() is not examined here. */
3531 seek (r, r->ztrailer_ofs);
3533 /* Read fixed header from ZLIB data trailer. */
3534 if (!read_int64 (r, &bias))
/* The trailer bias is compared after negation with the header bias. */
3536 if (-bias != r->bias)
3538 sys_error (r, r->pos, _("ZLIB trailer bias (%lld) differs from "
3539 "file header bias (%.2f)."),
3544 if (!read_int64 (r, &zero))
3547 sys_warn (r, r->pos,
3548 _("ZLIB trailer \"zero\" field has nonzero value %lld."), zero);
3550 if (!read_uint (r, &block_size))
3552 if (block_size != ZBLOCK_SIZE)
3553 sys_warn (r, r->pos,
3554 _("ZLIB trailer specifies unexpected %u-byte block size."),
3557 if (!read_uint (r, &n_blocks))
/* After the 24-byte fixed part, the trailer holds one 24-byte descriptor
   per block. */
3559 if (n_blocks != (ztrailer_len - 24) / 24)
3561 sys_error (r, r->pos,
3562 _("%lld-byte ZLIB trailer specifies %u data blocks (expected "
3564 ztrailer_len, n_blocks, (ztrailer_len - 24) / 24);
/* Descriptors must tile the data without gaps: uncompressed offsets start
   at the header offset and compressed offsets start 24 bytes after it. */
3568 expected_uncmp_ofs = zheader_ofs;
3569 expected_cmp_ofs = zheader_ofs + 24;
3570 for (i = 0; i < n_blocks; i++)
3572 off_t desc_ofs = r->pos;
3573 unsigned long long int uncompressed_ofs;
3574 unsigned long long int compressed_ofs;
3575 unsigned int uncompressed_size;
3576 unsigned int compressed_size;
3578 if (!read_uint64 (r, &uncompressed_ofs)
3579 || !read_uint64 (r, &compressed_ofs)
3580 || !read_uint (r, &uncompressed_size)
3581 || !read_uint (r, &compressed_size))
3584 if (uncompressed_ofs != expected_uncmp_ofs)
3586 sys_error (r, desc_ofs,
3587 _("ZLIB block descriptor %u reported uncompressed data "
3588 "offset %#llx, when %#llx was expected."),
3589 i, uncompressed_ofs, expected_uncmp_ofs);
3593 if (compressed_ofs != expected_cmp_ofs)
3595 sys_error (r, desc_ofs,
3596 _("ZLIB block descriptor %u reported compressed data "
3597 "offset %#llx, when %#llx was expected."),
3598 i, compressed_ofs, expected_cmp_ofs);
/* Every block but the last is expected to be exactly BLOCK_SIZE bytes; the
   last one may be shorter but not longer. */
3602 if (i < n_blocks - 1)
3604 if (uncompressed_size != block_size)
3605 sys_warn (r, desc_ofs,
3606 _("ZLIB block descriptor %u reported block size %#x, "
3607 "when %#x was expected."),
3608 i, uncompressed_size, block_size);
3612 if (uncompressed_size > block_size)
3613 sys_warn (r, desc_ofs,
3614 _("ZLIB block descriptor %u reported block size %#x, "
3615 "when at most %#x was expected."),
3616 i, uncompressed_size, block_size);
3619 /* http://www.zlib.net/zlib_tech.html says that the maximum expansion
3620 from compression, with worst-case parameters, is 13.5% plus 11 bytes.
3621 This code checks for an expansion of more than 14.3% plus 11
3623 if (compressed_size > uncompressed_size + uncompressed_size / 7 + 11)
3625 sys_error (r, desc_ofs,
3626 _("ZLIB block descriptor %u reports compressed size %u "
3627 "and uncompressed size %u."),
3628 i, compressed_size, uncompressed_size);
3632 expected_uncmp_ofs += uncompressed_size;
3633 expected_cmp_ofs += compressed_size;
3636 if (expected_cmp_ofs != r->ztrailer_ofs)
3638 sys_error (r, r->pos, _("ZLIB trailer is at offset %#llx but %#llx "
3639 "would be expected from block descriptors."),
3640 r->ztrailer_ofs, expected_cmp_ofs);
3644 seek (r, zheader_ofs + 24);
/* Marks the inflated-output window as empty (zout_pos and zout_end both 0)
   and calls inflateInit() on R's z_stream.  A failure is reported through
   sys_error(). */
3649 open_zstream (struct sfm_reader *r)
3653 r->zout_pos = r->zout_end = 0;
3654 error = inflateInit (&r->zstream);
3657 sys_error (r, r->pos, _("ZLIB initialization failed (%s)."),
/* Calls inflateEnd() on R's z_stream and reports a failure through
   sys_error(). */
3665 close_zstream (struct sfm_reader *r)
3669 error = inflateEnd (&r->zstream);
3672 sys_error (r, r->pos, _("Inconsistency at end of ZLIB stream (%s)."),
/* Supplies BYTE_CNT bytes of inflated data to BUF_.  Data already inflated
   into R->zout_buf is used first; otherwise more compressed input is read
   (never past the ZLIB trailer) and inflated. */
3680 read_bytes_zlib (struct sfm_reader *r, void *buf_, size_t byte_cnt)
3682 uint8_t *buf = buf_;
3691 /* Use already inflated data if there is any. */
3692 if (r->zout_pos < r->zout_end)
3694 unsigned int n = MIN (byte_cnt, r->zout_end - r->zout_pos);
3695 memcpy (buf, &r->zout_buf[r->zout_pos], n);
3704 /* We need to inflate some more data.
3705 Get some more input data if we don't have any. */
3706 if (r->zstream.avail_in == 0)
/* Read at most one input buffer, stopping at the start of the trailer. */
3708 unsigned int n = MIN (ZIN_BUF_SIZE, r->ztrailer_ofs - r->pos);
3713 int retval = try_read_bytes (r, r->zin_buf, n);
3716 r->zstream.avail_in = n;
3717 r->zstream.next_in = r->zin_buf;
3721 /* Inflate the (remaining) input data. */
3722 r->zstream.avail_out = ZOUT_BUF_SIZE;
3723 r->zstream.next_out = r->zout_buf;
3724 error = inflate (&r->zstream, Z_SYNC_FLUSH);
3726 r->zout_end = r->zstream.next_out - r->zout_buf;
/* No output was produced: anything but a clean end of stream is reported
   as an inconsistency; at the end of a stream it is closed and a new one
   is opened. */
3727 if (r->zout_end == 0)
3729 if (error != Z_STREAM_END)
3731 sys_error (r, r->pos, _("ZLIB stream inconsistency (%s)."),
3735 else if (!close_zstream (r) || !open_zstream (r))
3740 /* Process the output data and ignore 'error' for now. ZLIB will
3741 present it to us again on the next inflate() call. */
/* Reads BYTE_CNT bytes of case data into BUF: directly with read_bytes()
   for simple compression, otherwise through read_bytes_zlib(), in which
   case an unexpected end of the ZLIB data is reported with sys_error(). */
3747 read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3749 if (r->compression == ANY_COMP_SIMPLE)
3750 return read_bytes (r, buf, byte_cnt);
3753 int retval = read_bytes_zlib (r, buf, byte_cnt);
3755 sys_error (r, r->pos, _("Unexpected end of ZLIB compressed data."));
/* Reads BYTE_CNT bytes of case data into BUF and returns the result of the
   underlying reader unchanged: try_read_bytes() for simple compression,
   read_bytes_zlib() otherwise. */
3761 try_read_compressed_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
3763 if (r->compression == ANY_COMP_SIMPLE)
3764 return try_read_bytes (r, buf, byte_cnt);
3766 return read_bytes_zlib (r, buf, byte_cnt);
3769 /* Reads a 64-bit floating-point number from R and returns its
3770 value in host format. */
/* The decoded value is stored in *D.  Only a result of exactly 1 from
   read_compressed_bytes() means NUMBER was filled; the ZLIB path forwards
   the int result of read_bytes_zlib(), so testing for zero alone would let
   other non-success results through and decode an unfilled buffer. */
3772 read_compressed_float (struct sfm_reader *r, double *d)
3776 if (read_compressed_bytes (r, number, sizeof number) != 1)
3779 *d = float_get_double (r->float_format, number);
/* Casereader class table for system files; its visible entries are the
   sys_file_casereader_read() and sys_file_casereader_destroy() callbacks. */
3783 static const struct casereader_class sys_file_casereader_class =
3785 sys_file_casereader_read,
3786 sys_file_casereader_destroy,
3791 const struct any_reader_class sys_file_reader_class =
3793 N_("SPSS System File"),