1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <data/sys-file-reader.h>
20 #include <data/sys-file-private.h>
28 #include <libpspp/assertion.h>
29 #include <libpspp/message.h>
30 #include <libpspp/compiler.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/pool.h>
33 #include <libpspp/str.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/array.h>
37 #include <data/attributes.h>
38 #include <data/case.h>
39 #include <data/casereader-provider.h>
40 #include <data/casereader.h>
41 #include <data/dictionary.h>
42 #include <data/file-handle-def.h>
43 #include <data/file-name.h>
44 #include <data/format.h>
45 #include <data/missing-values.h>
46 #include <data/short-names.h>
47 #include <data/value-labels.h>
48 #include <data/variable.h>
49 #include <data/value.h>
54 #include "unlocked-io.h"
59 #define _(msgid) gettext (msgid)
60 #define N_(msgid) (msgid)
62 /* System file reader. */
65 /* Resource tracking. */
66 struct pool *pool; /* All system file state. */
67 jmp_buf bail_out; /* longjmp() target for error handling. */
70 struct file_handle *fh; /* File handle. */
71 struct fh_lock *lock; /* Mutual exclusion for file handle. */
72 FILE *file; /* File stream. */
73 bool error; /* I/O or corruption error? */
74 struct caseproto *proto; /* Format of output cases. */
77 enum integer_format integer_format; /* On-disk integer format. */
78 enum float_format float_format; /* On-disk floating point format. */
79 int oct_cnt; /* Number of 8-byte units per case. */
80 struct sfm_var *sfm_vars; /* Variables. */
81 size_t sfm_var_cnt; /* Number of variables. */
82 casenumber case_cnt; /* Number of cases */
83 bool has_long_var_names; /* File has a long variable name map */
86 bool compressed; /* File is compressed? */
87 double bias; /* Compression bias, usually 100.0. */
88 uint8_t opcodes[8]; /* Current block of opcodes. */
89 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
92 static const struct casereader_class sys_file_casereader_class;
94 static bool close_reader (struct sfm_reader *);
96 static struct variable **make_var_by_value_idx (struct sfm_reader *,
98 static struct variable *lookup_var_by_value_idx (struct sfm_reader *,
102 static void sys_msg (struct sfm_reader *r, int class,
103 const char *format, va_list args)
104 PRINTF_FORMAT (3, 0);
105 static void sys_warn (struct sfm_reader *, const char *, ...)
106 PRINTF_FORMAT (2, 3);
107 static void sys_error (struct sfm_reader *, const char *, ...)
111 static void read_bytes (struct sfm_reader *, void *, size_t);
112 static bool try_read_bytes (struct sfm_reader *, void *, size_t);
113 static int read_int (struct sfm_reader *);
114 static double read_float (struct sfm_reader *);
115 static void read_string (struct sfm_reader *, char *, size_t);
116 static void skip_bytes (struct sfm_reader *, size_t);
118 static struct text_record *open_text_record (struct sfm_reader *, size_t size);
119 static void close_text_record (struct sfm_reader *r,
120 struct text_record *);
121 static bool read_variable_to_value_pair (struct sfm_reader *,
123 struct text_record *,
124 struct variable **var, char **value);
125 static void text_warn (struct sfm_reader *r, struct text_record *text,
126 const char *format, ...)
127 PRINTF_FORMAT (3, 4);
128 static char *text_get_token (struct text_record *,
129 struct substring delimiters);
130 static bool text_match (struct text_record *, char c);
131 static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
132 struct text_record *,
133 struct substring delimiters,
136 static bool close_reader (struct sfm_reader *r);
138 /* Dictionary reader. */
146 static void read_header (struct sfm_reader *, struct dictionary *,
147 int *weight_idx, int *claimed_oct_cnt,
148 struct sfm_read_info *);
149 static void read_variable_record (struct sfm_reader *, struct dictionary *,
150 int *format_warning_cnt);
151 static void parse_format_spec (struct sfm_reader *, unsigned int,
152 enum which_format, struct variable *,
153 int *format_warning_cnt);
154 static void setup_weight (struct sfm_reader *, int weight_idx,
155 struct variable **var_by_value_idx,
156 struct dictionary *);
157 static void read_documents (struct sfm_reader *, struct dictionary *);
158 static void read_value_labels (struct sfm_reader *, struct dictionary *,
159 struct variable **var_by_value_idx);
161 static void read_extension_record (struct sfm_reader *, struct dictionary *,
162 struct sfm_read_info *);
163 static void read_machine_integer_info (struct sfm_reader *,
164 size_t size, size_t count,
165 struct sfm_read_info *,
168 static void read_machine_float_info (struct sfm_reader *,
169 size_t size, size_t count);
170 static void read_display_parameters (struct sfm_reader *,
171 size_t size, size_t count,
172 struct dictionary *);
173 static void read_long_var_name_map (struct sfm_reader *,
174 size_t size, size_t count,
175 struct dictionary *);
176 static void read_long_string_map (struct sfm_reader *,
177 size_t size, size_t count,
178 struct dictionary *);
179 static void read_data_file_attributes (struct sfm_reader *,
180 size_t size, size_t count,
181 struct dictionary *);
182 static void read_variable_attributes (struct sfm_reader *,
183 size_t size, size_t count,
184 struct dictionary *);
186 /* Opens the system file designated by file handle FH for
187 reading. Reads the system file's dictionary into *DICT.
188 If INFO is non-null, then it receives additional info about the
191 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
192 struct sfm_read_info *volatile info)
194 struct sfm_reader *volatile r = NULL;
195 struct variable **var_by_value_idx;
196 struct sfm_read_info local_info;
197 int format_warning_cnt = 0;
202 *dict = dict_create ();
204 /* Create and initialize reader. */
205 r = pool_create_container (struct sfm_reader, pool);
211 r->has_long_var_names = false;
212 r->opcode_idx = sizeof r->opcodes;
214 /* TRANSLATORS: this fragment will be interpolated into
215 messages in fh_lock() that identify types of files. */
216 r->lock = fh_lock (fh, FH_REF_FILE, N_("system file"), FH_ACC_READ, false);
220 r->file = fn_open (fh_get_file_name (fh), "rb");
223 msg (ME, _("Error opening \"%s\" for reading as a system file: %s."),
224 fh_get_file_name (r->fh), strerror (errno));
228 /* Initialize info. */
231 memset (info, 0, sizeof *info);
233 if (setjmp (r->bail_out))
238 read_header (r, *dict, &weight_idx, &claimed_oct_cnt, info);
240 /* Read all the variable definition records. */
241 rec_type = read_int (r);
242 while (rec_type == 2)
244 read_variable_record (r, *dict, &format_warning_cnt);
245 rec_type = read_int (r);
248 /* Figure out the case format. */
249 var_by_value_idx = make_var_by_value_idx (r, *dict);
250 setup_weight (r, weight_idx, var_by_value_idx, *dict);
252 /* Read all the rest of the dictionary records. */
253 while (rec_type != 999)
258 read_value_labels (r, *dict, var_by_value_idx);
262 sys_error (r, _("Misplaced type 4 record."));
265 read_documents (r, *dict);
269 read_extension_record (r, *dict, info);
273 sys_error (r, _("Unrecognized record type %d."), rec_type);
275 rec_type = read_int (r);
279 if ( ! r->has_long_var_names )
282 for (i = 0; i < dict_get_var_cnt (*dict); i++)
284 struct variable *var = dict_get_var (*dict, i);
285 char short_name[SHORT_NAME_LEN + 1];
286 char long_name[SHORT_NAME_LEN + 1];
288 strcpy (short_name, var_get_name (var));
290 strcpy (long_name, short_name);
291 str_lowercase (long_name);
293 /* Set long name. Renaming a variable may clear the short
294 name, but we want to retain it, so re-set it
296 dict_rename_var (*dict, var, long_name);
297 var_set_short_name (var, 0, short_name);
300 r->has_long_var_names = true;
303 /* Read record 999 data, which is just filler. */
306 /* Warn if the actual amount of data per case differs from the
307 amount that the header claims. SPSS version 13 gets this
308 wrong when very long strings are involved, so don't warn in
310 if (claimed_oct_cnt != -1 && claimed_oct_cnt != r->oct_cnt
311 && info->version_major != 13)
312 sys_warn (r, _("File header claims %d variable positions but "
313 "%d were read from file."),
314 claimed_oct_cnt, r->oct_cnt);
316 /* Create an index of dictionary variable widths for
317 sfm_read_case to use. We cannot use the `struct variable's
318 from the dictionary we created, because the caller owns the
319 dictionary and may destroy or modify its variables. */
320 sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt);
321 pool_register (r->pool, free, r->sfm_vars);
322 r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool);
324 pool_free (r->pool, var_by_value_idx);
325 return casereader_create_sequential
327 r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt,
328 &sys_file_casereader_class, r);
332 dict_destroy (*dict);
337 /* Closes a system file after we're done with it.
338 Returns true if an I/O error has occurred on READER, false
341 close_reader (struct sfm_reader *r)
350 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
352 msg (ME, _("Error closing system file \"%s\": %s."),
353 fh_get_file_name (r->fh), strerror (errno));
363 pool_destroy (r->pool);
368 /* Destroys READER. */
370 sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
372 struct sfm_reader *r = r_;
376 /* Returns true if FILE is an SPSS system file,
379 sfm_detect (FILE *file)
383 if (fread (rec_type, 4, 1, file) != 1)
387 return !strcmp ("$FL2", rec_type);
390 /* Reads the global header of the system file.
391 Sets DICT's file label to the system file's label.
392 Sets *WEIGHT_IDX to 0 if the system file is unweighted,
393 or to the value index of the weight variable otherwise.
394 Sets *CLAIMED_OCT_CNT to the number of "octs" (8-byte units)
395 per case that the file claims to have (although it is not
397 Initializes INFO with header information. */
399 read_header (struct sfm_reader *r, struct dictionary *dict,
400 int *weight_idx, int *claimed_oct_cnt,
401 struct sfm_read_info *info)
404 char eye_catcher[61];
405 uint8_t raw_layout_code[4];
407 char creation_date[10];
408 char creation_time[9];
410 struct substring file_label_ss;
411 struct substring product;
413 read_string (r, rec_type, sizeof rec_type);
414 read_string (r, eye_catcher, sizeof eye_catcher);
416 if (strcmp ("$FL2", rec_type) != 0)
417 sys_error (r, _("This is not an SPSS system file."));
419 /* Identify integer format. */
420 read_bytes (r, raw_layout_code, sizeof raw_layout_code);
421 if ((!integer_identify (2, raw_layout_code, sizeof raw_layout_code,
423 && !integer_identify (3, raw_layout_code, sizeof raw_layout_code,
425 || (r->integer_format != INTEGER_MSB_FIRST
426 && r->integer_format != INTEGER_LSB_FIRST))
427 sys_error (r, _("This is not an SPSS system file."));
429 *claimed_oct_cnt = read_int (r);
430 if (*claimed_oct_cnt < 0 || *claimed_oct_cnt > INT_MAX / 16)
431 *claimed_oct_cnt = -1;
433 r->compressed = read_int (r) != 0;
435 *weight_idx = read_int (r);
437 r->case_cnt = read_int (r);
438 if ( r->case_cnt > INT_MAX / 2)
442 /* Identify floating-point format and obtain compression bias. */
443 read_bytes (r, raw_bias, sizeof raw_bias);
444 if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0)
446 sys_warn (r, _("Compression bias is not the usual "
447 "value of 100, or system file uses unrecognized "
448 "floating-point format."));
449 if (r->integer_format == INTEGER_MSB_FIRST)
450 r->float_format = FLOAT_IEEE_DOUBLE_BE;
452 r->float_format = FLOAT_IEEE_DOUBLE_LE;
454 float_convert (r->float_format, raw_bias, FLOAT_NATIVE_DOUBLE, &r->bias);
456 read_string (r, creation_date, sizeof creation_date);
457 read_string (r, creation_time, sizeof creation_time);
458 read_string (r, file_label, sizeof file_label);
461 file_label_ss = ss_cstr (file_label);
462 ss_trim (&file_label_ss, ss_cstr (" "));
463 if (!ss_is_empty (file_label_ss))
465 ss_data (file_label_ss)[ss_length (file_label_ss)] = '\0';
466 dict_set_label (dict, ss_data (file_label_ss));
469 strcpy (info->creation_date, creation_date);
470 strcpy (info->creation_time, creation_time);
471 info->integer_format = r->integer_format;
472 info->float_format = r->float_format;
473 info->compressed = r->compressed;
474 info->case_cnt = r->case_cnt;
476 product = ss_cstr (eye_catcher);
477 ss_match_string (&product, ss_cstr ("@(#) SPSS DATA FILE"));
478 ss_trim (&product, ss_cstr (" "));
479 str_copy_buf_trunc (info->product, sizeof info->product,
480 ss_data (product), ss_length (product));
483 /* Reads a variable (type 2) record from R and adds the
484 corresponding variable to DICT.
485 Also skips past additional variable records for long string
488 read_variable_record (struct sfm_reader *r, struct dictionary *dict,
489 int *format_warning_cnt)
492 int has_variable_label;
493 int missing_value_code;
498 struct variable *var;
501 width = read_int (r);
502 has_variable_label = read_int (r);
503 missing_value_code = read_int (r);
504 print_format = read_int (r);
505 write_format = read_int (r);
506 read_string (r, name, sizeof name);
507 name[strcspn (name, " ")] = '\0';
509 /* Check variable name. */
510 if (name[0] == '$' || name[0] == '#')
511 sys_error (r, "Variable name begins with invalid character `%c'.",
513 if (!var_is_plausible_name (name, false))
514 sys_error (r, _("Invalid variable name `%s'."), name);
516 /* Create variable. */
517 if (width < 0 || width > 255)
518 sys_error (r, _("Bad variable width %d."), width);
519 var = dict_create_var (dict, name, width);
522 _("Duplicate variable name `%s' within system file."),
525 /* Set the short name the same as the long name. */
526 var_set_short_name (var, 0, var_get_name (var));
528 /* Get variable label, if any. */
529 if (has_variable_label != 0 && has_variable_label != 1)
530 sys_error (r, _("Variable label indicator field is not 0 or 1."));
531 if (has_variable_label == 1)
537 if (len >= sizeof label)
538 sys_error (r, _("Variable %s has label of invalid length %zu."),
540 read_string (r, label, len + 1);
541 var_set_label (var, label);
543 skip_bytes (r, ROUND_UP (len, 4) - len);
546 /* Set missing values. */
547 if (missing_value_code != 0)
549 struct missing_values mv;
552 mv_init (&mv, var_get_width (var));
553 if (var_is_numeric (var))
555 if (missing_value_code < -3 || missing_value_code > 3
556 || missing_value_code == -1)
557 sys_error (r, _("Numeric missing value indicator field is not "
558 "-3, -2, 0, 1, 2, or 3."));
559 if (missing_value_code < 0)
561 double low = read_float (r);
562 double high = read_float (r);
563 mv_add_range (&mv, low, high);
564 missing_value_code = -missing_value_code - 2;
566 for (i = 0; i < missing_value_code; i++)
567 mv_add_num (&mv, read_float (r));
571 if (missing_value_code < 1 || missing_value_code > 3)
572 sys_error (r, _("String missing value indicator field is not "
574 if (var_is_long_string (var))
575 sys_warn (r, _("Ignoring missing values on long string variable "
576 "%s, which PSPP does not yet support."), name);
577 for (i = 0; i < missing_value_code; i++)
580 read_string (r, string, sizeof string);
581 mv_add_str (&mv, string);
584 if (!var_is_long_string (var))
585 var_set_missing_values (var, &mv);
589 parse_format_spec (r, print_format, PRINT_FORMAT, var, format_warning_cnt);
590 parse_format_spec (r, write_format, WRITE_FORMAT, var, format_warning_cnt);
592 /* Account for values.
593 Skip long string continuation records, if any. */
594 nv = width == 0 ? 1 : DIV_RND_UP (width, 8);
600 for (i = 1; i < nv; i++)
602 /* Check for record type 2 and width -1. */
603 if (read_int (r) != 2 || read_int (r) != -1)
604 sys_error (r, _("Missing string continuation record."));
606 /* Skip and ignore remaining continuation data. */
607 has_variable_label = read_int (r);
608 missing_value_code = read_int (r);
609 print_format = read_int (r);
610 write_format = read_int (r);
611 read_string (r, name, sizeof name);
613 /* Variable label fields on continuation records have
614 been spotted in system files created by "SPSS Power
615 Macintosh Release 6.1". */
616 if (has_variable_label)
617 skip_bytes (r, ROUND_UP (read_int (r), 4));
622 /* Translates the format spec from sysfile format to internal
625 parse_format_spec (struct sfm_reader *r, unsigned int s,
626 enum which_format which, struct variable *v,
627 int *format_warning_cnt)
629 const int max_format_warnings = 8;
631 uint8_t raw_type = s >> 16;
637 if (!fmt_from_io (raw_type, &f.type))
638 sys_error (r, _("Unknown variable format %"PRIu8"."), raw_type);
643 ok = fmt_check_output (&f) && fmt_check_width_compat (&f, var_get_width (v));
648 if (which == PRINT_FORMAT)
649 var_set_print_format (v, &f);
651 var_set_write_format (v, &f);
653 else if (*++format_warning_cnt <= max_format_warnings)
655 char fmt_string[FMT_STRING_LEN_MAX + 1];
656 sys_warn (r, _("%s variable %s has invalid %s format %s."),
657 var_is_numeric (v) ? _("Numeric") : _("String"),
659 which == PRINT_FORMAT ? _("print") : _("write"),
660 fmt_to_string (&f, fmt_string));
662 if (*format_warning_cnt == max_format_warnings)
663 sys_warn (r, _("Suppressing further invalid format warnings."));
667 /* Sets the weighting variable in DICT to the variable
668 corresponding to the given 1-based VALUE_IDX, if VALUE_IDX is
671 setup_weight (struct sfm_reader *r, int weight_idx,
672 struct variable **var_by_value_idx, struct dictionary *dict)
676 struct variable *weight_var
677 = lookup_var_by_value_idx (r, var_by_value_idx, weight_idx);
678 if (var_is_numeric (weight_var))
679 dict_set_weight (dict, weight_var);
681 sys_error (r, _("Weighting variable must be numeric."));
685 /* Reads a document record, type 6, from system file R, and sets up
686 the documents and n_documents fields in the associated
689 read_documents (struct sfm_reader *r, struct dictionary *dict)
694 if (dict_get_documents (dict) != NULL)
695 sys_error (r, _("Multiple type 6 (document) records."));
697 line_cnt = read_int (r);
699 sys_error (r, _("Number of document lines (%d) "
700 "must be greater than 0."), line_cnt);
702 documents = pool_nmalloc (r->pool, line_cnt + 1, DOC_LINE_LENGTH);
703 read_string (r, documents, DOC_LINE_LENGTH * line_cnt + 1);
704 if (strlen (documents) == DOC_LINE_LENGTH * line_cnt)
705 dict_set_documents (dict, documents);
707 sys_error (r, _("Document line contains null byte."));
708 pool_free (r->pool, documents);
711 /* Read a type 7 extension record. */
713 read_extension_record (struct sfm_reader *r, struct dictionary *dict,
714 struct sfm_read_info *info)
716 int subtype = read_int (r);
717 size_t size = read_int (r);
718 size_t count = read_int (r);
719 size_t bytes = size * count;
721 /* Check that SIZE * COUNT + 1 doesn't overflow. Adding 1
722 allows an extra byte for a null terminator, used by some
723 extension processing routines. */
724 if (size != 0 && size_overflow_p (xsum (1, xtimes (count, size))))
725 sys_error (r, "Record type 7 subtype %d too large.", subtype);
730 read_machine_integer_info (r, size, count, info, dict);
734 read_machine_float_info (r, size, count);
738 /* Variable sets information. We don't use these yet.
739 They only apply to GUIs; see VARSETS on the APPLY
740 DICTIONARY command in SPSS documentation. */
744 /* DATE variable information. We don't use it yet, but we
749 /* Used by the MRSETS command. */
753 /* Used by the SPSS Data Entry software. */
757 read_display_parameters (r, size, count, dict);
761 read_long_var_name_map (r, size, count, dict);
765 read_long_string_map (r, size, count, dict);
769 /* New in SPSS v14? Unknown purpose. */
773 read_data_file_attributes (r, size, count, dict);
777 read_variable_attributes (r, size, count, dict);
781 /* New in SPSS 16. Contains a single string that describes
782 the character encoding, e.g. "windows-1252". */
784 char *encoding = xcalloc (size, count + 1);
785 read_string (r, encoding, count + 1);
786 dict_set_encoding (dict, encoding);
791 /* New in SPSS 16. Encodes value labels for long string
793 sys_warn (r, _("Ignoring value labels for long string variables, "
794 "which PSPP does not yet support."));
798 sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
799 subtype, PACKAGE_BUGREPORT);
803 skip_bytes (r, bytes);
806 /* Read record type 7, subtype 3. */
808 read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count,
809 struct sfm_read_info *info,
810 struct dictionary *dict)
812 int version_major = read_int (r);
813 int version_minor = read_int (r);
814 int version_revision = read_int (r);
815 int machine_code UNUSED = read_int (r);
816 int float_representation = read_int (r);
817 int compression_code UNUSED = read_int (r);
818 int integer_representation = read_int (r);
819 int character_code = read_int (r);
821 int expected_float_format;
822 int expected_integer_format;
824 if (size != 4 || count != 8)
825 sys_error (r, _("Bad size (%zu) or count (%zu) field on record type 7, "
829 /* Save version info. */
830 info->version_major = version_major;
831 info->version_minor = version_minor;
832 info->version_revision = version_revision;
834 /* Check floating point format. */
835 if (r->float_format == FLOAT_IEEE_DOUBLE_BE
836 || r->float_format == FLOAT_IEEE_DOUBLE_LE)
837 expected_float_format = 1;
838 else if (r->float_format == FLOAT_Z_LONG)
839 expected_float_format = 2;
840 else if (r->float_format == FLOAT_VAX_G || r->float_format == FLOAT_VAX_D)
841 expected_float_format = 3;
844 if (float_representation != expected_float_format)
845 sys_error (r, _("Floating-point representation indicated by "
846 "system file (%d) differs from expected (%d)."),
847 r->float_format, expected_float_format);
849 /* Check integer format. */
850 if (r->integer_format == INTEGER_MSB_FIRST)
851 expected_integer_format = 1;
852 else if (r->integer_format == INTEGER_LSB_FIRST)
853 expected_integer_format = 2;
856 if (integer_representation != expected_integer_format)
858 static const char *const endian[] = {N_("little-endian"), N_("big-endian")};
859 sys_warn (r, _("Integer format indicated by system file (%s) "
860 "differs from expected (%s)."),
861 gettext (endian[integer_representation == 1]),
862 gettext (endian[expected_integer_format == 1]));
867 Record 7 (20) provides a much more reliable way of
868 setting the encoding.
869 The character_code is used as a fallback only.
871 if ( NULL == dict_get_encoding (dict))
873 switch (character_code)
876 dict_set_encoding (dict, "EBCDIC-US");
880 /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
881 respectively. However, there are known to be many files
882 in the wild with character code 2, yet have data which are
884 Therefore we ignore these values.
888 dict_set_encoding (dict, "MS_KANJI");
891 dict_set_encoding (dict, "UTF-7");
894 dict_set_encoding (dict, "UTF-8");
899 snprintf (enc, 100, "CP%d", character_code);
900 dict_set_encoding (dict, enc);
907 /* Read record type 7, subtype 4. */
909 read_machine_float_info (struct sfm_reader *r, size_t size, size_t count)
911 double sysmis = read_float (r);
912 double highest = read_float (r);
913 double lowest = read_float (r);
915 if (size != 8 || count != 3)
916 sys_error (r, _("Bad size (%zu) or count (%zu) on extension 4."),
919 if (sysmis != SYSMIS)
920 sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis);
921 if (highest != HIGHEST)
922 sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest);
923 if (lowest != LOWEST)
924 sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest);
927 /* Read record type 7, subtype 11, which specifies how variables
928 should be displayed in GUI environments. */
930 read_display_parameters (struct sfm_reader *r, size_t size, size_t count,
931 struct dictionary *dict)
940 sys_warn (r, _("Bad size %zu on extension 11."), size);
941 skip_bytes (r, size * count);
945 n_vars = dict_get_var_cnt (dict);
946 if (count == 3 * n_vars)
947 includes_width = true;
948 else if (count == 2 * n_vars)
949 includes_width = false;
952 sys_warn (r, _("Extension 11 has bad count %zu (for %zu variables)."),
954 skip_bytes (r, size * count);
958 for (i = 0; i < n_vars; ++i)
960 struct variable *v = dict_get_var (dict, i);
961 int measure = read_int (r);
962 int width = includes_width ? read_int (r) : 0;
963 int align = read_int (r);
965 /* SPSS 14 sometimes seems to set string variables' measure
967 if (0 == measure && var_is_alpha (v))
970 if (measure < 1 || measure > 3 || align < 0 || align > 2)
973 sys_warn (r, _("Invalid variable display parameters "
974 "for variable %zu (%s). "
975 "Default parameters substituted."),
976 i, var_get_name (v));
981 var_set_measure (v, (measure == 1 ? MEASURE_NOMINAL
982 : measure == 2 ? MEASURE_ORDINAL
984 var_set_alignment (v, (align == 0 ? ALIGN_LEFT
985 : align == 1 ? ALIGN_RIGHT
988 /* Older versions (SPSS 9.0) sometimes set the display
989 width to zero. This causes confusion in the GUI, so
990 only set the width if it is nonzero. */
992 var_set_display_width (v, width);
996 /* Reads record type 7, subtype 13, which gives the long name
997 that corresponds to each short name. Modifies variable names
998 in DICT accordingly. */
1000 read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
1001 struct dictionary *dict)
1003 struct text_record *text;
1004 struct variable *var;
1007 text = open_text_record (r, size * count);
1008 while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
1011 size_t short_name_cnt;
1014 /* Validate long name. */
1015 if (!var_is_valid_name (long_name, false))
1017 sys_warn (r, _("Long variable mapping from %s to invalid "
1018 "variable name `%s'."),
1019 var_get_name (var), long_name);
1023 /* Identify any duplicates. */
1024 if (strcasecmp (var_get_short_name (var, 0), long_name)
1025 && dict_lookup_var (dict, long_name) != NULL)
1027 sys_warn (r, _("Duplicate long variable name `%s' "
1028 "within system file."), long_name);
1032 /* Renaming a variable may clear its short names, but we
1033 want to retain them, so we save them and re-set them
1035 short_name_cnt = var_get_short_name_cnt (var);
1036 short_names = xnmalloc (short_name_cnt, sizeof *short_names);
1037 for (i = 0; i < short_name_cnt; i++)
1039 const char *s = var_get_short_name (var, i);
1040 short_names[i] = s != NULL ? xstrdup (s) : NULL;
1043 /* Set long name. */
1044 dict_rename_var (dict, var, long_name);
1046 /* Restore short names. */
1047 for (i = 0; i < short_name_cnt; i++)
1049 var_set_short_name (var, i, short_names[i]);
1050 free (short_names[i]);
1054 close_text_record (r, text);
1055 r->has_long_var_names = true;
1058 /* Reads record type 7, subtype 14, which gives the real length
1059 of each very long string. Rearranges DICT accordingly. */
1061 read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
1062 struct dictionary *dict)
1064 struct text_record *text;
1065 struct variable *var;
1068 text = open_text_record (r, size * count);
1069 while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
1071 size_t idx = var_get_dict_index (var);
1077 length = strtol (length_s, NULL, 10);
1078 if (length < 1 || length > MAX_STRING)
1080 sys_warn (r, _("%s listed as string of invalid length %s "
1081 "in very length string record."),
1082 var_get_name (var), length_s);
1086 /* Check segments. */
1087 segment_cnt = sfm_width_to_segments (length);
1088 if (segment_cnt == 1)
1090 sys_warn (r, _("%s listed in very long string record with width %s, "
1091 "which requires only one segment."),
1092 var_get_name (var), length_s);
1095 if (idx + segment_cnt > dict_get_var_cnt (dict))
1096 sys_error (r, _("Very long string %s overflows dictionary."),
1097 var_get_name (var));
1099 /* Get the short names from the segments and check their
1101 for (i = 0; i < segment_cnt; i++)
1103 struct variable *seg = dict_get_var (dict, idx + i);
1104 int alloc_width = sfm_segment_alloc_width (length, i);
1105 int width = var_get_width (seg);
1108 var_set_short_name (var, i, var_get_short_name (seg, 0));
1109 if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8))
1110 sys_error (r, _("Very long string with width %ld has segment %d "
1111 "of width %d (expected %d)"),
1112 length, i, width, alloc_width);
1114 dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
1115 var_set_width (var, length);
1117 close_text_record (r, text);
1118 dict_compact_values (dict);
1121 /* Reads value labels from sysfile H and inserts them into the
1122 associated dictionary. */
1124 read_value_labels (struct sfm_reader *r,
1125 struct dictionary *dict, struct variable **var_by_value_idx)
1127 struct pool *subpool;
1131 char raw_value[8]; /* Value as uninterpreted bytes. */
1132 union value value; /* Value. */
1133 char *label; /* Null-terminated label string. */
1136 struct label *labels = NULL;
1137 int label_cnt; /* Number of labels. */
1139 struct variable **var = NULL; /* Associated variables. */
1140 int var_cnt; /* Number of associated variables. */
1141 int max_width; /* Maximum width of string variables. */
1145 subpool = pool_create_subpool (r->pool);
1147 /* Read the type 3 record and record its contents. We can't do
1148 much with the data yet because we don't know whether it is
1149 of numeric or string type. */
1151 /* Read number of labels. */
1152 label_cnt = read_int (r);
1154 if (size_overflow_p (xtimes (label_cnt, sizeof *labels)))
1156 sys_warn (r, _("Invalid number of labels: %d. Ignoring labels."),
1161 /* Read each value/label tuple into labels[]. */
1162 labels = pool_nalloc (subpool, label_cnt, sizeof *labels);
1163 for (i = 0; i < label_cnt; i++)
1165 struct label *label = labels + i;
1166 unsigned char label_len;
1170 read_bytes (r, label->raw_value, sizeof label->raw_value);
1172 /* Read label length. */
1173 read_bytes (r, &label_len, sizeof label_len);
1174 padded_len = ROUND_UP (label_len + 1, 8);
1176 /* Read label, padding. */
1177 label->label = pool_alloc (subpool, padded_len + 1);
1178 read_bytes (r, label->label, padded_len - 1);
1179 label->label[label_len] = 0;
1182 /* Now, read the type 4 record that has the list of variables
1183 to which the value labels are to be applied. */
1185 /* Read record type of type 4 record. */
1186 if (read_int (r) != 4)
1187 sys_error (r, _("Variable index record (type 4) does not immediately "
1188 "follow value label record (type 3) as it should."));
1190 /* Read number of variables associated with value label from type 4
1192 var_cnt = read_int (r);
1193 if (var_cnt < 1 || var_cnt > dict_get_var_cnt (dict))
1194 sys_error (r, _("Number of variables associated with a value label (%d) "
1195 "is not between 1 and the number of variables (%zu)."),
1196 var_cnt, dict_get_var_cnt (dict));
1198 /* Read the list of variables. */
1199 var = pool_nalloc (subpool, var_cnt, sizeof *var);
1201 for (i = 0; i < var_cnt; i++)
1203 var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r));
1204 if (var_is_long_string (var[i]))
1205 sys_error (r, _("Value labels are not allowed on long string "
1206 "variables (%s)."), var_get_name (var[i]));
1207 max_width = MAX (max_width, var_get_width (var[i]));
1210 /* Type check the variables. */
1211 for (i = 1; i < var_cnt; i++)
1212 if (var_get_type (var[i]) != var_get_type (var[0]))
1213 sys_error (r, _("Variables associated with value label are not all of "
1214 "identical type. Variable %s is %s, but variable "
1216 var_get_name (var[0]),
1217 var_is_numeric (var[0]) ? _("numeric") : _("string"),
1218 var_get_name (var[i]),
1219 var_is_numeric (var[i]) ? _("numeric") : _("string"));
1221 /* Fill in labels[].value, now that we know the desired type. */
1222 for (i = 0; i < label_cnt; i++)
1224 struct label *label = labels + i;
1226 value_init_pool (subpool, &label->value, max_width);
1227 if (var_is_alpha (var[0]))
1228 buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
1229 label->raw_value, sizeof label->raw_value, ' ');
1231 label->value.f = float_get_double (r->float_format, label->raw_value);
1234 /* Assign the `value_label's to each variable. */
1235 for (i = 0; i < var_cnt; i++)
1237 struct variable *v = var[i];
1240 /* Add each label to the variable. */
1241 for (j = 0; j < label_cnt; j++)
1243 struct label *label = &labels[j];
1244 if (!var_add_value_label (v, &label->value, label->label))
1246 if (var_is_numeric (var[0]))
1247 sys_warn (r, _("Duplicate value label for %g on %s."),
1248 label->value.f, var_get_name (v));
1250 sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."),
1251 max_width, value_str (&label->value, max_width),
1257 pool_destroy (subpool);
1260 /* Reads a set of custom attributes from TEXT into ATTRS.
1261 ATTRS may be a null pointer, in which case the attributes are
1262 read but discarded. */
1264 read_attributes (struct sfm_reader *r, struct text_record *text,
1265 struct attrset *attrs)
1269 struct attribute *attr;
1273 /* Parse the key. */
1274 key = text_get_token (text, ss_cstr ("("));
1278 attr = attribute_create (key);
1279 for (index = 1; ; index++)
1281 /* Parse the value. */
1285 value = text_get_token (text, ss_cstr ("\n"));
1288 text_warn (r, text, _("Error parsing attribute value %s[%d]"),
1293 length = strlen (value);
1294 if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
1296 value[length - 1] = '\0';
1297 attribute_add_value (attr, value + 1);
1302 _("Attribute value %s[%d] is not quoted: %s"),
1304 attribute_add_value (attr, value);
1307 /* Was this the last value for this attribute? */
1308 if (text_match (text, ')'))
1312 attrset_add (attrs, attr);
1314 attribute_destroy (attr);
1316 while (!text_match (text, '/'));
1319 /* Reads record type 7, subtype 17, which lists custom
1320 attributes on the data file. */
1322 read_data_file_attributes (struct sfm_reader *r,
1323 size_t size, size_t count,
1324 struct dictionary *dict)
1326 struct text_record *text = open_text_record (r, size * count);
1327 read_attributes (r, text, dict_get_attributes (dict));
1328 close_text_record (r, text);
1331 /* Reads record type 7, subtype 18, which lists custom
1332 attributes on individual variables. */
1334 read_variable_attributes (struct sfm_reader *r,
1335 size_t size, size_t count,
1336 struct dictionary *dict)
1338 struct text_record *text = open_text_record (r, size * count);
1341 struct variable *var;
1342 if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var))
1344 read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL);
1346 close_text_record (r, text);
1352 static void partial_record (struct sfm_reader *r)
1355 static void read_error (struct casereader *, const struct sfm_reader *);
1357 static bool read_case_number (struct sfm_reader *, double *);
1358 static bool read_case_string (struct sfm_reader *, char *, size_t);
1359 static int read_opcode (struct sfm_reader *);
1360 static bool read_compressed_number (struct sfm_reader *, double *);
1361 static bool read_compressed_string (struct sfm_reader *, char *);
1362 static bool read_whole_strings (struct sfm_reader *, char *, size_t);
1363 static bool skip_whole_strings (struct sfm_reader *, size_t);
1365 /* Reads and returns one case from READER's file. Returns a null
1366 pointer if not successful. */
1367 static struct ccase *
1368 sys_file_casereader_read (struct casereader *reader, void *r_)
1370 struct sfm_reader *r = r_;
1371 struct ccase *volatile c;
1377 c = case_create (r->proto);
1378 if (setjmp (r->bail_out))
1380 casereader_force_error (reader);
1385 for (i = 0; i < r->sfm_var_cnt; i++)
1387 struct sfm_var *sv = &r->sfm_vars[i];
1388 union value *v = case_data_rw_idx (c, sv->case_index);
1390 if (sv->var_width == 0)
1392 if (!read_case_number (r, &v->f))
1397 char *s = value_str_rw (v, sv->var_width);
1398 if (!read_case_string (r, s + sv->offset, sv->segment_width))
1400 if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
1410 if (r->case_cnt != -1)
1411 read_error (reader, r);
1415 /* Issues an error that R ends in a partial record. */
1417 partial_record (struct sfm_reader *r)
1419 sys_error (r, _("File ends in partial case."));
1422 /* Issues an error that an unspecified error occurred SFM, and
1425 read_error (struct casereader *r, const struct sfm_reader *sfm)
1427 msg (ME, _("Error reading case from file %s."), fh_get_name (sfm->fh));
1428 casereader_force_error (r);
1431 /* Reads a number from R and stores its value in *D.
1432 If R is compressed, reads a compressed number;
1433 otherwise, reads a number in the regular way.
1434 Returns true if successful, false if end of file is
1435 reached immediately. */
1437 read_case_number (struct sfm_reader *r, double *d)
1442 if (!try_read_bytes (r, number, sizeof number))
1444 float_convert (r->float_format, number, FLOAT_NATIVE_DOUBLE, d);
1448 return read_compressed_number (r, d);
1451 /* Reads LENGTH string bytes from R into S.
1452 Always reads a multiple of 8 bytes; if LENGTH is not a
1453 multiple of 8, then extra bytes are read and discarded without
1455 Reads compressed strings if S is compressed.
1456 Returns true if successful, false if end of file is
1457 reached immediately. */
1459 read_case_string (struct sfm_reader *r, char *s, size_t length)
1461 size_t whole = ROUND_DOWN (length, 8);
1462 size_t partial = length % 8;
1466 if (!read_whole_strings (r, s, whole))
1473 if (!read_whole_strings (r, bounce, sizeof bounce))
1479 memcpy (s + whole, bounce, partial);
1485 /* Reads and returns the next compression opcode from R. */
1487 read_opcode (struct sfm_reader *r)
1489 assert (r->compressed);
1493 if (r->opcode_idx >= sizeof r->opcodes)
1495 if (!try_read_bytes (r, r->opcodes, sizeof r->opcodes))
1499 opcode = r->opcodes[r->opcode_idx++];
1506 /* Reads a compressed number from R and stores its value in D.
1507 Returns true if successful, false if end of file is
1508 reached immediately. */
1510 read_compressed_number (struct sfm_reader *r, double *d)
1512 int opcode = read_opcode (r);
1520 *d = read_float (r);
1524 sys_error (r, _("Compressed data is corrupt."));
1531 *d = opcode - r->bias;
1538 /* Reads a compressed 8-byte string segment from R and stores it
1540 Returns true if successful, false if end of file is
1541 reached immediately. */
1543 read_compressed_string (struct sfm_reader *r, char *dst)
1545 switch (read_opcode (r))
1552 read_bytes (r, dst, 8);
1556 memset (dst, ' ', 8);
1560 sys_error (r, _("Compressed data is corrupt."));
1566 /* Reads LENGTH string bytes from R into S.
1567 LENGTH must be a multiple of 8.
1568 Reads compressed strings if S is compressed.
1569 Returns true if successful, false if end of file is
1570 reached immediately. */
1572 read_whole_strings (struct sfm_reader *r, char *s, size_t length)
1574 assert (length % 8 == 0);
1576 return try_read_bytes (r, s, length);
1580 for (ofs = 0; ofs < length; ofs += 8)
1581 if (!read_compressed_string (r, s + ofs))
1591 /* Skips LENGTH string bytes from R.
1592 LENGTH must be a multiple of 8.
1593 (LENGTH is also limited to 1024, but that's only because the
1594 current caller never needs more than that many bytes.)
1595 Returns true if successful, false if end of file is
1596 reached immediately. */
1598 skip_whole_strings (struct sfm_reader *r, size_t length)
1601 assert (length < sizeof buffer);
1602 return read_whole_strings (r, buffer, length);
1605 /* Creates and returns a table that can be used for translating a value
1606 index into a case to a "struct variable *" for DICT. Multiple
1607 system file fields reference variables this way.
1609 This table must be created before processing the very long
1610 string extension record, because that record causes some
1611 values to be deleted from the case and the dictionary to be
1613 static struct variable **
1614 make_var_by_value_idx (struct sfm_reader *r, struct dictionary *dict)
1616 struct variable **var_by_value_idx;
1620 var_by_value_idx = pool_nmalloc (r->pool,
1621 r->oct_cnt, sizeof *var_by_value_idx);
1622 for (i = 0; i < dict_get_var_cnt (dict); i++)
1624 struct variable *v = dict_get_var (dict, i);
1625 int nv = var_is_numeric (v) ? 1 : DIV_RND_UP (var_get_width (v), 8);
1628 var_by_value_idx[value_idx++] = v;
1629 for (j = 1; j < nv; j++)
1630 var_by_value_idx[value_idx++] = NULL;
1632 assert (value_idx == r->oct_cnt);
1634 return var_by_value_idx;
1637 /* Returns the "struct variable" corresponding to the given
1638 1-basd VALUE_IDX in VAR_BY_VALUE_IDX. Verifies that the index
1640 static struct variable *
1641 lookup_var_by_value_idx (struct sfm_reader *r,
1642 struct variable **var_by_value_idx, int value_idx)
1644 struct variable *var;
1646 if (value_idx < 1 || value_idx > r->oct_cnt)
1647 sys_error (r, _("Variable index %d not in valid range 1...%d."),
1648 value_idx, r->oct_cnt);
1650 var = var_by_value_idx[value_idx - 1];
1652 sys_error (r, _("Variable index %d refers to long string "
1659 /* Returns the variable in D with the given SHORT_NAME,
1660 or a null pointer if there is none. */
1661 static struct variable *
1662 lookup_var_by_short_name (struct dictionary *d, const char *short_name)
1664 struct variable *var;
1668 /* First try looking up by full name. This often succeeds. */
1669 var = dict_lookup_var (d, short_name);
1670 if (var != NULL && !strcasecmp (var_get_short_name (var, 0), short_name))
1673 /* Iterate through the whole dictionary as a fallback. */
1674 var_cnt = dict_get_var_cnt (d);
1675 for (i = 0; i < var_cnt; i++)
1677 var = dict_get_var (d, i);
1678 if (!strcasecmp (var_get_short_name (var, 0), short_name))
1685 /* Helpers for reading records that contain structured text
1688 /* Maximum number of warnings to issue for a single text
1690 #define MAX_TEXT_WARNINGS 5
1695 struct substring buffer; /* Record contents. */
1696 size_t pos; /* Current position in buffer. */
1697 int n_warnings; /* Number of warnings issued or suppressed. */
1700 /* Reads SIZE bytes into a text record for R,
1701 and returns the new text record. */
1702 static struct text_record *
1703 open_text_record (struct sfm_reader *r, size_t size)
1705 struct text_record *text = pool_alloc (r->pool, sizeof *text);
1706 char *buffer = pool_malloc (r->pool, size + 1);
1707 read_bytes (r, buffer, size);
1708 text->buffer = ss_buffer (buffer, size);
1710 text->n_warnings = 0;
1714 /* Closes TEXT, frees its storage, and issues a final warning
1715 about suppressed warnings if necesary. */
1717 close_text_record (struct sfm_reader *r, struct text_record *text)
1719 if (text->n_warnings > MAX_TEXT_WARNINGS)
1720 sys_warn (r, _("Suppressed %d additional related warnings."),
1721 text->n_warnings - MAX_TEXT_WARNINGS);
1722 pool_free (r->pool, ss_data (text->buffer));
1725 /* Reads a variable=value pair from TEXT.
1726 Looks up the variable in DICT and stores it into *VAR.
1727 Stores a null-terminated value into *VALUE. */
1729 read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
1730 struct text_record *text,
1731 struct variable **var, char **value)
1735 if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
1738 *value = text_get_token (text, ss_buffer ("\t\0", 2));
1742 text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
1743 ss_buffer ("\t\0", 2));
1751 text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
1752 struct text_record *text, struct substring delimiters,
1753 struct variable **var)
1755 char *short_name = text_get_token (text, delimiters);
1756 if (short_name == NULL)
1759 *var = lookup_var_by_short_name (dict, short_name);
1761 text_warn (r, text, _("Variable map refers to unknown variable %s."),
1766 /* Displays a warning for the current file position, limiting the
1767 number to MAX_TEXT_WARNINGS for TEXT. */
1769 text_warn (struct sfm_reader *r, struct text_record *text,
1770 const char *format, ...)
1772 if (text->n_warnings++ < MAX_TEXT_WARNINGS)
1776 va_start (args, format);
1777 sys_msg (r, MW, format, args);
1783 text_get_token (struct text_record *text, struct substring delimiters)
1785 struct substring token;
1787 if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token))
1789 ss_data (token)[ss_length (token)] = '\0';
1790 return ss_data (token);
1794 text_match (struct text_record *text, char c)
1796 if (text->buffer.string[text->pos] == c)
1807 /* Displays a corruption message. */
1809 sys_msg (struct sfm_reader *r, int class, const char *format, va_list args)
1814 ds_init_empty (&text);
1815 ds_put_format (&text, "\"%s\" near offset 0x%lx: ",
1816 fh_get_file_name (r->fh), (unsigned long) ftell (r->file));
1817 ds_put_vformat (&text, format, args);
1819 m.category = msg_class_to_category (class);
1820 m.severity = msg_class_to_severity (class);
1821 m.where.file_name = NULL;
1822 m.where.line_number = 0;
1823 m.text = ds_cstr (&text);
1828 /* Displays a warning for the current file position. */
1830 sys_warn (struct sfm_reader *r, const char *format, ...)
1834 va_start (args, format);
1835 sys_msg (r, MW, format, args);
1839 /* Displays an error for the current file position,
1840 marks it as in an error state,
1841 and aborts reading it using longjmp. */
1843 sys_error (struct sfm_reader *r, const char *format, ...)
1847 va_start (args, format);
1848 sys_msg (r, ME, format, args);
1852 longjmp (r->bail_out, 1);
1855 /* Reads BYTE_CNT bytes into BUF.
1856 Returns true if exactly BYTE_CNT bytes are successfully read.
1857 Aborts if an I/O error or a partial read occurs.
1858 If EOF_IS_OK, then an immediate end-of-file causes false to be
1859 returned; otherwise, immediate end-of-file causes an abort
1862 read_bytes_internal (struct sfm_reader *r, bool eof_is_ok,
1863 void *buf, size_t byte_cnt)
1865 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
1866 if (bytes_read == byte_cnt)
1868 else if (ferror (r->file))
1869 sys_error (r, _("System error: %s."), strerror (errno));
1870 else if (!eof_is_ok || bytes_read != 0)
1871 sys_error (r, _("Unexpected end of file."));
1876 /* Reads BYTE_CNT into BUF.
1877 Aborts upon I/O error or if end-of-file is encountered. */
1879 read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
1881 read_bytes_internal (r, false, buf, byte_cnt);
1884 /* Reads BYTE_CNT bytes into BUF.
1885 Returns true if exactly BYTE_CNT bytes are successfully read.
1886 Returns false if an immediate end-of-file is encountered.
1887 Aborts if an I/O error or a partial read occurs. */
1889 try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
1891 return read_bytes_internal (r, true, buf, byte_cnt);
1894 /* Reads a 32-bit signed integer from R and returns its value in
1897 read_int (struct sfm_reader *r)
1900 read_bytes (r, integer, sizeof integer);
1901 return integer_get (r->integer_format, integer, sizeof integer);
1904 /* Reads a 64-bit floating-point number from R and returns its
1905 value in host format. */
1907 read_float (struct sfm_reader *r)
1910 read_bytes (r, number, sizeof number);
1911 return float_get_double (r->float_format, number);
1914 /* Reads exactly SIZE - 1 bytes into BUFFER
1915 and stores a null byte into BUFFER[SIZE - 1]. */
1917 read_string (struct sfm_reader *r, char *buffer, size_t size)
1920 read_bytes (r, buffer, size - 1);
1921 buffer[size - 1] = '\0';
1924 /* Skips BYTES bytes forward in R. */
1926 skip_bytes (struct sfm_reader *r, size_t bytes)
1931 size_t chunk = MIN (sizeof buffer, bytes);
1932 read_bytes (r, buffer, chunk);
1937 static const struct casereader_class sys_file_casereader_class =
1939 sys_file_casereader_read,
1940 sys_file_casereader_destroy,