1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <libpspp/alloc.h>
28 #include <libpspp/message.h>
29 #include <libpspp/compiler.h>
30 #include <libpspp/magic.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/str.h>
34 #include "sys-file-reader.h"
35 #include "sfm-private.h"
37 #include "dictionary.h"
38 #include "file-handle-def.h"
39 #include "file-name.h"
41 #include "value-labels.h"
45 #define _(msgid) gettext (msgid)
47 /* System file reader. */
50 struct file_handle *fh; /* File handle. */
51 FILE *file; /* File stream. */
53 int reverse_endian; /* 1=file has endianness opposite us. */
54 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
55 int value_cnt; /* Number of `union values's per case. */
56 long case_cnt; /* Number of cases, -1 if unknown. */
57 int compressed; /* 1=compressed, 0=not compressed. */
58 double bias; /* Compression bias, usually 100.0. */
59 int weight_idx; /* 0-based index of weighting variable, or -1. */
60 bool ok; /* False after an I/O error or corrupt data. */
63 struct sfm_var *vars; /* Variables. */
65 /* File's special constants. */
70 /* Decompression buffer. */
71 flt64 *buf; /* Buffer data. */
72 flt64 *ptr; /* Current location in buffer. */
73 flt64 *end; /* End of buffer data. */
75 /* Compression instruction octet. */
76 unsigned char x[8]; /* Current instruction octet. */
77 unsigned char *y; /* Location in current instruction octet. */
80 /* A variable in a system file. */
83 int width; /* 0=numeric, otherwise string width. */
84 int fv; /* Index into case. */
89 /* Swap bytes *A and *B. */
91 bswap (char *a, char *b)
98 /* Reverse the byte order of 32-bit integer *X. */
100 bswap_int32 (int32_t *x_)
102 char *x = (char *) x_;
103 bswap (x + 0, x + 3);
104 bswap (x + 1, x + 2);
107 /* Reverse the byte order of 64-bit floating point *X. */
109 bswap_flt64 (flt64 *x_)
111 char *x = (char *) x_;
112 bswap (x + 0, x + 7);
113 bswap (x + 1, x + 6);
114 bswap (x + 2, x + 5);
115 bswap (x + 3, x + 4);
119 corrupt_msg (int class, const char *format,...)
120 PRINTF_FORMAT (2, 3);
122 /* Displays a corrupt sysfile error. */
124 corrupt_msg (int class, const char *format,...)
130 ds_create (&text, _("corrupt system file: "));
131 va_start (args, format);
132 ds_vprintf (&text, format, args);
135 m.category = msg_class_to_category (class);
136 m.severity = msg_class_to_severity (class);
137 m.where.file_name = NULL;
138 m.where.line_number = 0;
139 m.text = ds_c_str (&text);
144 /* Closes a system file after we're done with it. */
146 sfm_close_reader (struct sfm_reader *r)
153 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
154 msg (ME, _("%s: Closing system file: %s."),
155 fh_get_file_name (r->fh), strerror (errno));
160 fh_close (r->fh, "system file", "rs");
167 /* Dictionary reader. */
169 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
171 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
174 static int read_header (struct sfm_reader *,
175 struct dictionary *, struct sfm_read_info *);
176 static int parse_format_spec (struct sfm_reader *, int32_t,
177 struct fmt_spec *, const struct variable *);
178 static int read_value_labels (struct sfm_reader *, struct dictionary *,
179 struct variable **var_by_idx);
180 static int read_variables (struct sfm_reader *,
181 struct dictionary *, struct variable ***var_by_idx);
182 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
183 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
184 static int read_documents (struct sfm_reader *, struct dictionary *);
186 static int fread_ok (struct sfm_reader *, void *, size_t);
188 /* Displays the message X with corrupt_msg, then jumps to the error
196 /* Calls buf_read with the specified arguments, and jumps to
197 error if the read fails. */
198 #define assertive_buf_read(a,b,c,d) \
200 if (!buf_read (a,b,c,d)) \
204 /* Opens the system file designated by file handle FH for
205 reading. Reads the system file's dictionary into *DICT.
206 If INFO is non-null, then it receives additional info about the
   system file.

   After the header and variable records have been read, the weighting
   variable (if any) is validated and installed, and then the remaining
   dictionary records are processed: value labels (type 3, each followed
   by its type 4 index record), documents (type 6) and the type 7
   subtypes handled below (machine integer info, machine floating-point
   info, variable display parameters, long variable name map).
   Unrecognized type 7 subtypes are skipped with a warning.

   On failure the reader is closed and *DICT is destroyed. */
209 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
210 struct sfm_read_info *info)
212 struct sfm_reader *r = NULL;
213 struct variable **var_by_idx = NULL;
215 *dict = dict_create ();
216 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
219 /* Create and initialize reader. */
220 r = xmalloc (sizeof *r);
222 r->file = fn_open (fh_get_file_name (fh), "rb");
224 r->reverse_endian = 0;
235 r->sysmis = -FLT64_MAX;
236 r->highest = FLT64_MAX;
237 r->lowest = second_lowest_flt64;
239 r->buf = r->ptr = r->end = NULL;
240 r->y = r->x + sizeof r->x;
242 /* Check that file open succeeded. */
245 msg (ME, _("An error occurred while opening \"%s\" for reading "
246 "as a system file: %s."),
247 fh_get_file_name (r->fh), strerror (errno));
251 /* Read header and variables. */
252 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
256 /* Handle weighting. */
257 if (r->weight_idx != -1)
259 struct variable *weight_var;
261 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
262 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
263 "and number of elements per case (%d)."),
264 fh_get_file_name (r->fh), r->weight_idx, r->value_cnt));
267 weight_var = var_by_idx[r->weight_idx];
269 if (weight_var == NULL)
271 _("%s: Weighting variable may not be a continuation of "
272 "a long string variable."), fh_get_file_name (fh)));
273 else if (weight_var->type == ALPHA)
274 lose ((ME, _("%s: Weighting variable may not be a string variable."),
275 fh_get_file_name (fh)));
277 dict_set_weight (*dict, weight_var);
280 dict_set_weight (*dict, NULL);
282 /* Read records of types 3, 4, 6, and 7. */
287 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
288 if (r->reverse_endian)
289 bswap_int32 (&rec_type);
294 if (!read_value_labels (r, *dict, var_by_idx))
299 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
300 "records must always immediately follow type 3 "
302 fh_get_file_name (r->fh)));
305 if (!read_documents (r, *dict))
322 assertive_buf_read (r, &data, sizeof data, 0);
323 if (r->reverse_endian)
325 bswap_int32 (&data.subtype);
326 bswap_int32 (&data.size);
327 bswap_int32 (&data.count);
329 bytes = data.size * data.count;
330 if (bytes < data.size || bytes < data.count)
331 lose ((ME, "%s: Record type %d subtype %d too large.",
332 fh_get_file_name (r->fh), rec_type, data.subtype));
334 switch (data.subtype)
337 if (!read_machine_int32_info (r, data.size, data.count))
342 if (!read_machine_flt64_info (r, data.size, data.count))
347 case 6: /* ?? Used by SPSS 8.0. */
351 case 11: /* Variable display parameters */
353 const int n_vars = data.count / 3 ;
355 if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
357 msg (MW, _("%s: Invalid subrecord length. "
358 "Record: 7; Subrecord: 11"),
359 fh_get_file_name (r->fh));
363 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
/* NOTE(review): unlike the other 32-bit fields read in this function,
   the members of PARAMS are not byte-swapped in the visible code when
   r->reverse_endian is set -- confirm against the full file. */
375 assertive_buf_read (r, &params, sizeof(params), 0);
377 v = dict_get_var(*dict, i);
379 v->measure = params.measure;
380 v->display_width = params.width;
381 v->alignment = params.align;
386 case 13: /* SPSS 12.0 Long variable name map */
388 char *buf, *short_name, *save_ptr;
392 buf = xmalloc (bytes + 1);
393 if (!buf_read (r, buf, bytes, 0))
401 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
403 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
405 char *long_name = strtok_r (NULL, "\t", &save_ptr);
408 /* Validate long name. */
409 if (long_name == NULL)
411 msg (MW, _("%s: Trailing garbage in long variable "
413 fh_get_file_name (r->fh));
416 if (!var_is_valid_name (long_name, false))
418 msg (MW, _("%s: Long variable mapping to invalid "
419 "variable name `%s'."),
420 fh_get_file_name (r->fh), long_name);
424 /* Find variable using short name. */
425 v = dict_lookup_var (*dict, short_name);
428 msg (MW, _("%s: Long variable mapping for "
429 "nonexistent variable %s."),
430 fh_get_file_name (r->fh), short_name);
434 /* Identify any duplicates. */
435 if ( compare_var_names(short_name, long_name, 0) &&
436 NULL != dict_lookup_var (*dict, long_name))
437 lose ((ME, _("%s: Duplicate long variable name `%s' "
438 "within system file."),
439 fh_get_file_name (r->fh), long_name));
443 /* Renaming a variable may clear the short
444 name, but we want to retain it, so
445 re-set it explicitly. */
446 dict_rename_var (*dict, v, long_name);
447 var_set_short_name (v, short_name);
449 /* For compatibility, make sure dictionary
450 is in long variable name map order. In
451 the common case, this has no effect,
452 because the dictionary and the long
453 variable name map are already in the same order. */
455 dict_reorder_var (*dict, v, idx);
464 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
465 "encountered in system file."),
466 fh_get_file_name (r->fh), data.subtype);
472 void *x = buf_read (r, NULL, data.size * data.count, 0);
484 assertive_buf_read (r, &filler, sizeof filler, 0);
489 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
490 fh_get_file_name (r->fh), rec_type);
495 /* Come here on successful completion. */
500 /* Come here on unsuccessful completion. */
501 sfm_close_reader (r);
505 dict_destroy (*dict);
511 /* Read record type 7, subtype 3.
   SIZE and COUNT come from the record header and must describe eight
   32-bit integers.  The integers are read, byte-swapped when the file
   has the opposite endianness, and then checked: data[6] is the
   endianness code (1 = big-endian, 2 = little-endian) and must agree
   with what the file header implied; data[7] is the character
   representation code, of which only 2 and 3 are accepted. */
513 read_machine_int32_info (struct sfm_reader *r, int size, int count)
520 if (size != sizeof (int32_t) || count != 8)
521 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
522 "subtype 3. Expected size %d, count 8."),
523 fh_get_file_name (r->fh), size, count, (int) sizeof (int32_t))); /* %d needs an int, not a size_t. */
525 assertive_buf_read (r, data, sizeof data, 0);
526 if (r->reverse_endian)
527 for (i = 0; i < 8; i++)
528 bswap_int32 (&data[i]);
532 lose ((ME, _("%s: Floating-point representation in system file is not "
533 "IEEE-754. PSPP cannot convert between floating-point "
535 fh_get_file_name (r->fh)));
537 #error Add support for your floating-point format.
540 #ifdef WORDS_BIGENDIAN
545 if (r->reverse_endian)
547 if (file_bigendian ^ (data[6] == 1))
548 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
549 "endianness intuited from file header (%s)."),
550 fh_get_file_name (r->fh),
551 file_bigendian ? _("big-endian") : _("little-endian"),
552 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
555 /* PORTME: Character representation code. */
556 if (data[7] != 2 && data[7] != 3)
557 lose ((ME, _("%s: File-indicated character representation code (%s) is "
559 fh_get_file_name (r->fh),
560 (data[7] == 1 ? "EBCDIC"
561 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
569 /* Read record type 7, subtype 4.
   SIZE and COUNT come from the record header and must describe three
   flt64 values: the file's SYSMIS, HIGHEST and LOWEST, in that order.
   The values are byte-swapped when the file has the opposite
   endianness.  If any of them differs from the internal value, a
   warning listing the indicated and expected values is issued. */
571 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
576 if (size != sizeof (flt64) || count != 3)
577 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
578 "subtype 4. Expected size %d, count 3."),
579 fh_get_file_name (r->fh), size, count, (int) sizeof (flt64))); /* %d needs an int, not a size_t. */
581 assertive_buf_read (r, data, sizeof data, 0);
582 if (r->reverse_endian)
583 for (i = 0; i < 3; i++)
584 bswap_flt64 (&data[i]);
586 if (data[0] != SYSMIS || data[1] != FLT64_MAX
587 || data[2] != second_lowest_flt64)
590 r->highest = data[1];
592 msg (MW, _("%s: File-indicated value is different from internal value "
593 "for at least one of the three system values. SYSMIS: "
594 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
596 fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS,
597 (double) data[1], (double) FLT64_MAX,
598 (double) data[2], (double) second_lowest_flt64);
608 read_header (struct sfm_reader *r,
609 struct dictionary *dict, struct sfm_read_info *info)
611 struct sysfile_header hdr; /* Disk buffer. */
612 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
613 int skip_amt = 0; /* Amount of product name to omit. */
616 /* Read header, check magic. */
617 assertive_buf_read (r, &hdr, sizeof hdr, 0);
618 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
619 lose ((ME, _("%s: Bad magic. Proper system files begin with "
620 "the four characters `$FL2'. This file will not be read."),
621 fh_get_file_name (r->fh)));
623 /* Check eye-catcher string. */
624 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
625 for (i = 0; i < 60; i++)
626 if (!c_isprint ((unsigned char) prod_name[i]))
628 for (i = 59; i >= 0; i--)
629 if (!c_isgraph ((unsigned char) prod_name[i]))
634 prod_name[60] = '\0';
638 static const char *prefix[N_PREFIXES] =
640 "@(#) SPSS DATA FILE",
646 for (i = 0; i < N_PREFIXES; i++)
647 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
649 skip_amt = strlen (prefix[i]);
654 /* Check endianness. */
655 if (hdr.layout_code == 2)
656 r->reverse_endian = 0;
659 bswap_int32 (&hdr.layout_code);
660 if (hdr.layout_code != 2)
661 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
662 "should be 2, in big-endian or little-endian format."),
663 fh_get_file_name (r->fh), hdr.layout_code));
665 r->reverse_endian = 1;
666 bswap_int32 (&hdr.case_size);
667 bswap_int32 (&hdr.compress);
668 bswap_int32 (&hdr.weight_idx);
669 bswap_int32 (&hdr.case_cnt);
670 bswap_flt64 (&hdr.bias);
674 /* Copy basic info and verify correctness. */
675 r->value_cnt = hdr.case_size;
677 /* If value count is ridiculous, then force it to -1 (a sentinel value) */
678 if ( r->value_cnt < 0 ||
679 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
682 r->compressed = hdr.compress;
684 r->weight_idx = hdr.weight_idx - 1;
686 r->case_cnt = hdr.case_cnt;
687 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
689 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
690 fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2));
693 if (r->bias != 100.0)
694 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
696 fh_get_file_name (r->fh), r->bias);
698 /* Make a file label only on the condition that the given label is
699 not all spaces or nulls. */
703 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
705 if (!c_isspace ((unsigned char) hdr.file_label[i])
706 && hdr.file_label[i] != 0)
708 char *label = xmalloc (i + 2);
709 memcpy (label, hdr.file_label, i + 1);
711 dict_set_label (dict, label);
722 memcpy (info->creation_date, hdr.creation_date, 9);
723 info->creation_date[9] = 0;
725 memcpy (info->creation_time, hdr.creation_time, 8);
726 info->creation_time[8] = 0;
728 #ifdef WORDS_BIGENDIAN
729 info->big_endian = !r->reverse_endian;
731 info->big_endian = r->reverse_endian;
734 info->compressed = hdr.compress;
736 info->case_cnt = hdr.case_cnt;
738 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
739 if (c_isgraph ((unsigned char) *cp))
741 strcpy (info->product, cp);
750 /* Reads most of the dictionary from R's system file; also fills in the
751 associated VAR_BY_IDX array. */
753 read_variables (struct sfm_reader *r,
754 struct dictionary *dict, struct variable ***var_by_idx)
758 struct sysfile_variable sv; /* Disk buffer. */
759 int long_string_count = 0; /* # of long string continuation
760 records still expected. */
761 int next_value = 0; /* Index to next `value' structure. */
767 /* Pre-allocate variables. */
768 if (r->value_cnt != -1)
770 *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
771 r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
775 /* Read in the entry for each variable and use the info to
776 initialize the dictionary. */
780 char name[SHORT_NAME_LEN + 1];
784 if ( r->value_cnt != -1 && i >= r->value_cnt )
787 assertive_buf_read (r, &sv, sizeof sv, 0);
789 if (r->reverse_endian)
791 bswap_int32 (&sv.rec_type);
792 bswap_int32 (&sv.type);
793 bswap_int32 (&sv.has_var_label);
794 bswap_int32 (&sv.n_missing_values);
795 bswap_int32 (&sv.print);
796 bswap_int32 (&sv.write);
799 /* We've come to the end of the variable entries */
800 if (sv.rec_type != 2)
802 buf_unread(r, sizeof sv);
807 if ( -1 == r->value_cnt )
809 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
810 r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
813 /* If there was a long string previously, make sure that the
814 continuations are present; otherwise make sure there aren't
816 if (long_string_count)
819 lose ((ME, _("%s: position %d: String variable does not have "
820 "proper number of continuation records."),
821 fh_get_file_name (r->fh), i));
824 r->vars[i].width = -1;
825 (*var_by_idx)[i] = NULL;
829 else if (sv.type == -1)
830 lose ((ME, _("%s: position %d: Superfluous long string continuation "
832 fh_get_file_name (r->fh), i));
834 /* Check fields for validity. */
835 if (sv.type < 0 || sv.type > 255)
836 lose ((ME, _("%s: position %d: Bad variable type code %d."),
837 fh_get_file_name (r->fh), i, sv.type));
838 if (sv.has_var_label != 0 && sv.has_var_label != 1)
839 lose ((ME, _("%s: position %d: Variable label indicator field is not "
840 "0 or 1."), fh_get_file_name (r->fh), i));
841 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
842 || sv.n_missing_values == -1)
843 lose ((ME, _("%s: position %d: Missing value indicator field is not "
844 "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i));
846 /* Copy first character of variable name. */
847 if (sv.name[0] == '@' || sv.name[0] == '#')
848 lose ((ME, _("%s: position %d: Variable name begins with invalid "
850 fh_get_file_name (r->fh), i));
852 name[0] = sv.name[0];
854 /* Copy remaining characters of variable name. */
855 for (j = 1; j < SHORT_NAME_LEN; j++)
857 int c = (unsigned char) sv.name[j];
866 if ( ! var_is_plausible_name(name, false) )
867 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
868 fh_get_file_name (r->fh), name));
870 /* Create variable. */
871 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
873 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
874 fh_get_file_name (r->fh), name));
876 var_set_short_name (vv, vv->name);
878 /* Case reading data. */
879 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
880 long_string_count = nv - 1;
883 /* Get variable label, if any. */
884 if (sv.has_var_label == 1)
889 /* Read length of label. */
890 assertive_buf_read (r, &len, sizeof len, 0);
891 if (r->reverse_endian)
895 if (len < 0 || len > 255)
896 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
898 fh_get_file_name (r->fh), vv->name, len));
902 /* Read label into variable structure. */
903 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
904 if (vv->label == NULL)
906 vv->label[len] = '\0';
910 /* Set missing values. */
911 if (sv.n_missing_values != 0)
914 int mv_cnt = abs (sv.n_missing_values);
916 if (vv->width > MAX_SHORT_STRING)
917 lose ((ME, _("%s: Long string variable %s may not have missing "
919 fh_get_file_name (r->fh), vv->name));
921 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
923 if (r->reverse_endian && vv->type == NUMERIC)
924 for (j = 0; j < mv_cnt; j++)
925 bswap_flt64 (&mv[j]);
927 if (sv.n_missing_values > 0)
929 for (j = 0; j < sv.n_missing_values; j++)
930 if (vv->type == NUMERIC)
931 mv_add_num (&vv->miss, mv[j]);
933 mv_add_str (&vv->miss, (char *) &mv[j]);
937 if (vv->type == ALPHA)
938 lose ((ME, _("%s: String variable %s may not have missing "
939 "values specified as a range."),
940 fh_get_file_name (r->fh), vv->name));
942 if (mv[0] == r->lowest)
943 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
944 else if (mv[1] == r->highest)
945 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
947 mv_add_num_range (&vv->miss, mv[0], mv[1]);
949 if (sv.n_missing_values == -3)
950 mv_add_num (&vv->miss, mv[2]);
954 if (!parse_format_spec (r, sv.print, &vv->print, vv)
955 || !parse_format_spec (r, sv.write, &vv->write, vv))
958 r->vars[i].width = vv->width;
959 r->vars[i].fv = vv->fv;
963 /* Some consistency checks. */
964 if (long_string_count != 0)
965 lose ((ME, _("%s: Long string continuation records omitted at end of "
967 fh_get_file_name (r->fh)));
969 if (next_value != r->value_cnt)
970 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
971 "%d were read from file."),
972 fh_get_file_name (r->fh), r->value_cnt, next_value);
981 /* Translates the format spec from sysfile format to internal
984 parse_format_spec (struct sfm_reader *r, int32_t s,
985 struct fmt_spec *f, const struct variable *v)
987 f->type = translate_fmt ((s >> 16) & 0xff);
989 lose ((ME, _("%s: Bad format specifier byte (%d)."),
990 fh_get_file_name (r->fh), (s >> 16) & 0xff));
991 f->w = (s >> 8) & 0xff;
994 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
995 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
996 fh_get_file_name (r->fh),
997 v->type == ALPHA ? _("String") : _("Numeric"),
999 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1000 formats[f->type].name));
1002 if (!check_output_specifier (f, false)
1003 || !check_specifier_width (f, v->width, false))
1005 msg (ME, _("%s variable %s has invalid format specifier %s."),
1006 v->type == NUMERIC ? _("Numeric") : _("String"),
1007 v->name, fmt_to_string (f));
1008 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1016 /* Reads value labels from sysfile H and inserts them into the
1017 associated dictionary. */
1019 read_value_labels (struct sfm_reader *r,
1020 struct dictionary *dict, struct variable **var_by_idx)
1024 char raw_value[8]; /* Value as uninterpreted bytes. */
1025 union value value; /* Value. */
1026 char *label; /* Null-terminated label string. */
1029 struct label *labels = NULL;
1030 int32_t n_labels; /* Number of labels. */
1032 struct variable **var = NULL; /* Associated variables. */
1033 int32_t n_vars; /* Number of associated variables. */
1037 /* First step: read the contents of the type 3 record and record its
1038 contents. Note that we can't do much with the data since we
1039 don't know yet whether it is of numeric or string type. */
1041 /* Read number of labels. */
1042 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1043 if (r->reverse_endian)
1044 bswap_int32 (&n_labels);
1046 if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
1048 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1049 fh_get_file_name (r->fh), n_labels);
1053 /* Allocate memory. */
1054 labels = xcalloc (n_labels, sizeof *labels);
1055 for (i = 0; i < n_labels; i++)
1056 labels[i].label = NULL;
1058 /* Read each value/label tuple into labels[]. */
1059 for (i = 0; i < n_labels; i++)
1061 struct label *label = labels + i;
1062 unsigned char label_len;
1066 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1068 /* Read label length. */
1069 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1070 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1072 /* Read label, padding. */
1073 label->label = xmalloc (padded_len + 1);
1074 assertive_buf_read (r, label->label, padded_len - 1, 0);
1075 label->label[label_len] = 0;
1078 /* Second step: Read the type 4 record that has the list of
1079 variables to which the value labels are to be applied. */
1081 /* Read record type of type 4 record. */
1085 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1086 if (r->reverse_endian)
1087 bswap_int32 (&rec_type);
1090 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1091 "follow value label record (type 3) as it should."),
1092 fh_get_file_name (r->fh)));
1095 /* Read number of variables associated with value label from type 4
1097 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1098 if (r->reverse_endian)
1099 bswap_int32 (&n_vars);
1100 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1101 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1102 "is not between 1 and the number of variables (%d)."),
1103 fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict)));
1105 /* Read the list of variables. */
1106 var = xnmalloc (n_vars, sizeof *var);
1107 for (i = 0; i < n_vars; i++)
1112 /* Read variable index, check range. */
1113 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1114 if (r->reverse_endian)
1115 bswap_int32 (&var_idx);
1116 if (var_idx < 1 || var_idx > r->value_cnt)
1117 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1118 "not between 1 and the number of values (%d)."),
1119 fh_get_file_name (r->fh), var_idx, r->value_cnt));
1121 /* Make sure it's a real variable. */
1122 v = var_by_idx[var_idx - 1];
1124 lose ((ME, _("%s: Variable index associated with value label (%d) "
1125 "refers to a continuation of a string variable, not to "
1126 "an actual variable."),
1127 fh_get_file_name (r->fh), var_idx));
1128 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1129 lose ((ME, _("%s: Value labels are not allowed on long string "
1131 fh_get_file_name (r->fh), v->name));
1133 /* Add it to the list of variables. */
1137 /* Type check the variables. */
1138 for (i = 1; i < n_vars; i++)
1139 if (var[i]->type != var[0]->type)
1140 lose ((ME, _("%s: Variables associated with value label are not all of "
1141 "identical type. Variable %s has %s type, but variable "
1143 fh_get_file_name (r->fh),
1144 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1145 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1147 /* Fill in labels[].value, now that we know the desired type. */
1148 for (i = 0; i < n_labels; i++)
1150 struct label *label = labels + i;
1152 if (var[0]->type == ALPHA)
1154 const int copy_len = min (sizeof label->raw_value,
1155 sizeof label->label);
1156 memcpy (label->value.s, label->raw_value, copy_len);
1159 assert (sizeof f == sizeof label->raw_value);
1160 memcpy (&f, label->raw_value, sizeof f);
1161 if (r->reverse_endian)
1167 /* Assign the value_label's to each variable. */
1168 for (i = 0; i < n_vars; i++)
1170 struct variable *v = var[i];
1173 /* Add each label to the variable. */
1174 for (j = 0; j < n_labels; j++)
1176 struct label *label = labels + j;
1177 if (!val_labs_replace (v->val_labs, label->value, label->label))
1180 if (var[0]->type == NUMERIC)
1181 msg (MW, _("%s: File contains duplicate label for value %g for "
1183 fh_get_file_name (r->fh), label->value.f, v->name);
1185 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1186 "for variable %s."),
1187 fh_get_file_name (r->fh), v->width, label->value.s, v->name);
1191 for (i = 0; i < n_labels; i++)
1192 free (labels[i].label);
1200 for (i = 0; i < n_labels; i++)
1201 free (labels[i].label);
1208 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1209 non-NULL, uses that as the buffer; otherwise allocates at least
1210 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1213 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1217 if (buf == NULL && byte_cnt > 0 )
1218 buf = xmalloc (max (byte_cnt, min_alloc));
1220 if ( byte_cnt == 0 )
1224 if (1 != fread (buf, byte_cnt, 1, r->file))
1226 if (ferror (r->file))
1227 msg (ME, _("%s: Reading system file: %s."),
1228 fh_get_file_name (r->fh), strerror (errno));
1230 corrupt_msg (ME, _("%s: Unexpected end of file."),
1231 fh_get_file_name (r->fh));
1238 /* Winds the reader BYTE_CNT bytes back in the reader stream.
   BYTE_CNT must be greater than zero.  A failed seek is reported
   through msg together with the errno description. */
1240 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1242 assert(byte_cnt > 0);
/* Convert to long before negating: negating the unsigned size_t
   itself wraps to a huge positive value, and the seek then depends
   on how that value happens to convert to fseek's long offset. */
1244 if ( 0 != fseek(r->file, -(long) byte_cnt, SEEK_CUR))
1246 msg (ME, _("%s: Seeking system file: %s."),
1247 fh_get_file_name (r->fh), strerror (errno));
1251 /* Reads a document record, type 6, from system file R, and sets up
1252 the documents and n_documents fields in the associated
1255 read_documents (struct sfm_reader *r, struct dictionary *dict)
1260 if (dict_get_documents (dict) != NULL)
1261 lose ((ME, _("%s: System file contains multiple "
1262 "type 6 (document) records."),
1263 fh_get_file_name (r->fh)));
1265 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1267 lose ((ME, _("%s: Number of document lines (%ld) "
1268 "must be greater than 0."),
1269 fh_get_file_name (r->fh), (long) line_cnt));
1271 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1272 /* FIXME? Run through asciify. */
1273 if (documents == NULL)
1275 documents[80 * line_cnt] = '\0';
1276 dict_set_documents (dict, documents);
1286 /* Reads compressed data into H->BUF and sets other pointers
1287 appropriately. Returns nonzero only if both no errors occur and
1290 buffer_input (struct sfm_reader *r)
1297 r->buf = xnmalloc (128, sizeof *r->buf);
1298 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1299 if (ferror (r->file))
1301 msg (ME, _("%s: Error reading file: %s."),
1302 fh_get_file_name (r->fh), strerror (errno));
1307 r->end = &r->buf[amt];
1311 /* Reads a single case consisting of compressed data from reader
1312 R's system file into the array BUF[], and
1313 returns nonzero only if successful. */
1314 /* Data in system files is compressed in this manner. Data
1315 values are grouped into sets of eight ("octets"). Each value
1316 in an octet has one instruction byte; the instruction bytes of an octet are output together.
1317 Each instruction byte either encodes its value directly or indicates
1318 that the value can be found following the instructions. */
1320 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1322 const unsigned char *p_end = r->x + sizeof (flt64);
1323 unsigned char *p = r->y;
1325 const flt64 *buf_beg = buf;
1326 const flt64 *buf_end = &buf[r->value_cnt];
1330 for (; p < p_end; p++){
1334 /* Code 0 is ignored. */
1337 /* Code 252 is end of file. */
1340 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1341 "in partial case."),
1342 fh_get_file_name (r->fh)));
1344 /* Code 253 indicates that the value is stored explicitly
1345 following the instruction bytes. */
1346 if (r->ptr == NULL || r->ptr >= r->end)
1347 if (!buffer_input (r))
1348 lose ((ME, _("%s: Unexpected end of file."),
1349 fh_get_file_name (r->fh)));
1350 memcpy (buf++, r->ptr++, sizeof *buf);
1355 /* Code 254 indicates a string that is all blanks. */
1356 memset (buf++, ' ', sizeof *buf);
1361 /* Code 255 indicates the system-missing value. */
1363 if (r->reverse_endian)
1370 /* Codes 1 through 251 inclusive are taken to indicate a
1371 value of (BYTE - BIAS), where BYTE is the byte's value
1372 and BIAS is the compression bias (generally 100.0). */
1373 *buf = *p - r->bias;
1374 if (r->reverse_endian)
1382 /* We have reached the end of this instruction octet. Read
1384 if (r->ptr == NULL || r->ptr >= r->end)
1386 if (!buffer_input (r))
1389 lose ((ME, _("%s: Unexpected end of file."),
1390 fh_get_file_name (r->fh)));
1395 memcpy (r->x, r->ptr++, sizeof *buf);
1402 /* We have filled up an entire record. Update state and return
1413 /* Reads one case from READER's file into C. Returns nonzero
1414 only if successful. */
1416 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1421 if (!r->compressed && sizeof (flt64) == sizeof (double))
1423 /* Fast path: external and internal representations are the
1424 same, except possibly for endianness or SYSMIS. Read
1425 directly into the case's buffer, then fix up any minor
1426 details as needed. */
1427 if (!fread_ok (r, case_data_all_rw (c),
1428 sizeof (union value) * r->value_cnt))
1431 /* Fix up endianness if needed. */
1432 if (r->reverse_endian)
1436 for (i = 0; i < r->value_cnt; i++)
1437 if (r->vars[i].width == 0)
1438 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1441 /* Fix up SYSMIS values if needed.
1442 I don't think this will ever actually kick in, but it
1444 if (r->sysmis != SYSMIS)
1448 for (i = 0; i < r->value_cnt; i++)
1449 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1450 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1455 /* Slow path: internal and external representations differ.
1456 Read into a bounce buffer, then copy to C. */
1463 bounce_size = sizeof *bounce * r->value_cnt;
1464 bounce = bounce_cur = local_alloc (bounce_size);
1467 read_ok = fread_ok (r, bounce, bounce_size);
1469 read_ok = read_compressed_data (r, bounce);
1472 local_free (bounce);
1476 for (i = 0; i < r->value_cnt; i++)
1478 struct sfm_var *v = &r->vars[i];
1482 flt64 f = *bounce_cur++;
1483 if (r->reverse_endian)
1485 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1487 else if (v->width != -1)
1489 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1490 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1494 local_free (bounce);
1500 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1502 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1504 if (read_bytes == byte_cnt)
1508 if (ferror (r->file))
1510 msg (ME, _("%s: Reading system file: %s."),
1511 fh_get_file_name (r->fh), strerror (errno));
1514 else if (read_bytes != 0)
1516 msg (ME, _("%s: Partial record at end of system file."),
1517 fh_get_file_name (r->fh));
1524 /* Returns true if an I/O error has occurred on READER, false
1527 sfm_read_error (const struct sfm_reader *reader)
1532 /* Returns true if FILE is an SPSS system file,
1535 sfm_detect (FILE *file)
1537 struct sysfile_header hdr;
1539 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1541 if (strncmp ("$FL2", hdr.rec_type, 4))