1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "sys-file-reader.h"
22 #include "sfm-private.h"
32 #include "dictionary.h"
34 #include "file-handle-def.h"
39 #include "value-labels.h"
44 #define _(msgid) gettext (msgid)
46 #include "debug-print.h"
48 /* System file reader. */
51 struct file_handle *fh; /* File handle. */
52 FILE *file; /* File stream. */
54 int reverse_endian; /* 1=file has endianness opposite us. */
55 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
56 int value_cnt; /* Number of `union values's per case. */
57 long case_cnt; /* Number of cases, -1 if unknown. */
58 int compressed; /* 1=compressed, 0=not compressed. */
59 double bias; /* Compression bias, usually 100.0. */
60 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 bool ok; /* False after an I/O error or corrupt data. */
64 struct sfm_var *vars; /* Variables. */
66 /* File's special constants. */
71 /* Decompression buffer. */
72 flt64 *buf; /* Buffer data. */
73 flt64 *ptr; /* Current location in buffer. */
74 flt64 *end; /* End of buffer data. */
76 /* Compression instruction octet. */
77 unsigned char x[8]; /* Current instruction octet. */
78 unsigned char *y; /* Location in current instruction octet. */
81 /* A variable in a system file. */
84 int width; /* 0=numeric, otherwise string width. */
85 int fv; /* Index into case. */
90 /* Swap bytes *A and *B. */
92 bswap (char *a, char *b)
99 /* Reverse the byte order of 32-bit integer *X_ in place: the object
   is viewed as four chars and bytes 0<->3 and 1<->2 are exchanged
   with bswap(). */
101 bswap_int32 (int32 *x_)
103 char *x = (char *) x_;
104 bswap (x + 0, x + 3);
105 bswap (x + 1, x + 2);
108 /* Reverse the byte order of 64-bit floating point *X_ in place: the
   value is viewed as eight chars and mirrored end for end (bytes
   0<->7, 1<->6, 2<->5, 3<->4) with bswap(). */
110 bswap_flt64 (flt64 *x_)
112 char *x = (char *) x_;
113 bswap (x + 0, x + 7);
114 bswap (x + 1, x + 6);
115 bswap (x + 2, x + 5);
116 bswap (x + 3, x + 4);
120 corrupt_msg (int class, const char *format,...)
121 PRINTF_FORMAT (2, 3);
123 /* Displays a corrupt sysfile error.  FORMAT and the arguments that
   follow it are handed to err_vmsg() together with an error record
   whose location is cleared (no file name, line number 0) and whose
   title is the translated string "corrupt system file: ". */
125 corrupt_msg (int class, const char *format,...)
131 e.where.filename = NULL;
132 e.where.line_number = 0;
133 e.title = _("corrupt system file: ");
135 va_start (args, format);
136 err_vmsg (&e, format, args);
140 /* Closes a system file after we're done with it.  R's stream is
   closed with fn_close(); if that returns EOF an error message
   carrying strerror (errno) is issued.  The file handle is released
   with fh_close(), using the same "system file"/"rs" arguments that
   sfm_open_reader() passes to fh_open(). */
142 sfm_close_reader (struct sfm_reader *r)
149 if (fn_close (fh_get_filename (r->fh), r->file) == EOF)
150 msg (ME, _("%s: Closing system file: %s."),
151 fh_get_filename (r->fh), strerror (errno));
156 fh_close (r->fh, "system file", "rs");
163 /* Dictionary reader. */
165 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
167 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
170 static int read_header (struct sfm_reader *,
171 struct dictionary *, struct sfm_read_info *);
172 static int parse_format_spec (struct sfm_reader *, int32,
173 struct fmt_spec *, const struct variable *);
174 static int read_value_labels (struct sfm_reader *, struct dictionary *,
175 struct variable **var_by_idx);
176 static int read_variables (struct sfm_reader *,
177 struct dictionary *, struct variable ***var_by_idx);
178 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
179 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
180 static int read_documents (struct sfm_reader *, struct dictionary *);
182 static int fread_ok (struct sfm_reader *, void *, size_t);
184 /* Displays the message X with corrupt_msg, then jumps to the error
192 /* Calls buf_read with the specified arguments, and jumps to
193 error if the read fails. */
194 #define assertive_buf_read(a,b,c,d) \
196 if (!buf_read (a,b,c,d)) \
200 /* Opens the system file designated by file handle FH for
201 reading. Reads the system file's dictionary into *DICT.
202 If INFO is non-null, then it receives additional info about the
205 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
206 struct sfm_read_info *info)
208 struct sfm_reader *r = NULL;
209 struct variable **var_by_idx = NULL;
211 *dict = dict_create ();
212 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
215 /* Create and initialize reader. */
216 r = xmalloc (sizeof *r);
218 r->file = fn_open (fh_get_filename (fh), "rb");
220 r->reverse_endian = 0;
231 r->sysmis = -FLT64_MAX;
232 r->highest = FLT64_MAX;
233 r->lowest = second_lowest_flt64;
235 r->buf = r->ptr = r->end = NULL;
236 r->y = r->x + sizeof r->x;
238 /* Check that file open succeeded. */
241 msg (ME, _("An error occurred while opening \"%s\" for reading "
242 "as a system file: %s."),
243 fh_get_filename (r->fh), strerror (errno));
247 /* Read header and variables. */
248 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
252 /* Handle weighting. */
253 if (r->weight_idx != -1)
255 struct variable *weight_var;
257 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
258 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
259 "and number of elements per case (%d)."),
260 fh_get_filename (r->fh), r->weight_idx, r->value_cnt));
263 weight_var = var_by_idx[r->weight_idx];
265 if (weight_var == NULL)
267 _("%s: Weighting variable may not be a continuation of "
268 "a long string variable."), fh_get_filename (fh)));
269 else if (weight_var->type == ALPHA)
270 lose ((ME, _("%s: Weighting variable may not be a string variable."),
271 fh_get_filename (fh)));
273 dict_set_weight (*dict, weight_var);
276 dict_set_weight (*dict, NULL);
278 /* Read records of types 3, 4, 6, and 7. */
283 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
284 if (r->reverse_endian)
285 bswap_int32 (&rec_type);
290 if (!read_value_labels (r, *dict, var_by_idx))
295 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
296 "records must always immediately follow type 3 "
298 fh_get_filename (r->fh)));
301 if (!read_documents (r, *dict))
318 assertive_buf_read (r, &data, sizeof data, 0);
319 if (r->reverse_endian)
321 bswap_int32 (&data.subtype);
322 bswap_int32 (&data.size);
323 bswap_int32 (&data.count);
325 bytes = data.size * data.count;
326 if (bytes < data.size || bytes < data.count)
327 lose ((ME, "%s: Record type %d subtype %d too large.",
328 fh_get_filename (r->fh), rec_type, data.subtype));
330 switch (data.subtype)
333 if (!read_machine_int32_info (r, data.size, data.count))
338 if (!read_machine_flt64_info (r, data.size, data.count))
343 case 6: /* ?? Used by SPSS 8.0. */
347 case 11: /* Variable display parameters */
349 const int n_vars = data.count / 3 ;
351 if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
353 msg (MW, _("%s: Invalid subrecord length. "
354 "Record: 7; Subrecord: 11"),
355 fh_get_filename (r->fh));
359 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
371 assertive_buf_read (r, ¶ms, sizeof(params), 0);
373 v = dict_get_var(*dict, i);
375 v->measure = params.measure;
376 v->display_width = params.width;
377 v->alignment = params.align;
382 case 13: /* SPSS 12.0 Long variable name map */
384 char *buf, *short_name, *save_ptr;
388 buf = xmalloc (bytes + 1);
389 if (!buf_read (r, buf, bytes, 0))
397 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
399 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
401 char *long_name = strtok_r (NULL, "\t", &save_ptr);
404 /* Validate long name. */
405 if (long_name == NULL)
407 msg (MW, _("%s: Trailing garbage in long variable "
409 fh_get_filename (r->fh));
412 if (!var_is_valid_name (long_name, false))
414 msg (MW, _("%s: Long variable mapping to invalid "
415 "variable name `%s'."),
416 fh_get_filename (r->fh), long_name);
420 /* Find variable using short name. */
421 v = dict_lookup_var (*dict, short_name);
424 msg (MW, _("%s: Long variable mapping for "
425 "nonexistent variable %s."),
426 fh_get_filename (r->fh), short_name);
430 /* Identify any duplicates. */
431 if ( compare_var_names(short_name, long_name, 0) &&
432 NULL != dict_lookup_var (*dict, long_name))
433 lose ((ME, _("%s: Duplicate long variable name `%s' "
434 "within system file."),
435 fh_get_filename (r->fh), long_name));
439 Renaming a variable may clear the short
440 name, but we want to retain it, so
441 re-set it explicitly. */
442 dict_rename_var (*dict, v, long_name);
443 var_set_short_name (v, short_name);
445 /* For compatibility, make sure dictionary
446 is in long variable name map order. In
447 the common case, this has no effect,
448 because the dictionary and the long
449 variable name map are already in the
451 dict_reorder_var (*dict, v, idx);
460 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
461 "encountered in system file."),
462 fh_get_filename (r->fh), data.subtype);
468 void *x = buf_read (r, NULL, data.size * data.count, 0);
480 assertive_buf_read (r, &filler, sizeof filler, 0);
485 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
486 fh_get_filename (r->fh), rec_type);
491 /* Come here on successful completion. */
496 /* Come here on unsuccessful completion. */
497 sfm_close_reader (r);
501 dict_destroy (*dict);
507 /* Read record type 7, subtype 3.
   SIZE and COUNT are the values taken from the record header and
   must be sizeof (int32) and 8.  The eight integers are read from R
   and byte-swapped when R->reverse_endian is set.  Checks visible
   here: data[6] is the file's endianness code (1 = big-endian,
   2 = little-endian) and must agree with the endianness deduced from
   the file header; data[7] is the character representation code, of
   which only 2 and 3 are accepted (1 is reported as EBCDIC, 4 as DEC
   Kanji, anything else as Unknown).
   NOTE(review): the "Bad size" message formats sizeof (int32), a
   size_t, with %d -- consider casting that argument to int. */
509 read_machine_int32_info (struct sfm_reader *r, int size, int count)
516 if (size != sizeof (int32) || count != 8)
517 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
518 "subtype 3. Expected size %d, count 8."),
519 fh_get_filename (r->fh), size, count, sizeof (int32)));
521 assertive_buf_read (r, data, sizeof data, 0);
522 if (r->reverse_endian)
523 for (i = 0; i < 8; i++)
524 bswap_int32 (&data[i]);
528 lose ((ME, _("%s: Floating-point representation in system file is not "
529 "IEEE-754. PSPP cannot convert between floating-point "
531 fh_get_filename (r->fh)));
533 #error Add support for your floating-point format.
536 #ifdef WORDS_BIGENDIAN
541 if (r->reverse_endian)
543 if (file_bigendian ^ (data[6] == 1))
544 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
545 "endianness intuited from file header (%s)."),
546 fh_get_filename (r->fh),
547 file_bigendian ? _("big-endian") : _("little-endian"),
548 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
551 /* PORTME: Character representation code. */
552 if (data[7] != 2 && data[7] != 3)
553 lose ((ME, _("%s: File-indicated character representation code (%s) is "
555 fh_get_filename (r->fh),
556 (data[7] == 1 ? "EBCDIC"
557 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
565 /* Read record type 7, subtype 4.
   SIZE and COUNT are the values taken from the record header and
   must be sizeof (flt64) and 3.  The three values read from R are
   the file's SYSMIS, HIGHEST and LOWEST, byte-swapped when
   R->reverse_endian is set.  If any of them differs from SYSMIS,
   FLT64_MAX or second_lowest_flt64 respectively, the file's values
   are recorded in R and a warning listing indicated versus expected
   values is issued.
   NOTE(review): the "Bad size" message ends in "count 8" although
   the check above it requires COUNT to be 3 -- the text looks copied
   from the subtype 3 reader.
   NOTE(review): the same message formats sizeof (flt64), a size_t,
   with %d -- consider casting that argument to int. */
567 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
572 if (size != sizeof (flt64) || count != 3)
573 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
574 "subtype 4. Expected size %d, count 8."),
575 fh_get_filename (r->fh), size, count, sizeof (flt64)));
577 assertive_buf_read (r, data, sizeof data, 0);
578 if (r->reverse_endian)
579 for (i = 0; i < 3; i++)
580 bswap_flt64 (&data[i]);
582 if (data[0] != SYSMIS || data[1] != FLT64_MAX
583 || data[2] != second_lowest_flt64)
586 r->highest = data[1];
588 msg (MW, _("%s: File-indicated value is different from internal value "
589 "for at least one of the three system values. SYSMIS: "
590 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
592 fh_get_filename (r->fh), (double) data[0], (double) SYSMIS,
593 (double) data[1], (double) FLT64_MAX,
594 (double) data[2], (double) second_lowest_flt64);
604 read_header (struct sfm_reader *r,
605 struct dictionary *dict, struct sfm_read_info *info)
607 struct sysfile_header hdr; /* Disk buffer. */
608 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
609 int skip_amt = 0; /* Amount of product name to omit. */
612 /* Read header, check magic. */
613 assertive_buf_read (r, &hdr, sizeof hdr, 0);
614 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
615 lose ((ME, _("%s: Bad magic. Proper system files begin with "
616 "the four characters `$FL2'. This file will not be read."),
617 fh_get_filename (r->fh)));
619 /* Check eye-catcher string. */
620 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
621 for (i = 0; i < 60; i++)
622 if (!isprint ((unsigned char) prod_name[i]))
624 for (i = 59; i >= 0; i--)
625 if (!isgraph ((unsigned char) prod_name[i]))
630 prod_name[60] = '\0';
634 static const char *prefix[N_PREFIXES] =
636 "@(#) SPSS DATA FILE",
642 for (i = 0; i < N_PREFIXES; i++)
643 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
645 skip_amt = strlen (prefix[i]);
650 /* Check endianness. */
651 if (hdr.layout_code == 2)
652 r->reverse_endian = 0;
655 bswap_int32 (&hdr.layout_code);
656 if (hdr.layout_code != 2)
657 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
658 "should be 2, in big-endian or little-endian format."),
659 fh_get_filename (r->fh), hdr.layout_code));
661 r->reverse_endian = 1;
662 bswap_int32 (&hdr.case_size);
663 bswap_int32 (&hdr.compress);
664 bswap_int32 (&hdr.weight_idx);
665 bswap_int32 (&hdr.case_cnt);
666 bswap_flt64 (&hdr.bias);
670 /* Copy basic info and verify correctness. */
671 r->value_cnt = hdr.case_size;
673 /* If value count is ridiculous, then force it to -1 (a sentinel value) */
674 if ( r->value_cnt < 0 ||
675 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
678 r->compressed = hdr.compress;
680 r->weight_idx = hdr.weight_idx - 1;
682 r->case_cnt = hdr.case_cnt;
683 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
685 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
686 fh_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
689 if (r->bias != 100.0)
690 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
692 fh_get_filename (r->fh), r->bias);
694 /* Make a file label only on the condition that the given label is
695 not all spaces or nulls. */
699 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
700 if (!isspace ((unsigned char) hdr.file_label[i])
701 && hdr.file_label[i] != 0)
703 char *label = xmalloc (i + 2);
704 memcpy (label, hdr.file_label, i + 1);
706 dict_set_label (dict, label);
716 memcpy (info->creation_date, hdr.creation_date, 9);
717 info->creation_date[9] = 0;
719 memcpy (info->creation_time, hdr.creation_time, 8);
720 info->creation_time[8] = 0;
722 #ifdef WORDS_BIGENDIAN
723 info->big_endian = !r->reverse_endian;
725 info->big_endian = r->reverse_endian;
728 info->compressed = hdr.compress;
730 info->case_cnt = hdr.case_cnt;
732 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
733 if (isgraph ((unsigned char) *cp))
735 strcpy (info->product, cp);
744 /* Reads most of the dictionary from R's file; also fills in the
745 associated VAR_BY_IDX array. */
747 read_variables (struct sfm_reader *r,
748 struct dictionary *dict, struct variable ***var_by_idx)
752 struct sysfile_variable sv; /* Disk buffer. */
753 int long_string_count = 0; /* # of long string continuation
754 records still expected. */
755 int next_value = 0; /* Index to next `value' structure. */
761 /* Pre-allocate variables. */
762 if (r->value_cnt != -1)
764 *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
765 r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
769 /* Read in the entry for each variable and use the info to
770 initialize the dictionary. */
774 char name[SHORT_NAME_LEN + 1];
778 if ( r->value_cnt != -1 && i >= r->value_cnt )
781 assertive_buf_read (r, &sv, sizeof sv, 0);
783 if (r->reverse_endian)
785 bswap_int32 (&sv.rec_type);
786 bswap_int32 (&sv.type);
787 bswap_int32 (&sv.has_var_label);
788 bswap_int32 (&sv.n_missing_values);
789 bswap_int32 (&sv.print);
790 bswap_int32 (&sv.write);
793 /* We've come to the end of the variable entries */
794 if (sv.rec_type != 2)
796 buf_unread(r, sizeof sv);
801 if ( -1 == r->value_cnt )
803 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
804 r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
807 /* If there was a long string previously, make sure that the
808 continuations are present; otherwise make sure there aren't
810 if (long_string_count)
813 lose ((ME, _("%s: position %d: String variable does not have "
814 "proper number of continuation records."),
815 fh_get_filename (r->fh), i));
818 r->vars[i].width = -1;
819 (*var_by_idx)[i] = NULL;
823 else if (sv.type == -1)
824 lose ((ME, _("%s: position %d: Superfluous long string continuation "
826 fh_get_filename (r->fh), i));
828 /* Check fields for validity. */
829 if (sv.type < 0 || sv.type > 255)
830 lose ((ME, _("%s: position %d: Bad variable type code %d."),
831 fh_get_filename (r->fh), i, sv.type));
832 if (sv.has_var_label != 0 && sv.has_var_label != 1)
833 lose ((ME, _("%s: position %d: Variable label indicator field is not "
834 "0 or 1."), fh_get_filename (r->fh), i));
835 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
836 || sv.n_missing_values == -1)
837 lose ((ME, _("%s: position %d: Missing value indicator field is not "
838 "-3, -2, 0, 1, 2, or 3."), fh_get_filename (r->fh), i));
840 /* Copy first character of variable name. */
841 if (!isalpha ((unsigned char) sv.name[0])
842 && sv.name[0] != '@' && sv.name[0] != '#')
843 lose ((ME, _("%s: position %d: Variable name begins with invalid "
845 fh_get_filename (r->fh), i));
846 if (islower ((unsigned char) sv.name[0]))
847 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
849 fh_get_filename (r->fh), i, sv.name[0]);
850 if (sv.name[0] == '#')
851 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
852 "(`#'). Scratch variables should not appear in system "
854 fh_get_filename (r->fh), i);
855 name[0] = toupper ((unsigned char) (sv.name[0]));
857 /* Copy remaining characters of variable name. */
858 for (j = 1; j < SHORT_NAME_LEN; j++)
860 int c = (unsigned char) sv.name[j];
864 else if (islower (c))
866 msg (MW, _("%s: position %d: Variable name character %d is "
867 "lowercase letter %c."),
868 fh_get_filename (r->fh), i, j + 1, sv.name[j]);
869 name[j] = toupper ((unsigned char) (c));
871 else if (isalnum (c) || c == '.' || c == '@'
872 || c == '#' || c == '$' || c == '_')
875 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
877 fh_get_filename (r->fh), i, c, c));
881 if ( ! var_is_valid_name(name, false) )
882 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
883 fh_get_filename (r->fh), name));
885 /* Create variable. */
887 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
889 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
890 fh_get_filename (r->fh), name));
892 var_set_short_name (vv, vv->name);
894 /* Case reading data. */
895 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
896 long_string_count = nv - 1;
899 /* Get variable label, if any. */
900 if (sv.has_var_label == 1)
905 /* Read length of label. */
906 assertive_buf_read (r, &len, sizeof len, 0);
907 if (r->reverse_endian)
911 if (len < 0 || len > 255)
912 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
914 fh_get_filename (r->fh), vv->name, len));
918 /* Read label into variable structure. */
919 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
920 if (vv->label == NULL)
922 vv->label[len] = '\0';
926 /* Set missing values. */
927 if (sv.n_missing_values != 0)
930 int mv_cnt = abs (sv.n_missing_values);
932 if (vv->width > MAX_SHORT_STRING)
933 lose ((ME, _("%s: Long string variable %s may not have missing "
935 fh_get_filename (r->fh), vv->name));
937 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
939 if (r->reverse_endian && vv->type == NUMERIC)
940 for (j = 0; j < mv_cnt; j++)
941 bswap_flt64 (&mv[j]);
943 if (sv.n_missing_values > 0)
945 for (j = 0; j < sv.n_missing_values; j++)
946 if (vv->type == NUMERIC)
947 mv_add_num (&vv->miss, mv[j]);
949 mv_add_str (&vv->miss, (char *) &mv[j]);
953 if (vv->type == ALPHA)
954 lose ((ME, _("%s: String variable %s may not have missing "
955 "values specified as a range."),
956 fh_get_filename (r->fh), vv->name));
958 if (mv[0] == r->lowest)
959 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
960 else if (mv[1] == r->highest)
961 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
963 mv_add_num_range (&vv->miss, mv[0], mv[1]);
965 if (sv.n_missing_values == -3)
966 mv_add_num (&vv->miss, mv[2]);
970 if (!parse_format_spec (r, sv.print, &vv->print, vv)
971 || !parse_format_spec (r, sv.write, &vv->write, vv))
974 r->vars[i].width = vv->width;
975 r->vars[i].fv = vv->fv;
979 /* Some consistency checks. */
980 if (long_string_count != 0)
981 lose ((ME, _("%s: Long string continuation records omitted at end of "
983 fh_get_filename (r->fh)));
985 if (next_value != r->value_cnt)
986 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
987 "%d were read from file."),
988 fh_get_filename (r->fh), r->value_cnt, next_value);
997 /* Translates the format spec from sysfile format to internal
1000 parse_format_spec (struct sfm_reader *r, int32 s,
1001 struct fmt_spec *f, const struct variable *v)
1003 f->type = translate_fmt ((s >> 16) & 0xff);
1005 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1006 fh_get_filename (r->fh), (s >> 16) & 0xff));
1007 f->w = (s >> 8) & 0xff;
1010 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1011 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1012 fh_get_filename (r->fh),
1013 v->type == ALPHA ? _("String") : _("Numeric"),
1015 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1016 formats[f->type].name));
1018 if (!check_output_specifier (f, false)
1019 || !check_specifier_width (f, v->width, false))
1021 msg (ME, _("%s variable %s has invalid format specifier %s."),
1022 v->type == NUMERIC ? _("Numeric") : _("String"),
1023 v->name, fmt_to_string (f));
1024 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1032 /* Reads value labels from R's system file and inserts them into the
1033 associated dictionary.
   Works in two steps.  The type 3 record is read first: a label
   count followed, per label, by an 8-byte raw value, a one-byte
   label length and the padded label text.  The type 4 record that
   must follow it is read next; it lists the 1-based indexes into
   VAR_BY_IDX of the variables the labels apply to.  All listed
   variables must have the same type, long string variables are
   rejected, and a duplicate label for a value produces a warning.
   NOTE(review): the string branch computes its copy length as
   min (sizeof label->raw_value, sizeof label->label).  label->label
   is assigned from xmalloc() and so is a pointer, which makes the
   second operand the pointer size rather than a string capacity;
   where pointers are 4 bytes only half of the 8 raw bytes would be
   copied.  Presumably sizeof label->value.s was intended -- confirm. */
1035 read_value_labels (struct sfm_reader *r,
1036 struct dictionary *dict, struct variable **var_by_idx)
1040 char raw_value[8]; /* Value as uninterpreted bytes. */
1041 union value value; /* Value. */
1042 char *label; /* Null-terminated label string. */
1045 struct label *labels = NULL;
1046 int32 n_labels; /* Number of labels. */
1048 struct variable **var = NULL; /* Associated variables. */
1049 int32 n_vars; /* Number of associated variables. */
1053 /* First step: read the contents of the type 3 record and record its
1054 contents. Note that we can't do much with the data since we
1055 don't know yet whether it is of numeric or string type. */
1057 /* Read number of labels. */
1058 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1059 if (r->reverse_endian)
1060 bswap_int32 (&n_labels);
1062 if ( n_labels >= ((int32) ~0) / sizeof *labels)
1064 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1065 fh_get_filename (r->fh), n_labels);
1069 /* Allocate memory. */
1070 labels = xcalloc (n_labels, sizeof *labels);
1071 for (i = 0; i < n_labels; i++)
1072 labels[i].label = NULL;
1074 /* Read each value/label tuple into labels[]. */
1075 for (i = 0; i < n_labels; i++)
1077 struct label *label = labels + i;
1078 unsigned char label_len;
1082 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1084 /* Read label length. */
1085 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1086 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1088 /* Read label, padding. */
1089 label->label = xmalloc (padded_len + 1);
1090 assertive_buf_read (r, label->label, padded_len - 1, 0);
1091 label->label[label_len] = 0;
1094 /* Second step: Read the type 4 record that has the list of
1095 variables to which the value labels are to be applied. */
1097 /* Read record type of type 4 record. */
1101 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1102 if (r->reverse_endian)
1103 bswap_int32 (&rec_type);
1106 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1107 "follow value label record (type 3) as it should."),
1108 fh_get_filename (r->fh)));
1111 /* Read number of variables associated with value label from type 4
1113 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1114 if (r->reverse_endian)
1115 bswap_int32 (&n_vars);
1116 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1117 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1118 "is not between 1 and the number of variables (%d)."),
1119 fh_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1121 /* Read the list of variables. */
1122 var = xnmalloc (n_vars, sizeof *var);
1123 for (i = 0; i < n_vars; i++)
1128 /* Read variable index, check range. */
1129 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1130 if (r->reverse_endian)
1131 bswap_int32 (&var_idx);
1132 if (var_idx < 1 || var_idx > r->value_cnt)
1133 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1134 "not between 1 and the number of values (%d)."),
1135 fh_get_filename (r->fh), var_idx, r->value_cnt));
1137 /* Make sure it's a real variable. */
1138 v = var_by_idx[var_idx - 1];
1140 lose ((ME, _("%s: Variable index associated with value label (%d) "
1141 "refers to a continuation of a string variable, not to "
1142 "an actual variable."),
1143 fh_get_filename (r->fh), var_idx));
1144 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1145 lose ((ME, _("%s: Value labels are not allowed on long string "
1147 fh_get_filename (r->fh), v->name));
1149 /* Add it to the list of variables. */
1153 /* Type check the variables. */
1154 for (i = 1; i < n_vars; i++)
1155 if (var[i]->type != var[0]->type)
1156 lose ((ME, _("%s: Variables associated with value label are not all of "
1157 "identical type. Variable %s has %s type, but variable "
1159 fh_get_filename (r->fh),
1160 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1161 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1163 /* Fill in labels[].value, now that we know the desired type. */
1164 for (i = 0; i < n_labels; i++)
1166 struct label *label = labels + i;
1168 if (var[0]->type == ALPHA)
1170 const int copy_len = min (sizeof label->raw_value,
1171 sizeof label->label);
1172 memcpy (label->value.s, label->raw_value, copy_len);
1175 assert (sizeof f == sizeof label->raw_value);
1176 memcpy (&f, label->raw_value, sizeof f);
1177 if (r->reverse_endian)
1183 /* Assign the value_label's to each variable. */
1184 for (i = 0; i < n_vars; i++)
1186 struct variable *v = var[i];
1189 /* Add each label to the variable. */
1190 for (j = 0; j < n_labels; j++)
1192 struct label *label = labels + j;
1193 if (!val_labs_replace (v->val_labs, label->value, label->label))
1196 if (var[0]->type == NUMERIC)
1197 msg (MW, _("%s: File contains duplicate label for value %g for "
1199 fh_get_filename (r->fh), label->value.f, v->name);
1201 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1202 "for variable %s."),
1203 fh_get_filename (r->fh), v->width, label->value.s, v->name);
1207 for (i = 0; i < n_labels; i++)
1208 free (labels[i].label);
1216 for (i = 0; i < n_labels; i++)
1217 free (labels[i].label);
1224 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1225 non-NULL, uses that as the buffer; otherwise allocates at least
1226 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1229 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1233 if (buf == NULL && byte_cnt > 0 )
1234 buf = xmalloc (max (byte_cnt, min_alloc));
1236 if ( byte_cnt == 0 )
1240 if (1 != fread (buf, byte_cnt, 1, r->file))
1242 if (ferror (r->file))
1243 msg (ME, _("%s: Reading system file: %s."),
1244 fh_get_filename (r->fh), strerror (errno));
1246 corrupt_msg (ME, _("%s: Unexpected end of file."),
1247 fh_get_filename (r->fh));
1254 /* Winds the reader BYTE_CNT bytes back in the reader stream by
   seeking R's file relative to its current position.  BYTE_CNT must
   be nonzero (asserted).  A failed seek is reported with msg(),
   including strerror (errno).
   NOTE(review): the offset is written as -byte_cnt with BYTE_CNT a
   size_t, so the negation happens in unsigned arithmetic before the
   value is converted to fseek()'s long offset; writing
   -(long) byte_cnt would avoid relying on that conversion. */
1256 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1258 assert(byte_cnt > 0);
1260 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1262 msg (ME, _("%s: Seeking system file: %s."),
1263 fh_get_filename (r->fh), strerror (errno));
1267 /* Reads a document record, type 6, from system file R, and sets up
1268 the documents and n_documents fields in the associated
1271 read_documents (struct sfm_reader *r, struct dictionary *dict)
1276 if (dict_get_documents (dict) != NULL)
1277 lose ((ME, _("%s: System file contains multiple "
1278 "type 6 (document) records."),
1279 fh_get_filename (r->fh)));
1281 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1283 lose ((ME, _("%s: Number of document lines (%ld) "
1284 "must be greater than 0."),
1285 fh_get_filename (r->fh), (long) line_cnt));
1287 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1288 /* FIXME? Run through asciify. */
1289 if (documents == NULL)
1291 documents[80 * line_cnt] = '\0';
1292 dict_set_documents (dict, documents);
1302 /* Reads compressed data into H->BUF and sets other pointers
1303 appropriately. Returns nonzero only if both no errors occur and
1306 buffer_input (struct sfm_reader *r)
1313 r->buf = xnmalloc (128, sizeof *r->buf);
1314 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1315 if (ferror (r->file))
1317 msg (ME, _("%s: Error reading file: %s."),
1318 fh_get_filename (r->fh), strerror (errno));
1323 r->end = &r->buf[amt];
1327 /* Reads a single case consisting of compressed data from system
1328 file H into the array BUF[] according to reader R, and
1329 returns nonzero only if successful. */
1330 /* Data in system files is compressed in this manner. Data
1331 values are grouped into sets of eight ("octets"). Each value
1332 in an octet has one instruction byte that are output together.
1333 Each instruction byte gives a value for that byte or indicates
1334 that the value can be found following the instructions. */
1336 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1338 const unsigned char *p_end = r->x + sizeof (flt64);
1339 unsigned char *p = r->y;
1341 const flt64 *buf_beg = buf;
1342 const flt64 *buf_end = &buf[r->value_cnt];
1346 for (; p < p_end; p++){
1350 /* Code 0 is ignored. */
1353 /* Code 252 is end of file. */
1356 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1357 "in partial case."),
1358 fh_get_filename (r->fh)));
1360 /* Code 253 indicates that the value is stored explicitly
1361 following the instruction bytes. */
1362 if (r->ptr == NULL || r->ptr >= r->end)
1363 if (!buffer_input (r))
1364 lose ((ME, _("%s: Unexpected end of file."),
1365 fh_get_filename (r->fh)));
1366 memcpy (buf++, r->ptr++, sizeof *buf);
1371 /* Code 254 indicates a string that is all blanks. */
1372 memset (buf++, ' ', sizeof *buf);
1377 /* Code 255 indicates the system-missing value. */
1379 if (r->reverse_endian)
1386 /* Codes 1 through 251 inclusive are taken to indicate a
1387 value of (BYTE - BIAS), where BYTE is the byte's value
1388 and BIAS is the compression bias (generally 100.0). */
1389 *buf = *p - r->bias;
1390 if (r->reverse_endian)
1398 /* We have reached the end of this instruction octet. Read
1400 if (r->ptr == NULL || r->ptr >= r->end)
1402 if (!buffer_input (r))
1405 lose ((ME, _("%s: Unexpected end of file."),
1406 fh_get_filename (r->fh)));
1411 memcpy (r->x, r->ptr++, sizeof *buf);
1418 /* We have filled up an entire record. Update state and return
1429 /* Reads one case from R's file into C. Returns nonzero
1430 only if successful.
   Uncompressed files whose flt64 has the size of double are read
   straight into C's data and then fixed up in place (byte order of
   numeric values, file-specific SYSMIS).  Otherwise the case is
   read into a bounce buffer, through fread_ok() or
   read_compressed_data(), and copied value by value into C;
   entries of R->vars whose width is -1 are skipped in that copy.
   NOTE(review): the fast-path SYSMIS fix-up tests case_num (c, i)
   but stores through r->vars[i].fv; that is consistent only if
   fv == i for every numeric entry -- confirm. */
1432 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1437 if (!r->compressed && sizeof (flt64) == sizeof (double))
1439 /* Fast path: external and internal representations are the
1440 same, except possibly for endianness or SYSMIS. Read
1441 directly into the case's buffer, then fix up any minor
1442 details as needed. */
1443 if (!fread_ok (r, case_data_all_rw (c),
1444 sizeof (union value) * r->value_cnt))
1447 /* Fix up endianness if needed. */
1448 if (r->reverse_endian)
1452 for (i = 0; i < r->value_cnt; i++)
1453 if (r->vars[i].width == 0)
1454 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1457 /* Fix up SYSMIS values if needed.
1458 I don't think this will ever actually kick in, but it
1460 if (r->sysmis != SYSMIS)
1464 for (i = 0; i < r->value_cnt; i++)
1465 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1466 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1471 /* Slow path: internal and external representations differ.
1472 Read into a bounce buffer, then copy to C. */
1479 bounce_size = sizeof *bounce * r->value_cnt;
1480 bounce = bounce_cur = local_alloc (bounce_size);
1483 read_ok = fread_ok (r, bounce, bounce_size);
1485 read_ok = read_compressed_data (r, bounce);
1488 local_free (bounce);
1492 for (i = 0; i < r->value_cnt; i++)
1494 struct sfm_var *v = &r->vars[i];
1498 flt64 f = *bounce_cur++;
1499 if (r->reverse_endian)
1501 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1503 else if (v->width != -1)
1505 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1506 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1510 local_free (bounce);
1516 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1518 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1520 if (read_bytes == byte_cnt)
1524 if (ferror (r->file))
1526 msg (ME, _("%s: Reading system file: %s."),
1527 fh_get_filename (r->fh), strerror (errno));
1530 else if (read_bytes != 0)
1532 msg (ME, _("%s: Partial record at end of system file."),
1533 fh_get_filename (r->fh));
1540 /* Returns true if an I/O error has occurred on READER, false
1543 sfm_read_error (const struct sfm_reader *reader)
1548 /* Returns true if FILE is an SPSS system file,
1551 sfm_detect (FILE *file)
1553 struct sysfile_header hdr;
1555 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1557 if (strncmp ("$FL2", hdr.rec_type, 4))