1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <libpspp/alloc.h>
28 #include <libpspp/message.h>
29 #include <libpspp/compiler.h>
30 #include <libpspp/magic.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/str.h>
34 #include "sys-file-reader.h"
35 #include "sfm-private.h"
37 #include "dictionary.h"
38 #include "file-handle-def.h"
41 #include "value-labels.h"
45 #define _(msgid) gettext (msgid)
47 /* System file reader. */
50 struct file_handle *fh; /* File handle. */
51 FILE *file; /* File stream. */
53 int reverse_endian; /* 1=file has endianness opposite us. */
54 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
55 int value_cnt; /* Number of `union values's per case. */
56 long case_cnt; /* Number of cases, -1 if unknown. */
57 int compressed; /* 1=compressed, 0=not compressed. */
58 double bias; /* Compression bias, usually 100.0. */
59 int weight_idx; /* 0-based index of weighting variable, or -1. */
60 bool ok; /* False after an I/O error or corrupt data. */
63 struct sfm_var *vars; /* Variables. */
65 /* File's special constants. */
70 /* Decompression buffer. */
71 flt64 *buf; /* Buffer data. */
72 flt64 *ptr; /* Current location in buffer. */
73 flt64 *end; /* End of buffer data. */
75 /* Compression instruction octet. */
76 unsigned char x[8]; /* Current instruction octet. */
77 unsigned char *y; /* Location in current instruction octet. */
80 /* A variable in a system file. */
83 int width; /* 0=numeric, otherwise string width. */
84 int fv; /* Index into case. */
89 /* Swap bytes *A and *B. */
91 bswap (char *a, char *b)
/* Reverses, in place, the order of the four bytes that make up
   the 32-bit integer *X_. */
static inline void
bswap_int32 (int32_t *x_)
{
  char *bytes = (char *) x_;
  int lo, hi;

  /* Walk inward from both ends, exchanging bytes 0<->3 and then
     1<->2. */
  for (lo = 0, hi = 3; lo < hi; lo++, hi--)
    {
      char tmp = bytes[lo];
      bytes[lo] = bytes[hi];
      bytes[hi] = tmp;
    }
}
107 /* Reverse the byte order of 64-bit floating point *X. */
109 bswap_flt64 (flt64 *x_)
111 char *x = (char *) x_;
112 bswap (x + 0, x + 7);
113 bswap (x + 1, x + 6);
114 bswap (x + 2, x + 5);
115 bswap (x + 3, x + 4);
119 corrupt_msg (int class, const char *format,...)
120 PRINTF_FORMAT (2, 3);
122 /* Displays a corrupt sysfile error. */
124 corrupt_msg (int class, const char *format,...)
130 e.where.filename = NULL;
131 e.where.line_number = 0;
132 e.title = _("corrupt system file: ");
134 va_start (args, format);
135 err_vmsg (&e, format, args);
139 /* Closes a system file after we're done with it. */
141 sfm_close_reader (struct sfm_reader *r)
148 if (fn_close (fh_get_filename (r->fh), r->file) == EOF)
149 msg (ME, _("%s: Closing system file: %s."),
150 fh_get_filename (r->fh), strerror (errno));
155 fh_close (r->fh, "system file", "rs");
162 /* Dictionary reader. */
164 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
166 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
169 static int read_header (struct sfm_reader *,
170 struct dictionary *, struct sfm_read_info *);
171 static int parse_format_spec (struct sfm_reader *, int32_t,
172 struct fmt_spec *, const struct variable *);
173 static int read_value_labels (struct sfm_reader *, struct dictionary *,
174 struct variable **var_by_idx);
175 static int read_variables (struct sfm_reader *,
176 struct dictionary *, struct variable ***var_by_idx);
177 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
178 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
179 static int read_documents (struct sfm_reader *, struct dictionary *);
181 static int fread_ok (struct sfm_reader *, void *, size_t);
183 /* Displays the message X with corrupt_msg, then jumps to the error
191 /* Calls buf_read with the specified arguments, and jumps to
192 error if the read fails. */
193 #define assertive_buf_read(a,b,c,d) \
195 if (!buf_read (a,b,c,d)) \
199 /* Opens the system file designated by file handle FH for
200 reading. Reads the system file's dictionary into *DICT.
201 If INFO is non-null, then it receives additional info about the
204 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
205 struct sfm_read_info *info)
207 struct sfm_reader *r = NULL;
208 struct variable **var_by_idx = NULL;
210 *dict = dict_create ();
211 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
214 /* Create and initialize reader. */
215 r = xmalloc (sizeof *r);
217 r->file = fn_open (fh_get_filename (fh), "rb");
219 r->reverse_endian = 0;
230 r->sysmis = -FLT64_MAX;
231 r->highest = FLT64_MAX;
232 r->lowest = second_lowest_flt64;
234 r->buf = r->ptr = r->end = NULL;
235 r->y = r->x + sizeof r->x;
237 /* Check that file open succeeded. */
240 msg (ME, _("An error occurred while opening \"%s\" for reading "
241 "as a system file: %s."),
242 fh_get_filename (r->fh), strerror (errno));
246 /* Read header and variables. */
247 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
251 /* Handle weighting. */
252 if (r->weight_idx != -1)
254 struct variable *weight_var;
256 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
257 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
258 "and number of elements per case (%d)."),
259 fh_get_filename (r->fh), r->weight_idx, r->value_cnt));
262 weight_var = var_by_idx[r->weight_idx];
264 if (weight_var == NULL)
266 _("%s: Weighting variable may not be a continuation of "
267 "a long string variable."), fh_get_filename (fh)));
268 else if (weight_var->type == ALPHA)
269 lose ((ME, _("%s: Weighting variable may not be a string variable."),
270 fh_get_filename (fh)));
272 dict_set_weight (*dict, weight_var);
275 dict_set_weight (*dict, NULL);
277 /* Read records of types 3, 4, 6, and 7. */
282 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
283 if (r->reverse_endian)
284 bswap_int32 (&rec_type);
289 if (!read_value_labels (r, *dict, var_by_idx))
294 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
295 "records must always immediately follow type 3 "
297 fh_get_filename (r->fh)));
300 if (!read_documents (r, *dict))
317 assertive_buf_read (r, &data, sizeof data, 0);
318 if (r->reverse_endian)
320 bswap_int32 (&data.subtype);
321 bswap_int32 (&data.size);
322 bswap_int32 (&data.count);
324 bytes = data.size * data.count;
325 if (bytes < data.size || bytes < data.count)
326 lose ((ME, "%s: Record type %d subtype %d too large.",
327 fh_get_filename (r->fh), rec_type, data.subtype));
329 switch (data.subtype)
332 if (!read_machine_int32_info (r, data.size, data.count))
337 if (!read_machine_flt64_info (r, data.size, data.count))
342 case 6: /* ?? Used by SPSS 8.0. */
346 case 11: /* Variable display parameters */
348 const int n_vars = data.count / 3 ;
350 if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
352 msg (MW, _("%s: Invalid subrecord length. "
353 "Record: 7; Subrecord: 11"),
354 fh_get_filename (r->fh));
358 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
370 assertive_buf_read (r, ¶ms, sizeof(params), 0);
372 v = dict_get_var(*dict, i);
374 v->measure = params.measure;
375 v->display_width = params.width;
376 v->alignment = params.align;
381 case 13: /* SPSS 12.0 Long variable name map */
383 char *buf, *short_name, *save_ptr;
387 buf = xmalloc (bytes + 1);
388 if (!buf_read (r, buf, bytes, 0))
396 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
398 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
400 char *long_name = strtok_r (NULL, "\t", &save_ptr);
403 /* Validate long name. */
404 if (long_name == NULL)
406 msg (MW, _("%s: Trailing garbage in long variable "
408 fh_get_filename (r->fh));
411 if (!var_is_valid_name (long_name, false))
413 msg (MW, _("%s: Long variable mapping to invalid "
414 "variable name `%s'."),
415 fh_get_filename (r->fh), long_name);
419 /* Find variable using short name. */
420 v = dict_lookup_var (*dict, short_name);
423 msg (MW, _("%s: Long variable mapping for "
424 "nonexistent variable %s."),
425 fh_get_filename (r->fh), short_name);
429 /* Identify any duplicates. */
430 if ( compare_var_names(short_name, long_name, 0) &&
431 NULL != dict_lookup_var (*dict, long_name))
432 lose ((ME, _("%s: Duplicate long variable name `%s' "
433 "within system file."),
434 fh_get_filename (r->fh), long_name));
438 Renaming a variable may clear the short
439 name, but we want to retain it, so
440 re-set it explicitly. */
441 dict_rename_var (*dict, v, long_name);
442 var_set_short_name (v, short_name);
444 /* For compatibility, make sure dictionary
445 is in long variable name map order. In
446 the common case, this has no effect,
447 because the dictionary and the long
448 variable name map are already in the
450 dict_reorder_var (*dict, v, idx);
459 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
460 "encountered in system file."),
461 fh_get_filename (r->fh), data.subtype);
467 void *x = buf_read (r, NULL, data.size * data.count, 0);
479 assertive_buf_read (r, &filler, sizeof filler, 0);
484 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
485 fh_get_filename (r->fh), rec_type);
490 /* Come here on successful completion. */
495 /* Come here on unsuccessful completion. */
496 sfm_close_reader (r);
500 dict_destroy (*dict);
506 /* Read record type 7, subtype 3. */
508 read_machine_int32_info (struct sfm_reader *r, int size, int count)
515 if (size != sizeof (int32_t) || count != 8)
516 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
517 "subtype 3. Expected size %d, count 8."),
518 fh_get_filename (r->fh), size, count, sizeof (int32_t)));
520 assertive_buf_read (r, data, sizeof data, 0);
521 if (r->reverse_endian)
522 for (i = 0; i < 8; i++)
523 bswap_int32 (&data[i]);
527 lose ((ME, _("%s: Floating-point representation in system file is not "
528 "IEEE-754. PSPP cannot convert between floating-point "
530 fh_get_filename (r->fh)));
532 #error Add support for your floating-point format.
535 #ifdef WORDS_BIGENDIAN
540 if (r->reverse_endian)
542 if (file_bigendian ^ (data[6] == 1))
543 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
544 "endianness intuited from file header (%s)."),
545 fh_get_filename (r->fh),
546 file_bigendian ? _("big-endian") : _("little-endian"),
547 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
550 /* PORTME: Character representation code. */
551 if (data[7] != 2 && data[7] != 3)
552 lose ((ME, _("%s: File-indicated character representation code (%s) is "
554 fh_get_filename (r->fh),
555 (data[7] == 1 ? "EBCDIC"
556 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
564 /* Read record type 7, subtype 4. */
566 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
571 if (size != sizeof (flt64) || count != 3)
572 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
573 "subtype 4. Expected size %d, count 8."),
574 fh_get_filename (r->fh), size, count, sizeof (flt64)));
576 assertive_buf_read (r, data, sizeof data, 0);
577 if (r->reverse_endian)
578 for (i = 0; i < 3; i++)
579 bswap_flt64 (&data[i]);
581 if (data[0] != SYSMIS || data[1] != FLT64_MAX
582 || data[2] != second_lowest_flt64)
585 r->highest = data[1];
587 msg (MW, _("%s: File-indicated value is different from internal value "
588 "for at least one of the three system values. SYSMIS: "
589 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
591 fh_get_filename (r->fh), (double) data[0], (double) SYSMIS,
592 (double) data[1], (double) FLT64_MAX,
593 (double) data[2], (double) second_lowest_flt64);
603 read_header (struct sfm_reader *r,
604 struct dictionary *dict, struct sfm_read_info *info)
606 struct sysfile_header hdr; /* Disk buffer. */
607 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
608 int skip_amt = 0; /* Amount of product name to omit. */
611 /* Read header, check magic. */
612 assertive_buf_read (r, &hdr, sizeof hdr, 0);
613 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
614 lose ((ME, _("%s: Bad magic. Proper system files begin with "
615 "the four characters `$FL2'. This file will not be read."),
616 fh_get_filename (r->fh)));
618 /* Check eye-catcher string. */
619 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
620 for (i = 0; i < 60; i++)
621 if (!c_isprint ((unsigned char) prod_name[i]))
623 for (i = 59; i >= 0; i--)
624 if (!c_isgraph ((unsigned char) prod_name[i]))
629 prod_name[60] = '\0';
633 static const char *prefix[N_PREFIXES] =
635 "@(#) SPSS DATA FILE",
641 for (i = 0; i < N_PREFIXES; i++)
642 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
644 skip_amt = strlen (prefix[i]);
649 /* Check endianness. */
650 if (hdr.layout_code == 2)
651 r->reverse_endian = 0;
654 bswap_int32 (&hdr.layout_code);
655 if (hdr.layout_code != 2)
656 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
657 "should be 2, in big-endian or little-endian format."),
658 fh_get_filename (r->fh), hdr.layout_code));
660 r->reverse_endian = 1;
661 bswap_int32 (&hdr.case_size);
662 bswap_int32 (&hdr.compress);
663 bswap_int32 (&hdr.weight_idx);
664 bswap_int32 (&hdr.case_cnt);
665 bswap_flt64 (&hdr.bias);
669 /* Copy basic info and verify correctness. */
670 r->value_cnt = hdr.case_size;
672 /* If value count is ridiculous, then force it to -1 (a sentinel value) */
673 if ( r->value_cnt < 0 ||
674 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
677 r->compressed = hdr.compress;
679 r->weight_idx = hdr.weight_idx - 1;
681 r->case_cnt = hdr.case_cnt;
682 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
684 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
685 fh_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
688 if (r->bias != 100.0)
689 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
691 fh_get_filename (r->fh), r->bias);
693 /* Make a file label only on the condition that the given label is
694 not all spaces or nulls. */
698 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
700 if (!c_isspace ((unsigned char) hdr.file_label[i])
701 && hdr.file_label[i] != 0)
703 char *label = xmalloc (i + 2);
704 memcpy (label, hdr.file_label, i + 1);
706 dict_set_label (dict, label);
717 memcpy (info->creation_date, hdr.creation_date, 9);
718 info->creation_date[9] = 0;
720 memcpy (info->creation_time, hdr.creation_time, 8);
721 info->creation_time[8] = 0;
723 #ifdef WORDS_BIGENDIAN
724 info->big_endian = !r->reverse_endian;
726 info->big_endian = r->reverse_endian;
729 info->compressed = hdr.compress;
731 info->case_cnt = hdr.case_cnt;
733 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
734 if (c_isgraph ((unsigned char) *cp))
736 strcpy (info->product, cp);
745 /* Reads most of the dictionary from file H; also fills in the
746 associated VAR_BY_IDX array. */
748 read_variables (struct sfm_reader *r,
749 struct dictionary *dict, struct variable ***var_by_idx)
753 struct sysfile_variable sv; /* Disk buffer. */
754 int long_string_count = 0; /* # of long string continuation
755 records still expected. */
756 int next_value = 0; /* Index to next `value' structure. */
762 /* Pre-allocate variables. */
763 if (r->value_cnt != -1)
765 *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
766 r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
770 /* Read in the entry for each variable and use the info to
771 initialize the dictionary. */
775 char name[SHORT_NAME_LEN + 1];
779 if ( r->value_cnt != -1 && i >= r->value_cnt )
782 assertive_buf_read (r, &sv, sizeof sv, 0);
784 if (r->reverse_endian)
786 bswap_int32 (&sv.rec_type);
787 bswap_int32 (&sv.type);
788 bswap_int32 (&sv.has_var_label);
789 bswap_int32 (&sv.n_missing_values);
790 bswap_int32 (&sv.print);
791 bswap_int32 (&sv.write);
794 /* We've come to the end of the variable entries */
795 if (sv.rec_type != 2)
797 buf_unread(r, sizeof sv);
802 if ( -1 == r->value_cnt )
804 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
805 r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
808 /* If there was a long string previously, make sure that the
809 continuations are present; otherwise make sure there aren't
811 if (long_string_count)
814 lose ((ME, _("%s: position %d: String variable does not have "
815 "proper number of continuation records."),
816 fh_get_filename (r->fh), i));
819 r->vars[i].width = -1;
820 (*var_by_idx)[i] = NULL;
824 else if (sv.type == -1)
825 lose ((ME, _("%s: position %d: Superfluous long string continuation "
827 fh_get_filename (r->fh), i));
829 /* Check fields for validity. */
830 if (sv.type < 0 || sv.type > 255)
831 lose ((ME, _("%s: position %d: Bad variable type code %d."),
832 fh_get_filename (r->fh), i, sv.type));
833 if (sv.has_var_label != 0 && sv.has_var_label != 1)
834 lose ((ME, _("%s: position %d: Variable label indicator field is not "
835 "0 or 1."), fh_get_filename (r->fh), i));
836 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
837 || sv.n_missing_values == -1)
838 lose ((ME, _("%s: position %d: Missing value indicator field is not "
839 "-3, -2, 0, 1, 2, or 3."), fh_get_filename (r->fh), i));
841 /* Copy first character of variable name. */
842 if (sv.name[0] == '@' || sv.name[0] == '#')
843 lose ((ME, _("%s: position %d: Variable name begins with invalid "
845 fh_get_filename (r->fh), i));
847 name[0] = sv.name[0];
849 /* Copy remaining characters of variable name. */
850 for (j = 1; j < SHORT_NAME_LEN; j++)
852 int c = (unsigned char) sv.name[j];
861 if ( ! var_is_plausible_name(name, false) )
862 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
863 fh_get_filename (r->fh), name));
865 /* Create variable. */
866 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
868 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
869 fh_get_filename (r->fh), name));
871 var_set_short_name (vv, vv->name);
873 /* Case reading data. */
874 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
875 long_string_count = nv - 1;
878 /* Get variable label, if any. */
879 if (sv.has_var_label == 1)
884 /* Read length of label. */
885 assertive_buf_read (r, &len, sizeof len, 0);
886 if (r->reverse_endian)
890 if (len < 0 || len > 255)
891 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
893 fh_get_filename (r->fh), vv->name, len));
897 /* Read label into variable structure. */
898 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
899 if (vv->label == NULL)
901 vv->label[len] = '\0';
905 /* Set missing values. */
906 if (sv.n_missing_values != 0)
909 int mv_cnt = abs (sv.n_missing_values);
911 if (vv->width > MAX_SHORT_STRING)
912 lose ((ME, _("%s: Long string variable %s may not have missing "
914 fh_get_filename (r->fh), vv->name));
916 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
918 if (r->reverse_endian && vv->type == NUMERIC)
919 for (j = 0; j < mv_cnt; j++)
920 bswap_flt64 (&mv[j]);
922 if (sv.n_missing_values > 0)
924 for (j = 0; j < sv.n_missing_values; j++)
925 if (vv->type == NUMERIC)
926 mv_add_num (&vv->miss, mv[j]);
928 mv_add_str (&vv->miss, (char *) &mv[j]);
932 if (vv->type == ALPHA)
933 lose ((ME, _("%s: String variable %s may not have missing "
934 "values specified as a range."),
935 fh_get_filename (r->fh), vv->name));
937 if (mv[0] == r->lowest)
938 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
939 else if (mv[1] == r->highest)
940 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
942 mv_add_num_range (&vv->miss, mv[0], mv[1]);
944 if (sv.n_missing_values == -3)
945 mv_add_num (&vv->miss, mv[2]);
949 if (!parse_format_spec (r, sv.print, &vv->print, vv)
950 || !parse_format_spec (r, sv.write, &vv->write, vv))
953 r->vars[i].width = vv->width;
954 r->vars[i].fv = vv->fv;
958 /* Some consistency checks. */
959 if (long_string_count != 0)
960 lose ((ME, _("%s: Long string continuation records omitted at end of "
962 fh_get_filename (r->fh)));
964 if (next_value != r->value_cnt)
965 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
966 "%d were read from file."),
967 fh_get_filename (r->fh), r->value_cnt, next_value);
976 /* Translates the format spec from sysfile format to internal
979 parse_format_spec (struct sfm_reader *r, int32_t s,
980 struct fmt_spec *f, const struct variable *v)
982 f->type = translate_fmt ((s >> 16) & 0xff);
984 lose ((ME, _("%s: Bad format specifier byte (%d)."),
985 fh_get_filename (r->fh), (s >> 16) & 0xff));
986 f->w = (s >> 8) & 0xff;
989 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
990 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
991 fh_get_filename (r->fh),
992 v->type == ALPHA ? _("String") : _("Numeric"),
994 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
995 formats[f->type].name));
997 if (!check_output_specifier (f, false)
998 || !check_specifier_width (f, v->width, false))
1000 msg (ME, _("%s variable %s has invalid format specifier %s."),
1001 v->type == NUMERIC ? _("Numeric") : _("String"),
1002 v->name, fmt_to_string (f));
1003 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1011 /* Reads value labels from sysfile H and inserts them into the
1012 associated dictionary. */
1014 read_value_labels (struct sfm_reader *r,
1015 struct dictionary *dict, struct variable **var_by_idx)
1019 char raw_value[8]; /* Value as uninterpreted bytes. */
1020 union value value; /* Value. */
1021 char *label; /* Null-terminated label string. */
1024 struct label *labels = NULL;
1025 int32_t n_labels; /* Number of labels. */
1027 struct variable **var = NULL; /* Associated variables. */
1028 int32_t n_vars; /* Number of associated variables. */
1032 /* First step: read the contents of the type 3 record and record its
1033 contents. Note that we can't do much with the data since we
1034 don't know yet whether it is of numeric or string type. */
1036 /* Read number of labels. */
1037 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1038 if (r->reverse_endian)
1039 bswap_int32 (&n_labels);
1041 if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
1043 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1044 fh_get_filename (r->fh), n_labels);
1048 /* Allocate memory. */
1049 labels = xcalloc (n_labels, sizeof *labels);
1050 for (i = 0; i < n_labels; i++)
1051 labels[i].label = NULL;
1053 /* Read each value/label tuple into labels[]. */
1054 for (i = 0; i < n_labels; i++)
1056 struct label *label = labels + i;
1057 unsigned char label_len;
1061 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1063 /* Read label length. */
1064 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1065 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1067 /* Read label, padding. */
1068 label->label = xmalloc (padded_len + 1);
1069 assertive_buf_read (r, label->label, padded_len - 1, 0);
1070 label->label[label_len] = 0;
1073 /* Second step: Read the type 4 record that has the list of
1074 variables to which the value labels are to be applied. */
1076 /* Read record type of type 4 record. */
1080 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1081 if (r->reverse_endian)
1082 bswap_int32 (&rec_type);
1085 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1086 "follow value label record (type 3) as it should."),
1087 fh_get_filename (r->fh)));
1090 /* Read number of variables associated with value label from type 4
1092 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1093 if (r->reverse_endian)
1094 bswap_int32 (&n_vars);
1095 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1096 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1097 "is not between 1 and the number of variables (%d)."),
1098 fh_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1100 /* Read the list of variables. */
1101 var = xnmalloc (n_vars, sizeof *var);
1102 for (i = 0; i < n_vars; i++)
1107 /* Read variable index, check range. */
1108 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1109 if (r->reverse_endian)
1110 bswap_int32 (&var_idx);
1111 if (var_idx < 1 || var_idx > r->value_cnt)
1112 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1113 "not between 1 and the number of values (%d)."),
1114 fh_get_filename (r->fh), var_idx, r->value_cnt));
1116 /* Make sure it's a real variable. */
1117 v = var_by_idx[var_idx - 1];
1119 lose ((ME, _("%s: Variable index associated with value label (%d) "
1120 "refers to a continuation of a string variable, not to "
1121 "an actual variable."),
1122 fh_get_filename (r->fh), var_idx));
1123 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1124 lose ((ME, _("%s: Value labels are not allowed on long string "
1126 fh_get_filename (r->fh), v->name));
1128 /* Add it to the list of variables. */
1132 /* Type check the variables. */
1133 for (i = 1; i < n_vars; i++)
1134 if (var[i]->type != var[0]->type)
1135 lose ((ME, _("%s: Variables associated with value label are not all of "
1136 "identical type. Variable %s has %s type, but variable "
1138 fh_get_filename (r->fh),
1139 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1140 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1142 /* Fill in labels[].value, now that we know the desired type. */
1143 for (i = 0; i < n_labels; i++)
1145 struct label *label = labels + i;
1147 if (var[0]->type == ALPHA)
1149 const int copy_len = min (sizeof label->raw_value,
1150 sizeof label->label);
1151 memcpy (label->value.s, label->raw_value, copy_len);
1154 assert (sizeof f == sizeof label->raw_value);
1155 memcpy (&f, label->raw_value, sizeof f);
1156 if (r->reverse_endian)
1162 /* Assign the value_label's to each variable. */
1163 for (i = 0; i < n_vars; i++)
1165 struct variable *v = var[i];
1168 /* Add each label to the variable. */
1169 for (j = 0; j < n_labels; j++)
1171 struct label *label = labels + j;
1172 if (!val_labs_replace (v->val_labs, label->value, label->label))
1175 if (var[0]->type == NUMERIC)
1176 msg (MW, _("%s: File contains duplicate label for value %g for "
1178 fh_get_filename (r->fh), label->value.f, v->name);
1180 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1181 "for variable %s."),
1182 fh_get_filename (r->fh), v->width, label->value.s, v->name);
1186 for (i = 0; i < n_labels; i++)
1187 free (labels[i].label);
1195 for (i = 0; i < n_labels; i++)
1196 free (labels[i].label);
1203 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1204 non-NULL, uses that as the buffer; otherwise allocates at least
1205 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1208 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1212 if (buf == NULL && byte_cnt > 0 )
1213 buf = xmalloc (max (byte_cnt, min_alloc));
1215 if ( byte_cnt == 0 )
1219 if (1 != fread (buf, byte_cnt, 1, r->file))
1221 if (ferror (r->file))
1222 msg (ME, _("%s: Reading system file: %s."),
1223 fh_get_filename (r->fh), strerror (errno));
1225 corrupt_msg (ME, _("%s: Unexpected end of file."),
1226 fh_get_filename (r->fh));
1233 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1235 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1237 assert(byte_cnt > 0);
1239 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1241 msg (ME, _("%s: Seeking system file: %s."),
1242 fh_get_filename (r->fh), strerror (errno));
1246 /* Reads a document record, type 6, from system file R, and sets up
1247 the documents and n_documents fields in the associated
1250 read_documents (struct sfm_reader *r, struct dictionary *dict)
1255 if (dict_get_documents (dict) != NULL)
1256 lose ((ME, _("%s: System file contains multiple "
1257 "type 6 (document) records."),
1258 fh_get_filename (r->fh)));
1260 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1262 lose ((ME, _("%s: Number of document lines (%ld) "
1263 "must be greater than 0."),
1264 fh_get_filename (r->fh), (long) line_cnt));
1266 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1267 /* FIXME? Run through asciify. */
1268 if (documents == NULL)
1270 documents[80 * line_cnt] = '\0';
1271 dict_set_documents (dict, documents);
1281 /* Reads compressed data into H->BUF and sets other pointers
1282 appropriately. Returns nonzero only if both no errors occur and
1285 buffer_input (struct sfm_reader *r)
1292 r->buf = xnmalloc (128, sizeof *r->buf);
1293 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1294 if (ferror (r->file))
1296 msg (ME, _("%s: Error reading file: %s."),
1297 fh_get_filename (r->fh), strerror (errno));
1302 r->end = &r->buf[amt];
1306 /* Reads a single case consisting of compressed data from system
1307 file H into the array BUF[] according to reader R, and
1308 returns nonzero only if successful. */
1309 /* Data in system files is compressed in this manner. Data
1310 values are grouped into sets of eight ("octets"). Each value
1311 in an octet has one instruction byte that are output together.
1312 Each instruction byte gives a value for that byte or indicates
1313 that the value can be found following the instructions. */
1315 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1317 const unsigned char *p_end = r->x + sizeof (flt64);
1318 unsigned char *p = r->y;
1320 const flt64 *buf_beg = buf;
1321 const flt64 *buf_end = &buf[r->value_cnt];
1325 for (; p < p_end; p++){
1329 /* Code 0 is ignored. */
1332 /* Code 252 is end of file. */
1335 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1336 "in partial case."),
1337 fh_get_filename (r->fh)));
1339 /* Code 253 indicates that the value is stored explicitly
1340 following the instruction bytes. */
1341 if (r->ptr == NULL || r->ptr >= r->end)
1342 if (!buffer_input (r))
1343 lose ((ME, _("%s: Unexpected end of file."),
1344 fh_get_filename (r->fh)));
1345 memcpy (buf++, r->ptr++, sizeof *buf);
1350 /* Code 254 indicates a string that is all blanks. */
1351 memset (buf++, ' ', sizeof *buf);
1356 /* Code 255 indicates the system-missing value. */
1358 if (r->reverse_endian)
1365 /* Codes 1 through 251 inclusive are taken to indicate a
1366 value of (BYTE - BIAS), where BYTE is the byte's value
1367 and BIAS is the compression bias (generally 100.0). */
1368 *buf = *p - r->bias;
1369 if (r->reverse_endian)
1377 /* We have reached the end of this instruction octet. Read
1379 if (r->ptr == NULL || r->ptr >= r->end)
1381 if (!buffer_input (r))
1384 lose ((ME, _("%s: Unexpected end of file."),
1385 fh_get_filename (r->fh)));
1390 memcpy (r->x, r->ptr++, sizeof *buf);
1397 /* We have filled up an entire record. Update state and return
1408 /* Reads one case from READER's file into C. Returns nonzero
1409 only if successful. */
1411 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1416 if (!r->compressed && sizeof (flt64) == sizeof (double))
1418 /* Fast path: external and internal representations are the
1419 same, except possibly for endianness or SYSMIS. Read
1420 directly into the case's buffer, then fix up any minor
1421 details as needed. */
1422 if (!fread_ok (r, case_data_all_rw (c),
1423 sizeof (union value) * r->value_cnt))
1426 /* Fix up endianness if needed. */
1427 if (r->reverse_endian)
1431 for (i = 0; i < r->value_cnt; i++)
1432 if (r->vars[i].width == 0)
1433 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1436 /* Fix up SYSMIS values if needed.
1437 I don't think this will ever actually kick in, but it
1439 if (r->sysmis != SYSMIS)
1443 for (i = 0; i < r->value_cnt; i++)
1444 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1445 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1450 /* Slow path: internal and external representations differ.
1451 Read into a bounce buffer, then copy to C. */
1458 bounce_size = sizeof *bounce * r->value_cnt;
1459 bounce = bounce_cur = local_alloc (bounce_size);
1462 read_ok = fread_ok (r, bounce, bounce_size);
1464 read_ok = read_compressed_data (r, bounce);
1467 local_free (bounce);
1471 for (i = 0; i < r->value_cnt; i++)
1473 struct sfm_var *v = &r->vars[i];
1477 flt64 f = *bounce_cur++;
1478 if (r->reverse_endian)
1480 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1482 else if (v->width != -1)
1484 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1485 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1489 local_free (bounce);
1495 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1497 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1499 if (read_bytes == byte_cnt)
1503 if (ferror (r->file))
1505 msg (ME, _("%s: Reading system file: %s."),
1506 fh_get_filename (r->fh), strerror (errno));
1509 else if (read_bytes != 0)
1511 msg (ME, _("%s: Partial record at end of system file."),
1512 fh_get_filename (r->fh));
1519 /* Returns true if an I/O error has occurred on READER, false
1522 sfm_read_error (const struct sfm_reader *reader)
1527 /* Returns true if FILE is an SPSS system file,
1530 sfm_detect (FILE *file)
1532 struct sysfile_header hdr;
1534 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1536 if (strncmp ("$FL2", hdr.rec_type, 4))