1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <libpspp/alloc.h>
28 #include <libpspp/message.h>
29 #include <libpspp/compiler.h>
30 #include <libpspp/magic.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/str.h>
34 #include "sys-file-reader.h"
35 #include "sfm-private.h"
37 #include "dictionary.h"
38 #include "file-handle-def.h"
41 #include "value-labels.h"
45 #define _(msgid) gettext (msgid)
47 #include <libpspp/debug-print.h>
49 /* System file reader.  Holds the open file, the format details
   learned from the header, and the decompression state. */
52 struct file_handle *fh; /* File handle. */
53 FILE *file; /* File stream. */
55 int reverse_endian; /* 1=file has endianness opposite us. */
56 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
57 int value_cnt; /* Number of `union value's per case, or -1 if the header's count was out of range. */
58 long case_cnt; /* Number of cases, -1 if unknown. */
59 int compressed; /* 1=compressed, 0=not compressed. */
60 double bias; /* Compression bias, usually 100.0. */
61 int weight_idx; /* 0-based index of weighting variable, or -1. */
62 bool ok; /* False after an I/O error or corrupt data. */
65 struct sfm_var *vars; /* Per-position variable info, indexed by value position. */
67 /* File's special constants. */
72 /* Decompression buffer. */
73 flt64 *buf; /* Buffer data. */
74 flt64 *ptr; /* Current location in buffer. */
75 flt64 *end; /* End of buffer data. */
77 /* Compression instruction octet. */
78 unsigned char x[8]; /* Current instruction octet. */
79 unsigned char *y; /* Location in current instruction octet; starts at the end of X so that the first read fetches a new octet. */
82 /* A variable in a system file.  One entry exists per value
   position, including long string continuation positions. */
85 int width; /* 0=numeric, -1=long string continuation, otherwise string width. */
86 int fv; /* Index into case. */
91 /* Swap bytes *A and *B. */
93 bswap (char *a, char *b)
100 /* Reverses the byte order of the 32-bit integer *X_ in place,
   by exchanging its outer byte pair and then its inner byte pair. */
102 bswap_int32 (int32 *x_)
104 char *x = (char *) x_;
105 bswap (x + 0, x + 3);
106 bswap (x + 1, x + 2);
109 /* Reverses the byte order of the 64-bit floating point value *X_
   in place, exchanging byte pairs from the outside inward. */
111 bswap_flt64 (flt64 *x_)
113 char *x = (char *) x_;
114 bswap (x + 0, x + 7);
115 bswap (x + 1, x + 6);
116 bswap (x + 2, x + 5);
117 bswap (x + 3, x + 4);
/* Prototype first, so that the printf-style format attribute can be
   attached to the declaration. */
121 corrupt_msg (int class, const char *format,...)
122 PRINTF_FORMAT (2, 3);
124 /* Displays a corrupt sysfile error.  FORMAT and the arguments
   after it are printf-style; they are handed to err_vmsg along
   with an error record whose title is "corrupt system file: " and
   which carries no file name or line number.  Callers in this file
   pass ME or MW as CLASS (presumably stored in the record on a
   line not shown here). */
126 corrupt_msg (int class, const char *format,...)
132 e.where.filename = NULL;
133 e.where.line_number = 0;
134 e.title = _("corrupt system file: ");
136 va_start (args, format);
137 err_vmsg (&e, format, args);
141 /* Closes a system file after we're done with it.  Closes R's
   stream with fn_close, reporting a failure through msg, and then
   releases R's file handle with fh_close. */
143 sfm_close_reader (struct sfm_reader *r)
150 if (fn_close (fh_get_filename (r->fh), r->file) == EOF)
151 msg (ME, _("%s: Closing system file: %s."),
152 fh_get_filename (r->fh), strerror (errno));
157 fh_close (r->fh, "system file", "rs");
164 /* Dictionary reader. */
166 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
168 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
171 static int read_header (struct sfm_reader *,
172 struct dictionary *, struct sfm_read_info *);
173 static int parse_format_spec (struct sfm_reader *, int32,
174 struct fmt_spec *, const struct variable *);
175 static int read_value_labels (struct sfm_reader *, struct dictionary *,
176 struct variable **var_by_idx);
177 static int read_variables (struct sfm_reader *,
178 struct dictionary *, struct variable ***var_by_idx);
179 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
180 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
181 static int read_documents (struct sfm_reader *, struct dictionary *);
183 static int fread_ok (struct sfm_reader *, void *, size_t);
185 /* Displays the message X with corrupt_msg, then jumps to the error
193 /* Calls buf_read with the specified arguments, and jumps to
194 error if the read fails. */
195 #define assertive_buf_read(a,b,c,d) \
197 if (!buf_read (a,b,c,d)) \
/* Overview of sfm_open_reader: creates a fresh dictionary in *DICT,
   opens FH, allocates and initializes the reader, reads the header
   and the variable records, validates the weighting variable
   index, and then loops over the remaining dictionary records
   (value labels, documents, and type 7 extension records).  On
   failure the reader is closed with sfm_close_reader and *DICT is
   destroyed. */
201 /* Opens the system file designated by file handle FH for
202 reading. Reads the system file's dictionary into *DICT.
203 If INFO is non-null, then it receives additional info about the
206 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
207 struct sfm_read_info *info)
209 struct sfm_reader *r = NULL;
210 struct variable **var_by_idx = NULL;
212 *dict = dict_create ();
213 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
216 /* Create and initialize reader. */
217 r = xmalloc (sizeof *r);
219 r->file = fn_open (fh_get_filename (fh), "rb");
221 r->reverse_endian = 0;
232 r->sysmis = -FLT64_MAX;
233 r->highest = FLT64_MAX;
234 r->lowest = second_lowest_flt64;
236 r->buf = r->ptr = r->end = NULL;
237 r->y = r->x + sizeof r->x;
239 /* Check that file open succeeded. */
242 msg (ME, _("An error occurred while opening \"%s\" for reading "
243 "as a system file: %s."),
244 fh_get_filename (r->fh), strerror (errno));
248 /* Read header and variables. */
249 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
253 /* Handle weighting. */
254 if (r->weight_idx != -1)
256 struct variable *weight_var;
258 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
259 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
260 "and number of elements per case (%d)."),
261 fh_get_filename (r->fh), r->weight_idx, r->value_cnt));
264 weight_var = var_by_idx[r->weight_idx];
266 if (weight_var == NULL)
268 _("%s: Weighting variable may not be a continuation of "
269 "a long string variable."), fh_get_filename (fh)));
270 else if (weight_var->type == ALPHA)
271 lose ((ME, _("%s: Weighting variable may not be a string variable."),
272 fh_get_filename (fh)));
274 dict_set_weight (*dict, weight_var);
277 dict_set_weight (*dict, NULL);
279 /* Read records of types 3, 4, 6, and 7. */
284 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
285 if (r->reverse_endian)
286 bswap_int32 (&rec_type);
291 if (!read_value_labels (r, *dict, var_by_idx))
296 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
297 "records must always immediately follow type 3 "
299 fh_get_filename (r->fh)));
302 if (!read_documents (r, *dict))
319 assertive_buf_read (r, &data, sizeof data, 0);
320 if (r->reverse_endian)
322 bswap_int32 (&data.subtype);
323 bswap_int32 (&data.size);
324 bswap_int32 (&data.count);
326 bytes = data.size * data.count;
327 if (bytes < data.size || bytes < data.count)
328 lose ((ME, "%s: Record type %d subtype %d too large.",
329 fh_get_filename (r->fh), rec_type, data.subtype));
331 switch (data.subtype)
334 if (!read_machine_int32_info (r, data.size, data.count))
339 if (!read_machine_flt64_info (r, data.size, data.count))
344 case 6: /* ?? Used by SPSS 8.0. */
348 case 11: /* Variable display parameters */
350 const int n_vars = data.count / 3 ;
352 if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
354 msg (MW, _("%s: Invalid subrecord length. "
355 "Record: 7; Subrecord: 11"),
356 fh_get_filename (r->fh));
360 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
/* Read one variable's display parameters into the local record. */
372 assertive_buf_read (r, &params, sizeof(params), 0);
374 v = dict_get_var(*dict, i);
376 v->measure = params.measure;
377 v->display_width = params.width;
378 v->alignment = params.align;
383 case 13: /* SPSS 12.0 Long variable name map */
385 char *buf, *short_name, *save_ptr;
389 buf = xmalloc (bytes + 1);
390 if (!buf_read (r, buf, bytes, 0))
398 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
400 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
402 char *long_name = strtok_r (NULL, "\t", &save_ptr);
405 /* Validate long name. */
406 if (long_name == NULL)
408 msg (MW, _("%s: Trailing garbage in long variable "
410 fh_get_filename (r->fh));
413 if (!var_is_valid_name (long_name, false))
415 msg (MW, _("%s: Long variable mapping to invalid "
416 "variable name `%s'."),
417 fh_get_filename (r->fh), long_name);
421 /* Find variable using short name. */
422 v = dict_lookup_var (*dict, short_name);
425 msg (MW, _("%s: Long variable mapping for "
426 "nonexistent variable %s."),
427 fh_get_filename (r->fh), short_name);
431 /* Identify any duplicates. */
432 if ( compare_var_names(short_name, long_name, 0) &&
433 NULL != dict_lookup_var (*dict, long_name))
434 lose ((ME, _("%s: Duplicate long variable name `%s' "
435 "within system file."),
436 fh_get_filename (r->fh), long_name));
440 Renaming a variable may clear the short
441 name, but we want to retain it, so
442 re-set it explicitly. */
443 dict_rename_var (*dict, v, long_name);
444 var_set_short_name (v, short_name);
446 /* For compatibility, make sure dictionary
447 is in long variable name map order. In
448 the common case, this has no effect,
449 because the dictionary and the long
450 variable name map are already in the
452 dict_reorder_var (*dict, v, idx);
461 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
462 "encountered in system file."),
463 fh_get_filename (r->fh), data.subtype);
469 void *x = buf_read (r, NULL, data.size * data.count, 0);
481 assertive_buf_read (r, &filler, sizeof filler, 0);
486 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
487 fh_get_filename (r->fh), rec_type);
492 /* Come here on successful completion. */
497 /* Come here on unsuccessful completion. */
498 sfm_close_reader (r);
502 dict_destroy (*dict);
508 /* Read record type 7, subtype 3: eight int32 values describing
   the machine that wrote the file.  SIZE and COUNT come from the
   record header and must be sizeof (int32) and 8.  The visible
   checks use element 6 (endianness: 1=big, 2=little) and element 7
   (character representation code; only 2 and 3 are accepted). */
510 read_machine_int32_info (struct sfm_reader *r, int size, int count)
517 if (size != sizeof (int32) || count != 8)
518 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
519 "subtype 3. Expected size %d, count 8."),
/* sizeof yields a size_t; cast it to match the %d conversion. */
520 fh_get_filename (r->fh), size, count, (int) sizeof (int32)));
522 assertive_buf_read (r, data, sizeof data, 0);
523 if (r->reverse_endian)
524 for (i = 0; i < 8; i++)
525 bswap_int32 (&data[i]);
529 lose ((ME, _("%s: Floating-point representation in system file is not "
530 "IEEE-754. PSPP cannot convert between floating-point "
532 fh_get_filename (r->fh)));
534 #error Add support for your floating-point format.
537 #ifdef WORDS_BIGENDIAN
542 if (r->reverse_endian)
544 if (file_bigendian ^ (data[6] == 1))
545 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
546 "endianness intuited from file header (%s)."),
547 fh_get_filename (r->fh),
548 file_bigendian ? _("big-endian") : _("little-endian"),
549 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
552 /* PORTME: Character representation code. */
553 if (data[7] != 2 && data[7] != 3)
554 lose ((ME, _("%s: File-indicated character representation code (%s) is "
556 fh_get_filename (r->fh),
557 (data[7] == 1 ? "EBCDIC"
558 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
566 /* Read record type 7, subtype 4: three flt64 values giving the
   file's SYSMIS, HIGHEST and LOWEST constants, in that order.
   SIZE and COUNT come from the record header and must be
   sizeof (flt64) and 3.  When any value differs from the internal
   one, the file's values are recorded in R and a warning is
   issued. */
568 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
573 if (size != sizeof (flt64) || count != 3)
574 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
575 "subtype 4. Expected size %d, count 3."),
/* sizeof yields a size_t; cast it to match the %d conversion. */
576 fh_get_filename (r->fh), size, count, (int) sizeof (flt64)));
578 assertive_buf_read (r, data, sizeof data, 0);
579 if (r->reverse_endian)
580 for (i = 0; i < 3; i++)
581 bswap_flt64 (&data[i]);
583 if (data[0] != SYSMIS || data[1] != FLT64_MAX
584 || data[2] != second_lowest_flt64)
587 r->highest = data[1];
589 msg (MW, _("%s: File-indicated value is different from internal value "
590 "for at least one of the three system values. SYSMIS: "
591 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
593 fh_get_filename (r->fh), (double) data[0], (double) SYSMIS,
594 (double) data[1], (double) FLT64_MAX,
595 (double) data[2], (double) second_lowest_flt64);
/* Reads the file header record from R's file into a local
   sysfile_header.  Checks the `$FL2' magic, cleans up the product
   name, and determines endianness from the layout code,
   byte-swapping the numeric header fields when the file's byte
   order is reversed.  Case size, compression flag, weight index and
   case count are copied into R; a compression bias other than 100.0
   only draws a warning.  A file label that is not all spaces or
   nulls is stored in DICT.  The creation date and time, endianness,
   compression flag, case count and product name are copied into
   INFO (any guard for a null INFO is on lines not shown here). */
605 read_header (struct sfm_reader *r,
606 struct dictionary *dict, struct sfm_read_info *info)
608 struct sysfile_header hdr; /* Disk buffer. */
609 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
610 int skip_amt = 0; /* Amount of product name to omit. */
613 /* Read header, check magic. */
614 assertive_buf_read (r, &hdr, sizeof hdr, 0);
615 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
616 lose ((ME, _("%s: Bad magic. Proper system files begin with "
617 "the four characters `$FL2'. This file will not be read."),
618 fh_get_filename (r->fh)));
620 /* Check eye-catcher string. */
621 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
622 for (i = 0; i < 60; i++)
623 if (!c_isprint ((unsigned char) prod_name[i]))
625 for (i = 59; i >= 0; i--)
626 if (!c_isgraph ((unsigned char) prod_name[i]))
631 prod_name[60] = '\0';
635 static const char *prefix[N_PREFIXES] =
637 "@(#) SPSS DATA FILE",
643 for (i = 0; i < N_PREFIXES; i++)
644 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
646 skip_amt = strlen (prefix[i]);
651 /* Check endianness. */
652 if (hdr.layout_code == 2)
653 r->reverse_endian = 0;
656 bswap_int32 (&hdr.layout_code);
657 if (hdr.layout_code != 2)
658 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
659 "should be 2, in big-endian or little-endian format."),
660 fh_get_filename (r->fh), hdr.layout_code));
662 r->reverse_endian = 1;
663 bswap_int32 (&hdr.case_size);
664 bswap_int32 (&hdr.compress);
665 bswap_int32 (&hdr.weight_idx);
666 bswap_int32 (&hdr.case_cnt);
667 bswap_flt64 (&hdr.bias);
671 /* Copy basic info and verify correctness. */
672 r->value_cnt = hdr.case_size;
674 /* If value count is ridiculous, then force it to -1 (a sentinel value) */
675 if ( r->value_cnt < 0 ||
676 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
679 r->compressed = hdr.compress;
681 r->weight_idx = hdr.weight_idx - 1;
683 r->case_cnt = hdr.case_cnt;
684 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
686 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
687 fh_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
690 if (r->bias != 100.0)
691 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
693 fh_get_filename (r->fh), r->bias);
695 /* Make a file label only on the condition that the given label is
696 not all spaces or nulls. */
700 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
702 if (!c_isspace ((unsigned char) hdr.file_label[i])
703 && hdr.file_label[i] != 0)
705 char *label = xmalloc (i + 2);
706 memcpy (label, hdr.file_label, i + 1);
708 dict_set_label (dict, label);
719 memcpy (info->creation_date, hdr.creation_date, 9);
720 info->creation_date[9] = 0;
722 memcpy (info->creation_time, hdr.creation_time, 8);
723 info->creation_time[8] = 0;
725 #ifdef WORDS_BIGENDIAN
726 info->big_endian = !r->reverse_endian;
728 info->big_endian = r->reverse_endian;
731 info->compressed = hdr.compress;
733 info->case_cnt = hdr.case_cnt;
735 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
736 if (c_isgraph ((unsigned char) *cp))
738 strcpy (info->product, cp);
747 /* Reads most of the dictionary from R's file; also fills in the
748 associated VAR_BY_IDX array.
   Each type 2 record yields either a new variable in DICT or, for
   a long string, a continuation position whose VAR_BY_IDX entry is
   NULL and whose recorded width is -1.  Reading stops at the first
   record whose type is not 2, which is pushed back with
   buf_unread.  When R's value count is -1 the two arrays are grown
   one element at a time instead of being allocated up front. */
750 read_variables (struct sfm_reader *r,
751 struct dictionary *dict, struct variable ***var_by_idx)
755 struct sysfile_variable sv; /* Disk buffer. */
756 int long_string_count = 0; /* # of long string continuation
757 records still expected. */
758 int next_value = 0; /* Index to next `value' structure. */
764 /* Pre-allocate variables. */
765 if (r->value_cnt != -1)
767 *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
768 r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
772 /* Read in the entry for each variable and use the info to
773 initialize the dictionary. */
777 char name[SHORT_NAME_LEN + 1];
781 if ( r->value_cnt != -1 && i >= r->value_cnt )
784 assertive_buf_read (r, &sv, sizeof sv, 0);
786 if (r->reverse_endian)
788 bswap_int32 (&sv.rec_type);
789 bswap_int32 (&sv.type);
790 bswap_int32 (&sv.has_var_label);
791 bswap_int32 (&sv.n_missing_values);
792 bswap_int32 (&sv.print);
793 bswap_int32 (&sv.write);
796 /* We've come to the end of the variable entries */
797 if (sv.rec_type != 2)
799 buf_unread(r, sizeof sv);
804 if ( -1 == r->value_cnt )
806 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
807 r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
810 /* If there was a long string previously, make sure that the
811 continuations are present; otherwise make sure there aren't
813 if (long_string_count)
816 lose ((ME, _("%s: position %d: String variable does not have "
817 "proper number of continuation records."),
818 fh_get_filename (r->fh), i));
821 r->vars[i].width = -1;
822 (*var_by_idx)[i] = NULL;
826 else if (sv.type == -1)
827 lose ((ME, _("%s: position %d: Superfluous long string continuation "
829 fh_get_filename (r->fh), i));
831 /* Check fields for validity. */
832 if (sv.type < 0 || sv.type > 255)
833 lose ((ME, _("%s: position %d: Bad variable type code %d."),
834 fh_get_filename (r->fh), i, sv.type));
835 if (sv.has_var_label != 0 && sv.has_var_label != 1)
836 lose ((ME, _("%s: position %d: Variable label indicator field is not "
837 "0 or 1."), fh_get_filename (r->fh), i));
838 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
839 || sv.n_missing_values == -1)
840 lose ((ME, _("%s: position %d: Missing value indicator field is not "
841 "-3, -2, 0, 1, 2, or 3."), fh_get_filename (r->fh), i));
843 /* Copy first character of variable name. */
844 if (sv.name[0] == '@' || sv.name[0] == '#')
845 lose ((ME, _("%s: position %d: Variable name begins with invalid "
847 fh_get_filename (r->fh), i));
849 name[0] = sv.name[0];
851 /* Copy remaining characters of variable name. */
852 for (j = 1; j < SHORT_NAME_LEN; j++)
854 int c = (unsigned char) sv.name[j];
863 if ( ! var_is_plausible_name(name, false) )
864 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
865 fh_get_filename (r->fh), name));
867 /* Create variable. */
868 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
870 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
871 fh_get_filename (r->fh), name));
873 var_set_short_name (vv, vv->name);
875 /* Case reading data. */
876 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
877 long_string_count = nv - 1;
880 /* Get variable label, if any. */
881 if (sv.has_var_label == 1)
886 /* Read length of label. */
887 assertive_buf_read (r, &len, sizeof len, 0);
888 if (r->reverse_endian)
892 if (len < 0 || len > 255)
893 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
895 fh_get_filename (r->fh), vv->name, len));
899 /* Read label into variable structure. */
900 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
901 if (vv->label == NULL)
903 vv->label[len] = '\0';
907 /* Set missing values. */
908 if (sv.n_missing_values != 0)
911 int mv_cnt = abs (sv.n_missing_values);
913 if (vv->width > MAX_SHORT_STRING)
914 lose ((ME, _("%s: Long string variable %s may not have missing "
916 fh_get_filename (r->fh), vv->name));
918 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
920 if (r->reverse_endian && vv->type == NUMERIC)
921 for (j = 0; j < mv_cnt; j++)
922 bswap_flt64 (&mv[j]);
924 if (sv.n_missing_values > 0)
926 for (j = 0; j < sv.n_missing_values; j++)
927 if (vv->type == NUMERIC)
928 mv_add_num (&vv->miss, mv[j]);
930 mv_add_str (&vv->miss, (char *) &mv[j]);
934 if (vv->type == ALPHA)
935 lose ((ME, _("%s: String variable %s may not have missing "
936 "values specified as a range."),
937 fh_get_filename (r->fh), vv->name));
939 if (mv[0] == r->lowest)
940 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
941 else if (mv[1] == r->highest)
942 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
944 mv_add_num_range (&vv->miss, mv[0], mv[1]);
946 if (sv.n_missing_values == -3)
947 mv_add_num (&vv->miss, mv[2]);
951 if (!parse_format_spec (r, sv.print, &vv->print, vv)
952 || !parse_format_spec (r, sv.write, &vv->write, vv))
955 r->vars[i].width = vv->width;
956 r->vars[i].fv = vv->fv;
960 /* Some consistency checks. */
961 if (long_string_count != 0)
962 lose ((ME, _("%s: Long string continuation records omitted at end of "
964 fh_get_filename (r->fh)));
966 if (next_value != r->value_cnt)
967 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
968 "%d were read from file."),
969 fh_get_filename (r->fh), r->value_cnt, next_value);
/* Decodes the packed format S for variable V into *F: bits 16-23
   select the format type (through translate_fmt) and bits 8-15 the
   width.  A format whose string/numeric category disagrees with
   V's type is a fatal error.  A format rejected by
   check_output_specifier or check_specifier_width is reported and
   replaced by a default: f8_2 for numeric variables, an A format of
   V's width for strings. */
978 /* Translates the format spec from sysfile format to internal
981 parse_format_spec (struct sfm_reader *r, int32 s,
982 struct fmt_spec *f, const struct variable *v)
984 f->type = translate_fmt ((s >> 16) & 0xff);
986 lose ((ME, _("%s: Bad format specifier byte (%d)."),
987 fh_get_filename (r->fh), (s >> 16) & 0xff));
988 f->w = (s >> 8) & 0xff;
991 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
992 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
993 fh_get_filename (r->fh),
994 v->type == ALPHA ? _("String") : _("Numeric"),
996 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
997 formats[f->type].name));
999 if (!check_output_specifier (f, false)
1000 || !check_specifier_width (f, v->width, false))
1002 msg (ME, _("%s variable %s has invalid format specifier %s."),
1003 v->type == NUMERIC ? _("Numeric") : _("String"),
1004 v->name, fmt_to_string (f));
1005 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1013 /* Reads value labels from sysfile H and inserts them into the
1014 associated dictionary. */
1016 read_value_labels (struct sfm_reader *r,
1017 struct dictionary *dict, struct variable **var_by_idx)
1021 char raw_value[8]; /* Value as uninterpreted bytes. */
1022 union value value; /* Value. */
1023 char *label; /* Null-terminated label string. */
1026 struct label *labels = NULL;
1027 int32 n_labels; /* Number of labels. */
1029 struct variable **var = NULL; /* Associated variables. */
1030 int32 n_vars; /* Number of associated variables. */
1034 /* First step: read the contents of the type 3 record and record its
1035 contents. Note that we can't do much with the data since we
1036 don't know yet whether it is of numeric or string type. */
1038 /* Read number of labels. */
1039 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1040 if (r->reverse_endian)
1041 bswap_int32 (&n_labels);
1043 if ( n_labels >= ((int32) ~0) / sizeof *labels)
1045 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1046 fh_get_filename (r->fh), n_labels);
1050 /* Allocate memory. */
1051 labels = xcalloc (n_labels, sizeof *labels);
1052 for (i = 0; i < n_labels; i++)
1053 labels[i].label = NULL;
1055 /* Read each value/label tuple into labels[]. */
1056 for (i = 0; i < n_labels; i++)
1058 struct label *label = labels + i;
1059 unsigned char label_len;
1063 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1065 /* Read label length. */
1066 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1067 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1069 /* Read label, padding. */
1070 label->label = xmalloc (padded_len + 1);
1071 assertive_buf_read (r, label->label, padded_len - 1, 0);
1072 label->label[label_len] = 0;
1075 /* Second step: Read the type 4 record that has the list of
1076 variables to which the value labels are to be applied. */
1078 /* Read record type of type 4 record. */
1082 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1083 if (r->reverse_endian)
1084 bswap_int32 (&rec_type);
1087 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1088 "follow value label record (type 3) as it should."),
1089 fh_get_filename (r->fh)));
1092 /* Read number of variables associated with value label from type 4
1094 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1095 if (r->reverse_endian)
1096 bswap_int32 (&n_vars);
1097 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1098 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1099 "is not between 1 and the number of variables (%d)."),
1100 fh_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1102 /* Read the list of variables. */
1103 var = xnmalloc (n_vars, sizeof *var);
1104 for (i = 0; i < n_vars; i++)
1109 /* Read variable index, check range. */
1110 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1111 if (r->reverse_endian)
1112 bswap_int32 (&var_idx);
1113 if (var_idx < 1 || var_idx > r->value_cnt)
1114 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1115 "not between 1 and the number of values (%d)."),
1116 fh_get_filename (r->fh), var_idx, r->value_cnt));
1118 /* Make sure it's a real variable. */
1119 v = var_by_idx[var_idx - 1];
1121 lose ((ME, _("%s: Variable index associated with value label (%d) "
1122 "refers to a continuation of a string variable, not to "
1123 "an actual variable."),
1124 fh_get_filename (r->fh), var_idx));
1125 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1126 lose ((ME, _("%s: Value labels are not allowed on long string "
1128 fh_get_filename (r->fh), v->name));
1130 /* Add it to the list of variables. */
1134 /* Type check the variables. */
1135 for (i = 1; i < n_vars; i++)
1136 if (var[i]->type != var[0]->type)
1137 lose ((ME, _("%s: Variables associated with value label are not all of "
1138 "identical type. Variable %s has %s type, but variable "
1140 fh_get_filename (r->fh),
1141 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1142 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1144 /* Fill in labels[].value, now that we know the desired type. */
1145 for (i = 0; i < n_labels; i++)
1147 struct label *label = labels + i;
1149 if (var[0]->type == ALPHA)
1151 const int copy_len = min (sizeof label->raw_value,
1152 sizeof label->label);
1153 memcpy (label->value.s, label->raw_value, copy_len);
1156 assert (sizeof f == sizeof label->raw_value);
1157 memcpy (&f, label->raw_value, sizeof f);
1158 if (r->reverse_endian)
1164 /* Assign the value_label's to each variable. */
1165 for (i = 0; i < n_vars; i++)
1167 struct variable *v = var[i];
1170 /* Add each label to the variable. */
1171 for (j = 0; j < n_labels; j++)
1173 struct label *label = labels + j;
1174 if (!val_labs_replace (v->val_labs, label->value, label->label))
1177 if (var[0]->type == NUMERIC)
1178 msg (MW, _("%s: File contains duplicate label for value %g for "
1180 fh_get_filename (r->fh), label->value.f, v->name);
1182 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1183 "for variable %s."),
1184 fh_get_filename (r->fh), v->width, label->value.s, v->name);
1188 for (i = 0; i < n_labels; i++)
1189 free (labels[i].label);
1197 for (i = 0; i < n_labels; i++)
1198 free (labels[i].label);
/* Note on buf_read: a BYTE_CNT of zero is special-cased before the
   fread call, and no buffer is allocated for it.  A failed read is
   reported either as an I/O error (with strerror) or as an
   unexpected end of file. */
1205 /* Reads BYTE_CNT bytes from the file of reader R. If BUF is
1206 non-NULL, uses that as the buffer; otherwise allocates at least
1207 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1210 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1214 if (buf == NULL && byte_cnt > 0 )
1215 buf = xmalloc (max (byte_cnt, min_alloc));
1217 if ( byte_cnt == 0 )
1221 if (1 != fread (buf, byte_cnt, 1, r->file))
1223 if (ferror (r->file))
1224 msg (ME, _("%s: Reading system file: %s."),
1225 fh_get_filename (r->fh), strerror (errno));
1227 corrupt_msg (ME, _("%s: Unexpected end of file."),
1228 fh_get_filename (r->fh));
1235 /* Winds the reader BYTE_CNT bytes back in the reader stream.
   BYTE_CNT must be positive.  A failing seek is reported through
   msg. */
1237 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1239 assert(byte_cnt > 0);
/* Convert to a signed offset before negating: negating the unsigned
   size_t directly yields a huge positive value. */
1241 if ( 0 != fseek(r->file, -(long) byte_cnt, SEEK_CUR))
1243 msg (ME, _("%s: Seeking system file: %s."),
1244 fh_get_filename (r->fh), strerror (errno));
/* Overview of read_documents: rejects a second document record,
   reads the line count (byte-swapping it for reverse-endian files,
   like every other integer field read here), then reads
   80 * LINE_CNT bytes of text, null-terminates them and hands them
   to the dictionary. */
1248 /* Reads a document record, type 6, from system file R, and sets up
1249 the documents and n_documents fields in the associated
1252 read_documents (struct sfm_reader *r, struct dictionary *dict)
1257 if (dict_get_documents (dict) != NULL)
1258 lose ((ME, _("%s: System file contains multiple "
1259 "type 6 (document) records."),
1260 fh_get_filename (r->fh)));
1262 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
if (r->reverse_endian)
bswap_int32 (&line_cnt);
1264 lose ((ME, _("%s: Number of document lines (%ld) "
1265 "must be greater than 0."),
1266 fh_get_filename (r->fh), (long) line_cnt));
1268 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1269 /* FIXME? Run through asciify. */
1270 if (documents == NULL)
1272 documents[80 * line_cnt] = '\0';
1273 dict_set_documents (dict, documents);
1283 /* Reads compressed data into R->BUF (up to 128 flt64 elements per call) and sets other pointers
1284 appropriately. Returns nonzero only if both no errors occur and
1287 buffer_input (struct sfm_reader *r)
1294 r->buf = xnmalloc (128, sizeof *r->buf);
1295 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1296 if (ferror (r->file))
1298 msg (ME, _("%s: Error reading file: %s."),
1299 fh_get_filename (r->fh), strerror (errno));
1304 r->end = &r->buf[amt];
1308 /* Reads a single case consisting of compressed data from the
1309 file of reader R into the array BUF[], which must have room for
1310 R's value count of elements, and returns nonzero only if successful. */
1311 /* Data in system files is compressed in this manner. Data
1312 values are grouped into sets of eight ("octets"). Each value
1313 in an octet has one instruction byte; the eight instruction bytes are output together.
1314 Each instruction byte gives a value for that byte or indicates
1315 that the value can be found following the instructions. */
1317 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1319 const unsigned char *p_end = r->x + sizeof (flt64);
1320 unsigned char *p = r->y;
1322 const flt64 *buf_beg = buf;
1323 const flt64 *buf_end = &buf[r->value_cnt];
1327 for (; p < p_end; p++){
1331 /* Code 0 is ignored. */
1334 /* Code 252 is end of file. */
1337 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1338 "in partial case."),
1339 fh_get_filename (r->fh)));
1341 /* Code 253 indicates that the value is stored explicitly
1342 following the instruction bytes. */
1343 if (r->ptr == NULL || r->ptr >= r->end)
1344 if (!buffer_input (r))
1345 lose ((ME, _("%s: Unexpected end of file."),
1346 fh_get_filename (r->fh)));
1347 memcpy (buf++, r->ptr++, sizeof *buf);
1352 /* Code 254 indicates a string that is all blanks. */
1353 memset (buf++, ' ', sizeof *buf);
1358 /* Code 255 indicates the system-missing value. */
1360 if (r->reverse_endian)
1367 /* Codes 1 through 251 inclusive are taken to indicate a
1368 value of (BYTE - BIAS), where BYTE is the byte's value
1369 and BIAS is the compression bias (generally 100.0). */
1370 *buf = *p - r->bias;
1371 if (r->reverse_endian)
1379 /* We have reached the end of this instruction octet. Read
1381 if (r->ptr == NULL || r->ptr >= r->end)
1383 if (!buffer_input (r))
1386 lose ((ME, _("%s: Unexpected end of file."),
1387 fh_get_filename (r->fh)));
1392 memcpy (r->x, r->ptr++, sizeof *buf);
1399 /* We have filled up an entire record. Update state and return
1410 /* Reads one case from R's file into C. Returns nonzero
1411 only if successful.  Uncompressed files whose flt64 has the
   same size as double are read straight into the case and fixed up
   in place; everything else is read (or decompressed) into a
   bounce buffer and copied value by value, skipping positions whose
   width is -1. */
1413 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1418 if (!r->compressed && sizeof (flt64) == sizeof (double))
1420 /* Fast path: external and internal representations are the
1421 same, except possibly for endianness or SYSMIS. Read
1422 directly into the case's buffer, then fix up any minor
1423 details as needed. */
1424 if (!fread_ok (r, case_data_all_rw (c),
1425 sizeof (union value) * r->value_cnt))
1428 /* Fix up endianness if needed. */
1429 if (r->reverse_endian)
1433 for (i = 0; i < r->value_cnt; i++)
1434 if (r->vars[i].width == 0)
1435 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1438 /* Fix up SYSMIS values if needed.
1439 I don't think this will ever actually kick in, but it
1441 if (r->sysmis != SYSMIS)
1445 for (i = 0; i < r->value_cnt; i++)
1446 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1447 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1452 /* Slow path: internal and external representations differ.
1453 Read into a bounce buffer, then copy to C. */
1460 bounce_size = sizeof *bounce * r->value_cnt;
1461 bounce = bounce_cur = local_alloc (bounce_size);
1464 read_ok = fread_ok (r, bounce, bounce_size);
1466 read_ok = read_compressed_data (r, bounce);
1469 local_free (bounce);
1473 for (i = 0; i < r->value_cnt; i++)
1475 struct sfm_var *v = &r->vars[i];
1479 flt64 f = *bounce_cur++;
1480 if (r->reverse_endian)
1482 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1484 else if (v->width != -1)
1486 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1487 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1491 local_free (bounce);
/* Reads exactly BYTE_CNT bytes from R's file into BUFFER.  A
   stream error is reported with strerror; a short but nonzero read
   is reported as a partial record at end of file.  A read of zero
   bytes without a stream error produces no message here.  (The
   return statements are on lines not shown in this listing.) */
1497 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1499 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1501 if (read_bytes == byte_cnt)
1505 if (ferror (r->file))
1507 msg (ME, _("%s: Reading system file: %s."),
1508 fh_get_filename (r->fh), strerror (errno));
1511 else if (read_bytes != 0)
1513 msg (ME, _("%s: Partial record at end of system file."),
1514 fh_get_filename (r->fh));
1521 /* Returns true if an I/O error has occurred on READER, false
1524 sfm_read_error (const struct sfm_reader *reader)
1529 /* Returns true if FILE is an SPSS system file,
1532 sfm_detect (FILE *file)
1534 struct sysfile_header hdr;
1536 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1538 if (strncmp ("$FL2", hdr.rec_type, 4))