1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
44 #include "debug-print.h"
46 /* System file reader. */
49 struct file_handle *fh; /* File handle. */
50 FILE *file; /* File stream. */
52 int reverse_endian; /* 1=file has endianness opposite us. */
53 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
54 int value_cnt; /* Number of `union values's per case. */
55 long case_cnt; /* Number of cases, -1 if unknown. */
56 int compressed; /* 1=compressed, 0=not compressed. */
57 double bias; /* Compression bias, usually 100.0. */
58 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 struct sfm_var *vars; /* Variables. */
62 size_t var_cnt; /* Number of variables. */
64 /* File's special constants. */
69 /* Decompression buffer. */
70 flt64 *buf; /* Buffer data. */
71 flt64 *ptr; /* Current location in buffer. */
72 flt64 *end; /* End of buffer data. */
74 /* Compression instruction octet. */
75 unsigned char x[8]; /* Current instruction octet. */
76 unsigned char *y; /* Location in current instruction octet. */
79 /* A variable in a system file. */
82 int width; /* 0=numeric, otherwise string width. */
83 int fv; /* Index into case. */
88 /* Swap bytes *A and *B. */
90 bswap (unsigned char *a, unsigned char *b)
97 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
99 bswap_int32 (int32 *x_)
101 unsigned char *x = (unsigned char *) x_;
102 bswap (x + 0, x + 3);
103 bswap (x + 1, x + 2);
106 /* Reverse the byte order of 64-bit floating point *X. */
108 bswap_flt64 (flt64 *x_)
110 unsigned char *x = (unsigned char *) x_;
111 bswap (x + 0, x + 7);
112 bswap (x + 1, x + 6);
113 bswap (x + 2, x + 5);
114 bswap (x + 3, x + 4);
118 corrupt_msg (int class, const char *format,...)
119 PRINTF_FORMAT (2, 3);
121 /* Displays a corrupt sysfile error. */
123 corrupt_msg (int class, const char *format,...)
130 va_start (args, format);
131 vsnprintf (buf, 1024, format, args);
139 getl_location (&e.where.filename, &e.where.line_number);
140 e.title = _("corrupt system file: ");
147 /* Closes a system file after we're done with it. */
149 sfm_close_reader (struct sfm_reader *r)
155 fh_close (r->fh, "system file", "rs");
156 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
157 msg (ME, _("%s: Closing system file: %s."),
158 handle_get_filename (r->fh), strerror (errno));
164 /* Dictionary reader. */
166 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
169 static int read_header (struct sfm_reader *,
170 struct dictionary *, struct sfm_read_info *);
171 static int parse_format_spec (struct sfm_reader *, int32,
172 struct fmt_spec *, struct variable *);
173 static int read_value_labels (struct sfm_reader *, struct dictionary *,
174 struct variable **var_by_idx);
175 static int read_variables (struct sfm_reader *,
176 struct dictionary *, struct variable ***var_by_idx);
177 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
178 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
179 static int read_documents (struct sfm_reader *, struct dictionary *);
181 static int fread_ok (struct sfm_reader *, void *, size_t);
183 /* Displays the message X with corrupt_msg, then jumps to the error
191 /* Calls buf_read with the specified arguments, and jumps to
192 error if the read fails. */
193 #define assertive_buf_read(a,b,c,d) \
195 if (!buf_read (a,b,c,d)) \
199 /* Opens the system file designated by file handle FH for
200 reading. Reads the system file's dictionary into *DICT.
201 If INFO is non-null, then it receives additional info about the
204 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
205 struct sfm_read_info *info)
207 struct sfm_reader *r = NULL;
208 struct variable **var_by_idx = NULL;
210 *dict = dict_create ();
211 if (!fh_open (fh, "system file", "rs"))
214 /* Create and initialize reader. */
215 r = xmalloc (sizeof *r);
217 r->file = fn_open (handle_get_filename (fh), "rb");
219 r->reverse_endian = 0;
230 r->sysmis = -FLT64_MAX;
231 r->highest = FLT64_MAX;
232 r->lowest = second_lowest_flt64;
234 r->buf = r->ptr = r->end = NULL;
235 r->y = r->x + sizeof r->x;
237 /* Check that file open succeeded. */
240 msg (ME, _("An error occurred while opening \"%s\" for reading "
241 "as a system file: %s."),
242 handle_get_filename (r->fh), strerror (errno));
247 /* Read header and variables. */
248 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
251 /* Handle weighting. */
252 if (r->weight_idx != -1)
254 struct variable *weight_var = var_by_idx[r->weight_idx];
256 if (weight_var == NULL)
258 _("%s: Weighting variable may not be a continuation of "
259 "a long string variable."), handle_get_filename (fh)));
260 else if (weight_var->type == ALPHA)
261 lose ((ME, _("%s: Weighting variable may not be a string variable."),
262 handle_get_filename (fh)));
264 dict_set_weight (*dict, weight_var);
267 dict_set_weight (*dict, NULL);
269 /* Read records of types 3, 4, 6, and 7. */
274 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
275 if (r->reverse_endian)
276 bswap_int32 (&rec_type);
281 if (!read_value_labels (r, *dict, var_by_idx))
286 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
287 "records must always immediately follow type 3 "
289 handle_get_filename (r->fh)));
292 if (!read_documents (r, *dict))
308 assertive_buf_read (r, &data, sizeof data, 0);
309 if (r->reverse_endian)
311 bswap_int32 (&data.subtype);
312 bswap_int32 (&data.size);
313 bswap_int32 (&data.count);
316 switch (data.subtype)
319 if (!read_machine_int32_info (r, data.size, data.count))
324 if (!read_machine_flt64_info (r, data.size, data.count))
330 case 11: /* ?? Used by SPSS 8.0. */
335 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
336 "encountered in system file."),
337 handle_get_filename (r->fh), data.subtype);
343 void *x = buf_read (r, NULL, data.size * data.count, 0);
355 assertive_buf_read (r, &filler, sizeof filler, 0);
360 lose ((ME, _("%s: Unrecognized record type %d."),
361 handle_get_filename (r->fh), rec_type));
366 /* Come here on successful completion. */
371 /* Come here on unsuccessful completion. */
372 sfm_close_reader (r);
376 dict_destroy (*dict);
382 /* Read record type 7, subtype 3. */
384 read_machine_int32_info (struct sfm_reader *r, int size, int count)
391 if (size != sizeof (int32) || count != 8)
392 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
393 "subtype 3. Expected size %d, count 8."),
394 handle_get_filename (r->fh), size, count, sizeof (int32)));
396 assertive_buf_read (r, data, sizeof data, 0);
397 if (r->reverse_endian)
398 for (i = 0; i < 8; i++)
399 bswap_int32 (&data[i]);
403 lose ((ME, _("%s: Floating-point representation in system file is not "
404 "IEEE-754. PSPP cannot convert between floating-point "
406 handle_get_filename (r->fh)));
408 #error Add support for your floating-point format.
411 #ifdef WORDS_BIGENDIAN
416 if (r->reverse_endian)
418 if (file_bigendian ^ (data[6] == 1))
419 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
420 "endianness intuited from file header (%s)."),
421 handle_get_filename (r->fh),
422 file_bigendian ? _("big-endian") : _("little-endian"),
423 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
426 /* PORTME: Character representation code. */
427 if (data[7] != 2 && data[7] != 3)
428 lose ((ME, _("%s: File-indicated character representation code (%s) is "
430 handle_get_filename (r->fh),
431 (data[7] == 1 ? "EBCDIC"
432 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
440 /* Read record type 7, subtype 4. */
442 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
447 if (size != sizeof (flt64) || count != 3)
448 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
449 "subtype 4. Expected size %d, count 8."),
450 handle_get_filename (r->fh), size, count, sizeof (flt64)));
452 assertive_buf_read (r, data, sizeof data, 0);
453 if (r->reverse_endian)
454 for (i = 0; i < 3; i++)
455 bswap_flt64 (&data[i]);
457 if (data[0] != SYSMIS || data[1] != FLT64_MAX
458 || data[2] != second_lowest_flt64)
461 r->highest = data[1];
463 msg (MW, _("%s: File-indicated value is different from internal value "
464 "for at least one of the three system values. SYSMIS: "
465 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
467 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
468 (double) data[1], (double) FLT64_MAX,
469 (double) data[2], (double) second_lowest_flt64);
479 read_header (struct sfm_reader *r,
480 struct dictionary *dict, struct sfm_read_info *info)
482 struct sysfile_header hdr; /* Disk buffer. */
483 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
484 int skip_amt = 0; /* Amount of product name to omit. */
487 /* Read header, check magic. */
488 assertive_buf_read (r, &hdr, sizeof hdr, 0);
489 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
490 lose ((ME, _("%s: Bad magic. Proper system files begin with "
491 "the four characters `$FL2'. This file will not be read."),
492 handle_get_filename (r->fh)));
494 /* Check eye-catcher string. */
495 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
496 for (i = 0; i < 60; i++)
497 if (!isprint ((unsigned char) prod_name[i]))
499 for (i = 59; i >= 0; i--)
500 if (!isgraph ((unsigned char) prod_name[i]))
505 prod_name[60] = '\0';
509 static const char *prefix[N_PREFIXES] =
511 "@(#) SPSS DATA FILE",
517 for (i = 0; i < N_PREFIXES; i++)
518 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
520 skip_amt = strlen (prefix[i]);
525 /* Check endianness. */
526 if (hdr.layout_code == 2)
527 r->reverse_endian = 0;
530 bswap_int32 (&hdr.layout_code);
531 if (hdr.layout_code != 2)
532 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
533 "should be 2, in big-endian or little-endian format."),
534 handle_get_filename (r->fh), hdr.layout_code));
536 r->reverse_endian = 1;
537 bswap_int32 (&hdr.case_size);
538 bswap_int32 (&hdr.compress);
539 bswap_int32 (&hdr.weight_idx);
540 bswap_int32 (&hdr.case_cnt);
541 bswap_flt64 (&hdr.bias);
544 /* Copy basic info and verify correctness. */
545 r->value_cnt = hdr.case_size;
546 if (r->value_cnt <= 0
547 || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
548 lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
550 handle_get_filename (r->fh), r->value_cnt,
551 INT_MAX / sizeof (union value) / 2));
553 r->compressed = hdr.compress;
555 r->weight_idx = hdr.weight_idx - 1;
556 if (hdr.weight_idx < 0 || hdr.weight_idx > r->value_cnt)
557 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
558 "and number of elements per case (%d)."),
559 handle_get_filename (r->fh), hdr.weight_idx, r->value_cnt));
561 r->case_cnt = hdr.case_cnt;
562 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
564 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
565 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
568 if (r->bias != 100.0)
569 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
571 handle_get_filename (r->fh), r->bias);
573 /* Make a file label only on the condition that the given label is
574 not all spaces or nulls. */
578 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
579 if (!isspace ((unsigned char) hdr.file_label[i])
580 && hdr.file_label[i] != 0)
582 char *label = xmalloc (i + 2);
583 memcpy (label, hdr.file_label, i + 1);
585 dict_set_label (dict, label);
595 memcpy (info->creation_date, hdr.creation_date, 9);
596 info->creation_date[9] = 0;
598 memcpy (info->creation_time, hdr.creation_time, 8);
599 info->creation_time[8] = 0;
601 #ifdef WORDS_BIGENDIAN
602 info->big_endian = !r->reverse_endian;
604 info->big_endian = r->reverse_endian;
607 info->compressed = hdr.compress;
609 info->case_cnt = hdr.case_cnt;
611 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
612 if (isgraph ((unsigned char) *cp))
614 strcpy (info->product, cp);
623 /* Reads most of the dictionary from system file R into DICT; also
624 fills in the associated VAR_BY_IDX array. */
626 read_variables (struct sfm_reader *r,
627 struct dictionary *dict, struct variable ***var_by_idx)
631 struct sysfile_variable sv; /* Disk buffer. */
632 int long_string_count = 0; /* # of long string continuation
633 records still expected. */
634 int next_value = 0; /* Index to next `value' structure. */
637 /* Allocate variables. */
638 *var_by_idx = xmalloc (sizeof **var_by_idx * r->value_cnt);
640 /* Read in the entry for each variable and use the info to
641 initialize the dictionary. */
642 for (i = 0; i < r->value_cnt; i++)
649 assertive_buf_read (r, &sv, sizeof sv, 0);
651 if (r->reverse_endian)
653 bswap_int32 (&sv.rec_type);
654 bswap_int32 (&sv.type);
655 bswap_int32 (&sv.has_var_label);
656 bswap_int32 (&sv.n_missing_values);
657 bswap_int32 (&sv.print);
658 bswap_int32 (&sv.write);
661 if (sv.rec_type != 2)
662 lose ((ME, _("%s: position %d: Bad record type (%d); "
663 "the expected value was 2."),
664 handle_get_filename (r->fh), i, sv.rec_type));
666 /* If there was a long string previously, make sure that the
667 continuations are present; otherwise make sure there aren't
669 if (long_string_count)
672 lose ((ME, _("%s: position %d: String variable does not have "
673 "proper number of continuation records."),
674 handle_get_filename (r->fh), i));
676 (*var_by_idx)[i] = NULL;
680 else if (sv.type == -1)
681 lose ((ME, _("%s: position %d: Superfluous long string continuation "
683 handle_get_filename (r->fh), i));
685 /* Check fields for validity. */
686 if (sv.type < 0 || sv.type > 255)
687 lose ((ME, _("%s: position %d: Bad variable type code %d."),
688 handle_get_filename (r->fh), i, sv.type));
689 if (sv.has_var_label != 0 && sv.has_var_label != 1)
690 lose ((ME, _("%s: position %d: Variable label indicator field is not "
691 "0 or 1."), handle_get_filename (r->fh), i));
692 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
693 || sv.n_missing_values == -1)
694 lose ((ME, _("%s: position %d: Missing value indicator field is not "
695 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
697 /* Copy first character of variable name. */
698 if (!isalpha ((unsigned char) sv.name[0])
699 && sv.name[0] != '@' && sv.name[0] != '#')
700 lose ((ME, _("%s: position %d: Variable name begins with invalid "
702 handle_get_filename (r->fh), i));
703 if (islower ((unsigned char) sv.name[0]))
704 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
706 handle_get_filename (r->fh), i, sv.name[0]);
707 if (sv.name[0] == '#')
708 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
709 "(`#'). Scratch variables should not appear in system "
711 handle_get_filename (r->fh), i);
712 name[0] = toupper ((unsigned char) (sv.name[0]));
714 /* Copy remaining characters of variable name. */
715 for (j = 1; j < 8; j++)
717 int c = (unsigned char) sv.name[j];
721 else if (islower (c))
723 msg (MW, _("%s: position %d: Variable name character %d is "
724 "lowercase letter %c."),
725 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
726 name[j] = toupper ((unsigned char) (c));
728 else if (isalnum (c) || c == '.' || c == '@'
729 || c == '#' || c == '$' || c == '_')
732 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
734 handle_get_filename (r->fh), i, c, c));
738 /* Create variable. */
739 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
741 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
742 handle_get_filename (r->fh), name));
744 /* Case reading data. */
745 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
746 long_string_count = nv - 1;
749 /* Get variable label, if any. */
750 if (sv.has_var_label == 1)
755 /* Read length of label. */
756 assertive_buf_read (r, &len, sizeof len, 0);
757 if (r->reverse_endian)
761 if (len < 0 || len > 255)
762 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
764 handle_get_filename (r->fh), vv->name, len));
766 /* Read label into variable structure. */
767 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
768 if (vv->label == NULL)
770 vv->label[len] = '\0';
773 /* Set missing values. */
774 if (sv.n_missing_values != 0)
778 if (vv->width > MAX_SHORT_STRING)
779 lose ((ME, _("%s: Long string variable %s may not have missing "
781 handle_get_filename (r->fh), vv->name));
783 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
785 if (r->reverse_endian && vv->type == NUMERIC)
786 for (j = 0; j < abs (sv.n_missing_values); j++)
787 bswap_flt64 (&mv[j]);
789 if (sv.n_missing_values > 0)
791 vv->miss_type = sv.n_missing_values;
792 if (vv->type == NUMERIC)
793 for (j = 0; j < sv.n_missing_values; j++)
794 vv->missing[j].f = mv[j];
796 for (j = 0; j < sv.n_missing_values; j++)
797 memcpy (vv->missing[j].s, &mv[j], vv->width);
803 if (vv->type == ALPHA)
804 lose ((ME, _("%s: String variable %s may not have missing "
805 "values specified as a range."),
806 handle_get_filename (r->fh), vv->name));
808 if (mv[0] == r->lowest)
810 vv->miss_type = MISSING_LOW;
811 vv->missing[x++].f = mv[1];
813 else if (mv[1] == r->highest)
815 vv->miss_type = MISSING_HIGH;
816 vv->missing[x++].f = mv[0];
820 vv->miss_type = MISSING_RANGE;
821 vv->missing[x++].f = mv[0];
822 vv->missing[x++].f = mv[1];
825 if (sv.n_missing_values == -3)
828 vv->missing[x++].f = mv[2];
833 vv->miss_type = MISSING_NONE;
835 if (!parse_format_spec (r, sv.print, &vv->print, vv)
836 || !parse_format_spec (r, sv.write, &vv->write, vv))
839 /* Add variable to list. */
840 if (var_cap >= r->var_cnt)
842 var_cap = 2 + r->var_cnt * 2;
843 r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars);
845 r->vars[r->var_cnt].width = vv->width;
846 r->vars[r->var_cnt].fv = vv->fv;
850 /* Some consistency checks. */
851 if (long_string_count != 0)
852 lose ((ME, _("%s: Long string continuation records omitted at end of "
854 handle_get_filename (r->fh)));
855 if (next_value != r->value_cnt)
856 lose ((ME, _("%s: System file header indicates %d variable positions but "
857 "%d were read from file."),
858 handle_get_filename (r->fh), r->value_cnt, next_value));
866 /* Translates the format spec from sysfile format to internal
869 parse_format_spec (struct sfm_reader *r, int32 s, struct fmt_spec *v, struct variable *vv)
871 v->type = translate_fmt ((s >> 16) & 0xff);
873 lose ((ME, _("%s: Bad format specifier byte (%d)."),
874 handle_get_filename (r->fh), (s >> 16) & 0xff));
875 v->w = (s >> 8) & 0xff;
878 /* FIXME? Should verify the resulting specifier more thoroughly. */
881 lose ((ME, _("%s: Bad format specifier byte (%d)."),
882 handle_get_filename (r->fh), (s >> 16) & 0xff));
883 if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
884 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
885 handle_get_filename (r->fh),
886 vv->type == ALPHA ? _("String") : _("Numeric"),
888 formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
889 formats[v->type].name));
896 /* Reads value labels from system file R and inserts them into the
897 associated dictionary. */
899 read_value_labels (struct sfm_reader *r,
900 struct dictionary *dict, struct variable **var_by_idx)
904 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
905 union value value; /* Value. */
906 char *label; /* Null-terminated label string. */
909 struct label *labels = NULL;
910 int32 n_labels; /* Number of labels. */
912 struct variable **var = NULL; /* Associated variables. */
913 int32 n_vars; /* Number of associated variables. */
917 /* First step: read the contents of the type 3 record and record its
918 contents. Note that we can't do much with the data since we
919 don't know yet whether it is of numeric or string type. */
921 /* Read number of labels. */
922 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
923 if (r->reverse_endian)
924 bswap_int32 (&n_labels);
926 /* Allocate memory. */
927 labels = xmalloc (n_labels * sizeof *labels);
928 for (i = 0; i < n_labels; i++)
929 labels[i].label = NULL;
931 /* Read each value/label tuple into labels[]. */
932 for (i = 0; i < n_labels; i++)
934 struct label *label = labels + i;
935 unsigned char label_len;
939 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
941 /* Read label length. */
942 assertive_buf_read (r, &label_len, sizeof label_len, 0);
943 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
945 /* Read label, padding. */
946 label->label = xmalloc (padded_len + 1);
947 assertive_buf_read (r, label->label, padded_len - 1, 0);
948 label->label[label_len] = 0;
951 /* Second step: Read the type 4 record that has the list of
952 variables to which the value labels are to be applied. */
954 /* Read record type of type 4 record. */
958 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
959 if (r->reverse_endian)
960 bswap_int32 (&rec_type);
963 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
964 "follow value label record (type 3) as it should."),
965 handle_get_filename (r->fh)));
968 /* Read number of variables associated with value label from type 4
970 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
971 if (r->reverse_endian)
972 bswap_int32 (&n_vars);
973 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
974 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
975 "is not between 1 and the number of variables (%d)."),
976 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
978 /* Read the list of variables. */
979 var = xmalloc (n_vars * sizeof *var);
980 for (i = 0; i < n_vars; i++)
985 /* Read variable index, check range. */
986 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
987 if (r->reverse_endian)
988 bswap_int32 (&var_idx);
989 if (var_idx < 1 || var_idx > r->value_cnt)
990 lose ((ME, _("%s: Variable index associated with value label (%d) is "
991 "not between 1 and the number of values (%d)."),
992 handle_get_filename (r->fh), var_idx, r->value_cnt));
994 /* Make sure it's a real variable. */
995 v = var_by_idx[var_idx - 1];
997 lose ((ME, _("%s: Variable index associated with value label (%d) "
998 "refers to a continuation of a string variable, not to "
999 "an actual variable."),
1000 handle_get_filename (r->fh), var_idx));
1001 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1002 lose ((ME, _("%s: Value labels are not allowed on long string "
1004 handle_get_filename (r->fh), v->name));
1006 /* Add it to the list of variables. */
1010 /* Type check the variables. */
1011 for (i = 1; i < n_vars; i++)
1012 if (var[i]->type != var[0]->type)
1013 lose ((ME, _("%s: Variables associated with value label are not all of "
1014 "identical type. Variable %s has %s type, but variable "
1016 handle_get_filename (r->fh),
1017 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1018 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1020 /* Fill in labels[].value, now that we know the desired type. */
1021 for (i = 0; i < n_labels; i++)
1023 struct label *label = labels + i;
1025 if (var[0]->type == ALPHA)
1027 const int copy_len = min (sizeof (label->raw_value),
1028 sizeof (label->label));
1029 memcpy (label->value.s, label->raw_value, copy_len);
1032 assert (sizeof f == sizeof label->raw_value);
1033 memcpy (&f, label->raw_value, sizeof f);
1034 if (r->reverse_endian)
1040 /* Assign the value_label's to each variable. */
1041 for (i = 0; i < n_vars; i++)
1043 struct variable *v = var[i];
1046 /* Add each label to the variable. */
1047 for (j = 0; j < n_labels; j++)
1049 struct label *label = labels + j;
1050 if (!val_labs_replace (v->val_labs, label->value, label->label))
1053 if (var[0]->type == NUMERIC)
1054 msg (MW, _("%s: File contains duplicate label for value %g for "
1056 handle_get_filename (r->fh), label->value.f, v->name);
1058 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1059 "for variable %s."),
1060 handle_get_filename (r->fh), v->width, label->value.s, v->name);
1064 for (i = 0; i < n_labels; i++)
1065 free (labels[i].label);
1073 for (i = 0; i < n_labels; i++)
1074 free (labels[i].label);
1081 /* Reads BYTE_CNT bytes from the file represented by R. If BUF is
1082 non-NULL, uses that as the buffer; otherwise allocates at least
1083 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1086 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1089 buf = xmalloc (max (byte_cnt, min_alloc));
1090 if (1 != fread (buf, byte_cnt, 1, r->file))
1092 if (ferror (r->file))
1093 msg (ME, _("%s: Reading system file: %s."),
1094 handle_get_filename (r->fh), strerror (errno));
1096 corrupt_msg (ME, _("%s: Unexpected end of file."),
1097 handle_get_filename (r->fh));
1103 /* Reads a document record, type 6, from system file R, and sets up
1104 the documents and n_documents fields in the associated
1107 read_documents (struct sfm_reader *r, struct dictionary *dict)
1112 if (dict_get_documents (dict) != NULL)
1113 lose ((ME, _("%s: System file contains multiple "
1114 "type 6 (document) records."),
1115 handle_get_filename (r->fh)));
1117 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1119 lose ((ME, _("%s: Number of document lines (%ld) "
1120 "must be greater than 0."),
1121 handle_get_filename (r->fh), (long) line_cnt));
1123 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1124 /* FIXME? Run through asciify. */
1125 if (documents == NULL)
1127 documents[80 * line_cnt] = '\0';
1128 dict_set_documents (dict, documents);
1138 /* Reads compressed data into R->BUF and sets other pointers
1139 appropriately. Returns nonzero only if both no errors occur and
1142 buffer_input (struct sfm_reader *r)
1147 r->buf = xmalloc (sizeof *r->buf * 128);
1148 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1149 if (ferror (r->file))
1151 msg (ME, _("%s: Error reading file: %s."),
1152 handle_get_filename (r->fh), strerror (errno));
1156 r->end = &r->buf[amt];
1160 /* Reads a single case consisting of compressed data from system
1161 file R into the array BUF[], and returns nonzero only if
1162 successful. */
1163 /* Data in system files is compressed in this manner. Data
1164 values are grouped into sets of eight ("octets"). Each value in
1165 an octet has one instruction byte, and the instruction bytes for
1166 an octet are output together. Each instruction byte either gives
1167 its value directly or indicates that the value follows the instructions. */
1169 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1171 const unsigned char *p_end = r->x + sizeof (flt64);
1172 unsigned char *p = r->y;
1174 const flt64 *buf_beg = buf;
1175 const flt64 *buf_end = &buf[r->value_cnt];
1179 for (; p < p_end; p++)
1183 /* Code 0 is ignored. */
1186 /* Code 252 is end of file. */
1188 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1189 "in partial case."),
1190 handle_get_filename (r->fh)));
1193 /* Code 253 indicates that the value is stored explicitly
1194 following the instruction bytes. */
1195 if (r->ptr == NULL || r->ptr >= r->end)
1196 if (!buffer_input (r))
1198 lose ((ME, _("%s: Unexpected end of file."),
1199 handle_get_filename (r->fh)));
1202 memcpy (buf++, r->ptr++, sizeof *buf);
1207 /* Code 254 indicates a string that is all blanks. */
1208 memset (buf++, ' ', sizeof *buf);
1213 /* Code 255 indicates the system-missing value. */
1215 if (r->reverse_endian)
1222 /* Codes 1 through 251 inclusive are taken to indicate a
1223 value of (BYTE - BIAS), where BYTE is the byte's value
1224 and BIAS is the compression bias (generally 100.0). */
1225 *buf = *p - r->bias;
1226 if (r->reverse_endian)
1234 /* We have reached the end of this instruction octet. Read
1236 if (r->ptr == NULL || r->ptr >= r->end)
1237 if (!buffer_input (r))
1240 lose ((ME, _("%s: Unexpected end of file."),
1241 handle_get_filename (r->fh)));
1244 memcpy (r->x, r->ptr++, sizeof *buf);
1252 /* We have filled up an entire record. Update state and return
1258 /* We have been unsuccessful at filling a record, either through i/o
1259 error or through an end-of-file indication. Update state and
1260 return unsuccessfully. */
1264 /* Reads one case from R's file into C. Returns nonzero
1265 only if successful. */
1267 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1269 if (!r->compressed && sizeof (flt64) == sizeof (double))
1271 /* Fast path: external and internal representations are the
1272 same, except possibly for endianness or SYSMIS. Read
1273 directly into the case's buffer, then fix up any minor
1274 details as needed. */
1275 if (!fread_ok (r, case_data_all_rw (c),
1276 sizeof (union value) * r->value_cnt))
1279 /* Fix up endianness if needed. */
1280 if (r->reverse_endian)
1284 for (i = 0; i < r->var_cnt; i++)
1285 if (r->vars[i].width == 0)
1286 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1289 /* Fix up SYSMIS values if needed.
1290 I don't think this will ever actually kick in, but it
1292 if (r->sysmis != SYSMIS)
1296 for (i = 0; i < r->var_cnt; i++)
1297 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1298 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1303 /* Slow path: internal and external representations differ.
1304 Read into a bounce buffer, then copy to C. */
1311 bounce_size = sizeof *bounce * r->value_cnt;
1312 bounce = bounce_cur = local_alloc (bounce_size);
1315 read_ok = fread_ok (r, bounce, bounce_size);
1317 read_ok = read_compressed_data (r, bounce);
1320 local_free (bounce);
1324 for (i = 0; i < r->var_cnt; i++)
1326 struct sfm_var *v = &r->vars[i];
1330 flt64 f = *bounce_cur++;
1331 if (r->reverse_endian)
1333 case_data_rw (c, i)->f = f == r->sysmis ? SYSMIS : f;
1337 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1338 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1342 local_free (bounce);
1348 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1350 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1352 if (read_bytes == byte_cnt)
1356 if (ferror (r->file))
1357 msg (ME, _("%s: Reading system file: %s."),
1358 handle_get_filename (r->fh), strerror (errno));
1359 else if (read_bytes != 0)
1360 msg (ME, _("%s: Partial record at end of system file."),
1361 handle_get_filename (r->fh));