1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
44 #include "debug-print.h"
46 /* System file reader. */
49 struct file_handle *fh; /* File handle. */
50 FILE *file; /* File stream. */
52 int reverse_endian; /* 1=file has endianness opposite us. */
53 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
54 int value_cnt; /* Number of `union values's per case. */
55 long case_cnt; /* Number of cases, -1 if unknown. */
56 int compressed; /* 1=compressed, 0=not compressed. */
57 double bias; /* Compression bias, usually 100.0. */
58 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 struct sfm_var *vars; /* Variables. */
62 size_t var_cnt; /* Number of variables. */
64 /* File's special constants. */
69 /* Decompression buffer. */
70 flt64 *buf; /* Buffer data. */
71 flt64 *ptr; /* Current location in buffer. */
72 flt64 *end; /* End of buffer data. */
74 /* Compression instruction octet. */
75 unsigned char x[8]; /* Current instruction octet. */
76 unsigned char *y; /* Location in current instruction octet. */
79 /* A variable in a system file. */
82 int width; /* 0=numeric, otherwise string width. */
83 int fv; /* Index into case. */
88 /* Swap bytes *A and *B. */
90 bswap (unsigned char *a, unsigned char *b)
97 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
99 bswap_int32 (int32 *x_)
101 unsigned char *x = (unsigned char *) x_;
102 bswap (x + 0, x + 3);
103 bswap (x + 1, x + 2);
106 /* Reverse the byte order of 64-bit floating point *X. */
108 bswap_flt64 (flt64 *x_)
110 unsigned char *x = (unsigned char *) x_;
111 bswap (x + 0, x + 7);
112 bswap (x + 1, x + 6);
113 bswap (x + 2, x + 5);
114 bswap (x + 3, x + 4);
118 corrupt_msg (int class, const char *format,...)
119 PRINTF_FORMAT (2, 3);
121 /* Displays a corrupt sysfile error. */
123 corrupt_msg (int class, const char *format,...)
129 getl_location (&e.where.filename, &e.where.line_number);
130 e.title = _("corrupt system file: ");
132 va_start (args, format);
133 err_vmsg (&e, format, args);
137 /* Closes a system file after we're done with it. */
139 sfm_close_reader (struct sfm_reader *r)
145 fh_close (r->fh, "system file", "rs");
148 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
149 msg (ME, _("%s: Closing system file: %s."),
150 handle_get_filename (r->fh), strerror (errno));
158 /* Dictionary reader. */
160 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
163 static int read_header (struct sfm_reader *,
164 struct dictionary *, struct sfm_read_info *);
165 static int parse_format_spec (struct sfm_reader *, int32,
166 struct fmt_spec *, struct variable *);
167 static int read_value_labels (struct sfm_reader *, struct dictionary *,
168 struct variable **var_by_idx);
169 static int read_variables (struct sfm_reader *,
170 struct dictionary *, struct variable ***var_by_idx);
171 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
172 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
173 static int read_documents (struct sfm_reader *, struct dictionary *);
175 static int fread_ok (struct sfm_reader *, void *, size_t);
177 /* Displays the message X with corrupt_msg, then jumps to the error
185 /* Calls buf_read with the specified arguments, and jumps to
186 error if the read fails. */
187 #define assertive_buf_read(a,b,c,d) \
189 if (!buf_read (a,b,c,d)) \
193 /* Opens the system file designated by file handle FH for
194 reading. Reads the system file's dictionary into *DICT.
195 If INFO is non-null, then it receives additional info about the
198 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
199 struct sfm_read_info *info)
201 struct sfm_reader *r = NULL;
202 struct variable **var_by_idx = NULL;
204 *dict = dict_create ();
205 if (!fh_open (fh, "system file", "rs"))
208 /* Create and initialize reader. */
209 r = xmalloc (sizeof *r);
211 r->file = fn_open (handle_get_filename (fh), "rb");
213 r->reverse_endian = 0;
224 r->sysmis = -FLT64_MAX;
225 r->highest = FLT64_MAX;
226 r->lowest = second_lowest_flt64;
228 r->buf = r->ptr = r->end = NULL;
229 r->y = r->x + sizeof r->x;
231 /* Check that file open succeeded. */
234 msg (ME, _("An error occurred while opening \"%s\" for reading "
235 "as a system file: %s."),
236 handle_get_filename (r->fh), strerror (errno));
241 /* Read header and variables. */
242 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
245 /* Handle weighting. */
246 if (r->weight_idx != -1)
248 struct variable *weight_var = var_by_idx[r->weight_idx];
250 if (weight_var == NULL)
252 _("%s: Weighting variable may not be a continuation of "
253 "a long string variable."), handle_get_filename (fh)));
254 else if (weight_var->type == ALPHA)
255 lose ((ME, _("%s: Weighting variable may not be a string variable."),
256 handle_get_filename (fh)));
258 dict_set_weight (*dict, weight_var);
261 dict_set_weight (*dict, NULL);
263 /* Read records of types 3, 4, 6, and 7. */
268 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
269 if (r->reverse_endian)
270 bswap_int32 (&rec_type);
275 if (!read_value_labels (r, *dict, var_by_idx))
280 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
281 "records must always immediately follow type 3 "
283 handle_get_filename (r->fh)));
286 if (!read_documents (r, *dict))
302 assertive_buf_read (r, &data, sizeof data, 0);
303 if (r->reverse_endian)
305 bswap_int32 (&data.subtype);
306 bswap_int32 (&data.size);
307 bswap_int32 (&data.count);
310 switch (data.subtype)
313 if (!read_machine_int32_info (r, data.size, data.count))
318 if (!read_machine_flt64_info (r, data.size, data.count))
324 case 11: /* ?? Used by SPSS 8.0. */
329 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
330 "encountered in system file."),
331 handle_get_filename (r->fh), data.subtype);
337 void *x = buf_read (r, NULL, data.size * data.count, 0);
349 assertive_buf_read (r, &filler, sizeof filler, 0);
354 lose ((ME, _("%s: Unrecognized record type %d."),
355 handle_get_filename (r->fh), rec_type));
360 /* Come here on successful completion. */
365 /* Come here on unsuccessful completion. */
366 sfm_close_reader (r);
370 dict_destroy (*dict);
376 /* Read record type 7, subtype 3. */
378 read_machine_int32_info (struct sfm_reader *r, int size, int count)
385 if (size != sizeof (int32) || count != 8)
386 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
387 "subtype 3. Expected size %d, count 8."),
388 handle_get_filename (r->fh), size, count, sizeof (int32)));
390 assertive_buf_read (r, data, sizeof data, 0);
391 if (r->reverse_endian)
392 for (i = 0; i < 8; i++)
393 bswap_int32 (&data[i]);
397 lose ((ME, _("%s: Floating-point representation in system file is not "
398 "IEEE-754. PSPP cannot convert between floating-point "
400 handle_get_filename (r->fh)));
402 #error Add support for your floating-point format.
405 #ifdef WORDS_BIGENDIAN
410 if (r->reverse_endian)
412 if (file_bigendian ^ (data[6] == 1))
413 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
414 "endianness intuited from file header (%s)."),
415 handle_get_filename (r->fh),
416 file_bigendian ? _("big-endian") : _("little-endian"),
417 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
420 /* PORTME: Character representation code. */
421 if (data[7] != 2 && data[7] != 3)
422 lose ((ME, _("%s: File-indicated character representation code (%s) is "
424 handle_get_filename (r->fh),
425 (data[7] == 1 ? "EBCDIC"
426 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
434 /* Read record type 7, subtype 4. */
436 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
441 if (size != sizeof (flt64) || count != 3)
442 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
443 "subtype 4. Expected size %d, count 8."),
444 handle_get_filename (r->fh), size, count, sizeof (flt64)));
446 assertive_buf_read (r, data, sizeof data, 0);
447 if (r->reverse_endian)
448 for (i = 0; i < 3; i++)
449 bswap_flt64 (&data[i]);
451 if (data[0] != SYSMIS || data[1] != FLT64_MAX
452 || data[2] != second_lowest_flt64)
455 r->highest = data[1];
457 msg (MW, _("%s: File-indicated value is different from internal value "
458 "for at least one of the three system values. SYSMIS: "
459 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
461 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
462 (double) data[1], (double) FLT64_MAX,
463 (double) data[2], (double) second_lowest_flt64);
473 read_header (struct sfm_reader *r,
474 struct dictionary *dict, struct sfm_read_info *info)
476 struct sysfile_header hdr; /* Disk buffer. */
477 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
478 int skip_amt = 0; /* Amount of product name to omit. */
481 /* Read header, check magic. */
482 assertive_buf_read (r, &hdr, sizeof hdr, 0);
483 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
484 lose ((ME, _("%s: Bad magic. Proper system files begin with "
485 "the four characters `$FL2'. This file will not be read."),
486 handle_get_filename (r->fh)));
488 /* Check eye-catcher string. */
489 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
490 for (i = 0; i < 60; i++)
491 if (!isprint ((unsigned char) prod_name[i]))
493 for (i = 59; i >= 0; i--)
494 if (!isgraph ((unsigned char) prod_name[i]))
499 prod_name[60] = '\0';
503 static const char *prefix[N_PREFIXES] =
505 "@(#) SPSS DATA FILE",
511 for (i = 0; i < N_PREFIXES; i++)
512 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
514 skip_amt = strlen (prefix[i]);
519 /* Check endianness. */
520 if (hdr.layout_code == 2)
521 r->reverse_endian = 0;
524 bswap_int32 (&hdr.layout_code);
525 if (hdr.layout_code != 2)
526 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
527 "should be 2, in big-endian or little-endian format."),
528 handle_get_filename (r->fh), hdr.layout_code));
530 r->reverse_endian = 1;
531 bswap_int32 (&hdr.case_size);
532 bswap_int32 (&hdr.compress);
533 bswap_int32 (&hdr.weight_idx);
534 bswap_int32 (&hdr.case_cnt);
535 bswap_flt64 (&hdr.bias);
538 /* Copy basic info and verify correctness. */
539 r->value_cnt = hdr.case_size;
540 if (r->value_cnt <= 0
541 || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
542 lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
544 handle_get_filename (r->fh), r->value_cnt,
545 INT_MAX / sizeof (union value) / 2));
547 r->compressed = hdr.compress;
549 r->weight_idx = hdr.weight_idx - 1;
550 if (hdr.weight_idx < 0 || hdr.weight_idx > r->value_cnt)
551 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
552 "and number of elements per case (%d)."),
553 handle_get_filename (r->fh), hdr.weight_idx, r->value_cnt));
555 r->case_cnt = hdr.case_cnt;
556 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
558 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
559 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
562 if (r->bias != 100.0)
563 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
565 handle_get_filename (r->fh), r->bias);
567 /* Make a file label only on the condition that the given label is
568 not all spaces or nulls. */
572 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
573 if (!isspace ((unsigned char) hdr.file_label[i])
574 && hdr.file_label[i] != 0)
576 char *label = xmalloc (i + 2);
577 memcpy (label, hdr.file_label, i + 1);
579 dict_set_label (dict, label);
589 memcpy (info->creation_date, hdr.creation_date, 9);
590 info->creation_date[9] = 0;
592 memcpy (info->creation_time, hdr.creation_time, 8);
593 info->creation_time[8] = 0;
595 #ifdef WORDS_BIGENDIAN
596 info->big_endian = !r->reverse_endian;
598 info->big_endian = r->reverse_endian;
601 info->compressed = hdr.compress;
603 info->case_cnt = hdr.case_cnt;
605 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
606 if (isgraph ((unsigned char) *cp))
608 strcpy (info->product, cp);
617 /* Reads most of the dictionary from file H; also fills in the
618 associated VAR_BY_IDX array. */
620 read_variables (struct sfm_reader *r,
621 struct dictionary *dict, struct variable ***var_by_idx)
625 struct sysfile_variable sv; /* Disk buffer. */
626 int long_string_count = 0; /* # of long string continuation
627 records still expected. */
628 int next_value = 0; /* Index to next `value' structure. */
631 /* Allocate variables. */
632 *var_by_idx = xmalloc (sizeof **var_by_idx * r->value_cnt);
634 /* Read in the entry for each variable and use the info to
635 initialize the dictionary. */
636 for (i = 0; i < r->value_cnt; i++)
643 assertive_buf_read (r, &sv, sizeof sv, 0);
645 if (r->reverse_endian)
647 bswap_int32 (&sv.rec_type);
648 bswap_int32 (&sv.type);
649 bswap_int32 (&sv.has_var_label);
650 bswap_int32 (&sv.n_missing_values);
651 bswap_int32 (&sv.print);
652 bswap_int32 (&sv.write);
655 if (sv.rec_type != 2)
656 lose ((ME, _("%s: position %d: Bad record type (%d); "
657 "the expected value was 2."),
658 handle_get_filename (r->fh), i, sv.rec_type));
660 /* If there was a long string previously, make sure that the
661 continuations are present; otherwise make sure there aren't
663 if (long_string_count)
666 lose ((ME, _("%s: position %d: String variable does not have "
667 "proper number of continuation records."),
668 handle_get_filename (r->fh), i));
670 (*var_by_idx)[i] = NULL;
674 else if (sv.type == -1)
675 lose ((ME, _("%s: position %d: Superfluous long string continuation "
677 handle_get_filename (r->fh), i));
679 /* Check fields for validity. */
680 if (sv.type < 0 || sv.type > 255)
681 lose ((ME, _("%s: position %d: Bad variable type code %d."),
682 handle_get_filename (r->fh), i, sv.type));
683 if (sv.has_var_label != 0 && sv.has_var_label != 1)
684 lose ((ME, _("%s: position %d: Variable label indicator field is not "
685 "0 or 1."), handle_get_filename (r->fh), i));
686 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
687 || sv.n_missing_values == -1)
688 lose ((ME, _("%s: position %d: Missing value indicator field is not "
689 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
691 /* Copy first character of variable name. */
692 if (!isalpha ((unsigned char) sv.name[0])
693 && sv.name[0] != '@' && sv.name[0] != '#')
694 lose ((ME, _("%s: position %d: Variable name begins with invalid "
696 handle_get_filename (r->fh), i));
697 if (islower ((unsigned char) sv.name[0]))
698 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
700 handle_get_filename (r->fh), i, sv.name[0]);
701 if (sv.name[0] == '#')
702 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
703 "(`#'). Scratch variables should not appear in system "
705 handle_get_filename (r->fh), i);
706 name[0] = toupper ((unsigned char) (sv.name[0]));
708 /* Copy remaining characters of variable name. */
709 for (j = 1; j < 8; j++)
711 int c = (unsigned char) sv.name[j];
715 else if (islower (c))
717 msg (MW, _("%s: position %d: Variable name character %d is "
718 "lowercase letter %c."),
719 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
720 name[j] = toupper ((unsigned char) (c));
722 else if (isalnum (c) || c == '.' || c == '@'
723 || c == '#' || c == '$' || c == '_')
726 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
728 handle_get_filename (r->fh), i, c, c));
732 /* Create variable. */
733 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
735 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
736 handle_get_filename (r->fh), name));
738 /* Case reading data. */
739 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
740 long_string_count = nv - 1;
743 /* Get variable label, if any. */
744 if (sv.has_var_label == 1)
749 /* Read length of label. */
750 assertive_buf_read (r, &len, sizeof len, 0);
751 if (r->reverse_endian)
755 if (len < 0 || len > 255)
756 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
758 handle_get_filename (r->fh), vv->name, len));
760 /* Read label into variable structure. */
761 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
762 if (vv->label == NULL)
764 vv->label[len] = '\0';
767 /* Set missing values. */
768 if (sv.n_missing_values != 0)
772 if (vv->width > MAX_SHORT_STRING)
773 lose ((ME, _("%s: Long string variable %s may not have missing "
775 handle_get_filename (r->fh), vv->name));
777 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
779 if (r->reverse_endian && vv->type == NUMERIC)
780 for (j = 0; j < abs (sv.n_missing_values); j++)
781 bswap_flt64 (&mv[j]);
783 if (sv.n_missing_values > 0)
785 vv->miss_type = sv.n_missing_values;
786 if (vv->type == NUMERIC)
787 for (j = 0; j < sv.n_missing_values; j++)
788 vv->missing[j].f = mv[j];
790 for (j = 0; j < sv.n_missing_values; j++)
791 memcpy (vv->missing[j].s, &mv[j], vv->width);
797 if (vv->type == ALPHA)
798 lose ((ME, _("%s: String variable %s may not have missing "
799 "values specified as a range."),
800 handle_get_filename (r->fh), vv->name));
802 if (mv[0] == r->lowest)
804 vv->miss_type = MISSING_LOW;
805 vv->missing[x++].f = mv[1];
807 else if (mv[1] == r->highest)
809 vv->miss_type = MISSING_HIGH;
810 vv->missing[x++].f = mv[0];
814 vv->miss_type = MISSING_RANGE;
815 vv->missing[x++].f = mv[0];
816 vv->missing[x++].f = mv[1];
819 if (sv.n_missing_values == -3)
822 vv->missing[x++].f = mv[2];
827 vv->miss_type = MISSING_NONE;
829 if (!parse_format_spec (r, sv.print, &vv->print, vv)
830 || !parse_format_spec (r, sv.write, &vv->write, vv))
833 /* Add variable to list. */
834 if (var_cap >= r->var_cnt)
836 var_cap = 2 + r->var_cnt * 2;
837 r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars);
839 r->vars[r->var_cnt].width = vv->width;
840 r->vars[r->var_cnt].fv = vv->fv;
844 /* Some consistency checks. */
845 if (long_string_count != 0)
846 lose ((ME, _("%s: Long string continuation records omitted at end of "
848 handle_get_filename (r->fh)));
849 if (next_value != r->value_cnt)
850 lose ((ME, _("%s: System file header indicates %d variable positions but "
851 "%d were read from file."),
852 handle_get_filename (r->fh), r->value_cnt, next_value));
860 /* Translates the format spec from sysfile format to internal
863 parse_format_spec (struct sfm_reader *r, int32 s, struct fmt_spec *v, struct variable *vv)
865 v->type = translate_fmt ((s >> 16) & 0xff);
867 lose ((ME, _("%s: Bad format specifier byte (%d)."),
868 handle_get_filename (r->fh), (s >> 16) & 0xff));
869 v->w = (s >> 8) & 0xff;
872 /* FIXME? Should verify the resulting specifier more thoroughly. */
875 lose ((ME, _("%s: Bad format specifier byte (%d)."),
876 handle_get_filename (r->fh), (s >> 16) & 0xff));
877 if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
878 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
879 handle_get_filename (r->fh),
880 vv->type == ALPHA ? _("String") : _("Numeric"),
882 formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
883 formats[v->type].name));
890 /* Reads value labels from sysfile H and inserts them into the
891 associated dictionary. */
893 read_value_labels (struct sfm_reader *r,
894 struct dictionary *dict, struct variable **var_by_idx)
898 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
899 union value value; /* Value. */
900 char *label; /* Null-terminated label string. */
903 struct label *labels = NULL;
904 int32 n_labels; /* Number of labels. */
906 struct variable **var = NULL; /* Associated variables. */
907 int32 n_vars; /* Number of associated variables. */
911 /* First step: read the contents of the type 3 record and record its
912 contents. Note that we can't do much with the data since we
913 don't know yet whether it is of numeric or string type. */
915 /* Read number of labels. */
916 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
917 if (r->reverse_endian)
918 bswap_int32 (&n_labels);
920 /* Allocate memory. */
921 labels = xmalloc (n_labels * sizeof *labels);
922 for (i = 0; i < n_labels; i++)
923 labels[i].label = NULL;
925 /* Read each value/label tuple into labels[]. */
926 for (i = 0; i < n_labels; i++)
928 struct label *label = labels + i;
929 unsigned char label_len;
933 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
935 /* Read label length. */
936 assertive_buf_read (r, &label_len, sizeof label_len, 0);
937 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
939 /* Read label, padding. */
940 label->label = xmalloc (padded_len + 1);
941 assertive_buf_read (r, label->label, padded_len - 1, 0);
942 label->label[label_len] = 0;
945 /* Second step: Read the type 4 record that has the list of
946 variables to which the value labels are to be applied. */
948 /* Read record type of type 4 record. */
952 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
953 if (r->reverse_endian)
954 bswap_int32 (&rec_type);
957 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
958 "follow value label record (type 3) as it should."),
959 handle_get_filename (r->fh)));
962 /* Read number of variables associated with value label from type 4
964 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
965 if (r->reverse_endian)
966 bswap_int32 (&n_vars);
967 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
968 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
969 "is not between 1 and the number of variables (%d)."),
970 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
972 /* Read the list of variables. */
973 var = xmalloc (n_vars * sizeof *var);
974 for (i = 0; i < n_vars; i++)
979 /* Read variable index, check range. */
980 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
981 if (r->reverse_endian)
982 bswap_int32 (&var_idx);
983 if (var_idx < 1 || var_idx > r->value_cnt)
984 lose ((ME, _("%s: Variable index associated with value label (%d) is "
985 "not between 1 and the number of values (%d)."),
986 handle_get_filename (r->fh), var_idx, r->value_cnt));
988 /* Make sure it's a real variable. */
989 v = var_by_idx[var_idx - 1];
991 lose ((ME, _("%s: Variable index associated with value label (%d) "
992 "refers to a continuation of a string variable, not to "
993 "an actual variable."),
994 handle_get_filename (r->fh), var_idx));
995 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
996 lose ((ME, _("%s: Value labels are not allowed on long string "
998 handle_get_filename (r->fh), v->name));
1000 /* Add it to the list of variables. */
1004 /* Type check the variables. */
1005 for (i = 1; i < n_vars; i++)
1006 if (var[i]->type != var[0]->type)
1007 lose ((ME, _("%s: Variables associated with value label are not all of "
1008 "identical type. Variable %s has %s type, but variable "
1010 handle_get_filename (r->fh),
1011 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1012 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1014 /* Fill in labels[].value, now that we know the desired type. */
1015 for (i = 0; i < n_labels; i++)
1017 struct label *label = labels + i;
1019 if (var[0]->type == ALPHA)
1021 const int copy_len = min (sizeof (label->raw_value),
1022 sizeof (label->label));
1023 memcpy (label->value.s, label->raw_value, copy_len);
1026 assert (sizeof f == sizeof label->raw_value);
1027 memcpy (&f, label->raw_value, sizeof f);
1028 if (r->reverse_endian)
1034 /* Assign the value_label's to each variable. */
1035 for (i = 0; i < n_vars; i++)
1037 struct variable *v = var[i];
1040 /* Add each label to the variable. */
1041 for (j = 0; j < n_labels; j++)
1043 struct label *label = labels + j;
1044 if (!val_labs_replace (v->val_labs, label->value, label->label))
1047 if (var[0]->type == NUMERIC)
1048 msg (MW, _("%s: File contains duplicate label for value %g for "
1050 handle_get_filename (r->fh), label->value.f, v->name);
1052 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1053 "for variable %s."),
1054 handle_get_filename (r->fh), v->width, label->value.s, v->name);
1058 for (i = 0; i < n_labels; i++)
1059 free (labels[i].label);
1067 for (i = 0; i < n_labels; i++)
1068 free (labels[i].label);
1075 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1076 non-NULL, uses that as the buffer; otherwise allocates at least
1077 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1080 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1083 buf = xmalloc (max (byte_cnt, min_alloc));
1084 if (1 != fread (buf, byte_cnt, 1, r->file))
1086 if (ferror (r->file))
1087 msg (ME, _("%s: Reading system file: %s."),
1088 handle_get_filename (r->fh), strerror (errno));
1090 corrupt_msg (ME, _("%s: Unexpected end of file."),
1091 handle_get_filename (r->fh));
1097 /* Reads a document record, type 6, from system file R, and sets up
1098 the documents and n_documents fields in the associated
1101 read_documents (struct sfm_reader *r, struct dictionary *dict)
1106 if (dict_get_documents (dict) != NULL)
1107 lose ((ME, _("%s: System file contains multiple "
1108 "type 6 (document) records."),
1109 handle_get_filename (r->fh)));
1111 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1113 lose ((ME, _("%s: Number of document lines (%ld) "
1114 "must be greater than 0."),
1115 handle_get_filename (r->fh), (long) line_cnt));
1117 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1118 /* FIXME? Run through asciify. */
1119 if (documents == NULL)
1121 documents[80 * line_cnt] = '\0';
1122 dict_set_documents (dict, documents);
1132 /* Reads compressed data into H->BUF and sets other pointers
1133 appropriately. Returns nonzero only if both no errors occur and
1136 buffer_input (struct sfm_reader *r)
1141 r->buf = xmalloc (sizeof *r->buf * 128);
1142 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1143 if (ferror (r->file))
1145 msg (ME, _("%s: Error reading file: %s."),
1146 handle_get_filename (r->fh), strerror (errno));
1150 r->end = &r->buf[amt];
1154 /* Reads a single case consisting of compressed data from system
1155 file H into the array BUF[] according to reader R, and
1156 returns nonzero only if successful. */
1157 /* Data in system files is compressed in this manner. Data
1158 values are grouped into sets of eight ("octets"). Each value
1159 in an octet has one instruction byte that are output together.
1160 Each instruction byte gives a value for that byte or indicates
1161 that the value can be found following the instructions. */
1163 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1165 const unsigned char *p_end = r->x + sizeof (flt64);
1166 unsigned char *p = r->y;
1168 const flt64 *buf_beg = buf;
1169 const flt64 *buf_end = &buf[r->value_cnt];
1173 for (; p < p_end; p++)
1177 /* Code 0 is ignored. */
1180 /* Code 252 is end of file. */
1182 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1183 "in partial case."),
1184 handle_get_filename (r->fh)));
1187 /* Code 253 indicates that the value is stored explicitly
1188 following the instruction bytes. */
1189 if (r->ptr == NULL || r->ptr >= r->end)
1190 if (!buffer_input (r))
1192 lose ((ME, _("%s: Unexpected end of file."),
1193 handle_get_filename (r->fh)));
1196 memcpy (buf++, r->ptr++, sizeof *buf);
1201 /* Code 254 indicates a string that is all blanks. */
1202 memset (buf++, ' ', sizeof *buf);
1207 /* Code 255 indicates the system-missing value. */
1209 if (r->reverse_endian)
1216 /* Codes 1 through 251 inclusive are taken to indicate a
1217 value of (BYTE - BIAS), where BYTE is the byte's value
1218 and BIAS is the compression bias (generally 100.0). */
1219 *buf = *p - r->bias;
1220 if (r->reverse_endian)
1228 /* We have reached the end of this instruction octet. Read
1230 if (r->ptr == NULL || r->ptr >= r->end)
1231 if (!buffer_input (r))
1234 lose ((ME, _("%s: Unexpected end of file."),
1235 handle_get_filename (r->fh)));
1238 memcpy (r->x, r->ptr++, sizeof *buf);
1246 /* We have filled up an entire record. Update state and return
1252 /* We have been unsuccessful at filling a record, either through i/o
1253 error or through an end-of-file indication. Update state and
1254 return unsuccessfully. */
1258 /* Reads one case from READER's file into C. Returns nonzero
1259 only if successful. */
1261 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1263 if (!r->compressed && sizeof (flt64) == sizeof (double))
1265 /* Fast path: external and internal representations are the
1266 same, except possibly for endianness or SYSMIS. Read
1267 directly into the case's buffer, then fix up any minor
1268 details as needed. */
1269 if (!fread_ok (r, case_data_all_rw (c),
1270 sizeof (union value) * r->value_cnt))
1273 /* Fix up endianness if needed. */
1274 if (r->reverse_endian)
1278 for (i = 0; i < r->var_cnt; i++)
1279 if (r->vars[i].width == 0)
1280 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1283 /* Fix up SYSMIS values if needed.
1284 I don't think this will ever actually kick in, but it
1286 if (r->sysmis != SYSMIS)
1290 for (i = 0; i < r->var_cnt; i++)
1291 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1292 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1297 /* Slow path: internal and external representations differ.
1298 Read into a bounce buffer, then copy to C. */
1305 bounce_size = sizeof *bounce * r->value_cnt;
1306 bounce = bounce_cur = local_alloc (bounce_size);
1309 read_ok = fread_ok (r, bounce, bounce_size);
1311 read_ok = read_compressed_data (r, bounce);
1314 local_free (bounce);
1318 for (i = 0; i < r->var_cnt; i++)
1320 struct sfm_var *v = &r->vars[i];
1324 flt64 f = *bounce_cur++;
1325 if (r->reverse_endian)
1327 case_data_rw (c, i)->f = f == r->sysmis ? SYSMIS : f;
1331 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1332 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1336 local_free (bounce);
1342 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1344 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1346 if (read_bytes == byte_cnt)
1350 if (ferror (r->file))
1351 msg (ME, _("%s: Reading system file: %s."),
1352 handle_get_filename (r->fh), strerror (errno));
1353 else if (read_bytes != 0)
1354 msg (ME, _("%s: Partial record at end of system file."),
1355 handle_get_filename (r->fh));