1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
44 #include "debug-print.h"
46 /* System file reader. */
49 struct file_handle *fh; /* File handle. */
50 FILE *file; /* File stream. */
52 int reverse_endian; /* 1=file has endianness opposite us. */
53 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
54 int value_cnt; /* Number of `union values's per case. */
55 long case_cnt; /* Number of cases, -1 if unknown. */
56 int compressed; /* 1=compressed, 0=not compressed. */
57 double bias; /* Compression bias, usually 100.0. */
58 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 struct sfm_var *vars; /* Variables. */
62 size_t var_cnt; /* Number of variables. */
64 /* File's special constants. */
69 /* Decompression buffer. */
70 flt64 *buf; /* Buffer data. */
71 flt64 *ptr; /* Current location in buffer. */
72 flt64 *end; /* End of buffer data. */
74 /* Compression instruction octet. */
75 unsigned char x[8]; /* Current instruction octet. */
76 unsigned char *y; /* Location in current instruction octet. */
79 /* A variable in a system file. */
82 int width; /* 0=numeric, otherwise string width. */
83 int fv; /* Index into case. */
88 /* Swap bytes *A and *B. */
90 bswap (unsigned char *a, unsigned char *b)
97 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
99 bswap_int32 (int32 *x_)
101 unsigned char *x = (unsigned char *) x_;
102 bswap (x + 0, x + 3);
103 bswap (x + 1, x + 2);
106 /* Reverse the byte order of 64-bit floating point *X. */
108 bswap_flt64 (flt64 *x_)
110 unsigned char *x = (unsigned char *) x_;
111 bswap (x + 0, x + 7);
112 bswap (x + 1, x + 6);
113 bswap (x + 2, x + 5);
114 bswap (x + 3, x + 4);
118 corrupt_msg (int class, const char *format,...)
119 PRINTF_FORMAT (2, 3);
121 /* Displays a corrupt sysfile error. */
123 corrupt_msg (int class, const char *format,...)
130 va_start (args, format);
131 vsnprintf (buf, 1024, format, args);
139 getl_location (&e.where.filename, &e.where.line_number);
140 e.title = _("corrupt system file: ");
147 /* Closes a system file after we're done with it. */
149 sfm_close_reader (struct sfm_reader *r)
155 fh_close (r->fh, "system file", "rs");
158 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
159 msg (ME, _("%s: Closing system file: %s."),
160 handle_get_filename (r->fh), strerror (errno));
168 /* Dictionary reader. */
170 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
173 static int read_header (struct sfm_reader *,
174 struct dictionary *, struct sfm_read_info *);
175 static int parse_format_spec (struct sfm_reader *, int32,
176 struct fmt_spec *, struct variable *);
177 static int read_value_labels (struct sfm_reader *, struct dictionary *,
178 struct variable **var_by_idx);
179 static int read_variables (struct sfm_reader *,
180 struct dictionary *, struct variable ***var_by_idx);
181 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
182 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
183 static int read_documents (struct sfm_reader *, struct dictionary *);
185 static int fread_ok (struct sfm_reader *, void *, size_t);
187 /* Displays the message X with corrupt_msg, then jumps to the error
195 /* Calls buf_read with the specified arguments, and jumps to
196 error if the read fails. */
197 #define assertive_buf_read(a,b,c,d) \
199 if (!buf_read (a,b,c,d)) \
203 /* Opens the system file designated by file handle FH for
204 reading. Reads the system file's dictionary into *DICT.
205 If INFO is non-null, then it receives additional info about the
208 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
209 struct sfm_read_info *info)
211 struct sfm_reader *r = NULL;
212 struct variable **var_by_idx = NULL;
214 *dict = dict_create ();
215 if (!fh_open (fh, "system file", "rs"))
218 /* Create and initialize reader. */
219 r = xmalloc (sizeof *r);
221 r->file = fn_open (handle_get_filename (fh), "rb");
223 r->reverse_endian = 0;
234 r->sysmis = -FLT64_MAX;
235 r->highest = FLT64_MAX;
236 r->lowest = second_lowest_flt64;
238 r->buf = r->ptr = r->end = NULL;
239 r->y = r->x + sizeof r->x;
241 /* Check that file open succeeded. */
244 msg (ME, _("An error occurred while opening \"%s\" for reading "
245 "as a system file: %s."),
246 handle_get_filename (r->fh), strerror (errno));
251 /* Read header and variables. */
252 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
255 /* Handle weighting. */
256 if (r->weight_idx != -1)
258 struct variable *weight_var = var_by_idx[r->weight_idx];
260 if (weight_var == NULL)
262 _("%s: Weighting variable may not be a continuation of "
263 "a long string variable."), handle_get_filename (fh)));
264 else if (weight_var->type == ALPHA)
265 lose ((ME, _("%s: Weighting variable may not be a string variable."),
266 handle_get_filename (fh)));
268 dict_set_weight (*dict, weight_var);
271 dict_set_weight (*dict, NULL);
273 /* Read records of types 3, 4, 6, and 7. */
278 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
279 if (r->reverse_endian)
280 bswap_int32 (&rec_type);
285 if (!read_value_labels (r, *dict, var_by_idx))
290 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
291 "records must always immediately follow type 3 "
293 handle_get_filename (r->fh)));
296 if (!read_documents (r, *dict))
312 assertive_buf_read (r, &data, sizeof data, 0);
313 if (r->reverse_endian)
315 bswap_int32 (&data.subtype);
316 bswap_int32 (&data.size);
317 bswap_int32 (&data.count);
320 switch (data.subtype)
323 if (!read_machine_int32_info (r, data.size, data.count))
328 if (!read_machine_flt64_info (r, data.size, data.count))
334 case 11: /* ?? Used by SPSS 8.0. */
339 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
340 "encountered in system file."),
341 handle_get_filename (r->fh), data.subtype);
347 void *x = buf_read (r, NULL, data.size * data.count, 0);
359 assertive_buf_read (r, &filler, sizeof filler, 0);
364 lose ((ME, _("%s: Unrecognized record type %d."),
365 handle_get_filename (r->fh), rec_type));
370 /* Come here on successful completion. */
375 /* Come here on unsuccessful completion. */
376 sfm_close_reader (r);
380 dict_destroy (*dict);
386 /* Read record type 7, subtype 3. */
388 read_machine_int32_info (struct sfm_reader *r, int size, int count)
395 if (size != sizeof (int32) || count != 8)
396 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
397 "subtype 3. Expected size %d, count 8."),
398 handle_get_filename (r->fh), size, count, sizeof (int32)));
400 assertive_buf_read (r, data, sizeof data, 0);
401 if (r->reverse_endian)
402 for (i = 0; i < 8; i++)
403 bswap_int32 (&data[i]);
407 lose ((ME, _("%s: Floating-point representation in system file is not "
408 "IEEE-754. PSPP cannot convert between floating-point "
410 handle_get_filename (r->fh)));
412 #error Add support for your floating-point format.
415 #ifdef WORDS_BIGENDIAN
420 if (r->reverse_endian)
422 if (file_bigendian ^ (data[6] == 1))
423 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
424 "endianness intuited from file header (%s)."),
425 handle_get_filename (r->fh),
426 file_bigendian ? _("big-endian") : _("little-endian"),
427 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
430 /* PORTME: Character representation code. */
431 if (data[7] != 2 && data[7] != 3)
432 lose ((ME, _("%s: File-indicated character representation code (%s) is "
434 handle_get_filename (r->fh),
435 (data[7] == 1 ? "EBCDIC"
436 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
444 /* Read record type 7, subtype 4. */
446 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
451 if (size != sizeof (flt64) || count != 3)
452 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
453 "subtype 4. Expected size %d, count 8."),
454 handle_get_filename (r->fh), size, count, sizeof (flt64)));
456 assertive_buf_read (r, data, sizeof data, 0);
457 if (r->reverse_endian)
458 for (i = 0; i < 3; i++)
459 bswap_flt64 (&data[i]);
461 if (data[0] != SYSMIS || data[1] != FLT64_MAX
462 || data[2] != second_lowest_flt64)
465 r->highest = data[1];
467 msg (MW, _("%s: File-indicated value is different from internal value "
468 "for at least one of the three system values. SYSMIS: "
469 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
471 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
472 (double) data[1], (double) FLT64_MAX,
473 (double) data[2], (double) second_lowest_flt64);
483 read_header (struct sfm_reader *r,
484 struct dictionary *dict, struct sfm_read_info *info)
486 struct sysfile_header hdr; /* Disk buffer. */
487 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
488 int skip_amt = 0; /* Amount of product name to omit. */
491 /* Read header, check magic. */
492 assertive_buf_read (r, &hdr, sizeof hdr, 0);
493 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
494 lose ((ME, _("%s: Bad magic. Proper system files begin with "
495 "the four characters `$FL2'. This file will not be read."),
496 handle_get_filename (r->fh)));
498 /* Check eye-catcher string. */
499 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
500 for (i = 0; i < 60; i++)
501 if (!isprint ((unsigned char) prod_name[i]))
503 for (i = 59; i >= 0; i--)
504 if (!isgraph ((unsigned char) prod_name[i]))
509 prod_name[60] = '\0';
513 static const char *prefix[N_PREFIXES] =
515 "@(#) SPSS DATA FILE",
521 for (i = 0; i < N_PREFIXES; i++)
522 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
524 skip_amt = strlen (prefix[i]);
529 /* Check endianness. */
530 if (hdr.layout_code == 2)
531 r->reverse_endian = 0;
534 bswap_int32 (&hdr.layout_code);
535 if (hdr.layout_code != 2)
536 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
537 "should be 2, in big-endian or little-endian format."),
538 handle_get_filename (r->fh), hdr.layout_code));
540 r->reverse_endian = 1;
541 bswap_int32 (&hdr.case_size);
542 bswap_int32 (&hdr.compress);
543 bswap_int32 (&hdr.weight_idx);
544 bswap_int32 (&hdr.case_cnt);
545 bswap_flt64 (&hdr.bias);
548 /* Copy basic info and verify correctness. */
549 r->value_cnt = hdr.case_size;
550 if (r->value_cnt <= 0
551 || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
552 lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
554 handle_get_filename (r->fh), r->value_cnt,
555 INT_MAX / sizeof (union value) / 2));
557 r->compressed = hdr.compress;
559 r->weight_idx = hdr.weight_idx - 1;
560 if (hdr.weight_idx < 0 || hdr.weight_idx > r->value_cnt)
561 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
562 "and number of elements per case (%d)."),
563 handle_get_filename (r->fh), hdr.weight_idx, r->value_cnt));
565 r->case_cnt = hdr.case_cnt;
566 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
568 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
569 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
572 if (r->bias != 100.0)
573 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
575 handle_get_filename (r->fh), r->bias);
577 /* Make a file label only on the condition that the given label is
578 not all spaces or nulls. */
582 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
583 if (!isspace ((unsigned char) hdr.file_label[i])
584 && hdr.file_label[i] != 0)
586 char *label = xmalloc (i + 2);
587 memcpy (label, hdr.file_label, i + 1);
589 dict_set_label (dict, label);
599 memcpy (info->creation_date, hdr.creation_date, 9);
600 info->creation_date[9] = 0;
602 memcpy (info->creation_time, hdr.creation_time, 8);
603 info->creation_time[8] = 0;
605 #ifdef WORDS_BIGENDIAN
606 info->big_endian = !r->reverse_endian;
608 info->big_endian = r->reverse_endian;
611 info->compressed = hdr.compress;
613 info->case_cnt = hdr.case_cnt;
615 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
616 if (isgraph ((unsigned char) *cp))
618 strcpy (info->product, cp);
627 /* Reads most of the dictionary from file H; also fills in the
628 associated VAR_BY_IDX array. */
630 read_variables (struct sfm_reader *r,
631 struct dictionary *dict, struct variable ***var_by_idx)
635 struct sysfile_variable sv; /* Disk buffer. */
636 int long_string_count = 0; /* # of long string continuation
637 records still expected. */
638 int next_value = 0; /* Index to next `value' structure. */
641 /* Allocate variables. */
642 *var_by_idx = xmalloc (sizeof **var_by_idx * r->value_cnt);
644 /* Read in the entry for each variable and use the info to
645 initialize the dictionary. */
646 for (i = 0; i < r->value_cnt; i++)
653 assertive_buf_read (r, &sv, sizeof sv, 0);
655 if (r->reverse_endian)
657 bswap_int32 (&sv.rec_type);
658 bswap_int32 (&sv.type);
659 bswap_int32 (&sv.has_var_label);
660 bswap_int32 (&sv.n_missing_values);
661 bswap_int32 (&sv.print);
662 bswap_int32 (&sv.write);
665 if (sv.rec_type != 2)
666 lose ((ME, _("%s: position %d: Bad record type (%d); "
667 "the expected value was 2."),
668 handle_get_filename (r->fh), i, sv.rec_type));
670 /* If there was a long string previously, make sure that the
671 continuations are present; otherwise make sure there aren't
673 if (long_string_count)
676 lose ((ME, _("%s: position %d: String variable does not have "
677 "proper number of continuation records."),
678 handle_get_filename (r->fh), i));
680 (*var_by_idx)[i] = NULL;
684 else if (sv.type == -1)
685 lose ((ME, _("%s: position %d: Superfluous long string continuation "
687 handle_get_filename (r->fh), i));
689 /* Check fields for validity. */
690 if (sv.type < 0 || sv.type > 255)
691 lose ((ME, _("%s: position %d: Bad variable type code %d."),
692 handle_get_filename (r->fh), i, sv.type));
693 if (sv.has_var_label != 0 && sv.has_var_label != 1)
694 lose ((ME, _("%s: position %d: Variable label indicator field is not "
695 "0 or 1."), handle_get_filename (r->fh), i));
696 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
697 || sv.n_missing_values == -1)
698 lose ((ME, _("%s: position %d: Missing value indicator field is not "
699 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
701 /* Copy first character of variable name. */
702 if (!isalpha ((unsigned char) sv.name[0])
703 && sv.name[0] != '@' && sv.name[0] != '#')
704 lose ((ME, _("%s: position %d: Variable name begins with invalid "
706 handle_get_filename (r->fh), i));
707 if (islower ((unsigned char) sv.name[0]))
708 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
710 handle_get_filename (r->fh), i, sv.name[0]);
711 if (sv.name[0] == '#')
712 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
713 "(`#'). Scratch variables should not appear in system "
715 handle_get_filename (r->fh), i);
716 name[0] = toupper ((unsigned char) (sv.name[0]));
718 /* Copy remaining characters of variable name. */
719 for (j = 1; j < 8; j++)
721 int c = (unsigned char) sv.name[j];
725 else if (islower (c))
727 msg (MW, _("%s: position %d: Variable name character %d is "
728 "lowercase letter %c."),
729 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
730 name[j] = toupper ((unsigned char) (c));
732 else if (isalnum (c) || c == '.' || c == '@'
733 || c == '#' || c == '$' || c == '_')
736 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
738 handle_get_filename (r->fh), i, c, c));
742 /* Create variable. */
743 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
745 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
746 handle_get_filename (r->fh), name));
748 /* Case reading data. */
749 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
750 long_string_count = nv - 1;
753 /* Get variable label, if any. */
754 if (sv.has_var_label == 1)
759 /* Read length of label. */
760 assertive_buf_read (r, &len, sizeof len, 0);
761 if (r->reverse_endian)
765 if (len < 0 || len > 255)
766 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
768 handle_get_filename (r->fh), vv->name, len));
770 /* Read label into variable structure. */
771 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
772 if (vv->label == NULL)
774 vv->label[len] = '\0';
777 /* Set missing values. */
778 if (sv.n_missing_values != 0)
782 if (vv->width > MAX_SHORT_STRING)
783 lose ((ME, _("%s: Long string variable %s may not have missing "
785 handle_get_filename (r->fh), vv->name));
787 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
789 if (r->reverse_endian && vv->type == NUMERIC)
790 for (j = 0; j < abs (sv.n_missing_values); j++)
791 bswap_flt64 (&mv[j]);
793 if (sv.n_missing_values > 0)
795 vv->miss_type = sv.n_missing_values;
796 if (vv->type == NUMERIC)
797 for (j = 0; j < sv.n_missing_values; j++)
798 vv->missing[j].f = mv[j];
800 for (j = 0; j < sv.n_missing_values; j++)
801 memcpy (vv->missing[j].s, &mv[j], vv->width);
807 if (vv->type == ALPHA)
808 lose ((ME, _("%s: String variable %s may not have missing "
809 "values specified as a range."),
810 handle_get_filename (r->fh), vv->name));
812 if (mv[0] == r->lowest)
814 vv->miss_type = MISSING_LOW;
815 vv->missing[x++].f = mv[1];
817 else if (mv[1] == r->highest)
819 vv->miss_type = MISSING_HIGH;
820 vv->missing[x++].f = mv[0];
824 vv->miss_type = MISSING_RANGE;
825 vv->missing[x++].f = mv[0];
826 vv->missing[x++].f = mv[1];
829 if (sv.n_missing_values == -3)
832 vv->missing[x++].f = mv[2];
837 vv->miss_type = MISSING_NONE;
839 if (!parse_format_spec (r, sv.print, &vv->print, vv)
840 || !parse_format_spec (r, sv.write, &vv->write, vv))
843 /* Add variable to list. */
844 if (var_cap >= r->var_cnt)
846 var_cap = 2 + r->var_cnt * 2;
847 r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars);
849 r->vars[r->var_cnt].width = vv->width;
850 r->vars[r->var_cnt].fv = vv->fv;
854 /* Some consistency checks. */
855 if (long_string_count != 0)
856 lose ((ME, _("%s: Long string continuation records omitted at end of "
858 handle_get_filename (r->fh)));
859 if (next_value != r->value_cnt)
860 lose ((ME, _("%s: System file header indicates %d variable positions but "
861 "%d were read from file."),
862 handle_get_filename (r->fh), r->value_cnt, next_value));
870 /* Translates the format spec from sysfile format to internal
873 parse_format_spec (struct sfm_reader *r, int32 s, struct fmt_spec *v, struct variable *vv)
875 v->type = translate_fmt ((s >> 16) & 0xff);
877 lose ((ME, _("%s: Bad format specifier byte (%d)."),
878 handle_get_filename (r->fh), (s >> 16) & 0xff));
879 v->w = (s >> 8) & 0xff;
882 /* FIXME? Should verify the resulting specifier more thoroughly. */
885 lose ((ME, _("%s: Bad format specifier byte (%d)."),
886 handle_get_filename (r->fh), (s >> 16) & 0xff));
887 if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
888 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
889 handle_get_filename (r->fh),
890 vv->type == ALPHA ? _("String") : _("Numeric"),
892 formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
893 formats[v->type].name));
900 /* Reads value labels from sysfile H and inserts them into the
901 associated dictionary. */
903 read_value_labels (struct sfm_reader *r,
904 struct dictionary *dict, struct variable **var_by_idx)
908 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
909 union value value; /* Value. */
910 char *label; /* Null-terminated label string. */
913 struct label *labels = NULL;
914 int32 n_labels; /* Number of labels. */
916 struct variable **var = NULL; /* Associated variables. */
917 int32 n_vars; /* Number of associated variables. */
921 /* First step: read the contents of the type 3 record and record its
922 contents. Note that we can't do much with the data since we
923 don't know yet whether it is of numeric or string type. */
925 /* Read number of labels. */
926 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
927 if (r->reverse_endian)
928 bswap_int32 (&n_labels);
930 /* Allocate memory. */
931 labels = xmalloc (n_labels * sizeof *labels);
932 for (i = 0; i < n_labels; i++)
933 labels[i].label = NULL;
935 /* Read each value/label tuple into labels[]. */
936 for (i = 0; i < n_labels; i++)
938 struct label *label = labels + i;
939 unsigned char label_len;
943 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
945 /* Read label length. */
946 assertive_buf_read (r, &label_len, sizeof label_len, 0);
947 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
949 /* Read label, padding. */
950 label->label = xmalloc (padded_len + 1);
951 assertive_buf_read (r, label->label, padded_len - 1, 0);
952 label->label[label_len] = 0;
955 /* Second step: Read the type 4 record that has the list of
956 variables to which the value labels are to be applied. */
958 /* Read record type of type 4 record. */
962 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
963 if (r->reverse_endian)
964 bswap_int32 (&rec_type);
967 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
968 "follow value label record (type 3) as it should."),
969 handle_get_filename (r->fh)));
972 /* Read number of variables associated with value label from type 4
974 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
975 if (r->reverse_endian)
976 bswap_int32 (&n_vars);
977 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
978 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
979 "is not between 1 and the number of variables (%d)."),
980 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
982 /* Read the list of variables. */
983 var = xmalloc (n_vars * sizeof *var);
984 for (i = 0; i < n_vars; i++)
989 /* Read variable index, check range. */
990 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
991 if (r->reverse_endian)
992 bswap_int32 (&var_idx);
993 if (var_idx < 1 || var_idx > r->value_cnt)
994 lose ((ME, _("%s: Variable index associated with value label (%d) is "
995 "not between 1 and the number of values (%d)."),
996 handle_get_filename (r->fh), var_idx, r->value_cnt));
998 /* Make sure it's a real variable. */
999 v = var_by_idx[var_idx - 1];
1001 lose ((ME, _("%s: Variable index associated with value label (%d) "
1002 "refers to a continuation of a string variable, not to "
1003 "an actual variable."),
1004 handle_get_filename (r->fh), var_idx));
1005 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1006 lose ((ME, _("%s: Value labels are not allowed on long string "
1008 handle_get_filename (r->fh), v->name));
1010 /* Add it to the list of variables. */
1014 /* Type check the variables. */
1015 for (i = 1; i < n_vars; i++)
1016 if (var[i]->type != var[0]->type)
1017 lose ((ME, _("%s: Variables associated with value label are not all of "
1018 "identical type. Variable %s has %s type, but variable "
1020 handle_get_filename (r->fh),
1021 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1022 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1024 /* Fill in labels[].value, now that we know the desired type. */
1025 for (i = 0; i < n_labels; i++)
1027 struct label *label = labels + i;
1029 if (var[0]->type == ALPHA)
1031 const int copy_len = min (sizeof (label->raw_value),
1032 sizeof (label->label));
1033 memcpy (label->value.s, label->raw_value, copy_len);
1036 assert (sizeof f == sizeof label->raw_value);
1037 memcpy (&f, label->raw_value, sizeof f);
1038 if (r->reverse_endian)
1044 /* Assign the value_label's to each variable. */
1045 for (i = 0; i < n_vars; i++)
1047 struct variable *v = var[i];
1050 /* Add each label to the variable. */
1051 for (j = 0; j < n_labels; j++)
1053 struct label *label = labels + j;
1054 if (!val_labs_replace (v->val_labs, label->value, label->label))
1057 if (var[0]->type == NUMERIC)
1058 msg (MW, _("%s: File contains duplicate label for value %g for "
1060 handle_get_filename (r->fh), label->value.f, v->name);
1062 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1063 "for variable %s."),
1064 handle_get_filename (r->fh), v->width, label->value.s, v->name);
1068 for (i = 0; i < n_labels; i++)
1069 free (labels[i].label);
1077 for (i = 0; i < n_labels; i++)
1078 free (labels[i].label);
1085 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1086 non-NULL, uses that as the buffer; otherwise allocates at least
1087 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1090 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1093 buf = xmalloc (max (byte_cnt, min_alloc));
1094 if (1 != fread (buf, byte_cnt, 1, r->file))
1096 if (ferror (r->file))
1097 msg (ME, _("%s: Reading system file: %s."),
1098 handle_get_filename (r->fh), strerror (errno));
1100 corrupt_msg (ME, _("%s: Unexpected end of file."),
1101 handle_get_filename (r->fh));
1107 /* Reads a document record, type 6, from system file R, and sets up
1108 the documents and n_documents fields in the associated
1111 read_documents (struct sfm_reader *r, struct dictionary *dict)
1116 if (dict_get_documents (dict) != NULL)
1117 lose ((ME, _("%s: System file contains multiple "
1118 "type 6 (document) records."),
1119 handle_get_filename (r->fh)));
1121 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1123 lose ((ME, _("%s: Number of document lines (%ld) "
1124 "must be greater than 0."),
1125 handle_get_filename (r->fh), (long) line_cnt));
1127 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1128 /* FIXME? Run through asciify. */
1129 if (documents == NULL)
1131 documents[80 * line_cnt] = '\0';
1132 dict_set_documents (dict, documents);
1142 /* Reads compressed data into H->BUF and sets other pointers
1143 appropriately. Returns nonzero only if both no errors occur and
1146 buffer_input (struct sfm_reader *r)
1151 r->buf = xmalloc (sizeof *r->buf * 128);
1152 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1153 if (ferror (r->file))
1155 msg (ME, _("%s: Error reading file: %s."),
1156 handle_get_filename (r->fh), strerror (errno));
1160 r->end = &r->buf[amt];
1164 /* Reads a single case consisting of compressed data from system
1165 file H into the array BUF[] according to reader R, and
1166 returns nonzero only if successful. */
1167 /* Data in system files is compressed in this manner. Data
1168 values are grouped into sets of eight ("octets"). Each value
1169 in an octet has one instruction byte that are output together.
1170 Each instruction byte gives a value for that byte or indicates
1171 that the value can be found following the instructions. */
1173 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1175 const unsigned char *p_end = r->x + sizeof (flt64);
1176 unsigned char *p = r->y;
1178 const flt64 *buf_beg = buf;
1179 const flt64 *buf_end = &buf[r->value_cnt];
1183 for (; p < p_end; p++)
1187 /* Code 0 is ignored. */
1190 /* Code 252 is end of file. */
1192 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1193 "in partial case."),
1194 handle_get_filename (r->fh)));
1197 /* Code 253 indicates that the value is stored explicitly
1198 following the instruction bytes. */
1199 if (r->ptr == NULL || r->ptr >= r->end)
1200 if (!buffer_input (r))
1202 lose ((ME, _("%s: Unexpected end of file."),
1203 handle_get_filename (r->fh)));
1206 memcpy (buf++, r->ptr++, sizeof *buf);
1211 /* Code 254 indicates a string that is all blanks. */
1212 memset (buf++, ' ', sizeof *buf);
1217 /* Code 255 indicates the system-missing value. */
1219 if (r->reverse_endian)
1226 /* Codes 1 through 251 inclusive are taken to indicate a
1227 value of (BYTE - BIAS), where BYTE is the byte's value
1228 and BIAS is the compression bias (generally 100.0). */
1229 *buf = *p - r->bias;
1230 if (r->reverse_endian)
1238 /* We have reached the end of this instruction octet. Read
1240 if (r->ptr == NULL || r->ptr >= r->end)
1241 if (!buffer_input (r))
1244 lose ((ME, _("%s: Unexpected end of file."),
1245 handle_get_filename (r->fh)));
1248 memcpy (r->x, r->ptr++, sizeof *buf);
1256 /* We have filled up an entire record. Update state and return
1262 /* We have been unsuccessful at filling a record, either through i/o
1263 error or through an end-of-file indication. Update state and
1264 return unsuccessfully. */
1268 /* Reads one case from READER's file into C. Returns nonzero
1269 only if successful. */
1271 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1273 if (!r->compressed && sizeof (flt64) == sizeof (double))
1275 /* Fast path: external and internal representations are the
1276 same, except possibly for endianness or SYSMIS. Read
1277 directly into the case's buffer, then fix up any minor
1278 details as needed. */
1279 if (!fread_ok (r, case_data_all_rw (c),
1280 sizeof (union value) * r->value_cnt))
1283 /* Fix up endianness if needed. */
1284 if (r->reverse_endian)
1288 for (i = 0; i < r->var_cnt; i++)
1289 if (r->vars[i].width == 0)
1290 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1293 /* Fix up SYSMIS values if needed.
1294 I don't think this will ever actually kick in, but it
1296 if (r->sysmis != SYSMIS)
1300 for (i = 0; i < r->var_cnt; i++)
1301 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1302 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1307 /* Slow path: internal and external representations differ.
1308 Read into a bounce buffer, then copy to C. */
1315 bounce_size = sizeof *bounce * r->value_cnt;
1316 bounce = bounce_cur = local_alloc (bounce_size);
1319 read_ok = fread_ok (r, bounce, bounce_size);
1321 read_ok = read_compressed_data (r, bounce);
1324 local_free (bounce);
1328 for (i = 0; i < r->var_cnt; i++)
1330 struct sfm_var *v = &r->vars[i];
1334 flt64 f = *bounce_cur++;
1335 if (r->reverse_endian)
1337 case_data_rw (c, i)->f = f == r->sysmis ? SYSMIS : f;
1341 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1342 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1346 local_free (bounce);
1352 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1354 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1356 if (read_bytes == byte_cnt)
1360 if (ferror (r->file))
1361 msg (ME, _("%s: Reading system file: %s."),
1362 handle_get_filename (r->fh), strerror (errno));
1363 else if (read_bytes != 0)
1364 msg (ME, _("%s: Partial record at end of system file."),
1365 handle_get_filename (r->fh));